| | 1 | #include <stdio.h> |
| | 2 | #include <stdlib.h> |
| | 3 | #include <stdint.h> |
| | 4 | #include <math.h> |
| | 5 | #include "gmp.h" |
| | 6 | #include "galrep.h" |
| | 7 | |
| | 8 | |
| | 9 | /* |
| | 10 | Copyright 2009 Andrew V. Sutherland |
| | 11 | |
| | 12 | This file is part of galrep. |
| | 13 | |
| | 14 | galrep is free software: you can redistribute it and/or modify |
| | 15 | it under the terms of the GNU General Public License as published by |
| | 16 | the Free Software Foundation, either version 2 of the License, or |
| | 17 | (at your option) any later version. |
| | 18 | |
| | 19 | galrep is distributed in the hope that it will be useful, |
| | 20 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| | 21 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| | 22 | GNU General Public License for more details. |
| | 23 | |
| | 24 | You should have received a copy of the GNU General Public License |
| | 25 | along with smalljac. If not, see <http://www.gnu.org/licenses/>. |
| | 26 | */ |
| | 27 | |
| | 28 | // NB: The int datatype is assumed to hold at least 32-bits |
| | 29 | |
| | 30 | #define MASK_BITS 32 |
| | 31 | typedef uint32_t bitmask_t; |
| | 32 | |
| | 33 | static int elltab[GALREP_ELL_COUNT] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59 }; |
| | 34 | static int ellitab[GALREP_MAX_ELL+1]; // ellitab[ell] = i iff elltab[i] = ell, set to -1 ow. initialized by galrep_load_gl2data |
| | 35 | |
| | 36 | /* |
| | 37 | General utility stuff for barebones modular arithmetic and bitmask manipulation. |
| | 38 | Note that we assume throughout that the modulus p is small enough so that 4*p^2 fits in an unsigned int |
| | 39 | We include this utility code here just to make things self contained. |
| | 40 | */ |
| | 41 | static inline void swab16 (uint16_t *x) { *x = ((*x&(uint16_t)0x00ffU) << 8) | ((*x&(uint16_t)0xff00U) >> 8); } |
| | 42 | static inline void swab32 (uint32_t *x) { *x = ((*x&(uint32_t)0x000000ffU) << 24) | ((*x&(uint32_t)0x0000ff00U) << 8) | ((*x&(uint32_t)0x00ff0000U) >> 8) | ((*x&(uint32_t)0xff000000U) >> 24); } |
| | 43 | |
| | 44 | double log2 (double x); |
| | 45 | |
| | 46 | // the modular aritmetic below is about twice as fast as the % operator on an AMD Athlon 64 (YMMV) |
| | 47 | static unsigned int ui_mod_i (unsigned int x, unsigned int m, double mi) |
| | 48 | { register unsigned int t, z; t = mi * x - 0.5; z = x-t*m; if ( z >= m ) z-= m; return z; } |
| | 49 | static unsigned int ui_mod (unsigned int x, unsigned int m) { return ui_mod_i (x, m, 1.0/m); } |
| | 50 | static int i_mod(int x, int m) { register int t; if ( x >= 0 ) return ui_mod(x,m); t = - (int)ui_mod((unsigned int)(-x),m); return (t<0?t+m:t); } |
| | 51 | int gcdext (int a, int b, int *x, int *y); |
| | 52 | int legendre (int a, int b); |
| | 53 | static int inverse_mod_p (int a, int p) { int y; if ( gcdext(p, a, 0, &y) != 1 ) return 0; else return (y >= 0 ? y : y+p ); } |
| | 54 | static int residue_mod_p (int a, int p) { return ( legendre (a, p) < 0 ? 0 : 1 ); } |
| | 55 | |
| | 56 | static inline void bitset (bitmask_t *mask, int i) { mask[i/MASK_BITS] |= (1UL<<(i&(MASK_BITS-1))); } |
| | 57 | static inline void bitclr (bitmask_t *mask, int i) { mask[i/MASK_BITS] &= ~(1UL<<(i&(MASK_BITS-1))); } |
| | 58 | static inline int bittest (bitmask_t *mask, int i) { return ( (mask[i/MASK_BITS]>>(i&(MASK_BITS-1))) & 1UL ); } |
| | 59 | static inline void maskand (bitmask_t *mask, bitmask_t *mask1, bitmask_t *mask2, int words) { register int i; for ( i = 0 ; i < words ; i++ ) mask[i] = mask1[i] & mask2[i]; } |
| | 60 | static inline void maskor (bitmask_t *mask, bitmask_t *mask1, bitmask_t *mask2, int words) { register int i; for ( i = 0 ; i < words ; i++ ) mask[i] = mask1[i] | mask2[i]; } |
| | 61 | static inline int maskclear (bitmask_t *mask, int words) { register int i; for ( i = 0 ; i < words ; i++ ) mask[i] = 0; } |
| | 62 | static inline int masksetall (bitmask_t *mask, int words) { register int i; for ( i = 0 ; i < words ; i++ ) mask[i] = ~((bitmask_t)0UL); } |
| | 63 | static inline int masknull (bitmask_t *mask, int words) { register int i; for ( i = 0 ; i < words ; i++ ) if ( mask[i] ) return 0; return 1; } |
| | 64 | static inline int maskequal (bitmask_t *mask1, bitmask_t *mask2, int words) { register int i; for ( i = 0 ; i < words ; i++ ) if ( mask1[i] != mask2[i] ) return 0; return 1; } |
| | 65 | static inline void maskcopy (bitmask_t *mask1, bitmask_t *mask2, int words) { register int i; for ( i = 0 ; i < words ; i++ ) mask1[i] = mask2[i]; } |
| | 66 | |
| | 67 | /* |
| | 68 | ecdata routines go here - functions to load/unload/lookup precomputed group structure data |
| | 69 | for every elliptic curve over Fp for various small p > 3. |
| | 70 | */ |
| | 71 | |
| | 72 | /* |
| | 73 | Elliptic curve data for small primes in [4,32768] (the exact number depends on the file loaded). |
| | 74 | The jdata table includes for each j-invariant over F_p: |
| | 75 | 1) the absolute value of the trace (stored in the high 11 bits) |
| | 76 | 2) bit 0 indicating the sign of the trace for the twist of the form y^2=x^3+Ax+B for which A/B is a quadratic residue |
| | 77 | 3) bits 1,2,3,4 indicating whether either twist has full 2,3,5,7-torsion mod p. |
| | 78 | If bit 1 is set, both twists have 2-torsion, and for odd ell the twist for which p+1-t = 0 mod ell is the one with ell-torsion |
| | 79 | For ell from 11 to 59, if p is 1 mod ell then a p-terminated list of increasing j-invariants with full ell-torsion is stored. |
| | 80 | |
| | 81 | When p=1 mod 3, the data for j-invariant 0 should be ignored, since there are 6 twists, not 2, and we don't distinguish them (we could, but we don't) |
| | 82 | Similarly, when p=1 mod 4, the data for j-invariant 1728 should be ignored, since there are 4 twists, not 2. |
| | 83 | |
| | 84 | Note that we *could* distinguish the twists for 0 and 1728, we just don't *need* to, we will just ignore primes where this occurs. |
| | 85 | In the worst case, when we have a curve j-invariant 0 or 1728 over Q, we only use half the primes, but otherwise we use almost all of them. |
| | 86 | */ |
| | 87 | |
| | 88 | #define TORP 13 |
| | 89 | #define ECDATA_ID 12345 |
| | 90 | |
| | 91 | static struct ectab_entry_struct { |
| | 92 | uint16_t p; // a prime p in the range [4,32768] |
| | 93 | uint16_t ptor; // mask in which the nth bit is set iff p is 1 mod the (n+5)th prime (n=0 for 11, n=12 for 59) |
| | 94 | uint16_t *jdata; // see details above |
| | 95 | uint16_t *torlist[TORP]; // torlist[i] points to a zero-terminated list of j-invariants with ell-torsion, where ell is the (n+5)th prime (n=0 for ell=11) |
| | 96 | } *ecdata; |
| | 97 | static int ecdatacnt; |
| | 98 | |
| | 99 | |
| | 100 | void galrep_ecdata_unload (void) |
| | 101 | { |
| | 102 | int i; |
| | 103 | |
| | 104 | if ( ecdata ) { |
| | 105 | for ( i = 0 ; i < ecdatacnt ; i++ ) if ( ecdata[i].jdata ) { free (ecdata[i].jdata); ecdata[i].jdata = 0; } |
| | 106 | free (ecdata); |
| | 107 | } |
| | 108 | ecdata = 0; ecdatacnt = 0; |
| | 109 | } |
| | 110 | |
| | 111 | int galrep_ecdata_load (char *filename) |
| | 112 | { |
| | 113 | FILE *fp; |
| | 114 | struct ectab_entry_struct *tabspace, *tabnext; |
| | 115 | char *s, *buf; |
| | 116 | uint16_t x16 , pcnt, *ps, *pe; |
| | 117 | int i, j, swab_flag, bytes; |
| | 118 | |
| | 119 | if ( ecdata ) galrep_ecdata_unload(); |
| | 120 | if ( ! filename || ! filename[0] ) filename = (char*)GALREP_ECDATA_FILENAME; |
| | 121 | fp = fopen (filename, "rb"); |
| | 122 | if ( ! fp ) return GALREP_FILE_NOT_FOUND; |
| | 123 | if ( fread(&x16,2,1,fp) != 1 ) goto read_error; |
| | 124 | if ( x16 != ECDATA_ID ) { swab_flag = 1; swab16(&x16); if ( x16 != ECDATA_ID ) { fclose(fp); return GALREP_BAD_ECDATA; } } else { swab_flag = 0; } |
| | 125 | if ( fread(&x16,2,1,fp) != 1 ) goto read_error; if ( swab_flag ) swab16(&x16); |
| | 126 | ecdatacnt = x16; bytes = ecdatacnt * sizeof(*ecdata); |
| | 127 | ecdata = (struct ectab_entry_struct *) malloc(bytes); |
| | 128 | if ( ! ecdata ) { fclose(fp); galrep_ecdata_unload(); return GALREP_MALLOC_FAILED; } |
| | 129 | for ( i = 0 ; i < ecdatacnt ; i++ ) { |
| | 130 | if ( fread(&x16,2,1,fp) != 1 ) goto read_error; if ( swab_flag ) swab16(&x16); ecdata[i].p = x16; |
| | 131 | if ( fread(&x16,2,1,fp) != 1 ) goto read_error; if ( swab_flag ) swab16(&x16); ecdata[i].ptor = x16; |
| | 132 | if ( fread(&x16,2,1,fp) != 1 ) goto read_error; if ( swab_flag ) swab16(&x16); pcnt = x16; |
| | 133 | if ( pcnt < ecdata[i].p ) { fclose(fp); printf ("bad pcnt=%d for prime %d in ECDATA file %s\n", pcnt, ecdata[i].p, filename); galrep_ecdata_unload(); return GALREP_BAD_ECDATA; } |
| | 134 | ecdata[i].jdata = (uint16_t *) malloc(2*pcnt); |
| | 135 | pe = ecdata[i].jdata + pcnt; |
| | 136 | if ( ! ecdata[i].jdata ) { fclose (fp); galrep_ecdata_unload(); return GALREP_MALLOC_FAILED; } |
| | 137 | if ( fread(ecdata[i].jdata,2,pcnt,fp) != pcnt ) goto read_error; |
| | 138 | if ( swab_flag ) for ( ps = ecdata[i].jdata ; ps < pe ; ps++ ) swab16(ps); |
| | 139 | ps = ecdata[i].jdata + ecdata[i].p; |
| | 140 | for ( j = 0 ; j < TORP ; j++ ) { |
| | 141 | if ( ecdata[i].ptor & (((uint16_t)1)<<j) ) { |
| | 142 | ecdata[i].torlist[j] = ps; |
| | 143 | for ( ; ps < pe && *ps<ecdata[i].p ; ps++ ); // be careful not to read past the end of the buffer if it isn't properly terminated |
| | 144 | if ( ps++==pe ) { fclose(fp); printf ("invalid record format for prime %d in ECDATA file %s\n", ecdata[i].p, filename); galrep_ecdata_unload(); return GALREP_BAD_ECDATA; } |
| | 145 | } else { |
| | 146 | ecdata[i].torlist[j] = 0; |
| | 147 | } |
| | 148 | } |
| | 149 | if ( ps != pe ) { fclose(fp); printf ("invalid record format for prime %d in ECDATA file %s\n", ecdata[i].p, filename); galrep_ecdata_unload(); return GALREP_BAD_ECDATA; } |
| | 150 | } |
| | 151 | fclose(fp); |
| | 152 | /* printf("Loaded elliptic curve group data for %d primes up to %d from file %s\n", ecdatacnt, ecdata[ecdatacnt-1].p, filename);*/ |
| | 153 | return 0; |
| | 154 | |
| | 155 | read_error: |
| | 156 | fclose(fp); galrep_ecdata_unload(); |
| | 157 | return GALREP_FILE_READ_ERROR; |
| | 158 | } |
| | 159 | |
| | 160 | int galrep_ecdata_maxp (void) { return ( !ecdatacnt ? 0 : ecdata[ecdatacnt-1].p ); } |
| | 161 | |
| | 162 | /* |
| | 163 | Given an elliptic curve E in the form y^2=x^3+Ax+B over F_p , returns the trace and optionally a bitmask |
| | 164 | identifying full ell-torsion subgroups (the ith bit is set iff for the (i+1)st prime ell either E or its twist has full ell-torsion over F_p). |
| | 165 | The prime p is specified by its index in ecdata, which is pi(p)-3 (i.e. pi=0 corresponds to 5). |
| | 166 | |
| | 167 | On input, the bitmask tormask should have the bits set for which torsion information is desired. |
| | 168 | Bits above 17 (corresponding to 59) will be ignored. |
| | 169 | */ |
| | 170 | int ecdata_lookup (int *trace, uint32_t *tormask, int A, int B, int pi) |
| | 171 | { |
| | 172 | uint16_t *jp; |
| | 173 | uint32_t mask; |
| | 174 | register int i,j,k,t1,t2,jinv,p; // we assume an int is at least 32 bits |
| | 175 | |
| | 176 | if ( ! ecdata || pi >= ecdatacnt ) return GALREP_ECDATA_UNAVAILABLE; |
| | 177 | p = ecdata[pi].p; |
| | 178 | |
| | 179 | if ( ! B ) { // handle j-invariant 1728 separately |
| | 180 | if ( ! A ) return GALREP_SINGULAR_CURVE; // this can happen when reducing mod p |
| | 181 | if ( !(p&2) ) return GALREP_ECDATA_UNAVAILABLE; // ignore requests for j-invariant 1728 when p is 1 mod 4, we don't distinguish quartic twists |
| | 182 | *trace = 0; // trace is always zero when p = 3 mod 4 |
| | 183 | *tormask &= ( residue_mod_p (A,p) ? 0 : 1 ); // we can't have full odd-torsion, and we get full 2-torsion iff A is a non-residue (for p = 3 mod 4) |
| | 184 | return 0; |
| | 185 | } |
| | 186 | if ( ! A && (p%3)==1 ) return GALREP_ECDATA_UNAVAILABLE; // ignore requests for j-invariant 0 when p is 1 mod 3, we don't distinguish sextic twists |
| | 187 | |
| | 188 | t1 = ui_mod(A*A,p); t1 = ui_mod(4*t1*A,p); // t1 = 4A^3 |
| | 189 | t2 = ui_mod(B*B,p); |
| | 190 | t2 = ui_mod(t1+27*t2,p); // t2 = 4A^3 + 27B^2 |
| | 191 | if ( ! t2 ) return GALREP_SINGULAR_CURVE; |
| | 192 | t2 = inverse_mod_p (t2, p); |
| | 193 | t1 = ui_mod(t1*t2, p); |
| | 194 | jinv = ui_mod(1728*t1,p); // j = 1728 * 4A^3 / (4A^3+27B^2) |
| | 195 | t1 = ecdata[pi].jdata[jinv]; // lookup data for E/Fp in the database via its j-invariant |
| | 196 | *trace = t1 >> 5; // get the absolute value of the trace |
| | 197 | if ( t1&0x10 ) *trace = - (*trace); // check sign bit |
| | 198 | if ( A ) { |
| | 199 | if ( ! residue_mod_p (ui_mod(A*B,p),p) ) *trace = -(*trace); // adjust trace for the twist we have. Note that B/A is a residue iff A*B is. |
| | 200 | } else { |
| | 201 | if ( ! residue_mod_p (B,p) ) *trace = -(*trace); // when A is 0, we can just use B to distinguish twists (and there are only 2 because p=2 mod 3) |
| | 202 | } |
| | 203 | mask = (t1 & 0xf); // get 2,3,5,7-torsion info |
| | 204 | *tormask &= (0xfffffff0|mask); // set 2,3,5,7-torsion info |
| | 205 | mask = ecdata[pi].ptor; // find out what other torsion info is possible for this p (i.e. ell for which p=1 mod ell) |
| | 206 | mask = *tormask & (mask << 4); // restrict to ell's the caller actually cares about |
| | 207 | if ( ! mask ) { *tormask &= (0xf|mask); return 0; } // if nothing to do, return |
| | 208 | for ( i = 0 ; i < TORP ; i++ ) { // for each ell from 11 to 59 |
| | 209 | if ( mask&(((uint32_t)1)<<(i+4)) ) { // if we are interested in this ell |
| | 210 | for ( jp = ecdata[pi].torlist[i] ; *jp < p && *jp != jinv ; jp++ );// do a linear search of the list of j-invariants with ell-torsion |
| | 211 | if ( *jp != jinv ) mask &= ~((uint32_t)1<<(i+4)); // if our j-invariant wasn't found, clear the corresponding bit |
| | 212 | } |
| | 213 | } |
| | 214 | *tormask &= (0xf|mask); |
| | 215 | return 0; |
| | 216 | } |
| | 217 | |
| | 218 | #define MAX_MASK_SIZE (GALREP_MAX_CCS/32+((GALREP_MAX_CCS&0x1f)?1:0)) // largest possible mask size |
| | 219 | |
| | 220 | struct cc_struct { |
| | 221 | int tag; // legacy identifier, now deprecated |
| | 222 | int dups; // number of distinct conjugacy classes with the same signature -- we only store data for one of these |
| | 223 | int order; // subgroup size for this conjugacy class |
| | 224 | int pcnt; // minimum number of primes we need to test to obtain a correct result with prob > 1-1/2^MAX_ERRBND (heuristically) |
| | 225 | uint32_t flags; // bitmask identitying certain properties of the conjugacy class, see GALREP_CC_FLAG_xxx definitions in galrep.h |
| | 226 | int gencnt; // number of generators for representative subgroup |
| | 227 | uint32_t gens[GALREP_MAX_GENERATORS]; // each generating matrix A is encoded as A[1][1]<<24 + A[1][0]<<16 + A[0][1]<<8 + A[0][0] |
| | 228 | bitmask_t upmask[MAX_MASK_SIZE]; // bit i is set for each class whose signature contains the signature of the current class |
| | 229 | uint16_t *counts; // points to an array of counts -- the ith element counts non-trivial elements with i=(det-1)*ell+tr |
| | 230 | }; |
| | 231 | |
| | 232 | static struct { |
| | 233 | int ell; // currently the ith entry in this table always has ell equal to the (i+1)st prime (i.e. entry 0 has ell=2) |
| | 234 | int msize; // size of subgroup masks for this ell, in words |
| | 235 | int cccnt; // number of conjugacy classes in GL(2,Z/ellZ) that span determinants with distinct signatures |
| | 236 | bitmask_t fullmask[MAX_MASK_SIZE]; // points to mask with a bit set for every proper subgroup of GL(2,Z/ellZ) |
| | 237 | bitmask_t *masks; // array of ell*(ell-1) subgroup masks, ith entry corresponds to i=(det-1)*ell+tr |
| | 238 | struct cc_struct *ccs; // points to and array of cc_count cc_structs |
| | 239 | } gl2data[GALREP_ELL_COUNT]; |
| | 240 | static int gl2datacnt; // number of entries currently loaded |
| | 241 | |
| | 242 | |
| | 243 | #define GL2DATA_ID 123456789 |
| | 244 | |
| | 245 | |
| | 246 | void galrep_gl2data_unload (void) |
| | 247 | { |
| | 248 | int i, j; |
| | 249 | |
| | 250 | for ( i = 0 ; i < gl2datacnt ; i++ ) { |
| | 251 | if ( gl2data[i].masks ) { free (gl2data[i].masks); gl2data[i].masks = 0; } |
| | 252 | if ( gl2data[i].ccs ) { |
| | 253 | for ( j = 0 ; j < gl2data[i].cccnt ; j++ ) if ( gl2data[i].ccs[j].counts ) free (gl2data[i].ccs[j].counts); |
| | 254 | gl2data[i].ccs = 0; |
| | 255 | } |
| | 256 | } |
| | 257 | gl2datacnt = 0; |
| | 258 | } |
| | 259 | |
| | 260 | |
| | 261 | int galrep_gl2data_load (char *filename) |
| | 262 | { |
| | 263 | FILE *fp; |
| | 264 | uint8_t x8; |
| | 265 | uint16_t x16; |
| | 266 | uint32_t x32; |
| | 267 | int i, j, k, ell, msize, swab; |
| | 268 | |
| | 269 | // intialize inverse elltab |
| | 270 | for ( i = 0 ; i <= GALREP_MAX_ELL ; i++ ) ellitab[i] = -1; |
| | 271 | for ( i = 0 ; i < GALREP_ELL_COUNT ; i++ ) ellitab[elltab[i]] = i; |
| | 272 | if ( ! filename || ! filename[0] ) filename = (char*) GALREP_GL2DATA_FILENAME; |
| | 273 | fp = fopen (filename, "rb"); |
| | 274 | if ( ! fp ) return GALREP_FILE_NOT_FOUND; |
| | 275 | if ( (i=fread (&x32,4,1,fp)) != 1 ) { printf ("fread returned %d\n", i); goto read_error; } |
| | 276 | if ( x32 != GL2DATA_ID ) { swab = 1; swab32(&x32); if ( x32 != GL2DATA_ID ) { fclose(fp); return GALREP_BAD_GL2DATA; } } else swab = 0; |
| | 277 | |
| | 278 | for ( i = 0 ; i < GALREP_ELL_COUNT; i++ ) { |
| | 279 | if ( fread (&x8,1,1, fp) != 1 ) break; gl2data[i].ell = ell = x8; |
| | 280 | if ( gl2data[i].ell != elltab[i] ) { fclose(fp); return GALREP_BAD_GL2DATA; } |
| | 281 | if ( fread (&x8,1,1, fp) != 1 ) goto read_error; gl2data[i].msize = msize = x8; |
| | 282 | if ( fread (&x16,2,1, fp) != 1 ) goto read_error; if ( swab ) swab16(&x16); gl2data[i].cccnt = x16; |
| | 283 | if ( gl2data[i].cccnt > GALREP_MAX_CCS || 32*gl2data[i].msize < gl2data[i].cccnt ) { fclose(fp); galrep_gl2data_unload (); return GALREP_BAD_GL2DATA; } |
| | 284 | if ( fread (gl2data[i].fullmask,4,msize,fp) != msize ) goto read_error; |
| | 285 | if ( swab ) for ( k = 0 ; k < msize ; k++ ) swab32(gl2data[i].fullmask+k); |
| | 286 | gl2data[i].masks = (bitmask_t *) malloc (ell*(ell-1)*msize*sizeof(bitmask_t)); |
| | 287 | if ( ! gl2data[i].masks ) { fclose(fp); galrep_gl2data_unload (); return GALREP_MALLOC_FAILED; } |
| | 288 | if ( fread(gl2data[i].masks,4,ell*(ell-1)*msize,fp) != ell*(ell-1)*msize ) goto read_error; |
| | 289 | if ( swab ) for ( j = 0 ; j < ell*(ell-1)*msize ; j++ ) swab32(gl2data[i].masks+j); |
| | 290 | gl2data[i].ccs = (struct cc_struct *) calloc(gl2data[i].cccnt,sizeof(struct cc_struct)); |
| | 291 | if ( ! gl2data[i].ccs ) { fclose(fp); galrep_gl2data_unload (); return GALREP_MALLOC_FAILED; } |
| | 292 | for ( j = 0 ; j < gl2data[i].cccnt ; j++ ) { |
| | 293 | if ( fread (&x16,2,1,fp) != 1 ) goto read_error; if ( swab ) swab16(&x16); gl2data[i].ccs[j].tag = x16; |
| | 294 | if ( fread (&x8,1,1,fp) != 1 ) goto read_error; gl2data[i].ccs[j].dups = x8; |
| | 295 | if ( fread (&x8,1,1,fp) != 1 ) goto read_error; gl2data[i].ccs[j].gencnt = x8; |
| | 296 | if ( gl2data[i].ccs[j].gencnt > GALREP_MAX_GENERATORS ) { fclose(fp); return GALREP_BAD_GL2DATA; } |
| | 297 | if ( fread (&x32,4,1,fp) != 1 ) goto read_error; if ( swab ) swab32(&x32); gl2data[i].ccs[j].order = x32; |
| | 298 | if ( fread (&x32,4,1,fp) != 1 ) goto read_error; if ( swab ) swab32(&x32); gl2data[i].ccs[j].flags = x32; |
| | 299 | if ( fread (&x32,4,1,fp) != 1 ) goto read_error; if ( swab ) swab32(&x32); gl2data[i].ccs[j].pcnt = x32; |
| | 300 | if ( fread (gl2data[i].ccs[j].upmask,4,msize,fp) != msize ) goto read_error; |
| | 301 | if ( swab ) for ( k = 0 ; k < msize ; k++ ) swab32(gl2data[i].ccs[j].upmask+k); |
| | 302 | if ( fread (gl2data[i].ccs[j].gens,4,gl2data[i].ccs[j].gencnt,fp) != gl2data[i].ccs[j].gencnt ) goto read_error; |
| | 303 | if ( swab ) for ( k = 0 ; k < gl2data[i].ccs[j].gencnt ; k++ ) swab32(gl2data[i].ccs[j].gens+k); |
| | 304 | gl2data[i].ccs[j].counts = (uint16_t *) malloc(ell*(ell-1)*sizeof(uint16_t)); |
| | 305 | if ( ! gl2data[i].ccs[j].counts ) { fclose (fp); galrep_gl2data_unload (); return GALREP_MALLOC_FAILED; } |
| | 306 | if ( fread (gl2data[i].ccs[j].counts,2,ell*(ell-1),fp) != ell*(ell-1) ) goto read_error; |
| | 307 | if ( swab ) for ( k = 0 ; k < ell*(ell-1) ; k++ ) swab16(gl2data[i].ccs[j].counts+k); |
| | 308 | } |
| | 309 | } |
| | 310 | fclose (fp); |
| | 311 | gl2datacnt = i; |
| | 312 | /* printf("Loaded conjugacy class data for GL(2,Z/ellZ) for %d primes up to %d from file %s\n", gl2datacnt, gl2data[i-1].ell, filename);*/ |
| | 313 | return 0; |
| | 314 | read_error: |
| | 315 | fclose(fp); galrep_ecdata_unload(); |
| | 316 | return GALREP_FILE_READ_ERROR; |
| | 317 | } |
| | 318 | |
| | 319 | int galrep_gl2data_maxl (void) { return ( !gl2datacnt ? 0 : gl2data[gl2datacnt-1].ell ); } |
| | 320 | |
| | 321 | int galrep_gl2_cc_count (int ell) |
| | 322 | { |
| | 323 | int i; |
| | 324 | |
| | 325 | if ( ell < 0 || ell > GALREP_MAX_ELL || (i=ellitab[ell]) < 0 ) return GALREP_BAD_ELL; |
| | 326 | if ( i >= gl2datacnt ) return GALREP_GL2DATA_UNAVAILABLE; |
| | 327 | return gl2data[i].cccnt; |
| | 328 | } |
| | 329 | |
| | 330 | int galrep_gl2_cc_order (int ell, int id) |
| | 331 | { |
| | 332 | int i; |
| | 333 | |
| | 334 | if ( ell < 0 || ell > GALREP_MAX_ELL || (i=ellitab[ell]) < 0 ) return GALREP_BAD_ELL; |
| | 335 | if ( i >= gl2datacnt ) return GALREP_GL2DATA_UNAVAILABLE; |
| | 336 | if ( id < 0 || id >= gl2data[i].cccnt ) return GALREP_BAD_CC_ID; |
| | 337 | return gl2data[i].ccs[id].order; |
| | 338 | } |
| | 339 | |
| | 340 | int galrep_gl2_cc_index (int ell, int id) |
| | 341 | { |
| | 342 | int n, o; |
| | 343 | |
| | 344 | o = galrep_gl2_cc_order(ell,id); |
| | 345 | if ( o < 0 ) return o; |
| | 346 | n = ell*(ell-1)*(ell-1)*(ell+1); // we assume ell is small enough to avoid overflow here |
| | 347 | return n/o; |
| | 348 | } |
| | 349 | |
| | 350 | int galrep_gl2_cc_tag (int ell, int id) |
| | 351 | { |
| | 352 | int i; |
| | 353 | |
| | 354 | if ( ell < 0 || ell > GALREP_MAX_ELL || (i=ellitab[ell]) < 0 ) return GALREP_BAD_ELL; |
| | 355 | if ( i >= gl2datacnt ) return GALREP_GL2DATA_UNAVAILABLE; |
| | 356 | if ( id < 0 || id >= gl2data[i].cccnt ) return GALREP_BAD_CC_ID; |
| | 357 | return gl2data[i].ccs[id].tag; |
| | 358 | } |
| | 359 | |
| | 360 | int galrep_gl2_cc_dups (int ell, int id) |
| | 361 | { |
| | 362 | int i; |
| | 363 | |
| | 364 | if ( ell < 0 || ell > GALREP_MAX_ELL || (i=ellitab[ell]) < 0 ) return GALREP_BAD_ELL; |
| | 365 | if ( i >= gl2datacnt ) return GALREP_GL2DATA_UNAVAILABLE; |
| | 366 | if ( id < 0 || id >= gl2data[i].cccnt ) return GALREP_BAD_CC_ID; |
| | 367 | return gl2data[i].ccs[id].dups; |
| | 368 | } |
| | 369 | |
| | 370 | int galrep_gl2_cc_flags (int ell, int id) |
| | 371 | { |
| | 372 | int i; |
| | 373 | |
| | 374 | if ( ell < 0 || ell > GALREP_MAX_ELL || (i=ellitab[ell]) < 0 ) return GALREP_BAD_ELL; |
| | 375 | if ( i >= gl2datacnt ) return GALREP_GL2DATA_UNAVAILABLE; |
| | 376 | if ( id < 0 || id >= gl2data[i].cccnt ) return GALREP_BAD_CC_ID; |
| | 377 | return gl2data[i].ccs[id].flags; |
| | 378 | } |
| | 379 | |
| | 380 | int galrep_gl2_cc_gen_count (int ell, int id) |
| | 381 | { |
| | 382 | int i; |
| | 383 | |
| | 384 | if ( ell < 0 || ell > GALREP_MAX_ELL || (i=ellitab[ell]) < 0 ) return GALREP_BAD_ELL; |
| | 385 | if ( i >= gl2datacnt ) return GALREP_GL2DATA_UNAVAILABLE; |
| | 386 | if ( id < 0 || id >= gl2data[i].cccnt ) return GALREP_BAD_CC_ID; |
| | 387 | return gl2data[i].ccs[id].gencnt; |
| | 388 | } |
| | 389 | |
| | 390 | int galrep_gl2_cc_gen (int ell, int id, int n) |
| | 391 | { |
| | 392 | int i; |
| | 393 | |
| | 394 | if ( ell < 0 || ell > GALREP_MAX_ELL || (i=ellitab[ell]) < 0 ) return GALREP_BAD_ELL; |
| | 395 | if ( i >= gl2datacnt ) return GALREP_GL2DATA_UNAVAILABLE; |
| | 396 | if ( id < 0 || id >= gl2data[i].cccnt ) return GALREP_BAD_CC_ID; |
| | 397 | if ( n < 0 || n > gl2data[i].ccs[id].gencnt ) return GALREP_BAD_CC_GEN_ID; |
| | 398 | return gl2data[i].ccs[id].gens[n]; |
| | 399 | } |
| | 400 | |
| | 401 | int galrep_gl2_cc_freq (int ell, int id, int det, int tr) |
| | 402 | { |
| | 403 | int i,k,cnt; |
| | 404 | |
| | 405 | if ( ell < 0 || ell > GALREP_MAX_ELL || (i=ellitab[ell]) < 0 ) return GALREP_BAD_ELL; |
| | 406 | if ( i >= gl2datacnt ) return GALREP_GL2DATA_UNAVAILABLE; |
| | 407 | if ( id < 0 || id >= gl2data[i].cccnt ) return GALREP_BAD_CC_ID; |
| | 408 | if ( det < -1 || det == 0 || det >= ell ) return GALREP_BAD_DET; |
| | 409 | if ( tr < -1 || tr >= ell ) return GALREP_BAD_TR; |
| | 410 | if ( det >= 1 && tr >= 0 ) { |
| | 411 | k = (det-1)*ell+tr; |
| | 412 | cnt = gl2data[i].ccs[id].counts[k]; |
| | 413 | if ( det==1 && (tr==2 || (ell==2&&!tr)) ) cnt++; |
| | 414 | } else if ( tr >= 0 ) { |
| | 415 | if ( tr==2 || (!tr && ell==2) ) cnt = 1; else cnt = 0; |
| | 416 | for ( det = 1 ; det < ell ; det++ ) { |
| | 417 | k = (det-1)*ell+tr; |
| | 418 | cnt += gl2data[i].ccs[id].counts[k]; |
| | 419 | } |
| | 420 | } else if ( det >= 0 ) { |
| | 421 | if ( det==1 ) cnt = 1; else cnt = 0; |
| | 422 | for ( tr = 0 ; tr < ell ; tr++ ) { |
| | 423 | k = (det-1)*ell+tr; |
| | 424 | cnt += gl2data[i].ccs[id].counts[k]; |
| | 425 | } |
| | 426 | } else { |
| | 427 | cnt = gl2data[i].ccs[id].order; |
| | 428 | } |
| | 429 | return cnt; |
| | 430 | } |
| | 431 | |
| | 432 | struct curve_ctx { |
| | 433 | int start_index, end_index; |
| | 434 | bitmask_t submask[GALREP_ELL_COUNT][MAX_MASK_SIZE]; |
| | 435 | int pcnt[GALREP_ELL_COUNT]; |
| | 436 | int done[GALREP_ELL_COUNT]; |
| | 437 | int cc[GALREP_ELL_COUNT]; |
| | 438 | int done_count; |
| | 439 | int errbnd; |
| | 440 | }; |
| | 441 | |
| | 442 | void process_frobenius (struct curve_ctx *ctx, int p, int ap, uint32_t tormask); |
| | 443 | int setup_ctx (struct curve_ctx *ctx, int minell, int maxell, int errbnd); |
| | 444 | |
| | 445 | int galrep_ec_modl_image (int ell, mpz_t A, mpz_t B, int errbnd) |
| | 446 | { int cc, err; err = galrep_ec_modl_images (&cc, ell, ell, A, B, errbnd); if ( err < 0 ) return err; return cc; } |
| | 447 | |
| | 448 | int galrep_ec_modl_images (int *ccs, int min, int max, mpz_t A, mpz_t B, int errbnd) |
| | 449 | { |
| | 450 | uint32_t tormask; |
| | 451 | int a, b, p, ap; |
| | 452 | struct curve_ctx ctx; |
| | 453 | int ell_count, err; |
| | 454 | register int i,j; |
| | 455 | |
| | 456 | if ( (err=setup_ctx (&ctx, min, max, errbnd)) < 0 ) return err; |
| | 457 | ell_count = ctx.end_index - ctx.start_index; |
| | 458 | for ( i = 0 ; i < ecdatacnt ; i++ ) { |
| | 459 | p = ecdata[i].p; |
| | 460 | a = (int) mpz_fdiv_ui (A, p); b = (int) mpz_fdiv_ui (B, p); |
| | 461 | tormask = 0x1FF; |
| | 462 | if ( ecdata_lookup (&ap, &tormask, a, b, i) < 0 ) continue; // skip primes where we get an error, e.g. primes of bad reduction |
| | 463 | process_frobenius (&ctx, p, ap, tormask); |
| | 464 | if ( ctx.done_count == ell_count ) break; |
| | 465 | } |
| | 466 | if ( i==ecdatacnt ) return GALREP_OUT_OF_PRIMES; |
| | 467 | for ( j = 0 ; j < ell_count ; j++ ) ccs[j] = ctx.cc[ctx.start_index+j]; |
| | 468 | return i+1; // return the number of primes we used, just in case someone is curious |
| | 469 | } |
| | 470 | |
| | 471 | int galrep_ec_non_surjective (int min, int max, mpz_t A, mpz_t B, int errbnd) |
| | 472 | { |
| | 473 | int ccs[GALREP_ELL_COUNT]; |
| | 474 | int i, j, err, retval; |
| | 475 | |
| | 476 | if ( max > GALREP_MAX_ELL ) return GALREP_BAD_ELL; |
| | 477 | err = galrep_ec_modl_images (ccs, min, max, A, B, errbnd); |
| | 478 | if ( err < 0 ) return err; |
| | 479 | retval = 0; |
| | 480 | for ( i = 0 ; elltab[i] < min ; i++ ); |
| | 481 | for ( j = i ; elltab[i] <= max ; i++) if ( ccs[i-j] > 0 ) retval |= (1<<i); |
| | 482 | return retval; |
| | 483 | } |
| | 484 | |
| | 485 | int galrep_ec_modl_image_i (int ell, long A, long B, int errbnd) |
| | 486 | { int cc, err; err = galrep_ec_modl_images_i (&cc, ell, ell, A, B, errbnd); if ( err < 0 ) return err; return cc; } |
| | 487 | |
| | 488 | int galrep_ec_modl_images_i (int *ccs, int min, int max, long A, long B, int errbnd) |
| | 489 | { |
| | 490 | uint32_t tormask; |
| | 491 | int a, b, p, ap; |
| | 492 | struct curve_ctx ctx; |
| | 493 | int ell_count, err; |
| | 494 | register int i,j; |
| | 495 | |
| | 496 | if ( (err=setup_ctx (&ctx, min, max, errbnd)) < 0 ) return err; |
| | 497 | ell_count = ctx.end_index - ctx.start_index; |
| | 498 | for ( i = 0 ; i < ecdatacnt ; i++ ) { |
| | 499 | p = ecdata[i].p; |
| | 500 | a = i_mod (A, p); b = i_mod (B, p); |
| | 501 | tormask = 0x1FF; |
| | 502 | if ( ecdata_lookup (&ap, &tormask, a, b, i) < 0 ) continue; // skip primes where we get an error, e.g. primes of bad reduction |
| | 503 | process_frobenius (&ctx, p, ap, tormask); |
| | 504 | if ( ctx.done_count == ell_count ) break; |
| | 505 | } |
| | 506 | if ( i==ecdatacnt ) return GALREP_OUT_OF_PRIMES; |
| | 507 | for ( j = 0 ; j < ell_count ; j++ ) ccs[j] = ctx.cc[ctx.start_index+j]; |
| | 508 | return i+1; // return the number of primes we used, just in case someone is curious |
| | 509 | } |
| | 510 | |
| | 511 | int galrep_ec_non_surjective_i (int min, int max, long A, long B, int errbnd) |
| | 512 | { |
| | 513 | int ccs[GALREP_ELL_COUNT]; |
| | 514 | int i, j, err, retval; |
| | 515 | |
| | 516 | if ( max > GALREP_MAX_ELL ) return GALREP_BAD_ELL; |
| | 517 | err = galrep_ec_modl_images_i (ccs, min, max, A, B, errbnd); |
| | 518 | if ( err < 0 ) return err; |
| | 519 | retval = 0; |
| | 520 | for ( i = 0 ; elltab[i] < min ; i++ ); |
| | 521 | for ( j = i ; elltab[i] <= max ; i++) if ( ccs[i-j] > 0 ) retval |= (1<<i); |
| | 522 | return retval; |
| | 523 | } |
| | 524 | |
| | 525 | int setup_ctx (struct curve_ctx *ctx, int minell, int maxell, int errbnd) |
| | 526 | { |
| | 527 | int i, err; |
| | 528 | |
| | 529 | if ( maxell > GALREP_MAX_ELL ) return GALREP_BAD_ELL; |
| | 530 | if ( errbnd < 0 || errbnd > GALREP_MAX_ERRBND ) return GALREP_BAD_ERRBND; |
| | 531 | if ( ! errbnd ) errbnd = GALREP_DEFAULT_ERRBND; |
| | 532 | if ( ! ecdatacnt && (err=galrep_ecdata_load(0)) ) return err; |
| | 533 | if ( ! gl2datacnt && (err=galrep_gl2data_load(0)) ) return err; |
| | 534 | |
| | 535 | ctx->done_count = 0; ctx->errbnd = errbnd; |
| | 536 | for ( i = 0 ; i < GALREP_ELL_COUNT && elltab[i] < minell ; i++ ); |
| | 537 | ctx->start_index = i; |
| | 538 | while ( i < GALREP_ELL_COUNT && elltab[i] <= maxell ) i++; |
| | 539 | ctx->end_index = i; |
| | 540 | if ( ctx->start_index == ctx->end_index ) return GALREP_BAD_ELL; |
| | 541 | for ( i = ctx->start_index ; i < ctx->end_index ; i++ ) { ctx->done[i] = ctx->pcnt[i] = 0; maskcopy (ctx->submask[i], gl2data[i].fullmask, gl2data[i].msize); } |
| | 542 | return 0; |
| | 543 | } |
| | 544 | |
| | 545 | void process_frobenius (struct curve_ctx *ctx, int p, int ap, uint32_t tormask) |
| | 546 | { |
| | 547 | register int i, j, k, ell, msize; |
| | 548 | |
| | 549 | // printf ("process_frobenius: p=%d, ap=%d, tor=%x (hex)\n", p, ap, tormask); |
| | 550 | for ( i = ctx->start_index ; i < ctx->end_index ; i++ ) { |
| | 551 | if ( ctx->done[i] ) continue; |
| | 552 | ell = gl2data[i].ell; |
| | 553 | if ( p==ell ) continue; |
| | 554 | msize = gl2data[i].msize; |
| | 555 | ctx->pcnt[i]++; |
| | 556 | // Only process frobenius elements pi_p that act non-trivially mod ell, i.e. for which we do not have full ell-torsion mod p |
| | 557 | // Note that tormask doesn't distinguish the trace, so if ell is odd we also check that the group order is divisible by ell |
| | 558 | if ( ! (tormask&((uint32_t)1<<i)) || (i && ui_mod(p+1-ap,ell)) ) { |
| | 559 | j = ui_mod(p,ell); k = i_mod(ap,ell); |
| | 560 | k = (j-1)*ell+k; |
| | 561 | maskand(ctx->submask[i],ctx->submask[i],gl2data[i].masks+k*msize,msize); |
| | 562 | if ( masknull(ctx->submask[i], msize) ) { ctx->cc[i] = 0; ctx->done[i] = 1; ctx->done_count++; continue; } |
| | 563 | } |
| | 564 | if ( ctx->pcnt[i] > ctx->errbnd && !(ctx->pcnt[i]&0x7) ) { // only check every 8th prime to avoid a lot of fruitless checking |
| | 565 | // we do a linear search here, we could use a binary search or a hash table, but the lists are pretty short (< 200) |
| | 566 | for ( j = 0 ; j < gl2data[i].cccnt ; j++ ) if ( maskequal (ctx->submask[i],gl2data[i].ccs[j].upmask,msize) ) break; |
| | 567 | if ( j < gl2data[i].cccnt && ctx->pcnt[i]*GALREP_MAX_ERRBND >= gl2data[i].ccs[j].pcnt*ctx->errbnd ) { ctx->cc[i] = j; ctx->done[i] = 1; ctx->done_count++; continue; } |
| | 568 | } |
| | 569 | } |
| | 570 | } |
| | 571 | |
| | 572 | |
| | 573 | // both a and b must be nonnegative |
| | 574 | int gcdext (int a, int b, int *x, int *y) |
| | 575 | { |
| | 576 | register int q, r, s, t, r0, r1, s0, s1, t0, t1; |
| | 577 | |
| | 578 | if ( a < b ) return gcdext (b, a, y, x); |
| | 579 | if ( b == 0 ) { |
| | 580 | if ( x ) *x = 1; |
| | 581 | if ( y ) *y = 0; |
| | 582 | return a; |
| | 583 | } |
| | 584 | if ( x ) { s0 = 1; s1 = 0; } |
| | 585 | if ( y ) { t1 = 1; t0 = 0; } |
| | 586 | r0 = a; r1 = b; |
| | 587 | while ( r1 > 0 ) { |
| | 588 | q = r0/r1; r = r0 - q*r1; |
| | 589 | r0 = r1; r1 = r; |
| | 590 | if ( y ) { t = t0 - q*t1; t0 = t1; t1 = t; } |
| | 591 | if ( x ) { s = s0 - q*s1; s0 = s1; s1 = s; } |
| | 592 | } |
| | 593 | if ( x ) *x = s0; |
| | 594 | if ( y ) *y = t0; |
| | 595 | return r0; |
| | 596 | } |
| | 597 | |
| | 598 | // both a and b must be nonnegative and b must be odd. this is not verified |
| | 599 | int legendre (int a, int b) |
| | 600 | { |
| | 601 | register int r, k, v; |
| | 602 | |
| | 603 | k = 1; |
| | 604 | while ( a ) { |
| | 605 | for ( v = 0 ; ! (a&0x1) ; v++ ) a >>= 1; |
| | 606 | if ( v&0x1 ) if ( (b&0x7) ==3 || (b&0x7) ==5 ) k = -k; |
| | 607 | if ( (a&0x3) == 3 && (b&0x3) == 3 ) k = -k; |
| | 608 | r = a; a = ui_mod(b,r); b = r; |
| | 609 | } |
| | 610 | return ( b == 1 ? k : 0 ); |
| | 611 | } |