From 5e92ed2a5936ad820bf5feb01900b226aae3de34 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Wed, 25 Aug 2004 02:43:43 +0000 Subject: [PATCH] added tomsfastmath-0.01 --- LICENSE | 7 + SPONSORS | 5 + TODO | 6 + changes.txt | 2 + comba_mult_gen.c | 50 + comba_sqr_gen.c | 54 + demo/stest.c | 144 + demo/test.c | 537 ++++ doc/tfm.pdf | Bin 0 -> 119610 bytes fp_2expt.c | 35 + fp_add.c | 39 + fp_add_d.c | 18 + fp_addmod.c | 19 + fp_cmp.c | 27 + fp_cmp_d.c | 34 + fp_cmp_mag.c | 31 + fp_cnt_lsb.c | 42 + fp_count_bits.c | 32 + fp_div.c | 153 + fp_div_2.c | 49 + fp_div_2d.c | 75 + fp_div_d.c | 89 + fp_exptmod.c | 170 + fp_gcd.c | 51 + fp_invmod.c | 98 + fp_isprime.c | 74 + fp_lcm.c | 27 + fp_lshd.c | 34 + fp_mod.c | 18 + fp_mod_2d.c | 38 + fp_mod_d.c | 16 + fp_montgomery_calc_normalization.c | 38 + fp_montgomery_reduce.c | 249 ++ fp_montgomery_setup.c | 44 + fp_mul.c | 134 + fp_mul_2.c | 63 + fp_mul_2d.c | 43 + fp_mul_comba.c | 772 +++++ fp_mul_d.c | 36 + fp_mulmod.c | 18 + fp_prime_miller_rabin.c | 73 + fp_prime_random_ex.c | 97 + fp_radix_size.c | 14 + fp_read_radix.c | 66 + fp_read_signed_bin.c | 23 + fp_read_unsigned_bin.c | 24 + fp_reverse.c | 27 + fp_rshd.c | 36 + fp_s_rmap.c | 13 + fp_set.c | 17 + fp_signed_bin_size.c | 15 + fp_sqr.c | 107 + fp_sqr_comba.c | 956 ++++++ fp_sqrmod.c | 19 + fp_sub.c | 46 + fp_sub_d.c | 18 + fp_submod.c | 20 + fp_to_signed_bin.c | 16 + fp_to_unsigned_bin.c | 25 + fp_toradix.c | 55 + fp_unsigned_bin_size.c | 16 + gen.pl | 17 + makefile | 78 + makefile.gba | 55 + mtest/makefile | 9 + mtest/mtest.c | 320 ++ pre_gen/mpi.c | 4459 ++++++++++++++++++++++++++ random_txt_files/amd64.txt | 43 + random_txt_files/exptmod_timings.txt | 45 + random_txt_files/ltm_times.txt | 37 + random_txt_files/old_sqr_times.txt | 14 + s_fp_add.c | 37 + s_fp_sub.c | 31 + tfm.h | 290 ++ tfm.tex | 580 ++++ 75 files changed, 11069 insertions(+) create mode 100644 LICENSE create mode 100644 SPONSORS create mode 100644 TODO create mode 100644 changes.txt create mode 
100644 comba_mult_gen.c create mode 100644 comba_sqr_gen.c create mode 100644 demo/stest.c create mode 100644 demo/test.c create mode 100644 doc/tfm.pdf create mode 100644 fp_2expt.c create mode 100644 fp_add.c create mode 100644 fp_add_d.c create mode 100644 fp_addmod.c create mode 100644 fp_cmp.c create mode 100644 fp_cmp_d.c create mode 100644 fp_cmp_mag.c create mode 100644 fp_cnt_lsb.c create mode 100644 fp_count_bits.c create mode 100644 fp_div.c create mode 100644 fp_div_2.c create mode 100644 fp_div_2d.c create mode 100644 fp_div_d.c create mode 100644 fp_exptmod.c create mode 100644 fp_gcd.c create mode 100644 fp_invmod.c create mode 100644 fp_isprime.c create mode 100644 fp_lcm.c create mode 100644 fp_lshd.c create mode 100644 fp_mod.c create mode 100644 fp_mod_2d.c create mode 100644 fp_mod_d.c create mode 100644 fp_montgomery_calc_normalization.c create mode 100644 fp_montgomery_reduce.c create mode 100644 fp_montgomery_setup.c create mode 100644 fp_mul.c create mode 100644 fp_mul_2.c create mode 100644 fp_mul_2d.c create mode 100644 fp_mul_comba.c create mode 100644 fp_mul_d.c create mode 100644 fp_mulmod.c create mode 100644 fp_prime_miller_rabin.c create mode 100644 fp_prime_random_ex.c create mode 100644 fp_radix_size.c create mode 100644 fp_read_radix.c create mode 100644 fp_read_signed_bin.c create mode 100644 fp_read_unsigned_bin.c create mode 100644 fp_reverse.c create mode 100644 fp_rshd.c create mode 100644 fp_s_rmap.c create mode 100644 fp_set.c create mode 100644 fp_signed_bin_size.c create mode 100644 fp_sqr.c create mode 100644 fp_sqr_comba.c create mode 100644 fp_sqrmod.c create mode 100644 fp_sub.c create mode 100644 fp_sub_d.c create mode 100644 fp_submod.c create mode 100644 fp_to_signed_bin.c create mode 100644 fp_to_unsigned_bin.c create mode 100644 fp_toradix.c create mode 100644 fp_unsigned_bin_size.c create mode 100644 gen.pl create mode 100644 makefile create mode 100644 makefile.gba create mode 100644 mtest/makefile create mode 
100644 mtest/mtest.c create mode 100644 pre_gen/mpi.c create mode 100644 random_txt_files/amd64.txt create mode 100644 random_txt_files/exptmod_timings.txt create mode 100644 random_txt_files/ltm_times.txt create mode 100644 random_txt_files/old_sqr_times.txt create mode 100644 s_fp_add.c create mode 100644 s_fp_sub.c create mode 100644 tfm.h create mode 100644 tfm.tex diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..669856c --- /dev/null +++ b/LICENSE @@ -0,0 +1,7 @@ +TomsFastMath is public domain. + +Note some ideas were borrowed from LibTomMath and OpenSSL. All of the code is original or ported +from LibTomMath [no code was ported from OpenSSL]. As such the origins and status of this code +are both public domain. + +-- Tom St Denis diff --git a/SPONSORS b/SPONSORS new file mode 100644 index 0000000..d7df358 --- /dev/null +++ b/SPONSORS @@ -0,0 +1,5 @@ +Development of TomsFastMath was sponsored by three groups. Two companies that use LTC and LTM commercially +and one individual who decided he wanted to help out by being generous. + +Thanks goes to them [though they wished to remain anonymous] and people like them. + diff --git a/TODO b/TODO new file mode 100644 index 0000000..096f7ff --- /dev/null +++ b/TODO @@ -0,0 +1,6 @@ +1. Write more documentation ;-) +2. Ports to PPC and MIPS +3. Fix any lingering bugs, add additional requested functionality. + +NOTE: The library is still fairly new. I've tested it quite a bit but that doesn't mean surprises +can't happen. Please test the results you get for correctness. 
diff --git a/changes.txt b/changes.txt new file mode 100644 index 0000000..41322cd --- /dev/null +++ b/changes.txt @@ -0,0 +1,2 @@ +August 25th, 2004 +TFM 0.01 -- Initial Release diff --git a/comba_mult_gen.c b/comba_mult_gen.c new file mode 100644 index 0000000..0dcc8b5 --- /dev/null +++ b/comba_mult_gen.c @@ -0,0 +1,50 @@ +/* program emits a NxN comba multiplier */ +#include + +int main(int argc, char **argv) +{ + int N, x, y, z; + N = atoi(argv[1]); + + /* print out preamble */ +printf( +"void fp_mul_comba%d(fp_int *A, fp_int *B, fp_int *C)\n" +"{\n" +" fp_word t;\n" +" fp_digit c0, c1, c2, at[%d];\n" +"\n" +" memcpy(at, A->dp, %d * sizeof(fp_digit));\n" +" memcpy(at+%d, B->dp, %d * sizeof(fp_digit));\n" +" COMBA_START;\n" +"\n" +" COMBA_CLEAR;\n", N, N+N, N, N, N, N); + + /* now do the rows */ + for (x = 0; x < (N+N-1); x++) { +printf( +" /* %d */\n", x); +if (x > 0) { +printf( +" COMBA_FORWARD;\n"); +} + for (y = 0; y < N; y++) { + for (z = 0; z < N; z++) { + if ((y+z)==x) { + printf(" MULADD(at[%d], at[%d]); ", y, z+N); + } + } + } +printf( +"\n" +" COMBA_STORE(C->dp[%d]);\n", x); + } +printf( +" COMBA_STORE2(C->dp[%d]);\n" +" C->used = %d;\n" +" C->sign = A->sign ^ B->sign;\n" +" fp_clamp(C);\n" +" COMBA_FINI;\n" +"}\n\n\n", N+N-1, N+N, N+N); + + return 0; +} diff --git a/comba_sqr_gen.c b/comba_sqr_gen.c new file mode 100644 index 0000000..8c34f13 --- /dev/null +++ b/comba_sqr_gen.c @@ -0,0 +1,54 @@ +/* Generates squaring comba code... it learns it knows our secrets! 
*/ +#include + +int main(int argc, char **argv) +{ + int x, y, z, N; + N = atoi(argv[1]); + +printf( +"void fp_sqr_comba%d(fp_int *A, fp_int *B)\n" +"{\n" +" fp_word t;\n" +" fp_digit *a, b[%d], c0, c1, c2;\n" +"\n" +" a = A->dp;\n" +" COMBA_START; \n" +"\n" +" /* clear carries */\n" +" CLEAR_CARRY;\n" +"\n" +" /* output 0 */\n" +" SQRADD(a[0],a[0]);\n" +" COMBA_STORE(b[0]);\n", N, N+N); + + for (x = 1; x < N+N-1; x++) { +printf( +"\n /* output %d */\n" +" CARRY_FORWARD;\n ", x); + for (y = 0; y < N; y++) { + for (z = 0; z < N; z++) { + if (y<=z && (y+z)==x) { + if (y == z) { + printf("SQRADD(a[%d], a[%d]); ", y, y); + } else { + printf("SQRADD2(a[%d], a[%d]); ", y, z); + } + } + } + } +printf("\n COMBA_STORE(b[%d]);\n", x); + } +printf(" COMBA_STORE2(b[%d]);\n", N+N-1); + +printf( +" COMBA_FINI;\n" +"\n" +" B->used = %d;\n" +" B->sign = FP_ZPOS;\n" +" memcpy(B->dp, b, %d * sizeof(fp_digit));\n" +" fp_clamp(B);\n" +"}\n\n\n", N+N, N+N); + + return 0; +} diff --git a/demo/stest.c b/demo/stest.c new file mode 100644 index 0000000..ede8f81 --- /dev/null +++ b/demo/stest.c @@ -0,0 +1,144 @@ +/* A simple static test program. 
*/ +#include + +#ifdef GBA_MODE +#include + #define DISPLAY(x) modetxt_puts(vfb, x, 1) +#endif + +#ifndef DISPLAY + #define DISPLAY(x) printf(x) +#endif + + +#ifdef GBA_MODE +int c_main(void) +#else +int main(void) +#endif +{ + fp_int a,b,c,d,e,f; + fp_digit dp; + + fp_init(&a); + fp_init(&b); + fp_init(&c); + fp_init(&d); + fp_init(&e); + fp_init(&f); + +#ifdef GBA_MODE + install_common(); + modetxt_init(); + modetxt_gotoxy(0,0); +#endif + + /* test multiplication */ + fp_read_radix(&a, "3453534534535345345341230891273", 10); + fp_read_radix(&b, "2394873294871238934718923" , 10); + fp_read_radix(&c, "8270777629674273015508507050766235312931312159028658979", 10); + fp_mul(&a, &b, &d); + if (fp_cmp(&c, &d)) { + DISPLAY("mul failed\n"); + return 0; + } else { + DISPLAY("mul passed\n"); + } + + /* test multiplication */ + fp_read_radix(&a, "30481290320498235987349712308523652378643912563478232907782361237864278207235782364578264891274789264278634289739", 10); + fp_read_radix(&b, "48761478126387263782638276327836287632836278362837627838736278362923698724823749238732" , 10); + fp_read_radix(&c, "1486312771227034563307950634490737985563993459700941115664257275795366623795590136120579100118233580357115074068815507257715906295105536107921754177810976863679300283932188006885811950341132768970948", 10); + fp_mul(&a, &b, &d); + if (fp_cmp(&c, &d)) { + DISPLAY("mul failed\n"); + return 0; + } else { + DISPLAY("mul passed\n"); + } + + /* test multiplication */ + fp_read_radix(&a, "115792089237316195423570985008687907853269984665640564039457584007913129639935", 10); + fp_read_radix(&b, "174224571863520493293247799005065324265471" , 10); + fp_read_radix(&c, "20173827172553973356686868531273530268200710714389071377794102651988800859098544338487575161443744102709980552583184385", 10); + fp_mul(&a, &b, &d); + if (fp_cmp(&c, &d)) { + DISPLAY("mul failed\n"); + return 0; + } else { + DISPLAY("mul passed\n"); + } + + /* test squaring */ + fp_read_radix(&a, 
"298723982748923478923473927489237289347238947238947238947238972893", 10); + fp_read_radix(&b, "89236017869379132235512787068367546521309689412262624434964313994127411682542855190667724226920696163962644836740110835385588789449" , 10); + fp_sqr(&a, &c); + if (fp_cmp(&c, &b)) { + DISPLAY("sqr failed\n"); + return 0; + } else { + DISPLAY("sqr passed\n"); + } + + fp_read_radix(&a, "397823894238973128942895123894327123941724927848927349274897238978927593487012378490184789429812734982738972389", 10); + fp_read_radix(&b, "158263850827461677491961439999264901067636282938352531932899298293270945997930087353471903166601507321298827087008336951419604640736464667188494668962822678461626245753696845719301945679092882499787869509090904187704367321" , 10); + fp_sqr(&a, &c); + if (fp_cmp(&c, &b)) { + DISPLAY("sqr failed\n"); + return 0; + } else { + DISPLAY("sqr passed\n"); + } + + fp_read_radix(&a, "13407807929942597099574024998205846127479365820592393377723561443721764030073546976801874298166903427690031858186486050853753882811946569946433649006084095", 10); + fp_read_radix(&b, "179769313486231590772930519078902473361797697894230657273430081157732675805500963132708477322407536021120113879871393357658789768814416622492847430639474097562152033539671286128252223189553839160721441767298250321715263238814402734379959506792230903356495130620869925267845538430714092411695463462326211969025" , 10); + fp_sqr(&a, &c); + if (fp_cmp(&c, &b)) { + DISPLAY("sqr failed\n"); + return 0; + } else { + DISPLAY("sqr passed\n"); + } + + + /* montgomery reductions */ + fp_read_radix(&a, "234892374892374893489123428937892781237863278637826327367637836278362783627836783678363", 10); + fp_read_radix(&b, "4447823492749823749234123489273987393983289319382762756425425425642727352327452374521", 10); + fp_read_radix(&c, "2396271882990732698083317035605836523697277786556053771759862552557086442129695099100", 10); + fp_montgomery_setup(&b, &dp); + fp_montgomery_reduce(&a, &b, dp); + if (fp_cmp(&a, &c)) { + 
DISPLAY("mont failed\n"); + return 0; + } else { + DISPLAY("mont passed\n"); + } + + fp_read_radix(&a, "2348923748923748934891234456645654645645684576353428937892781237863278637826327367637836278362783627836783678363", 10); + fp_read_radix(&b, "444782349274982374923412348927398739398328931938276275642542542564272735232745237452123424324324444121111119", 10); + fp_read_radix(&c, "45642613844554582908652603086180267403823312390990082328515008314514368668691233331246183943400359349283420", 10); + fp_montgomery_setup(&b, &dp); + fp_montgomery_reduce(&a, &b, dp); + if (fp_cmp(&a, &c)) { + DISPLAY("mont failed\n"); + return 0; + } else { + DISPLAY("mont passed\n"); + } + + fp_read_radix(&a, "234823424242342923748923748934891234456645654645645684576353424972378234762378623891236834132352375235378462378489378927812378632786378263273676378362783627555555555539568389052478124618461834763837685723645827529034853490580134568947341278498542893481762349723907847892983627836783678363", 10); + fp_read_radix(&b, "44478234927456563455982374923412348927398739398328931938276275642485623481638279025465891276312903262837562349056234783648712314678120389173890128905425242424239784256427", 10); + fp_read_radix(&c, "33160865265453361650564031464519042126185632333462754084489985719613480783282357410514898819797738034600484519472656152351777186694609218202276509271061460265488348645081", 10); + fp_montgomery_setup(&b, &dp); + fp_montgomery_reduce(&a, &b, dp); + if (fp_cmp(&a, &c)) { + DISPLAY("mont failed\n"); + return 0; + } else { + DISPLAY("mont passed\n"); + } + + + return 0; +} + diff --git a/demo/test.c b/demo/test.c new file mode 100644 index 0000000..486bb03 --- /dev/null +++ b/demo/test.c @@ -0,0 +1,537 @@ +/* TFM demo program */ +#include + +void draw(fp_int *a) +{ + int x; + printf("%d, %d, ", a->used, a->sign); + for (x = a->used - 1; x >= 0; x--) { + printf("%08lx ", a->dp[x]); + } + printf("\n"); +} + +int myrng(unsigned char *dst, int len, void *dat) +{ + int x; + for (x = 0; 
x < len; x++) dst[x] = rand() & 0xFF; + return len; +} + +/* RDTSC from Scott Duplichan */ +static ulong64 TIMFUNC (void) + { + #if defined __GNUC__ + #if defined(__i386__) || defined(__x86_64__) + unsigned long long a; + __asm__ __volatile__ ("rdtsc\nmovl %%eax,%0\nmovl %%edx,4+%0\n"::"m"(a):"%eax","%edx"); + return a; + #else /* gcc-IA64 version */ + unsigned long result; + __asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory"); + while (__builtin_expect ((int) result == -1, 0)) + __asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory"); + return result; + #endif + + // Microsoft and Intel Windows compilers + #elif defined _M_IX86 + __asm rdtsc + #elif defined _M_AMD64 + return __rdtsc (); + #elif defined _M_IA64 + #if defined __INTEL_COMPILER + #include + #endif + return __getReg (3116); + #else + #error need rdtsc function for this build + #endif + } + + char cmd[4096], buf[4096]; + +int main(void) +{ + fp_int a,b,c,d,e,f; + fp_digit fp; + int n, err; + unsigned long expt_n, add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, inv_n, + div2_n, mul2_n, add_d_n, sub_d_n, mul_d_n, t, cnt, rr, ix; + ulong64 t1, t2; + + fp_zero(&b); fp_zero(&c); fp_zero(&d); fp_zero(&e); fp_zero(&f); + fp_zero(&a); draw(&a); + + /* test set and simple shifts */ + printf("Testing mul/div 2\n"); + fp_set(&a, 1); draw(&a); + for (n = 0; n <= DIGIT_BIT; n++) { + fp_mul_2(&a, &a); printf("(%d) ", fp_count_bits(&a)); + draw(&a); + + } + for (n = 0; n <= (DIGIT_BIT + 1); n++) { + fp_div_2(&a, &a); + draw(&a); + } + fp_set(&a, 1); + + /* test lshd/rshd */ + printf("testing lshd/rshd\n"); + fp_lshd(&a, 3); draw(&a); + fp_rshd(&a, 3); draw(&a); + + /* test more complicated shifts */ + printf("Testing mul/div 2d\n"); + fp_mul_2d(&a, DIGIT_BIT/2, &a); draw(&a); + fp_div_2d(&a, DIGIT_BIT/2, &a, NULL); draw(&a); + + fp_mul_2d(&a, DIGIT_BIT + DIGIT_BIT/2, &a); draw(&a); + fp_div_2d(&a, DIGIT_BIT + DIGIT_BIT/2, &a, NULL); draw(&a); + + /* test neg/abs */ + 
printf("testing neg/abs\n"); + fp_neg(&a, &a); draw(&a); + fp_neg(&a, &a); draw(&a); + fp_neg(&a, &a); draw(&a); + fp_abs(&a, &a); draw(&a); + + /* test comparisons */ + fp_set(&b, 3); + fp_set(&c, 4); fp_neg(&c, &c); + fp_set(&d, 1); + printf("Testing compares\n%d, %d, %d, %d\n", fp_cmp(&a, &b), fp_cmp(&a, &c), fp_cmp(&a, &d), fp_cmp(&b, &c)); + + /* test add/sub */ + printf("Testing add/sub \n"); + fp_set(&a, ((fp_digit)1)<<(DIGIT_BIT-1)); draw(&a); + fp_set(&b, ((fp_digit)1)<<(DIGIT_BIT-2)); + fp_add(&a, &b, &a); draw(&a); + fp_add(&a, &b, &a); draw(&a); + fp_add(&a, &b, &a); draw(&a); + printf("sub...\n"); + printf("cmp returns: %d, ", fp_cmp(&a, &b)); fp_sub(&a, &b, &a); draw(&a); + printf("cmp returns: %d, ", fp_cmp(&a, &b)); fp_sub(&a, &b, &a); draw(&a); + printf("cmp returns: %d, ", fp_cmp(&a, &b)); fp_sub(&a, &b, &a); draw(&a); + printf("cmp returns: %d, ", fp_cmp(&a, &b)); fp_sub(&a, &b, &a); draw(&a); + printf("cmp returns: %d, ", fp_cmp(&a, &b)); fp_sub(&a, &b, &a); draw(&a); + printf("cmp returns: %d, ", fp_cmp(&a, &b)); fp_sub(&a, &b, &a); draw(&a); + + /* test mul_d */ + printf("Testing mul_d and div_d\n"); + fp_set(&a, 1); + fp_mul_d(&a, ((fp_digit)1)<<(DIGIT_BIT/2), &a); draw(&a); + fp_mul_d(&a, ((fp_digit)1)<<(DIGIT_BIT/2), &a); draw(&a); + fp_mul_d(&a, ((fp_digit)1)<<(DIGIT_BIT/2), &a); draw(&a); + printf("div_d\n"); + fp_div_d(&a, ((fp_digit)1)<<(DIGIT_BIT/2), &a, NULL); draw(&a); + fp_div_d(&a, ((fp_digit)1)<<(DIGIT_BIT/2), &a, NULL); draw(&a); + fp_div_d(&a, ((fp_digit)1)<<(DIGIT_BIT/2), &a, NULL); draw(&a); + + /* testing read radix */ + printf("Testing read_radix\n"); + fp_read_radix(&a, "123456789012345678901234567890", 16); draw(&a); + + /* test mont */ + printf("Montgomery test\n"); + fp_set(&a, 1); + fp_lshd(&a, 4); + fp_add_d(&a, 1, &a); + fp_montgomery_setup(&a, &fp); + fp_montgomery_calc_normalization(&b, &a); + + fp_read_radix(&d, "123456789123", 16); + for (n = 0; n < 100000; n++) { + fp_add_d(&d, 1, &d); fp_sqrmod(&d, &a, &d); + 
fp_mul(&d, &b, &c); + fp_montgomery_reduce(&c, &a, fp); + if (fp_cmp(&c, &d) != FP_EQ) { + printf("Failed mont %d\n", n); + draw(&a); + draw(&d); + draw(&c); + return EXIT_FAILURE; + } + } + printf("Passed.\n"); + + /* test for size */ + for (ix = 8*DIGIT_BIT; ix < 10*DIGIT_BIT; ix++) { + printf("Testing (not safe-prime): %9d bits \r", ix); fflush(stdout); + err = fp_prime_random_ex(&a, 8, ix, (rand()&1)?TFM_PRIME_2MSB_OFF:TFM_PRIME_2MSB_ON, myrng, NULL); + if (err != FP_OKAY) { + printf("failed with err code %d\n", err); + return EXIT_FAILURE; + } + if (fp_count_bits(&a) != ix) { + printf("Prime is %d not %d bits!!!\n", fp_count_bits(&a), ix); + return EXIT_FAILURE; + } + } + printf("\n\n"); + + +#if 0 + /* do some timings... */ + printf("Addition:\n"); + for (t = 2; t <= FP_SIZE/2; t += 2) { + fp_zero(&a); + fp_zero(&b); + fp_zero(&c); + for (ix = 0; ix < t; ix++) { + a.dp[ix] = ix; + b.dp[ix] = ix; + } + a.used = t; + b.used = t; + t2 = -1; + for (ix = 0; ix < 2500; ++ix) { + t1 = TIMFUNC(); + fp_add(&a, &b, &c); fp_add(&a, &b, &c); + fp_add(&a, &b, &c); fp_add(&a, &b, &c); + fp_add(&a, &b, &c); fp_add(&a, &b, &c); + fp_add(&a, &b, &c); fp_add(&a, &b, &c); + t2 = (TIMFUNC() - t1)>>3; + if (t1>2; + if (t1>2; + if (t1>1; + fp_copy(&b, &c); + fp_copy(&b, &d); + if (t1>1; + fp_copy(&b, &c); + fp_copy(&b, &d); + if (t1+85{~dQf{O2iid{3D%SI&qz zBUX~ii-^%O(lJAkj$XzVK(P@p5ZD=7LhOPko5Ihzx(|9PTFFJ@uwY~n~jFJ^7v zY$9S}WM^!`#|P!)>}XD<$OifBR!oMoNyFtB6EVs3?4 z)c$f#@OqhS=a9a**a2!d(arKeeU+UZ?KKM`l74-Ko3@s;WmF@dgtq%|*?+f4Fatc4&Xgm&b-Djc|h)leOIl@^Ih z*4E%g$wZs%HJJBmwzQti9#82O56gxzol3UhxrsXFT}kU1W+BephQnxeWnJJjdUr&m zDRn*cl_S=LAo7&trg-GmMh5W+T~V755&bP|PbK*QzG2P!#l+V5{{r;q-Cv;o1?69_ zOzfOc|Mw9S!@mG$V*GCqcTw)N-4H;yd_|?n7OLF%sn@|_pGw9S2_Og+Q9`?p^h1f| z@WVUhIl-7)Ur^uH&XtpUFf{=%#oA-Pa}Aw-SSlP(s@7CbS8ezW&E~AnqQ*pn7iAAf zF&=DGYLLc3`F+1)-8GbcS*f3s!Xl;Nyr(T=qjjwirYMoEz{R)M?-Zb#8FAAkvpnaE z4m&9dQL2u`@Y)m-1I1fS 
z(XsQXTkL3`Le|ea7@(kejV#Y7Y-F07*6f0&BEx-eUgyt3HPLt~#cm~S2stHF@9O8m!T^6>V4ltkcfDP8Ar7d@WV$iaNRJ?Qu)q>$ll7Z+y=u*7x zZ`ty^b}d?keKATfcrz{!OL2H<9Un(Lp?cv#Oyc!4v#?XKUW01onHT&{yf&MstzO(9 zI_O0VyRAA}>;?ce)(sj?pAE2#xXG=ttkcfhvES+#`)An2Ruv+PCR}qyYcHPe+=GN| zZEz(LQ#{Q(DC%8lz=^#$J_+S|s~EOO!Mxw^?myDw(QIT@Z~5BFCvu1HKY;rM4AcL0 z+W$Vse;s!=*8d!L=70AW=Krd}Txm+hZm=QrV1FWVm4uqS_+$lKx@v)pxh7Wm=b&n7 zG}c+y(xA)udWRO3h0?BC!~-?ZUiL@s3FC9(oAu;BCg_VGU4B*UXz+QYcaaQj?9bB3 z*_I}*=3>ZEh)Yr@NcTz)03=AHNKnPkkF(@gVUfCu=D9%zH zd9_tt_Y1=c=3rL=hMXnqc(4|utO_J!4V!@OUz-k{VCVUx$+y(VfkvYEay4u8NQc8KN z7G;!{wyc;gxVhi#IS3I`U%305BZ->QJ3xou_@*){69ZN6EvgIP$*+_6P6 zYCRDIV2sv8LGS!s(SAv_dRiu^L>z3&pRw_lY0emekN7aC0TrodCAi8OxzfH3vy~GO z#o>#5Mbw>ZwAj@oh_!NKCbs(!`y|;c7YpSlE|XhaJ4L};s{ISeUQ1bIGb+~)e{gc> z7;dwLI^w;vGU!dZdj8q0-sR_wDF_2K#F#|D4lr_i~Lm5V6t6=P6c}s{C%d_`GjM#^{jVk(FIdo#QMrerVzsRZ)~a{HRU@v-kzZTSE$FN77u{=BI%#{tHcu{^+FvcBpPxg zPddVp2P$NxhgIK&wI%1N#<<(;P33K)UrAL*D65FqPD(;Q>I4^vdLO5*L2Ibod*5?X zgVQ_LdnG1i*XxP4IxE&9uQMRiX5*br3R@w-aDaf1Tu_*J_7qW9s7a*_K9LsySpgJT zh;>+%XP!?k&?_CqAjkrwa%!F@AAA5>?gV3>JjZ-cNp#wj;|zF^G7}u3X+NdVF`@XN z08{<2FMh8O&?)cK*f0BA>)n^I=W`WH_X8ZHK{hkH)?G`HP!#bHsvsGy#Wq1i=sq2EbN3ukN~)YTocaOx_)(j^&B%> zb)g_p#PM+MJTj+25DV8akt=PzIB{k)*iix!MWwf%mwCic{9(;CFqD}cH+9ZyDW%E$ zW}$xU@rF@4FM@c8P~qB^v(0`EsuE7^qY;Ie3Vw*kyuCs|+Z$Fc#DcnfUZ9e1b-yr^l&R-xhF*E-IA}i~^ekZJd zDTM!Z-YC}Cuss|>^hWz$GCi#ODn=!&xv<>xt!CdG?*( ztk?KgAE89L#zz8Jz-nf-qJ7aXm*f^ro>u70g1A&ubhCU-*-$AnQIqQY=ALpVG#ApO zf@M-gJx)79t+u8^m5K>y@%f!1QY{bcSL4JKfy?9Rbsy%as8#qi_D(npqxC@HmiGk6a{99SHEb{P*8-im^{5OCmCj*WLiav zVEtyWB2Mk!7wnbfLI*P?Io7~Gbm&AhY4cqjFqW5`%LliB3P zgp$B~R$#b(2TZi1Wq-edBnVJ}uOYc`xRYFma*u&x#c}-MpOL(1Y+!pK5`+AZxSGlk zdbPkX73wj|@Q|@#X5HawYwKi_5ZT8C^1E}cknuv>64E1c6vS zs|SCl_j~c)LeJsr>+RVDTmBDQQGZ&XaDUyU;>Vq46*yjxlpgR%%Pnp`E(ZLnek78i zxb0@bRO5$j$B=^doEhA@hZM-Ap3VWxu`GAT$zsPGx^5p|Iel@*!hTfblp`&G12)fG zFnICKf4UIpDqrAajUh+54g1Su00ZixMR4=AjN&8IJMweSueQy?5x;-R zv?BM7Yw@-r9->rkv6u z@xCAOxI)=MSZ4NqKyzpfFUv2~`TLhn?2C6iN*oOEmyCVUJm6J9_l(ZR&pCW6Yf)fU 
zlzY&#?C3AwCic}+X?q^)kS}QucX}x!0B47R8@6+Z)L+)B-SYou)LTSeRGM0kMT)$4@5W}6#?__2c5}w3k zcqp9?ID=}Yzh=}F@8RHR$EQH-iN96rhTjfO3CWMMTl#KDUwr~% z$^nFESipjJ5%syjFwbR>a4K02csDB|pTkqLX4X_6uJvY~_Y4$F^jV^g>?$uI+K^#c zn6VUf7b45`?%MIM)j5eh)d$+~$)P|)&mqlKZkRVBZBoSz_9j!O@f(g@_BH~rZ;{vSfzPsIkO1;{LIe;ix2{P^=!ic z9%D}dFAX=m4Ix#97xsEF9q@-#3U@^Zqoh~W3rUCjj^)`Qq~#pFL8B%D9eVYnvl!fZ zVAwkavKeifEo4ve=<+!t_%#Tt5Dbkg$0Y5o5h5ryYm5;!Z8`*fTnu4xUM|`aVsVp( z&J^#_)-&qvkV`!h%ITIC!%rCF2=$n3}k2%qpTyj1Kd_WZxlMtK3+4V3xNcg+VqtN80V&?&8YhF(GLLUs@_?Ii5cCCrQhhOQh z=ZC|u8*05XdcKd?$uyFKtY3Z5y(6eqjFDF8y;Fm-`A~b3{+0MXZ?mr-`-Aj%ChsNL zP*zYX>uco}Nz8s=DDe+zpzY+SoQ#4dziwnvl?I-KHJO zHcH^pQd=lc(g?Hx>A8Id3Zz0D&c@c!K8KrrjJDj#tk`#add_bJm5OR`{7~y7 zmMb;}9r}&(J5-q4{?M>Y6~p6Om~i5DgQr8>{)Rd5WV0!fwMLWfd62BHMd8=EyPm}L zLh1x-7=I zsKg-#uDzcLAC!I@Y_DEVTWzS8S1=mWFeY_H5R1{zGXNJZ$Q?ac?g2lU>7!zE6;g&!f+6Q=;F2+B`m+y^u5@^X&BLWzGR>-X->!4 zeK(dBMI;qov^F$Vg^uC`#0=}5rcb}(JQ~7vc{?hWarH*MX8SS)Hqk2I(@@L6e1n=g zNyVebPExFr<{#oaBS@%BX2?MBU6E(TtD|zYb2Nly0+|?;;9~PKzMfk8SlD6~(sar4 z8ixzR9d?YJd7BHCYFk(5bX?L{W0blv`@!@}848v-LaVmCOB*iTFcw$|g6Ih~Oct)b zty<15PRMnblA6d*yT6qnm*lY=O#{qghmdcln0d$vLadk!%C^5WXu+!x^Cy705YuJf zY@+_magNoh=GBLBUR=Dr8-{lh;=>PbA2>#8=i2UCLiC!I9ooNkJ$0C#I7ovC__yCl zbvQ`NohzO?!agcCr?3IlkzQ4VrU)M7NJ$Wo-15BT0YmChg591PEZ-~>3KnOSD1b0$ z)5M_<#{ygzd-L~;5q=N8i;9rc30v$ploxDY*ATeGJy+%PXfa91iut&`P&ff0t%7k|SM?hLm|!z#y~5plEU zQq!fh7+myYvK=iwPZKAR(|xT@yC~Df8J|HD(jnDI3_2L<@W3&j*ScOl-CjA#2NL8k zY=xeFzCOnYW~<6mmjGYzshBv?(_p(+>0TJcQ^@C*hM(crfrQoI#R?rgeoGV;brGt3Jyc5ucMg(~2CztE0 z5ip2IQTrM&i8M^Eh=zHQm3v93+O?UOH8&tECacYZFqkjWiq|CBk={7ImU%iL1I821 z8A`-ad==D`5EYDOXVMxc@Wm2NqqLumC25wC_<7To%PE-zCXz+ZRum{qQe~If^Ia-% zOBo_pwjl}%ML~i*iHa7&cgEW_m!Yhm(4F(tGnBwlj7{5B1r@&3iJF((_)*U7^IJ`jX-g1wcI&WgatmNUBeR zd|nd9LuIs8weeBzcuY9qM>*#&mPeTI>JVKLo*~J~*b3>u0#4#4?4(eoZ8S~kCi$?F zxbq#d-JmRbL3F=9Uz*UGJeP3BgZZW?gb7r=+pFW(ih~SGuPqW7Yh2mOv{+A7;XrHU zQ?zPZ0p|1B+GRVMclS)Yb}`dtkpu7l9`+i9fV^h!Ip+=EVB99#vfOiCcSUP9SaMXwkUHp00$tWkZY5w?9^ze 
zIE(>I|F{i<8)MY9@zq;Y|t2ORDTBfg$T%Gn%m2b1oC3bP1JvQq}80wb+s)4 z<|{I1>AHA$eEn9znCZ!x{qlBXPjoQ!f|@WKLFIsLdDDP#j`V5qa&@@B-~Y5ebzL5x z+%f$)Jkt8!S*ty{J=rf^q_pdu`7Gj)5$G` zAfc$?d{WU<9{)fKK^5?AXH_^d!;J+cuxkfBEF$+o(oe>CF4+M#ZLZ(Qjkon;Ynmj9 z4cJURroY9;K2}(qd-6&NAY~}`l|4>ib2j^VJr0ltdwbMqU?{ufT6t1VsZccVeM|s< z%DQt%#;yi!d0Rxdo;^1C?wL+jBW=RUGF)>}(Jq(+#HOOkC!07=aWnoRaTLD$J_`Q! zAT-P)aR<5l7Q_c2TF&bo$cK>f=+ChPj^6?-;m>crZz;HZe4}+@^KfapI$sOU<^Pk; z^hljNi?rMvv>B-K+!N3wykXQaf%k#^2C=u_cf6z=a-2z1u{v&gk|{QI18yfxN+*Ip zamvsyf9Rv%%%RAJCz)zuwTSjNP5aBEe}zgZ2(S1BfkmPrcywpg{RRpqFDWXtv}j(# zXfB-q{Mf)6&s*+?k7B+6J5}WL1{S%G>0$6$PBKb3Rgr4!h$4cItgIXqkO-NKRE$>< z;5ZSjH|OhW!$_=r4|EAV(Q7AtvqmrJ!QcsjWJcxuTyv1|ZDaSRk~P|Zo)A(-*ofH>`=fOY z_yfiwZST(Qsf#_r;J*!X5XgXYHP<$Kok8AoD={0ikzU zIx8#Wvbj_~wB-k&&cBOy)*K%_9^3WzG``YlBh?n?m%*!{#j}nz(H2J)y!=r$QN+xJ zTo`p?a+rz6BF}uqwGnF415g)3=O5I*v$}kFU-z=VCzruZ-`^oh!cZ*o!>B>7YB8V< z!dL2{XizMh;NNT=}+sq7Q&?pTN8W z>%!;FovB2h70W^*PX2MSxi!q*`%o(>$kMzPWO9(pD40}0!~3)B7IXDXqZ@XX0#RC1 z`*~I631Kq<0W`k5`#Bb6Z{%p#_eqY1F~X4ra^N(#Cmrn@Ks)S@A4Sr#o4_e#lTW(% zg`W3d$<&dtiE?GC7G3kDQa8W6^XXe_Tf1j=-|;eeI!$uP993J|Kk>4!h*bE=2x2>D zbh)D%is2wyrGja6M8yFqW^v^xZANGxV^607x0!m%ftqB-Unv8VMHCY8VcV`A6uLk^ z{ZbLUyk{-Ofm|9Rd&fF)=Y_hwY975rw4;u!hYRi3Vhl0&RPw_NSm1tLqj`%hlXkpJ zEiCp&=CuxX&eo=KuG>=yR1D;=EW?li@tb-SM---iSL5UY=?#X_Q68D`oK@8Ci*m-T- zTfNzzw1LmYaXo-bLyF6#H`**&&TN9h9Ff|aHAOpEwaG(y|`e54XuFh+fNY^l98YvtQ)J^+~{9zlo zxHbF;^mO37>(bqa0UA3%OH%#7mww~|jRa;4f+IAn$|it6Nu3n&Dh(&W$T3UNbcVVx zQfn3!{9d2;g4qqIvQLwS88f&mHS~dM3w)Jr&_#=m*{7`GHDldVBfhC}xlE{}ye81l ze(A6=W|4LmiiH(Dj9Qsy7nEPA{ukizz5#EQ`evTto<8V$NW^eC@G#KxRY!+cLmb9v zuSRcsK9e8TR5t}plpm1!2PY(&cQsIzOP)#+SAoi9oGRn*4NXxDft0SU+5rKHgn(gyiDcWN)!x5(B35qCM9-kt*F8BuIY-)=h5 z30X;>=np~q;5Km-GkF7(1TcD(31aLe-E&`RRO!QcN30G!m~S^ zv4hW5)xz#J^9Y1EU$WR+?&6TsvJ^9WU3GX6g$-`ehI$ zfvLi|g$o+a9TCO}3z${h{)ZL)3RXc|zDig}ED`%|c9xsy(gbLOkO>rh3 zZ2DmIu;X*6G=rUtG-SfV{4VJ=xvY$3QMiwa2#lhfMlkwK!#$pM8AT_lj>rW~xGd^V zUKMf<*_X%!43CM-Asob?vmeD=($n3Bllul0m%FW33qXHLFG0y~NF{O_OV9HJ%UcQ^ 
zWMK%hGt$_0IwPcW*2^f7#pzv!rqUk$T7NUeG1=vCK5#!1VLorsDqD{=XVGO8X=|ph zfAH8tb$6hElWyyF;j&;aws%!hThIqplL?v%5AUMIUdePx1nXXXzq1nA;BCz&2qHBX zjr)`JqD6P*oBA`8F#+o(oye%Srl)8!59Y$H*?i@!ys_-z0i5kLp%dPE+6bvjfMVq- zd1dp$jWuV3q$_0>W&VVGc>J%!_C&vz0>@0&PMKFqG^!)O zFkzt&I@8V^gU(JpGM{@VJX_|9>X$AFG~;k&XQyUy}XbF(vl@DyGz| zt|fOMhH~xH#6NJps7f`sN$(Ae*5skj9;~dR@{WWAY>NyN(+z!6 z&f_%W)^nr%5oaL6Wc@vGtMe13QJkWni1(gV+y+ePKpL1Q;fyX# z1cQV*7?3mcsEi7i0Lp-f*mKdVou)G8z-mNsJnFeq&!~I=0(z10nJ70UQq+fi(rkrC z6ak$l!&^7x7gV5!dgjr_mVQKeYe#)DlsXm3voI*lS+^mEvsk^5+!u)ogvyPQ(5l&K%T2s!6>a^wz zNVVyJl^9LOM;kO^7|iMp(@%=9AK-Qn`>MDUZL^V==}gouLR=qEck+EK)m z5>xX%=_DY4m`08ETAQ24f0)PnefJS?x#(b6_AQQA4XP4;WTkJ*KitmGiW-@8O)0U( zqk$M|0(O?9RKdz@SVpNmC0o!6zS;zBlhVZ@@7ZqPA;(q065-V<}$R9jrH!Zar|B|(vNB_5rNuy^XB zqpj0HOo%aGjGsGo()rp_UC@>zc-iQxflN-}4qd1yEXIfDwFDz@6*0B}vOn~2E`5dv zLST>W0bN*fbb3xBj!8|qoPXSDU#IZ78wlhfrLHhzlEW@krABws)0JY)sw?=_Z zjH0I#H;Myu7{LS>@8yEb$pe|Uq}p}OdBXE~S^;qBvZZF7<*Hi;#pJkY-X;2^B9f9Q44r0s#zu!X6EzL&$X+V37U!^PLwB1s2KTg9IdqK>TG3O0v0Wl~2eVZ%m{JPb=VPU&rGI$8O3umCvS-yTop zWgl40Z%tGSdEc8#Bh>?~3KP<$-`7alV^RKx0|L!8Gt=Vpfp6j5;xvTv?%~54_YW5m;pg*9@C@qO#exFGBkf#d>OiWWPdPAO1c?=W42ft z*A)Ls17aKQ6m@i>X3|;}s3KZcz&Tgz8*6f(T#{9~Xrnfp*<-=pk4v6q*fLGFUNX>O zZCsXGAFHn=RoOUsEUf10wuhf05dxVo_wYD+gs^WW!ZR_ za>DLX!GD;`OXHgPx`ZzW3O*_SMd@D0#s?UlTuJtV8f8D)P~Dk^t;C{cXs+a}df{cI zW1Dehs{+?wZz!TgEH^#yWBFus4z6MW{VaWD9FF8eB*|# zFE)DX+7hC+9aapc9wlC(RwxF>ehidsjkQ jX;~4{ADTgKQJbs{t`QlS*YpKnZXv zJQ~t@lVODMYT8ldgaV_bRB{t2$jXEvDh^9Q7S|~faUy$y668fXnRGzlqy#|+MjtI6 z0QN-t$W%vQ%fAr{Kzj8csQvamu5_f@j67S)OdE%W=7h(1J@iQ}yI!>2pY2B7*a(JV#>gZWf z^_IS*YFXWIo9HwcX-K{BxwSW3mgzIxZI2kE#%u7K#!6^R1y zElf#<=&{O%(~5vc(O!G%g(vPC&=n9%(IC8_>K=$^Fs=xShKrvH_;C^ENQE6JP|0Nn zQ!GT@aL)>MS_aC`Il6m_Ww`hK4b)|Iu zem{W;K6RJ`7Hx@4ogzl4jr2jr4!)z$8hk~PrKpIWx1^Ek_Un81w^vP=kcn}r^hn~=ILqa~$ z1McuIP_FHd6+O2-LP3TAOxI2qQ5sW;${IG=nW!9wL)j-F|&WV)g&At ztPl-(+`S~RDRttqLoQdB+%U+ZFBR|%Uh+JyXlF7-jUUU?e-47eqs@QzD2Vi z54bgkb`W=`i^8YTY!+fP(pnex%IN05cwZttJ3y6|+Z~Nk@Kd0ouqxtnZQHavAi+0g 
zi$EV~?F#&thN~05ITJ^Q%<`1Mz%!&P&f}hl`Kn4yKM=Oee22SeMJ`Hi(65vTDC(0$){Kk1 z1tY+$2dlh*W06Vqa?honqOKS@;8y~M1lmh0-H^+w9=kH(@xcFZ7Ms`R zbmXL?{ayMWrYLPs5#_`kLRa6zaOh$XLavB(5tM)|;qHL$qp>XjwH-A`2^AAkp&p zBnv6h?k7Q1bi!Ydw};SaFdTyg$@p;UMS(M-7=9Ly>?p6)_-wR1KO)M8YYAvgmeeXn z579=ja-bggFoYDOu36jNn8objorycGHv?ye1>}EeUn852J42Z-AF~UdT-Ay|r0OcQ1*32qoTlb&&ozK? zgb7rIZQ``$?wOb*Xuy%dPsjb#?=eyb6(6ME81_h~Le~nbLoCe*HIP&CfR!ULnZf{1G)$6o#Zp$jSwPm!)_A zQkW!dxpg+s6E*P`O2A^_;?tJ6KpU#IQ)f!SGKu;*Gl#sdY0Y&ufGUPLSY^g>b07u!0M(_mF=I~yKnP^W zV>6-FCX>e+(SM`P=%tM!v~L+%FTS_j;4la=8z3h8B=7r7Fq^6s580J3EjuP-l&aIs z;eV!s+S;d~#L5$$(Bp(Ag(04~CP9F08zql9dFqAlO4b#MY!Ge^8wC&(2VdbKcNAJ- zCYl<;ZH_TJG^-2wpzPn z$R~VCu+PvTS9lUFXz**PuvAe~Su{**Ofr_pQEO%V{W-ym^TpJ?3c`JF=^__txCZ1l=$}bTKeYa z*_Sjtq=lI$j;4W9>uzZV#gw&^~QKa*90v{M<%|;TC zOh5W>+*CTXQ*BtC!dR%*CQxt_8-@)1MgA=B_Fy6y zjYOz?o%i9W21rYJ_;lV45k~+Ej#?s9S4E(!TBLr*oNY&5*{K{@I$rg@bu|;FJb?UW+nl+L>ONV#XuRU!Z&LCGfQGr?~W!DR1; z;2Wvsmqu-!m9#&b023MGW&ZhuNE6c6?zlD>3O72c1~Hrs(4pG9rkSH8vA95Abo&D8 zDbsjbAP8ENZ#HUd>6xI-NQj!L{7-bY zQp(G@R-1HW7csCwE%jPB+<-1h8o;h6Bt}v;q;D>+Dzw3CToT$>%UZI;&7oRCO_2e~ zEsn3BoLefpvx(`V>M%zhB$_iIvcB;8McJbmQnpyOXwCx^F~uhtm^3wi12mn}@s-=8 zw3%wSRg%0>u(U&cmAE4j;C5r*svk{0(9G!|F9%ZM2*uyN?2Bem5AI4Fz|p;^wMpsW zflEKJo!i{d+0s|Il@IAZRIO&ZJ3znM7ocS6)$75qZPY}UOYO#jY)p;sLXA|pUdS>Y zC-_BhV{mWv^Qp4)G4hxiPdr^YE7^1=@7#i+iX3>B)j!Co*UNL(`IggX-kV~T0Bc2S z+ToiIROuR#0<999nBWJhWcpa1tME+IUi%WE`yieGs5SF$%Vpl(7LaDeWL>jKXJ4{` zTktk)5QtKwczo|wn?_NS%7)%I;D$cX1>olcMU`yqiTSx)+ksI8DwpogmePklhufpw z5sKoqo#ftY5~0p3n=rl@T3Zi)JutYLDs>cY!kE1 z_;brx2=%)N(bm90T_PgX&hpqh$WQqYpzX0i6!qk_wbt{pgwk2_9N;E2o7I2Z&l6mb zEcY+qF-KKe^&nlNZ&As^5Mg*uF{j}@vWLkDpAp^wuv79QsUmR^gi|m?acuv(kXjKv zO}y7dCsnu?L9c&*!#^$f>BhYx8X|+eASzhKu_9Wh-lKhf2{8VfGQzxi`bI}jRb($J zFrsX_S5<@(92IfHwJc2crTSN|lb4Lbp*>YZ_h4)Uk*%H_vgk9N@Fzcz@dta*j8)0| zD|UV|F4w=g;GY{@j4Z6I|5zoA41fJy!CwsVKV-nj@K+|}e_c~8Yih(|k0Q=I5b+1Z zsuHxk@gW=vDzeT0xw<_G^FgHFPip2ix>r9b9O~;`QJaC;5PGOt5G~->W)9a`QQ1@R 
zR^f>Uo>T|;^u2Im>~rUS$DwaP>nnX6s8$$>ev^*NvCp#V$k5Rp#;*@N8g>9o%#!cP zkfG;gmK*I8T-toiLkpuY^=-=1{c`=3nR+9Zdh;15iC~c?s|T^annpCCkhr4T(~H&p z`sL{SwC-yrpt5%_#^2NF^}Rq3EgF=Si4xW=(RV@EYP$mdTxkx9)XB^2EW3qxbNEo>g zr`4idbBj*xArIL4xJ~ln;>}h;W{`_?0wdK;2b9=hpll|gCGyx}Jj|5qhUqLv#Db`K zQqOi(<<}%ajm*l4D`zI?bUNtZo&n!-BzNxw=)ATu{H85>q6}c z+xq&vU!1u&VkY9wZ`cvJS7ttIB@u*^#`Dg|Y*eqrY#>{*F(aS%!`|A?*L^&NXz?6A z4vbZtL~m}~%{nH#M0vbkNk6V_`dZWIX-}js0G(4P`nU3|sRW@?*TEE}tqm6fn66p2 z*{>0jv)i&lkE-d$W@sdP*qm86rFGE42*|9q2*Al*d0KD=>{F?^P5AR!F!4Y$%Ys}$44@R3%ge)2RX5;S%dNlN z?OEytTiaZquSBtLl|FW?fLmJ!Jj&L%}%U1qc?uyp$W^j6qCXSVeub zn3+JD8_T$=bjg^Y$(+(>Myfg~i-E4gODUx`H{;MqGPDDAC2?J9#<%@+?}_q%tJcLppd- ze#ckPCo_CeCz4iBEENuhrs*>A#8u{EafgL<}t6a@KLk-Q&GwoS*o*r+^Dbf zEFTS1elRj$1aZ_C!XsF4>{qm}AxaKD+7A^z-qNHioFp&LQ-$GyH^*BZgZ?uwt`7$& zVUY&t6bs|7va?l2Mj;lM&{`}?fMCH+mo$RQ_~AeZV{xxcSREiE2pc{W*La&1H|d!< z0e-Ml&{k`9@nI1LJjN{qu0g&xF&6Uw7Xm4rkEXEkX z)b5Ui`)Gu-;iw~b!|l=2D{wQon7!K$<~)&fzv0dCr8E;#sW@i3Nw{^X=FIiMr`TLX z22Vk@l$%@Djy%ltOa17WI1j8kS0RW9Y_H(;HPpXSD!=kh~n>U>Vwk(wl}b%-U{YO!xE28S~eFHEp1r%Hg=`RC#m{U-{oOg zY(W%tP}eAvmVSJ86!MC$%44Hlm3@5v1q|mTjRBZP@!oVL$pdCYgrNT^lq>+`#CN^oQ zOU{&2HM6y)z-2tElch( zyJQR-GX4VL?5ynZL9ie{$oceZQ$6~=0|?j=5h)bx-}8U^IAAg1g7XgZ7H^Z0Ab{b6 zZddJKTAO&?>CsHX%vKOJSxT+7rpZz!_cK_ZgdP`cTOV|-8u{fQZPG@ASeOs0>OuwO z@b`i}Xw|3-0c^wsg^Hgm%kzgRDOCz0Ti;pCcl$y141}5opSIUahW8>hGBvc!Z&Q-I zn5;x$Y)t=j!ziF1Y#8n}0rL=HJlxTiq#blNm4N{&^`SiB<(HQB@wscs(x(bpv7HQ) z;i>bJ67mc0HYH!qiaj4^-`OUImvzJDM9!}{XU*zCQZLd3Fusaa^cp5dEK|pcaFjeL zk@J=K;EFbl7FFywjy^8j(B`LDcLO|{x`QUC-JgcXzV15kIm=qbC!2jy62sp56`ZK} z`-7CY zuG%Hw0sl%WNm+c+*Dy?j^Szf|l%*1*2p9Fh>Q`h2bz2k&O05XEhrE}j(ow0RMJ$R% z1NgxeHbymyPQ4r5(L#g=)6sB7jg{zWlVw+I3VdaKyvLf}+OnTaPV{3F)JC0$mEdJ& zU{?p;i3C$qh;x6Y*85K7GUKwS$!$KnFN@cKnmAaWML>{o+gB~5wFdwd$KfV@`4ETf z4V6UH^pmxZ({4)fwgE!WDfnn}aP&1%C%6}6Bup1dWZ|tMY9jUFiYlY(FHU@%m>QgHWuOHgU26nGfU`FZ@mX;R&Diw-xE<-BL&< zoc07^l)nyBE97xN09rtTLU`>}7lk05G&?W8(jfB?R;<}V0>RPX#o+-QL|21o%cg@K zShX+D7mSWPK+QH*`HCtCZvS6mR@v%UMz+>^3@ZK9GjM#jG}$6k(YshPcUR-=qgrU4 
z@POF3SBll-+r%;?Wa@feW$MA8SwG6wpD@#ffMya!fx! z6vR~jL-*|a@_d961k_&tFciTkNPtwg>&^6Zc)X-_-dq`tASh&qjoxhk+&v!;a(xUx zfftT;D#^f`FjF)8M$m)qg*!-GC2|;HN>oH~sN5E6L5y}95icNLxO=WGLw}?z5vw4c zZ=D-Gt)_M?o=o7|l84klE^N~D{wS}N4B_{OKzVVqRH+=`TE#;*-M*f9KFn(b5SCy> zM73p!=&@zZj0?sq2#40Kn|5maWIS7bT5Sc(=<*cRY zjuh~Ujy5nAOLtW1WNH_XTrkg~drkndGCPpW zN>&QiL{O8?k18l= zk4SX0G1{&kwHSp4r`FpNCm?LLZ76%{AkcYrKqo98uIHou=gIX6I^iinrHujuVSOIWANOV5o)Gx$U*-iW?uQ zPiKwkiXYY3v=MeGC##&nwmcpAY)|%kQG&; zXu1WG(2QWV8srcg$SBZ7F`77}5Rq2-dReK97JX#8R_fKW*_ z0q363dQjX1T(ACced_f(kKE2+lO_HZ77C-PR%t+3<}gR>BXd?>m&fFo0C^}em!{$o ze4xmr3bvQ$4EGm?wm2pJBqN3SCmAhJEsWHmNN2!sP#I@Hn0B3~02=wE+!YTV3iARZ z1@*&2;85}OSX|NVcsCeG@g{lpqjH)M z0x^!n(4kWu=8F>>Jr|CRX^c>8gCxter}{<{{Oi}wYAk-l1PE6Mwh3s+*V*QJDG+uf z{$BktV2=f?Jo3&SOJI}Zcw>r@2^mX%VVF`IGz<=B z%H$}e+K}UZ_D94}d=G&~!qzzjb}=FsRQCqJF3%kgeo=i485gKIdJJIAG0_2~KQ2d} z;+soi%(DR>R7r^(_{dP+!0gx%2zyrMsW3BT=+0or&XEYPx3-h`YG_Zu>Hz&n{U-K7 zMqU*C5VDqoXitoJdiuIWiDZrF);L+8FQ+~tDG`5u5;t6ba*EzCnZ$*NI6x0+S<}@o z*^=hO#VB-JW%Sgt;2DBT>FvpXDbMh?EAs5<6+tOnM?s5Dno1(_;{< zEST{H*Ug^?v&)ZxJEU?rfIvi>Y=U`oA&iWfCvRX{AITc?PlZb!y`ITu3>=pOG8>w% z0CaBfM-T=HGzMY`W5^B$gn;vnaZxWG$6-j{YB_MHh%=598uEQTeSK^OOx@QA6P=eq zOmjM90;)GprFrbaRZO`-YPu)eEa)K_)eYR?vH(Dgm6_c@gH@6Dte7ShqUn?J<1APD5hVL9L zYx0s9RIU_7;PgyN?PVa`A_$Ysg`x8*9D<#ucoi&@DqznKn){v{{sJ$WG+-gR_LkMO z#sT=6KIb$7dZ;X`4%Y9>TJ97Z0VBLNff zQ=!cRij!Y|7STKu>-J2GOD!a>-%883B1;N2qY!apvtmGjbNRBV8uc4`7t%z;hy!TW z#2yQN60l~&KLVg7vPnm|<(6Y}#QH)7+kmRzR3kEao6k20s2@(o>G24!%Y+0gXAum~ zo_KE=!K{`yEftZy?*O%QnE3eF3!%KGC}Y0SlM*;$iso0mH0wc$T?vYSFXCj2tg%zd&Gy zgq0b*8t^NPfBs|sYv0-7PHKrYycyvT}$x|w?7TI)%U9Dum8Em+D%ymvC`n6^sE z&IVZcE@@jEsm0Bud1>7Nz)EYp&;y{wA5iK+W|t^nCZbP16H|XdFSbb!OVfNhAWJ7O zrYg3&!JIi>rVF3&a|7HvyKThotHAhmEEK2rz}tiUpF>ag7WE%G(CN)i47%6tS)5Ym zefu4luIcA#Wl7KZ3rZ=QfLFk5q9d_Ajc|?8CBQiu8{?h~UT%`t4&9moR?7J?q$_${ zT2@-%q&@o*#Ri_RX&iLX34#PT@r__Gut{R|{M{+lv)4=folr&yzhRy78Of6trx8{78Z$iJFYEvvs^_vpu^ zT)}AZ?)WC%BZY*{eweXNT>&zXi0h$njR{v{*T&Z~j}uoa5{*FtdLcb#4oBL~Tb>6E 
zxdG@?@1r(L(aOA4|A3}%b>_Y)!|zNct^XNOHmX;P-2tnzi(=D7&jp_w&PUUtcWZzC z+Gye3poLLL9wdt@n;*W~da`E7_=x#uKuHI3XC;krR4!q)@^H3)?c12X{q6?ljS^ep zmS>vu)}n_R2|?5JHT<|Zcyc<@v0~KE&TbjSz*djRTIV|by{h8dSsYwx-RT8sC5mrtX&_>g*vUe#=2TRxfHpf({)8%n4mn;&2G)Ky7 zq5z+IFO+e!(Fe2n0ADuEsU5o!_*zI~dzItodhTjH9+Vc+Ou6)ka|q6WbJ)S=Bga0t z=`DIW9&4e~!08MKAXNUqx%IQd_FQJowkHHp;43@+gas4&IvlLYXk_r+^CpcfYTvC- zLpXLcQC&Y%0jOnOmI??P;EwIDx9_p%blr&M_j{3kQ5DzEiK#1e+*#eQT+ul5>RUpq zzcvEp0LPqG4;^dO@!kF%Be4?Fv`DPG&t~m!&H|$dCiK@8&n8HghN1muCv6h1s475T z_!SuyQPmE7Z3(uaN|Q0mgS2Y5O%uG>tlDk6KnsD0Bu$@SI`2=wo=Alm-XXR7xLXxT z^=DW;ft>?re|y4YL4L-5S&CX-LiH#-(pSq`?f%`6C2j~fHC<6C`(X8~=9$oHOGEO= zt?HUAbuB4?bL)pabV(v&1t7FE1`rG6X#(Xd1Nmq4NAMIx<2VGWBGLC4%)|wyVs+oY z^I@|t+zXX~3Gv*P>%7${{1{0^5c=^#Ivl6gxW10*cvDa*T0HC`BOuDiQ`!UCBJ6z) zP?V5BbPK1U3-TIzMs`$3I0)!5@dXhroo+>y!Jt`?NhtAZ66T8#Tr^ zsBo#V2d_K%aA_@O;w#hUv^;9qTlcCYeGofoff1 z+9C_lW!h#`i>nCnL1VdwD$a~ZC+&X!DOvWqn}lk!64E%B>-d!m1%4i{kL$#TRg zAt;T!Z5r2Ao*twa>&nM$7KRn5(4BRTjodbQaTKwym+}=Z3m%X~9P-j8J-K?orwsKd z#~MTl6?g3V#qx1B95v zN?2438?tgOVJFv@5p6Ol0#T&jJp`I`*M4u2l>o}jO03#c*-(m$ z6882G^SJn!>J?Hyec4tr`oP_iTTf+|ws{M!Cl5FKgxS&Dg7Gq+C3z1X4e5)9Dzh5` z+lPDWW@qEVna-N_-cW?9CDQ?_+u_>9;4cC@b<(uWCLkQtwEdvE{EZNtLFdJ9{-b2{r_x_yZ;8)V2vY8Gaba?XQ%yw zZ++ihjI$cfIu%%rk->-M$45^PvM}7{W=dB5c&g}8fnyP(0Uc`i1#QmK*;y(3KTclO zl3sr9zgK#Wznx-o8@JK0blq?3uPCyjr{o zAvG4i+#fST3IrVEJ!Hy^`SMxfcF!k?!-XXjhp zT6F9QnZrjNI~EX1gfv%eS8}#0(1f7N4lUN58;xENaPqDb#M!kSegIVH2qC#M$pj%V zbR<(YD4EFEc|UEHQgQNscxb=&ozz%$TA;!PD@jnXiGy4+=}&4f%!Y2Qb4e#83&4-> z0>wu@jx7k2^Y=qMvXP|L<~SALWkftR+FoGqpEH`*XfO!TKO>}Go{Dxx`!}0aul5t= zxp^Muf*Y;41i;J4h});8628Y4)v&i?=h^@$oCtT*taW=%Kw_2Mr{ zdc3Pr+*a6h^H@{%CLzusK$_*Ya%{$ zVJrf?6Vlke*w9PEXYKg_Ait`B!40CP+GyE)H3p6u8ea!?*w^Mim)HSdmh$g_7z^}) zvRs`Chpx@aMOb zB{cFV)1V$%qHHOfJ1;8X#ACt?$fw9WeDaD)B2}V`TKa{rU`Qd+SC(F$cZZe72TL|r zZIeJ-8|bY<<v{3*64Fve>PI9P2tQzFI03+@0p>7SEI|U94KU!X(mRdI4~YRJ zUIUM!7oxP7&oCb1f`6y7=HcN}hAW1P!qMTYQQhL`ZN%q+EocCpDlYMfQgc%46XOZ` 
z(winEp++IN1LXC@RjdYyVzEgQEM8TeZ_ys2^+7c9>NumV2ADZ>hq-G>B@j;58zT?O z>sY(Inj4Te(HWT$kJxs=fniaW43mvwHSd8TD7rF~m79*U$gn&-dvub}6RDX3P>3KX zhUqseq@t}Q9O)ctNw^mzhS~A{V8;&xV0gf4NmAJeXEYoUxb>RI1 zHy1=0+LaEJZ|aNxoh}Iu@XB320>_qAm9=59f?YMf%vWaZ_~iDo6Q) zbvGI_1%D>L20|G%PtMHXgiGgXCb(?)v)bsteorEk6%F`zYCaiNP#!8 zxN%Tg^_Q_WC%*urk;)*|9De0K67HFpBxp7S_i z7#><+J`L(fq!#=Z_(T!pNo50&qP3G*#wDE7C~eZwGLtm}J9EYXtq-!&ocx_>75r}0 z9eEWwSlamafI;W4s)^sxh8c(x4Fp)f{0Hs2g-zHF=CCx!)|Iufdrm`siW+%au6e38xIJ|CnqAUxu_Z z1~~j`j2G4*hmvl~f(4FE1W`7?LP|1%64h1XvDvnz56*APS#PEr(p51)5p`8xesQt% z27o>DuG>1WEkfD8+jLdgN3TLq>>j9VnGZr&jAihKVZn+-A5E>Vz>#YUD9g9GF+qgK zRtB-A7%7JdN4cjS&iOXEM88oyT!xM{_{lPImnV`xy&s)vtH4f#*;qPffDoy0D3xca z7-XK?@5U8a%Jm0cDjL1EzZ`)99s;n_SJmA=@#p`eG*PkALOBa4%5a-3$-diX_;j9P zpS1i^4>I}Hmfr*R!Nv1N(eI;V$HZb27tRrUPs>zuzqz1ar{$Rc?XHJsQj*G!O*IAbqI zY0_u?xlB3#kZxRtOVrRXM%I-Yo!!fUfd=jN@i7+9D3>^6Z1FT|xfY^;Bd)BoOHZ5} zBY@vege}eXI7Tz^5AaKe(8Z zM=-z343_l=hq=&>m3^52bvWvY>=^@p`7*X);}A-mLY(p& zKGv4Ixu~UOy|;m&gjy(tGH@O0@a}W?C!WwjVTQ|PnQp=3xvSvD9DaBv4TL}G_CCdv zdT;uexEPWNm++NXsaAv`B_G4SZZw2CiWu5L)h=9oqs#!XGkEN0U@XF`n&+9~Qwr** zlZo~Eh=A=9p;qMbqo>tZ(w@f_yeBU08AE=)#7}-C^Kz9se0v|0} z;d@sQ3*KXl+1~E@D!@gJlqX%RM6(-ZLvvWIG<}}5U$`Eg0c{(ynR(lZjynpR1BP`#BOwUjdSv0OAgE)i=mYmWt&c zM&ka>K6x@cv7^Ljh>`g)d^LSE+=*6KOV|fx1 z-up%%NxaDpE2EKJdfCrhl4k zuV~{78q8D=?VU{GJs&O&zvHr-qYpq48;DmdE~W{yN~HoYSFbg`6av@~*N$+`wf)6_ z2FyPhd+AzLaPvXN?IoM@A7|}lL0Ha$ejshZ9j4(2&k3pu|7OAj+qtr$y5-)eE>^17eU=cDDeieYE9gHL zL^Ar;Y#s6>a?~v1`yA!SLL+8lPXL*-F7B%R7J#_Sz^Yn4jOn$$$R~XiEs4Sf@2pM3d=EIt|obyGp^3lhz5g z7O>gjR32xQ-GF6tufx|x&Pi771<&@z_pcTAUO*P!G7++{iz{B9MkcJFjPlbB0jLU7 zvvD!^$Y=IGDVGNkTD==xTpAi98Wetoc_dKNw9zXb<0mVyg$3-b7!GVI@_p`)xG3_3 zH7j^i;XOW6pq>37y4X%T90sWLAtuqQQ0uUIUE=&U_$>Xz-B3O^gcTXtDu@QCn*|To zhRlNopIv)!AHkK{?eW#?^R=hrv&a3CQdw-gVI0?Pxjy>x@yv;k*Hgw%_^&;CUbxe? 
z0*n2(R-jSXgH-X#=f^Mp-QUmL0s%n?B4WFqG3Y=f0Q$);aSi4fbSp=647QR{@Ac|o z-R4x5vQ3=0!_LDPLpGg|$b6{-ZzLk~XVj78?Ov0)q&Tp=i(qvi-EZe2os($J>R2kX@LJc>ZUs90SqowgT11DW+eIp0#lhL6yQU`}HNx%@vtK!MY&Y?dV zdoEX?f7E;ob{>zidM*?8vU;+s>BvlbidKqzG3E>$rNBfRVv4jE{h-obzKEzPJfX+9 zk%3ukuotLO-jh3hdPDBRah3DtoeEgfZ2UL{*Ijwmo5RS}ym$cG^KCr=csfit^wK8N z*Uw8gT6E};2=Z+Ofn>FQ6X3i*B@Gjr7D=~2peLi0k5K;+pR~qQRHG-~#ixEZZ#ZZ1 zW`3y}+4Ng)dYUL6qVylrM^hR{4|Zp`UctpG5H6G?;g zKF)H>`*UP{-wc6eUj9a2_OOx3~3 zD#GR+Qh>d)^O5dtr6~Lrp49`&4}G3xv~X{&2|ZP0>&KAikAA7aU_{6vvFzu54Vl^U z%Z^@*FLun1PVC4NBoc(E5!?j?1P|9^V12L9fWLt%F#0V~MMOaU%FZ}QU154g{?8IQ ztF6mYi_n^-e(l;yJ?F$)sP`l}4f22mluuOU>d-MrFv9xccRl88{IsdxZ4v%SCTj!9 z&X9a*F*^iJi+KT=Sie5}KCvsg(GxVe{*zY98sYUU%iul_%SH0$iq%f|q73GnIvpDA z`UHgH{n7+DH@S#zBD#C9jC)L_IP|f60ImkXTIgEtpK?pZ&JOTfPwvl;-2?gHHr|uO zRf4hZ6CpGDxxbX3w=+}qgbuI!_hS67+}`RRogNQBy`rkU8?Y6k-kFsy;eAOY zFtcij%0VmQn~RMecer^Kr%ZmD_tjc~!j=u9MS*RA_n`!By!Ss955g5XE8p;=nl?8^ zK8RDb^1e;+9_Oli;7_48?@vun*I$G`D3hovLSYw=Pl{-r*hsF(fv5M# zrG=A0@%qF4nV|2QUEN|2boMiM#tQ+t?WNBYLKz_3DXJuFH7$&`Q1MusyF-lNl(C$a zY<800NvwRv-sQez$#6e8Ca(q#cV`52UKcv#kum*ll|Q=BBvvwUdoDu&%?Dx=v zy8t3IrgJitmubAIc7}Z`rP&d4RiJ>a^ptvcy&1?oiOJ*2TM2^c`Vew$nM+4@s)uS? 
zRL5i|f1()4@JpBaAA-B|sNzpjr?O;zP6=hbKl6M7M%OD<10FWeiIo{5eBx7UmS=U8 z10G$PhbF8pZWM^p!_?8Dv@iqks_E7tcSP=#?t$X0cpHOEU?Rs0&a>8{-i1vKACc%o z6%h=t`U&)ciONS^?)#te5Lnn-StcGwj?x5%w$5BEf=C#$RfZhXj(JdEW5-x5a7#lv z2&@&ExXcMEA<$xijndrNblJoW^;@rwowQYk(cFX>+k(U?$QYtHgdA>Yz8Gjpj=CWH zmWk58zdL_pKZ6bCoR%&~Qk6j~uTu?T#Tl{#YL6hBEmnk8)w@TY-((ltXyh2?QCH%N zBaJYEYX>3_u^-=t$x$(-ixM4mRRby<@nKb42@4Ns^+2ExWsg2%q5)bB$uBF@Jb!C#2se~zYpd^@8@v0pZ1)QN5#kKUKBvi<&6}G)aVg_na$N;{ z^cV9y?m#j;g@%Ve=Ps(V*!2`?NEt6aIe`*7z!P;z*!TwlaE3xR!DA&kkb-6xD?GsX z?L>jKVmzrJMsO$2RG0RxTlvAu8c4xdAjv6{4;r3 zB>cL04vzU6s6RC$vxHWk%Nxzu+0gnhx6H>KL{ES<8v)xUgm;+OiJ4p}MD{ z@iMzKtc1wacB3YB(?#`jL`r>C#WX->C{z~M&4>gr`+nBT{K4o{?f{j($KBr;mMy;> zKYNcSkMpn&5GJClT(OxF4~6K#X93LF@>TBS(&L7xiLRWyC&udm(XyCc+m~(A-)x*c zw{iIDSjx=kSi>I~+W8{@DELRNW{$ z@huAMl5($LEjOYgrQ?J3L_;e|S^y;hz@WZ6QsZ-i7`$~}HzkW?VazNmsfnHWyAw7A z$s4~vw^fJ{D=S%{MPW=9UE=ZCMQnfEj8fNcY*oVSdmn2XB8RX=I&Ny!$B7=y63HSy zO&hhL`jrwR`Q6IKu68g_@maz^XRo=N%LAR<6SD%(5>#F>uNAb<-iQa<4-Sl%vqYz> z{kZcOrbhQ6bYVOdl=zZ*s){@I_pE?jLH89u=4q~v2nD?N>+;QPAEPSwZ=GsYU%Gm} zaL9~OFz(GV2VMwLSjIn{a&`p~+D?GXgupDmwG513+DsLnbuLS(>39pChgq_A&(%4- zWbBweESJ8QtFygn2~W=d;m`VCQtz+5oc>=63a0;HHevd&%qCYFYqmtAC{Ij$__M(e z@G&FQ%RNI80UA6Y)d|!L5QvND&Eu&{Mu{{DGI1fF-nJ`_RT8bpfEoA#`q2h}!yT+g2T72i>^xCEKO^cIlSM~D2j2W&z4jddl2w^o7O*^1cpjVolh?|I2Z}vmK zqTe&fM(3%8EHr&o>be1uTZ_l5r?*f+f#!!s=rL-tZd|=9%aMhNt=WhY7gzi%B?pbs6j0phZ3z;;Cz2|byfa- z?eTDv^!UjeI^8bc=`yipH3udGho-nivu$sI2#K{!*+D>Q#9Ct9IbuExnbw~*I0Zsb zSed<@<*P8cCr@4<)sU^fHo^PmF9+>ni|rEXKXdr!|L?}zD&yS9Vzu(SgSD+8#&`2k+s;9Mlnpn5;SNI)dYQGY&M zV9GVS@gF|9Z{H#yOg3SlWrvJYZ$~8$+7JZ5X|jydQ2-pxG2`dLcMgPD=QDD-q&c@_ z`0-Qzq+DKt)tOSBNj_vqk93N~XK!+g-TGb81GQu($4 zc*`QmTTEbP@O+9ZU(Gs0NcFG^%i4mg+8ZA{z;<(($+F6h2ru95fKp^st<1kWj$?`t$8mTdLr4SZHgoG7*WQ)*!SEU&rPf%i>AlB@$ zf`;|vMiYQA^DJT^`yt}MZ+igYp%WWK?aU%_jg%_&H0`KnAtjV?bprm#yGNmh%+a+0 z&ZZkcrPKb#ZFgoz_L!osER-mbTWex~BTU51dgm4$1AqR%LBPqUvWgltxP{_O(=zkI z4PzXU>F=zl4G!YGN9&zP=K#crY9A*0eO}Z-mKe_fV;n&Nnry?A$H9<2N{;?i1<7j^ 
zsjUy@IU}jVvki?Q#TGSn`tTak_)Rvg8iy9#M2hJ8F-PTf7p;#I=|pKS5J@EVRNGkKy6)feGBqVu%kf9tOi`i1jvB}Ah(0Q%S6xO~d= zeH$Y%gS)fNAt+UyW0HL30=(3@4LfpXhr*$!tXQGtNuXUxT3b9_HEU96hw+ZmYTXl?vGBPoJo;Nh;!d_nfK|T*UWugw zi0HA)$D)M9*yOpR04T%DCKEa}(vJ8HX)O5*&L>Fd1V0J4OBOU5VCw^Zn`)NC+g-EC zbPy*Px3qN33WIAL$BSiyK_#3%${?IXMAt?e&2)<2ouQhJvN!{BCMgtN2&$BRprokB zboeOQ6R<4{GtKeSY=9SmHTV;3(MVi5YOoz$k*Z=jBW7rkr)%2P*&#APuFnHW5z7Zn z8;1NIzODi@&(L5|u2Knh>%{%oAXEhcnKK?CcIZY`_ao2Bm!)ydH_@nOWe|qHHtzp2 zP(8OP4wX$b8>R%p$gFLK73eIFbWT@Y=f(^etIZ9Mhnl>A66}g4DDA5t40HEHMjTr= zgWe{nD;E@mxzqei+_Ac-+EIdf-+#9sPqf`bfCR4Zb0Sz=lVH8 z$9kn@gnz0)Zf=Ku)C1qlWbNHfA22QWEli4XUzDPiIzC>qUAAGnGhll0Hh+n{($Xgm z!FE`p6ypF%Ogww?YQgHpf|DIA8Z>RE1|{{vuI(P`eDtQB)a}l`KHWz3XEp4U{V)yU zf(#@ZeCXTo(@s2_5`Kk+PWHWpVjP}0z~e|mDrfo0b*mNLdoD(K>LXdke|Q=6Vv#%> zdDpLg`1V!cZ}PEIAg-?l)m@aC-H_qYlF`wU`i#cxV(yc@@aVK4V*jN;)9?e3$6zDWiX4DCVwMl#)HwOK+M}_s=co|0fvI zqIO}osEWLE@`-}_AsFH5wFB2cpg>N7NTPELNe~$CkiB2y!R!F``NH*bR_t0JE}d|G z{{GtKck+2H6N)fi{}p1^fxZc2%eX(YU!-kZ_O@$GXkr?lpnOlA2lok1oJ6f8O-SA3 z&EP(UASU*<1N_gNK$sVcy9>Q{W_A?K4CzA>lQ{h^JF4X+OrIDzQp8sDS;AuiVln8> z?!(%{?qeAzM=FfRZpduNoJlw^N#8~1G=(mtcyXCd$?lTaJ5%Y=U0ux%E)_hMZ}|BB zw05{(r*<6!;$S$Cb+(j?Lls)DPsl~Ogu_MvHbeg_vDYoG^fs%G&s*2Vbg^jI*3(#o zABBNhocn!7EG~1{lc8BKlPEF+R#2#J(pSPkddP^ZOu(mVs#C5-aOw2$JM4bzbr|}y z2kT}oj39o1{ZD~IB*l^VWUa0M#%YMN3{)h!RLM^b9WeY)bx&p1nF11! 
z36KSD6Dzi-&*9fWyn?lPoYhwRIFC>+uXQfB2QMey5uU8r+atLNf64V`(_nt@bq3J= z0R=>lbJni+(iPw>8+sa8)#|o~8vNt?l%uh;Ba>4WpxlLd1 zJmrXTE$W4iQU@D$_B>Sb8%Mr(^?7{^ii{Derjx3uhx`1Jt5m8vIsp?QY zp&!yiQ|v&Yj%AqjP_w4hR=DPXAdpf#ANA5`(4*5w0+F+Rzq;$#Y=s}8&y5uIZ+3zs zFwqpwx>eQF&D>wYp+8T_y%_;Gixwx#=Sm~OI#_nX$%+Lf&5Qi8c3s{*lo16Zqr1vm zoHV;d=b81z$d1X*{Vpl+N&{<<>dOFh+uA4p;DkAj(B~|jbwg#QRpovb-tz}&AtE+4 zyrK_Dre~I{IX59ES7ooX@{R*!Z~qwAGpxq*RVN@{b9c zYkM;E(V5LrDUA>Qjavwoxq&YM3ubhNjFBG29pIo!pvPkgk_HJCODqn8A`_|BNe^8v z`A|un3Rsv~+R;vPyxj5-J-}#PkVUx#4Sb-F55xNW*4xkDPfb6xF0gNlZXT=w9i$Af zjEdjKAAt&%bzZq0iW}x*A*{LbpknQvg4)M>f-rJjlwKBBfKNG}T(_MLXo%nw=&-Uh zNo59|j>pRI;W8N`{Mqw+4Q0ili^(;dLEn{zza+nt+KP9|xejyE<*Mj1Sl#tQ*01m2={3IM{Z1Ao3>?cnZ6=gX9N z%J-D5#80tzt8lDf2cI4P_aABVHok{=@fyYcJXt~M`VK=I$EC^2h~_huSO9Y{uB)uB zcSpt^?`LO5j|~!@1a&RaS@Lxke;X&vrVur7vG>mh@CtEMmnq zq4`1+xuRy9aAa-5^1)q<24<>$gS^6XSdp*()w6EHvK_NhK2-KE(_Ln;ZG-qBoswWX zeBUwr{zhpn@b;bG?UX=a$h(2kYCU69$UuJ*(=`rNkoS`a&(~^%ASQ}EUf1l<${}PY z4UozS1HNU6j&+F)TB5lVTBXJk+?vCD2aw4v$^3)AAco5jOY^=dfc-OIN_!Ofo1RJi z_v?X~DD^WO*#P(HK?otKfmadknn5=@8%;<7m+g2}ux9BArqi2Ws_q54*vtP~oCLDa za@|iDr7=ksWI!a9gX3D@z4^-KuI=e;oz}nDRdLMOWL0)pMKC-9#H~;BSRbBW@K{Qh z2miyL@V{}`49u+mrx$1b4}Svle+5%_rz_ZDvB8YKc<1eJrvk+gDk76~n^MsOW7Yox zd){1KVHUY0jzx0*cr21Fi<+&{vGNB6X0UIl*~ufU;CouK1g(j!+w*C#%*`xAeK&iv zX|Sw2nO@Ed)g(UZ3Ud9fkkIpI^@mz*T&8?=?Xjday)b!s+WL(02KL5M);Esa$SaaC!`=B>^-gc|8&f2$agey@o6ARePi3hm#IvY1n_j+bbM5pv%dG0vG_UoI@T>Lv zk=QonYvW_aGyS^p#`1uQPY_62DAf}a2>f>cOSCZ-sYat789kUd?d=yy1x+OMKO@%r zQNG^4Li9crK=oEL-P5H-;2lZi2k;U5DvEL;`pR$Tk@s_ywFiP|j66c2R*e?(o6{WP z+)4xlCk0Wg0=Bx`uJ!EU845lo99>GAt3YsxSH-n)6o#+M5n6T+dUa0V7*VEABSTD~er#-T zFj)LeO*Qx@w>A<;MM0$BSfwqh-)qd(qUnSuI-5t{btXPdZvYCBI3~jj((R)2svJ<7 zRvq`R@B{V2@6Zqd2-5)spJZ80M=&cxycsu4aw|uL^C|Z)444u4;^HL~5jAr4@HzEB zbhaYVv(`u>zGw)&A!Tr!fpdtH=HD$^*%;80v%h{}K0=fmJNa+At)< zNr=0ULDb2rw~&BSdwffGdP2H!4gQ($P!V#eu)hGWA%P>tY<;{Uqq5%P9w| zO0!R%>Yp^+UT+@bP~YV5$r+aP1Pu5d8LlOJfX7y=oWi~;$iJ~HCTO(8RO(rl-(-{> 
z@`#oM#4(OQbcqKA&oQwrM}n|vLu~o@dAv-%e%R$#N|`ttHsPxlgX{ipH?6%QfLSVo zs73NUp?8rA(z>@Yf)HPXuIUs$#WW*QA8X@cBuO;cP#h%ol=#8M+jXGd-m|4+F?L_U zewjf(%uO)!DLWZ}B|E++u|@_N@+%TWF#?V_FeI!s(2pD|bK;BaZhu z^FU=$_G^aW;YItV&pF>b-%x3j*D(of@1Jn+eC9FxoJ-~i{BEz~ZeVLX#=8x+bH#{H zMW(VR)|EDT^!4^nkq%3c%ZcSgT%K@32b8)$6;>Xs{;z@QA>2H$nCaO5`1t0duq5wf zsRxkOGSCA^=;<11@mVnT4JKpOu|K9U35OVXI}UhffE*%UaLe7XLesM;3Tl z&&I;eT1U?YNQ9QQw$PE&vsDF13kcx@=z~)Kuzntfu)oQ}^uq-H?5{tJ3}_4gv^)=~=zldIrthz5A7uP#Kg{1P z_`4zf6Y{8kul&!pGymHD@1Lma4`+I?4VLfC`deQxe>cs?ZD)D3{{Ij3g{T=YR`C5* zF4o`3_3(Y)$8xd$KalH3+yVC0pP}aATWo)k>!0=Q-|36(4|{zi*WY6x+po7f|BPMz zPA>M}#GZ#8_8;qu{hy6Jzm@9`8~;;Z58EjJMXp~6{C`njzllW;8-G5Qi=OVE3`Y2X z-zkiq?hkAFQ)552q5G?7kN7SBOR<8U{*S@qv1kvZnnzKU{sA-mQz86cXpR0ie)%II z_Agqa|KImadWPS)CjCP$+FwLt_}_O&$?v;O%LbpC*t48L*AA7rk- zh{X8+z%vnGyjC9KehHE z5Y+`vy^L&~08-Kt`1Ald34A(QI)+D;J}O0)3Htk)?4y6bE8}N)mZhfyHvIel09krQ zV4MDW>tjNSEF&HCkER1;8Cano_?5t?7?_}crh>@QvqAsR*`IYiOo@Kg#Y6|q^s_ET zCg>mP16KQ+ryfiA=&9e``e?c-`|nGAfZ)GmVP=PBeuy*xS!PCP=7)9z{zCx|Sov20 zEbP!MKMP=Cfo6HI4PaS+7W*4vSs(Yyqo*D<@sW~$Q@{iI{3QV!3pCr$YS|c}*?u;G zjSiaar;>j!;%|z50FS?lebj_Unt3eZpMCf5rDcEIjgO@Ly)J}Th< zr{F&@*e`|9(=h{mkQzCQ{L5Cx8!MmoIa z2ByH}OlW|dt)7`8K0ON?G(f`0#>U9pK*>ng))1eOnH5-&p_VmpNs}C)4KM*%Q2bcD z6f!c^W5oXfc>nZ+hx4Cple5#d{l235y^Zwz0N?|*dcfA10#{H4^mHtA^#HH+%ng8* z&@(+udAPZ4Y_0XQ%%B}pbaXYfv^8~gH*C%Vf~?Oc?9X9OnV6mg_S{G4-x9Qc<&R@u zcUUgP<4IOl`nG|3ZBBH#7<1p~`2@HLQg*Ubm5m#(o{YY;P@tDzWu8}NZf?Doj=8an zzO{CmmxZ>dkg>ToR75oJ>y_y#i7K-1^H&mpC(p)Yir`~ZQdH#Ql0eEQ*m-I*s)(ya zT3LBWs>#DWJw4fZI08KhJX-x*F%DjX+xs~RF?O}zW5|VLoP0C51;~w*o%kQ|bsYsS z=fBmxqd&c&994RX_Sy~R7WG^4S5=A4nB$MfAj^U+RPU7HZ@5k`Tt{Q76yh&vzYR5A zXWaTBH7c3!ZOv42a^q*plgz*61`rd{en|E{7UpYjJKjDXP`>0cqgubX9q-1abLY7o zm#)e$b+C2-bjM6JZ-sT0l)$lvZe5r7l^j9$`n_LdbhxNtvbUYEbLjmxb@`>Bu(I%d zqg;iS*kDs^bez;~?ED3n8~^-n#nG$Yqqw8bb?%b_gg(r;FHNsli)-q^c$ZuUj9mMXp80hT%rqFNqa~TV7!OiibQk zM%znAPK(i)&JBpFa|{j1?0b3g)arGvM_swGChKj8h|!FjUYv!6j34lqK>Sd8rn}Y<*m{jGHWR79g=4SF%W)SqQTp0H`!0kuVxr;T8n&rUp 
z86~UK?YMPq`gGvi6HDk9FF-+`&`ptte_wI`X@S3{9uJ23-JDq%|9D~WIC=Nqy04^! zD6f> z2mE$h3M5%B)<{W=olUOl8_&3WJp^!7&FTo)hJjHz#_V#x6M8+Fihv4Z!>NK%>V9H+ zp>3DF;8vEQ^o#Iq=aL0hUquRdL7BeAu4y}$=fiuUv2(8#d&A{EBSBw@&WSnS-BC@Z zp@@D<^S3i8Cxx)+6=V{`uy1}}t1LA$kO$uAob49fkW=)!aEdvQ#YWw%a!%RA0zTRJ zaCfAjq+%jb@FsA3qM=L3PH!tqP@SSCphidg-B#|>n66oMFiZ8QEMP7Pm?t7Br=nJY zl6e*&3}#5v$G;DsY%fT?C)6TMGa(g3*m*-WvHLc^LTN{R-iEhLA*zW>pm&l2beVqR zkPEK6b$^#lRFakq&t`s!OCBU85a&4XvU~MqN8=Kxr1pz9e5>5+j=4)3Jm^!yFS}s=wo^RFXw)GAQ$Yd?cGHS!U z^jRriDw*~B5EnXGmPW*+ADYm*e^=){T6~Rl(*>sVmcO7loM`Umc9gdtU7)_yTo?^? z`$bd3*>0^Xclv#i@Ceo=mM};}0fwC(^N>{!xvutthRmjUsjN;7z<&WWwgqF}BR0(R%s_6JMkW5Z(5qkEOj{xPy$#q@gq^>n%V+k{~Y1bYQ! zhn8rJuZRm?iFCCK{kY)M;Ji%qygWm@fo%$(oR%WL>Jv6TrB##T%J~)$kGl{}?&vX0 zJ!<{rcw;c6cJkx&CinO$VpZ-LRRYhkU>mE>>FgC?mD_)s6NT)GSgDX=?%vAaK9>qD zH-qf6pXJ$nsj&8^Z!Go2YC25V^zTI=j2vpcUr>Wt(?=gd=4!x2_IuaWrxR3z7FE4G zEhux2Wyx_RUdWdx^@}lqzeWqpOzHStK>G#S*x3Hu=J@X`pZ^Mv@QcaIDJXvrXn%nt zz&YrTh(<@p{y!VhnCbrxM;I9&MKpbVfW8qx-xQz^JXqOT*y`zOn_7QIJU`BU{IvZ3 zs@7Cb-xl!j>&Hbc8z2@jvM~Wzn%da_bS%uwv;c-qmWF!f088K>7Pvq4aO*5s<<}k$W;(jx~Tw4k@SqY)1r8R1?yGN6ZQzNC;U>^OAX%&>itpJRt z2!cm#BkZh&d@h=L-#-+lY%L#-@A|3X-YYR(t#a4Fp-?g5ZVy=?>W@#KhbL**&MYsRN1q5JERqh>4Uc?G!%a=5 zg@uNU^L6o?D~9=e@*FmY{K~I7I2&ZKwp0}nXQO=mV{U62$20)ubne8w;G%0&i#u7- z=XkbBcB@fuQiI1I(tWz;Nv&BJV;4;dyW5mcWTU>+0f&JsPkw#0&X$Ce$sW@U*m(E( z?sDZ)mIElthzJ;N*$%%<^!vyONn@_+kR)M=Od*-Z0@#Vh3DpUCW2Wjz70Hhh64^^R zEHYH&F2>KR6IDbd^1EbJ3lt`v8)I+2D$diFU4LIZfoY7&>^Ub?oU1Q4|K4r_(HPlkED zL>3k$@5*9jUlEGJ2+B(-xIsnW65v`8To*KOrrm#IJlie{aN}2d{UYa!(SWJmp7Y01 zs@mz&kKbIDcPnd-zcz>8nC(klOv5U3tR#CfLgc}!(C<(ck|;7qtw!PZX14~l-fO-n z=V^XAwQpKv8f02Kr-6s~a-(kqmjB|#KG#9d33EFD_TDdeWk}RJCw;!$Ucz2Um@Nzv zFB6wtcVnKYUHAx(*-d_@pgp@g1kQQfZ6N0@%@n)o&ayd_{)YJqdu3;^|Bi-lKue(Y ztaSP9AR8wn08~G1Mke(Q+2+OQP3DS8$q@Cqq68@Gi3iw9w!v-xF4oKX+N$i7bH)DU z)wbI!M63IA;?Kn#1GkjkU{wIrQ=<9liuYH01Ly%yV+onz8Xa8Ckg6BCrg+;g$Mxp; z8^Gz>MeU0H=*Kzus3_0ou!|*~k>98i!l$=ISBs^B#MC@ZCP?^_kM%XZ?J3(cVP+cR 
zE^NV4|GL*sINlPjaKY48$s}=sH}`WopRPP-CgrW;;}dCCN&?b7aVuicqzr>S4iHP= zCVcpj+4gi*tidbu$T9f{WU?hC(7l;ilzm?JVcTKm%qp_*Q5fX$GZjvZ!aqNndxGNx z{8F7S2p&^rdmAY;_mC0-U>>o`6*h;DO&~5|yyCTVhrAG~uW-*AXo_l6_*+iLb5nL< zNoE-y*nuX!-~ru9d#gi~4W~{OS%2pY18SDBA!}t(TIEu%%t9|Q#K>=a0*ZaB0m#L< z?oHRNxxw{t$!ypc7K-LX4kHITBl?&}-m@(PB{S1y2yq_S;v+AGtPjb6i5sfd)L58$>_3@nMpAM)w5Jm3A?qMK7m*E@D@uA5@L98 z%AFF>)Tf0TLpgf!6pcGaV#kabWH9A2K^AxFV{xM8cL_dY3w^2z0&f;v0*AT!oT;Ek zM{;h5lHAkuor*p!W?+-)h;2%d++J)(h3~UBRly!@&fl0pKE#guYlccu9bjDkG8I`H87HN_iWi&zE%h1=rUC)ie7=! zgd*SXMB=8W#emk-XAFz=3nrbrvCM@Pt{wFuupWL_!Q1Zja4^Kf`uP3-o($Wc;7Xx6QJnCsOxiKZlc!G{2$EJNGYgoh1Ei)RQ-aYEssAo((`N2sV<}|^-^6L zkv@&RISO#(+33$|)p@(-<~Z`qls6&pn-$@n@AbQTnkY-QXB-u@W%KB+Oke5LJirm# z#y9;7!)h>-SLv{$6dL3kJ_&hYTUbUM#<3s6Ej)d#MA0Wvfx0y1)1*?=qmdh5Oj3UP ziSd9EGjy<;Y5bVf05|$G&zR%a{fgGRstslS4DsA{=>?Y!#{uo$+VzObvB+=LD6+Z- zf$BzlRK`xWpJ)vdVc5JK^kzIGygyW0!eiQPn}necLhMQMb5(UDrP^eR5@#h$=X#Ya zqMh=5ahUCb!ONS3=G-i`pMHkaV=>_i$!S+c&cIg^phTJDh|993u=6bj8}YhXrRS+ucPN_Y}p|Qa`pYh`Y>!h3>Pq%k) zUSFLi36bZyQziPw4T(e*mrhF&(7br%3Mk}^nBjYEW9~@Sf_J~l7n4e~O6@u7N2g{? 
z>Vn8UNz9eKLsthY2J~pXW8#=>UV-7T<8E8j@X#r%(jkywu(3PhO00j!uo{NudcrMp zvvCe)O0U}&D<&ecWlw!>Jmcm{IJ0)Rf02!}Lp)b=4^rXC1{+Q`6LhWtB?^I=7}O>lJ1(BDj!wRTuDY2 z$5fQ6A%%0tR0^A-4M>6sn!cv^!to+|@FEv1gm=+|gUgs)|*D*JbTd*>5 z;t9@C{U^IxYCcLQ`+6)+B}SCSTpaGw0GF2YoD#$jD*{Rb4diaz-;!cLLkva9?xJa< z+xX~FY~)4c^FRILn6j*5%0@JPXy^!PZ(5j5x2ovE~71>pKz=B zfeV@FGLBp}qTF~oMSA-?Ln_(fp+3L4a*sLzZXfSn_YOTdQQSPL7nuUzYk!8XPf@6k z1?kOEqwiLf%WN=*DTiecHdBNtRw|rOm;;HO3BFx&NN!~G=}WJO`kdV$=TtoMLeq3r z0$uY97@0;g=Qct#&PZZIW(`lW^~4$2#w-d)OqDRj>_9_s>#Y;6TNDn27gx|I8JtX8 z(Ht*E3iQ~p;#XE}@}MEQ-u5RqD|HUbngY6zQSi;1@NzrfLB4i5IlFxM1FDS zk29aE31>^cW^*cRDQK@fn+ZEx8Q|BN)pNGQLFC11hCSX3HP{?Fo$<} z4MIcnG^j-k*IiM_MY1x6voOq|@$y`HASA_O`IC2ADDOZ%#EZ#<9OsSUH3X(hL9pJo zwe?#s9M3nOm-wSPxYL)eZu{dBRl$U+kE0;;nZB6E^{#DglD!ZxWami|)Mh;LML$(K zZgE$B3rXRt=EXpX%a?eP74RX6kKZXrQIzyhaL%enbp!gYP}O>$LngR!BXq9JK@ zbG!yM6N>dHrWTQH2BJE0R!~?Ad?uu=uOLiE0U*nq>)w7r$*~T(iI*@JLTHqR|KDN z)hyCpz#eyn9aJavUmiTgHhb@+r@F`yczE08m9KzC?sg^FizzeZ> zHc4yHvyAeutbMF^c63fC_Eh}ILyI*0()wUzL?ziI)B92?;%-8jh6o%;Y4Hk1CNNYv zZ=Ccd2S0Iol&`j8w#-NyS@StYaL}z+%;aTwe~{Ho)2O%3rq83VdOcUxLhw-_!+ykM zqfeflm~Qk6N{~G_QLhuK8W*7v#k?fNB(+cjd?04Ce|gsIbI}Y{Xn5#nFg&3#xVx*g zt$?#pH`Tl*Sd*ROaj#Xhc2{2~n@;;$7k_KN+xhN2^`A6)9E;%G)Y)A^a9Y zj5r@1jyi?DpCp;A#<~UFGoFSg5;1KoB+L<6n+p<(_*(MDwPt;%Mce;XW9ZjT?~}Sp z#_O?Y{1%NMri)z*YYi=H$*f}mbJV__F5S2ENl#FcgXx6nL_P8ezM8EW#H;YWxx69w zyRC@h6u>p4GLc;h^UNKv$GZTtX&PP%-jKgasuIv#BOnnAGTSi}>6Mr^78hqvg|j8e z^;Zu!4DoT@&33;zkpI-|!};E#SUAoEm8ia)-9hiJp5QDN;y4?3_x$dqWO9r|S=$D` z^ZTzZDu%J|jYR^s<@-8_bNediV6{a<&o0#JXYsQ1_ zp~A;*SyQZ;?Otw{&ZBJVnJbpn`mpq(RcC=%S(|>0|y*sX~cjJUJ)o zc>EI7)U1HM=PfEWqA4`Jk`lT}<2(9D*YOX<84MeU*BfJsQ8y+#5KDfY-h=zlT$1hV z^HNf84f!pSPt>-bOBOy6G-q9llrvzHWlHiv9x{-76*}L#ON^bx&L}j%Rh>hloK(me zd1Ayw;9%J`}84owxWJyz0A7e7Kj^aK0fQ!SiPcK*4Ke+L# zaC$nG;kbU|Tf~G;DWFA=E3LGFt9xYAYE)Ris>R05`g3N0#kRXqD7@B-As{EGB-EXU;58_9x80>D+ciud zt0XPrq@$yzrYqwt%FAm~n-M$#d1mt^#Ly}mOg^K?pFFt7pW^V#3>792t_!=7(q(VyrhG)9#m20mNMfUg zzlMJOkspjSB>kZ`lAlwcgM>JL 
z1);757Ekh?;$j*>P#*LqO2n?mUMK9B05s{WU;!bL~URf zHUf6=d4te&D?+P|vx)ut3y&8EV@@sS3F;YpLwROk;sUhU6*RhQE;EOtX30ha( zc3E!0kjLB@yD;$5A0;irsYJjz8wBUmI=y+vlV?q({ zwN;_BD=L1$c5GY7|fz{^)ieA-P4 zXn>85f}tRP^Z?NiDTN|c&B~PMlUS310t?Ioy^USwv{l%JX{i&fO~3&uP|f%6GUW?MJAlIYq*Dv`>&XS2*+aDW|HypscmmYRIh@rI5st#(%(i~3MG#NgCA zR3jSd+P(7nWlrvv6G3;FJ=VJ@ZhAcESMiB);ajJ>EIY+vAtf)&8oBjE0#6sjt7bHx zl2-K)^P$}8*Dy5naS+nVWUeq7hYEp)U_y@>Gx9WEMlwm^gL6eECdE;vH^N_NyghO) zXwLZ%#jm=2xy&ha90iXpD&Tx%%wsAN=lqW zm!0HKca>i$4wpB|M?l^zHUb zC@3J`{g7Mn{sedZwXi240Ix!vt(Pb1l=}Rw9ds!npJhOz1G94G7*>CYigBLed5u$94 z-^AndkBgY8#^-_o<~m!zRH%i64_qhAi1^CBm-cL661zUEt8@g{l==ko@q~dBUbU0V z15-YPP$Dt%%MKFpRRo8p^9BLJg5FDdZ)exwsicTA9I8w6cnm@As<_($4tz4yv&0K* zxkfc@*CzI=P!{q%S;*1sr|H~7!_v9=hCQ`;Vnk8)LPf+HT@>S^2}MfL#R&o;VUcS( z#OA$Vi7(#?VaBUYI9{Ws_(10u6%YbaAr3T&TOzuexCfXA+NnAMN5=BxM)CGU*%9Bo zRZ^4Dz*S$x+Kygh+V6Wdfke;a>P^gt#GfA7C4t|e3Vo)hANkeV$XeTD7 zgZkJDXLRZ5ermw2L4%)&P3VpY<4dtJ*ZMadQbMEDA5hR@yjtO*tYp=K_d39&F%h0d zT49&8+u5oS3kbyUc_K%)bXdU~(Y-QiJzmO7iiQnvo6)RjaKb2lN0<?K}t8j~rNREqlglpbc@uoGOEQP?SI_r1}zQpFf zE|@`UMBvB<^ff1>$0v>oRpEOtbbd~_n&I0r3HQI&rf3ok>c85NVT)={%~%b&dp)&V zRpE8x&$AcjKa+_R@)n7>gj~B?MPeZ-sVVo8opyU^5VdxJ@Rq7c>;2} zzUnWtxq7lb6@_I8cii#vQ0~ZId#kORMyXVNuEsXHyX1^Jg(B`-(dmq6-2i8zoBiBO zEvi)D7<(<7bv;yQCdz2Y#%kB6H_6?w7^m%h=@FaL4sPZVc&DbTgrW58CP|6qC8l<^ zCZBHZCL@cV=I~H!RGBRm<>D=}E4l1y6iP?zW3yzOZBsvq4UmRa8BS{_!2*NNpk7sY zFWW&SgKDxT-PiD;a z!GkzFHh=L5p?os^5|>9drN?JiD$imTWAmV`p54E(jS&3BwK3MH7VE8iqjvT zfZ3I<-rVmKE}WBTq$zzgzHh1yc)NJileY~6?a?dI7uQXJgrtCdcf%8((*?S#Q}g*C zXThr~1^Zoo5qu+P*Hu)A?DD`d5%7|+$ofb}eAdXn5xwCj` zjyuIB2yI*~bhvj7=VH!S2%IxmZ*{md`2{CfKYuxS#_c0+$-Zw(v1h~82(q_gEx%ir zGFS#bg&1AW*Gy_ZmiNR!^CcF?vLW|t%IRwx1YvAZARu9>2B9)BJ1fRN8N;DIq_wgYZD$Zi-F}7y@dhg`X^$i z{b+`mYeVUN+chGl_asN*G-y((%~xzY!TVp}6onM7i3@qN*ZR1*7aidey8V}uF3!WQ zRHsBwQ4grlG=}|WKUdXJO_nw$L)L!-sA6fwp{T?4hK%`6tMu%D9)}=w3#RkR8T2s& zn&E|N`0Z|TF?jm7WY&Xo`8;dS(ByG%&qSX@vxMtoM=+;v{z_Uoi%q(bB;O9~`@6e8 z5P1g62tsDV`?6F~ilV#_jrgRQbT_zecb-IMw|FqKXfD}vg?GR2ww^in{`Ksmg&R7~{J5v(c 
z8!}?EGbV@Oo9HPG)9MFp8<20M+?_3fqZq7ey@63m1-9aR4PxkzV zaPdd_Eii%nPsYXHQRik{3J*`P$T}}l4;ojNe%)JLc?S4dd@|h zTFfEJKX)LSZ;@nsUR_(1no3L!6-=z@p^1!4561^Ch{G!&*sXNhY4w43f^qxOU}zxx zgPcPClFNnL^0#IeQEfjk-l&}FeE<3L!69+AcemKsgLtM_`Jzi*UiZP|;PJOHY_U4C zIyz?y>*R;@8}tw~?lJI@8(k8OYDkEiz4S7pvh#u^=QVI9Q$;m)Vc{ZSP%x1**FGZI zL~HyxZQ?11Q$A%0kndd+i*3|WiqoA0gNok@hzVF53tWEVR|i5Fy|DmY)pVbe09`0v zcqw-AHXno9f*3=i0ma#xh|74ErZQ7sB3@2@Q__Wi=>Vm`(2g%Y5appK#J5m@H4<%( z@+eCJS17+SqM|_d4xJ7)?X(C2UW8XF(exXFFc!hI2t;j+D%A1Hq;Jwt`a7p70DD{w z3C--47F!cVV3m6Ys`q^6&m~+p(1Lq0SU>rANi-#=ZKNlC)`u<(7DLJKAk=Gh!=``d zl19ixU=$q`C%rvsyyL~f21g@U6$nis$pUGSn=v)gZ}fwUwrR8;MJ8?B_Tic(5=sJ?MjHXm9>OXlQV3^&i5%&3>z?@Q z)v?6#@;RJl>Y>|c9tS>#g1KE`Hv)(_I0LN6-RpozA1Gut3cmZb&e!_HK6y+BCdLz! zNlWj}E>$>fW-BkPZab9Ddk_argcVUxoH7PvC>Uc?1_pVS@C=u$+jqdK&bualKmB#N0&mq=^7AtCh@1s_n?*D`WXWjSFpOc!~ zAUP>eDq?n|dnGfX>0|I9`}ni;o|ytE*$lDo;n$x zh+h<9)RiM(-&AEWR5q&>(i@|gjh6Z!4@#GvhK@3W@WbIAm0q3+C~jFMVpv1@w8y1w zeD?BS1<6{mb6%(6VS*TJ=D5|eS(+WhIS7kOEkkrAc(KuN(1(qKo{?FVSrONqJlF)e z%woJM%}70atdHn8PVO&>$mUn~-=FTnD)!o2Pgm0CgU3NGydF`e`NLha2P9vR&v|r+EG3U zp6=lc+$S-{N!ErWTFC0SD4CXSB$e5uC&Wz>Ee>04%MsBCrO>o`lZn#GS5!sh5e(7A z)IO}AXOx#0bF|Q5UUIiI66|QAPa`cwe`#(ayQs=yxlgES$Ez8`s%c~gyOzh9 z?KEdsaC|eFOF4{Ebx*bEd9h1Nqo@>LPO|Cey=nJR3r(B*Ddu3EJ=U9)csNpft4ME8 zh1J*=j>+*CP=Xexv@NqS9MzN~X6;E{I_=cg9KMGGu96;Ok?p}0C!Pi8GkH1ui5Ae+ z6Htq!ayswuAm_5+FOoe8mY(+~c7nIk1(+k+sUb_Mmk zncft1*0UlN*)>8!0fly95hFa3Uq%Y4!E5%He7aBGhilx(z9hg~*^t$oqhpGhQ)Wq9EWBu@$WRwCk851)J zPLLOT+VQf^{>e^eB?6opx-V$VzEV5Y*9Ldubaw;GmAWvR*>MEBXNK*DhusSpvxVVi4)pZp1aZ6i6$1y~%7wZ|&JMFQJ$$d6inn4&`q5iB8jVFbQ zyO~ERN2a8&&wBxFISExZt%;e@EEHx?fi4#cH6XEg+icJQF(d$6xOzPRdFRUGg=WYH zZ3--+C9YMu$R}Sbsp6oy_C^75Ztmtgnifb(<==Ew@J7<776hO5bq^WRY6LBw5*~P5 ztkCp;m-2JG?4D>SRC8djIPiSgz=?NWdA0VO7Zal%t~L0)5t?Ps6JsM_VN_AqA2n;z zkb6!uSRw4HpK}cwuEs`#|9OtL7(whaFj8NT!=_C}0YYnF{Se{3_V$+5?=!q?uRYy0 z-9hIp{hQvSWlqB+mQUPSiZ)PF$dH+o7E+GKR6xKX^V5$QOCy_#&0!;dLZMU;lomlV zgl0lPQi4Al@jr^N6!vM7qAQl=uOOC4!d^~UnlWaoDt|E}GATt#2uz?cv$$ 
zWg>%=8C}w)UIw;694QI4G|TM)rL49!wm;Q!C)-DHi7c7M5PI%baEB>mgdI{!=h7;k z9M)50Lq}knxo??!XWHIqVULZFmn8GvTrU)_2-M-kWJs_xfu8e&`5 zI$v!v!6sIf?w*dmclGlX7u%bcyiFcO;dRxgxAQa-ZJ+d9wvZQc5vtdSv_51lq7^S# zA;Y1o?7f^Hvx8z6%#f{U2qxlRf3?iES>rlMYtSO$Ww9#XkDQi+E6V)3m~|!YWsEdM z7lA^I(bT1>{Zz=Lb7A5&G*#iLGnI+gC97Y}PW z^8AXtiVtx8amoiUx$S`<%?`}cW@TdkO}Z8nI}m06nXdKy^#3GXi|N1cqv?QG{>b=v z4Aj3Teh2|M$KQFzT0qXQ;U7%lKUl(lf#*Lnvc7BRd;ZI#Z65v`;QVJU$)Dg{!9`kr z8>J=Q_ap0?Y6&QFePe1l{d<&lWWk9Ee7|Q<&z_c3_UXIt3(_L&h?SHL!#70VQI&JVB=PX zUFFEwtWO9rQC5wQNfo&I)=&t@Cf2G2r8gfxe~cEpv3edQvl2$CN<`+6D9tCl**?cm zN;!2+NO?a!1FCgSgsVO_yJi)o0!eobCbj77NwfmHK0}EHm3VRZj-~~}P!+VubeRk( z@`Z%dB&174cwSeLXNctj&k&@j2@wNCw22}`sEJq*`U4HMX%P4a2)E#Epm-KgcoBrg z!Wa0tPgM3NY3A{b%( z%C}3Kcxhvsh(YOsFQJ->noX&7m2YjN@?HwpuDV9MkZ&(*A{HWhhV#Q9(-TS8AwDc& zj`sz9d$?d?8FzHfU#5IWtf{DC@AH!x(|gxkp)4ETrEV;uM&d;iRJ%~fjdsBJz7ltH z`kZ`LrcVDxY8T%KMbf%s;>F?R*LDP{C`f{&{7O7nZ)%zCYo;nf{qfX>=^cJ^9h=>5U8}Oynm_3EBD9Gff7w+Qh2N`q^ zxAC$sru~6w>-=@|j14s*CBy+frXt(i6Xuhq;uNx=p@kD-ze1Uk z?@O8oPSa%5gl7J!o}Bmd`GkmEnj7yve3p67h?ES)mz?8KGG=C11z4DCP^YUwJqg%i zX5sK-<6gCBlsK8Z6$Fa-9e43__Sc$Z&m}z3&4J&Ob=|7g?HA|yx#0b-ucE_7{ryc6+1ZK%m49FyR7VfmZFZn@_afJWDFto0>bo6b` zt|cHCon7TQo*=8((<=3_o7wtK8FM5hrYKA3*tDq3@XpwfR5VITr{<0xMTH|k;fDn~ z>&7yOe&M{iGJ>7`wjt=w>UYJlaNd(MAN*5ojqkd0TW8nbqR)H~g75RB-M(6cHX_i2 zcl@HoTZXMaSS?|;)qrZ|utYxMZmoWO7*V71)|)V&Dw}jNYUIS6C}eb~D>8pM3GI-f zysfqUmLS7qbwoJmm>tSU@;*3`Pa*P+P@J#`?bRLVO7DGQGN-N1YK0{793QtW!4xfj za3?Ezj;)v&vRQ-;45L?^jqNG_F0sP1DAugfv?hjD4VDf$ggLYK^&jGvE9Gc3W8S)4 zx!G!{wKvJPJ5%c3%$1FRqWQhCO^?!doZIn*gO{(0dDVIou{NQ6QN6wBH@w>p)1cQ{2y(=E`^+~2tWm-icXEgslin_!AF)7VGiCu1G|kxF_6pV-;#9w=po+uVssTldZpeCEy`B;sZZm(8iO?LJwIK; zKo_nr-h3wApD`{Ct{k`0U{U5(%-w^8$I!)W7Tckd1Yn>Ec+4DsKy1QEZ+&lviM(j@>^f`GBhUR*8BpL|mn}I0jxVO~h8Mg}9r3L}|Oua%dxBcD2fpJu%b%wWxaY9?n2Jg2-TH%6t8 zeQ>j1@(YV$UW;~NGWVihDz`WO9o)vU1;X!O(61FrdLU@|1p=5Kp@4sz`6?%)%&YPp z2k|M>Gq^EoJzBa1?hNv|d^3vI5iC|C+nb%KTr!(60p8|CG!1FH*N2 zCvJUD+EN07slTUT{o53*zkn>}haGbMTmyYL|8Hl{|JZEhQSMc$vV)qu!Bq45rv)m? 
z8nxKarKeQqnBs^!yyaQsl;m+3K^{!G-eS=#Hu@f!VhDbOvO)-+y!5)cRBS$h&GzN#NGjN z2?u9wdmT&P)gMme(7S%_mpBljTSI4(NHTz|kGwv_ z#wecne!x&4XT9gOP8Z#NqTYb5KJ|JVwLup>Rsu>0S7qdz-T`eEdJB3abfvCPD{WN9 zowU z7f~3x?3Oj!5OWDAs!WWUNC(fdJn*^loku6N@)3Eoi{0Z&$7j4S@)J8_a~Z5$wmG?X zBS;yM`CdgwAxqe*1N=!2!6YnIL{un!K+%e2v8Uep7#Dc$OP8z;n6f$YTWUDN?VMp8 zvgx8@qQv(w_4iT=rZ$6OX29LSQ0{h!Qr82HHY157;&fZQa9!0o_-m3nqy`yYC8}}a zpKrs0Va&#Rmh9z=g1wl5#Z;CW?%z`(V4v@x?t0dz$pUa5d3UhmbdmOC+C)o`1?HSFvY{V4=wZZ|}o0VMRX>0}4`a5wpKH=dr zoY8Q>Eye$jxpxeXEKu9EC${a0ZF^#8l8J3&V%tt8wr$(CZQJNL`Lg%B`JP?pd~enH zasKt{uIj4puIjbcbKlQ(y&mihPJ;oLyzGH4s!K~%VgxVFX#fU{Oi3|3YuK|05TM)} zR+~`?Da{{mOS+M`yCul!k8LR6Ct9;5X3QI~E0}0$od^PKPV*QO27~-7oFOfi#c`96 zqpA&`9u;hHWvfK!q^dtw^m8|QRX9x1vZU7Lw{zb!+G2lPWm*ov8U)L{D#;t~NCiM47BkQpj7HkNR!(eM-mAQPY=b}a!s~;fZ+>zN(rBJF`Hpr{pId)ay;FEfp7wMuA2)O@bu zK+-fiv(B0Jb9nBQ=wu4;2GPw>v+G;=`)=1^)n8WuD9EnBB`uICz~WiPP$KI!?An7&Ve3Ci7=8jY5nITbCj7z?LM5&!RRv+5T%de{pR341GUZ9 zu`M0+hY{YoOtG|f@f)c~qNR+x9dE1Ro;&Zhqz4Vgh7E#_wa;j3e%hLP!l`kR|Cy3)u#gfv3bq4HI*F z0Pd_xlbYn~=}?L-Tn5N#8%P`S#eT-Y6K!;-aCZ$!KtL>#A?^(!pQ+Tj4Le?=X)qt~ z36aWELUrrnYy0a%{MB&*pKRCKz3W&5JvDu|RdaFark96SCrSj!>r+b_p3oax@u7eH z7J#%$FUWsjC;kGitJV`g9A`SSw3{yCdt5cYtupeFH~fKFXlnOnu!YxHkf853;OLe4!~3eMkm)ZfGrg}#e$o*@CkuADAkZXlV4ai%6AtN zAI*67C}sEAM5K_XL)ifF>qj5_0eYl{;n`f?SzRzvfc{BgZIfjjM#FeduH^}IcP!3x{s7bcHND%dKdogqCszvC{vnr*~W53g@6O>6}V z{Zg&1fH&dkR12;VGqQmY(!snOwuP+R|9Qm;5g6~Bv`sYD4MX$HwpEKns1r=zfXr=tL<1%JY8G`ZMR=Nd@qS$1!8+OUX0 zb&Auw6<1|%&6&UbSy#(ZymMQ-2HN;nT^-0Thtl$9@UKlyExk2VCc*dnZ!+1VO8eSRQ^rVY5J5i?>Q8y z3<=dXM}41=OO|%lvhIT#Aov_F+&#ADsXHpP7aTfJ9y%Q=Asj}WKNS}HeeMF57Ojz$ z=?VAIiw@&86z1v6ewI}3+wUo8uI}+mQr%`o7DVau2NTe6%pZ>L?Q9}y3Hp+gK7;*( zh;sHG$UVlnE$yj4M{b>V1PP1%DAGRc>jNxtY< z0&r(No!uv`OC!_KB`)1{yB6eYpGUDi0_UIFZj~Nd%bWQ;r`~)lc+DQew6XDdEo*eu z#+q_eZ89J;RccXuu?`VzaWluc%BB|^7Z-EQT1^tP7S817V>q-&){qzGDo!H0a^339 zzhq_7Ev?)0`SsU+a-<^5M&)GKb(*PEB#HTCH91%4%t0ww#r;T(L7S)6e1dDL{3YZC zO6P1pCXqfoTi>6;y&{Um(|Z=ZpG;bLcriBH830^4;gFNJ_o2)*PmwR?)3~ohG?S9x 
zlMr7hBaW)5AsCVNQ#xCPph1`G4{gU3JmUb82mff6YSA_FxIoCMh~| zXf5~ zjNMcn%@~|E-PhD-7zOGn=qC^?Aby?MAifj*4!PE-oS%D2heCAxa8@ z^glbcP(<4G70*W;A|w*oYzz=aD~~oTHcf5#F`-lgwC~L3y8J=>eYBy;C<72psxKVw zvhXJheLZ%kWvv$2>eCC?;zx4$wByhO6C84LjPM>L{b~-5h%jgxRo>7O4m^QwW}GJ+ zzaAtR!`jtE-El4W${2`WW9RptOj)`vY)eOf#nxxbNW{3MYxeHRmkdG8(G)27>1sJg z+lLO2YWdsc-cugvDuAxMr2ncM5`M6pJ|P9{)rHN~@x`)1&i4O$zw=qXM5*Px5~A7o z&MLqei%UYorka`LvM0UdoEN+-eWa;7hhR8pOj1aG6eI|3IvZPW1~c5M7tn)j=7f3C z?4*#{BBc9hnZC}-K+Q#50&96o;aw}Gvz?EFs7-t&e<2i2l}@@s!s#YHo7>C$wl|Q4$c>?&z$B)Hp{1l?Wl15y zg&IemY)4h>W1q$DI;G%NTMv}k;y+wOh~fG`f5qMuPvz`)N}eO5Hy*^a`qf9STogcx zw-jd1q8OmQZg(iP<3rU2V*j+3ZO_|J^+}-a6g?Jdw8 zqOfryyo1`xXpw2C-ECokdO`8DPELAdj)+idog%Q4j5DT=V>!3m=Ix}rqBVhg1|hmZ z6W!ew^?65Axeb~$s7eu}^X2DC@2GWFi$gn&$4A$~=BGXI>sr*|sr`5yPyNzW?H9jG zL-UvBFU8fy6HAXg#Z{g40Y(n}TjH;3pGACEFwGObKn(5H8ZebWNk zEN}zLTW|8m(<4&OCva3rwTHII39^7L1;{~wl@#%9h+EyXSA7ER-(#*u=BaiB7KXCA zm)2#1rGzDD4mrCzp8GfZ7KS8c6UcElQb}3ZJ>kK(@%;>Rbx+x_TX?H90r8wKZ2a*;P|hSt)-U3QHQB`$|B@Lu%_nDSN~{j?ym@CTS%qUJni! 
zfXdIas3?GrMgjX%vTXj2a_Sb{8+ZAMMm9~S0(&@u0lNIP3=0ggYdH7QG)0Gs+aB7V z)F_h({YDHyQGQER;PsMPrh(K%_LrB5pjWyw2qjc+V~57B7KP;cT_ZaA$=tnJ_ke(4 zL~Q-EIhsyt+0ubA%rn>n?W2!7Y}}Tkk9_{;`6JwaWDjsJb9CRiOWSp1*x zH!8?y23GejC~E=u;PN)A>k*;)+HN=EJl=P37|+JC)Ou44#6ZiGrz71Bjh!+(FVapprQ78g0hbzPHtG z>nbPOREI#j!))&Z*Ec)dVeE$^oH+j88PJFoY1roH@VMmg7~oA?6*TNO9`S?1DO=pU zMh-bRq!6M!>zT{!lxGXO;5PLJS1u#gJ-^aF6z%Mz zos)2ZQEyV6UTr>0cGr2k>b6&M_|4!)(6mb_Z=d+gV(qlv2N?uHLM&+PaDcFSbe@%d ze@nTerq|J~M{I)sSB2Y91KpxgP>rBxRA1I?Yg5er1t6m4y5-f{xd?l=wSJc-_MZl# zf58F&Wi-da`A-xe>))rO|EFMBSW;bGS?M1GQI`J~2BQCA5-Rk!XxQcp9p&)vlgj_r z*8D1;{)2h_KL~{X!fXB_Xa6G*Hj4iWgq3iIt`6wjtD5y=ox_l>v?Ngn6!8ODQhHZq zvq@TS5PvyiC=;sUk5MD;6N0uuki^W(>8U|tX#Vi+0OcC)H>8wFMKB+)Pr=Q&Sy6py z8@C<2RA-Z!xT>;0bnfihdbFRsiU}Bx{4QZPxmw)c-@N=y$w639ce-*LajCG1BO29n zz5iJe*ZTU;fvid^!b|g!ThAdUA|sBrw{N!iuMR$mgaj=^$+bVJMfWPS%HTnPfmD5w z;0o%sbP8A%V^XY`j;^yrLM0=TvdsHvv6XOdJ@axH;X{ZfoL1R=R{*Lo8x^Lr;cMBCFH ziBfqeaS8~0yuO8qD31Gp%CR?- zoDUJwIz?f}y*=R2ft$|3Gcn@ik~7D%a&4ciooPJ_p$e{l0!ka|D?W(0hz3XvnVHRP zLwKF^-?qQ$ck<%->kN981~{r)y_$kwJ36?Ss3x7{b+7 zR@fME%T;vPE!w}0_WMbXzTBf382I~kxLB~dq}+q7)J3H^A30U+cZt#Ky6$$UO}a9I zQk-f)6aIw-#P5Hp&1*pFam_@$767}TnzAYc^;IX9=M9Swo*hGW=CFg z-JGqdDb0uYtcIDn7NrT0WR%Ls9=!MG_(uHxL{w!iJTq~?G;*U!D+OxoqP>g0o|Zis3p zm`~XKsusF8(DU`d@DU|%dLn;*U1B289^kFNnsUXUNBF6EIOurekk80$0P-?9C^4<` z99Zn+(+MM}IZZj;#2~h6GIxp+NCkQ@`LU(jrs~FNKgU7hq!=1n@Mq@k*g4Oe2 ziuAX+W|OY?R(SmuO`g$b_2!iS9*GmD@tEsmK}vP7w3OpNJg95u=Sf>n!7G1-*#adz z)$vnv!9U-GkK-Gz%Y#;M%cNH=#QJw^56UOl%DV4DRb*Booyim->vL)SFcY-A#c5ch z^=FPLDXj+x`g&{uS}o7p`6q1Jn*;Bw4-;4pfhoT-26z$!2ijX)b&rk!or_2sB3BSH z>GsFC=ECD}`E1x0jrnKK>GimEJ(|Xp_4CPP7V>DNdSN*7Yg>_oK>L1}3!=Rt;UnCL z!<%S{Av?FSQvM~w5t5@E9*-m7&a}9FcGBi%?(d%S(QY%I^Q0Ry$k=iE=;8Fb!u%Mh zkC1(_n?yE6+aD|ws}mF%Fgn4M@zKfA^%b{#ESnrIKCZJ-uCy2BpCFNvH;|m9ygZ*R zurA#sF3~MsV826rTjEs_PReDqHRR9~e$}*@qfz}K(`|Wsnn~lYKL20}Oh zGuvHHD%Zd!k6(6ssUV(&wGahi=V02@6&bJCVJ%g%rRqo=64g|d4;?QaZd28ia8JC* zidv3NOV!gz!~JcR3x@FB#16l9nr$Fr(>M@VJWXFYqc`sR2k6;p=JtO&9sT8>^e?9) 
zMrNjeI5z*vr~X@f^uNvL#KqKTZN{ozb|Chni>|Z3>e-G!r5c~gZ zD*cCZ(SIZ?3jB?r_^&bCKlzD&VJH6k^1(l&xWBiYT>cXSk@>&1o&I|KKhNR*inRZi zvs;ELl%k?ZqL-1{6>ns&oGX#J_D&ECCi{G{2tUx6#@BvWYCgUrQyzy zVWN=Qg!5}*5o8SgrVBQ*%;W*n*}(#XS+d6&eyk(<5DDW}lf&_JF1e!Zs+X(xOXsfF zjb9A!uZ+-vz4|AyGM6wL7Z{ZefMmXjx1^a{@j#x^thXRLJPEvot?7(xb4*fHRlPdW z7uX&wA*=$ynq|kk-s$S)kPBY{`9>5B+1d;~=S$o*Fm4c{#@E-t<;weTXz1k=3=y64 zYAuY@F5d4{0GY3sot9fuwvf!u&=Ii^kx;5Du2utcxchSRfQc+sf~;XeAoVZ@Hys5+ z_&poUlvI6=PFd-Yd@V4 zx*%J-jtV3yM^HX9TlDwKY}zU zvkUHLn3RMAFyCN!1#>#1vLLSqWRZeP=p3hDCL!yZ?sWP<4;76uJqmXM2yfyGW@-)) z{tsGSwiM#6ksjdM{9htYbu9N!5bAChcauNj790&{Fw`s@Ey_q-Y|(d^IV4S*bH;vbp&(uBb;B;q7*TB6TJlKARITT;nZIQ0>qvk5M(dbz0+>Y$WTYQ0x+wPn}VGpC@N`g zC{x`_33j|ekR1A__DsR;tX4cVghB+;-Okq=Ratw8e+-dTe2alnsx)@iXRs$~=41vg zSegqvNCgJg^mr`~@q8JCX#oigBlQZg31pgUW=5Tak?6?+gX$e(USv?z2!b7=UOh-j zTVpkU6q=TPuO@%=i0iW(lVQmOeJ%izYjfJSm;c%u*$Hz%Q}gAAB1s63-e}Eaicf=AnhT6@c!L-J5uGi`(Vq#ydQ~aNVoF2?mp95s>QKqfwxqR_39W}SVvFju@wj(cME!~%}KX?M+M78i!Co!u11$}8Lpa5X28 zS^@SmJOnAYkjUwcBC+B(9kLoAN^2H0{$)rFvlt^yLfi$6Q`thyd&G%H>D?D|!F5tx zt5~#*54idUOm)OW^g~CB2RzumDKHLWhW#UUmdu~@Kp(5YWg%!-7qzy}eDuxLB6$-a zAh>%~ab>_($j)CD>cZ0W_)tsN)jhn{inJNnQmd3zjR5&#c=ER&sx!>lOu%bAC0D-CfYl{t-irB!2zKa4F)OWR@wjVWh@-wt9h&L9^)^W3!~n&Hmv>%A?2cmP-t(wV zET(XMjAd@2+)zv||MGl94qYqqSCTbnU(sG{dM5$DD??je<2;b?y6p89AJ{`uCkV0< zFZtxWCqK~{D-k_~bz1Sftju78CFj(XClqy61Lf&)pe=Mx3UlTzuNp)!9pX63swqS8!Tw9jV_I>ZMl3betV!wtkkYQ9gx3ojxV zx853QrlB&7;cx+}gHM}-DnFm+-CNwNnEDRz`i7&i)5h$)T6K^)d5U2dO)FQm0p&IR z<{CRWR{fwMTq_9_(rU(tUUwD0YJ&Uje$8nZc7?^C6qTl^WGGE4y^*T+RH_}Ec=f)v zEi^YTmjUF_g`;WU3l9AbiNN zxh%px7eN8Nu*A5O=p_yW60D72S;0ix)HgCe+77iE1kh5g$o=QG#`bic({;r5saY>s zx#Ti!*^V}u zR4lm91av{FhJtXW8+rU*;BpBcU&!{1@Odk-_x9tzo090FAHl9MIsP3x%MO%QoPVDJ zTfRSULg{&Dc0<*q0@h<~6J)gQYTz#Autdm3?=|^am>sZMi23g8(m}RDP2xsg9u$x^ zBGa1QfA&)E$-b!af}U(3lk%iV4TiqQCV@|(mU7_86m?_Sq&(ypeThibM;C1JEepX_ zjOF+}r5~(-V-~6zeKP>gn1FjSN8>Yln@E(s`BaK>>c=uX0kIKYOzED-XOb!L#)Y#e;v?LCj2FFYAhWb%EAo8ZWl 
z9!NhWh}1HnEI>Ot(kz-gF9?to37axQf{>J4r;J7~b}Zy=nham~d7aL|7>Mwep8lmW zi0XWnI1_6cfTu^Wmas8AR&Q39OLc{6 z736FzgBES4PYR!P#wzenC00Sv`m%a;3J7#|HV90}6s9KXL?$c8es-)tueN9HJ`~-l zS1dFcak~cn7*u&NsVMWMW}86M+VTc?aAPS^;YWdPlqGhfb$ll|eSoOnMbB!m8mnVd zK*U8~jaJjX(88@Sdo1S+2@}_TKkm8Aqa=yh-cBh;}4`7vZ3kpT`hXzb^x* zZm!|0i5iF&E-wO1ro<#7b^Yzz=% zcik@XT9pdDyauiebn)1i`Zip(C0D?Zx?X>Nt_-bghJR>0(>AY>sXMUGR<^mz3-s`b zACUhs|Ec(hyOX45DP09OET{zF#f5k);&mQDr3VO7;o&A7`}1ir#Ce2yJfgr42?DrR z0xlFX2LDvi@P!-1FV9yL!bGlc$>suI!Ns|0Vb#`~sM1&d0Z8L`@|-oe2e4MD%TFws zWprL?EE&oKuhf3fylrW2>PAs@^u|#P3*xDJQ;bHP)nV4ovBZu?}x(1=Jy9qBPXve`P+6-1J_>s(Fe7LpqV&nb#ZWz)3yD)|Bxr0azs z#)YIDSg2Jac&-)nAY4ARc>2%}dT(Z3LYB&ze&EO7*0MQ5zi^s0&V6Xb3o;=OKOq4)SO703zYG*Yn<0u|;M3pdpCA!FSJA^3Bi`yt zj0=Ceb_OZmi%9_88j*g$JvfBzR=2<1N6>c%jrv>X@nI3^#xc(S*zn@pKea=^mjvF( z5wb6Y%?d~_70~Z{sBG1R45A%GIL=AKy%1El-?(lwpbDonU0POXf}T?5Zrzv4Z(Z-@$-j6C_;f0JZ|x{6 zG+b);?pXM!g0O2L;Kd%2S!*8Q)?9xuH8=mp;kTBktD^0BmF2AaoYS)@<+Zz4qz0{V@VX*JxGKDmt z(WWGHd;e+O^>^W|!B5LyDV;lsdw{ra1)1k8aj&mgk)vp=cF6sCd+m2)(8rF?*Py15 zp`;8R>0x105UnGZJak`FCf`lk)b-iOywA5l@n=x1MnSw*>JJ)QE~(zcFmKY*F3U~{ zi(2Mb=5#^e;PHd)tH`a;6jK5Q+bsI=)xbi5*V-#@-%^*?RJND!7PrjzZldj>(#`&= zX(*1#dpP(4vuKwx5dLn)`R}98$F$!cFNv8L`*}&1rTTX?MeL#~gSdp+x^hctgp%s` zMG%7Pv71aENpx(09}RZYpd1XOfV>DyAI-(7wf1(UZX<%3nfqTFVOAUfya6d!UqoJ; zASgq*e2Y~vuiO!rTMn7dzDIGx^}zt8x~s{-oa3i+oO(p}zCHpO6C2Z(77C(DK`9Mf zql!xb6%QKRy>BbKhY`d>1u%uw^L&f>@uFa4LqR>VUhZb5p6@ret#n<{VsY|@mo%|t zrJq1o#vOCbs&+3Vx8M^(cl-9ZzKu>zBiqN0+xtbX0(|Iff~ubSJ)2a`aDhDP+A&-N zeQ!`s)!misnfjb~wD#es^@53#ElOS_hDSa~=B~SAqChDzjaN3~dY*}NX^;B;;^1g= z=Ra?6;c@I|uYj~#Z+O|*+?=H?>v>PQVj@79{y0A)&cfu^^M_mErc3a~5gKVxcPg$8 zKN{0-<%Mo-*+4{ZuGnl@?y-&i>D?@!&XeogdLql)8LQJsatZqV2!*6U6o6AWirzf@ z$xx9{ExdPI+3_>yOp8GN-DdKY&l*Nlj*1zd#d^pcp_+ino@5I84+kxhxj{koBpxkN zk%2BVzsaa}lZ5rr<>rNHB`uRlgqj0?!f&dBv}Dp`H*0Xa4u|--eDtthLl2oyerM`H z&xDGcpNUHqbC-V z{diU86rApxRo^eGU2a{pqM)8$t)vd=;Z=8Q#f92Kb>6(Dd+5?sU4cw;CQh6t2~X6T z3zA7UvC>%(SC;yH-;Yq?u%}}fO%ZF+{K0X+)fmRTzYTT_HU6iu-QNw4ENl#%f3Ma5 
ztNZSMJGN60RThwu`df_|MwBx-?b!v7n1yA1ov;r z67%1d1po1E{7a_xKS-ATD(?K(*_*PNBB}~{Wo}NXuweve0dfQMdiOT)sqjQUSd!BK z99n$w1|+aNdYTB5NH_RN=LS`HcJVotE{<@{1~Mr!<4nJiCRwNy51j$}d<1^S0M8*u z)sMJz!$#&K$Is$RjVtRf49FN$6g=!$Z6(kunAgW#2l-Pp!Yy9u$8OHOZbBx!B>e<% zZ9$QNrc~?PPG-h!YxW6T`a1;fYRXJ?xs&L1K=Lw)kfDokd)ra~&}C)9T0Z|?ogyZkmkOD}4b)+wc}5c@0~bPQv0SQsm3ZbC9DHbTDf z?Mng_Y~v7P%)%qsu4AKnGeOJc@`F0Koz>=$k()n2PUKX z$(`f4qGL}CaacNJJ}JE}On1p85g5!_DTA;k=nUS5o5q98Fexe0^r_5y;skuguhnYs zym+y=Uem&6*4(m=JcsiWl#3@kO_A>7N7us3bnBN4*?l5Mos-~4eR!VSY%RwLQI+&~ zqXdk)DybLe_UDymNY&Ew_M%oySdxW4+m$SNX&D-Z*w8S#oQUwb6@Hhl7Ine0oPBMb z^W%&y{&=3}qxJgAYO_p|ESGFnksu(+Opy}(voE=Aj9Ai)YC+{PG2aAZqt*;~VG^n_ zDg{#{s_&o>!5N>^?z+u4W_`5U44|B&o;cuUZC@)UF-e&fnvo@|8=`Ts(bZIzw9) z``KneD|$P;Zpz_ZO=4Xx4EWgeaaO8jGLN@rrOAeo6*Y-O_QmV>QxunCOJg^b9AvFg z2=e*C^9=&us#cf%i#uyL%91ugR+pZTPzw+1_tCu?7Q1>e^c#uKoo_~#2zWmUxwjx1r}ERvxb$G+X|!k!tdNlKUoGI;+7e#45a)8-ar zu%XBf)Y{9B`6*aQsG`LTpslx9H~NZ%GMu#9v26G#l9j|+OBN7?6gVeXnn5#CO#LEj zUR0@$LedJOW>xSslzdwhU|DI($Gn0~aqu`VLr$n3Aj5B|a^XC!xp$SYW3BZp;;dsQ zhToAuJoT_)*YIrUa-9auen8=)Y4dKne7wY2k#BS9l6#`1n95A~10#-htJBS;j{tK6 z>Pk$1F{a5-hY=wEmJh#c4htX-s{5{6h0#>QFX?|3hhOKfT%}5uwHvxMg=szUPAmLn zc5%5D?wb_)?lHrX=0N^V1}d|3A{kv3Zw9fF0*A$lDHm$6nwI<_mj}8-RYQYART{B` zL6G$D0JL^A-TT}=7wtfJ`O|Y6rz4tgOQ{*2DRx4Q66?0qOaaBeM1-FiU>Z-2SK*i_ zM9M1i%1_hK2Qy4eml$a7YQnSLn$uWdFQcJXloMQ70NaJa=qJ9Mg(;4EBEloV0VU?V z>sxc*oCgskm7k!ASkx17TK&_%%Gm2?+>7O1lOsm7X6LUNQ1+ejO1qIBPOy({DD4tr z40E_YcMf^JF9sku6lObH7nmjnRlIG{e&BiWUPN|Ivp1BF=VKX-9%U=>v=vfPIxwNV zweJ${k`~a>^MOl0?+H^Gh{WID9F~T+)}Y-o66<1Hsf+9xG)Y+pT|zgtRm0(^s5?W4 z%mh_KO@wj#cCtF#?od;Forft}9Cf+T>=G6+Kl=wy7@qO2nwYTqKIRT0NLrjr-VI>`a~$ zwHC{zM@K0IccWO{0q0*AA3s;CmI6u0NF@mma$E)K&t}^hj&ITF4q4iA zXmUBh(rt9WtPX?=aBEtV^z1H-QTiew*;81z1+y<^@iwx&Yr5wPC+Tf*9Q?pVu=QnAq4kj>%YOz zN))SwD0fM@e#2%!KUCE-si`kIHti>` z##xu$!vQ;X(oe8@u;$$VVF-A$n!7VO=!4n`f0gq~J(mD4Gn5!nlE?*ROB$&Xks%%2 zI#d4?LCI6(;Mw(>fHL`@*K#ijeQXnkVqC6tFf-}PDSav0QNOX^#t;+n1RD=1VTdc` 
zb-UjyfgGS=E|*nHDGYAeZs_|oKb~oxqA2N}(@KhFdvo9HGyUNI0zChQ#yQ;wq2`NR zgB1j_T$A`(J!>*U_&roJa4w@BBKv?1t8&yFT*VEy$!H%$)32`J)3=S%ffQ2cGD(5yVtBe~K zW=eqmIag(ThP)vQ=y`5jxm9U*T}^}opnovHAY7klMwncNuheP}%4V);*C!ffdg7 zUr;76B9a^|gP*PlF=-*^R|B-9md+&?WkeN+CZQ(JrztYt~v~Jua-sHLrdZ65(;Pdb5 zhI3@-NN{I&d2zlB;n|($^EXn=Ut_~_$Zy)|OtdEAT5Yk3BR@X539v!4jjEXFmmgYE z&HBO1`!id#Ll$l*=Mj&qAF~!CxGcA@2hmZ~!9~)8ms~{Bnux!dFTCt*n?Qu!9n(`1lLHyz96Mmz^Wj|F8NNO7AsXSHH@0y8yJMllS=v$iKDRtulID}AI%R4_1@)?6 z&r|buGFyu%FdEL5TNDRDxRagyCEJ5V7}Rv7`1dbzxOs4?VtUtRU{a$uE3GrdZys1) z5!}}-IwQli(q4H8o>s?N1DCZHfr~Yp$N$qE%U>?6{}P2Waj^eWf&2d# zh@z$-q%QeagSFznDkO5A;*7T z0cK_T*A?KuyAG*X)r0C4RIwvNHRxN0vtR_J8w0ntW-k|Kh&> zcMar!E-|S5FA7NgFB{~4*(m?b|LACK`1KhKe%l%ShaKeqvkLMr?`8B4tK^sS@=Kff zZ#QO4|E(19*W>?rDdKM{&i|@qwW zCO+yeY7>VdGMB>qVYQ&Pn0Iq5Ht*3|tI0zrdrN~_-*2%h9{jf|8cwaPqvmnp69d6i z!1Y8Z5|G57HczzhqIMp~wfc)W=XNHqK~V2#6`iy%&-UV39px#SInRMuhe$=Xu^dXV z)x4WI>489pWQVX-wz7PcJab9tyr?-VYj~=#RqkfKu|gar!Dw8nkW3Dng7&m#oll{~`tACqoF}DZ46vwMYgDk>D~LN*M$Cq1SdCu zb8^7J!b+i$h_+LiYj4gBtJc6Qm#DEvkof4jESY!n~xnTL7a_AkWwHY=>4H zE|ueF7QZ0=NIIqu41aOr@j_=uO}|^dnO8mU16d#{2@GJGMatKOY)0k1liw8)!yrEdNk@Haen9E=% zLq?dxQNyt65Jdub@-Cxtbe@a?^C!Ml(Eb7*mASVbRx<2oXhb1Po|c`v zPBSK9|4od9F|FyR#}e^TJIQV7+Of+(FgY7_&<#7W5`~wI};1Rn8Z5t2+ZKw3J8O38BJYUDhV&j3pZa z@6Ira8Qv@;E_zi_yuLw)wn&JHA!eJia;T6EmlHLd8g5Ng( zdKp*sd;RGN;8>Ny+@3^pP(kJ)eOw5|nHGe?jg5@M`Eo&n0~?mNj@MfHCx96o^-Wel zsLA*W;hp%0$y{Z|gkD>jQ)ExB*<1}Z{)_2pRZ=y%+$31IO8bNDl5>;w zdh2G(Nbs2fGOD<2d**((ZdDl+o5Q>S2tR%i<)mvi*a`S!P-|v{Y&AxSuc*Sckx9EK z)rosB_$sZ$40h}9GMpqiVBZ7L9kwza3i@@3ar3rTzkNYxzC_;6ayoL=|G(x3P zGMnEs==g_VGI{yG2XroIrkC+5YAutEktHjhn_Wd{NT9N6l%_E$$0VjkcW&(o`IWGZ z*K@qn7*M2QS+L!ASVP{zMTrgtmh${-ViJ)c~d{a2DR9MhD-maG_sWF^g z6mJ7Zz$j_ERb%u=a1&Ff%M#aDLi%G{-G@Px>XP_5yf5Mq5+^bmXDtrS2z4@cu1Sgv zfXJCveFSo^xC*IG-pN(`>_$9e$#S^uO~~qX&A4 z>O)!Q1tD^m-h9M%edovwFnIQdBtk^2iwIcNAw?5YlHv!3x;i+Cu@k#v>nIU0AJ7_a zClA7#d=%5_&q)jE)zq2Yg1-}sbM#y;`GNx;=SPJ!DroAKMqL%@dLG-1PV9z*95gDj 
zO&Y!KmbEFzYUFp5Q$YINAYXh$hEO7waoowz`^c|t`}p=F7Z>q<3~{#OB-PsgFqg|z z%JdJmL8|PXxK%()(r9?xp{qAJ_NZSAB{=KGQO>uLT|*ZKH)Uf;BRORYCC+bI56uP+jYD{(Ne5WYZ;XAbB1s`~minri(PZh8n=>>y9_t5%q zd_#M8h@rHYE;9~BzR|ZzS8mFX ztxonDcs9^ZmFW8&1GGgc%{F-+Y|<4t%8TIa%0&0sJKIrf_-Mfo3U8PVh&FCyYB;Ay zXAbg<>Dd`wX7vY~__H?gu*I4!%@26E!}*$p(8%ujlQQ_@gdvRf4$;QNU}si>+LYe+ zb5I8aorh}v9LcsZRm%m2T58jiSB$D`Je8n62nqCOn|*({Cb5YOlT}l}l_(lM#D(&oRxx4VqH|E3knST#iD{@B+XLnPGnW#%*NYI&ZZ$*c2^eY zxeZOW___-cLFXrcjY%`0N@CP>OT>+g)C(jLz~mI=)atVYKyFx_Zevl%Ek;*Y?>!8Z zu-1VJVL1DEK1tiiG-@VsJ`Pk1s`N4!l!PkCZZhh*`050mD9UyXWn|n}cqhP~AY~c7 z>2|`{Q{@E4~Kn#xf4x>#-I|2I)3gE}KZYytwQesgzU*FpEEnKE|ao+6zO5DHxC!DQmC^ zFf+G!2ZWCfCkx7EXvVCs&f$zBORa@YC>b8ue}H?76Un>KTI*exO~n#WyDX7sq+}IY zW8tBqve~~D-cIFf%F0-~!ZIY;x!`izE~Dq@E;8R_-1>EQoIaT~0OL`( zhYCYj`GyL8^GA_t+pzE%iFYj2M%ZVr!6yOL;-Xp;K-l{Uj`@}aBKaY|U13_9h|qd2 z`fJW~*ks=)?Zp}C(Yu1cZ7yRI;d(vj$>Y;UjrYoO0`UCltq7}Ef3`AptZwStc-#sa z68!DjZ+P|kRNP>cP`8rR!=*XW;rN~-MDZr&Szu(w=Jbt~Vxssce!>=9gwueREY(1e zyaLm@*1YpaW6ox2jyM!TeGPo~QS{W2b$$ES-_M`m=eFb=3ybBo!F8oeO#xG9@J z$-LkJ3~sSUk7sN2C;DVV-J=Rebfy#Y(yVJ91Xaz_qqPR({8$ z0|)kl1KLP8ih~m7#Hx#BJIEdPUYScZNaNIUiY3p*OJ2;CVMJAUR8(|yRAo3j_StEK z=IJ@VcTJ_dQssq!$87XAH9d`@ z5=-j6DACkkl_<+hOSJy(3KRr?RvgAZcSso$)0xznBams}4GSbjW81_V(@ z&`&Ksimpv4b8(LRN%{Ynd#m8M)--F=k}O7xnVFfHnVDHBW@cH;%*@QPn9%}@*-)pe?CrswaNi;2jaoq4r0BKMc?de?f^@ALbKl`P_wZIX!nZuilf2=y-UJ8x|h z+}VosntNC~iNj`ARU8{XOnqz-rnV2@X>3W;W)0lYOA));D4{#wYBX(8Ii}%t4i~HL zy6-1g+7jb=6=cVgUrV}5CyQUMLquJ=1%(*D4h5b+G*rT{_3&Nqs~u zc!2q(cupw7$(+}M1eV%>7Gr0_b5#~l2Rm#Cx&9;w^8Og+j}dvKMa^sfHH0>KNy8k! 
zNwy--gWl7LdYt!)KU)$CmM*Mg?7=Qsiq?uB=Gfj(Vaop7bIOf3`#dsxd3)Prqq{+C z>tJdIM|}MP_w9+SkfF<>eX!Ql&Tq|2cv9FPLCPGJm0-)_C5z8a9q~-#( z=HY!X&sLLoU_3(gEn$cjUeEja^Fh69DB)BMM8iTQQ?GtK z=ux`IbyW43*%1EAT9T)To3uSy4Hx*Ru&!|Mz~rn~+lCP1&U>2UKz0>H2AlZKd+iI& zE6Q&bWHoX=sB^Kr+RCB1$E3FU+GvH@!7iK@Uxi}URRF{R<@O3__k?^2gy*I@=Wk}pA^r^aFiAj1gF$KB;jvyF zPk4_-I!Fc@g#<+cX4EJ0j`HQIC(xAhDDq(A8ygYoz3tgUR5!I_3+4rCIycW{I~Ecr z=Co0z9o-V*RV1fLpkx<;()t~)c$_m&xgavs^&3Eb2o^wbi6X{M&b-HV?hDiB zS+_$mzhpXZXn#mbYw>YQ767?H;f5b@T-0@oym?;m5zmUVl_@I(XO zy-5Lb_l6m1!UMyse543R{2-h22EJTeK9&=+=b1&+Sh-32>El)c(@!~kJkaRKICnrFnt7y->^3NE5QOQAHM0`uZy`s*ic`4 z_}5NNg~jqeXLrRpp24t-ivDKsO}D#JE#ZpiiyAO4kp|(k61AjmRR{|LC~EC>0%`5y zV*q?#@f~%NYbaNAM9lOyqF8yrqMEz zOiPq_W%2CN+{Iq#&%f3Qb6cyYr|tk$go;BFB;R-goLqJzRVX7b$ZPAj?IOjDn)># zMTjKZXIc(huKN}x|8}YM#)DI{(4#@m7(;vsceq~WA?7N&_9arZ#vBl)QgTyg@w*29 zBYqDe1ty?7djL?Zz7(3i#3&;|F()Wu>h1yQ;|IKkY~p?lqpujBXfLsAjT#e;Xf5b7 zTd&AO@RO%nRYOI!l(wRAz$Wk|6Ykhmh2th{pU6EG~&8`&i4NP25u0I!UQ0AJ0+0P<% z{lew2wCn52ZEjqe9zvXKv4P*a_~SbaOnXOZ9QbC8EMZX-0JP6SrQ`%2Mu>v2CQ!A3 zJ_mopCK419`7vaH8(cVe9o?zIcZ76_1-RfK0d>*Dl?MFa{y7#A4PIk^Ps4KQY~^en zflRcj;W-ZzRSQ<)wgbUA1vuEwEWCp2K=KC|2rf4SxMY?J-f`P)={$jFEg*13s9a=p zZ;_~KO^xK5{i68__#x#LhYzoSpO_WR+3==kulo`qCMJP5-8FH-fEjrEky-zjV!j;x zvO7<(+h>}_)W?P`RDW0t{b@JNRk*V=g52FJH)j6@K~HgrKynUlpqLc$pE(3)xqH)Y z-2SS9;#?Y)O#>$WTke@8Z?l7dfg82u%(t}ps~+Of1d}w8qo8H(=i!Trh*_zGlOCO# zifW39wVaZpwEd|s%+j~B&I}fG#(V3lN{OvU?)BK>#=C91$3JptbA5_C#a%q13{Bls{XkLjY6UeW0MW|bhMIay61$|& z;9*0Bm6)-TTP`ypVJS)RnVpekg^Dc3)T0()hK6|^QIfB23jQtXt(}J`+X*&(*K6pV zFt%Daemi>U&A~3l?>BjpVl;Sr=WSne#M!#H@Yp4>8Q)6;krR}QzqUL>{iv~!r9s0F zzY0O2OkbZ~Pi{o-iFd1}%Xm=XSx+O5H@@Kb$4KrMFSN|}Q|_+Q+kC$z1(H**^S)%| zG9bw`Om}BPuVikPjDEe>E=np9(AXo|S(1|zNhcJ{U(u8&#m^rtN6$87tj>=5@!U*Y za&e#(sy*}Gu0R~#=*y_+*hn~-Iyf`SY!s+uuA*c#)llcG7ih|{RU}Z*?KK7S6vsz&=|Xen zJ~v#z&=2jhuud*#a^wCHYv)S!9b%7dALquLW4)BC)aI3KRF|%>c8>&g)bu!^kpjcL z90oMxVu$AwwC@r;H`Dm0aR>zGR54Dj{H;hyrJ|~a#8%493)!7>b#4h^RxVKbUGQ^) 
zcdW=ttWw^$^S|+)jlWh>hI|@8F_($1zE9o1l&D@cDjwi9MO%inC3miH6*vG8hVOA`Y?e(Y#Q2+>>$t{IbMI^jUc4%8u0uyz@FD7nqIT?= zk#C@rHXAQ9&5k$ME_=o$K0hUS$0c~Xr2Qpo*rid`^;0c#O0Fw@h4FQ`D z?33pdzpBtE1_F%cXDV4*EgKwIZGOi#HB~E8lhYxY?j=c7P@RkxK8{^gdf)8@NS1B? ztgIT2F+(Yo6|aY)BuUyX9t{s_-d|V@!z@bLuIzXYm#mA$pYsxN4?5gjf5n-Ex@TE9 zEvHFX#96yVIDq*^9Qu90#Sr|8ur6(c)FE_!0BPW6pc^S@z-C)(0M`&pLAZl|7jf*W z0DcQ5jAQ`jK;oC&toCp4)~Ez&6rd!!i&@Rv9Q`aLo(KRlAsQNJrnX+*Z|kOp-L z&@qNeE@x56j1qCV0J-z?aJZ0qkz4_-T@i)gwf!txsmpg(Io;8Wf=ADZI4r&?e>+;e)O6am5}?0zv_!y-f4@&_tEGnwzb?gJAjs{N{&H@EIp4-$X?_1Mvc z?-XYrCqz_b&Y{k%;k@@ND`sjN2r7j4eue26Leh)=+g6V?@oQklxMcNa^Cq;WmR4f2 z=~P0ztn;EiHK+ai)RN1+0c(PHqE6Dyvy$s{!N;9@kBJ>tBx=Mjex9b_JwK=sp8T5z zzTW=9V&MEZ49#5fGS;l2@Z7s_4VkjOTZ-~ zeW4t*Y1I6jMR9tL+AbcbqVf4fG^34J>N?Ig7K*+CE8e>jGdeX`Mcj~xfW}uXbg`53CMJzXR@d-}&6BUFZwB}(7O^Lpq;ghMa3Hsk z;;AA2PGEVfHU0~PcAg+5Q(&%mILL6y-B84Y(;xsAkL}rj6Rq&@7?ruH^u_2~WdX<$ zsz#`wTx+`2pFy;%gtU}i6)(TZW4B_f6)jXV(vK=Ey0}oW4z2LQ;M@B7WrZtlZY-Bb z!X-6p3tEbyMs%z%TG91Sfw0{*xjnuj{`?4Y+^5{n-dx3wMA9x?gkZGrL6QSjtq|zK zJNaeUc-$+fDS&jzXu~BGRbYZvU0{WM0k-}+xIqV|EkVAm z(-rM+8J)p8vS{74489<-7>@7)*ijD!Z1&LuWZF*O);0@D+z&HCyCP6K8%|hXYM5W( z@ZM)v2FfNJ4?OR_Y86RN)YhZcd0 zyOtMe#;VN=l|HI)q=~7@U^)zp`9(nol>N)L*Dz7eq)t9M{q>sg%8LOe*X= zn(iD52QayQX}ZQBqeT}o%&KTarDiyGcafQwb%ExsO<#mUrC(xZZGh$02OnB~GMekY zZtl<%Oi+iX^u~$eG8uwm;DphQSzA8OrOo>>NL!#3L#LEhR!jqRoYwEjVvQGVUq2b9F)p=foX2GfKxaI_$d(3SY(i@r0^>ITj!+P2 zpgMR#3r|??knf}AT<$#?1h#W6l&%~b8G_7?DqwRCT3G?mXaa#?gmlWwUMgXL+Xd|8` zhkxI@4n$&cx~;-}U&^{jllyW6%bzmb;}^xcN{t^R+T+R~*uL!lG~zcl9gVbuJ+-c$ zJ1d0epqs_RGbUo>81b}Lg`*f(DnJa+BK8Lmx|HVT=(Pv`-g5kti)c_?tlrsmO;^_@ zv0*$j)fTLnY{ijp4yf=ay_U~^{#{~pXn(PqWq!?s~#ZyBX(SrJZ23t)e7*tkKD)-#)3COKVXl}YI+FZ5O8 zE6!5r8ye4yI$1n7ZT;3rcpSN+oj768P()tVj1h%u^>x&EWq8y+3wCK-HiHH^_m>{$ z+jfkW-te2-R~n%-z#y!EAVz*sr{u7!Czj5zm!{}03QQmK)6K{0p4o*CZy#+r3q=pV zh5**(@~HthE`_7qc2$Y$R$Hfhbs2|LAyIuJQeHsH)=YiB&%@4JH;B`^~j?EGPZLn2AT?~kh>x7qIA3AdyH%|DxA_Hw55-MuH zv}yJunrAi2qFNk94L%zoYhAy 
zfBcAfW>4Ec4MsryD5{UkABH{;*=t&E<5u3-V@fgegKWk9CmKk?kwaB1Jc0Or# z#F|wEG-=j^g6zwgvpjs@YuFZ2D{bC9jb-%!rkyy;6?Cob4S7SSsgHJ8m&bydBm!!| z=$rrACtqnqEqEvMZ1x@lX1rPmPA|A;{XSh=usAKi)wlTtPkWW=(c8Df(XEBe!{&MX z=w=hR(!jDOz};NGQs0Dn(9{~VbOg1Vdw~y~gM*kyK~pb(HB$ZN$EN-8&GReFoV8i#^Ivl z8%5!oWVPh|*hN|1y_?RU{)2kew47-s?VF@t2CXre)6?pIRPxHu%^vh)0MzvJgQErVVzMc zc29`3b(>;#s_I+iv({gfhfCAQXSLdJUa*p$IW(g3+$(HFO^}EM4WE!N&w8nSb?ozf zUqKdy=D@62_4YleSk`c;1Qr*NIdwsEa_&fj3gBx8<~`^+&~Efi=OYh;3PKE%M^A!* z6D{~ofsfaSV-WL@%M-Wj+Qv}|y0*vVNw1pqFlSNStZGSdU-y)guCg?mEM26Bat`UV zT{N`?tBWos2NzK+y=I3bD=PxtQDk(N;2R_G&iZpTe0P%kMp@*T(d)N*W+$YUu)}tf zz$b3c?-w9)+&Zk5p_^~SXzW#Ldu;t3&0;wpcI`;&Mf>RJ>s2@QBw68q8oB;#j$oo^ zWBli^@h?e_|Hs^minNl3`rm4`IQ}K|@=3e-$H2wJ`oC(y{l6c$K22kPh1D=K{>#Ai zN9XOo)JHg)e==qMa8G^OyZ)HKWIkgsihm2jsQ+ySGokx42=m`*G%lYL*=NI+fg|9r zW7)sa*4Y1dw6#B6djCpWt5S3Sv|hFG5yU$xL&+Y5cda9pZ&x~(g1{;7g*PZxLY3kh zq7x%PgawcXa{Nr;Pc=itM}S~t&z0WRE?2E^Xk9!qZ&I_Y__6n+hO1+uvy&_LzUtu^ z&$X&K-nsslCYPu0#>K-+*G04Y90FGO@XoUJYRmkp0Q-nY3o3t#FN(7Qm88tDF@K7!(q1LOs9|6_*IxVd~;n8SgbH(J7%4?CMS+MR2 zcnh{FS(diW8Cy^_C8rB|OI8)^E3KB^&56yqSO7JBUlFd#?q|aQQy05V!X5eM3*ph^Q5L4HEw0UwlzLi5{+MN{#byun3cw@mYGi>E^!hetE;LY&e{qe;38 zAO!f1!&3hQd5Ti_0g*dQ_;=lD_PTHJ(LqZnFrj<$w zD$c_fYeb-`M;c)rwMU@iL)I_KBYDH_y=4Y$O@Aro5=gh##}D%;M3>HAv@-YV5u0L* zl9{P!`qr~)vJKUXQ$;Ahlp`tDBWngc1ZtgE{QZ+V%4^H>8)Ky+D4A2UK;YO{zO5y4 zzzaPEye%^lJ8|9Ypj2V?ej^t|1xihp6xFF{13%7G5MG-^VFaM2qMIAjdIa(FsJaFQ)SHIapjZa5gsXe-^^I>1~b-es-z`gJGamI|g^ zljMnu=_sy&@8t#5(7fWrf?ijyE!e)zqPDa_Z~Bvzmxv0iLLWQNRzYN7PdnpIW?x_c zRon@G^(fs_vIKcB&7%L-g;qCF*vdqDsMuy5pPaz`NY_qEc;(#qz*iH8bWQYaZaHUk zm%eoijkk9=XEbJ}bV*k52_H?X0O+Z`nqH$yF=~uwA__DurWs842OGW*EIjK*X~S}QL*3^N$aVH)O9pfou!>_XZL7vmDr`}kT&{#y%Tmgto@6T zWciDpDW`h{Jd)z&C3>tjgyI&a0D@{^`Puo^y1bl`Hno7VFC2Hg?pJSxWa{Jtk5#p! 
z8{q{cZ-iTwD71oOw%hXghNwxicPo{w=Rsduzx=qmBZ2SOZSHRyKLm8%(eoj7+tQ92 zkpNS?)6c;AN)w+FFBOfJqvh8h+*Wv90p~#`Nq&Xi9-M7iCYPYErNgA-ckF4bLg!*2 z_5b?Q`lWqiNbR1KcT5e~bIh13aH4~&+4m_u<~O7e+07I(@#b8kq>O%qAt6i%B@Bv4 zle0lnm)7eW+fVj8IbiNB*XPOhFU0A?o)yzW6%Q2(4lMAr`TefPbl213jlnEK>F~|K zH#!d~51hnz_Cb9}QP#2WWut}4p$5nr(J{Y(WF zd2r#*A{?O7wwPS-x=Cu5CsLF_BG1Q!Nc3_BZPH+~2T)pdR9Yd}b5h(o-exjd%Px8B z56?!HoF;2Bf%&XV7C5;h!voD7hgw7Tdhgv3NTAvTuQ`5ZQ~byXl9Li+wuprDO3x;` zhkXM3sFZ>wj*fBXERF?V0jTKBs7BHBV#3)}jO3BY9DCe{!uDBSw=?yDH2#^7$|wWF z_LM4yhLYti+L;)FiCZncNooKC;~;HyI^N{pX-jGH`P6)g8OD)d-bGHQJY7>D}P6o{<)MZzy4wE_B zGqi%fxMI&-uroy*+8^d~{2E(zIaUm!@hK_;wCoBvMIQq~MRo8XaK|xoP#m@EYT;Um z?JXFEcEE_;8A5u_!TS z=X&1~?UCSF`1T&E@By#iQLzUcv=VX=S`RI!oDAVC@zqq)izV=tLdj{q!pUvkhk-^N zjeyCXT~5J9$&IDf!pQ(!CDdMPE^3Fd*c_#dS7L>q0v$`1x9il-5m6? za*jR?WHiwXE2?|u*w|Q-(Kv=N(u4S+c;U%-dH9RaRv)$MegmHsyS$usRxRs#wk&8~ z<&1-+$4M7)0`0t{xlFaN8rB&)`NHqZ(4NUaRLC%GDilYSugnyMexzE&IW(OlZxPBS z4)WKt!QZ1UF3;ig-8w#ZX4QIaL8c}S-$!6uKylk5cdZ=Y03%|a5ACN`j^5sT2dc>Z zN#DLAd>i2RLrEl55kSPistXIBON1T6PVvpjESN!rAmF3x?rYO63|H2&e+cwmxMLHj zu(*5VGR4`8TV4EFw64Xi0^TDD-pVi+Tc%V}-4H+|qn6B1+X{n>nxvXuVG-5zK+2W; z^QS09u6SrfecP(3bm*I*wu)StC%aVrNYz9Vh3 zG9qdnzjq_w{T62Pwz=mb;(*^-f1Hg|@*QxobO99;hMZ+<5H3&e*nVD!-Zm<9krh^X zQahXWh_z{_GVwI_<94cX<9ZAzWh_4m^_RDDE>Oi8m3cr2B2~^a;L|2ZM!t$oWp8D^ zWqH|MI!<#NPsgu0JZi($GcYRu32`^BP)4AQ>9XXz?&vy@8#kGA&uf2?X0Vyfmif-~ z`;&-qn$B+DIz0R`R$yrX6PzAgnvYOj{^ZDWqy!(acm^f4Oo0M9*wtHZm!w=2KrF6I zRwmFpU?K#`hK`t~xHEz3%L6|jr@>JkLzKl##i?7x8$Yr|_Y7h^SZV=;Q>t9Kxog-v zf2$P>%6W*}Vli@rqUw3qvUn<8(+IKT0|fRgQ;eE-wZ%pzo;)ePXfUOj8p#fCrM{VB44g5su`vCqAF%!T=Kr*^Rg_a# z7ZLayH{)LggwHz5zY7Qq9G|`1EcE|9hW7d9zsAu1cKKg1v_C%l&l_8|&;IVe3J4sZ z{M&zSZ2zth7||Jj_Hh3lj`4}c_=^C*_HWs%zg_ zqr@DDOfZ@qVV*V4kNO^+EE9h)T~yb+G%U@ciz7TqFT5NWD zTwLR2P@qW!<8}uh;a~{{L!X#eu#UjlxB4H`I9=xuPk{*9cgS6+fpnlq$p-N$7Kl#I zhW*8`SkUtdkdqY!JKKiy&^JxNG91gG$0o}`SWQpMK{Et$or)<95?boqN}cp{a#`n{ z2FfLtQJO+=tghUa0knFylyjJKV@|D_LO=w>x%k8Fa!ly6H;|wIcZ^9Th$M*9gcFjC 
zQNjqu!UQRE?1bgIOva^iCM$SllWQ%Pzb)%-91cgy$N}TMoX)ZOgZnx&sZGOg2=J-C z!n}aOhy&6@zB5mgph&KUX3|nW%-eyq%+7 zS8wE(fd63fOF)!q5u96)~gm)@~JrnOAecFdd@MWoFJuIve-W{kDxiHpM#8tON%qf`z#t- z{{RGUtRCkp_jYWEzziFvUtZ->i~(>{IE@KSk^I-KM)~i?r%D{DR;)Trbt_OFo6UgT zoK>wn=(b)^wZhS6JCE9c>FS16i=LkmSuEJNSnXQPMvJOsfxZy1;3p6IA&&sXi6vM` z=4+Q1o-$ju-ytqXtR&kAi0~{9BA2|07=?XaiZRtG0Qr_-b31)@D&5b@?|xk`@q^g` zjddTu<(c_sAMyrz?-vpt{tbjfVf0pK5ZqipBm>P;Dg+78d88}KHAM8{`>36>yJIpe z9pYDlg$!-g)pYMP#Hza4f>Qqe3)$hLlu4H6#`Ydi$vf6u$3ZgKdhM2SC?Qr_?E&+l^4J}Q~i^}cp9K6HTtf-)HRYEH76Fhd~ zKyJTtVyb5lkr);%dHhVfx8dGV`KIEATA*>{DA0qU#x(Is2&O+j*Zmw7U@^tx5E%!iao2Jr{gJ|o$>T} zI504VgZNaU;WvT(wTm6zT2J?ed;4##&KJNrVn%+l>5&G-_mcB>GgPSJ2S}!V6h=S0 zwt!9M2{Jmx>xq>yRpPW6)G@Iv47!z$IGspq&@c&J(SHGFdUWMhJ^ER!l}+5;+?$+B zE`#vA_Tbp%SJKMpRa8=}(RB*Ei_3U7?4tcB4M}Wu#&v3)-h;#-YXUFm7nB;PV5f#* z(Ypx3dWE?A2(Ffo4*K|M9`e@9Ucx6h%wBT1E$B%v?t`h#6}{hl+yhY~1XYQ#v}E0) zZ4db<7GlGZMbeZksCDF$Q7)Nawje;62%ost2&1YU$@z0OB?A;Qk`^Y;sM6cVqRp8$ zYp3e+M55C-ssL6F^+hVymH3IQ&9K>K_VQ2kTYxxq20bf>DIAu^V5q{U3|| z!%_9OhwdMbY6d3Oe;!q=f9l`=(@`a%DJCH(_z%RB5A**oib71y|1=o=71J*GNq_ny z!~A!{(Vx^OGdgp+|1Na9(|P>mfMfl)=HtIz|DPXl|Io?*X*`Nj^;A}#$K?(2=KvR` zxd9YrgFN{Z=KoGSXTjSghYHRQ*fui#Y7ifCjV!NwjlK`Qf*+9IM~@F29o+#oC^%E7 zC^a^GmYm$oI7+VReB}yguyKW7F`CyoS#Mf=GWvDuegE#g@$hm#{CDz`+;g?wb~|8N z8Sg`+H@jh`qvMp0XIG(;dqn`ZKwauS$|dq-%)tVa$E~0K_XjZ;{9reCFS)wv9@;ii z0}qEP8t%6X-k)am?Iq0**bhMp26b&T=>S(>882b=MAGc{P$&4v<$UM}x$N^Udd3;= zByW+-m-5<{+NFWoDKV~>0by&oQ1}SvhZsoMh$z$4NCuyu(^{xS1A)(0@SYs_tlYG^ zXsMf6Wnt-OoNJD#mL~b=`y8e`H;gxkOv#W2z^Q}d?4~3IhNg_!()bOD8WWWU$qc&V znx;@YlhlT)^!-QqdE@l^6rzRmOwqR{WL!}!_1QrN#8Q5 zn!~w9UoOF)eA(e$z)~P-55Om|3UQr&J4bQ@Mke7@HW>92+O4hVc&iR~rl|pm%jL?9 z5!9gTI_IKn84lj>)auY${^SI0v1$+ZuAZfPQrJ43_1_7wsIOJ9phqZod9LMq-2fZ! 
zR6VQgIzG4n1FqeE%C=A{z<1_O-r?MoP$u^zm|@NmXjT*Lcf?SC0E8co(+mpS+sp7l z@kR768CR*_jWSK>*Z4S$kwKu&MN>Um+tLc32F5nns`TdbF8himLF}0W;g}0UgBqfU zj8ilA=&U}FGs}VAVLn5DS?1jD@#CvIh5PFi0rFuTHHwb=eOPvx(HZ)`4y-BX>_@zQ z+wV0}*mQ|oymR|w5uP|!UW-#z&7h>@MhHgpo8>WsFe9^4l(`H|uJ&Hi#i z-4w^xm`lsCq({Gojn~cj0d43$@iWmvslWF}dmvLaLHS@H88*h7KA5cVq{nW=00AFW zA)AbT{~`_G#|;DWM_2($ly^kna4fjvq4zYeSwwyh(KJ+XLuC_$RcQ^98b}WWA_e zjaB>(jVsC&5WHYKW}3~&K=WOq*+3qYmML$c+6KNgS7bV6`G6$Vz&4 zE?1*x*&qj-teg5WIPPWi73h-wY(fS}!j`0#*6izsYFRp~qT%G(gi4?&)vBswjYD0a z+|lljkh1*t&uGTV{B|=xXlC4_sb5u2-d;Y)O_F|7XYwxEw|sl7$O76mXIv+9Yt__Q zMx{^yTSmd*P=%=+M8k_qoiB?0ZXcr0D(I|pe->pOUrchjDGBv^{9ri2VBjqzjUA;V z`-n&J2egAC>~kZw`7qo4XvO$!gU%C3DlZc;k!N4KMjzNruijQiC}gU6~B2maoCq%WM-mGxk!wQLJY zOQOvKnjOTO5t6SbY7ajArYk(#G1{5f^B@dlgAh+loVBE6<_Z$c7W;gpGv+ z6iM2Pm{gmLM0t^j^7LCrix{j3Yj%%@PP3f;iUjJpp?1wxHL%zw&R-Fz2O(6hETZsAo}hzl;4OX+XrBrm~&1yWM7@RjnZZT-=Rmgm92IPgXq zP6`teDx{|2^&W4dRqi9fs%9H!8LW3q^wQ^}CZT%dU z^yF0?VuuApd!egyc%T>gUSO25*aAkS+QbC|GcHO%MT9F0M?E-S4f~VEkRJse5d;$dR3bFl&I zlu@wxz#Uc)A*ZjFBVg`U#4C@Q3ksS)NynnOn;>)#dsMtQe#LoO%nP3JHSzQEb*B_t z=YG{9HfHf|Z_?-a}d_7E}|HUkXS<7~09C7}{ zNah5yMBWvXYa^jnstOD4Ecq8W&g2tA7g-*2H{+GQrZvpho8y7*Z^O&N<^7PJ!u<+M z8Mn}tKr#9>=4}HecS*a@-fE9gf7gY9(vjP5*}|QPCrzf8F~J)nIu+)9jd_Sd zp8BjDoy>v2HLjo@OwYp58${m-ldjyP>4HA@TfN#1lt zLe20BfY9Y)dVfa|4K}QF0~^wy^ln16@1v%aWl$r^+(yKi6%0a??k@rv{3THo^D`D>#99<{?d0vIRMevaf^RFYw z==n|`*5xS|EdFD=-lKFUqrkE${SYt_na#pH?B;uGGHF={KN5*~b(7w_;UDw3%9|1< zjNJ*MtG<}(QF*WYq~W;#6IR4f>V}0L_ye>wKmVH5W>%8RiR-%mdKOk9xZ@R@$5;qA z$1rfX7v7iC7fDA3^?o4+U9#*vP7T`KSQ|b>_Rt}>Yye8a+0fWj6Z>?td0`-{5u1%o zd&$WhSRvk=lTkghR^B_}_127MQ?4bof)$gH*x_aJV|3Pc8vV@SncVIm%AXeAn?$jq zNthMR>4|8Uyv)yaWZ&Yc@z>0I^K@Cx2q6zsO zdgbsi2(>+Zn49#=6lhJZRy$8vi@_H-wUMBfC{br`LzkZFah~NHeKh9>jL~2uz4y+_ zuP8a%mLaxoPWB&u9(A*cd$S8GE@>xK#SJuZ#UtK^v&+%mq?_%UXpiGsCj}}}#Nosy zrqhurr^$qqd%=Y%_cW9wB7-fhf)cE#_N-{e;9<1Uj+EM2a;WWXgqAKi+MAq#2JavP z!x2>fY54imsKUs>$o8jB$NK*^TA`pSCo3uRIs7O||0P<%`e(F)fsyI|TcM7X{ojSU 
zzeFqkk*oOY*kfwz>_GRocB;SPoW%cu!Ik~nK=fa&RG&bnzm7itmbv(>+WKEqDSmz@ ze>L|cshKM)nPGM@?0cvPTSk#8k@d<}Y0>1WKWGk8hPOm4D10M;f(YPWB8Uy#UGHEc zK!Cun7v^WCrd$B6aLH7~NpUHxSSVUB87doVxojE)Nby^DsLhFKsPDouj7 z{gzBDI#{mAv~in(i+9ny}!x``f+Df5X%l6dn^7k*NzQ2?5gr;PQ;!W4on z7*gyn!?*a#5{)U*DI${#Ca-9LKc8|e*C`E4dZ)-k-c%ZLDBVoXKvI4BuKGX|?;r2s z!T44hlB}^lTXHbm`Q!fn#QN&$#(in{baid<$g}tY%R9pdF^ER%@Fd=Emc${PzlFd> zB336|df12(?qP_@KTVL#ls^k}Jeg)RLoj0Ayt)PU+H~dkch7S_XRKI1^O8Qp(UNvT za#;Z%#catDN0meg_K6ERt3$d3zS^v|KSd6eexSw5Vi72r$0Wsg8H&GUvU+X%zW6ex zk^<|l-*oBN5ZUjK*fJs`))2o3Wt@*Lce=SgGVO#iw-V|0Qa}#uI7-y$%KdY{iVO^} z2N!N{LkFt5yEZv7Rfqlxbxtqt5xkk+_4BMHTq@`A3hK@nD>FXM#gem<}8F_bh;R`agW}&IRt_qb&?&Y;I3UBkzE?( zu8OB`uETLqyHZ~lU=2!|T1VbtM{okYAhQKa{Jve!UST|ZLYm~(cy)ioBD{owG;xXa z^?DjI&#=f?gw_P&$eYA-Em6YFO$FsJi<(Lsus{aGu%gcNBG4h}=1tQRdpKMSB^n77 zjlQrn|LTR^ziyPPs{uCKl0D)z{&Bu(Eyf@1b;zY+Yn#f>^yNW$d+V|7*39227Yq`r z0M7~sX^*cT&sELExjjN%Eq|#=yM^$K-M+FVtC`c}N;{DP5>Y?Sok2po+T`EK;&Vwhsu?UI^5805)5a&V`=EB71BcaPdgZHz+e#9P zWIQL&E^Qz`Fwd;~JafiVD4T4^v-Ogbc7XadfXQy7kt$sRk^ngpv6xc%1h78?4)DT>9vHJHg%fc&{XXQqcEtT^wl^WQ z_uywXdfVYY@1Lx6h*V!Psa>l-xLvu|^VKjXW-3#XKj97W2R~;gwa+js zjJswR+DabAv{oQ4h+t>t#+c8!z*6u##5r`vMYzsg9|T2u)JyJXXQkLy(Qa1whCrts zA9CPAE((BTRJjG^Ja5Jz!3O_h0g<^4Nnx~`_p$m6nO^Oju7`=VXj1_CEh&$PtR@3= zH`?aWqpO*t`h6o}I&>r7BwSQ#iW)hbHLtE;UJpBW;M2>m`^4ya$km);_(4qsm&PgF zYGZEqc+wN;)w?^g#zhTeoOsWiKWm)G`%;45l{W1kxq8Ml07Hs;96m)@=++ zm!3_~HF1qnhtD{hgM{+lC#{F%!qb0;<9%g3GpEh^179R zFbm%$$c4j+J-f>R1CCe@)(hxWK$)mj)VECuDf`!ct;Q(W^Oy2u;@Q>$#MuBp45=Kn|`-?|9w2%1v@Gl{J z+>3tCoPjvPP_77P{14}$LAQ2CRxgobrLIirTBe_z8qNfQ;nJx}9js2^eEzeMXQ(Ky z2MC$3%yCb8^~(VAU#vsKvPT|MhJn-syb|MK(*AW3+&a$hLfMp6l=-^R;oK8;wN9cYne^79fIrJb}?4$tj^$w&xr%9m?8RaI0x*){twa;YzMzx_z3=Vg^=C{U|;KT}*tDK#pU8kq|n9`FQM4_BTj52_hN zt|H`FCs@#QMLN<6S%!GLf@gunfF<(= z^{KX=7&G#B0*UT=%JD+W!Fc%J)N7b(NLg`Ru zl9wR^sEx$SNqX185qey=tJ3FRRwxC;jv%z(u;AR)@Yz8-GUib5QNzV`bSIM1Gjh`* zau2F24J5Ro+S6bRqs$P#b)r|0z~1`o>McYE=tEn5H&ntgGx>SF=-w=nHC`Jxfdw2H 
z?gya9^8<_B*yq{=>X;mrs8pP`HCe^xMov5X`kmaxZl=MCTQ`yxDh9H7|Jv5S>;kGs ziC{%&RyovNbe8qG=03-K#Wa+PIu+(Ru^Omz6K4u~YUr9((Vikf68yy4d3~3kQ(~8G zgkt3?YpF?*oz6QOMfo?S5_O?qDaRIHLm#0R8)$diR&(XXY#j00j^eaKd z*Cx~8RuU`HU4bra_!c%G1^|A(0v&Oc(&PJ_j#s^`&H?5&GnZPs;=5 zg;oFs+VTlv@-*iQm*p7kOj3wn@@rup{r}b8cYsG#bbkY(E=VXMAPAv1A=~fnixNX9 z2B`@(bdo?qNiYc@NUuhk6oDWRdITvV1WmvoHK?H}C`M@-I)*ApOa8y#+`XHDH6ivo;hxvAK0vfnRPGJKiq5 zvDx?O`oSaoYYq*WzV*PkRtMTP_F1sjwlXq&O7FCt%N{LB>r$jfWbe{@ro8+?^ZBXq z-ab`2uX!tFm#5Ff;IykrsWUDtUO%_zl1?jr_~Og;>-tZ4Iio={TUNVH74*f@0~W1) zd1{Mkjk+`%@Ktf^{sYZxbPF3WyW`=P7rr%n@>{j0JZYLx#y_iKiFXb*-}rrp!A{;peTGJlZ4U9*_Yr-|Uun>eS$>RVw<19bFQ*J$p`3uLdVBe7>t>lb8B#s~i2tm}9lue49SL=H%)J z!v69PkB$lr8XC~N`L&-;E^oSQ_U1nKr=B_Lv9>@=kJn3$`1;AL-MYqMFPr>4YGj3v zjGS6)_-@1D-nC2zqDQZp6npc|njzIL-QSlOpJ1A_Vrll+zy&3bu1TGHvuVqU@hcOh zgUyXgeFs!Z=-_$ei1F3gbv?qb23bEW{6*Z3LDQ>^O^JN4&9C0Eedj9Gu6FlA*c*P5mLl#sCIWa^C7Sud41)wX)Xg7C$zirP4 z6;dje&wOdshVYT~PJc0^{I;^)7o2Dn?)%}wi%-MmuP?at@m+?$`Sruf3#-rE=`-6JTcWIg=9_<~H%UCSvT=ikKlw|UZ5B0KydrCL%#467 zjT~~JF0j27QnpyE z-+}fuXV@q>ie?yD+HuKAzCl>&T^wqD!%-iM_>I2$`_PFmlVn+AW7UUtN!4-5z@BRk);_q%)^^Q`L)F5Hwtn&vUhr4+(_T%gH}v(~>er*v@Yj+zU%q%{^b@Zp zl5f%=&*MRV*gx7~@f^^;!E1 z*E{)ieu?$9uN3&}VkF-9+^0iUjaRB<^~^}0k&zlcbHw|)7NY}J8eC? zsPMfDm$$B*UH|sas~+C3(yzGgVXdE@F8uWF-4bVq-``ND?6FnliXN<6vry`VuUfP{ z-@WSlK9$E6GxR;A|hOGE*<^9H)4SP+QIs8|j3nk_}IQGqmCC5v@GWYO} z9vzF@e`=f9HgL(1!yb?Pt52)8aA)&V)_T9k`z6Fp>RPGScUii6{oZX;^OfMDW&N8! 
zyuRg3^_bzWB@Ws=d*Ekl>xVWPG_lv=cN-;q{Nt^Hvwj%q`Qx0mGyQ@mcAer;@AQcN z^PAmXy|S#qze!rH6Z4C|R<6?dT0S+KMOvj(mWgw^mO6FMGpTnE50a`%q^{@PPJ2ERS^y=JrD^=-X* zn#Y=XiRss_+drRFA+2<9!bVfYu-Cf`o$xf#(4}O(q)kkGBf5HQqbmyW%5yZyx!6U+?QNBS+eo2Np4P^*kPSCgaKSMjQM7 zZt<8u{nD&Cbvy1@K74=Zn3#S~j@ADmZcD_#KVKTwtIF{vVb>Qt`SEDYhZ%de{<&z` zEThK|zxb9HGU}c29`*Fx{hg!U+4y<44n;Z!`HUV_aPxzi$IFiQm@?nPR{6Dn7QY0P zTyZAt#I;?g{Vtpu_0o^k`=mTA+w`-;tM4~#|LueQ2~GCKgaoX9uVu%Vr?i;-Y0a2r zeFu(Bo)^$1Y0IY52@eXNY34aI@K)LN3m>gK^Lp|3vZ@z4+T1f{@37+2OEx{7vAgz# z64y@jFZ;!KQ__6H)ah{tlbZgVx_tlrzYBEPd+M!%A%3&p+}gwT@;Gbj#!Gzrb~)4U zQ0$U9e^^d_S^7r6JH?Os?KwZ`oeqa?6|qztT(HBo>oH^Jp6zmR_%=)0@XubxzR#eR%n~ zuzQ(Xe<|?LYjO6}tJSJkPN|TxE3{XM`dw2t-KzZ3!|yDwd>%Bo&U@cS6+C;WThHXe z`*$38ymd;4uados4|+7nt77{R5w<5qzuL6?(EaqYYbN^*Z8xId{%^nE^+(}l?-u#F z>digdCja_q>sO;zc_wFEIeF))ZP4sBXN#OUo}PK7`L2i3@5eJQT2p(>__oOHCc}3$ z-BsQ5qq(hX?${n**gO4lg;l|0D|kFOyKF_(QU6(0Wa-uWkGj5dq~x!|@10n6Bkr4T zZe98GY~ZF1K0~bAKfSdp+kVo=Trw8X=CSR z1Anw3tbbHsXiR-?1kF`pZv%eoK}4*bW`Y2`xS8MxpF7X|1c&tNKcHVoNXG4%ccMqm zK0fmPOGb~|6~@19%KE7PqZ;4$%G%Xodg#Stb$=RhZr8MwDkaBD=d-so`sSOo!&fd< zxVo%w#~usgR&-ju^1`MK{ldb-x13+{NzlT$WlIvyf6{&ayrrLXFO-o{zwqUHgVrzV ze64})6244|dAU*Hb+076^6|R$6K}*EJ=VB>WB=rEO37~ft+B|NPsXR6+cB=$AETDfNIN$nJELOnU(POXbouJ$^yO!d%-ff; znWmjGI=jm#3X<7OnHVJNwoNu-P25 zxj^jU?2iUMsj>f_-y-|XzK6GZ`V89MkS9Z-kKWbFgO>3c2ug{tl(dr~#56kF%5_H&vKA0IVvu>ZOhlY{Z4 z%-gNEHSak1-NBuD4Avd!dilw3lYf|hvSyKUPZpkBz5C?8AJV=!u;CR@Cwe!^ z9@MDku$H4nKir)ecc;vV>(YduJ!%}vj89JUSw7+P2S+D28vjz@8}DSLe&$n7A3QiC zdw0lZlPcf+d`UzR>ITfJI@s=e_D!u^ZcSe1{mUx?N1v0il---?=R1p zmvUo1WjoMhaJ~uvFHLi-ea~~Enz!oV*Nx2A&R=p_<_yXzCmH@-CyFepcA``}gag!T zoZjKXIM+A_{+gwe`9?&|j4Ay9%=gz{=3*u4KT&uu>H4VnQr~C9x7*ZcPv7#7 zA{zdE=7a2ADFr-CwbpzxYGL$v-_TA2$Fz!_JbnB6$*b0_OPU-UF}Tyfr~xY`M-K3B zH8^5GiH{bnDYGnoSzJ)ShJ^D=@TKq%oi>$O7_=;?^P;7NcAm8Re7SN*?ZmhDN(8AuX_6s!(%!g9vGeAQJ367&x#I;=vS*zuSLM~k*&p@IdNW)KK6dctf=vB{_`89v zcO-ln*=m$$>eTxU{<`?ek>xF;j`U9KeI~SOqG?yc&CH1Ki14-%jpsEw5&2!Ihs)3P zsP{|P-+KSt``D_H>5tEj+Iw^UxzLm$V=wPWzy3%w{jU$>L?RS-q^1b(_Tw 
z8koFn=!@M1dZ1a!h@5=_JuG^;-*%LfY$gzx53tgJa5~8d!r6%cMA92T05kLv31LP- z{Ab5oMXX#+05pRNN^%ZNXJ&EI6lg5ykA?IDy)70=B+}4~=FFg}RWEs&te}nF05c37 zv*2*Jm;@YYu$WL8#4(%L4|E*O+DLRvoCUX>EM|+B$qr-)HAyTBLVz%0B24%ht$Ze3 zKIb0}fbDI`|Rnblx{aG4-X5Kh8ea{NM>U?4J)#5aeUZ8{?a$_N~0 z+)ⅆxU7N2v_j%vklg?o%~_2%k_bSaP~vV**0;Tuy`;DxB`O=7%h6jbog;>K;w$i?{Cf7T_fe#x#@Fm1Af>!Lfw+k5k(Iw6TRte6sJmFkY_ z(S}u5;wlRjVYPxg4Uk+e*;H3JyJq3+QdR0LIy0oqY=`uLBTW`?j0EmtJv}%d9LeVv z#PL~&tGM`%__zf(Os)7WLMzmx6)M+^b}V+T@{kIYC6`fJ40`Bk&3{IyW+n?&xE|ao z=gRkI=1M)uQP+{{sBLhQ6^T*?Z3sz}pgyfspO_oc!^|INM1O3MDl4SWjxpI_%c2fe zU6g|~B1i#RLFGXjtTw7=m?fZ-7!&HyTrk1RP?B?4Is^*9Yg{i#F|_9y5SvM(92pW{CO%at8{ateI;ddI}{=#cH9NMo%Qnum#i=vjLnYP}{^FLYxg}_m9+MzgsfQb)?RLR-6K#ttRhKWRhp;L}Jm>5UHP$X0sA`!;VF1gYU zbK)^0N7C`L1OcFNDmn?cl}iVf8yl1j2Vp|8P(h+B2}V0r5DEQfS%Cw~iBJH_Gt_~{ z!GdyOYFJ07U2{PI(OF@AP!dTsP!;B@0ypAqbb`ua;8?>*D3r%@yipxF;AUt)CCM;! z7;pBXv>@z6CpFLcx3{T|Vn6?TPw%WkaPcF)+*qhy?;T*-_@07h^}A zD?2JPqmoTVw{nXoQ?C_HtgCqvP64m`g5< zHF!Z*BvHp^)tSI@VxflGqv~@_!mEQJY^B%gz{BTo#K5>lC|9)LzyQU9<&RidLg^8) zD`u;JqnDyZD0$|9=?KqD67@uHF=F7cKv5!LZZLBAp*Ad^qL)GqJKTIH9NxR|$?syxXkg9mn)pf=D)Itjm45$qbV^3H@=@GFD1KI&+k3@?ihiqDnI&G`Z z`Y!5;-pV7hz-UF%iBY1bG*)fUxHhye(O`uFv%w4`HO!D4_K%?mD-3;u9=?wtlNrG+ z02olS!TKk8Bp5F?T+!h0v9H00z>Z`Eec20Q5;>kB7Zjk1BV8>FECGoHdn*cE2{$R< z=?LSq9Y(My2MUOst^{P50Uss>M!H+*_>w(=o7BMygNE?Ik>n{LN%NO<`K+h3C+g>w z5tX($Fb6||08WM!gjERVGDb1hXD}pW)P*4h2Gmr^==1$g^-Ux<0d?~~;b`PdKruGq ze>%dg^f!esMot4va>?mWS-5Z`u?~`sKAj2PPi{V-4Vr|0DxWEM_}LDvBZnQ~`Y28z z9}uaMvvGmk;jjrL3Jm4=3^0fD$qs0fa83n2)>YbJPTY)vB0;=p*<#SpjiQs#t#lNv zbVwm;d?vIRWS3>F2p8GmDv3HaNC(P_a&TDbpb8wshx#xk>7mkJpb{2-jp&f0^X!x$ z06R#k3TioWCspyC<;8axr~E7k1-KvpbHGhdk3z$MU}BbNvQyM)&>|@_A-rDEAR7e* z1#(bSaqM0&|970vJhGwEH!(1o9d$13m_sOeY|h!|c%Q+JI#+fS2vFEnIuTWPo(2u< z)ZI6TQQPL-4aV`N{%(<5i<<@oo8?VFv1ls#DsLJTMncYl^?y@?GFal&px`wIK%bd} zu5v7|t{T)LYfyx?gs}=CvOy0?^dlj$++h+L6f10w2BiZ{w+9gzXigospK>%Pq=J=R zT!T_0a?PRkLyp$zZ*fHYZo+7VVxR`aXR%wuatPmw@f#b>Uo zqMWdDWqB0aCPGFXxP**i^vFe`X8G^4a{m`(RGiBH=Sb1pt$TF!bLdgMn}C|#LPcPf 
zyeUxzW}tCxh!WUfC`zz$?KBCn^B`a@xfPm&I|@v!g9IhY-4!g(5Oi&_LA#LAEIjz& zz{4^kh6@J`0)cmJ${2u|p(9{r38hDTX2#As6XEQUC=UaNIbd=e6Z(w13eQe-T)F&Chc!2rXX*w@ua}yI1yx#TJ-;G&|CwT&yN#fm3y9m_6YO~YU2+9?c5nhp|x=$ zlbZ&`0-PrS6#;mfRQXG~eAZLibEyy7q0{d86nL&V>bt`KX&kQQl+Vz=V%*C2+;ZY1 zT*0j>qiNsunQJ{)xbj&%&qMo4`CRjVXg}BTj&bmr7>}cUWo$2;|2NimRh@9=pC_3u z-wq1$6OzfF>DEf7Q9e8FqB;?0%#&m?kYK~M9Bo=F!thqC`P=~Hxf8-%b1S#}sUope zVB>}ZEmPWBI439UCfczsNu0c4$VP-)fRATc@Z1LP%nqdl{5m7zkHletYLNRu7f|jG z;bumGkywxY5$$nXP`1@@L_yu=v%ZUZ>?;!SILblq;XB96(?A?E`eTLtXNC99Gjx1r zLSMGr4jLk;tePx&0!YyOC0#!2 zDeZ~+d1XSSEfU$mj5u`cWJZ}#FUE{I7iJX11wma!(|{n|^JLTYBD_{IQD1vPLB3(1 zm-9oIJ7K&*m_^7Eb13mh3_UTzTyh@VwqRG7Nz}pd8zah^XjaLaiK?7R4s#AnL^Ac* z{p274lPHWFEQEovdmfnOCI+9Gu$b^bxeWqp1;OkX6yfYY%L*J=tVkH*5cu#Mr{}TK z5jm0-8=!2Q0~I+sj$9CcFjkh&ILf(`s@yciV8wSzCvXTt0gie&qJbHK6btDT5-Q^& zjGbZ5#|KzpHx+5{h!!SX^R7Mu%jS=s0;2Pah!zH za0VwqA%d7`5*-}m%8ts+xQZvZ`+3sYzMa2shgPs@1Kf|q8{J$^Y`Nu0Z}QHA9WezP zPoRlt2#ADr-iD=#FxOmMM&TYF5>K8Y;p7SQ0Vc-+ze}5l$HEh8^vN9f_*v|F;=Ts3 zD%nT=w`@S~bMghgd`8Yu)3_#2?kwA4?8Tuj;k`PIRUz9Jj zOB}s+%>@P6Q&uLulw6@Jzl+|3&OB?&&!m(n2MRD3XK+C!GnN=}*vSE7XZVasnG11M zCWkx-N&%rDDarcc2{D~~)l=FM_4CSzN?T%J+=({Bc*$W%C!t)%=U@W{26AOcx}2OU znIV;_aTQO5NS-I2er-Ajb`jUSbFpg;@|$H`>{^&Ud6G{C=7oyjP!W~<$CC++ic%Jl zXtSc;Kb}nBp1iUjP8GwAeTF4mw}M?xCdeIO0i{D?&m1eFFf18Rh7ObW(d7+#1USW^ zaQ9OQP*Iwb3aqCj;^8WaAm{(W$pkQud@_MIQAh##qyYzt?~aoR=rE|tfR2(2Ju&qz#MQBBI6>_0CB*?20krrB!L_F{(DX)$n1!d2~Ku&5DEkk%Mq8P zob(KKeD28vgV_K0=Pec2moT`QwkEe@3G&+=@?hHHXV2sb6Bj)xbC^LJ{3;vuBQJlt zed$G$n$797n(q@Rl#PsV5yFLo<`^jv>{{Qet~!biaI~A z2gEN9mO{JpffI&9y<(TafS@JdV&{PQKP1st_7{nG9D6W2eioy~iX!?+6^KQXXAF4z zjnB;D@EuRx0xrl#6CV#yS;xWx36v}ksS$}li03$hoAX7&{+XlKuDPIr=sCAKNHIt5 zq%6LR-jmW$1NgSmqm4-Mn2V3e?Y?(u>oFO`g%b8K=Af_ABPa3M&Qkfo=9)%tCJVmO0jfI=A z8r=O*^e|7lR8dcgo^#FlIk)7x8q2x}x+%AUxxqwb@t@-!JIK^Pq(<{#dDfP^Mx@SI- z>)?4_`H7g6j*Io?Z98(41h?viJ99eAR)PV z(1@fqZF7lt9e6pAF%Rsi@}xImt~sw_I=1FzugQQ=1z<+({)tP6ym;u6h===WgeiFV znQodL&K2;>^?AjUaP&jT**0;TaJrmH;80);m&kQ+$yToGmb>(WbwxkfS0v(gyoUhZ 
zAjd1mc0?aBe!-ovQW$vU7{nD-25M1H&=+dpG9oxMs(#DBj$W<9BBNvEmyUQzP4rTOX3at)LTJ^#7Zo}X ztBnE2PoO;E+Bw?VJ41)&-7eYMqijZMA;~>mf0u`7ZCn_-2s&ar?usUjB7Cojx z4Z}B(L<=(^IIo6*k#d2Nb=BqUcCLYHn8kosU%A2b*i=!KLs-QK7F5A74#ol8V2I49 zVP>Npo{1L5z=;aRD`H@66-*Da5hkq~rYBXkFuZI_(`TGJ;pT|CE&@vkc&lKjY0>sY zkN1^e%~F*E%!p8x3Wl>vTrX5Gy<{{%BWqv=9&J*=5J;BX+A!Jp!Ut73J#N^z&7~Eg z6*qj64Mw)QE>=DSrqnP!A}?B6;mQEkN_9D_iROtGX2Q~=f+6;7hpD52VTO1RL<6&! z5zbS?(5@DrUXRPK?)dmt8+Bb1j(=!jsEas;x}4r*aF;h&Z|=0BE>YJ-JX$;FdfZ0D zDQs0aIF`I$sDgoaX{)PY=nHg@1_onJTMk$E-Rk0mETdTbQ`Q%>tH;KR3I^KqqNN6A zb(3km9xjEJ2727Z)bN$wjstmGd9&lLn5G;LdO;7X+eNU~ZEQA)&Y-HCL2sh9tAcSH zZoGjzG0@lQa_F<0oTD@FnbhS>c8pjH15LH`15vef)??XlYuASP)yRS#m%w3RtJ>wx zsT)sXD;74Ws+`_t;ENknFa}C-uT=%p;~h@0WwbCJvs1xfyqICrs9<_r*Ku24pi$g- z4dZoN!=T;VWd@gl-RO*$WogHTn33B$X~4UlG<`<@jM$^o^ch}}hAvQy+#*ny!=YD3 zrue7G2k_CX(!j7@Y5R=2ZgLLMhl!%DYq#m)iD_WCAfs&;#}u$FqArJPQf_(>Z!vJA zDcW_T0fHZH>$qLwsd051*u~_=GyfDC@J=dBEENoN;S04iFkH9R(gp3ht;I&{lW5ui zK8dd`6-XyutFY3D4Dn{&fy7y0lf1$<47>F8%y6}QEFj&{xdBbbN-1z{ni_n(C zOC5NrgMZF(;I%98y45he{lcv;Ca65^e4s8inlyE>9%$P&L+feV1?Rfc#h8nHU@KeO zKRXQsO}lo;rG{3t1l;BgY>F_2y3cr_jdr~l4|YU;wFapO}H;)HH=fqAOs zA=q4QcBdIjn3m3FBV8|bpDlX&s2Ug;LE8q7#JI^c)C>-ntLs`#CO4S|nceJ8ymQcv z$6@KYt-Dz7-Ruo48*XbS^rhCSHzR=WQI-~PX? literal 0 HcmV?d00001 diff --git a/fp_2expt.c b/fp_2expt.c new file mode 100644 index 0000000..b534a10 --- /dev/null +++ b/fp_2expt.c @@ -0,0 +1,35 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. 
+ * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* computes a = 2**b */ +void fp_2expt(fp_int *a, int b) +{ + int z; + + /* zero a as per default */ + fp_zero (a); + + if (b < 0) { + return; + } + + z = b / DIGIT_BIT; + if (z >= FP_SIZE) { + return; + } + + /* set the used count of where the bit will go */ + a->used = z + 1; + + /* put the single bit in its place */ + a->dp[z] = ((fp_digit)1) << (b % DIGIT_BIT); +} + diff --git a/fp_add.c b/fp_add.c new file mode 100644 index 0000000..c3e05e3 --- /dev/null +++ b/fp_add.c @@ -0,0 +1,39 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +void fp_add(fp_int *a, fp_int *b, fp_int *c) +{ + int sa, sb; + + /* get sign of both inputs */ + sa = a->sign; + sb = b->sign; + + /* handle two cases, not four */ + if (sa == sb) { + /* both positive or both negative */ + /* add their magnitudes, copy the sign */ + c->sign = sa; + s_fp_add (a, b, c); + } else { + /* one positive, the other negative */ + /* subtract the one with the greater magnitude from */ + /* the one of the lesser magnitude. The result gets */ + /* the sign of the one with the greater magnitude. */ + if (fp_cmp_mag (a, b) == FP_LT) { + c->sign = sb; + s_fp_sub (b, a, c); + } else { + c->sign = sa; + s_fp_sub (a, b, c); + } + } +} diff --git a/fp_add_d.c b/fp_add_d.c new file mode 100644 index 0000000..879f974 --- /dev/null +++ b/fp_add_d.c @@ -0,0 +1,18 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. 
+ * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = a + b */ +void fp_add_d(fp_int *a, fp_digit b, fp_int *c) +{ + fp_int tmp; + fp_set(&tmp, b); + fp_add(a,&tmp,c); +} diff --git a/fp_addmod.c b/fp_addmod.c new file mode 100644 index 0000000..c14c7e3 --- /dev/null +++ b/fp_addmod.c @@ -0,0 +1,19 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* d = a + b (mod c) */ +int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d) +{ + fp_int tmp; + fp_zero(&tmp); + fp_add(a, b, &tmp); + return fp_mod(&tmp, c, d); +} diff --git a/fp_cmp.c b/fp_cmp.c new file mode 100644 index 0000000..1d49b78 --- /dev/null +++ b/fp_cmp.c @@ -0,0 +1,27 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +int fp_cmp(fp_int *a, fp_int *b) +{ + if (a->sign == FP_NEG && b->sign == FP_ZPOS) { + return FP_LT; + } else if (a->sign == FP_ZPOS && b->sign == FP_NEG) { + return FP_GT; + } else { + /* compare digits */ + if (a->sign == FP_NEG) { + /* if negative compare opposite direction */ + return fp_cmp_mag(b, a); + } else { + return fp_cmp_mag(a, b); + } + } +} diff --git a/fp_cmp_d.c b/fp_cmp_d.c new file mode 100644 index 0000000..389d30f --- /dev/null +++ b/fp_cmp_d.c @@ -0,0 +1,34 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. 
+ * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* compare against a single digit */ +int fp_cmp_d(fp_int *a, fp_digit b) +{ + /* compare based on sign */ + if ((b && a->used == 0) || a->sign == FP_NEG) { + return FP_LT; + } + + /* compare based on magnitude */ + if (a->used > 1) { + return FP_GT; + } + + /* compare the only digit of a to b */ + if (a->dp[0] > b) { + return FP_GT; + } else if (a->dp[0] < b) { + return FP_LT; + } else { + return FP_EQ; + } + +} diff --git a/fp_cmp_mag.c b/fp_cmp_mag.c new file mode 100644 index 0000000..3c4b0ec --- /dev/null +++ b/fp_cmp_mag.c @@ -0,0 +1,31 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +int fp_cmp_mag(fp_int *a, fp_int *b) +{ + int x; + + if (a->used > b->used) { + return FP_GT; + } else if (a->used < b->used) { + return FP_LT; + } else { + for (x = a->used - 1; x >= 0; x--) { + if (a->dp[x] > b->dp[x]) { + return FP_GT; + } else if (a->dp[x] < b->dp[x]) { + return FP_LT; + } + } + } + return FP_EQ; +} + diff --git a/fp_cnt_lsb.c b/fp_cnt_lsb.c new file mode 100644 index 0000000..edf7a51 --- /dev/null +++ b/fp_cnt_lsb.c @@ -0,0 +1,42 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. 
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+static const int lnz[16] = { /* lnz[v] = number of trailing zero bits of the 4-bit value v (4 for v == 0) */
+   4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
+};
+
+/* Counts the number of lsbs which are zero before the first one bit (returns 0 for a == 0) */
+int fp_cnt_lsb(fp_int *a)
+{
+   int x;
+   fp_digit q, qq;
+
+   /* easy out */
+   if (fp_iszero(a) == 1) {
+      return 0;
+   }
+
+   /* scan lower digits until non-zero */
+   for (x = 0; x < a->used && a->dp[x] == 0; x++);
+   q = a->dp[x];
+   x *= DIGIT_BIT;
+
+   /* now scan this digit until a 1 is found, four bits at a time through lnz[] */
+   if ((q & 1) == 0) {
+      do {
+         qq = q & 15;
+         x += lnz[qq];
+         q >>= 4;
+      } while (qq == 0);
+   }
+   return x;
+}
+
diff --git a/fp_count_bits.c b/fp_count_bits.c
new file mode 100644
index 0000000..0c9e565
--- /dev/null
+++ b/fp_count_bits.c
@@ -0,0 +1,32 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+/* returns the number of bits in the magnitude of a, i.e. the position of its top set bit (0 for a == 0) */
+int fp_count_bits (fp_int * a)
+{
+   int r;
+   fp_digit q;
+
+   /* shortcut */
+   if (a->used == 0) {
+      return 0;
+   }
+
+   /* get number of digits and add that */
+   r = (a->used - 1) * DIGIT_BIT;
+
+   /* take the last digit and count the bits in it */
+   q = a->dp[a->used - 1];
+   while (q > ((fp_digit) 0)) {
+      ++r;
+      q >>= ((fp_digit) 1);
+   }
+   return r;
+}
diff --git a/fp_div.c b/fp_div.c
new file mode 100644
index 0000000..5a1dbee
--- /dev/null
+++ b/fp_div.c
@@ -0,0 +1,153 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* a/b => cb + d == a; c (quotient) and d (remainder) may each be NULL; returns FP_VAL if b == 0, else FP_OKAY; d takes the sign of a */
+int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
+{
+   fp_int q, x, y, t1, t2;
+   int n, t, i, norm, neg;
+
+   /* is divisor zero ? */
+   if (fp_iszero (b) == 1) {
+      return FP_VAL;
+   }
+
+   /* if |a| < |b| then q=0, r = a */
+   if (fp_cmp_mag (a, b) == FP_LT) {
+      if (d != NULL) {
+         fp_copy (a, d);
+      }
+      if (c != NULL) {
+         fp_zero (c);
+      }
+      return FP_OKAY;
+   }
+
+   fp_init(&q);
+   q.used = a->used + 2; /* NOTE(review): exceeds FP_SIZE once a->used >= FP_SIZE - 1 -- confirm callers leave two spare digits */
+
+   fp_init(&t1);
+   fp_init(&t2);
+   fp_init_copy(&x, a);
+   fp_init_copy(&y, b);
+
+   /* fix the sign: the quotient is negative only when the operand signs differ; work on magnitudes */
+   neg = (a->sign == b->sign) ? FP_ZPOS : FP_NEG;
+   x.sign = y.sign = FP_ZPOS;
+
+   /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
+   norm = fp_count_bits(&y) % DIGIT_BIT;
+   if (norm < (int)(DIGIT_BIT-1)) {
+      norm = (DIGIT_BIT-1) - norm;
+      fp_mul_2d (&x, norm, &x);
+      fp_mul_2d (&y, norm, &y);
+   } else {
+      norm = 0;
+   }
+
+   /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
+   n = x.used - 1;
+   t = y.used - 1;
+
+   /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
+   fp_lshd (&y, n - t); /* y = y*b**{n-t} */
+
+   while (fp_cmp (&x, &y) != FP_LT) {
+      ++(q.dp[n - t]);
+      fp_sub (&x, &y, &x);
+   }
+
+   /* reset y by shifting it back down */
+   fp_rshd (&y, n - t);
+
+   /* step 3. for i from n down to (t + 1) */
+   for (i = n; i >= (t + 1); i--) {
+      if (i > x.used) {
+         continue;
+      }
+
+      /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
+       * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
+      if (x.dp[i] == y.dp[t]) {
+         q.dp[i - t - 1] = ((((fp_word)1) << DIGIT_BIT) - 1);
+      } else {
+         fp_word tmp;
+         tmp = ((fp_word) x.dp[i]) << ((fp_word) DIGIT_BIT);
+         tmp |= ((fp_word) x.dp[i - 1]);
+         tmp /= ((fp_word) y.dp[t]);
+         q.dp[i - t - 1] = (fp_digit) (tmp);
+      }
+
+      /* while (q{i-t-1} * (yt * b + y{t-1})) >
+               xi * b**2 + xi-1 * b + xi-2
+
+         do q{i-t-1} -= 1;
+      */
+      q.dp[i - t - 1] = (q.dp[i - t - 1] + 1);
+      do {
+         q.dp[i - t - 1] = (q.dp[i - t - 1] - 1);
+
+         /* find left hand */
+         fp_zero (&t1);
+         t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
+         t1.dp[1] = y.dp[t];
+         t1.used = 2;
+         fp_mul_d (&t1, q.dp[i - t - 1], &t1);
+
+         /* find right hand */
+         t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
+         t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
+         t2.dp[2] = x.dp[i];
+         t2.used = 3;
+      } while (fp_cmp_mag(&t1, &t2) == FP_GT);
+
+      /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
+      fp_mul_d (&y, q.dp[i - t - 1], &t1);
+      fp_lshd (&t1, i - t - 1);
+      fp_sub (&x, &t1, &x);
+
+      /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
+      if (x.sign == FP_NEG) {
+         fp_copy (&y, &t1);
+         fp_lshd (&t1, i - t - 1);
+         fp_add (&x, &t1, &x);
+         q.dp[i - t - 1] = q.dp[i - t - 1] - 1;
+      }
+   }
+
+   /* now q is the quotient and x is the remainder
+    * [which we have to normalize]
+    */
+
+   /* get sign before writing to c (c may alias a): a non-zero remainder takes the sign of a */
+   x.sign = x.used == 0 ? FP_ZPOS : a->sign;
+
+   if (c != NULL) {
+      fp_clamp (&q);
+      fp_copy (&q, c);
+      c->sign = neg;
+   }
+
+   if (d != NULL) {
+      fp_div_2d (&x, norm, &x, NULL);
+
+/* the following is a kludge, essentially we were seeing the right remainder but
+   with excess digits that should have been zero
+ */
+      for (i = b->used; i < x.used; i++) {
+         x.dp[i] = 0;
+      }
+      fp_clamp(&x);
+      fp_copy (&x, d);
+   }
+
+   return FP_OKAY;
+}
diff --git a/fp_div_2.c b/fp_div_2.c
new file mode 100644
index 0000000..5652bf0
--- /dev/null
+++ b/fp_div_2.c
@@ -0,0 +1,49 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* b = a/2: shifts the magnitude of a right by one bit (the low bit is dropped) and copies the sign of a */
+void fp_div_2(fp_int * a, fp_int * b)
+{
+   int x, oldused;
+
+   oldused = b->used;
+   b->used = a->used;
+   {
+      register fp_digit r, rr, *tmpa, *tmpb;
+
+      /* source alias, starting at the most significant digit */
+      tmpa = a->dp + b->used - 1;
+
+      /* dest alias */
+      tmpb = b->dp + b->used - 1;
+
+      /* carry */
+      r = 0;
+      for (x = b->used - 1; x >= 0; x--) {
+         /* get the carry for the next iteration */
+         rr = *tmpa & 1;
+
+         /* shift the current digit, add in carry and store */
+         *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
+
+         /* forward carry to next iteration */
+         r = rr;
+      }
+
+      /* zero excess digits that b held before and that were not rewritten */
+      tmpb = b->dp + b->used;
+      for (x = b->used; x < oldused; x++) {
+         *tmpb++ = 0;
+      }
+   }
+   b->sign = a->sign;
+   fp_clamp (b);
+}
diff --git a/fp_div_2d.c b/fp_div_2d.c
new file mode 100644
index 0000000..f46690c
--- /dev/null
+++ b/fp_div_2d.c
@@ -0,0 +1,75 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = a / 2**b; if d != NULL it receives a mod 2**b (via fp_mod_2d); c is always written and must not be NULL */
+void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d)
+{
+   fp_digit D, r, rr;
+   int x;
+   fp_int t;
+
+   /* if the shift count is <= 0 then we do no work: c = a and the remainder is zero */
+   if (b <= 0) {
+      fp_copy (a, c);
+      if (d != NULL) {
+         fp_zero (d);
+      }
+      return;
+   }
+
+   fp_init(&t);
+
+   /* get the remainder into a temporary first, a is still intact here even if c aliases it */
+   if (d != NULL) {
+      fp_mod_2d (a, b, &t);
+   }
+
+   /* copy */
+   fp_copy(a, c);
+
+   /* shift by as many digits in the bit count */
+   if (b >= (int)DIGIT_BIT) {
+      fp_rshd (c, b / DIGIT_BIT);
+   }
+
+   /* shift any bit count < DIGIT_BIT */
+   D = (fp_digit) (b % DIGIT_BIT);
+   if (D != 0) {
+      register fp_digit *tmpc, mask, shift;
+
+      /* mask selecting the D low bits that fall out of each digit */
+      mask = (((fp_digit)1) << D) - 1;
+
+      /* shift for lsb */
+      shift = DIGIT_BIT - D;
+
+      /* alias, starting at the most significant digit */
+      tmpc = c->dp + (c->used - 1);
+
+      /* carry */
+      r = 0;
+      for (x = c->used - 1; x >= 0; x--) {
+         /* get the lower bits of this word in a temp */
+         rr = *tmpc & mask;
+
+         /* shift the current word and mix in the carry bits from the previous word */
+         *tmpc = (*tmpc >> D) | (r << shift);
+         --tmpc;
+
+         /* set the carry to the carry bits of the current word found above */
+         r = rr;
+      }
+   }
+   fp_clamp (c);
+   if (d != NULL) {
+      fp_copy (&t, d);
+   }
+}
diff --git a/fp_div_d.c b/fp_div_d.c
new file mode 100644
index 0000000..3d0db07
--- /dev/null
+++ b/fp_div_d.c
@@ -0,0 +1,89 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +static int s_is_power_of_two(fp_digit b, int *p) +{ + int x; + + for (x = 1; x < DIGIT_BIT; x++) { + if (b == (((fp_digit)1)< cb + d == a */ +int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d) +{ + fp_int q; + fp_word w; + fp_digit t; + int ix; + + /* cannot divide by zero */ + if (b == 0) { + return FP_VAL; + } + + /* quick outs */ + if (b == 1 || fp_iszero(a) == 1) { + if (d != NULL) { + *d = 0; + } + if (c != NULL) { + fp_copy(a, c); + } + return FP_OKAY; + } + + /* power of two ? */ + if (s_is_power_of_two(b, &ix) == 1) { + if (d != NULL) { + *d = a->dp[0] & ((((fp_digit)1)<used; + q.sign = a->sign; + w = 0; + for (ix = a->used - 1; ix >= 0; ix--) { + w = (w << ((fp_word)DIGIT_BIT)) | ((fp_word)a->dp[ix]); + + if (w >= b) { + t = (fp_digit)(w / b); + w -= ((fp_word)t) * ((fp_word)b); + } else { + t = 0; + } + q.dp[ix] = (fp_digit)t; + } + + if (d != NULL) { + *d = (fp_digit)w; + } + + if (c != NULL) { + fp_clamp(&q); + fp_copy(&q, c); + } + + return FP_OKAY; +} + diff --git a/fp_exptmod.c b/fp_exptmod.c new file mode 100644 index 0000000..eb5457f --- /dev/null +++ b/fp_exptmod.c @@ -0,0 +1,170 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* y = g**x (mod b) + * Some restrictions... 
x must be positive and < b + */ + +int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) +{ + fp_int M[64], res; + fp_digit buf, mp; + int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; + + /* find window size */ + x = fp_count_bits (X); + if (x <= 7) { + winsize = 2; + } else if (x <= 36) { + winsize = 3; + } else if (x <= 140) { + winsize = 4; + } else if (x <= 450) { + winsize = 5; + } else { + winsize = 6; + } + + /* init M array */ + memset(M, 0, sizeof(fp_int)*(1< P so we reduce it first */ + fp_mod(G, P, &M[1]); + } else { + fp_copy(G, &M[1]); + } + fp_mulmod (&M[1], &res, P, &M[1]); + + /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */ + fp_copy (&M[1], &M[1 << (winsize - 1)]); + for (x = 0; x < (winsize - 1); x++) { + fp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)]); + fp_montgomery_reduce (&M[1 << (winsize - 1)], P, mp); + } + + /* create upper table */ + for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { + fp_mul(&M[x - 1], &M[1], &M[x]); + fp_montgomery_reduce(&M[x], P, mp); + } + + /* set initial mode and bit cnt */ + mode = 0; + bitcnt = 1; + buf = 0; + digidx = X->used - 1; + bitcpy = 0; + bitbuf = 0; + + for (;;) { + /* grab next digit as required */ + if (--bitcnt == 0) { + /* if digidx == -1 we are out of digits so break */ + if (digidx == -1) { + break; + } + /* read next digit and reset bitcnt */ + buf = X->dp[digidx--]; + bitcnt = (int)DIGIT_BIT; + } + + /* grab the next msb from the exponent */ + y = (fp_digit)(buf >> (DIGIT_BIT - 1)) & 1; + buf <<= (fp_digit)1; + + /* if the bit is zero and mode == 0 then we ignore it + * These represent the leading zero bits before the first 1 bit + * in the exponent. 
Technically this opt is not required but it + * does lower the # of trivial squaring/reductions used + */ + if (mode == 0 && y == 0) { + continue; + } + + /* if the bit is zero and mode == 1 then we square */ + if (mode == 1 && y == 0) { + fp_sqr(&res, &res); + fp_montgomery_reduce(&res, P, mp); + continue; + } + + /* else we add it to the window */ + bitbuf |= (y << (winsize - ++bitcpy)); + mode = 2; + + if (bitcpy == winsize) { + /* ok window is filled so square as required and multiply */ + /* square first */ + for (x = 0; x < winsize; x++) { + fp_sqr(&res, &res); + fp_montgomery_reduce(&res, P, mp); + } + + /* then multiply */ + fp_mul(&res, &M[bitbuf], &res); + fp_montgomery_reduce(&res, P, mp); + + /* empty window and reset */ + bitcpy = 0; + bitbuf = 0; + mode = 1; + } + } + + /* if bits remain then square/multiply */ + if (mode == 2 && bitcpy > 0) { + /* square then multiply if the bit is set */ + for (x = 0; x < bitcpy; x++) { + fp_sqr(&res, &res); + fp_montgomery_reduce(&res, P, mp); + + /* get next bit of the window */ + bitbuf <<= 1; + if ((bitbuf & (1 << winsize)) != 0) { + /* then multiply */ + fp_mul(&res, &M[1], &res); + fp_montgomery_reduce(&res, P, mp); + } + } + } + + /* fixup result if Montgomery reduction is used + * recall that any value in a Montgomery system is + * actually multiplied by R mod n. So we have + * to reduce one more time to cancel out the factor + * of R. + */ + fp_montgomery_reduce(&res, P, mp); + + /* swap res with Y */ + fp_copy (&res, Y); + return FP_OKAY; +} diff --git a/fp_gcd.c b/fp_gcd.c new file mode 100644 index 0000000..08900b4 --- /dev/null +++ b/fp_gcd.c @@ -0,0 +1,51 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. 
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = (a, b), the greatest common divisor; the result is never negative, and (0, 0) gives 0 */
+void fp_gcd(fp_int *a, fp_int *b, fp_int *c)
+{
+   fp_int u, v, r;
+
+   /* if exactly one input is zero then the gcd is the magnitude of the other */
+   if (fp_iszero (a) == 1 && fp_iszero (b) == 0) {
+      fp_abs (b, c);
+      return;
+   }
+   if (fp_iszero (a) == 0 && fp_iszero (b) == 1) {
+      fp_abs (a, c);
+      return;
+   }
+
+   /* optimized. At this point if a == 0 then
+    * b must equal zero too
+    */
+   if (fp_iszero (a) == 1) {
+      fp_zero(c);
+      return;
+   }
+
+   /* sort inputs so that |u| >= |v| */
+   if (fp_cmp_mag(a, b) != FP_LT) {
+      fp_init_copy(&u, a);
+      fp_init_copy(&v, b);
+   } else {
+      fp_init_copy(&u, b);
+      fp_init_copy(&v, a);
+   }
+
+   fp_zero(&r);
+   while (fp_iszero(&v) == FP_NO) {
+      fp_mod(&u, &v, &r);
+      fp_copy(&v, &u);
+      fp_copy(&r, &v);
+   }
+   fp_abs(&u, c); /* u keeps the sign of an input, so take |u| like the zero shortcuts above do */
+}
diff --git a/fp_invmod.c b/fp_invmod.c
new file mode 100644
index 0000000..d6c184f
--- /dev/null
+++ b/fp_invmod.c
@@ -0,0 +1,98 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = 1/a (mod b) for odd b only; returns FP_VAL if b is even, a is zero or no inverse exists, else FP_OKAY */
+int fp_invmod(fp_int *a, fp_int *b, fp_int *c)
+{
+   fp_int x, y, u, v, B, D;
+   int neg;
+
+   /* 2. [modified] b must be odd; a == 0 has no inverse and would leave v == 0, which halving in step 5 can never make odd */
+   if (fp_iseven (b) == FP_YES || fp_iszero (a) == 1) {
+      return FP_VAL;
+   }
+
+   /* init all our temps */
+   fp_init(&x);  fp_init(&y);
+   fp_init(&u);  fp_init(&v);
+   fp_init(&B);  fp_init(&D);
+
+   /* x == modulus, y == value to invert */
+   fp_copy(b, &x);
+
+   /* we need y = |a| */
+   fp_abs(a, &y);
+
+   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
+   fp_copy(&x, &u);
+   fp_copy(&y, &v);
+   fp_set (&D, 1);
+
+top:
+   /* 4. while u is even do */
+   while (fp_iseven (&u) == FP_YES) {
+      /* 4.1 u = u/2 */
+      fp_div_2 (&u, &u);
+
+      /* 4.2 if B is odd then B = B - x so that the halving below is exact */
+      if (fp_isodd (&B) == FP_YES) {
+         fp_sub (&B, &x, &B);
+      }
+      /* B = B/2 */
+      fp_div_2 (&B, &B);
+   }
+
+   /* 5. while v is even do */
+   while (fp_iseven (&v) == FP_YES) {
+      /* 5.1 v = v/2 */
+      fp_div_2 (&v, &v);
+
+      /* 5.2 if D is odd then */
+      if (fp_isodd (&D) == FP_YES) {
+         /* D = D - x so that the halving below is exact */
+         fp_sub (&D, &x, &D);
+      }
+      /* D = D/2 */
+      fp_div_2 (&D, &D);
+   }
+
+   /* 6. if u >= v then */
+   if (fp_cmp (&u, &v) != FP_LT) {
+      /* u = u - v, B = B - D */
+      fp_sub (&u, &v, &u);
+      fp_sub (&B, &D, &B);
+   } else {
+      /* v = v - u, D = D - B */
+      fp_sub (&v, &u, &v);
+      fp_sub (&D, &B, &D);
+   }
+
+   /* if not zero goto step 4 */
+   if (fp_iszero (&u) == FP_NO) {
+      goto top;
+   }
+
+   /* now a = C, b = D, gcd == g*v */
+
+   /* if v != 1 then there is no inverse */
+   if (fp_cmp_d (&v, 1) != FP_EQ) {
+      return FP_VAL;
+   }
+
+   /* D is now the inverse of |a|: lift it to a non-negative value, then give it the sign of a */
+   neg = a->sign;
+   while (D.sign == FP_NEG) {
+      fp_add (&D, b, &D);
+   }
+   fp_copy (&D, c);
+   c->sign = neg;
+   return FP_OKAY;
+}
diff --git a/fp_isprime.c b/fp_isprime.c
new file mode 100644
index 0000000..e9a6991
--- /dev/null
+++ b/fp_isprime.c
@@ -0,0 +1,74 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* a few primes: the first 256, in increasing order (2 .. 0x0653) */
+static const fp_digit primes[256] = {
+  0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
+  0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
+  0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
+  0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
+  0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
+  0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
+  0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
+  0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
+
+  0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
+  0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
+  0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
+  0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
+  0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
+  0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
+  0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
+  0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
+
+  0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
+  0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
+  0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
+  0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
+  0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
+  0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
+  0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
+  0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
+
+  0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
+  0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
+  0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
+  0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
+  0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
+  0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
+  0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
+  0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
+};
+/* returns FP_YES if a survives trial division by primes[] and 8 Miller-Rabin rounds (bases primes[0..7]), else FP_NO */
+int fp_isprime(fp_int *a)
+{
+   fp_int   b;
+   fp_digit d;
+   int      r, res;
+
+   /* do trial division: a multiple of a table prime is prime only when it is that very prime */
+   for (r = 0; r < 256; r++) {
+       fp_mod_d(a, primes[r], &d);
+       if (d == 0) {
+          return (fp_cmp_d(a, primes[r]) == FP_EQ) ? FP_YES : FP_NO;
+       }
+   }
+
+   /* now do 8 miller rabins */
+   for (r = 0; r < 8; r++) {
+       fp_set(&b, primes[r]);
+       fp_prime_miller_rabin(a, &b, &res);
+       if (res == FP_NO) {
+          return FP_NO;
+       }
+   }
+   return FP_YES;
+}
diff --git a/fp_lcm.c b/fp_lcm.c
new file mode 100644
index 0000000..5182a58
--- /dev/null
+++ b/fp_lcm.c
@@ -0,0 +1,27 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = [a, b], the least common multiple, computed as (larger magnitude / gcd) * other operand */
+void fp_lcm(fp_int *a, fp_int *b, fp_int *c)
+{
+   fp_int  t1, t2;
+
+   fp_init(&t1);
+   fp_init(&t2);
+   fp_gcd(a, b, &t1);
+   if (fp_cmp_mag(a, b) == FP_GT) {
+      fp_div(a, &t1, &t2, NULL);
+      fp_mul(b, &t2, c);
+   } else {
+      fp_div(b, &t1, &t2, NULL);
+      fp_mul(a, &t2, c);
+   }
+}
diff --git a/fp_lshd.c b/fp_lshd.c
new file mode 100644
index 0000000..e569453
--- /dev/null
+++ b/fp_lshd.c
@@ -0,0 +1,34 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+/* shifts a left by x whole digits in place; digits pushed beyond index FP_SIZE-1 are dropped */
+void fp_lshd(fp_int *a, int x)
+{
+   int y;
+
+   /* move up and truncate as required */
+   y = MIN(a->used + x - 1, (int)(FP_SIZE-1));
+
+   /* store new size */
+   a->used = y + 1;
+
+   /* move digits, top down so that no source digit is overwritten before it is read */
+   for (; y >= x; y--) {
+      a->dp[y] = a->dp[y-x];
+   }
+
+   /* zero lower digits */
+   for (; y >= 0; y--) {
+      a->dp[y] = 0;
+   }
+
+   /* clamp digits */
+   fp_clamp(a);
+}
diff --git a/fp_mod.c b/fp_mod.c
new file mode 100644
index 0000000..bec0593
--- /dev/null
+++ b/fp_mod.c
@@ -0,0 +1,18 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = a mod b as left by fp_div: |c| < |b| and a non-zero c has the sign of a (so c < 0 is possible); FP_VAL if b == 0 */
+int fp_mod(fp_int *a, fp_int *b, fp_int *c)
+{
+   return fp_div(a, b, NULL, c);
+}
+
+
diff --git a/fp_mod_2d.c b/fp_mod_2d.c
new file mode 100644
index 0000000..dc2103c
--- /dev/null
+++ b/fp_mod_2d.c
@@ -0,0 +1,38 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = a mod 2**b: keeps the low b bits of a (c = 0 when b <= 0) */
+void fp_mod_2d(fp_int *a, int b, fp_int *c)
+{
+   int x;
+
+   /* zero if count less than or equal to zero */
+   if (b <= 0) {
+      fp_zero(c);
+      return;
+   }
+
+   /* get copy of input */
+   fp_copy(a, c);
+
+   /* if 2**b is larger than a then we just return the copy */
+   if (b >= (DIGIT_BIT * a->used)) {
+      return;
+   }
+
+   /* zero digits above the last digit of the modulus */
+   for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
+      c->dp[x] = 0;
+   }
+   /* clear the digit that is not completely outside/inside the modulus; the mask uses b % DIGIT_BIT so the shift count stays below DIGIT_BIT */
+   c->dp[b / DIGIT_BIT] &= (((fp_digit)1) << (b % DIGIT_BIT)) - 1;
+   fp_clamp (c);
+}
diff --git a/fp_mod_d.c b/fp_mod_d.c
new file mode 100644
index 0000000..7ac1ac6
--- /dev/null
+++ b/fp_mod_d.c
@@ -0,0 +1,16 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = a mod b, 0 <= c < b; simply the remainder output of fp_div_d, whose status code is returned */
+int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c)
+{
+   return fp_div_d(a, b, NULL, c);
+}
diff --git a/fp_montgomery_calc_normalization.c b/fp_montgomery_calc_normalization.c
new file mode 100644
index 0000000..3f3331d
--- /dev/null
+++ b/fp_montgomery_calc_normalization.c
@@ -0,0 +1,38 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* computes a = B**n mod b without division or multiplication useful for
+ * normalizing numbers in a Montgomery system.
+ */
+void fp_montgomery_calc_normalization(fp_int *a, fp_int *b)
+{
+  int     x, bits;
+
+  /* how many bits of last digit does b use */
+  bits = fp_count_bits (b) % DIGIT_BIT;
+
+  /* compute A = B^(n-1) * 2^(bits-1) */
+  if (b->used > 1) {
+     fp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1);
+  } else {
+     fp_set(a, 1);
+     ++bits;
+  }
+
+  /* now compute C = A * B mod b, one doubling (with a conditional subtract of b) at a time */
+  for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
+    fp_mul_2 (a, a);
+    if (fp_cmp_mag (a, b) != FP_LT) {
+      s_fp_sub (a, b, a);
+    }
+  }
+}
+
diff --git a/fp_montgomery_reduce.c b/fp_montgomery_reduce.c
new file mode 100644
index 0000000..2f4fbe7
--- /dev/null
+++ b/fp_montgomery_reduce.c
@@ -0,0 +1,249 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+#if defined(TFM_X86)
+
+/* x86-32 code */
+
+#define MONT_START
+
+#define MONT_FINI
+
+#define LOOP_START \
+   mu = c[x] * mp;
+
+#define INNERMUL \
+asm( \
+"movl %7,%%eax \n\t" \
+"mull %6 \n\t" \
+"addl %%eax,%0 \n\t" \
+"adcl %%edx,%1 \n\t" \
+"adcl $0,%2 \n\t" \
+:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
+ "g"(mu), "g"(*tmpm++) \
+ : "%eax", "%edx", "%cc");
+
+#define PROPCARRY \
+asm( \
+"movl %1,%%eax \n\t" \
+"addl %%eax,%6 \n\t" \
+"movl %2,%%eax \n\t" \
+"adcl %%eax,%7 \n\t" \
+"adcl $0,%8 \n\t" \
+:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
+ "m"(_c[OFF0+1]), "m"(_c[OFF1+1]), "m"(_c[OFF2+1]) \
+: "%eax", "%cc");
+
+#elif defined(TFM_X86_64)
+/* x86-64 code */
+
+#define MONT_START
+
+#define MONT_FINI
+
+#define LOOP_START \
+   mu = c[x] * mp;
+
+#define INNERMUL \
+asm( \
+"movq %7,%%rax \n\t" \
+"mulq %6 \n\t" \
+"addq %%rax,%0 \n\t" \
+"adcq %%rdx,%1 \n\t" \
+"adcq $0,%2 \n\t" \
+:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
+ "g"(mu), "g"(*tmpm++) \
+ : "%rax", "%rdx", "%cc");
+
+#define PROPCARRY \
+asm( \
+"movq %1,%%rax \n\t" \
+"addq %%rax,%6 \n\t" \
+"movq %2,%%rax \n\t" \
+"adcq %%rax,%7 \n\t" \
+"adcq $0,%8 \n\t" \
+:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
+ "m"(_c[OFF0+1]), "m"(_c[OFF1+1]), "m"(_c[OFF2+1]) \
+: "%rax", "%cc");
+
+#elif defined(TFM_SSE2)
+
+/* SSE2 code */
+
+#define MONT_START \
+asm("movd %0,%%mm2"::"g"(mp));
+
+#define MONT_FINI \
+asm("emms");
+
+#define LOOP_START \
+asm(\
+"movd %0,%%mm1 \n\t" \
+"pmuludq %%mm2,%%mm1 \n\t" \
+:: "g"(c[x]), "g"(mp));
+
+#define INNERMUL \
+asm( \
+"movd %6,%%mm0 \n\t" \
+"pmuludq %%mm1,%%mm0 \n\t" \
+"movd %%mm0,%%eax \n\t" \
+"psrlq $32, %%mm0 \n\t" \
+"addl %%eax,%0 \n\t" \
+"movd %%mm0,%%eax \n\t" \
+"adcl %%eax,%1 \n\t" \
+"adcl $0,%2 \n\t" \
+:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
+ "g"(*tmpm++) \
+ : "%eax", "%cc");
+
+#define PROPCARRY \
+asm( \
+"movl %1,%%eax \n\t" \
+"addl %%eax,%6 \n\t" \
+"movl %2,%%eax \n\t" \
+"adcl %%eax,%7 \n\t" \
+"adcl $0,%8 \n\t" \
+:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
+ "m"(_c[OFF0+1]), "m"(_c[OFF1+1]), "m"(_c[OFF2+1]) \
+: "%eax", "%cc");
+
+#elif defined(TFM_ARM)
+
+/* ARM code */
+#define MONT_START
+
+#define MONT_FINI
+
+#define LOOP_START \
+   mu = c[x] * mp;
+
+/* NOTE: later write it using two regs instead of three for _c + ... */
+#define INNERMUL \
+asm( \
+"UMULL r0,r1,%0,%1 \n\t" \
+"LDR r2,[%2] \n\t" \
+"ADDS r2,r2,r0 \n\t" \
+"STR r2,[%2] \n\t" \
+"LDR r2,[%3] \n\t" \
+"ADCS r2,r2,r1 \n\t" \
+"STR r2,[%3] \n\t" \
+"LDR r2,[%4] \n\t" \
+"ADC r2,r2,#0 \n\t" \
+"STR r2,[%4] \n\t" \
+::"r"(mu),"r"(*tmpm++),"r"(_c + OFF0),"r"(_c + OFF1),"r"(_c + OFF2):"r0", "r1", "r2", "%cc");
+
+#define PROPCARRY \
+asm( \
+"LDR r0,[%1] \n\t" \
+"LDR r1,[%0,#4] \n\t" \
+"ADDS r0,r0,r1 \n\t" \
+"STR r0,[%0,#4] \n\t" \
+"LDR r0,[%2] \n\t" \
+"LDR r1,[%1,#4] \n\t" \
+"ADCS r0,r0,r1 \n\t" \
+"STR r0,[%1,#4] \n\t" \
+"LDR r0,[%2,#4] \n\t" \
+"ADC r0,r0,#0 \n\t" \
+"STR r0,[%2,#4] \n\t" \
+::"r"(_c + OFF0),"r"(_c + OFF1),"r"(_c + OFF2):"r0", "r1", "%cc");
+
+#else
+
+/* ISO C code */
+#define MONT_START
+
+#define MONT_FINI
+
+#define LOOP_START \
+   mu = c[x] * mp;
+
+#define INNERMUL \
+   t = ((fp_word)mu) * ((fp_word)*tmpm++); \
+   _c[OFF0] += t; if (_c[OFF0] < (fp_digit)t) ++_c[OFF1]; \
+   _c[OFF1] += (t>>DIGIT_BIT); if (_c[OFF1] < (fp_digit)(t>>DIGIT_BIT)) ++_c[OFF2]; \
+
+#define PROPCARRY \
+   _c[OFF0+1] += _c[OFF1]; if (_c[OFF0+1] < _c[OFF1]) ++_c[OFF1+1]; \
+   _c[OFF1+1] += _c[OFF2]; if (_c[OFF1+1] < _c[OFF2]) ++_c[OFF2+1];
+
+
+#endif
+
+
+#define OFF0 (0)
+#define OFF1 (FP_SIZE)
+#define OFF2 (FP_SIZE+FP_SIZE)
+
+/* computes x/R == x (mod N) via Montgomery Reduction; a is reduced in place modulo m, mp is presumably the rho from fp_montgomery_setup -- confirm */
+void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
+{
+   fp_digit c[3*FP_SIZE], *_c, *tmpm, mu;
+   int      oldused, x, y, pa;
+   fp_word  t;
+
+   /* now zero the buff: c holds three FP_SIZE-digit rows (sums at OFF0, carries at OFF1, carries of carries at OFF2) */
+   pa = m->used;
+   memset(c, 0, sizeof(c));
+
+   /* copy the input */
+   oldused = a->used;
+   for (x = 0; x < oldused; x++) {
+       c[x] = a->dp[x];
+   }
+
+   MONT_START;
+
+   /* now let's get bizz-sy! */
+   for (x = 0; x < pa; x++) {
+       /* get Mu for this round */
+       LOOP_START;
+
+       /* our friendly neighbourhood alias */
+       _c   = c + x;
+       tmpm = m->dp;
+
+       for (y = 0; y < pa; y++) {
+          INNERMUL;
+          ++_c;
+       }
+       /* send carry up man... */
+       _c = c + x;
+       PROPCARRY;
+   }
+
+   /* fix the rest of the carries; NOTE(review): reaches c[2*pa + 2 + 2*FP_SIZE], inside c[] only while 2*pa + 2 < FP_SIZE -- confirm */
+   _c = c + pa;
+   for (; x < pa * 2 + 2; x++) {
+      PROPCARRY;
+      ++_c;
+   }
+
+   /* now copy out the pa+1 digits above the reduced low half (the division by R) */
+   _c   = c + pa;
+   tmpm = a->dp;
+   for (x = 0; x < pa+1; x++) {
+      *tmpm++ = *_c++;
+   }
+
+   for (; x < oldused; x++) {
+      *tmpm++ = 0;
+   }
+
+   MONT_FINI;
+
+   a->used = pa+1;
+   fp_clamp(a);
+
+   /* if A >= m then A = A - m */
+   if (fp_cmp_mag (a, m) != FP_LT) {
+      s_fp_sub (a, m, a);
+   }
+}
diff --git a/fp_montgomery_setup.c b/fp_montgomery_setup.c
new file mode 100644
index 0000000..0d22b6c
--- /dev/null
+++ b/fp_montgomery_setup.c
@@ -0,0 +1,44 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* sets up the montgomery reduction: stores -1/a mod 2**DIGIT_BIT in *rho; returns FP_VAL when a is even, else FP_OKAY */
+int fp_montgomery_setup(fp_int *a, fp_digit *rho)
+{
+  fp_digit x, b;
+
+/* fast inversion mod 2**k
+ *
+ * Based on the fact that
+ *
+ * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
+ *                   => 2*X*A - X*X*A*A = 1
+ *                   => 2*(1) - (1) = 1
+ */
+  b = a->dp[0];
+
+  if ((b & 1) == 0) {
+    return FP_VAL;
+  }
+
+  x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+  x *= 2 - b * x; /* here x*a==1 mod 2**8 */
+  x *= 2 - b * x; /* here x*a==1 mod 2**16 */
+  x *= 2 - b * x; /* here x*a==1 mod 2**32 */
+#ifdef FP_64BIT
+  x *= 2 - b * x; /* here x*a==1 mod 2**64 */
+#endif
+
+  /* rho = -1/m mod b */
+  *rho = (((fp_word) 1 << ((fp_word) DIGIT_BIT)) - ((fp_word)x));
+
+  return FP_OKAY;
+}
+
diff --git a/fp_mul.c b/fp_mul.c
new file mode 100644
index 0000000..06459b5
--- /dev/null
+++ b/fp_mul.c
@@ -0,0 +1,134 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = a * b: comba multipliers while min(used) <= 8 or max(used) <= 64 digits, one recursive Karatsuba split otherwise */
+void fp_mul(fp_int *A, fp_int *B, fp_int *C)
+{
+    int   r, y, yy, s;
+    fp_int ac, bd, comp, amb, cmd, t1, t2;
+
+     y  = MAX(A->used, B->used);
+     yy = MIN(A->used, B->used);
+     if (yy <= 8 || y <= 64) {
+
+        /* pick a comba (unrolled 4/8/16/32 x or rolled) based on the size
+           of the largest input. We also want to avoid doing excess mults if the
+           inputs are not close to the next power of two. That is, for example,
+           if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications
+        */
+        if (y <= 4) {
+           fp_mul_comba4(A,B,C);
+        } else if (y <= 8) {
+           fp_mul_comba8(A,B,C);
+        } else if (y <= 16 && y >= 12) {
+           fp_mul_comba16(A,B,C);
+#ifdef TFM_HUGE
+        } else if (y <= 32 && y >= 28) {
+           fp_mul_comba32(A,B,C);
+#endif
+        } else {
+           fp_mul_comba(A,B,C);
+        }
+     } else {
+        /* do the karatsuba action
+
+           if A = ab and B = cd for ||a|| = r we need to solve
+
+           ac*r^2 + (-(a-b)(c-d) + ac + bd)*r + bd
+
+           So we solve for the three products then we form the final result with careful shifting
+           and addition.
+
+Obvious points of optimization
+
+- "ac" parts can be memcpy'ed with an offset [all you have to do is zero up to the next 8 digits]
+- Similarly the "bd" parts can be memcpy'ed and zeroed to 8
+-
+
+        */
+        /* get our value of r: half the digit count of the shorter operand */
+        r = yy >> 1;
+
+        /* now solve for ac */
+/* t1 = upper digits of A, equivalent to: fp_copy(A, &t1); fp_rshd(&t1, r); */
+        for (s = 0; s < A->used - r; s++) {
+            t1.dp[s] = A->dp[s+r];
+        }
+        for (; s < FP_SIZE; s++) {
+            t1.dp[s] = 0;
+        }
+        if (A->used >= r) { /* always true here, since r is half of MIN(A->used, B->used) */
+           t1.used = A->used - r;
+        } else {
+           t1.used = 0;
+        }
+        t1.sign = A->sign;
+
+/* t2 = upper digits of B, equivalent to: fp_copy(B, &t2); fp_rshd(&t2, r); */
+        for (s = 0; s < B->used - r; s++) {
+            t2.dp[s] = B->dp[s+r];
+        }
+        for (; s < FP_SIZE; s++) {
+            t2.dp[s] = 0;
+        }
+        if (B->used >= r) { /* always true here, see above */
+           t2.used = B->used - r;
+        } else {
+           t2.used = 0;
+        }
+        t2.sign = B->sign;
+
+        fp_copy(&t1, &amb); fp_copy(&t2, &cmd);
+        fp_zero(&ac);
+        fp_mul(&t1, &t2, &ac);
+
+        /* now solve for bd */
+/* t1, t2 = lower r digits of A and B, equivalent to: fp_mod_2d(A, r * DIGIT_BIT, &t1);
+ *                                                    fp_mod_2d(B, r * DIGIT_BIT, &t2); */
+        for (s = 0; s < r; s++) {
+            t1.dp[s] = A->dp[s];
+            t2.dp[s] = B->dp[s];
+        }
+        for (; s < FP_SIZE; s++) {
+            t1.dp[s]   = 0;
+            t2.dp[s]   = 0;
+        }
+        t1.used = r;
+        t2.used = r;
+        fp_clamp(&t1);
+        fp_clamp(&t2);
+
+        fp_sub(&amb, &t1, &amb); fp_sub(&cmd, &t2, &cmd);
+        fp_zero(&bd);
+        fp_mul(&t1, &t2, &bd);
+
+        /* now get the (a-b)(c-d) term */
+        fp_zero(&comp);
+        fp_mul(&amb, &cmd, &comp);
+
+        /* now solve the system, do the middle term first: comp = -(a-b)(c-d) + ac + bd, then shift by r digits */
+        comp.sign ^= 1;
+        fp_add(&comp, &ac, &comp);
+        fp_add(&comp, &bd, &comp);
+        fp_lshd(&comp, r);
+
+        /* leading term */
+        fp_lshd(&ac,   r+r);
+
+        /* now sum them together; the sign is taken from A and B before C (which may alias them) is cleared */
+        s = A->sign ^ B->sign;
+        fp_zero(C);
+        fp_add(&ac, &comp, C);
+        fp_add(&bd, C, C);
+        C->sign = C->used ? s : FP_ZPOS;
+     }
+}
+
diff --git a/fp_mul_2.c b/fp_mul_2.c
new file mode 100644
index 0000000..951f4fd
--- /dev/null
+++ b/fp_mul_2.c
@@ -0,0 +1,63 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. 
That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+/* b = 2*a: shifts the magnitude of a left by one bit, copies the sign; a carry out of digit FP_SIZE-1 is dropped */
+void fp_mul_2(fp_int * a, fp_int * b)
+{
+  int     x, oldused;
+
+  oldused = b->used;
+  b->used = a->used;
+
+  {
+    register fp_digit r, rr, *tmpa, *tmpb;
+
+    /* alias for source */
+    tmpa = a->dp;
+
+    /* alias for dest */
+    tmpb = b->dp;
+
+    /* carry */
+    r = 0;
+    for (x = 0; x < a->used; x++) {
+
+      /* get what will be the *next* carry bit from the
+       * MSB of the current digit
+       */
+      rr = *tmpa >> ((fp_digit)(DIGIT_BIT - 1));
+
+      /* now shift up this digit, add in the carry [from the previous] */
+      *tmpb++ = ((*tmpa++ << ((fp_digit)1)) | r);
+
+      /* copy the carry that would be from the source
+       * digit into the next iteration
+       */
+      r = rr;
+    }
+
+    /* new leading digit? only if a free digit is left: tmpb == dp + used must stay below dp + FP_SIZE */
+    if (r != 0 && b->used < FP_SIZE) {
+      /* add a MSB which is always 1 at this point */
+      *tmpb = 1;
+      ++(b->used);
+    }
+
+    /* now zero any excess digits on the destination
+     * that we didn't write to
+     */
+    tmpb = b->dp + b->used;
+    for (x = b->used; x < oldused; x++) {
+      *tmpb++ = 0;
+    }
+  }
+  b->sign = a->sign;
+}
+
diff --git a/fp_mul_2d.c b/fp_mul_2d.c
new file mode 100644
index 0000000..3dc2b98
--- /dev/null
+++ b/fp_mul_2d.c
@@ -0,0 +1,43 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+/* c = a * 2**b for b >= 0: whole digits are shifted with fp_lshd, the remaining
+ * b % DIGIT_BIT bits by the loop below.  A negative b is not handled here. */
+void fp_mul_2d(fp_int *a, int b, fp_int *c)
+{
+   fp_digit carry, carrytmp, shift;
+   int x;
+
+   /* copy it */
+   fp_copy(a, c);
+
+   /* handle whole digits */
+   if (b >= DIGIT_BIT) {
+      fp_lshd(c, b/DIGIT_BIT);
+   }
+   b %= DIGIT_BIT;
+
+   /* shift the digits by the remaining 0 < b < DIGIT_BIT bits */
+   if (b != 0) {
+      carry = 0;
+      shift = DIGIT_BIT - b;
+      for (x = 0; x < c->used; x++) {
+          carrytmp = c->dp[x] >> shift;          /* bits pushed out of the top of this digit */
+          c->dp[x] = (c->dp[x] << b) + carry;    /* low b bits are free, so + acts as | */
+          carry = carrytmp;
+      }
+      /* store last carry if room (x == c->used here); otherwise it is dropped */
+      if (carry && x < FP_SIZE) {
+         c->dp[c->used++] = carry;
+      }
+   }
+   fp_clamp(c);
+}
+
diff --git a/fp_mul_comba.c b/fp_mul_comba.c
new file mode 100644
index 0000000..a6146cf
--- /dev/null
+++ b/fp_mul_comba.c
@@ -0,0 +1,772 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short.  That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+
+/* About this file...
+
+*/
+
+#include <tfm.h>
+
+/* these are the combas.  Worship them.
*/
+#if defined(TFM_X86)
+/* Generic x86 optimized code */
+
+/* anything you need at the start */
+#define COMBA_START
+
+/* clear the chaining variables */
+#define COMBA_CLEAR \
+   c0 = c1 = c2 = 0;
+
+/* forward the carry to the next digit */
+#define COMBA_FORWARD \
+   c0 = c1; c1 = c2; c2 = 0;
+
+/* store the first sum */
+#define COMBA_STORE(x) \
+   x = c0;
+
+/* store the second sum [carry] */
+#define COMBA_STORE2(x) \
+   x = c1;
+
+/* anything you need at the end */
+#define COMBA_FINI
+
+/* this should multiply i and j */
+#define MULADD(i, j) \
+asm volatile ( \
+     "movl %6,%%eax \n\t" \
+     "mull %7 \n\t" \
+     "addl %%eax,%0 \n\t" \
+     "adcl %%edx,%1 \n\t" \
+     "adcl $0,%2 \n\t" \
+     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
+
+#elif defined(TFM_X86_64)
+/* x86-64 optimized */
+
+/* anything you need at the start */
+#define COMBA_START
+
+/* clear the chaining variables */
+#define COMBA_CLEAR \
+   c0 = c1 = c2 = 0;
+
+/* forward the carry to the next digit */
+#define COMBA_FORWARD \
+   c0 = c1; c1 = c2; c2 = 0;
+
+/* store the first sum */
+#define COMBA_STORE(x) \
+   x = c0;
+
+/* store the second sum [carry] */
+#define COMBA_STORE2(x) \
+   x = c1;
+
+/* anything you need at the end */
+#define COMBA_FINI
+
+/* this should multiply i and j */
+#define MULADD(i, j) \
+asm volatile ( \
+     "movq %6,%%rax \n\t" \
+     "mulq %7 \n\t" \
+     "addq %%rax,%0 \n\t" \
+     "adcq %%rdx,%1 \n\t" \
+     "adcq $0,%2 \n\t" \
+     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%rax","%rdx","%cc");
+
+#elif defined(TFM_SSE2)
+/* use SSE2 optimizations */
+
+/* anything you need at the start */
+#define COMBA_START
+
+/* clear the chaining variables */
+#define COMBA_CLEAR \
+   c0 = c1 = c2 = 0;
+
+/* forward the carry to the next digit */
+#define COMBA_FORWARD \
+   c0 = c1; c1 = c2; c2 = 0;
+
+/* store the first sum */
+#define COMBA_STORE(x) \
+   x = c0;
+
+/* store the second sum [carry] */
+#define COMBA_STORE2(x) \
+   x = c1;
+
+/* anything you need at the end */
+#define COMBA_FINI \
+   asm("emms");
+
+/* this should multiply i and j */
+ #define MULADD(i, j) \
+ asm volatile ( \
+     "movd %6,%%mm0 \n\t" \
+     "movd %7,%%mm1 \n\t" \
+     "pmuludq %%mm1,%%mm0\n\t" \
+     "movd %%mm0,%%eax \n\t" \
+     "psrlq $32,%%mm0 \n\t" \
+     "addl %%eax,%0 \n\t" \
+     "movd %%mm0,%%eax \n\t" \
+     "adcl %%eax,%1 \n\t" \
+     "adcl $0,%2 \n\t" \
+     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%cc");
+
+#elif defined(TFM_ARM)
+/* ARM code */
+
+#define COMBA_START
+
+#define COMBA_CLEAR \
+   c0 = c1 = c2 = 0;
+
+#define COMBA_FORWARD \
+   c0 = c1; c1 = c2; c2 = 0;
+
+#define COMBA_STORE(x) \
+   x = c0;
+
+#define COMBA_STORE2(x) \
+   x = c1;
+
+#define COMBA_FINI
+
+#define MULADD(i, j) \
+asm( \
+" UMULL r0,r1,%6,%7 \n\t" \
+" ADDS %0,%0,r0 \n\t" \
+" ADCS %1,%1,r1 \n\t" \
+" ADC %2, %2, #0 \n\t" \
+:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc");
+
+#else
+/* ISO C code: same c0/c1/c2 column accumulator as the asm variants, using the caller's fp_word t */
+
+#define COMBA_START
+
+#define COMBA_CLEAR \
+   c0 = c1 = c2 = 0;
+
+#define COMBA_FORWARD \
+   c0 = c1; c1 = c2; c2 = 0;
+
+#define COMBA_STORE(x) \
+   x = c0;
+
+#define COMBA_STORE2(x) \
+   x = c1;
+
+#define COMBA_FINI
+/* carries come from the high half of an fp_word sum, so a carry into c1 can no longer wrap it unnoticed (assumes fp_word holds two fp_digits) */
+#define MULADD(i, j)                                                \
+   t  = (fp_word)c0 + ((fp_word)(i)) * ((fp_word)(j));              \
+   c0 = (fp_digit)t;                                                \
+   t  = (fp_word)c1 + (t >> DIGIT_BIT); c1 = (fp_digit)t; c2 += (fp_digit)(t >> DIGIT_BIT);
+
+#endif
+/* generic PxQ multiplier: C = A * B computed one output column at a time.
+ * The product is cut to FP_SIZE-1 digits when A->used + B->used >= FP_SIZE.
+ * C may alias A or B; the result is then built in a temporary and copied out. */
+void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
+{
+   int       ix, iy, iz, tx, ty, pa;
+   fp_digit  c0, c1, c2, *tmpx, *tmpy;
+   fp_word   t;
+   fp_int    tmp, *dst;
+
+   COMBA_START;
+   COMBA_CLEAR;
+
+   /* get size of output and trim */
+   pa = A->used + B->used;
+   if (pa >= FP_SIZE) {
+      pa = FP_SIZE-1;
+   }
+
+   if (A == C || B == C) {
+      fp_zero(&tmp);
+      dst = &tmp;
+   } else {
+      fp_zero(C);
+      dst = C;
+   }
+
+   for (ix = 0; ix < pa; ix++) {
+      /* get offsets into the two bignums */
+      ty = MIN(ix, B->used-1);
+      tx = ix - ty;
+
+      /* setup temp aliases */
+      tmpx = A->dp + tx;
+      tmpy = B->dp + ty;
+
+      /* this is the number of times the loop will iterate, essentially it's
+         while (tx++ < a->used && ty-- >= 0) { ... }
+       */
+      iy = MIN(A->used-tx, ty+1);
+
+      /* execute loop */
+      COMBA_FORWARD;
+      for (iz = 0; iz < iy; ++iz) {
+          MULADD(*tmpx++, *tmpy--);
+      }
+
+      /* store term */
+      COMBA_STORE(dst->dp[ix]);
+  }
+  /* no final carry store: dp[pa] lies above dst->used and stays zero from fp_zero.
+   * An untruncated product has no carry left; a truncated one would only leave junk. */
+  COMBA_FINI;
+
+  dst->used = pa;
+  fp_clamp(dst);
+  dst->sign = dst->used ? A->sign ^ B->sign : FP_ZPOS;
+  fp_copy(dst, C);
+}
+/* unrolled 4x4-digit multiplier: reads 4 digits of A and of B, writes C->dp[0..7]; C may alias A or B */
+void fp_mul_comba4(fp_int *A, fp_int *B, fp_int *C)
+{
+   fp_word t;
+   fp_digit c0, c1, c2, at[8];
+
+   memcpy(at, A->dp, 4 * sizeof(fp_digit));
+   memcpy(at+4, B->dp, 4 * sizeof(fp_digit));
+   COMBA_START;
+
+   COMBA_CLEAR;
+   /* 0 */
+   MULADD(at[0], at[4]);
+   COMBA_STORE(C->dp[0]);
+   /* 1 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[5]); MULADD(at[1], at[4]);
+   COMBA_STORE(C->dp[1]);
+   /* 2 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[6]); MULADD(at[1], at[5]); MULADD(at[2], at[4]);
+   COMBA_STORE(C->dp[2]);
+   /* 3 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[7]); MULADD(at[1], at[6]); MULADD(at[2], at[5]); MULADD(at[3], at[4]);
+   COMBA_STORE(C->dp[3]);
+   /* 4 */
+   COMBA_FORWARD;
+   MULADD(at[1], at[7]); MULADD(at[2], at[6]); MULADD(at[3], at[5]);
+   COMBA_STORE(C->dp[4]);
+   /* 5 */
+   COMBA_FORWARD;
+   MULADD(at[2], at[7]); MULADD(at[3], at[6]);
+   COMBA_STORE(C->dp[5]);
+   /* 6 */
+   COMBA_FORWARD;
+   MULADD(at[3], at[7]);
+   COMBA_STORE(C->dp[6]);
+   COMBA_STORE2(C->dp[7]);
+   C->used = 8;
+   C->sign = A->sign ^ B->sign;
+   fp_clamp(C);
+   COMBA_FINI;
+}
+/* unrolled 8x8-digit multiplier: reads 8 digits of A and of B, writes C->dp[0..15].
+ * Operands are copied into at[] before C is written, so C may alias A or B. */
+void fp_mul_comba8(fp_int *A, fp_int *B, fp_int *C)
+{
+   fp_word t;
+   fp_digit c0, c1, c2, at[16];
+
+   memcpy(at, A->dp, 8 * sizeof(fp_digit));
+   memcpy(at+8, B->dp, 8 * sizeof(fp_digit));
+   COMBA_START;
+
+   COMBA_CLEAR;
+   /* 0 */
+   MULADD(at[0], at[8]);
+   COMBA_STORE(C->dp[0]);
+   /* 1 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[9]); MULADD(at[1], at[8]);
+   COMBA_STORE(C->dp[1]);
+   /* 2 */
+
COMBA_FORWARD;
+   MULADD(at[0], at[10]); MULADD(at[1], at[9]); MULADD(at[2], at[8]);
+   COMBA_STORE(C->dp[2]);
+   /* 3 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[11]); MULADD(at[1], at[10]); MULADD(at[2], at[9]); MULADD(at[3], at[8]);
+   COMBA_STORE(C->dp[3]);
+   /* 4 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[12]); MULADD(at[1], at[11]); MULADD(at[2], at[10]); MULADD(at[3], at[9]); MULADD(at[4], at[8]);
+   COMBA_STORE(C->dp[4]);
+   /* 5 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[13]); MULADD(at[1], at[12]); MULADD(at[2], at[11]); MULADD(at[3], at[10]); MULADD(at[4], at[9]); MULADD(at[5], at[8]);
+   COMBA_STORE(C->dp[5]);
+   /* 6 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[14]); MULADD(at[1], at[13]); MULADD(at[2], at[12]); MULADD(at[3], at[11]); MULADD(at[4], at[10]); MULADD(at[5], at[9]); MULADD(at[6], at[8]);
+   COMBA_STORE(C->dp[6]);
+   /* 7: longest column of the 8x8 product, all eight cross terms */
+   COMBA_FORWARD;
+   MULADD(at[0], at[15]); MULADD(at[1], at[14]); MULADD(at[2], at[13]); MULADD(at[3], at[12]); MULADD(at[4], at[11]); MULADD(at[5], at[10]); MULADD(at[6], at[9]); MULADD(at[7], at[8]);
+   COMBA_STORE(C->dp[7]);
+   /* 8: from here the low digits of A drop out one per column */
+   COMBA_FORWARD;
+   MULADD(at[1], at[15]); MULADD(at[2], at[14]); MULADD(at[3], at[13]); MULADD(at[4], at[12]); MULADD(at[5], at[11]); MULADD(at[6], at[10]); MULADD(at[7], at[9]);
+   COMBA_STORE(C->dp[8]);
+   /* 9 */
+   COMBA_FORWARD;
+   MULADD(at[2], at[15]); MULADD(at[3], at[14]); MULADD(at[4], at[13]); MULADD(at[5], at[12]); MULADD(at[6], at[11]); MULADD(at[7], at[10]);
+   COMBA_STORE(C->dp[9]);
+   /* 10 */
+   COMBA_FORWARD;
+   MULADD(at[3], at[15]); MULADD(at[4], at[14]); MULADD(at[5], at[13]); MULADD(at[6], at[12]); MULADD(at[7], at[11]);
+   COMBA_STORE(C->dp[10]);
+   /* 11 */
+   COMBA_FORWARD;
+   MULADD(at[4], at[15]); MULADD(at[5], at[14]); MULADD(at[6], at[13]); MULADD(at[7], at[12]);
+   COMBA_STORE(C->dp[11]);
+   /* 12 */
+   COMBA_FORWARD;
+   MULADD(at[5], at[15]); MULADD(at[6], at[14]); MULADD(at[7], at[13]);
+   COMBA_STORE(C->dp[12]);
+   /* 13 */
+   COMBA_FORWARD;
+   MULADD(at[6], at[15]); MULADD(at[7], at[14]);
+   COMBA_STORE(C->dp[13]);
+   /* 14 */
+   COMBA_FORWARD;
+   MULADD(at[7], at[15]);
+   COMBA_STORE(C->dp[14]);
+   COMBA_STORE2(C->dp[15]);   /* carry left after column 14 becomes the top digit */
+   C->used = 16;              /* all 16 digits were written */
+   C->sign = A->sign ^ B->sign;
+   fp_clamp(C);               /* presumably trims leading zero digits -- defined elsewhere, confirm */
+   COMBA_FINI;
+}
+/* unrolled 16x16-digit multiplier: each numbered group below produces one digit (column) of C */
+/* A and B are copied into at[] before C is written, so C may alias either operand */
+void fp_mul_comba16(fp_int *A, fp_int *B, fp_int *C)
+{
+   fp_word t;
+   fp_digit c0, c1, c2, at[32];
+
+   memcpy(at, A->dp, 16 * sizeof(fp_digit));      /* always 16 digits, whatever A->used is */
+   memcpy(at+16, B->dp, 16 * sizeof(fp_digit));   /* B's digits live in at[16..31] */
+   COMBA_START;
+
+   COMBA_CLEAR;
+   /* 0 */
+   MULADD(at[0], at[16]);
+   COMBA_STORE(C->dp[0]);
+   /* 1 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[17]); MULADD(at[1], at[16]);
+   COMBA_STORE(C->dp[1]);
+   /* 2 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[18]); MULADD(at[1], at[17]); MULADD(at[2], at[16]);
+   COMBA_STORE(C->dp[2]);
+   /* 3 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[19]); MULADD(at[1], at[18]); MULADD(at[2], at[17]); MULADD(at[3], at[16]);
+   COMBA_STORE(C->dp[3]);
+   /* 4 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[20]); MULADD(at[1], at[19]); MULADD(at[2], at[18]); MULADD(at[3], at[17]); MULADD(at[4], at[16]);
+   COMBA_STORE(C->dp[4]);
+   /* 5 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[21]); MULADD(at[1], at[20]); MULADD(at[2], at[19]); MULADD(at[3], at[18]); MULADD(at[4], at[17]); MULADD(at[5], at[16]);
+   COMBA_STORE(C->dp[5]);
+   /* 6 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[22]); MULADD(at[1], at[21]); MULADD(at[2], at[20]); MULADD(at[3], at[19]); MULADD(at[4], at[18]); MULADD(at[5], at[17]); MULADD(at[6], at[16]);
+   COMBA_STORE(C->dp[6]);
+   /* 7 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[23]); MULADD(at[1], at[22]); MULADD(at[2], at[21]); MULADD(at[3], at[20]); MULADD(at[4], at[19]); MULADD(at[5], at[18]); MULADD(at[6], at[17]); MULADD(at[7], at[16]);
+   COMBA_STORE(C->dp[7]);
+   /* 8 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[24]); MULADD(at[1], at[23]); MULADD(at[2], at[22]); MULADD(at[3], at[21]); MULADD(at[4], at[20]); MULADD(at[5], at[19]); MULADD(at[6], at[18]); MULADD(at[7], at[17]); MULADD(at[8], at[16]);
+   COMBA_STORE(C->dp[8]);
+   /* 9 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[25]); MULADD(at[1], at[24]); MULADD(at[2], at[23]); MULADD(at[3], at[22]); MULADD(at[4], at[21]); MULADD(at[5], at[20]); MULADD(at[6], at[19]); MULADD(at[7], at[18]); MULADD(at[8], at[17]); MULADD(at[9], at[16]);
+   COMBA_STORE(C->dp[9]);
+   /* 10 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[26]); MULADD(at[1], at[25]); MULADD(at[2], at[24]); MULADD(at[3], at[23]); MULADD(at[4], at[22]); MULADD(at[5], at[21]); MULADD(at[6], at[20]); MULADD(at[7], at[19]); MULADD(at[8], at[18]); MULADD(at[9], at[17]); MULADD(at[10], at[16]);
+   COMBA_STORE(C->dp[10]);
+   /* 11 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[27]); MULADD(at[1], at[26]); MULADD(at[2], at[25]); MULADD(at[3], at[24]); MULADD(at[4], at[23]); MULADD(at[5], at[22]); MULADD(at[6], at[21]); MULADD(at[7], at[20]); MULADD(at[8], at[19]); MULADD(at[9], at[18]); MULADD(at[10], at[17]); MULADD(at[11], at[16]);
+   COMBA_STORE(C->dp[11]);
+   /* 12 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[28]); MULADD(at[1], at[27]); MULADD(at[2], at[26]); MULADD(at[3], at[25]); MULADD(at[4], at[24]); MULADD(at[5], at[23]); MULADD(at[6], at[22]); MULADD(at[7], at[21]); MULADD(at[8], at[20]); MULADD(at[9], at[19]); MULADD(at[10], at[18]); MULADD(at[11], at[17]); MULADD(at[12], at[16]);
+   COMBA_STORE(C->dp[12]);
+   /* 13 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[29]); MULADD(at[1], at[28]); MULADD(at[2], at[27]); MULADD(at[3], at[26]); MULADD(at[4], at[25]); MULADD(at[5], at[24]); MULADD(at[6], at[23]); MULADD(at[7], at[22]); MULADD(at[8], at[21]); MULADD(at[9], at[20]); MULADD(at[10], at[19]); MULADD(at[11], at[18]); MULADD(at[12], at[17]); MULADD(at[13], at[16]);
+   COMBA_STORE(C->dp[13]);
+   /* 14 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[30]); MULADD(at[1], at[29]); MULADD(at[2], at[28]); MULADD(at[3], at[27]); MULADD(at[4], at[26]); MULADD(at[5], at[25]); MULADD(at[6], at[24]); MULADD(at[7], at[23]); MULADD(at[8], at[22]); MULADD(at[9], at[21]); MULADD(at[10], at[20]); MULADD(at[11], at[19]); MULADD(at[12], at[18]); MULADD(at[13], at[17]); MULADD(at[14], at[16]);
+   COMBA_STORE(C->dp[14]);
+   /* 15: longest column, all sixteen cross terms */
+   COMBA_FORWARD;
+   MULADD(at[0], at[31]); MULADD(at[1], at[30]); MULADD(at[2], at[29]); MULADD(at[3], at[28]); MULADD(at[4], at[27]); MULADD(at[5], at[26]); MULADD(at[6], at[25]); MULADD(at[7], at[24]); MULADD(at[8], at[23]); MULADD(at[9], at[22]); MULADD(at[10], at[21]); MULADD(at[11], at[20]); MULADD(at[12], at[19]); MULADD(at[13], at[18]); MULADD(at[14], at[17]); MULADD(at[15], at[16]);
+   COMBA_STORE(C->dp[15]);
+   /* 16: from here the low digits of A drop out one per column */
+   COMBA_FORWARD;
+   MULADD(at[1], at[31]); MULADD(at[2], at[30]); MULADD(at[3], at[29]); MULADD(at[4], at[28]); MULADD(at[5], at[27]); MULADD(at[6], at[26]); MULADD(at[7], at[25]); MULADD(at[8], at[24]); MULADD(at[9], at[23]); MULADD(at[10], at[22]); MULADD(at[11], at[21]); MULADD(at[12], at[20]); MULADD(at[13], at[19]); MULADD(at[14], at[18]); MULADD(at[15], at[17]);
+   COMBA_STORE(C->dp[16]);
+   /* 17 */
+   COMBA_FORWARD;
+   MULADD(at[2], at[31]); MULADD(at[3], at[30]); MULADD(at[4], at[29]); MULADD(at[5], at[28]); MULADD(at[6], at[27]); MULADD(at[7], at[26]); MULADD(at[8], at[25]); MULADD(at[9], at[24]); MULADD(at[10], at[23]); MULADD(at[11], at[22]); MULADD(at[12], at[21]); MULADD(at[13], at[20]); MULADD(at[14], at[19]); MULADD(at[15], at[18]);
+   COMBA_STORE(C->dp[17]);
+   /* 18 */
+   COMBA_FORWARD;
+   MULADD(at[3], at[31]); MULADD(at[4], at[30]); MULADD(at[5], at[29]); MULADD(at[6], at[28]); MULADD(at[7], at[27]); MULADD(at[8], at[26]); MULADD(at[9], at[25]); MULADD(at[10], at[24]); MULADD(at[11], at[23]); MULADD(at[12], at[22]); MULADD(at[13], at[21]); MULADD(at[14], at[20]); MULADD(at[15], at[19]);
+   COMBA_STORE(C->dp[18]);
+   /* 19 */
+   COMBA_FORWARD;
+   MULADD(at[4], at[31]); MULADD(at[5], at[30]); MULADD(at[6], at[29]); MULADD(at[7], at[28]); MULADD(at[8], at[27]); MULADD(at[9], at[26]); MULADD(at[10], at[25]); MULADD(at[11], at[24]); MULADD(at[12], at[23]); MULADD(at[13], at[22]); MULADD(at[14], at[21]); MULADD(at[15], at[20]);
+   COMBA_STORE(C->dp[19]);
+   /* 20 */
+   COMBA_FORWARD;
+   MULADD(at[5], at[31]); MULADD(at[6], at[30]); MULADD(at[7], at[29]); MULADD(at[8], at[28]); MULADD(at[9], at[27]); MULADD(at[10], at[26]); MULADD(at[11], at[25]); MULADD(at[12], at[24]); MULADD(at[13], at[23]); MULADD(at[14], at[22]); MULADD(at[15], at[21]);
+   COMBA_STORE(C->dp[20]);
+   /* 21 */
+   COMBA_FORWARD;
+   MULADD(at[6], at[31]); MULADD(at[7], at[30]); MULADD(at[8], at[29]); MULADD(at[9], at[28]); MULADD(at[10], at[27]); MULADD(at[11], at[26]); MULADD(at[12], at[25]); MULADD(at[13], at[24]); MULADD(at[14], at[23]); MULADD(at[15], at[22]);
+   COMBA_STORE(C->dp[21]);
+   /* 22 */
+   COMBA_FORWARD;
+   MULADD(at[7], at[31]); MULADD(at[8], at[30]); MULADD(at[9], at[29]); MULADD(at[10], at[28]); MULADD(at[11], at[27]); MULADD(at[12], at[26]); MULADD(at[13], at[25]); MULADD(at[14], at[24]); MULADD(at[15], at[23]);
+   COMBA_STORE(C->dp[22]);
+   /* 23 */
+   COMBA_FORWARD;
+   MULADD(at[8], at[31]); MULADD(at[9], at[30]); MULADD(at[10], at[29]); MULADD(at[11], at[28]); MULADD(at[12], at[27]); MULADD(at[13], at[26]); MULADD(at[14], at[25]); MULADD(at[15], at[24]);
+   COMBA_STORE(C->dp[23]);
+   /* 24 */
+   COMBA_FORWARD;
+   MULADD(at[9], at[31]); MULADD(at[10], at[30]); MULADD(at[11], at[29]); MULADD(at[12], at[28]); MULADD(at[13], at[27]); MULADD(at[14], at[26]); MULADD(at[15], at[25]);
+   COMBA_STORE(C->dp[24]);
+   /* 25 */
+   COMBA_FORWARD;
+   MULADD(at[10], at[31]); MULADD(at[11], at[30]); MULADD(at[12], at[29]); MULADD(at[13], at[28]); MULADD(at[14], at[27]); MULADD(at[15], at[26]);
+   COMBA_STORE(C->dp[25]);
+   /* 26 */
+   COMBA_FORWARD;
+   MULADD(at[11], at[31]); MULADD(at[12], at[30]); MULADD(at[13], at[29]); MULADD(at[14], at[28]); MULADD(at[15], at[27]);
+   COMBA_STORE(C->dp[26]);
+   /* 27 */
+   COMBA_FORWARD;
+   MULADD(at[12], at[31]); MULADD(at[13], at[30]); MULADD(at[14], at[29]); MULADD(at[15], at[28]);
+   COMBA_STORE(C->dp[27]);
+   /* 28 */
+   COMBA_FORWARD;
+   MULADD(at[13], at[31]); MULADD(at[14], at[30]); MULADD(at[15], at[29]);
+   COMBA_STORE(C->dp[28]);
+   /* 29 */
+   COMBA_FORWARD;
+   MULADD(at[14], at[31]); MULADD(at[15], at[30]);
+   COMBA_STORE(C->dp[29]);
+   /* 30 */
+   COMBA_FORWARD;
+   MULADD(at[15], at[31]);
+   COMBA_STORE(C->dp[30]);
+   COMBA_STORE2(C->dp[31]);   /* carry left after column 30 becomes the top digit */
+   C->used = 32;              /* all 32 digits were written */
+   C->sign = A->sign ^ B->sign;
+   fp_clamp(C);               /* presumably trims leading zero digits -- defined elsewhere, confirm */
+   COMBA_FINI;
+}
+
+#ifdef TFM_HUGE
+
+void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C)
+{
+   fp_word t;
+   fp_digit c0, c1, c2, at[64];
+
+   memcpy(at, A->dp, 32 * sizeof(fp_digit));
+   memcpy(at+32, B->dp, 32 * sizeof(fp_digit));
+   COMBA_START;
+
+   COMBA_CLEAR;
+   /* 0 */
+   MULADD(at[0], at[32]);
+   COMBA_STORE(C->dp[0]);
+   /* 1 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[33]); MULADD(at[1], at[32]);
+   COMBA_STORE(C->dp[1]);
+   /* 2 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[34]); MULADD(at[1], at[33]); MULADD(at[2], at[32]);
+   COMBA_STORE(C->dp[2]);
+   /* 3 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[35]); MULADD(at[1], at[34]); MULADD(at[2], at[33]); MULADD(at[3], at[32]);
+   COMBA_STORE(C->dp[3]);
+   /* 4 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[36]); MULADD(at[1], at[35]); MULADD(at[2], at[34]); MULADD(at[3], at[33]); MULADD(at[4], at[32]);
+   COMBA_STORE(C->dp[4]);
+   /* 5 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[37]); MULADD(at[1], at[36]); MULADD(at[2], at[35]); MULADD(at[3], at[34]); MULADD(at[4], at[33]); MULADD(at[5], at[32]);
+   COMBA_STORE(C->dp[5]);
+   /* 6 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[38]); MULADD(at[1], at[37]); MULADD(at[2], at[36]); MULADD(at[3], at[35]); MULADD(at[4], at[34]); MULADD(at[5], at[33]); MULADD(at[6], at[32]);
+   COMBA_STORE(C->dp[6]);
+   /* 7 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[39]); MULADD(at[1], at[38]); MULADD(at[2], at[37]); MULADD(at[3], at[36]); MULADD(at[4], at[35]); MULADD(at[5], at[34]); MULADD(at[6], at[33]); MULADD(at[7], at[32]);
+   COMBA_STORE(C->dp[7]);
+   /* 8 */
+   COMBA_FORWARD;
+   MULADD(at[0], at[40]); MULADD(at[1], at[39]); MULADD(at[2], at[38]); MULADD(at[3], at[37]); MULADD(at[4], at[36]); MULADD(at[5], at[35]); MULADD(at[6], at[34]); MULADD(at[7], at[33]); MULADD(at[8], at[32]);
+   COMBA_STORE(C->dp[8]);
+
/* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[41]); MULADD(at[1], at[40]); MULADD(at[2], at[39]); MULADD(at[3], at[38]); MULADD(at[4], at[37]); MULADD(at[5], at[36]); MULADD(at[6], at[35]); MULADD(at[7], at[34]); MULADD(at[8], at[33]); MULADD(at[9], at[32]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[42]); MULADD(at[1], at[41]); MULADD(at[2], at[40]); MULADD(at[3], at[39]); MULADD(at[4], at[38]); MULADD(at[5], at[37]); MULADD(at[6], at[36]); MULADD(at[7], at[35]); MULADD(at[8], at[34]); MULADD(at[9], at[33]); MULADD(at[10], at[32]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[43]); MULADD(at[1], at[42]); MULADD(at[2], at[41]); MULADD(at[3], at[40]); MULADD(at[4], at[39]); MULADD(at[5], at[38]); MULADD(at[6], at[37]); MULADD(at[7], at[36]); MULADD(at[8], at[35]); MULADD(at[9], at[34]); MULADD(at[10], at[33]); MULADD(at[11], at[32]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[0], at[44]); MULADD(at[1], at[43]); MULADD(at[2], at[42]); MULADD(at[3], at[41]); MULADD(at[4], at[40]); MULADD(at[5], at[39]); MULADD(at[6], at[38]); MULADD(at[7], at[37]); MULADD(at[8], at[36]); MULADD(at[9], at[35]); MULADD(at[10], at[34]); MULADD(at[11], at[33]); MULADD(at[12], at[32]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[0], at[45]); MULADD(at[1], at[44]); MULADD(at[2], at[43]); MULADD(at[3], at[42]); MULADD(at[4], at[41]); MULADD(at[5], at[40]); MULADD(at[6], at[39]); MULADD(at[7], at[38]); MULADD(at[8], at[37]); MULADD(at[9], at[36]); MULADD(at[10], at[35]); MULADD(at[11], at[34]); MULADD(at[12], at[33]); MULADD(at[13], at[32]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[0], at[46]); MULADD(at[1], at[45]); MULADD(at[2], at[44]); MULADD(at[3], at[43]); MULADD(at[4], at[42]); MULADD(at[5], at[41]); MULADD(at[6], at[40]); MULADD(at[7], at[39]); MULADD(at[8], at[38]); MULADD(at[9], at[37]); MULADD(at[10], at[36]); MULADD(at[11], at[35]); MULADD(at[12], at[34]); 
MULADD(at[13], at[33]); MULADD(at[14], at[32]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[0], at[47]); MULADD(at[1], at[46]); MULADD(at[2], at[45]); MULADD(at[3], at[44]); MULADD(at[4], at[43]); MULADD(at[5], at[42]); MULADD(at[6], at[41]); MULADD(at[7], at[40]); MULADD(at[8], at[39]); MULADD(at[9], at[38]); MULADD(at[10], at[37]); MULADD(at[11], at[36]); MULADD(at[12], at[35]); MULADD(at[13], at[34]); MULADD(at[14], at[33]); MULADD(at[15], at[32]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[0], at[48]); MULADD(at[1], at[47]); MULADD(at[2], at[46]); MULADD(at[3], at[45]); MULADD(at[4], at[44]); MULADD(at[5], at[43]); MULADD(at[6], at[42]); MULADD(at[7], at[41]); MULADD(at[8], at[40]); MULADD(at[9], at[39]); MULADD(at[10], at[38]); MULADD(at[11], at[37]); MULADD(at[12], at[36]); MULADD(at[13], at[35]); MULADD(at[14], at[34]); MULADD(at[15], at[33]); MULADD(at[16], at[32]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[0], at[49]); MULADD(at[1], at[48]); MULADD(at[2], at[47]); MULADD(at[3], at[46]); MULADD(at[4], at[45]); MULADD(at[5], at[44]); MULADD(at[6], at[43]); MULADD(at[7], at[42]); MULADD(at[8], at[41]); MULADD(at[9], at[40]); MULADD(at[10], at[39]); MULADD(at[11], at[38]); MULADD(at[12], at[37]); MULADD(at[13], at[36]); MULADD(at[14], at[35]); MULADD(at[15], at[34]); MULADD(at[16], at[33]); MULADD(at[17], at[32]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[0], at[50]); MULADD(at[1], at[49]); MULADD(at[2], at[48]); MULADD(at[3], at[47]); MULADD(at[4], at[46]); MULADD(at[5], at[45]); MULADD(at[6], at[44]); MULADD(at[7], at[43]); MULADD(at[8], at[42]); MULADD(at[9], at[41]); MULADD(at[10], at[40]); MULADD(at[11], at[39]); MULADD(at[12], at[38]); MULADD(at[13], at[37]); MULADD(at[14], at[36]); MULADD(at[15], at[35]); MULADD(at[16], at[34]); MULADD(at[17], at[33]); MULADD(at[18], at[32]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[0], at[51]); 
MULADD(at[1], at[50]); MULADD(at[2], at[49]); MULADD(at[3], at[48]); MULADD(at[4], at[47]); MULADD(at[5], at[46]); MULADD(at[6], at[45]); MULADD(at[7], at[44]); MULADD(at[8], at[43]); MULADD(at[9], at[42]); MULADD(at[10], at[41]); MULADD(at[11], at[40]); MULADD(at[12], at[39]); MULADD(at[13], at[38]); MULADD(at[14], at[37]); MULADD(at[15], at[36]); MULADD(at[16], at[35]); MULADD(at[17], at[34]); MULADD(at[18], at[33]); MULADD(at[19], at[32]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[0], at[52]); MULADD(at[1], at[51]); MULADD(at[2], at[50]); MULADD(at[3], at[49]); MULADD(at[4], at[48]); MULADD(at[5], at[47]); MULADD(at[6], at[46]); MULADD(at[7], at[45]); MULADD(at[8], at[44]); MULADD(at[9], at[43]); MULADD(at[10], at[42]); MULADD(at[11], at[41]); MULADD(at[12], at[40]); MULADD(at[13], at[39]); MULADD(at[14], at[38]); MULADD(at[15], at[37]); MULADD(at[16], at[36]); MULADD(at[17], at[35]); MULADD(at[18], at[34]); MULADD(at[19], at[33]); MULADD(at[20], at[32]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[0], at[53]); MULADD(at[1], at[52]); MULADD(at[2], at[51]); MULADD(at[3], at[50]); MULADD(at[4], at[49]); MULADD(at[5], at[48]); MULADD(at[6], at[47]); MULADD(at[7], at[46]); MULADD(at[8], at[45]); MULADD(at[9], at[44]); MULADD(at[10], at[43]); MULADD(at[11], at[42]); MULADD(at[12], at[41]); MULADD(at[13], at[40]); MULADD(at[14], at[39]); MULADD(at[15], at[38]); MULADD(at[16], at[37]); MULADD(at[17], at[36]); MULADD(at[18], at[35]); MULADD(at[19], at[34]); MULADD(at[20], at[33]); MULADD(at[21], at[32]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[0], at[54]); MULADD(at[1], at[53]); MULADD(at[2], at[52]); MULADD(at[3], at[51]); MULADD(at[4], at[50]); MULADD(at[5], at[49]); MULADD(at[6], at[48]); MULADD(at[7], at[47]); MULADD(at[8], at[46]); MULADD(at[9], at[45]); MULADD(at[10], at[44]); MULADD(at[11], at[43]); MULADD(at[12], at[42]); MULADD(at[13], at[41]); MULADD(at[14], at[40]); MULADD(at[15], 
at[39]); MULADD(at[16], at[38]); MULADD(at[17], at[37]); MULADD(at[18], at[36]); MULADD(at[19], at[35]); MULADD(at[20], at[34]); MULADD(at[21], at[33]); MULADD(at[22], at[32]); + COMBA_STORE(C->dp[22]); + /* 23 */ + COMBA_FORWARD; + MULADD(at[0], at[55]); MULADD(at[1], at[54]); MULADD(at[2], at[53]); MULADD(at[3], at[52]); MULADD(at[4], at[51]); MULADD(at[5], at[50]); MULADD(at[6], at[49]); MULADD(at[7], at[48]); MULADD(at[8], at[47]); MULADD(at[9], at[46]); MULADD(at[10], at[45]); MULADD(at[11], at[44]); MULADD(at[12], at[43]); MULADD(at[13], at[42]); MULADD(at[14], at[41]); MULADD(at[15], at[40]); MULADD(at[16], at[39]); MULADD(at[17], at[38]); MULADD(at[18], at[37]); MULADD(at[19], at[36]); MULADD(at[20], at[35]); MULADD(at[21], at[34]); MULADD(at[22], at[33]); MULADD(at[23], at[32]); + COMBA_STORE(C->dp[23]); + /* 24 */ + COMBA_FORWARD; + MULADD(at[0], at[56]); MULADD(at[1], at[55]); MULADD(at[2], at[54]); MULADD(at[3], at[53]); MULADD(at[4], at[52]); MULADD(at[5], at[51]); MULADD(at[6], at[50]); MULADD(at[7], at[49]); MULADD(at[8], at[48]); MULADD(at[9], at[47]); MULADD(at[10], at[46]); MULADD(at[11], at[45]); MULADD(at[12], at[44]); MULADD(at[13], at[43]); MULADD(at[14], at[42]); MULADD(at[15], at[41]); MULADD(at[16], at[40]); MULADD(at[17], at[39]); MULADD(at[18], at[38]); MULADD(at[19], at[37]); MULADD(at[20], at[36]); MULADD(at[21], at[35]); MULADD(at[22], at[34]); MULADD(at[23], at[33]); MULADD(at[24], at[32]); + COMBA_STORE(C->dp[24]); + /* 25 */ + COMBA_FORWARD; + MULADD(at[0], at[57]); MULADD(at[1], at[56]); MULADD(at[2], at[55]); MULADD(at[3], at[54]); MULADD(at[4], at[53]); MULADD(at[5], at[52]); MULADD(at[6], at[51]); MULADD(at[7], at[50]); MULADD(at[8], at[49]); MULADD(at[9], at[48]); MULADD(at[10], at[47]); MULADD(at[11], at[46]); MULADD(at[12], at[45]); MULADD(at[13], at[44]); MULADD(at[14], at[43]); MULADD(at[15], at[42]); MULADD(at[16], at[41]); MULADD(at[17], at[40]); MULADD(at[18], at[39]); MULADD(at[19], at[38]); MULADD(at[20], at[37]); 
MULADD(at[21], at[36]); MULADD(at[22], at[35]); MULADD(at[23], at[34]); MULADD(at[24], at[33]); MULADD(at[25], at[32]); + COMBA_STORE(C->dp[25]); + /* 26 */ + COMBA_FORWARD; + MULADD(at[0], at[58]); MULADD(at[1], at[57]); MULADD(at[2], at[56]); MULADD(at[3], at[55]); MULADD(at[4], at[54]); MULADD(at[5], at[53]); MULADD(at[6], at[52]); MULADD(at[7], at[51]); MULADD(at[8], at[50]); MULADD(at[9], at[49]); MULADD(at[10], at[48]); MULADD(at[11], at[47]); MULADD(at[12], at[46]); MULADD(at[13], at[45]); MULADD(at[14], at[44]); MULADD(at[15], at[43]); MULADD(at[16], at[42]); MULADD(at[17], at[41]); MULADD(at[18], at[40]); MULADD(at[19], at[39]); MULADD(at[20], at[38]); MULADD(at[21], at[37]); MULADD(at[22], at[36]); MULADD(at[23], at[35]); MULADD(at[24], at[34]); MULADD(at[25], at[33]); MULADD(at[26], at[32]); + COMBA_STORE(C->dp[26]); + /* 27 */ + COMBA_FORWARD; + MULADD(at[0], at[59]); MULADD(at[1], at[58]); MULADD(at[2], at[57]); MULADD(at[3], at[56]); MULADD(at[4], at[55]); MULADD(at[5], at[54]); MULADD(at[6], at[53]); MULADD(at[7], at[52]); MULADD(at[8], at[51]); MULADD(at[9], at[50]); MULADD(at[10], at[49]); MULADD(at[11], at[48]); MULADD(at[12], at[47]); MULADD(at[13], at[46]); MULADD(at[14], at[45]); MULADD(at[15], at[44]); MULADD(at[16], at[43]); MULADD(at[17], at[42]); MULADD(at[18], at[41]); MULADD(at[19], at[40]); MULADD(at[20], at[39]); MULADD(at[21], at[38]); MULADD(at[22], at[37]); MULADD(at[23], at[36]); MULADD(at[24], at[35]); MULADD(at[25], at[34]); MULADD(at[26], at[33]); MULADD(at[27], at[32]); + COMBA_STORE(C->dp[27]); + /* 28 */ + COMBA_FORWARD; + MULADD(at[0], at[60]); MULADD(at[1], at[59]); MULADD(at[2], at[58]); MULADD(at[3], at[57]); MULADD(at[4], at[56]); MULADD(at[5], at[55]); MULADD(at[6], at[54]); MULADD(at[7], at[53]); MULADD(at[8], at[52]); MULADD(at[9], at[51]); MULADD(at[10], at[50]); MULADD(at[11], at[49]); MULADD(at[12], at[48]); MULADD(at[13], at[47]); MULADD(at[14], at[46]); MULADD(at[15], at[45]); MULADD(at[16], at[44]); 
MULADD(at[17], at[43]); MULADD(at[18], at[42]); MULADD(at[19], at[41]); MULADD(at[20], at[40]); MULADD(at[21], at[39]); MULADD(at[22], at[38]); MULADD(at[23], at[37]); MULADD(at[24], at[36]); MULADD(at[25], at[35]); MULADD(at[26], at[34]); MULADD(at[27], at[33]); MULADD(at[28], at[32]); + COMBA_STORE(C->dp[28]); + /* 29 */ + COMBA_FORWARD; + MULADD(at[0], at[61]); MULADD(at[1], at[60]); MULADD(at[2], at[59]); MULADD(at[3], at[58]); MULADD(at[4], at[57]); MULADD(at[5], at[56]); MULADD(at[6], at[55]); MULADD(at[7], at[54]); MULADD(at[8], at[53]); MULADD(at[9], at[52]); MULADD(at[10], at[51]); MULADD(at[11], at[50]); MULADD(at[12], at[49]); MULADD(at[13], at[48]); MULADD(at[14], at[47]); MULADD(at[15], at[46]); MULADD(at[16], at[45]); MULADD(at[17], at[44]); MULADD(at[18], at[43]); MULADD(at[19], at[42]); MULADD(at[20], at[41]); MULADD(at[21], at[40]); MULADD(at[22], at[39]); MULADD(at[23], at[38]); MULADD(at[24], at[37]); MULADD(at[25], at[36]); MULADD(at[26], at[35]); MULADD(at[27], at[34]); MULADD(at[28], at[33]); MULADD(at[29], at[32]); + COMBA_STORE(C->dp[29]); + /* 30 */ + COMBA_FORWARD; + MULADD(at[0], at[62]); MULADD(at[1], at[61]); MULADD(at[2], at[60]); MULADD(at[3], at[59]); MULADD(at[4], at[58]); MULADD(at[5], at[57]); MULADD(at[6], at[56]); MULADD(at[7], at[55]); MULADD(at[8], at[54]); MULADD(at[9], at[53]); MULADD(at[10], at[52]); MULADD(at[11], at[51]); MULADD(at[12], at[50]); MULADD(at[13], at[49]); MULADD(at[14], at[48]); MULADD(at[15], at[47]); MULADD(at[16], at[46]); MULADD(at[17], at[45]); MULADD(at[18], at[44]); MULADD(at[19], at[43]); MULADD(at[20], at[42]); MULADD(at[21], at[41]); MULADD(at[22], at[40]); MULADD(at[23], at[39]); MULADD(at[24], at[38]); MULADD(at[25], at[37]); MULADD(at[26], at[36]); MULADD(at[27], at[35]); MULADD(at[28], at[34]); MULADD(at[29], at[33]); MULADD(at[30], at[32]); + COMBA_STORE(C->dp[30]); + /* 31 */ + COMBA_FORWARD; + MULADD(at[0], at[63]); MULADD(at[1], at[62]); MULADD(at[2], at[61]); MULADD(at[3], at[60]); 
MULADD(at[4], at[59]); MULADD(at[5], at[58]); MULADD(at[6], at[57]); MULADD(at[7], at[56]); MULADD(at[8], at[55]); MULADD(at[9], at[54]); MULADD(at[10], at[53]); MULADD(at[11], at[52]); MULADD(at[12], at[51]); MULADD(at[13], at[50]); MULADD(at[14], at[49]); MULADD(at[15], at[48]); MULADD(at[16], at[47]); MULADD(at[17], at[46]); MULADD(at[18], at[45]); MULADD(at[19], at[44]); MULADD(at[20], at[43]); MULADD(at[21], at[42]); MULADD(at[22], at[41]); MULADD(at[23], at[40]); MULADD(at[24], at[39]); MULADD(at[25], at[38]); MULADD(at[26], at[37]); MULADD(at[27], at[36]); MULADD(at[28], at[35]); MULADD(at[29], at[34]); MULADD(at[30], at[33]); MULADD(at[31], at[32]); + COMBA_STORE(C->dp[31]); + /* 32 */ + COMBA_FORWARD; + MULADD(at[1], at[63]); MULADD(at[2], at[62]); MULADD(at[3], at[61]); MULADD(at[4], at[60]); MULADD(at[5], at[59]); MULADD(at[6], at[58]); MULADD(at[7], at[57]); MULADD(at[8], at[56]); MULADD(at[9], at[55]); MULADD(at[10], at[54]); MULADD(at[11], at[53]); MULADD(at[12], at[52]); MULADD(at[13], at[51]); MULADD(at[14], at[50]); MULADD(at[15], at[49]); MULADD(at[16], at[48]); MULADD(at[17], at[47]); MULADD(at[18], at[46]); MULADD(at[19], at[45]); MULADD(at[20], at[44]); MULADD(at[21], at[43]); MULADD(at[22], at[42]); MULADD(at[23], at[41]); MULADD(at[24], at[40]); MULADD(at[25], at[39]); MULADD(at[26], at[38]); MULADD(at[27], at[37]); MULADD(at[28], at[36]); MULADD(at[29], at[35]); MULADD(at[30], at[34]); MULADD(at[31], at[33]); + COMBA_STORE(C->dp[32]); + /* 33 */ + COMBA_FORWARD; + MULADD(at[2], at[63]); MULADD(at[3], at[62]); MULADD(at[4], at[61]); MULADD(at[5], at[60]); MULADD(at[6], at[59]); MULADD(at[7], at[58]); MULADD(at[8], at[57]); MULADD(at[9], at[56]); MULADD(at[10], at[55]); MULADD(at[11], at[54]); MULADD(at[12], at[53]); MULADD(at[13], at[52]); MULADD(at[14], at[51]); MULADD(at[15], at[50]); MULADD(at[16], at[49]); MULADD(at[17], at[48]); MULADD(at[18], at[47]); MULADD(at[19], at[46]); MULADD(at[20], at[45]); MULADD(at[21], at[44]); MULADD(at[22], 
at[43]); MULADD(at[23], at[42]); MULADD(at[24], at[41]); MULADD(at[25], at[40]); MULADD(at[26], at[39]); MULADD(at[27], at[38]); MULADD(at[28], at[37]); MULADD(at[29], at[36]); MULADD(at[30], at[35]); MULADD(at[31], at[34]); + COMBA_STORE(C->dp[33]); + /* 34 */ + COMBA_FORWARD; + MULADD(at[3], at[63]); MULADD(at[4], at[62]); MULADD(at[5], at[61]); MULADD(at[6], at[60]); MULADD(at[7], at[59]); MULADD(at[8], at[58]); MULADD(at[9], at[57]); MULADD(at[10], at[56]); MULADD(at[11], at[55]); MULADD(at[12], at[54]); MULADD(at[13], at[53]); MULADD(at[14], at[52]); MULADD(at[15], at[51]); MULADD(at[16], at[50]); MULADD(at[17], at[49]); MULADD(at[18], at[48]); MULADD(at[19], at[47]); MULADD(at[20], at[46]); MULADD(at[21], at[45]); MULADD(at[22], at[44]); MULADD(at[23], at[43]); MULADD(at[24], at[42]); MULADD(at[25], at[41]); MULADD(at[26], at[40]); MULADD(at[27], at[39]); MULADD(at[28], at[38]); MULADD(at[29], at[37]); MULADD(at[30], at[36]); MULADD(at[31], at[35]); + COMBA_STORE(C->dp[34]); + /* 35 */ + COMBA_FORWARD; + MULADD(at[4], at[63]); MULADD(at[5], at[62]); MULADD(at[6], at[61]); MULADD(at[7], at[60]); MULADD(at[8], at[59]); MULADD(at[9], at[58]); MULADD(at[10], at[57]); MULADD(at[11], at[56]); MULADD(at[12], at[55]); MULADD(at[13], at[54]); MULADD(at[14], at[53]); MULADD(at[15], at[52]); MULADD(at[16], at[51]); MULADD(at[17], at[50]); MULADD(at[18], at[49]); MULADD(at[19], at[48]); MULADD(at[20], at[47]); MULADD(at[21], at[46]); MULADD(at[22], at[45]); MULADD(at[23], at[44]); MULADD(at[24], at[43]); MULADD(at[25], at[42]); MULADD(at[26], at[41]); MULADD(at[27], at[40]); MULADD(at[28], at[39]); MULADD(at[29], at[38]); MULADD(at[30], at[37]); MULADD(at[31], at[36]); + COMBA_STORE(C->dp[35]); + /* 36 */ + COMBA_FORWARD; + MULADD(at[5], at[63]); MULADD(at[6], at[62]); MULADD(at[7], at[61]); MULADD(at[8], at[60]); MULADD(at[9], at[59]); MULADD(at[10], at[58]); MULADD(at[11], at[57]); MULADD(at[12], at[56]); MULADD(at[13], at[55]); MULADD(at[14], at[54]); MULADD(at[15], 
at[53]); MULADD(at[16], at[52]); MULADD(at[17], at[51]); MULADD(at[18], at[50]); MULADD(at[19], at[49]); MULADD(at[20], at[48]); MULADD(at[21], at[47]); MULADD(at[22], at[46]); MULADD(at[23], at[45]); MULADD(at[24], at[44]); MULADD(at[25], at[43]); MULADD(at[26], at[42]); MULADD(at[27], at[41]); MULADD(at[28], at[40]); MULADD(at[29], at[39]); MULADD(at[30], at[38]); MULADD(at[31], at[37]); + COMBA_STORE(C->dp[36]); + /* 37 */ + COMBA_FORWARD; + MULADD(at[6], at[63]); MULADD(at[7], at[62]); MULADD(at[8], at[61]); MULADD(at[9], at[60]); MULADD(at[10], at[59]); MULADD(at[11], at[58]); MULADD(at[12], at[57]); MULADD(at[13], at[56]); MULADD(at[14], at[55]); MULADD(at[15], at[54]); MULADD(at[16], at[53]); MULADD(at[17], at[52]); MULADD(at[18], at[51]); MULADD(at[19], at[50]); MULADD(at[20], at[49]); MULADD(at[21], at[48]); MULADD(at[22], at[47]); MULADD(at[23], at[46]); MULADD(at[24], at[45]); MULADD(at[25], at[44]); MULADD(at[26], at[43]); MULADD(at[27], at[42]); MULADD(at[28], at[41]); MULADD(at[29], at[40]); MULADD(at[30], at[39]); MULADD(at[31], at[38]); + COMBA_STORE(C->dp[37]); + /* 38 */ + COMBA_FORWARD; + MULADD(at[7], at[63]); MULADD(at[8], at[62]); MULADD(at[9], at[61]); MULADD(at[10], at[60]); MULADD(at[11], at[59]); MULADD(at[12], at[58]); MULADD(at[13], at[57]); MULADD(at[14], at[56]); MULADD(at[15], at[55]); MULADD(at[16], at[54]); MULADD(at[17], at[53]); MULADD(at[18], at[52]); MULADD(at[19], at[51]); MULADD(at[20], at[50]); MULADD(at[21], at[49]); MULADD(at[22], at[48]); MULADD(at[23], at[47]); MULADD(at[24], at[46]); MULADD(at[25], at[45]); MULADD(at[26], at[44]); MULADD(at[27], at[43]); MULADD(at[28], at[42]); MULADD(at[29], at[41]); MULADD(at[30], at[40]); MULADD(at[31], at[39]); + COMBA_STORE(C->dp[38]); + /* 39 */ + COMBA_FORWARD; + MULADD(at[8], at[63]); MULADD(at[9], at[62]); MULADD(at[10], at[61]); MULADD(at[11], at[60]); MULADD(at[12], at[59]); MULADD(at[13], at[58]); MULADD(at[14], at[57]); MULADD(at[15], at[56]); MULADD(at[16], at[55]); 
MULADD(at[17], at[54]); MULADD(at[18], at[53]); MULADD(at[19], at[52]); MULADD(at[20], at[51]); MULADD(at[21], at[50]); MULADD(at[22], at[49]); MULADD(at[23], at[48]); MULADD(at[24], at[47]); MULADD(at[25], at[46]); MULADD(at[26], at[45]); MULADD(at[27], at[44]); MULADD(at[28], at[43]); MULADD(at[29], at[42]); MULADD(at[30], at[41]); MULADD(at[31], at[40]); + COMBA_STORE(C->dp[39]); + /* 40 */ + COMBA_FORWARD; + MULADD(at[9], at[63]); MULADD(at[10], at[62]); MULADD(at[11], at[61]); MULADD(at[12], at[60]); MULADD(at[13], at[59]); MULADD(at[14], at[58]); MULADD(at[15], at[57]); MULADD(at[16], at[56]); MULADD(at[17], at[55]); MULADD(at[18], at[54]); MULADD(at[19], at[53]); MULADD(at[20], at[52]); MULADD(at[21], at[51]); MULADD(at[22], at[50]); MULADD(at[23], at[49]); MULADD(at[24], at[48]); MULADD(at[25], at[47]); MULADD(at[26], at[46]); MULADD(at[27], at[45]); MULADD(at[28], at[44]); MULADD(at[29], at[43]); MULADD(at[30], at[42]); MULADD(at[31], at[41]); + COMBA_STORE(C->dp[40]); + /* 41 */ + COMBA_FORWARD; + MULADD(at[10], at[63]); MULADD(at[11], at[62]); MULADD(at[12], at[61]); MULADD(at[13], at[60]); MULADD(at[14], at[59]); MULADD(at[15], at[58]); MULADD(at[16], at[57]); MULADD(at[17], at[56]); MULADD(at[18], at[55]); MULADD(at[19], at[54]); MULADD(at[20], at[53]); MULADD(at[21], at[52]); MULADD(at[22], at[51]); MULADD(at[23], at[50]); MULADD(at[24], at[49]); MULADD(at[25], at[48]); MULADD(at[26], at[47]); MULADD(at[27], at[46]); MULADD(at[28], at[45]); MULADD(at[29], at[44]); MULADD(at[30], at[43]); MULADD(at[31], at[42]); + COMBA_STORE(C->dp[41]); + /* 42 */ + COMBA_FORWARD; + MULADD(at[11], at[63]); MULADD(at[12], at[62]); MULADD(at[13], at[61]); MULADD(at[14], at[60]); MULADD(at[15], at[59]); MULADD(at[16], at[58]); MULADD(at[17], at[57]); MULADD(at[18], at[56]); MULADD(at[19], at[55]); MULADD(at[20], at[54]); MULADD(at[21], at[53]); MULADD(at[22], at[52]); MULADD(at[23], at[51]); MULADD(at[24], at[50]); MULADD(at[25], at[49]); MULADD(at[26], at[48]); 
MULADD(at[27], at[47]); MULADD(at[28], at[46]); MULADD(at[29], at[45]); MULADD(at[30], at[44]); MULADD(at[31], at[43]); + COMBA_STORE(C->dp[42]); + /* 43 */ + COMBA_FORWARD; + MULADD(at[12], at[63]); MULADD(at[13], at[62]); MULADD(at[14], at[61]); MULADD(at[15], at[60]); MULADD(at[16], at[59]); MULADD(at[17], at[58]); MULADD(at[18], at[57]); MULADD(at[19], at[56]); MULADD(at[20], at[55]); MULADD(at[21], at[54]); MULADD(at[22], at[53]); MULADD(at[23], at[52]); MULADD(at[24], at[51]); MULADD(at[25], at[50]); MULADD(at[26], at[49]); MULADD(at[27], at[48]); MULADD(at[28], at[47]); MULADD(at[29], at[46]); MULADD(at[30], at[45]); MULADD(at[31], at[44]); + COMBA_STORE(C->dp[43]); + /* 44 */ + COMBA_FORWARD; + MULADD(at[13], at[63]); MULADD(at[14], at[62]); MULADD(at[15], at[61]); MULADD(at[16], at[60]); MULADD(at[17], at[59]); MULADD(at[18], at[58]); MULADD(at[19], at[57]); MULADD(at[20], at[56]); MULADD(at[21], at[55]); MULADD(at[22], at[54]); MULADD(at[23], at[53]); MULADD(at[24], at[52]); MULADD(at[25], at[51]); MULADD(at[26], at[50]); MULADD(at[27], at[49]); MULADD(at[28], at[48]); MULADD(at[29], at[47]); MULADD(at[30], at[46]); MULADD(at[31], at[45]); + COMBA_STORE(C->dp[44]); + /* 45 */ + COMBA_FORWARD; + MULADD(at[14], at[63]); MULADD(at[15], at[62]); MULADD(at[16], at[61]); MULADD(at[17], at[60]); MULADD(at[18], at[59]); MULADD(at[19], at[58]); MULADD(at[20], at[57]); MULADD(at[21], at[56]); MULADD(at[22], at[55]); MULADD(at[23], at[54]); MULADD(at[24], at[53]); MULADD(at[25], at[52]); MULADD(at[26], at[51]); MULADD(at[27], at[50]); MULADD(at[28], at[49]); MULADD(at[29], at[48]); MULADD(at[30], at[47]); MULADD(at[31], at[46]); + COMBA_STORE(C->dp[45]); + /* 46 */ + COMBA_FORWARD; + MULADD(at[15], at[63]); MULADD(at[16], at[62]); MULADD(at[17], at[61]); MULADD(at[18], at[60]); MULADD(at[19], at[59]); MULADD(at[20], at[58]); MULADD(at[21], at[57]); MULADD(at[22], at[56]); MULADD(at[23], at[55]); MULADD(at[24], at[54]); MULADD(at[25], at[53]); MULADD(at[26], at[52]); 
MULADD(at[27], at[51]); MULADD(at[28], at[50]); MULADD(at[29], at[49]); MULADD(at[30], at[48]); MULADD(at[31], at[47]); + COMBA_STORE(C->dp[46]); + /* 47 */ + COMBA_FORWARD; + MULADD(at[16], at[63]); MULADD(at[17], at[62]); MULADD(at[18], at[61]); MULADD(at[19], at[60]); MULADD(at[20], at[59]); MULADD(at[21], at[58]); MULADD(at[22], at[57]); MULADD(at[23], at[56]); MULADD(at[24], at[55]); MULADD(at[25], at[54]); MULADD(at[26], at[53]); MULADD(at[27], at[52]); MULADD(at[28], at[51]); MULADD(at[29], at[50]); MULADD(at[30], at[49]); MULADD(at[31], at[48]); + COMBA_STORE(C->dp[47]); + /* 48 */ + COMBA_FORWARD; + MULADD(at[17], at[63]); MULADD(at[18], at[62]); MULADD(at[19], at[61]); MULADD(at[20], at[60]); MULADD(at[21], at[59]); MULADD(at[22], at[58]); MULADD(at[23], at[57]); MULADD(at[24], at[56]); MULADD(at[25], at[55]); MULADD(at[26], at[54]); MULADD(at[27], at[53]); MULADD(at[28], at[52]); MULADD(at[29], at[51]); MULADD(at[30], at[50]); MULADD(at[31], at[49]); + COMBA_STORE(C->dp[48]); + /* 49 */ + COMBA_FORWARD; + MULADD(at[18], at[63]); MULADD(at[19], at[62]); MULADD(at[20], at[61]); MULADD(at[21], at[60]); MULADD(at[22], at[59]); MULADD(at[23], at[58]); MULADD(at[24], at[57]); MULADD(at[25], at[56]); MULADD(at[26], at[55]); MULADD(at[27], at[54]); MULADD(at[28], at[53]); MULADD(at[29], at[52]); MULADD(at[30], at[51]); MULADD(at[31], at[50]); + COMBA_STORE(C->dp[49]); + /* 50 */ + COMBA_FORWARD; + MULADD(at[19], at[63]); MULADD(at[20], at[62]); MULADD(at[21], at[61]); MULADD(at[22], at[60]); MULADD(at[23], at[59]); MULADD(at[24], at[58]); MULADD(at[25], at[57]); MULADD(at[26], at[56]); MULADD(at[27], at[55]); MULADD(at[28], at[54]); MULADD(at[29], at[53]); MULADD(at[30], at[52]); MULADD(at[31], at[51]); + COMBA_STORE(C->dp[50]); + /* 51 */ + COMBA_FORWARD; + MULADD(at[20], at[63]); MULADD(at[21], at[62]); MULADD(at[22], at[61]); MULADD(at[23], at[60]); MULADD(at[24], at[59]); MULADD(at[25], at[58]); MULADD(at[26], at[57]); MULADD(at[27], at[56]); MULADD(at[28], 
at[55]); MULADD(at[29], at[54]); MULADD(at[30], at[53]); MULADD(at[31], at[52]); + COMBA_STORE(C->dp[51]); + /* 52 */ + COMBA_FORWARD; + MULADD(at[21], at[63]); MULADD(at[22], at[62]); MULADD(at[23], at[61]); MULADD(at[24], at[60]); MULADD(at[25], at[59]); MULADD(at[26], at[58]); MULADD(at[27], at[57]); MULADD(at[28], at[56]); MULADD(at[29], at[55]); MULADD(at[30], at[54]); MULADD(at[31], at[53]); + COMBA_STORE(C->dp[52]); + /* 53 */ + COMBA_FORWARD; + MULADD(at[22], at[63]); MULADD(at[23], at[62]); MULADD(at[24], at[61]); MULADD(at[25], at[60]); MULADD(at[26], at[59]); MULADD(at[27], at[58]); MULADD(at[28], at[57]); MULADD(at[29], at[56]); MULADD(at[30], at[55]); MULADD(at[31], at[54]); + COMBA_STORE(C->dp[53]); + /* 54 */ + COMBA_FORWARD; + MULADD(at[23], at[63]); MULADD(at[24], at[62]); MULADD(at[25], at[61]); MULADD(at[26], at[60]); MULADD(at[27], at[59]); MULADD(at[28], at[58]); MULADD(at[29], at[57]); MULADD(at[30], at[56]); MULADD(at[31], at[55]); + COMBA_STORE(C->dp[54]); + /* 55 */ + COMBA_FORWARD; + MULADD(at[24], at[63]); MULADD(at[25], at[62]); MULADD(at[26], at[61]); MULADD(at[27], at[60]); MULADD(at[28], at[59]); MULADD(at[29], at[58]); MULADD(at[30], at[57]); MULADD(at[31], at[56]); + COMBA_STORE(C->dp[55]); + /* 56 */ + COMBA_FORWARD; + MULADD(at[25], at[63]); MULADD(at[26], at[62]); MULADD(at[27], at[61]); MULADD(at[28], at[60]); MULADD(at[29], at[59]); MULADD(at[30], at[58]); MULADD(at[31], at[57]); + COMBA_STORE(C->dp[56]); + /* 57 */ + COMBA_FORWARD; + MULADD(at[26], at[63]); MULADD(at[27], at[62]); MULADD(at[28], at[61]); MULADD(at[29], at[60]); MULADD(at[30], at[59]); MULADD(at[31], at[58]); + COMBA_STORE(C->dp[57]); + /* 58 */ + COMBA_FORWARD; + MULADD(at[27], at[63]); MULADD(at[28], at[62]); MULADD(at[29], at[61]); MULADD(at[30], at[60]); MULADD(at[31], at[59]); + COMBA_STORE(C->dp[58]); + /* 59 */ + COMBA_FORWARD; + MULADD(at[28], at[63]); MULADD(at[29], at[62]); MULADD(at[30], at[61]); MULADD(at[31], at[60]); + COMBA_STORE(C->dp[59]); + /* 
60 */ + COMBA_FORWARD; + MULADD(at[29], at[63]); MULADD(at[30], at[62]); MULADD(at[31], at[61]); + COMBA_STORE(C->dp[60]); + /* 61 */ + COMBA_FORWARD; + MULADD(at[30], at[63]); MULADD(at[31], at[62]); + COMBA_STORE(C->dp[61]); + /* 62 */ + COMBA_FORWARD; + MULADD(at[31], at[63]); + COMBA_STORE(C->dp[62]); + COMBA_STORE2(C->dp[63]); + C->used = 64; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; +} + +#endif diff --git a/fp_mul_d.c b/fp_mul_d.c new file mode 100644 index 0000000..dcf43d4 --- /dev/null +++ b/fp_mul_d.c @@ -0,0 +1,36 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = a * b */ +void fp_mul_d(fp_int *a, fp_digit b, fp_int *c) +{ + fp_word w; + int x, oldused; + + oldused = c->used; + c->used = a->used; + c->sign = a->sign; + w = 0; + for (x = 0; x < a->used; x++) { + w = ((fp_word)a->dp[x]) * ((fp_word)b) + w; + c->dp[x] = (fp_digit)w; + w = w >> DIGIT_BIT; + } + if (w != 0 && (a->used != FP_SIZE)) { + c->dp[c->used++] = w; + ++x; + } + for (; x < oldused; x++) { + c->dp[x] = 0; + } + fp_clamp(c); +} + diff --git a/fp_mulmod.c b/fp_mulmod.c new file mode 100644 index 0000000..c9d008a --- /dev/null +++ b/fp_mulmod.c @@ -0,0 +1,18 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. 
+ * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include +/* d = a * b (mod c) */ +int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d) +{ + fp_int tmp; + fp_zero(&tmp); + fp_mul(a, b, &tmp); + return fp_mod(&tmp, c, d); +} diff --git a/fp_prime_miller_rabin.c b/fp_prime_miller_rabin.c new file mode 100644 index 0000000..92c6dd2 --- /dev/null +++ b/fp_prime_miller_rabin.c @@ -0,0 +1,73 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* Miller-Rabin test of "a" to the base of "b" as described in + * HAC pp. 139 Algorithm 4.24 + * + * Sets result to 0 if definitely composite or 1 if probably prime. + * Randomly the chance of error is no more than 1/4 and often + * very much lower. + */ +void fp_prime_miller_rabin (fp_int * a, fp_int * b, int *result) +{ + fp_int n1, y, r; + int s, j; + + /* default */ + *result = FP_NO; + + /* ensure b > 1 */ + if (fp_cmp_d(b, 1) != FP_GT) { + return; + } + + /* get n1 = a - 1 */ + fp_init_copy(&n1, a); + fp_sub_d(&n1, 1, &n1); + + /* set 2**s * r = n1 */ + fp_init_copy(&r, &n1); + + /* count the number of least significant bits + * which are zero + */ + s = fp_cnt_lsb(&r); + + /* now divide n - 1 by 2**s */ + fp_div_2d (&r, s, &r, NULL); + + /* compute y = b**r mod a */ + fp_init(&y); + fp_exptmod(b, &r, a, &y); + + /* if y != 1 and y != n1 do */ + if (fp_cmp_d (&y, 1) != FP_EQ && fp_cmp (&y, &n1) != FP_EQ) { + j = 1; + /* while j <= s-1 and y != n1 */ + while ((j <= (s - 1)) && fp_cmp (&y, &n1) != FP_EQ) { + fp_sqrmod (&y, a, &y); + + /* if y == 1 then composite */ + if (fp_cmp_d (&y, 1) == FP_EQ) { + return; + } + ++j; + } + + /* if y != n1 then composite */ + if (fp_cmp (&y, &n1) != FP_EQ) { + return; + } + } + + /* probably prime now */ + *result = FP_YES; +} diff --git a/fp_prime_random_ex.c 
b/fp_prime_random_ex.c new file mode 100644 index 0000000..45e494d --- /dev/null +++ b/fp_prime_random_ex.c @@ -0,0 +1,97 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* This is possibly the mother of all prime generation functions, muahahahahaha! */ +int fp_prime_random_ex(fp_int *a, int t, int size, int flags, tfm_prime_callback cb, void *dat) +{ + unsigned char *tmp, maskAND, maskOR_msb, maskOR_lsb; + int res, err, bsize, maskOR_msb_offset; + + /* sanity check the input */ + if (size <= 1 || t <= 0) { + return FP_VAL; + } + + /* TFM_PRIME_SAFE implies TFM_PRIME_BBS */ + if (flags & TFM_PRIME_SAFE) { + flags |= TFM_PRIME_BBS; + } + + /* calc the byte size */ + bsize = (size>>3)+(size&7?1:0); + + /* we need a buffer of bsize bytes */ + tmp = malloc(bsize); + if (tmp == NULL) { + return FP_MEM; + } + + /* calc the maskAND value for the MSbyte*/ + maskAND = 0xFF >> (8 - (size & 7)); + + /* calc the maskOR_msb */ + maskOR_msb = 0; + maskOR_msb_offset = (size - 2) >> 3; + if (flags & TFM_PRIME_2MSB_ON) { + maskOR_msb |= 1 << ((size - 2) & 7); + } else if (flags & TFM_PRIME_2MSB_OFF) { + maskAND &= ~(1 << ((size - 2) & 7)); + } + + /* get the maskOR_lsb */ + maskOR_lsb = 1; + if (flags & TFM_PRIME_BBS) { + maskOR_lsb |= 3; + } + + do { + /* read the bytes */ + if (cb(tmp, bsize, dat) != bsize) { + err = FP_VAL; + goto error; + } + + /* work over the MSbyte */ + tmp[0] &= maskAND; + tmp[0] |= 1 << ((size - 1) & 7); + + /* mix in the maskORs */ + tmp[maskOR_msb_offset] |= maskOR_msb; + tmp[bsize-1] |= maskOR_lsb; + + /* read it in */ + fp_read_unsigned_bin(a, tmp, bsize); + + /* is it prime? 
*/ + res = fp_isprime(a); + if (res == FP_NO) continue; + + if (flags & TFM_PRIME_SAFE) { + /* see if (a-1)/2 is prime */ + fp_sub_d(a, 1, a); + fp_div_2(a, a); + + /* is it prime? */ + res = fp_isprime(a); + } + } while (res == FP_NO); + + if (flags & TFM_PRIME_SAFE) { + /* restore a to the original value */ + fp_mul_2(a, a); + fp_add_d(a, 1, a); + } + + err = FP_OKAY; +error: + free(tmp); + return err; +} diff --git a/fp_radix_size.c b/fp_radix_size.c new file mode 100644 index 0000000..c163fec --- /dev/null +++ b/fp_radix_size.c @@ -0,0 +1,14 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +int fp_radix_size(fp_int *a, int radix, int *size) +{ +} diff --git a/fp_read_radix.c b/fp_read_radix.c new file mode 100644 index 0000000..bcfa068 --- /dev/null +++ b/fp_read_radix.c @@ -0,0 +1,66 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +int fp_read_radix(fp_int *a, char *str, int radix) +{ + int y, neg; + char ch; + + /* make sure the radix is ok */ + if (radix < 2 || radix > 64) { + return FP_VAL; + } + + /* if the leading digit is a + * minus set the sign to negative. + */ + if (*str == '-') { + ++str; + neg = FP_NEG; + } else { + neg = FP_ZPOS; + } + + /* set the integer to the default of zero */ + fp_zero (a); + + /* process each digit of the string */ + while (*str) { + /* if the radix < 36 the conversion is case insensitive + * this allows numbers like 1AB and 1ab to represent the same value + * [e.g. in hex] + */ + ch = (char) ((radix < 36) ? 
toupper (*str) : *str); + for (y = 0; y < 64; y++) { + if (ch == fp_s_rmap[y]) { + break; + } + } + + /* if the char was found in the map + * and is less than the given radix add it + * to the number, otherwise exit the loop. + */ + if (y < radix) { + fp_mul_d (a, (fp_digit) radix, a); + fp_add_d (a, (fp_digit) y, a); + } else { + break; + } + ++str; + } + + /* set the sign only if a != 0 */ + if (fp_iszero(a) != FP_YES) { + a->sign = neg; + } + return FP_OKAY; +} diff --git a/fp_read_signed_bin.c b/fp_read_signed_bin.c new file mode 100644 index 0000000..6721cf3 --- /dev/null +++ b/fp_read_signed_bin.c @@ -0,0 +1,23 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +void fp_read_signed_bin(fp_int *a, unsigned char *b, int c) +{ + /* read magnitude */ + fp_read_unsigned_bin (a, b + 1, c - 1); + + /* first byte is 0 for positive, non-zero for negative */ + if (b[0] == 0) { + a->sign = FP_ZPOS; + } else { + a->sign = FP_NEG; + } +} diff --git a/fp_read_unsigned_bin.c b/fp_read_unsigned_bin.c new file mode 100644 index 0000000..5cab45e --- /dev/null +++ b/fp_read_unsigned_bin.c @@ -0,0 +1,24 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. 
+ * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +void fp_read_unsigned_bin(fp_int *a, unsigned char *b, int c) +{ + /* zero the int */ + fp_zero (a); + + /* read the bytes in */ + for (; c > 0; c--) { + fp_mul_2d (a, 8, a); + a->dp[0] |= *b++; + a->used += 1; + } + fp_clamp (a); +} diff --git a/fp_reverse.c b/fp_reverse.c new file mode 100644 index 0000000..de5ea41 --- /dev/null +++ b/fp_reverse.c @@ -0,0 +1,27 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* reverse an array, used for radix code */ +void bn_reverse (unsigned char *s, int len) +{ + int ix, iy; + unsigned char t; + + ix = 0; + iy = len - 1; + while (ix < iy) { + t = s[ix]; + s[ix] = s[iy]; + s[iy] = t; + ++ix; + --iy; + } +} diff --git a/fp_rshd.c b/fp_rshd.c new file mode 100644 index 0000000..225d1cd --- /dev/null +++ b/fp_rshd.c @@ -0,0 +1,36 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +void fp_rshd(fp_int *a, int x) +{ + int y; + + /* too many digits just zero and return */ + if (x >= a->used) { + fp_zero(a); + return; + } + + /* shift */ + for (y = 0; y < a->used - x; y++) { + a->dp[y] = a->dp[y+x]; + } + + /* zero rest */ + for (; y < a->used; y++) { + a->dp[y] = 0; + } + + /* decrement count */ + a->used -= x; + fp_clamp(a); +} + diff --git a/fp_s_rmap.c b/fp_s_rmap.c new file mode 100644 index 0000000..53ac5b3 --- /dev/null +++ b/fp_s_rmap.c @@ -0,0 +1,13 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. 
That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* chars used in radix conversions */ +const char *fp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; diff --git a/fp_set.c b/fp_set.c new file mode 100644 index 0000000..01428b5 --- /dev/null +++ b/fp_set.c @@ -0,0 +1,17 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +void fp_set(fp_int *a, fp_digit b) +{ + fp_zero(a); + a->dp[0] = b; + a->used = b ? 1 : 0; +} diff --git a/fp_signed_bin_size.c b/fp_signed_bin_size.c new file mode 100644 index 0000000..016163f --- /dev/null +++ b/fp_signed_bin_size.c @@ -0,0 +1,15 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +int fp_signed_bin_size(fp_int *a) +{ + return 1 + fp_unsigned_bin_size (a); +} diff --git a/fp_sqr.c b/fp_sqr.c new file mode 100644 index 0000000..663c444 --- /dev/null +++ b/fp_sqr.c @@ -0,0 +1,107 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. 
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* B = A*A
+ *
+ * Up to 48 digits the work goes to a comba squarer (a fixed-size 4/8/16/32
+ * digit version where the digit count suits it, the generic one otherwise);
+ * anything larger is split in half and squared recursively with Karatsuba.
+ */
+void fp_sqr(fp_int *A, fp_int *B)
+{
+    int     half, ndigits, i;
+    fp_int  hi_sq, lo_sq, mid, diff, part;
+
+    ndigits = A->used;
+    if (ndigits > 48) {
+        /* Karatsuba squaring
+
+           Write A = a*R + b where R is "half" digits wide.  Then
+
+           A^2 = a^2*R^2 + (a^2 + b^2 - (a-b)^2)*R + b^2
+
+           so three half-size squarings plus some careful shifting and
+           addition give the full result.
+
+           Obvious points of optimization
+
+           - the "a" part can be memcpy'ed with an offset [all you have to
+             do is zero upto the next 8 digits]
+           - similarly the "b" part can be memcpy'ed and zeroed to 8
+        */
+        half = ndigits >> 1;
+
+        /* part = a: the digits of A from "half" upwards, zero padded, i.e.
+         * fp_copy(A, &part); fp_rshd(&part, half); done digit by digit */
+        for (i = 0; i < A->used - half; i++) {
+            part.dp[i] = A->dp[i + half];
+        }
+        while (i < FP_SIZE) {
+            part.dp[i++] = 0;
+        }
+        /* half = used/2 never exceeds used, so this cannot go negative */
+        part.used = A->used - half;
+        part.sign = A->sign;
+
+        /* keep a copy of a for the (a-b) term, then hi_sq = a^2 */
+        fp_copy(&part, &diff);
+        fp_zero(&hi_sq);
+        fp_sqr(&part, &hi_sq);
+
+        /* part = b: the low "half" digits of A, zero padded, i.e.
+         * fp_mod_2d(A, half * DIGIT_BIT, &part); done digit by digit */
+        for (i = 0; i < half; i++) {
+            part.dp[i] = A->dp[i];
+        }
+        while (i < FP_SIZE) {
+            part.dp[i++] = 0;
+        }
+        part.used = half;
+        fp_clamp(&part);
+
+        /* diff = a - b and lo_sq = b^2 */
+        fp_sub(&diff, &part, &diff);
+        fp_zero(&lo_sq);
+        fp_sqr(&part, &lo_sq);
+
+        /* mid = (a-b)^2 */
+        fp_zero(&mid);
+        fp_sqr(&diff, &mid);
+
+        /* middle term: mid = (a^2 + b^2 - (a-b)^2) * R */
+        mid.sign ^= 1;
+        fp_add(&mid, &hi_sq, &mid);
+        fp_add(&mid, &lo_sq, &mid);
+        fp_lshd(&mid, half);
+
+        /* leading term: hi_sq = a^2 * R^2 */
+        fp_lshd(&hi_sq, half + half);
+
+        /* now sum them together */
+        fp_zero(B);
+        fp_add(&hi_sq, &mid, B);
+        fp_add(&lo_sq, B, B);
+        B->sign = FP_ZPOS;
+    } else if (ndigits <= 4) {
+        fp_sqr_comba4(A,B);
+    } else if (ndigits <= 8) {
+        fp_sqr_comba8(A,B);
+    } else if (ndigits >= 12 && ndigits <= 16) {
+        fp_sqr_comba16(A,B);
+#ifdef TFM_HUGE
+    } else if (ndigits >= 28 && ndigits <= 32) {
+        fp_sqr_comba32(A,B);
+#endif
+    } else {
+        fp_sqr_comba(A, B);
+    }
+}
+
diff --git a/fp_sqr_comba.c b/fp_sqr_comba.c
new file mode 100644
index 0000000..84de74f --- /dev/null +++ b/fp_sqr_comba.c @@ -0,0 +1,956 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* About this file... +*/ + +#if defined(TFM_X86) + +/* x86-32 optimized */ + +#define COMBA_START + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + c0 = c1; c1 = c2; c2 = 0; + +#define COMBA_FINI + +#define SQRADD(i, j) \ +asm volatile ( \ + "movl %6,%%eax \n\t" \ + "mull %%eax \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc"); + +#define SQRADD2(i, j) \ +asm volatile ( \ + "movl %6,%%eax \n\t" \ + "mull %7 \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc"); + +#elif defined(TFM_X86_64) +/* x86-64 optimized */ + +#define COMBA_START + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + c0 = c1; c1 = c2; c2 = 0; + +#define COMBA_FINI + +#define SQRADD(i, j) \ +asm volatile ( \ + "movq %6,%%rax \n\t" \ + "mulq %%rax \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%rax","%rdx","%cc"); + +#define SQRADD2(i, j) \ +asm volatile ( \ + "movq %6,%%rax \n\t" \ + "mulq %7 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + :"=r"(c0), 
"=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%rax","%rdx","%cc"); + + +#elif defined(TFM_SSE2) + +/* SSE2 Optimized */ +#define COMBA_START + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + c0 = c1; c1 = c2; c2 = 0; + +#define COMBA_FINI \ + asm("emms"); + +#define SQRADD(i, j) \ +asm volatile ( \ + "movd %6,%%mm0 \n\t" \ + "pmuludq %%mm0,%%mm0\n\t" \ + "movd %%mm0,%%eax \n\t" \ + "psrlq $32,%%mm0 \n\t" \ + "movd %%mm0,%%edx \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc"); + +#define SQRADD2(i, j) \ +asm volatile ( \ + "movd %6,%%mm0 \n\t" \ + "movd %7,%%mm1 \n\t" \ + "pmuludq %%mm1,%%mm0\n\t" \ + "movd %%mm0,%%eax \n\t" \ + "psrlq $32,%%mm0 \n\t" \ + "movd %%mm0,%%edx \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc"); + +#elif defined(TFM_ARM) + +/* ARM code */ + +#define COMBA_START + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + c0 = c1; c1 = c2; c2 = 0; + +#define COMBA_FINI + +/* multiplies point i and j, updates carry "c1" and digit c2 */ +#define SQRADD(i, j) \ +asm( \ +" UMULL r0,r1,%6,%6 \n\t" \ +" ADDS %0,%0,r0 \n\t" \ +" ADCS %1,%1,r1 \n\t" \ +" ADC %2,%2,#0 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "%cc"); + +/* for squaring some of the terms are doubled... 
*/
+#define SQRADD2(i, j)                                             \
+asm(                                                             \
+" UMULL r0,r1,%6,%7 \n\t"                                        \
+" ADDS %0,%0,r0 \n\t"                                            \
+" ADCS %1,%1,r1 \n\t"                                            \
+" ADC %2,%2,#0 \n\t"                                             \
+" ADDS %0,%0,r0 \n\t"                                            \
+" ADCS %1,%1,r1 \n\t"                                            \
+" ADC %2,%2,#0 \n\t"                                             \
+:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc");
+
+#else
+
+/* ISO C portable code */
+
+#define COMBA_START
+
+#define CLEAR_CARRY \
+   c0 = c1 = c2 = 0;
+
+#define COMBA_STORE(x) \
+   x = c0;
+
+#define COMBA_STORE2(x) \
+   x = c1;
+
+#define CARRY_FORWARD \
+   c0 = c1; c1 = c2; c2 = 0;
+
+#define COMBA_FINI
+
+/* Add the double-digit product i*j into the three-digit column
+ * accumulator c2:c1:c0, using the caller's fp_word scratch variable t.
+ *
+ * Every partial sum is formed in fp_word, so a carry that ripples out of
+ * c0 and through c1 still reaches c2.  (A compare-and-increment such as
+ * "if (c0 < lo) ++c1;" silently drops that carry when c1 is all ones.)
+ * c0 + i*j cannot overflow fp_word: (2^w-1)^2 + (2^w-1) < 2^(2w).
+ *
+ * NOTE(review): assumes DIGIT_BIT is the full width of fp_digit and that
+ * fp_word is at least twice as wide -- confirm against tfm.h.
+ */
+#define SQRADD(i, j)                                                \
+   do {                                                             \
+      t   = ((fp_word)c0) + ((fp_word)(i)) * ((fp_word)(j));        \
+      c0  = (fp_digit)t;                                            \
+      t   = ((fp_word)c1) + (t >> DIGIT_BIT);                       \
+      c1  = (fp_digit)t;                                            \
+      c2 += (fp_digit)(t >> DIGIT_BIT);                             \
+   } while (0);
+
+/* Same as SQRADD but the product is accumulated twice: in a square the
+ * cross terms a[x]*a[y] (x != y) each occur two times.  i and j are
+ * evaluated exactly once, so arguments with side effects are safe.
+ */
+#define SQRADD2(i, j)                                               \
+   do {                                                             \
+      fp_word sqradd2_sum;                                          \
+      t            = ((fp_word)(i)) * ((fp_word)(j));               \
+      sqradd2_sum  = ((fp_word)c0) + t;                             \
+      c0           = (fp_digit)sqradd2_sum;                         \
+      sqradd2_sum  = ((fp_word)c1) + (sqradd2_sum >> DIGIT_BIT);    \
+      c1           = (fp_digit)sqradd2_sum;                         \
+      c2          += (fp_digit)(sqradd2_sum >> DIGIT_BIT);          \
+      sqradd2_sum  = ((fp_word)c0) + t;                             \
+      c0           = (fp_digit)sqradd2_sum;                         \
+      sqradd2_sum  = ((fp_word)c1) + (sqradd2_sum >> DIGIT_BIT);    \
+      c1           = (fp_digit)sqradd2_sum;                         \
+      c2          += (fp_digit)(sqradd2_sum >> DIGIT_BIT);          \
+   } while (0);
+
+#endif
+
+/* generic comba squarer: B = A * A
+ *
+ * The product is built one output column at a time.  For column ix every
+ * cross product A[tx]*A[ty] with tx + ty == ix and tx < ty is added twice
+ * (SQRADD2); even columns additionally get the square of A[ix/2] (SQRADD).
+ * The column count is capped at FP_SIZE-1, presumably so that the final
+ * carry store into dp[pa] stays inside the digit array.  When A and B are
+ * the same object the result is assembled in a temporary and copied out.
+ */
+void fp_sqr_comba(fp_int *A, fp_int *B)
+{
+   int       pa, ix, iz;
+   fp_digit  c0, c1, c2;
+   fp_int    tmp, *dst;
+   fp_word   t;
+
+   /* get size of output and trim */
+   pa = A->used + A->used;
+   if (pa >= FP_SIZE) {
+      pa = FP_SIZE-1;
+   }
+
+   /* number of output digits to produce */
+   COMBA_START;
+   CLEAR_CARRY;
+
+   /* never write into the operand while it is still being read */
+   if (A == B) {
+      fp_zero(&tmp);
+      dst = &tmp;
+   } else {
+      fp_zero(B);
+      dst = B;
+   }
+
+   for (ix = 0; ix < pa; ix++) {
+      int      tx, ty, iy;
+      fp_digit *tmpy, *tmpx;
+
+      /* get offsets into the two bignums */
+      ty = MIN(A->used-1, ix);
+      tx = ix - ty;
+
+      /* setup temp aliases */
+      tmpx = A->dp + tx;
+      tmpy = A->dp + ty;
+
+      /* this is the number of times the loop will iterate, essentially it's
+         while (tx++ < a->used && ty-- >= 0) { ... }
+       */
+      iy = MIN(A->used-tx, ty+1);
+
+      /* now for squaring tx can never equal ty
+       * we halve the distance since they approach at a rate of 2x
+       * and we have to round because odd cases need to be executed
+       */
+      iy = MIN(iy, (ty-tx+1)>>1);
+
+      /* forward carries */
+      CARRY_FORWARD;
+
+      /* execute loop */
+      for (iz = 0; iz < iy; iz++) {
+         SQRADD2(*tmpx++, *tmpy--);
+      }
+
+      /* even columns have the square term in them */
+      if ((ix&1) == 0) {
+         SQRADD(A->dp[ix>>1], A->dp[ix>>1]);
+      }
+
+      /* store it */
+      COMBA_STORE(dst->dp[ix]);
+   }
+   /* ix == pa here: store the carry out of the last column */
+   COMBA_STORE2(dst->dp[ix]);
+
+   COMBA_FINI;
+
+   /* setup dest */
+   dst->used = pa;
+   fp_clamp (dst);
+   if (dst != B) {
+      fp_copy(dst, B);
+   }
+}
+
+/* fully unrolled comba squarer for 4-digit operands: B = A * A.
+ * Reads exactly a[0..3] regardless of A->used and writes 8 digits to B.
+ * NOTE(review): presumably only called when A->used <= 4 with the unused
+ * high digits zero -- confirm against the dispatch code in fp_sqr().
+ */
+void fp_sqr_comba4(fp_int *A, fp_int *B)
+{
+   fp_word t;
+   fp_digit *a, b[8], c0, c1, c2;
+
+   a = A->dp;
+   COMBA_START;
+
+   /* clear carries */
+   CLEAR_CARRY;
+
+   /* output 0 */
+   SQRADD(a[0],a[0]);
+   COMBA_STORE(b[0]);
+
+   /* output 1 */
+   CARRY_FORWARD;
+   SQRADD2(a[0], a[1]);
+   COMBA_STORE(b[1]);
+
+   /* output 2 */
+   CARRY_FORWARD;
+   SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
+   COMBA_STORE(b[2]);
+
+   /* output 3 */
+   CARRY_FORWARD;
+   SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
+   COMBA_STORE(b[3]);
+
+   /* output 4 */
+   CARRY_FORWARD;
+   SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
+   COMBA_STORE(b[4]);
+
+   /* output 5 */
+   CARRY_FORWARD;
+   SQRADD2(a[2], a[3]);
+   COMBA_STORE(b[5]);
+
+   /* output 6 */
+   CARRY_FORWARD;
+   SQRADD(a[3], a[3]);
+   COMBA_STORE(b[6]);
+   COMBA_STORE2(b[7]);
+   COMBA_FINI;
+
+   /* the result was built in b[] so that B may alias A */
+   B->used = 8;
+   B->sign = FP_ZPOS;
+   memcpy(B->dp, b, 8 * sizeof(fp_digit));
+   fp_clamp(B);
+}
+
+
+void fp_sqr_comba8(fp_int *A, fp_int *B)
+{
+   fp_word t;
+   fp_digit *a, b[16], c0, c1, c2;
+
+   a = A->dp;
+   COMBA_START;
+
+   /* clear carries */
+   CLEAR_CARRY;
+
+   /* output 0 */
+   SQRADD(a[0],a[0]);
+   COMBA_STORE(b[0]);
+
+   /* output 1 */
+   CARRY_FORWARD;
+   SQRADD2(a[0], a[1]);
+   COMBA_STORE(b[1]);
+
+   /* output 2 */
+   CARRY_FORWARD;
+   SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
+   COMBA_STORE(b[2]);
+
+   /*
output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADD2(a[0], a[5]); SQRADD2(a[1], a[4]); SQRADD2(a[2], a[3]); + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADD2(a[0], a[6]); SQRADD2(a[1], a[5]); SQRADD2(a[2], a[4]); SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADD2(a[0], a[7]); SQRADD2(a[1], a[6]); SQRADD2(a[2], a[5]); SQRADD2(a[3], a[4]); + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADD2(a[1], a[7]); SQRADD2(a[2], a[6]); SQRADD2(a[3], a[5]); SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADD2(a[2], a[7]); SQRADD2(a[3], a[6]); SQRADD2(a[4], a[5]); + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADD2(a[3], a[7]); SQRADD2(a[4], a[6]); SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADD2(a[4], a[7]); SQRADD2(a[5], a[6]); + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADD2(a[5], a[7]); SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADD2(a[6], a[7]); + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + COMBA_STORE2(b[15]); + COMBA_FINI; + + B->used = 16; + B->sign = FP_ZPOS; + memcpy(B->dp, b, 16 * sizeof(fp_digit)); + fp_clamp(B); +} + + +void fp_sqr_comba16(fp_int *A, fp_int *B) +{ + fp_word t; + fp_digit *a, b[32], c0, c1, c2; + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], 
a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADD2(a[0], a[5]); SQRADD2(a[1], a[4]); SQRADD2(a[2], a[3]); + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADD2(a[0], a[6]); SQRADD2(a[1], a[5]); SQRADD2(a[2], a[4]); SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADD2(a[0], a[7]); SQRADD2(a[1], a[6]); SQRADD2(a[2], a[5]); SQRADD2(a[3], a[4]); + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADD2(a[0], a[8]); SQRADD2(a[1], a[7]); SQRADD2(a[2], a[6]); SQRADD2(a[3], a[5]); SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADD2(a[0], a[9]); SQRADD2(a[1], a[8]); SQRADD2(a[2], a[7]); SQRADD2(a[3], a[6]); SQRADD2(a[4], a[5]); + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADD2(a[0], a[10]); SQRADD2(a[1], a[9]); SQRADD2(a[2], a[8]); SQRADD2(a[3], a[7]); SQRADD2(a[4], a[6]); SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADD2(a[0], a[11]); SQRADD2(a[1], a[10]); SQRADD2(a[2], a[9]); SQRADD2(a[3], a[8]); SQRADD2(a[4], a[7]); SQRADD2(a[5], a[6]); + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADD2(a[0], a[12]); SQRADD2(a[1], a[11]); SQRADD2(a[2], a[10]); SQRADD2(a[3], a[9]); SQRADD2(a[4], a[8]); SQRADD2(a[5], a[7]); SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADD2(a[0], a[13]); SQRADD2(a[1], a[12]); SQRADD2(a[2], a[11]); SQRADD2(a[3], a[10]); SQRADD2(a[4], a[9]); SQRADD2(a[5], a[8]); SQRADD2(a[6], a[7]); + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADD2(a[0], a[14]); SQRADD2(a[1], a[13]); SQRADD2(a[2], a[12]); SQRADD2(a[3], a[11]); SQRADD2(a[4], a[10]); SQRADD2(a[5], a[9]); SQRADD2(a[6], a[8]); SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADD2(a[0], a[15]); SQRADD2(a[1], a[14]); 
SQRADD2(a[2], a[13]); SQRADD2(a[3], a[12]); SQRADD2(a[4], a[11]); SQRADD2(a[5], a[10]); SQRADD2(a[6], a[9]); SQRADD2(a[7], a[8]); + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADD2(a[1], a[15]); SQRADD2(a[2], a[14]); SQRADD2(a[3], a[13]); SQRADD2(a[4], a[12]); SQRADD2(a[5], a[11]); SQRADD2(a[6], a[10]); SQRADD2(a[7], a[9]); SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADD2(a[2], a[15]); SQRADD2(a[3], a[14]); SQRADD2(a[4], a[13]); SQRADD2(a[5], a[12]); SQRADD2(a[6], a[11]); SQRADD2(a[7], a[10]); SQRADD2(a[8], a[9]); + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADD2(a[3], a[15]); SQRADD2(a[4], a[14]); SQRADD2(a[5], a[13]); SQRADD2(a[6], a[12]); SQRADD2(a[7], a[11]); SQRADD2(a[8], a[10]); SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADD2(a[4], a[15]); SQRADD2(a[5], a[14]); SQRADD2(a[6], a[13]); SQRADD2(a[7], a[12]); SQRADD2(a[8], a[11]); SQRADD2(a[9], a[10]); + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADD2(a[5], a[15]); SQRADD2(a[6], a[14]); SQRADD2(a[7], a[13]); SQRADD2(a[8], a[12]); SQRADD2(a[9], a[11]); SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADD2(a[6], a[15]); SQRADD2(a[7], a[14]); SQRADD2(a[8], a[13]); SQRADD2(a[9], a[12]); SQRADD2(a[10], a[11]); + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADD2(a[7], a[15]); SQRADD2(a[8], a[14]); SQRADD2(a[9], a[13]); SQRADD2(a[10], a[12]); SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADD2(a[8], a[15]); SQRADD2(a[9], a[14]); SQRADD2(a[10], a[13]); SQRADD2(a[11], a[12]); + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADD2(a[9], a[15]); SQRADD2(a[10], a[14]); SQRADD2(a[11], a[13]); SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + + /* output 25 */ + CARRY_FORWARD; + SQRADD2(a[10], a[15]); SQRADD2(a[11], a[14]); SQRADD2(a[12], a[13]); + COMBA_STORE(b[25]); + + /* output 26 */ + 
CARRY_FORWARD; + SQRADD2(a[11], a[15]); SQRADD2(a[12], a[14]); SQRADD(a[13], a[13]); + COMBA_STORE(b[26]); + + /* output 27 */ + CARRY_FORWARD; + SQRADD2(a[12], a[15]); SQRADD2(a[13], a[14]); + COMBA_STORE(b[27]); + + /* output 28 */ + CARRY_FORWARD; + SQRADD2(a[13], a[15]); SQRADD(a[14], a[14]); + COMBA_STORE(b[28]); + + /* output 29 */ + CARRY_FORWARD; + SQRADD2(a[14], a[15]); + COMBA_STORE(b[29]); + + /* output 30 */ + CARRY_FORWARD; + SQRADD(a[15], a[15]); + COMBA_STORE(b[30]); + COMBA_STORE2(b[31]); + COMBA_FINI; + + B->used = 32; + B->sign = FP_ZPOS; + memcpy(B->dp, b, 32 * sizeof(fp_digit)); + fp_clamp(B); +} + +#ifdef TFM_HUGE + +void fp_sqr_comba32(fp_int *A, fp_int *B) +{ + fp_word t; + fp_digit *a, b[64], c0, c1, c2; + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADD2(a[0], a[5]); SQRADD2(a[1], a[4]); SQRADD2(a[2], a[3]); + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADD2(a[0], a[6]); SQRADD2(a[1], a[5]); SQRADD2(a[2], a[4]); SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADD2(a[0], a[7]); SQRADD2(a[1], a[6]); SQRADD2(a[2], a[5]); SQRADD2(a[3], a[4]); + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADD2(a[0], a[8]); SQRADD2(a[1], a[7]); SQRADD2(a[2], a[6]); SQRADD2(a[3], a[5]); SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADD2(a[0], a[9]); SQRADD2(a[1], a[8]); SQRADD2(a[2], a[7]); SQRADD2(a[3], a[6]); SQRADD2(a[4], a[5]); + 
COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADD2(a[0], a[10]); SQRADD2(a[1], a[9]); SQRADD2(a[2], a[8]); SQRADD2(a[3], a[7]); SQRADD2(a[4], a[6]); SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADD2(a[0], a[11]); SQRADD2(a[1], a[10]); SQRADD2(a[2], a[9]); SQRADD2(a[3], a[8]); SQRADD2(a[4], a[7]); SQRADD2(a[5], a[6]); + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADD2(a[0], a[12]); SQRADD2(a[1], a[11]); SQRADD2(a[2], a[10]); SQRADD2(a[3], a[9]); SQRADD2(a[4], a[8]); SQRADD2(a[5], a[7]); SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADD2(a[0], a[13]); SQRADD2(a[1], a[12]); SQRADD2(a[2], a[11]); SQRADD2(a[3], a[10]); SQRADD2(a[4], a[9]); SQRADD2(a[5], a[8]); SQRADD2(a[6], a[7]); + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADD2(a[0], a[14]); SQRADD2(a[1], a[13]); SQRADD2(a[2], a[12]); SQRADD2(a[3], a[11]); SQRADD2(a[4], a[10]); SQRADD2(a[5], a[9]); SQRADD2(a[6], a[8]); SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADD2(a[0], a[15]); SQRADD2(a[1], a[14]); SQRADD2(a[2], a[13]); SQRADD2(a[3], a[12]); SQRADD2(a[4], a[11]); SQRADD2(a[5], a[10]); SQRADD2(a[6], a[9]); SQRADD2(a[7], a[8]); + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADD2(a[0], a[16]); SQRADD2(a[1], a[15]); SQRADD2(a[2], a[14]); SQRADD2(a[3], a[13]); SQRADD2(a[4], a[12]); SQRADD2(a[5], a[11]); SQRADD2(a[6], a[10]); SQRADD2(a[7], a[9]); SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADD2(a[0], a[17]); SQRADD2(a[1], a[16]); SQRADD2(a[2], a[15]); SQRADD2(a[3], a[14]); SQRADD2(a[4], a[13]); SQRADD2(a[5], a[12]); SQRADD2(a[6], a[11]); SQRADD2(a[7], a[10]); SQRADD2(a[8], a[9]); + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADD2(a[0], a[18]); SQRADD2(a[1], a[17]); SQRADD2(a[2], a[16]); SQRADD2(a[3], a[15]); SQRADD2(a[4], a[14]); SQRADD2(a[5], a[13]); SQRADD2(a[6], a[12]); 
SQRADD2(a[7], a[11]); SQRADD2(a[8], a[10]); SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADD2(a[0], a[19]); SQRADD2(a[1], a[18]); SQRADD2(a[2], a[17]); SQRADD2(a[3], a[16]); SQRADD2(a[4], a[15]); SQRADD2(a[5], a[14]); SQRADD2(a[6], a[13]); SQRADD2(a[7], a[12]); SQRADD2(a[8], a[11]); SQRADD2(a[9], a[10]); + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADD2(a[0], a[20]); SQRADD2(a[1], a[19]); SQRADD2(a[2], a[18]); SQRADD2(a[3], a[17]); SQRADD2(a[4], a[16]); SQRADD2(a[5], a[15]); SQRADD2(a[6], a[14]); SQRADD2(a[7], a[13]); SQRADD2(a[8], a[12]); SQRADD2(a[9], a[11]); SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADD2(a[0], a[21]); SQRADD2(a[1], a[20]); SQRADD2(a[2], a[19]); SQRADD2(a[3], a[18]); SQRADD2(a[4], a[17]); SQRADD2(a[5], a[16]); SQRADD2(a[6], a[15]); SQRADD2(a[7], a[14]); SQRADD2(a[8], a[13]); SQRADD2(a[9], a[12]); SQRADD2(a[10], a[11]); + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADD2(a[0], a[22]); SQRADD2(a[1], a[21]); SQRADD2(a[2], a[20]); SQRADD2(a[3], a[19]); SQRADD2(a[4], a[18]); SQRADD2(a[5], a[17]); SQRADD2(a[6], a[16]); SQRADD2(a[7], a[15]); SQRADD2(a[8], a[14]); SQRADD2(a[9], a[13]); SQRADD2(a[10], a[12]); SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADD2(a[0], a[23]); SQRADD2(a[1], a[22]); SQRADD2(a[2], a[21]); SQRADD2(a[3], a[20]); SQRADD2(a[4], a[19]); SQRADD2(a[5], a[18]); SQRADD2(a[6], a[17]); SQRADD2(a[7], a[16]); SQRADD2(a[8], a[15]); SQRADD2(a[9], a[14]); SQRADD2(a[10], a[13]); SQRADD2(a[11], a[12]); + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADD2(a[0], a[24]); SQRADD2(a[1], a[23]); SQRADD2(a[2], a[22]); SQRADD2(a[3], a[21]); SQRADD2(a[4], a[20]); SQRADD2(a[5], a[19]); SQRADD2(a[6], a[18]); SQRADD2(a[7], a[17]); SQRADD2(a[8], a[16]); SQRADD2(a[9], a[15]); SQRADD2(a[10], a[14]); SQRADD2(a[11], a[13]); SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + + /* output 25 */ + 
CARRY_FORWARD; + SQRADD2(a[0], a[25]); SQRADD2(a[1], a[24]); SQRADD2(a[2], a[23]); SQRADD2(a[3], a[22]); SQRADD2(a[4], a[21]); SQRADD2(a[5], a[20]); SQRADD2(a[6], a[19]); SQRADD2(a[7], a[18]); SQRADD2(a[8], a[17]); SQRADD2(a[9], a[16]); SQRADD2(a[10], a[15]); SQRADD2(a[11], a[14]); SQRADD2(a[12], a[13]); + COMBA_STORE(b[25]); + + /* output 26 */ + CARRY_FORWARD; + SQRADD2(a[0], a[26]); SQRADD2(a[1], a[25]); SQRADD2(a[2], a[24]); SQRADD2(a[3], a[23]); SQRADD2(a[4], a[22]); SQRADD2(a[5], a[21]); SQRADD2(a[6], a[20]); SQRADD2(a[7], a[19]); SQRADD2(a[8], a[18]); SQRADD2(a[9], a[17]); SQRADD2(a[10], a[16]); SQRADD2(a[11], a[15]); SQRADD2(a[12], a[14]); SQRADD(a[13], a[13]); + COMBA_STORE(b[26]); + + /* output 27 */ + CARRY_FORWARD; + SQRADD2(a[0], a[27]); SQRADD2(a[1], a[26]); SQRADD2(a[2], a[25]); SQRADD2(a[3], a[24]); SQRADD2(a[4], a[23]); SQRADD2(a[5], a[22]); SQRADD2(a[6], a[21]); SQRADD2(a[7], a[20]); SQRADD2(a[8], a[19]); SQRADD2(a[9], a[18]); SQRADD2(a[10], a[17]); SQRADD2(a[11], a[16]); SQRADD2(a[12], a[15]); SQRADD2(a[13], a[14]); + COMBA_STORE(b[27]); + + /* output 28 */ + CARRY_FORWARD; + SQRADD2(a[0], a[28]); SQRADD2(a[1], a[27]); SQRADD2(a[2], a[26]); SQRADD2(a[3], a[25]); SQRADD2(a[4], a[24]); SQRADD2(a[5], a[23]); SQRADD2(a[6], a[22]); SQRADD2(a[7], a[21]); SQRADD2(a[8], a[20]); SQRADD2(a[9], a[19]); SQRADD2(a[10], a[18]); SQRADD2(a[11], a[17]); SQRADD2(a[12], a[16]); SQRADD2(a[13], a[15]); SQRADD(a[14], a[14]); + COMBA_STORE(b[28]); + + /* output 29 */ + CARRY_FORWARD; + SQRADD2(a[0], a[29]); SQRADD2(a[1], a[28]); SQRADD2(a[2], a[27]); SQRADD2(a[3], a[26]); SQRADD2(a[4], a[25]); SQRADD2(a[5], a[24]); SQRADD2(a[6], a[23]); SQRADD2(a[7], a[22]); SQRADD2(a[8], a[21]); SQRADD2(a[9], a[20]); SQRADD2(a[10], a[19]); SQRADD2(a[11], a[18]); SQRADD2(a[12], a[17]); SQRADD2(a[13], a[16]); SQRADD2(a[14], a[15]); + COMBA_STORE(b[29]); + + /* output 30 */ + CARRY_FORWARD; + SQRADD2(a[0], a[30]); SQRADD2(a[1], a[29]); SQRADD2(a[2], a[28]); SQRADD2(a[3], a[27]); 
SQRADD2(a[4], a[26]); SQRADD2(a[5], a[25]); SQRADD2(a[6], a[24]); SQRADD2(a[7], a[23]); SQRADD2(a[8], a[22]); SQRADD2(a[9], a[21]); SQRADD2(a[10], a[20]); SQRADD2(a[11], a[19]); SQRADD2(a[12], a[18]); SQRADD2(a[13], a[17]); SQRADD2(a[14], a[16]); SQRADD(a[15], a[15]); + COMBA_STORE(b[30]); + + /* output 31 */ + CARRY_FORWARD; + SQRADD2(a[0], a[31]); SQRADD2(a[1], a[30]); SQRADD2(a[2], a[29]); SQRADD2(a[3], a[28]); SQRADD2(a[4], a[27]); SQRADD2(a[5], a[26]); SQRADD2(a[6], a[25]); SQRADD2(a[7], a[24]); SQRADD2(a[8], a[23]); SQRADD2(a[9], a[22]); SQRADD2(a[10], a[21]); SQRADD2(a[11], a[20]); SQRADD2(a[12], a[19]); SQRADD2(a[13], a[18]); SQRADD2(a[14], a[17]); SQRADD2(a[15], a[16]); + COMBA_STORE(b[31]); + + /* output 32 */ + CARRY_FORWARD; + SQRADD2(a[1], a[31]); SQRADD2(a[2], a[30]); SQRADD2(a[3], a[29]); SQRADD2(a[4], a[28]); SQRADD2(a[5], a[27]); SQRADD2(a[6], a[26]); SQRADD2(a[7], a[25]); SQRADD2(a[8], a[24]); SQRADD2(a[9], a[23]); SQRADD2(a[10], a[22]); SQRADD2(a[11], a[21]); SQRADD2(a[12], a[20]); SQRADD2(a[13], a[19]); SQRADD2(a[14], a[18]); SQRADD2(a[15], a[17]); SQRADD(a[16], a[16]); + COMBA_STORE(b[32]); + + /* output 33 */ + CARRY_FORWARD; + SQRADD2(a[2], a[31]); SQRADD2(a[3], a[30]); SQRADD2(a[4], a[29]); SQRADD2(a[5], a[28]); SQRADD2(a[6], a[27]); SQRADD2(a[7], a[26]); SQRADD2(a[8], a[25]); SQRADD2(a[9], a[24]); SQRADD2(a[10], a[23]); SQRADD2(a[11], a[22]); SQRADD2(a[12], a[21]); SQRADD2(a[13], a[20]); SQRADD2(a[14], a[19]); SQRADD2(a[15], a[18]); SQRADD2(a[16], a[17]); + COMBA_STORE(b[33]); + + /* output 34 */ + CARRY_FORWARD; + SQRADD2(a[3], a[31]); SQRADD2(a[4], a[30]); SQRADD2(a[5], a[29]); SQRADD2(a[6], a[28]); SQRADD2(a[7], a[27]); SQRADD2(a[8], a[26]); SQRADD2(a[9], a[25]); SQRADD2(a[10], a[24]); SQRADD2(a[11], a[23]); SQRADD2(a[12], a[22]); SQRADD2(a[13], a[21]); SQRADD2(a[14], a[20]); SQRADD2(a[15], a[19]); SQRADD2(a[16], a[18]); SQRADD(a[17], a[17]); + COMBA_STORE(b[34]); + + /* output 35 */ + CARRY_FORWARD; + SQRADD2(a[4], a[31]); SQRADD2(a[5], 
a[30]); SQRADD2(a[6], a[29]); SQRADD2(a[7], a[28]); SQRADD2(a[8], a[27]); SQRADD2(a[9], a[26]); SQRADD2(a[10], a[25]); SQRADD2(a[11], a[24]); SQRADD2(a[12], a[23]); SQRADD2(a[13], a[22]); SQRADD2(a[14], a[21]); SQRADD2(a[15], a[20]); SQRADD2(a[16], a[19]); SQRADD2(a[17], a[18]); + COMBA_STORE(b[35]); + + /* output 36 */ + CARRY_FORWARD; + SQRADD2(a[5], a[31]); SQRADD2(a[6], a[30]); SQRADD2(a[7], a[29]); SQRADD2(a[8], a[28]); SQRADD2(a[9], a[27]); SQRADD2(a[10], a[26]); SQRADD2(a[11], a[25]); SQRADD2(a[12], a[24]); SQRADD2(a[13], a[23]); SQRADD2(a[14], a[22]); SQRADD2(a[15], a[21]); SQRADD2(a[16], a[20]); SQRADD2(a[17], a[19]); SQRADD(a[18], a[18]); + COMBA_STORE(b[36]); + + /* output 37 */ + CARRY_FORWARD; + SQRADD2(a[6], a[31]); SQRADD2(a[7], a[30]); SQRADD2(a[8], a[29]); SQRADD2(a[9], a[28]); SQRADD2(a[10], a[27]); SQRADD2(a[11], a[26]); SQRADD2(a[12], a[25]); SQRADD2(a[13], a[24]); SQRADD2(a[14], a[23]); SQRADD2(a[15], a[22]); SQRADD2(a[16], a[21]); SQRADD2(a[17], a[20]); SQRADD2(a[18], a[19]); + COMBA_STORE(b[37]); + + /* output 38 */ + CARRY_FORWARD; + SQRADD2(a[7], a[31]); SQRADD2(a[8], a[30]); SQRADD2(a[9], a[29]); SQRADD2(a[10], a[28]); SQRADD2(a[11], a[27]); SQRADD2(a[12], a[26]); SQRADD2(a[13], a[25]); SQRADD2(a[14], a[24]); SQRADD2(a[15], a[23]); SQRADD2(a[16], a[22]); SQRADD2(a[17], a[21]); SQRADD2(a[18], a[20]); SQRADD(a[19], a[19]); + COMBA_STORE(b[38]); + + /* output 39 */ + CARRY_FORWARD; + SQRADD2(a[8], a[31]); SQRADD2(a[9], a[30]); SQRADD2(a[10], a[29]); SQRADD2(a[11], a[28]); SQRADD2(a[12], a[27]); SQRADD2(a[13], a[26]); SQRADD2(a[14], a[25]); SQRADD2(a[15], a[24]); SQRADD2(a[16], a[23]); SQRADD2(a[17], a[22]); SQRADD2(a[18], a[21]); SQRADD2(a[19], a[20]); + COMBA_STORE(b[39]); + + /* output 40 */ + CARRY_FORWARD; + SQRADD2(a[9], a[31]); SQRADD2(a[10], a[30]); SQRADD2(a[11], a[29]); SQRADD2(a[12], a[28]); SQRADD2(a[13], a[27]); SQRADD2(a[14], a[26]); SQRADD2(a[15], a[25]); SQRADD2(a[16], a[24]); SQRADD2(a[17], a[23]); SQRADD2(a[18], a[22]); 
SQRADD2(a[19], a[21]); SQRADD(a[20], a[20]); + COMBA_STORE(b[40]); + + /* output 41 */ + CARRY_FORWARD; + SQRADD2(a[10], a[31]); SQRADD2(a[11], a[30]); SQRADD2(a[12], a[29]); SQRADD2(a[13], a[28]); SQRADD2(a[14], a[27]); SQRADD2(a[15], a[26]); SQRADD2(a[16], a[25]); SQRADD2(a[17], a[24]); SQRADD2(a[18], a[23]); SQRADD2(a[19], a[22]); SQRADD2(a[20], a[21]); + COMBA_STORE(b[41]); + + /* output 42 */ + CARRY_FORWARD; + SQRADD2(a[11], a[31]); SQRADD2(a[12], a[30]); SQRADD2(a[13], a[29]); SQRADD2(a[14], a[28]); SQRADD2(a[15], a[27]); SQRADD2(a[16], a[26]); SQRADD2(a[17], a[25]); SQRADD2(a[18], a[24]); SQRADD2(a[19], a[23]); SQRADD2(a[20], a[22]); SQRADD(a[21], a[21]); + COMBA_STORE(b[42]); + + /* output 43 */ + CARRY_FORWARD; + SQRADD2(a[12], a[31]); SQRADD2(a[13], a[30]); SQRADD2(a[14], a[29]); SQRADD2(a[15], a[28]); SQRADD2(a[16], a[27]); SQRADD2(a[17], a[26]); SQRADD2(a[18], a[25]); SQRADD2(a[19], a[24]); SQRADD2(a[20], a[23]); SQRADD2(a[21], a[22]); + COMBA_STORE(b[43]); + + /* output 44 */ + CARRY_FORWARD; + SQRADD2(a[13], a[31]); SQRADD2(a[14], a[30]); SQRADD2(a[15], a[29]); SQRADD2(a[16], a[28]); SQRADD2(a[17], a[27]); SQRADD2(a[18], a[26]); SQRADD2(a[19], a[25]); SQRADD2(a[20], a[24]); SQRADD2(a[21], a[23]); SQRADD(a[22], a[22]); + COMBA_STORE(b[44]); + + /* output 45 */ + CARRY_FORWARD; + SQRADD2(a[14], a[31]); SQRADD2(a[15], a[30]); SQRADD2(a[16], a[29]); SQRADD2(a[17], a[28]); SQRADD2(a[18], a[27]); SQRADD2(a[19], a[26]); SQRADD2(a[20], a[25]); SQRADD2(a[21], a[24]); SQRADD2(a[22], a[23]); + COMBA_STORE(b[45]); + + /* output 46 */ + CARRY_FORWARD; + SQRADD2(a[15], a[31]); SQRADD2(a[16], a[30]); SQRADD2(a[17], a[29]); SQRADD2(a[18], a[28]); SQRADD2(a[19], a[27]); SQRADD2(a[20], a[26]); SQRADD2(a[21], a[25]); SQRADD2(a[22], a[24]); SQRADD(a[23], a[23]); + COMBA_STORE(b[46]); + + /* output 47 */ + CARRY_FORWARD; + SQRADD2(a[16], a[31]); SQRADD2(a[17], a[30]); SQRADD2(a[18], a[29]); SQRADD2(a[19], a[28]); SQRADD2(a[20], a[27]); SQRADD2(a[21], a[26]); 
SQRADD2(a[22], a[25]); SQRADD2(a[23], a[24]); + COMBA_STORE(b[47]); + + /* output 48 */ + CARRY_FORWARD; + SQRADD2(a[17], a[31]); SQRADD2(a[18], a[30]); SQRADD2(a[19], a[29]); SQRADD2(a[20], a[28]); SQRADD2(a[21], a[27]); SQRADD2(a[22], a[26]); SQRADD2(a[23], a[25]); SQRADD(a[24], a[24]); + COMBA_STORE(b[48]); + + /* output 49 */ + CARRY_FORWARD; + SQRADD2(a[18], a[31]); SQRADD2(a[19], a[30]); SQRADD2(a[20], a[29]); SQRADD2(a[21], a[28]); SQRADD2(a[22], a[27]); SQRADD2(a[23], a[26]); SQRADD2(a[24], a[25]); + COMBA_STORE(b[49]); + + /* output 50 */ + CARRY_FORWARD; + SQRADD2(a[19], a[31]); SQRADD2(a[20], a[30]); SQRADD2(a[21], a[29]); SQRADD2(a[22], a[28]); SQRADD2(a[23], a[27]); SQRADD2(a[24], a[26]); SQRADD(a[25], a[25]); + COMBA_STORE(b[50]); + + /* output 51 */ + CARRY_FORWARD; + SQRADD2(a[20], a[31]); SQRADD2(a[21], a[30]); SQRADD2(a[22], a[29]); SQRADD2(a[23], a[28]); SQRADD2(a[24], a[27]); SQRADD2(a[25], a[26]); + COMBA_STORE(b[51]); + + /* output 52 */ + CARRY_FORWARD; + SQRADD2(a[21], a[31]); SQRADD2(a[22], a[30]); SQRADD2(a[23], a[29]); SQRADD2(a[24], a[28]); SQRADD2(a[25], a[27]); SQRADD(a[26], a[26]); + COMBA_STORE(b[52]); + + /* output 53 */ + CARRY_FORWARD; + SQRADD2(a[22], a[31]); SQRADD2(a[23], a[30]); SQRADD2(a[24], a[29]); SQRADD2(a[25], a[28]); SQRADD2(a[26], a[27]); + COMBA_STORE(b[53]); + + /* output 54 */ + CARRY_FORWARD; + SQRADD2(a[23], a[31]); SQRADD2(a[24], a[30]); SQRADD2(a[25], a[29]); SQRADD2(a[26], a[28]); SQRADD(a[27], a[27]); + COMBA_STORE(b[54]); + + /* output 55 */ + CARRY_FORWARD; + SQRADD2(a[24], a[31]); SQRADD2(a[25], a[30]); SQRADD2(a[26], a[29]); SQRADD2(a[27], a[28]); + COMBA_STORE(b[55]); + + /* output 56 */ + CARRY_FORWARD; + SQRADD2(a[25], a[31]); SQRADD2(a[26], a[30]); SQRADD2(a[27], a[29]); SQRADD(a[28], a[28]); + COMBA_STORE(b[56]); + + /* output 57 */ + CARRY_FORWARD; + SQRADD2(a[26], a[31]); SQRADD2(a[27], a[30]); SQRADD2(a[28], a[29]); + COMBA_STORE(b[57]); + + /* output 58 */ + CARRY_FORWARD; + SQRADD2(a[27], a[31]); 
SQRADD2(a[28], a[30]); SQRADD(a[29], a[29]);
+   COMBA_STORE(b[58]);
+
+   /* output 59 */
+   CARRY_FORWARD;
+   SQRADD2(a[28], a[31]); SQRADD2(a[29], a[30]);
+   COMBA_STORE(b[59]);
+
+   /* output 60 */
+   CARRY_FORWARD;
+   SQRADD2(a[29], a[31]); SQRADD(a[30], a[30]);
+   COMBA_STORE(b[60]);
+
+   /* output 61 */
+   CARRY_FORWARD;
+   SQRADD2(a[30], a[31]);
+   COMBA_STORE(b[61]);
+
+   /* output 62 */
+   CARRY_FORWARD;
+   SQRADD(a[31], a[31]);
+   COMBA_STORE(b[62]);
+   COMBA_STORE2(b[63]);
+   COMBA_FINI;
+
+   B->used = 64;
+   B->sign = FP_ZPOS;
+   memcpy(B->dp, b, 64 * sizeof(fp_digit));
+   fp_clamp(B);
+}
+
+#endif
+
diff --git a/fp_sqrmod.c b/fp_sqrmod.c
new file mode 100644
index 0000000..a32c171
--- /dev/null
+++ b/fp_sqrmod.c
@@ -0,0 +1,19 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = a * a (mod b); the square is formed in a zeroed temporary and the return value is that of fp_mod() */
+int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c)
+{
+  fp_int tmp;
+  fp_zero(&tmp);
+  fp_sqr(a, &tmp);
+  return fp_mod(&tmp, b, c);
+}
diff --git a/fp_sub.c b/fp_sub.c
new file mode 100644
index 0000000..c6a99d0
--- /dev/null
+++ b/fp_sub.c
@@ -0,0 +1,46 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = a - b (signed); both signs are read before c->sign is written */
+void fp_sub(fp_int *a, fp_int *b, fp_int *c)
+{
+  int     sa, sb;
+
+  sa = a->sign;
+  sb = b->sign;
+
+  if (sa != sb) {
+    /* subtract a negative from a positive, OR */
+    /* subtract a positive from a negative. */
+    /* In either case, ADD their magnitudes, */
+    /* and use the sign of the first number. */
+    c->sign = sa;
+    s_fp_add (a, b, c);
+  } else {
+    /* subtract a positive from a positive, OR */
+    /* subtract a negative from a negative. */
+    /* First, take the difference between their */
+    /* magnitudes, then... */
+    if (fp_cmp_mag (a, b) != FP_LT) {
+      /* Copy the sign from the first */
+      c->sign = sa;
+      /* The first has a larger or equal magnitude */
+      s_fp_sub (a, b, c);
+    } else {
+      /* The result has the *opposite* sign from */
+      /* the first number. */
+      c->sign = (sa == FP_ZPOS) ? FP_NEG : FP_ZPOS;
+      /* The second has a larger magnitude */
+      s_fp_sub (b, a, c);
+    }
+  }
+}
+
diff --git a/fp_sub_d.c b/fp_sub_d.c
new file mode 100644
index 0000000..890bb2b
--- /dev/null
+++ b/fp_sub_d.c
@@ -0,0 +1,18 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* c = a - b, where b is a single digit that is first widened to an fp_int */
+void fp_sub_d(fp_int *a, fp_digit b, fp_int *c)
+{
+   fp_int tmp;
+   fp_set(&tmp, b);
+   fp_sub(a, &tmp, c);
+}
diff --git a/fp_submod.c b/fp_submod.c
new file mode 100644
index 0000000..ecfa4b7
--- /dev/null
+++ b/fp_submod.c
@@ -0,0 +1,20 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+
+/* d = a - b (mod c); the difference is formed in a zeroed temporary and the return value is that of fp_mod() */
+int fp_submod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
+{
+  fp_int tmp;
+  fp_zero(&tmp);
+  fp_sub(a, b, &tmp);
+  return fp_mod(&tmp, c, d);
+}
+
diff --git a/fp_to_signed_bin.c b/fp_to_signed_bin.c
new file mode 100644
index 0000000..2a48483
--- /dev/null
+++ b/fp_to_signed_bin.c
@@ -0,0 +1,16 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+/* b[0] = sign flag (0 for FP_ZPOS, 1 otherwise); b[1..] = magnitude as written by fp_to_unsigned_bin() */
+void fp_to_signed_bin(fp_int *a, unsigned char *b)
+{
+  fp_to_unsigned_bin (a, b + 1);
+  b[0] = (unsigned char) ((a->sign == FP_ZPOS) ? 0 : 1);
+}
diff --git a/fp_to_unsigned_bin.c b/fp_to_unsigned_bin.c
new file mode 100644
index 0000000..40c15f9
--- /dev/null
+++ b/fp_to_unsigned_bin.c
@@ -0,0 +1,25 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+/* write a to b one byte at a time, most significant byte first; a zero value writes no bytes at all */
+void fp_to_unsigned_bin(fp_int *a, unsigned char *b)
+{
+  int     x;
+  fp_int  t;
+
+  fp_init_copy(&t, a);
+
+  x = 0;
+  while (fp_iszero (&t) == FP_NO) {
+      b[x++] = (unsigned char) (t.dp[0] & 255);
+      fp_div_2d (&t, 8, &t, NULL);
+  }
+  bn_reverse (b, x);
+}
diff --git a/fp_toradix.c b/fp_toradix.c
new file mode 100644
index 0000000..1a7c516
--- /dev/null
+++ b/fp_toradix.c
@@ -0,0 +1,55 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+/* write a into str as a NUL-terminated string in the given radix (2..64); FP_VAL on a bad radix, else FP_OKAY */
+int fp_toradix(fp_int *a, char *str, int radix)
+{
+  int     digs;
+  fp_int  t;
+  fp_digit d;
+  char   *_s = str;
+
+  /* check range of the radix */
+  if (radix < 2 || radix > 64) {
+    return FP_VAL;
+  }
+
+  /* quick out if its zero */
+  if (fp_iszero(a) == 1) {
+     *str++ = '0';
+     *str = '\0';
+     return FP_OKAY;
+  }
+
+  fp_init_copy(&t, a);
+
+  /* if it is negative output a - and convert the magnitude only */
+  if (t.sign == FP_NEG) {
+    ++_s;
+    *str++ = '-';
+    t.sign = FP_ZPOS;
+  }
+
+  digs = 0;
+  while (fp_iszero (&t) == FP_NO) {
+    fp_div_d (&t, (fp_digit) radix, &t, &d);
+    *str++ = fp_s_rmap[d];
+    ++digs;
+  }
+
+  /* the digits were produced least significant first, so reverse them.
+   * In this case _s points to the first digit [excluding the sign] of the number
+   */
+  bn_reverse ((unsigned char *)_s, digs);
+
+  /* append a NUL so the string is properly terminated */
+  *str = '\0';
+  return FP_OKAY;
+}
diff --git a/fp_unsigned_bin_size.c b/fp_unsigned_bin_size.c
new file mode 100644
index 0000000..6ec52ee
--- /dev/null
+++ b/fp_unsigned_bin_size.c
@@ -0,0 +1,16 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <tfm.h>
+/* fp_count_bits(a) rounded up to a whole number of bytes */
+int fp_unsigned_bin_size(fp_int *a)
+{
+  int     size = fp_count_bits (a);
+  return (size / 8 + ((size & 7) != 0 ? 1 : 0));
+}
diff --git a/gen.pl b/gen.pl
new file mode 100644
index 0000000..127ec6f
--- /dev/null
+++ b/gen.pl
@@ -0,0 +1,17 @@
+#!/usr/bin/perl -w
+#
+# Generates a "single file" you can use to quickly
+# add the whole source without any makefile troubles
+# (the glob must also pick up s_fp_add.c / s_fp_sub.c, which fp_add/fp_sub call)
+use strict;
+
+open( OUT, ">mpi.c" ) or die "Couldn't open mpi.c for writing: $!";
+foreach my $filename (glob "fp_*.c s_fp_*.c") {
+   open( SRC, "<$filename" ) or die "Couldn't open $filename for reading: $!";
+   print OUT "/* Start: $filename */\n";
+   print OUT while <SRC>;
+   print OUT "\n/* End: $filename */\n\n";
+   close SRC or die "Error closing $filename after reading: $!";
+}
+print OUT "\n/* EOF */\n";
+close OUT or die "Error closing mpi.c after writing: $!";
\ No newline at end of file
diff --git a/makefile b/makefile
new file mode 100644
index 0000000..967fa57
--- /dev/null
+++ b/makefile
@@ -0,0 +1,78 @@
+#makefile for TomsFastMath
+#
+#
+CFLAGS += -Wall -W -Wshadow -I./ -O3 -funroll-all-loops
+
+#profiling
+#PROF=-pg -g
+#CFLAGS += $(PROF)
+
+#speed
+CFLAGS += -fomit-frame-pointer
+
+VERSION=0.01
+
+default: libtfm.a
+
+OBJECTS = \
+fp_set.o \
+\
+fp_rshd.o fp_lshd.o fp_div_2d.o fp_mod_2d.o fp_mul_2d.o fp_2expt.o \
+fp_mul_2.o fp_div_2.o \
+\
+fp_cnt_lsb.o \
+\
+fp_add.o fp_sub.o fp_mul.o fp_sqr.o fp_div.o fp_mod.o \
+s_fp_add.o s_fp_sub.o \
+\
+fp_cmp_d.o fp_add_d.o fp_sub_d.o fp_mul_d.o fp_div_d.o fp_mod_d.o \
+fp_addmod.o fp_submod.o fp_mulmod.o fp_sqrmod.o fp_invmod.o \
+fp_gcd.o fp_lcm.o fp_prime_miller_rabin.o fp_isprime.o \
+fp_prime_random_ex.o fp_mul_comba.o fp_sqr_comba.o \
+\
+fp_montgomery_setup.o fp_montgomery_calc_normalization.o fp_montgomery_reduce.o \
+\
+fp_exptmod.o \
+\
+fp_cmp.o fp_cmp_mag.o \
+\
+fp_unsigned_bin_size.o fp_read_unsigned_bin.o fp_to_unsigned_bin.o \
+fp_signed_bin_size.o fp_read_signed_bin.o fp_to_signed_bin.o \
+fp_read_radix.o fp_toradix.o fp_count_bits.o fp_reverse.o fp_s_rmap.o \
+\
+
+libtfm.a: $(OBJECTS)
+	$(AR) $(ARFLAGS) libtfm.a $(OBJECTS)
+	ranlib libtfm.a
+
+mtest/mtest: mtest/mtest.c
+	cd mtest && $(MAKE) mtest
+
+test: libtfm.a demo/test.o mtest/mtest
+	$(CC) demo/test.o libtfm.a $(PROF) -o test
+
+stest: libtfm.a demo/stest.o
+	$(CC) demo/stest.o libtfm.a -o stest
+
+docdvi: tfm.tex
+	touch tfm.ind
+	latex tfm >/dev/null
+	latex tfm >/dev/null
+	makeindex tfm
+	latex tfm >/dev/null
+
+docs: docdvi
+	latex tfm >/dev/null
+	dvipdf tfm
+	mv -f tfm.pdf doc
+
+clean:
+	rm -f $(OBJECTS) *.a demo/*.o test tfm.aux tfm.dvi tfm.idx tfm.ilg tfm.ind tfm.lof tfm.log tfm.toc stest
+	cd mtest && $(MAKE) clean
+
+zipup: docs clean
+	perl gen.pl ; mv mpi.c pre_gen/ ; \
+	cd .. ; rm -rf tfm* tomsfastmath-$(VERSION) ; mkdir tomsfastmath-$(VERSION) ; \
+	cp -R ./tomsfastmath/* ./tomsfastmath-$(VERSION)/ ; \
+	tar -c tomsfastmath-$(VERSION)/* | bzip2 -9vvc > tfm-$(VERSION).tar.bz2 ; \
+	zip -9r tfm-$(VERSION).zip tomsfastmath-$(VERSION)/*
diff --git a/makefile.gba b/makefile.gba
new file mode 100644
index 0000000..89e3451
--- /dev/null
+++ b/makefile.gba
@@ -0,0 +1,55 @@
+#makefile for TomsFastMath
+#
+#For the GameboyAdvance... er.... ARMv4
+SFLAGS = $(CFLAGS) -Wall -W -Wshadow -I./ -O3 -funroll-all-loops -mthumb -mthumb-interwork -I../devkitadv/mylib/lib
+CFLAGS += -Wall -W -Wshadow -I./ -O3 -funroll-all-loops -marm -mthumb-interwork -I../devkitadv/mylib/lib
+
+#profiling
+#PROF=-pg -g
+#CFLAGS += $(PROF)
+
+#speed
+CFLAGS += -fomit-frame-pointer
+
+VERSION=0.01
+
+default: libtfm.a
+
+OBJECTS = \
+fp_set.o \
+\
+fp_rshd.o fp_lshd.o fp_div_2d.o fp_mod_2d.o fp_mul_2d.o fp_2expt.o \
+fp_mul_2.o fp_div_2.o \
+\
+fp_cnt_lsb.o \
+\
+fp_add.o fp_sub.o fp_mul.o fp_sqr.o fp_div.o fp_mod.o \
+s_fp_add.o s_fp_sub.o \
+\
+fp_cmp_d.o fp_add_d.o fp_sub_d.o fp_mul_d.o fp_div_d.o fp_mod_d.o \
+fp_addmod.o fp_submod.o fp_mulmod.o fp_sqrmod.o fp_invmod.o \
+fp_gcd.o fp_lcm.o fp_prime_miller_rabin.o fp_isprime.o \
+fp_prime_random_ex.o fp_mul_comba.o fp_sqr_comba.o \
+\
+fp_montgomery_setup.o fp_montgomery_calc_normalization.o fp_montgomery_reduce.o \
+\
+fp_exptmod.o \
+\
+fp_cmp.o fp_cmp_mag.o \
+\
+fp_unsigned_bin_size.o fp_read_unsigned_bin.o fp_to_unsigned_bin.o \
+fp_signed_bin_size.o fp_read_signed_bin.o fp_to_signed_bin.o \
+fp_read_radix.o fp_toradix.o fp_count_bits.o fp_reverse.o fp_s_rmap.o \
+\
+
+libtfm.a: $(OBJECTS)
+	$(AR) $(ARFLAGS) libtfm.a $(OBJECTS)
+	ranlib libtfm.a
+
+demo/stest.o: demo/stest.c
+	$(CC) $(SFLAGS) -DGBA_MODE demo/stest.c -c -o demo/stest.o
+
+stest: libtfm.a demo/stest.o
+	$(CC) -mthumb -mthumb-interwork demo/stest.o libtfm.a ../devkitadv/mylib/lib/gba.a -o stest.elf
+	objcopy -O binary stest.elf stest.bin
+
diff --git a/mtest/makefile b/mtest/makefile
new file mode 100644
index 0000000..a0cdf72
--- /dev/null
+++ b/mtest/makefile
@@ -0,0 +1,9 @@
+CFLAGS += -Wall -W -O3
+
+default: mtest
+
+mtest: mtest.o
+	$(CC) mtest.o -ltommath -o mtest
+
+clean:
+	rm -f *.o mtest
diff --git a/mtest/mtest.c b/mtest/mtest.c
new file mode 100644
index 0000000..be85362
--- /dev/null
+++ b/mtest/mtest.c
@@ -0,0 +1,320 @@
+/* makes a bignum test harness with NUM tests per operation
+ *
+ * the
output is made in the following format [one parameter per line] + +operation +operand1 +operand2 +[... operandN] +result1 +result2 +[... resultN] + +So for example "a * b mod n" would be + +mulmod +a +b +n +a*b mod n + +e.g. if a=3, b=4 n=11 then + +mulmod +3 +4 +11 +1 + + */ + +#ifdef MP_8BIT +#define THE_MASK 127 +#else +#define THE_MASK 32767 +#endif + +#include +#include +#include +#include + +FILE *rng; + +/* 1-2048 bit numbers */ +void rand_num(mp_int *a) +{ + int n, size; + unsigned char buf[2048]; + + size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % 256; + buf[0] = (fgetc(rng)&1)?1:0; + fread(buf+1, 1, size, rng); + while (buf[1] == 0) buf[1] = fgetc(rng); + mp_read_raw(a, buf, 1+size); +} + +/* 1-256 bit numbers (to test things like exptmod) */ +void rand_num2(mp_int *a) +{ + int n, size; + unsigned char buf[2048]; + + size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % 32; + buf[0] = (fgetc(rng)&1)?1:0; + fread(buf+1, 1, size, rng); + while (buf[1] == 0) buf[1] = fgetc(rng); + mp_read_raw(a, buf, 1+size); +} + +#define mp_to64(a, b) mp_toradix(a, b, 64) + +int main(void) +{ + int n, tmp; + mp_int a, b, c, d, e; + clock_t t1; + char buf[4096]; + + mp_init(&a); + mp_init(&b); + mp_init(&c); + mp_init(&d); + mp_init(&e); + + + /* initial (2^n - 1)^2 testing, makes sure the comba multiplier works [it has the new carry code] */ +/* + mp_set(&a, 1); + for (n = 1; n < 8192; n++) { + mp_mul(&a, &a, &c); + printf("mul\n"); + mp_to64(&a, buf); + printf("%s\n%s\n", buf, buf); + mp_to64(&c, buf); + printf("%s\n", buf); + + mp_add_d(&a, 1, &a); + mp_mul_2(&a, &a); + mp_sub_d(&a, 1, &a); + } +*/ + + rng = fopen("/dev/urandom", "rb"); + if (rng == NULL) { + rng = fopen("/dev/random", "rb"); + if (rng == NULL) { + fprintf(stderr, "\nWarning: stdin used as random source\n\n"); + rng = stdin; + } + } + + t1 = clock(); + for (;;) { +#if 0 + if (clock() - t1 > CLOCKS_PER_SEC) { + sleep(2); + t1 = clock(); + } +#endif + n = fgetc(rng) % 16; + + if (n == 0) { + /* add tests */ + 
rand_num(&a); + rand_num(&b); + mp_add(&a, &b, &c); + printf("add\n"); + mp_to64(&a, buf); + printf("%s\n", buf); + mp_to64(&b, buf); + printf("%s\n", buf); + mp_to64(&c, buf); + printf("%s\n", buf); + } else if (n == 1) { + /* sub tests */ + rand_num(&a); + rand_num(&b); + mp_sub(&a, &b, &c); + printf("sub\n"); + mp_to64(&a, buf); + printf("%s\n", buf); + mp_to64(&b, buf); + printf("%s\n", buf); + mp_to64(&c, buf); + printf("%s\n", buf); + } else if (n == 2) { + /* mul tests */ + rand_num(&a); + rand_num(&b); + mp_mul(&a, &b, &c); + printf("mul\n"); + mp_to64(&a, buf); + printf("%s\n", buf); + mp_to64(&b, buf); + printf("%s\n", buf); + mp_to64(&c, buf); + printf("%s\n", buf); + } else if (n == 3) { + /* div tests */ + rand_num(&a); + rand_num(&b); + mp_div(&a, &b, &c, &d); + printf("div\n"); + mp_to64(&a, buf); + printf("%s\n", buf); + mp_to64(&b, buf); + printf("%s\n", buf); + mp_to64(&c, buf); + printf("%s\n", buf); + mp_to64(&d, buf); + printf("%s\n", buf); + } else if (n == 4) { + /* sqr tests */ + rand_num(&a); + mp_sqr(&a, &b); + printf("sqr\n"); + mp_to64(&a, buf); + printf("%s\n", buf); + mp_to64(&b, buf); + printf("%s\n", buf); + } else if (n == 5) { + /* mul_2d test */ + rand_num(&a); + mp_copy(&a, &b); + n = fgetc(rng) & 63; + mp_mul_2d(&b, n, &b); + mp_to64(&a, buf); + printf("mul2d\n"); + printf("%s\n", buf); + printf("%d\n", n); + mp_to64(&b, buf); + printf("%s\n", buf); + } else if (n == 6) { + /* div_2d test */ + rand_num(&a); + mp_copy(&a, &b); + n = fgetc(rng) & 63; + mp_div_2d(&b, n, &b, NULL); + mp_to64(&a, buf); + printf("div2d\n"); + printf("%s\n", buf); + printf("%d\n", n); + mp_to64(&b, buf); + printf("%s\n", buf); + } else if (n == 7) { + /* gcd test */ + rand_num(&a); + rand_num(&b); + a.sign = MP_ZPOS; + b.sign = MP_ZPOS; + mp_gcd(&a, &b, &c); + printf("gcd\n"); + mp_to64(&a, buf); + printf("%s\n", buf); + mp_to64(&b, buf); + printf("%s\n", buf); + mp_to64(&c, buf); + printf("%s\n", buf); + } else if (n == 8) { + /* lcm test */ + 
rand_num(&a); + rand_num(&b); + a.sign = MP_ZPOS; + b.sign = MP_ZPOS; + mp_lcm(&a, &b, &c); + printf("lcm\n"); + mp_to64(&a, buf); + printf("%s\n", buf); + mp_to64(&b, buf); + printf("%s\n", buf); + mp_to64(&c, buf); + printf("%s\n", buf); + } else if (n == 9) { + /* exptmod test */ + rand_num2(&a); + rand_num2(&b); + rand_num2(&c); +// if (c.dp[0]&1) mp_add_d(&c, 1, &c); + a.sign = b.sign = c.sign = 0; + c.dp[0] |= 1; + if (c.used <= 2) continue; +// if (mp_cmp(&a, &c) != MP_LT) continue; +// if (mp_cmp(&b, &c) != MP_LT) continue; + mp_exptmod(&a, &b, &c, &d); + printf("expt\n"); + mp_to64(&a, buf); + printf("%s\n", buf); + mp_to64(&b, buf); + printf("%s\n", buf); + mp_to64(&c, buf); + printf("%s\n", buf); + mp_to64(&d, buf); + printf("%s\n", buf); + } else if (n == 10) { + /* invmod test */ + rand_num2(&a); + rand_num2(&b); + b.dp[0] |= 1; + b.sign = MP_ZPOS; + a.sign = MP_ZPOS; + mp_gcd(&a, &b, &c); + if (mp_cmp_d(&c, 1) != 0) continue; + if (mp_cmp_d(&b, 1) == 0) continue; + mp_invmod(&a, &b, &c); + printf("invmod\n"); + mp_to64(&a, buf); + printf("%s\n", buf); + mp_to64(&b, buf); + printf("%s\n", buf); + mp_to64(&c, buf); + printf("%s\n", buf); + } else if (n == 11) { + rand_num(&a); + mp_mul_2(&a, &a); + mp_div_2(&a, &b); + printf("div2\n"); + mp_to64(&a, buf); + printf("%s\n", buf); + mp_to64(&b, buf); + printf("%s\n", buf); + } else if (n == 12) { + rand_num(&a); + mp_mul_2(&a, &b); + printf("mul2\n"); + mp_to64(&a, buf); + printf("%s\n", buf); + mp_to64(&b, buf); + printf("%s\n", buf); + } else if (n == 13) { + rand_num(&a); + tmp = abs(rand()) & THE_MASK; + mp_add_d(&a, tmp, &b); + printf("add_d\n"); + mp_to64(&a, buf); + printf("%s\n%d\n", buf, tmp); + mp_to64(&b, buf); + printf("%s\n", buf); + } else if (n == 14) { + rand_num(&a); + tmp = abs(rand()) & THE_MASK; + mp_sub_d(&a, tmp, &b); + printf("sub_d\n"); + mp_to64(&a, buf); + printf("%s\n%d\n", buf, tmp); + mp_to64(&b, buf); + printf("%s\n", buf); + } else if (n == 15) { + rand_num(&a); + tmp = 
abs(rand()) & THE_MASK; + mp_mul_d(&a, tmp, &b); + printf("mul_d\n"); + mp_to64(&a, buf); + printf("%s\n%d\n", buf, tmp); + mp_to64(&b, buf); + printf("%s\n", buf); + } + } + fclose(rng); + return 0; +} diff --git a/pre_gen/mpi.c b/pre_gen/mpi.c new file mode 100644 index 0000000..3ac1f64 --- /dev/null +++ b/pre_gen/mpi.c @@ -0,0 +1,4459 @@ +/* Start: fp_2expt.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* computes a = 2**b */ +void fp_2expt(fp_int *a, int b) +{ + int z; + + /* zero a as per default */ + fp_zero (a); + + if (b < 0) { + return; + } + + z = b / DIGIT_BIT; + if (z >= FP_SIZE) { + return; + } + + /* set the used count of where the bit will go */ + a->used = z + 1; + + /* put the single bit in its place */ + a->dp[z] = ((fp_digit)1) << (b % DIGIT_BIT); +} + + +/* End: fp_2expt.c */ + +/* Start: fp_add.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +void fp_add(fp_int *a, fp_int *b, fp_int *c) +{ + int sa, sb; + + /* get sign of both inputs */ + sa = a->sign; + sb = b->sign; + + /* handle two cases, not four */ + if (sa == sb) { + /* both positive or both negative */ + /* add their magnitudes, copy the sign */ + c->sign = sa; + s_fp_add (a, b, c); + } else { + /* one positive, the other negative */ + /* subtract the one with the greater magnitude from */ + /* the one of the lesser magnitude. The result gets */ + /* the sign of the one with the greater magnitude. 
*/ + if (fp_cmp_mag (a, b) == FP_LT) { + c->sign = sb; + s_fp_sub (b, a, c); + } else { + c->sign = sa; + s_fp_sub (a, b, c); + } + } +} + +/* End: fp_add.c */ + +/* Start: fp_add_d.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = a + b */ +void fp_add_d(fp_int *a, fp_digit b, fp_int *c) +{ + fp_int tmp; + fp_set(&tmp, b); + fp_add(a,&tmp,c); +} + +/* End: fp_add_d.c */ + +/* Start: fp_addmod.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* d = a + b (mod c) */ +int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d) +{ + fp_int tmp; + fp_zero(&tmp); + fp_add(a, b, &tmp); + return fp_mod(&tmp, c, d); +} + +/* End: fp_addmod.c */ + +/* Start: fp_cmp.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +int fp_cmp(fp_int *a, fp_int *b) +{ + if (a->sign == FP_NEG && b->sign == FP_ZPOS) { + return FP_LT; + } else if (a->sign == FP_ZPOS && b->sign == FP_NEG) { + return FP_GT; + } else { + /* compare digits */ + if (a->sign == FP_NEG) { + /* if negative compare opposite direction */ + return fp_cmp_mag(b, a); + } else { + return fp_cmp_mag(a, b); + } + } +} + +/* End: fp_cmp.c */ + +/* Start: fp_cmp_d.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. 
+ * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* compare against a single digit */ +int fp_cmp_d(fp_int *a, fp_digit b) +{ + /* compare based on sign */ + if ((b && a->used == 0) || a->sign == FP_NEG) { + return FP_LT; + } + + /* compare based on magnitude */ + if (a->used > 1) { + return FP_GT; + } + + /* compare the only digit of a to b */ + if (a->dp[0] > b) { + return FP_GT; + } else if (a->dp[0] < b) { + return FP_LT; + } else { + return FP_EQ; + } + +} + +/* End: fp_cmp_d.c */ + +/* Start: fp_cmp_mag.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +int fp_cmp_mag(fp_int *a, fp_int *b) +{ + int x; + + if (a->used > b->used) { + return FP_GT; + } else if (a->used < b->used) { + return FP_LT; + } else { + for (x = a->used - 1; x >= 0; x--) { + if (a->dp[x] > b->dp[x]) { + return FP_GT; + } else if (a->dp[x] < b->dp[x]) { + return FP_LT; + } + } + } + return FP_EQ; +} + + +/* End: fp_cmp_mag.c */ + +/* Start: fp_cnt_lsb.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. 
+ * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +static const int lnz[16] = { + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 +}; + +/* Counts the number of lsbs which are zero before the first zero bit */ +int fp_cnt_lsb(fp_int *a) +{ + int x; + fp_digit q, qq; + + /* easy out */ + if (fp_iszero(a) == 1) { + return 0; + } + + /* scan lower digits until non-zero */ + for (x = 0; x < a->used && a->dp[x] == 0; x++); + q = a->dp[x]; + x *= DIGIT_BIT; + + /* now scan this digit until a 1 is found */ + if ((q & 1) == 0) { + do { + qq = q & 15; + x += lnz[qq]; + q >>= 4; + } while (qq == 0); + } + return x; +} + + +/* End: fp_cnt_lsb.c */ + +/* Start: fp_count_bits.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +int fp_count_bits (fp_int * a) +{ + int r; + fp_digit q; + + /* shortcut */ + if (a->used == 0) { + return 0; + } + + /* get number of digits and add that */ + r = (a->used - 1) * DIGIT_BIT; + + /* take the last digit and count the bits in it */ + q = a->dp[a->used - 1]; + while (q > ((fp_digit) 0)) { + ++r; + q >>= ((fp_digit) 1); + } + return r; +} + +/* End: fp_count_bits.c */ + +/* Start: fp_div.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* a/b => cb + d == a */ +int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d) +{ + fp_int q, x, y, t1, t2; + int n, t, i, norm, neg; + + /* is divisor zero ? 
*/ + if (fp_iszero (b) == 1) { + return FP_VAL; + } + + /* if a < b then q=0, r = a */ + if (fp_cmp_mag (a, b) == FP_LT) { + if (d != NULL) { + fp_copy (a, d); + } + if (c != NULL) { + fp_zero (c); + } + return FP_OKAY; + } + + fp_init(&q); + q.used = a->used + 2; + + fp_init(&t1); + fp_init(&t2); + fp_init_copy(&x, a); + fp_init_copy(&y, b); + + /* fix the sign */ + neg = (a->sign == b->sign) ? FP_ZPOS : FP_NEG; + x.sign = y.sign = FP_ZPOS; + + /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */ + norm = fp_count_bits(&y) % DIGIT_BIT; + if (norm < (int)(DIGIT_BIT-1)) { + norm = (DIGIT_BIT-1) - norm; + fp_mul_2d (&x, norm, &x); + fp_mul_2d (&y, norm, &y); + } else { + norm = 0; + } + + /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */ + n = x.used - 1; + t = y.used - 1; + + /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */ + fp_lshd (&y, n - t); /* y = y*b**{n-t} */ + + while (fp_cmp (&x, &y) != FP_LT) { + ++(q.dp[n - t]); + fp_sub (&x, &y, &x); + } + + /* reset y by shifting it back down */ + fp_rshd (&y, n - t); + + /* step 3. for i from n down to (t + 1) */ + for (i = n; i >= (t + 1); i--) { + if (i > x.used) { + continue; + } + + /* step 3.1 if xi == yt then set q{i-t-1} to b-1, + * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */ + if (x.dp[i] == y.dp[t]) { + q.dp[i - t - 1] = ((((fp_word)1) << DIGIT_BIT) - 1); + } else { + fp_word tmp; + tmp = ((fp_word) x.dp[i]) << ((fp_word) DIGIT_BIT); + tmp |= ((fp_word) x.dp[i - 1]); + tmp /= ((fp_word) y.dp[t]); + q.dp[i - t - 1] = (fp_digit) (tmp); + } + + /* while (q{i-t-1} * (yt * b + y{t-1})) > + xi * b**2 + xi-1 * b + xi-2 + + do q{i-t-1} -= 1; + */ + q.dp[i - t - 1] = (q.dp[i - t - 1] + 1); + do { + q.dp[i - t - 1] = (q.dp[i - t - 1] - 1); + + /* find left hand */ + fp_zero (&t1); + t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1]; + t1.dp[1] = y.dp[t]; + t1.used = 2; + fp_mul_d (&t1, q.dp[i - t - 1], &t1); + + /* find right hand */ + t2.dp[0] = (i - 2 < 0) ? 
0 : x.dp[i - 2]; + t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1]; + t2.dp[2] = x.dp[i]; + t2.used = 3; + } while (fp_cmp_mag(&t1, &t2) == FP_GT); + + /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */ + fp_mul_d (&y, q.dp[i - t - 1], &t1); + fp_lshd (&t1, i - t - 1); + fp_sub (&x, &t1, &x); + + /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */ + if (x.sign == FP_NEG) { + fp_copy (&y, &t1); + fp_lshd (&t1, i - t - 1); + fp_add (&x, &t1, &x); + q.dp[i - t - 1] = q.dp[i - t - 1] - 1; + } + } + + /* now q is the quotient and x is the remainder + * [which we have to normalize] + */ + + /* get sign before writing to c */ + x.sign = x.used == 0 ? FP_ZPOS : a->sign; + + if (c != NULL) { + fp_clamp (&q); + fp_copy (&q, c); + c->sign = neg; + } + + if (d != NULL) { + fp_div_2d (&x, norm, &x, NULL); + +/* the following is a kludge, essentially we were seeing the right remainder but + with excess digits that should have been zero + */ + for (i = b->used; i < x.used; i++) { + x.dp[i] = 0; + } + fp_clamp(&x); + fp_copy (&x, d); + } + + return FP_OKAY; +} + +/* End: fp_div.c */ + +/* Start: fp_div_2.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. 
+ * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* b = a/2 */ +void fp_div_2(fp_int * a, fp_int * b) +{ + int x, oldused; + + oldused = b->used; + b->used = a->used; + { + register fp_digit r, rr, *tmpa, *tmpb; + + /* source alias */ + tmpa = a->dp + b->used - 1; + + /* dest alias */ + tmpb = b->dp + b->used - 1; + + /* carry */ + r = 0; + for (x = b->used - 1; x >= 0; x--) { + /* get the carry for the next iteration */ + rr = *tmpa & 1; + + /* shift the current digit, add in carry and store */ + *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1)); + + /* forward carry to next iteration */ + r = rr; + } + + /* zero excess digits */ + tmpb = b->dp + b->used; + for (x = b->used; x < oldused; x++) { + *tmpb++ = 0; + } + } + b->sign = a->sign; + fp_clamp (b); +} + +/* End: fp_div_2.c */ + +/* Start: fp_div_2d.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. 
+ * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = a / 2**b */ +void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d) +{ + fp_digit D, r, rr; + int x; + fp_int t; + + /* if the shift count is <= 0 then we do no work */ + if (b <= 0) { + fp_copy (a, c); + if (d != NULL) { + fp_zero (d); + } + return; + } + + fp_init(&t); + + /* get the remainder */ + if (d != NULL) { + fp_mod_2d (a, b, &t); + } + + /* copy */ + fp_copy(a, c); + + /* shift by as many digits in the bit count */ + if (b >= (int)DIGIT_BIT) { + fp_rshd (c, b / DIGIT_BIT); + } + + /* shift any bit count < DIGIT_BIT */ + D = (fp_digit) (b % DIGIT_BIT); + if (D != 0) { + register fp_digit *tmpc, mask, shift; + + /* mask */ + mask = (((fp_digit)1) << D) - 1; + + /* shift for lsb */ + shift = DIGIT_BIT - D; + + /* alias */ + tmpc = c->dp + (c->used - 1); + + /* carry */ + r = 0; + for (x = c->used - 1; x >= 0; x--) { + /* get the lower bits of this word in a temp */ + rr = *tmpc & mask; + + /* shift the current word and mix in the carry bits from the previous word */ + *tmpc = (*tmpc >> D) | (r << shift); + --tmpc; + + /* set the carry to the carry bits of the current word found above */ + r = rr; + } + } + fp_clamp (c); + if (d != NULL) { + fp_copy (&t, d); + } +} + +/* End: fp_div_2d.c */ + +/* Start: fp_div_d.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. 
+ * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +static int s_is_power_of_two(fp_digit b, int *p) +{ + int x; + + for (x = 1; x < DIGIT_BIT; x++) { + if (b == (((fp_digit)1)< cb + d == a */ +int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d) +{ + fp_int q; + fp_word w; + fp_digit t; + int ix; + + /* cannot divide by zero */ + if (b == 0) { + return FP_VAL; + } + + /* quick outs */ + if (b == 1 || fp_iszero(a) == 1) { + if (d != NULL) { + *d = 0; + } + if (c != NULL) { + fp_copy(a, c); + } + return FP_OKAY; + } + + /* power of two ? */ + if (s_is_power_of_two(b, &ix) == 1) { + if (d != NULL) { + *d = a->dp[0] & ((((fp_digit)1)<used; + q.sign = a->sign; + w = 0; + for (ix = a->used - 1; ix >= 0; ix--) { + w = (w << ((fp_word)DIGIT_BIT)) | ((fp_word)a->dp[ix]); + + if (w >= b) { + t = (fp_digit)(w / b); + w -= ((fp_word)t) * ((fp_word)b); + } else { + t = 0; + } + q.dp[ix] = (fp_digit)t; + } + + if (d != NULL) { + *d = (fp_digit)w; + } + + if (c != NULL) { + fp_clamp(&q); + fp_copy(&q, c); + } + + return FP_OKAY; +} + + +/* End: fp_div_d.c */ + +/* Start: fp_exptmod.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* y = g**x (mod b) + * Some restrictions... 
x must be positive and < b + */ + +int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) +{ + fp_int M[64], res; + fp_digit buf, mp; + int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; + + /* find window size */ + x = fp_count_bits (X); + if (x <= 7) { + winsize = 2; + } else if (x <= 36) { + winsize = 3; + } else if (x <= 140) { + winsize = 4; + } else if (x <= 450) { + winsize = 5; + } else { + winsize = 6; + } + + /* init M array */ + memset(M, 0, sizeof(fp_int)*(1< P so we reduce it first */ + fp_mod(G, P, &M[1]); + } else { + fp_copy(G, &M[1]); + } + fp_mulmod (&M[1], &res, P, &M[1]); + + /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */ + fp_copy (&M[1], &M[1 << (winsize - 1)]); + for (x = 0; x < (winsize - 1); x++) { + fp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)]); + fp_montgomery_reduce (&M[1 << (winsize - 1)], P, mp); + } + + /* create upper table */ + for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { + fp_mul(&M[x - 1], &M[1], &M[x]); + fp_montgomery_reduce(&M[x], P, mp); + } + + /* set initial mode and bit cnt */ + mode = 0; + bitcnt = 1; + buf = 0; + digidx = X->used - 1; + bitcpy = 0; + bitbuf = 0; + + for (;;) { + /* grab next digit as required */ + if (--bitcnt == 0) { + /* if digidx == -1 we are out of digits so break */ + if (digidx == -1) { + break; + } + /* read next digit and reset bitcnt */ + buf = X->dp[digidx--]; + bitcnt = (int)DIGIT_BIT; + } + + /* grab the next msb from the exponent */ + y = (fp_digit)(buf >> (DIGIT_BIT - 1)) & 1; + buf <<= (fp_digit)1; + + /* if the bit is zero and mode == 0 then we ignore it + * These represent the leading zero bits before the first 1 bit + * in the exponent. 
Technically this opt is not required but it + * does lower the # of trivial squaring/reductions used + */ + if (mode == 0 && y == 0) { + continue; + } + + /* if the bit is zero and mode == 1 then we square */ + if (mode == 1 && y == 0) { + fp_sqr(&res, &res); + fp_montgomery_reduce(&res, P, mp); + continue; + } + + /* else we add it to the window */ + bitbuf |= (y << (winsize - ++bitcpy)); + mode = 2; + + if (bitcpy == winsize) { + /* ok window is filled so square as required and multiply */ + /* square first */ + for (x = 0; x < winsize; x++) { + fp_sqr(&res, &res); + fp_montgomery_reduce(&res, P, mp); + } + + /* then multiply */ + fp_mul(&res, &M[bitbuf], &res); + fp_montgomery_reduce(&res, P, mp); + + /* empty window and reset */ + bitcpy = 0; + bitbuf = 0; + mode = 1; + } + } + + /* if bits remain then square/multiply */ + if (mode == 2 && bitcpy > 0) { + /* square then multiply if the bit is set */ + for (x = 0; x < bitcpy; x++) { + fp_sqr(&res, &res); + fp_montgomery_reduce(&res, P, mp); + + /* get next bit of the window */ + bitbuf <<= 1; + if ((bitbuf & (1 << winsize)) != 0) { + /* then multiply */ + fp_mul(&res, &M[1], &res); + fp_montgomery_reduce(&res, P, mp); + } + } + } + + /* fixup result if Montgomery reduction is used + * recall that any value in a Montgomery system is + * actually multiplied by R mod n. So we have + * to reduce one more time to cancel out the factor + * of R. + */ + fp_montgomery_reduce(&res, P, mp); + + /* swap res with Y */ + fp_copy (&res, Y); + return FP_OKAY; +} + +/* End: fp_exptmod.c */ + +/* Start: fp_gcd.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. 
+ * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = (a, b) */ +void fp_gcd(fp_int *a, fp_int *b, fp_int *c) +{ + fp_int u, v, r; + + /* either zero than gcd is the largest */ + if (fp_iszero (a) == 1 && fp_iszero (b) == 0) { + fp_abs (b, c); + return; + } + if (fp_iszero (a) == 0 && fp_iszero (b) == 1) { + fp_abs (a, c); + return; + } + + /* optimized. At this point if a == 0 then + * b must equal zero too + */ + if (fp_iszero (a) == 1) { + fp_zero(c); + return; + } + + /* sort inputs */ + if (fp_cmp_mag(a, b) != FP_LT) { + fp_init_copy(&u, a); + fp_init_copy(&v, b); + } else { + fp_init_copy(&u, b); + fp_init_copy(&v, a); + } + + fp_zero(&r); + while (fp_iszero(&v) == FP_NO) { + fp_mod(&u, &v, &r); + fp_copy(&v, &u); + fp_copy(&r, &v); + } + fp_copy(&u, c); +} + +/* End: fp_gcd.c */ + +/* Start: fp_invmod.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = 1/a (mod b) for odd b only */ +int fp_invmod(fp_int *a, fp_int *b, fp_int *c) +{ + fp_int x, y, u, v, B, D; + int neg; + + /* 2. [modified] b must be odd */ + if (fp_iseven (b) == FP_YES) { + return FP_VAL; + } + + /* init all our temps */ + fp_init(&x); fp_init(&y); + fp_init(&u); fp_init(&v); + fp_init(&B); fp_init(&D); + + /* x == modulus, y == value to invert */ + fp_copy(b, &x); + + /* we need y = |a| */ + fp_abs(a, &y); + + /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ + fp_copy(&x, &u); + fp_copy(&y, &v); + fp_set (&D, 1); + +top: + /* 4. while u is even do */ + while (fp_iseven (&u) == FP_YES) { + /* 4.1 u = u/2 */ + fp_div_2 (&u, &u); + + /* 4.2 if B is odd then */ + if (fp_isodd (&B) == FP_YES) { + fp_sub (&B, &x, &B); + } + /* B = B/2 */ + fp_div_2 (&B, &B); + } + + /* 5. 
while v is even do */ + while (fp_iseven (&v) == FP_YES) { + /* 5.1 v = v/2 */ + fp_div_2 (&v, &v); + + /* 5.2 if D is odd then */ + if (fp_isodd (&D) == FP_YES) { + /* D = (D-x)/2 */ + fp_sub (&D, &x, &D); + } + /* D = D/2 */ + fp_div_2 (&D, &D); + } + + /* 6. if u >= v then */ + if (fp_cmp (&u, &v) != FP_LT) { + /* u = u - v, B = B - D */ + fp_sub (&u, &v, &u); + fp_sub (&B, &D, &B); + } else { + /* v - v - u, D = D - B */ + fp_sub (&v, &u, &v); + fp_sub (&D, &B, &D); + } + + /* if not zero goto step 4 */ + if (fp_iszero (&u) == FP_NO) { + goto top; + } + + /* now a = C, b = D, gcd == g*v */ + + /* if v != 1 then there is no inverse */ + if (fp_cmp_d (&v, 1) != FP_EQ) { + return FP_VAL; + } + + /* b is now the inverse */ + neg = a->sign; + while (D.sign == FP_NEG) { + fp_add (&D, b, &D); + } + fp_copy (&D, c); + c->sign = neg; + return FP_OKAY; +} + +/* End: fp_invmod.c */ + +/* Start: fp_isprime.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. 
+ * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* a few primes */ +static const fp_digit primes[256] = { + 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013, + 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035, + 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059, + 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083, + 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD, + 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF, + 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107, + 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137, + + 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167, + 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199, + 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9, + 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7, + 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239, + 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265, + 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293, + 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF, + + 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301, + 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B, + 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371, + 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD, + 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5, + 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419, + 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449, + 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B, + + 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7, + 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503, + 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529, + 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F, + 0x0593, 0x0595, 0x0599, 
0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3, + 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7, + 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623, + 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653 +}; + +int fp_isprime(fp_int *a) +{ + fp_int b; + fp_digit d; + int r, res; + + /* do trial division */ + for (r = 0; r < 256; r++) { + fp_mod_d(a, primes[r], &d); + if (d == 0) { + return FP_NO; + } + } + + /* now do 8 miller rabins */ + for (r = 0; r < 8; r++) { + fp_set(&b, primes[r]); + fp_prime_miller_rabin(a, &b, &res); + if (res == FP_NO) { + return FP_NO; + } + } + return FP_YES; +} + +/* End: fp_isprime.c */ + +/* Start: fp_lcm.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = [a, b] */ +void fp_lcm(fp_int *a, fp_int *b, fp_int *c) +{ + fp_int t1, t2; + + fp_init(&t1); + fp_init(&t2); + fp_gcd(a, b, &t1); + if (fp_cmp_mag(a, b) == FP_GT) { + fp_div(a, &t1, &t2, NULL); + fp_mul(b, &t2, c); + } else { + fp_div(b, &t1, &t2, NULL); + fp_mul(a, &t2, c); + } +} + +/* End: fp_lcm.c */ + +/* Start: fp_lshd.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. 
+ * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +void fp_lshd(fp_int *a, int x) +{ + int y; + + /* move up and truncate as required */ + y = MIN(a->used + x - 1, (int)(FP_SIZE-1)); + + /* store new size */ + a->used = y + 1; + + /* move digits */ + for (; y >= x; y--) { + a->dp[y] = a->dp[y-x]; + } + + /* zero lower digits */ + for (; y >= 0; y--) { + a->dp[y] = 0; + } + + /* clamp digits */ + fp_clamp(a); +} + +/* End: fp_lshd.c */ + +/* Start: fp_mod.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = a mod b, 0 <= c < b */ +int fp_mod(fp_int *a, fp_int *b, fp_int *c) +{ + return fp_div(a, b, NULL, c); +} + + + +/* End: fp_mod.c */ + +/* Start: fp_mod_2d.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = a mod 2**d */ +void fp_mod_2d(fp_int *a, int b, fp_int *c) +{ + int x; + + /* zero if count less than or equal to zero */ + if (b <= 0) { + fp_zero(c); + return; + } + + /* get copy of input */ + fp_copy(a, c); + + /* if 2**d is larger than we just return */ + if (b >= (DIGIT_BIT * a->used)) { + return; + } + + /* zero digits above the last digit of the modulus */ + for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) { + c->dp[x] = 0; + } + /* clear the digit that is not completely outside/inside the modulus */ + c->dp[b / DIGIT_BIT] &= ~((fp_digit)0) >> (DIGIT_BIT - b); + fp_clamp (c); +} + +/* End: fp_mod_2d.c */ + +/* Start: fp_mod_d.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. 
That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = a mod b, 0 <= c < b */ +int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c) +{ + return fp_div_d(a, b, NULL, c); +} + +/* End: fp_mod_d.c */ + +/* Start: fp_montgomery_calc_normalization.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* computes a = B**n mod b without division or multiplication useful for + * normalizing numbers in a Montgomery system. + */ +void fp_montgomery_calc_normalization(fp_int *a, fp_int *b) +{ + int x, bits; + + /* how many bits of last digit does b use */ + bits = fp_count_bits (b) % DIGIT_BIT; + + /* compute A = B^(n-1) * 2^(bits-1) */ + if (b->used > 1) { + fp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1); + } else { + fp_set(a, 1); + ++bits; + } + + /* now compute C = A * B mod b */ + for (x = bits - 1; x < (int)DIGIT_BIT; x++) { + fp_mul_2 (a, a); + if (fp_cmp_mag (a, b) != FP_LT) { + s_fp_sub (a, b, a); + } + } +} + + +/* End: fp_montgomery_calc_normalization.c */ + +/* Start: fp_montgomery_reduce.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. 
+ * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +#if defined(TFM_X86) + +/* x86-32 code */ + +#define MONT_START + +#define MONT_FINI + +#define LOOP_START \ + mu = c[x] * mp; + +#define INNERMUL \ +asm( \ +"movl %7,%%eax \n\t" \ +"mull %6 \n\t" \ +"addl %%eax,%0 \n\t" \ +"adcl %%edx,%1 \n\t" \ +"adcl $0,%2 \n\t" \ +:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \ + "g"(mu), "g"(*tmpm++) \ + : "%eax", "%edx", "%cc"); + +#define PROPCARRY \ +asm( \ +"movl %1,%%eax \n\t" \ +"addl %%eax,%6 \n\t" \ +"movl %2,%%eax \n\t" \ +"adcl %%eax,%7 \n\t" \ +"adcl $0,%8 \n\t" \ +:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \ + "m"(_c[OFF0+1]), "m"(_c[OFF1+1]), "m"(_c[OFF2+1]) \ +: "%eax", "%cc"); + +#elif defined(TFM_X86_64) +/* x86-64 code */ + +#define MONT_START + +#define MONT_FINI + +#define LOOP_START \ + mu = c[x] * mp; + +#define INNERMUL \ +asm( \ +"movq %7,%%rax \n\t" \ +"mulq %6 \n\t" \ +"addq %%rax,%0 \n\t" \ +"adcq %%rdx,%1 \n\t" \ +"adcq $0,%2 \n\t" \ +:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \ + "g"(mu), "g"(*tmpm++) \ + : "%rax", "%rdx", "%cc"); + +#define PROPCARRY \ +asm( \ +"movq %1,%%rax \n\t" \ +"addq %%rax,%6 \n\t" \ +"movq %2,%%rax \n\t" \ +"adcq %%rax,%7 \n\t" \ +"adcq $0,%8 \n\t" \ +:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \ + "m"(_c[OFF0+1]), "m"(_c[OFF1+1]), "m"(_c[OFF2+1]) \ +: "%rax", "%cc"); + +#elif defined(TFM_SSE2) + +/* SSE2 code */ + +#define MONT_START \ +asm("movd %0,%%mm2"::"g"(mp)); + +#define MONT_FINI \ +asm("emms"); + +#define LOOP_START \ +asm(\ +"movd %0,%%mm1 \n\t" \ +"pmuludq %%mm2,%%mm1 \n\t" \ +:: "g"(c[x]), "g"(mp)); + +#define INNERMUL \ +asm( \ +"movd %6,%%mm0 \n\t" \ +"pmuludq %%mm1,%%mm0 \n\t" \ +"movd %%mm0,%%eax \n\t" \ +"psrlq $32, %%mm0 \n\t" \ +"addl %%eax,%0 \n\t" \ +"movd %%mm0,%%eax \n\t" \ +"adcl %%eax,%1 \n\t" \ 
+"adcl $0,%2 \n\t" \ +:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \ + "g"(*tmpm++) \ + : "%eax", "%cc"); + +#define PROPCARRY \ +asm( \ +"movl %1,%%eax \n\t" \ +"addl %%eax,%6 \n\t" \ +"movl %2,%%eax \n\t" \ +"adcl %%eax,%7 \n\t" \ +"adcl $0,%8 \n\t" \ +:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \ + "m"(_c[OFF0+1]), "m"(_c[OFF1+1]), "m"(_c[OFF2+1]) \ +: "%eax", "%cc"); + +#elif defined(TFM_ARM) + +/* ISO C code */ +#define MONT_START + +#define MONT_FINI + +#define LOOP_START \ + mu = c[x] * mp; + +/* NOTE: later write it using two regs instead of three for _c + ... */ +#define INNERMUL \ +asm( \ +"UMULL r0,r1,%0,%1 \n\t" \ +"LDR r2,[%2] \n\t" \ +"ADDS r2,r2,r0 \n\t" \ +"STR r2,[%2] \n\t" \ +"LDR r2,[%3] \n\t" \ +"ADCS r2,r2,r1 \n\t" \ +"STR r2,[%3] \n\t" \ +"LDR r2,[%4] \n\t" \ +"ADC r2,r2,#0 \n\t" \ +"STR r2,[%4] \n\t" \ +::"r"(mu),"r"(*tmpm++),"r"(_c + OFF0),"r"(_c + OFF1),"r"(_c + OFF2):"r0", "r1", "r2", "%cc"); + +#define PROPCARRY \ +asm( \ +"LDR r0,[%1] \n\t" \ +"LDR r1,[%0,#4] \n\t" \ +"ADDS r0,r0,r1 \n\t" \ +"STR r0,[%0,#4] \n\t" \ +"LDR r0,[%2] \n\t" \ +"LDR r1,[%1,#4] \n\t" \ +"ADCS r0,r0,r1 \n\t" \ +"STR r0,[%1,#4] \n\t" \ +"LDR r0,[%2,#4] \n\t" \ +"ADC r0,r0,#0 \n\t" \ +"STR r0,[%2,#4] \n\t" \ +::"r"(_c + OFF0),"r"(_c + OFF1),"r"(_c + OFF2):"r0", "r1", "%cc"); + +#else + +/* ISO C code */ +#define MONT_START + +#define MONT_FINI + +#define LOOP_START \ + mu = c[x] * mp; + +#define INNERMUL \ + t = ((fp_word)mu) * ((fp_word)*tmpm++); \ + _c[OFF0] += t; if (_c[OFF0] < (fp_digit)t) ++_c[OFF1]; \ + _c[OFF1] += (t>>DIGIT_BIT); if (_c[OFF1] < (fp_digit)(t>>DIGIT_BIT)) ++_c[OFF2]; \ + +#define PROPCARRY \ + _c[OFF0+1] += _c[OFF1]; if (_c[OFF0+1] < _c[OFF1]) ++_c[OFF1+1]; \ + _c[OFF1+1] += _c[OFF2]; if (_c[OFF1+1] < _c[OFF2]) ++_c[OFF2+1]; + + +#endif + + +#define OFF0 (0) +#define OFF1 (FP_SIZE) +#define OFF2 (FP_SIZE+FP_SIZE) + +/* computes x/R == x (mod N) via 
Montgomery Reduction */
+/* a   value to reduce; overwritten with the result
+ * m   modulus; m->used fixes the number of rounds (pa)
+ * mp  per-modulus digit handed to LOOP_START / MONT_START
+ *     (presumably the rho produced by fp_montgomery_setup -- confirm)
+ *
+ * Works in a local buffer c of 3*FP_SIZE digits that is read at offsets
+ * OFF0/OFF1/OFF2 by the INNERMUL and PROPCARRY macros.  Those macros,
+ * together with MONT_START, LOOP_START and MONT_FINI, refer to the
+ * locals c, x, mp, mu, _c, tmpm and t by name, so the declarations
+ * below must keep these exact identifiers.
+ */
+void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
+{
+   fp_digit c[3*FP_SIZE], *_c, *tmpm, mu;
+   int oldused, x, y, pa;
+   fp_word t;
+
+   /* now zero the buff */
+   pa = m->used;
+   memset(c, 0, sizeof(c));
+
+   /* copy the input */
+   oldused = a->used;
+   for (x = 0; x < oldused; x++) {
+      c[x] = a->dp[x];
+   }
+
+   MONT_START;
+
+   /* now let's get bizz-sy! */
+   /* one round per modulus digit */
+   for (x = 0; x < pa; x++) {
+      /* get Mu for this round */
+      LOOP_START;
+
+      /* our friendly neighbourhood alias */
+      _c = c + x;
+      tmpm = m->dp;
+
+      /* accumulate mu * m[y] at position x + y; INNERMUL advances tmpm itself */
+      for (y = 0; y < pa; y++) {
+         INNERMUL;
+         ++_c;
+      }
+      /* send carry up man... */
+      /* only the column at position x is pushed into position x + 1 here */
+      _c = c + x;
+      PROPCARRY;
+   }
+
+   /* fix the rest of the carries */
+   /* x continues from pa; positions pa .. 2*pa+1 are propagated in order */
+   _c = c + pa;
+   for (; x < pa * 2 + 2; x++) {
+      PROPCARRY;
+      ++_c;
+   }
+
+   /* now copy out */
+   /* the result is the pa+1 digits starting at c[pa] */
+   _c = c + pa;
+   tmpm = a->dp;
+   for (x = 0; x < pa+1; x++) {
+      *tmpm++ = *_c++;
+   }
+
+   /* clear whatever remains of the old, longer value */
+   for (; x < oldused; x++) {
+      *tmpm++ = 0;
+   }
+
+   MONT_FINI;
+
+   a->used = pa+1;
+   fp_clamp(a);
+
+   /* if A >= m then A = A - m */
+   if (fp_cmp_mag (a, m) != FP_LT) {
+      s_fp_sub (a, m, a);
+   }
+}
+
+/* End: fp_montgomery_reduce.c */
+
+/* Start: fp_montgomery_setup.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include
+
+/* setups the montgomery reduction */
+/* a    modulus; only its lowest digit a->dp[0] is read
+ * rho  receives 2**DIGIT_BIT - x truncated to fp_digit, where x is the
+ *      inverse of a->dp[0] modulo the digit radix (so rho = -1/a)
+ *
+ * Returns FP_VAL when the lowest digit is even (no inverse exists),
+ * FP_OKAY otherwise.
+ */
+int fp_montgomery_setup(fp_int *a, fp_digit *rho)
+{
+   fp_digit x, b;
+
+/* fast inversion mod 2**k
+ *
+ * Based on the fact that
+ *
+ * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
+ * => 2*X*A - X*X*A*A = 1
+ * => 2*(1) - (1) = 1
+ */
+   b = a->dp[0];
+
+   if ((b & 1) == 0) {
+      return FP_VAL;
+   }
+
+   /* each step doubles the number of correct low bits of the inverse */
+   x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
+   x *= 2 - b * x; /* here x*a==1 mod 2**8 */
+   x *= 2 - b * x; /* here x*a==1 mod 2**16 */
+   x *= 2 - b * x; /* here x*a==1 mod 2**32 */
+#ifdef FP_64BIT
+   x *= 2 - b * x; /* here x*a==1 mod 2**64 */
+#endif
+
+   /* rho = -1/m mod b */
+   *rho = (((fp_word) 1 << ((fp_word) DIGIT_BIT)) - ((fp_word)x));
+
+   return FP_OKAY;
+}
+
+
+/* End: fp_montgomery_setup.c */
+
+/* Start: fp_mul.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include
+
+/* c = a * b */
+/* Dispatcher: y is the larger and yy the smaller digit count of the two
+ * operands.  When yy <= 8 or y <= 64 one of the comba multipliers is
+ * used; otherwise one level of Karatsuba is applied with split point
+ * r = yy / 2, recursing into fp_mul() for the three half-size products.
+ */
+void fp_mul(fp_int *A, fp_int *B, fp_int *C)
+{
+   int r, y, yy, s;
+   fp_int ac, bd, comp, amb, cmd, t1, t2;
+
+   y = MAX(A->used, B->used);
+   yy = MIN(A->used, B->used);
+   if (yy <= 8 || y <= 64) {
+
+      /* pick a comba (unrolled 4/8/16/32 x or rolled) based on the size
+         of the largest input. We also want to avoid doing excess mults if the
+         inputs are not close to the next power of two. That is, for example,
+         if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications
+      */
+      if (y <= 4) {
+         fp_mul_comba4(A,B,C);
+      } else if (y <= 8) {
+         fp_mul_comba8(A,B,C);
+      } else if (y <= 16 && y >= 12) {
+         fp_mul_comba16(A,B,C);
+#ifdef TFM_HUGE
+      } else if (y <= 32 && y >= 28) {
+         fp_mul_comba32(A,B,C);
+#endif
+      } else {
+         fp_mul_comba(A,B,C);
+      }
+   } else {
+      /* do the karatsuba action
+
+         if A = ab and B = cd for ||a|| = r we need to solve
+
+         ac*r^2 + (-(a-b)(c-d) + ac + bd)*r + bd
+
+         So we solve for the three products then we form the final result with careful shifting
+         and addition.
+
+Obvious points of optimization
+
+- "ac" parts can be memcpy'ed with an offset [all you have to do is zero upto the next 8 digits]
+- Similarly the "bd" parts can be memcpy'ed and zeroed to 8
+-
+
+      */
+      /* get our value of r */
+      r = yy >> 1;
+
+      /* now solve for ac */
+      /* t1 = high part of A (digits r and up), written out by hand */
+// fp_copy(A, &t1); fp_rshd(&t1, r);
+      for (s = 0; s < A->used - r; s++) {
+         t1.dp[s] = A->dp[s+r];
+      }
+      for (; s < FP_SIZE; s++) {
+         t1.dp[s] = 0;
+      }
+      if (A->used >= r) {
+         t1.used = A->used - r;
+      } else {
+         t1.used = 0;
+      }
+      t1.sign = A->sign;
+
+      /* t2 = high part of B, same construction */
+// fp_copy(B, &t2); fp_rshd(&t2, r);
+      for (s = 0; s < B->used - r; s++) {
+         t2.dp[s] = B->dp[s+r];
+      }
+      for (; s < FP_SIZE; s++) {
+         t2.dp[s] = 0;
+      }
+      if (B->used >= r) {
+         t2.used = B->used - r;
+      } else {
+         t2.used = 0;
+      }
+      t2.sign = B->sign;
+
+      /* keep copies of the high parts: amb and cmd become (a-b) and (c-d) below */
+      fp_copy(&t1, &amb); fp_copy(&t2, &cmd);
+      fp_zero(&ac);
+      fp_mul(&t1, &t2, &ac);
+
+      /* now solve for bd */
+      /* t1/t2 are reused for the low r digits; their sign fields are left as set above */
+// fp_mod_2d(A, r * DIGIT_BIT, &t1);
+// fp_mod_2d(B, r * DIGIT_BIT, &t2);
+      for (s = 0; s < r; s++) {
+         t1.dp[s] = A->dp[s];
+         t2.dp[s] = B->dp[s];
+      }
+      for (; s < FP_SIZE; s++) {
+         t1.dp[s] = 0;
+         t2.dp[s] = 0;
+      }
+      t1.used = r;
+      t2.used = r;
+      fp_clamp(&t1);
+      fp_clamp(&t2);
+
+      fp_sub(&amb, &t1, &amb); fp_sub(&cmd, &t2, &cmd);
+      fp_zero(&bd);
+      fp_mul(&t1, &t2, &bd);
+
+      /* now get the (a-b)(c-d) term */
+      fp_zero(&comp);
+      fp_mul(&amb, &cmd, &comp);
+
+      /* now solve the system, do the middle term first */
+      /* flip the sign to get -(a-b)(c-d), then add ac and bd and scale by r digits */
+      comp.sign ^= 1;
+      fp_add(&comp, &ac, &comp);
+      fp_add(&comp, &bd, &comp);
+      fp_lshd(&comp, r);
+
+      /* leading term */
+      fp_lshd(&ac, r+r);
+
+      /* now sum them together */
+      /* s is captured before C is cleared, since C may be the same object as A or B */
+      s = A->sign ^ B->sign;
+      fp_zero(C);
+      fp_add(&ac, &comp, C);
+      fp_add(&bd, C, C);
+      C->sign = C->used ? s : FP_ZPOS;
+   }
+}
+
+
+/* End: fp_mul.c */
+
+/* Start: fp_mul_2.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include
+
+/* b = 2 * a
+ *
+ * Shifts every digit of a left by one bit into b, carrying the top bit
+ * of each digit into the next.  b takes a's sign.  a and b may be the
+ * same object: each digit is read before its slot is written.
+ *
+ * NOTE(review): the growth guard below tests b->used != FP_SIZE-1; if
+ * used can ever equal FP_SIZE on entry the store to *tmpb would land one
+ * past digit FP_SIZE-1 -- confirm callers keep used below that.
+ */
+void fp_mul_2(fp_int * a, fp_int * b)
+{
+   int x, oldused;
+
+   oldused = b->used;
+   b->used = a->used;
+
+   {
+      register fp_digit r, rr, *tmpa, *tmpb;
+
+      /* alias for source */
+      tmpa = a->dp;
+
+      /* alias for dest */
+      tmpb = b->dp;
+
+      /* carry */
+      r = 0;
+      for (x = 0; x < a->used; x++) {
+
+         /* get what will be the *next* carry bit from the
+          * MSB of the current digit
+          */
+         rr = *tmpa >> ((fp_digit)(DIGIT_BIT - 1));
+
+         /* now shift up this digit, add in the carry [from the previous] */
+         *tmpb++ = ((*tmpa++ << ((fp_digit)1)) | r);
+
+         /* copy the carry that would be from the source
+          * digit into the next iteration
+          */
+         r = rr;
+      }
+
+      /* new leading digit? */
+      if (r != 0 && b->used != (FP_SIZE-1)) {
+         /* add a MSB which is always 1 at this point */
+         *tmpb = 1;
+         ++(b->used);
+      }
+
+      /* now zero any excess digits on the destination
+       * that we didn't write to
+       */
+      tmpb = b->dp + b->used;
+      for (x = b->used; x < oldused; x++) {
+         *tmpb++ = 0;
+      }
+   }
+   b->sign = a->sign;
+}
+
+
+/* End: fp_mul_2.c */
+
+/* Start: fp_mul_2d.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include
+
+/* c = a * 2**b
+ *
+ * Whole-digit part of the shift is done with fp_lshd(); the remaining
+ * 0 .. DIGIT_BIT-1 bits are shifted digit by digit, spilling the top
+ * bits of each digit into the one above it.
+ */
+void fp_mul_2d(fp_int *a, int b, fp_int *c)
+{
+   fp_digit spill, cur, down;
+   int ix;
+
+   /* work on a copy of the input */
+   fp_copy(a, c);
+
+   /* whole digits first */
+   if (b >= DIGIT_BIT) {
+      fp_lshd(c, b/DIGIT_BIT);
+   }
+   b %= DIGIT_BIT;
+
+   /* nothing left to shift inside the digits */
+   if (b == 0) {
+      fp_clamp(c);
+      return;
+   }
+
+   /* bit-level shift, low digit to high digit */
+   down = DIGIT_BIT - b;
+   spill = 0;
+   ix = 0;
+   while (ix < c->used) {
+      cur = c->dp[ix];
+      c->dp[ix] = (cur << b) + spill;
+      spill = cur >> down;
+      ++ix;
+   }
+
+   /* keep the final spill only if another digit fits */
+   if (spill && ix < FP_SIZE) {
+      c->dp[c->used] = spill;
+      c->used++;
+   }
+   fp_clamp(c);
+}
+
+
+/* End: fp_mul_2d.c */
+
+/* Start: fp_mul_comba.c */
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+
+/* About this file...
+
+*/
+
+#include
+
+/* these are the combas. Worship them.
*/ +#if defined(TFM_X86) +/* Generic x86 optimized code */ + +/* anything you need at the start */ +#define COMBA_START + +/* clear the chaining variables */ +#define COMBA_CLEAR \ + c0 = c1 = c2 = 0; + +/* forward the carry to the next digit */ +#define COMBA_FORWARD \ + c0 = c1; c1 = c2; c2 = 0; + +/* store the first sum */ +#define COMBA_STORE(x) \ + x = c0; + +/* store the second sum [carry] */ +#define COMBA_STORE2(x) \ + x = c1; + +/* anything you need at the end */ +#define COMBA_FINI + +/* this should multiply i and j */ +#define MULADD(i, j) \ +asm volatile ( \ + "movl %6,%%eax \n\t" \ + "mull %7 \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc"); + +#elif defined(TFM_X86_64) +/* x86-64 optimized */ + +/* anything you need at the start */ +#define COMBA_START + +/* clear the chaining variables */ +#define COMBA_CLEAR \ + c0 = c1 = c2 = 0; + +/* forward the carry to the next digit */ +#define COMBA_FORWARD \ + c0 = c1; c1 = c2; c2 = 0; + +/* store the first sum */ +#define COMBA_STORE(x) \ + x = c0; + +/* store the second sum [carry] */ +#define COMBA_STORE2(x) \ + x = c1; + +/* anything you need at the end */ +#define COMBA_FINI + +/* this should multiply i and j */ +#define MULADD(i, j) \ +asm volatile ( \ + "movq %6,%%rax \n\t" \ + "mulq %7 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%rax","%rdx","%cc"); + +#elif defined(TFM_SSE2) +/* use SSE2 optimizations */ + +/* anything you need at the start */ +#define COMBA_START + +/* clear the chaining variables */ +#define COMBA_CLEAR \ + c0 = c1 = c2 = 0; + +/* forward the carry to the next digit */ +#define COMBA_FORWARD \ + c0 = c1; c1 = c2; c2 = 0; + +/* store the first sum */ +#define COMBA_STORE(x) \ + x = c0; + +/* store the second sum [carry] */ +#define 
COMBA_STORE2(x) \ + x = c1; + +/* anything you need at the end */ +#define COMBA_FINI \ + asm("emms"); + +/* this should multiply i and j */ + #define MULADD(i, j) \ + asm volatile ( \ + "movd %6,%%mm0 \n\t" \ + "movd %7,%%mm1 \n\t" \ + "pmuludq %%mm1,%%mm0\n\t" \ + "movd %%mm0,%%eax \n\t" \ + "psrlq $32,%%mm0 \n\t" \ + "addl %%eax,%0 \n\t" \ + "movd %%mm0,%%eax \n\t" \ + "adcl %%eax,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%cc"); + +#elif defined(TFM_ARM) +/* ARM code */ + +#define COMBA_START + +#define COMBA_CLEAR \ + c0 = c1 = c2 = 0; + +#define COMBA_FORWARD \ + c0 = c1; c1 = c2; c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define COMBA_FINI + +#define MULADD(i, j) \ +asm( \ +" UMULL r0,r1,%6,%7 \n\t" \ +" ADDS %0,%0,r0 \n\t" \ +" ADCS %1,%1,r1 \n\t" \ +" ADC %2, %2, #0 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc"); + +#else +/* ISO C code */ + +#define COMBA_START + +#define COMBA_CLEAR \ + c0 = c1 = c2 = 0; + +#define COMBA_FORWARD \ + c0 = c1; c1 = c2; c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define COMBA_FINI + +#define MULADD(i, j) \ + t = ((fp_word)i) * ((fp_word)j); \ + c0 = (c0 + t); if (c0 < ((fp_digit)t)) ++c1; \ + c1 = (c1 + (t>>DIGIT_BIT)); if (c1 < (t>>DIGIT_BIT)) ++c2; + +#endif + + +/* generic PxQ multiplier */ +void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C) +{ + int ix, iy, iz, tx, ty, pa; + fp_digit c0, c1, c2, *tmpx, *tmpy; + fp_word t; + fp_int tmp, *dst; + + COMBA_START; + COMBA_CLEAR; + + /* get size of output and trim */ + pa = A->used + B->used; + if (pa >= FP_SIZE) { + pa = FP_SIZE-1; + } + + if (A == C || B == C) { + fp_zero(&tmp); + dst = &tmp; + } else { + fp_zero(C); + dst = C; + } + + for (ix = 0; ix < pa; ix++) { + /* get offsets into the two bignums */ + ty = MIN(ix, B->used-1); + tx = ix - ty; + + /* 
setup temp aliases */ + tmpx = A->dp + tx; + tmpy = B->dp + ty; + + /* this is the number of times the loop will iterrate, essentially its + while (tx++ < a->used && ty-- >= 0) { ... } + */ + iy = MIN(A->used-tx, ty+1); + + /* execute loop */ + COMBA_FORWARD; + for (iz = 0; iz < iy; ++iz) { + MULADD(*tmpx++, *tmpy--); + } + + /* store term */ + COMBA_STORE(dst->dp[ix]); + } + /* store final carry */ + COMBA_STORE2(dst->dp[ix]); + COMBA_FINI; + + dst->used = pa; + fp_clamp(dst); + dst->sign = dst->used ? A->sign ^ B->sign : FP_ZPOS; + fp_copy(dst, C); +} + +void fp_mul_comba4(fp_int *A, fp_int *B, fp_int *C) +{ + fp_word t; + fp_digit c0, c1, c2, at[8]; + + memcpy(at, A->dp, 4 * sizeof(fp_digit)); + memcpy(at+4, B->dp, 4 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[4]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[5]); MULADD(at[1], at[4]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[6]); MULADD(at[1], at[5]); MULADD(at[2], at[4]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[7]); MULADD(at[1], at[6]); MULADD(at[2], at[5]); MULADD(at[3], at[4]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[1], at[7]); MULADD(at[2], at[6]); MULADD(at[3], at[5]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[2], at[7]); MULADD(at[3], at[6]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[3], at[7]); + COMBA_STORE(C->dp[6]); + COMBA_STORE2(C->dp[7]); + C->used = 8; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; +} + + +void fp_mul_comba8(fp_int *A, fp_int *B, fp_int *C) +{ + fp_word t; + fp_digit c0, c1, c2, at[16]; + + memcpy(at, A->dp, 8 * sizeof(fp_digit)); + memcpy(at+8, B->dp, 8 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[8]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[9]); MULADD(at[1], at[8]); + COMBA_STORE(C->dp[1]); + /* 2 */ + 
COMBA_FORWARD; + MULADD(at[0], at[10]); MULADD(at[1], at[9]); MULADD(at[2], at[8]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[11]); MULADD(at[1], at[10]); MULADD(at[2], at[9]); MULADD(at[3], at[8]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[12]); MULADD(at[1], at[11]); MULADD(at[2], at[10]); MULADD(at[3], at[9]); MULADD(at[4], at[8]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[13]); MULADD(at[1], at[12]); MULADD(at[2], at[11]); MULADD(at[3], at[10]); MULADD(at[4], at[9]); MULADD(at[5], at[8]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[14]); MULADD(at[1], at[13]); MULADD(at[2], at[12]); MULADD(at[3], at[11]); MULADD(at[4], at[10]); MULADD(at[5], at[9]); MULADD(at[6], at[8]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[15]); MULADD(at[1], at[14]); MULADD(at[2], at[13]); MULADD(at[3], at[12]); MULADD(at[4], at[11]); MULADD(at[5], at[10]); MULADD(at[6], at[9]); MULADD(at[7], at[8]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[1], at[15]); MULADD(at[2], at[14]); MULADD(at[3], at[13]); MULADD(at[4], at[12]); MULADD(at[5], at[11]); MULADD(at[6], at[10]); MULADD(at[7], at[9]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[2], at[15]); MULADD(at[3], at[14]); MULADD(at[4], at[13]); MULADD(at[5], at[12]); MULADD(at[6], at[11]); MULADD(at[7], at[10]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[3], at[15]); MULADD(at[4], at[14]); MULADD(at[5], at[13]); MULADD(at[6], at[12]); MULADD(at[7], at[11]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[4], at[15]); MULADD(at[5], at[14]); MULADD(at[6], at[13]); MULADD(at[7], at[12]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[5], at[15]); MULADD(at[6], at[14]); MULADD(at[7], at[13]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[6], at[15]); MULADD(at[7], at[14]); + 
COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[7], at[15]); + COMBA_STORE(C->dp[14]); + COMBA_STORE2(C->dp[15]); + C->used = 16; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; +} + + +void fp_mul_comba16(fp_int *A, fp_int *B, fp_int *C) +{ + fp_word t; + fp_digit c0, c1, c2, at[32]; + + memcpy(at, A->dp, 16 * sizeof(fp_digit)); + memcpy(at+16, B->dp, 16 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[16]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[17]); MULADD(at[1], at[16]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[18]); MULADD(at[1], at[17]); MULADD(at[2], at[16]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[19]); MULADD(at[1], at[18]); MULADD(at[2], at[17]); MULADD(at[3], at[16]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[20]); MULADD(at[1], at[19]); MULADD(at[2], at[18]); MULADD(at[3], at[17]); MULADD(at[4], at[16]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[21]); MULADD(at[1], at[20]); MULADD(at[2], at[19]); MULADD(at[3], at[18]); MULADD(at[4], at[17]); MULADD(at[5], at[16]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[22]); MULADD(at[1], at[21]); MULADD(at[2], at[20]); MULADD(at[3], at[19]); MULADD(at[4], at[18]); MULADD(at[5], at[17]); MULADD(at[6], at[16]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[23]); MULADD(at[1], at[22]); MULADD(at[2], at[21]); MULADD(at[3], at[20]); MULADD(at[4], at[19]); MULADD(at[5], at[18]); MULADD(at[6], at[17]); MULADD(at[7], at[16]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[24]); MULADD(at[1], at[23]); MULADD(at[2], at[22]); MULADD(at[3], at[21]); MULADD(at[4], at[20]); MULADD(at[5], at[19]); MULADD(at[6], at[18]); MULADD(at[7], at[17]); MULADD(at[8], at[16]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[25]); 
MULADD(at[1], at[24]); MULADD(at[2], at[23]); MULADD(at[3], at[22]); MULADD(at[4], at[21]); MULADD(at[5], at[20]); MULADD(at[6], at[19]); MULADD(at[7], at[18]); MULADD(at[8], at[17]); MULADD(at[9], at[16]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[26]); MULADD(at[1], at[25]); MULADD(at[2], at[24]); MULADD(at[3], at[23]); MULADD(at[4], at[22]); MULADD(at[5], at[21]); MULADD(at[6], at[20]); MULADD(at[7], at[19]); MULADD(at[8], at[18]); MULADD(at[9], at[17]); MULADD(at[10], at[16]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[27]); MULADD(at[1], at[26]); MULADD(at[2], at[25]); MULADD(at[3], at[24]); MULADD(at[4], at[23]); MULADD(at[5], at[22]); MULADD(at[6], at[21]); MULADD(at[7], at[20]); MULADD(at[8], at[19]); MULADD(at[9], at[18]); MULADD(at[10], at[17]); MULADD(at[11], at[16]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[0], at[28]); MULADD(at[1], at[27]); MULADD(at[2], at[26]); MULADD(at[3], at[25]); MULADD(at[4], at[24]); MULADD(at[5], at[23]); MULADD(at[6], at[22]); MULADD(at[7], at[21]); MULADD(at[8], at[20]); MULADD(at[9], at[19]); MULADD(at[10], at[18]); MULADD(at[11], at[17]); MULADD(at[12], at[16]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[0], at[29]); MULADD(at[1], at[28]); MULADD(at[2], at[27]); MULADD(at[3], at[26]); MULADD(at[4], at[25]); MULADD(at[5], at[24]); MULADD(at[6], at[23]); MULADD(at[7], at[22]); MULADD(at[8], at[21]); MULADD(at[9], at[20]); MULADD(at[10], at[19]); MULADD(at[11], at[18]); MULADD(at[12], at[17]); MULADD(at[13], at[16]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[0], at[30]); MULADD(at[1], at[29]); MULADD(at[2], at[28]); MULADD(at[3], at[27]); MULADD(at[4], at[26]); MULADD(at[5], at[25]); MULADD(at[6], at[24]); MULADD(at[7], at[23]); MULADD(at[8], at[22]); MULADD(at[9], at[21]); MULADD(at[10], at[20]); MULADD(at[11], at[19]); MULADD(at[12], at[18]); MULADD(at[13], at[17]); MULADD(at[14], at[16]); + 
COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[0], at[31]); MULADD(at[1], at[30]); MULADD(at[2], at[29]); MULADD(at[3], at[28]); MULADD(at[4], at[27]); MULADD(at[5], at[26]); MULADD(at[6], at[25]); MULADD(at[7], at[24]); MULADD(at[8], at[23]); MULADD(at[9], at[22]); MULADD(at[10], at[21]); MULADD(at[11], at[20]); MULADD(at[12], at[19]); MULADD(at[13], at[18]); MULADD(at[14], at[17]); MULADD(at[15], at[16]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[1], at[31]); MULADD(at[2], at[30]); MULADD(at[3], at[29]); MULADD(at[4], at[28]); MULADD(at[5], at[27]); MULADD(at[6], at[26]); MULADD(at[7], at[25]); MULADD(at[8], at[24]); MULADD(at[9], at[23]); MULADD(at[10], at[22]); MULADD(at[11], at[21]); MULADD(at[12], at[20]); MULADD(at[13], at[19]); MULADD(at[14], at[18]); MULADD(at[15], at[17]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[2], at[31]); MULADD(at[3], at[30]); MULADD(at[4], at[29]); MULADD(at[5], at[28]); MULADD(at[6], at[27]); MULADD(at[7], at[26]); MULADD(at[8], at[25]); MULADD(at[9], at[24]); MULADD(at[10], at[23]); MULADD(at[11], at[22]); MULADD(at[12], at[21]); MULADD(at[13], at[20]); MULADD(at[14], at[19]); MULADD(at[15], at[18]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[3], at[31]); MULADD(at[4], at[30]); MULADD(at[5], at[29]); MULADD(at[6], at[28]); MULADD(at[7], at[27]); MULADD(at[8], at[26]); MULADD(at[9], at[25]); MULADD(at[10], at[24]); MULADD(at[11], at[23]); MULADD(at[12], at[22]); MULADD(at[13], at[21]); MULADD(at[14], at[20]); MULADD(at[15], at[19]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[4], at[31]); MULADD(at[5], at[30]); MULADD(at[6], at[29]); MULADD(at[7], at[28]); MULADD(at[8], at[27]); MULADD(at[9], at[26]); MULADD(at[10], at[25]); MULADD(at[11], at[24]); MULADD(at[12], at[23]); MULADD(at[13], at[22]); MULADD(at[14], at[21]); MULADD(at[15], at[20]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[5], at[31]); 
MULADD(at[6], at[30]); MULADD(at[7], at[29]); MULADD(at[8], at[28]); MULADD(at[9], at[27]); MULADD(at[10], at[26]); MULADD(at[11], at[25]); MULADD(at[12], at[24]); MULADD(at[13], at[23]); MULADD(at[14], at[22]); MULADD(at[15], at[21]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[6], at[31]); MULADD(at[7], at[30]); MULADD(at[8], at[29]); MULADD(at[9], at[28]); MULADD(at[10], at[27]); MULADD(at[11], at[26]); MULADD(at[12], at[25]); MULADD(at[13], at[24]); MULADD(at[14], at[23]); MULADD(at[15], at[22]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[7], at[31]); MULADD(at[8], at[30]); MULADD(at[9], at[29]); MULADD(at[10], at[28]); MULADD(at[11], at[27]); MULADD(at[12], at[26]); MULADD(at[13], at[25]); MULADD(at[14], at[24]); MULADD(at[15], at[23]); + COMBA_STORE(C->dp[22]); + /* 23 */ + COMBA_FORWARD; + MULADD(at[8], at[31]); MULADD(at[9], at[30]); MULADD(at[10], at[29]); MULADD(at[11], at[28]); MULADD(at[12], at[27]); MULADD(at[13], at[26]); MULADD(at[14], at[25]); MULADD(at[15], at[24]); + COMBA_STORE(C->dp[23]); + /* 24 */ + COMBA_FORWARD; + MULADD(at[9], at[31]); MULADD(at[10], at[30]); MULADD(at[11], at[29]); MULADD(at[12], at[28]); MULADD(at[13], at[27]); MULADD(at[14], at[26]); MULADD(at[15], at[25]); + COMBA_STORE(C->dp[24]); + /* 25 */ + COMBA_FORWARD; + MULADD(at[10], at[31]); MULADD(at[11], at[30]); MULADD(at[12], at[29]); MULADD(at[13], at[28]); MULADD(at[14], at[27]); MULADD(at[15], at[26]); + COMBA_STORE(C->dp[25]); + /* 26 */ + COMBA_FORWARD; + MULADD(at[11], at[31]); MULADD(at[12], at[30]); MULADD(at[13], at[29]); MULADD(at[14], at[28]); MULADD(at[15], at[27]); + COMBA_STORE(C->dp[26]); + /* 27 */ + COMBA_FORWARD; + MULADD(at[12], at[31]); MULADD(at[13], at[30]); MULADD(at[14], at[29]); MULADD(at[15], at[28]); + COMBA_STORE(C->dp[27]); + /* 28 */ + COMBA_FORWARD; + MULADD(at[13], at[31]); MULADD(at[14], at[30]); MULADD(at[15], at[29]); + COMBA_STORE(C->dp[28]); + /* 29 */ + COMBA_FORWARD; + MULADD(at[14], 
at[31]); MULADD(at[15], at[30]); + COMBA_STORE(C->dp[29]); + /* 30 */ + COMBA_FORWARD; + MULADD(at[15], at[31]); + COMBA_STORE(C->dp[30]); + COMBA_STORE2(C->dp[31]); + C->used = 32; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; +} + +#ifdef TFM_HUGE + +void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C) +{ + fp_word t; + fp_digit c0, c1, c2, at[64]; + + memcpy(at, A->dp, 32 * sizeof(fp_digit)); + memcpy(at+32, B->dp, 32 * sizeof(fp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[32]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[33]); MULADD(at[1], at[32]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[34]); MULADD(at[1], at[33]); MULADD(at[2], at[32]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[35]); MULADD(at[1], at[34]); MULADD(at[2], at[33]); MULADD(at[3], at[32]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[36]); MULADD(at[1], at[35]); MULADD(at[2], at[34]); MULADD(at[3], at[33]); MULADD(at[4], at[32]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[37]); MULADD(at[1], at[36]); MULADD(at[2], at[35]); MULADD(at[3], at[34]); MULADD(at[4], at[33]); MULADD(at[5], at[32]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[38]); MULADD(at[1], at[37]); MULADD(at[2], at[36]); MULADD(at[3], at[35]); MULADD(at[4], at[34]); MULADD(at[5], at[33]); MULADD(at[6], at[32]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[39]); MULADD(at[1], at[38]); MULADD(at[2], at[37]); MULADD(at[3], at[36]); MULADD(at[4], at[35]); MULADD(at[5], at[34]); MULADD(at[6], at[33]); MULADD(at[7], at[32]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[40]); MULADD(at[1], at[39]); MULADD(at[2], at[38]); MULADD(at[3], at[37]); MULADD(at[4], at[36]); MULADD(at[5], at[35]); MULADD(at[6], at[34]); MULADD(at[7], at[33]); MULADD(at[8], at[32]); + COMBA_STORE(C->dp[8]); + 
/* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[41]); MULADD(at[1], at[40]); MULADD(at[2], at[39]); MULADD(at[3], at[38]); MULADD(at[4], at[37]); MULADD(at[5], at[36]); MULADD(at[6], at[35]); MULADD(at[7], at[34]); MULADD(at[8], at[33]); MULADD(at[9], at[32]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[42]); MULADD(at[1], at[41]); MULADD(at[2], at[40]); MULADD(at[3], at[39]); MULADD(at[4], at[38]); MULADD(at[5], at[37]); MULADD(at[6], at[36]); MULADD(at[7], at[35]); MULADD(at[8], at[34]); MULADD(at[9], at[33]); MULADD(at[10], at[32]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[43]); MULADD(at[1], at[42]); MULADD(at[2], at[41]); MULADD(at[3], at[40]); MULADD(at[4], at[39]); MULADD(at[5], at[38]); MULADD(at[6], at[37]); MULADD(at[7], at[36]); MULADD(at[8], at[35]); MULADD(at[9], at[34]); MULADD(at[10], at[33]); MULADD(at[11], at[32]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[0], at[44]); MULADD(at[1], at[43]); MULADD(at[2], at[42]); MULADD(at[3], at[41]); MULADD(at[4], at[40]); MULADD(at[5], at[39]); MULADD(at[6], at[38]); MULADD(at[7], at[37]); MULADD(at[8], at[36]); MULADD(at[9], at[35]); MULADD(at[10], at[34]); MULADD(at[11], at[33]); MULADD(at[12], at[32]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[0], at[45]); MULADD(at[1], at[44]); MULADD(at[2], at[43]); MULADD(at[3], at[42]); MULADD(at[4], at[41]); MULADD(at[5], at[40]); MULADD(at[6], at[39]); MULADD(at[7], at[38]); MULADD(at[8], at[37]); MULADD(at[9], at[36]); MULADD(at[10], at[35]); MULADD(at[11], at[34]); MULADD(at[12], at[33]); MULADD(at[13], at[32]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[0], at[46]); MULADD(at[1], at[45]); MULADD(at[2], at[44]); MULADD(at[3], at[43]); MULADD(at[4], at[42]); MULADD(at[5], at[41]); MULADD(at[6], at[40]); MULADD(at[7], at[39]); MULADD(at[8], at[38]); MULADD(at[9], at[37]); MULADD(at[10], at[36]); MULADD(at[11], at[35]); MULADD(at[12], at[34]); 
MULADD(at[13], at[33]); MULADD(at[14], at[32]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[0], at[47]); MULADD(at[1], at[46]); MULADD(at[2], at[45]); MULADD(at[3], at[44]); MULADD(at[4], at[43]); MULADD(at[5], at[42]); MULADD(at[6], at[41]); MULADD(at[7], at[40]); MULADD(at[8], at[39]); MULADD(at[9], at[38]); MULADD(at[10], at[37]); MULADD(at[11], at[36]); MULADD(at[12], at[35]); MULADD(at[13], at[34]); MULADD(at[14], at[33]); MULADD(at[15], at[32]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[0], at[48]); MULADD(at[1], at[47]); MULADD(at[2], at[46]); MULADD(at[3], at[45]); MULADD(at[4], at[44]); MULADD(at[5], at[43]); MULADD(at[6], at[42]); MULADD(at[7], at[41]); MULADD(at[8], at[40]); MULADD(at[9], at[39]); MULADD(at[10], at[38]); MULADD(at[11], at[37]); MULADD(at[12], at[36]); MULADD(at[13], at[35]); MULADD(at[14], at[34]); MULADD(at[15], at[33]); MULADD(at[16], at[32]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[0], at[49]); MULADD(at[1], at[48]); MULADD(at[2], at[47]); MULADD(at[3], at[46]); MULADD(at[4], at[45]); MULADD(at[5], at[44]); MULADD(at[6], at[43]); MULADD(at[7], at[42]); MULADD(at[8], at[41]); MULADD(at[9], at[40]); MULADD(at[10], at[39]); MULADD(at[11], at[38]); MULADD(at[12], at[37]); MULADD(at[13], at[36]); MULADD(at[14], at[35]); MULADD(at[15], at[34]); MULADD(at[16], at[33]); MULADD(at[17], at[32]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[0], at[50]); MULADD(at[1], at[49]); MULADD(at[2], at[48]); MULADD(at[3], at[47]); MULADD(at[4], at[46]); MULADD(at[5], at[45]); MULADD(at[6], at[44]); MULADD(at[7], at[43]); MULADD(at[8], at[42]); MULADD(at[9], at[41]); MULADD(at[10], at[40]); MULADD(at[11], at[39]); MULADD(at[12], at[38]); MULADD(at[13], at[37]); MULADD(at[14], at[36]); MULADD(at[15], at[35]); MULADD(at[16], at[34]); MULADD(at[17], at[33]); MULADD(at[18], at[32]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[0], at[51]); 
MULADD(at[1], at[50]); MULADD(at[2], at[49]); MULADD(at[3], at[48]); MULADD(at[4], at[47]); MULADD(at[5], at[46]); MULADD(at[6], at[45]); MULADD(at[7], at[44]); MULADD(at[8], at[43]); MULADD(at[9], at[42]); MULADD(at[10], at[41]); MULADD(at[11], at[40]); MULADD(at[12], at[39]); MULADD(at[13], at[38]); MULADD(at[14], at[37]); MULADD(at[15], at[36]); MULADD(at[16], at[35]); MULADD(at[17], at[34]); MULADD(at[18], at[33]); MULADD(at[19], at[32]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[0], at[52]); MULADD(at[1], at[51]); MULADD(at[2], at[50]); MULADD(at[3], at[49]); MULADD(at[4], at[48]); MULADD(at[5], at[47]); MULADD(at[6], at[46]); MULADD(at[7], at[45]); MULADD(at[8], at[44]); MULADD(at[9], at[43]); MULADD(at[10], at[42]); MULADD(at[11], at[41]); MULADD(at[12], at[40]); MULADD(at[13], at[39]); MULADD(at[14], at[38]); MULADD(at[15], at[37]); MULADD(at[16], at[36]); MULADD(at[17], at[35]); MULADD(at[18], at[34]); MULADD(at[19], at[33]); MULADD(at[20], at[32]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[0], at[53]); MULADD(at[1], at[52]); MULADD(at[2], at[51]); MULADD(at[3], at[50]); MULADD(at[4], at[49]); MULADD(at[5], at[48]); MULADD(at[6], at[47]); MULADD(at[7], at[46]); MULADD(at[8], at[45]); MULADD(at[9], at[44]); MULADD(at[10], at[43]); MULADD(at[11], at[42]); MULADD(at[12], at[41]); MULADD(at[13], at[40]); MULADD(at[14], at[39]); MULADD(at[15], at[38]); MULADD(at[16], at[37]); MULADD(at[17], at[36]); MULADD(at[18], at[35]); MULADD(at[19], at[34]); MULADD(at[20], at[33]); MULADD(at[21], at[32]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[0], at[54]); MULADD(at[1], at[53]); MULADD(at[2], at[52]); MULADD(at[3], at[51]); MULADD(at[4], at[50]); MULADD(at[5], at[49]); MULADD(at[6], at[48]); MULADD(at[7], at[47]); MULADD(at[8], at[46]); MULADD(at[9], at[45]); MULADD(at[10], at[44]); MULADD(at[11], at[43]); MULADD(at[12], at[42]); MULADD(at[13], at[41]); MULADD(at[14], at[40]); MULADD(at[15], 
at[39]); MULADD(at[16], at[38]); MULADD(at[17], at[37]); MULADD(at[18], at[36]); MULADD(at[19], at[35]); MULADD(at[20], at[34]); MULADD(at[21], at[33]); MULADD(at[22], at[32]); + COMBA_STORE(C->dp[22]); + /* 23 */ + COMBA_FORWARD; + MULADD(at[0], at[55]); MULADD(at[1], at[54]); MULADD(at[2], at[53]); MULADD(at[3], at[52]); MULADD(at[4], at[51]); MULADD(at[5], at[50]); MULADD(at[6], at[49]); MULADD(at[7], at[48]); MULADD(at[8], at[47]); MULADD(at[9], at[46]); MULADD(at[10], at[45]); MULADD(at[11], at[44]); MULADD(at[12], at[43]); MULADD(at[13], at[42]); MULADD(at[14], at[41]); MULADD(at[15], at[40]); MULADD(at[16], at[39]); MULADD(at[17], at[38]); MULADD(at[18], at[37]); MULADD(at[19], at[36]); MULADD(at[20], at[35]); MULADD(at[21], at[34]); MULADD(at[22], at[33]); MULADD(at[23], at[32]); + COMBA_STORE(C->dp[23]); + /* 24 */ + COMBA_FORWARD; + MULADD(at[0], at[56]); MULADD(at[1], at[55]); MULADD(at[2], at[54]); MULADD(at[3], at[53]); MULADD(at[4], at[52]); MULADD(at[5], at[51]); MULADD(at[6], at[50]); MULADD(at[7], at[49]); MULADD(at[8], at[48]); MULADD(at[9], at[47]); MULADD(at[10], at[46]); MULADD(at[11], at[45]); MULADD(at[12], at[44]); MULADD(at[13], at[43]); MULADD(at[14], at[42]); MULADD(at[15], at[41]); MULADD(at[16], at[40]); MULADD(at[17], at[39]); MULADD(at[18], at[38]); MULADD(at[19], at[37]); MULADD(at[20], at[36]); MULADD(at[21], at[35]); MULADD(at[22], at[34]); MULADD(at[23], at[33]); MULADD(at[24], at[32]); + COMBA_STORE(C->dp[24]); + /* 25 */ + COMBA_FORWARD; + MULADD(at[0], at[57]); MULADD(at[1], at[56]); MULADD(at[2], at[55]); MULADD(at[3], at[54]); MULADD(at[4], at[53]); MULADD(at[5], at[52]); MULADD(at[6], at[51]); MULADD(at[7], at[50]); MULADD(at[8], at[49]); MULADD(at[9], at[48]); MULADD(at[10], at[47]); MULADD(at[11], at[46]); MULADD(at[12], at[45]); MULADD(at[13], at[44]); MULADD(at[14], at[43]); MULADD(at[15], at[42]); MULADD(at[16], at[41]); MULADD(at[17], at[40]); MULADD(at[18], at[39]); MULADD(at[19], at[38]); MULADD(at[20], at[37]); 
MULADD(at[21], at[36]); MULADD(at[22], at[35]); MULADD(at[23], at[34]); MULADD(at[24], at[33]); MULADD(at[25], at[32]); + COMBA_STORE(C->dp[25]); + /* 26 */ + COMBA_FORWARD; + MULADD(at[0], at[58]); MULADD(at[1], at[57]); MULADD(at[2], at[56]); MULADD(at[3], at[55]); MULADD(at[4], at[54]); MULADD(at[5], at[53]); MULADD(at[6], at[52]); MULADD(at[7], at[51]); MULADD(at[8], at[50]); MULADD(at[9], at[49]); MULADD(at[10], at[48]); MULADD(at[11], at[47]); MULADD(at[12], at[46]); MULADD(at[13], at[45]); MULADD(at[14], at[44]); MULADD(at[15], at[43]); MULADD(at[16], at[42]); MULADD(at[17], at[41]); MULADD(at[18], at[40]); MULADD(at[19], at[39]); MULADD(at[20], at[38]); MULADD(at[21], at[37]); MULADD(at[22], at[36]); MULADD(at[23], at[35]); MULADD(at[24], at[34]); MULADD(at[25], at[33]); MULADD(at[26], at[32]); + COMBA_STORE(C->dp[26]); + /* 27 */ + COMBA_FORWARD; + MULADD(at[0], at[59]); MULADD(at[1], at[58]); MULADD(at[2], at[57]); MULADD(at[3], at[56]); MULADD(at[4], at[55]); MULADD(at[5], at[54]); MULADD(at[6], at[53]); MULADD(at[7], at[52]); MULADD(at[8], at[51]); MULADD(at[9], at[50]); MULADD(at[10], at[49]); MULADD(at[11], at[48]); MULADD(at[12], at[47]); MULADD(at[13], at[46]); MULADD(at[14], at[45]); MULADD(at[15], at[44]); MULADD(at[16], at[43]); MULADD(at[17], at[42]); MULADD(at[18], at[41]); MULADD(at[19], at[40]); MULADD(at[20], at[39]); MULADD(at[21], at[38]); MULADD(at[22], at[37]); MULADD(at[23], at[36]); MULADD(at[24], at[35]); MULADD(at[25], at[34]); MULADD(at[26], at[33]); MULADD(at[27], at[32]); + COMBA_STORE(C->dp[27]); + /* 28 */ + COMBA_FORWARD; + MULADD(at[0], at[60]); MULADD(at[1], at[59]); MULADD(at[2], at[58]); MULADD(at[3], at[57]); MULADD(at[4], at[56]); MULADD(at[5], at[55]); MULADD(at[6], at[54]); MULADD(at[7], at[53]); MULADD(at[8], at[52]); MULADD(at[9], at[51]); MULADD(at[10], at[50]); MULADD(at[11], at[49]); MULADD(at[12], at[48]); MULADD(at[13], at[47]); MULADD(at[14], at[46]); MULADD(at[15], at[45]); MULADD(at[16], at[44]); 
MULADD(at[17], at[43]); MULADD(at[18], at[42]); MULADD(at[19], at[41]); MULADD(at[20], at[40]); MULADD(at[21], at[39]); MULADD(at[22], at[38]); MULADD(at[23], at[37]); MULADD(at[24], at[36]); MULADD(at[25], at[35]); MULADD(at[26], at[34]); MULADD(at[27], at[33]); MULADD(at[28], at[32]); + COMBA_STORE(C->dp[28]); + /* 29 */ + COMBA_FORWARD; + MULADD(at[0], at[61]); MULADD(at[1], at[60]); MULADD(at[2], at[59]); MULADD(at[3], at[58]); MULADD(at[4], at[57]); MULADD(at[5], at[56]); MULADD(at[6], at[55]); MULADD(at[7], at[54]); MULADD(at[8], at[53]); MULADD(at[9], at[52]); MULADD(at[10], at[51]); MULADD(at[11], at[50]); MULADD(at[12], at[49]); MULADD(at[13], at[48]); MULADD(at[14], at[47]); MULADD(at[15], at[46]); MULADD(at[16], at[45]); MULADD(at[17], at[44]); MULADD(at[18], at[43]); MULADD(at[19], at[42]); MULADD(at[20], at[41]); MULADD(at[21], at[40]); MULADD(at[22], at[39]); MULADD(at[23], at[38]); MULADD(at[24], at[37]); MULADD(at[25], at[36]); MULADD(at[26], at[35]); MULADD(at[27], at[34]); MULADD(at[28], at[33]); MULADD(at[29], at[32]); + COMBA_STORE(C->dp[29]); + /* 30 */ + COMBA_FORWARD; + MULADD(at[0], at[62]); MULADD(at[1], at[61]); MULADD(at[2], at[60]); MULADD(at[3], at[59]); MULADD(at[4], at[58]); MULADD(at[5], at[57]); MULADD(at[6], at[56]); MULADD(at[7], at[55]); MULADD(at[8], at[54]); MULADD(at[9], at[53]); MULADD(at[10], at[52]); MULADD(at[11], at[51]); MULADD(at[12], at[50]); MULADD(at[13], at[49]); MULADD(at[14], at[48]); MULADD(at[15], at[47]); MULADD(at[16], at[46]); MULADD(at[17], at[45]); MULADD(at[18], at[44]); MULADD(at[19], at[43]); MULADD(at[20], at[42]); MULADD(at[21], at[41]); MULADD(at[22], at[40]); MULADD(at[23], at[39]); MULADD(at[24], at[38]); MULADD(at[25], at[37]); MULADD(at[26], at[36]); MULADD(at[27], at[35]); MULADD(at[28], at[34]); MULADD(at[29], at[33]); MULADD(at[30], at[32]); + COMBA_STORE(C->dp[30]); + /* 31 */ + COMBA_FORWARD; + MULADD(at[0], at[63]); MULADD(at[1], at[62]); MULADD(at[2], at[61]); MULADD(at[3], at[60]); 
MULADD(at[4], at[59]); MULADD(at[5], at[58]); MULADD(at[6], at[57]); MULADD(at[7], at[56]); MULADD(at[8], at[55]); MULADD(at[9], at[54]); MULADD(at[10], at[53]); MULADD(at[11], at[52]); MULADD(at[12], at[51]); MULADD(at[13], at[50]); MULADD(at[14], at[49]); MULADD(at[15], at[48]); MULADD(at[16], at[47]); MULADD(at[17], at[46]); MULADD(at[18], at[45]); MULADD(at[19], at[44]); MULADD(at[20], at[43]); MULADD(at[21], at[42]); MULADD(at[22], at[41]); MULADD(at[23], at[40]); MULADD(at[24], at[39]); MULADD(at[25], at[38]); MULADD(at[26], at[37]); MULADD(at[27], at[36]); MULADD(at[28], at[35]); MULADD(at[29], at[34]); MULADD(at[30], at[33]); MULADD(at[31], at[32]); + COMBA_STORE(C->dp[31]); + /* 32 */ + COMBA_FORWARD; + MULADD(at[1], at[63]); MULADD(at[2], at[62]); MULADD(at[3], at[61]); MULADD(at[4], at[60]); MULADD(at[5], at[59]); MULADD(at[6], at[58]); MULADD(at[7], at[57]); MULADD(at[8], at[56]); MULADD(at[9], at[55]); MULADD(at[10], at[54]); MULADD(at[11], at[53]); MULADD(at[12], at[52]); MULADD(at[13], at[51]); MULADD(at[14], at[50]); MULADD(at[15], at[49]); MULADD(at[16], at[48]); MULADD(at[17], at[47]); MULADD(at[18], at[46]); MULADD(at[19], at[45]); MULADD(at[20], at[44]); MULADD(at[21], at[43]); MULADD(at[22], at[42]); MULADD(at[23], at[41]); MULADD(at[24], at[40]); MULADD(at[25], at[39]); MULADD(at[26], at[38]); MULADD(at[27], at[37]); MULADD(at[28], at[36]); MULADD(at[29], at[35]); MULADD(at[30], at[34]); MULADD(at[31], at[33]); + COMBA_STORE(C->dp[32]); + /* 33 */ + COMBA_FORWARD; + MULADD(at[2], at[63]); MULADD(at[3], at[62]); MULADD(at[4], at[61]); MULADD(at[5], at[60]); MULADD(at[6], at[59]); MULADD(at[7], at[58]); MULADD(at[8], at[57]); MULADD(at[9], at[56]); MULADD(at[10], at[55]); MULADD(at[11], at[54]); MULADD(at[12], at[53]); MULADD(at[13], at[52]); MULADD(at[14], at[51]); MULADD(at[15], at[50]); MULADD(at[16], at[49]); MULADD(at[17], at[48]); MULADD(at[18], at[47]); MULADD(at[19], at[46]); MULADD(at[20], at[45]); MULADD(at[21], at[44]); MULADD(at[22], 
at[43]); MULADD(at[23], at[42]); MULADD(at[24], at[41]); MULADD(at[25], at[40]); MULADD(at[26], at[39]); MULADD(at[27], at[38]); MULADD(at[28], at[37]); MULADD(at[29], at[36]); MULADD(at[30], at[35]); MULADD(at[31], at[34]); + COMBA_STORE(C->dp[33]); + /* 34 */ + COMBA_FORWARD; + MULADD(at[3], at[63]); MULADD(at[4], at[62]); MULADD(at[5], at[61]); MULADD(at[6], at[60]); MULADD(at[7], at[59]); MULADD(at[8], at[58]); MULADD(at[9], at[57]); MULADD(at[10], at[56]); MULADD(at[11], at[55]); MULADD(at[12], at[54]); MULADD(at[13], at[53]); MULADD(at[14], at[52]); MULADD(at[15], at[51]); MULADD(at[16], at[50]); MULADD(at[17], at[49]); MULADD(at[18], at[48]); MULADD(at[19], at[47]); MULADD(at[20], at[46]); MULADD(at[21], at[45]); MULADD(at[22], at[44]); MULADD(at[23], at[43]); MULADD(at[24], at[42]); MULADD(at[25], at[41]); MULADD(at[26], at[40]); MULADD(at[27], at[39]); MULADD(at[28], at[38]); MULADD(at[29], at[37]); MULADD(at[30], at[36]); MULADD(at[31], at[35]); + COMBA_STORE(C->dp[34]); + /* 35 */ + COMBA_FORWARD; + MULADD(at[4], at[63]); MULADD(at[5], at[62]); MULADD(at[6], at[61]); MULADD(at[7], at[60]); MULADD(at[8], at[59]); MULADD(at[9], at[58]); MULADD(at[10], at[57]); MULADD(at[11], at[56]); MULADD(at[12], at[55]); MULADD(at[13], at[54]); MULADD(at[14], at[53]); MULADD(at[15], at[52]); MULADD(at[16], at[51]); MULADD(at[17], at[50]); MULADD(at[18], at[49]); MULADD(at[19], at[48]); MULADD(at[20], at[47]); MULADD(at[21], at[46]); MULADD(at[22], at[45]); MULADD(at[23], at[44]); MULADD(at[24], at[43]); MULADD(at[25], at[42]); MULADD(at[26], at[41]); MULADD(at[27], at[40]); MULADD(at[28], at[39]); MULADD(at[29], at[38]); MULADD(at[30], at[37]); MULADD(at[31], at[36]); + COMBA_STORE(C->dp[35]); + /* 36 */ + COMBA_FORWARD; + MULADD(at[5], at[63]); MULADD(at[6], at[62]); MULADD(at[7], at[61]); MULADD(at[8], at[60]); MULADD(at[9], at[59]); MULADD(at[10], at[58]); MULADD(at[11], at[57]); MULADD(at[12], at[56]); MULADD(at[13], at[55]); MULADD(at[14], at[54]); MULADD(at[15], 
at[53]); MULADD(at[16], at[52]); MULADD(at[17], at[51]); MULADD(at[18], at[50]); MULADD(at[19], at[49]); MULADD(at[20], at[48]); MULADD(at[21], at[47]); MULADD(at[22], at[46]); MULADD(at[23], at[45]); MULADD(at[24], at[44]); MULADD(at[25], at[43]); MULADD(at[26], at[42]); MULADD(at[27], at[41]); MULADD(at[28], at[40]); MULADD(at[29], at[39]); MULADD(at[30], at[38]); MULADD(at[31], at[37]); + COMBA_STORE(C->dp[36]); + /* 37 */ + COMBA_FORWARD; + MULADD(at[6], at[63]); MULADD(at[7], at[62]); MULADD(at[8], at[61]); MULADD(at[9], at[60]); MULADD(at[10], at[59]); MULADD(at[11], at[58]); MULADD(at[12], at[57]); MULADD(at[13], at[56]); MULADD(at[14], at[55]); MULADD(at[15], at[54]); MULADD(at[16], at[53]); MULADD(at[17], at[52]); MULADD(at[18], at[51]); MULADD(at[19], at[50]); MULADD(at[20], at[49]); MULADD(at[21], at[48]); MULADD(at[22], at[47]); MULADD(at[23], at[46]); MULADD(at[24], at[45]); MULADD(at[25], at[44]); MULADD(at[26], at[43]); MULADD(at[27], at[42]); MULADD(at[28], at[41]); MULADD(at[29], at[40]); MULADD(at[30], at[39]); MULADD(at[31], at[38]); + COMBA_STORE(C->dp[37]); + /* 38 */ + COMBA_FORWARD; + MULADD(at[7], at[63]); MULADD(at[8], at[62]); MULADD(at[9], at[61]); MULADD(at[10], at[60]); MULADD(at[11], at[59]); MULADD(at[12], at[58]); MULADD(at[13], at[57]); MULADD(at[14], at[56]); MULADD(at[15], at[55]); MULADD(at[16], at[54]); MULADD(at[17], at[53]); MULADD(at[18], at[52]); MULADD(at[19], at[51]); MULADD(at[20], at[50]); MULADD(at[21], at[49]); MULADD(at[22], at[48]); MULADD(at[23], at[47]); MULADD(at[24], at[46]); MULADD(at[25], at[45]); MULADD(at[26], at[44]); MULADD(at[27], at[43]); MULADD(at[28], at[42]); MULADD(at[29], at[41]); MULADD(at[30], at[40]); MULADD(at[31], at[39]); + COMBA_STORE(C->dp[38]); + /* 39 */ + COMBA_FORWARD; + MULADD(at[8], at[63]); MULADD(at[9], at[62]); MULADD(at[10], at[61]); MULADD(at[11], at[60]); MULADD(at[12], at[59]); MULADD(at[13], at[58]); MULADD(at[14], at[57]); MULADD(at[15], at[56]); MULADD(at[16], at[55]); 
MULADD(at[17], at[54]); MULADD(at[18], at[53]); MULADD(at[19], at[52]); MULADD(at[20], at[51]); MULADD(at[21], at[50]); MULADD(at[22], at[49]); MULADD(at[23], at[48]); MULADD(at[24], at[47]); MULADD(at[25], at[46]); MULADD(at[26], at[45]); MULADD(at[27], at[44]); MULADD(at[28], at[43]); MULADD(at[29], at[42]); MULADD(at[30], at[41]); MULADD(at[31], at[40]); + COMBA_STORE(C->dp[39]); + /* 40 */ + COMBA_FORWARD; + MULADD(at[9], at[63]); MULADD(at[10], at[62]); MULADD(at[11], at[61]); MULADD(at[12], at[60]); MULADD(at[13], at[59]); MULADD(at[14], at[58]); MULADD(at[15], at[57]); MULADD(at[16], at[56]); MULADD(at[17], at[55]); MULADD(at[18], at[54]); MULADD(at[19], at[53]); MULADD(at[20], at[52]); MULADD(at[21], at[51]); MULADD(at[22], at[50]); MULADD(at[23], at[49]); MULADD(at[24], at[48]); MULADD(at[25], at[47]); MULADD(at[26], at[46]); MULADD(at[27], at[45]); MULADD(at[28], at[44]); MULADD(at[29], at[43]); MULADD(at[30], at[42]); MULADD(at[31], at[41]); + COMBA_STORE(C->dp[40]); + /* 41 */ + COMBA_FORWARD; + MULADD(at[10], at[63]); MULADD(at[11], at[62]); MULADD(at[12], at[61]); MULADD(at[13], at[60]); MULADD(at[14], at[59]); MULADD(at[15], at[58]); MULADD(at[16], at[57]); MULADD(at[17], at[56]); MULADD(at[18], at[55]); MULADD(at[19], at[54]); MULADD(at[20], at[53]); MULADD(at[21], at[52]); MULADD(at[22], at[51]); MULADD(at[23], at[50]); MULADD(at[24], at[49]); MULADD(at[25], at[48]); MULADD(at[26], at[47]); MULADD(at[27], at[46]); MULADD(at[28], at[45]); MULADD(at[29], at[44]); MULADD(at[30], at[43]); MULADD(at[31], at[42]); + COMBA_STORE(C->dp[41]); + /* 42 */ + COMBA_FORWARD; + MULADD(at[11], at[63]); MULADD(at[12], at[62]); MULADD(at[13], at[61]); MULADD(at[14], at[60]); MULADD(at[15], at[59]); MULADD(at[16], at[58]); MULADD(at[17], at[57]); MULADD(at[18], at[56]); MULADD(at[19], at[55]); MULADD(at[20], at[54]); MULADD(at[21], at[53]); MULADD(at[22], at[52]); MULADD(at[23], at[51]); MULADD(at[24], at[50]); MULADD(at[25], at[49]); MULADD(at[26], at[48]); 
MULADD(at[27], at[47]); MULADD(at[28], at[46]); MULADD(at[29], at[45]); MULADD(at[30], at[44]); MULADD(at[31], at[43]); + COMBA_STORE(C->dp[42]); + /* 43 */ + COMBA_FORWARD; + MULADD(at[12], at[63]); MULADD(at[13], at[62]); MULADD(at[14], at[61]); MULADD(at[15], at[60]); MULADD(at[16], at[59]); MULADD(at[17], at[58]); MULADD(at[18], at[57]); MULADD(at[19], at[56]); MULADD(at[20], at[55]); MULADD(at[21], at[54]); MULADD(at[22], at[53]); MULADD(at[23], at[52]); MULADD(at[24], at[51]); MULADD(at[25], at[50]); MULADD(at[26], at[49]); MULADD(at[27], at[48]); MULADD(at[28], at[47]); MULADD(at[29], at[46]); MULADD(at[30], at[45]); MULADD(at[31], at[44]); + COMBA_STORE(C->dp[43]); + /* 44 */ + COMBA_FORWARD; + MULADD(at[13], at[63]); MULADD(at[14], at[62]); MULADD(at[15], at[61]); MULADD(at[16], at[60]); MULADD(at[17], at[59]); MULADD(at[18], at[58]); MULADD(at[19], at[57]); MULADD(at[20], at[56]); MULADD(at[21], at[55]); MULADD(at[22], at[54]); MULADD(at[23], at[53]); MULADD(at[24], at[52]); MULADD(at[25], at[51]); MULADD(at[26], at[50]); MULADD(at[27], at[49]); MULADD(at[28], at[48]); MULADD(at[29], at[47]); MULADD(at[30], at[46]); MULADD(at[31], at[45]); + COMBA_STORE(C->dp[44]); + /* 45 */ + COMBA_FORWARD; + MULADD(at[14], at[63]); MULADD(at[15], at[62]); MULADD(at[16], at[61]); MULADD(at[17], at[60]); MULADD(at[18], at[59]); MULADD(at[19], at[58]); MULADD(at[20], at[57]); MULADD(at[21], at[56]); MULADD(at[22], at[55]); MULADD(at[23], at[54]); MULADD(at[24], at[53]); MULADD(at[25], at[52]); MULADD(at[26], at[51]); MULADD(at[27], at[50]); MULADD(at[28], at[49]); MULADD(at[29], at[48]); MULADD(at[30], at[47]); MULADD(at[31], at[46]); + COMBA_STORE(C->dp[45]); + /* 46 */ + COMBA_FORWARD; + MULADD(at[15], at[63]); MULADD(at[16], at[62]); MULADD(at[17], at[61]); MULADD(at[18], at[60]); MULADD(at[19], at[59]); MULADD(at[20], at[58]); MULADD(at[21], at[57]); MULADD(at[22], at[56]); MULADD(at[23], at[55]); MULADD(at[24], at[54]); MULADD(at[25], at[53]); MULADD(at[26], at[52]); 
MULADD(at[27], at[51]); MULADD(at[28], at[50]); MULADD(at[29], at[49]); MULADD(at[30], at[48]); MULADD(at[31], at[47]); + COMBA_STORE(C->dp[46]); + /* 47 */ + COMBA_FORWARD; + MULADD(at[16], at[63]); MULADD(at[17], at[62]); MULADD(at[18], at[61]); MULADD(at[19], at[60]); MULADD(at[20], at[59]); MULADD(at[21], at[58]); MULADD(at[22], at[57]); MULADD(at[23], at[56]); MULADD(at[24], at[55]); MULADD(at[25], at[54]); MULADD(at[26], at[53]); MULADD(at[27], at[52]); MULADD(at[28], at[51]); MULADD(at[29], at[50]); MULADD(at[30], at[49]); MULADD(at[31], at[48]); + COMBA_STORE(C->dp[47]); + /* 48 */ + COMBA_FORWARD; + MULADD(at[17], at[63]); MULADD(at[18], at[62]); MULADD(at[19], at[61]); MULADD(at[20], at[60]); MULADD(at[21], at[59]); MULADD(at[22], at[58]); MULADD(at[23], at[57]); MULADD(at[24], at[56]); MULADD(at[25], at[55]); MULADD(at[26], at[54]); MULADD(at[27], at[53]); MULADD(at[28], at[52]); MULADD(at[29], at[51]); MULADD(at[30], at[50]); MULADD(at[31], at[49]); + COMBA_STORE(C->dp[48]); + /* 49 */ + COMBA_FORWARD; + MULADD(at[18], at[63]); MULADD(at[19], at[62]); MULADD(at[20], at[61]); MULADD(at[21], at[60]); MULADD(at[22], at[59]); MULADD(at[23], at[58]); MULADD(at[24], at[57]); MULADD(at[25], at[56]); MULADD(at[26], at[55]); MULADD(at[27], at[54]); MULADD(at[28], at[53]); MULADD(at[29], at[52]); MULADD(at[30], at[51]); MULADD(at[31], at[50]); + COMBA_STORE(C->dp[49]); + /* 50 */ + COMBA_FORWARD; + MULADD(at[19], at[63]); MULADD(at[20], at[62]); MULADD(at[21], at[61]); MULADD(at[22], at[60]); MULADD(at[23], at[59]); MULADD(at[24], at[58]); MULADD(at[25], at[57]); MULADD(at[26], at[56]); MULADD(at[27], at[55]); MULADD(at[28], at[54]); MULADD(at[29], at[53]); MULADD(at[30], at[52]); MULADD(at[31], at[51]); + COMBA_STORE(C->dp[50]); + /* 51 */ + COMBA_FORWARD; + MULADD(at[20], at[63]); MULADD(at[21], at[62]); MULADD(at[22], at[61]); MULADD(at[23], at[60]); MULADD(at[24], at[59]); MULADD(at[25], at[58]); MULADD(at[26], at[57]); MULADD(at[27], at[56]); MULADD(at[28], 
at[55]); MULADD(at[29], at[54]); MULADD(at[30], at[53]); MULADD(at[31], at[52]); + COMBA_STORE(C->dp[51]); + /* 52 */ + COMBA_FORWARD; + MULADD(at[21], at[63]); MULADD(at[22], at[62]); MULADD(at[23], at[61]); MULADD(at[24], at[60]); MULADD(at[25], at[59]); MULADD(at[26], at[58]); MULADD(at[27], at[57]); MULADD(at[28], at[56]); MULADD(at[29], at[55]); MULADD(at[30], at[54]); MULADD(at[31], at[53]); + COMBA_STORE(C->dp[52]); + /* 53 */ + COMBA_FORWARD; + MULADD(at[22], at[63]); MULADD(at[23], at[62]); MULADD(at[24], at[61]); MULADD(at[25], at[60]); MULADD(at[26], at[59]); MULADD(at[27], at[58]); MULADD(at[28], at[57]); MULADD(at[29], at[56]); MULADD(at[30], at[55]); MULADD(at[31], at[54]); + COMBA_STORE(C->dp[53]); + /* 54 */ + COMBA_FORWARD; + MULADD(at[23], at[63]); MULADD(at[24], at[62]); MULADD(at[25], at[61]); MULADD(at[26], at[60]); MULADD(at[27], at[59]); MULADD(at[28], at[58]); MULADD(at[29], at[57]); MULADD(at[30], at[56]); MULADD(at[31], at[55]); + COMBA_STORE(C->dp[54]); + /* 55 */ + COMBA_FORWARD; + MULADD(at[24], at[63]); MULADD(at[25], at[62]); MULADD(at[26], at[61]); MULADD(at[27], at[60]); MULADD(at[28], at[59]); MULADD(at[29], at[58]); MULADD(at[30], at[57]); MULADD(at[31], at[56]); + COMBA_STORE(C->dp[55]); + /* 56 */ + COMBA_FORWARD; + MULADD(at[25], at[63]); MULADD(at[26], at[62]); MULADD(at[27], at[61]); MULADD(at[28], at[60]); MULADD(at[29], at[59]); MULADD(at[30], at[58]); MULADD(at[31], at[57]); + COMBA_STORE(C->dp[56]); + /* 57 */ + COMBA_FORWARD; + MULADD(at[26], at[63]); MULADD(at[27], at[62]); MULADD(at[28], at[61]); MULADD(at[29], at[60]); MULADD(at[30], at[59]); MULADD(at[31], at[58]); + COMBA_STORE(C->dp[57]); + /* 58 */ + COMBA_FORWARD; + MULADD(at[27], at[63]); MULADD(at[28], at[62]); MULADD(at[29], at[61]); MULADD(at[30], at[60]); MULADD(at[31], at[59]); + COMBA_STORE(C->dp[58]); + /* 59 */ + COMBA_FORWARD; + MULADD(at[28], at[63]); MULADD(at[29], at[62]); MULADD(at[30], at[61]); MULADD(at[31], at[60]); + COMBA_STORE(C->dp[59]); + /* 
60 */ + COMBA_FORWARD; + MULADD(at[29], at[63]); MULADD(at[30], at[62]); MULADD(at[31], at[61]); + COMBA_STORE(C->dp[60]); + /* 61 */ + COMBA_FORWARD; + MULADD(at[30], at[63]); MULADD(at[31], at[62]); + COMBA_STORE(C->dp[61]); + /* 62 */ + COMBA_FORWARD; + MULADD(at[31], at[63]); + COMBA_STORE(C->dp[62]); + COMBA_STORE2(C->dp[63]); + C->used = 64; + C->sign = A->sign ^ B->sign; + fp_clamp(C); + COMBA_FINI; +} + +#endif + +/* End: fp_mul_comba.c */ + +/* Start: fp_mul_d.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = a * b */ +void fp_mul_d(fp_int *a, fp_digit b, fp_int *c) +{ + fp_word w; + int x, oldused; + + oldused = c->used; + c->used = a->used; + c->sign = a->sign; + w = 0; + for (x = 0; x < a->used; x++) { + w = ((fp_word)a->dp[x]) * ((fp_word)b) + w; + c->dp[x] = (fp_digit)w; + w = w >> DIGIT_BIT; + } + if (w != 0 && (a->used != FP_SIZE)) { + c->dp[c->used++] = w; + ++x; + } + for (; x < oldused; x++) { + c->dp[x] = 0; + } + fp_clamp(c); +} + + +/* End: fp_mul_d.c */ + +/* Start: fp_mulmod.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include +/* d = a * b (mod c) */ +int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d) +{ + fp_int tmp; + fp_zero(&tmp); + fp_mul(a, b, &tmp); + return fp_mod(&tmp, c, d); +} + +/* End: fp_mulmod.c */ + +/* Start: fp_prime_miller_rabin.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. 
+ * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* Miller-Rabin test of "a" to the base of "b" as described in + * HAC pp. 139 Algorithm 4.24 + * + * Sets result to 0 if definitely composite or 1 if probably prime. + * Randomly the chance of error is no more than 1/4 and often + * very much lower. + */ +void fp_prime_miller_rabin (fp_int * a, fp_int * b, int *result) +{ + fp_int n1, y, r; + int s, j; + + /* default */ + *result = FP_NO; + + /* ensure b > 1 */ + if (fp_cmp_d(b, 1) != FP_GT) { + return; + } + + /* get n1 = a - 1 */ + fp_init_copy(&n1, a); + fp_sub_d(&n1, 1, &n1); + + /* set 2**s * r = n1 */ + fp_init_copy(&r, &n1); + + /* count the number of least significant bits + * which are zero + */ + s = fp_cnt_lsb(&r); + + /* now divide n - 1 by 2**s */ + fp_div_2d (&r, s, &r, NULL); + + /* compute y = b**r mod a */ + fp_init(&y); + fp_exptmod(b, &r, a, &y); + + /* if y != 1 and y != n1 do */ + if (fp_cmp_d (&y, 1) != FP_EQ && fp_cmp (&y, &n1) != FP_EQ) { + j = 1; + /* while j <= s-1 and y != n1 */ + while ((j <= (s - 1)) && fp_cmp (&y, &n1) != FP_EQ) { + fp_sqrmod (&y, a, &y); + + /* if y == 1 then composite */ + if (fp_cmp_d (&y, 1) == FP_EQ) { + return; + } + ++j; + } + + /* if y != n1 then composite */ + if (fp_cmp (&y, &n1) != FP_EQ) { + return; + } + } + + /* probably prime now */ + *result = FP_YES; +} + +/* End: fp_prime_miller_rabin.c */ + +/* Start: fp_prime_random_ex.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* This is possibly the mother of all prime generation functions, muahahahahaha! 
*/
/* Generate a random probable prime of "size" bits into "a".
 *
 * a     - receives the prime
 * t     - must be > 0; only validated here, the primality check used
 *         (fp_isprime) takes no round count
 * size  - bit length of the prime, must be > 1; the top bit is always set
 * flags - TFM_PRIME_BBS       make the prime congruent to 3 mod 4
 *         TFM_PRIME_SAFE      require (a-1)/2 to be prime too (implies BBS)
 *         TFM_PRIME_2MSB_ON   force the second most significant bit to 1
 *         TFM_PRIME_2MSB_OFF  force the second most significant bit to 0
 *                             (ignored when 2MSB_ON is also given)
 * cb    - random source; called as cb(buf, len, dat) and must return len
 * dat   - opaque pointer handed to cb
 *
 * Returns FP_OKAY on success, FP_VAL on bad arguments or when the callback
 * delivers fewer bytes than requested, FP_MEM when the scratch buffer
 * cannot be allocated.
 */
int fp_prime_random_ex(fp_int *a, int t, int size, int flags, tfm_prime_callback cb, void *dat)
{
   unsigned char *tmp, maskAND, maskOR_msb, maskOR_lsb, maskAND_2msb, bit_2msb;
   int res, err, bsize, maskOR_msb_offset;

   /* sanity check the input */
   if (size <= 1 || t <= 0 || cb == NULL) {
      return FP_VAL;
   }

   /* TFM_PRIME_SAFE implies TFM_PRIME_BBS */
   if (flags & TFM_PRIME_SAFE) {
      flags |= TFM_PRIME_BBS;
   }

   /* calc the byte size */
   bsize = (size>>3) + ((size&7) ? 1 : 0);

   /* we need a buffer of bsize bytes */
   tmp = malloc(bsize);
   if (tmp == NULL) {
      return FP_MEM;
   }

   /* calc the maskAND value for the MSbyte.  When size is a multiple of 8
    * the whole top byte is significant; shifting 0xFF right by 8 would give
    * 0 and wipe every random bit of that byte.
    */
   maskAND = ((size & 7) == 0) ? 0xFF : (0xFF >> (8 - (size & 7)));

   /* locate the second most significant bit (bit size-2).  tmp[] is
    * big-endian, so that bit lives in tmp[0] unless the top byte holds
    * only a single bit (size mod 8 == 1), in which case it is bit 7 of
    * tmp[1].  size >= 9 in that case, so tmp[1] exists.
    */
   maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0;
   bit_2msb          = (unsigned char)(1 << ((size - 2) & 7));
   maskOR_msb        = 0;
   maskAND_2msb      = 0xFF;
   if (flags & TFM_PRIME_2MSB_ON) {
      maskOR_msb |= bit_2msb;
   } else if (flags & TFM_PRIME_2MSB_OFF) {
      maskAND_2msb = (unsigned char)~bit_2msb;
   }

   /* get the maskOR_lsb: always odd, and 3 mod 4 for BBS primes */
   maskOR_lsb = 1;
   if (flags & TFM_PRIME_BBS) {
      maskOR_lsb |= 3;
   }

   do {
      /* read the bytes */
      if (cb(tmp, bsize, dat) != bsize) {
         err = FP_VAL;
         goto error;
      }

      /* work over the MSbyte: trim to size bits and force the top bit */
      tmp[0] &= maskAND;
      tmp[0] |= 1 << ((size - 1) & 7);

      /* apply the second-MSB request, then the low-order bits */
      tmp[maskOR_msb_offset] &= maskAND_2msb;
      tmp[maskOR_msb_offset] |= maskOR_msb;
      tmp[bsize-1]           |= maskOR_lsb;

      /* read it in */
      fp_read_unsigned_bin(a, tmp, bsize);

      /* is it prime? */
      res = fp_isprime(a);
      if (res == FP_NO) continue;

      if (flags & TFM_PRIME_SAFE) {
         /* see if (a-1)/2 is prime */
         fp_sub_d(a, 1, a);
         fp_div_2(a, a);

         /* is it prime? */
         res = fp_isprime(a);
      }
   } while (res == FP_NO);

   if (flags & TFM_PRIME_SAFE) {
      /* the loop left (a-1)/2 in a; restore a to the original value */
      fp_mul_2(a, a);
      fp_add_d(a, 1, a);
   }

   err = FP_OKAY;
error:
   free(tmp);
   return err;
}

/* End: fp_prime_random_ex.c */

/* Start: fp_radix_size.c */
/* TomsFastMath, a fast ISO C bignum library.
 *
 * This project is meant to fill in where LibTomMath
 * falls short.
That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +int fp_radix_size(fp_int *a, int radix, int *size) +{ +} + +/* End: fp_radix_size.c */ + +/* Start: fp_read_radix.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +int fp_read_radix(fp_int *a, char *str, int radix) +{ + int y, neg; + char ch; + + /* make sure the radix is ok */ + if (radix < 2 || radix > 64) { + return FP_VAL; + } + + /* if the leading digit is a + * minus set the sign to negative. + */ + if (*str == '-') { + ++str; + neg = FP_NEG; + } else { + neg = FP_ZPOS; + } + + /* set the integer to the default of zero */ + fp_zero (a); + + /* process each digit of the string */ + while (*str) { + /* if the radix < 36 the conversion is case insensitive + * this allows numbers like 1AB and 1ab to represent the same value + * [e.g. in hex] + */ + ch = (char) ((radix < 36) ? toupper (*str) : *str); + for (y = 0; y < 64; y++) { + if (ch == fp_s_rmap[y]) { + break; + } + } + + /* if the char was found in the map + * and is less than the given radix add it + * to the number, otherwise exit the loop. + */ + if (y < radix) { + fp_mul_d (a, (fp_digit) radix, a); + fp_add_d (a, (fp_digit) y, a); + } else { + break; + } + ++str; + } + + /* set the sign only if a != 0 */ + if (fp_iszero(a) != FP_YES) { + a->sign = neg; + } + return FP_OKAY; +} + +/* End: fp_read_radix.c */ + +/* Start: fp_read_signed_bin.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. 
+ * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +void fp_read_signed_bin(fp_int *a, unsigned char *b, int c) +{ + /* read magnitude */ + fp_read_unsigned_bin (a, b + 1, c - 1); + + /* first byte is 0 for positive, non-zero for negative */ + if (b[0] == 0) { + a->sign = FP_ZPOS; + } else { + a->sign = FP_NEG; + } +} + +/* End: fp_read_signed_bin.c */ + +/* Start: fp_read_unsigned_bin.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +void fp_read_unsigned_bin(fp_int *a, unsigned char *b, int c) +{ + /* zero the int */ + fp_zero (a); + + /* read the bytes in */ + for (; c > 0; c--) { + fp_mul_2d (a, 8, a); + a->dp[0] |= *b++; + a->used += 1; + } + fp_clamp (a); +} + +/* End: fp_read_unsigned_bin.c */ + +/* Start: fp_reverse.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* reverse an array, used for radix code */ +void bn_reverse (unsigned char *s, int len) +{ + int ix, iy; + unsigned char t; + + ix = 0; + iy = len - 1; + while (ix < iy) { + t = s[ix]; + s[ix] = s[iy]; + s[iy] = t; + ++ix; + --iy; + } +} + +/* End: fp_reverse.c */ + +/* Start: fp_rshd.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. 
+ * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +void fp_rshd(fp_int *a, int x) +{ + int y; + + /* too many digits just zero and return */ + if (x >= a->used) { + fp_zero(a); + return; + } + + /* shift */ + for (y = 0; y < a->used - x; y++) { + a->dp[y] = a->dp[y+x]; + } + + /* zero rest */ + for (; y < a->used; y++) { + a->dp[y] = 0; + } + + /* decrement count */ + a->used -= x; + fp_clamp(a); +} + + +/* End: fp_rshd.c */ + +/* Start: fp_s_rmap.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* chars used in radix conversions */ +const char *fp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; + +/* End: fp_s_rmap.c */ + +/* Start: fp_set.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +void fp_set(fp_int *a, fp_digit b) +{ + fp_zero(a); + a->dp[0] = b; + a->used = b ? 1 : 0; +} + +/* End: fp_set.c */ + +/* Start: fp_signed_bin_size.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +int fp_signed_bin_size(fp_int *a) +{ + return 1 + fp_unsigned_bin_size (a); +} + +/* End: fp_signed_bin_size.c */ + +/* Start: fp_sqr.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. 
+ * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* b = a*a */ +void fp_sqr(fp_int *A, fp_int *B) +{ + int r, y, s; + fp_int aa, bb, comp, amb, t1; + + y = A->used; + if (y <= 48) { + if (y <= 4) { + fp_sqr_comba4(A,B); + } else if (y <= 8) { + fp_sqr_comba8(A,B); + } else if (y <= 16 && y >= 12) { + fp_sqr_comba16(A,B); +#ifdef TFM_HUGE + } else if (y <= 32 && y >= 28) { + fp_sqr_comba32(A,B); +#endif + } else { + fp_sqr_comba(A, B); + } + + } else { + /* do the karatsuba action + + if A = ab ||a|| = r we need to solve + + a^2*r^2 + (-(a-b)^2 + a^2 + b^2)*r + b^2 + + So we solve for the three products then we form the final result with careful shifting + and addition. + +Obvious points of optimization + +- "ac" parts can be memcpy'ed with an offset [all you have to do is zero upto the next 8 digits] +- Similarly the "bd" parts can be memcpy'ed and zeroed to 8 +- + + */ + /* get our value of r */ + r = y >> 1; + + /* now solve for ac */ +// fp_copy(A, &t1); fp_rshd(&t1, r); + for (s = 0; s < A->used - r; s++) { + t1.dp[s] = A->dp[s+r]; + } + for (; s < FP_SIZE; s++) { + t1.dp[s] = 0; + } + if (A->used >= r) { + t1.used = A->used - r; + } else { + t1.used = 0; + } + t1.sign = A->sign; + fp_copy(&t1, &amb); + fp_zero(&aa); + fp_sqr(&t1, &aa); + + /* now solve for bd */ +// fp_mod_2d(A, r * DIGIT_BIT, &t1); + for (s = 0; s < r; s++) { + t1.dp[s] = A->dp[s]; + } + for (; s < FP_SIZE; s++) { + t1.dp[s] = 0; + } + t1.used = r; + fp_clamp(&t1); + + fp_sub(&amb, &t1, &amb); + fp_zero(&bb); + fp_sqr(&t1, &bb); + + /* now get the (a-b) term */ + fp_zero(&comp); + fp_sqr(&amb, &comp); + + /* now solve the system, do the middle term first */ + comp.sign ^= 1; + fp_add(&comp, &aa, &comp); + fp_add(&comp, &bb, &comp); + fp_lshd(&comp, r); + + /* leading term */ + fp_lshd(&aa, r+r); + + /* now sum them together */ + fp_zero(B); + fp_add(&aa, &comp, B); + fp_add(&bb, B, B); + B->sign = FP_ZPOS; + } +} + + +/* End: fp_sqr.c */ + +/* Start: fp_sqr_comba.c */ +/* 
TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* About this file... +*/ + +#if defined(TFM_X86) + +/* x86-32 optimized */ + +#define COMBA_START + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + c0 = c1; c1 = c2; c2 = 0; + +#define COMBA_FINI + +#define SQRADD(i, j) \ +asm volatile ( \ + "movl %6,%%eax \n\t" \ + "mull %%eax \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc"); + +#define SQRADD2(i, j) \ +asm volatile ( \ + "movl %6,%%eax \n\t" \ + "mull %7 \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc"); + +#elif defined(TFM_X86_64) +/* x86-64 optimized */ + +#define COMBA_START + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + c0 = c1; c1 = c2; c2 = 0; + +#define COMBA_FINI + +#define SQRADD(i, j) \ +asm volatile ( \ + "movq %6,%%rax \n\t" \ + "mulq %%rax \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%rax","%rdx","%cc"); + +#define SQRADD2(i, j) \ +asm volatile ( \ + "movq %6,%%rax \n\t" \ + "mulq %7 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) 
:"%rax","%rdx","%cc"); + + +#elif defined(TFM_SSE2) + +/* SSE2 Optimized */ +#define COMBA_START + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + c0 = c1; c1 = c2; c2 = 0; + +#define COMBA_FINI \ + asm("emms"); + +#define SQRADD(i, j) \ +asm volatile ( \ + "movd %6,%%mm0 \n\t" \ + "pmuludq %%mm0,%%mm0\n\t" \ + "movd %%mm0,%%eax \n\t" \ + "psrlq $32,%%mm0 \n\t" \ + "movd %%mm0,%%edx \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc"); + +#define SQRADD2(i, j) \ +asm volatile ( \ + "movd %6,%%mm0 \n\t" \ + "movd %7,%%mm1 \n\t" \ + "pmuludq %%mm1,%%mm0\n\t" \ + "movd %%mm0,%%eax \n\t" \ + "psrlq $32,%%mm0 \n\t" \ + "movd %%mm0,%%edx \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + "addl %%eax,%0 \n\t" \ + "adcl %%edx,%1 \n\t" \ + "adcl $0,%2 \n\t" \ + :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc"); + +#elif defined(TFM_ARM) + +/* ARM code */ + +#define COMBA_START + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + c0 = c1; c1 = c2; c2 = 0; + +#define COMBA_FINI + +/* multiplies point i and j, updates carry "c1" and digit c2 */ +#define SQRADD(i, j) \ +asm( \ +" UMULL r0,r1,%6,%6 \n\t" \ +" ADDS %0,%0,r0 \n\t" \ +" ADCS %1,%1,r1 \n\t" \ +" ADC %2,%2,#0 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "%cc"); + +/* for squaring some of the terms are doubled... 
*/ +#define SQRADD2(i, j) \ +asm( \ +" UMULL r0,r1,%6,%7 \n\t" \ +" ADDS %0,%0,r0 \n\t" \ +" ADCS %1,%1,r1 \n\t" \ +" ADC %2,%2,#0 \n\t" \ +" ADDS %0,%0,r0 \n\t" \ +" ADCS %1,%1,r1 \n\t" \ +" ADC %2,%2,#0 \n\t" \ +:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc"); + +#else + +/* ISO C portable code */ + +#define COMBA_START + +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + c0 = c1; c1 = c2; c2 = 0; + +#define COMBA_FINI + +/* multiplies point i and j, updates carry "c1" and digit c2 */ +#define SQRADD(i, j) \ + t = ((fp_word)i) * ((fp_word)j); \ + c0 = (c0 + t); if (c0 < ((fp_digit)t)) ++c1; \ + c1 = (c1 + (t>>DIGIT_BIT)); if (c1 < (t>>DIGIT_BIT)) ++c2; + +/* for squaring some of the terms are doubled... */ +#define SQRADD2(i, j) \ + t = ((fp_word)i) * ((fp_word)j); \ + c0 = (c0 + t); if (c0 < ((fp_digit)t)) ++c1; \ + c1 = (c1 + (t>>DIGIT_BIT)); if (c1 < (t>>DIGIT_BIT)) ++c2; \ + c0 = (c0 + t); if (c0 < ((fp_digit)t)) ++c1; \ + c1 = (c1 + (t>>DIGIT_BIT)); if (c1 < (t>>DIGIT_BIT)) ++c2; + +#endif + +/* generic comba squarer */ +void fp_sqr_comba(fp_int *A, fp_int *B) +{ + int pa, ix, iz; + fp_digit c0, c1, c2; + fp_int tmp, *dst; + fp_word t; + + /* get size of output and trim */ + pa = A->used + A->used; + if (pa >= FP_SIZE) { + pa = FP_SIZE-1; + } + + /* number of output digits to produce */ + COMBA_START; + CLEAR_CARRY; + + if (A == B) { + fp_zero(&tmp); + dst = &tmp; + } else { + fp_zero(B); + dst = B; + } + + for (ix = 0; ix < pa; ix++) { + int tx, ty, iy; + fp_digit *tmpy, *tmpx; + + /* get offsets into the two bignums */ + ty = MIN(A->used-1, ix); + tx = ix - ty; + + /* setup temp aliases */ + tmpx = A->dp + tx; + tmpy = A->dp + ty; + + /* this is the number of times the loop will iterrate, essentially its + while (tx++ < a->used && ty-- >= 0) { ... 
} + */ + iy = MIN(A->used-tx, ty+1); + + /* now for squaring tx can never equal ty + * we halve the distance since they approach at a rate of 2x + * and we have to round because odd cases need to be executed + */ + iy = MIN(iy, (ty-tx+1)>>1); + + /* forward carries */ + CARRY_FORWARD; + + /* execute loop */ + for (iz = 0; iz < iy; iz++) { + SQRADD2(*tmpx++, *tmpy--); + } + + /* even columns have the square term in them */ + if ((ix&1) == 0) { + SQRADD(A->dp[ix>>1], A->dp[ix>>1]); + } + + /* store it */ + COMBA_STORE(dst->dp[ix]); + } + COMBA_STORE2(dst->dp[ix]); + + COMBA_FINI; + + /* setup dest */ + dst->used = pa; + fp_clamp (dst); + if (dst != B) { + fp_copy(dst, B); + } +} + +void fp_sqr_comba4(fp_int *A, fp_int *B) +{ + fp_word t; + fp_digit *a, b[8], c0, c1, c2; + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADD2(a[2], a[3]); + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + COMBA_STORE2(b[7]); + COMBA_FINI; + + B->used = 8; + B->sign = FP_ZPOS; + memcpy(B->dp, b, 8 * sizeof(fp_digit)); + fp_clamp(B); +} + + +void fp_sqr_comba8(fp_int *A, fp_int *B) +{ + fp_word t; + fp_digit *a, b[16], c0, c1, c2; + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* 
output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADD2(a[0], a[5]); SQRADD2(a[1], a[4]); SQRADD2(a[2], a[3]); + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADD2(a[0], a[6]); SQRADD2(a[1], a[5]); SQRADD2(a[2], a[4]); SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADD2(a[0], a[7]); SQRADD2(a[1], a[6]); SQRADD2(a[2], a[5]); SQRADD2(a[3], a[4]); + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADD2(a[1], a[7]); SQRADD2(a[2], a[6]); SQRADD2(a[3], a[5]); SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADD2(a[2], a[7]); SQRADD2(a[3], a[6]); SQRADD2(a[4], a[5]); + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADD2(a[3], a[7]); SQRADD2(a[4], a[6]); SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADD2(a[4], a[7]); SQRADD2(a[5], a[6]); + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADD2(a[5], a[7]); SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADD2(a[6], a[7]); + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + COMBA_STORE2(b[15]); + COMBA_FINI; + + B->used = 16; + B->sign = FP_ZPOS; + memcpy(B->dp, b, 16 * sizeof(fp_digit)); + fp_clamp(B); +} + + +void fp_sqr_comba16(fp_int *A, fp_int *B) +{ + fp_word t; + fp_digit *a, b[32], c0, c1, c2; + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], 
a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADD2(a[0], a[5]); SQRADD2(a[1], a[4]); SQRADD2(a[2], a[3]); + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADD2(a[0], a[6]); SQRADD2(a[1], a[5]); SQRADD2(a[2], a[4]); SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADD2(a[0], a[7]); SQRADD2(a[1], a[6]); SQRADD2(a[2], a[5]); SQRADD2(a[3], a[4]); + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADD2(a[0], a[8]); SQRADD2(a[1], a[7]); SQRADD2(a[2], a[6]); SQRADD2(a[3], a[5]); SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADD2(a[0], a[9]); SQRADD2(a[1], a[8]); SQRADD2(a[2], a[7]); SQRADD2(a[3], a[6]); SQRADD2(a[4], a[5]); + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADD2(a[0], a[10]); SQRADD2(a[1], a[9]); SQRADD2(a[2], a[8]); SQRADD2(a[3], a[7]); SQRADD2(a[4], a[6]); SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADD2(a[0], a[11]); SQRADD2(a[1], a[10]); SQRADD2(a[2], a[9]); SQRADD2(a[3], a[8]); SQRADD2(a[4], a[7]); SQRADD2(a[5], a[6]); + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADD2(a[0], a[12]); SQRADD2(a[1], a[11]); SQRADD2(a[2], a[10]); SQRADD2(a[3], a[9]); SQRADD2(a[4], a[8]); SQRADD2(a[5], a[7]); SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADD2(a[0], a[13]); SQRADD2(a[1], a[12]); SQRADD2(a[2], a[11]); SQRADD2(a[3], a[10]); SQRADD2(a[4], a[9]); SQRADD2(a[5], a[8]); SQRADD2(a[6], a[7]); + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADD2(a[0], a[14]); SQRADD2(a[1], a[13]); SQRADD2(a[2], a[12]); SQRADD2(a[3], a[11]); SQRADD2(a[4], a[10]); SQRADD2(a[5], a[9]); SQRADD2(a[6], a[8]); SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADD2(a[0], a[15]); SQRADD2(a[1], a[14]); 
SQRADD2(a[2], a[13]); SQRADD2(a[3], a[12]); SQRADD2(a[4], a[11]); SQRADD2(a[5], a[10]); SQRADD2(a[6], a[9]); SQRADD2(a[7], a[8]); + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADD2(a[1], a[15]); SQRADD2(a[2], a[14]); SQRADD2(a[3], a[13]); SQRADD2(a[4], a[12]); SQRADD2(a[5], a[11]); SQRADD2(a[6], a[10]); SQRADD2(a[7], a[9]); SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADD2(a[2], a[15]); SQRADD2(a[3], a[14]); SQRADD2(a[4], a[13]); SQRADD2(a[5], a[12]); SQRADD2(a[6], a[11]); SQRADD2(a[7], a[10]); SQRADD2(a[8], a[9]); + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADD2(a[3], a[15]); SQRADD2(a[4], a[14]); SQRADD2(a[5], a[13]); SQRADD2(a[6], a[12]); SQRADD2(a[7], a[11]); SQRADD2(a[8], a[10]); SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADD2(a[4], a[15]); SQRADD2(a[5], a[14]); SQRADD2(a[6], a[13]); SQRADD2(a[7], a[12]); SQRADD2(a[8], a[11]); SQRADD2(a[9], a[10]); + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADD2(a[5], a[15]); SQRADD2(a[6], a[14]); SQRADD2(a[7], a[13]); SQRADD2(a[8], a[12]); SQRADD2(a[9], a[11]); SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADD2(a[6], a[15]); SQRADD2(a[7], a[14]); SQRADD2(a[8], a[13]); SQRADD2(a[9], a[12]); SQRADD2(a[10], a[11]); + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADD2(a[7], a[15]); SQRADD2(a[8], a[14]); SQRADD2(a[9], a[13]); SQRADD2(a[10], a[12]); SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADD2(a[8], a[15]); SQRADD2(a[9], a[14]); SQRADD2(a[10], a[13]); SQRADD2(a[11], a[12]); + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADD2(a[9], a[15]); SQRADD2(a[10], a[14]); SQRADD2(a[11], a[13]); SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + + /* output 25 */ + CARRY_FORWARD; + SQRADD2(a[10], a[15]); SQRADD2(a[11], a[14]); SQRADD2(a[12], a[13]); + COMBA_STORE(b[25]); + + /* output 26 */ + 
CARRY_FORWARD; + SQRADD2(a[11], a[15]); SQRADD2(a[12], a[14]); SQRADD(a[13], a[13]); + COMBA_STORE(b[26]); + + /* output 27 */ + CARRY_FORWARD; + SQRADD2(a[12], a[15]); SQRADD2(a[13], a[14]); + COMBA_STORE(b[27]); + + /* output 28 */ + CARRY_FORWARD; + SQRADD2(a[13], a[15]); SQRADD(a[14], a[14]); + COMBA_STORE(b[28]); + + /* output 29 */ + CARRY_FORWARD; + SQRADD2(a[14], a[15]); + COMBA_STORE(b[29]); + + /* output 30 */ + CARRY_FORWARD; + SQRADD(a[15], a[15]); + COMBA_STORE(b[30]); + COMBA_STORE2(b[31]); + COMBA_FINI; + + B->used = 32; + B->sign = FP_ZPOS; + memcpy(B->dp, b, 32 * sizeof(fp_digit)); + fp_clamp(B); +} + +#ifdef TFM_HUGE + +void fp_sqr_comba32(fp_int *A, fp_int *B) +{ + fp_word t; + fp_digit *a, b[64], c0, c1, c2; + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0],a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADD2(a[0], a[5]); SQRADD2(a[1], a[4]); SQRADD2(a[2], a[3]); + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADD2(a[0], a[6]); SQRADD2(a[1], a[5]); SQRADD2(a[2], a[4]); SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADD2(a[0], a[7]); SQRADD2(a[1], a[6]); SQRADD2(a[2], a[5]); SQRADD2(a[3], a[4]); + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADD2(a[0], a[8]); SQRADD2(a[1], a[7]); SQRADD2(a[2], a[6]); SQRADD2(a[3], a[5]); SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADD2(a[0], a[9]); SQRADD2(a[1], a[8]); SQRADD2(a[2], a[7]); SQRADD2(a[3], a[6]); SQRADD2(a[4], a[5]); + 
COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADD2(a[0], a[10]); SQRADD2(a[1], a[9]); SQRADD2(a[2], a[8]); SQRADD2(a[3], a[7]); SQRADD2(a[4], a[6]); SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADD2(a[0], a[11]); SQRADD2(a[1], a[10]); SQRADD2(a[2], a[9]); SQRADD2(a[3], a[8]); SQRADD2(a[4], a[7]); SQRADD2(a[5], a[6]); + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADD2(a[0], a[12]); SQRADD2(a[1], a[11]); SQRADD2(a[2], a[10]); SQRADD2(a[3], a[9]); SQRADD2(a[4], a[8]); SQRADD2(a[5], a[7]); SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADD2(a[0], a[13]); SQRADD2(a[1], a[12]); SQRADD2(a[2], a[11]); SQRADD2(a[3], a[10]); SQRADD2(a[4], a[9]); SQRADD2(a[5], a[8]); SQRADD2(a[6], a[7]); + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADD2(a[0], a[14]); SQRADD2(a[1], a[13]); SQRADD2(a[2], a[12]); SQRADD2(a[3], a[11]); SQRADD2(a[4], a[10]); SQRADD2(a[5], a[9]); SQRADD2(a[6], a[8]); SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADD2(a[0], a[15]); SQRADD2(a[1], a[14]); SQRADD2(a[2], a[13]); SQRADD2(a[3], a[12]); SQRADD2(a[4], a[11]); SQRADD2(a[5], a[10]); SQRADD2(a[6], a[9]); SQRADD2(a[7], a[8]); + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADD2(a[0], a[16]); SQRADD2(a[1], a[15]); SQRADD2(a[2], a[14]); SQRADD2(a[3], a[13]); SQRADD2(a[4], a[12]); SQRADD2(a[5], a[11]); SQRADD2(a[6], a[10]); SQRADD2(a[7], a[9]); SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADD2(a[0], a[17]); SQRADD2(a[1], a[16]); SQRADD2(a[2], a[15]); SQRADD2(a[3], a[14]); SQRADD2(a[4], a[13]); SQRADD2(a[5], a[12]); SQRADD2(a[6], a[11]); SQRADD2(a[7], a[10]); SQRADD2(a[8], a[9]); + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADD2(a[0], a[18]); SQRADD2(a[1], a[17]); SQRADD2(a[2], a[16]); SQRADD2(a[3], a[15]); SQRADD2(a[4], a[14]); SQRADD2(a[5], a[13]); SQRADD2(a[6], a[12]); 
SQRADD2(a[7], a[11]); SQRADD2(a[8], a[10]); SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADD2(a[0], a[19]); SQRADD2(a[1], a[18]); SQRADD2(a[2], a[17]); SQRADD2(a[3], a[16]); SQRADD2(a[4], a[15]); SQRADD2(a[5], a[14]); SQRADD2(a[6], a[13]); SQRADD2(a[7], a[12]); SQRADD2(a[8], a[11]); SQRADD2(a[9], a[10]); + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADD2(a[0], a[20]); SQRADD2(a[1], a[19]); SQRADD2(a[2], a[18]); SQRADD2(a[3], a[17]); SQRADD2(a[4], a[16]); SQRADD2(a[5], a[15]); SQRADD2(a[6], a[14]); SQRADD2(a[7], a[13]); SQRADD2(a[8], a[12]); SQRADD2(a[9], a[11]); SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADD2(a[0], a[21]); SQRADD2(a[1], a[20]); SQRADD2(a[2], a[19]); SQRADD2(a[3], a[18]); SQRADD2(a[4], a[17]); SQRADD2(a[5], a[16]); SQRADD2(a[6], a[15]); SQRADD2(a[7], a[14]); SQRADD2(a[8], a[13]); SQRADD2(a[9], a[12]); SQRADD2(a[10], a[11]); + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADD2(a[0], a[22]); SQRADD2(a[1], a[21]); SQRADD2(a[2], a[20]); SQRADD2(a[3], a[19]); SQRADD2(a[4], a[18]); SQRADD2(a[5], a[17]); SQRADD2(a[6], a[16]); SQRADD2(a[7], a[15]); SQRADD2(a[8], a[14]); SQRADD2(a[9], a[13]); SQRADD2(a[10], a[12]); SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADD2(a[0], a[23]); SQRADD2(a[1], a[22]); SQRADD2(a[2], a[21]); SQRADD2(a[3], a[20]); SQRADD2(a[4], a[19]); SQRADD2(a[5], a[18]); SQRADD2(a[6], a[17]); SQRADD2(a[7], a[16]); SQRADD2(a[8], a[15]); SQRADD2(a[9], a[14]); SQRADD2(a[10], a[13]); SQRADD2(a[11], a[12]); + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADD2(a[0], a[24]); SQRADD2(a[1], a[23]); SQRADD2(a[2], a[22]); SQRADD2(a[3], a[21]); SQRADD2(a[4], a[20]); SQRADD2(a[5], a[19]); SQRADD2(a[6], a[18]); SQRADD2(a[7], a[17]); SQRADD2(a[8], a[16]); SQRADD2(a[9], a[15]); SQRADD2(a[10], a[14]); SQRADD2(a[11], a[13]); SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + + /* output 25 */ + 
CARRY_FORWARD; + SQRADD2(a[0], a[25]); SQRADD2(a[1], a[24]); SQRADD2(a[2], a[23]); SQRADD2(a[3], a[22]); SQRADD2(a[4], a[21]); SQRADD2(a[5], a[20]); SQRADD2(a[6], a[19]); SQRADD2(a[7], a[18]); SQRADD2(a[8], a[17]); SQRADD2(a[9], a[16]); SQRADD2(a[10], a[15]); SQRADD2(a[11], a[14]); SQRADD2(a[12], a[13]); + COMBA_STORE(b[25]); + + /* output 26 */ + CARRY_FORWARD; + SQRADD2(a[0], a[26]); SQRADD2(a[1], a[25]); SQRADD2(a[2], a[24]); SQRADD2(a[3], a[23]); SQRADD2(a[4], a[22]); SQRADD2(a[5], a[21]); SQRADD2(a[6], a[20]); SQRADD2(a[7], a[19]); SQRADD2(a[8], a[18]); SQRADD2(a[9], a[17]); SQRADD2(a[10], a[16]); SQRADD2(a[11], a[15]); SQRADD2(a[12], a[14]); SQRADD(a[13], a[13]); + COMBA_STORE(b[26]); + + /* output 27 */ + CARRY_FORWARD; + SQRADD2(a[0], a[27]); SQRADD2(a[1], a[26]); SQRADD2(a[2], a[25]); SQRADD2(a[3], a[24]); SQRADD2(a[4], a[23]); SQRADD2(a[5], a[22]); SQRADD2(a[6], a[21]); SQRADD2(a[7], a[20]); SQRADD2(a[8], a[19]); SQRADD2(a[9], a[18]); SQRADD2(a[10], a[17]); SQRADD2(a[11], a[16]); SQRADD2(a[12], a[15]); SQRADD2(a[13], a[14]); + COMBA_STORE(b[27]); + + /* output 28 */ + CARRY_FORWARD; + SQRADD2(a[0], a[28]); SQRADD2(a[1], a[27]); SQRADD2(a[2], a[26]); SQRADD2(a[3], a[25]); SQRADD2(a[4], a[24]); SQRADD2(a[5], a[23]); SQRADD2(a[6], a[22]); SQRADD2(a[7], a[21]); SQRADD2(a[8], a[20]); SQRADD2(a[9], a[19]); SQRADD2(a[10], a[18]); SQRADD2(a[11], a[17]); SQRADD2(a[12], a[16]); SQRADD2(a[13], a[15]); SQRADD(a[14], a[14]); + COMBA_STORE(b[28]); + + /* output 29 */ + CARRY_FORWARD; + SQRADD2(a[0], a[29]); SQRADD2(a[1], a[28]); SQRADD2(a[2], a[27]); SQRADD2(a[3], a[26]); SQRADD2(a[4], a[25]); SQRADD2(a[5], a[24]); SQRADD2(a[6], a[23]); SQRADD2(a[7], a[22]); SQRADD2(a[8], a[21]); SQRADD2(a[9], a[20]); SQRADD2(a[10], a[19]); SQRADD2(a[11], a[18]); SQRADD2(a[12], a[17]); SQRADD2(a[13], a[16]); SQRADD2(a[14], a[15]); + COMBA_STORE(b[29]); + + /* output 30 */ + CARRY_FORWARD; + SQRADD2(a[0], a[30]); SQRADD2(a[1], a[29]); SQRADD2(a[2], a[28]); SQRADD2(a[3], a[27]); 
SQRADD2(a[4], a[26]); SQRADD2(a[5], a[25]); SQRADD2(a[6], a[24]); SQRADD2(a[7], a[23]); SQRADD2(a[8], a[22]); SQRADD2(a[9], a[21]); SQRADD2(a[10], a[20]); SQRADD2(a[11], a[19]); SQRADD2(a[12], a[18]); SQRADD2(a[13], a[17]); SQRADD2(a[14], a[16]); SQRADD(a[15], a[15]); + COMBA_STORE(b[30]); + + /* output 31 */ + CARRY_FORWARD; + SQRADD2(a[0], a[31]); SQRADD2(a[1], a[30]); SQRADD2(a[2], a[29]); SQRADD2(a[3], a[28]); SQRADD2(a[4], a[27]); SQRADD2(a[5], a[26]); SQRADD2(a[6], a[25]); SQRADD2(a[7], a[24]); SQRADD2(a[8], a[23]); SQRADD2(a[9], a[22]); SQRADD2(a[10], a[21]); SQRADD2(a[11], a[20]); SQRADD2(a[12], a[19]); SQRADD2(a[13], a[18]); SQRADD2(a[14], a[17]); SQRADD2(a[15], a[16]); + COMBA_STORE(b[31]); + + /* output 32 */ + CARRY_FORWARD; + SQRADD2(a[1], a[31]); SQRADD2(a[2], a[30]); SQRADD2(a[3], a[29]); SQRADD2(a[4], a[28]); SQRADD2(a[5], a[27]); SQRADD2(a[6], a[26]); SQRADD2(a[7], a[25]); SQRADD2(a[8], a[24]); SQRADD2(a[9], a[23]); SQRADD2(a[10], a[22]); SQRADD2(a[11], a[21]); SQRADD2(a[12], a[20]); SQRADD2(a[13], a[19]); SQRADD2(a[14], a[18]); SQRADD2(a[15], a[17]); SQRADD(a[16], a[16]); + COMBA_STORE(b[32]); + + /* output 33 */ + CARRY_FORWARD; + SQRADD2(a[2], a[31]); SQRADD2(a[3], a[30]); SQRADD2(a[4], a[29]); SQRADD2(a[5], a[28]); SQRADD2(a[6], a[27]); SQRADD2(a[7], a[26]); SQRADD2(a[8], a[25]); SQRADD2(a[9], a[24]); SQRADD2(a[10], a[23]); SQRADD2(a[11], a[22]); SQRADD2(a[12], a[21]); SQRADD2(a[13], a[20]); SQRADD2(a[14], a[19]); SQRADD2(a[15], a[18]); SQRADD2(a[16], a[17]); + COMBA_STORE(b[33]); + + /* output 34 */ + CARRY_FORWARD; + SQRADD2(a[3], a[31]); SQRADD2(a[4], a[30]); SQRADD2(a[5], a[29]); SQRADD2(a[6], a[28]); SQRADD2(a[7], a[27]); SQRADD2(a[8], a[26]); SQRADD2(a[9], a[25]); SQRADD2(a[10], a[24]); SQRADD2(a[11], a[23]); SQRADD2(a[12], a[22]); SQRADD2(a[13], a[21]); SQRADD2(a[14], a[20]); SQRADD2(a[15], a[19]); SQRADD2(a[16], a[18]); SQRADD(a[17], a[17]); + COMBA_STORE(b[34]); + + /* output 35 */ + CARRY_FORWARD; + SQRADD2(a[4], a[31]); SQRADD2(a[5], 
a[30]); SQRADD2(a[6], a[29]); SQRADD2(a[7], a[28]); SQRADD2(a[8], a[27]); SQRADD2(a[9], a[26]); SQRADD2(a[10], a[25]); SQRADD2(a[11], a[24]); SQRADD2(a[12], a[23]); SQRADD2(a[13], a[22]); SQRADD2(a[14], a[21]); SQRADD2(a[15], a[20]); SQRADD2(a[16], a[19]); SQRADD2(a[17], a[18]); + COMBA_STORE(b[35]); + + /* output 36 */ + CARRY_FORWARD; + SQRADD2(a[5], a[31]); SQRADD2(a[6], a[30]); SQRADD2(a[7], a[29]); SQRADD2(a[8], a[28]); SQRADD2(a[9], a[27]); SQRADD2(a[10], a[26]); SQRADD2(a[11], a[25]); SQRADD2(a[12], a[24]); SQRADD2(a[13], a[23]); SQRADD2(a[14], a[22]); SQRADD2(a[15], a[21]); SQRADD2(a[16], a[20]); SQRADD2(a[17], a[19]); SQRADD(a[18], a[18]); + COMBA_STORE(b[36]); + + /* output 37 */ + CARRY_FORWARD; + SQRADD2(a[6], a[31]); SQRADD2(a[7], a[30]); SQRADD2(a[8], a[29]); SQRADD2(a[9], a[28]); SQRADD2(a[10], a[27]); SQRADD2(a[11], a[26]); SQRADD2(a[12], a[25]); SQRADD2(a[13], a[24]); SQRADD2(a[14], a[23]); SQRADD2(a[15], a[22]); SQRADD2(a[16], a[21]); SQRADD2(a[17], a[20]); SQRADD2(a[18], a[19]); + COMBA_STORE(b[37]); + + /* output 38 */ + CARRY_FORWARD; + SQRADD2(a[7], a[31]); SQRADD2(a[8], a[30]); SQRADD2(a[9], a[29]); SQRADD2(a[10], a[28]); SQRADD2(a[11], a[27]); SQRADD2(a[12], a[26]); SQRADD2(a[13], a[25]); SQRADD2(a[14], a[24]); SQRADD2(a[15], a[23]); SQRADD2(a[16], a[22]); SQRADD2(a[17], a[21]); SQRADD2(a[18], a[20]); SQRADD(a[19], a[19]); + COMBA_STORE(b[38]); + + /* output 39 */ + CARRY_FORWARD; + SQRADD2(a[8], a[31]); SQRADD2(a[9], a[30]); SQRADD2(a[10], a[29]); SQRADD2(a[11], a[28]); SQRADD2(a[12], a[27]); SQRADD2(a[13], a[26]); SQRADD2(a[14], a[25]); SQRADD2(a[15], a[24]); SQRADD2(a[16], a[23]); SQRADD2(a[17], a[22]); SQRADD2(a[18], a[21]); SQRADD2(a[19], a[20]); + COMBA_STORE(b[39]); + + /* output 40 */ + CARRY_FORWARD; + SQRADD2(a[9], a[31]); SQRADD2(a[10], a[30]); SQRADD2(a[11], a[29]); SQRADD2(a[12], a[28]); SQRADD2(a[13], a[27]); SQRADD2(a[14], a[26]); SQRADD2(a[15], a[25]); SQRADD2(a[16], a[24]); SQRADD2(a[17], a[23]); SQRADD2(a[18], a[22]); 
SQRADD2(a[19], a[21]); SQRADD(a[20], a[20]); + COMBA_STORE(b[40]); + + /* output 41 */ + CARRY_FORWARD; + SQRADD2(a[10], a[31]); SQRADD2(a[11], a[30]); SQRADD2(a[12], a[29]); SQRADD2(a[13], a[28]); SQRADD2(a[14], a[27]); SQRADD2(a[15], a[26]); SQRADD2(a[16], a[25]); SQRADD2(a[17], a[24]); SQRADD2(a[18], a[23]); SQRADD2(a[19], a[22]); SQRADD2(a[20], a[21]); + COMBA_STORE(b[41]); + + /* output 42 */ + CARRY_FORWARD; + SQRADD2(a[11], a[31]); SQRADD2(a[12], a[30]); SQRADD2(a[13], a[29]); SQRADD2(a[14], a[28]); SQRADD2(a[15], a[27]); SQRADD2(a[16], a[26]); SQRADD2(a[17], a[25]); SQRADD2(a[18], a[24]); SQRADD2(a[19], a[23]); SQRADD2(a[20], a[22]); SQRADD(a[21], a[21]); + COMBA_STORE(b[42]); + + /* output 43 */ + CARRY_FORWARD; + SQRADD2(a[12], a[31]); SQRADD2(a[13], a[30]); SQRADD2(a[14], a[29]); SQRADD2(a[15], a[28]); SQRADD2(a[16], a[27]); SQRADD2(a[17], a[26]); SQRADD2(a[18], a[25]); SQRADD2(a[19], a[24]); SQRADD2(a[20], a[23]); SQRADD2(a[21], a[22]); + COMBA_STORE(b[43]); + + /* output 44 */ + CARRY_FORWARD; + SQRADD2(a[13], a[31]); SQRADD2(a[14], a[30]); SQRADD2(a[15], a[29]); SQRADD2(a[16], a[28]); SQRADD2(a[17], a[27]); SQRADD2(a[18], a[26]); SQRADD2(a[19], a[25]); SQRADD2(a[20], a[24]); SQRADD2(a[21], a[23]); SQRADD(a[22], a[22]); + COMBA_STORE(b[44]); + + /* output 45 */ + CARRY_FORWARD; + SQRADD2(a[14], a[31]); SQRADD2(a[15], a[30]); SQRADD2(a[16], a[29]); SQRADD2(a[17], a[28]); SQRADD2(a[18], a[27]); SQRADD2(a[19], a[26]); SQRADD2(a[20], a[25]); SQRADD2(a[21], a[24]); SQRADD2(a[22], a[23]); + COMBA_STORE(b[45]); + + /* output 46 */ + CARRY_FORWARD; + SQRADD2(a[15], a[31]); SQRADD2(a[16], a[30]); SQRADD2(a[17], a[29]); SQRADD2(a[18], a[28]); SQRADD2(a[19], a[27]); SQRADD2(a[20], a[26]); SQRADD2(a[21], a[25]); SQRADD2(a[22], a[24]); SQRADD(a[23], a[23]); + COMBA_STORE(b[46]); + + /* output 47 */ + CARRY_FORWARD; + SQRADD2(a[16], a[31]); SQRADD2(a[17], a[30]); SQRADD2(a[18], a[29]); SQRADD2(a[19], a[28]); SQRADD2(a[20], a[27]); SQRADD2(a[21], a[26]); 
SQRADD2(a[22], a[25]); SQRADD2(a[23], a[24]); + COMBA_STORE(b[47]); + + /* output 48 */ + CARRY_FORWARD; + SQRADD2(a[17], a[31]); SQRADD2(a[18], a[30]); SQRADD2(a[19], a[29]); SQRADD2(a[20], a[28]); SQRADD2(a[21], a[27]); SQRADD2(a[22], a[26]); SQRADD2(a[23], a[25]); SQRADD(a[24], a[24]); + COMBA_STORE(b[48]); + + /* output 49 */ + CARRY_FORWARD; + SQRADD2(a[18], a[31]); SQRADD2(a[19], a[30]); SQRADD2(a[20], a[29]); SQRADD2(a[21], a[28]); SQRADD2(a[22], a[27]); SQRADD2(a[23], a[26]); SQRADD2(a[24], a[25]); + COMBA_STORE(b[49]); + + /* output 50 */ + CARRY_FORWARD; + SQRADD2(a[19], a[31]); SQRADD2(a[20], a[30]); SQRADD2(a[21], a[29]); SQRADD2(a[22], a[28]); SQRADD2(a[23], a[27]); SQRADD2(a[24], a[26]); SQRADD(a[25], a[25]); + COMBA_STORE(b[50]); + + /* output 51 */ + CARRY_FORWARD; + SQRADD2(a[20], a[31]); SQRADD2(a[21], a[30]); SQRADD2(a[22], a[29]); SQRADD2(a[23], a[28]); SQRADD2(a[24], a[27]); SQRADD2(a[25], a[26]); + COMBA_STORE(b[51]); + + /* output 52 */ + CARRY_FORWARD; + SQRADD2(a[21], a[31]); SQRADD2(a[22], a[30]); SQRADD2(a[23], a[29]); SQRADD2(a[24], a[28]); SQRADD2(a[25], a[27]); SQRADD(a[26], a[26]); + COMBA_STORE(b[52]); + + /* output 53 */ + CARRY_FORWARD; + SQRADD2(a[22], a[31]); SQRADD2(a[23], a[30]); SQRADD2(a[24], a[29]); SQRADD2(a[25], a[28]); SQRADD2(a[26], a[27]); + COMBA_STORE(b[53]); + + /* output 54 */ + CARRY_FORWARD; + SQRADD2(a[23], a[31]); SQRADD2(a[24], a[30]); SQRADD2(a[25], a[29]); SQRADD2(a[26], a[28]); SQRADD(a[27], a[27]); + COMBA_STORE(b[54]); + + /* output 55 */ + CARRY_FORWARD; + SQRADD2(a[24], a[31]); SQRADD2(a[25], a[30]); SQRADD2(a[26], a[29]); SQRADD2(a[27], a[28]); + COMBA_STORE(b[55]); + + /* output 56 */ + CARRY_FORWARD; + SQRADD2(a[25], a[31]); SQRADD2(a[26], a[30]); SQRADD2(a[27], a[29]); SQRADD(a[28], a[28]); + COMBA_STORE(b[56]); + + /* output 57 */ + CARRY_FORWARD; + SQRADD2(a[26], a[31]); SQRADD2(a[27], a[30]); SQRADD2(a[28], a[29]); + COMBA_STORE(b[57]); + + /* output 58 */ + CARRY_FORWARD; + SQRADD2(a[27], a[31]); 
SQRADD2(a[28], a[30]); SQRADD(a[29], a[29]); + COMBA_STORE(b[58]); + + /* output 59 */ + CARRY_FORWARD; + SQRADD2(a[28], a[31]); SQRADD2(a[29], a[30]); + COMBA_STORE(b[59]); + + /* output 60 */ + CARRY_FORWARD; + SQRADD2(a[29], a[31]); SQRADD(a[30], a[30]); + COMBA_STORE(b[60]); + + /* output 61 */ + CARRY_FORWARD; + SQRADD2(a[30], a[31]); + COMBA_STORE(b[61]); + + /* output 62 */ + CARRY_FORWARD; + SQRADD(a[31], a[31]); + COMBA_STORE(b[62]); + COMBA_STORE2(b[63]); + COMBA_FINI; + + /* publish all 64 output digits as a non-negative value, then let fp_clamp trim leading zero digits */ B->used = 64; + B->sign = FP_ZPOS; + memcpy(B->dp, b, 64 * sizeof(fp_digit)); + fp_clamp(B); +} + +#endif + + +/* End: fp_sqr_comba.c */ + +/* Start: fp_sqrmod.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = a * a (mod b): a is squared into a zeroed temporary, which is then reduced with fp_mod; the return value is the status returned by fp_mod */ +int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c) +{ + fp_int tmp; + fp_zero(&tmp); + fp_sqr(a, &tmp); + return fp_mod(&tmp, b, c); +} + +/* End: fp_sqrmod.c */ + +/* Start: fp_sub.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = a - b (signed): the sign of c is chosen from the operand signs and magnitudes, the digits are produced by s_fp_add or s_fp_sub */ +void fp_sub(fp_int *a, fp_int *b, fp_int *c) +{ + int sa, sb; + + sa = a->sign; + sb = b->sign; + + if (sa != sb) { + /* subtract a negative from a positive, OR */ + /* subtract a positive from a negative. */ + /* In either case, ADD their magnitudes, */ + /* and use the sign of the first number. */ + c->sign = sa; + s_fp_add (a, b, c); + } else { + /* subtract a positive from a positive, OR */ + /* subtract a negative from a negative. */ + /* First, take the difference between their */ + /* magnitudes, then... 
*/ + if (fp_cmp_mag (a, b) != FP_LT) { + /* Copy the sign from the first */ + c->sign = sa; + /* The first has a larger or equal magnitude */ + s_fp_sub (a, b, c); + } else { + /* The result has the *opposite* sign from */ + /* the first number. */ + c->sign = (sa == FP_ZPOS) ? FP_NEG : FP_ZPOS; + /* The second has a larger magnitude, so the operands are swapped to keep the s_fp_sub minuend the larger one */ + s_fp_sub (b, a, c); + } + } +} + + +/* End: fp_sub.c */ + +/* Start: fp_sub_d.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* c = a - b where b is a single digit: b is loaded into a temporary fp_int with fp_set and the subtraction is delegated to fp_sub */ +void fp_sub_d(fp_int *a, fp_digit b, fp_int *c) +{ + fp_int tmp; + fp_set(&tmp, b); + fp_sub(a, &tmp, c); +} + +/* End: fp_sub_d.c */ + +/* Start: fp_submod.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* d = a - b (mod c): the difference is formed in a zeroed temporary and reduced with fp_mod, whose status is returned */ +int fp_submod(fp_int *a, fp_int *b, fp_int *c, fp_int *d) +{ + fp_int tmp; + fp_zero(&tmp); + fp_sub(a, b, &tmp); + return fp_mod(&tmp, c, d); +} + + +/* End: fp_submod.c */ + +/* Start: fp_to_signed_bin.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* writes a sign byte to b[0] (0 when a->sign is FP_ZPOS, 1 otherwise) followed by the fp_to_unsigned_bin output starting at b[1]; no length is checked, so the caller must supply a large enough buffer (presumably fp_signed_bin_size bytes - confirm) */ void fp_to_signed_bin(fp_int *a, unsigned char *b) +{ + fp_to_unsigned_bin (a, b + 1); + b[0] = (unsigned char) ((a->sign == FP_ZPOS) ? 0 : 1); +} + +/* End: fp_to_signed_bin.c */ + +/* Start: fp_to_unsigned_bin.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. 
That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* writes the magnitude of a to b, one byte per 8 bits; bytes are produced least significant first and the buffer is then handed to bn_reverse (presumably leaving most-significant-first order - confirm against fp_reverse.c). Nothing is written when a is zero and no length is checked. */ void fp_to_unsigned_bin(fp_int *a, unsigned char *b) +{ + int x; + fp_int t; + + /* work on a copy so that a is left untouched */ fp_init_copy(&t, a); + + x = 0; + while (fp_iszero (&t) == FP_NO) { + b[x++] = (unsigned char) (t.dp[0] & 255); + fp_div_2d (&t, 8, &t, NULL); + } + bn_reverse (b, x); +} + +/* End: fp_to_unsigned_bin.c */ + +/* Start: fp_toradix.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* writes a to str as a NUL terminated string in the given radix; returns FP_VAL when radix is outside 2..64 and FP_OKAY otherwise. No output length is checked here, so the caller must supply a large enough buffer. */ int fp_toradix(fp_int *a, char *str, int radix) +{ + int digs; + fp_int t; + fp_digit d; + char *_s = str; + + /* check range of the radix */ + if (radix < 2 || radix > 64) { + return FP_VAL; + } + + /* quick out if it is zero */ + if (fp_iszero(a) == 1) { + *str++ = '0'; + *str = '\0'; + return FP_OKAY; + } + + fp_init_copy(&t, a); + + /* if it is negative output a - and skip it when reversing later */ + if (t.sign == FP_NEG) { + ++_s; + *str++ = '-'; + t.sign = FP_ZPOS; + } + + /* peel off digits least significant first; each remainder d indexes the fp_s_rmap character table */ digs = 0; + while (fp_iszero (&t) == FP_NO) { + fp_div_d (&t, (fp_digit) radix, &t, &d); + *str++ = fp_s_rmap[d]; + ++digs; + } + + /* reverse the digits of the string. In this case _s points + * to the first digit [excluding the sign] of the number + */ + bn_reverse ((unsigned char *)_s, digs); + + /* append a NUL so the string is properly terminated */ + *str = '\0'; + return FP_OKAY; +} + +/* End: fp_toradix.c */ + +/* Start: fp_unsigned_bin_size.c */ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. 
+ * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* returns fp_count_bits(a) rounded up to a whole number of bytes, i.e. size / 8 plus one more byte when size is not a multiple of 8 */ int fp_unsigned_bin_size(fp_int *a) +{ + int size = fp_count_bits (a); + return (size / 8 + ((size & 7) != 0 ? 1 : 0)); +} + +/* End: fp_unsigned_bin_size.c */ + + +/* EOF */ diff --git a/random_txt_files/amd64.txt b/random_txt_files/amd64.txt new file mode 100644 index 0000000..945b243 --- /dev/null +++ b/random_txt_files/amd64.txt @@ -0,0 +1,43 @@ +AMD64 timings + +using ISO C +mult + 512-bit: 496 + 1024-bit: 1717 + 2048-bit: 7200 +sqr + 512-bit: 448 + 1024-bit: 1760 + 2048-bit: 7099 +mont + 512-bit: 1416 + 1024-bit: 5156 + 2048-bit: 20820 +expt + 512-bit: 1520207 + 1024-bit: 10603520 + 2048-bit: 84893649 + +using amd64 +mult + 512-bit: 292 + 1024-bit: 945 + 2048-bit: 3620 +sqr + 512-bit: 238 + 1024-bit: 801 + 2048-bit: 2853 +mont + 512-bit: 731 + 1024-bit: 1730 + 2048-bit: 5462 +Exptmod: + 512-bit: 641743 + 1024-bit: 3167406 + 2048-bit: 20158609 + +LTM exptmods + +Exponentiating 513-bit => 825/sec, 2183028 cycles +Exponentiating 1025-bit => 151/sec, 11900720 cycles +Exponentiating 2049-bit => 24/sec, 72376416 cycles diff --git a/random_txt_files/exptmod_timings.txt b/random_txt_files/exptmod_timings.txt new file mode 100644 index 0000000..d637979 --- /dev/null +++ b/random_txt_files/exptmod_timings.txt @@ -0,0 +1,45 @@ +LTM timings: + +Athlon Barton +Exponentiating 513-bit => 561/sec, 3909824 cycles +Exponentiating 1025-bit => 103/sec, 21175496 cycles +Exponentiating 2049-bit => 16/sec, 129845554 cycles + +P4 Northwood +Exponentiating 513-bit => 284/sec, 9884722 cycles +Exponentiating 1025-bit => 47/sec, 59090432 cycles +Exponentiating 2049-bit => 6/sec, 427456070 cycles + +TFM timings: + +Athlon Barton + 512-bit: 2289257 + 1024-bit: 12871373 + 2048-bit: 97211357 + +P4 Northwood [x86-32] + 512-bit: 8015598 + 1024-bit: 55559304 + 2048-bit: 409861746 + +P4 Northwood [SSE2] + 512-bit: 5895000 + 1024-bit: 39648730 + 2048-bit: 304110670 + +
+ + + + + + + + + + + +
Processor | Size in bits | x86-32 | x86-64 | SSE2 | LTM
P4 | 512 | 8015598 | | 5895000 | 9884722
 | 1024 | 55559304 | | 39648730 | 59090432
 | 2048 | 409861746 | | 304110670 | 427456070
Athlon Barton | 512 | 2289257 | | | 3909824
 | 1024 | 12871373 | | | 21175496
 | 2048 | 97211357 | | | 129845554
Athlon64 | 512 | | 641743 | | 2183028
 | 1024 | | 3167406 | | 11900720
 | 2048 | | 20158609 | | 72376416
+Cycles per operation +
+ diff --git a/random_txt_files/ltm_times.txt b/random_txt_files/ltm_times.txt new file mode 100644 index 0000000..700d877 --- /dev/null +++ b/random_txt_files/ltm_times.txt @@ -0,0 +1,37 @@ +LTM Timings... + +Multiplying 140-bit => 2950763/sec, 952 cycles +Multiplying 196-bit => 2150939/sec, 1306 cycles +Multiplying 252-bit => 1357066/sec, 2070 cycles +Multiplying 308-bit => 1055269/sec, 2662 cycles +Multiplying 364-bit => 817557/sec, 3436 cycles +Multiplying 420-bit => 636413/sec, 4414 cycles +Multiplying 475-bit => 536912/sec, 5232 cycles +Multiplying 531-bit => 433641/sec, 6478 cycles +Multiplying 588-bit => 372069/sec, 7550 cycles +Multiplying 644-bit => 322813/sec, 8702 cycles +Multiplying 698-bit => 275566/sec, 10194 cycles +Multiplying 753-bit => 242082/sec, 11604 cycles +Multiplying 809-bit => 214797/sec, 13078 cycles +Multiplying 867-bit => 189626/sec, 14814 cycles +Multiplying 921-bit => 168858/sec, 16636 cycles +Multiplying 978-bit => 151598/sec, 18530 cycles +Multiplying 1036-bit => 137580/sec, 20418 cycles +Multiplying 1091-bit => 124661/sec, 22534 cycles +Multiplying 1148-bit => 111677/sec, 25154 cycles +Multiplying 1199-bit => 102762/sec, 27336 cycles +Multiplying 1258-bit => 94519/sec, 29720 cycles +Multiplying 1316-bit => 86975/sec, 32298 cycles +Multiplying 1371-bit => 79754/sec, 35222 cycles +Multiplying 1427-bit => 74473/sec, 37720 cycles +Multiplying 1483-bit => 68827/sec, 40814 cycles +Multiplying 1537-bit => 63644/sec, 44138 cycles +Multiplying 1595-bit => 59646/sec, 47096 cycles +Multiplying 1651-bit => 56469/sec, 49746 cycles +Multiplying 1708-bit => 52640/sec, 53364 cycles +Multiplying 1764-bit => 49823/sec, 56382 cycles +Multiplying 1819-bit => 46856/sec, 59952 cycles +Multiplying 1875-bit => 44264/sec, 63462 cycles +Multiplying 1929-bit => 41641/sec, 67460 cycles +Multiplying 1985-bit => 39539/sec, 71046 cycles +Multiplying 2044-bit => 37591/sec, 74728 cycles \ No newline at end of file diff --git a/random_txt_files/old_sqr_times.txt 
b/random_txt_files/old_sqr_times.txt new file mode 100644 index 0000000..3842a86 --- /dev/null +++ b/random_txt_files/old_sqr_times.txt @@ -0,0 +1,14 @@ +I started with: + 512-bit: 16338 + 1024-bit: 51020 + 2048-bit: 142718 + +My x86-32 + 512-bit: 2864 + 1024-bit: 10615 + 2048-bit: 41807 + +My SSE2 + 512-bit: 2168 + 1024-bit: 7727 + 2048-bit: 33163 \ No newline at end of file diff --git a/s_fp_add.c b/s_fp_add.c new file mode 100644 index 0000000..e0f10f6 --- /dev/null +++ b/s_fp_add.c @@ -0,0 +1,37 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* unsigned addition: c = |a| + |b|. The sign of c is left to the caller (fp_clamp only resets it when the result is zero). Both inputs are read up to the larger of the two used counts, so the shorter operand relies on its unused digits being zero. */ +void s_fp_add(fp_int *a, fp_int *b, fp_int *c) +{ + int x, y, oldused; + fp_word t; + + y = MAX(a->used, b->used); + oldused = c->used; + c->used = y; + + /* t carries the running sum; its high part is the carry into the next digit */ t = 0; + for (x = 0; x < y; x++) { + t += ((fp_word)a->dp[x]) + ((fp_word)b->dp[x]); + c->dp[x] = (fp_digit)t; + t >>= DIGIT_BIT; + } + /* store the final carry only while a digit is still available; at x == FP_SIZE the carry is dropped */ if (t != 0 && x != FP_SIZE) { + c->dp[c->used++] = (fp_digit)t; + ++x; + } + + /* zero any digits left over from the previous value of c */ for (; x < oldused; x++) { + c->dp[x] = 0; + } + fp_clamp(c); +} diff --git a/s_fp_sub.c b/s_fp_sub.c new file mode 100644 index 0000000..de5546c --- /dev/null +++ b/s_fp_sub.c @@ -0,0 +1,31 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#include + +/* unsigned subtraction ||a|| >= ||b|| ALWAYS! 
*/ +void s_fp_sub(fp_int *a, fp_int *b, fp_int *c) +{ + /* c = |a| - |b|; digits of b are read up to a->used, so b relies on zero digits above b->used. The sign of c is left to the caller. */ int x, oldused; + fp_word t; + + oldused = c->used; + c->used = a->used; + t = 0; + for (x = 0; x < a->used; x++) { + t = ((fp_word)a->dp[x]) - (((fp_word)b->dp[x]) + t); + c->dp[x] = (fp_digit)t; + /* the bit just above the digit is the borrow for the next digit */ t = (t >> DIGIT_BIT) & 1; + } + + /* zero any digits left over from the previous value of c */ for (; x < oldused; x++) { + c->dp[x] = 0; + } + fp_clamp(c); +} diff --git a/tfm.h b/tfm.h new file mode 100644 index 0000000..9b12946 --- /dev/null +++ b/tfm.h @@ -0,0 +1,290 @@ +/* TomsFastMath, a fast ISO C bignum library. + * + * This project is meant to fill in where LibTomMath + * falls short. That is speed ;-) + * + * This project is public domain and free for all purposes. + * + * Tom St Denis, tomstdenis@iahu.ca + */ +#ifndef TFM_H_ +#define TFM_H_ + +#include +#include +#include +#include +#include + +#undef MIN +#define MIN(x,y) ((x)<(y)?(x):(y)) +#undef MAX +#define MAX(x,y) ((x)>(y)?(x):(y)) + +/* do we want huge code? The answer is, yes. (TFM_HUGE guards the 32-digit comba prototypes further down) */ +#define TFM_HUGE + +/* Max size of any number in bits. Basically the largest size you will be multiplying + * should be half [or smaller] of FP_MAX_SIZE minus four digits + * + * You can externally define this or it defaults to 4096 bits plus four digits. + */ +#ifndef FP_MAX_SIZE + #define FP_MAX_SIZE (4096+(4*DIGIT_BIT)) +#endif + +/* will this lib work? NOTE(review): DIGIT_BIT is only defined further down, so when the default FP_MAX_SIZE is used the preprocessor evaluates it as 0 in the FP_MAX_SIZE check below - confirm this is acceptable */ +#if (CHAR_BIT & 7) + #error CHAR_BIT must be a multiple of eight. +#endif +#if FP_MAX_SIZE % CHAR_BIT + #error FP_MAX_SIZE must be a multiple of CHAR_BIT +#endif + +/* make sure we are using 64-bit digits with x86-64 asm */ +#if defined(TFM_X86_64) + #ifndef FP_64BIT + #define FP_64BIT + #endif +#endif + +/* make sure we're 32-bit for x86-32/sse/arm */ +#if (defined(TFM_X86) || defined(TFM_SSE2) || defined(TFM_ARM)) && defined(FP_64BIT) + #warning x86-32, SSE2 and ARM optimizations require 32-bit digits (undefining) + #undef FP_64BIT +#endif + +/* some default configurations. 
+ */ +#if defined(FP_64BIT) + /* for GCC only on supported platforms: fp_word is declared with the TI machine mode attribute */ +#ifndef CRYPT + typedef unsigned long ulong64; +#endif + typedef ulong64 fp_digit; + typedef unsigned long fp_word __attribute__ ((mode(TI))); +#else + /* this is to make porting into LibTomCrypt easier :-) */ +#ifndef CRYPT + #if defined(_MSC_VER) || defined(__BORLANDC__) + typedef unsigned __int64 ulong64; + typedef signed __int64 long64; + #else + typedef unsigned long long ulong64; + typedef signed long long long64; + #endif +#endif + /* NOTE(review): unsigned long is 64 bits wide on LP64 targets, which would make fp_digit as wide as fp_word in this branch - confirm the intended platforms */ typedef unsigned long fp_digit; + typedef ulong64 fp_word; +#endif + +/* # of bits in one digit, the all-ones digit mask, and the # of digits in an fp_int */ +#define DIGIT_BIT (int)((CHAR_BIT) * sizeof(fp_digit)) +#define FP_MASK (fp_digit)(-1) +#define FP_SIZE (FP_MAX_SIZE/DIGIT_BIT) + +/* signs */ +#define FP_ZPOS 0 +#define FP_NEG 1 + +/* return codes */ +#define FP_OKAY 0 +#define FP_VAL 1 +#define FP_MEM 2 + +/* equalities */ +#define FP_LT -1 /* less than */ +#define FP_EQ 0 /* equal to */ +#define FP_GT 1 /* greater than */ + +/* replies */ +#define FP_YES 1 /* yes response */ +#define FP_NO 0 /* no response */ + +/* the fixed precision integer type: dp holds FP_SIZE digits, used is the count of digits in use, sign is FP_ZPOS or FP_NEG */ +typedef struct { + fp_digit dp[FP_SIZE]; + int used, + sign; +} fp_int; + +/* functions */ + +/* initialize [or zero] an fp int by clearing the whole structure */ +#define fp_init(a) memset((a), 0, sizeof(fp_int)) +#define fp_zero(a) fp_init(a) + +/* zero/even/odd ? note that fp_iseven and fp_isodd both require used > 0, so zero is reported as neither */ +#define fp_iszero(a) (((a)->used == 0) ? FP_YES : FP_NO) +#define fp_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? FP_YES : FP_NO) +#define fp_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? 
FP_YES : FP_NO) + +/* set to a small digit */ +void fp_set(fp_int *a, fp_digit b); + +/* copy from a to b (skipped when a and b are the same object); fp_init_copy(a, b) makes a a copy of b */ +#define fp_copy(a, b) (((a) != (b)) && memcpy((b), (a), sizeof(fp_int))) +#define fp_init_copy(a, b) fp_copy(b, a) + +/* negate and absolute: copy a into b, then flip or clear the sign. These expand to bare brace blocks, so take care inside unbraced if/else. NOTE(review): fp_neg also flips the sign of a zero value, leaving used == 0 with sign FP_NEG - confirm callers tolerate that */ +#define fp_neg(a, b) { fp_copy(a, b); (b)->sign ^= 1; } +#define fp_abs(a, b) { fp_copy(a, b); (b)->sign = 0; } + +/* clamp digits: drop leading zero digits from used and force the sign to FP_ZPOS when no digits remain */ +#define fp_clamp(a) { while ((a)->used && (a)->dp[(a)->used-1] == 0) --((a)->used); (a)->sign = (a)->used ? (a)->sign : FP_ZPOS; } + +/* right shift x digits */ +void fp_rshd(fp_int *a, int x); + +/* left shift x digits */ +void fp_lshd(fp_int *a, int x); + +/* signed comparison */ +int fp_cmp(fp_int *a, fp_int *b); + +/* unsigned (magnitude) comparison */ +int fp_cmp_mag(fp_int *a, fp_int *b); + +/* power of 2 operations */ +void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d); +void fp_mod_2d(fp_int *a, int b, fp_int *c); +void fp_mul_2d(fp_int *a, int b, fp_int *c); +void fp_2expt (fp_int *a, int b); +void fp_mul_2(fp_int *a, fp_int *c); +void fp_div_2(fp_int *a, fp_int *c); + +/* Counts the number of lsbs which are zero before the first one bit */ +int fp_cnt_lsb(fp_int *a); + +/* c = a + b */ +void fp_add(fp_int *a, fp_int *b, fp_int *c); + +/* c = a - b */ +void fp_sub(fp_int *a, fp_int *b, fp_int *c); + +/* c = a * b */ +void fp_mul(fp_int *a, fp_int *b, fp_int *c); + +/* b = a*a */ +void fp_sqr(fp_int *a, fp_int *b); + +/* a/b => cb + d == a */ +int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d); + +/* c = a mod b, 0 <= c < b */ +int fp_mod(fp_int *a, fp_int *b, fp_int *c); + +/* compare against a single digit */ +int fp_cmp_d(fp_int *a, fp_digit b); + +/* c = a + b */ +void fp_add_d(fp_int *a, fp_digit b, fp_int *c); + +/* c = a - b */ +void fp_sub_d(fp_int *a, fp_digit b, fp_int *c); + +/* c = a * b */ +void fp_mul_d(fp_int *a, fp_digit b, fp_int *c); + +/* a/b => cb + d == a */ +int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d); + +/* c = a mod b, 0 <= c < b */ 
+int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c); + +/* ---> number theory <--- */ +/* d = a + b (mod c) */ +int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d); + +/* d = a - b (mod c) */ +int fp_submod(fp_int *a, fp_int *b, fp_int *c, fp_int *d); + +/* d = a * b (mod c) */ +int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d); + +/* c = a * a (mod b) */ +int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c); + +/* c = 1/a (mod b) */ +int fp_invmod(fp_int *a, fp_int *b, fp_int *c); + +/* c = (a, b), the greatest common divisor */ +void fp_gcd(fp_int *a, fp_int *b, fp_int *c); + +/* c = [a, b], the least common multiple */ +void fp_lcm(fp_int *a, fp_int *b, fp_int *c); + +/* sets up the montgomery reduction */ +int fp_montgomery_setup(fp_int *a, fp_digit *mp); + +/* computes a = B**n mod b without division or multiplication; useful for + * normalizing numbers in a Montgomery system. + */ +void fp_montgomery_calc_normalization(fp_int *a, fp_int *b); + +/* computes x/R == x (mod N) via Montgomery Reduction */ +void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp); + +/* d = a**b (mod c) */ +int fp_exptmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d); + +/* primality stuff */ + +/* perform a Miller-Rabin test of a to the base b and store result in "result" */ +void fp_prime_miller_rabin (fp_int * a, fp_int * b, int *result); + +/* 256 trial divisions + 8 Miller-Rabins, returns FP_YES if probable prime */ +int fp_isprime(fp_int *a); + +/* Primality generation flags */ +#define TFM_PRIME_BBS 0x0001 /* BBS style prime */ +#define TFM_PRIME_SAFE 0x0002 /* Safe prime (p-1)/2 == prime */ +#define TFM_PRIME_2MSB_OFF 0x0004 /* force 2nd MSB to 0 */ +#define TFM_PRIME_2MSB_ON 0x0008 /* force 2nd MSB to 1 */ + +/* callback for fp_prime_random, should fill dst with random bytes and return how many read [up to len] */ +typedef int tfm_prime_callback(unsigned char *dst, int len, void *dat); + +/* convenience wrapper around fp_prime_random_ex: passes ((size) * 8) + 1 as the size argument and maps bbs == 1 to TFM_PRIME_BBS (any other value to no flags). NOTE(review): bbs is expanded without parentheses, so pass a simple expression */ #define fp_prime_random(a, t, size, bbs, cb, dat) fp_prime_random_ex(a, t, ((size) * 8) + 1, (bbs==1)?TFM_PRIME_BBS:0, cb, dat) + +int 
fp_prime_random_ex(fp_int *a, int t, int size, int flags, tfm_prime_callback cb, void *dat); + +/* radix conversions */ +int fp_count_bits(fp_int *a); + +int fp_unsigned_bin_size(fp_int *a); +void fp_read_unsigned_bin(fp_int *a, unsigned char *b, int c); +void fp_to_unsigned_bin(fp_int *a, unsigned char *b); + +int fp_signed_bin_size(fp_int *a); +void fp_read_signed_bin(fp_int *a, unsigned char *b, int c); +void fp_to_signed_bin(fp_int *a, unsigned char *b); + +int fp_read_radix(fp_int *a, char *str, int radix); +int fp_toradix(fp_int *a, char *str, int radix); +int fp_toradix_n(fp_int * a, char *str, int radix, int maxlen); + + +/* VARIOUS LOW LEVEL STUFFS: magnitude-only add/sub, byte reversal and the comba multipliers and squarers (the 32-digit variants are only declared when TFM_HUGE is defined) */ +void s_fp_add(fp_int *a, fp_int *b, fp_int *c); +void s_fp_sub(fp_int *a, fp_int *b, fp_int *c); +void bn_reverse(unsigned char *s, int len); +void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C); +#ifdef TFM_HUGE +void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C); +#endif +void fp_mul_comba16(fp_int *A, fp_int *B, fp_int *C); +void fp_mul_comba8(fp_int *A, fp_int *B, fp_int *C); +void fp_mul_comba4(fp_int *A, fp_int *B, fp_int *C); + +void fp_sqr_comba(fp_int *A, fp_int *B); +void fp_sqr_comba4(fp_int *A, fp_int *B); +void fp_sqr_comba8(fp_int *A, fp_int *B); +void fp_sqr_comba16(fp_int *A, fp_int *B); +#ifdef TFM_HUGE +void fp_sqr_comba32(fp_int *A, fp_int *B); +#endif +/* character table indexed by digit value; fp_toradix uses it to map each remainder to its output character */ extern const char *fp_s_rmap; + +#endif + diff --git a/tfm.tex b/tfm.tex new file mode 100644 index 0000000..914208c --- /dev/null +++ b/tfm.tex @@ -0,0 +1,580 @@ +\documentclass[b5paper]{book} +\usepackage{hyperref} +\usepackage{makeidx} +\usepackage{amssymb} +\usepackage{color} +\usepackage{alltt} +\usepackage{graphicx} +\usepackage{layout} +\def\union{\cup} +\def\intersect{\cap} +\def\getsrandom{\stackrel{\rm R}{\gets}} +\def\cross{\times} +\def\cat{\hspace{0.5em} \| \hspace{0.5em}} +\def\catn{$\|$} +\def\divides{\hspace{0.3em} | \hspace{0.3em}} +\def\nequiv{\not\equiv} +\def\approx{\raisebox{0.2ex}{\mbox{\small $\sim$}}} +\def\lcm{{\rm 
lcm}} +\def\gcd{{\rm gcd}} +\def\log{{\rm log}} +\def\ord{{\rm ord}} +\def\abs{{\mathit abs}} +\def\rep{{\mathit rep}} +\def\mod{{\mathit\ mod\ }} +\renewcommand{\pmod}[1]{\ ({\rm mod\ }{#1})} +\newcommand{\floor}[1]{\left\lfloor{#1}\right\rfloor} +\newcommand{\ceil}[1]{\left\lceil{#1}\right\rceil} +\def\Or{{\rm\ or\ }} +\def\And{{\rm\ and\ }} +\def\iff{\hspace{1em}\Longleftrightarrow\hspace{1em}} +\def\implies{\Rightarrow} +\def\undefined{{\rm ``undefined"}} +\def\Proof{\vspace{1ex}\noindent {\bf Proof:}\hspace{1em}} +\let\oldphi\phi +\def\phi{\varphi} +\def\Pr{{\rm Pr}} +\newcommand{\str}[1]{{\mathbf{#1}}} +\def\F{{\mathbb F}} +\def\N{{\mathbb N}} +\def\Z{{\mathbb Z}} +\def\R{{\mathbb R}} +\def\C{{\mathbb C}} +\def\Q{{\mathbb Q}} +\definecolor{DGray}{gray}{0.5} +\newcommand{\emailaddr}[1]{\mbox{$<${#1}$>$}} +\def\twiddle{\raisebox{0.3ex}{\mbox{\tiny $\sim$}}} +\def\gap{\vspace{0.5ex}} +\makeindex +\begin{document} +\frontmatter +\pagestyle{empty} +\title{TomsFastMath User Manual \\ v0.01} +\author{Tom St Denis \\ tomstdenis@iahu.ca} +\maketitle +This text and library are all hereby placed in the public domain. This book has been formatted for B5 +[176x250] paper using the \LaTeX{} {\em book} macro package. + +\vspace{13cm} + +\begin{flushleft}This project was sponsored in part by + +Secure Science Corporation \url{http://www.securescience.net}. +\end{flushleft} + +\tableofcontents +\listoffigures +\mainmatter +\pagestyle{headings} +\chapter{Introduction} +\section{What is TomsFastMath?} + +TomsFastMath is meant to be a very fast yet still fairly portable and easy to port large +integer arithmetic library written in ISO C. The goal specifically is to be able to perform +very fast modular exponentiations and other related functions required for ECC, DH and RSA +cryptosystems. + +Most of the library is pure ISO C portable source code while a small portion (three files) contain +a mixture of ISO C and assembler inline fragments. 
Compared to LibTomMath this new library is +meant to be much faster while sacrificing flexibility. This is accomplished through several means. + +\begin{enumerate} + \item The new code is slightly messier and contains asm blocks. + \item This uses fixed not multiple precision integers. + \item It is designed only for fast modular exponentiations [e.g. less flexibility]. +\end{enumerate} + +To mitigate some of the problems that arise from using assembler it has been carefully and +appropriately used where it would make the most gain in performance. Also we use macros +for assembler code which allows new ports to be inserted easily. + +The new code uses fixed precision arithmetic which means at compile time you choose a maximum +precision and all numbers are limited to that. This has the benefit of not requiring any +memory heap operations (which are slow) in any of the functions. It has the downside that +integers that are too large are truncated. + +The goal of this library is to be able to perform modular exponentiations (with an odd modulus) very +fast. This is what takes the most time in systems such as RSA and DH. This also requires +fast multiplication and squaring and has the side effect of speeding up ECC operations as well. + +\section{License} +TomsFastMath is public domain. + +\section{Building} +Currently only a GCC makefile has been provided. To build the library simply type +``make''. The library is a bit too new to put into production so no install +scripts exist yet. You can build the test program with ``make test''. + +To perform simple static testing (useful to test out new assembly ports) use the stest +program. Type ``make stest'' and run it on your target. The program will perform three +multiplications, squarings and Montgomery reductions. Likely if your assembly +code is invalid this code will exhibit the bug. 
+ +\subsection{Build Limitations} +TomsFastMath has the following build requirements which are non--portable but under most +circumstances not problematic. + +\begin{enumerate} +\item ``CHAR\_BIT'' must be eight. +\item The ``fp\_digit'' type must be a multiple of eight bits long. +\item The ``fp\_word'' must be at least twice the length of fp\_digit. +\end{enumerate} + +\subsection{Optimization Configuration} +By default TFM is configured for 32--bit digits using ISO C source code. This mode while portable +is not very efficient. While building the library (from scratch) you can define one of +several ``CFLAGS'' defines. + +For example, to build with SSE2 optimizations type + +\begin{verbatim} +export CFLAGS=-DTFM_SSE2 +make clean libtfm.a +\end{verbatim} + +\subsubsection{x86--32} The ``x86--32'' mode is defined by ``TFM\_X86'' and covers all +i386 and beyond processors. It requires GCC to build and only works with 32--bit digits. In this +mode fp\_digit is 32--bits and fp\_word is 64--bits. + +\subsubsection{SSE2} The ``SSE2'' mode is defined by ``TFM\_SSE2'' and requires a Pentium 4, Pentium +M or Athlon64 processor. It requires GCC to build. Note that you shouldn't define both +TFM\_X86 and TFM\_SSE2 at the same time. This mode only works with 32--bit digits. In this +mode fp\_digit is 32--bits and fp\_word is 64--bits. + +\subsubsection{x86--64} The ``x86--64'' mode is defined by ``TFM\_X86\_64'' and requires an +``x86--64'' capable processor (Athlon64 and future Pentium processors). It requires GCC to +build and only works with 64--bit digits. Note that by enabling this mode it will automatically +enable 64--bit digits. In this mode fp\_digit is 64--bits and fp\_word is 128--bits. + +\subsubsection{ARM} The ``ARM'' mode is defined by ``TFM\_ARM'' and requires an ARMv4 or higher +processor. It requires GCC and works with 32--bit digits. In this mode fp\_digit is 32--bits and +fp\_word is 64--bits. 
+ +\subsubsection{Future Releases} Future releases will support additional platform optimizations. +Developers of MIPS and PPC platforms are encouraged to submit GCC asm inline patches +(see chapter \ref{chap:asmops} for more information). + +\begin{figure}[here] +\begin{small} +\begin{center} +\begin{tabular}{|l|l|} +\hline \textbf{Processor} & \textbf{Recommended Mode} \\ +\hline All 32--bit x86 platforms & TFM\_X86 \\ +\hline Pentium 4 & TFM\_SSE2 \\ +\hline Athlon64 & TFM\_X86\_64 \\ +\hline ARMv4 or higher & TFM\_ARM \\ +\hline +\end{tabular} +\caption{Recommended Build Modes} +\end{center} +\end{small} +\end{figure} + +\subsection{Precision Configuration} +The precision of all integers in this library is fixed to a limited precision. Essentially +the rule of setting the precision is if you plan on doing modular exponentiation with $k$--bit +numbers then the precision must be fixed to $2k$--bits plus four digits. + +This is changed by altering the value of ``FP\_MAX\_SIZE'' in tfm.h to your desired size. By default, +the library is configured to handle up to 2048--bit inputs to the modular exponentiator. + +\chapter{Getting Started} +\section{Data Types} +TomsFastMath is a large fixed precision integer library. It provides the functionality to +manipulate large signed integers through a relatively trivial API and a single data type. + +The ``fp\_int'' or fixed precision integer is the data type that the functions operate with. + +\begin{verbatim} +typedef struct { + fp_digit dp[FP_SIZE]; + int used, + sign; +} fp_int; +\end{verbatim} + +The \textbf{dp} member is the array of digits that forms the number. It must always be zero +padded. The \textbf{used} member is the count of digits used in the array. Although the +precision is fixed the algorithms are still tuned to not process the entire array if it +does not have to. The \textbf{sign} indicates the sign of the integer. 
It is \textbf{FP\_ZPOS} (0) +if the integer is zero or positive and \textbf{FP\_NEG} (1) otherwise. + +\section{Initialization} +\subsection{Simple Initialization} +To initialize an integer to the default state of zero use the fp\_init() function. + +\index{fp\_init} +\begin{verbatim} +void fp_init(fp_int *a); +\end{verbatim} + +This will initialize the fp\_int $a$ to zero. Note that the function fp\_zero() is an alias +for fp\_init(). + +\subsection{Initialize Small Constants} +To initialize an integer with a small single digit value use the fp\_set() function. + +\index{fp\_set} +\begin{verbatim} +void fp_set(fp_int *a, fp_digit b); +\end{verbatim} + +This will initialize $a$ and set it equal to the digit $b$. + +\subsection{Initialize Copy} +To initialize an integer with a copy of another integer use the fp\_init\_copy() function. + +\index{fp\_init\_copy} +\begin{verbatim} +void fp_init_copy(fp_int *a, fp_int *b) +\end{verbatim} + +This will initialize $a$ as a copy of $b$. Note that for compatibility with LibTomMath the function +fp\_copy() is also provided. + +\chapter{Arithmetic Operations} +\section{Odds and Evens} +To quickly and easily tell if an integer is zero, odd or even use the following functions. + +\index{fp\_iszero} \index{fp\_iseven} \index{fp\_isodd} +\begin{verbatim} +int fp_iszero(fp_int *a); +int fp_iseven(fp_int *a); +int fp_isodd(fp_int *a); +\end{verbatim} + +These will return \textbf{FP\_YES} if the answer to their respective questions is yes. Otherwise they +return \textbf{FP\_NO}. Note that these are implemented as macros and as such you should avoid using +++ or --~-- operators on the input operand. + +\section{Sign Manipulation} +To negate or compute the absolute of an integer use the following functions. + +\index{fp\_neg} \index{fp\_abs} +\begin{verbatim} +void fp_neg(fp_int *a, fp_int *b); +void fp_abs(fp_int *a, fp_int *b); +\end{verbatim} +This will compute the negation (or absolute) of $a$ and store the result in $b$. 
Note that these +are implemented as macros and as such you should avoid using ++ or --~-- operators on the input +operand. + +\section{Comparisons} +To perform signed or unsigned comparisons use the following functions. + +\index{fp\_cmp} \index{fp\_cmp\_mag} +\begin{verbatim} +int fp_cmp(fp_int *a, fp_int *b); +int fp_cmp_mag(fp_int *a, fp_int *b); +\end{verbatim} +These will compare $a$ to $b$. They will return \textbf{FP\_GT} if $a$ is larger than $b$, +\textbf{FP\_EQ} if they are equal and \textbf{FP\_LT} if $a$ is less than $b$. + +The function fp\_cmp performs signed comparisons while the other performs unsigned comparisons. + +\section{Shifting} +To shift the digits of an fp\_int left or right use the following functions. + +\index{fp\_lshd} \index{fp\_rshd} +\begin{verbatim} +void fp_lshd(fp_int *a, int x); +void fp_rshd(fp_int *a, int x); +\end{verbatim} + +These will shift the digits of $a$ left (or right respectively) $x$ digits. + +To shift individual bits of an fp\_int use the following functions. + +\index{fp\_div\_2d} \index{fp\_mod\_2d} \index{fp\_mul\_2d} \index{fp\_div\_2} \index{fp\_mul\_2} +\begin{verbatim} +void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d); +void fp_mod_2d(fp_int *a, int b, fp_int *c); +void fp_mul_2d(fp_int *a, int b, fp_int *c); +void fp_mul_2(fp_int *a, fp_int *c); +void fp_div_2(fp_int *a, fp_int *c); +void fp_2expt(fp_int *a, int b); +\end{verbatim} +fp\_div\_2d() will divide $a$ by $2^b$ and store the quotient in $c$ and remainder in $d$. Either of +$c$ or $d$ can be \textbf{NULL} if their value is not required. fp\_mod\_2d() is a shortcut to +compute the remainder directly. fp\_mul\_2d() will multiply $a$ by $2^b$ and store the result in $c$. + +The fp\_mul\_2() and fp\_div\_2() functions are optimized multiplications and divisions by two. The +function fp\_2expt() will compute $a = 2^b$ quickly. + +To quickly count the number of least significant bits that are zero use the following function. 
+ +\index{fp\_cnt\_lsb} +\begin{verbatim} +int fp_cnt_lsb(fp_int *a); +\end{verbatim} +This will return the number of adjacent least significant bits that are zero. This is equivalent +to the number of times two evenly divides $a$. + +\section{Basic Algebra} + +The following functions round out the basic algebraic functionality of the library. + +\index{fp\_add} \index{fp\_sub} \index{fp\_mul} \index{fp\_sqr} \index{fp\_div} \index{fp\_mod} +\begin{verbatim} +void fp_add(fp_int *a, fp_int *b, fp_int *c); +void fp_sub(fp_int *a, fp_int *b, fp_int *c); +void fp_mul(fp_int *a, fp_int *b, fp_int *c); +void fp_sqr(fp_int *a, fp_int *b); +int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d); +int fp_mod(fp_int *a, fp_int *b, fp_int *c); +\end{verbatim} + +The functions fp\_add(), fp\_sub() and fp\_mul() perform their respective operations on $a$ and +$b$ and store the result in $c$. The function fp\_sqr() computes $b = a^2$ and is faster than +using fp\_mul() to perform the same operation. + +The function fp\_div() divides $a$ by $b$ and stores the quotient in $c$ and remainder in $d$. Either +of $c$ or $d$ can be \textbf{NULL} if the result is not required. The function fp\_mod() is a simple +shortcut to find the remainder. + +\section{Modular Exponentiation} +To compute a modular exponentiation use the following function. + +\index{fp\_exptmod} +\begin{verbatim} +int fp_exptmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d); +\end{verbatim} +This computes $d \equiv a^b \mbox{ (mod }c)$ for any odd $c$ and positive $b$. The size of $c$ +must be at most half of the maximum precision used during the build of the library. For example, +by default $c$ must be less than $2^{2048}$. + +\section{Number Theoretic} + +To perform modular inverses, greatest common divisor or least common multiples use the following +functions. 
+ +\index{fp\_invmod} \index{fp\_gcd} \index{fp\_lcm} +\begin{verbatim} +int fp_invmod(fp_int *a, fp_int *b, fp_int *c); +void fp_gcd(fp_int *a, fp_int *b, fp_int *c); +void fp_lcm(fp_int *a, fp_int *b, fp_int *c); +\end{verbatim} + +The fp\_invmod() function will find the modular inverse of $a$ modulo an odd modulus $b$ and store +it in $c$ (provided it exists). The function fp\_gcd() will compute the greatest common +divisor of $a$ and $b$ and store it in $c$. Similarly the fp\_lcm() function will compute +the least common multiple of $a$ and $b$ and store it in $c$. + +\section{Prime Numbers} +To quickly test a number for primality call this function. + +\index{fp\_isprime} +\begin{verbatim} +int fp_isprime(fp_int *a); +\end{verbatim} +This will return \textbf{FP\_YES} if $a$ is probably prime. It uses 256 trial divisions and +eight rounds of Rabin-Miller testing. Note that this routine performs modular exponentiations +which means that $a$ must be in a valid range of precision. + +\chapter{Porting TomsFastMath} +\label{chap:asmops} +\section{Getting Started} +Porting TomsFastMath to a given processor target is usually a simple procedure. For the most part +assembly is used to get around the lack of an ``add with carry'' operation in the C language. To +make matters simpler the use of assembler is through macro blocks. + +Each ``port'' is defined by a block of code that re-defines the portable ISO C macros with assembler +inline blocks. To add a new port you must designate a TFM\_XXX define that will enable your +port when built. + +\section{Multiply with Comba} +The file ``fp\_mul\_comba.c'' is responsible for providing the fast multiplication within the +library. This comba multiplication is fairly simple. It uses a sliding three digit carry +system with the variables $c0$, $c1$, $c2$. For every digit of output $c0$ is what will +be that digit, $c1$ will carry into the next digit and $c2$ will be the ``c1'' carry for +the next digit. 
For every ``next'' digit effectively $c0$ is stored as output, $c1$ moves into +$c0$, $c2$ into $c1$ and zero into $c2$. + +The following macros define the assembler interface to the code. + +\begin{verbatim} +#define COMBA_START +\end{verbatim} + +This is issued at the beginning of the multiplication function. This is in place to allow you to +initialize any registers or machine words required. You can leave it blank if you do not need +it. + +\begin{verbatim} +#define COMBA_CLEAR \ + c0 = c1 = c2 = 0; +\end{verbatim} + +This clears the three comba carries. If you are going to place carries in registers then +zero the appropriate registers. Note that the functions do not use $c0$, $c1$ or $c2$ directly +so you are free to ignore these variables and use registers directly. + +\begin{verbatim} +#define COMBA_FORWARD \ + c0 = c1; c1 = c2; c2 = 0; +\end{verbatim} + +This propagates the carries after a digit has been produced. + +\begin{verbatim} +#define COMBA_STORE(x) \ + x = c0; +\end{verbatim} + +This stores the $c0$ digit in the memory location specified by $x$. Note that if you manually +aliased $c0$ with a register then just store that register in $x$. + +\begin{verbatim} +#define COMBA_STORE2(x) \ + x = c1; +\end{verbatim} + +This stores the $c1$ digit in the memory location specified by $x$. Note that if you manually +aliased $c1$ with a register then just store that register in $x$. + +\begin{verbatim} +#define COMBA_FINI +\end{verbatim} + +If at the end of the function you need to perform some action fill this macro in. + +\begin{verbatim} +#define MULADD(i, j) \ + t = ((fp_word)i) * ((fp_word)j); \ + c0 = (c0 + t); if (c0 < ((fp_digit)t)) ++c1; \ + c1 = (c1 + (t>>DIGIT_BIT)); if (c1 < (t>>DIGIT_BIT)) ++c2; +\end{verbatim} + +This macro performs the ``multiply and add'' step that is central to the comba +multiplier. It multiplies the fp\_digits $i$ and $j$ to produce a fp\_word result. 
Effectively +the double--digit value is added to the three-digit carry formed by $c0$, $c1$, $c2$ where $c0$ +is the least significant digit. + +\section{Squaring with Comba} +Squaring is similar to multiplication except that it uses a special ``multiply and add twice'' macro +that replaces multiplications that are not required. + +\begin{verbatim} +#define COMBA_START +\end{verbatim} + +This allows for any initialization code you might have. + +\begin{verbatim} +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; +\end{verbatim} + +This will clear the carries. Like multiplication you can safely alias the three carry variables +to registers if you can/want to. + +\begin{verbatim} +#define COMBA_STORE(x) \ + x = c0; +\end{verbatim} + +Store the $c0$ carry to a given memory location. + +\begin{verbatim} +#define COMBA_STORE2(x) \ + x = c1; +\end{verbatim} + +Store the $c1$ carry to a given memory location. + +\begin{verbatim} +#define CARRY_FORWARD \ + c0 = c1; c1 = c2; c2 = 0; +\end{verbatim} + +Forward propagate all three carry variables. + +\begin{verbatim} +#define COMBA_FINI +\end{verbatim} + +Fill this macro in if you need to clean up at the end of the function. + +\begin{verbatim} +/* multiplies point i and j, updates carry "c1" and digit c2 */ +#define SQRADD(i, j) \ + t = ((fp_word)i) * ((fp_word)j); \ + c0 = (c0 + t); if (c0 < ((fp_digit)t)) ++c1; \ + c1 = (c1 + (t>>DIGIT_BIT)); if (c1 < (t>>DIGIT_BIT)) ++c2; +\end{verbatim} + +This is essentially the MULADD macro from the multiplication code. + +\begin{verbatim} +/* for squaring some of the terms are doubled... */ +#define SQRADD2(i, j) \ + t = ((fp_word)i) * ((fp_word)j); \ + c0 = (c0 + t); if (c0 < ((fp_digit)t)) ++c1; \ + c1 = (c1 + (t>>DIGIT_BIT)); if (c1 < (t>>DIGIT_BIT)) ++c2; \ + c0 = (c0 + t); if (c0 < ((fp_digit)t)) ++c1; \ + c1 = (c1 + (t>>DIGIT_BIT)); if (c1 < (t>>DIGIT_BIT)) ++c2; +\end{verbatim} + +This is like SQRADD except it adds the product twice. It's similar to +computing SQRADD(i, j*2). 
+ +\section{Montgomery with Comba} +Montgomery reduction is used in modular exponentiation and is the most called function during +that operation. It's important to make sure this routine is very fast or all is lost. + +Unlike the two other comba routines this one does not use a single three--digit carry +system. It does have three--digit carries except that the routine steps through them +in the inner loop. This means you cannot alias them to registers (at all). + +To make matters simple though the three arrays of carries are stored in one array. The +``c0'' array resides in $c[0 \ldots OFF1-1]$, ``c1'' in $c[OFF1 \ldots OFF2-1]$ and ``c2'' in +$c[OFF2 \ldots OFF2+FP\_SIZE-1]$. + +\begin{verbatim} +#define MONT_START +\end{verbatim} + +This allows you to insert anything at the start that you need. + +\begin{verbatim} +#define MONT_FINI +\end{verbatim} + +This allows you to insert anything at the end that you need. + +\begin{verbatim} +#define LOOP_START \ + mu = c[x] * mp; +\end{verbatim} + +This computes the $\mu$ value for the inner loop. You can safely alias $mu$ and $mp$ to +a register if you want. + +\begin{verbatim} +#define INNERMUL \ + t = ((fp_word)mu) * ((fp_word)*tmpm++); \ + _c[OFF0] += t; \ + if (_c[OFF0] < (fp_digit)t) ++_c[OFF1]; \ + _c[OFF1] += (t>>DIGIT_BIT); \ + if (_c[OFF1] < (fp_digit)(t>>DIGIT_BIT)) ++_c[OFF2]; +\end{verbatim} + +This computes the inner product and adds it to the correct set of carry variables. The variable +$\_c$ is a pointer alias to $c[x+y]$ and used to simplify the code. + +You can safely alias $\_c$ to a register for INNERMUL by setting it equal to ``c + x'' +\footnote{Where ``c'' is an array on the stack.} by modifying LOOP\_START. + +\begin{verbatim} +#define PROPCARRY \ + _c[OFF0+1] += _c[OFF1]; \ + if (_c[OFF0+1] < _c[OFF1]) ++_c[OFF1+1]; \ + _c[OFF1+1] += _c[OFF2]; \ + if (_c[OFF1+1] < _c[OFF2]) ++_c[OFF2+1]; +\end{verbatim} + +This propagates the carry upwards by one digit. + +\input{tfm.ind} + +\end{document}