forked from ibphoenix/tomsfastmath
added tomsfastmath-0.05
This commit is contained in:
parent
f91cf2d1cf
commit
a6c4c5a261
@ -1,3 +1,10 @@
|
|||||||
|
August 1st, 2005
|
||||||
|
0.05 -- Quick fix to the fp_invmod.c code to let it handle even moduli [required for LTC]
|
||||||
|
-- Added makefile.shared to make shared objects [required for LTC]
|
||||||
|
-- Improved makefiles to make them way more configurable
|
||||||
|
-- Added timing resistant fp_exptmod() enabled with TFM_TIMING_RESISTANT
|
||||||
|
|
||||||
|
July 23rd, 2005
|
||||||
0.04 -- Fixed bugs in the SSE2 squaring code
|
0.04 -- Fixed bugs in the SSE2 squaring code
|
||||||
-- Rewrote the multipliers to be optimized for small inputs
|
-- Rewrote the multipliers to be optimized for small inputs
|
||||||
-- Nelson Bolyard of the NSS crew submitted [among other things] new faster Montgomery reduction
|
-- Nelson Bolyard of the NSS crew submitted [among other things] new faster Montgomery reduction
|
||||||
|
137
comba_mont_gen.c
137
comba_mont_gen.c
@ -1,59 +1,112 @@
|
|||||||
/* generate montgomery reductions for m->used = 1...16 */
|
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
int main(void)
|
int main(void)
|
||||||
{
|
{
|
||||||
int N;
|
int x, y, z;
|
||||||
|
|
||||||
for (N = 1; N <= 16; N++) {
|
|
||||||
|
|
||||||
printf("void fp_montgomery_reduce_%d(fp_int *a, fp_int *m, fp_digit mp)\n", N);
|
|
||||||
printf(
|
printf(
|
||||||
|
#if 0
|
||||||
|
"#ifdef TFM_SMALL_SET\n"
|
||||||
|
"/* computes x/R == x (mod N) via Montgomery Reduction */\n"
|
||||||
|
"void fp_montgomery_reduce_small(fp_int *a, fp_int *m, fp_digit mp)\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" fp_digit c[3*FP_SIZE], *_c, *tmpm, mu;\n"
|
" fp_digit c[FP_SIZE], *_c, *tmpm, mu, cy;\n"
|
||||||
" int oldused, x, y;\n"
|
" int oldused, x, y, pa;\n"
|
||||||
"\n"
|
"\n"
|
||||||
|
"#if defined(USE_MEMSET)\n"
|
||||||
" /* now zero the buff */\n"
|
" /* now zero the buff */\n"
|
||||||
" memset(c, 0, sizeof(c));\n"
|
" memset(c, 0, sizeof c);\n"
|
||||||
|
"#endif\n"
|
||||||
|
" pa = m->used;\n"
|
||||||
"\n"
|
"\n"
|
||||||
" /* copy the input */\n"
|
" /* copy the input */\n"
|
||||||
" oldused = a->used;\n"
|
" oldused = a->used;\n"
|
||||||
" for (x = 0; x < oldused; x++) {\n"
|
" for (x = 0; x < oldused; x++) {\n"
|
||||||
" c[x] = a->dp[x];\n"
|
" c[x] = a->dp[x];\n"
|
||||||
" }\n"
|
" }\n"
|
||||||
"\n"
|
"#if !defined(USE_MEMSET)\n"
|
||||||
|
" for (; x < 2*pa+3; x++) {\n"
|
||||||
|
" c[x] = 0;\n"
|
||||||
|
" }\n"
|
||||||
|
"#endif\n"
|
||||||
" MONT_START;\n"
|
" MONT_START;\n"
|
||||||
|
#endif
|
||||||
"\n"
|
"\n"
|
||||||
" /* now let's get bizz-sy! */\n"
|
" switch (pa) {\n");
|
||||||
" for (x = 0; x < %d; x++) {\n"
|
|
||||||
" /* get Mu for this round */\n"
|
for (x = 1; x <= 64; x++) {
|
||||||
" LOOP_START;\n"
|
if (x > 16 && (x != 32 && x != 48 && x != 64)) continue;
|
||||||
"\n"
|
if (x > 16) printf("#ifdef TFM_HUGE\n");
|
||||||
" /* our friendly neighbourhood alias */\n"
|
|
||||||
" _c = c + x;\n"
|
|
||||||
" tmpm = m->dp;\n"
|
|
||||||
"\n"
|
printf(" case %d:\n", x);
|
||||||
" for (y = 0; y < %d; y++) {\n"
|
|
||||||
" INNERMUL;\n"
|
for (y = 0; y < x; y++) {
|
||||||
" ++_c;\n"
|
|
||||||
|
printf(" x = %d; cy = 0;\n"
|
||||||
|
" LOOP_START;\n"
|
||||||
|
" _c = c + %d;\n"
|
||||||
|
" tmpm = m->dp;\n", y, y);
|
||||||
|
|
||||||
|
printf("#ifdef INNERMUL8\n");
|
||||||
|
for (z = 0; z+8 <= x; z += 8) {
|
||||||
|
printf(" INNERMUL8; _c += 8; tmpm += 8;\n");
|
||||||
|
}
|
||||||
|
for (; z < x; z++) {
|
||||||
|
printf(" INNERMUL; ++_c;\n");
|
||||||
|
}
|
||||||
|
printf("#else\n");
|
||||||
|
for (z = 0; z < x; z++) {
|
||||||
|
printf(" INNERMUL; ++_c;\n");
|
||||||
|
}
|
||||||
|
printf("#endif\n");
|
||||||
|
printf(" LOOP_END;\n"
|
||||||
|
" while (cy) {\n"
|
||||||
|
" PROPCARRY;\n"
|
||||||
|
" ++_c;\n"
|
||||||
|
" }\n");
|
||||||
|
}
|
||||||
|
//printf(" }\n");
|
||||||
|
printf(" break;\n");
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#define LOOP_MACRO(stride) \
|
||||||
|
for (x = 0; x < stride; x++) { \
|
||||||
|
fp_digit cy = 0; \
|
||||||
|
/* get Mu for this round */ \
|
||||||
|
LOOP_START; \
|
||||||
|
_c = c + x; \
|
||||||
|
tmpm = m->dp; \
|
||||||
|
for (y = 0; y < stride; y++) { \
|
||||||
|
INNERMUL; \
|
||||||
|
++_c; \
|
||||||
|
} \
|
||||||
|
LOOP_END; \
|
||||||
|
while (cy) { \
|
||||||
|
PROPCARRY; \
|
||||||
|
++_c; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if (x > 16) printf("#endif /* TFM_HUGE */\n");
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
|
||||||
|
printf(
|
||||||
" }\n"
|
" }\n"
|
||||||
" /* send carry up man... */\n"
|
|
||||||
" _c = c + x;\n"
|
|
||||||
" PROPCARRY;\n"
|
|
||||||
" } \n"
|
|
||||||
"\n"
|
|
||||||
" /* fix the rest of the carries */\n"
|
|
||||||
" _c = c + %d;\n"
|
|
||||||
" for (x = %d; x < %d * 2 + 2; x++) {\n"
|
|
||||||
" PROPCARRY;\n"
|
|
||||||
" ++_c;\n"
|
|
||||||
" }\n"
|
|
||||||
"\n"
|
|
||||||
" /* now copy out */\n"
|
" /* now copy out */\n"
|
||||||
" _c = c + %d;\n"
|
" _c = c + pa;\n"
|
||||||
" tmpm = a->dp;\n"
|
" tmpm = a->dp;\n"
|
||||||
" for (x = 0; x < %d+1; x++) {\n"
|
" for (x = 0; x < pa+1; x++) {\n"
|
||||||
" *tmpm++ = *_c++;\n"
|
" *tmpm++ = *_c++;\n"
|
||||||
" }\n"
|
" }\n"
|
||||||
"\n"
|
"\n"
|
||||||
@ -63,19 +116,17 @@ printf(
|
|||||||
"\n"
|
"\n"
|
||||||
" MONT_FINI;\n"
|
" MONT_FINI;\n"
|
||||||
"\n"
|
"\n"
|
||||||
" a->used = %d+1;\n"
|
" a->used = pa+1;\n"
|
||||||
" fp_clamp(a);\n"
|
" fp_clamp(a);\n"
|
||||||
"\n"
|
"\n"
|
||||||
" /* if A >= m then A = A - m */\n"
|
" /* if A >= m then A = A - m */\n"
|
||||||
" if (fp_cmp_mag (a, m) != FP_LT) {\n"
|
" if (fp_cmp_mag (a, m) != FP_LT) {\n"
|
||||||
" s_fp_sub (a, m, a);\n"
|
" s_fp_sub (a, m, a);\n"
|
||||||
" }\n"
|
" }\n"
|
||||||
"}\n", N,N,N,N,N,N,N,N);
|
"}\n\n#endif\n");
|
||||||
}
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -213,7 +213,7 @@ t1 = TIMFUNC();
|
|||||||
sleep(1);
|
sleep(1);
|
||||||
printf("Ticks per second: %llu\n", TIMFUNC() - t1);
|
printf("Ticks per second: %llu\n", TIMFUNC() - t1);
|
||||||
|
|
||||||
goto expttime;
|
goto multtime;
|
||||||
/* do some timings... */
|
/* do some timings... */
|
||||||
printf("Addition:\n");
|
printf("Addition:\n");
|
||||||
for (t = 2; t <= FP_SIZE/2; t += 2) {
|
for (t = 2; t <= FP_SIZE/2; t += 2) {
|
||||||
|
BIN
doc/tfm.pdf
BIN
doc/tfm.pdf
Binary file not shown.
71
fp_exptmod.c
71
fp_exptmod.c
@ -9,6 +9,75 @@
|
|||||||
*/
|
*/
|
||||||
#include <tfm.h>
|
#include <tfm.h>
|
||||||
|
|
||||||
|
#ifdef TFM_TIMING_RESISTANT
|
||||||
|
|
||||||
|
/* timing resistant montgomery ladder based exptmod
|
||||||
|
|
||||||
|
Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder", Cryptographic Hardware and Embedded Systems, CHES 2002
|
||||||
|
*/
|
||||||
|
static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
|
||||||
|
{
|
||||||
|
fp_int R[2];
|
||||||
|
fp_digit buf, mp;
|
||||||
|
int err, bitcnt, digidx, y;
|
||||||
|
|
||||||
|
/* now setup montgomery */
|
||||||
|
if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
fp_init(&R[0]);
|
||||||
|
fp_init(&R[1]);
|
||||||
|
|
||||||
|
/* now we need R mod m */
|
||||||
|
fp_montgomery_calc_normalization (&R[0], P);
|
||||||
|
|
||||||
|
/* R[0] now holds R mod m; set R[1] to G * R mod m */
|
||||||
|
if (fp_cmp_mag(P, G) != FP_GT) {
|
||||||
|
/* G >= P so we reduce it first */
|
||||||
|
fp_mod(G, P, &R[1]);
|
||||||
|
} else {
|
||||||
|
fp_copy(G, &R[1]);
|
||||||
|
}
|
||||||
|
fp_mulmod (&R[1], &R[0], P, &R[1]);
|
||||||
|
|
||||||
|
/* for j = t-1 downto 0 do
|
||||||
|
r_!k = R0*R1; r_k = r_k^2
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* set initial mode and bit cnt */
|
||||||
|
bitcnt = 1;
|
||||||
|
buf = 0;
|
||||||
|
digidx = X->used - 1;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
/* grab next digit as required */
|
||||||
|
if (--bitcnt == 0) {
|
||||||
|
/* if digidx == -1 we are out of digits so break */
|
||||||
|
if (digidx == -1) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* read next digit and reset bitcnt */
|
||||||
|
buf = X->dp[digidx--];
|
||||||
|
bitcnt = (int)DIGIT_BIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* grab the next msb from the exponent */
|
||||||
|
y = (fp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
|
||||||
|
buf <<= (fp_digit)1;
|
||||||
|
|
||||||
|
/* do ops */
|
||||||
|
fp_mul(&R[0], &R[1], &R[y^1]); fp_montgomery_reduce(&R[y^1], P, mp);
|
||||||
|
fp_sqr(&R[y], &R[y]); fp_montgomery_reduce(&R[y], P, mp);
|
||||||
|
}
|
||||||
|
|
||||||
|
fp_montgomery_reduce(&R[0], P, mp);
|
||||||
|
fp_copy(&R[0], Y);
|
||||||
|
return FP_OKAY;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
/* y = g**x (mod b)
|
/* y = g**x (mod b)
|
||||||
* Some restrictions... x must be positive and < b
|
* Some restrictions... x must be positive and < b
|
||||||
*/
|
*/
|
||||||
@ -168,6 +237,8 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
|
|||||||
return FP_OKAY;
|
return FP_OKAY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
|
int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
|
||||||
{
|
{
|
||||||
|
107
fp_invmod.c
107
fp_invmod.c
@ -9,6 +9,111 @@
|
|||||||
*/
|
*/
|
||||||
#include <tfm.h>
|
#include <tfm.h>
|
||||||
|
|
||||||
|
static int fp_invmod_slow (fp_int * a, fp_int * b, fp_int * c)
|
||||||
|
{
|
||||||
|
fp_int x, y, u, v, A, B, C, D;
|
||||||
|
int res;
|
||||||
|
|
||||||
|
/* b cannot be negative */
|
||||||
|
if (b->sign == FP_NEG || fp_iszero(b) == 1) {
|
||||||
|
return FP_VAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* init temps */
|
||||||
|
fp_init(&x); fp_init(&y);
|
||||||
|
fp_init(&u); fp_init(&v);
|
||||||
|
fp_init(&A); fp_init(&B);
|
||||||
|
fp_init(&C); fp_init(&D);
|
||||||
|
|
||||||
|
/* x = a, y = b */
|
||||||
|
if ((res = fp_mod(a, b, &x)) != FP_OKAY) {
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
fp_copy(b, &y);
|
||||||
|
|
||||||
|
/* 2. [modified] if x,y are both even then return an error! */
|
||||||
|
if (fp_iseven (&x) == 1 && fp_iseven (&y) == 1) {
|
||||||
|
return FP_VAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
|
||||||
|
fp_copy (&x, &u);
|
||||||
|
fp_copy (&y, &v);
|
||||||
|
fp_set (&A, 1);
|
||||||
|
fp_set (&D, 1);
|
||||||
|
|
||||||
|
top:
|
||||||
|
/* 4. while u is even do */
|
||||||
|
while (fp_iseven (&u) == 1) {
|
||||||
|
/* 4.1 u = u/2 */
|
||||||
|
fp_div_2 (&u, &u);
|
||||||
|
|
||||||
|
/* 4.2 if A or B is odd then */
|
||||||
|
if (fp_isodd (&A) == 1 || fp_isodd (&B) == 1) {
|
||||||
|
/* A = (A+y)/2, B = (B-x)/2 */
|
||||||
|
fp_add (&A, &y, &A);
|
||||||
|
fp_sub (&B, &x, &B);
|
||||||
|
}
|
||||||
|
/* A = A/2, B = B/2 */
|
||||||
|
fp_div_2 (&A, &A);
|
||||||
|
fp_div_2 (&B, &B);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 5. while v is even do */
|
||||||
|
while (fp_iseven (&v) == 1) {
|
||||||
|
/* 5.1 v = v/2 */
|
||||||
|
fp_div_2 (&v, &v);
|
||||||
|
|
||||||
|
/* 5.2 if C or D is odd then */
|
||||||
|
if (fp_isodd (&C) == 1 || fp_isodd (&D) == 1) {
|
||||||
|
/* C = (C+y)/2, D = (D-x)/2 */
|
||||||
|
fp_add (&C, &y, &C);
|
||||||
|
fp_sub (&D, &x, &D);
|
||||||
|
}
|
||||||
|
/* C = C/2, D = D/2 */
|
||||||
|
fp_div_2 (&C, &C);
|
||||||
|
fp_div_2 (&D, &D);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 6. if u >= v then */
|
||||||
|
if (fp_cmp (&u, &v) != FP_LT) {
|
||||||
|
/* u = u - v, A = A - C, B = B - D */
|
||||||
|
fp_sub (&u, &v, &u);
|
||||||
|
fp_sub (&A, &C, &A);
|
||||||
|
fp_sub (&B, &D, &B);
|
||||||
|
} else {
|
||||||
|
/* v = v - u, C = C - A, D = D - B */
|
||||||
|
fp_sub (&v, &u, &v);
|
||||||
|
fp_sub (&C, &A, &C);
|
||||||
|
fp_sub (&D, &B, &D);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if not zero goto step 4 */
|
||||||
|
if (fp_iszero (&u) == 0)
|
||||||
|
goto top;
|
||||||
|
|
||||||
|
/* now a = C, b = D, gcd == g*v */
|
||||||
|
|
||||||
|
/* if v != 1 then there is no inverse */
|
||||||
|
if (fp_cmp_d (&v, 1) != FP_EQ) {
|
||||||
|
return FP_VAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if it's too low (negative), add b until it is non-negative */
|
||||||
|
while (fp_cmp_d(&C, 0) == FP_LT) {
|
||||||
|
fp_add(&C, b, &C);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if it's too big (magnitude >= b), subtract b until it is below b */
|
||||||
|
while (fp_cmp_mag(&C, b) != FP_LT) {
|
||||||
|
fp_sub(&C, b, &C);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* C is now the inverse */
|
||||||
|
fp_copy(&C, c);
|
||||||
|
return FP_OKAY;
|
||||||
|
}
|
||||||
|
|
||||||
/* c = 1/a (mod b) for odd b only */
|
/* c = 1/a (mod b); an even b is handed off to fp_invmod_slow */
|
||||||
int fp_invmod(fp_int *a, fp_int *b, fp_int *c)
|
int fp_invmod(fp_int *a, fp_int *b, fp_int *c)
|
||||||
{
|
{
|
||||||
@ -17,7 +122,7 @@ int fp_invmod(fp_int *a, fp_int *b, fp_int *c)
|
|||||||
|
|
||||||
/* 2. [modified] b must be odd */
|
/* 2. [modified] the fast path needs an odd b; an even b uses fp_invmod_slow */
|
||||||
if (fp_iseven (b) == FP_YES) {
|
if (fp_iseven (b) == FP_YES) {
|
||||||
return FP_VAL;
|
return fp_invmod_slow(a,b,c);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* init all our temps */
|
/* init all our temps */
|
||||||
|
@ -299,8 +299,6 @@ asm( \
|
|||||||
|
|
||||||
|
|
||||||
#define LO 0
|
#define LO 0
|
||||||
#define HI 1
|
|
||||||
#define CY 2
|
|
||||||
|
|
||||||
/* computes x/R == x (mod N) via Montgomery Reduction */
|
/* computes x/R == x (mod N) via Montgomery Reduction */
|
||||||
void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
|
void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
|
||||||
@ -347,7 +345,7 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
|
|||||||
}
|
}
|
||||||
LOOP_END;
|
LOOP_END;
|
||||||
while (cy) {
|
while (cy) {
|
||||||
PROPCARRY; // cy = cy > (*_c += cy);
|
PROPCARRY;
|
||||||
++_c;
|
++_c;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -374,7 +372,7 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* $Source$ */
|
/* $Source$ */
|
||||||
/* $Revision$ */
|
/* $Revision$ */
|
||||||
/* $Date$ */
|
/* $Date$ */
|
||||||
|
|
||||||
|
@ -47,7 +47,7 @@
|
|||||||
|
|
||||||
/* this should multiply i and j */
|
/* this should multiply i and j */
|
||||||
#define MULADD(i, j) \
|
#define MULADD(i, j) \
|
||||||
asm ( \
|
asm( \
|
||||||
"movl %6,%%eax \n\t" \
|
"movl %6,%%eax \n\t" \
|
||||||
"mull %7 \n\t" \
|
"mull %7 \n\t" \
|
||||||
"addl %%eax,%0 \n\t" \
|
"addl %%eax,%0 \n\t" \
|
||||||
@ -118,7 +118,7 @@ asm ( \
|
|||||||
|
|
||||||
/* this should multiply i and j */
|
/* this should multiply i and j */
|
||||||
#define MULADD(i, j) \
|
#define MULADD(i, j) \
|
||||||
asm volatile ( \
|
asm( \
|
||||||
"movd %6,%%mm0 \n\t" \
|
"movd %6,%%mm0 \n\t" \
|
||||||
"movd %7,%%mm1 \n\t" \
|
"movd %7,%%mm1 \n\t" \
|
||||||
"pmuludq %%mm1,%%mm0\n\t" \
|
"pmuludq %%mm1,%%mm0\n\t" \
|
||||||
|
@ -36,7 +36,7 @@
|
|||||||
#define COMBA_FINI
|
#define COMBA_FINI
|
||||||
|
|
||||||
#define SQRADD(i, j) \
|
#define SQRADD(i, j) \
|
||||||
asm volatile ( \
|
asm( \
|
||||||
"movl %6,%%eax \n\t" \
|
"movl %6,%%eax \n\t" \
|
||||||
"mull %%eax \n\t" \
|
"mull %%eax \n\t" \
|
||||||
"addl %%eax,%0 \n\t" \
|
"addl %%eax,%0 \n\t" \
|
||||||
@ -45,7 +45,7 @@ asm volatile ( \
|
|||||||
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc");
|
||||||
|
|
||||||
#define SQRADD2(i, j) \
|
#define SQRADD2(i, j) \
|
||||||
asm volatile ( \
|
asm( \
|
||||||
"movl %6,%%eax \n\t" \
|
"movl %6,%%eax \n\t" \
|
||||||
"mull %7 \n\t" \
|
"mull %7 \n\t" \
|
||||||
"addl %%eax,%0 \n\t" \
|
"addl %%eax,%0 \n\t" \
|
||||||
@ -57,7 +57,7 @@ asm volatile ( \
|
|||||||
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
|
||||||
|
|
||||||
#define SQRADDSC(i, j) \
|
#define SQRADDSC(i, j) \
|
||||||
asm ( \
|
asm( \
|
||||||
"movl %6,%%eax \n\t" \
|
"movl %6,%%eax \n\t" \
|
||||||
"mull %7 \n\t" \
|
"mull %7 \n\t" \
|
||||||
"movl %%eax,%0 \n\t" \
|
"movl %%eax,%0 \n\t" \
|
||||||
@ -66,7 +66,7 @@ asm ( \
|
|||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
|
||||||
|
|
||||||
#define SQRADDAC(i, j) \
|
#define SQRADDAC(i, j) \
|
||||||
asm ( \
|
asm( \
|
||||||
"movl %6,%%eax \n\t" \
|
"movl %6,%%eax \n\t" \
|
||||||
"mull %7 \n\t" \
|
"mull %7 \n\t" \
|
||||||
"addl %%eax,%0 \n\t" \
|
"addl %%eax,%0 \n\t" \
|
||||||
@ -75,7 +75,7 @@ asm ( \
|
|||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
|
||||||
|
|
||||||
#define SQRADDDB \
|
#define SQRADDDB \
|
||||||
asm ( \
|
asm( \
|
||||||
"addl %6,%0 \n\t" \
|
"addl %6,%0 \n\t" \
|
||||||
"adcl %7,%1 \n\t" \
|
"adcl %7,%1 \n\t" \
|
||||||
"adcl %8,%2 \n\t" \
|
"adcl %8,%2 \n\t" \
|
||||||
@ -104,7 +104,7 @@ asm ( \
|
|||||||
#define COMBA_FINI
|
#define COMBA_FINI
|
||||||
|
|
||||||
#define SQRADD(i, j) \
|
#define SQRADD(i, j) \
|
||||||
asm ( \
|
asm( \
|
||||||
"movq %6,%%rax \n\t" \
|
"movq %6,%%rax \n\t" \
|
||||||
"mulq %%rax \n\t" \
|
"mulq %%rax \n\t" \
|
||||||
"addq %%rax,%0 \n\t" \
|
"addq %%rax,%0 \n\t" \
|
||||||
@ -113,7 +113,7 @@ asm ( \
|
|||||||
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","%cc");
|
||||||
|
|
||||||
#define SQRADD2(i, j) \
|
#define SQRADD2(i, j) \
|
||||||
asm ( \
|
asm( \
|
||||||
"movq %6,%%rax \n\t" \
|
"movq %6,%%rax \n\t" \
|
||||||
"mulq %7 \n\t" \
|
"mulq %7 \n\t" \
|
||||||
"addq %%rax,%0 \n\t" \
|
"addq %%rax,%0 \n\t" \
|
||||||
@ -125,7 +125,7 @@ asm ( \
|
|||||||
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
|
||||||
|
|
||||||
#define SQRADDSC(i, j) \
|
#define SQRADDSC(i, j) \
|
||||||
asm ( \
|
asm( \
|
||||||
"movq %6,%%rax \n\t" \
|
"movq %6,%%rax \n\t" \
|
||||||
"mulq %7 \n\t" \
|
"mulq %7 \n\t" \
|
||||||
"movq %%rax,%0 \n\t" \
|
"movq %%rax,%0 \n\t" \
|
||||||
@ -134,7 +134,7 @@ asm ( \
|
|||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
|
||||||
|
|
||||||
#define SQRADDAC(i, j) \
|
#define SQRADDAC(i, j) \
|
||||||
asm ( \
|
asm( \
|
||||||
"movq %6,%%rax \n\t" \
|
"movq %6,%%rax \n\t" \
|
||||||
"mulq %7 \n\t" \
|
"mulq %7 \n\t" \
|
||||||
"addq %%rax,%0 \n\t" \
|
"addq %%rax,%0 \n\t" \
|
||||||
@ -143,7 +143,7 @@ asm ( \
|
|||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
|
||||||
|
|
||||||
#define SQRADDDB \
|
#define SQRADDDB \
|
||||||
asm ( \
|
asm( \
|
||||||
"addq %6,%0 \n\t" \
|
"addq %6,%0 \n\t" \
|
||||||
"adcq %7,%1 \n\t" \
|
"adcq %7,%1 \n\t" \
|
||||||
"adcq %8,%2 \n\t" \
|
"adcq %8,%2 \n\t" \
|
||||||
@ -173,7 +173,7 @@ asm ( \
|
|||||||
asm("emms");
|
asm("emms");
|
||||||
|
|
||||||
#define SQRADD(i, j) \
|
#define SQRADD(i, j) \
|
||||||
asm volatile ( \
|
asm( \
|
||||||
"movd %6,%%mm0 \n\t" \
|
"movd %6,%%mm0 \n\t" \
|
||||||
"pmuludq %%mm0,%%mm0\n\t" \
|
"pmuludq %%mm0,%%mm0\n\t" \
|
||||||
"movd %%mm0,%%eax \n\t" \
|
"movd %%mm0,%%eax \n\t" \
|
||||||
@ -185,7 +185,7 @@ asm volatile ( \
|
|||||||
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%cc");
|
||||||
|
|
||||||
#define SQRADD2(i, j) \
|
#define SQRADD2(i, j) \
|
||||||
asm volatile ( \
|
asm( \
|
||||||
"movd %6,%%mm0 \n\t" \
|
"movd %6,%%mm0 \n\t" \
|
||||||
"movd %7,%%mm1 \n\t" \
|
"movd %7,%%mm1 \n\t" \
|
||||||
"pmuludq %%mm1,%%mm0\n\t" \
|
"pmuludq %%mm1,%%mm0\n\t" \
|
||||||
@ -201,7 +201,7 @@ asm volatile ( \
|
|||||||
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
|
||||||
|
|
||||||
#define SQRADDSC(i, j) \
|
#define SQRADDSC(i, j) \
|
||||||
asm volatile ( \
|
asm( \
|
||||||
"movd %6,%%mm0 \n\t" \
|
"movd %6,%%mm0 \n\t" \
|
||||||
"movd %7,%%mm1 \n\t" \
|
"movd %7,%%mm1 \n\t" \
|
||||||
"pmuludq %%mm1,%%mm0\n\t" \
|
"pmuludq %%mm1,%%mm0\n\t" \
|
||||||
@ -212,7 +212,7 @@ asm volatile ( \
|
|||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j));
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j));
|
||||||
|
|
||||||
#define SQRADDAC(i, j) \
|
#define SQRADDAC(i, j) \
|
||||||
asm volatile ( \
|
asm( \
|
||||||
"movd %6,%%mm0 \n\t" \
|
"movd %6,%%mm0 \n\t" \
|
||||||
"movd %7,%%mm1 \n\t" \
|
"movd %7,%%mm1 \n\t" \
|
||||||
"pmuludq %%mm1,%%mm0\n\t" \
|
"pmuludq %%mm1,%%mm0\n\t" \
|
||||||
@ -225,7 +225,7 @@ asm volatile ( \
|
|||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","%cc");
|
||||||
|
|
||||||
#define SQRADDDB \
|
#define SQRADDDB \
|
||||||
asm ( \
|
asm( \
|
||||||
"addl %6,%0 \n\t" \
|
"addl %6,%0 \n\t" \
|
||||||
"adcl %7,%1 \n\t" \
|
"adcl %7,%1 \n\t" \
|
||||||
"adcl %8,%2 \n\t" \
|
"adcl %8,%2 \n\t" \
|
||||||
|
67
makefile
67
makefile
@ -1,7 +1,13 @@
|
|||||||
#makefile for TomsFastMath
|
#makefile for TomsFastMath
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
CFLAGS += -Wall -W -Wshadow -I./ -O3 -funroll-all-loops
|
VERSION=0.05
|
||||||
|
|
||||||
|
CFLAGS += -Wall -W -Wshadow -I./
|
||||||
|
|
||||||
|
ifndef IGNORE_SPEED
|
||||||
|
|
||||||
|
CFLAGS += -O3 -funroll-all-loops
|
||||||
|
|
||||||
#profiling
|
#profiling
|
||||||
#PROF=-pg -g
|
#PROF=-pg -g
|
||||||
@ -10,9 +16,7 @@ CFLAGS += -Wall -W -Wshadow -I./ -O3 -funroll-all-loops
|
|||||||
#speed
|
#speed
|
||||||
CFLAGS += -fomit-frame-pointer
|
CFLAGS += -fomit-frame-pointer
|
||||||
|
|
||||||
VERSION=0.04
|
endif
|
||||||
|
|
||||||
default: libtfm.a
|
|
||||||
|
|
||||||
OBJECTS = \
|
OBJECTS = \
|
||||||
fp_set.o \
|
fp_set.o \
|
||||||
@ -52,23 +56,29 @@ ifndef INCPATH
|
|||||||
INCPATH=/usr/include
|
INCPATH=/usr/include
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifndef TFM_GROUP
|
ifndef INSTALL_GROUP
|
||||||
GROUP=wheel
|
GROUP=wheel
|
||||||
|
else
|
||||||
|
GROUP=$(INSTALL_GROUP)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifndef TFM_USER
|
ifndef INSTALL_USER
|
||||||
USER=root
|
USER=root
|
||||||
|
else
|
||||||
|
USER=$(INSTALL_USER)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifndef LIBNAME
|
ifndef LIBNAME
|
||||||
LIBNAME=libtfm.a
|
LIBNAME=libtfm.a
|
||||||
endif
|
endif
|
||||||
|
|
||||||
$(LIBNAME): $(OBJECTS)
|
default: $(LIBNAME)
|
||||||
$(AR) $(ARFLAGS) $(LIBNAME) $(OBJECTS)
|
|
||||||
ranlib $(LIBNAME)
|
|
||||||
|
|
||||||
install: libtfm.a
|
$(LIBNAME): $(OBJECTS)
|
||||||
|
$(AR) $(ARFLAGS) $@ $(OBJECTS)
|
||||||
|
ranlib $@
|
||||||
|
|
||||||
|
install: $(LIBNAME)
|
||||||
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(LIBPATH)
|
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(LIBPATH)
|
||||||
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(INCPATH)
|
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(INCPATH)
|
||||||
install -g $(GROUP) -o $(USER) $(LIBNAME) $(DESTDIR)$(LIBPATH)
|
install -g $(GROUP) -o $(USER) $(LIBNAME) $(DESTDIR)$(LIBPATH)
|
||||||
@ -77,17 +87,17 @@ install: libtfm.a
|
|||||||
mtest/mtest: mtest/mtest.c
|
mtest/mtest: mtest/mtest.c
|
||||||
cd mtest ; make mtest
|
cd mtest ; make mtest
|
||||||
|
|
||||||
test: libtfm.a demo/test.o mtest/mtest
|
test: $(LIBNAME) demo/test.o mtest/mtest
|
||||||
$(CC) $(CFLAGS) demo/test.o libtfm.a $(PROF) -o test
|
$(CC) $(CFLAGS) demo/test.o $(LIBNAME) $(PROF) -o test
|
||||||
|
|
||||||
timing: libtfm.a demo/test.o
|
timing: $(LIBNAME) demo/test.o
|
||||||
$(CC) $(CFLAGS) demo/test.o libtfm.a $(PROF) -o test
|
$(CC) $(CFLAGS) demo/test.o $(LIBNAME) $(PROF) -o test
|
||||||
|
|
||||||
stest: libtfm.a demo/stest.o
|
stest: $(LIBNAME) demo/stest.o
|
||||||
$(CC) $(CFLAGS) demo/stest.o libtfm.a -o stest
|
$(CC) $(CFLAGS) demo/stest.o $(LIBNAME) -o stest
|
||||||
|
|
||||||
rsatest: libtfm.a demo/rsa.o
|
rsatest: $(LIBNAME) demo/rsa.o
|
||||||
$(CC) $(CFLAGS) demo/rsa.o libtfm.a -o rsatest
|
$(CC) $(CFLAGS) demo/rsa.o $(LIBNAME) -o rsatest
|
||||||
|
|
||||||
docdvi: tfm.tex
|
docdvi: tfm.tex
|
||||||
touch tfm.ind
|
touch tfm.ind
|
||||||
@ -101,8 +111,23 @@ docs: docdvi
|
|||||||
dvipdf tfm
|
dvipdf tfm
|
||||||
mv -f tfm.pdf doc
|
mv -f tfm.pdf doc
|
||||||
|
|
||||||
|
#This rule cleans the source tree of all compiled code, not including the pdf
|
||||||
|
#documentation.
|
||||||
clean:
|
clean:
|
||||||
rm -f $(OBJECTS) *.a demo/*.o test tfm.aux tfm.dvi tfm.idx tfm.ilg tfm.ind tfm.lof tfm.log tfm.toc stest *~ rsatest *.gcda *.gcno demo/*.gcda demo/*.gcno mtest/*.gcno mtest/*.gcda
|
rm -f `find . -type f | grep "[.]o" | xargs`
|
||||||
|
rm -f `find . -type f | grep "[.]lo" | xargs`
|
||||||
|
rm -f `find . -type f | grep "[.]a" | xargs`
|
||||||
|
rm -f `find . -type f | grep "[.]la" | xargs`
|
||||||
|
rm -f `find . -type f | grep "[.]obj" | xargs`
|
||||||
|
rm -f `find . -type f | grep "[.]lib" | xargs`
|
||||||
|
rm -f `find . -type f | grep "[.]exe" | xargs`
|
||||||
|
rm -f `find . -type f | grep "[.]gcda" | xargs`
|
||||||
|
rm -f `find . -type f | grep "[.]gcno" | xargs`
|
||||||
|
rm -f `find . -type f | grep "[.]il" | xargs`
|
||||||
|
rm -f `find . -type f | grep "[.]dyn" | xargs`
|
||||||
|
rm -f `find . -type f | grep "[.]dpi" | xargs`
|
||||||
|
rm -rf `find . -type d | grep "[.]libs" | xargs`
|
||||||
|
rm -f tfm.aux tfm.dvi tfm.idx tfm.ilg tfm.ind tfm.lof tfm.log tfm.toc
|
||||||
cd mtest ; make clean
|
cd mtest ; make clean
|
||||||
|
|
||||||
no_oops: clean
|
no_oops: clean
|
||||||
@ -116,3 +141,7 @@ zipup: no_oops docs clean
|
|||||||
cp -R ./tomsfastmath/* ./tomsfastmath-$(VERSION)/ ; \
|
cp -R ./tomsfastmath/* ./tomsfastmath-$(VERSION)/ ; \
|
||||||
tar -c tomsfastmath-$(VERSION)/* | bzip2 -9vvc > tfm-$(VERSION).tar.bz2 ; \
|
tar -c tomsfastmath-$(VERSION)/* | bzip2 -9vvc > tfm-$(VERSION).tar.bz2 ; \
|
||||||
zip -9r tfm-$(VERSION).zip tomsfastmath-$(VERSION)/*
|
zip -9r tfm-$(VERSION).zip tomsfastmath-$(VERSION)/*
|
||||||
|
|
||||||
|
# $Source: /cvs/libtom/tomsfastmath/makefile,v $
|
||||||
|
# $Revision: 1.17 $
|
||||||
|
# $Date: 2005/07/30 04:23:55 $
|
||||||
|
55
makefile.gba
55
makefile.gba
@ -1,55 +0,0 @@
|
|||||||
#makefile for TomsFastMath
|
|
||||||
#
|
|
||||||
#For the Gameboy Advance... er.... ARMv4
|
|
||||||
SFLAGS = $(CFLAGS) -Wall -W -Wshadow -I./ -O3 -funroll-all-loops -mthumb -mthumb-interwork -I../devkitadv/mylib/lib
|
|
||||||
CFLAGS += -Wall -W -Wshadow -I./ -O3 -funroll-all-loops -marm -mthumb-interwork -I../devkitadv/mylib/lib
|
|
||||||
|
|
||||||
#profiling
|
|
||||||
#PROF=-pg -g
|
|
||||||
#CFLAGS += $(PROF)
|
|
||||||
|
|
||||||
#speed
|
|
||||||
CFLAGS += -fomit-frame-pointer
|
|
||||||
|
|
||||||
VERSION=0.01
|
|
||||||
|
|
||||||
default: libtfm.a
|
|
||||||
|
|
||||||
OBJECTS = \
|
|
||||||
fp_set.o \
|
|
||||||
\
|
|
||||||
fp_rshd.o fp_lshd.o fp_div_2d.o fp_mod_2d.o fp_mul_2d.o fp_2expt.o \
|
|
||||||
fp_mul_2.o fp_div_2.o \
|
|
||||||
\
|
|
||||||
fp_cnt_lsb.o \
|
|
||||||
\
|
|
||||||
fp_add.o fp_sub.o fp_mul.o fp_sqr.o fp_div.o fp_mod.o \
|
|
||||||
s_fp_add.o s_fp_sub.o \
|
|
||||||
\
|
|
||||||
fp_cmp_d.o fp_add_d.o fp_sub_d.o fp_mul_d.o fp_div_d.o fp_mod_d.o \
|
|
||||||
fp_addmod.o fp_submod.o fp_mulmod.o fp_sqrmod.o fp_invmod.o \
|
|
||||||
fp_gcd.o fp_lcm.o fp_prime_miller_rabin.o fp_isprime.o \
|
|
||||||
fp_prime_random_ex.o fp_mul_comba.o fp_sqr_comba.o \
|
|
||||||
\
|
|
||||||
fp_montgomery_setup.o fp_montgomery_calc_normalization.o fp_montgomery_reduce.o \
|
|
||||||
\
|
|
||||||
fp_exptmod.o \
|
|
||||||
\
|
|
||||||
fp_cmp.o fp_cmp_mag.o \
|
|
||||||
\
|
|
||||||
fp_unsigned_bin_size.o fp_read_unsigned_bin.o fp_to_unsigned_bin.o \
|
|
||||||
fp_signed_bin_size.o fp_read_signed_bin.o fp_to_signed_bin.o \
|
|
||||||
fp_read_radix.o fp_toradix.o fp_count_bits.o fp_reverse.o fp_s_rmap.o \
|
|
||||||
\
|
|
||||||
|
|
||||||
libtfm.a: $(OBJECTS)
|
|
||||||
$(AR) $(ARFLAGS) libtfm.a $(OBJECTS)
|
|
||||||
ranlib libtfm.a
|
|
||||||
|
|
||||||
demo/stest.o: demo/stest.c
|
|
||||||
$(CC) $(SFLAGS) -DGBA_MODE demo/stest.c -c -o demo/stest.o
|
|
||||||
|
|
||||||
stest: libtfm.a demo/stest.o
|
|
||||||
$(CC) -mthumb -mthumb-interwork demo/stest.o libtfm.a ../devkitadv/mylib/lib/gba.a -o stest.elf
|
|
||||||
objcopy -O binary stest.elf stest.bin
|
|
||||||
|
|
109
makefile.shared
Normal file
109
makefile.shared
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
#makefile for TomsFastMath
|
||||||
|
#
|
||||||
|
#
|
||||||
|
|
||||||
|
CC=libtool --mode=compile gcc
|
||||||
|
|
||||||
|
CFLAGS += -Wall -W -Wshadow -I./
|
||||||
|
|
||||||
|
ifndef IGNORE_SPEED
|
||||||
|
|
||||||
|
CFLAGS += -O3 -funroll-all-loops
|
||||||
|
|
||||||
|
#profiling
|
||||||
|
#PROF=-pg -g
|
||||||
|
#CFLAGS += $(PROF)
|
||||||
|
|
||||||
|
#speed
|
||||||
|
CFLAGS += -fomit-frame-pointer
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
VERSION=0:5
|
||||||
|
|
||||||
|
OBJECTS = \
|
||||||
|
fp_set.o \
|
||||||
|
\
|
||||||
|
fp_rshd.o fp_lshd.o fp_div_2d.o fp_mod_2d.o fp_mul_2d.o fp_2expt.o \
|
||||||
|
fp_mul_2.o fp_div_2.o \
|
||||||
|
\
|
||||||
|
fp_cnt_lsb.o \
|
||||||
|
\
|
||||||
|
fp_add.o fp_sub.o fp_mul.o fp_sqr.o fp_div.o fp_mod.o \
|
||||||
|
s_fp_add.o s_fp_sub.o \
|
||||||
|
\
|
||||||
|
fp_cmp_d.o fp_add_d.o fp_sub_d.o fp_mul_d.o fp_div_d.o fp_mod_d.o \
|
||||||
|
fp_addmod.o fp_submod.o fp_mulmod.o fp_sqrmod.o fp_invmod.o \
|
||||||
|
fp_gcd.o fp_lcm.o fp_prime_miller_rabin.o fp_isprime.o \
|
||||||
|
fp_prime_random_ex.o fp_mul_comba.o fp_sqr_comba.o \
|
||||||
|
\
|
||||||
|
fp_montgomery_setup.o fp_montgomery_calc_normalization.o fp_montgomery_reduce.o \
|
||||||
|
\
|
||||||
|
fp_exptmod.o \
|
||||||
|
\
|
||||||
|
fp_cmp.o fp_cmp_mag.o \
|
||||||
|
\
|
||||||
|
fp_unsigned_bin_size.o fp_read_unsigned_bin.o fp_to_unsigned_bin.o \
|
||||||
|
fp_signed_bin_size.o fp_read_signed_bin.o fp_to_signed_bin.o \
|
||||||
|
fp_read_radix.o fp_toradix.o fp_radix_size.o fp_count_bits.o fp_reverse.o fp_s_rmap.o \
|
||||||
|
\
|
||||||
|
fp_ident.o
|
||||||
|
|
||||||
|
HEADERS=tfm.h
|
||||||
|
|
||||||
|
ifndef LIBPATH
|
||||||
|
LIBPATH=/usr/lib
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef INCPATH
|
||||||
|
INCPATH=/usr/include
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef INSTALL_GROUP
|
||||||
|
GROUP=wheel
|
||||||
|
else
|
||||||
|
GROUP=$(INSTALL_GROUP)
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef INSTALL_USER
|
||||||
|
USER=root
|
||||||
|
else
|
||||||
|
USER=$(INSTALL_USER)
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef LIBNAME
|
||||||
|
LIBNAME=libtfm.la
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef LIBNAME_S
|
||||||
|
LIBNAME_S=libtfm.a
|
||||||
|
endif
|
||||||
|
|
||||||
|
default: $(LIBNAME)
|
||||||
|
|
||||||
|
$(LIBNAME): $(OBJECTS)
|
||||||
|
|
||||||
|
install: $(LIBNAME)
|
||||||
|
libtool --silent --mode=link gcc $(CFLAGS) `find . -type f | grep "[.]lo" | xargs` -o $(LIBNAME) -rpath $(LIBPATH) -version-info $(VERSION)
|
||||||
|
libtool --silent --mode=link gcc $(CFLAGS) `find . -type f | grep "[.]o" | xargs` -o $(LIBNAME_S)
|
||||||
|
ranlib $(LIBNAME_S)
|
||||||
|
libtool --silent --mode=install install -c $(LIBNAME) $(LIBPATH)/$(LIBNAME)
|
||||||
|
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(INCPATH)
|
||||||
|
install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH)
|
||||||
|
|
||||||
|
mtest/mtest: mtest/mtest.c
|
||||||
|
cd mtest ; make mtest
|
||||||
|
|
||||||
|
test: $(LIBNAME) demo/test.o mtest/mtest
|
||||||
|
$(CC) $(CFLAGS) demo/test.o $(LIBNAME_S) $(PROF) -o test
|
||||||
|
|
||||||
|
timing: $(LIBNAME) demo/test.o
|
||||||
|
$(CC) $(CFLAGS) demo/test.o $(LIBNAME_S) $(PROF) -o test
|
||||||
|
|
||||||
|
stest: $(LIBNAME) demo/stest.o
|
||||||
|
$(CC) $(CFLAGS) demo/stest.o $(LIBNAME_S) -o stest
|
||||||
|
|
||||||
|
# $Source: /cvs/libtom/tomsfastmath/makefile.shared,v $
|
||||||
|
# $Revision: 1.4 $
|
||||||
|
# $Date: 2005/07/28 03:08:35 $
|
||||||
|
|
218
pre_gen/mpi.c
218
pre_gen/mpi.c
@ -757,6 +757,75 @@ int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d)
|
|||||||
*/
|
*/
|
||||||
#include <tfm.h>
|
#include <tfm.h>
|
||||||
|
|
||||||
|
#ifdef TFM_TIMING_RESISTANT
|
||||||
|
|
||||||
|
/* timing resistant montgomery ladder based exptmod
|
||||||
|
|
||||||
|
Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder", Cryptographic Hardware and Embedded Systems, CHES 2002
|
||||||
|
*/
|
||||||
|
static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
|
||||||
|
{
|
||||||
|
fp_int R[2];
|
||||||
|
fp_digit buf, mp;
|
||||||
|
int err, bitcnt, digidx, y;
|
||||||
|
|
||||||
|
/* now setup montgomery */
|
||||||
|
if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
fp_init(&R[0]);
|
||||||
|
fp_init(&R[1]);
|
||||||
|
|
||||||
|
/* now we need R mod m */
|
||||||
|
fp_montgomery_calc_normalization (&R[0], P);
|
||||||
|
|
||||||
|
/* now set R[1] to G * R mod m (R[0] already holds R mod m) */
|
||||||
|
if (fp_cmp_mag(P, G) != FP_GT) {
|
||||||
|
/* G >= P so we reduce it first */
|
||||||
|
fp_mod(G, P, &R[1]);
|
||||||
|
} else {
|
||||||
|
fp_copy(G, &R[1]);
|
||||||
|
}
|
||||||
|
fp_mulmod (&R[1], &R[0], P, &R[1]);
|
||||||
|
|
||||||
|
/* for j = t-1 downto 0 do
|
||||||
|
r_!k = R0*R1; r_k = r_k^2
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* set initial mode and bit cnt */
|
||||||
|
bitcnt = 1;
|
||||||
|
buf = 0;
|
||||||
|
digidx = X->used - 1;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
/* grab next digit as required */
|
||||||
|
if (--bitcnt == 0) {
|
||||||
|
/* if digidx == -1 we are out of digits so break */
|
||||||
|
if (digidx == -1) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* read next digit and reset bitcnt */
|
||||||
|
buf = X->dp[digidx--];
|
||||||
|
bitcnt = (int)DIGIT_BIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* grab the next msb from the exponent */
|
||||||
|
y = (fp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
|
||||||
|
buf <<= (fp_digit)1;
|
||||||
|
|
||||||
|
/* do ops */
|
||||||
|
fp_mul(&R[0], &R[1], &R[y^1]); fp_montgomery_reduce(&R[y^1], P, mp);
|
||||||
|
fp_sqr(&R[y], &R[y]); fp_montgomery_reduce(&R[y], P, mp);
|
||||||
|
}
|
||||||
|
|
||||||
|
fp_montgomery_reduce(&R[0], P, mp);
|
||||||
|
fp_copy(&R[0], Y);
|
||||||
|
return FP_OKAY;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
/* y = g**x (mod b)
|
/* y = g**x (mod b)
|
||||||
* Some restrictions... x must be positive and < b
|
* Some restrictions... x must be positive and < b
|
||||||
*/
|
*/
|
||||||
@ -916,6 +985,8 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
|
|||||||
return FP_OKAY;
|
return FP_OKAY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
|
int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
|
||||||
{
|
{
|
||||||
@ -1105,6 +1176,111 @@ int main(void)
|
|||||||
*/
|
*/
|
||||||
#include <tfm.h>
|
#include <tfm.h>
|
||||||
|
|
||||||
|
static int fp_invmod_slow (fp_int * a, fp_int * b, fp_int * c)
|
||||||
|
{
|
||||||
|
fp_int x, y, u, v, A, B, C, D;
|
||||||
|
int res;
|
||||||
|
|
||||||
|
/* b cannot be negative or zero */
|
||||||
|
if (b->sign == FP_NEG || fp_iszero(b) == 1) {
|
||||||
|
return FP_VAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* init temps */
|
||||||
|
fp_init(&x); fp_init(&y);
|
||||||
|
fp_init(&u); fp_init(&v);
|
||||||
|
fp_init(&A); fp_init(&B);
|
||||||
|
fp_init(&C); fp_init(&D);
|
||||||
|
|
||||||
|
/* x = a mod b, y = b */
|
||||||
|
if ((res = fp_mod(a, b, &x)) != FP_OKAY) {
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
fp_copy(b, &y);
|
||||||
|
|
||||||
|
/* 2. [modified] if x,y are both even then return an error! */
|
||||||
|
if (fp_iseven (&x) == 1 && fp_iseven (&y) == 1) {
|
||||||
|
return FP_VAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
|
||||||
|
fp_copy (&x, &u);
|
||||||
|
fp_copy (&y, &v);
|
||||||
|
fp_set (&A, 1);
|
||||||
|
fp_set (&D, 1);
|
||||||
|
|
||||||
|
top:
|
||||||
|
/* 4. while u is even do */
|
||||||
|
while (fp_iseven (&u) == 1) {
|
||||||
|
/* 4.1 u = u/2 */
|
||||||
|
fp_div_2 (&u, &u);
|
||||||
|
|
||||||
|
/* 4.2 if A or B is odd then */
|
||||||
|
if (fp_isodd (&A) == 1 || fp_isodd (&B) == 1) {
|
||||||
|
/* A = (A+y)/2, B = (B-x)/2 */
|
||||||
|
fp_add (&A, &y, &A);
|
||||||
|
fp_sub (&B, &x, &B);
|
||||||
|
}
|
||||||
|
/* A = A/2, B = B/2 */
|
||||||
|
fp_div_2 (&A, &A);
|
||||||
|
fp_div_2 (&B, &B);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 5. while v is even do */
|
||||||
|
while (fp_iseven (&v) == 1) {
|
||||||
|
/* 5.1 v = v/2 */
|
||||||
|
fp_div_2 (&v, &v);
|
||||||
|
|
||||||
|
/* 5.2 if C or D is odd then */
|
||||||
|
if (fp_isodd (&C) == 1 || fp_isodd (&D) == 1) {
|
||||||
|
/* C = (C+y)/2, D = (D-x)/2 */
|
||||||
|
fp_add (&C, &y, &C);
|
||||||
|
fp_sub (&D, &x, &D);
|
||||||
|
}
|
||||||
|
/* C = C/2, D = D/2 */
|
||||||
|
fp_div_2 (&C, &C);
|
||||||
|
fp_div_2 (&D, &D);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 6. if u >= v then */
|
||||||
|
if (fp_cmp (&u, &v) != FP_LT) {
|
||||||
|
/* u = u - v, A = A - C, B = B - D */
|
||||||
|
fp_sub (&u, &v, &u);
|
||||||
|
fp_sub (&A, &C, &A);
|
||||||
|
fp_sub (&B, &D, &B);
|
||||||
|
} else {
|
||||||
|
/* v = v - u, C = C - A, D = D - B */
|
||||||
|
fp_sub (&v, &u, &v);
|
||||||
|
fp_sub (&C, &A, &C);
|
||||||
|
fp_sub (&D, &B, &D);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if not zero goto step 4 */
|
||||||
|
if (fp_iszero (&u) == 0)
|
||||||
|
goto top;
|
||||||
|
|
||||||
|
/* now a = C, b = D, gcd == g*v */
|
||||||
|
|
||||||
|
/* if v != 1 then there is no inverse */
|
||||||
|
if (fp_cmp_d (&v, 1) != FP_EQ) {
|
||||||
|
return FP_VAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if C is too low (negative), add b until it is non-negative */
|
||||||
|
while (fp_cmp_d(&C, 0) == FP_LT) {
|
||||||
|
fp_add(&C, b, &C);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* too big */
|
||||||
|
while (fp_cmp_mag(&C, b) != FP_LT) {
|
||||||
|
fp_sub(&C, b, &C);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* C is now the inverse */
|
||||||
|
fp_copy(&C, c);
|
||||||
|
return FP_OKAY;
|
||||||
|
}
|
||||||
|
|
||||||
/* c = 1/a (mod b) for odd b only */
|
/* c = 1/a (mod b) for odd b only */
|
||||||
int fp_invmod(fp_int *a, fp_int *b, fp_int *c)
|
int fp_invmod(fp_int *a, fp_int *b, fp_int *c)
|
||||||
{
|
{
|
||||||
@ -1113,7 +1289,7 @@ int fp_invmod(fp_int *a, fp_int *b, fp_int *c)
|
|||||||
|
|
||||||
/* 2. [modified] b must be odd */
|
/* 2. [modified] b must be odd */
|
||||||
if (fp_iseven (b) == FP_YES) {
|
if (fp_iseven (b) == FP_YES) {
|
||||||
return FP_VAL;
|
return fp_invmod_slow(a,b,c);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* init all our temps */
|
/* init all our temps */
|
||||||
@ -1814,8 +1990,6 @@ asm( \
|
|||||||
|
|
||||||
|
|
||||||
#define LO 0
|
#define LO 0
|
||||||
#define HI 1
|
|
||||||
#define CY 2
|
|
||||||
|
|
||||||
/* computes x/R == x (mod N) via Montgomery Reduction */
|
/* computes x/R == x (mod N) via Montgomery Reduction */
|
||||||
void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
|
void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
|
||||||
@ -1862,7 +2036,7 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
|
|||||||
}
|
}
|
||||||
LOOP_END;
|
LOOP_END;
|
||||||
while (cy) {
|
while (cy) {
|
||||||
PROPCARRY; // cy = cy > (*_c += cy);
|
PROPCARRY;
|
||||||
++_c;
|
++_c;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1889,11 +2063,11 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* $Source$ */
|
/* $Source$ */
|
||||||
/* $Revision$ */
|
/* $Revision$ */
|
||||||
/* $Date$ */
|
/* $Date$ */
|
||||||
|
|
||||||
|
|
||||||
/* End: fp_montgomery_reduce.c */
|
/* End: fp_montgomery_reduce.c */
|
||||||
|
|
||||||
/* Start: fp_montgomery_setup.c */
|
/* Start: fp_montgomery_setup.c */
|
||||||
@ -2270,7 +2444,7 @@ void fp_mul_2d(fp_int *a, int b, fp_int *c)
|
|||||||
|
|
||||||
/* this should multiply i and j */
|
/* this should multiply i and j */
|
||||||
#define MULADD(i, j) \
|
#define MULADD(i, j) \
|
||||||
asm ( \
|
asm( \
|
||||||
"movl %6,%%eax \n\t" \
|
"movl %6,%%eax \n\t" \
|
||||||
"mull %7 \n\t" \
|
"mull %7 \n\t" \
|
||||||
"addl %%eax,%0 \n\t" \
|
"addl %%eax,%0 \n\t" \
|
||||||
@ -2341,7 +2515,7 @@ asm ( \
|
|||||||
|
|
||||||
/* this should multiply i and j */
|
/* this should multiply i and j */
|
||||||
#define MULADD(i, j) \
|
#define MULADD(i, j) \
|
||||||
asm volatile ( \
|
asm( \
|
||||||
"movd %6,%%mm0 \n\t" \
|
"movd %6,%%mm0 \n\t" \
|
||||||
"movd %7,%%mm1 \n\t" \
|
"movd %7,%%mm1 \n\t" \
|
||||||
"pmuludq %%mm1,%%mm0\n\t" \
|
"pmuludq %%mm1,%%mm0\n\t" \
|
||||||
@ -5678,7 +5852,7 @@ Obvious points of optimization
|
|||||||
#define COMBA_FINI
|
#define COMBA_FINI
|
||||||
|
|
||||||
#define SQRADD(i, j) \
|
#define SQRADD(i, j) \
|
||||||
asm volatile ( \
|
asm( \
|
||||||
"movl %6,%%eax \n\t" \
|
"movl %6,%%eax \n\t" \
|
||||||
"mull %%eax \n\t" \
|
"mull %%eax \n\t" \
|
||||||
"addl %%eax,%0 \n\t" \
|
"addl %%eax,%0 \n\t" \
|
||||||
@ -5687,7 +5861,7 @@ asm volatile ( \
|
|||||||
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc");
|
||||||
|
|
||||||
#define SQRADD2(i, j) \
|
#define SQRADD2(i, j) \
|
||||||
asm volatile ( \
|
asm( \
|
||||||
"movl %6,%%eax \n\t" \
|
"movl %6,%%eax \n\t" \
|
||||||
"mull %7 \n\t" \
|
"mull %7 \n\t" \
|
||||||
"addl %%eax,%0 \n\t" \
|
"addl %%eax,%0 \n\t" \
|
||||||
@ -5699,7 +5873,7 @@ asm volatile ( \
|
|||||||
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
|
||||||
|
|
||||||
#define SQRADDSC(i, j) \
|
#define SQRADDSC(i, j) \
|
||||||
asm ( \
|
asm( \
|
||||||
"movl %6,%%eax \n\t" \
|
"movl %6,%%eax \n\t" \
|
||||||
"mull %7 \n\t" \
|
"mull %7 \n\t" \
|
||||||
"movl %%eax,%0 \n\t" \
|
"movl %%eax,%0 \n\t" \
|
||||||
@ -5708,7 +5882,7 @@ asm ( \
|
|||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
|
||||||
|
|
||||||
#define SQRADDAC(i, j) \
|
#define SQRADDAC(i, j) \
|
||||||
asm ( \
|
asm( \
|
||||||
"movl %6,%%eax \n\t" \
|
"movl %6,%%eax \n\t" \
|
||||||
"mull %7 \n\t" \
|
"mull %7 \n\t" \
|
||||||
"addl %%eax,%0 \n\t" \
|
"addl %%eax,%0 \n\t" \
|
||||||
@ -5717,7 +5891,7 @@ asm ( \
|
|||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
|
||||||
|
|
||||||
#define SQRADDDB \
|
#define SQRADDDB \
|
||||||
asm ( \
|
asm( \
|
||||||
"addl %6,%0 \n\t" \
|
"addl %6,%0 \n\t" \
|
||||||
"adcl %7,%1 \n\t" \
|
"adcl %7,%1 \n\t" \
|
||||||
"adcl %8,%2 \n\t" \
|
"adcl %8,%2 \n\t" \
|
||||||
@ -5746,7 +5920,7 @@ asm ( \
|
|||||||
#define COMBA_FINI
|
#define COMBA_FINI
|
||||||
|
|
||||||
#define SQRADD(i, j) \
|
#define SQRADD(i, j) \
|
||||||
asm ( \
|
asm( \
|
||||||
"movq %6,%%rax \n\t" \
|
"movq %6,%%rax \n\t" \
|
||||||
"mulq %%rax \n\t" \
|
"mulq %%rax \n\t" \
|
||||||
"addq %%rax,%0 \n\t" \
|
"addq %%rax,%0 \n\t" \
|
||||||
@ -5755,7 +5929,7 @@ asm ( \
|
|||||||
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","%cc");
|
||||||
|
|
||||||
#define SQRADD2(i, j) \
|
#define SQRADD2(i, j) \
|
||||||
asm ( \
|
asm( \
|
||||||
"movq %6,%%rax \n\t" \
|
"movq %6,%%rax \n\t" \
|
||||||
"mulq %7 \n\t" \
|
"mulq %7 \n\t" \
|
||||||
"addq %%rax,%0 \n\t" \
|
"addq %%rax,%0 \n\t" \
|
||||||
@ -5767,7 +5941,7 @@ asm ( \
|
|||||||
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
|
||||||
|
|
||||||
#define SQRADDSC(i, j) \
|
#define SQRADDSC(i, j) \
|
||||||
asm ( \
|
asm( \
|
||||||
"movq %6,%%rax \n\t" \
|
"movq %6,%%rax \n\t" \
|
||||||
"mulq %7 \n\t" \
|
"mulq %7 \n\t" \
|
||||||
"movq %%rax,%0 \n\t" \
|
"movq %%rax,%0 \n\t" \
|
||||||
@ -5776,7 +5950,7 @@ asm ( \
|
|||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
|
||||||
|
|
||||||
#define SQRADDAC(i, j) \
|
#define SQRADDAC(i, j) \
|
||||||
asm ( \
|
asm( \
|
||||||
"movq %6,%%rax \n\t" \
|
"movq %6,%%rax \n\t" \
|
||||||
"mulq %7 \n\t" \
|
"mulq %7 \n\t" \
|
||||||
"addq %%rax,%0 \n\t" \
|
"addq %%rax,%0 \n\t" \
|
||||||
@ -5785,7 +5959,7 @@ asm ( \
|
|||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
|
||||||
|
|
||||||
#define SQRADDDB \
|
#define SQRADDDB \
|
||||||
asm ( \
|
asm( \
|
||||||
"addq %6,%0 \n\t" \
|
"addq %6,%0 \n\t" \
|
||||||
"adcq %7,%1 \n\t" \
|
"adcq %7,%1 \n\t" \
|
||||||
"adcq %8,%2 \n\t" \
|
"adcq %8,%2 \n\t" \
|
||||||
@ -5815,7 +5989,7 @@ asm ( \
|
|||||||
asm("emms");
|
asm("emms");
|
||||||
|
|
||||||
#define SQRADD(i, j) \
|
#define SQRADD(i, j) \
|
||||||
asm volatile ( \
|
asm( \
|
||||||
"movd %6,%%mm0 \n\t" \
|
"movd %6,%%mm0 \n\t" \
|
||||||
"pmuludq %%mm0,%%mm0\n\t" \
|
"pmuludq %%mm0,%%mm0\n\t" \
|
||||||
"movd %%mm0,%%eax \n\t" \
|
"movd %%mm0,%%eax \n\t" \
|
||||||
@ -5827,7 +6001,7 @@ asm volatile ( \
|
|||||||
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%cc");
|
||||||
|
|
||||||
#define SQRADD2(i, j) \
|
#define SQRADD2(i, j) \
|
||||||
asm volatile ( \
|
asm( \
|
||||||
"movd %6,%%mm0 \n\t" \
|
"movd %6,%%mm0 \n\t" \
|
||||||
"movd %7,%%mm1 \n\t" \
|
"movd %7,%%mm1 \n\t" \
|
||||||
"pmuludq %%mm1,%%mm0\n\t" \
|
"pmuludq %%mm1,%%mm0\n\t" \
|
||||||
@ -5843,7 +6017,7 @@ asm volatile ( \
|
|||||||
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
|
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
|
||||||
|
|
||||||
#define SQRADDSC(i, j) \
|
#define SQRADDSC(i, j) \
|
||||||
asm volatile ( \
|
asm( \
|
||||||
"movd %6,%%mm0 \n\t" \
|
"movd %6,%%mm0 \n\t" \
|
||||||
"movd %7,%%mm1 \n\t" \
|
"movd %7,%%mm1 \n\t" \
|
||||||
"pmuludq %%mm1,%%mm0\n\t" \
|
"pmuludq %%mm1,%%mm0\n\t" \
|
||||||
@ -5854,7 +6028,7 @@ asm volatile ( \
|
|||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j));
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j));
|
||||||
|
|
||||||
#define SQRADDAC(i, j) \
|
#define SQRADDAC(i, j) \
|
||||||
asm volatile ( \
|
asm( \
|
||||||
"movd %6,%%mm0 \n\t" \
|
"movd %6,%%mm0 \n\t" \
|
||||||
"movd %7,%%mm1 \n\t" \
|
"movd %7,%%mm1 \n\t" \
|
||||||
"pmuludq %%mm1,%%mm0\n\t" \
|
"pmuludq %%mm1,%%mm0\n\t" \
|
||||||
@ -5867,7 +6041,7 @@ asm volatile ( \
|
|||||||
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","%cc");
|
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","%cc");
|
||||||
|
|
||||||
#define SQRADDDB \
|
#define SQRADDDB \
|
||||||
asm ( \
|
asm( \
|
||||||
"addl %6,%0 \n\t" \
|
"addl %6,%0 \n\t" \
|
||||||
"adcl %7,%1 \n\t" \
|
"adcl %7,%1 \n\t" \
|
||||||
"adcl %8,%2 \n\t" \
|
"adcl %8,%2 \n\t" \
|
||||||
|
56
tfm.aux
56
tfm.aux
@ -17,40 +17,42 @@
|
|||||||
\@writefile{toc}{\contentsline {section}{\numberline {1.1}What is TomsFastMath?}{1}{section.1.1}}
|
\@writefile{toc}{\contentsline {section}{\numberline {1.1}What is TomsFastMath?}{1}{section.1.1}}
|
||||||
\@writefile{toc}{\contentsline {section}{\numberline {1.2}License}{2}{section.1.2}}
|
\@writefile{toc}{\contentsline {section}{\numberline {1.2}License}{2}{section.1.2}}
|
||||||
\@writefile{toc}{\contentsline {section}{\numberline {1.3}Building}{2}{section.1.3}}
|
\@writefile{toc}{\contentsline {section}{\numberline {1.3}Building}{2}{section.1.3}}
|
||||||
\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.1}Build Limitations}{2}{subsection.1.3.1}}
|
\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.1}Intel CC}{2}{subsection.1.3.1}}
|
||||||
\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.2}Optimization Configuration}{2}{subsection.1.3.2}}
|
\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.2}MSVC}{2}{subsection.1.3.2}}
|
||||||
|
\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.3}Build Limitations}{3}{subsection.1.3.3}}
|
||||||
|
\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.4}Optimization Configuration}{3}{subsection.1.3.4}}
|
||||||
\@writefile{toc}{\contentsline {subsubsection}{x86--32}{3}{section*.3}}
|
\@writefile{toc}{\contentsline {subsubsection}{x86--32}{3}{section*.3}}
|
||||||
\@writefile{toc}{\contentsline {subsubsection}{SSE2}{3}{section*.4}}
|
\@writefile{toc}{\contentsline {subsubsection}{SSE2}{3}{section*.4}}
|
||||||
\@writefile{toc}{\contentsline {subsubsection}{x86--64}{3}{section*.5}}
|
\@writefile{toc}{\contentsline {subsubsection}{x86--64}{4}{section*.5}}
|
||||||
\@writefile{toc}{\contentsline {subsubsection}{ARM}{3}{section*.6}}
|
\@writefile{toc}{\contentsline {subsubsection}{ARM}{4}{section*.6}}
|
||||||
\@writefile{toc}{\contentsline {subsubsection}{PPC32}{3}{section*.7}}
|
\@writefile{toc}{\contentsline {subsubsection}{PPC32}{4}{section*.7}}
|
||||||
\@writefile{toc}{\contentsline {subsubsection}{Future Releases}{4}{section*.8}}
|
\@writefile{toc}{\contentsline {subsubsection}{Future Releases}{4}{section*.8}}
|
||||||
\@writefile{lof}{\contentsline {figure}{\numberline {1.1}{\ignorespaces Recommended Build Modes}}{4}{figure.1.1}}
|
\@writefile{lof}{\contentsline {figure}{\numberline {1.1}{\ignorespaces Recommended Build Modes}}{4}{figure.1.1}}
|
||||||
\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.3}Precision Configuration}{4}{subsection.1.3.3}}
|
\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.5}Precision Configuration}{5}{subsection.1.3.5}}
|
||||||
\@writefile{toc}{\contentsline {chapter}{\numberline {2}Getting Started}{5}{chapter.2}}
|
\@writefile{toc}{\contentsline {chapter}{\numberline {2}Getting Started}{7}{chapter.2}}
|
||||||
\@writefile{lof}{\addvspace {10\p@ }}
|
\@writefile{lof}{\addvspace {10\p@ }}
|
||||||
\@writefile{lot}{\addvspace {10\p@ }}
|
\@writefile{lot}{\addvspace {10\p@ }}
|
||||||
\@writefile{toc}{\contentsline {section}{\numberline {2.1}Data Types}{5}{section.2.1}}
|
\@writefile{toc}{\contentsline {section}{\numberline {2.1}Data Types}{7}{section.2.1}}
|
||||||
\@writefile{toc}{\contentsline {section}{\numberline {2.2}Initialization}{6}{section.2.2}}
|
\@writefile{toc}{\contentsline {section}{\numberline {2.2}Initialization}{8}{section.2.2}}
|
||||||
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.1}Simple Initialization}{6}{subsection.2.2.1}}
|
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.1}Simple Initialization}{8}{subsection.2.2.1}}
|
||||||
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.2}Initialize Small Constants}{6}{subsection.2.2.2}}
|
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.2}Initialize Small Constants}{8}{subsection.2.2.2}}
|
||||||
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.3}Initialize Copy}{6}{subsection.2.2.3}}
|
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.3}Initialize Copy}{8}{subsection.2.2.3}}
|
||||||
\@writefile{toc}{\contentsline {chapter}{\numberline {3}Arithmetic Operations}{7}{chapter.3}}
|
\@writefile{toc}{\contentsline {chapter}{\numberline {3}Arithmetic Operations}{9}{chapter.3}}
|
||||||
\@writefile{lof}{\addvspace {10\p@ }}
|
\@writefile{lof}{\addvspace {10\p@ }}
|
||||||
\@writefile{lot}{\addvspace {10\p@ }}
|
\@writefile{lot}{\addvspace {10\p@ }}
|
||||||
\@writefile{toc}{\contentsline {section}{\numberline {3.1}Odds and Evens}{7}{section.3.1}}
|
\@writefile{toc}{\contentsline {section}{\numberline {3.1}Odds and Evens}{9}{section.3.1}}
|
||||||
\@writefile{toc}{\contentsline {section}{\numberline {3.2}Sign Manipulation}{7}{section.3.2}}
|
\@writefile{toc}{\contentsline {section}{\numberline {3.2}Sign Manipulation}{9}{section.3.2}}
|
||||||
\@writefile{toc}{\contentsline {section}{\numberline {3.3}Comparisons}{8}{section.3.3}}
|
\@writefile{toc}{\contentsline {section}{\numberline {3.3}Comparisons}{10}{section.3.3}}
|
||||||
\@writefile{toc}{\contentsline {section}{\numberline {3.4}Shifting}{8}{section.3.4}}
|
\@writefile{toc}{\contentsline {section}{\numberline {3.4}Shifting}{10}{section.3.4}}
|
||||||
\@writefile{toc}{\contentsline {section}{\numberline {3.5}Basic Algebra}{9}{section.3.5}}
|
\@writefile{toc}{\contentsline {section}{\numberline {3.5}Basic Algebra}{11}{section.3.5}}
|
||||||
\@writefile{toc}{\contentsline {section}{\numberline {3.6}Modular Exponentiation}{9}{section.3.6}}
|
\@writefile{toc}{\contentsline {section}{\numberline {3.6}Modular Exponentiation}{11}{section.3.6}}
|
||||||
\@writefile{toc}{\contentsline {section}{\numberline {3.7}Number Theoretic}{9}{section.3.7}}
|
\@writefile{toc}{\contentsline {section}{\numberline {3.7}Number Theoretic}{11}{section.3.7}}
|
||||||
\@writefile{toc}{\contentsline {section}{\numberline {3.8}Prime Numbers}{10}{section.3.8}}
|
\@writefile{toc}{\contentsline {section}{\numberline {3.8}Prime Numbers}{12}{section.3.8}}
|
||||||
\@writefile{toc}{\contentsline {chapter}{\numberline {4}Porting TomsFastMath}{11}{chapter.4}}
|
\@writefile{toc}{\contentsline {chapter}{\numberline {4}Porting TomsFastMath}{13}{chapter.4}}
|
||||||
\@writefile{lof}{\addvspace {10\p@ }}
|
\@writefile{lof}{\addvspace {10\p@ }}
|
||||||
\@writefile{lot}{\addvspace {10\p@ }}
|
\@writefile{lot}{\addvspace {10\p@ }}
|
||||||
\newlabel{chap:asmops}{{4}{11}{Porting TomsFastMath\relax }{chapter.4}{}}
|
\newlabel{chap:asmops}{{4}{13}{Porting TomsFastMath\relax }{chapter.4}{}}
|
||||||
\@writefile{toc}{\contentsline {section}{\numberline {4.1}Getting Started}{11}{section.4.1}}
|
\@writefile{toc}{\contentsline {section}{\numberline {4.1}Getting Started}{13}{section.4.1}}
|
||||||
\@writefile{toc}{\contentsline {section}{\numberline {4.2}Multiply with Comba}{11}{section.4.2}}
|
\@writefile{toc}{\contentsline {section}{\numberline {4.2}Multiply with Comba}{13}{section.4.2}}
|
||||||
\@writefile{toc}{\contentsline {section}{\numberline {4.3}Squaring with Comba}{13}{section.4.3}}
|
\@writefile{toc}{\contentsline {section}{\numberline {4.3}Squaring with Comba}{15}{section.4.3}}
|
||||||
\@writefile{toc}{\contentsline {section}{\numberline {4.4}Montgomery with Comba}{15}{section.4.4}}
|
\@writefile{toc}{\contentsline {section}{\numberline {4.4}Montgomery with Comba}{17}{section.4.4}}
|
||||||
|
17
tfm.h
17
tfm.h
@ -48,6 +48,11 @@
|
|||||||
*/
|
*/
|
||||||
/* #define TFM_PRESCOTT */
|
/* #define TFM_PRESCOTT */
|
||||||
|
|
||||||
|
/* Do we want timing resistant fp_exptmod() ?
|
||||||
|
* This makes it slower but also timing invariant with respect to the exponent
|
||||||
|
*/
|
||||||
|
/* #define TFM_TIMING_RESISTANT */
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Max size of any number in bits. Basically the largest size you will be multiplying
|
/* Max size of any number in bits. Basically the largest size you will be multiplying
|
||||||
@ -355,15 +360,25 @@ int fp_toradix_n(fp_int * a, char *str, int radix, int maxlen);
|
|||||||
void s_fp_add(fp_int *a, fp_int *b, fp_int *c);
|
void s_fp_add(fp_int *a, fp_int *b, fp_int *c);
|
||||||
void s_fp_sub(fp_int *a, fp_int *b, fp_int *c);
|
void s_fp_sub(fp_int *a, fp_int *b, fp_int *c);
|
||||||
void bn_reverse(unsigned char *s, int len);
|
void bn_reverse(unsigned char *s, int len);
|
||||||
|
|
||||||
void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C);
|
void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C);
|
||||||
|
|
||||||
|
#ifdef TFM_SMALL_SET
|
||||||
|
void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C);
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef TFM_HUGE
|
#ifdef TFM_HUGE
|
||||||
void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C);
|
void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C);
|
||||||
void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C);
|
void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C);
|
||||||
void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C);
|
void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C);
|
||||||
#endif
|
#endif
|
||||||
void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C);
|
|
||||||
|
|
||||||
|
void fp_sqr_comba(fp_int *A, fp_int *B);
|
||||||
|
|
||||||
|
#ifdef TFM_SMALL_SET
|
||||||
void fp_sqr_comba_small(fp_int *A, fp_int *B);
|
void fp_sqr_comba_small(fp_int *A, fp_int *B);
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef TFM_HUGE
|
#ifdef TFM_HUGE
|
||||||
void fp_sqr_comba32(fp_int *A, fp_int *B);
|
void fp_sqr_comba32(fp_int *A, fp_int *B);
|
||||||
void fp_sqr_comba48(fp_int *A, fp_int *B);
|
void fp_sqr_comba48(fp_int *A, fp_int *B);
|
||||||
|
58
tfm.idx
58
tfm.idx
@ -1,29 +1,29 @@
|
|||||||
\indexentry{fp\_init|hyperpage}{6}
|
\indexentry{fp\_init|hyperpage}{8}
|
||||||
\indexentry{fp\_set|hyperpage}{6}
|
\indexentry{fp\_set|hyperpage}{8}
|
||||||
\indexentry{fp\_init\_copy|hyperpage}{6}
|
\indexentry{fp\_init\_copy|hyperpage}{8}
|
||||||
\indexentry{fp\_iszero|hyperpage}{7}
|
\indexentry{fp\_iszero|hyperpage}{9}
|
||||||
\indexentry{fp\_iseven|hyperpage}{7}
|
\indexentry{fp\_iseven|hyperpage}{9}
|
||||||
\indexentry{fp\_isodd|hyperpage}{7}
|
\indexentry{fp\_isodd|hyperpage}{9}
|
||||||
\indexentry{fp\_neg|hyperpage}{7}
|
\indexentry{fp\_neg|hyperpage}{9}
|
||||||
\indexentry{fp\_abs|hyperpage}{7}
|
\indexentry{fp\_abs|hyperpage}{9}
|
||||||
\indexentry{fp\_cmp|hyperpage}{8}
|
\indexentry{fp\_cmp|hyperpage}{10}
|
||||||
\indexentry{fp\_cmp\_mag|hyperpage}{8}
|
\indexentry{fp\_cmp\_mag|hyperpage}{10}
|
||||||
\indexentry{fp\_lshd|hyperpage}{8}
|
\indexentry{fp\_lshd|hyperpage}{10}
|
||||||
\indexentry{fp\_rshd|hyperpage}{8}
|
\indexentry{fp\_rshd|hyperpage}{10}
|
||||||
\indexentry{fp\_div\_2d|hyperpage}{8}
|
\indexentry{fp\_div\_2d|hyperpage}{10}
|
||||||
\indexentry{fp\_mod\_2d|hyperpage}{8}
|
\indexentry{fp\_mod\_2d|hyperpage}{10}
|
||||||
\indexentry{fp\_mul\_2d|hyperpage}{8}
|
\indexentry{fp\_mul\_2d|hyperpage}{10}
|
||||||
\indexentry{fp\_div\_2|hyperpage}{8}
|
\indexentry{fp\_div\_2|hyperpage}{10}
|
||||||
\indexentry{fp\_mul\_2|hyperpage}{8}
|
\indexentry{fp\_mul\_2|hyperpage}{10}
|
||||||
\indexentry{fp\_cnt\_lsb|hyperpage}{8}
|
\indexentry{fp\_cnt\_lsb|hyperpage}{10}
|
||||||
\indexentry{fp\_add|hyperpage}{9}
|
\indexentry{fp\_add|hyperpage}{11}
|
||||||
\indexentry{fp\_sub|hyperpage}{9}
|
\indexentry{fp\_sub|hyperpage}{11}
|
||||||
\indexentry{fp\_mul|hyperpage}{9}
|
\indexentry{fp\_mul|hyperpage}{11}
|
||||||
\indexentry{fp\_sqr|hyperpage}{9}
|
\indexentry{fp\_sqr|hyperpage}{11}
|
||||||
\indexentry{fp\_div|hyperpage}{9}
|
\indexentry{fp\_div|hyperpage}{11}
|
||||||
\indexentry{fp\_mod|hyperpage}{9}
|
\indexentry{fp\_mod|hyperpage}{11}
|
||||||
\indexentry{fp\_exptmod|hyperpage}{9}
|
\indexentry{fp\_exptmod|hyperpage}{11}
|
||||||
\indexentry{fp\_invmod|hyperpage}{9}
|
\indexentry{fp\_invmod|hyperpage}{11}
|
||||||
\indexentry{fp\_gcd|hyperpage}{9}
|
\indexentry{fp\_gcd|hyperpage}{11}
|
||||||
\indexentry{fp\_lcm|hyperpage}{9}
|
\indexentry{fp\_lcm|hyperpage}{11}
|
||||||
\indexentry{fp\_isprime|hyperpage}{10}
|
\indexentry{fp\_isprime|hyperpage}{12}
|
||||||
|
58
tfm.ind
58
tfm.ind
@ -1,33 +1,33 @@
|
|||||||
\begin{theindex}
|
\begin{theindex}
|
||||||
|
|
||||||
\item fp\_abs, \hyperpage{7}
|
\item fp\_abs, \hyperpage{9}
|
||||||
\item fp\_add, \hyperpage{9}
|
\item fp\_add, \hyperpage{11}
|
||||||
\item fp\_cmp, \hyperpage{8}
|
\item fp\_cmp, \hyperpage{10}
|
||||||
\item fp\_cmp\_mag, \hyperpage{8}
|
\item fp\_cmp\_mag, \hyperpage{10}
|
||||||
\item fp\_cnt\_lsb, \hyperpage{8}
|
\item fp\_cnt\_lsb, \hyperpage{10}
|
||||||
\item fp\_div, \hyperpage{9}
|
\item fp\_div, \hyperpage{11}
|
||||||
\item fp\_div\_2, \hyperpage{8}
|
\item fp\_div\_2, \hyperpage{10}
|
||||||
\item fp\_div\_2d, \hyperpage{8}
|
\item fp\_div\_2d, \hyperpage{10}
|
||||||
\item fp\_exptmod, \hyperpage{9}
|
\item fp\_exptmod, \hyperpage{11}
|
||||||
\item fp\_gcd, \hyperpage{9}
|
\item fp\_gcd, \hyperpage{11}
|
||||||
\item fp\_init, \hyperpage{6}
|
\item fp\_init, \hyperpage{8}
|
||||||
\item fp\_init\_copy, \hyperpage{6}
|
\item fp\_init\_copy, \hyperpage{8}
|
||||||
\item fp\_invmod, \hyperpage{9}
|
\item fp\_invmod, \hyperpage{11}
|
||||||
\item fp\_iseven, \hyperpage{7}
|
\item fp\_iseven, \hyperpage{9}
|
||||||
\item fp\_isodd, \hyperpage{7}
|
\item fp\_isodd, \hyperpage{9}
|
||||||
\item fp\_isprime, \hyperpage{10}
|
\item fp\_isprime, \hyperpage{12}
|
||||||
\item fp\_iszero, \hyperpage{7}
|
\item fp\_iszero, \hyperpage{9}
|
||||||
\item fp\_lcm, \hyperpage{9}
|
\item fp\_lcm, \hyperpage{11}
|
||||||
\item fp\_lshd, \hyperpage{8}
|
\item fp\_lshd, \hyperpage{10}
|
||||||
\item fp\_mod, \hyperpage{9}
|
\item fp\_mod, \hyperpage{11}
|
||||||
\item fp\_mod\_2d, \hyperpage{8}
|
\item fp\_mod\_2d, \hyperpage{10}
|
||||||
\item fp\_mul, \hyperpage{9}
|
\item fp\_mul, \hyperpage{11}
|
||||||
\item fp\_mul\_2, \hyperpage{8}
|
\item fp\_mul\_2, \hyperpage{10}
|
||||||
\item fp\_mul\_2d, \hyperpage{8}
|
\item fp\_mul\_2d, \hyperpage{10}
|
||||||
\item fp\_neg, \hyperpage{7}
|
\item fp\_neg, \hyperpage{9}
|
||||||
\item fp\_rshd, \hyperpage{8}
|
\item fp\_rshd, \hyperpage{10}
|
||||||
\item fp\_set, \hyperpage{6}
|
\item fp\_set, \hyperpage{8}
|
||||||
\item fp\_sqr, \hyperpage{9}
|
\item fp\_sqr, \hyperpage{11}
|
||||||
\item fp\_sub, \hyperpage{9}
|
\item fp\_sub, \hyperpage{11}
|
||||||
|
|
||||||
\end{theindex}
|
\end{theindex}
|
||||||
|
52
tfm.log
52
tfm.log
@ -1,4 +1,4 @@
|
|||||||
This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) (format=latex 2005.4.10) 23 JUL 2005 07:42
|
This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) (format=latex 2005.4.10) 1 AUG 2005 13:34
|
||||||
entering extended mode
|
entering extended mode
|
||||||
**tfm
|
**tfm
|
||||||
(./tfm.tex
|
(./tfm.tex
|
||||||
@ -216,107 +216,107 @@ File: umsb.fd 2002/01/19 v2.2g AMS font definitions
|
|||||||
Chapter 1.
|
Chapter 1.
|
||||||
[1
|
[1
|
||||||
|
|
||||||
] [2] [3] [4]
|
] [2] [3] [4] [5] [6
|
||||||
|
|
||||||
|
]
|
||||||
Chapter 2.
|
Chapter 2.
|
||||||
|
|
||||||
Underfull \vbox (badness 7649) has occurred while \output is active []
|
Underfull \vbox (badness 7649) has occurred while \output is active []
|
||||||
|
|
||||||
[5
|
[7]
|
||||||
|
[8]
|
||||||
]
|
|
||||||
[6]
|
|
||||||
Chapter 3.
|
Chapter 3.
|
||||||
[7
|
[9
|
||||||
|
|
||||||
] [8] [9] [10]
|
] [10] [11] [12]
|
||||||
Chapter 4.
|
Chapter 4.
|
||||||
[11
|
[13
|
||||||
|
|
||||||
] [12] [13]
|
] [14] [15]
|
||||||
Overfull \hbox (74.99634pt too wide) in paragraph at lines 547--547
|
Overfull \hbox (74.99634pt too wide) in paragraph at lines 559--559
|
||||||
[]\OT1/cmtt/m/n/10 #define SQRADDSC(i, j)
|
[]\OT1/cmtt/m/n/10 #define SQRADDSC(i, j)
|
||||||
\[]
|
\[]
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Overfull \hbox (74.99634pt too wide) in paragraph at lines 547--547
|
Overfull \hbox (74.99634pt too wide) in paragraph at lines 559--559
|
||||||
[] \OT1/cmtt/m/n/10 do { fp_word t;
|
[] \OT1/cmtt/m/n/10 do { fp_word t;
|
||||||
\[]
|
\[]
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Overfull \hbox (74.99634pt too wide) in paragraph at lines 547--547
|
Overfull \hbox (74.99634pt too wide) in paragraph at lines 559--559
|
||||||
[] \OT1/cmtt/m/n/10 t = ((fp_word)i) * ((fp_word)j);
|
[] \OT1/cmtt/m/n/10 t = ((fp_word)i) * ((fp_word)j);
|
||||||
\[]
|
\[]
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Overfull \hbox (74.99634pt too wide) in paragraph at lines 547--547
|
Overfull \hbox (74.99634pt too wide) in paragraph at lines 559--559
|
||||||
[] \OT1/cmtt/m/n/10 sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0;
|
[] \OT1/cmtt/m/n/10 sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0;
|
||||||
\[]
|
\[]
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Overfull \hbox (25.129pt too wide) in paragraph at lines 548--549
|
Overfull \hbox (25.129pt too wide) in paragraph at lines 560--561
|
||||||
\OT1/cmr/m/n/10 This com-putes a prod-uct and stores it in the ``sec-ondary'' c
|
\OT1/cmr/m/n/10 This com-putes a prod-uct and stores it in the ``sec-ondary'' c
|
||||||
arry reg-is-ters $[]$.
|
arry reg-is-ters $[]$.
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Overfull \hbox (74.99634pt too wide) in paragraph at lines 556--556
|
Overfull \hbox (74.99634pt too wide) in paragraph at lines 568--568
|
||||||
[]\OT1/cmtt/m/n/10 #define SQRADDAC(i, j)
|
[]\OT1/cmtt/m/n/10 #define SQRADDAC(i, j)
|
||||||
\[]
|
\[]
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Overfull \hbox (74.99634pt too wide) in paragraph at lines 556--556
|
Overfull \hbox (74.99634pt too wide) in paragraph at lines 568--568
|
||||||
[] \OT1/cmtt/m/n/10 do { fp_word t;
|
[] \OT1/cmtt/m/n/10 do { fp_word t;
|
||||||
\[]
|
\[]
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Overfull \hbox (74.99634pt too wide) in paragraph at lines 556--556
|
Overfull \hbox (74.99634pt too wide) in paragraph at lines 568--568
|
||||||
[] \OT1/cmtt/m/n/10 t = sc0 + ((fp_word)i) * ((fp_word)j); sc0 = t;
|
[] \OT1/cmtt/m/n/10 t = sc0 + ((fp_word)i) * ((fp_word)j); sc0 = t;
|
||||||
\[]
|
\[]
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Overfull \hbox (74.99634pt too wide) in paragraph at lines 556--556
|
Overfull \hbox (74.99634pt too wide) in paragraph at lines 568--568
|
||||||
[] \OT1/cmtt/m/n/10 t = sc1 + (t >> DIGIT_BIT); sc1 = t; sc2 += t
|
[] \OT1/cmtt/m/n/10 t = sc1 + (t >> DIGIT_BIT); sc1 = t; sc2 += t
|
||||||
>> DIGIT_BIT; \[]
|
>> DIGIT_BIT; \[]
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Overfull \hbox (74.99634pt too wide) in paragraph at lines 566--566
|
Overfull \hbox (74.99634pt too wide) in paragraph at lines 578--578
|
||||||
[]\OT1/cmtt/m/n/10 #define SQRADDDB
|
[]\OT1/cmtt/m/n/10 #define SQRADDDB
|
||||||
\[]
|
\[]
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Overfull \hbox (74.99634pt too wide) in paragraph at lines 566--566
|
Overfull \hbox (74.99634pt too wide) in paragraph at lines 578--578
|
||||||
[] \OT1/cmtt/m/n/10 do { fp_word t;
|
[] \OT1/cmtt/m/n/10 do { fp_word t;
|
||||||
\[]
|
\[]
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Overfull \hbox (190.49533pt too wide) in paragraph at lines 566--566
|
Overfull \hbox (190.49533pt too wide) in paragraph at lines 578--578
|
||||||
[] \OT1/cmtt/m/n/10 t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = t;
|
[] \OT1/cmtt/m/n/10 t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = t;
|
||||||
\[]
|
\[]
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Overfull \hbox (190.49533pt too wide) in paragraph at lines 566--566
|
Overfull \hbox (190.49533pt too wide) in paragraph at lines 578--578
|
||||||
[] \OT1/cmtt/m/n/10 t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BI
|
[] \OT1/cmtt/m/n/10 t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BI
|
||||||
T); c1 = t; \[]
|
T); c1 = t; \[]
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Overfull \hbox (190.49533pt too wide) in paragraph at lines 566--566
|
Overfull \hbox (190.49533pt too wide) in paragraph at lines 578--578
|
||||||
[] \OT1/cmtt/m/n/10 c2 = c2 + ((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_B
|
[] \OT1/cmtt/m/n/10 c2 = c2 + ((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_B
|
||||||
IT); \[]
|
IT); \[]
|
||||||
[]
|
[]
|
||||||
|
|
||||||
[14] [15] (./tfm.ind [16] [17
|
[16] [17] (./tfm.ind [18] [19
|
||||||
|
|
||||||
|
|
||||||
]) (./tfm.aux) )
|
]) (./tfm.aux) )
|
||||||
@ -329,4 +329,4 @@ Here is how much of TeX's memory you used:
|
|||||||
580 hyphenation exceptions out of 1000
|
580 hyphenation exceptions out of 1000
|
||||||
25i,9n,25p,195b,321s stack positions out of 1500i,500n,1500p,200000b,5000s
|
25i,9n,25p,195b,321s stack positions out of 1500i,500n,1500p,200000b,5000s
|
||||||
|
|
||||||
Output written on tfm.dvi (23 pages, 49708 bytes).
|
Output written on tfm.dvi (25 pages, 51612 bytes).
|
||||||
|
30
tfm.tex
30
tfm.tex
@ -49,8 +49,8 @@
|
|||||||
\begin{document}
|
\begin{document}
|
||||||
\frontmatter
|
\frontmatter
|
||||||
\pagestyle{empty}
|
\pagestyle{empty}
|
||||||
\title{TomsFastMath User Manual \\ v0.04}
|
\title{TomsFastMath User Manual \\ v0.05}
|
||||||
\author{Tom St Denis \\ tomstdenis@iahu.ca}
|
\author{Tom St Denis \\ tomstdenis@gmail.com}
|
||||||
\maketitle
|
\maketitle
|
||||||
This text and library are all hereby placed in the public domain. This book has been formatted for B5
|
This text and library are all hereby placed in the public domain. This book has been formatted for B5
|
||||||
[176x250] paper using the \LaTeX{} {\em book} macro package.
|
[176x250] paper using the \LaTeX{} {\em book} macro package.
|
||||||
@ -101,14 +101,26 @@ fast multiplication and squaring and has the side effect of speeding up ECC oper
|
|||||||
TomsFastMath is public domain.
|
TomsFastMath is public domain.
|
||||||
|
|
||||||
\section{Building}
|
\section{Building}
|
||||||
Currently only a GCC makefile has been provided. To build the library simply type
|
To build the library simply type ``make''. Or to install in typical *unix-like directories use
|
||||||
``make''. The library is a bit too new to put into production so no install
|
``make install''. Similarly a shared library can be built with ``make -f makefile.shared install''.
|
||||||
scripts exist yet. You can build the test program with ``make test''.
|
|
||||||
|
|
||||||
To perform simple static testing (useful to test out new assembly ports) use the stest
|
You can build the test program with ``make test''. To perform simple static testing (useful to
|
||||||
program. Type ``make stest'' and run it on your target. The program will perform three
|
test out new assembly ports) use the stest program. Type ``make stest'' and run it on your
|
||||||
multiplications, squarings and montgomery reductions. Likely if your assembly
|
target.  The program will perform three multiplications, squarings and Montgomery reductions.
|
||||||
code is invalid this code will exhibit the bug.
|
If your assembly code is invalid, this program will likely exhibit the bug.
|
||||||
|
|
||||||
|
\subsection{Intel CC}
|
||||||
|
In theory you should be able to build the library with
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
CFLAGS="-O3 -ip" CC=icc make IGNORE_SPEED=1
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
However, Intel's inline assembler is way less advanced than GCC's.  As a result the library doesn't compile.
|
||||||
|
Fortunately it doesn't really matter.
|
||||||
|
|
||||||
|
\subsection{MSVC}
|
||||||
|
The library doesn't build with MSVC. Imagine that.
|
||||||
|
|
||||||
\subsection{Build Limitations}
|
\subsection{Build Limitations}
|
||||||
TomsFastMath has the following build requirements which are non--portable but under most
|
TomsFastMath has the following build requirements which are non--portable but under most
|
||||||
|
54
tfm.toc
54
tfm.toc
@ -2,32 +2,34 @@
|
|||||||
\contentsline {section}{\numberline {1.1}What is TomsFastMath?}{1}{section.1.1}
|
\contentsline {section}{\numberline {1.1}What is TomsFastMath?}{1}{section.1.1}
|
||||||
\contentsline {section}{\numberline {1.2}License}{2}{section.1.2}
|
\contentsline {section}{\numberline {1.2}License}{2}{section.1.2}
|
||||||
\contentsline {section}{\numberline {1.3}Building}{2}{section.1.3}
|
\contentsline {section}{\numberline {1.3}Building}{2}{section.1.3}
|
||||||
\contentsline {subsection}{\numberline {1.3.1}Build Limitations}{2}{subsection.1.3.1}
|
\contentsline {subsection}{\numberline {1.3.1}Intel CC}{2}{subsection.1.3.1}
|
||||||
\contentsline {subsection}{\numberline {1.3.2}Optimization Configuration}{2}{subsection.1.3.2}
|
\contentsline {subsection}{\numberline {1.3.2}MSVC}{2}{subsection.1.3.2}
|
||||||
|
\contentsline {subsection}{\numberline {1.3.3}Build Limitations}{3}{subsection.1.3.3}
|
||||||
|
\contentsline {subsection}{\numberline {1.3.4}Optimization Configuration}{3}{subsection.1.3.4}
|
||||||
\contentsline {subsubsection}{x86--32}{3}{section*.3}
|
\contentsline {subsubsection}{x86--32}{3}{section*.3}
|
||||||
\contentsline {subsubsection}{SSE2}{3}{section*.4}
|
\contentsline {subsubsection}{SSE2}{3}{section*.4}
|
||||||
\contentsline {subsubsection}{x86--64}{3}{section*.5}
|
\contentsline {subsubsection}{x86--64}{4}{section*.5}
|
||||||
\contentsline {subsubsection}{ARM}{3}{section*.6}
|
\contentsline {subsubsection}{ARM}{4}{section*.6}
|
||||||
\contentsline {subsubsection}{PPC32}{3}{section*.7}
|
\contentsline {subsubsection}{PPC32}{4}{section*.7}
|
||||||
\contentsline {subsubsection}{Future Releases}{4}{section*.8}
|
\contentsline {subsubsection}{Future Releases}{4}{section*.8}
|
||||||
\contentsline {subsection}{\numberline {1.3.3}Precision Configuration}{4}{subsection.1.3.3}
|
\contentsline {subsection}{\numberline {1.3.5}Precision Configuration}{5}{subsection.1.3.5}
|
||||||
\contentsline {chapter}{\numberline {2}Getting Started}{5}{chapter.2}
|
\contentsline {chapter}{\numberline {2}Getting Started}{7}{chapter.2}
|
||||||
\contentsline {section}{\numberline {2.1}Data Types}{5}{section.2.1}
|
\contentsline {section}{\numberline {2.1}Data Types}{7}{section.2.1}
|
||||||
\contentsline {section}{\numberline {2.2}Initialization}{6}{section.2.2}
|
\contentsline {section}{\numberline {2.2}Initialization}{8}{section.2.2}
|
||||||
\contentsline {subsection}{\numberline {2.2.1}Simple Initialization}{6}{subsection.2.2.1}
|
\contentsline {subsection}{\numberline {2.2.1}Simple Initialization}{8}{subsection.2.2.1}
|
||||||
\contentsline {subsection}{\numberline {2.2.2}Initialize Small Constants}{6}{subsection.2.2.2}
|
\contentsline {subsection}{\numberline {2.2.2}Initialize Small Constants}{8}{subsection.2.2.2}
|
||||||
\contentsline {subsection}{\numberline {2.2.3}Initialize Copy}{6}{subsection.2.2.3}
|
\contentsline {subsection}{\numberline {2.2.3}Initialize Copy}{8}{subsection.2.2.3}
|
||||||
\contentsline {chapter}{\numberline {3}Arithmetic Operations}{7}{chapter.3}
|
\contentsline {chapter}{\numberline {3}Arithmetic Operations}{9}{chapter.3}
|
||||||
\contentsline {section}{\numberline {3.1}Odds and Evens}{7}{section.3.1}
|
\contentsline {section}{\numberline {3.1}Odds and Evens}{9}{section.3.1}
|
||||||
\contentsline {section}{\numberline {3.2}Sign Manipulation}{7}{section.3.2}
|
\contentsline {section}{\numberline {3.2}Sign Manipulation}{9}{section.3.2}
|
||||||
\contentsline {section}{\numberline {3.3}Comparisons}{8}{section.3.3}
|
\contentsline {section}{\numberline {3.3}Comparisons}{10}{section.3.3}
|
||||||
\contentsline {section}{\numberline {3.4}Shifting}{8}{section.3.4}
|
\contentsline {section}{\numberline {3.4}Shifting}{10}{section.3.4}
|
||||||
\contentsline {section}{\numberline {3.5}Basic Algebra}{9}{section.3.5}
|
\contentsline {section}{\numberline {3.5}Basic Algebra}{11}{section.3.5}
|
||||||
\contentsline {section}{\numberline {3.6}Modular Exponentiation}{9}{section.3.6}
|
\contentsline {section}{\numberline {3.6}Modular Exponentiation}{11}{section.3.6}
|
||||||
\contentsline {section}{\numberline {3.7}Number Theoretic}{9}{section.3.7}
|
\contentsline {section}{\numberline {3.7}Number Theoretic}{11}{section.3.7}
|
||||||
\contentsline {section}{\numberline {3.8}Prime Numbers}{10}{section.3.8}
|
\contentsline {section}{\numberline {3.8}Prime Numbers}{12}{section.3.8}
|
||||||
\contentsline {chapter}{\numberline {4}Porting TomsFastMath}{11}{chapter.4}
|
\contentsline {chapter}{\numberline {4}Porting TomsFastMath}{13}{chapter.4}
|
||||||
\contentsline {section}{\numberline {4.1}Getting Started}{11}{section.4.1}
|
\contentsline {section}{\numberline {4.1}Getting Started}{13}{section.4.1}
|
||||||
\contentsline {section}{\numberline {4.2}Multiply with Comba}{11}{section.4.2}
|
\contentsline {section}{\numberline {4.2}Multiply with Comba}{13}{section.4.2}
|
||||||
\contentsline {section}{\numberline {4.3}Squaring with Comba}{13}{section.4.3}
|
\contentsline {section}{\numberline {4.3}Squaring with Comba}{15}{section.4.3}
|
||||||
\contentsline {section}{\numberline {4.4}Montgomery with Comba}{15}{section.4.4}
|
\contentsline {section}{\numberline {4.4}Montgomery with Comba}{17}{section.4.4}
|
||||||
|
Loading…
Reference in New Issue
Block a user