added tomsfastmath-0.04

This commit is contained in:
Tom St Denis 2005-07-23 10:43:03 +00:00 committed by Steffen Jaeckel
parent ca551d4c5e
commit f91cf2d1cf
83 changed files with 10946 additions and 1899 deletions

20
TODO
View File

@ -1,20 +0,0 @@
---
0. IMPORTANT... why are you doubling the "even" terms individually? STUPID!
- make it so you have four new macros that use an additional 3 carry variables
- SQRADDSC - store first mult [ simple store, no carry ]
- SQRADDAC - add subsequent mults [ 3n word add ]
- SQRADDDB - double the carry [ 3n word add ]
- SQRADDFC - forward the doubles into the main [ 3n word add, note, x86_32 may need "g" instead of "r" ]
- only use the four macro pattern for rows with >= 3 "doubles"
- otherwise use the existing SQRADD
1. Write more documentation ;-)
2. Ports to PPC and MIPS
3. Fix any lingering bugs, add additional requested functionality.
4. Unrolled copies of montgomery will speed it up a bit
5.
NOTE: The library is still fairly new. I've tested it quite a bit but that doesn't mean surprises
can't happen. Please test the results you get for correctness.

View File

@ -1,7 +1,25 @@
0.04 -- Fixed bugs in the SSE2 squaring code
-- Rewrote the multipliers to be optimized for small inputs
-- Nelson Bolyard of the NSS crew submitted [among other things] new faster Montgomery reduction
code. It brings the performance for small numbers on the AMD64 and all numbers on the P4
to a new level. Thanks!
-- Added missing ARM support for fp_montgomery_reduce.c that the NSS folk left off. Officially,
the ARM code is for v4 and above WITH the "M" multiplier support (e.g. the umlal instruction).
-- Added PPC32 support, define TFM_PPC32 to enable it, I used the "PowerPC 6xx" instruction
databook for reference. Does not require altivec. Should be fairly portable to the other
32-bit PPCs provided they have mullw and mulhwu instructions.
[Note: porting the macros to PPC64 should be trivial, anyone with a shell to lend... email me!]
-- Rewrote the config a bit in tfm.h so you can better choose which set of "oh my god that's huge" code to
enable for your task. "generic" functions are ALWAYS included which are smaller but will cover the
gaps in the coverage for ya.
-- The PPC32 code has been verified to function on a Darwin box running GCC 2.95.2
[Thanks to the folk at PeerSec for lending me a shell to use]
-- Fixed a bug in fp_exptmod() where, if the exponent was negative AND was also the destination
(i.e. X == Y), the output would have the sign set to FP_NEG.
March 1st, 2005
0.03 -- Optimized squaring
--
-- Applied new license header to all files (still PD)
September 18th, 2004
0.02 -- Added TFM_LARGE to turn on/off 16x combas to save even more space.

81
comba_mont_gen.c Normal file
View File

@ -0,0 +1,81 @@
/* generate montgomery reductions for m->used = 1...16 */
#include <stdio.h>
/* Write the C source of fp_montgomery_reduce_<digits>() to stdout.
 *
 * The emitted routine is the generic reduction loop with every occurrence
 * of the modulus length replaced by the literal value `digits`.
 */
static void emit_reduce(int digits)
{
   /* function signature */
   printf("void fp_montgomery_reduce_%d(fp_int *a, fp_int *m, fp_digit mp)\n", digits);

   /* locals, scratch clearing and input copy: the same text for every size */
   fputs(
"{\n"
" fp_digit c[3*FP_SIZE], *_c, *tmpm, mu;\n"
" int oldused, x, y;\n"
"\n"
" /* now zero the buff */\n"
" memset(c, 0, sizeof(c));\n"
"\n"
" /* copy the input */\n"
" oldused = a->used;\n"
" for (x = 0; x < oldused; x++) {\n"
" c[x] = a->dp[x];\n"
" }\n"
"\n"
" MONT_START;\n"
"\n"
" /* now let's get bizz-sy! */\n", stdout);

   /* outer and inner loops, both bounded by the digit count */
   printf(
" for (x = 0; x < %d; x++) {\n"
" /* get Mu for this round */\n"
" LOOP_START;\n"
"\n"
" /* our friendly neighbourhood alias */\n"
" _c = c + x;\n"
" tmpm = m->dp;\n"
"\n"
" for (y = 0; y < %d; y++) {\n"
" INNERMUL;\n"
" ++_c;\n"
" }\n"
" /* send carry up man... */\n"
" _c = c + x;\n"
" PROPCARRY;\n"
" } \n"
"\n", digits, digits);

   /* remaining carry propagation */
   printf(
" /* fix the rest of the carries */\n"
" _c = c + %d;\n"
" for (x = %d; x < %d * 2 + 2; x++) {\n"
" PROPCARRY;\n"
" ++_c;\n"
" }\n"
"\n", digits, digits, digits);

   /* copy-out, clamp and final conditional subtraction */
   printf(
" /* now copy out */\n"
" _c = c + %d;\n"
" tmpm = a->dp;\n"
" for (x = 0; x < %d+1; x++) {\n"
" *tmpm++ = *_c++;\n"
" }\n"
"\n"
" for (; x < oldused; x++) {\n"
" *tmpm++ = 0;\n"
" }\n"
"\n"
" MONT_FINI;\n"
"\n"
" a->used = %d+1;\n"
" fp_clamp(a);\n"
"\n"
" /* if A >= m then A = A - m */\n"
" if (fp_cmp_mag (a, m) != FP_LT) {\n"
" s_fp_sub (a, m, a);\n"
" }\n"
"}\n", digits, digits, digits);
}

/* Emit one specialised reduction routine for each modulus length 1..16. */
int main(void)
{
   int digits = 1;

   while (digits <= 16) {
      emit_reduce(digits);
      ++digits;
   }
   return 0;
}

View File

@ -1,3 +1,13 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
/* program emits a NxN comba multiplier */
#include <stdio.h>
@ -47,3 +57,7 @@ printf(
return 0;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

61
comba_mult_smallgen.c Normal file
View File

@ -0,0 +1,61 @@
/* program emits a NxN comba multiplier for 1x1 to 16x16 */
#include <stdio.h>
/* Emits fp_mul_comba_small(): a switch over operand size (1..16 digits)
 * where every case is a fully unrolled comba multiplier.  Both operands are
 * first copied into one scratch array `at` (A in at[0..size-1], B in
 * at[size..2*size-1]) and the products are summed column by column.
 */
int main(int argc, char **argv)
{
   int size, col, first, last, i;

   /* function preamble */
   fputs(
"void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)\n"
"{\n"
" fp_digit c0, c1, c2, at[32];\n"
" switch (MAX(A->used, B->used)) { \n", stdout);

   for (size = 1; size <= 16; ++size) {
      /* case label and operand staging */
      printf(
"\n"
" case %d:\n"
" memcpy(at, A->dp, %d * sizeof(fp_digit));\n"
" memcpy(at+%d, B->dp, %d * sizeof(fp_digit));\n"
" COMBA_START;\n"
"\n"
" COMBA_CLEAR;\n", size, size, size, size);

      /* one output column per pass; column `col` sums A[i]*B[col-i] */
      for (col = 0; col < size + size - 1; ++col) {
         printf(" /* %d */\n", col);
         if (col != 0) {
            printf(" COMBA_FORWARD;\n");
         }

         /* valid A indices are those for which col-i is a valid B index */
         first = (col >= size) ? (col - size + 1) : 0;
         last  = (col < size) ? col : (size - 1);
         for (i = first; i <= last; ++i) {
            printf(" MULADD(at[%d], at[%d]); ", i, (col - i) + size);
         }

         printf(
"\n"
" COMBA_STORE(C->dp[%d]);\n", col);
      }

      /* top digit, result bookkeeping and end of this case */
      printf(
" COMBA_STORE2(C->dp[%d]);\n"
" C->used = %d;\n"
" C->sign = A->sign ^ B->sign;\n"
" fp_clamp(C);\n"
" COMBA_FINI;\n"
" break;\n", size + size - 1, size + size);
   }

   /* close the switch and the function */
   printf(" }\n}\n\n");
   return 0;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,3 +1,13 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
/* Generates squaring comba code... it learns it knows our secrets! */
#include <stdio.h>
@ -90,3 +100,7 @@ if (N >= 16) printf("#endif\n");
return 0;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

109
comba_sqr_smallgen.c Normal file
View File

@ -0,0 +1,109 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
/* Generates squaring comba code... it learns it knows our secrets! */
#include <stdio.h>
/* Emits fp_sqr_comba_small(): a switch over A->used (1..16 digits) where
 * every case is a fully unrolled comba squaring.
 *
 * For each output column the generator counts the off-diagonal products
 * a[i]*a[j] (i < j, i + j == column).  With at most two of them the plain
 * SQRADD/SQRADD2 macros are emitted; with more, the products are emitted
 * through SQRADDSC/SQRADDAC followed by SQRADDDB, and the diagonal square
 * (even columns only) is emitted afterwards with SQRADD.
 */
int main(int argc, char **argv)
{
   int size, col, first, last, i, j, pairs, started;

   fputs(
"void fp_sqr_comba_small(fp_int *A, fp_int *B)\n"
"{\n"
" fp_digit *a, b[32], c0, c1, c2, sc0, sc1, sc2;\n", stdout);
   fputs(" switch (A->used) { \n", stdout);

   for (size = 1; size <= 16; ++size) {
      /* case header plus column 0, which is always just a[0]^2 */
      printf(
" case %d:\n"
" a = A->dp;\n"
" COMBA_START; \n"
"\n"
" /* clear carries */\n"
" CLEAR_CARRY;\n"
"\n"
" /* output 0 */\n"
" SQRADD(a[0],a[0]);\n"
" COMBA_STORE(b[0]);\n", size);

      for (col = 1; col < size + size - 1; ++col) {
         printf(
"\n /* output %d */\n"
" CARRY_FORWARD;\n ", col);

         /* index pairs (i, col-i) with i <= col-i and both inside the operand */
         first = (col >= size) ? (col - size + 1) : 0;
         last  = col / 2;

         /* count the strictly off-diagonal products of this column */
         pairs = 0;
         for (i = first; i <= last; ++i) {
            if (i != col - i) {
               ++pairs;
            }
         }

         if (pairs > 2) {
            /* many doubles: emit them first, then the doubling step */
            started = 0;
            for (i = first; i <= last; ++i) {
               j = col - i;
               if (i == j) {
                  continue;
               }
               if (!started) {
                  /* first double of the column */
                  printf("SQRADDSC(a[%d], a[%d]); ", i, j);
                  started = 1;
               } else {
                  printf("SQRADDAC(a[%d], a[%d]); ", i, j);
               }
            }
            printf("SQRADDDB; ");
            if ((col % 2) == 0) {
               /* even column: add the diagonal square */
               printf("SQRADD(a[%d], a[%d]); ", col / 2, col / 2);
            }
         } else {
            /* few doubles: use the per-product macros directly */
            for (i = first; i <= last; ++i) {
               j = col - i;
               if (i == j) {
                  printf(" SQRADD(a[%d], a[%d]); ", i, i);
               } else {
                  printf(" SQRADD2(a[%d], a[%d]); ", i, j);
               }
            }
         }

         printf("\n COMBA_STORE(b[%d]);\n", col);
      }

      /* top digit, then publish the result from the scratch array */
      printf(" COMBA_STORE2(b[%d]);\n", size + size - 1);
      printf(
" COMBA_FINI;\n"
"\n"
" B->used = %d;\n"
" B->sign = FP_ZPOS;\n"
" memcpy(B->dp, b, %d * sizeof(fp_digit));\n"
" fp_clamp(B);\n"
" break;\n\n", size + size, size + size);
   }

   /* close the switch and the function */
   printf("}\n\n}\n");
   return 0;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

24
delme.c
View File

@ -1,24 +0,0 @@
#include "tfm.h"
/* Scratch test: parse one large base-64 encoded integer and report whether
 * fp_isprime() considers it prime.
 *
 * The unused 4 KiB `buf` local has been dropped; nothing in this program
 * reads or writes it.
 */
int main(void)
{
   fp_int a;

   fp_init(&a);

   /* the literal is split across lines only for readability; it is one number */
   fp_read_radix( &a,
   "///////////93zgY8MZ2DCJ6Oek0t1pHAG9E28fdp7G22xwcEnER8b5A27cED0JT"
   "xvKPiyqwGnimAmfjybyKDq/XDMrjKS95v8MrTc9UViRqJ4BffZVjQml/NBRq1hVj"
   "xZXh+rg9dwMkdoGHV4iVvaaePb7iv5izmW1ykA5ZlmMOsaWs75NJccaMFwZz9CzV"
   "WsLT8zoZhPOSOlDM88LIkvxLAGTmbfPjPmmrJagyc0JnT6m8oXWXV3AGNaOkDiux"
   "uvvtB1WEXWER9uEYx0UYZxN5NV1lJ5B9tYlBzfLO5nWvbKbywfLgvHNI9XYO+WKG"
   "5NAEMeggn2sjCnSD151wCwXL8QlV7BfaxFk515ZRxmgAwd5NNGOCVREN3uMcuUJ7"
   "g/MkZDi9CzSUZ9JWIYLXdSxZqYOQqkvhyI/w1jcA26JOTW9pFiXgP58VAnWNUo0C"
   "k+4NLtfXNMnt2OZ0kjb6uWZYJw1qvQinGzjR/E3z48vBWj4WgJhIol//////////",
   64 );

   if( fp_isprime( &a ) ) printf("It's prime.\n");
   else printf( "Not prime.\n");
   return 0;
}

83
demo/rsa.c Normal file
View File

@ -0,0 +1,83 @@
#include "tfm.h"
#include <time.h>
/* RSA demo / micro-benchmark.
 *
 * 1. Checks a public-key operation (m^e mod n) against a known ciphertext,
 *    then times 1000 of them.
 * 2. Checks a private-key operation (c^d mod n) against a known plaintext,
 *    then times 100 of them.
 * 3. Shifts n, d and c down to half their digit count and times 100 more
 *    exponentiations as a rough stand-in for a CRT private-key operation.
 *
 * Returns 0 when both known-answer checks pass and 1 when either one fails,
 * so a wrong result is visible to scripts and build systems.  (Previously a
 * failed check also returned 0.)
 */
int main(void)
{
   fp_int d, e, n, c, m, e_m;
   clock_t t1;
   int x;

   /* read in the parameters */
   fp_read_radix(&n, "ce032e860a9809a5ec31e4b0fd4b546f8c40043e3d2ec3d8f49d8f2f3dd19e887094ee1af75caa1c2e6cd9ec78bf1dfd6280002ac8c30ecd72da2e4c59a28a9248048aaae2a8fa627f71bece979cebf9f8eee2bd594d4a4f2e791647573c7ec1fcbd320d3825be3fa8a17c97086fdae56f7086ce512b81cc2fe44161270ec5e9", 16);
   fp_read_radix(&e, "10001", 16);
   fp_read_radix(&m, "39f5a911250f45b99390e2df322b33c729099ab52b5879d06b00818cce57c649a66ed7eb6d8ae214d11caf9c81e83a7368cf0edb2b71dad791f13fecf546123b40377851e67835ade1d6be57f4de18a62db4cdb1880f4ab2e6a29acfd85ca22a13dc1f6fee2621ef0fc8689cd738e6f065c033ec7c148d8d348688af83d6f6bd", 16);
   fp_read_radix(&c, "9ff70ea6968a04530e6b06bf01aa937209cc8450e76ac19477743de996ba3fb445923c947f8d0add8c57efa51d15485309918459da6c1e5a97f215193b797dce98db51bdb4639c2ecfa90ebb051e3a2daeffd27a7d6e62043703a7b15e0ada5170427b63099cd01ef52cd92d8723e5774bea32716aaa7f5adbae817fb12a5b50", 16);

   /* test it */
   fp_exptmod(&m, &e, &n, &e_m);
   if (fp_cmp(&e_m, &c)) {
      char buf[1024];
      printf("Encrypted text not equal\n");
      fp_toradix(&e_m, buf, 16);
      printf("e_m == %s\n", buf);
      /* known-answer check failed: report it through the exit status */
      return 1;
   }

   printf("CLOCKS_PER_SEC = %llu\n", (unsigned long long)CLOCKS_PER_SEC);
   t1 = clock();
   for (x = 0; x < 1000; x++) {
      fp_exptmod(&m, &e, &n, &e_m);
   }
   t1 = clock() - t1;
   printf("1000 RSA operations took %10.5g seconds\n", (double)t1 / (double)CLOCKS_PER_SEC);
   printf("RSA encrypt/sec %10.5g\n", (double)CLOCKS_PER_SEC / ((double)t1 / 1000.0) );

   /* read in the parameters */
   fp_read_radix(&n, "a7f30e2e04d31acc6936916af1e404a4007adfb9e97864de28d1c7ba3034633bee2cd9d5da3ea3cdcdc9a6f3daf5702ef750f4c3aadb0e27410ac04532176795995148cdb4691bd09a8a846e3e24e073ce2f89b34dfeb2ee89b646923ca60ee3f73c4d5397478380425e7260f75dfdc54826e160395b0889b1162cf115a9773f", 16);
   fp_read_radix(&d, "16d166f3c9a404d810d3611e6e8ed43293fe1db75c8906eb4810785a4b82529929dade1db7f11ac0335d5a59773e3167b022479eedefa514a0399db5c900750a56323cf9f5b0f21e7d60a46d75f3fcaabf30a63cbe34048b741a57ac36a13914afda798709dea5771f8d456cf72ec5f3afc1d88d023de40311143a36e7028739", 16);
   fp_read_radix(&c, "7d216641c32543f5b8428bdd0b11d819cfbdb16f1df285247f677aa4d44de62ab064f4a0d060ec99cb94aa398113a4317f2c550d0371140b0fd2c88886cac771812e72faad4b7adf495b9b850b142ccd7f45c0a27f164c8c7731731c0015f69d0241812e769d961054618aeb9e8e8989dba95714a2cf56c9e525c5e34b5812dd", 16);
   fp_read_radix(&m, "5f323bf0b394b98ffd78727dc9883bb4f42287def6b60fa2a964b2510bc55d61357bf5a6883d2982b268810f8fef116d3ae68ebb41fd10d65a0af4bec0530eb369f37c14b55c3be60223b582372fb6589b648d5a0c7252d1ae2dae5809785d993e9e5d0c4d9b0bcba0cde0d6671734747fba5483c735e1dab7df7b10ec6f62d8", 16);

   /* test it */
   fp_exptmod(&c, &d, &n, &e_m);
   if (fp_cmp(&e_m, &m)) {
      char buf[1024];
      printf("Decrypted text not equal\n");
      fp_toradix(&e_m, buf, 16);
      printf("e_m == %s\n", buf);
      /* known-answer check failed: report it through the exit status */
      return 1;
   }

   t1 = clock();
   for (x = 0; x < 100; x++) {
      fp_exptmod(&c, &d, &n, &e_m);
   }
   t1 = clock() - t1;
   printf("100 RSA operations took %10.5g seconds\n", (double)t1 / (double)CLOCKS_PER_SEC);
   printf("RSA decrypt/sec %10.5g\n", (double)CLOCKS_PER_SEC / ((double)t1 / 100.0) );

   /* test half size */
   fp_rshd(&n, n.used >> 1);
   fp_rshd(&d, d.used >> 1);
   fp_rshd(&c, c.used >> 1);
   printf("n.used == %4d bits\n", n.used * DIGIT_BIT);

   /* ensure n is odd */
   n.dp[0] |= 1;
   t1 = clock();
   for (x = 0; x < 100; x++) {
      fp_exptmod(&c, &d, &n, &e_m);
   }
   t1 = clock() - t1;
   printf("100 RSA-half operations took %10.5g seconds\n", (double)t1 / (double)CLOCKS_PER_SEC);
   /* the rate divides by 50, not 100: presumably two half-size
    * exponentiations are counted as one CRT private-key operation */
   printf("RSA decrypt/sec %10.5g (estimate of RSA-1024-CRT) \n", (double)CLOCKS_PER_SEC / ((double)t1 / 50.0) );
   return 0;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -142,3 +142,7 @@ int main(void)
return 0;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -22,10 +22,18 @@ int myrng(unsigned char *dst, int len, void *dat)
static ulong64 TIMFUNC (void)
{
#if defined __GNUC__
#if defined(__i386__) || defined(__x86_64__)
#if defined(INTEL_CC)
ulong64 a;
asm ("rdtsc":"=A"(a));
return a;
#elif defined(__i386__) || defined(__x86_64__)
ulong64 a;
__asm__ __volatile__ ("rdtsc\nmovl %%eax,%0\nmovl %%edx,4+%0\n"::"m"(a):"%eax","%edx");
return a;
#elif defined(TFM_PPC32)
unsigned long a;
__asm__ __volatile__ ("mftb %0":"=r"(a));
return a;
#else /* gcc-IA64 version */
unsigned long result;
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
@ -135,9 +143,7 @@ int main(void)
printf("Testing read_radix\n");
fp_read_radix(&a, "123456789012345678901234567890", 16); draw(&a);
goto testing;
#if 1
#if 0
/* test mont */
printf("Montgomery test #1\n");
fp_set(&a, 0x1234567ULL);
@ -196,8 +202,18 @@ goto testing;
}
printf("\n\n");
#endif
#ifdef TESTING
goto testing;
#endif
#if 1
t1 = TIMFUNC();
sleep(1);
printf("Ticks per second: %llu\n", TIMFUNC() - t1);
goto expttime;
/* do some timings... */
printf("Addition:\n");
for (t = 2; t <= FP_SIZE/2; t += 2) {
@ -211,7 +227,7 @@ goto testing;
a.used = t;
b.used = t;
t2 = -1;
for (ix = 0; ix < 2500; ++ix) {
for (ix = 0; ix < 25000; ++ix) {
t1 = TIMFUNC();
fp_add(&a, &b, &c); fp_add(&a, &b, &c);
fp_add(&a, &b, &c); fp_add(&a, &b, &c);
@ -222,6 +238,7 @@ goto testing;
}
printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
}
multtime:
printf("Multiplication:\n");
for (t = 2; t <= FP_SIZE/2; t += 2) {
fp_zero(&a);
@ -263,8 +280,8 @@ sqrtime:
}
printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
}
return;
//#else
monttime:
printf("Montgomery:\n");
for (t = 2; t <= (FP_SIZE/2)-2; t += 2) {
fp_zero(&a);
@ -295,7 +312,7 @@ return;
expttime:
printf("Exptmod:\n");
for (t = 512/DIGIT_BIT; t <= (FP_SIZE/2)-2; t += t) {
for (t = 512/DIGIT_BIT; t <= (FP_SIZE/2)-2; t += 256/DIGIT_BIT) {
fp_zero(&a);
fp_zero(&b);
fp_zero(&c);
@ -309,7 +326,7 @@ expttime:
c.used = t;
t2 = -1;
for (ix = 0; ix < 256; ++ix) {
for (ix = 0; ix < 500; ++ix) {
t1 = TIMFUNC();
fp_exptmod(&c, &b, &a, &d);
fp_exptmod(&c, &b, &a, &d);
@ -320,10 +337,10 @@ expttime:
}
printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
}
return;
return;
#endif
return;
testing:
div2_n = mul2_n = inv_n = expt_n = lcm_n = gcd_n = add_n =
@ -567,3 +584,7 @@ draw(&a);draw(&b);draw(&c);draw(&d);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

Binary file not shown.

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -33,3 +33,7 @@ void fp_2expt(fp_int *a, int b)
a->dp[z] = ((fp_digit)1) << (b % DIGIT_BIT);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -37,3 +37,7 @@ void fp_add(fp_int *a, fp_int *b, fp_int *c)
}
}
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -16,3 +16,7 @@ void fp_add_d(fp_int *a, fp_digit b, fp_int *c)
fp_set(&tmp, b);
fp_add(a,&tmp,c);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -17,3 +17,7 @@ int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
fp_add(a, b, &tmp);
return fp_mod(&tmp, c, d);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -25,3 +25,7 @@ int fp_cmp(fp_int *a, fp_int *b)
}
}
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -32,3 +32,7 @@ int fp_cmp_d(fp_int *a, fp_digit b)
}
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -29,3 +29,7 @@ int fp_cmp_mag(fp_int *a, fp_int *b)
return FP_EQ;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -40,3 +40,7 @@ int fp_cnt_lsb(fp_int *a)
return x;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -30,3 +30,7 @@ int fp_count_bits (fp_int * a)
}
return r;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -151,3 +151,7 @@ int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
return FP_OKAY;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -47,3 +47,7 @@ void fp_div_2(fp_int * a, fp_int * b)
b->sign = a->sign;
fp_clamp (b);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -73,3 +73,7 @@ void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d)
fp_copy (&t, d);
}
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -87,3 +87,7 @@ int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d)
return FP_OKAY;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -174,6 +174,13 @@ int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
fp_int tmp;
int err;
#ifdef TFM_CHECK
/* prevent overflows */
if (P->used > (FP_SIZE/2)) {
return FP_VAL;
}
#endif
/* is X negative? */
if (X->sign == FP_NEG) {
/* yes, copy G and invmod it */
@ -183,10 +190,16 @@ int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
}
X->sign = FP_ZPOS;
err = _fp_exptmod(&tmp, X, P, Y);
X->sign = FP_NEG;
if (X != Y) {
X->sign = FP_NEG;
}
return err;
} else {
/* Positive exponent so just exptmod */
return _fp_exptmod(G, X, P, Y);
}
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -49,3 +49,7 @@ void fp_gcd(fp_int *a, fp_int *b, fp_int *c)
}
fp_copy(&u, c);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,3 +1,12 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include "tfm.h"
const char *fp_ident(void)
@ -39,9 +48,6 @@ const char *fp_ident(void)
#ifdef FP_64BIT
" FP_64BIT "
#endif
#ifdef TFM_LARGE
" TFM_LARGE "
#endif
#ifdef TFM_HUGE
" TFM_HUGE "
#endif
@ -64,3 +70,7 @@ int main(void)
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -96,3 +96,7 @@ top:
c->sign = neg;
return FP_OKAY;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -72,3 +72,7 @@ int fp_isprime(fp_int *a)
}
return FP_YES;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -25,3 +25,7 @@ void fp_lcm(fp_int *a, fp_int *b, fp_int *c)
fp_mul(a, &t2, c);
}
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -32,3 +32,7 @@ void fp_lshd(fp_int *a, int x)
/* clamp digits */
fp_clamp(a);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -28,3 +28,7 @@ int fp_mod(fp_int *a, fp_int *b, fp_int *c)
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -36,3 +36,7 @@ void fp_mod_2d(fp_int *a, int b, fp_int *c)
c->dp[b / DIGIT_BIT] &= ~((fp_digit)0) >> (DIGIT_BIT - b);
fp_clamp (c);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -14,3 +14,7 @@ int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c)
{
return fp_div_d(a, b, NULL, c);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -36,3 +36,7 @@ void fp_montgomery_calc_normalization(fp_int *a, fp_int *b)
}
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,252 +1,380 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
*/
#include <tfm.h>
#if defined(TFM_X86)
/* x86-32 code */
#define MONT_START
#define MONT_FINI
#define LOOP_START \
mu = c[x] * mp;
#define INNERMUL \
asm( \
"movl %7,%%eax \n\t" \
"mull %6 \n\t" \
"addl %%eax,%0 \n\t" \
"adcl %%edx,%1 \n\t" \
"adcl $0,%2 \n\t" \
:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
"g"(mu), "g"(*tmpm++) \
: "%eax", "%edx", "%cc");
#define PROPCARRY \
asm( \
"movl %1,%%eax \n\t" \
"addl %%eax,%6 \n\t" \
"movl %2,%%eax \n\t" \
"adcl %%eax,%7 \n\t" \
"adcl $0,%8 \n\t" \
:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
"m"(_c[OFF0+1]), "m"(_c[OFF1+1]), "m"(_c[OFF2+1]) \
: "%eax", "%cc");
#elif defined(TFM_X86_64)
/* x86-64 code */
#define MONT_START
#define MONT_FINI
#define LOOP_START \
mu = c[x] * mp;
#define INNERMUL \
asm( \
"movq %7,%%rax \n\t" \
"mulq %6 \n\t" \
"addq %%rax,%0 \n\t" \
"adcq %%rdx,%1 \n\t" \
"adcq $0,%2 \n\t" \
:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
"g"(mu), "g"(*tmpm++) \
: "%rax", "%rdx", "%cc");
#define PROPCARRY \
asm( \
"movq %1,%%rax \n\t" \
"movq %2,%%rbx \n\t" \
"addq %%rax,%6 \n\t" \
"adcq %%rbx,%7 \n\t" \
"adcq $0,%8 \n\t" \
:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
"m"(_c[OFF0+1]), "m"(_c[OFF1+1]), "m"(_c[OFF2+1]) \
: "%rax", "%rbx", "%cc");
#elif defined(TFM_SSE2)
/* SSE2 code */
#define MONT_START \
asm("movd %0,%%mm2"::"g"(mp));
#define MONT_FINI \
asm("emms");
#define LOOP_START \
asm(\
"movd %0,%%mm1 \n\t" \
"pmuludq %%mm2,%%mm1 \n\t" \
:: "g"(c[x]));
#define INNERMUL \
asm( \
"movd %6,%%mm0 \n\t" \
"pmuludq %%mm1,%%mm0 \n\t" \
"movd %%mm0,%%eax \n\t" \
"psrlq $32, %%mm0 \n\t" \
"addl %%eax,%0 \n\t" \
"movd %%mm0,%%eax \n\t" \
"adcl %%eax,%1 \n\t" \
"adcl $0,%2 \n\t" \
:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
"g"(*tmpm++) \
: "%eax", "%cc");
#define PROPCARRY \
asm( \
"movl %1,%%eax \n\t" \
"addl %%eax,%6 \n\t" \
"movl %2,%%eax \n\t" \
"adcl %%eax,%7 \n\t" \
"adcl $0,%8 \n\t" \
:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
"g"(_c[OFF0+1]), "g"(_c[OFF1+1]), "g"(_c[OFF2+1]) \
: "%eax", "%cc");
#elif defined(TFM_ARM)
/* ARM code (inline assembly, uses UMULL) */
#define MONT_START
#define MONT_FINI
#define LOOP_START \
mu = c[x] * mp;
/* NOTE: later write it using two regs instead of three for _c + ... */
#define INNERMUL \
asm( \
"UMULL r0,r1,%0,%1 \n\t" \
"LDR r2,[%2] \n\t" \
"ADDS r2,r2,r0 \n\t" \
"STR r2,[%2] \n\t" \
"LDR r2,[%3] \n\t" \
"ADCS r2,r2,r1 \n\t" \
"STR r2,[%3] \n\t" \
"LDR r2,[%4] \n\t" \
"ADC r2,r2,#0 \n\t" \
"STR r2,[%4] \n\t" \
::"r"(mu),"r"(*tmpm++),"r"(_c + OFF0),"r"(_c + OFF1),"r"(_c + OFF2):"r0", "r1", "r2", "%cc");
#define PROPCARRY \
asm( \
"LDR r0,[%1] \n\t" \
"LDR r1,[%0,#4] \n\t" \
"ADDS r0,r0,r1 \n\t" \
"STR r0,[%0,#4] \n\t" \
"LDR r0,[%2] \n\t" \
"LDR r1,[%1,#4] \n\t" \
"ADCS r0,r0,r1 \n\t" \
"STR r0,[%1,#4] \n\t" \
"LDR r0,[%2,#4] \n\t" \
"ADC r0,r0,#0 \n\t" \
"STR r0,[%2,#4] \n\t" \
::"r"(_c + OFF0),"r"(_c + OFF1),"r"(_c + OFF2):"r0", "r1", "%cc");
#else
/* ISO C code */
#define MONT_START
#define MONT_FINI
#define LOOP_START \
mu = c[x] * mp;
/* INNERMUL (portable C): one multiply-accumulate step of the inner loop.
 * Adds mu * (*tmpm) to _c[OFF0] using an fp_word temporary, stores the
 * result back into _c[OFF0], adds the part of the temporary above
 * DIGIT_BIT bits to _c[OFF1], and adds whatever overflows from that into
 * _c[OFF2].  tmpm is post-incremented, so each expansion consumes one
 * digit of the modulus.  Expects _c, tmpm and mu to be in scope at the
 * point of use.
 * NOTE(review): relies on the store into _c[...] keeping only the low
 * DIGIT_BIT bits of t, i.e. fp_word being wider than fp_digit -- confirm
 * against tfm.h.
 */
#define INNERMUL \
do { fp_word t; \
t = (fp_word)_c[OFF0] + ((fp_word)mu) * ((fp_word)*tmpm++); _c[OFF0] = t; \
t = (fp_word)_c[OFF1] + (t >> DIGIT_BIT); _c[OFF1] = t; \
_c[OFF2] += (t >> DIGIT_BIT); \
} while (0);
/* PROPCARRY (portable C): push the carries held at the current position
 * one position up.  _c[OFF1] is added into _c[OFF0+1]; the overflow of
 * that sum plus _c[OFF2] is added into _c[OFF1+1]; the overflow of that
 * is added into _c[OFF2+1].  The source entries _c[OFF1] and _c[OFF2] are
 * read but not cleared here.
 */
#define PROPCARRY \
do { fp_word t; \
t = (fp_word)_c[OFF0+1] + (fp_word)_c[OFF1]; _c[OFF0+1] = t; \
t = (fp_word)_c[OFF1+1] + (t >> DIGIT_BIT) + (fp_word)_c[OFF2]; _c[OFF1+1] = t; \
_c[OFF2+1] += (t >> DIGIT_BIT); \
} while (0);
#endif
#define OFF0 (0)
#define OFF1 (FP_SIZE)
#define OFF2 (FP_SIZE+FP_SIZE)
/* computes x/R == x (mod N) via Montgomery Reduction
 *
 * a   value to reduce; overwritten in place with the result
 * m   modulus; only m->used and m->dp are read here
 * mp  per-modulus multiplier handed to MONT_START / LOOP_START
 *     (presumably the constant produced by the library's Montgomery
 *     setup routine -- confirm against the caller)
 *
 * Working storage is a 3*FP_SIZE digit array addressed through the
 * offsets OFF0, OFF1 and OFF2 (0, FP_SIZE and 2*FP_SIZE): the INNERMUL
 * and PROPCARRY macros add into _c[OFF0], _c[OFF1] and _c[OFF2], so each
 * digit position has a value slot and two carry slots.
 */
void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
{
fp_digit c[3*FP_SIZE], *_c, *tmpm, mu;
int oldused, x, y, pa;
/* pa = number of digits in the modulus; then zero the whole scratch buffer */
pa = m->used;
memset(c, 0, sizeof(c));
/* copy the input digits into the first column of the scratch buffer */
oldused = a->used;
for (x = 0; x < oldused; x++) {
c[x] = a->dp[x];
}
/* per-call setup; expands to nothing on targets that need none */
MONT_START;
/* main reduction: one round per modulus digit */
for (x = 0; x < pa; x++) {
/* get Mu for this round */
LOOP_START;
/* _c walks the scratch buffer starting at digit x, tmpm walks the modulus */
_c = c + x;
tmpm = m->dp;
/* accumulate mu * m into the scratch buffer, one modulus digit per step */
for (y = 0; y < pa; y++) {
INNERMUL;
++_c;
}
/* rewind to digit x and push its pending carries one position up */
_c = c + x;
PROPCARRY;
}
/* propagate the carries of positions pa .. 2*pa+1 */
_c = c + pa;
for (x = pa; x < pa * 2 + 2; x++) {
PROPCARRY;
++_c;
}
/* copy out the pa+1 digits starting at position pa (tmpm now aliases a->dp) */
_c = c + pa;
tmpm = a->dp;
for (x = 0; x < pa+1; x++) {
*tmpm++ = *_c++;
}
/* clear any digits of the old, longer value that were not overwritten */
for (; x < oldused; x++) {
*tmpm++ = 0;
}
/* per-call teardown; expands to nothing on targets that need none */
MONT_FINI;
/* the result has at most pa+1 digits; fp_clamp adjusts a->used */
a->used = pa+1;
fp_clamp(a);
/* if A >= m then A = A - m */
if (fp_cmp_mag (a, m) != FP_LT) {
s_fp_sub (a, m, a);
}
}
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
/******************************************************************/
#if defined(TFM_X86)
/* x86-32 code */
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
mu = c[x] * mp
#define INNERMUL \
asm( \
"movl %5,%%eax \n\t" \
"mull %4 \n\t" \
"addl %1,%%eax \n\t" \
"adcl $0,%%edx \n\t" \
"addl %%eax,%0 \n\t" \
"adcl $0,%%edx \n\t" \
"movl %%edx,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \
: "%eax", "%edx", "%cc")
#define PROPCARRY \
asm( \
"addl %1,%0 \n\t" \
"setb %%al \n\t" \
"movzbl %%al,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy) \
: "%eax", "%cc")
/******************************************************************/
#elif defined(TFM_X86_64)
/* x86-64 code */
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
mu = c[x] * mp
#define INNERMUL \
asm( \
"movq %5,%%rax \n\t" \
"mulq %4 \n\t" \
"addq %1,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"addq %%rax,%0 \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rdx,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
: "%rax", "%rdx", "%cc")
#define INNERMUL8 \
asm( \
"movq 0(%5),%%rax \n\t" \
"movq 0(%2),%%r10 \n\t" \
"movq 0x8(%5),%%r11 \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq 0x8(%2),%%r10 \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
"movq %%r11,%%rax \n\t" \
"movq 0x10(%5),%%r11 \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq 0x10(%2),%%r10 \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0x8(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
"movq %%r11,%%rax \n\t" \
"movq 0x18(%5),%%r11 \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq 0x18(%2),%%r10 \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0x10(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
"movq %%r11,%%rax \n\t" \
"movq 0x20(%5),%%r11 \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq 0x20(%2),%%r10 \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0x18(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
"movq %%r11,%%rax \n\t" \
"movq 0x28(%5),%%r11 \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq 0x28(%2),%%r10 \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0x20(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
"movq %%r11,%%rax \n\t" \
"movq 0x30(%5),%%r11 \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq 0x30(%2),%%r10 \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0x28(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
"movq %%r11,%%rax \n\t" \
"movq 0x38(%5),%%r11 \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq 0x38(%2),%%r10 \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0x30(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
"movq %%r11,%%rax \n\t" \
"mulq %4 \n\t" \
"addq %%r10,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"addq %3,%%rax \n\t" \
"adcq $0,%%rdx \n\t" \
"movq %%rax,0x38(%0) \n\t" \
"movq %%rdx,%1 \n\t" \
\
:"=r"(_c), "=r"(cy) \
: "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
: "%rax", "%rdx", "%r10", "%r11", "%cc")
#define PROPCARRY \
asm( \
"addq %1,%0 \n\t" \
"setb %%al \n\t" \
"movzbq %%al,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy) \
: "%rax", "%cc")
/******************************************************************/
#elif defined(TFM_SSE2)
/* SSE2 code (assumes 32-bit fp_digits) */
/* MMX register assignments (the macros below address %mm0..%mm4, not the
 * xmm registers):
 * mm0 *tmpm++, then Mu * (*tmpm++)
 * mm1 c[x], then Mu
 * mm2 mp
 * mm3 cy
 * mm4 _c[LO]
 *
 * These values live in the MMX registers across separate asm statements,
 * so the macros of this backend only work together, in the order used by
 * fp_montgomery_reduce().
 */
/* MONT_START: load mp into mm2 once, before the outer loop. */
#define MONT_START \
asm("movd %0,%%mm2"::"g"(mp))
/* MONT_FINI: issue emms (empty MMX state) once the reduction is finished. */
#define MONT_FINI \
asm("emms")
/* LOOP_START (SSE2 variant): set up one outer round.
 *   movd    : mm1 = c[x]
 *   pxor    : mm3 = 0 (clears the running carry)
 *   pmuludq : mm1 = mm1 * mm2, i.e. c[x] * mp; this is the "Mu" that
 *             INNERMUL multiplies with each modulus digit
 * No C variable is written; the result stays in the MMX registers.
 */
#define LOOP_START \
asm( \
"movd %0,%%mm1 \n\t" \
"pxor %%mm3,%%mm3 \n\t" \
"pmuludq %%mm2,%%mm1 \n\t" \
:: "g"(c[x]))
/* pmuludq on mmx registers does a 32x32->64 multiply. */
/* INNERMUL (SSE2 variant): one digit step, _c[LO] += Mu * (*tmpm) + carry.
 *   movd    : mm4 = _c[LO]
 *   movd    : mm0 = *tmpm (tmpm is post-incremented by the operand expression)
 *   paddq   : mm3 += mm4            (carry + current digit)
 *   pmuludq : mm0 = mm0 * mm1       (modulus digit * Mu)
 *   paddq   : mm3 += mm0
 *   movd    : _c[LO] = low 32 bits of mm3
 *   psrlq   : mm3 >>= 32            (what remains is the carry for the next digit)
 * Note that the macro body already ends in ';'.
 */
#define INNERMUL \
asm( \
"movd %1,%%mm4 \n\t" \
"movd %2,%%mm0 \n\t" \
"paddq %%mm4,%%mm3 \n\t" \
"pmuludq %%mm1,%%mm0 \n\t" \
"paddq %%mm0,%%mm3 \n\t" \
"movd %%mm3,%0 \n\t" \
"psrlq $32, %%mm3 \n\t" \
:"=g"(_c[LO]) : "0"(_c[LO]), "g"(*tmpm++) );
/* LOOP_END (SSE2 variant): copy the carry accumulated in mm3 (low 32 bits)
 * into the C variable cy so the caller can propagate it with PROPCARRY.
 */
#define LOOP_END \
asm( "movd %%mm3,%0 \n" :"=r"(cy))
/* PROPCARRY (SSE2 variant, 32-bit digits): add cy into _c[LO] and leave the
 * carry-out of that addition in cy.
 *   addl   : _c[LO] += cy (sets the carry flag on wrap-around)
 *   setb   : al = carry flag
 *   movzbl : cy = al zero-extended, i.e. 0 or 1
 * eax and the condition codes are declared as clobbered.
 */
#define PROPCARRY \
asm( \
"addl %1,%0 \n\t" \
"setb %%al \n\t" \
"movzbl %%al,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy) \
: "%eax", "%cc")
/******************************************************************/
#elif defined(TFM_ARM)
/* ARMv4 code */
/* This backend needs no per-call setup/teardown and no end-of-round step,
 * so MONT_START, MONT_FINI and LOOP_END expand to nothing.
 */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* LOOP_START: compute this round's multiplier mu = c[x] * mp in plain C. */
#define LOOP_START \
mu = c[x] * mp
/* INNERMUL (ARM variant): one digit step, {cy:_c[0]} = _c[0] + cy + mu * (*tmpm).
 *   LDR         : r0 = _c[0]
 *   ADDS        : r0 += cy, updating the carry flag
 *   MOVCS/MOVCC : cy = 1 if that addition carried, else 0
 *   UMLAL       : 64-bit accumulate, {cy:r0} += mu * (*tmpm)
 *   STR         : _c[0] = r0
 * tmpm is post-incremented by the operand expression; r0 and the condition
 * codes are declared as clobbered.  Note that the macro body already ends
 * in ';'.
 */
#define INNERMUL \
asm( \
" LDR r0,%1 \n\t" \
" ADDS r0,r0,%0 \n\t" \
" MOVCS %0,#1 \n\t" \
" MOVCC %0,#0 \n\t" \
" UMLAL r0,%0,%3,%4 \n\t" \
" STR r0,%1 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","%cc");
/* PROPCARRY (ARM variant): add cy into _c[0] and leave the carry-out in cy.
 *   LDR         : r0 = _c[0]
 *   ADDS        : r0 += cy, updating the carry flag
 *   STR         : _c[0] = r0
 *   MOVCS/MOVCC : cy = 1 if the addition carried, else 0
 * r0 and the condition codes are declared as clobbered.  Note that the
 * macro body already ends in ';'.
 */
#define PROPCARRY \
asm( \
" LDR r0,%1 \n\t" \
" ADDS r0,r0,%0 \n\t" \
" STR r0,%1 \n\t" \
" MOVCS %0,#1 \n\t" \
" MOVCC %0,#0 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc");
#elif defined(TFM_PPC32)
/* PPC32 */
/* This backend needs no per-call setup/teardown and no end-of-round step,
 * so MONT_START, MONT_FINI and LOOP_END expand to nothing.
 */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* LOOP_START: compute this round's multiplier mu = c[x] * mp in plain C. */
#define LOOP_START \
mu = c[x] * mp
/* INNERMUL (PPC32 variant): one digit step, {cy:_c[0]} = _c[0] + cy + mu * (*tmpm).
 *   mullw  : r16 = low  word of mu * (*tmpm)
 *   mulhwu : r17 = high word of mu * (*tmpm) (unsigned)
 *   addc   : r16 += cy, recording the carry
 *   addze  : r17 += carry
 *   lwz    : r18 = _c[0]
 *   addc   : r16 += r18, recording the carry
 *   addze  : cy = r17 + carry
 *   stw    : _c[0] = r16
 * tmpm is post-incremented by the operand expression; r16, r17, r18 and the
 * condition codes are declared as clobbered.  Note that the macro body
 * already ends in ';'.
 */
#define INNERMUL \
asm( \
" mullw r16,%3,%4 \n\t" \
" mulhwu r17,%3,%4 \n\t" \
" addc r16,r16,%0 \n\t" \
" addze r17,r17 \n\t" \
" lwz r18,%1 \n\t" \
" addc r16,r16,r18 \n\t" \
" addze %0,r17 \n\t" \
" stw r16,%1 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r16", "r17", "r18","%cc");
/* PROPCARRY (PPC32 variant): add cy into _c[0] and leave the carry-out in cy.
 *   lwz   : r16 = _c[0]
 *   addc  : r16 += cy, recording the carry
 *   stw   : _c[0] = r16
 *   xor   : cy = 0
 *   addze : cy = 0 + carry recorded by the addc above
 * r16 and the condition codes are declared as clobbered.  Note that the
 * macro body already ends in ';'.
 */
#define PROPCARRY \
asm( \
" lwz r16,%1 \n\t" \
" addc r16,r16,%0 \n\t" \
" stw r16,%1 \n\t" \
" xor %0,%0,%0 \n\t" \
" addze %0,%0 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r16","%cc");
/******************************************************************/
#else
/* ISO C code */
/* Portable C backend.  The macros operate on the caller's locals by name:
 * c, x, mp, mu, _c, tmpm and cy.
 *
 * Nothing to set up, tear down or collect at the end of a round.
 */
#define MONT_START
#define MONT_FINI
#define LOOP_END

/* Multiplier for outer round x (digit arithmetic, so it wraps at the digit size). */
#define LOOP_START \
   mu = c[x] * mp

/* One digit step: _c[0] + cy + mu * (*tmpm) is formed in the double-width
 * type; the low digit goes back to _c[0], the bits above DIGIT_BIT become
 * the new cy, and tmpm advances to the next modulus digit.
 */
#define INNERMUL                                                   \
   do {                                                            \
      fp_word prod = ((fp_word)mu) * ((fp_word)*tmpm++);           \
      fp_word acc  = ((fp_word)_c[0] + (fp_word)cy) + prod;        \
      _c[0] = acc;                                                 \
      cy    = (acc >> DIGIT_BIT);                                  \
   } while (0)

/* Add cy into _c[0]; cy becomes 1 if the digit wrapped around, else 0. */
#define PROPCARRY                                                  \
   do {                                                            \
      _c[0] += cy;                                                 \
      cy = (_c[0] < cy);                                           \
   } while (0)
#endif
/******************************************************************/
/* Digit offsets relative to the moving pointer _c.  LO is the index used as
 * _c[LO] by the x86-64 and SSE2 macros above; HI and CY are not referenced
 * in the portion of the file visible here.
 */
#define LO 0
#define HI 1
#define CY 2
/* computes x/R == x (mod N) via Montgomery Reduction
 *
 * a  - value to reduce; overwritten in place with the result
 * m  - modulus N; its digit count (m->used) sets the number of rounds
 * mp - Montgomery constant for m (presumably the value produced by
 *      fp_montgomery_setup(); it is multiplied with c[x] to obtain the
 *      per-round multiplier mu)
 *
 * The work is done in a local FP_SIZE-digit buffer c[].  Each of the pa
 * outer rounds adds mu * m, shifted by x digits, into c[] and then ripples
 * the remaining carry upwards.  Afterwards digits c[pa..2*pa] are copied
 * back into a, and m is subtracted once if the result is still >= m.
 *
 * The per-architecture macros (MONT_START, LOOP_START, INNERMUL, LOOP_END,
 * PROPCARRY, MONT_FINI) refer to the locals c, x, mp, mu, _c, tmpm and cy
 * by name, so those identifiers must not be renamed.
 *
 * If the modulus is too large for the buffer the function returns without
 * touching a (there is no error return to report it through).
 */
void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
{
   fp_digit c[FP_SIZE], *_c, *tmpm, mu;
   int      oldused, x, y, pa;

   pa = m->used;

   /* bail if too large: the zero-fill below touches c[0..2*pa+2] and the
    * copy-out reads c[pa..2*pa], all of which must lie inside c[FP_SIZE] */
   if (2*pa + 3 > FP_SIZE) {
      return;
   }

#if defined(USE_MEMSET)
   /* now zero the buff */
   memset(c, 0, sizeof c);
#endif

   /* copy the input */
   oldused = a->used;
   for (x = 0; x < oldused; x++) {
       c[x] = a->dp[x];
   }
#if !defined(USE_MEMSET)
   /* clear the digits above the input that the reduction may touch */
   for (; x < 2*pa+3; x++) {
       c[x] = 0;
   }
#endif
   MONT_START;

   for (x = 0; x < pa; x++) {
       fp_digit cy = 0;
       /* get Mu for this round */
       LOOP_START;
       _c   = c + x;
       tmpm = m->dp;
       y = 0;
#if defined(TFM_X86_64)
       /* unrolled path: eight modulus digits per INNERMUL8; the macro does
        * not advance the pointers itself */
       for (; y < (pa & ~7); y += 8) {
           INNERMUL8;
           _c   += 8;
           tmpm += 8;
       }
#endif
       /* remaining digits one at a time; INNERMUL advances tmpm */
       for (; y < pa; y++) {
           INNERMUL;
           ++_c;
       }
       LOOP_END;
       /* ripple the leftover carry into the higher digits:
        * *_c += cy, then cy = 1 if that addition wrapped, else 0 */
       while (cy) {
           PROPCARRY;
           ++_c;
       }
   }

   /* now copy out */
   _c   = c + pa;
   tmpm = a->dp;
   for (x = 0; x < pa+1; x++) {
      *tmpm++ = *_c++;
   }

   /* clear whatever is left of the old, longer value */
   for (; x < oldused; x++)   {
      *tmpm++ = 0;
   }

   MONT_FINI;

   a->used = pa+1;
   fp_clamp(a);

   /* if A >= m then A = A - m */
   if (fp_cmp_mag (a, m) != FP_LT) {
      s_fp_sub (a, m, a);
   }
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -42,3 +42,7 @@ int fp_montgomery_setup(fp_int *a, fp_digit *rho)
return FP_OKAY;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -24,19 +24,26 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C)
inputs are not close to the next power of two. That is, for example,
if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications
*/
if (y <= 4) {
fp_mul_comba4(A,B,C);
} else if (y <= 8) {
fp_mul_comba8(A,B,C);
#if defined(TFM_LARGE)
} else if (y <= 16 && y >= 10) {
fp_mul_comba16(A,B,C);
#ifdef TFM_SMALL_SET
if (y <= 16) {
fp_mul_comba_small(A,B,C);
#elif defined(TFM_HUGE)
if (0) { 1;
#endif
#if defined(TFM_HUGE)
} else if (y <= 32 && y >= 24) {
} else if (y <= 32) {
fp_mul_comba32(A,B,C);
} else if (y <= 48) {
fp_mul_comba48(A,B,C);
} else if (y <= 64) {
fp_mul_comba64(A,B,C);
#endif
#if !defined(TFM_HUGE) && !defined(TFM_SMALL_SET)
{
#else
} else {
#endif
fp_mul_comba(A,B,C);
}
} else {
@ -44,7 +51,7 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C)
if A = ab and B = cd for ||a|| = r we need to solve
ac*r^2 + (-(a-b)(c-d) + ac + bd)*r + bd
ac*r^2 + ((a+b)(c+d) - (ac + bd))*r + bd
So we solve for the three products then we form the final result with careful shifting
and addition.
@ -72,7 +79,7 @@ Obvious points of optimization
} else {
t1.used = 0;
}
t1.sign = A->sign;
t1.sign = 0;
// fp_copy(B, &t2); fp_rshd(&t2, r);
for (s = 0; s < B->used - r; s++) {
@ -86,7 +93,7 @@ Obvious points of optimization
} else {
t2.used = 0;
}
t2.sign = B->sign;
t2.sign = 0;
fp_copy(&t1, &amb); fp_copy(&t2, &cmd);
fp_zero(&ac);
@ -100,7 +107,7 @@ Obvious points of optimization
t2.dp[s] = B->dp[s];
}
for (; s < FP_SIZE; s++) {
t1.dp[s] = 0;
t1.dp[s] = 0;
t2.dp[s] = 0;
}
t1.used = r;
@ -108,18 +115,17 @@ Obvious points of optimization
fp_clamp(&t1);
fp_clamp(&t2);
fp_sub(&amb, &t1, &amb); fp_sub(&cmd, &t2, &cmd);
s_fp_add(&amb, &t1, &amb); s_fp_add(&cmd, &t2, &cmd);
fp_zero(&bd);
fp_mul(&t1, &t2, &bd);
/* now get the (a-b)(c-d) term */
/* now get the (a+b)(c+d) term */
fp_zero(&comp);
fp_mul(&amb, &cmd, &comp);
/* now solve the system, do the middle term first */
comp.sign ^= 1;
fp_add(&comp, &ac, &comp);
fp_add(&comp, &bd, &comp);
s_fp_sub(&comp, &ac, &comp);
s_fp_sub(&comp, &bd, &comp);
fp_lshd(&comp, r);
/* leading term */
@ -134,3 +140,7 @@ Obvious points of optimization
}
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -61,3 +61,7 @@ void fp_mul_2(fp_int * a, fp_int * b)
b->sign = a->sign;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -41,3 +41,7 @@ void fp_mul_2d(fp_int *a, int b, fp_int *c)
fp_clamp(c);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

File diff suppressed because it is too large Load Diff

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -34,3 +34,7 @@ void fp_mul_d(fp_int *a, fp_digit b, fp_int *c)
fp_clamp(c);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
/* d = a * b (mod c) */
@ -16,3 +16,7 @@ int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
fp_mul(a, b, &tmp);
return fp_mod(&tmp, c, d);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -71,3 +71,7 @@ void fp_prime_miller_rabin (fp_int * a, fp_int * b, int *result)
/* probably prime now */
*result = FP_YES;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -95,3 +95,7 @@ error:
free(tmp);
return err;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -47,3 +47,7 @@ int fp_radix_size(fp_int *a, int radix, int *size)
return FP_OKAY;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -64,3 +64,7 @@ int fp_read_radix(fp_int *a, char *str, int radix)
}
return FP_OKAY;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -21,3 +21,7 @@ void fp_read_signed_bin(fp_int *a, unsigned char *b, int c)
a->sign = FP_NEG;
}
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -22,3 +22,7 @@ void fp_read_unsigned_bin(fp_int *a, unsigned char *b, int c)
}
fp_clamp (a);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -25,3 +25,7 @@ void bn_reverse (unsigned char *s, int len)
--iy;
}
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -34,3 +34,7 @@ void fp_rshd(fp_int *a, int x)
fp_clamp(a);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,9 +5,13 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
/* chars used in radix conversions */
const char *fp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -15,3 +15,7 @@ void fp_set(fp_int *a, fp_digit b)
a->dp[0] = b;
a->used = b ? 1 : 0;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -13,3 +13,7 @@ int fp_signed_bin_size(fp_int *a)
{
return 1 + fp_unsigned_bin_size (a);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -17,21 +17,26 @@ void fp_sqr(fp_int *A, fp_int *B)
y = A->used;
if (y <= 64) {
if (y <= 4) {
fp_sqr_comba4(A,B);
} else if (y <= 8) {
fp_sqr_comba8(A,B);
#if defined(TFM_LARGE)
} else if (y <= 16 && y >= 12) {
fp_sqr_comba16(A,B);
#if defined(TFM_SMALL_SET)
if (y <= 16) {
fp_sqr_comba_small(A,B);
#elif defined(TFM_HUGE)
if (0) { 1;
#endif
#if defined(TFM_HUGE)
} else if (y <= 32 && y >= 20) {
} else if (y <= 32) {
fp_sqr_comba32(A,B);
} else if (y <= 64 && y >= 48) {
} else if (y <= 48) {
fp_sqr_comba48(A,B);
} else if (y <= 64) {
fp_sqr_comba64(A,B);
#endif
#if !defined(TFM_SMALL_SET) && !defined(TFM_HUGE)
{
#else
} else {
#endif
fp_sqr_comba(A, B);
}
@ -109,3 +114,7 @@ Obvious points of optimization
}
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

File diff suppressed because it is too large Load Diff

View File

@ -1,3 +1,13 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
/* generic comba squarer */
void fp_sqr_comba(fp_int *A, fp_int *B)
{
@ -73,3 +83,7 @@ void fp_sqr_comba(fp_int *A, fp_int *B)
fp_copy(dst, B);
}
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -17,3 +17,7 @@ int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c)
fp_sqr(a, &tmp);
return fp_mod(&tmp, b, c);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -44,3 +44,7 @@ void fp_sub(fp_int *a, fp_int *b, fp_int *c)
}
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -16,3 +16,7 @@ void fp_sub_d(fp_int *a, fp_digit b, fp_int *c)
fp_set(&tmp, b);
fp_sub(a, &tmp, c);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -18,3 +18,7 @@ int fp_submod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
return fp_mod(&tmp, c, d);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -14,3 +14,7 @@ void fp_to_signed_bin(fp_int *a, unsigned char *b)
fp_to_unsigned_bin (a, b + 1);
b[0] = (unsigned char) ((a->sign == FP_ZPOS) ? 0 : 1);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -23,3 +23,7 @@ void fp_to_unsigned_bin(fp_int *a, unsigned char *b)
}
bn_reverse (b, x);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -53,3 +53,7 @@ int fp_toradix(fp_int *a, char *str, int radix)
*str = '\0';
return FP_OKAY;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -14,3 +14,7 @@ int fp_unsigned_bin_size(fp_int *a)
int size = fp_count_bits (a);
return (size / 8 + ((size & 7) != 0 ? 1 : 0));
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -10,7 +10,7 @@ CFLAGS += -Wall -W -Wshadow -I./ -O3 -funroll-all-loops
#speed
CFLAGS += -fomit-frame-pointer
VERSION=0.03
VERSION=0.04
default: libtfm.a
@ -42,9 +42,37 @@ fp_read_radix.o fp_toradix.o fp_radix_size.o fp_count_bits.o fp_reverse.o fp_s_r
\
fp_ident.o
libtfm.a: $(OBJECTS)
$(AR) $(ARFLAGS) libtfm.a $(OBJECTS)
ranlib libtfm.a
HEADERS=tfm.h
ifndef LIBPATH
LIBPATH=/usr/lib
endif
ifndef INCPATH
INCPATH=/usr/include
endif
ifndef TFM_GROUP
GROUP=wheel
endif
ifndef TFM_USER
USER=root
endif
ifndef LIBNAME
LIBNAME=libtfm.a
endif
$(LIBNAME): $(OBJECTS)
$(AR) $(ARFLAGS) $(LIBNAME) $(OBJECTS)
ranlib $(LIBNAME)
install: libtfm.a
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(LIBPATH)
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(INCPATH)
install -g $(GROUP) -o $(USER) $(LIBNAME) $(DESTDIR)$(LIBPATH)
install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH)
mtest/mtest: mtest/mtest.c
cd mtest ; make mtest
@ -52,8 +80,14 @@ mtest/mtest: mtest/mtest.c
test: libtfm.a demo/test.o mtest/mtest
$(CC) $(CFLAGS) demo/test.o libtfm.a $(PROF) -o test
timing: libtfm.a demo/test.o
$(CC) $(CFLAGS) demo/test.o libtfm.a $(PROF) -o test
stest: libtfm.a demo/stest.o
$(CC) demo/stest.o libtfm.a -o stest
$(CC) $(CFLAGS) demo/stest.o libtfm.a -o stest
rsatest: libtfm.a demo/rsa.o
$(CC) $(CFLAGS) demo/rsa.o libtfm.a -o rsatest
docdvi: tfm.tex
touch tfm.ind
@ -68,10 +102,15 @@ docs: docdvi
mv -f tfm.pdf doc
clean:
rm -f $(OBJECTS) *.a demo/*.o test tfm.aux tfm.dvi tfm.idx tfm.ilg tfm.ind tfm.lof tfm.log tfm.toc stest *~
rm -f $(OBJECTS) *.a demo/*.o test tfm.aux tfm.dvi tfm.idx tfm.ilg tfm.ind tfm.lof tfm.log tfm.toc stest *~ rsatest *.gcda *.gcno demo/*.gcda demo/*.gcno mtest/*.gcno mtest/*.gcda
cd mtest ; make clean
zipup: docs clean
no_oops: clean
cd .. ; cvs commit
echo Scanning for scratch/dirty files
find . -type f | grep -v CVS | xargs -n 1 bash mess.sh
zipup: no_oops docs clean
perl gen.pl ; mv mpi.c pre_gen/ ; \
cd .. ; rm -rf tfm* tomsfastmath-$(VERSION) ; mkdir tomsfastmath-$(VERSION) ; \
cp -R ./tomsfastmath/* ./tomsfastmath-$(VERSION)/ ; \

4
mess.sh Normal file
View File

@ -0,0 +1,4 @@
#!/bin/bash
if cvs log $1 >/dev/null 2>/dev/null; then exit 0; else echo "$1 shouldn't be here" ; exit 1; fi

View File

@ -3,7 +3,7 @@ CFLAGS += -Wall -W -O3
default: mtest
mtest: mtest.o
$(CC) mtest.o -ltommath -o mtest
$(CC) $(CFLAGS) mtest.o -ltommath -o mtest
clean:
rm -f *.o mtest *~

View File

@ -60,7 +60,7 @@ void rand_num2(mp_int *a)
int n, size;
unsigned char buf[2048];
size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % 32;
size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % 256;
buf[0] = (fgetc(rng)&1)?1:0;
fread(buf+1, 1, size, rng);
while (buf[1] == 0) buf[1] = fgetc(rng);
@ -317,3 +317,7 @@ int main(void)
fclose(rng);
return 0;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

File diff suppressed because it is too large Load Diff

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -35,3 +35,7 @@ void s_fp_add(fp_int *a, fp_int *b, fp_int *c)
}
fp_clamp(c);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
@ -29,3 +29,7 @@ void s_fp_sub(fp_int *a, fp_int *b, fp_int *c)
}
fp_clamp(c);
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

56
tfm.aux Normal file
View File

@ -0,0 +1,56 @@
\relax
\ifx\hyper@anchor\@undefined
\global \let \oldcontentsline\contentsline
\gdef \contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
\global \let \oldnewlabel\newlabel
\gdef \newlabel#1#2{\newlabelxx{#1}#2}
\gdef \newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
\AtEndDocument{\let \contentsline\oldcontentsline
\let \newlabel\oldnewlabel}
\else
\global \let \hyper@last\relax
\fi
\@writefile{toc}{\contentsline {chapter}{\numberline {1}Introduction}{1}{chapter.1}}
\@writefile{lof}{\addvspace {10\p@ }}
\@writefile{lot}{\addvspace {10\p@ }}
\@writefile{toc}{\contentsline {section}{\numberline {1.1}What is TomsFastMath?}{1}{section.1.1}}
\@writefile{toc}{\contentsline {section}{\numberline {1.2}License}{2}{section.1.2}}
\@writefile{toc}{\contentsline {section}{\numberline {1.3}Building}{2}{section.1.3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.1}Build Limitations}{2}{subsection.1.3.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.2}Optimization Configuration}{2}{subsection.1.3.2}}
\@writefile{toc}{\contentsline {subsubsection}{x86--32}{3}{section*.3}}
\@writefile{toc}{\contentsline {subsubsection}{SSE2}{3}{section*.4}}
\@writefile{toc}{\contentsline {subsubsection}{x86--64}{3}{section*.5}}
\@writefile{toc}{\contentsline {subsubsection}{ARM}{3}{section*.6}}
\@writefile{toc}{\contentsline {subsubsection}{PPC32}{3}{section*.7}}
\@writefile{toc}{\contentsline {subsubsection}{Future Releases}{4}{section*.8}}
\@writefile{lof}{\contentsline {figure}{\numberline {1.1}{\ignorespaces Recommended Build Modes}}{4}{figure.1.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.3}Precision Configuration}{4}{subsection.1.3.3}}
\@writefile{toc}{\contentsline {chapter}{\numberline {2}Getting Started}{5}{chapter.2}}
\@writefile{lof}{\addvspace {10\p@ }}
\@writefile{lot}{\addvspace {10\p@ }}
\@writefile{toc}{\contentsline {section}{\numberline {2.1}Data Types}{5}{section.2.1}}
\@writefile{toc}{\contentsline {section}{\numberline {2.2}Initialization}{6}{section.2.2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.1}Simple Initialization}{6}{subsection.2.2.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.2}Initialize Small Constants}{6}{subsection.2.2.2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.3}Initialize Copy}{6}{subsection.2.2.3}}
\@writefile{toc}{\contentsline {chapter}{\numberline {3}Arithmetic Operations}{7}{chapter.3}}
\@writefile{lof}{\addvspace {10\p@ }}
\@writefile{lot}{\addvspace {10\p@ }}
\@writefile{toc}{\contentsline {section}{\numberline {3.1}Odds and Evens}{7}{section.3.1}}
\@writefile{toc}{\contentsline {section}{\numberline {3.2}Sign Manipulation}{7}{section.3.2}}
\@writefile{toc}{\contentsline {section}{\numberline {3.3}Comparisons}{8}{section.3.3}}
\@writefile{toc}{\contentsline {section}{\numberline {3.4}Shifting}{8}{section.3.4}}
\@writefile{toc}{\contentsline {section}{\numberline {3.5}Basic Algebra}{9}{section.3.5}}
\@writefile{toc}{\contentsline {section}{\numberline {3.6}Modular Exponentiation}{9}{section.3.6}}
\@writefile{toc}{\contentsline {section}{\numberline {3.7}Number Theoretic}{9}{section.3.7}}
\@writefile{toc}{\contentsline {section}{\numberline {3.8}Prime Numbers}{10}{section.3.8}}
\@writefile{toc}{\contentsline {chapter}{\numberline {4}Porting TomsFastMath}{11}{chapter.4}}
\@writefile{lof}{\addvspace {10\p@ }}
\@writefile{lot}{\addvspace {10\p@ }}
\newlabel{chap:asmops}{{4}{11}{Porting TomsFastMath\relax }{chapter.4}{}}
\@writefile{toc}{\contentsline {section}{\numberline {4.1}Getting Started}{11}{section.4.1}}
\@writefile{toc}{\contentsline {section}{\numberline {4.2}Multiply with Comba}{11}{section.4.2}}
\@writefile{toc}{\contentsline {section}{\numberline {4.3}Squaring with Comba}{13}{section.4.3}}
\@writefile{toc}{\contentsline {section}{\numberline {4.4}Montgomery with Comba}{15}{section.4.4}}

BIN
tfm.dvi Normal file

Binary file not shown.

81
tfm.h
View File

@ -5,7 +5,7 @@
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@iahu.ca
* Tom St Denis, tomstdenis@gmail.com
*/
#ifndef TFM_H_
#define TFM_H_
@ -16,28 +16,44 @@
#include <ctype.h>
#include <limits.h>
#undef MIN
#define MIN(x,y) ((x)<(y)?(x):(y))
#undef MAX
#define MAX(x,y) ((x)>(y)?(x):(y))
#ifndef MIN
#define MIN(x,y) ((x)<(y)?(x):(y))
#endif
/* do we want large code? */
#define TFM_LARGE
#ifndef MAX
#define MAX(x,y) ((x)>(y)?(x):(y))
#endif
/* do we want huge code (implies large)? The answer is, yes. */
/* externally define this symbol to ignore the default settings, useful for changing the build from the make process */
#ifndef TFM_ALREADY_SET
/* do we want the large set of small multiplications ?
Enable these if you are going to be doing a lot of small (<= 16 digit) multiplications say in ECC
Or if you're on a 64-bit machine doing RSA as a 1024-bit integer == 16 digits ;-)
*/
#define TFM_SMALL_SET
/* do we want huge code
Enable these if you are doing 32, 48 or 64 digit multiplications (useful for RSA)
Less important on 64-bit machines as 32 digits == 2048 bits
*/
#define TFM_HUGE
/* imply TFM_LARGE as required */
#if defined(TFM_HUGE)
#if !defined(TFM_LARGE)
#define TFM_LARGE
#endif
/* do we want some overflow checks
Not required if you make sure your numbers are within range (e.g. by default a modulus for fp_exptmod() can only be up to 2048 bits long)
*/
/* #define TFM_CHECK */
/* Is the target a P4 Prescott
*/
/* #define TFM_PRESCOTT */
#endif
/* Max size of any number in bits. Basically the largest size you will be multiplying
* should be half [or smaller] of FP_MAX_SIZE-four_digit
*
* You can externally define this or it defaults to 4096-bits.
* You can externally define this or it defaults to 4096-bits [allowing multiplications up to 2048x2048 bits]
*/
#ifndef FP_MAX_SIZE
#define FP_MAX_SIZE (4096+(4*DIGIT_BIT))
@ -76,9 +92,9 @@
#endif
#endif
/* make sure we're 32-bit for x86-32/sse/arm */
#if (defined(TFM_X86) || defined(TFM_SSE2) || defined(TFM_ARM)) && defined(FP_64BIT)
#warning x86-32, SSE2 and ARM optimizations require 32-bit digits (undefining)
/* make sure we're 32-bit for x86-32/sse/arm/ppc32 */
#if (defined(TFM_X86) || defined(TFM_SSE2) || defined(TFM_ARM) || defined(TFM_PPC32)) && defined(FP_64BIT)
#warning x86-32, SSE2 and ARM, PPC32 optimizations require 32-bit digits (undefining)
#undef FP_64BIT
#endif
@ -104,6 +120,12 @@
#endif
#define TFM_ASM
#endif
#ifdef TFM_PPC32
#ifdef TFM_ASM
#error TFM_ASM already defined!
#endif
#define TFM_ASM
#endif
/* we want no asm? */
#ifdef TFM_NO_ASM
@ -111,6 +133,7 @@
#undef TFM_X86_64
#undef TFM_SSE2
#undef TFM_ARM
#undef TFM_PPC32
#undef TFM_ASM
#endif
@ -179,8 +202,8 @@ const char *fp_ident(void);
/* zero/even/odd ? */
#define fp_iszero(a) (((a)->used == 0) ? FP_YES : FP_NO)
#define fp_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? FP_YES : FP_NO)
#define fp_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? FP_YES : FP_NO)
#define fp_iseven(a) (((a)->used >= 0 && (((a)->dp[0] & 1) == 0)) ? FP_YES : FP_NO)
#define fp_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? FP_YES : FP_NO)
/* set to a small digit */
void fp_set(fp_int *a, fp_digit b);
@ -335,24 +358,22 @@ void bn_reverse(unsigned char *s, int len);
void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C);
#ifdef TFM_HUGE
void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C);
void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C);
void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_LARGE
void fp_mul_comba16(fp_int *A, fp_int *B, fp_int *C);
#endif
void fp_mul_comba8(fp_int *A, fp_int *B, fp_int *C);
void fp_mul_comba4(fp_int *A, fp_int *B, fp_int *C);
void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C);
void fp_sqr_comba(fp_int *A, fp_int *B);
void fp_sqr_comba4(fp_int *A, fp_int *B);
void fp_sqr_comba8(fp_int *A, fp_int *B);
#ifdef TFM_LARGE
void fp_sqr_comba16(fp_int *A, fp_int *B);
#endif
void fp_sqr_comba_small(fp_int *A, fp_int *B);
#ifdef TFM_HUGE
void fp_sqr_comba32(fp_int *A, fp_int *B);
void fp_sqr_comba48(fp_int *A, fp_int *B);
void fp_sqr_comba64(fp_int *A, fp_int *B);
#endif
extern const char *fp_s_rmap;
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

29
tfm.idx Normal file
View File

@ -0,0 +1,29 @@
\indexentry{fp\_init|hyperpage}{6}
\indexentry{fp\_set|hyperpage}{6}
\indexentry{fp\_init\_copy|hyperpage}{6}
\indexentry{fp\_iszero|hyperpage}{7}
\indexentry{fp\_iseven|hyperpage}{7}
\indexentry{fp\_isodd|hyperpage}{7}
\indexentry{fp\_neg|hyperpage}{7}
\indexentry{fp\_abs|hyperpage}{7}
\indexentry{fp\_cmp|hyperpage}{8}
\indexentry{fp\_cmp\_mag|hyperpage}{8}
\indexentry{fp\_lshd|hyperpage}{8}
\indexentry{fp\_rshd|hyperpage}{8}
\indexentry{fp\_div\_2d|hyperpage}{8}
\indexentry{fp\_mod\_2d|hyperpage}{8}
\indexentry{fp\_mul\_2d|hyperpage}{8}
\indexentry{fp\_div\_2|hyperpage}{8}
\indexentry{fp\_mul\_2|hyperpage}{8}
\indexentry{fp\_cnt\_lsb|hyperpage}{8}
\indexentry{fp\_add|hyperpage}{9}
\indexentry{fp\_sub|hyperpage}{9}
\indexentry{fp\_mul|hyperpage}{9}
\indexentry{fp\_sqr|hyperpage}{9}
\indexentry{fp\_div|hyperpage}{9}
\indexentry{fp\_mod|hyperpage}{9}
\indexentry{fp\_exptmod|hyperpage}{9}
\indexentry{fp\_invmod|hyperpage}{9}
\indexentry{fp\_gcd|hyperpage}{9}
\indexentry{fp\_lcm|hyperpage}{9}
\indexentry{fp\_isprime|hyperpage}{10}

6
tfm.ilg Normal file
View File

@ -0,0 +1,6 @@
This is makeindex, version 2.14 [02-Oct-2002] (kpathsea + Thai support).
Scanning input file tfm.idx....done (29 entries accepted, 0 rejected).
Sorting entries....done (137 comparisons).
Generating output file tfm.ind....done (33 lines written, 0 warnings).
Output written in tfm.ind.
Transcript written in tfm.ilg.

33
tfm.ind Normal file
View File

@ -0,0 +1,33 @@
\begin{theindex}
\item fp\_abs, \hyperpage{7}
\item fp\_add, \hyperpage{9}
\item fp\_cmp, \hyperpage{8}
\item fp\_cmp\_mag, \hyperpage{8}
\item fp\_cnt\_lsb, \hyperpage{8}
\item fp\_div, \hyperpage{9}
\item fp\_div\_2, \hyperpage{8}
\item fp\_div\_2d, \hyperpage{8}
\item fp\_exptmod, \hyperpage{9}
\item fp\_gcd, \hyperpage{9}
\item fp\_init, \hyperpage{6}
\item fp\_init\_copy, \hyperpage{6}
\item fp\_invmod, \hyperpage{9}
\item fp\_iseven, \hyperpage{7}
\item fp\_isodd, \hyperpage{7}
\item fp\_isprime, \hyperpage{10}
\item fp\_iszero, \hyperpage{7}
\item fp\_lcm, \hyperpage{9}
\item fp\_lshd, \hyperpage{8}
\item fp\_mod, \hyperpage{9}
\item fp\_mod\_2d, \hyperpage{8}
\item fp\_mul, \hyperpage{9}
\item fp\_mul\_2, \hyperpage{8}
\item fp\_mul\_2d, \hyperpage{8}
\item fp\_neg, \hyperpage{7}
\item fp\_rshd, \hyperpage{8}
\item fp\_set, \hyperpage{6}
\item fp\_sqr, \hyperpage{9}
\item fp\_sub, \hyperpage{9}
\end{theindex}

5
tfm.lof Normal file
View File

@ -0,0 +1,5 @@
\addvspace {10\p@ }
\contentsline {figure}{\numberline {1.1}{\ignorespaces Recommended Build Modes}}{4}{figure.1.1}
\addvspace {10\p@ }
\addvspace {10\p@ }
\addvspace {10\p@ }

332
tfm.log Normal file
View File

@ -0,0 +1,332 @@
This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) (format=latex 2005.4.10) 23 JUL 2005 07:42
entering extended mode
**tfm
(./tfm.tex
LaTeX2e <2003/12/01>
Babel <v3.8d> and hyphenation patterns for american, french, german, ngerman, b
ahasa, basque, bulgarian, catalan, croatian, czech, danish, dutch, esperanto, e
stonian, finnish, greek, icelandic, irish, italian, latin, magyar, norsk, polis
h, portuges, romanian, russian, serbian, slovak, slovene, spanish, swedish, tur
kish, ukrainian, nohyphenation, loaded.
(/usr/share/texmf/tex/latex/base/book.cls
Document Class: book 2004/02/16 v1.4f Standard LaTeX document class
(/usr/share/texmf/tex/latex/base/bk10.clo
File: bk10.clo 2004/02/16 v1.4f Standard LaTeX file (size option)
)
\c@part=\count79
\c@chapter=\count80
\c@section=\count81
\c@subsection=\count82
\c@subsubsection=\count83
\c@paragraph=\count84
\c@subparagraph=\count85
\c@figure=\count86
\c@table=\count87
\abovecaptionskip=\skip41
\belowcaptionskip=\skip42
\bibindent=\dimen102
)
(/usr/share/texmf/tex/latex/hyperref/hyperref.sty
Package: hyperref 2003/11/30 v6.74m Hypertext links for LaTeX
(/usr/share/texmf/tex/latex/graphics/keyval.sty
Package: keyval 1999/03/16 v1.13 key=value parser (DPC)
\KV@toks@=\toks14
)
\@linkdim=\dimen103
\Hy@linkcounter=\count88
\Hy@pagecounter=\count89
(/usr/share/texmf/tex/latex/hyperref/pd1enc.def
File: pd1enc.def 2003/11/30 v6.74m Hyperref: PDFDocEncoding definition (HO)
)
(/usr/share/texmf/tex/latex/hyperref/hyperref.cfg
File: hyperref.cfg 2002/06/06 v1.2 hyperref configuration of TeXLive and teTeX
)
Package hyperref Info: Hyper figures OFF on input line 1880.
Package hyperref Info: Link nesting OFF on input line 1885.
Package hyperref Info: Hyper index ON on input line 1888.
Package hyperref Info: Plain pages ON on input line 1893.
Package hyperref Info: Backreferencing OFF on input line 1900.
Implicit mode ON; LaTeX internals redefined
Package hyperref Info: Bookmarks ON on input line 2004.
(/usr/share/texmf/tex/latex/html/url.sty
Package: url 1999/03/02 ver 1.4 Verb mode for urls, email addresses, and file
names
)
LaTeX Info: Redefining \url on input line 2143.
\Fld@menulength=\count90
\Field@Width=\dimen104
\Fld@charsize=\dimen105
\Choice@toks=\toks15
\Field@toks=\toks16
Package hyperref Info: Hyper figures OFF on input line 2618.
Package hyperref Info: Link nesting OFF on input line 2623.
Package hyperref Info: Hyper index ON on input line 2626.
Package hyperref Info: backreferencing OFF on input line 2633.
Package hyperref Info: Link coloring OFF on input line 2638.
\c@Item=\count91
\c@Hfootnote=\count92
)
*hyperref using default driver hypertex*
(/usr/share/texmf/tex/latex/hyperref/hypertex.def
File: hypertex.def 2003/11/30 v6.74m Hyperref driver for HyperTeX specials
)
(/usr/share/texmf/tex/latex/base/makeidx.sty
Package: makeidx 2000/03/29 v1.0m Standard LaTeX package
)
(/usr/share/texmf/tex/latex/amsfonts/amssymb.sty
Package: amssymb 2002/01/22 v2.2d
(/usr/share/texmf/tex/latex/amsfonts/amsfonts.sty
Package: amsfonts 2001/10/25 v2.2f
\@emptytoks=\toks17
\symAMSa=\mathgroup4
\symAMSb=\mathgroup5
LaTeX Font Info: Overwriting math alphabet `\mathfrak' in version `bold'
(Font) U/euf/m/n --> U/euf/b/n on input line 132.
))
(/usr/share/texmf/tex/latex/graphics/color.sty
Package: color 1999/02/16 v1.0i Standard LaTeX Color (DPC)
(/usr/share/texmf/tex/latex/graphics/color.cfg
File: color.cfg 2005/02/03 v1.3 color configuration of teTeX/TeXLive
)
Package color Info: Driver file: dvips.def on input line 125.
(/usr/share/texmf/tex/latex/graphics/dvips.def
File: dvips.def 1999/02/16 v3.0i Driver-dependant file (DPC,SPQR)
)
(/usr/share/texmf/tex/latex/graphics/dvipsnam.def
File: dvipsnam.def 1999/02/16 v3.0i Driver-dependant file (DPC,SPQR)
))
(/usr/share/texmf/tex/latex/base/alltt.sty
Package: alltt 1997/06/16 v2.0g defines alltt environment
)
(/usr/share/texmf/tex/latex/graphics/graphicx.sty
Package: graphicx 1999/02/16 v1.0f Enhanced LaTeX Graphics (DPC,SPQR)
(/usr/share/texmf/tex/latex/graphics/graphics.sty
Package: graphics 2001/07/07 v1.0n Standard LaTeX Graphics (DPC,SPQR)
(/usr/share/texmf/tex/latex/graphics/trig.sty
Package: trig 1999/03/16 v1.09 sin cos tan (DPC)
)
(/usr/share/texmf/tex/latex/graphics/graphics.cfg
File: graphics.cfg 2005/02/03 v1.3 graphics configuration of teTeX/TeXLive
)
Package graphics Info: Driver file: dvips.def on input line 80.
)
\Gin@req@height=\dimen106
\Gin@req@width=\dimen107
)
(/usr/share/texmf/tex/latex/tools/layout.sty
Package: layout 2000/09/25 v1.2c Show layout parameters
\oneinch=\count93
\cnt@paperwidth=\count94
\cnt@paperheight=\count95
\cnt@hoffset=\count96
\cnt@voffset=\count97
\cnt@textheight=\count98
\cnt@textwidth=\count99
\cnt@topmargin=\count100
\cnt@oddsidemargin=\count101
\cnt@evensidemargin=\count102
\cnt@headheight=\count103
\cnt@headsep=\count104
\cnt@marginparsep=\count105
\cnt@marginparwidth=\count106
\cnt@marginparpush=\count107
\cnt@footskip=\count108
\fheight=\count109
\ref@top=\count110
\ref@hoffset=\count111
\ref@voffset=\count112
\ref@head=\count113
\ref@body=\count114
\ref@foot=\count115
\ref@margin=\count116
\ref@marginwidth=\count117
\ref@marginpar=\count118
\Interval=\count119
\ExtraYPos=\count120
\PositionX=\count121
\PositionY=\count122
\ArrowLength=\count123
)
\@indexfile=\write3
\openout3 = `tfm.idx'.
Writing index file tfm.idx
(./tfm.aux)
\openout1 = `tfm.aux'.
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 49.
LaTeX Font Info: ... okay on input line 49.
LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 49.
LaTeX Font Info: ... okay on input line 49.
LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 49.
LaTeX Font Info: ... okay on input line 49.
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 49.
LaTeX Font Info: ... okay on input line 49.
LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 49.
LaTeX Font Info: ... okay on input line 49.
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 49.
LaTeX Font Info: ... okay on input line 49.
LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 49.
LaTeX Font Info: ... okay on input line 49.
Package hyperref Info: Link coloring OFF on input line 49.
(/usr/share/texmf/tex/latex/hyperref/nameref.sty
Package: nameref 2003/12/03 v2.21 Cross-referencing by name of section
\c@section@level=\count124
)
LaTeX Info: Redefining \ref on input line 49.
LaTeX Info: Redefining \pageref on input line 49.
LaTeX Font Info: Try loading font information for U+msa on input line 55.
(/usr/share/texmf/tex/latex/amsfonts/umsa.fd
File: umsa.fd 2002/01/19 v2.2g AMS font definitions
)
LaTeX Font Info: Try loading font information for U+msb on input line 55.
(/usr/share/texmf/tex/latex/amsfonts/umsb.fd
File: umsb.fd 2002/01/19 v2.2g AMS font definitions
) [1
] [2] (./tfm.toc [3
])
\tf@toc=\write4
\openout4 = `tfm.toc'.
[4]
(./tfm.lof)
\tf@lof=\write5
\openout5 = `tfm.lof'.
[5
] [6
]
Chapter 1.
[1
] [2] [3] [4]
Chapter 2.
Underfull \vbox (badness 7649) has occurred while \output is active []
[5
]
[6]
Chapter 3.
[7
] [8] [9] [10]
Chapter 4.
[11
] [12] [13]
Overfull \hbox (74.99634pt too wide) in paragraph at lines 547--547
[]\OT1/cmtt/m/n/10 #define SQRADDSC(i, j)
\[]
[]
Overfull \hbox (74.99634pt too wide) in paragraph at lines 547--547
[] \OT1/cmtt/m/n/10 do { fp_word t;
\[]
[]
Overfull \hbox (74.99634pt too wide) in paragraph at lines 547--547
[] \OT1/cmtt/m/n/10 t = ((fp_word)i) * ((fp_word)j);
\[]
[]
Overfull \hbox (74.99634pt too wide) in paragraph at lines 547--547
[] \OT1/cmtt/m/n/10 sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0;
\[]
[]
Overfull \hbox (25.129pt too wide) in paragraph at lines 548--549
\OT1/cmr/m/n/10 This com-putes a prod-uct and stores it in the ``sec-ondary'' c
arry reg-is-ters $[]$.
[]
Overfull \hbox (74.99634pt too wide) in paragraph at lines 556--556
[]\OT1/cmtt/m/n/10 #define SQRADDAC(i, j)
\[]
[]
Overfull \hbox (74.99634pt too wide) in paragraph at lines 556--556
[] \OT1/cmtt/m/n/10 do { fp_word t;
\[]
[]
Overfull \hbox (74.99634pt too wide) in paragraph at lines 556--556
[] \OT1/cmtt/m/n/10 t = sc0 + ((fp_word)i) * ((fp_word)j); sc0 = t;
\[]
[]
Overfull \hbox (74.99634pt too wide) in paragraph at lines 556--556
[] \OT1/cmtt/m/n/10 t = sc1 + (t >> DIGIT_BIT); sc1 = t; sc2 += t
>> DIGIT_BIT; \[]
[]
Overfull \hbox (74.99634pt too wide) in paragraph at lines 566--566
[]\OT1/cmtt/m/n/10 #define SQRADDDB
\[]
[]
Overfull \hbox (74.99634pt too wide) in paragraph at lines 566--566
[] \OT1/cmtt/m/n/10 do { fp_word t;
\[]
[]
Overfull \hbox (190.49533pt too wide) in paragraph at lines 566--566
[] \OT1/cmtt/m/n/10 t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = t;
\[]
[]
Overfull \hbox (190.49533pt too wide) in paragraph at lines 566--566
[] \OT1/cmtt/m/n/10 t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BI
T); c1 = t; \[]
[]
Overfull \hbox (190.49533pt too wide) in paragraph at lines 566--566
[] \OT1/cmtt/m/n/10 c2 = c2 + ((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_B
IT); \[]
[]
[14] [15] (./tfm.ind [16] [17
]) (./tfm.aux) )
Here is how much of TeX's memory you used:
2712 strings out of 49501
35892 string characters out of 426789
81342 words of memory out of 1100000
5856 multiletter control sequences out of 10000+15000
15453 words of font info for 59 fonts, out of 400000 for 2000
580 hyphenation exceptions out of 1000
25i,9n,25p,195b,321s stack positions out of 1500i,500n,1500p,200000b,5000s
Output written on tfm.dvi (23 pages, 49708 bytes).

48
tfm.tex
View File

@ -49,7 +49,7 @@
\begin{document}
\frontmatter
\pagestyle{empty}
\title{TomsFastMath User Manual \\ v0.03}
\title{TomsFastMath User Manual \\ v0.04}
\author{Tom St Denis \\ tomstdenis@iahu.ca}
\maketitle
This text and library are all hereby placed in the public domain. This book has been formatted for B5
@ -143,6 +143,10 @@ TFM\_X86 and TFM\_SSE2 at the same time. This mode only works with 32--bit dig
mode fp\_digit is 32--bits and fp\_word is 64--bits. While this mode will work on the AMD Athlon64
series of processors it is less efficient than the native ``x86--64'' mode and not recommended.
There is an additional ``TFM\_PRESCOTT'' flag that you can define for P4 Prescott processors. This causes
the mul/sqr functions to use x86\_32 and the montgomery reduction to use SSE2 which is (so far) the fastest
combination. If you are using an older generation P4 (e.g. Northwood), do not define this flag.
\subsubsection{x86--64} The ``x86--64'' mode is defined by ``TFM\_X86\_64'' and requires an
``x86--64'' capable processor (Athlon64 and future Pentium processors). It requires GCC to
build and only works with 64--bit digits. Note that by enabling this mode it will automatically
@ -150,12 +154,16 @@ enable 64--bit digits. In this mode fp\_digit is 64--bits and fp\_word is 128--
be autodetected when building with GCC to an ``x86--64'' target. You can override this behaviour by defining
TFM\_NO\_ASM.
\subsubsection{ARM} The ``ARM'' mode is defined by ``TFM\_ARM'' and requires a ARMv4 or higher
processor. It requires GCC and works with 32--bit digits. In this mode fp\_digit is 32--bits and
\subsubsection{ARM} The ``ARM'' mode is defined by ``TFM\_ARM'' and requires an ARMv4 or higher processor with the M instructions (enhanced
multipliers). It requires GCC and works with 32--bit digits. In this mode fp\_digit is 32--bits and
fp\_word is 64--bits.
\subsubsection{PPC32} The ``PPC32'' mode is defined by ``TFM\_PPC32'' and requires a standard PPC processor. It doesn't
use altivec or other extensions so it should work on all compliant implementations of PPC. It requires GCC and works
with 32--bit digits. In this mode fp\_digit is 32--bits and fp\_word is 64--bits.
\subsubsection{Future Releases} Future releases will support additional platform optimizations.
Developers of MIPS and PPC platforms are encouraged to submit GCC asm inline patches
Developers of MIPS and SPARC platforms are encouraged to submit GCC asm inline patches
(see chapter \ref{chap:asmops} for more information).
\begin{figure}[here]
@ -165,8 +173,10 @@ Developers of MIPS and PPC platforms are encouraged to submit GCC asm inline pat
\hline \textbf{Processor} & \textbf{Recommended Mode} \\
\hline All 32--bit x86 platforms & TFM\_X86 \\
\hline Pentium 4 & TFM\_SSE2 \\
\hline Pentium 4 Prescott & TFM\_SSE2 + TFM\_PRESCOTT \\
\hline Athlon64 & TFM\_X86\_64 \\
\hline ARMv4 or higher & TFM\_ARM \\
\hline ARMv4 or higher with M & TFM\_ARM \\
\hline G3/G4 (32-bit PPC) & TFM\_PPC32 \\
\hline &\\
\hline x86--32 or x86--64 (with GCC) & Leave blank and let autodetect work \\
\hline
@ -589,26 +599,26 @@ This computes the $\mu$ value for the inner loop. You can safely alias $mu$ and
a register if you want.
\begin{verbatim}
#define INNERMUL \
t = ((fp_word)mu) * ((fp_word)*tmpm++); \
_c[OFF0] += t; \
if (_c[OFF0] < (fp_digit)t) ++_c[OFF1]; \
_c[OFF1] += (t>>DIGIT_BIT); \
if (_c[OFF1] < (fp_digit)(t>>DIGIT_BIT)) ++_c[OFF2];
#define INNERMUL \
do { fp_word t; \
_c[0] = t = ((fp_word)_c[0] + (fp_word)cy) + \
(((fp_word)mu) * ((fp_word)*tmpm++)); \
cy = (t >> DIGIT_BIT); \
} while (0)
\end{verbatim}
This computes the inner product and adds it to the correct set of carry variables. The variable
$\_c$ is a pointer alias to $c[x+y]$ and used to simplify the code.
This computes the inner product, adds it along with the carry $cy$ to the destination digit, and stores the upper half of the sum back in $cy$.
This uses the $mu$ value computed above (can be in a register already) and the
$cy$ which is a chaining carry. Inside the INNERMUL loop the $cy$ value can be kept
inside a register (hint: it always starts as $cy = 0$ in the first iteration).
You can safely alias $\_c$ to a register for INNERMUL by setting it equal to ``c + x''
\footnote{Where ``c'' is an array on the stack.} by modifying LOOP\_START.
Upon completion of the inner loop, the macro LOOP\_END is called, which is used to fetch
$cy$ into the variable the C program can see. This is where, if you cached $cy$ in a
register, you would copy it to the locally accessible C variable.
\begin{verbatim}
#define PROPCARRY \
_c[OFF0+1] += _c[OFF1]; \
if (_c[OFF0+1] < _c[OFF1]) ++_c[OFF1+1]; \
_c[OFF1+1] += _c[OFF2]; \
if (_c[OFF1+1] < _c[OFF2]) ++_c[OFF2+1];
do { fp_digit t = _c[0] += cy; cy = (t < cy); } while (0)
\end{verbatim}
This propagates the carry upwards by one digit.

33
tfm.toc Normal file
View File

@ -0,0 +1,33 @@
\contentsline {chapter}{\numberline {1}Introduction}{1}{chapter.1}
\contentsline {section}{\numberline {1.1}What is TomsFastMath?}{1}{section.1.1}
\contentsline {section}{\numberline {1.2}License}{2}{section.1.2}
\contentsline {section}{\numberline {1.3}Building}{2}{section.1.3}
\contentsline {subsection}{\numberline {1.3.1}Build Limitations}{2}{subsection.1.3.1}
\contentsline {subsection}{\numberline {1.3.2}Optimization Configuration}{2}{subsection.1.3.2}
\contentsline {subsubsection}{x86--32}{3}{section*.3}
\contentsline {subsubsection}{SSE2}{3}{section*.4}
\contentsline {subsubsection}{x86--64}{3}{section*.5}
\contentsline {subsubsection}{ARM}{3}{section*.6}
\contentsline {subsubsection}{PPC32}{3}{section*.7}
\contentsline {subsubsection}{Future Releases}{4}{section*.8}
\contentsline {subsection}{\numberline {1.3.3}Precision Configuration}{4}{subsection.1.3.3}
\contentsline {chapter}{\numberline {2}Getting Started}{5}{chapter.2}
\contentsline {section}{\numberline {2.1}Data Types}{5}{section.2.1}
\contentsline {section}{\numberline {2.2}Initialization}{6}{section.2.2}
\contentsline {subsection}{\numberline {2.2.1}Simple Initialization}{6}{subsection.2.2.1}
\contentsline {subsection}{\numberline {2.2.2}Initialize Small Constants}{6}{subsection.2.2.2}
\contentsline {subsection}{\numberline {2.2.3}Initialize Copy}{6}{subsection.2.2.3}
\contentsline {chapter}{\numberline {3}Arithmetic Operations}{7}{chapter.3}
\contentsline {section}{\numberline {3.1}Odds and Evens}{7}{section.3.1}
\contentsline {section}{\numberline {3.2}Sign Manipulation}{7}{section.3.2}
\contentsline {section}{\numberline {3.3}Comparisons}{8}{section.3.3}
\contentsline {section}{\numberline {3.4}Shifting}{8}{section.3.4}
\contentsline {section}{\numberline {3.5}Basic Algebra}{9}{section.3.5}
\contentsline {section}{\numberline {3.6}Modular Exponentiation}{9}{section.3.6}
\contentsline {section}{\numberline {3.7}Number Theoretic}{9}{section.3.7}
\contentsline {section}{\numberline {3.8}Prime Numbers}{10}{section.3.8}
\contentsline {chapter}{\numberline {4}Porting TomsFastMath}{11}{chapter.4}
\contentsline {section}{\numberline {4.1}Getting Started}{11}{section.4.1}
\contentsline {section}{\numberline {4.2}Multiply with Comba}{11}{section.4.2}
\contentsline {section}{\numberline {4.3}Squaring with Comba}{13}{section.4.3}
\contentsline {section}{\numberline {4.4}Montgomery with Comba}{15}{section.4.4}