forked from ibphoenix/tomsfastmath
added tomsfastmath-0.04
This commit is contained in:
parent
ca551d4c5e
commit
f91cf2d1cf
20
TODO
20
TODO
@ -1,20 +0,0 @@
|
||||
---
|
||||
0. IMPORTANT... why are you doubling the "even" terms individually? STUPID!
|
||||
- make it so you have four new macros that use an additional 3 carry variables
|
||||
- SQRADDSC - store first mult [ simple store, no carry ]
|
||||
- SQRADDAC - add subsequent mults [ 3n word add ]
|
||||
- SQRADDDB - double the carry [ 3n word add ]
|
||||
- SQRADDFC - forward the doubles into the main [ 3n word add, note, x86_32 may need "g" instead of "r" ]
|
||||
- only use the four macro pattern for rows with >= 3 "doubles"
|
||||
- otherwise use the existing SQRADD
|
||||
|
||||
|
||||
1. Write more documentation ;-)
|
||||
2. Ports to PPC and MIPS
|
||||
3. Fix any lingering bugs, add additional requested functionality.
|
||||
4. Unrolled copies of montgomery will speed it up a bit
|
||||
5.
|
||||
|
||||
|
||||
NOTE: The library is still fairly new. I've tested it quite a bit but that doesn't mean surprises
|
||||
can't happen. Please test the results you get for correctness.
|
22
changes.txt
22
changes.txt
@ -1,7 +1,25 @@
|
||||
0.04 -- Fixed bugs in the SSE2 squaring code
|
||||
-- Rewrote the multipliers to be optimized for small inputs
|
||||
-- Nelson Bolyard of the NSS crew submitted [among other things] new faster Montgomery reduction
|
||||
code. It brings the performance for small numbers on the AMD64 and all numbers on the P4
|
||||
to a new level. Thanks!
|
||||
-- Added missing ARM support for fp_montgomery_reduce.c that the NSS folk left off. Officially,
|
||||
the ARM code is for v4 and above WITH the "M" multiplier support (e.g. umlal instruction)
|
||||
-- Added PPC32 support; define TFM_PPC32 to enable it. I used the "PowerPC 6xx" instruction
|
||||
databook for reference. Does not require altivec. Should be fairly portable to the other
|
||||
32-bit PPCs provided they have mullw and mulhwu instructions.
|
||||
[Note: porting the macros to PPC64 should be trivial, anyone with a shell to lend... email me!]
|
||||
-- Rewrote the config a bit in tfm.h so you can better choose which set of "oh my god that's huge" code to
|
||||
enable for your task. "generic" functions are ALWAYS included which are smaller but will cover the
|
||||
gaps in the coverage for ya.
|
||||
-- The PPC32 code has been verified to function on a Darwin box running GCC 2.95.2
|
||||
[Thanks to the folk at PeerSec for lending me a shell to use]
|
||||
-- Fixed a bug in fp_exptmod() where, if the exponent was negative AND was also the destination, the output
|
||||
would have the sign set to FP_NEG.
|
||||
|
||||
March 1st, 2005
|
||||
0.03 -- Optimized squaring
|
||||
--
|
||||
|
||||
-- Applied new license header to all files (still PD)
|
||||
|
||||
September 18th, 2004
|
||||
0.02 -- Added TFM_LARGE to turn on/off 16x combas to save even more space.
|
||||
|
81
comba_mont_gen.c
Normal file
81
comba_mont_gen.c
Normal file
@ -0,0 +1,81 @@
|
||||
/* generate montgomery reductions for m->used = 1...16 */
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/*
 * Generator: prints to stdout the C source of sixteen Montgomery reduction
 * routines named fp_montgomery_reduce_1 ... fp_montgomery_reduce_16.
 *
 * Each emitted routine has its digit count baked in as a literal: the outer
 * and inner loop bounds, the carry fix-up range, the copy-out offset and the
 * final a->used all use that constant.  The emitted text refers to the macros
 * MONT_START, LOOP_START, INNERMUL, PROPCARRY and MONT_FINI but does not
 * define them.
 *
 * Always returns 0.
 */
int main(void)
{
   int digits = 1;

   while (digits <= 16) {
      /* prototype line: the digit count is part of the function name */
      printf("void fp_montgomery_reduce_%d(fp_int *a, fp_int *m, fp_digit mp)\n", digits);

      /* locals, scratch clearing and input copy: the same text for every size */
      fputs(
"{\n"
" fp_digit c[3*FP_SIZE], *_c, *tmpm, mu;\n"
" int oldused, x, y;\n"
"\n"
" /* now zero the buff */\n"
" memset(c, 0, sizeof(c));\n"
"\n"
" /* copy the input */\n"
" oldused = a->used;\n"
" for (x = 0; x < oldused; x++) {\n"
" c[x] = a->dp[x];\n"
" }\n"
"\n"
" MONT_START;\n"
"\n"
" /* now let's get bizz-sy! */\n",
         stdout);

      /* reduction loop: outer and inner bound are both the digit count */
      printf(
" for (x = 0; x < %d; x++) {\n"
" /* get Mu for this round */\n"
" LOOP_START;\n"
"\n"
" /* our friendly neighbourhood alias */\n"
" _c = c + x;\n"
" tmpm = m->dp;\n"
"\n"
" for (y = 0; y < %d; y++) {\n"
" INNERMUL;\n"
" ++_c;\n"
" }\n"
" /* send carry up man... */\n"
" _c = c + x;\n"
" PROPCARRY;\n"
" } \n"
"\n",
         digits, digits);

      /* carry fix-up over the upper part of the scratch buffer */
      printf(
" /* fix the rest of the carries */\n"
" _c = c + %d;\n"
" for (x = %d; x < %d * 2 + 2; x++) {\n"
" PROPCARRY;\n"
" ++_c;\n"
" }\n"
"\n",
         digits, digits, digits);

      /* copy-out, zeroing of stale digits and the new used count */
      printf(
" /* now copy out */\n"
" _c = c + %d;\n"
" tmpm = a->dp;\n"
" for (x = 0; x < %d+1; x++) {\n"
" *tmpm++ = *_c++;\n"
" }\n"
"\n"
" for (; x < oldused; x++) {\n"
" *tmpm++ = 0;\n"
" }\n"
"\n"
" MONT_FINI;\n"
"\n"
" a->used = %d+1;\n",
         digits, digits, digits);

      /* clamp and final conditional subtraction: size independent */
      fputs(
" fp_clamp(a);\n"
"\n"
" /* if A >= m then A = A - m */\n"
" if (fp_cmp_mag (a, m) != FP_LT) {\n"
" s_fp_sub (a, m, a);\n"
" }\n"
"}\n",
         stdout);

      ++digits;
   }

   return 0;
}
|
||||
|
||||
|
||||
|
||||
|
@ -1,3 +1,13 @@
|
||||
/* TomsFastMath, a fast ISO C bignum library.
|
||||
*
|
||||
* This project is meant to fill in where LibTomMath
|
||||
* falls short. That is speed ;-)
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
|
||||
/* program emits a NxN comba multiplier */
|
||||
#include <stdio.h>
|
||||
|
||||
@ -47,3 +57,7 @@ printf(
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
61
comba_mult_smallgen.c
Normal file
61
comba_mult_smallgen.c
Normal file
@ -0,0 +1,61 @@
|
||||
/* program emits a NxN comba multiplier for 1x1 to 16x16 */
|
||||
#include <stdio.h>
|
||||
|
||||
/*
 * Generator: prints to stdout the C source of fp_mul_comba_small(), a single
 * function whose switch has one unrolled comba multiplier per operand size
 * from 1x1 to 16x16.
 *
 * For size S the emitted case copies A's digits to at[0..S-1] and B's digits
 * to at[S..2S-1], then for every output column k = 0 .. 2S-2 emits one
 * MULADD(at[i], at[j+S]) for each pair with i + j == k, in increasing i.
 *
 * argc/argv are not used.  Always returns 0.
 */
int main(int argc, char **argv)
{
   int size, col, i, first, last;

   (void)argc;
   (void)argv;

   /* function header and the opening of the switch */
   fputs(
"void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)\n"
"{\n"
" fp_digit c0, c1, c2, at[32];\n"
" switch (MAX(A->used, B->used)) { \n",
      stdout);

   for (size = 1; size <= 16; ++size) {
      /* case label plus the copy of both operands into the scratch array */
      printf(
"\n"
" case %d:\n"
" memcpy(at, A->dp, %d * sizeof(fp_digit));\n"
" memcpy(at+%d, B->dp, %d * sizeof(fp_digit));\n"
" COMBA_START;\n"
"\n"
" COMBA_CLEAR;\n",
         size, size, size, size);

      /* one pass per output column */
      for (col = 0; col <= 2 * size - 2; ++col) {
         printf(" /* %d */\n", col);

         /* every column but the first starts by forwarding the carry */
         if (col != 0) {
            fputs(" COMBA_FORWARD;\n", stdout);
         }

         /* index range of A digits that have a partner B digit in this column */
         if (col < size) {
            first = 0;
            last  = col;
         } else {
            first = col - size + 1;
            last  = size - 1;
         }

         for (i = first; i <= last; ++i) {
            /* B's digit (col - i) lives at offset size in the scratch array */
            printf(" MULADD(at[%d], at[%d]); ", i, (col - i) + size);
         }

         printf(
"\n"
" COMBA_STORE(C->dp[%d]);\n",
            col);
      }

      /* top digit, result bookkeeping and end of the case */
      printf(
" COMBA_STORE2(C->dp[%d]);\n"
" C->used = %d;\n"
" C->sign = A->sign ^ B->sign;\n"
" fp_clamp(C);\n"
" COMBA_FINI;\n"
" break;\n",
         2 * size - 1, 2 * size);
   }

   /* close the switch and the function */
   fputs(" }\n}\n\n", stdout);

   return 0;
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
@ -1,3 +1,13 @@
|
||||
/* TomsFastMath, a fast ISO C bignum library.
|
||||
*
|
||||
* This project is meant to fill in where LibTomMath
|
||||
* falls short. That is speed ;-)
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
|
||||
/* Generates squaring comba code... it learns it knows our secrets! */
|
||||
#include <stdio.h>
|
||||
|
||||
@ -90,3 +100,7 @@ if (N >= 16) printf("#endif\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
109
comba_sqr_smallgen.c
Normal file
109
comba_sqr_smallgen.c
Normal file
@ -0,0 +1,109 @@
|
||||
/* TomsFastMath, a fast ISO C bignum library.
|
||||
*
|
||||
* This project is meant to fill in where LibTomMath
|
||||
* falls short. That is speed ;-)
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
|
||||
/* Generates squaring comba code... it learns it knows our secrets! */
|
||||
#include <stdio.h>
|
||||
|
||||
/*
 * Generator: prints to stdout the C source of fp_sqr_comba_small(), a single
 * function whose switch has one unrolled comba squaring routine per operand
 * size from 1 to 16 digits.
 *
 * For every output column k the cross products a[i]*a[j] with i < j and
 * i + j == k ("doubles") are counted.  Columns with at most two doubles are
 * emitted with SQRADD2 per double (plus SQRADD for the square term, if any).
 * Columns with more doubles use SQRADDSC for the first double, SQRADDAC for
 * the rest, then SQRADDDB, and finally SQRADD for the square term when k is
 * even.
 *
 * argc/argv are not used.  Always returns 0.
 */
int main(int argc, char **argv)
{
   int size, col, lo, i, doubles;

   (void)argc;
   (void)argv;

   /* function header */
   fputs(
"void fp_sqr_comba_small(fp_int *A, fp_int *B)\n"
"{\n"
" fp_digit *a, b[32], c0, c1, c2, sc0, sc1, sc2;\n",
      stdout);

   fputs(" switch (A->used) { \n", stdout);

   for (size = 1; size <= 16; ++size) {
      /* case label and column 0, which is always just a[0]^2 */
      printf(
" case %d:\n"
" a = A->dp;\n"
" COMBA_START; \n"
"\n"
" /* clear carries */\n"
" CLEAR_CARRY;\n"
"\n"
" /* output 0 */\n"
" SQRADD(a[0],a[0]);\n"
" COMBA_STORE(b[0]);\n",
         size);

      for (col = 1; col <= 2 * size - 2; ++col) {
         printf(
"\n /* output %d */\n"
" CARRY_FORWARD;\n ",
            col);

         /* smallest index i whose partner col - i is still a valid digit */
         lo = (col < size) ? 0 : col - size + 1;

         /* number of cross products (i < col - i) in this column */
         doubles = 0;
         for (i = lo; 2 * i < col; ++i) {
            ++doubles;
         }

         if (doubles <= 2) {
            /* few terms: emit each one directly, the square (if any) comes last */
            for (i = lo; 2 * i <= col; ++i) {
               if (2 * i == col) {
                  printf(" SQRADD(a[%d], a[%d]); ", i, i);
               } else {
                  printf(" SQRADD2(a[%d], a[%d]); ", i, col - i);
               }
            }
         } else {
            /* many terms: collect the cross products first ... */
            for (i = lo; 2 * i < col; ++i) {
               if (i == lo) {
                  /* the first cross product opens the sequence */
                  printf("SQRADDSC(a[%d], a[%d]); ", i, col - i);
               } else {
                  printf("SQRADDAC(a[%d], a[%d]); ", i, col - i);
               }
            }

            /* ... then emit the doubling step ... */
            fputs("SQRADDDB; ", stdout);

            /* ... and the square term, which exists only in even columns */
            if (col % 2 == 0) {
               printf("SQRADD(a[%d], a[%d]); ", col / 2, col / 2);
            }
         }

         printf("\n COMBA_STORE(b[%d]);\n", col);
      }

      printf(" COMBA_STORE2(b[%d]);\n", 2 * size - 1);

      /* result bookkeeping and end of the case */
      printf(
" COMBA_FINI;\n"
"\n"
" B->used = %d;\n"
" B->sign = FP_ZPOS;\n"
" memcpy(B->dp, b, %d * sizeof(fp_digit));\n"
" fp_clamp(B);\n"
" break;\n\n",
         2 * size, 2 * size);
   }

   /* close the switch and the function */
   fputs("}\n\n}\n", stdout);

   return 0;
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
24
delme.c
24
delme.c
@ -1,24 +0,0 @@
|
||||
#include "tfm.h"
|
||||
|
||||
int main(void)
|
||||
{
|
||||
fp_int a;
|
||||
char buf[4096];
|
||||
|
||||
fp_init(&a);
|
||||
fp_read_radix( &a,
|
||||
"///////////93zgY8MZ2DCJ6Oek0t1pHAG9E28fdp7G22xwcEnER8b5A27cED0JT"
|
||||
"xvKPiyqwGnimAmfjybyKDq/XDMrjKS95v8MrTc9UViRqJ4BffZVjQml/NBRq1hVj"
|
||||
"xZXh+rg9dwMkdoGHV4iVvaaePb7iv5izmW1ykA5ZlmMOsaWs75NJccaMFwZz9CzV"
|
||||
"WsLT8zoZhPOSOlDM88LIkvxLAGTmbfPjPmmrJagyc0JnT6m8oXWXV3AGNaOkDiux"
|
||||
"uvvtB1WEXWER9uEYx0UYZxN5NV1lJ5B9tYlBzfLO5nWvbKbywfLgvHNI9XYO+WKG"
|
||||
"5NAEMeggn2sjCnSD151wCwXL8QlV7BfaxFk515ZRxmgAwd5NNGOCVREN3uMcuUJ7"
|
||||
"g/MkZDi9CzSUZ9JWIYLXdSxZqYOQqkvhyI/w1jcA26JOTW9pFiXgP58VAnWNUo0C"
|
||||
"k+4NLtfXNMnt2OZ0kjb6uWZYJw1qvQinGzjR/E3z48vBWj4WgJhIol//////////",
|
||||
64 );
|
||||
|
||||
if( fp_isprime( &a ) ) printf("It's prime.\n");
|
||||
else printf( "Not prime.\n");
|
||||
|
||||
return 0;
|
||||
}
|
83
demo/rsa.c
Normal file
83
demo/rsa.c
Normal file
@ -0,0 +1,83 @@
|
||||
#include "tfm.h"
|
||||
#include <time.h>
|
||||
|
||||
int main(void)
|
||||
{
|
||||
fp_int d, e, n, c, m, e_m;
|
||||
clock_t t1;
|
||||
int x;
|
||||
|
||||
/* read in the parameters */
|
||||
fp_read_radix(&n, "ce032e860a9809a5ec31e4b0fd4b546f8c40043e3d2ec3d8f49d8f2f3dd19e887094ee1af75caa1c2e6cd9ec78bf1dfd6280002ac8c30ecd72da2e4c59a28a9248048aaae2a8fa627f71bece979cebf9f8eee2bd594d4a4f2e791647573c7ec1fcbd320d3825be3fa8a17c97086fdae56f7086ce512b81cc2fe44161270ec5e9", 16);
|
||||
fp_read_radix(&e, "10001", 16);
|
||||
fp_read_radix(&m, "39f5a911250f45b99390e2df322b33c729099ab52b5879d06b00818cce57c649a66ed7eb6d8ae214d11caf9c81e83a7368cf0edb2b71dad791f13fecf546123b40377851e67835ade1d6be57f4de18a62db4cdb1880f4ab2e6a29acfd85ca22a13dc1f6fee2621ef0fc8689cd738e6f065c033ec7c148d8d348688af83d6f6bd", 16);
|
||||
fp_read_radix(&c, "9ff70ea6968a04530e6b06bf01aa937209cc8450e76ac19477743de996ba3fb445923c947f8d0add8c57efa51d15485309918459da6c1e5a97f215193b797dce98db51bdb4639c2ecfa90ebb051e3a2daeffd27a7d6e62043703a7b15e0ada5170427b63099cd01ef52cd92d8723e5774bea32716aaa7f5adbae817fb12a5b50", 16);
|
||||
|
||||
/* test it */
|
||||
fp_exptmod(&m, &e, &n, &e_m);
|
||||
if (fp_cmp(&e_m, &c)) {
|
||||
char buf[1024];
|
||||
printf("Encrypted text not equal\n");
|
||||
fp_toradix(&e_m, buf, 16);
|
||||
printf("e_m == %s\n", buf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
printf("CLOCKS_PER_SEC = %llu\n", (unsigned long long)CLOCKS_PER_SEC);
|
||||
t1 = clock();
|
||||
for (x = 0; x < 1000; x++) {
|
||||
fp_exptmod(&m, &e, &n, &e_m);
|
||||
}
|
||||
t1 = clock() - t1;
|
||||
printf("1000 RSA operations took %10.5g seconds\n", (double)t1 / (double)CLOCKS_PER_SEC);
|
||||
printf("RSA encrypt/sec %10.5g\n", (double)CLOCKS_PER_SEC / ((double)t1 / 1000.0) );
|
||||
|
||||
/* read in the parameters */
|
||||
fp_read_radix(&n, "a7f30e2e04d31acc6936916af1e404a4007adfb9e97864de28d1c7ba3034633bee2cd9d5da3ea3cdcdc9a6f3daf5702ef750f4c3aadb0e27410ac04532176795995148cdb4691bd09a8a846e3e24e073ce2f89b34dfeb2ee89b646923ca60ee3f73c4d5397478380425e7260f75dfdc54826e160395b0889b1162cf115a9773f", 16);
|
||||
fp_read_radix(&d, "16d166f3c9a404d810d3611e6e8ed43293fe1db75c8906eb4810785a4b82529929dade1db7f11ac0335d5a59773e3167b022479eedefa514a0399db5c900750a56323cf9f5b0f21e7d60a46d75f3fcaabf30a63cbe34048b741a57ac36a13914afda798709dea5771f8d456cf72ec5f3afc1d88d023de40311143a36e7028739", 16);
|
||||
fp_read_radix(&c, "7d216641c32543f5b8428bdd0b11d819cfbdb16f1df285247f677aa4d44de62ab064f4a0d060ec99cb94aa398113a4317f2c550d0371140b0fd2c88886cac771812e72faad4b7adf495b9b850b142ccd7f45c0a27f164c8c7731731c0015f69d0241812e769d961054618aeb9e8e8989dba95714a2cf56c9e525c5e34b5812dd", 16);
|
||||
fp_read_radix(&m, "5f323bf0b394b98ffd78727dc9883bb4f42287def6b60fa2a964b2510bc55d61357bf5a6883d2982b268810f8fef116d3ae68ebb41fd10d65a0af4bec0530eb369f37c14b55c3be60223b582372fb6589b648d5a0c7252d1ae2dae5809785d993e9e5d0c4d9b0bcba0cde0d6671734747fba5483c735e1dab7df7b10ec6f62d8", 16);
|
||||
|
||||
/* test it */
|
||||
fp_exptmod(&c, &d, &n, &e_m);
|
||||
if (fp_cmp(&e_m, &m)) {
|
||||
char buf[1024];
|
||||
printf("Decrypted text not equal\n");
|
||||
fp_toradix(&e_m, buf, 16);
|
||||
printf("e_m == %s\n", buf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
t1 = clock();
|
||||
for (x = 0; x < 100; x++) {
|
||||
fp_exptmod(&c, &d, &n, &e_m);
|
||||
}
|
||||
t1 = clock() - t1;
|
||||
printf("100 RSA operations took %10.5g seconds\n", (double)t1 / (double)CLOCKS_PER_SEC);
|
||||
printf("RSA decrypt/sec %10.5g\n", (double)CLOCKS_PER_SEC / ((double)t1 / 100.0) );
|
||||
|
||||
|
||||
/* test half size */
|
||||
fp_rshd(&n, n.used >> 1);
|
||||
fp_rshd(&d, d.used >> 1);
|
||||
fp_rshd(&c, c.used >> 1);
|
||||
printf("n.used == %4d bits\n", n.used * DIGIT_BIT);
|
||||
|
||||
/* ensure n is odd */
|
||||
n.dp[0] |= 1;
|
||||
t1 = clock();
|
||||
for (x = 0; x < 100; x++) {
|
||||
fp_exptmod(&c, &d, &n, &e_m);
|
||||
}
|
||||
t1 = clock() - t1;
|
||||
printf("100 RSA-half operations took %10.5g seconds\n", (double)t1 / (double)CLOCKS_PER_SEC);
|
||||
printf("RSA decrypt/sec %10.5g (estimate of RSA-1024-CRT) \n", (double)CLOCKS_PER_SEC / ((double)t1 / 50.0) );
|
||||
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
@ -142,3 +142,7 @@ int main(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
39
demo/test.c
39
demo/test.c
@ -22,10 +22,18 @@ int myrng(unsigned char *dst, int len, void *dat)
|
||||
static ulong64 TIMFUNC (void)
|
||||
{
|
||||
#if defined __GNUC__
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
#if defined(INTEL_CC)
|
||||
ulong64 a;
|
||||
asm ("rdtsc":"=A"(a));
|
||||
return a;
|
||||
#elif defined(__i386__) || defined(__x86_64__)
|
||||
ulong64 a;
|
||||
__asm__ __volatile__ ("rdtsc\nmovl %%eax,%0\nmovl %%edx,4+%0\n"::"m"(a):"%eax","%edx");
|
||||
return a;
|
||||
#elif defined(TFM_PPC32)
|
||||
unsigned long a;
|
||||
__asm__ __volatile__ ("mftb %0":"=r"(a));
|
||||
return a;
|
||||
#else /* gcc-IA64 version */
|
||||
unsigned long result;
|
||||
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
|
||||
@ -135,9 +143,7 @@ int main(void)
|
||||
printf("Testing read_radix\n");
|
||||
fp_read_radix(&a, "123456789012345678901234567890", 16); draw(&a);
|
||||
|
||||
goto testing;
|
||||
|
||||
#if 1
|
||||
#if 0
|
||||
/* test mont */
|
||||
printf("Montgomery test #1\n");
|
||||
fp_set(&a, 0x1234567ULL);
|
||||
@ -197,7 +203,17 @@ goto testing;
|
||||
printf("\n\n");
|
||||
#endif
|
||||
|
||||
#ifdef TESTING
|
||||
goto testing;
|
||||
#endif
|
||||
|
||||
#if 1
|
||||
|
||||
t1 = TIMFUNC();
|
||||
sleep(1);
|
||||
printf("Ticks per second: %llu\n", TIMFUNC() - t1);
|
||||
|
||||
goto expttime;
|
||||
/* do some timings... */
|
||||
printf("Addition:\n");
|
||||
for (t = 2; t <= FP_SIZE/2; t += 2) {
|
||||
@ -211,7 +227,7 @@ goto testing;
|
||||
a.used = t;
|
||||
b.used = t;
|
||||
t2 = -1;
|
||||
for (ix = 0; ix < 2500; ++ix) {
|
||||
for (ix = 0; ix < 25000; ++ix) {
|
||||
t1 = TIMFUNC();
|
||||
fp_add(&a, &b, &c); fp_add(&a, &b, &c);
|
||||
fp_add(&a, &b, &c); fp_add(&a, &b, &c);
|
||||
@ -222,6 +238,7 @@ goto testing;
|
||||
}
|
||||
printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
|
||||
}
|
||||
multtime:
|
||||
printf("Multiplication:\n");
|
||||
for (t = 2; t <= FP_SIZE/2; t += 2) {
|
||||
fp_zero(&a);
|
||||
@ -263,8 +280,8 @@ sqrtime:
|
||||
}
|
||||
printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
|
||||
}
|
||||
return;
|
||||
//#else
|
||||
monttime:
|
||||
printf("Montgomery:\n");
|
||||
for (t = 2; t <= (FP_SIZE/2)-2; t += 2) {
|
||||
fp_zero(&a);
|
||||
@ -295,7 +312,7 @@ return;
|
||||
expttime:
|
||||
printf("Exptmod:\n");
|
||||
|
||||
for (t = 512/DIGIT_BIT; t <= (FP_SIZE/2)-2; t += t) {
|
||||
for (t = 512/DIGIT_BIT; t <= (FP_SIZE/2)-2; t += 256/DIGIT_BIT) {
|
||||
fp_zero(&a);
|
||||
fp_zero(&b);
|
||||
fp_zero(&c);
|
||||
@ -309,7 +326,7 @@ expttime:
|
||||
c.used = t;
|
||||
|
||||
t2 = -1;
|
||||
for (ix = 0; ix < 256; ++ix) {
|
||||
for (ix = 0; ix < 500; ++ix) {
|
||||
t1 = TIMFUNC();
|
||||
fp_exptmod(&c, &b, &a, &d);
|
||||
fp_exptmod(&c, &b, &a, &d);
|
||||
@ -321,9 +338,9 @@ expttime:
|
||||
printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
|
||||
}
|
||||
return;
|
||||
|
||||
#endif
|
||||
|
||||
return;
|
||||
testing:
|
||||
|
||||
div2_n = mul2_n = inv_n = expt_n = lcm_n = gcd_n = add_n =
|
||||
@ -567,3 +584,7 @@ draw(&a);draw(&b);draw(&c);draw(&d);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
BIN
doc/tfm.pdf
BIN
doc/tfm.pdf
Binary file not shown.
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -33,3 +33,7 @@ void fp_2expt(fp_int *a, int b)
|
||||
a->dp[z] = ((fp_digit)1) << (b % DIGIT_BIT);
|
||||
}
|
||||
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
6
fp_add.c
6
fp_add.c
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -37,3 +37,7 @@ void fp_add(fp_int *a, fp_int *b, fp_int *c)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -16,3 +16,7 @@ void fp_add_d(fp_int *a, fp_digit b, fp_int *c)
|
||||
fp_set(&tmp, b);
|
||||
fp_add(a,&tmp,c);
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -17,3 +17,7 @@ int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
|
||||
fp_add(a, b, &tmp);
|
||||
return fp_mod(&tmp, c, d);
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
6
fp_cmp.c
6
fp_cmp.c
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -25,3 +25,7 @@ int fp_cmp(fp_int *a, fp_int *b)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -32,3 +32,7 @@ int fp_cmp_d(fp_int *a, fp_digit b)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -29,3 +29,7 @@ int fp_cmp_mag(fp_int *a, fp_int *b)
|
||||
return FP_EQ;
|
||||
}
|
||||
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -40,3 +40,7 @@ int fp_cnt_lsb(fp_int *a)
|
||||
return x;
|
||||
}
|
||||
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -30,3 +30,7 @@ int fp_count_bits (fp_int * a)
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
6
fp_div.c
6
fp_div.c
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -151,3 +151,7 @@ int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
|
||||
|
||||
return FP_OKAY;
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -47,3 +47,7 @@ void fp_div_2(fp_int * a, fp_int * b)
|
||||
b->sign = a->sign;
|
||||
fp_clamp (b);
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -73,3 +73,7 @@ void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d)
|
||||
fp_copy (&t, d);
|
||||
}
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -87,3 +87,7 @@ int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d)
|
||||
return FP_OKAY;
|
||||
}
|
||||
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
15
fp_exptmod.c
15
fp_exptmod.c
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -174,6 +174,13 @@ int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
|
||||
fp_int tmp;
|
||||
int err;
|
||||
|
||||
#ifdef TFM_CHECK
|
||||
/* prevent overflows */
|
||||
if (P->used > (FP_SIZE/2)) {
|
||||
return FP_VAL;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* is X negative? */
|
||||
if (X->sign == FP_NEG) {
|
||||
/* yes, copy G and invmod it */
|
||||
@ -183,10 +190,16 @@ int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
|
||||
}
|
||||
X->sign = FP_ZPOS;
|
||||
err = _fp_exptmod(&tmp, X, P, Y);
|
||||
if (X != Y) {
|
||||
X->sign = FP_NEG;
|
||||
}
|
||||
return err;
|
||||
} else {
|
||||
/* Positive exponent so just exptmod */
|
||||
return _fp_exptmod(G, X, P, Y);
|
||||
}
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
6
fp_gcd.c
6
fp_gcd.c
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -49,3 +49,7 @@ void fp_gcd(fp_int *a, fp_int *b, fp_int *c)
|
||||
}
|
||||
fp_copy(&u, c);
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
16
fp_ident.c
16
fp_ident.c
@ -1,3 +1,12 @@
|
||||
/* TomsFastMath, a fast ISO C bignum library.
|
||||
*
|
||||
* This project is meant to fill in where LibTomMath
|
||||
* falls short. That is speed ;-)
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include "tfm.h"
|
||||
|
||||
const char *fp_ident(void)
|
||||
@ -39,9 +48,6 @@ const char *fp_ident(void)
|
||||
#ifdef FP_64BIT
|
||||
" FP_64BIT "
|
||||
#endif
|
||||
#ifdef TFM_LARGE
|
||||
" TFM_LARGE "
|
||||
#endif
|
||||
#ifdef TFM_HUGE
|
||||
" TFM_HUGE "
|
||||
#endif
|
||||
@ -64,3 +70,7 @@ int main(void)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -96,3 +96,7 @@ top:
|
||||
c->sign = neg;
|
||||
return FP_OKAY;
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -72,3 +72,7 @@ int fp_isprime(fp_int *a)
|
||||
}
|
||||
return FP_YES;
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
6
fp_lcm.c
6
fp_lcm.c
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -25,3 +25,7 @@ void fp_lcm(fp_int *a, fp_int *b, fp_int *c)
|
||||
fp_mul(a, &t2, c);
|
||||
}
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -32,3 +32,7 @@ void fp_lshd(fp_int *a, int x)
|
||||
/* clamp digits */
|
||||
fp_clamp(a);
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
6
fp_mod.c
6
fp_mod.c
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -28,3 +28,7 @@ int fp_mod(fp_int *a, fp_int *b, fp_int *c)
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -36,3 +36,7 @@ void fp_mod_2d(fp_int *a, int b, fp_int *c)
|
||||
c->dp[b / DIGIT_BIT] &= ~((fp_digit)0) >> (DIGIT_BIT - b);
|
||||
fp_clamp (c);
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -14,3 +14,7 @@ int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c)
|
||||
{
|
||||
return fp_div_d(a, b, NULL, c);
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -36,3 +36,7 @@ void fp_montgomery_calc_normalization(fp_int *a, fp_int *b)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,229 +5,352 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
/******************************************************************/
|
||||
#if defined(TFM_X86)
|
||||
|
||||
/* x86-32 code */
|
||||
|
||||
#define MONT_START
|
||||
|
||||
#define MONT_FINI
|
||||
|
||||
#define LOOP_END
|
||||
#define LOOP_START \
|
||||
mu = c[x] * mp;
|
||||
mu = c[x] * mp
|
||||
|
||||
#define INNERMUL \
|
||||
asm( \
|
||||
"movl %7,%%eax \n\t" \
|
||||
"mull %6 \n\t" \
|
||||
"movl %5,%%eax \n\t" \
|
||||
"mull %4 \n\t" \
|
||||
"addl %1,%%eax \n\t" \
|
||||
"adcl $0,%%edx \n\t" \
|
||||
"addl %%eax,%0 \n\t" \
|
||||
"adcl %%edx,%1 \n\t" \
|
||||
"adcl $0,%2 \n\t" \
|
||||
:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
|
||||
"g"(mu), "g"(*tmpm++) \
|
||||
: "%eax", "%edx", "%cc");
|
||||
"adcl $0,%%edx \n\t" \
|
||||
"movl %%edx,%1 \n\t" \
|
||||
:"=g"(_c[LO]), "=r"(cy) \
|
||||
:"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \
|
||||
: "%eax", "%edx", "%cc")
|
||||
|
||||
#define PROPCARRY \
|
||||
asm( \
|
||||
"movl %1,%%eax \n\t" \
|
||||
"addl %%eax,%6 \n\t" \
|
||||
"movl %2,%%eax \n\t" \
|
||||
"adcl %%eax,%7 \n\t" \
|
||||
"adcl $0,%8 \n\t" \
|
||||
:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
|
||||
"m"(_c[OFF0+1]), "m"(_c[OFF1+1]), "m"(_c[OFF2+1]) \
|
||||
: "%eax", "%cc");
|
||||
"addl %1,%0 \n\t" \
|
||||
"setb %%al \n\t" \
|
||||
"movzbl %%al,%1 \n\t" \
|
||||
:"=g"(_c[LO]), "=r"(cy) \
|
||||
:"0"(_c[LO]), "1"(cy) \
|
||||
: "%eax", "%cc")
|
||||
|
||||
/******************************************************************/
|
||||
#elif defined(TFM_X86_64)
|
||||
/* x86-64 code */
|
||||
|
||||
#define MONT_START
|
||||
|
||||
#define MONT_FINI
|
||||
|
||||
#define LOOP_END
|
||||
#define LOOP_START \
|
||||
mu = c[x] * mp;
|
||||
mu = c[x] * mp
|
||||
|
||||
#define INNERMUL \
|
||||
asm( \
|
||||
"movq %7,%%rax \n\t" \
|
||||
"mulq %6 \n\t" \
|
||||
"movq %5,%%rax \n\t" \
|
||||
"mulq %4 \n\t" \
|
||||
"addq %1,%%rax \n\t" \
|
||||
"adcq $0,%%rdx \n\t" \
|
||||
"addq %%rax,%0 \n\t" \
|
||||
"adcq %%rdx,%1 \n\t" \
|
||||
"adcq $0,%2 \n\t" \
|
||||
:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
|
||||
"g"(mu), "g"(*tmpm++) \
|
||||
: "%rax", "%rdx", "%cc");
|
||||
"adcq $0,%%rdx \n\t" \
|
||||
"movq %%rdx,%1 \n\t" \
|
||||
:"=g"(_c[LO]), "=r"(cy) \
|
||||
:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
|
||||
: "%rax", "%rdx", "%cc")
|
||||
|
||||
#define INNERMUL8 \
|
||||
asm( \
|
||||
"movq 0(%5),%%rax \n\t" \
|
||||
"movq 0(%2),%%r10 \n\t" \
|
||||
"movq 0x8(%5),%%r11 \n\t" \
|
||||
"mulq %4 \n\t" \
|
||||
"addq %%r10,%%rax \n\t" \
|
||||
"adcq $0,%%rdx \n\t" \
|
||||
"movq 0x8(%2),%%r10 \n\t" \
|
||||
"addq %3,%%rax \n\t" \
|
||||
"adcq $0,%%rdx \n\t" \
|
||||
"movq %%rax,0(%0) \n\t" \
|
||||
"movq %%rdx,%1 \n\t" \
|
||||
\
|
||||
"movq %%r11,%%rax \n\t" \
|
||||
"movq 0x10(%5),%%r11 \n\t" \
|
||||
"mulq %4 \n\t" \
|
||||
"addq %%r10,%%rax \n\t" \
|
||||
"adcq $0,%%rdx \n\t" \
|
||||
"movq 0x10(%2),%%r10 \n\t" \
|
||||
"addq %3,%%rax \n\t" \
|
||||
"adcq $0,%%rdx \n\t" \
|
||||
"movq %%rax,0x8(%0) \n\t" \
|
||||
"movq %%rdx,%1 \n\t" \
|
||||
\
|
||||
"movq %%r11,%%rax \n\t" \
|
||||
"movq 0x18(%5),%%r11 \n\t" \
|
||||
"mulq %4 \n\t" \
|
||||
"addq %%r10,%%rax \n\t" \
|
||||
"adcq $0,%%rdx \n\t" \
|
||||
"movq 0x18(%2),%%r10 \n\t" \
|
||||
"addq %3,%%rax \n\t" \
|
||||
"adcq $0,%%rdx \n\t" \
|
||||
"movq %%rax,0x10(%0) \n\t" \
|
||||
"movq %%rdx,%1 \n\t" \
|
||||
\
|
||||
"movq %%r11,%%rax \n\t" \
|
||||
"movq 0x20(%5),%%r11 \n\t" \
|
||||
"mulq %4 \n\t" \
|
||||
"addq %%r10,%%rax \n\t" \
|
||||
"adcq $0,%%rdx \n\t" \
|
||||
"movq 0x20(%2),%%r10 \n\t" \
|
||||
"addq %3,%%rax \n\t" \
|
||||
"adcq $0,%%rdx \n\t" \
|
||||
"movq %%rax,0x18(%0) \n\t" \
|
||||
"movq %%rdx,%1 \n\t" \
|
||||
\
|
||||
"movq %%r11,%%rax \n\t" \
|
||||
"movq 0x28(%5),%%r11 \n\t" \
|
||||
"mulq %4 \n\t" \
|
||||
"addq %%r10,%%rax \n\t" \
|
||||
"adcq $0,%%rdx \n\t" \
|
||||
"movq 0x28(%2),%%r10 \n\t" \
|
||||
"addq %3,%%rax \n\t" \
|
||||
"adcq $0,%%rdx \n\t" \
|
||||
"movq %%rax,0x20(%0) \n\t" \
|
||||
"movq %%rdx,%1 \n\t" \
|
||||
\
|
||||
"movq %%r11,%%rax \n\t" \
|
||||
"movq 0x30(%5),%%r11 \n\t" \
|
||||
"mulq %4 \n\t" \
|
||||
"addq %%r10,%%rax \n\t" \
|
||||
"adcq $0,%%rdx \n\t" \
|
||||
"movq 0x30(%2),%%r10 \n\t" \
|
||||
"addq %3,%%rax \n\t" \
|
||||
"adcq $0,%%rdx \n\t" \
|
||||
"movq %%rax,0x28(%0) \n\t" \
|
||||
"movq %%rdx,%1 \n\t" \
|
||||
\
|
||||
"movq %%r11,%%rax \n\t" \
|
||||
"movq 0x38(%5),%%r11 \n\t" \
|
||||
"mulq %4 \n\t" \
|
||||
"addq %%r10,%%rax \n\t" \
|
||||
"adcq $0,%%rdx \n\t" \
|
||||
"movq 0x38(%2),%%r10 \n\t" \
|
||||
"addq %3,%%rax \n\t" \
|
||||
"adcq $0,%%rdx \n\t" \
|
||||
"movq %%rax,0x30(%0) \n\t" \
|
||||
"movq %%rdx,%1 \n\t" \
|
||||
\
|
||||
"movq %%r11,%%rax \n\t" \
|
||||
"mulq %4 \n\t" \
|
||||
"addq %%r10,%%rax \n\t" \
|
||||
"adcq $0,%%rdx \n\t" \
|
||||
"addq %3,%%rax \n\t" \
|
||||
"adcq $0,%%rdx \n\t" \
|
||||
"movq %%rax,0x38(%0) \n\t" \
|
||||
"movq %%rdx,%1 \n\t" \
|
||||
\
|
||||
:"=r"(_c), "=r"(cy) \
|
||||
: "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
|
||||
: "%rax", "%rdx", "%r10", "%r11", "%cc")
|
||||
|
||||
|
||||
#define PROPCARRY \
|
||||
asm( \
|
||||
"movq %1,%%rax \n\t" \
|
||||
"movq %2,%%rbx \n\t" \
|
||||
"addq %%rax,%6 \n\t" \
|
||||
"adcq %%rbx,%7 \n\t" \
|
||||
"adcq $0,%8 \n\t" \
|
||||
:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
|
||||
"m"(_c[OFF0+1]), "m"(_c[OFF1+1]), "m"(_c[OFF2+1]) \
|
||||
: "%rax", "%rbx", "%cc");
|
||||
"addq %1,%0 \n\t" \
|
||||
"setb %%al \n\t" \
|
||||
"movzbq %%al,%1 \n\t" \
|
||||
:"=g"(_c[LO]), "=r"(cy) \
|
||||
:"0"(_c[LO]), "1"(cy) \
|
||||
: "%rax", "%cc")
|
||||
|
||||
/******************************************************************/
|
||||
#elif defined(TFM_SSE2)
|
||||
|
||||
/* SSE2 code */
|
||||
/* SSE2 code (assumes 32-bit fp_digits) */
|
||||
/* MMX register assignments:
|
||||
* mm0 *tmpm++, then Mu * (*tmpm++)
|
||||
* mm1 c[x], then Mu
|
||||
* mm2 mp
|
||||
* mm3 cy
|
||||
* mm4 _c[LO]
|
||||
*/
|
||||
|
||||
#define MONT_START \
|
||||
asm("movd %0,%%mm2"::"g"(mp));
|
||||
asm("movd %0,%%mm2"::"g"(mp))
|
||||
|
||||
#define MONT_FINI \
|
||||
asm("emms");
|
||||
asm("emms")
|
||||
|
||||
#define LOOP_START \
|
||||
asm( \
|
||||
"movd %0,%%mm1 \n\t" \
|
||||
"pxor %%mm3,%%mm3 \n\t" \
|
||||
"pmuludq %%mm2,%%mm1 \n\t" \
|
||||
:: "g"(c[x]));
|
||||
:: "g"(c[x]))
|
||||
|
||||
/* pmuludq on mmx registers does a 32x32->64 multiply. */
|
||||
#define INNERMUL \
|
||||
asm( \
|
||||
"movd %6,%%mm0 \n\t" \
|
||||
"movd %1,%%mm4 \n\t" \
|
||||
"movd %2,%%mm0 \n\t" \
|
||||
"paddq %%mm4,%%mm3 \n\t" \
|
||||
"pmuludq %%mm1,%%mm0 \n\t" \
|
||||
"movd %%mm0,%%eax \n\t" \
|
||||
"psrlq $32, %%mm0 \n\t" \
|
||||
"addl %%eax,%0 \n\t" \
|
||||
"movd %%mm0,%%eax \n\t" \
|
||||
"adcl %%eax,%1 \n\t" \
|
||||
"adcl $0,%2 \n\t" \
|
||||
:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
|
||||
"g"(*tmpm++) \
|
||||
: "%eax", "%cc");
|
||||
"paddq %%mm0,%%mm3 \n\t" \
|
||||
"movd %%mm3,%0 \n\t" \
|
||||
"psrlq $32, %%mm3 \n\t" \
|
||||
:"=g"(_c[LO]) : "0"(_c[LO]), "g"(*tmpm++) );
|
||||
|
||||
#define LOOP_END \
|
||||
asm( "movd %%mm3,%0 \n" :"=r"(cy))
|
||||
|
||||
#define PROPCARRY \
|
||||
asm( \
|
||||
"movl %1,%%eax \n\t" \
|
||||
"addl %%eax,%6 \n\t" \
|
||||
"movl %2,%%eax \n\t" \
|
||||
"adcl %%eax,%7 \n\t" \
|
||||
"adcl $0,%8 \n\t" \
|
||||
:"=g"(_c[OFF0]), "=g"(_c[OFF1]), "=g"(_c[OFF2]):"0"(_c[OFF0]), "1"(_c[OFF1]), "2"(_c[OFF2]), \
|
||||
"g"(_c[OFF0+1]), "g"(_c[OFF1+1]), "g"(_c[OFF2+1]) \
|
||||
: "%eax", "%cc");
|
||||
"addl %1,%0 \n\t" \
|
||||
"setb %%al \n\t" \
|
||||
"movzbl %%al,%1 \n\t" \
|
||||
:"=g"(_c[LO]), "=r"(cy) \
|
||||
:"0"(_c[LO]), "1"(cy) \
|
||||
: "%eax", "%cc")
|
||||
|
||||
/******************************************************************/
|
||||
#elif defined(TFM_ARM)
|
||||
/* ARMv4 code */
|
||||
|
||||
/* ISO C code */
|
||||
#define MONT_START
|
||||
|
||||
#define MONT_FINI
|
||||
|
||||
#define LOOP_END
|
||||
#define LOOP_START \
|
||||
mu = c[x] * mp;
|
||||
mu = c[x] * mp
|
||||
|
||||
/* NOTE: later write it using two regs instead of three for _c + ... */
|
||||
#define INNERMUL \
|
||||
asm( \
|
||||
"UMULL r0,r1,%0,%1 \n\t" \
|
||||
"LDR r2,[%2] \n\t" \
|
||||
"ADDS r2,r2,r0 \n\t" \
|
||||
"STR r2,[%2] \n\t" \
|
||||
"LDR r2,[%3] \n\t" \
|
||||
"ADCS r2,r2,r1 \n\t" \
|
||||
"STR r2,[%3] \n\t" \
|
||||
"LDR r2,[%4] \n\t" \
|
||||
"ADC r2,r2,#0 \n\t" \
|
||||
"STR r2,[%4] \n\t" \
|
||||
::"r"(mu),"r"(*tmpm++),"r"(_c + OFF0),"r"(_c + OFF1),"r"(_c + OFF2):"r0", "r1", "r2", "%cc");
|
||||
" LDR r0,%1 \n\t" \
|
||||
" ADDS r0,r0,%0 \n\t" \
|
||||
" MOVCS %0,#1 \n\t" \
|
||||
" MOVCC %0,#0 \n\t" \
|
||||
" UMLAL r0,%0,%3,%4 \n\t" \
|
||||
" STR r0,%1 \n\t" \
|
||||
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","%cc");
|
||||
|
||||
#define PROPCARRY \
|
||||
asm( \
|
||||
"LDR r0,[%1] \n\t" \
|
||||
"LDR r1,[%0,#4] \n\t" \
|
||||
"ADDS r0,r0,r1 \n\t" \
|
||||
"STR r0,[%0,#4] \n\t" \
|
||||
"LDR r0,[%2] \n\t" \
|
||||
"LDR r1,[%1,#4] \n\t" \
|
||||
"ADCS r0,r0,r1 \n\t" \
|
||||
"STR r0,[%1,#4] \n\t" \
|
||||
"LDR r0,[%2,#4] \n\t" \
|
||||
"ADC r0,r0,#0 \n\t" \
|
||||
"STR r0,[%2,#4] \n\t" \
|
||||
::"r"(_c + OFF0),"r"(_c + OFF1),"r"(_c + OFF2):"r0", "r1", "%cc");
|
||||
" LDR r0,%1 \n\t" \
|
||||
" ADDS r0,r0,%0 \n\t" \
|
||||
" STR r0,%1 \n\t" \
|
||||
" MOVCS %0,#1 \n\t" \
|
||||
" MOVCC %0,#0 \n\t" \
|
||||
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc");
|
||||
|
||||
#elif defined(TFM_PPC32)
|
||||
|
||||
/* PPC32 */
|
||||
#define MONT_START
|
||||
#define MONT_FINI
|
||||
#define LOOP_END
|
||||
#define LOOP_START \
|
||||
mu = c[x] * mp
|
||||
|
||||
#define INNERMUL \
|
||||
asm( \
|
||||
" mullw r16,%3,%4 \n\t" \
|
||||
" mulhwu r17,%3,%4 \n\t" \
|
||||
" addc r16,r16,%0 \n\t" \
|
||||
" addze r17,r17 \n\t" \
|
||||
" lwz r18,%1 \n\t" \
|
||||
" addc r16,r16,r18 \n\t" \
|
||||
" addze %0,r17 \n\t" \
|
||||
" stw r16,%1 \n\t" \
|
||||
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r16", "r17", "r18","%cc");
|
||||
|
||||
#define PROPCARRY \
|
||||
asm( \
|
||||
" lwz r16,%1 \n\t" \
|
||||
" addc r16,r16,%0 \n\t" \
|
||||
" stw r16,%1 \n\t" \
|
||||
" xor %0,%0,%0 \n\t" \
|
||||
" addze %0,%0 \n\t" \
|
||||
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r16","%cc");
|
||||
|
||||
/******************************************************************/
|
||||
#else
|
||||
|
||||
/* ISO C code */
|
||||
#define MONT_START
|
||||
|
||||
#define MONT_FINI
|
||||
|
||||
#define LOOP_END
|
||||
#define LOOP_START \
|
||||
mu = c[x] * mp;
|
||||
mu = c[x] * mp
|
||||
|
||||
#define INNERMUL \
|
||||
do { fp_word t; \
|
||||
t = (fp_word)_c[OFF0] + ((fp_word)mu) * ((fp_word)*tmpm++); _c[OFF0] = t; \
|
||||
t = (fp_word)_c[OFF1] + (t >> DIGIT_BIT); _c[OFF1] = t; \
|
||||
_c[OFF2] += (t >> DIGIT_BIT); \
|
||||
} while (0);
|
||||
_c[0] = t = ((fp_word)_c[0] + (fp_word)cy) + \
|
||||
(((fp_word)mu) * ((fp_word)*tmpm++)); \
|
||||
cy = (t >> DIGIT_BIT); \
|
||||
} while (0)
|
||||
|
||||
#define PROPCARRY \
|
||||
do { fp_word t; \
|
||||
t = (fp_word)_c[OFF0+1] + (fp_word)_c[OFF1]; _c[OFF0+1] = t; \
|
||||
t = (fp_word)_c[OFF1+1] + (t >> DIGIT_BIT) + (fp_word)_c[OFF2]; _c[OFF1+1] = t; \
|
||||
_c[OFF2+1] += (t >> DIGIT_BIT); \
|
||||
} while (0);
|
||||
do { fp_digit t = _c[0] += cy; cy = (t < cy); } while (0)
|
||||
|
||||
#endif
|
||||
/******************************************************************/
|
||||
|
||||
|
||||
#define OFF0 (0)
|
||||
#define OFF1 (FP_SIZE)
|
||||
#define OFF2 (FP_SIZE+FP_SIZE)
|
||||
#define LO 0
|
||||
#define HI 1
|
||||
#define CY 2
|
||||
|
||||
/* computes x/R == x (mod N) via Montgomery Reduction */
|
||||
void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
|
||||
{
|
||||
fp_digit c[3*FP_SIZE], *_c, *tmpm, mu;
|
||||
fp_digit c[FP_SIZE], *_c, *tmpm, mu;
|
||||
int oldused, x, y, pa;
|
||||
|
||||
#if defined(USE_MEMSET)
|
||||
/* now zero the buff */
|
||||
memset(c, 0, sizeof c);
|
||||
#endif
|
||||
pa = m->used;
|
||||
memset(c, 0, sizeof(c));
|
||||
|
||||
/* copy the input */
|
||||
oldused = a->used;
|
||||
for (x = 0; x < oldused; x++) {
|
||||
c[x] = a->dp[x];
|
||||
}
|
||||
|
||||
#if !defined(USE_MEMSET)
|
||||
for (; x < 2*pa+3; x++) {
|
||||
c[x] = 0;
|
||||
}
|
||||
#endif
|
||||
MONT_START;
|
||||
|
||||
/* now let's get bizz-sy! */
|
||||
for (x = 0; x < pa; x++) {
|
||||
fp_digit cy = 0;
|
||||
/* get Mu for this round */
|
||||
LOOP_START;
|
||||
|
||||
/* our friendly neighbourhood alias */
|
||||
_c = c + x;
|
||||
tmpm = m->dp;
|
||||
y = 0;
|
||||
#if defined(TFM_X86_64)
|
||||
for (; y < (pa & ~7); y += 8) {
|
||||
INNERMUL8;
|
||||
_c += 8;
|
||||
tmpm += 8;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < pa; y++) {
|
||||
for (; y < pa; y++) {
|
||||
INNERMUL;
|
||||
++_c;
|
||||
}
|
||||
/* propagate the remaining carry into the higher digits */
|
||||
_c = c + x;
|
||||
PROPCARRY;
|
||||
}
|
||||
|
||||
/* fix the rest of the carries */
|
||||
_c = c + pa;
|
||||
for (x = pa; x < pa * 2 + 2; x++) {
|
||||
PROPCARRY;
|
||||
LOOP_END;
|
||||
while (cy) {
|
||||
PROPCARRY; // cy = cy > (*_c += cy);
|
||||
++_c;
|
||||
}
|
||||
}
|
||||
|
||||
/* now copy out */
|
||||
_c = c + pa;
|
||||
@ -250,3 +373,8 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
|
||||
s_fp_sub (a, m, a);
|
||||
}
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -42,3 +42,7 @@ int fp_montgomery_setup(fp_int *a, fp_digit *rho)
|
||||
return FP_OKAY;
|
||||
}
|
||||
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
44
fp_mul.c
44
fp_mul.c
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -24,19 +24,26 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C)
|
||||
inputs are not close to the next power of two. That is, for example,
|
||||
if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications
|
||||
*/
|
||||
if (y <= 4) {
|
||||
fp_mul_comba4(A,B,C);
|
||||
} else if (y <= 8) {
|
||||
fp_mul_comba8(A,B,C);
|
||||
#if defined(TFM_LARGE)
|
||||
} else if (y <= 16 && y >= 10) {
|
||||
fp_mul_comba16(A,B,C);
|
||||
|
||||
#ifdef TFM_SMALL_SET
|
||||
if (y <= 16) {
|
||||
fp_mul_comba_small(A,B,C);
|
||||
#elif defined(TFM_HUGE)
|
||||
if (0) { 1;
|
||||
#endif
|
||||
#if defined(TFM_HUGE)
|
||||
} else if (y <= 32 && y >= 24) {
|
||||
} else if (y <= 32) {
|
||||
fp_mul_comba32(A,B,C);
|
||||
} else if (y <= 48) {
|
||||
fp_mul_comba48(A,B,C);
|
||||
} else if (y <= 64) {
|
||||
fp_mul_comba64(A,B,C);
|
||||
#endif
|
||||
#if !defined(TFM_HUGE) && !defined(TFM_SMALL_SET)
|
||||
{
|
||||
#else
|
||||
} else {
|
||||
#endif
|
||||
fp_mul_comba(A,B,C);
|
||||
}
|
||||
} else {
|
||||
@ -44,7 +51,7 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C)
|
||||
|
||||
if A = ab and B = cd for ||a|| = r we need to solve
|
||||
|
||||
ac*r^2 + (-(a-b)(c-d) + ac + bd)*r + bd
|
||||
ac*r^2 + ((a+b)(c+d) - (ac + bd))*r + bd
|
||||
|
||||
So we solve for the three products then we form the final result with careful shifting
|
||||
and addition.
|
||||
@ -72,7 +79,7 @@ Obvious points of optimization
|
||||
} else {
|
||||
t1.used = 0;
|
||||
}
|
||||
t1.sign = A->sign;
|
||||
t1.sign = 0;
|
||||
|
||||
// fp_copy(B, &t2); fp_rshd(&t2, r);
|
||||
for (s = 0; s < B->used - r; s++) {
|
||||
@ -86,7 +93,7 @@ Obvious points of optimization
|
||||
} else {
|
||||
t2.used = 0;
|
||||
}
|
||||
t2.sign = B->sign;
|
||||
t2.sign = 0;
|
||||
|
||||
fp_copy(&t1, &amb); fp_copy(&t2, &cmd);
|
||||
fp_zero(&ac);
|
||||
@ -108,18 +115,17 @@ Obvious points of optimization
|
||||
fp_clamp(&t1);
|
||||
fp_clamp(&t2);
|
||||
|
||||
fp_sub(&amb, &t1, &amb); fp_sub(&cmd, &t2, &cmd);
|
||||
s_fp_add(&amb, &t1, &amb); s_fp_add(&cmd, &t2, &cmd);
|
||||
fp_zero(&bd);
|
||||
fp_mul(&t1, &t2, &bd);
|
||||
|
||||
/* now get the (a-b)(c-d) term */
|
||||
/* now get the (a+b)(c+d) term */
|
||||
fp_zero(&comp);
|
||||
fp_mul(&amb, &cmd, &comp);
|
||||
|
||||
/* now solve the system, do the middle term first */
|
||||
comp.sign ^= 1;
|
||||
fp_add(&comp, &ac, &comp);
|
||||
fp_add(&comp, &bd, &comp);
|
||||
s_fp_sub(&comp, &ac, &comp);
|
||||
s_fp_sub(&comp, &bd, &comp);
|
||||
fp_lshd(&comp, r);
|
||||
|
||||
/* leading term */
|
||||
@ -134,3 +140,7 @@ Obvious points of optimization
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -61,3 +61,7 @@ void fp_mul_2(fp_int * a, fp_int * b)
|
||||
b->sign = a->sign;
|
||||
}
|
||||
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -41,3 +41,7 @@ void fp_mul_2d(fp_int *a, int b, fp_int *c)
|
||||
fp_clamp(c);
|
||||
}
|
||||
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
1948
fp_mul_comba.c
1948
fp_mul_comba.c
File diff suppressed because it is too large
Load Diff
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -34,3 +34,7 @@ void fp_mul_d(fp_int *a, fp_digit b, fp_int *c)
|
||||
fp_clamp(c);
|
||||
}
|
||||
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
/* d = a * b (mod c) */
|
||||
@ -16,3 +16,7 @@ int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
|
||||
fp_mul(a, b, &tmp);
|
||||
return fp_mod(&tmp, c, d);
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -71,3 +71,7 @@ void fp_prime_miller_rabin (fp_int * a, fp_int * b, int *result)
|
||||
/* probably prime now */
|
||||
*result = FP_YES;
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -95,3 +95,7 @@ error:
|
||||
free(tmp);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -47,3 +47,7 @@ int fp_radix_size(fp_int *a, int radix, int *size)
|
||||
return FP_OKAY;
|
||||
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -64,3 +64,7 @@ int fp_read_radix(fp_int *a, char *str, int radix)
|
||||
}
|
||||
return FP_OKAY;
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -21,3 +21,7 @@ void fp_read_signed_bin(fp_int *a, unsigned char *b, int c)
|
||||
a->sign = FP_NEG;
|
||||
}
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -22,3 +22,7 @@ void fp_read_unsigned_bin(fp_int *a, unsigned char *b, int c)
|
||||
}
|
||||
fp_clamp (a);
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -25,3 +25,7 @@ void bn_reverse (unsigned char *s, int len)
|
||||
--iy;
|
||||
}
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -34,3 +34,7 @@ void fp_rshd(fp_int *a, int x)
|
||||
fp_clamp(a);
|
||||
}
|
||||
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,9 +5,13 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
/* chars used in radix conversions */
|
||||
const char *fp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
6
fp_set.c
6
fp_set.c
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -15,3 +15,7 @@ void fp_set(fp_int *a, fp_digit b)
|
||||
a->dp[0] = b;
|
||||
a->used = b ? 1 : 0;
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -13,3 +13,7 @@ int fp_signed_bin_size(fp_int *a)
|
||||
{
|
||||
return 1 + fp_unsigned_bin_size (a);
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
29
fp_sqr.c
29
fp_sqr.c
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -17,21 +17,26 @@ void fp_sqr(fp_int *A, fp_int *B)
|
||||
|
||||
y = A->used;
|
||||
if (y <= 64) {
|
||||
if (y <= 4) {
|
||||
fp_sqr_comba4(A,B);
|
||||
} else if (y <= 8) {
|
||||
fp_sqr_comba8(A,B);
|
||||
#if defined(TFM_LARGE)
|
||||
} else if (y <= 16 && y >= 12) {
|
||||
fp_sqr_comba16(A,B);
|
||||
|
||||
#if defined(TFM_SMALL_SET)
|
||||
if (y <= 16) {
|
||||
fp_sqr_comba_small(A,B);
|
||||
#elif defined(TFM_HUGE)
|
||||
if (0) { 1;
|
||||
#endif
|
||||
#if defined(TFM_HUGE)
|
||||
} else if (y <= 32 && y >= 20) {
|
||||
} else if (y <= 32) {
|
||||
fp_sqr_comba32(A,B);
|
||||
} else if (y <= 64 && y >= 48) {
|
||||
} else if (y <= 48) {
|
||||
fp_sqr_comba48(A,B);
|
||||
} else if (y <= 64) {
|
||||
fp_sqr_comba64(A,B);
|
||||
#endif
|
||||
#if !defined(TFM_SMALL_SET) && !defined(TFM_HUGE)
|
||||
{
|
||||
#else
|
||||
} else {
|
||||
#endif
|
||||
fp_sqr_comba(A, B);
|
||||
}
|
||||
|
||||
@ -109,3 +114,7 @@ Obvious points of optimization
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
1865
fp_sqr_comba.c
1865
fp_sqr_comba.c
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,13 @@
|
||||
/* TomsFastMath, a fast ISO C bignum library.
|
||||
*
|
||||
* This project is meant to fill in where LibTomMath
|
||||
* falls short. That is speed ;-)
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
|
||||
/* generic comba squarer */
|
||||
void fp_sqr_comba(fp_int *A, fp_int *B)
|
||||
{
|
||||
@ -73,3 +83,7 @@ void fp_sqr_comba(fp_int *A, fp_int *B)
|
||||
fp_copy(dst, B);
|
||||
}
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -17,3 +17,7 @@ int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c)
|
||||
fp_sqr(a, &tmp);
|
||||
return fp_mod(&tmp, b, c);
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
6
fp_sub.c
6
fp_sub.c
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -44,3 +44,7 @@ void fp_sub(fp_int *a, fp_int *b, fp_int *c)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -16,3 +16,7 @@ void fp_sub_d(fp_int *a, fp_digit b, fp_int *c)
|
||||
fp_set(&tmp, b);
|
||||
fp_sub(a, &tmp, c);
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -18,3 +18,7 @@ int fp_submod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
|
||||
return fp_mod(&tmp, c, d);
|
||||
}
|
||||
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -14,3 +14,7 @@ void fp_to_signed_bin(fp_int *a, unsigned char *b)
|
||||
fp_to_unsigned_bin (a, b + 1);
|
||||
b[0] = (unsigned char) ((a->sign == FP_ZPOS) ? 0 : 1);
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -23,3 +23,7 @@ void fp_to_unsigned_bin(fp_int *a, unsigned char *b)
|
||||
}
|
||||
bn_reverse (b, x);
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -53,3 +53,7 @@ int fp_toradix(fp_int *a, char *str, int radix)
|
||||
*str = '\0';
|
||||
return FP_OKAY;
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -14,3 +14,7 @@ int fp_unsigned_bin_size(fp_int *a)
|
||||
int size = fp_count_bits (a);
|
||||
return (size / 8 + ((size & 7) != 0 ? 1 : 0));
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
53
makefile
53
makefile
@ -10,7 +10,7 @@ CFLAGS += -Wall -W -Wshadow -I./ -O3 -funroll-all-loops
|
||||
#speed
|
||||
CFLAGS += -fomit-frame-pointer
|
||||
|
||||
VERSION=0.03
|
||||
VERSION=0.04
|
||||
|
||||
default: libtfm.a
|
||||
|
||||
@ -42,9 +42,37 @@ fp_read_radix.o fp_toradix.o fp_radix_size.o fp_count_bits.o fp_reverse.o fp_s_r
|
||||
\
|
||||
fp_ident.o
|
||||
|
||||
libtfm.a: $(OBJECTS)
|
||||
$(AR) $(ARFLAGS) libtfm.a $(OBJECTS)
|
||||
ranlib libtfm.a
|
||||
HEADERS=tfm.h
|
||||
|
||||
ifndef LIBPATH
|
||||
LIBPATH=/usr/lib
|
||||
endif
|
||||
|
||||
ifndef INCPATH
|
||||
INCPATH=/usr/include
|
||||
endif
|
||||
|
||||
ifndef TFM_GROUP
|
||||
GROUP=wheel
|
||||
endif
|
||||
|
||||
ifndef TFM_USER
|
||||
USER=root
|
||||
endif
|
||||
|
||||
ifndef LIBNAME
|
||||
LIBNAME=libtfm.a
|
||||
endif
|
||||
|
||||
$(LIBNAME): $(OBJECTS)
|
||||
$(AR) $(ARFLAGS) $(LIBNAME) $(OBJECTS)
|
||||
ranlib $(LIBNAME)
|
||||
|
||||
install: libtfm.a
|
||||
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(LIBPATH)
|
||||
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(INCPATH)
|
||||
install -g $(GROUP) -o $(USER) $(LIBNAME) $(DESTDIR)$(LIBPATH)
|
||||
install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH)
|
||||
|
||||
mtest/mtest: mtest/mtest.c
|
||||
cd mtest ; make mtest
|
||||
@ -52,8 +80,14 @@ mtest/mtest: mtest/mtest.c
|
||||
test: libtfm.a demo/test.o mtest/mtest
|
||||
$(CC) $(CFLAGS) demo/test.o libtfm.a $(PROF) -o test
|
||||
|
||||
timing: libtfm.a demo/test.o
|
||||
$(CC) $(CFLAGS) demo/test.o libtfm.a $(PROF) -o test
|
||||
|
||||
stest: libtfm.a demo/stest.o
|
||||
$(CC) demo/stest.o libtfm.a -o stest
|
||||
$(CC) $(CFLAGS) demo/stest.o libtfm.a -o stest
|
||||
|
||||
rsatest: libtfm.a demo/rsa.o
|
||||
$(CC) $(CFLAGS) demo/rsa.o libtfm.a -o rsatest
|
||||
|
||||
docdvi: tfm.tex
|
||||
touch tfm.ind
|
||||
@ -68,10 +102,15 @@ docs: docdvi
|
||||
mv -f tfm.pdf doc
|
||||
|
||||
clean:
|
||||
rm -f $(OBJECTS) *.a demo/*.o test tfm.aux tfm.dvi tfm.idx tfm.ilg tfm.ind tfm.lof tfm.log tfm.toc stest *~
|
||||
rm -f $(OBJECTS) *.a demo/*.o test tfm.aux tfm.dvi tfm.idx tfm.ilg tfm.ind tfm.lof tfm.log tfm.toc stest *~ rsatest *.gcda *.gcno demo/*.gcda demo/*.gcno mtest/*.gcno mtest/*.gcda
|
||||
cd mtest ; make clean
|
||||
|
||||
zipup: docs clean
|
||||
no_oops: clean
|
||||
cd .. ; cvs commit
|
||||
echo Scanning for scratch/dirty files
|
||||
find . -type f | grep -v CVS | xargs -n 1 bash mess.sh
|
||||
|
||||
zipup: no_oops docs clean
|
||||
perl gen.pl ; mv mpi.c pre_gen/ ; \
|
||||
cd .. ; rm -rf tfm* tomsfastmath-$(VERSION) ; mkdir tomsfastmath-$(VERSION) ; \
|
||||
cp -R ./tomsfastmath/* ./tomsfastmath-$(VERSION)/ ; \
|
||||
|
4
mess.sh
Normal file
4
mess.sh
Normal file
@ -0,0 +1,4 @@
|
||||
#!/bin/bash
|
||||
if cvs log $1 >/dev/null 2>/dev/null; then exit 0; else echo "$1 shouldn't be here" ; exit 1; fi
|
||||
|
||||
|
@ -3,7 +3,7 @@ CFLAGS += -Wall -W -O3
|
||||
default: mtest
|
||||
|
||||
mtest: mtest.o
|
||||
$(CC) mtest.o -ltommath -o mtest
|
||||
$(CC) $(CFLAGS) mtest.o -ltommath -o mtest
|
||||
|
||||
clean:
|
||||
rm -f *.o mtest *~
|
||||
|
@ -60,7 +60,7 @@ void rand_num2(mp_int *a)
|
||||
int n, size;
|
||||
unsigned char buf[2048];
|
||||
|
||||
size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % 32;
|
||||
size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % 256;
|
||||
buf[0] = (fgetc(rng)&1)?1:0;
|
||||
fread(buf+1, 1, size, rng);
|
||||
while (buf[1] == 0) buf[1] = fgetc(rng);
|
||||
@ -317,3 +317,7 @@ int main(void)
|
||||
fclose(rng);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
4587
pre_gen/mpi.c
4587
pre_gen/mpi.c
File diff suppressed because it is too large
Load Diff
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -35,3 +35,7 @@ void s_fp_add(fp_int *a, fp_int *b, fp_int *c)
|
||||
}
|
||||
fp_clamp(c);
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#include <tfm.h>
|
||||
|
||||
@ -29,3 +29,7 @@ void s_fp_sub(fp_int *a, fp_int *b, fp_int *c)
|
||||
}
|
||||
fp_clamp(c);
|
||||
}
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
56
tfm.aux
Normal file
56
tfm.aux
Normal file
@ -0,0 +1,56 @@
|
||||
\relax
|
||||
\ifx\hyper@anchor\@undefined
|
||||
\global \let \oldcontentsline\contentsline
|
||||
\gdef \contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
|
||||
\global \let \oldnewlabel\newlabel
|
||||
\gdef \newlabel#1#2{\newlabelxx{#1}#2}
|
||||
\gdef \newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
|
||||
\AtEndDocument{\let \contentsline\oldcontentsline
|
||||
\let \newlabel\oldnewlabel}
|
||||
\else
|
||||
\global \let \hyper@last\relax
|
||||
\fi
|
||||
|
||||
\@writefile{toc}{\contentsline {chapter}{\numberline {1}Introduction}{1}{chapter.1}}
|
||||
\@writefile{lof}{\addvspace {10\p@ }}
|
||||
\@writefile{lot}{\addvspace {10\p@ }}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {1.1}What is TomsFastMath?}{1}{section.1.1}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {1.2}License}{2}{section.1.2}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {1.3}Building}{2}{section.1.3}}
|
||||
\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.1}Build Limitations}{2}{subsection.1.3.1}}
|
||||
\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.2}Optimization Configuration}{2}{subsection.1.3.2}}
|
||||
\@writefile{toc}{\contentsline {subsubsection}{x86--32}{3}{section*.3}}
|
||||
\@writefile{toc}{\contentsline {subsubsection}{SSE2}{3}{section*.4}}
|
||||
\@writefile{toc}{\contentsline {subsubsection}{x86--64}{3}{section*.5}}
|
||||
\@writefile{toc}{\contentsline {subsubsection}{ARM}{3}{section*.6}}
|
||||
\@writefile{toc}{\contentsline {subsubsection}{PPC32}{3}{section*.7}}
|
||||
\@writefile{toc}{\contentsline {subsubsection}{Future Releases}{4}{section*.8}}
|
||||
\@writefile{lof}{\contentsline {figure}{\numberline {1.1}{\ignorespaces Recommended Build Modes}}{4}{figure.1.1}}
|
||||
\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.3}Precision Configuration}{4}{subsection.1.3.3}}
|
||||
\@writefile{toc}{\contentsline {chapter}{\numberline {2}Getting Started}{5}{chapter.2}}
|
||||
\@writefile{lof}{\addvspace {10\p@ }}
|
||||
\@writefile{lot}{\addvspace {10\p@ }}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {2.1}Data Types}{5}{section.2.1}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {2.2}Initialization}{6}{section.2.2}}
|
||||
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.1}Simple Initialization}{6}{subsection.2.2.1}}
|
||||
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.2}Initialize Small Constants}{6}{subsection.2.2.2}}
|
||||
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.3}Initialize Copy}{6}{subsection.2.2.3}}
|
||||
\@writefile{toc}{\contentsline {chapter}{\numberline {3}Arithmetic Operations}{7}{chapter.3}}
|
||||
\@writefile{lof}{\addvspace {10\p@ }}
|
||||
\@writefile{lot}{\addvspace {10\p@ }}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {3.1}Odds and Evens}{7}{section.3.1}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {3.2}Sign Manipulation}{7}{section.3.2}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {3.3}Comparisons}{8}{section.3.3}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {3.4}Shifting}{8}{section.3.4}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {3.5}Basic Algebra}{9}{section.3.5}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {3.6}Modular Exponentiation}{9}{section.3.6}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {3.7}Number Theoretic}{9}{section.3.7}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {3.8}Prime Numbers}{10}{section.3.8}}
|
||||
\@writefile{toc}{\contentsline {chapter}{\numberline {4}Porting TomsFastMath}{11}{chapter.4}}
|
||||
\@writefile{lof}{\addvspace {10\p@ }}
|
||||
\@writefile{lot}{\addvspace {10\p@ }}
|
||||
\newlabel{chap:asmops}{{4}{11}{Porting TomsFastMath\relax }{chapter.4}{}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {4.1}Getting Started}{11}{section.4.1}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {4.2}Multiply with Comba}{11}{section.4.2}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {4.3}Squaring with Comba}{13}{section.4.3}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {4.4}Montgomery with Comba}{15}{section.4.4}}
|
75
tfm.h
75
tfm.h
@ -5,7 +5,7 @@
|
||||
*
|
||||
* This project is public domain and free for all purposes.
|
||||
*
|
||||
* Tom St Denis, tomstdenis@iahu.ca
|
||||
* Tom St Denis, tomstdenis@gmail.com
|
||||
*/
|
||||
#ifndef TFM_H_
|
||||
#define TFM_H_
|
||||
@ -16,28 +16,44 @@
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
|
||||
#undef MIN
|
||||
#ifndef MIN
|
||||
#define MIN(x,y) ((x)<(y)?(x):(y))
|
||||
#undef MAX
|
||||
#endif
|
||||
|
||||
#ifndef MAX
|
||||
#define MAX(x,y) ((x)>(y)?(x):(y))
|
||||
#endif
|
||||
|
||||
/* do we want large code? */
|
||||
#define TFM_LARGE
|
||||
/* externally define this symbol to ignore the default settings, useful for changing the build from the make process */
|
||||
#ifndef TFM_ALREADY_SET
|
||||
|
||||
/* do we want huge code (implies large)? The answer is, yes. */
|
||||
/* do we want the large set of small multiplications?
|
||||
Enable these if you are going to be doing a lot of small (<= 16 digit) multiplications say in ECC
|
||||
Or if you're on a 64-bit machine doing RSA as a 1024-bit integer == 16 digits ;-)
|
||||
*/
|
||||
#define TFM_SMALL_SET
|
||||
|
||||
/* do we want huge code
|
||||
Enable these if you are doing 32, 48 or 64 digit multiplications (useful for RSA)
|
||||
Less important on 64-bit machines as 32 digits == 2048 bits
|
||||
*/
|
||||
#define TFM_HUGE
|
||||
|
||||
/* imply TFM_LARGE as required */
|
||||
#if defined(TFM_HUGE)
|
||||
#if !defined(TFM_LARGE)
|
||||
#define TFM_LARGE
|
||||
#endif
|
||||
/* do we want some overflow checks
|
||||
Not required if you make sure your numbers are within range (e.g. by default a modulus for fp_exptmod() can only be up to 2048 bits long)
|
||||
*/
|
||||
/* #define TFM_CHECK */
|
||||
|
||||
/* Is the target a P4 Prescott
|
||||
*/
|
||||
/* #define TFM_PRESCOTT */
|
||||
|
||||
#endif
|
||||
|
||||
/* Max size of any number in bits. Basically the largest size you will be multiplying
|
||||
* should be half [or smaller] of FP_MAX_SIZE-four_digit
|
||||
*
|
||||
* You can externally define this or it defaults to 4096-bits.
|
||||
* You can externally define this or it defaults to 4096-bits [allowing multiplications up to 2048x2048 bits]
|
||||
*/
|
||||
#ifndef FP_MAX_SIZE
|
||||
#define FP_MAX_SIZE (4096+(4*DIGIT_BIT))
|
||||
@ -76,9 +92,9 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* make sure we're 32-bit for x86-32/sse/arm */
|
||||
#if (defined(TFM_X86) || defined(TFM_SSE2) || defined(TFM_ARM)) && defined(FP_64BIT)
|
||||
#warning x86-32, SSE2 and ARM optimizations require 32-bit digits (undefining)
|
||||
/* make sure we're 32-bit for x86-32/sse/arm/ppc32 */
|
||||
#if (defined(TFM_X86) || defined(TFM_SSE2) || defined(TFM_ARM) || defined(TFM_PPC32)) && defined(FP_64BIT)
|
||||
#warning x86-32, SSE2 and ARM, PPC32 optimizations require 32-bit digits (undefining)
|
||||
#undef FP_64BIT
|
||||
#endif
|
||||
|
||||
@ -104,6 +120,12 @@
|
||||
#endif
|
||||
#define TFM_ASM
|
||||
#endif
|
||||
#ifdef TFM_PPC32
|
||||
#ifdef TFM_ASM
|
||||
#error TFM_ASM already defined!
|
||||
#endif
|
||||
#define TFM_ASM
|
||||
#endif
|
||||
|
||||
/* we want no asm? */
|
||||
#ifdef TFM_NO_ASM
|
||||
@ -111,6 +133,7 @@
|
||||
#undef TFM_X86_64
|
||||
#undef TFM_SSE2
|
||||
#undef TFM_ARM
|
||||
#undef TFM_PPC32
|
||||
#undef TFM_ASM
|
||||
#endif
|
||||
|
||||
@ -179,7 +202,7 @@ const char *fp_ident(void);
|
||||
|
||||
/* zero/even/odd ? */
|
||||
#define fp_iszero(a) (((a)->used == 0) ? FP_YES : FP_NO)
|
||||
#define fp_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? FP_YES : FP_NO)
|
||||
#define fp_iseven(a) (((a)->used >= 0 && (((a)->dp[0] & 1) == 0)) ? FP_YES : FP_NO)
|
||||
#define fp_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? FP_YES : FP_NO)
|
||||
|
||||
/* set to a small digit */
|
||||
@ -335,24 +358,22 @@ void bn_reverse(unsigned char *s, int len);
|
||||
void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C);
|
||||
#ifdef TFM_HUGE
|
||||
void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C);
|
||||
void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C);
|
||||
void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C);
|
||||
#endif
|
||||
#ifdef TFM_LARGE
|
||||
void fp_mul_comba16(fp_int *A, fp_int *B, fp_int *C);
|
||||
#endif
|
||||
void fp_mul_comba8(fp_int *A, fp_int *B, fp_int *C);
|
||||
void fp_mul_comba4(fp_int *A, fp_int *B, fp_int *C);
|
||||
void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C);
|
||||
|
||||
void fp_sqr_comba(fp_int *A, fp_int *B);
|
||||
void fp_sqr_comba4(fp_int *A, fp_int *B);
|
||||
void fp_sqr_comba8(fp_int *A, fp_int *B);
|
||||
#ifdef TFM_LARGE
|
||||
void fp_sqr_comba16(fp_int *A, fp_int *B);
|
||||
#endif
|
||||
void fp_sqr_comba_small(fp_int *A, fp_int *B);
|
||||
#ifdef TFM_HUGE
|
||||
void fp_sqr_comba32(fp_int *A, fp_int *B);
|
||||
void fp_sqr_comba48(fp_int *A, fp_int *B);
|
||||
void fp_sqr_comba64(fp_int *A, fp_int *B);
|
||||
#endif
|
||||
extern const char *fp_s_rmap;
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* $Source$ */
|
||||
/* $Revision$ */
|
||||
/* $Date$ */
|
||||
|
29
tfm.idx
Normal file
29
tfm.idx
Normal file
@ -0,0 +1,29 @@
|
||||
\indexentry{fp\_init|hyperpage}{6}
|
||||
\indexentry{fp\_set|hyperpage}{6}
|
||||
\indexentry{fp\_init\_copy|hyperpage}{6}
|
||||
\indexentry{fp\_iszero|hyperpage}{7}
|
||||
\indexentry{fp\_iseven|hyperpage}{7}
|
||||
\indexentry{fp\_isodd|hyperpage}{7}
|
||||
\indexentry{fp\_neg|hyperpage}{7}
|
||||
\indexentry{fp\_abs|hyperpage}{7}
|
||||
\indexentry{fp\_cmp|hyperpage}{8}
|
||||
\indexentry{fp\_cmp\_mag|hyperpage}{8}
|
||||
\indexentry{fp\_lshd|hyperpage}{8}
|
||||
\indexentry{fp\_rshd|hyperpage}{8}
|
||||
\indexentry{fp\_div\_2d|hyperpage}{8}
|
||||
\indexentry{fp\_mod\_2d|hyperpage}{8}
|
||||
\indexentry{fp\_mul\_2d|hyperpage}{8}
|
||||
\indexentry{fp\_div\_2|hyperpage}{8}
|
||||
\indexentry{fp\_mul\_2|hyperpage}{8}
|
||||
\indexentry{fp\_cnt\_lsb|hyperpage}{8}
|
||||
\indexentry{fp\_add|hyperpage}{9}
|
||||
\indexentry{fp\_sub|hyperpage}{9}
|
||||
\indexentry{fp\_mul|hyperpage}{9}
|
||||
\indexentry{fp\_sqr|hyperpage}{9}
|
||||
\indexentry{fp\_div|hyperpage}{9}
|
||||
\indexentry{fp\_mod|hyperpage}{9}
|
||||
\indexentry{fp\_exptmod|hyperpage}{9}
|
||||
\indexentry{fp\_invmod|hyperpage}{9}
|
||||
\indexentry{fp\_gcd|hyperpage}{9}
|
||||
\indexentry{fp\_lcm|hyperpage}{9}
|
||||
\indexentry{fp\_isprime|hyperpage}{10}
|
6
tfm.ilg
Normal file
6
tfm.ilg
Normal file
@ -0,0 +1,6 @@
|
||||
This is makeindex, version 2.14 [02-Oct-2002] (kpathsea + Thai support).
|
||||
Scanning input file tfm.idx....done (29 entries accepted, 0 rejected).
|
||||
Sorting entries....done (137 comparisons).
|
||||
Generating output file tfm.ind....done (33 lines written, 0 warnings).
|
||||
Output written in tfm.ind.
|
||||
Transcript written in tfm.ilg.
|
33
tfm.ind
Normal file
33
tfm.ind
Normal file
@ -0,0 +1,33 @@
|
||||
\begin{theindex}
|
||||
|
||||
\item fp\_abs, \hyperpage{7}
|
||||
\item fp\_add, \hyperpage{9}
|
||||
\item fp\_cmp, \hyperpage{8}
|
||||
\item fp\_cmp\_mag, \hyperpage{8}
|
||||
\item fp\_cnt\_lsb, \hyperpage{8}
|
||||
\item fp\_div, \hyperpage{9}
|
||||
\item fp\_div\_2, \hyperpage{8}
|
||||
\item fp\_div\_2d, \hyperpage{8}
|
||||
\item fp\_exptmod, \hyperpage{9}
|
||||
\item fp\_gcd, \hyperpage{9}
|
||||
\item fp\_init, \hyperpage{6}
|
||||
\item fp\_init\_copy, \hyperpage{6}
|
||||
\item fp\_invmod, \hyperpage{9}
|
||||
\item fp\_iseven, \hyperpage{7}
|
||||
\item fp\_isodd, \hyperpage{7}
|
||||
\item fp\_isprime, \hyperpage{10}
|
||||
\item fp\_iszero, \hyperpage{7}
|
||||
\item fp\_lcm, \hyperpage{9}
|
||||
\item fp\_lshd, \hyperpage{8}
|
||||
\item fp\_mod, \hyperpage{9}
|
||||
\item fp\_mod\_2d, \hyperpage{8}
|
||||
\item fp\_mul, \hyperpage{9}
|
||||
\item fp\_mul\_2, \hyperpage{8}
|
||||
\item fp\_mul\_2d, \hyperpage{8}
|
||||
\item fp\_neg, \hyperpage{7}
|
||||
\item fp\_rshd, \hyperpage{8}
|
||||
\item fp\_set, \hyperpage{6}
|
||||
\item fp\_sqr, \hyperpage{9}
|
||||
\item fp\_sub, \hyperpage{9}
|
||||
|
||||
\end{theindex}
|
5
tfm.lof
Normal file
5
tfm.lof
Normal file
@ -0,0 +1,5 @@
|
||||
\addvspace {10\p@ }
|
||||
\contentsline {figure}{\numberline {1.1}{\ignorespaces Recommended Build Modes}}{4}{figure.1.1}
|
||||
\addvspace {10\p@ }
|
||||
\addvspace {10\p@ }
|
||||
\addvspace {10\p@ }
|
332
tfm.log
Normal file
332
tfm.log
Normal file
@ -0,0 +1,332 @@
|
||||
This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) (format=latex 2005.4.10) 23 JUL 2005 07:42
|
||||
entering extended mode
|
||||
**tfm
|
||||
(./tfm.tex
|
||||
LaTeX2e <2003/12/01>
|
||||
Babel <v3.8d> and hyphenation patterns for american, french, german, ngerman, b
|
||||
ahasa, basque, bulgarian, catalan, croatian, czech, danish, dutch, esperanto, e
|
||||
stonian, finnish, greek, icelandic, irish, italian, latin, magyar, norsk, polis
|
||||
h, portuges, romanian, russian, serbian, slovak, slovene, spanish, swedish, tur
|
||||
kish, ukrainian, nohyphenation, loaded.
|
||||
(/usr/share/texmf/tex/latex/base/book.cls
|
||||
Document Class: book 2004/02/16 v1.4f Standard LaTeX document class
|
||||
(/usr/share/texmf/tex/latex/base/bk10.clo
|
||||
File: bk10.clo 2004/02/16 v1.4f Standard LaTeX file (size option)
|
||||
)
|
||||
\c@part=\count79
|
||||
\c@chapter=\count80
|
||||
\c@section=\count81
|
||||
\c@subsection=\count82
|
||||
\c@subsubsection=\count83
|
||||
\c@paragraph=\count84
|
||||
\c@subparagraph=\count85
|
||||
\c@figure=\count86
|
||||
\c@table=\count87
|
||||
\abovecaptionskip=\skip41
|
||||
\belowcaptionskip=\skip42
|
||||
\bibindent=\dimen102
|
||||
)
|
||||
(/usr/share/texmf/tex/latex/hyperref/hyperref.sty
|
||||
Package: hyperref 2003/11/30 v6.74m Hypertext links for LaTeX
|
||||
|
||||
(/usr/share/texmf/tex/latex/graphics/keyval.sty
|
||||
Package: keyval 1999/03/16 v1.13 key=value parser (DPC)
|
||||
\KV@toks@=\toks14
|
||||
)
|
||||
\@linkdim=\dimen103
|
||||
\Hy@linkcounter=\count88
|
||||
\Hy@pagecounter=\count89
|
||||
|
||||
(/usr/share/texmf/tex/latex/hyperref/pd1enc.def
|
||||
File: pd1enc.def 2003/11/30 v6.74m Hyperref: PDFDocEncoding definition (HO)
|
||||
)
|
||||
(/usr/share/texmf/tex/latex/hyperref/hyperref.cfg
|
||||
File: hyperref.cfg 2002/06/06 v1.2 hyperref configuration of TeXLive and teTeX
|
||||
)
|
||||
Package hyperref Info: Hyper figures OFF on input line 1880.
|
||||
Package hyperref Info: Link nesting OFF on input line 1885.
|
||||
Package hyperref Info: Hyper index ON on input line 1888.
|
||||
Package hyperref Info: Plain pages ON on input line 1893.
|
||||
Package hyperref Info: Backreferencing OFF on input line 1900.
|
||||
|
||||
Implicit mode ON; LaTeX internals redefined
|
||||
Package hyperref Info: Bookmarks ON on input line 2004.
|
||||
(/usr/share/texmf/tex/latex/html/url.sty
|
||||
Package: url 1999/03/02 ver 1.4 Verb mode for urls, email addresses, and file
|
||||
names
|
||||
)
|
||||
LaTeX Info: Redefining \url on input line 2143.
|
||||
\Fld@menulength=\count90
|
||||
\Field@Width=\dimen104
|
||||
\Fld@charsize=\dimen105
|
||||
\Choice@toks=\toks15
|
||||
\Field@toks=\toks16
|
||||
Package hyperref Info: Hyper figures OFF on input line 2618.
|
||||
Package hyperref Info: Link nesting OFF on input line 2623.
|
||||
Package hyperref Info: Hyper index ON on input line 2626.
|
||||
Package hyperref Info: backreferencing OFF on input line 2633.
|
||||
Package hyperref Info: Link coloring OFF on input line 2638.
|
||||
\c@Item=\count91
|
||||
\c@Hfootnote=\count92
|
||||
)
|
||||
*hyperref using default driver hypertex*
|
||||
(/usr/share/texmf/tex/latex/hyperref/hypertex.def
|
||||
File: hypertex.def 2003/11/30 v6.74m Hyperref driver for HyperTeX specials
|
||||
)
|
||||
(/usr/share/texmf/tex/latex/base/makeidx.sty
|
||||
Package: makeidx 2000/03/29 v1.0m Standard LaTeX package
|
||||
)
|
||||
(/usr/share/texmf/tex/latex/amsfonts/amssymb.sty
|
||||
Package: amssymb 2002/01/22 v2.2d
|
||||
|
||||
(/usr/share/texmf/tex/latex/amsfonts/amsfonts.sty
|
||||
Package: amsfonts 2001/10/25 v2.2f
|
||||
\@emptytoks=\toks17
|
||||
\symAMSa=\mathgroup4
|
||||
\symAMSb=\mathgroup5
|
||||
LaTeX Font Info: Overwriting math alphabet `\mathfrak' in version `bold'
|
||||
(Font) U/euf/m/n --> U/euf/b/n on input line 132.
|
||||
))
|
||||
(/usr/share/texmf/tex/latex/graphics/color.sty
|
||||
Package: color 1999/02/16 v1.0i Standard LaTeX Color (DPC)
|
||||
|
||||
(/usr/share/texmf/tex/latex/graphics/color.cfg
|
||||
File: color.cfg 2005/02/03 v1.3 color configuration of teTeX/TeXLive
|
||||
)
|
||||
Package color Info: Driver file: dvips.def on input line 125.
|
||||
|
||||
(/usr/share/texmf/tex/latex/graphics/dvips.def
|
||||
File: dvips.def 1999/02/16 v3.0i Driver-dependant file (DPC,SPQR)
|
||||
)
|
||||
(/usr/share/texmf/tex/latex/graphics/dvipsnam.def
|
||||
File: dvipsnam.def 1999/02/16 v3.0i Driver-dependant file (DPC,SPQR)
|
||||
))
|
||||
(/usr/share/texmf/tex/latex/base/alltt.sty
|
||||
Package: alltt 1997/06/16 v2.0g defines alltt environment
|
||||
)
|
||||
(/usr/share/texmf/tex/latex/graphics/graphicx.sty
|
||||
Package: graphicx 1999/02/16 v1.0f Enhanced LaTeX Graphics (DPC,SPQR)
|
||||
|
||||
(/usr/share/texmf/tex/latex/graphics/graphics.sty
|
||||
Package: graphics 2001/07/07 v1.0n Standard LaTeX Graphics (DPC,SPQR)
|
||||
|
||||
(/usr/share/texmf/tex/latex/graphics/trig.sty
|
||||
Package: trig 1999/03/16 v1.09 sin cos tan (DPC)
|
||||
)
|
||||
(/usr/share/texmf/tex/latex/graphics/graphics.cfg
|
||||
File: graphics.cfg 2005/02/03 v1.3 graphics configuration of teTeX/TeXLive
|
||||
)
|
||||
Package graphics Info: Driver file: dvips.def on input line 80.
|
||||
)
|
||||
\Gin@req@height=\dimen106
|
||||
\Gin@req@width=\dimen107
|
||||
)
|
||||
(/usr/share/texmf/tex/latex/tools/layout.sty
|
||||
Package: layout 2000/09/25 v1.2c Show layout parameters
|
||||
\oneinch=\count93
|
||||
\cnt@paperwidth=\count94
|
||||
\cnt@paperheight=\count95
|
||||
\cnt@hoffset=\count96
|
||||
\cnt@voffset=\count97
|
||||
\cnt@textheight=\count98
|
||||
\cnt@textwidth=\count99
|
||||
\cnt@topmargin=\count100
|
||||
\cnt@oddsidemargin=\count101
|
||||
\cnt@evensidemargin=\count102
|
||||
\cnt@headheight=\count103
|
||||
\cnt@headsep=\count104
|
||||
\cnt@marginparsep=\count105
|
||||
\cnt@marginparwidth=\count106
|
||||
\cnt@marginparpush=\count107
|
||||
\cnt@footskip=\count108
|
||||
\fheight=\count109
|
||||
\ref@top=\count110
|
||||
\ref@hoffset=\count111
|
||||
\ref@voffset=\count112
|
||||
\ref@head=\count113
|
||||
\ref@body=\count114
|
||||
\ref@foot=\count115
|
||||
\ref@margin=\count116
|
||||
\ref@marginwidth=\count117
|
||||
\ref@marginpar=\count118
|
||||
\Interval=\count119
|
||||
\ExtraYPos=\count120
|
||||
\PositionX=\count121
|
||||
\PositionY=\count122
|
||||
\ArrowLength=\count123
|
||||
)
|
||||
\@indexfile=\write3
|
||||
\openout3 = `tfm.idx'.
|
||||
|
||||
|
||||
Writing index file tfm.idx
|
||||
(./tfm.aux)
|
||||
\openout1 = `tfm.aux'.
|
||||
|
||||
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 49.
|
||||
LaTeX Font Info: ... okay on input line 49.
|
||||
LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 49.
|
||||
LaTeX Font Info: ... okay on input line 49.
|
||||
LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 49.
|
||||
LaTeX Font Info: ... okay on input line 49.
|
||||
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 49.
|
||||
LaTeX Font Info: ... okay on input line 49.
|
||||
LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 49.
|
||||
LaTeX Font Info: ... okay on input line 49.
|
||||
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 49.
|
||||
LaTeX Font Info: ... okay on input line 49.
|
||||
LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 49.
|
||||
LaTeX Font Info: ... okay on input line 49.
|
||||
Package hyperref Info: Link coloring OFF on input line 49.
|
||||
(/usr/share/texmf/tex/latex/hyperref/nameref.sty
|
||||
Package: nameref 2003/12/03 v2.21 Cross-referencing by name of section
|
||||
\c@section@level=\count124
|
||||
)
|
||||
LaTeX Info: Redefining \ref on input line 49.
|
||||
LaTeX Info: Redefining \pageref on input line 49.
|
||||
LaTeX Font Info: Try loading font information for U+msa on input line 55.
|
||||
|
||||
(/usr/share/texmf/tex/latex/amsfonts/umsa.fd
|
||||
File: umsa.fd 2002/01/19 v2.2g AMS font definitions
|
||||
)
|
||||
LaTeX Font Info: Try loading font information for U+msb on input line 55.
|
||||
|
||||
(/usr/share/texmf/tex/latex/amsfonts/umsb.fd
|
||||
File: umsb.fd 2002/01/19 v2.2g AMS font definitions
|
||||
) [1
|
||||
|
||||
|
||||
|
||||
] [2] (./tfm.toc [3
|
||||
|
||||
])
|
||||
\tf@toc=\write4
|
||||
\openout4 = `tfm.toc'.
|
||||
|
||||
[4]
|
||||
(./tfm.lof)
|
||||
\tf@lof=\write5
|
||||
\openout5 = `tfm.lof'.
|
||||
|
||||
[5
|
||||
|
||||
] [6
|
||||
|
||||
]
|
||||
Chapter 1.
|
||||
[1
|
||||
|
||||
] [2] [3] [4]
|
||||
Chapter 2.
|
||||
|
||||
Underfull \vbox (badness 7649) has occurred while \output is active []
|
||||
|
||||
[5
|
||||
|
||||
]
|
||||
[6]
|
||||
Chapter 3.
|
||||
[7
|
||||
|
||||
] [8] [9] [10]
|
||||
Chapter 4.
|
||||
[11
|
||||
|
||||
] [12] [13]
|
||||
Overfull \hbox (74.99634pt too wide) in paragraph at lines 547--547
|
||||
[]\OT1/cmtt/m/n/10 #define SQRADDSC(i, j)
|
||||
\[]
|
||||
[]
|
||||
|
||||
|
||||
Overfull \hbox (74.99634pt too wide) in paragraph at lines 547--547
|
||||
[] \OT1/cmtt/m/n/10 do { fp_word t;
|
||||
\[]
|
||||
[]
|
||||
|
||||
|
||||
Overfull \hbox (74.99634pt too wide) in paragraph at lines 547--547
|
||||
[] \OT1/cmtt/m/n/10 t = ((fp_word)i) * ((fp_word)j);
|
||||
\[]
|
||||
[]
|
||||
|
||||
|
||||
Overfull \hbox (74.99634pt too wide) in paragraph at lines 547--547
|
||||
[] \OT1/cmtt/m/n/10 sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0;
|
||||
\[]
|
||||
[]
|
||||
|
||||
|
||||
Overfull \hbox (25.129pt too wide) in paragraph at lines 548--549
|
||||
\OT1/cmr/m/n/10 This com-putes a prod-uct and stores it in the ``sec-ondary'' c
|
||||
arry reg-is-ters $[]$.
|
||||
[]
|
||||
|
||||
|
||||
Overfull \hbox (74.99634pt too wide) in paragraph at lines 556--556
|
||||
[]\OT1/cmtt/m/n/10 #define SQRADDAC(i, j)
|
||||
\[]
|
||||
[]
|
||||
|
||||
|
||||
Overfull \hbox (74.99634pt too wide) in paragraph at lines 556--556
|
||||
[] \OT1/cmtt/m/n/10 do { fp_word t;
|
||||
\[]
|
||||
[]
|
||||
|
||||
|
||||
Overfull \hbox (74.99634pt too wide) in paragraph at lines 556--556
|
||||
[] \OT1/cmtt/m/n/10 t = sc0 + ((fp_word)i) * ((fp_word)j); sc0 = t;
|
||||
\[]
|
||||
[]
|
||||
|
||||
|
||||
Overfull \hbox (74.99634pt too wide) in paragraph at lines 556--556
|
||||
[] \OT1/cmtt/m/n/10 t = sc1 + (t >> DIGIT_BIT); sc1 = t; sc2 += t
|
||||
>> DIGIT_BIT; \[]
|
||||
[]
|
||||
|
||||
|
||||
Overfull \hbox (74.99634pt too wide) in paragraph at lines 566--566
|
||||
[]\OT1/cmtt/m/n/10 #define SQRADDDB
|
||||
\[]
|
||||
[]
|
||||
|
||||
|
||||
Overfull \hbox (74.99634pt too wide) in paragraph at lines 566--566
|
||||
[] \OT1/cmtt/m/n/10 do { fp_word t;
|
||||
\[]
|
||||
[]
|
||||
|
||||
|
||||
Overfull \hbox (190.49533pt too wide) in paragraph at lines 566--566
|
||||
[] \OT1/cmtt/m/n/10 t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = t;
|
||||
\[]
|
||||
[]
|
||||
|
||||
|
||||
Overfull \hbox (190.49533pt too wide) in paragraph at lines 566--566
|
||||
[] \OT1/cmtt/m/n/10 t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BI
|
||||
T); c1 = t; \[]
|
||||
[]
|
||||
|
||||
|
||||
Overfull \hbox (190.49533pt too wide) in paragraph at lines 566--566
|
||||
[] \OT1/cmtt/m/n/10 c2 = c2 + ((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_B
|
||||
IT); \[]
|
||||
[]
|
||||
|
||||
[14] [15] (./tfm.ind [16] [17
|
||||
|
||||
|
||||
]) (./tfm.aux) )
|
||||
Here is how much of TeX's memory you used:
|
||||
2712 strings out of 49501
|
||||
35892 string characters out of 426789
|
||||
81342 words of memory out of 1100000
|
||||
5856 multiletter control sequences out of 10000+15000
|
||||
15453 words of font info for 59 fonts, out of 400000 for 2000
|
||||
580 hyphenation exceptions out of 1000
|
||||
25i,9n,25p,195b,321s stack positions out of 1500i,500n,1500p,200000b,5000s
|
||||
|
||||
Output written on tfm.dvi (23 pages, 49708 bytes).
|
46
tfm.tex
46
tfm.tex
@ -49,7 +49,7 @@
|
||||
\begin{document}
|
||||
\frontmatter
|
||||
\pagestyle{empty}
|
||||
\title{TomsFastMath User Manual \\ v0.03}
|
||||
\title{TomsFastMath User Manual \\ v0.04}
|
||||
\author{Tom St Denis \\ tomstdenis@iahu.ca}
|
||||
\maketitle
|
||||
This text and library are all hereby placed in the public domain. This book has been formatted for B5
|
||||
@ -143,6 +143,10 @@ TFM\_X86 and TFM\_SSE2 at the same time. This mode only works with 32--bit dig
|
||||
mode fp\_digit is 32--bits and fp\_word is 64--bits. While this mode will work on the AMD Athlon64
|
||||
series of processors it is less efficient than the native ``x86--64'' mode and not recommended.
|
||||
|
||||
There is an additional ``TFM\_PRESCOTT'' flag that you can define for P4 Prescott processors. This causes
|
||||
the mul/sqr functions to use x86\_32 and the montgomery reduction to use SSE2 which is (so far) the fastest
|
||||
combination. If you are using an older (e.g. Northwood) generation P4 don't define this.
|
||||
|
||||
\subsubsection{x86--64} The ``x86--64'' mode is defined by ``TFM\_X86\_64'' and requires an
|
||||
``x86--64'' capable processor (Athlon64 and future Pentium processors). It requires GCC to
|
||||
build and only works with 64--bit digits. Note that by enabling this mode it will automatically
|
||||
@ -150,12 +154,16 @@ enable 64--bit digits. In this mode fp\_digit is 64--bits and fp\_word is 128--
|
||||
be autodetected when building with GCC to an ``x86--64'' target. You can override this behaviour by defining
|
||||
TFM\_NO\_ASM.
|
||||
|
||||
\subsubsection{ARM} The ``ARM'' mode is defined by ``TFM\_ARM'' and requires a ARMv4 or higher
|
||||
processor. It requires GCC and works with 32--bit digits. In this mode fp\_digit is 32--bits and
|
||||
\subsubsection{ARM} The ``ARM'' mode is defined by ``TFM\_ARM'' and requires an ARMv4 with the M instructions (enhanced
|
||||
multipliers) or higher processor. It requires GCC and works with 32--bit digits. In this mode fp\_digit is 32--bits and
|
||||
fp\_word is 64--bits.
|
||||
|
||||
\subsubsection{PPC32} The ``PPC32'' mode is defined by ``TFM\_PPC32'' and requires a standard PPC processor. It doesn't
|
||||
use altivec or other extensions so it should work on all compliant implementations of PPC. It requires GCC and works
|
||||
with 32--bit digits. In this mode fp\_digit is 32--bits and fp\_word is 64--bits.
|
||||
|
||||
\subsubsection{Future Releases} Future releases will support additional platform optimizations.
|
||||
Developers of MIPS and PPC platforms are encouraged to submit GCC asm inline patches
|
||||
Developers of MIPS and SPARC platforms are encouraged to submit GCC asm inline patches
|
||||
(see chapter \ref{chap:asmops} for more information).
|
||||
|
||||
\begin{figure}[here]
|
||||
@ -165,8 +173,10 @@ Developers of MIPS and PPC platforms are encouraged to submit GCC asm inline pat
|
||||
\hline \textbf{Processor} & \textbf{Recommended Mode} \\
|
||||
\hline All 32--bit x86 platforms & TFM\_X86 \\
|
||||
\hline Pentium 4 & TFM\_SSE2 \\
|
||||
\hline Pentium 4 Prescott & TFM\_SSE2 + TFM\_PRESCOTT \\
|
||||
\hline Athlon64 & TFM\_X86\_64 \\
|
||||
\hline ARMv4 or higher & TFM\_ARM \\
|
||||
\hline ARMv4 or higher with M & TFM\_ARM \\
|
||||
\hline G3/G4 (32-bit PPC) & TFM\_PPC32 \\
|
||||
\hline &\\
|
||||
\hline x86--32 or x86--64 (with GCC) & Leave blank and let autodetect work \\
|
||||
\hline
|
||||
@ -590,25 +600,25 @@ a register if you want.
|
||||
|
||||
\begin{verbatim}
|
||||
#define INNERMUL \
|
||||
t = ((fp_word)mu) * ((fp_word)*tmpm++); \
|
||||
_c[OFF0] += t; \
|
||||
if (_c[OFF0] < (fp_digit)t) ++_c[OFF1]; \
|
||||
_c[OFF1] += (t>>DIGIT_BIT); \
|
||||
if (_c[OFF1] < (fp_digit)(t>>DIGIT_BIT)) ++_c[OFF2];
|
||||
do { fp_word t; \
|
||||
_c[0] = t = ((fp_word)_c[0] + (fp_word)cy) + \
|
||||
(((fp_word)mu) * ((fp_word)*tmpm++)); \
|
||||
cy = (t >> DIGIT_BIT); \
|
||||
} while (0)
|
||||
\end{verbatim}
|
||||
|
||||
This computes the inner product and adds it to the correct set of carry variables. The variable
|
||||
$\_c$ is a pointer alias to $c[x+y]$ and used to simplify the code.
|
||||
This computes the inner product and adds it to the destination and carry variable $cy$.
|
||||
This uses the $mu$ value computed above (can be in a register already) and the
|
||||
$cy$ which is a chaining carry. Inside the INNERMUL loop the $cy$ value can be kept
|
||||
inside a register (hint: it always starts as $cy = 0$ in the first iteration).
|
||||
|
||||
You can safely alias $\_c$ to a register for INNERMUL by setting it equal to ``c + x''
|
||||
\footnote{Where ``c'' is an array on the stack.} by modifying LOOP\_START.
|
||||
Upon completion of the inner loop the macro LOOP\_END is called which is used to fetch
|
||||
$cy$ into the variable the C program can see. This is where, if you cached $cy$ in a
|
||||
register, you would copy it to the locally accessible C variable.
|
||||
|
||||
\begin{verbatim}
|
||||
#define PROPCARRY \
|
||||
_c[OFF0+1] += _c[OFF1]; \
|
||||
if (_c[OFF0+1] < _c[OFF1]) ++_c[OFF1+1]; \
|
||||
_c[OFF1+1] += _c[OFF2]; \
|
||||
if (_c[OFF1+1] < _c[OFF2]) ++_c[OFF2+1];
|
||||
do { fp_digit t = _c[0] += cy; cy = (t < cy); } while (0)
|
||||
\end{verbatim}
|
||||
|
||||
This propagates the carry upwards by one digit.
|
||||
|
33
tfm.toc
Normal file
33
tfm.toc
Normal file
@ -0,0 +1,33 @@
|
||||
\contentsline {chapter}{\numberline {1}Introduction}{1}{chapter.1}
|
||||
\contentsline {section}{\numberline {1.1}What is TomsFastMath?}{1}{section.1.1}
|
||||
\contentsline {section}{\numberline {1.2}License}{2}{section.1.2}
|
||||
\contentsline {section}{\numberline {1.3}Building}{2}{section.1.3}
|
||||
\contentsline {subsection}{\numberline {1.3.1}Build Limitations}{2}{subsection.1.3.1}
|
||||
\contentsline {subsection}{\numberline {1.3.2}Optimization Configuration}{2}{subsection.1.3.2}
|
||||
\contentsline {subsubsection}{x86--32}{3}{section*.3}
|
||||
\contentsline {subsubsection}{SSE2}{3}{section*.4}
|
||||
\contentsline {subsubsection}{x86--64}{3}{section*.5}
|
||||
\contentsline {subsubsection}{ARM}{3}{section*.6}
|
||||
\contentsline {subsubsection}{PPC32}{3}{section*.7}
|
||||
\contentsline {subsubsection}{Future Releases}{4}{section*.8}
|
||||
\contentsline {subsection}{\numberline {1.3.3}Precision Configuration}{4}{subsection.1.3.3}
|
||||
\contentsline {chapter}{\numberline {2}Getting Started}{5}{chapter.2}
|
||||
\contentsline {section}{\numberline {2.1}Data Types}{5}{section.2.1}
|
||||
\contentsline {section}{\numberline {2.2}Initialization}{6}{section.2.2}
|
||||
\contentsline {subsection}{\numberline {2.2.1}Simple Initialization}{6}{subsection.2.2.1}
|
||||
\contentsline {subsection}{\numberline {2.2.2}Initialize Small Constants}{6}{subsection.2.2.2}
|
||||
\contentsline {subsection}{\numberline {2.2.3}Initialize Copy}{6}{subsection.2.2.3}
|
||||
\contentsline {chapter}{\numberline {3}Arithmetic Operations}{7}{chapter.3}
|
||||
\contentsline {section}{\numberline {3.1}Odds and Evens}{7}{section.3.1}
|
||||
\contentsline {section}{\numberline {3.2}Sign Manipulation}{7}{section.3.2}
|
||||
\contentsline {section}{\numberline {3.3}Comparisons}{8}{section.3.3}
|
||||
\contentsline {section}{\numberline {3.4}Shifting}{8}{section.3.4}
|
||||
\contentsline {section}{\numberline {3.5}Basic Algebra}{9}{section.3.5}
|
||||
\contentsline {section}{\numberline {3.6}Modular Exponentiation}{9}{section.3.6}
|
||||
\contentsline {section}{\numberline {3.7}Number Theoretic}{9}{section.3.7}
|
||||
\contentsline {section}{\numberline {3.8}Prime Numbers}{10}{section.3.8}
|
||||
\contentsline {chapter}{\numberline {4}Porting TomsFastMath}{11}{chapter.4}
|
||||
\contentsline {section}{\numberline {4.1}Getting Started}{11}{section.4.1}
|
||||
\contentsline {section}{\numberline {4.2}Multiply with Comba}{11}{section.4.2}
|
||||
\contentsline {section}{\numberline {4.3}Squaring with Comba}{13}{section.4.3}
|
||||
\contentsline {section}{\numberline {4.4}Montgomery with Comba}{15}{section.4.4}
|
Loading…
Reference in New Issue
Block a user