3
0

Merge branch 'release/0.13.0'

This commit is contained in:
Steffen Jaeckel 2015-10-24 14:56:46 +02:00
commit 5c395e04e6
124 changed files with 16680 additions and 2783 deletions

27
.gitignore vendored Normal file
View File

@ -0,0 +1,27 @@
*.[ao]
*.aux
*.dvi
*.idx
*.ilg
*.ind
*.lof
*.log
*.toc
*.out
*.l[ao]
*.orig
.project
.cproject
/.libs
test_*.txt
test
test.exe
mtest
mtest.exe
stest
stest.exe
rsatest
rsatest.exe
timing
timing.exe

32
.travis.yml Normal file
View File

@ -0,0 +1,32 @@
language: c
compiler:
- gcc
script: CC="${MYCC}" make ${SHARED} test_standalone >test_gcc_1.txt 2>test_gcc_2.txt && ./test >test_std.txt 2>test_err.txt
env:
- MYCC="gcc" SHARED=""
- MYCC="gcc -m32" SHARED=""
- MYCC="gcc-4.8" SHARED=""
- MYCC="gcc-4.8 -m32" SHARED=""
- MYCC="gcc-4.9" SHARED=""
- MYCC="gcc-4.9 -m32" SHARED=""
- MYCC="gcc" SHARED="-f makefile.shared"
- MYCC="gcc -m32" SHARED="-f makefile.shared"
- MYCC="gcc-4.8" SHARED="-f makefile.shared"
- MYCC="gcc-4.8 -m32" SHARED="-f makefile.shared"
- MYCC="gcc-4.9" SHARED="-f makefile.shared"
- MYCC="gcc-4.9 -m32" SHARED="-f makefile.shared"
matrix:
fast_finish: true
before_script:
- sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
- sudo apt-get -qq update
- sudo apt-get install gcc-4.9-multilib gcc-4.8-multilib gcc-multilib build-essential
after_failure:
- cat test_gcc_1.txt
- cat test_std.txt
- cat test_err.txt
after_script:
- cat test_gcc_2.txt
notifications:
irc: "chat.freenode.net#libtom"

35
LICENSE
View File

@ -1,7 +1,36 @@
TomsFastMath is public domain.
TomsFastMath is licensed under DUAL licensing terms.
Choose and use the license of your needs.
[LICENSE #1]
TomsFastMath is public domain. As should all quality software be.
Tom St Denis
[/LICENSE #1]
[LICENSE #2]
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
Version 2, December 2004
Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
Everyone is permitted to copy and distribute verbatim or modified
copies of this license document, and changing it is allowed as long
as the name is changed.
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. You just DO WHAT THE FUCK YOU WANT TO.
[/LICENSE #2]
-- Mark Karpelès & Steffen Jaeckel
Note some ideas were borrowed from LibTomMath and OpenSSL. All of the code is original or ported
from LibTomMath [no code was ported from OpenSSL]. As such the origins and status of this code
are both public domain.
from LibTomMath [no code was ported from OpenSSL].
-- Tom St Denis

11
README.md Normal file
View File

@ -0,0 +1,11 @@
tomsfastmath
============
See doc/tfm.pdf for detailed documentation.
Project Status
==============
master: [![Build Status](https://travis-ci.org/libtom/tomsfastmath.svg?branch=master)](https://travis-ci.org/libtom/tomsfastmath)

View File

@ -1,3 +1,15 @@
October 24th, 2015
v0.13.0
-- Add fp_rand()
-- Fix bug in fp_sub() reported by Martins Mozeiko
-- Fix bugs/apply patches in fp_mul() and fp_sqr() reported by rasky
-- Fix bugs in fp_read_radix()
-- Fix build issues for Linux x32 ABI
-- Sebastian Siewior provided fp_toradix_n(),
reported multiple issues on behalf of ClamAV
and did most of the testing work to be able to push this release out.
-- Fix a load of compiler warnings.
March 14th, 2007
0.12 -- Christophe Devine contributed MIPS asm w00t
++ quick release to get the MIPS code out there

View File

@ -8,8 +8,32 @@
#ifndef DISPLAY
#define DISPLAY(x) printf(x)
#define DISPLAY_P(...) printf(__VA_ARGS__)
#else
#define DISPLAY_P(...) (void)0
#define fp_dump(n,p) do{}while(0)
#endif
#ifndef fp_dump
/* Print the value of p, labelled with n, through DISPLAY_P.
 *
 * The scratch buffer is sized with fp_radix_size() for radix 2 and reused
 * for every radix that is printed. The decimal form is always emitted; the
 * binary and hexadecimal forms only when STEST_VERBOSE is defined.
 * Nothing is printed if the size query or the allocation fails.
 */
void fp_dump(const char* n, fp_int* p)
{
   int len;
   if (fp_radix_size(p, 2, &len) != FP_OKAY) {
      return;
   }
   char *text = malloc(len);
   if (text == NULL) {
      return;
   }
#ifdef STEST_VERBOSE
   fp_toradix(p, text, 2);
   DISPLAY_P("%s = 0b%s\n", n, text);
   fp_toradix(p, text, 16);
   DISPLAY_P("%s = 0x%s\n", n, text);
#endif
   fp_toradix(p, text, 10);
   DISPLAY_P("%s = %s\n", n, text);
   free(text);
}
#endif
#ifdef GBA_MODE
int c_main(void)
@ -33,6 +57,8 @@ int main(void)
modetxt_gotoxy(0,0);
#endif
DISPLAY_P("TFM Ident string:\n%s\n\n", fp_ident());
/* test multiplication */
fp_read_radix(&a, "3453534534535345345341230891273", 10);
fp_read_radix(&b, "2394873294871238934718923" , 10);
@ -40,7 +66,7 @@ int main(void)
fp_mul(&a, &b, &d);
if (fp_cmp(&c, &d)) {
DISPLAY("mul failed\n");
return 0;
return -1;
} else {
DISPLAY("mul passed\n");
}
@ -52,7 +78,7 @@ int main(void)
fp_mul(&a, &b, &d);
if (fp_cmp(&c, &d)) {
DISPLAY("mul failed\n");
return 0;
return -1;
} else {
DISPLAY("mul passed\n");
}
@ -64,7 +90,7 @@ int main(void)
fp_mul(&a, &b, &d);
if (fp_cmp(&c, &d)) {
DISPLAY("mul failed\n");
return 0;
return -1;
} else {
DISPLAY("mul passed\n");
}
@ -75,7 +101,7 @@ int main(void)
fp_sqr(&a, &c);
if (fp_cmp(&c, &b)) {
DISPLAY("sqr failed\n");
return 0;
return -1;
} else {
DISPLAY("sqr passed\n");
}
@ -85,7 +111,7 @@ int main(void)
fp_sqr(&a, &c);
if (fp_cmp(&c, &b)) {
DISPLAY("sqr failed\n");
return 0;
return -1;
} else {
DISPLAY("sqr passed\n");
}
@ -95,7 +121,7 @@ int main(void)
fp_sqr(&a, &c);
if (fp_cmp(&c, &b)) {
DISPLAY("sqr failed\n");
return 0;
return -1;
} else {
DISPLAY("sqr passed\n");
}
@ -104,12 +130,19 @@ int main(void)
/* montgomery reductions */
fp_read_radix(&a, "234892374892374893489123428937892781237863278637826327367637836278362783627836783678363", 10);
fp_read_radix(&b, "4447823492749823749234123489273987393983289319382762756425425425642727352327452374521", 10);
#ifdef FP_64BIT
fp_read_radix(&c, "942974496560863503657226741422301598807235487941674147660989764036913926327577165648", 10);
#else
fp_read_radix(&c, "2396271882990732698083317035605836523697277786556053771759862552557086442129695099100", 10);
fp_montgomery_setup(&b, &dp);
#endif
if (fp_montgomery_setup(&b, &dp) != FP_OKAY)
DISPLAY("mont setup failed\n");
fp_montgomery_reduce(&a, &b, dp);
if (fp_cmp(&a, &c)) {
DISPLAY("mont failed\n");
return 0;
fp_dump("a (is )", &a);
fp_dump("c (should)", &c);
return -1;
} else {
DISPLAY("mont passed\n");
}
@ -117,11 +150,14 @@ int main(void)
fp_read_radix(&a, "2348923748923748934891234456645654645645684576353428937892781237863278637826327367637836278362783627836783678363", 10);
fp_read_radix(&b, "444782349274982374923412348927398739398328931938276275642542542564272735232745237452123424324324444121111119", 10);
fp_read_radix(&c, "45642613844554582908652603086180267403823312390990082328515008314514368668691233331246183943400359349283420", 10);
fp_montgomery_setup(&b, &dp);
if (fp_montgomery_setup(&b, &dp) != FP_OKAY)
DISPLAY("mont setup failed\n");
fp_montgomery_reduce(&a, &b, dp);
if (fp_cmp(&a, &c)) {
DISPLAY("mont failed\n");
return 0;
fp_dump("a (is )", &a);
fp_dump("c (should)", &c);
return -1;
} else {
DISPLAY("mont passed\n");
}
@ -129,11 +165,14 @@ int main(void)
fp_read_radix(&a, "234823424242342923748923748934891234456645654645645684576353424972378234762378623891236834132352375235378462378489378927812378632786378263273676378362783627555555555539568389052478124618461834763837685723645827529034853490580134568947341278498542893481762349723907847892983627836783678363", 10);
fp_read_radix(&b, "44478234927456563455982374923412348927398739398328931938276275642485623481638279025465891276312903262837562349056234783648712314678120389173890128905425242424239784256427", 10);
fp_read_radix(&c, "33160865265453361650564031464519042126185632333462754084489985719613480783282357410514898819797738034600484519472656152351777186694609218202276509271061460265488348645081", 10);
fp_montgomery_setup(&b, &dp);
if (fp_montgomery_setup(&b, &dp) != FP_OKAY)
DISPLAY("mont setup failed\n");
fp_montgomery_reduce(&a, &b, dp);
if (fp_cmp(&a, &c)) {
DISPLAY("mont failed\n");
return 0;
fp_dump("a (is )", &a);
fp_dump("c (should)", &c);
return -1;
} else {
DISPLAY("mont passed\n");
}

View File

@ -1,12 +1,23 @@
/* TFM demo program */
#include <tfm.h>
#include <time.h>
#include <unistd.h>
#ifndef TFM_DEMO_TEST_VS_MTEST
#define TFM_DEMO_TEST_VS_MTEST 1
#endif
/* Print an fp_int on one line of stdout: its `used` count, its `sign`,
 * then every digit from index used-1 down to 0 in zero-padded hex
 * (8 hex characters when SIZEOF_FP_DIGIT == 4, otherwise 16).
 */
void draw(fp_int *a)
{
   int idx = a->used;

   printf("%d, %d, ", a->used, a->sign);
   while (--idx >= 0) {
#if SIZEOF_FP_DIGIT == 4
      printf("%08lx ", a->dp[idx]);
#else
      printf("%016llx ", a->dp[idx]);
#endif
   }
   printf("\n");
}
@ -14,71 +25,33 @@ void draw(fp_int *a)
/* Fill dst with len bytes taken from the low 8 bits of successive rand()
 * calls. dat is accepted but never used. Always returns len.
 */
int myrng(unsigned char *dst, int len, void *dat)
{
   unsigned char *out = dst;
   int remaining;

   (void)dat;
   for (remaining = len; remaining > 0; --remaining) {
      *out++ = rand() & 0xFF;
   }
   return len;
}
/* RDTSC from Scott Duplichan */
static ulong64 TIMFUNC (void)
{
#if defined __GNUC__
#if defined(INTEL_CC)
ulong64 a;
asm ("rdtsc":"=A"(a));
return a;
#elif defined(__i386__) || defined(__x86_64__)
ulong64 a;
__asm__ __volatile__ ("rdtsc\nmovl %%eax,%0\nmovl %%edx,4+%0\n"::"m"(a):"%eax","%edx");
return a;
#elif defined(TFM_PPC32)
unsigned long a, b;
__asm__ __volatile__ ("mftbu %1 \nmftb %0\n":"=r"(a), "=r"(b));
return (((ulong64)b) << 32ULL) | ((ulong64)a);
#elif defined(TFM_AVR32)
FILE *in;
char buf[20];
in = fopen("/sys/devices/system/cpu/cpu0/pccycles", "r");
fgets(buf, 20, in);
fclose(in);
return strtoul(buf, NULL, 10);
#else /* gcc-IA64 version */
unsigned long result;
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
while (__builtin_expect ((int) result == -1, 0))
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
return result;
#endif
// Microsoft and Intel Windows compilers
#elif defined _M_IX86
__asm rdtsc
#elif defined _M_AMD64
return __rdtsc ();
#elif defined _M_IA64
#if defined __INTEL_COMPILER
#include <ia64intrin.h>
#endif
return __getReg (3116);
#else
#error need rdtsc function for this build
#endif
}
char cmd[4096], buf[4096];
int main(void)
{
fp_int a,b,c,d,e,f;
unsigned long ix;
#if TFM_DEMO_TEST_VS_MTEST
unsigned long expt_n, add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, inv_n,
div2_n, mul2_n, add_d_n, sub_d_n, mul_d_n, cnt, rr;
#else
fp_digit fp;
int n, err;
unsigned long expt_n, add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, inv_n,
div2_n, mul2_n, add_d_n, sub_d_n, mul_d_n, t, cnt, rr, ix;
ulong64 t1, t2;
#endif
srand(time(NULL));
printf("TFM Ident string:\n%s\n\n", fp_ident());
fp_zero(&b); fp_zero(&c); fp_zero(&d); fp_zero(&e); fp_zero(&f);
fp_zero(&a); draw(&a);
fp_zero(&a);
#if TFM_DEMO_TEST_VS_MTEST == 0
draw(&a);
/* test set and simple shifts */
printf("Testing mul/div 2\n");
@ -134,6 +107,10 @@ int main(void)
printf("cmp returns: %d, ", fp_cmp(&a, &b)); fp_sub(&a, &b, &a); draw(&a);
printf("cmp returns: %d, ", fp_cmp(&a, &b)); fp_sub(&a, &b, &a); draw(&a);
printf("cmp returns: %d, ", fp_cmp(&a, &b)); fp_sub(&a, &b, &a); draw(&a);
fp_read_radix(&a, "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF000000000000000000000001", 16); draw(&a);
fp_sub_d(&a, 3, &b); draw(&b);
fp_read_radix(&a, "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFE", 16);
printf("cmp returns: %d, ", fp_cmp(&a, &b)); fp_sub(&a, &b, &a); draw(&a);
/* test mul_d */
printf("Testing mul_d and div_d\n");
@ -150,7 +127,6 @@ int main(void)
printf("Testing read_radix\n");
fp_read_radix(&a, "123456789012345678901234567890", 16); draw(&a);
#if 0
/* test mont */
printf("Montgomery test #1\n");
fp_set(&a, 0x1234567ULL);
@ -208,421 +184,10 @@ int main(void)
}
}
printf("\n\n");
#endif
#ifdef TESTING
goto testing;
#endif
#if 1
t1 = TIMFUNC();
sleep(1);
printf("Ticks per second: %llu\n", TIMFUNC() - t1);
goto multtime;
/* do some timings... */
printf("Addition:\n");
for (t = 2; t <= FP_SIZE/2; t += 2) {
fp_zero(&a);
fp_zero(&b);
fp_zero(&c);
for (ix = 0; ix < t; ix++) {
a.dp[ix] = ix;
b.dp[ix] = ix;
}
a.used = t;
b.used = t;
t2 = -1;
for (ix = 0; ix < 25000; ++ix) {
t1 = TIMFUNC();
fp_add(&a, &b, &c); fp_add(&a, &b, &c);
fp_add(&a, &b, &c); fp_add(&a, &b, &c);
fp_add(&a, &b, &c); fp_add(&a, &b, &c);
fp_add(&a, &b, &c); fp_add(&a, &b, &c);
t2 = (TIMFUNC() - t1)>>3;
if (t1<t2) { --ix; t2 = t1; }
}
printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
}
multtime:
printf("Multiplication:\n");
for (t = 2; t < FP_SIZE/2; t += 2) {
fp_zero(&a);
fp_zero(&b);
fp_zero(&c);
for (ix = 0; ix < t; ix++) {
a.dp[ix] = ix;
b.dp[ix] = ix;
}
a.used = t;
b.used = t;
t2 = -1;
for (ix = 0; ix < 100; ++ix) {
t1 = TIMFUNC();
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
t2 = (TIMFUNC() - t1)>>7;
if (t1<t2) { --ix; t2 = t1; }
}
printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
}
//#else
sqrtime:
printf("Squaring:\n");
for (t = 2; t < FP_SIZE/2; t += 2) {
fp_zero(&a);
fp_zero(&b);
for (ix = 0; ix < t; ix++) {
a.dp[ix] = ix;
}
a.used = t;
t2 = -1;
for (ix = 0; ix < 100; ++ix) {
t1 = TIMFUNC();
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
fp_sqr(&a, &b); fp_sqr(&a, &b);
t2 = (TIMFUNC() - t1)>>7;
if (t1<t2) { --ix; t2 = t1; }
}
printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
}
invmodtime:
printf("Invmod:\n");
for (t = 2; t < FP_SIZE/2; t += 2) {
fp_zero(&a);
for (ix = 0; ix < t; ix++) {
a.dp[ix] = ix | 1;
}
a.used = t;
fp_zero(&b);
for (ix = 0; ix < t; ix++) {
b.dp[ix] = rand();
}
b.used = t;
fp_clamp(&b);
fp_zero(&c);
t2 = -1;
for (ix = 0; ix < 100; ++ix) {
t1 = TIMFUNC();
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
fp_invmod(&b, &a, &c);
t2 = (TIMFUNC() - t1)>>6;
if (t1<t2) { --ix; t2 = t1; }
}
printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
}
//#else
monttime:
printf("Montgomery:\n");
for (t = 2; t <= (FP_SIZE/2)-4; t += 2) {
// printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
fp_zero(&a);
for (ix = 0; ix < t; ix++) {
a.dp[ix] = ix | 1;
}
a.used = t;
fp_montgomery_setup(&a, &fp);
fp_sub_d(&a, 3, &b);
fp_sqr(&b, &b);
fp_copy(&b, &c);
fp_copy(&b, &d);
t2 = -1;
for (ix = 0; ix < 100; ++ix) {
t1 = TIMFUNC();
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
fp_montgomery_reduce(&c, &a, &fp);
fp_montgomery_reduce(&d, &a, &fp);
t2 = (TIMFUNC() - t1)>>6;
fp_copy(&b, &c);
fp_copy(&b, &d);
if (t1<t2) { --ix; t2 = t1; }
}
printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
}
//#else
expttime:
printf("Exptmod:\n");
for (t = 512/DIGIT_BIT; t <= (FP_SIZE/2)-2; t += 256/DIGIT_BIT) {
fp_zero(&a);
fp_zero(&b);
fp_zero(&c);
for (ix = 0; ix < t; ix++) {
a.dp[ix] = ix+1;
b.dp[ix] = (fp_digit)rand() * (fp_digit)rand();
c.dp[ix] = ix;
}
a.used = t;
b.used = t;
c.used = t;
t2 = -1;
for (ix = 0; ix < 500; ++ix) {
t1 = TIMFUNC();
fp_exptmod(&c, &b, &a, &d);
fp_exptmod(&c, &b, &a, &d);
t2 = (TIMFUNC() - t1)>>1;
fp_copy(&b, &c);
fp_copy(&b, &d);
if (t1<t2) { t2 = t1; --ix; }
}
printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
}
return 0;
#endif
return 0;
testing:
#else
fp_zero(&b); fp_zero(&c); fp_zero(&d); fp_zero(&e); fp_zero(&f); fp_zero(&a);
@ -643,7 +208,7 @@ testing:
fp_mul_2d(&a, rr, &a);
a.sign = b.sign;
if (fp_cmp(&a, &b) != FP_EQ) {
printf("mul2d failed, rr == %lu\n",rr);
printf("\nmul2d failed, rr == %lu\n",rr);
draw(&a);
draw(&b);
return 0;
@ -657,7 +222,7 @@ testing:
a.sign = b.sign;
if (a.used == b.used && a.used == 0) { a.sign = b.sign = FP_ZPOS; }
if (fp_cmp(&a, &b) != FP_EQ) {
printf("div2d failed, rr == %lu\n",rr);
printf("\ndiv2d failed, rr == %lu\n",rr);
draw(&a);
draw(&b);
return 0;
@ -669,7 +234,7 @@ testing:
fp_copy(&a, &d);
fp_add(&d, &b, &d);
if (fp_cmp(&c, &d) != FP_EQ) {
printf("add %lu failure!\n", add_n);
printf("\nadd %lu failure!\n", add_n);
draw(&a);draw(&b);draw(&c);draw(&d);
return 0;
}
@ -681,7 +246,7 @@ draw(&a);draw(&b);draw(&c);draw(&d);
memset(cmd+rr, rand()&255, sizeof(cmd)-rr);
fp_read_signed_bin(&d, (unsigned char *)cmd, rr);
if (fp_cmp(&c, &d) != FP_EQ) {
printf("fp_signed_bin failure!\n");
printf("f\np_signed_bin failure!\n");
draw(&c);
draw(&d);
return 0;
@ -692,7 +257,7 @@ draw(&a);draw(&b);draw(&c);draw(&d);
memset(cmd+rr, rand()&255, sizeof(cmd)-rr);
fp_read_unsigned_bin(&d, (unsigned char *)cmd, rr);
if (fp_cmp_mag(&c, &d) != FP_EQ) {
printf("fp_unsigned_bin failure!\n");
printf("\nfp_unsigned_bin failure!\n");
draw(&c);
draw(&d);
return 0;
@ -705,98 +270,98 @@ draw(&a);draw(&b);draw(&c);draw(&d);
fp_copy(&a, &d);
fp_sub(&d, &b, &d);
if (fp_cmp(&c, &d) != FP_EQ) {
printf("sub %lu failure!\n", sub_n);
printf("\nsub %lu failure!\n", sub_n);
draw(&a);draw(&b);draw(&c);draw(&d);
return 0;
}
} else if (!strcmp(cmd, "mul")) {
} else if (!strcmp(cmd, "mul")) { ++mul_n;
fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
fgets(buf, 4095, stdin); fp_read_radix(&c, buf, 64);
//continue;
fp_copy(&a, &d);
fp_mul(&d, &b, &d); ++mul_n;
fp_mul(&d, &b, &d);
if (fp_cmp(&c, &d) != FP_EQ) {
printf("mul %lu failure!\n", mul_n);
printf("\nmul %lu failure!\n", mul_n);
draw(&a);draw(&b);draw(&c);draw(&d);
return 0;
}
} else if (!strcmp(cmd, "div")) {
} else if (!strcmp(cmd, "div")) { ++div_n;
fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
fgets(buf, 4095, stdin); fp_read_radix(&c, buf, 64);
fgets(buf, 4095, stdin); fp_read_radix(&d, buf, 64);
// continue;
fp_div(&a, &b, &e, &f); ++div_n;
fp_div(&a, &b, &e, &f);
if (fp_cmp(&c, &e) != FP_EQ || fp_cmp(&d, &f) != FP_EQ) {
printf("div %lu failure!\n", div_n);
printf("\ndiv %lu failure!\n", div_n);
draw(&a);draw(&b);draw(&c);draw(&d); draw(&e); draw(&f);
return 0;
}
} else if (!strcmp(cmd, "sqr")) {
} else if (!strcmp(cmd, "sqr")) { ++sqr_n;
fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
// continue;
fp_copy(&a, &c);
fp_sqr(&c, &c); ++sqr_n;
fp_sqr(&c, &c);
if (fp_cmp(&b, &c) != FP_EQ) {
printf("sqr %lu failure!\n", sqr_n);
printf("\nsqr %lu failure!\n", sqr_n);
draw(&a);draw(&b);draw(&c);
return 0;
}
} else if (!strcmp(cmd, "gcd")) {
} else if (!strcmp(cmd, "gcd")) { ++gcd_n;
fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
fgets(buf, 4095, stdin); fp_read_radix(&c, buf, 64);
// continue;
fp_copy(&a, &d);
fp_gcd(&d, &b, &d); ++gcd_n;
fp_gcd(&d, &b, &d);
d.sign = c.sign;
if (fp_cmp(&c, &d) != FP_EQ) {
printf("gcd %lu failure!\n", gcd_n);
printf("\ngcd %lu failure!\n", gcd_n);
draw(&a);draw(&b);draw(&c);draw(&d);
return 0;
}
} else if (!strcmp(cmd, "lcm")) {
} else if (!strcmp(cmd, "lcm")) { ++lcm_n;
fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
fgets(buf, 4095, stdin); fp_read_radix(&c, buf, 64);
//continue;
fp_copy(&a, &d);
fp_lcm(&d, &b, &d); ++lcm_n;
fp_lcm(&d, &b, &d);
d.sign = c.sign;
if (fp_cmp(&c, &d) != FP_EQ) {
printf("lcm %lu failure!\n", lcm_n);
printf("\nlcm %lu failure!\n", lcm_n);
draw(&a);draw(&b);draw(&c);draw(&d);
return 0;
}
} else if (!strcmp(cmd, "expt")) {
} else if (!strcmp(cmd, "expt")) { ++expt_n;
fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
fgets(buf, 4095, stdin); fp_read_radix(&c, buf, 64);
fgets(buf, 4095, stdin); fp_read_radix(&d, buf, 64);
// continue;
fp_copy(&a, &e);
fp_exptmod(&e, &b, &c, &e); ++expt_n;
fp_exptmod(&e, &b, &c, &e);
if (fp_cmp(&d, &e) != FP_EQ) {
printf("expt %lu failure!\n", expt_n);
printf("\nexpt %lu failure!\n", expt_n);
draw(&a);draw(&b);draw(&c);draw(&d); draw(&e);
return 0;
}
} else if (!strcmp(cmd, "invmod")) {
} else if (!strcmp(cmd, "invmod")) { ++inv_n;
fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
fgets(buf, 4095, stdin); fp_read_radix(&c, buf, 64);
//continue;
fp_invmod(&a, &b, &d);
#if 1
fp_mulmod(&d,&a,&b,&e); ++inv_n;
fp_mulmod(&d,&a,&b,&e);
if (fp_cmp_d(&e, 1) != FP_EQ) {
#else
if (fp_cmp(&d, &c) != FP_EQ) {
#endif
printf("inv [wrong value from MPI?!] failure\n");
printf("\ninv [wrong value from MPI?!] failure\n");
draw(&a);draw(&b);draw(&c);draw(&d);
return 0;
}
@ -806,7 +371,7 @@ draw(&a);draw(&b);draw(&c);draw(&d);
fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
fp_div_2(&a, &c);
if (fp_cmp(&c, &b) != FP_EQ) {
printf("div_2 %lu failure\n", div2_n);
printf("\ndiv_2 %lu failure\n", div2_n);
draw(&a);
draw(&b);
draw(&c);
@ -817,7 +382,7 @@ draw(&a);draw(&b);draw(&c);draw(&d);
fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
fp_mul_2(&a, &c);
if (fp_cmp(&c, &b) != FP_EQ) {
printf("mul_2 %lu failure\n", mul2_n);
printf("\nmul_2 %lu failure\n", mul2_n);
draw(&a);
draw(&b);
draw(&c);
@ -829,7 +394,7 @@ draw(&a);draw(&b);draw(&c);draw(&d);
fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
fp_add_d(&a, ix, &c);
if (fp_cmp(&b, &c) != FP_EQ) {
printf("add_d %lu failure\n", add_d_n);
printf("\nadd_d %lu failure\n", add_d_n);
draw(&a);
draw(&b);
draw(&c);
@ -842,7 +407,7 @@ draw(&a);draw(&b);draw(&c);draw(&d);
fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
fp_sub_d(&a, ix, &c);
if (fp_cmp(&b, &c) != FP_EQ) {
printf("sub_d %lu failure\n", sub_d_n);
printf("\nsub_d %lu failure\n", sub_d_n);
draw(&a);
draw(&b);
draw(&c);
@ -855,7 +420,7 @@ draw(&a);draw(&b);draw(&c);draw(&d);
fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
fp_mul_d(&a, ix, &c);
if (fp_cmp(&b, &c) != FP_EQ) {
printf("mul_d %lu failure\n", sub_d_n);
printf("\nmul_d %lu failure\n", mul_d_n);
draw(&a);
draw(&b);
draw(&c);
@ -865,6 +430,7 @@ draw(&a);draw(&b);draw(&c);draw(&d);
}
}
#endif
}

625
demo/timing.c Normal file
View File

@ -0,0 +1,625 @@
/* TFM timing analysis */
#include <tfm.h>
#include <time.h>
#include <unistd.h>
/* RDTSC from Scott Duplichan */
/* Return a raw reading of a platform tick counter as a ulong64.
 * The counter source is selected at compile time by the preprocessor
 * branches below; a build matching none of them stops with #error.
 */
static ulong64 TIMFUNC(void)
{
#if defined __GNUC__
#if defined(INTEL_CC)
ulong64 a;
asm ("rdtsc":"=A"(a));
return a;
#elif defined(__i386__) || defined(__x86_64__)
/* version from http://www.mcs.anl.gov/~kazutomo/rdtsc.html
* the old code always got a warning issued by gcc, clang did not complain...
*/
/* lo is bound to EAX and hi to EDX; the halves are merged into one 64-bit value */
unsigned hi, lo;
__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
return ((ulong64)lo)|( ((ulong64)hi)<<32);
#elif defined(TFM_PPC32)
/* b becomes the upper 32 bits of the result, a the lower 32 bits */
unsigned long a, b;
__asm__ __volatile__ ("mftbu %1 \nmftb %0\n":"=r"(a), "=r"(b));
return (((ulong64)b) << 32ULL) | ((ulong64)a);
#elif defined(TFM_AVR32)
/* counter is parsed as decimal text from a sysfs file;
 * NOTE(review): the fopen() and fgets() results are not checked */
FILE *in;
char buf[20];
in = fopen("/sys/devices/system/cpu/cpu0/pccycles", "r");
fgets(buf, 20, in);
fclose(in);
return strtoul(buf, NULL, 10);
#else /* gcc-IA64 version */
unsigned long result;
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
/* read again for as long as the value, truncated to int, equals -1 */
while (__builtin_expect ((int) result == -1, 0))
__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
return result;
#endif
// Microsoft and Intel Windows compilers
#elif defined _M_IX86
/* NOTE(review): no return statement in this branch; presumably relies on
 * the compiler returning EDX:EAX as left by rdtsc -- confirm */
__asm rdtsc
#elif defined _M_AMD64
return __rdtsc ();
#elif defined _M_IA64
#if defined __INTEL_COMPILER
#include <ia64intrin.h>
#endif
return __getReg (3116);
#else
#error need rdtsc function for this build
#endif
}
static ulong64 ticks;
static const char* p_str;
/* Remember s as the label that print_line() writes into the second field
 * of each record. Only the pointer is stored (in p_str); the string is
 * not copied.
 */
static void print_start(const char* s)
{
p_str = s;
}
/* Write one semicolon-separated record to stdout:
 *   <ticks>;<label set by print_start()>;<b>;<t>
 * b and t are printed as unsigned decimal numbers.
 */
static void print_line(ulong64 b, ulong64 t)
{
   fprintf(stdout, "%llu;%s;%llu;%llu\n", ticks, p_str, b, t);
}
/* Statement-repetition helpers. Each expands `stmt` N times inline, so the
 * timed region stays fully unrolled (no loop counter or branch inside the
 * measurement), exactly as if the calls had been written out by hand. */
#define TIMING_REP2(stmt)    stmt stmt
#define TIMING_REP4(stmt)    TIMING_REP2(stmt) TIMING_REP2(stmt)
#define TIMING_REP8(stmt)    TIMING_REP4(stmt) TIMING_REP4(stmt)
#define TIMING_REP16(stmt)   TIMING_REP8(stmt) TIMING_REP8(stmt)
#define TIMING_REP32(stmt)   TIMING_REP16(stmt) TIMING_REP16(stmt)
#define TIMING_REP64(stmt)   TIMING_REP32(stmt) TIMING_REP32(stmt)
#define TIMING_REP128(stmt)  TIMING_REP64(stmt) TIMING_REP64(stmt)

/* Timing driver.
 *
 * Calibrates the tick counter against a one second sleep, then for each
 * algorithm and operand size prints one record
 *    ticks-per-second;algorithm;bits;time
 * where `time` is the smallest per-call tick count observed.
 *
 * Every measurement loop follows the same scheme:
 *   - t2 holds the best (smallest) sample so far, seeded with the maximum
 *     ulong64 value;
 *   - one sample is the elapsed ticks of 2^k back-to-back calls, shifted
 *     right by k to give the per-call average;
 *   - whenever a new minimum is found, the iteration is not counted (--ix),
 *     so the loop only ends after the nominal number of iterations that did
 *     not improve the result.
 *
 * Returns 0.
 */
int main(void)
{
   fp_int a, b, c, d;
   ulong64 t1, t2;
   fp_digit fp;
   unsigned long t, ix;

   /* calibration: ticks elapsed across sleep(1) */
   t1 = TIMFUNC();
   sleep(1);
   ticks = TIMFUNC() - t1;
   fprintf(stderr, "Ticks per second: %llu\n", ticks);
   printf("Ticks/sec;Algorithm;bits;time\n");

   /* do some timings... */
   print_start("Addition");
   for (t = 2; t <= FP_SIZE / 2; t += 2) {
      fp_zero(&a);
      fp_zero(&b);
      fp_zero(&c);
      for (ix = 0; ix < t; ix++) {
         a.dp[ix] = ix;
         b.dp[ix] = ix;
      }
      a.used = t;
      b.used = t;
      t2 = -1;
      for (ix = 0; ix < 25000; ++ix) {
         t1 = TIMFUNC();
         TIMING_REP8(fp_add(&a, &b, &c);)
         /* keep the sample in t1 so the running minimum in t2 survives */
         t1 = (TIMFUNC() - t1) >> 3;
         if (t1 < t2) {
            --ix;
            t2 = t1;
         }
      }
      print_line(t * DIGIT_BIT, t2);
   }

   print_start("Multiplication");
   for (t = 2; t < FP_SIZE / 2; t += 2) {
      fp_zero(&a);
      fp_zero(&b);
      fp_zero(&c);
      for (ix = 0; ix < t; ix++) {
         a.dp[ix] = ix;
         b.dp[ix] = ix;
      }
      a.used = t;
      b.used = t;
      t2 = -1;
      for (ix = 0; ix < 100; ++ix) {
         t1 = TIMFUNC();
         TIMING_REP128(fp_mul(&a, &b, &c);)
         t1 = (TIMFUNC() - t1) >> 7;
         if (t1 < t2) {
            --ix;
            t2 = t1;
         }
      }
      print_line(t * DIGIT_BIT, t2);
   }

   print_start("Squaring");
   for (t = 2; t < FP_SIZE / 2; t += 2) {
      fp_zero(&a);
      fp_zero(&b);
      for (ix = 0; ix < t; ix++) {
         a.dp[ix] = ix;
      }
      a.used = t;
      t2 = -1;
      for (ix = 0; ix < 100; ++ix) {
         t1 = TIMFUNC();
         TIMING_REP128(fp_sqr(&a, &b);)
         t1 = (TIMFUNC() - t1) >> 7;
         if (t1 < t2) {
            --ix;
            t2 = t1;
         }
      }
      print_line(t * DIGIT_BIT, t2);
   }

   print_start("Invmod");
   for (t = 2; t < FP_SIZE / 2; t += 2) {
      /* modulus a: every digit has its low bit set */
      fp_zero(&a);
      for (ix = 0; ix < t; ix++) {
         a.dp[ix] = ix | 1;
      }
      a.used = t;
      /* value to invert: digits taken from rand(), then clamped */
      fp_zero(&b);
      for (ix = 0; ix < t; ix++) {
         b.dp[ix] = rand();
      }
      b.used = t;
      fp_clamp(&b);
      fp_zero(&c);
      t2 = -1;
      for (ix = 0; ix < 100; ++ix) {
         t1 = TIMFUNC();
         TIMING_REP64(fp_invmod(&b, &a, &c);)
         t1 = (TIMFUNC() - t1) >> 6;
         if (t1 < t2) {
            --ix;
            t2 = t1;
         }
      }
      print_line(t * DIGIT_BIT, t2);
   }

   print_start("Montgomery");
   for (t = 2; t <= (FP_SIZE / 2) - 4; t += 2) {
      fp_zero(&a);
      for (ix = 0; ix < t; ix++) {
         a.dp[ix] = ix | 1;
      }
      a.used = t;
      fp_montgomery_setup(&a, &fp);
      /* b = (a - 3)^2 is the value fed to the reduction */
      fp_sub_d(&a, 3, &b);
      fp_sqr(&b, &b);
      fp_copy(&b, &c);
      fp_copy(&b, &d);
      t2 = -1;
      for (ix = 0; ix < 100; ++ix) {
         t1 = TIMFUNC();
         /* 32 alternating pairs = 64 reductions */
         TIMING_REP32(fp_montgomery_reduce(&c, &a, fp);
                      fp_montgomery_reduce(&d, &a, fp);)
         t1 = (TIMFUNC() - t1) >> 6;
         /* the reductions work in place: restore the inputs for the next run */
         fp_copy(&b, &c);
         fp_copy(&b, &d);
         if (t1 < t2) {
            --ix;
            t2 = t1;
         }
      }
      print_line(t * DIGIT_BIT, t2);
   }

   print_start("Exptmod");
   for (t = 512 / DIGIT_BIT; t <= (FP_SIZE / 2) - 2; t += 256 / DIGIT_BIT) {
      fp_zero(&a);
      fp_zero(&b);
      fp_zero(&c);
      for (ix = 0; ix < t; ix++) {
         a.dp[ix] = ix + 1;
         b.dp[ix] = (fp_digit) rand() * (fp_digit) rand();
         c.dp[ix] = ix;
      }
      a.used = t;
      b.used = t;
      c.used = t;
      t2 = -1;
      for (ix = 0; ix < 500; ++ix) {
         t1 = TIMFUNC();
         TIMING_REP2(fp_exptmod(&c, &b, &a, &d);)
         t1 = (TIMFUNC() - t1) >> 1;
         /* NOTE(review): these copies overwrite the base c (and the result
          * d) with the exponent b after every sample; they look carried over
          * from the Montgomery loop - confirm they are intended here. */
         fp_copy(&b, &c);
         fp_copy(&b, &d);
         if (t1 < t2) {
            t2 = t1;
            --ix;
         }
      }
      print_line(t * DIGIT_BIT, t2);
   }

   return 0;
}

Binary file not shown.

2
gen.pl
View File

@ -6,7 +6,7 @@
use strict;
open( OUT, ">mpi.c" ) or die "Couldn't open mpi.c for writing: $!";
foreach my $filename (glob "*fp_*.c") {
foreach my $filename (glob "src/*/*fp_*.c") {
next if ($filename eq "fp_sqr_comba_generic.c");
open( SRC, "<$filename" ) or die "Couldn't open $filename for reading: $!";
print OUT "/* Start: $filename */\n";

49
libtfm.symbols Normal file
View File

@ -0,0 +1,49 @@
fp_2expt
fp_add
fp_add_d
fp_addmod
fp_cmp
fp_cmp_d
fp_cmp_mag
fp_cnt_lsb
fp_count_bits
fp_div
fp_div_2
fp_div_2d
fp_div_d
fp_exptmod
fp_gcd
fp_ident
fp_invmod
fp_isprime
fp_lcm
fp_lshd
fp_mod
fp_mod_2d
fp_mod_d
fp_montgomery_calc_normalization
fp_montgomery_reduce
fp_montgomery_setup
fp_mul
fp_mul_2
fp_mul_2d
fp_mul_d
fp_mulmod
fp_prime_random_ex
fp_radix_size
fp_read_radix
fp_read_signed_bin
fp_read_unsigned_bin
fp_rshd
fp_set
fp_signed_bin_size
fp_sqr
fp_sqrmod
fp_sub
fp_sub_d
fp_submod
fp_to_signed_bin
fp_to_unsigned_bin
fp_toradix
fp_toradix_n
fp_unsigned_bin_size

158
makefile
View File

@ -1,10 +1,22 @@
#makefile for TomsFastMath
#
#
VERSION=0.12
VERSION=0.13
CFLAGS += -Wall -W -Wshadow -Isrc/headers
# Compiler and Linker Names
ifndef PREFIX
PREFIX=
endif
ifeq ($(CC),cc)
CC = $(PREFIX)gcc
endif
LD=$(PREFIX)ld
AR=$(PREFIX)ar
RANLIB=$(PREFIX)ranlib
ifndef MAKE
MAKE=make
endif
@ -27,27 +39,29 @@ OBJECTS=src/addsub/fp_add.o src/addsub/fp_add_d.o src/addsub/fp_addmod.o src/add
src/addsub/fp_cmp_d.o src/addsub/fp_cmp_mag.o src/addsub/fp_sub.o src/addsub/fp_sub_d.o \
src/addsub/fp_submod.o src/addsub/s_fp_add.o src/addsub/s_fp_sub.o src/bin/fp_radix_size.o \
src/bin/fp_read_radix.o src/bin/fp_read_signed_bin.o src/bin/fp_read_unsigned_bin.o \
src/bin/fp_reverse.o src/bin/fp_s_rmap.o src/bin/fp_signed_bin_size.o src/bin/fp_to_signed_bin.o \
src/bin/fp_to_unsigned_bin.o src/bin/fp_toradix.o src/bin/fp_unsigned_bin_size.o src/bit/fp_cnt_lsb.o \
src/bit/fp_count_bits.o src/bit/fp_div_2.o src/bit/fp_div_2d.o src/bit/fp_lshd.o src/bit/fp_mod_2d.o \
src/bit/fp_rshd.o src/divide/fp_div.o src/divide/fp_div_d.o src/divide/fp_mod.o src/divide/fp_mod_d.o \
src/exptmod/fp_2expt.o src/exptmod/fp_exptmod.o src/misc/fp_ident.o src/misc/fp_set.o \
src/bin/fp_reverse.o src/bin/fp_signed_bin_size.o src/bin/fp_s_rmap.o src/bin/fp_toradix.o \
src/bin/fp_toradix_n.o src/bin/fp_to_signed_bin.o src/bin/fp_to_unsigned_bin.o \
src/bin/fp_unsigned_bin_size.o src/bit/fp_cnt_lsb.o src/bit/fp_count_bits.o src/bit/fp_div_2.o \
src/bit/fp_div_2d.o src/bit/fp_lshd.o src/bit/fp_mod_2d.o src/bit/fp_rshd.o src/divide/fp_div.o \
src/divide/fp_div_d.o src/divide/fp_mod.o src/divide/fp_mod_d.o src/exptmod/fp_2expt.o \
src/exptmod/fp_exptmod.o src/misc/fp_ident.o src/misc/fp_rand.o src/misc/fp_set.o \
src/mont/fp_montgomery_calc_normalization.o src/mont/fp_montgomery_reduce.o \
src/mont/fp_montgomery_setup.o src/mul/fp_mul.o src/mul/fp_mul_2.o src/mul/fp_mul_2d.o \
src/mul/fp_mul_comba.o src/mul/fp_mul_comba_12.o src/mul/fp_mul_comba_17.o src/mul/fp_mul_comba_20.o \
src/mul/fp_mul_comba_24.o src/mul/fp_mul_comba_28.o src/mul/fp_mul_comba_3.o src/mul/fp_mul_comba_32.o \
src/mul/fp_mul_comba_4.o src/mul/fp_mul_comba_48.o src/mul/fp_mul_comba_6.o src/mul/fp_mul_comba_64.o \
src/mul/fp_mul_comba_7.o src/mul/fp_mul_comba_8.o src/mul/fp_mul_comba_9.o \
src/mont/fp_montgomery_setup.o src/mul/fp_mul_2.o src/mul/fp_mul_2d.o src/mul/fp_mul.o \
src/mul/fp_mul_comba_12.o src/mul/fp_mul_comba_17.o src/mul/fp_mul_comba_20.o src/mul/fp_mul_comba_24.o \
src/mul/fp_mul_comba_28.o src/mul/fp_mul_comba_32.o src/mul/fp_mul_comba_3.o src/mul/fp_mul_comba_48.o \
src/mul/fp_mul_comba_4.o src/mul/fp_mul_comba_64.o src/mul/fp_mul_comba_6.o src/mul/fp_mul_comba_7.o \
src/mul/fp_mul_comba_8.o src/mul/fp_mul_comba_9.o src/mul/fp_mul_comba.o \
src/mul/fp_mul_comba_small_set.o src/mul/fp_mul_d.o src/mul/fp_mulmod.o src/numtheory/fp_gcd.o \
src/numtheory/fp_invmod.o src/numtheory/fp_isprime.o src/numtheory/fp_lcm.o \
src/numtheory/fp_prime_miller_rabin.o src/numtheory/fp_prime_random_ex.o src/sqr/fp_sqr.o \
src/sqr/fp_sqr_comba.o src/sqr/fp_sqr_comba_12.o src/sqr/fp_sqr_comba_17.o src/sqr/fp_sqr_comba_20.o \
src/sqr/fp_sqr_comba_24.o src/sqr/fp_sqr_comba_28.o src/sqr/fp_sqr_comba_3.o src/sqr/fp_sqr_comba_32.o \
src/sqr/fp_sqr_comba_4.o src/sqr/fp_sqr_comba_48.o src/sqr/fp_sqr_comba_6.o src/sqr/fp_sqr_comba_64.o \
src/sqr/fp_sqr_comba_7.o src/sqr/fp_sqr_comba_8.o src/sqr/fp_sqr_comba_9.o \
src/numtheory/fp_invmod.o src/numtheory/fp_isprime.o src/numtheory/fp_isprime_ex.o \
src/numtheory/fp_lcm.o src/numtheory/fp_prime_miller_rabin.o src/numtheory/fp_prime_random_ex.o \
src/sqr/fp_sqr.o src/sqr/fp_sqr_comba_12.o src/sqr/fp_sqr_comba_17.o src/sqr/fp_sqr_comba_20.o \
src/sqr/fp_sqr_comba_24.o src/sqr/fp_sqr_comba_28.o src/sqr/fp_sqr_comba_32.o src/sqr/fp_sqr_comba_3.o \
src/sqr/fp_sqr_comba_48.o src/sqr/fp_sqr_comba_4.o src/sqr/fp_sqr_comba_64.o src/sqr/fp_sqr_comba_6.o \
src/sqr/fp_sqr_comba_7.o src/sqr/fp_sqr_comba_8.o src/sqr/fp_sqr_comba_9.o src/sqr/fp_sqr_comba.o \
src/sqr/fp_sqr_comba_generic.o src/sqr/fp_sqr_comba_small_set.o src/sqr/fp_sqrmod.o
HEADERS=src/headers/tfm.h
HEADERS_PUB:=src/headers/tfm.h
HEADERS=src/headers/tfm_private.h $(HEADERS_PUB)
#END_INS
@ -77,32 +91,44 @@ endif
default: $(LIBNAME)
$(OBJECTS): $(HEADERS)
$(LIBNAME): $(OBJECTS)
$(AR) $(ARFLAGS) $@ $(OBJECTS)
ranlib $@
$(RANLIB) $@
install: $(LIBNAME)
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(LIBPATH)
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(INCPATH)
install -g $(GROUP) -o $(USER) $(LIBNAME) $(DESTDIR)$(LIBPATH)
install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH)
install -g $(GROUP) -o $(USER) $(HEADERS_PUB) $(DESTDIR)$(INCPATH)
mtest/mtest: mtest/mtest.o
cd mtest ; CFLAGS="$(CFLAGS) -I../" MAKE=${MAKE} ${MAKE} mtest
.PHONY: mtest
mtest: $(LIBNAME)
cd mtest; CC="$(CC)" CFLAGS="$(CFLAGS) -I../" MAKE=${MAKE} ${MAKE} mtest
test: $(LIBNAME) demo/test.o mtest/mtest
demo/test.o: CFLAGS+=-Wno-unused-result
.PHONY: test
test: $(LIBNAME) demo/test.o
$(CC) $(CFLAGS) demo/test.o $(LIBNAME) $(PROF) -o test
timing: $(LIBNAME) demo/test.o
test_standalone: CFLAGS+=-DTFM_DEMO_TEST_VS_MTEST=0
.PHONY: test_standalone
test_standalone: $(LIBNAME) demo/test.o
$(CC) $(CFLAGS) demo/test.o $(LIBNAME) $(PROF) -o test
timing: $(LIBNAME) demo/timing.o
$(CC) $(CFLAGS) demo/timing.o $(LIBNAME) $(PROF) -o timing
profiled:
CFLAGS="${CFLAGS} -fprofile-generate" MAKE=${MAKE} ${MAKE} timing
CC="$(CC)" PREFIX="${PREFIX}" CFLAGS="${CFLAGS} -fprofile-generate" MAKE=${MAKE} ${MAKE} timing
./test
rm -f `find . -type f | grep "[.]o" | xargs`
rm -f `find . -type f | grep "[.]a" | xargs`
rm -f `find . -type f -name "*.o" | xargs`
rm -f `find . -type f -name "*.a" | xargs`
rm -f test
CFLAGS="${CFLAGS} -fprofile-use" MAKE=${MAKE} ${MAKE} timing
CC="$(CC)" PREFIX="${PREFIX}" CFLAGS="${CFLAGS} -fprofile-use" MAKE=${MAKE} ${MAKE} timing
stest: $(LIBNAME) demo/stest.o
$(CC) $(CFLAGS) demo/stest.o $(LIBNAME) -o stest
@ -111,6 +137,15 @@ rsatest: $(LIBNAME) demo/rsa.o
$(CC) $(CFLAGS) demo/rsa.o $(LIBNAME) -o rsatest
docdvi: tfm.tex
cp tfm.tex tfm.bak
touch --reference=tfm.tex tfm.bak
(printf "%s" "\def\fixedpdfdate{"; date +'D:%Y%m%d%H%M%S%:z' -d @$$(stat --format=%Y tfm.tex) | sed "s/:\([0-9][0-9]\)$$/'\1'}/g") > tfm-deterministic.tex
printf "%s\n" "\pdfinfo{" >> tfm-deterministic.tex
printf "%s\n" " /CreationDate (\fixedpdfdate)" >> tfm-deterministic.tex
printf "%s\n}\n" " /ModDate (\fixedpdfdate)" >> tfm-deterministic.tex
cat tfm.tex >> tfm-deterministic.tex
mv tfm-deterministic.tex tfm.tex
touch --reference=tfm.bak tfm.tex
touch tfm.ind
latex tfm >/dev/null
latex tfm >/dev/null
@ -119,41 +154,48 @@ docdvi: tfm.tex
docs: docdvi
latex tfm >/dev/null
dvipdf tfm
pdflatex tfm >/dev/null
sed -b -i 's,^/ID \[.*\]$$,/ID [<0> <0>],g' tfm.pdf
mv tfm.bak tfm.tex
mv -f tfm.pdf doc
#This rule cleans the source tree of all compiled code, not including the pdf
#documentation.
clean:
rm -f `find . -type f | grep "[.]o" | xargs`
rm -f `find . -type f | grep "[.]lo" | xargs`
rm -f `find . -type f | grep "[.]a" | xargs`
rm -f `find . -type f | grep "[.]la" | xargs`
rm -f `find . -type f | grep "[.]obj" | xargs`
rm -f `find . -type f | grep "[.]lib" | xargs`
rm -f `find . -type f | grep "[.]exe" | xargs`
rm -f `find . -type f | grep "[.]gcda" | xargs`
rm -f `find . -type f | grep "[.]gcno" | xargs`
rm -f `find . -type f | grep "[.]il" | xargs`
rm -f `find . -type f | grep "[.]dyn" | xargs`
rm -f `find . -type f | grep "[.]dpi" | xargs`
rm -rf `find . -type d | grep "[.]libs" | xargs`
rm -f tfm.aux tfm.dvi tfm.idx tfm.ilg tfm.ind tfm.lof tfm.log tfm.toc test mtest/mtest
cd mtest ; MAKE=${MAKE} ${MAKE} clean
rm -f `find . -type f -name "*.o" | xargs`
rm -f `find . -type f -name "*.lo" | xargs`
rm -f `find . -type f -name "*.a" | xargs`
rm -f `find . -type f -name "*.la" | xargs`
rm -f `find . -type f -name "*.obj" | xargs`
rm -f `find . -type f -name "*.lib" | xargs`
rm -f `find . -type f -name "*.exe" | xargs`
rm -f `find . -type f -name "*.gcov" | xargs`
rm -f `find . -type f -name "*.gcda" | xargs`
rm -f `find . -type f -name "*.gcno" | xargs`
rm -f `find . -type f -name "*.il" | xargs`
rm -f `find . -type f -name "*.dyn" | xargs`
rm -f `find . -type f -name "*.dpi" | xargs`
rm -rf `find . -type d -name "*.libs" | xargs`
rm -f tfm.aux tfm.dvi tfm.idx tfm.ilg tfm.ind tfm.lof tfm.log tfm.out tfm.toc test test.exe
cd mtest; MAKE=${MAKE} ${MAKE} clean
no_oops: clean
cd .. ; cvs commit
echo Scanning for scratch/dirty files
find . -type f | grep -v CVS | xargs -n 1 bash mess.sh
.PHONY: pre_gen
pre_gen:
perl gen.pl
sed -e 's/[[:blank:]]*$$//' mpi.c > pre_gen/mpi.c
rm mpi.c
zipup: no_oops docs clean
perl gen.pl ; mv mpi.c pre_gen/ ; \
cd .. ; rm -rf tfm* tomsfastmath-$(VERSION) ; mkdir tomsfastmath-$(VERSION) ; \
cp -R ./tomsfastmath/* ./tomsfastmath-$(VERSION)/ ; \
tar -c tomsfastmath-$(VERSION)/* | bzip2 -9vvc > tfm-$(VERSION).tar.bz2 ; \
zip -9r tfm-$(VERSION).zip tomsfastmath-$(VERSION)/* ; \
mv -f tfm* ~ ; rm -rf tomsfastmath-$(VERSION)
zipup:
rm -rf ../tomsfastmath-$(VERSION) && rm -f ../tfm-$(VERSION).zip ../tfm-$(VERSION).tar.bz2 && \
expsrc.sh -i . -o ../tomsfastmath-$(VERSION) --svntags --no-fetch -p '*.c' -p '*.h' && \
MAKE=${MAKE} ${MAKE} -C ../tomsfastmath-$(VERSION) docs && \
tar -c ../tomsfastmath-$(VERSION)/* | bzip2 -9vvc > ../tfm-$(VERSION).tar.bz2 && \
zip -9 -r ../tfm-$(VERSION).zip ../tomsfastmath-$(VERSION)/* && \
gpg -b -a ../tfm-$(VERSION).tar.bz2 && gpg -b -a ../tfm-$(VERSION).zip
# $Source: /cvs/libtom/tomsfastmath/makefile,v $
# $Revision: 1.38 $
# $Date: 2007/03/13 01:23:03 $
new_file:
bash updatemakes.sh
# $Source$
# $Revision$
# $Date$

View File

@ -1,9 +1,10 @@
#makefile for TomsFastMath
#
#
VERSION=0:12
VERSION=1:0:0
CC=libtool --mode=compile --tag=CC gcc
LT ?= libtool
LTCOMPILE = $(LT) --mode=compile --tag=CC $(CC)
CFLAGS += -Wall -W -Wshadow -Isrc/headers
@ -25,24 +26,25 @@ OBJECTS=src/addsub/fp_add.o src/addsub/fp_add_d.o src/addsub/fp_addmod.o src/add
src/addsub/fp_cmp_d.o src/addsub/fp_cmp_mag.o src/addsub/fp_sub.o src/addsub/fp_sub_d.o \
src/addsub/fp_submod.o src/addsub/s_fp_add.o src/addsub/s_fp_sub.o src/bin/fp_radix_size.o \
src/bin/fp_read_radix.o src/bin/fp_read_signed_bin.o src/bin/fp_read_unsigned_bin.o \
src/bin/fp_reverse.o src/bin/fp_s_rmap.o src/bin/fp_signed_bin_size.o src/bin/fp_to_signed_bin.o \
src/bin/fp_to_unsigned_bin.o src/bin/fp_toradix.o src/bin/fp_unsigned_bin_size.o src/bit/fp_cnt_lsb.o \
src/bit/fp_count_bits.o src/bit/fp_div_2.o src/bit/fp_div_2d.o src/bit/fp_lshd.o src/bit/fp_mod_2d.o \
src/bit/fp_rshd.o src/divide/fp_div.o src/divide/fp_div_d.o src/divide/fp_mod.o src/divide/fp_mod_d.o \
src/exptmod/fp_2expt.o src/exptmod/fp_exptmod.o src/misc/fp_ident.o src/misc/fp_set.o \
src/bin/fp_reverse.o src/bin/fp_signed_bin_size.o src/bin/fp_s_rmap.o src/bin/fp_toradix.o \
src/bin/fp_toradix_n.o src/bin/fp_to_signed_bin.o src/bin/fp_to_unsigned_bin.o \
src/bin/fp_unsigned_bin_size.o src/bit/fp_cnt_lsb.o src/bit/fp_count_bits.o src/bit/fp_div_2.o \
src/bit/fp_div_2d.o src/bit/fp_lshd.o src/bit/fp_mod_2d.o src/bit/fp_rshd.o src/divide/fp_div.o \
src/divide/fp_div_d.o src/divide/fp_mod.o src/divide/fp_mod_d.o src/exptmod/fp_2expt.o \
src/exptmod/fp_exptmod.o src/misc/fp_ident.o src/misc/fp_rand.o src/misc/fp_set.o \
src/mont/fp_montgomery_calc_normalization.o src/mont/fp_montgomery_reduce.o \
src/mont/fp_montgomery_setup.o src/mul/fp_mul.o src/mul/fp_mul_2.o src/mul/fp_mul_2d.o \
src/mul/fp_mul_comba.o src/mul/fp_mul_comba_12.o src/mul/fp_mul_comba_17.o src/mul/fp_mul_comba_20.o \
src/mul/fp_mul_comba_24.o src/mul/fp_mul_comba_28.o src/mul/fp_mul_comba_3.o src/mul/fp_mul_comba_32.o \
src/mul/fp_mul_comba_4.o src/mul/fp_mul_comba_48.o src/mul/fp_mul_comba_6.o src/mul/fp_mul_comba_64.o \
src/mul/fp_mul_comba_7.o src/mul/fp_mul_comba_8.o src/mul/fp_mul_comba_9.o \
src/mont/fp_montgomery_setup.o src/mul/fp_mul_2.o src/mul/fp_mul_2d.o src/mul/fp_mul.o \
src/mul/fp_mul_comba_12.o src/mul/fp_mul_comba_17.o src/mul/fp_mul_comba_20.o src/mul/fp_mul_comba_24.o \
src/mul/fp_mul_comba_28.o src/mul/fp_mul_comba_32.o src/mul/fp_mul_comba_3.o src/mul/fp_mul_comba_48.o \
src/mul/fp_mul_comba_4.o src/mul/fp_mul_comba_64.o src/mul/fp_mul_comba_6.o src/mul/fp_mul_comba_7.o \
src/mul/fp_mul_comba_8.o src/mul/fp_mul_comba_9.o src/mul/fp_mul_comba.o \
src/mul/fp_mul_comba_small_set.o src/mul/fp_mul_d.o src/mul/fp_mulmod.o src/numtheory/fp_gcd.o \
src/numtheory/fp_invmod.o src/numtheory/fp_isprime.o src/numtheory/fp_lcm.o \
src/numtheory/fp_prime_miller_rabin.o src/numtheory/fp_prime_random_ex.o src/sqr/fp_sqr.o \
src/sqr/fp_sqr_comba.o src/sqr/fp_sqr_comba_12.o src/sqr/fp_sqr_comba_17.o src/sqr/fp_sqr_comba_20.o \
src/sqr/fp_sqr_comba_24.o src/sqr/fp_sqr_comba_28.o src/sqr/fp_sqr_comba_3.o src/sqr/fp_sqr_comba_32.o \
src/sqr/fp_sqr_comba_4.o src/sqr/fp_sqr_comba_48.o src/sqr/fp_sqr_comba_6.o src/sqr/fp_sqr_comba_64.o \
src/sqr/fp_sqr_comba_7.o src/sqr/fp_sqr_comba_8.o src/sqr/fp_sqr_comba_9.o \
src/numtheory/fp_invmod.o src/numtheory/fp_isprime.o src/numtheory/fp_isprime_ex.o \
src/numtheory/fp_lcm.o src/numtheory/fp_prime_miller_rabin.o src/numtheory/fp_prime_random_ex.o \
src/sqr/fp_sqr.o src/sqr/fp_sqr_comba_12.o src/sqr/fp_sqr_comba_17.o src/sqr/fp_sqr_comba_20.o \
src/sqr/fp_sqr_comba_24.o src/sqr/fp_sqr_comba_28.o src/sqr/fp_sqr_comba_32.o src/sqr/fp_sqr_comba_3.o \
src/sqr/fp_sqr_comba_48.o src/sqr/fp_sqr_comba_4.o src/sqr/fp_sqr_comba_64.o src/sqr/fp_sqr_comba_6.o \
src/sqr/fp_sqr_comba_7.o src/sqr/fp_sqr_comba_8.o src/sqr/fp_sqr_comba_9.o src/sqr/fp_sqr_comba.o \
src/sqr/fp_sqr_comba_generic.o src/sqr/fp_sqr_comba_small_set.o src/sqr/fp_sqrmod.o
HEADERS=src/headers/tfm.h
@ -80,10 +82,13 @@ endif
default: $(LIBNAME)
objs: $(OBJECTS)
$(OBJECTS): $(HEADERS)
.c.o:
$(LTCOMPILE) $(CFLAGS) $(LDFLAGS) -o $@ -c $<
$(LIBNAME): $(OBJECTS)
libtool --silent --mode=link gcc $(CFLAGS) `find . -type f | grep "[.]lo" | xargs` -o $(LIBNAME) -rpath $(LIBPATH) -version-info $(VERSION)
libtool --silent --mode=link --tag=CC $(CC) $(CFLAGS) $(LDFLAGS) `find . -type f | grep "[.]lo" | xargs` -o $(LIBNAME) -rpath $(LIBPATH) -version-info $(VERSION) -export-symbols libtfm.symbols
install: $(LIBNAME)
install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(LIBPATH)
@ -94,16 +99,26 @@ install: $(LIBNAME)
mtest/mtest: mtest/mtest.c
cd mtest ; make mtest
test: $(LIBNAME) demo/test.o mtest/mtest
$(CC) $(CFLAGS) demo/test.o $(LIBNAME_S) $(PROF) -o test
demo/test.o: CFLAGS+=-Wno-unused-result
timing: $(LIBNAME) demo/test.o
$(CC) $(CFLAGS) demo/test.o $(LIBNAME_S) $(PROF) -o test
.PHONY: test
test: $(LIBNAME) demo/test.o
$(LT) --mode=link --tag=CC $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o test demo/test.o $(LIBNAME)
test_standalone: CFLAGS+=-DTFM_DEMO_TEST_VS_MTEST=0
.PHONY: test_standalone
test_standalone: $(LIBNAME) demo/test.o
$(LT) --mode=link --tag=CC $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o test demo/test.o $(LIBNAME)
stest: $(LIBNAME) demo/stest.o
$(CC) $(CFLAGS) demo/stest.o $(LIBNAME_S) -o stest
$(LT) --mode=link --tag=CC $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o stest demo/stest.o $(LIBNAME)
# $Source: /cvs/libtom/tomsfastmath/makefile.shared,v $
# $Revision: 1.19 $
# $Date: 2007/03/13 01:23:03 $
.PHONY: timing
timing: $(LIBNAME) demo/timing.o
$(LT) --mode=link --tag=CC $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o timing demo/timing.o $(LIBNAME)
# $Source$
# $Revision$
# $Date$

View File

@ -1,9 +1,10 @@
CFLAGS += -Wall -W -O3
CFLAGS += -Wall -W -O3 -Wno-unused-result
default: mtest
.PHONY: mtest
mtest: mtest.o
$(CC) $(CFLAGS) mtest.o -ltommath -o mtest
clean:
rm -f *.o mtest *~
rm -f *.o mtest *~ mtest.exe

View File

@ -39,6 +39,7 @@ mulmod
#include <time.h>
#include <tommath.h>
#define CRYPT
#undef DIGIT_BIT
#include "../src/headers/tfm.h"
FILE *rng;
@ -46,8 +47,8 @@ FILE *rng;
/* 1-2048 bit numbers */
void rand_num(mp_int *a)
{
int n, size;
unsigned char buf[2048];
int size;
unsigned char buf[(FP_MAX_SIZE/16 - DIGIT_BIT/2) + 1];
size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % (FP_MAX_SIZE/16 - DIGIT_BIT/2);
buf[0] = (fgetc(rng)&1)?1:0;
@ -59,8 +60,8 @@ void rand_num(mp_int *a)
/* 1-256 bit numbers (to test things like exptmod) */
void rand_num2(mp_int *a)
{
int n, size;
unsigned char buf[2048];
int size;
unsigned char buf[(FP_MAX_SIZE/16 - DIGIT_BIT/2) + 1];
size = 1 + ((fgetc(rng)<<8) + fgetc(rng)) % (FP_MAX_SIZE/16 - DIGIT_BIT/2);
buf[0] = (fgetc(rng)&1)?1:0;
@ -69,13 +70,15 @@ void rand_num2(mp_int *a)
mp_read_raw(a, buf, 1+size);
}
#define mp_to64(a, b) mp_toradix(a, b, 64)
#define mp_to64(a, b) mp_toradix_n(a, b, 64, sizeof(b))
int main(void)
{
int n, tmp;
mp_int a, b, c, d, e;
#ifdef MTEST_NO_FULLSPEED
clock_t t1;
#endif
char buf[4096];
mp_init(&a);
@ -88,7 +91,7 @@ int main(void)
/* initial (2^n - 1)^2 testing, makes sure the comba multiplier works [it has the new carry code] */
/*
mp_set(&a, 1);
for (n = 1; n < 8192; n++) {
for (n = 1; n < ((FP_MAX_SIZE-(8*DIGIT_BIT))/2); n++) {
mp_mul(&a, &a, &c);
printf("mul\n");
mp_to64(&a, buf);
@ -111,9 +114,11 @@ int main(void)
}
}
#ifdef MTEST_NO_FULLSPEED
t1 = clock();
#endif
for (;;) {
#if 0
#ifdef MTEST_NO_FULLSPEED
if (clock() - t1 > CLOCKS_PER_SEC) {
sleep(2);
t1 = clock();

File diff suppressed because it is too large Load Diff

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
void fp_add(fp_int *a, fp_int *b, fp_int *c)
{

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* c = a + b */
void fp_add_d(fp_int *a, fp_digit b, fp_int *c)

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* d = a + b (mod c) */
int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
int fp_cmp(fp_int *a, fp_int *b)
{

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* compare against a single digit */
int fp_cmp_d(fp_int *a, fp_digit b)

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
int fp_cmp_mag(fp_int *a, fp_int *b)
{

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* c = a - b */
void fp_sub(fp_int *a, fp_int *b, fp_int *c)

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* c = a - b */
void fp_sub_d(fp_int *a, fp_digit b, fp_int *c)

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* d = a - b (mod c) */
int fp_submod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* unsigned addition */
void s_fp_add(fp_int *a, fp_int *b, fp_int *c)
@ -16,7 +16,7 @@ void s_fp_add(fp_int *a, fp_int *b, fp_int *c)
register fp_word t;
y = MAX(a->used, b->used);
oldused = c->used;
oldused = MIN(c->used, FP_SIZE);
c->used = y;
t = 0;

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* unsigned subtraction ||a|| >= ||b|| ALWAYS! */
void s_fp_sub(fp_int *a, fp_int *b, fp_int *c)
@ -27,7 +27,7 @@ void s_fp_sub(fp_int *a, fp_int *b, fp_int *c)
for (; x < a->used; x++) {
t = ((fp_word)a->dp[x]) - t;
c->dp[x] = (fp_digit)t;
t = (t >> DIGIT_BIT);
t = (t >> DIGIT_BIT)&1;
}
for (; x < oldused; x++) {
c->dp[x] = 0;

View File

@ -7,11 +7,10 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
int fp_radix_size(fp_int *a, int radix, int *size)
{
int digs;
fp_int t;
fp_digit d;
@ -36,7 +35,6 @@ int fp_radix_size(fp_int *a, int radix, int *size)
t.sign = FP_ZPOS;
}
digs = 0;
while (fp_iszero (&t) == FP_NO) {
fp_div_d (&t, (fp_digit) radix, &t, &d);
(*size)++;

View File

@ -7,13 +7,16 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
int fp_read_radix(fp_int *a, char *str, int radix)
{
int y, neg;
char ch;
/* set the integer to the default of zero */
fp_zero (a);
/* make sure the radix is ok */
if (radix < 2 || radix > 64) {
return FP_VAL;
@ -29,16 +32,13 @@ int fp_read_radix(fp_int *a, char *str, int radix)
neg = FP_ZPOS;
}
/* set the integer to the default of zero */
fp_zero (a);
/* process each digit of the string */
while (*str) {
/* if the radix < 36 the conversion is case insensitive
* this allows numbers like 1AB and 1ab to represent the same value
* [e.g. in hex]
*/
ch = (char) ((radix < 36) ? toupper (*str) : *str);
ch = (char) ((radix <= 36) ? toupper ((int)*str) : *str);
for (y = 0; y < 64; y++) {
if (ch == fp_s_rmap[y]) {
break;

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
void fp_read_signed_bin(fp_int *a, unsigned char *b, int c)
{

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
void fp_read_unsigned_bin(fp_int *a, unsigned char *b, int c)
{

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* reverse an array, used for radix code */
void fp_reverse (unsigned char *s, int len)

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* chars used in radix conversions */
const char *fp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
int fp_signed_bin_size(fp_int *a)
{

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
void fp_to_signed_bin(fp_int *a, unsigned char *b)
{

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
void fp_to_unsigned_bin(fp_int *a, unsigned char *b)
{

View File

@ -7,51 +7,23 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/**
* a: pointer to the fp_int holding the input number
* str: output buffer
* radix: number of distinct characters used to encode the number
*
* The radix value can be in the range 2 to 64. This function converts the
* number a into the string str. Avoid this function: if the str buffer is
* too small, the resulting overflow cannot be detected.
* Use fp_toradix_n() instead.
*
* Return: FP_VAL on error, FP_OKAY on success.
*/
int fp_toradix(fp_int *a, char *str, int radix)
{
int digs;
fp_int t;
fp_digit d;
char *_s = str;
/* check range of the radix */
if (radix < 2 || radix > 64) {
return FP_VAL;
}
/* quick out if its zero */
if (fp_iszero(a) == 1) {
*str++ = '0';
*str = '\0';
return FP_OKAY;
}
fp_init_copy(&t, a);
/* if it is negative output a - */
if (t.sign == FP_NEG) {
++_s;
*str++ = '-';
t.sign = FP_ZPOS;
}
digs = 0;
while (fp_iszero (&t) == FP_NO) {
fp_div_d (&t, (fp_digit) radix, &t, &d);
*str++ = fp_s_rmap[d];
++digs;
}
/* reverse the digits of the string. In this case _s points
* to the first digit [exluding the sign] of the number]
*/
fp_reverse ((unsigned char *)_s, digs);
/* append a NULL so the string is properly terminated */
*str = '\0';
return FP_OKAY;
return fp_toradix_n(a, str, radix, INT_MAX);
}
/* $Source$ */

71
src/bin/fp_toradix_n.c Normal file
View File

@ -0,0 +1,71 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/**
 * Convert a into a NUL-terminated string in the given radix.
 *
 * a:      number to convert (not modified; a private copy is divided down)
 * str:    output buffer
 * radix:  base of the textual representation, 2..64
 * maxlen: number of bytes that may be written to str, terminator included
 *
 * Return: FP_OKAY on success. FP_VAL if radix is out of range, if maxlen is
 * smaller than 2, or if the number did not fit; in the last case str still
 * holds a NUL-terminated string, made of the sign (if any) followed by the
 * least significant digits that fitted.
 */
int fp_toradix_n(fp_int *a, char *str, int radix, int maxlen)
{
   fp_int   tmp;
   fp_digit rem;
   char    *out         = str;   /* next byte to be written */
   char    *first_digit = str;   /* start of the span that gets reversed */
   int      ndigits     = 0;

   /* reject unusable radix / buffer sizes before touching the buffer */
   if (radix < 2 || radix > 64 || maxlen < 2) {
      return FP_VAL;
   }

   /* zero is emitted directly, the division loop below would print nothing */
   if (fp_iszero(a) == FP_YES) {
      out[0] = '0';
      out[1] = '\0';
      return FP_OKAY;
   }

   fp_init_copy(&tmp, a);

   /* emit the sign up front and continue with the magnitude only */
   if (tmp.sign == FP_NEG) {
      *out++      = '-';
      first_digit = out;         /* the '-' must stay in place when reversing */
      tmp.sign    = FP_ZPOS;
      --maxlen;                  /* the sign consumed one byte of the budget */
   }

   /* peel off digits, least significant first */
   while (fp_iszero(&tmp) == FP_NO) {
      --maxlen;
      if (maxlen < 1) {
         /* only the byte reserved for the terminator is left */
         break;
      }
      fp_div_d(&tmp, (fp_digit) radix, &tmp, &rem);
      *out++ = fp_s_rmap[rem];
      ++ndigits;
   }

   /* digits were produced in reverse order, put them most significant first */
   fp_reverse((unsigned char *) first_digit, ndigits);

   /* terminate the string */
   *out = '\0';

   /* a budget that ran out means the loop was left early */
   return (maxlen < 1) ? FP_VAL : FP_OKAY;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
int fp_unsigned_bin_size(fp_int *a)
{

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
static const int lnz[16] = {
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
int fp_count_bits (fp_int * a)
{

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* b = a/2 */
void fp_div_2(fp_int * a, fp_int * b)

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* c = a / 2**b */
void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d)

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
void fp_lshd(fp_int *a, int x)
{

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* c = a mod 2**d */
void fp_mod_2d(fp_int *a, int b, fp_int *c)

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
void fp_rshd(fp_int *a, int x)
{

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* a/b => cb + d == a */
int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d)

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
static int s_is_power_of_two(fp_digit b, int *p)
{

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* c = a mod b, 0 <= c < b */
int fp_mod(fp_int *a, fp_int *b, fp_int *c)

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* c = a mod b, 0 <= c < b */
int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c)

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* computes a = 2**b */
void fp_2expt(fp_int *a, int b)

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
#ifdef TFM_TIMING_RESISTANT

8
src/generators/.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
comba_mult_gen
comba_mult_smallgen
comba_sqr_gen
comba_sqr_smallgen
comba_mult_gen.exe
comba_mult_smallgen.exe
comba_sqr_gen.exe
comba_sqr_smallgen.exe

View File

@ -18,6 +18,10 @@ int main(int argc, char **argv)
/* print out preamble */
printf(
"#define TFM_DEFINES\n"
"#include \"fp_mul_comba.c\"\n"
"\n"
"#if defined(TFM_MUL%d) && FP_SIZE >= %d\n"
"void fp_mul_comba%d(fp_int *A, fp_int *B, fp_int *C)\n"
"{\n"
" fp_digit c0, c1, c2, at[%d];\n"
@ -26,7 +30,7 @@ printf(
" memcpy(at+%d, B->dp, %d * sizeof(fp_digit));\n"
" COMBA_START;\n"
"\n"
" COMBA_CLEAR;\n", N, N+N, N, N, N);
" COMBA_CLEAR;\n", N, N+N, N, N+N, N, N, N);
/* now do the rows */
for (x = 0; x < (N+N-1); x++) {
@ -53,7 +57,11 @@ printf(
" C->sign = A->sign ^ B->sign;\n"
" fp_clamp(C);\n"
" COMBA_FINI;\n"
"}\n\n\n", N+N-1, N+N);
"}\n#endif\n\n\n"
"/* $Source$ */\n"
"/* $Revision$ */\n"
"/* $Date$ */\n"
, N+N-1, N+N);
return 0;
}

View File

@ -7,6 +7,10 @@ int main(int argc, char **argv)
/* print out preamble */
printf(
"#define TFM_DEFINES\n"
"#include \"fp_mul_comba.c\"\n"
"\n"
"#if defined(TFM_SMALL_SET)\n"
"void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)\n"
"{\n"
" fp_digit c0, c1, c2, at[32];\n"
@ -51,7 +55,10 @@ printf(
" COMBA_FINI;\n"
" break;\n", N+N-1, N+N);
}
printf(" }\n}\n\n");
printf(" }\n}\n\n#endif\n\n\n"
"/* $Source$ */\n"
"/* $Revision$ */\n"
"/* $Date$ */\n");
return 0;
}

View File

@ -16,20 +16,26 @@ int main(int argc, char **argv)
N = atoi(argv[1]);
printf(
"#ifdef TFM_SQR%d\n"
"#define TFM_DEFINES\n"
"#include \"fp_sqr_comba.c\"\n"
"\n"
"#if defined(TFM_SQR%d) && FP_SIZE >= %d\n"
"void fp_sqr_comba%d(fp_int *A, fp_int *B)\n"
"{\n"
" fp_digit *a, b[%d], c0, c1, c2, sc0, sc1, sc2;\n"
"#ifdef TFM_ISO\n"
" fp_word tt;\n"
"#endif\n"
"\n"
" a = A->dp;\n"
" COMBA_START; \n"
" COMBA_START;\n"
"\n"
" /* clear carries */\n"
" CLEAR_CARRY;\n"
"\n"
" /* output 0 */\n"
" SQRADD(a[0],a[0]);\n"
" COMBA_STORE(b[0]);\n", N, N, N+N);
" COMBA_STORE(b[0]);\n", N, N+N, N, N+N);
for (x = 1; x < N+N-1; x++) {
printf(
@ -91,7 +97,11 @@ printf(
" B->sign = FP_ZPOS;\n"
" memcpy(B->dp, b, %d * sizeof(fp_digit));\n"
" fp_clamp(B);\n"
"}\n#endif\n\n\n", N+N, N+N);
"}\n#endif\n\n\n"
"/* $Source$ */\n"
"/* $Revision$ */\n"
"/* $Date$ */\n"
, N+N, N+N);
return 0;
}

View File

@ -16,9 +16,16 @@ int main(int argc, char **argv)
int x, y, z, N, f;
printf(
"#define TFM_DEFINES\n"
"#include \"fp_sqr_comba.c\"\n"
"\n"
"#if defined(TFM_SMALL_SET)\n"
"void fp_sqr_comba_small(fp_int *A, fp_int *B)\n"
"{\n"
" fp_digit *a, b[32], c0, c1, c2, sc0, sc1, sc2;\n"
"#ifdef TFM_ISO\n"
" fp_word tt;\n"
"#endif\n"
);
printf(" switch (A->used) { \n");
@ -99,7 +106,11 @@ printf(
" break;\n\n", N+N, N+N);
}
printf("}\n\n}\n");
printf("}\n}\n\n#endif /* TFM_SMALL_SET */\n\n"
"/* $Source$ */\n"
"/* $Revision$ */\n"
"/* $Date$ */\n"
);
return 0;
}

31
src/generators/makefile Normal file
View File

@ -0,0 +1,31 @@
# Builds the small helper programs that print the unrolled comba
# multiplier/squaring sources, and (via "regen") rewrites the generated
# files in ../mul and ../sqr.

# none of these targets produce a file of the same name
.PHONY: all clean regen

# build every generator that "regen" depends on
all: comba_mult_gen comba_mult_smallgen comba_sqr_gen comba_sqr_smallgen

clean:
	rm -f comba_mult_gen
	rm -f comba_mult_gen.exe
	rm -f comba_mult_smallgen
	rm -f comba_mult_smallgen.exe
	rm -f comba_sqr_gen
	rm -f comba_sqr_gen.exe
	rm -f comba_sqr_smallgen
	rm -f comba_sqr_smallgen.exe

comba_mult_gen: comba_mult_gen.c
	gcc -o comba_mult_gen comba_mult_gen.c

comba_mult_smallgen: comba_mult_smallgen.c
	gcc -o comba_mult_smallgen comba_mult_smallgen.c

comba_sqr_gen: comba_sqr_gen.c
	gcc -o comba_sqr_gen comba_sqr_gen.c

comba_sqr_smallgen: comba_sqr_smallgen.c
	gcc -o comba_sqr_smallgen comba_sqr_smallgen.c

# regenerate the unrolled sources; sed strips trailing blanks from the output
regen: comba_mult_gen comba_mult_smallgen comba_sqr_gen comba_sqr_smallgen
	for i in 3 4 6 7 8 9 12 17 20 24 28 32 48 64; do \
		./comba_mult_gen $$i | sed -e 's/ *$$//' > ../mul/fp_mul_comba_$$i.c; \
	done
	./comba_mult_smallgen > ../mul/fp_mul_comba_small_set.c
	for i in 3 4 6 7 8 9 12 17 20 24 28 32 48 64; do \
		./comba_sqr_gen $$i | sed -e 's/ *$$//' > ../sqr/fp_sqr_comba_$$i.c; \
	done
	./comba_sqr_smallgen > ../sqr/fp_sqr_comba_small_set.c

View File

@ -16,6 +16,15 @@
#include <ctype.h>
#include <limits.h>
/* 0xMaMiPaXX
* Major
* Minor
* Patch
* XX - undefined
*/
#define TFM_VERSION 0x000D0000
#define TFM_VERSION_S "v0.13.0"
#ifndef MIN
#define MIN(x,y) ((x)<(y)?(x):(y))
#endif
@ -104,6 +113,10 @@
#error FP_MAX_SIZE must be a multiple of CHAR_BIT
#endif
#if __SIZEOF_LONG__ == 8
#define FP_64BIT
#endif
/* autodetect x86-64 and make sure we are using 64-bit digits with x86-64 asm */
#if defined(__x86_64__)
#if defined(TFM_X86) || defined(TFM_SSE2) || defined(TFM_ARM)
@ -245,11 +258,15 @@
#if defined(FP_64BIT)
/* for GCC only on supported platforms */
#ifndef CRYPT
typedef unsigned long ulong64;
#endif
typedef unsigned long long ulong64;
#endif /* CRYPT */
typedef ulong64 fp_digit;
#define SIZEOF_FP_DIGIT 8
typedef unsigned long fp_word __attribute__ ((mode(TI)));
#else
/* this is to make porting into LibTomCrypt easier :-) */
#ifndef CRYPT
#if defined(_MSC_VER) || defined(__BORLANDC__)
@ -258,14 +275,16 @@
#else
typedef unsigned long long ulong64;
typedef signed long long long64;
#endif
#endif
typedef unsigned long fp_digit;
#endif /* defined(_MSC_VER) ... */
#endif /* CRYPT */
typedef unsigned int fp_digit;
#define SIZEOF_FP_DIGIT 4
typedef ulong64 fp_word;
#endif
#endif /* FP_64BIT */
/* # of digits this is */
#define DIGIT_BIT (int)((CHAR_BIT) * sizeof(fp_digit))
#define DIGIT_BIT ((CHAR_BIT) * SIZEOF_FP_DIGIT)
#define FP_MASK (fp_digit)(-1)
#define FP_SIZE (FP_MAX_SIZE/DIGIT_BIT)
@ -311,6 +330,9 @@ const char *fp_ident(void);
/* set to a small digit */
void fp_set(fp_int *a, fp_digit b);
/* makes a pseudo-random int of a given size */
void fp_rand(fp_int *a, int digits);
/* copy from a to b */
#define fp_copy(a, b) (void)(((a) != (b)) && memcpy((b), (a), sizeof(fp_int)))
#define fp_init_copy(a, b) fp_copy(b, a)
@ -422,8 +444,11 @@ int fp_exptmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d);
/* perform a Miller-Rabin test of a to the base b and store result in "result" */
void fp_prime_miller_rabin (fp_int * a, fp_int * b, int *result);
#define FP_PRIME_SIZE 256
/* 256 trial divisions + 8 Miller-Rabins, returns FP_YES if probable prime */
int fp_isprime(fp_int *a);
/* extended version of fp_isprime, do 't' Miller-Rabins instead of only 8 */
int fp_isprime_ex(fp_int *a, int t);
/* Primality generation flags */
#define TFM_PRIME_BBS 0x0001 /* BBS style prime */
@ -450,119 +475,13 @@ void fp_read_signed_bin(fp_int *a, unsigned char *b, int c);
void fp_to_signed_bin(fp_int *a, unsigned char *b);
int fp_read_radix(fp_int *a, char *str, int radix);
int fp_radix_size(fp_int *a, int radix, int *size);
int fp_toradix(fp_int *a, char *str, int radix);
int fp_toradix_n(fp_int * a, char *str, int radix, int maxlen);
/* VARIOUS LOW LEVEL STUFFS */
void s_fp_add(fp_int *a, fp_int *b, fp_int *c);
void s_fp_sub(fp_int *a, fp_int *b, fp_int *c);
void fp_reverse(unsigned char *s, int len);
void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C);
#ifdef TFM_SMALL_SET
void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL3
void fp_mul_comba3(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL4
void fp_mul_comba4(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL6
void fp_mul_comba6(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL7
void fp_mul_comba7(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL8
void fp_mul_comba8(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL9
void fp_mul_comba9(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL12
void fp_mul_comba12(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL17
void fp_mul_comba17(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL20
void fp_mul_comba20(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL24
void fp_mul_comba24(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL28
void fp_mul_comba28(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL32
void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL48
void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL64
void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C);
#endif
void fp_sqr_comba(fp_int *A, fp_int *B);
#ifdef TFM_SMALL_SET
void fp_sqr_comba_small(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR3
void fp_sqr_comba3(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR4
void fp_sqr_comba4(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR6
void fp_sqr_comba6(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR7
void fp_sqr_comba7(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR8
void fp_sqr_comba8(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR9
void fp_sqr_comba9(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR12
void fp_sqr_comba12(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR17
void fp_sqr_comba17(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR20
void fp_sqr_comba20(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR24
void fp_sqr_comba24(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR28
void fp_sqr_comba28(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR32
void fp_sqr_comba32(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR48
void fp_sqr_comba48(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR64
void fp_sqr_comba64(fp_int *A, fp_int *B);
#endif
extern const char *fp_s_rmap;
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

125
src/headers/tfm_private.h Normal file
View File

@ -0,0 +1,125 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
/* Library-internal declarations: low level helpers and the comba
 * multiplier/squaring prototypes. Pulls in the public tfm.h itself.
 */
#ifndef TFM_PRIVATE_H_
#define TFM_PRIVATE_H_
#include <tfm.h>
/* VARIOUS LOW LEVEL STUFFS */
/* unsigned addition */
void s_fp_add(fp_int *a, fp_int *b, fp_int *c);
/* unsigned subtraction, requires ||a|| >= ||b|| */
void s_fp_sub(fp_int *a, fp_int *b, fp_int *c);
/* reverse an array, used for radix code */
void fp_reverse(unsigned char *s, int len);
/* generic comba multiplier; fp_mul() falls back to it when no unrolled
 * variant below applies
 */
void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C);
/* Unrolled multipliers, one per enabled TFM_MULn / TFM_SMALL_SET option.
 * NOTE(review): the generated definitions are additionally guarded by
 * FP_SIZE >= 2*n, these prototypes are not.
 */
#ifdef TFM_SMALL_SET
void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL3
void fp_mul_comba3(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL4
void fp_mul_comba4(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL6
void fp_mul_comba6(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL7
void fp_mul_comba7(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL8
void fp_mul_comba8(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL9
void fp_mul_comba9(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL12
void fp_mul_comba12(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL17
void fp_mul_comba17(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL20
void fp_mul_comba20(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL24
void fp_mul_comba24(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL28
void fp_mul_comba28(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL32
void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL48
void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C);
#endif
#ifdef TFM_MUL64
void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C);
#endif
/* presumably the generic comba squaring routine (counterpart of
 * fp_mul_comba) -- TODO confirm against fp_sqr_comba.c
 */
void fp_sqr_comba(fp_int *A, fp_int *B);
/* Unrolled squaring variants, one per enabled TFM_SQRn / TFM_SMALL_SET option */
#ifdef TFM_SMALL_SET
void fp_sqr_comba_small(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR3
void fp_sqr_comba3(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR4
void fp_sqr_comba4(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR6
void fp_sqr_comba6(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR7
void fp_sqr_comba7(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR8
void fp_sqr_comba8(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR9
void fp_sqr_comba9(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR12
void fp_sqr_comba12(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR17
void fp_sqr_comba17(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR20
void fp_sqr_comba20(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR24
void fp_sqr_comba24(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR28
void fp_sqr_comba28(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR32
void fp_sqr_comba32(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR48
void fp_sqr_comba48(fp_int *A, fp_int *B);
#endif
#ifdef TFM_SQR64
void fp_sqr_comba64(fp_int *A, fp_int *B);
#endif
/* chars used in radix conversions, indexed by digit value */
extern const char *fp_s_rmap;
#endif /* TFM_PRIVATE_H_ */
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include "tfm.h"
#include <tfm_private.h>
const char *fp_ident(void)
{
@ -15,11 +15,14 @@ const char *fp_ident(void)
memset(buf, 0, sizeof(buf));
snprintf(buf, sizeof(buf)-1,
"TomsFastMath (%s)\n"
"TomsFastMath " TFM_VERSION_S "\n"
#if defined(TFM_IDENT_BUILD_DATE)
"Built on " __DATE__ " at " __TIME__ "\n"
#endif
"\n"
"Sizeofs\n"
"\tfp_digit = %u\n"
"\tfp_word = %u\n"
"\tfp_digit = %lu\n"
"\tfp_word = %lu\n"
"\n"
"FP_MAX_SIZE = %u\n"
"\n"
@ -70,11 +73,11 @@ const char *fp_ident(void)
#ifdef TFM_HUGE
" TFM_HUGE "
#endif
"\n", __DATE__, sizeof(fp_digit), sizeof(fp_word), FP_MAX_SIZE);
"\n", (unsigned long)sizeof(fp_digit), (unsigned long)sizeof(fp_word), FP_MAX_SIZE);
if (sizeof(fp_digit) == sizeof(fp_word)) {
strncat(buf, "WARNING: sizeof(fp_digit) == sizeof(fp_word), this build is likely to not work properly.\n",
sizeof(buf)-1);
sizeof(buf) - strlen(buf) - 1);
}
return buf;
}

41
src/misc/fp_rand.c Normal file
View File

@ -0,0 +1,41 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* makes a pseudo-random int of a given size */
void fp_rand(fp_int *a, int digits)
{
fp_digit d;
fp_zero(a);
if (digits <= 0) {
return;
}
/* first place a random non-zero digit */
do {
d = ((fp_digit) abs (rand ())) & FP_MASK;
} while (d == 0);
fp_add_d (a, d, a);
while (--digits > 0) {
fp_lshd (a, 1);
fp_add_d (a, ((fp_digit) abs (rand ())), a);
}
return;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
void fp_set(fp_int *a, fp_digit b)
{

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* computes a = B**n mod b without division or multiplication useful for
* normalizing numbers in a Montgomery system.

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/******************************************************************/
#if defined(TFM_X86) && !defined(TFM_SSE2)
@ -29,8 +29,8 @@ asm( \
"adcl $0,%%edx \n\t" \
"movl %%edx,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \
: "%eax", "%edx", "%cc")
:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
: "%eax", "%edx", "cc")
#define PROPCARRY \
asm( \
@ -39,7 +39,7 @@ asm( \
"movzbl %%al,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy) \
: "%eax", "%cc")
: "%eax", "cc")
/******************************************************************/
#elif defined(TFM_X86_64)
@ -62,7 +62,7 @@ asm( \
"movq %%rdx,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
: "%rax", "%rdx", "%cc")
: "%rax", "%rdx", "cc")
#define INNERMUL8 \
asm( \
@ -155,7 +155,7 @@ asm( \
\
:"=r"(_c), "=r"(cy) \
: "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
: "%rax", "%rdx", "%r10", "%r11", "%cc")
: "%rax", "%rdx", "%r10", "%r11", "cc")
#define PROPCARRY \
@ -165,7 +165,7 @@ asm( \
"movzbq %%al,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy) \
: "%rax", "%cc")
: "%rax", "cc")
/******************************************************************/
#elif defined(TFM_SSE2)
@ -280,7 +280,7 @@ asm( \
"movzbl %%al,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy) \
: "%eax", "%cc")
: "%eax", "cc")
/******************************************************************/
#elif defined(TFM_ARM)
@ -300,7 +300,7 @@ asm( \
" MOVCC %0,#0 \n\t" \
" UMLAL r0,%0,%3,%4 \n\t" \
" STR r0,%1 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","%cc");
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","cc");
#define PROPCARRY \
asm( \
@ -309,7 +309,7 @@ asm( \
" STR r0,%1 \n\t" \
" MOVCS %0,#1 \n\t" \
" MOVCC %0,#0 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc");
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","cc");
/******************************************************************/
#elif defined(TFM_PPC32)
@ -325,22 +325,18 @@ asm( \
asm( \
" mullw 16,%3,%4 \n\t" \
" mulhwu 17,%3,%4 \n\t" \
" addc 16,16,%0 \n\t" \
" addc 16,16,%2 \n\t" \
" addze 17,17 \n\t" \
" lwz 18,%1 \n\t" \
" addc 16,16,18 \n\t" \
" addc %1,16,%5 \n\t" \
" addze %0,17 \n\t" \
" stw 16,%1 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","%cc"); ++tmpm;
:"=r"(cy),"=r"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "cc"); ++tmpm;
#define PROPCARRY \
asm( \
" lwz 16,%1 \n\t" \
" addc 16,16,%0 \n\t" \
" stw 16,%1 \n\t" \
" xor %0,%0,%0 \n\t" \
" addze %0,%0 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","%cc");
" addc %1,%3,%2 \n\t" \
" xor %0,%2,%2 \n\t" \
" addze %0,%2 \n\t" \
:"=r"(cy),"=r"(_c[0]):"0"(cy),"1"(_c[0]):"cc");
/******************************************************************/
#elif defined(TFM_PPC64)
@ -362,7 +358,7 @@ asm( \
" addc r16,r16,r18 \n\t" \
" addze %0,r17 \n\t" \
" sdx r16,0,%1 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"r16", "r17", "r18","%cc"); ++tmpm;
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"r16", "r17", "r18","cc"); ++tmpm;
#define PROPCARRY \
asm( \
@ -371,7 +367,7 @@ asm( \
" sdx r16,0,%1 \n\t" \
" xor %0,%0,%0 \n\t" \
" addze %0,%0 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r16","%cc");
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r16","cc");
/******************************************************************/
#elif defined(TFM_AVR32)
@ -401,7 +397,7 @@ asm( \
" st.w %1,r2 \n\t" \
" eor %0,%0 \n\t" \
" acr %0 \n\t" \
:"=r"(cy),"=r"(&_c[0]):"0"(cy),"1"(&_c[0]):"r2","%cc");
:"=r"(cy),"=r"(&_c[0]):"0"(cy),"1"(&_c[0]):"r2","cc");
/******************************************************************/
#elif defined(TFM_MIPS)
@ -509,7 +505,7 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
_c = c + x;
tmpm = m->dp;
y = 0;
#if (defined(TFM_SSE2) || defined(TFM_X86_64))
#if defined(INNERMUL8)
for (; y < (pa & ~7); y += 8) {
INNERMUL8;
_c += 8;

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* setups the montgomery reduction */
int fp_montgomery_setup(fp_int *a, fp_digit *rho)

View File

@ -7,122 +7,133 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* c = a * b */
void fp_mul(fp_int *A, fp_int *B, fp_int *C)
{
int y, yy;
int y, old_used;
#if FP_SIZE >= 48
int yy;
#endif
old_used = C->used;
/* call generic if we're out of range */
if (A->used + B->used > FP_SIZE) {
fp_mul_comba(A, B, C);
return ;
goto clean;
}
y = MAX(A->used, B->used);
#if FP_SIZE >= 48
yy = MIN(A->used, B->used);
#endif
/* pick a comba (unrolled 4/8/16/32 x or rolled) based on the size
of the largest input. We also want to avoid doing excess mults if the
inputs are not close to the next power of two. That is, for example,
if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications
*/
#ifdef TFM_MUL3
#if defined(TFM_MUL3) && FP_SIZE >= 6
if (y <= 3) {
fp_mul_comba3(A,B,C);
return;
goto clean;
}
#endif
#ifdef TFM_MUL4
#if defined(TFM_MUL4) && FP_SIZE >= 8
if (y == 4) {
fp_mul_comba4(A,B,C);
return;
goto clean;
}
#endif
#ifdef TFM_MUL6
#if defined(TFM_MUL6) && FP_SIZE >= 12
if (y <= 6) {
fp_mul_comba6(A,B,C);
return;
goto clean;
}
#endif
#ifdef TFM_MUL7
#if defined(TFM_MUL7) && FP_SIZE >= 14
if (y == 7) {
fp_mul_comba7(A,B,C);
return;
goto clean;
}
#endif
#ifdef TFM_MUL8
#if defined(TFM_MUL8) && FP_SIZE >= 16
if (y == 8) {
fp_mul_comba8(A,B,C);
return;
goto clean;
}
#endif
#ifdef TFM_MUL9
#if defined(TFM_MUL9) && FP_SIZE >= 18
if (y == 9) {
fp_mul_comba9(A,B,C);
return;
goto clean;
}
#endif
#ifdef TFM_MUL12
#if defined(TFM_MUL12) && FP_SIZE >= 24
if (y <= 12) {
fp_mul_comba12(A,B,C);
return;
goto clean;
}
#endif
#ifdef TFM_MUL17
#if defined(TFM_MUL17) && FP_SIZE >= 34
if (y <= 17) {
fp_mul_comba17(A,B,C);
return;
goto clean;
}
#endif
#ifdef TFM_SMALL_SET
#if defined(TFM_SMALL_SET) && FP_SIZE >= 32
if (y <= 16) {
fp_mul_comba_small(A,B,C);
return;
goto clean;
}
#endif
#if defined(TFM_MUL20)
#if defined(TFM_MUL20) && FP_SIZE >= 40
if (y <= 20) {
fp_mul_comba20(A,B,C);
return;
goto clean;
}
#endif
#if defined(TFM_MUL24)
#if defined(TFM_MUL24) && FP_SIZE >= 48
if (yy >= 16 && y <= 24) {
fp_mul_comba24(A,B,C);
return;
goto clean;
}
#endif
#if defined(TFM_MUL28)
#if defined(TFM_MUL28) && FP_SIZE >= 56
if (yy >= 20 && y <= 28) {
fp_mul_comba28(A,B,C);
return;
goto clean;
}
#endif
#if defined(TFM_MUL32)
#if defined(TFM_MUL32) && FP_SIZE >= 64
if (yy >= 24 && y <= 32) {
fp_mul_comba32(A,B,C);
return;
goto clean;
}
#endif
#if defined(TFM_MUL48)
#if defined(TFM_MUL48) && FP_SIZE >= 96
if (yy >= 40 && y <= 48) {
fp_mul_comba48(A,B,C);
return;
goto clean;
}
#endif
#if defined(TFM_MUL64)
#if defined(TFM_MUL64) && FP_SIZE >= 128
if (yy >= 56 && y <= 64) {
fp_mul_comba64(A,B,C);
return;
goto clean;
}
#endif
fp_mul_comba(A,B,C);
clean:
for (y = C->used; y < old_used; y++) {
C->dp[y] = 0;
}
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */
/* $Source: /cvs/libtom/tomsfastmath/src/mul/fp_mul.c,v $ */
/* $Revision: 1.1 $ */
/* $Date: 2006/12/31 21:25:53 $ */

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
void fp_mul_2(fp_int * a, fp_int * b)
{

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* c = a * 2**d */
void fp_mul_2d(fp_int *a, int b, fp_int *c)

View File

@ -12,7 +12,7 @@
*/
#include <tfm.h>
#include <tfm_private.h>
#if defined(TFM_PRESCOTT) && defined(TFM_SSE2)
#undef TFM_SSE2
@ -53,7 +53,7 @@ asm( \
"addl %%eax,%0 \n\t" \
"adcl %%edx,%1 \n\t" \
"adcl $0,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
#elif defined(TFM_X86_64)
/* x86-64 optimized */
@ -88,7 +88,7 @@ asm ( \
"addq %%rax,%0 \n\t" \
"adcq %%rdx,%1 \n\t" \
"adcq $0,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");
#elif defined(TFM_SSE2)
/* use SSE2 optimizations */
@ -128,7 +128,7 @@ asm( \
"movd %%mm0,%%eax \n\t" \
"adcl %%eax,%1 \n\t" \
"adcl $0,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%cc");
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","cc");
#elif defined(TFM_ARM)
/* ARM code */
@ -155,7 +155,7 @@ asm( \
" ADDS %0,%0,r0 \n\t" \
" ADCS %1,%1,r1 \n\t" \
" ADC %2,%2,#0 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc");
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");
#elif defined(TFM_PPC32)
/* For 32-bit PPC */
@ -297,8 +297,11 @@ asm( \
#define MULADD(i, j) \
do { fp_word t; \
t = (fp_word)c0 + ((fp_word)i) * ((fp_word)j); c0 = t; \
t = (fp_word)c1 + (t >> DIGIT_BIT); c1 = t; c2 += t >> DIGIT_BIT; \
t = (fp_word)c0 + ((fp_word)i) * ((fp_word)j); \
c0 = t; \
t = (fp_word)c1 + (t >> DIGIT_BIT); \
c1 = t; \
c2 += t >> DIGIT_BIT; \
} while (0);
#endif
@ -346,7 +349,9 @@ void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
/* execute loop */
COMBA_FORWARD;
for (iz = 0; iz < iy; ++iz) {
MULADD(*tmpx++, *tmpy--);
fp_digit _tmpx = *tmpx++;
fp_digit _tmpy = *tmpy--;
MULADD(_tmpx, _tmpy);
}
/* store term */

View File

@ -1,7 +1,7 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL12
#if defined(TFM_MUL12) && FP_SIZE >= 24
void fp_mul_comba12(fp_int *A, fp_int *B, fp_int *C)
{
fp_digit c0, c1, c2, at[24];
@ -109,3 +109,8 @@ void fp_mul_comba12(fp_int *A, fp_int *B, fp_int *C)
COMBA_FINI;
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,7 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL17
#if defined(TFM_MUL17) && FP_SIZE >= 34
void fp_mul_comba17(fp_int *A, fp_int *B, fp_int *C)
{
fp_digit c0, c1, c2, at[34];
@ -149,3 +149,8 @@ void fp_mul_comba17(fp_int *A, fp_int *B, fp_int *C)
COMBA_FINI;
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,7 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL20
#if defined(TFM_MUL20) && FP_SIZE >= 40
void fp_mul_comba20(fp_int *A, fp_int *B, fp_int *C)
{
fp_digit c0, c1, c2, at[40];
@ -173,3 +173,8 @@ void fp_mul_comba20(fp_int *A, fp_int *B, fp_int *C)
COMBA_FINI;
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,7 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL24
#if defined(TFM_MUL24) && FP_SIZE >= 48
void fp_mul_comba24(fp_int *A, fp_int *B, fp_int *C)
{
fp_digit c0, c1, c2, at[48];
@ -205,3 +205,8 @@ void fp_mul_comba24(fp_int *A, fp_int *B, fp_int *C)
COMBA_FINI;
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,7 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL28
#if defined(TFM_MUL28) && FP_SIZE >= 56
void fp_mul_comba28(fp_int *A, fp_int *B, fp_int *C)
{
fp_digit c0, c1, c2, at[56];
@ -237,3 +237,8 @@ void fp_mul_comba28(fp_int *A, fp_int *B, fp_int *C)
COMBA_FINI;
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,7 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL3
#if defined(TFM_MUL3) && FP_SIZE >= 6
void fp_mul_comba3(fp_int *A, fp_int *B, fp_int *C)
{
fp_digit c0, c1, c2, at[6];
@ -37,3 +37,8 @@ void fp_mul_comba3(fp_int *A, fp_int *B, fp_int *C)
COMBA_FINI;
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,7 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL32
#if defined(TFM_MUL32) && FP_SIZE >= 64
void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C)
{
fp_digit c0, c1, c2, at[64];
@ -283,3 +283,8 @@ void fp_mul_comba32(fp_int *A, fp_int *B, fp_int *C)
COMBA_FINI;
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,7 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL4
#if defined(TFM_MUL4) && FP_SIZE >= 8
void fp_mul_comba4(fp_int *A, fp_int *B, fp_int *C)
{
fp_digit c0, c1, c2, at[8];
@ -45,3 +45,8 @@ void fp_mul_comba4(fp_int *A, fp_int *B, fp_int *C)
COMBA_FINI;
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,7 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL48
#if defined(TFM_MUL48) && FP_SIZE >= 96
void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C)
{
fp_digit c0, c1, c2, at[96];
@ -397,3 +397,8 @@ void fp_mul_comba48(fp_int *A, fp_int *B, fp_int *C)
COMBA_FINI;
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,7 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL6
#if defined(TFM_MUL6) && FP_SIZE >= 12
void fp_mul_comba6(fp_int *A, fp_int *B, fp_int *C)
{
fp_digit c0, c1, c2, at[12];
@ -61,3 +61,8 @@ void fp_mul_comba6(fp_int *A, fp_int *B, fp_int *C)
COMBA_FINI;
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,7 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL64
#if defined(TFM_MUL64) && FP_SIZE >= 128
void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C)
{
fp_digit c0, c1, c2, at[128];
@ -525,3 +525,8 @@ void fp_mul_comba64(fp_int *A, fp_int *B, fp_int *C)
COMBA_FINI;
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,7 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL7
#if defined(TFM_MUL7) && FP_SIZE >= 14
void fp_mul_comba7(fp_int *A, fp_int *B, fp_int *C)
{
fp_digit c0, c1, c2, at[14];
@ -69,3 +69,8 @@ void fp_mul_comba7(fp_int *A, fp_int *B, fp_int *C)
COMBA_FINI;
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,7 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL8
#if defined(TFM_MUL8) && FP_SIZE >= 16
void fp_mul_comba8(fp_int *A, fp_int *B, fp_int *C)
{
fp_digit c0, c1, c2, at[16];
@ -77,3 +77,8 @@ void fp_mul_comba8(fp_int *A, fp_int *B, fp_int *C)
COMBA_FINI;
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1,7 +1,7 @@
#define TFM_DEFINES
#include "fp_mul_comba.c"
#ifdef TFM_MUL9
#if defined(TFM_MUL9) && FP_SIZE >= 18
void fp_mul_comba9(fp_int *A, fp_int *B, fp_int *C)
{
fp_digit c0, c1, c2, at[18];
@ -85,3 +85,8 @@ void fp_mul_comba9(fp_int *A, fp_int *B, fp_int *C)
COMBA_FINI;
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -1226,3 +1226,8 @@ void fp_mul_comba_small(fp_int *A, fp_int *B, fp_int *C)
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* c = a * b */
void fp_mul_d(fp_int *a, fp_digit b, fp_int *c)

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* d = a * b (mod c) */
int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
{

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* c = (a, b) */
void fp_gcd(fp_int *a, fp_int *b, fp_int *c)

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
static int fp_invmod_slow (fp_int * a, fp_int * b, fp_int * c)
{

View File

@ -7,71 +7,11 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
/* a few primes */
static const fp_digit primes[256] = {
0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
};
#include <tfm_private.h>
int fp_isprime(fp_int *a)
{
fp_int b;
fp_digit d;
int r, res;
/* do trial division */
for (r = 0; r < 256; r++) {
fp_mod_d(a, primes[r], &d);
if (d == 0) {
return FP_NO;
}
}
/* now do 8 miller rabins */
fp_init(&b);
for (r = 0; r < 8; r++) {
fp_set(&b, primes[r]);
fp_prime_miller_rabin(a, &b, &res);
if (res == FP_NO) {
return FP_NO;
}
}
return FP_YES;
return fp_isprime_ex(a, 8);
}
/* $Source$ */

View File

@ -0,0 +1,83 @@
/* TomsFastMath, a fast ISO C bignum library.
*
* This project is meant to fill in where LibTomMath
* falls short. That is speed ;-)
*
* This project is public domain and free for all purposes.
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm_private.h>
/* Table of small primes: the first 256 primes in ascending order, from 2
 * (0x0002) up to 1619 (0x0653).
 *
 * fp_isprime_ex() below uses the table twice: every entry is a trial
 * divisor, and the leading entries serve as the Miller-Rabin bases.
 *
 * NOTE(review): the array is sized by FP_PRIME_SIZE, but the initializer
 * lists exactly 256 values and the trial-division loop is hard-coded to 256,
 * so this presumably relies on FP_PRIME_SIZE being 256 -- confirm against
 * the header that defines it.
 */
static const fp_digit primes[FP_PRIME_SIZE] = {
0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
};
/* Probabilistic primality test of "a".
 *
 * a - the number to test
 * t - number of Miller-Rabin rounds to run; round r uses primes[r] as its
 *     base, so t must lie in 1..FP_PRIME_SIZE
 *
 * Returns FP_YES when a equals one of the table primes, or when a survives
 * trial division by the table and all t Miller-Rabin rounds.  Returns FP_NO
 * otherwise, including when t is out of range.
 */
int fp_isprime_ex(fp_int *a, int t)
{
   fp_int   b;
   fp_digit d;
   int      r, res;

   if (t <= 0 || t > FP_PRIME_SIZE) {
      return FP_NO;
   }

   /* do trial division */
   for (r = 0; r < 256; r++) {
      fp_mod_d(a, primes[r], &d);
      if (d == 0) {
         /* A zero remainder means primes[r] divides a.  That makes a
          * composite unless a *is* primes[r]: a table prime divides itself
          * and must not be rejected.  Because the table is scanned in
          * ascending order, the first divisor found for a prime input is
          * the input itself.
          */
         return (fp_cmp_d(a, primes[r]) == FP_EQ) ? FP_YES : FP_NO;
      }
   }

   /* now do 't' miller rabins */
   fp_init(&b);
   for (r = 0; r < t; r++) {
      fp_set(&b, primes[r]);
      fp_prime_miller_rabin(a, &b, &res);
      if (res == FP_NO) {
         return FP_NO;
      }
   }
   return FP_YES;
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* c = [a, b] */
void fp_lcm(fp_int *a, fp_int *b, fp_int *c)

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* Miller-Rabin test of "a" to the base of "b" as described in
* HAC pp. 139 Algorithm 4.24

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* This is possibly the mother of all prime generation functions, muahahahahaha! */
int fp_prime_random_ex(fp_int *a, int t, int size, int flags, tfm_prime_callback cb, void *dat)
@ -16,7 +16,7 @@ int fp_prime_random_ex(fp_int *a, int t, int size, int flags, tfm_prime_callback
int res, err, bsize, maskOR_msb_offset;
/* sanity check the input */
if (size <= 1 || t <= 0) {
if (size <= 1 || cb == NULL || t <= 0 || t > FP_PRIME_SIZE) {
return FP_VAL;
}
@ -35,7 +35,7 @@ int fp_prime_random_ex(fp_int *a, int t, int size, int flags, tfm_prime_callback
}
/* calc the maskAND value for the MSbyte*/
maskAND = 0xFF >> (8 - (size & 7));
maskAND = 0xFF >> ((8 - (size & 7)) & 7);
/* calc the maskOR_msb */
maskOR_msb = 0;
@ -71,7 +71,7 @@ int fp_prime_random_ex(fp_int *a, int t, int size, int flags, tfm_prime_callback
fp_read_unsigned_bin(a, tmp, bsize);
/* is it prime? */
res = fp_isprime(a);
res = fp_isprime_ex(a, t);
if (res == FP_NO) continue;
if (flags & TFM_PRIME_SAFE) {
@ -80,7 +80,7 @@ int fp_prime_random_ex(fp_int *a, int t, int size, int flags, tfm_prime_callback
fp_div_2(a, a);
/* is it prime? */
res = fp_isprime(a);
res = fp_isprime_ex(a, t);
}
} while (res == FP_NO);

View File

@ -7,114 +7,120 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
/* b = a*a */
void fp_sqr(fp_int *A, fp_int *B)
{
int y;
int y, old_used;
old_used = B->used;
/* call generic if we're out of range */
if (A->used + A->used > FP_SIZE) {
fp_sqr_comba(A, B);
return ;
goto clean;
}
y = A->used;
#if defined(TFM_SQR3)
#if defined(TFM_SQR3) && FP_SIZE >= 6
if (y <= 3) {
fp_sqr_comba3(A,B);
return;
goto clean;
}
#endif
#if defined(TFM_SQR4)
#if defined(TFM_SQR4) && FP_SIZE >= 8
if (y == 4) {
fp_sqr_comba4(A,B);
return;
goto clean;
}
#endif
#if defined(TFM_SQR6)
#if defined(TFM_SQR6) && FP_SIZE >= 12
if (y <= 6) {
fp_sqr_comba6(A,B);
return;
goto clean;
}
#endif
#if defined(TFM_SQR7)
#if defined(TFM_SQR7) && FP_SIZE >= 14
if (y == 7) {
fp_sqr_comba7(A,B);
return;
goto clean;
}
#endif
#if defined(TFM_SQR8)
#if defined(TFM_SQR8) && FP_SIZE >= 16
if (y == 8) {
fp_sqr_comba8(A,B);
return;
goto clean;
}
#endif
#if defined(TFM_SQR9)
#if defined(TFM_SQR9) && FP_SIZE >= 18
if (y == 9) {
fp_sqr_comba9(A,B);
return;
goto clean;
}
#endif
#if defined(TFM_SQR12)
#if defined(TFM_SQR12) && FP_SIZE >= 24
if (y <= 12) {
fp_sqr_comba12(A,B);
return;
goto clean;
}
#endif
#if defined(TFM_SQR17)
#if defined(TFM_SQR17) && FP_SIZE >= 34
if (y <= 17) {
fp_sqr_comba17(A,B);
return;
goto clean;
}
#endif
#if defined(TFM_SMALL_SET)
if (y <= 16) {
fp_sqr_comba_small(A,B);
return;
goto clean;
}
#endif
#if defined(TFM_SQR20)
#if defined(TFM_SQR20) && FP_SIZE >= 40
if (y <= 20) {
fp_sqr_comba20(A,B);
return;
goto clean;
}
#endif
#if defined(TFM_SQR24)
#if defined(TFM_SQR24) && FP_SIZE >= 48
if (y <= 24) {
fp_sqr_comba24(A,B);
return;
goto clean;
}
#endif
#if defined(TFM_SQR28)
#if defined(TFM_SQR28) && FP_SIZE >= 56
if (y <= 28) {
fp_sqr_comba28(A,B);
return;
goto clean;
}
#endif
#if defined(TFM_SQR32)
#if defined(TFM_SQR32) && FP_SIZE >= 64
if (y <= 32) {
fp_sqr_comba32(A,B);
return;
goto clean;
}
#endif
#if defined(TFM_SQR48)
#if defined(TFM_SQR48) && FP_SIZE >= 96
if (y <= 48) {
fp_sqr_comba48(A,B);
return;
goto clean;
}
#endif
#if defined(TFM_SQR64)
#if defined(TFM_SQR64) && FP_SIZE >= 128
if (y <= 64) {
fp_sqr_comba64(A,B);
return;
goto clean;
}
#endif
fp_sqr_comba(A, B);
clean:
for (y = B->used; y < old_used; y++) {
B->dp[y] = 0;
}
}
/* $Source$ */
/* $Revision$ */
/* $Date$ */
/* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr.c,v $ */
/* $Revision: 1.1 $ */
/* $Date: 2006/12/31 21:25:53 $ */

View File

@ -7,7 +7,7 @@
*
* Tom St Denis, tomstdenis@gmail.com
*/
#include <tfm.h>
#include <tfm_private.h>
#if defined(TFM_PRESCOTT) && defined(TFM_SSE2)
#undef TFM_SSE2
@ -41,7 +41,7 @@ asm( \
"addl %%eax,%0 \n\t" \
"adcl %%edx,%1 \n\t" \
"adcl $0,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc");
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","cc");
#define SQRADD2(i, j) \
asm( \
@ -53,16 +53,16 @@ asm( \
"addl %%eax,%0 \n\t" \
"adcl %%edx,%1 \n\t" \
"adcl $0,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
#define SQRADDSC(i, j) \
asm( \
"movl %6,%%eax \n\t" \
"mull %7 \n\t" \
"movl %3,%%eax \n\t" \
"mull %4 \n\t" \
"movl %%eax,%0 \n\t" \
"movl %%edx,%1 \n\t" \
"xorl %2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%eax","%edx","cc");
#define SQRADDAC(i, j) \
asm( \
@ -71,7 +71,7 @@ asm( \
"addl %%eax,%0 \n\t" \
"adcl %%edx,%1 \n\t" \
"adcl $0,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","cc");
#define SQRADDDB \
asm( \
@ -81,7 +81,7 @@ asm( \
"addl %6,%0 \n\t" \
"adcl %7,%1 \n\t" \
"adcl %8,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc");
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
#elif defined(TFM_X86_64)
/* x86-64 optimized */
@ -109,7 +109,7 @@ asm( \
"addq %%rax,%0 \n\t" \
"adcq %%rdx,%1 \n\t" \
"adcq $0,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","%cc");
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "x"(i) :"%rax","%rdx","cc");
#define SQRADD2(i, j) \
asm( \
@ -121,16 +121,16 @@ asm( \
"addq %%rax,%0 \n\t" \
"adcq %%rdx,%1 \n\t" \
"adcq $0,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");
#define SQRADDSC(i, j) \
asm( \
"movq %6,%%rax \n\t" \
"mulq %7 \n\t" \
"movq %3,%%rax \n\t" \
"mulq %4 \n\t" \
"movq %%rax,%0 \n\t" \
"movq %%rdx,%1 \n\t" \
"xorq %2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%rax","%rdx","cc");
#define SQRADDAC(i, j) \
asm( \
@ -139,7 +139,7 @@ asm( \
"addq %%rax,%0 \n\t" \
"adcq %%rdx,%1 \n\t" \
"adcq $0,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc");
#define SQRADDDB \
asm( \
@ -149,7 +149,7 @@ asm( \
"addq %6,%0 \n\t" \
"adcq %7,%1 \n\t" \
"adcq %8,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc");
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
#elif defined(TFM_SSE2)
@ -181,7 +181,7 @@ asm( \
"movd %%mm0,%%eax \n\t" \
"adcl %%eax,%1 \n\t" \
"adcl $0,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%cc");
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","cc");
#define SQRADD2(i, j) \
asm( \
@ -197,7 +197,7 @@ asm( \
"addl %%eax,%0 \n\t" \
"adcl %%edx,%1 \n\t" \
"adcl $0,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
#define SQRADDSC(i, j) \
asm( \
@ -221,7 +221,7 @@ asm( \
"addl %%eax,%0 \n\t" \
"adcl %%edx,%1 \n\t" \
"adcl $0,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","%cc");
:"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","cc");
#define SQRADDDB \
asm( \
@ -231,7 +231,7 @@ asm( \
"addl %6,%0 \n\t" \
"adcl %7,%1 \n\t" \
"adcl %8,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc");
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
#elif defined(TFM_ARM)
@ -260,7 +260,7 @@ asm( \
" ADDS %0,%0,r0 \n\t" \
" ADCS %1,%1,r1 \n\t" \
" ADC %2,%2,#0 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "%cc");
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "cc");
/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \
@ -272,13 +272,13 @@ asm( \
" ADDS %0,%0,r0 \n\t" \
" ADCS %1,%1,r1 \n\t" \
" ADC %2,%2,#0 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc");
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");
#define SQRADDSC(i, j) \
asm( \
" UMULL %0,%1,%6,%7 \n\t" \
" SUB %2,%2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "%cc");
:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "cc");
#define SQRADDAC(i, j) \
asm( \
@ -286,7 +286,7 @@ asm( \
" ADDS %0,%0,r0 \n\t" \
" ADCS %1,%1,r1 \n\t" \
" ADC %2,%2,#0 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "%cc");
:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "cc");
#define SQRADDDB \
asm( \
@ -296,7 +296,7 @@ asm( \
" ADDS %0,%0,%3 \n\t" \
" ADCS %1,%1,%4 \n\t" \
" ADC %2,%2,%5 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc");
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
#elif defined(TFM_PPC32)
@ -326,7 +326,7 @@ asm( \
" mulhwu 16,%6,%6 \n\t" \
" adde %1,%1,16 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","%cc");
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","cc");
/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \
@ -339,14 +339,14 @@ asm( \
" addc %0,%0,16 \n\t" \
" adde %1,%1,17 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","%cc");
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","cc");
#define SQRADDSC(i, j) \
asm( \
" mullw %0,%6,%7 \n\t" \
" mulhwu %1,%6,%7 \n\t" \
" xor %2,%2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc");
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");
#define SQRADDAC(i, j) \
asm( \
@ -355,7 +355,7 @@ asm( \
" mulhwu 16,%6,%7 \n\t" \
" adde %1,%1,16 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "%cc");
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "cc");
#define SQRADDDB \
asm( \
@ -365,7 +365,7 @@ asm( \
" addc %0,%0,%3 \n\t" \
" adde %1,%1,%4 \n\t" \
" adde %2,%2,%5 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc");
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
#elif defined(TFM_PPC64)
/* PPC64 */
@ -394,7 +394,7 @@ asm( \
" mulhdu r16,%6,%6 \n\t" \
" adde %1,%1,r16 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r16","%cc");
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r16","cc");
/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \
@ -407,14 +407,14 @@ asm( \
" addc %0,%0,r16 \n\t" \
" adde %1,%1,r17 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16", "r17","%cc");
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16", "r17","cc");
#define SQRADDSC(i, j) \
asm( \
" mulld %0,%6,%7 \n\t" \
" mulhdu %1,%6,%7 \n\t" \
" xor %2,%2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc");
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");
#define SQRADDAC(i, j) \
asm( \
@ -423,7 +423,7 @@ asm( \
" mulhdu r16,%6,%7 \n\t" \
" adde %1,%1,r16 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "%cc");
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "cc");
#define SQRADDDB \
asm( \
@ -433,7 +433,7 @@ asm( \
" addc %0,%0,%3 \n\t" \
" adde %1,%1,%4 \n\t" \
" adde %2,%2,%5 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc");
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
#elif defined(TFM_AVR32)
@ -501,7 +501,7 @@ asm( \
" add %0,%0,%3 \n\t" \
" adc %1,%1,%4 \n\t" \
" adc %2,%2,%5 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc");
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
#elif defined(TFM_MIPS)
@ -571,7 +571,7 @@ asm( \
" mflo %0 \n\t" \
" mfhi %1 \n\t" \
" xor %2,%2,%2 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc");
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");
#define SQRADDAC(i, j) \
asm( \

View File

@ -1,7 +1,7 @@
#define TFM_DEFINES
#include "fp_sqr_comba.c"
#ifdef TFM_SQR12
#if defined(TFM_SQR12) && FP_SIZE >= 24
void fp_sqr_comba12(fp_int *A, fp_int *B)
{
fp_digit *a, b[24], c0, c1, c2, sc0, sc1, sc2;

View File

@ -1,7 +1,7 @@
#define TFM_DEFINES
#include "fp_sqr_comba.c"
#ifdef TFM_SQR17
#if defined(TFM_SQR17) && FP_SIZE >= 34
void fp_sqr_comba17(fp_int *A, fp_int *B)
{
fp_digit *a, b[34], c0, c1, c2, sc0, sc1, sc2;

View File

@ -1,7 +1,7 @@
#define TFM_DEFINES
#include "fp_sqr_comba.c"
#ifdef TFM_SQR20
#if defined(TFM_SQR20) && FP_SIZE >= 40
void fp_sqr_comba20(fp_int *A, fp_int *B)
{
fp_digit *a, b[40], c0, c1, c2, sc0, sc1, sc2;

View File

@ -1,7 +1,7 @@
#define TFM_DEFINES
#include "fp_sqr_comba.c"
#ifdef TFM_SQR24
#if defined(TFM_SQR24) && FP_SIZE >= 48
void fp_sqr_comba24(fp_int *A, fp_int *B)
{
fp_digit *a, b[48], c0, c1, c2, sc0, sc1, sc2;

Some files were not shown because too many files have changed in this diff Show More