2004-08-25 04:43:43 +02:00
|
|
|
/* TomsFastMath, a fast ISO C bignum library.
|
|
|
|
*
|
|
|
|
* This project is meant to fill in where LibTomMath
|
|
|
|
* falls short. That is speed ;-)
|
|
|
|
*
|
|
|
|
* This project is public domain and free for all purposes.
|
|
|
|
*
|
2005-07-23 12:43:03 +02:00
|
|
|
* Tom St Denis, tomstdenis@gmail.com
|
2004-08-25 04:43:43 +02:00
|
|
|
*/
|
|
|
|
#include <tfm.h>
|
|
|
|
|
|
|
|
/* c = a * b */
|
|
|
|
void fp_mul(fp_int *A, fp_int *B, fp_int *C)
|
|
|
|
{
|
|
|
|
int r, y, yy, s;
|
|
|
|
fp_int ac, bd, comp, amb, cmd, t1, t2;
|
|
|
|
|
|
|
|
y = MAX(A->used, B->used);
|
|
|
|
yy = MIN(A->used, B->used);
|
|
|
|
if (yy <= 8 || y <= 64) {
|
|
|
|
|
|
|
|
/* pick a comba (unrolled 4/8/16/32 x or rolled) based on the size
|
|
|
|
of the largest input. We also want to avoid doing excess mults if the
|
|
|
|
inputs are not close to the next power of two. That is, for example,
|
|
|
|
if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications
|
|
|
|
*/
|
2005-07-23 12:43:03 +02:00
|
|
|
|
|
|
|
#ifdef TFM_SMALL_SET
|
|
|
|
if (y <= 16) {
|
|
|
|
fp_mul_comba_small(A,B,C);
|
|
|
|
#elif defined(TFM_HUGE)
|
|
|
|
if (0) { 1;
|
2004-09-19 03:31:44 +02:00
|
|
|
#endif
|
|
|
|
#if defined(TFM_HUGE)
|
2005-07-23 12:43:03 +02:00
|
|
|
} else if (y <= 32) {
|
2004-08-25 04:43:43 +02:00
|
|
|
fp_mul_comba32(A,B,C);
|
2005-07-23 12:43:03 +02:00
|
|
|
} else if (y <= 48) {
|
|
|
|
fp_mul_comba48(A,B,C);
|
|
|
|
} else if (y <= 64) {
|
|
|
|
fp_mul_comba64(A,B,C);
|
2004-08-25 04:43:43 +02:00
|
|
|
#endif
|
2005-07-23 12:43:03 +02:00
|
|
|
#if !defined(TFM_HUGE) && !defined(TFM_SMALL_SET)
|
|
|
|
{
|
|
|
|
#else
|
2004-08-25 04:43:43 +02:00
|
|
|
} else {
|
2005-07-23 12:43:03 +02:00
|
|
|
#endif
|
2004-08-25 04:43:43 +02:00
|
|
|
fp_mul_comba(A,B,C);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* do the karatsuba action
|
|
|
|
|
|
|
|
if A = ab and B = cd for ||a|| = r we need to solve
|
|
|
|
|
2005-07-23 12:43:03 +02:00
|
|
|
ac*r^2 + ((a+b)(c+d) - (ac + bd))*r + bd
|
2004-08-25 04:43:43 +02:00
|
|
|
|
|
|
|
So we solve for the three products then we form the final result with careful shifting
|
|
|
|
and addition.
|
|
|
|
|
|
|
|
Obvious points of optimization
|
|
|
|
|
|
|
|
- "ac" parts can be memcpy'ed with an offset [all you have to do is zero upto the next 8 digits]
|
|
|
|
- Similarly the "bd" parts can be memcpy'ed and zeroed to 8
|
|
|
|
-
|
|
|
|
|
|
|
|
*/
|
|
|
|
/* get our value of r */
|
|
|
|
r = yy >> 1;
|
|
|
|
|
|
|
|
/* now solve for ac */
|
|
|
|
// fp_copy(A, &t1); fp_rshd(&t1, r);
|
|
|
|
for (s = 0; s < A->used - r; s++) {
|
|
|
|
t1.dp[s] = A->dp[s+r];
|
|
|
|
}
|
|
|
|
for (; s < FP_SIZE; s++) {
|
|
|
|
t1.dp[s] = 0;
|
|
|
|
}
|
|
|
|
if (A->used >= r) {
|
|
|
|
t1.used = A->used - r;
|
|
|
|
} else {
|
|
|
|
t1.used = 0;
|
|
|
|
}
|
2005-07-23 12:43:03 +02:00
|
|
|
t1.sign = 0;
|
2004-08-25 04:43:43 +02:00
|
|
|
|
|
|
|
// fp_copy(B, &t2); fp_rshd(&t2, r);
|
|
|
|
for (s = 0; s < B->used - r; s++) {
|
|
|
|
t2.dp[s] = B->dp[s+r];
|
|
|
|
}
|
|
|
|
for (; s < FP_SIZE; s++) {
|
|
|
|
t2.dp[s] = 0;
|
|
|
|
}
|
|
|
|
if (B->used >= r) {
|
|
|
|
t2.used = B->used - r;
|
|
|
|
} else {
|
|
|
|
t2.used = 0;
|
|
|
|
}
|
2005-07-23 12:43:03 +02:00
|
|
|
t2.sign = 0;
|
2004-08-25 04:43:43 +02:00
|
|
|
|
|
|
|
fp_copy(&t1, &amb); fp_copy(&t2, &cmd);
|
|
|
|
fp_zero(&ac);
|
|
|
|
fp_mul(&t1, &t2, &ac);
|
|
|
|
|
|
|
|
/* now solve for bd */
|
|
|
|
// fp_mod_2d(A, r * DIGIT_BIT, &t1);
|
|
|
|
// fp_mod_2d(B, r * DIGIT_BIT, &t2);
|
|
|
|
for (s = 0; s < r; s++) {
|
|
|
|
t1.dp[s] = A->dp[s];
|
|
|
|
t2.dp[s] = B->dp[s];
|
|
|
|
}
|
|
|
|
for (; s < FP_SIZE; s++) {
|
2005-07-23 12:43:03 +02:00
|
|
|
t1.dp[s] = 0;
|
2004-08-25 04:43:43 +02:00
|
|
|
t2.dp[s] = 0;
|
|
|
|
}
|
|
|
|
t1.used = r;
|
|
|
|
t2.used = r;
|
|
|
|
fp_clamp(&t1);
|
|
|
|
fp_clamp(&t2);
|
|
|
|
|
2005-07-23 12:43:03 +02:00
|
|
|
s_fp_add(&amb, &t1, &amb); s_fp_add(&cmd, &t2, &cmd);
|
2004-08-25 04:43:43 +02:00
|
|
|
fp_zero(&bd);
|
|
|
|
fp_mul(&t1, &t2, &bd);
|
|
|
|
|
2005-07-23 12:43:03 +02:00
|
|
|
/* now get the (a+b)(c+d) term */
|
2004-08-25 04:43:43 +02:00
|
|
|
fp_zero(&comp);
|
|
|
|
fp_mul(&amb, &cmd, &comp);
|
|
|
|
|
|
|
|
/* now solve the system, do the middle term first */
|
2005-07-23 12:43:03 +02:00
|
|
|
s_fp_sub(&comp, &ac, &comp);
|
|
|
|
s_fp_sub(&comp, &bd, &comp);
|
2004-08-25 04:43:43 +02:00
|
|
|
fp_lshd(&comp, r);
|
|
|
|
|
|
|
|
/* leading term */
|
|
|
|
fp_lshd(&ac, r+r);
|
|
|
|
|
|
|
|
/* now sum them together */
|
|
|
|
s = A->sign ^ B->sign;
|
|
|
|
fp_zero(C);
|
|
|
|
fp_add(&ac, &comp, C);
|
|
|
|
fp_add(&bd, C, C);
|
|
|
|
C->sign = C->used ? s : FP_ZPOS;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-07-23 12:43:03 +02:00
|
|
|
|
|
|
|
/* $Source$ */
|
|
|
|
/* $Revision$ */
|
|
|
|
/* $Date$ */
|