tomsfastmath/fp_sqr_comba_generic.c
2010-07-22 10:06:23 +02:00

76 lines
1.6 KiB
C

/* generic comba squarer */
void fp_sqr_comba(fp_int *A, fp_int *B)
{
int pa, ix, iz;
fp_digit c0, c1, c2;
fp_int tmp, *dst;
/* get size of output and trim */
pa = A->used + A->used;
if (pa >= FP_SIZE) {
pa = FP_SIZE-1;
}
/* number of output digits to produce */
COMBA_START;
CLEAR_CARRY;
if (A == B) {
fp_zero(&tmp);
dst = &tmp;
} else {
fp_zero(B);
dst = B;
}
for (ix = 0; ix < pa; ix++) {
int tx, ty, iy;
fp_digit *tmpy, *tmpx;
/* get offsets into the two bignums */
ty = MIN(A->used-1, ix);
tx = ix - ty;
/* setup temp aliases */
tmpx = A->dp + tx;
tmpy = A->dp + ty;
/* this is the number of times the loop will iterrate, essentially its
while (tx++ < a->used && ty-- >= 0) { ... }
*/
iy = MIN(A->used-tx, ty+1);
/* now for squaring tx can never equal ty
* we halve the distance since they approach at a rate of 2x
* and we have to round because odd cases need to be executed
*/
iy = MIN(iy, (ty-tx+1)>>1);
/* forward carries */
CARRY_FORWARD;
/* execute loop */
for (iz = 0; iz < iy; iz++) {
SQRADD2(*tmpx++, *tmpy--);
}
/* even columns have the square term in them */
if ((ix&1) == 0) {
SQRADD(A->dp[ix>>1], A->dp[ix>>1]);
}
/* store it */
COMBA_STORE(dst->dp[ix]);
}
COMBA_STORE2(dst->dp[ix]);
COMBA_FINI;
/* setup dest */
dst->used = pa;
fp_clamp (dst);
if (dst != B) {
fp_copy(dst, B);
}
}