forked from ibphoenix/tomsfastmath
76 lines
1.6 KiB
C
76 lines
1.6 KiB
C
|
/* generic comba squarer */
|
||
|
void fp_sqr_comba(fp_int *A, fp_int *B)
|
||
|
{
|
||
|
int pa, ix, iz;
|
||
|
fp_digit c0, c1, c2;
|
||
|
fp_int tmp, *dst;
|
||
|
|
||
|
/* get size of output and trim */
|
||
|
pa = A->used + A->used;
|
||
|
if (pa >= FP_SIZE) {
|
||
|
pa = FP_SIZE-1;
|
||
|
}
|
||
|
|
||
|
/* number of output digits to produce */
|
||
|
COMBA_START;
|
||
|
CLEAR_CARRY;
|
||
|
|
||
|
if (A == B) {
|
||
|
fp_zero(&tmp);
|
||
|
dst = &tmp;
|
||
|
} else {
|
||
|
fp_zero(B);
|
||
|
dst = B;
|
||
|
}
|
||
|
|
||
|
for (ix = 0; ix < pa; ix++) {
|
||
|
int tx, ty, iy;
|
||
|
fp_digit *tmpy, *tmpx;
|
||
|
|
||
|
/* get offsets into the two bignums */
|
||
|
ty = MIN(A->used-1, ix);
|
||
|
tx = ix - ty;
|
||
|
|
||
|
/* setup temp aliases */
|
||
|
tmpx = A->dp + tx;
|
||
|
tmpy = A->dp + ty;
|
||
|
|
||
|
/* this is the number of times the loop will iterrate, essentially its
|
||
|
while (tx++ < a->used && ty-- >= 0) { ... }
|
||
|
*/
|
||
|
iy = MIN(A->used-tx, ty+1);
|
||
|
|
||
|
/* now for squaring tx can never equal ty
|
||
|
* we halve the distance since they approach at a rate of 2x
|
||
|
* and we have to round because odd cases need to be executed
|
||
|
*/
|
||
|
iy = MIN(iy, (ty-tx+1)>>1);
|
||
|
|
||
|
/* forward carries */
|
||
|
CARRY_FORWARD;
|
||
|
|
||
|
/* execute loop */
|
||
|
for (iz = 0; iz < iy; iz++) {
|
||
|
SQRADD2(*tmpx++, *tmpy--);
|
||
|
}
|
||
|
|
||
|
/* even columns have the square term in them */
|
||
|
if ((ix&1) == 0) {
|
||
|
SQRADD(A->dp[ix>>1], A->dp[ix>>1]);
|
||
|
}
|
||
|
|
||
|
/* store it */
|
||
|
COMBA_STORE(dst->dp[ix]);
|
||
|
}
|
||
|
COMBA_STORE2(dst->dp[ix]);
|
||
|
|
||
|
COMBA_FINI;
|
||
|
|
||
|
/* setup dest */
|
||
|
dst->used = pa;
|
||
|
fp_clamp (dst);
|
||
|
if (dst != B) {
|
||
|
fp_copy(dst, B);
|
||
|
}
|
||
|
}
|