Skip to content

Commit a6c4c5a

Browse files
Tom St Denis authored and sjaeckel committed
added tomsfastmath-0.05
1 parent f91cf2d commit a6c4c5a

21 files changed

+830
-310
lines changed

changes.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
August 1st, 2005
2+
0.05 -- Quick fix to the fp_invmod.c code to let it handle even moduli [required for LTC]
3+
-- Added makefile.shared to make shared objects [required for LTC]
4+
-- Improved makefiles to make them way more configurable
5+
-- Added timing resistant fp_exptmod() enabled with TFM_TIMING_RESISTANT
6+
7+
July 23rd, 2005
18
0.04 -- Fixed bugs in the SSE2 squaring code
29
-- Rewrote the multipliers to be optimized for small inputs
310
-- Nelson Bolyard of the NSS crew submitted [among other things] new faster Montgomery reduction

comba_mont_gen.c

Lines changed: 95 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,112 @@
1-
/* generate montgomery reductions for m->used = 1...16 */
2-
31
#include <stdio.h>
42

53
int main(void)
64
{
7-
int N;
8-
9-
for (N = 1; N <= 16; N++) {
10-
11-
printf("void fp_montgomery_reduce_%d(fp_int *a, fp_int *m, fp_digit mp)\n", N);
5+
int x, y, z;
6+
127
printf(
8+
#if 0
9+
"#ifdef TFM_SMALL_SET\n"
10+
"/* computes x/R == x (mod N) via Montgomery Reduction */\n"
11+
"void fp_montgomery_reduce_small(fp_int *a, fp_int *m, fp_digit mp)\n"
1312
"{\n"
14-
" fp_digit c[3*FP_SIZE], *_c, *tmpm, mu;\n"
15-
" int oldused, x, y;\n"
13+
" fp_digit c[FP_SIZE], *_c, *tmpm, mu, cy;\n"
14+
" int oldused, x, y, pa;\n"
1615
"\n"
16+
"#if defined(USE_MEMSET)\n"
1717
" /* now zero the buff */\n"
18-
" memset(c, 0, sizeof(c));\n"
18+
" memset(c, 0, sizeof c);\n"
19+
"#endif\n"
20+
" pa = m->used;\n"
1921
"\n"
2022
" /* copy the input */\n"
2123
" oldused = a->used;\n"
2224
" for (x = 0; x < oldused; x++) {\n"
2325
" c[x] = a->dp[x];\n"
2426
" }\n"
25-
"\n"
27+
"#if !defined(USE_MEMSET)\n"
28+
" for (; x < 2*pa+3; x++) {\n"
29+
" c[x] = 0;\n"
30+
" }\n"
31+
"#endif\n"
2632
" MONT_START;\n"
33+
#endif
2734
"\n"
28-
" /* now let's get bizz-sy! */\n"
29-
" for (x = 0; x < %d; x++) {\n"
30-
" /* get Mu for this round */\n"
31-
" LOOP_START;\n"
32-
"\n"
33-
" /* our friendly neighbourhood alias */\n"
34-
" _c = c + x;\n"
35-
" tmpm = m->dp;\n"
36-
"\n"
37-
" for (y = 0; y < %d; y++) {\n"
38-
" INNERMUL;\n"
39-
" ++_c;\n"
40-
" }\n"
41-
" /* send carry up man... */\n"
42-
" _c = c + x;\n"
43-
" PROPCARRY;\n"
44-
" } \n"
45-
"\n"
46-
" /* fix the rest of the carries */\n"
47-
" _c = c + %d;\n"
48-
" for (x = %d; x < %d * 2 + 2; x++) {\n"
49-
" PROPCARRY;\n"
50-
" ++_c;\n"
35+
" switch (pa) {\n");
36+
37+
for (x = 1; x <= 64; x++) {
38+
if (x > 16 && (x != 32 && x != 48 && x != 64)) continue;
39+
if (x > 16) printf("#ifdef TFM_HUGE\n");
40+
41+
42+
43+
printf(" case %d:\n", x);
44+
45+
for (y = 0; y < x; y++) {
46+
47+
printf(" x = %d; cy = 0;\n"
48+
" LOOP_START;\n"
49+
" _c = c + %d;\n"
50+
" tmpm = m->dp;\n", y, y);
51+
52+
printf("#ifdef INNERMUL8\n");
53+
for (z = 0; z+8 <= x; z += 8) {
54+
printf(" INNERMUL8; _c += 8; tmpm += 8;\n");
55+
}
56+
for (; z < x; z++) {
57+
printf(" INNERMUL; ++_c;\n");
58+
}
59+
printf("#else\n");
60+
for (z = 0; z < x; z++) {
61+
printf(" INNERMUL; ++_c;\n");
62+
}
63+
printf("#endif\n");
64+
printf(" LOOP_END;\n"
65+
" while (cy) {\n"
66+
" PROPCARRY;\n"
67+
" ++_c;\n"
68+
" }\n");
69+
}
70+
//printf(" }\n");
71+
printf(" break;\n");
72+
73+
74+
75+
#define LOOP_MACRO(stride) \
76+
for (x = 0; x < stride; x++) { \
77+
fp_digit cy = 0; \
78+
/* get Mu for this round */ \
79+
LOOP_START; \
80+
_c = c + x; \
81+
tmpm = m->dp; \
82+
for (y = 0; y < stride; y++) { \
83+
INNERMUL; \
84+
++_c; \
85+
} \
86+
LOOP_END; \
87+
while (cy) { \
88+
PROPCARRY; \
89+
++_c; \
90+
} \
91+
}
92+
93+
94+
95+
96+
97+
if (x > 16) printf("#endif /* TFM_HUGE */\n");
98+
99+
100+
}
101+
102+
#if 0
103+
104+
printf(
51105
" }\n"
52-
"\n"
53106
" /* now copy out */\n"
54-
" _c = c + %d;\n"
107+
" _c = c + pa;\n"
55108
" tmpm = a->dp;\n"
56-
" for (x = 0; x < %d+1; x++) {\n"
109+
" for (x = 0; x < pa+1; x++) {\n"
57110
" *tmpm++ = *_c++;\n"
58111
" }\n"
59112
"\n"
@@ -63,19 +116,17 @@ printf(
63116
"\n"
64117
" MONT_FINI;\n"
65118
"\n"
66-
" a->used = %d+1;\n"
119+
" a->used = pa+1;\n"
67120
" fp_clamp(a);\n"
68121
"\n"
69122
" /* if A >= m then A = A - m */\n"
70123
" if (fp_cmp_mag (a, m) != FP_LT) {\n"
71124
" s_fp_sub (a, m, a);\n"
72125
" }\n"
73-
"}\n", N,N,N,N,N,N,N,N);
74-
}
75-
76-
return 0;
77-
}
78-
126+
"}\n\n#endif\n");
79127

128+
#endif
80129

81130

131+
return 0;
132+
}

demo/test.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ t1 = TIMFUNC();
213213
sleep(1);
214214
printf("Ticks per second: %llu\n", TIMFUNC() - t1);
215215

216-
goto expttime;
216+
goto multtime;
217217
/* do some timings... */
218218
printf("Addition:\n");
219219
for (t = 2; t <= FP_SIZE/2; t += 2) {

doc/tfm.pdf

2.59 KB
Binary file not shown.

fp_exptmod.c

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,75 @@
99
*/
1010
#include <tfm.h>
1111

12+
#ifdef TFM_TIMING_RESISTANT
13+
14+
/* timing resistant montgomery ladder based exptmod
15+
16+
Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder", Cryptographic Hardware and Embedded Systems, CHES 2002
17+
*/
18+
/* Timing-resistant exponentiation helper (Montgomery powering ladder).
 *
 * Parameters:
 *   G - base; reduced with fp_mod first when P is not greater than it in magnitude
 *   X - exponent; its digits X->dp[X->used-1] .. X->dp[0] are scanned msb first
 *   P - modulus handed to fp_montgomery_setup / fp_montgomery_reduce
 *   Y - destination; receives a copy of R[0] after the final reduction
 *
 * Returns the error code from fp_montgomery_setup when it is not FP_OKAY,
 * otherwise FP_OKAY.
 *
 * Every scanned exponent bit performs exactly one fp_mul and one fp_sqr, each
 * followed by fp_montgomery_reduce; the bit value only selects which of R[0]
 * and R[1] receives each result.
 *
 * NOTE(review): presumably Y = G**X mod P, matching the non-timing-resistant
 * variant in this file -- confirm against the caller.
 */
static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
19+
{
20+
fp_int R[2];
21+
fp_digit buf, mp;
22+
int err, bitcnt, digidx, y;
23+
24+
/* now setup montgomery (fills in mp; this is the only checked failure path) */
25+
if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
26+
return err;
27+
}
28+
29+
fp_init(&R[0]);
30+
fp_init(&R[1]);
31+
32+
/* now we need R mod m (stored in R[0]) */
33+
fp_montgomery_calc_normalization (&R[0], P);
34+
35+
/* now set R[1] to G * R mod m; R[0] keeps the normalization value from above */
36+
if (fp_cmp_mag(P, G) != FP_GT) {
37+
/* P is not greater than G in magnitude (G >= P) so we reduce it first */
38+
fp_mod(G, P, &R[1]);
39+
} else {
40+
fp_copy(G, &R[1]);
41+
}
/* NOTE(review): any status reported by fp_mod / fp_mulmod is ignored here -- confirm they cannot fail for these inputs */
42+
fp_mulmod (&R[1], &R[0], P, &R[1]);
43+
44+
/* for j = t-1 downto 0 do
45+
r_!k = R0*R1; r_k = r_k^2
46+
*/
47+
48+
/* set initial mode and bit cnt */
/* bitcnt starts at 1 so the first --bitcnt reaches 0 and loads the top digit;
   with X->used == 0, digidx starts at -1 and the loop exits before any multiply */
49+
bitcnt = 1;
50+
buf = 0;
51+
digidx = X->used - 1;
52+
53+
for (;;) {
54+
/* grab next digit as required */
55+
if (--bitcnt == 0) {
56+
/* if digidx == -1 we are out of digits so break */
57+
if (digidx == -1) {
58+
break;
59+
}
60+
/* read next digit and reset bitcnt */
61+
buf = X->dp[digidx--];
62+
bitcnt = (int)DIGIT_BIT;
63+
}
64+
65+
/* grab the next msb from the exponent: y is the top bit of buf, then buf is
   shifted left so the following bit becomes the top bit */
66+
y = (fp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
67+
buf <<= (fp_digit)1;
68+
69+
/* do ops: R[y^1] = R[0]*R[1], then R[y] = R[y]^2, each followed by a reduce.
   Both statements run for either value of y; y only picks the destinations. */
70+
fp_mul(&R[0], &R[1], &R[y^1]); fp_montgomery_reduce(&R[y^1], P, mp);
71+
fp_sqr(&R[y], &R[y]); fp_montgomery_reduce(&R[y], P, mp);
72+
}
73+
/* one more reduce on R[0] before copying it out -- presumably this leaves Montgomery form; confirm */
74+
fp_montgomery_reduce(&R[0], P, mp);
75+
fp_copy(&R[0], Y);
76+
return FP_OKAY;
77+
}
78+
79+
#else
80+
1281
/* y = g**x (mod b)
1382
* Some restrictions... x must be positive and < b
1483
*/
@@ -168,6 +237,8 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
168237
return FP_OKAY;
169238
}
170239

240+
#endif
241+
171242

172243
int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
173244
{

0 commit comments

Comments
 (0)