]>
Commit | Line | Data |
---|---|---|
bae9f6d2 JC |
1 | // Copyright 2012 The Go Authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style | |
3 | // license that can be found in the LICENSE file. | |
4 | ||
5 | // +build amd64,!gccgo,!appengine | |
6 | ||
7 | package curve25519 | |
8 | ||
9 | // These functions are implemented in the .s files. The names of the functions | |
10 | // in the rest of the file are also taken from the SUPERCOP sources to help | |
11 | // people following along. | |
12 | ||
13 | //go:noescape | |
14 | ||
15 | func cswap(inout *[5]uint64, v uint64) | |
16 | ||
17 | //go:noescape | |
18 | ||
19 | func ladderstep(inout *[5][5]uint64) | |
20 | ||
21 | //go:noescape | |
22 | ||
23 | func freeze(inout *[5]uint64) | |
24 | ||
25 | //go:noescape | |
26 | ||
27 | func mul(dest, a, b *[5]uint64) | |
28 | ||
29 | //go:noescape | |
30 | ||
31 | func square(out, in *[5]uint64) | |
32 | ||
33 | // mladder uses a Montgomery ladder to calculate (xr/zr) *= s. | |
34 | func mladder(xr, zr *[5]uint64, s *[32]byte) { | |
35 | var work [5][5]uint64 | |
36 | ||
37 | work[0] = *xr | |
38 | setint(&work[1], 1) | |
39 | setint(&work[2], 0) | |
40 | work[3] = *xr | |
41 | setint(&work[4], 1) | |
42 | ||
43 | j := uint(6) | |
44 | var prevbit byte | |
45 | ||
46 | for i := 31; i >= 0; i-- { | |
47 | for j < 8 { | |
48 | bit := ((*s)[i] >> j) & 1 | |
49 | swap := bit ^ prevbit | |
50 | prevbit = bit | |
51 | cswap(&work[1], uint64(swap)) | |
52 | ladderstep(&work) | |
53 | j-- | |
54 | } | |
55 | j = 7 | |
56 | } | |
57 | ||
58 | *xr = work[1] | |
59 | *zr = work[2] | |
60 | } | |
61 | ||
62 | func scalarMult(out, in, base *[32]byte) { | |
63 | var e [32]byte | |
64 | copy(e[:], (*in)[:]) | |
65 | e[0] &= 248 | |
66 | e[31] &= 127 | |
67 | e[31] |= 64 | |
68 | ||
69 | var t, z [5]uint64 | |
70 | unpack(&t, base) | |
71 | mladder(&t, &z, &e) | |
72 | invert(&z, &z) | |
73 | mul(&t, &t, &z) | |
74 | pack(out, &t) | |
75 | } | |
76 | ||
77 | func setint(r *[5]uint64, v uint64) { | |
78 | r[0] = v | |
79 | r[1] = 0 | |
80 | r[2] = 0 | |
81 | r[3] = 0 | |
82 | r[4] = 0 | |
83 | } | |
84 | ||
85 | // unpack sets r = x where r consists of 5, 51-bit limbs in little-endian | |
86 | // order. | |
87 | func unpack(r *[5]uint64, x *[32]byte) { | |
88 | r[0] = uint64(x[0]) | | |
89 | uint64(x[1])<<8 | | |
90 | uint64(x[2])<<16 | | |
91 | uint64(x[3])<<24 | | |
92 | uint64(x[4])<<32 | | |
93 | uint64(x[5])<<40 | | |
94 | uint64(x[6]&7)<<48 | |
95 | ||
96 | r[1] = uint64(x[6])>>3 | | |
97 | uint64(x[7])<<5 | | |
98 | uint64(x[8])<<13 | | |
99 | uint64(x[9])<<21 | | |
100 | uint64(x[10])<<29 | | |
101 | uint64(x[11])<<37 | | |
102 | uint64(x[12]&63)<<45 | |
103 | ||
104 | r[2] = uint64(x[12])>>6 | | |
105 | uint64(x[13])<<2 | | |
106 | uint64(x[14])<<10 | | |
107 | uint64(x[15])<<18 | | |
108 | uint64(x[16])<<26 | | |
109 | uint64(x[17])<<34 | | |
110 | uint64(x[18])<<42 | | |
111 | uint64(x[19]&1)<<50 | |
112 | ||
113 | r[3] = uint64(x[19])>>1 | | |
114 | uint64(x[20])<<7 | | |
115 | uint64(x[21])<<15 | | |
116 | uint64(x[22])<<23 | | |
117 | uint64(x[23])<<31 | | |
118 | uint64(x[24])<<39 | | |
119 | uint64(x[25]&15)<<47 | |
120 | ||
121 | r[4] = uint64(x[25])>>4 | | |
122 | uint64(x[26])<<4 | | |
123 | uint64(x[27])<<12 | | |
124 | uint64(x[28])<<20 | | |
125 | uint64(x[29])<<28 | | |
126 | uint64(x[30])<<36 | | |
127 | uint64(x[31]&127)<<44 | |
128 | } | |
129 | ||
130 | // pack sets out = x where out is the usual, little-endian form of the 5, | |
131 | // 51-bit limbs in x. | |
132 | func pack(out *[32]byte, x *[5]uint64) { | |
133 | t := *x | |
134 | freeze(&t) | |
135 | ||
136 | out[0] = byte(t[0]) | |
137 | out[1] = byte(t[0] >> 8) | |
138 | out[2] = byte(t[0] >> 16) | |
139 | out[3] = byte(t[0] >> 24) | |
140 | out[4] = byte(t[0] >> 32) | |
141 | out[5] = byte(t[0] >> 40) | |
142 | out[6] = byte(t[0] >> 48) | |
143 | ||
144 | out[6] ^= byte(t[1]<<3) & 0xf8 | |
145 | out[7] = byte(t[1] >> 5) | |
146 | out[8] = byte(t[1] >> 13) | |
147 | out[9] = byte(t[1] >> 21) | |
148 | out[10] = byte(t[1] >> 29) | |
149 | out[11] = byte(t[1] >> 37) | |
150 | out[12] = byte(t[1] >> 45) | |
151 | ||
152 | out[12] ^= byte(t[2]<<6) & 0xc0 | |
153 | out[13] = byte(t[2] >> 2) | |
154 | out[14] = byte(t[2] >> 10) | |
155 | out[15] = byte(t[2] >> 18) | |
156 | out[16] = byte(t[2] >> 26) | |
157 | out[17] = byte(t[2] >> 34) | |
158 | out[18] = byte(t[2] >> 42) | |
159 | out[19] = byte(t[2] >> 50) | |
160 | ||
161 | out[19] ^= byte(t[3]<<1) & 0xfe | |
162 | out[20] = byte(t[3] >> 7) | |
163 | out[21] = byte(t[3] >> 15) | |
164 | out[22] = byte(t[3] >> 23) | |
165 | out[23] = byte(t[3] >> 31) | |
166 | out[24] = byte(t[3] >> 39) | |
167 | out[25] = byte(t[3] >> 47) | |
168 | ||
169 | out[25] ^= byte(t[4]<<4) & 0xf0 | |
170 | out[26] = byte(t[4] >> 4) | |
171 | out[27] = byte(t[4] >> 12) | |
172 | out[28] = byte(t[4] >> 20) | |
173 | out[29] = byte(t[4] >> 28) | |
174 | out[30] = byte(t[4] >> 36) | |
175 | out[31] = byte(t[4] >> 44) | |
176 | } | |
177 | ||
178 | // invert calculates r = x^-1 mod p using Fermat's little theorem. | |
179 | func invert(r *[5]uint64, x *[5]uint64) { | |
180 | var z2, z9, z11, z2_5_0, z2_10_0, z2_20_0, z2_50_0, z2_100_0, t [5]uint64 | |
181 | ||
182 | square(&z2, x) /* 2 */ | |
183 | square(&t, &z2) /* 4 */ | |
184 | square(&t, &t) /* 8 */ | |
185 | mul(&z9, &t, x) /* 9 */ | |
186 | mul(&z11, &z9, &z2) /* 11 */ | |
187 | square(&t, &z11) /* 22 */ | |
188 | mul(&z2_5_0, &t, &z9) /* 2^5 - 2^0 = 31 */ | |
189 | ||
190 | square(&t, &z2_5_0) /* 2^6 - 2^1 */ | |
191 | for i := 1; i < 5; i++ { /* 2^20 - 2^10 */ | |
192 | square(&t, &t) | |
193 | } | |
194 | mul(&z2_10_0, &t, &z2_5_0) /* 2^10 - 2^0 */ | |
195 | ||
196 | square(&t, &z2_10_0) /* 2^11 - 2^1 */ | |
197 | for i := 1; i < 10; i++ { /* 2^20 - 2^10 */ | |
198 | square(&t, &t) | |
199 | } | |
200 | mul(&z2_20_0, &t, &z2_10_0) /* 2^20 - 2^0 */ | |
201 | ||
202 | square(&t, &z2_20_0) /* 2^21 - 2^1 */ | |
203 | for i := 1; i < 20; i++ { /* 2^40 - 2^20 */ | |
204 | square(&t, &t) | |
205 | } | |
206 | mul(&t, &t, &z2_20_0) /* 2^40 - 2^0 */ | |
207 | ||
208 | square(&t, &t) /* 2^41 - 2^1 */ | |
209 | for i := 1; i < 10; i++ { /* 2^50 - 2^10 */ | |
210 | square(&t, &t) | |
211 | } | |
212 | mul(&z2_50_0, &t, &z2_10_0) /* 2^50 - 2^0 */ | |
213 | ||
214 | square(&t, &z2_50_0) /* 2^51 - 2^1 */ | |
215 | for i := 1; i < 50; i++ { /* 2^100 - 2^50 */ | |
216 | square(&t, &t) | |
217 | } | |
218 | mul(&z2_100_0, &t, &z2_50_0) /* 2^100 - 2^0 */ | |
219 | ||
220 | square(&t, &z2_100_0) /* 2^101 - 2^1 */ | |
221 | for i := 1; i < 100; i++ { /* 2^200 - 2^100 */ | |
222 | square(&t, &t) | |
223 | } | |
224 | mul(&t, &t, &z2_100_0) /* 2^200 - 2^0 */ | |
225 | ||
226 | square(&t, &t) /* 2^201 - 2^1 */ | |
227 | for i := 1; i < 50; i++ { /* 2^250 - 2^50 */ | |
228 | square(&t, &t) | |
229 | } | |
230 | mul(&t, &t, &z2_50_0) /* 2^250 - 2^0 */ | |
231 | ||
232 | square(&t, &t) /* 2^251 - 2^1 */ | |
233 | square(&t, &t) /* 2^252 - 2^2 */ | |
234 | square(&t, &t) /* 2^253 - 2^3 */ | |
235 | ||
236 | square(&t, &t) /* 2^254 - 2^4 */ | |
237 | ||
238 | square(&t, &t) /* 2^255 - 2^5 */ | |
239 | mul(r, &t, &z11) /* 2^255 - 21 */ | |
240 | } |