// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright (C) 2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
 */

#include <crypto/curve25519.h>
#include <crypto/internal/kpp.h>

#include <linux/types.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>

#include <asm/cpufeature.h>
#include <asm/processor.h>

/*
 * Branch-free equality test.
 * Returns a mask with all 64 bits set when a == b, and 0 otherwise.
 */
static __always_inline u64 eq_mask(u64 a, u64 b)
{
	u64 x = a ^ b;			/* zero iff a == b */
	u64 minus_x = ~x + (u64)1U;	/* two's-complement negation of x */
	u64 x_or_minus_x = x | minus_x;	/* top bit is set iff x != 0 */
	u64 xnx = x_or_minus_x >> (u32)63U;
	return xnx - (u64)1U;		/* 0 - 1 -> all ones, 1 - 1 -> 0 */
}

/*
 * Branch-free unsigned comparison.
 * Returns a mask with all 64 bits set when a >= b, and 0 otherwise.
 */
static __always_inline u64 gte_mask(u64 a, u64 b)
{
	u64 x = a;
	u64 y = b;
	u64 x_xor_y = x ^ y;
	u64 x_sub_y = x - y;
	u64 x_sub_y_xor_y = x_sub_y ^ y;
	u64 q = x_xor_y | x_sub_y_xor_y;
	u64 x_xor_q = x ^ q;			/* top bit is set iff x < y */
	u64 x_xor_q_ = x_xor_q >> (u32)63U;
	return x_xor_q_ - (u64)1U;		/* 0 - 1 -> all ones, 1 - 1 -> 0 */
}

/* Computes the addition of four-element f1 with value in f2
 * and returns the carry (if any)
 *
 * asm operands: %0 = f2 (becomes the lowest result limb), %1 = carry_r,
 * %2 = out, %3 = f1.
 * Each limb of f1 is loaded before the matching limb of out is stored, so
 * out may be the same buffer as f1. */
static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2)
{
	u64 carry_r;

	asm volatile(
		/* Clear registers to propagate the carry bit */
		" xor %%r8, %%r8;"
		" xor %%r9, %%r9;"
		" xor %%r10, %%r10;"
		" xor %%r11, %%r11;"
		" xor %1, %1;"

		/* Begin addition chain */
		" addq 0(%3), %0;"
		" movq %0, 0(%2);"
		" adcxq 8(%3), %%r8;"
		" movq %%r8, 8(%2);"
		" adcxq 16(%3), %%r9;"
		" movq %%r9, 16(%2);"
		" adcxq 24(%3), %%r10;"
		" movq %%r10, 24(%2);"

		/* Return the carry bit in a register */
		" adcx %%r11, %1;"
		: "+&r" (f2), "=&r" (carry_r)
		: "r" (out), "r" (f1)
		: "%r8", "%r9", "%r10", "%r11", "memory", "cc"
	);

	return carry_r;
}

/* Computes the field addition of two field elements
 *
 * out, f1 and f2 are four-limb (4 x u64) values.
 * asm operands: %0 = f2 (reused as scratch for the constant 38 once all limbs
 * of f2 have been loaded), %1 = out, %2 = f1.
 * All eight input limbs are loaded before the first store to out, so out may
 * be the same buffer as f1 or f2. */
static inline void fadd(u64 *out, const u64 *f1, const u64 *f2)
{
	asm volatile(
		/* Compute the raw addition of f1 + f2 */
		" movq 0(%0), %%r8;"
		" addq 0(%2), %%r8;"
		" movq 8(%0), %%r9;"
		" adcxq 8(%2), %%r9;"
		" movq 16(%0), %%r10;"
		" adcxq 16(%2), %%r10;"
		" movq 24(%0), %%r11;"
		" adcxq 24(%2), %%r11;"

		/* Wrap the result back into the field */

		/* Step 1: Compute carry*38 */
		" mov $0, %%rax;"
		" mov $38, %0;"
		" cmovc %0, %%rax;"

		/* Step 2: Add carry*38 to the original sum */
		" xor %%rcx, %%rcx;"
		" add %%rax, %%r8;"
		" adcx %%rcx, %%r9;"
		" movq %%r9, 8(%1);"
		" adcx %%rcx, %%r10;"
		" movq %%r10, 16(%1);"
		" adcx %%rcx, %%r11;"
		" movq %%r11, 24(%1);"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %0, %%rax;"
		" add %%rax, %%r8;"
		" movq %%r8, 0(%1);"
		: "+&r" (f2)
		: "r" (out), "r" (f1)
		: "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"
	);
}

/* Computes the field subtraction of two field elements: out <- f1 - f2
 *
 * out, f1 and f2 are four-limb (4 x u64) values.
 * asm operands: %0 = out, %1 = f1, %2 = f2.
 * The result is kept in r8..r11 and stored only at the very end, so out may
 * be the same buffer as f1 or f2. */
static inline void fsub(u64 *out, const u64 *f1, const u64 *f2)
{
	asm volatile(
		/* Compute the raw subtraction of f1-f2 */
		" movq 0(%1), %%r8;"
		" subq 0(%2), %%r8;"
		" movq 8(%1), %%r9;"
		" sbbq 8(%2), %%r9;"
		" movq 16(%1), %%r10;"
		" sbbq 16(%2), %%r10;"
		" movq 24(%1), %%r11;"
		" sbbq 24(%2), %%r11;"

		/* Wrap the result back into the field */

		/* Step 1: Compute carry*38 */
		" mov $0, %%rax;"
		" mov $38, %%rcx;"
		" cmovc %%rcx, %%rax;"

		/* Step 2: Subtract carry*38 from the original difference */
		" sub %%rax, %%r8;"
		" sbb $0, %%r9;"
		" sbb $0, %%r10;"
		" sbb $0, %%r11;"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %%rcx, %%rax;"
		" sub %%rax, %%r8;"

		/* Store the result */
		" movq %%r8, 0(%0);"
		" movq %%r9, 8(%0);"
		" movq %%r10, 16(%0);"
		" movq %%r11, 24(%0);"
		:
		: "r" (out), "r" (f1), "r" (f2)
		: "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"
	);
}

/* Computes a field multiplication: out <- f1 * f2
 * Uses the 8-element buffer tmp for intermediate results
 *
 * asm operands: %0 = tmp, %1 = f1, %2 = out, %3 = f2.
 * The full 8-limb product is written to tmp while f1 and f2 are still being
 * read, so tmp must not overlap either input. out is first written after the
 * product is complete, so out may be the same buffer as f1 or f2.
 * After "Line up pointers", %1 holds the tmp address and %0 the out address;
 * %3 is then zeroed and serves as the zero operand of the carry chains. */
static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
{
	asm volatile(
		/* Compute the raw multiplication: tmp <- src1 * src2 */

		/* Compute src1[0] * src2 */
		" movq 0(%1), %%rdx;"
		" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);"
		" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
		" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
		" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
		" adox %%rdx, %%rax;"
		/* Compute src1[1] * src2 */
		" movq 8(%1), %%rdx;"
		" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
		" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);"
		" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
		" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
		" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
		/* Compute src1[2] * src2 */
		" movq 16(%1), %%rdx;"
		" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
		" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);"
		" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
		" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
		" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
		/* Compute src1[3] * src2 */
		" movq 24(%1), %%rdx;"
		" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
		" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);"
		" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;"
		" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
		" adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);"
		/* Line up pointers */
		" mov %0, %1;"
		" mov %2, %0;"

		/* Wrap the result back into the field */

		/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
		" mov $38, %%rdx;"
		" mulxq 32(%1), %%r8, %%r13;"
		" xor %3, %3;"
		" adoxq 0(%1), %%r8;"
		" mulxq 40(%1), %%r9, %%rbx;"
		" adcx %%r13, %%r9;"
		" adoxq 8(%1), %%r9;"
		" mulxq 48(%1), %%r10, %%r13;"
		" adcx %%rbx, %%r10;"
		" adoxq 16(%1), %%r10;"
		" mulxq 56(%1), %%r11, %%rax;"
		" adcx %%r13, %%r11;"
		" adoxq 24(%1), %%r11;"
		" adcx %3, %%rax;"
		" adox %3, %%rax;"
		" imul %%rdx, %%rax;"

		/* Step 2: Fold the carry back into dst */
		" add %%rax, %%r8;"
		" adcx %3, %%r9;"
		" movq %%r9, 8(%0);"
		" adcx %3, %%r10;"
		" movq %%r10, 16(%0);"
		" adcx %3, %%r11;"
		" movq %%r11, 24(%0);"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %%rdx, %%rax;"
		" add %%rax, %%r8;"
		" movq %%r8, 0(%0);"
		: "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2)
		:
		: "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc"
	);
}

/* Computes two field multiplications:
 * out[0] <- f1[0] * f2[0]
 * out[1] <- f1[1] * f2[1]
 * Uses the 16-element buffer tmp for intermediate results.
*/
/* fmul2 notes: out, f1 and f2 each hold two consecutive four-limb elements
 * (8 x u64); byte offsets 0..31 address element 0 and 32..63 element 1.
 * asm operands: %0 = tmp, %1 = f1, %2 = out, %3 = f2.
 * Both raw 8-limb products are written to tmp (bytes 0..63 and 64..127)
 * before the first store to out, so out may be the same buffer as f1 or f2;
 * tmp must not overlap either input. After "Line up pointers", %1 holds the
 * tmp address and %0 the out address; %3 is then zeroed and serves as the
 * zero operand of the carry chains. */
static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
{
	asm volatile(
		/* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] */

		/* Compute src1[0] * src2 */
		" movq 0(%1), %%rdx;"
		" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);"
		" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
		" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
		" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
		" adox %%rdx, %%rax;"
		/* Compute src1[1] * src2 */
		" movq 8(%1), %%rdx;"
		" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
		" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);"
		" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
		" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
		" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
		/* Compute src1[2] * src2 */
		" movq 16(%1), %%rdx;"
		" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
		" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);"
		" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
		" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
		" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
		/* Compute src1[3] * src2 */
		" movq 24(%1), %%rdx;"
		" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
		" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);"
		" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;"
		" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
		" adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);"

		/* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] */

		/* Compute src1[0] * src2 */
		" movq 32(%1), %%rdx;"
		" mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 64(%0);"
		" mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);"
		" mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
		" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
		" adox %%rdx, %%rax;"
		/* Compute src1[1] * src2 */
		" movq 40(%1), %%rdx;"
		" mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);"
		" mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 80(%0);"
		" mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
		" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
		" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
		/* Compute src1[2] * src2 */
		" movq 48(%1), %%rdx;"
		" mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);"
		" mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 88(%0);"
		" mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
		" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
		" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
		/* Compute src1[3] * src2 */
		" movq 56(%1), %%rdx;"
		" mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);"
		" mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 96(%0);"
		" mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 104(%0);" " mov $0, %%r8;"
		" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;"
		" adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);"
		/* Line up pointers */
		" mov %0, %1;"
		" mov %2, %0;"

		/* Wrap the results back into the field */

		/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
		" mov $38, %%rdx;"
		" mulxq 32(%1), %%r8, %%r13;"
		" xor %3, %3;"
		" adoxq 0(%1), %%r8;"
		" mulxq 40(%1), %%r9, %%rbx;"
		" adcx %%r13, %%r9;"
		" adoxq 8(%1), %%r9;"
		" mulxq 48(%1), %%r10, %%r13;"
		" adcx %%rbx, %%r10;"
		" adoxq 16(%1), %%r10;"
		" mulxq 56(%1), %%r11, %%rax;"
		" adcx %%r13, %%r11;"
		" adoxq 24(%1), %%r11;"
		" adcx %3, %%rax;"
		" adox %3, %%rax;"
		" imul %%rdx, %%rax;"

		/* Step 2: Fold the carry back into dst */
		" add %%rax, %%r8;"
		" adcx %3, %%r9;"
		" movq %%r9, 8(%0);"
		" adcx %3, %%r10;"
		" movq %%r10, 16(%0);"
		" adcx %3, %%r11;"
		" movq %%r11, 24(%0);"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %%rdx, %%rax;"
		" add %%rax, %%r8;"
		" movq %%r8, 0(%0);"

		/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
		" mov $38, %%rdx;"
		" mulxq 96(%1), %%r8, %%r13;"
		" xor %3, %3;"
		" adoxq 64(%1), %%r8;"
		" mulxq 104(%1), %%r9, %%rbx;"
		" adcx %%r13, %%r9;"
		" adoxq 72(%1), %%r9;"
		" mulxq 112(%1), %%r10, %%r13;"
		" adcx %%rbx, %%r10;"
		" adoxq 80(%1), %%r10;"
		" mulxq 120(%1), %%r11, %%rax;"
		" adcx %%r13, %%r11;"
		" adoxq 88(%1), %%r11;"
		" adcx %3, %%rax;"
		" adox %3, %%rax;"
		" imul %%rdx, %%rax;"

		/* Step 2: Fold the carry back into dst */
		" add %%rax, %%r8;"
		" adcx %3, %%r9;"
		" movq %%r9, 40(%0);"
		" adcx %3, %%r10;"
		" movq %%r10, 48(%0);"
		" adcx %3, %%r11;"
		" movq %%r11, 56(%0);"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %%rdx, %%rax;"
		" add %%rax, %%r8;"
		" movq %%r8, 32(%0);"
		: "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2)
		:
		: "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc"
	);
}

/* Computes the field multiplication of four-element f1 with value in f2
 *
 * asm operands: %0 = f2_r, %1 = out, %2 = f1.
 * f2 is pinned to rdx through a register variable because the mulx
 * instructions below take their second multiplicand implicitly from rdx.
 * rdx is overwritten with the constant 38 once the raw product is done.
 * All four limbs of f1 are read before the first store to out, so out may be
 * the same buffer as f1. */
static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2)
{
	register u64 f2_r asm("rdx") = f2;

	asm volatile(
		/* Compute the raw multiplication of f1*f2 */
		" mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */
		" mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */
		" add %%rcx, %%r9;"
		" mov $0, %%rcx;"
		" mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */
		" adcx %%rbx, %%r10;"
		" mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */
		" adcx %%r13, %%r11;"
		" adcx %%rcx, %%rax;"

		/* Wrap the result back into the field */

		/* Step 1: Compute carry*38 */
		" mov $38, %%rdx;"
		" imul %%rdx, %%rax;"

		/* Step 2: Fold the carry back into dst */
		" add %%rax, %%r8;"
		" adcx %%rcx, %%r9;"
		" movq %%r9, 8(%1);"
		" adcx %%rcx, %%r10;"
		" movq %%r10, 16(%1);"
		" adcx %%rcx, %%r11;"
		" movq %%r11, 24(%1);"

		/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
		" mov $0, %%rax;"
		" cmovc %%rdx, %%rax;"
		" add %%rax, %%r8;"
		" movq %%r8, 0(%1);"
		: "+&r" (f2_r)
		: "r" (out), "r" (f1)
		: "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "memory", "cc"
	);
}

/* Computes p1 <- bit ?
p2 : p1 in constant time */ 427 static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2) 428 { 429 asm volatile( 430 /* Invert the polarity of bit to match cmov expectations */ 431 " add $18446744073709551615, %0;" 432 433 /* cswap p1[0], p2[0] */ 434 " movq 0(%1), %%r8;" 435 " movq 0(%2), %%r9;" 436 " mov %%r8, %%r10;" 437 " cmovc %%r9, %%r8;" 438 " cmovc %%r10, %%r9;" 439 " movq %%r8, 0(%1);" 440 " movq %%r9, 0(%2);" 441 442 /* cswap p1[1], p2[1] */ 443 " movq 8(%1), %%r8;" 444 " movq 8(%2), %%r9;" 445 " mov %%r8, %%r10;" 446 " cmovc %%r9, %%r8;" 447 " cmovc %%r10, %%r9;" 448 " movq %%r8, 8(%1);" 449 " movq %%r9, 8(%2);" 450 451 /* cswap p1[2], p2[2] */ 452 " movq 16(%1), %%r8;" 453 " movq 16(%2), %%r9;" 454 " mov %%r8, %%r10;" 455 " cmovc %%r9, %%r8;" 456 " cmovc %%r10, %%r9;" 457 " movq %%r8, 16(%1);" 458 " movq %%r9, 16(%2);" 459 460 /* cswap p1[3], p2[3] */ 461 " movq 24(%1), %%r8;" 462 " movq 24(%2), %%r9;" 463 " mov %%r8, %%r10;" 464 " cmovc %%r9, %%r8;" 465 " cmovc %%r10, %%r9;" 466 " movq %%r8, 24(%1);" 467 " movq %%r9, 24(%2);" 468 469 /* cswap p1[4], p2[4] */ 470 " movq 32(%1), %%r8;" 471 " movq 32(%2), %%r9;" 472 " mov %%r8, %%r10;" 473 " cmovc %%r9, %%r8;" 474 " cmovc %%r10, %%r9;" 475 " movq %%r8, 32(%1);" 476 " movq %%r9, 32(%2);" 477 478 /* cswap p1[5], p2[5] */ 479 " movq 40(%1), %%r8;" 480 " movq 40(%2), %%r9;" 481 " mov %%r8, %%r10;" 482 " cmovc %%r9, %%r8;" 483 " cmovc %%r10, %%r9;" 484 " movq %%r8, 40(%1);" 485 " movq %%r9, 40(%2);" 486 487 /* cswap p1[6], p2[6] */ 488 " movq 48(%1), %%r8;" 489 " movq 48(%2), %%r9;" 490 " mov %%r8, %%r10;" 491 " cmovc %%r9, %%r8;" 492 " cmovc %%r10, %%r9;" 493 " movq %%r8, 48(%1);" 494 " movq %%r9, 48(%2);" 495 496 /* cswap p1[7], p2[7] */ 497 " movq 56(%1), %%r8;" 498 " movq 56(%2), %%r9;" 499 " mov %%r8, %%r10;" 500 " cmovc %%r9, %%r8;" 501 " cmovc %%r10, %%r9;" 502 " movq %%r8, 56(%1);" 503 " movq %%r9, 56(%2);" 504 : "+&r" (bit) 505 : "r" (p1), "r" (p2) 506 : "%r8", "%r9", "%r10", "memory", "cc" 
507 ); 508 } 509 510 /* Computes the square of a field element: out <- f * f 511 * Uses the 8-element buffer tmp for intermediate results */ 512 static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) 513 { 514 asm volatile( 515 /* Compute the raw multiplication: tmp <- f * f */ 516 517 /* Step 1: Compute all partial products */ 518 " movq 0(%1), %%rdx;" /* f[0] */ 519 " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ 520 " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ 521 " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ 522 " movq 24(%1), %%rdx;" /* f[3] */ 523 " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ 524 " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */ 525 " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ 526 " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ 527 528 /* Step 2: Compute two parallel carry chains */ 529 " xor %%r15, %%r15;" 530 " adox %%rax, %%r10;" 531 " adcx %%r8, %%r8;" 532 " adox %%rcx, %%r11;" 533 " adcx %%r9, %%r9;" 534 " adox %%r15, %%rbx;" 535 " adcx %%r10, %%r10;" 536 " adox %%r15, %%r13;" 537 " adcx %%r11, %%r11;" 538 " adox %%r15, %%r14;" 539 " adcx %%rbx, %%rbx;" 540 " adcx %%r13, %%r13;" 541 " adcx %%r14, %%r14;" 542 543 /* Step 3: Compute intermediate squares */ 544 " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ 545 " movq %%rax, 0(%0);" 546 " add %%rcx, %%r8;" " movq %%r8, 8(%0);" 547 " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ 548 " adcx %%rax, %%r9;" " movq %%r9, 16(%0);" 549 " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);" 550 " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ 551 " adcx %%rax, %%r11;" " movq %%r11, 32(%0);" 552 " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);" 553 " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ 554 " adcx %%rax, %%r13;" " movq %%r13, 48(%0);" 555 " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);" 556 557 /* Line up pointers 
*/ 558 " mov %0, %1;" 559 " mov %2, %0;" 560 561 /* Wrap the result back into the field */ 562 563 /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ 564 " mov $38, %%rdx;" 565 " mulxq 32(%1), %%r8, %%r13;" 566 " xor %%rcx, %%rcx;" 567 " adoxq 0(%1), %%r8;" 568 " mulxq 40(%1), %%r9, %%rbx;" 569 " adcx %%r13, %%r9;" 570 " adoxq 8(%1), %%r9;" 571 " mulxq 48(%1), %%r10, %%r13;" 572 " adcx %%rbx, %%r10;" 573 " adoxq 16(%1), %%r10;" 574 " mulxq 56(%1), %%r11, %%rax;" 575 " adcx %%r13, %%r11;" 576 " adoxq 24(%1), %%r11;" 577 " adcx %%rcx, %%rax;" 578 " adox %%rcx, %%rax;" 579 " imul %%rdx, %%rax;" 580 581 /* Step 2: Fold the carry back into dst */ 582 " add %%rax, %%r8;" 583 " adcx %%rcx, %%r9;" 584 " movq %%r9, 8(%0);" 585 " adcx %%rcx, %%r10;" 586 " movq %%r10, 16(%0);" 587 " adcx %%rcx, %%r11;" 588 " movq %%r11, 24(%0);" 589 590 /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ 591 " mov $0, %%rax;" 592 " cmovc %%rdx, %%rax;" 593 " add %%rax, %%r8;" 594 " movq %%r8, 0(%0);" 595 : "+&r" (tmp), "+&r" (f), "+&r" (out) 596 : 597 : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc" 598 ); 599 } 600 601 /* Computes two field squarings: 602 * out[0] <- f[0] * f[0] 603 * out[1] <- f[1] * f[1] 604 * Uses the 16-element buffer tmp for intermediate results */ 605 static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) 606 { 607 asm volatile( 608 /* Step 1: Compute all partial products */ 609 " movq 0(%1), %%rdx;" /* f[0] */ 610 " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ 611 " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ 612 " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ 613 " movq 24(%1), %%rdx;" /* f[3] */ 614 " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ 615 " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */ 616 " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ 617 " mulxq 16(%1), 
%%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ 618 619 /* Step 2: Compute two parallel carry chains */ 620 " xor %%r15, %%r15;" 621 " adox %%rax, %%r10;" 622 " adcx %%r8, %%r8;" 623 " adox %%rcx, %%r11;" 624 " adcx %%r9, %%r9;" 625 " adox %%r15, %%rbx;" 626 " adcx %%r10, %%r10;" 627 " adox %%r15, %%r13;" 628 " adcx %%r11, %%r11;" 629 " adox %%r15, %%r14;" 630 " adcx %%rbx, %%rbx;" 631 " adcx %%r13, %%r13;" 632 " adcx %%r14, %%r14;" 633 634 /* Step 3: Compute intermediate squares */ 635 " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ 636 " movq %%rax, 0(%0);" 637 " add %%rcx, %%r8;" " movq %%r8, 8(%0);" 638 " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ 639 " adcx %%rax, %%r9;" " movq %%r9, 16(%0);" 640 " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);" 641 " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ 642 " adcx %%rax, %%r11;" " movq %%r11, 32(%0);" 643 " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);" 644 " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ 645 " adcx %%rax, %%r13;" " movq %%r13, 48(%0);" 646 " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);" 647 648 /* Step 1: Compute all partial products */ 649 " movq 32(%1), %%rdx;" /* f[0] */ 650 " mulxq 40(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ 651 " mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ 652 " mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ 653 " movq 56(%1), %%rdx;" /* f[3] */ 654 " mulxq 40(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ 655 " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */ 656 " movq 40(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ 657 " mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ 658 659 /* Step 2: Compute two parallel carry chains */ 660 " xor %%r15, %%r15;" 661 " adox %%rax, %%r10;" 662 " adcx %%r8, %%r8;" 663 " adox %%rcx, %%r11;" 664 " adcx %%r9, %%r9;" 665 " adox %%r15, %%rbx;" 666 " adcx %%r10, %%r10;" 667 " adox %%r15, %%r13;" 
668 " adcx %%r11, %%r11;" 669 " adox %%r15, %%r14;" 670 " adcx %%rbx, %%rbx;" 671 " adcx %%r13, %%r13;" 672 " adcx %%r14, %%r14;" 673 674 /* Step 3: Compute intermediate squares */ 675 " movq 32(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ 676 " movq %%rax, 64(%0);" 677 " add %%rcx, %%r8;" " movq %%r8, 72(%0);" 678 " movq 40(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ 679 " adcx %%rax, %%r9;" " movq %%r9, 80(%0);" 680 " adcx %%rcx, %%r10;" " movq %%r10, 88(%0);" 681 " movq 48(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ 682 " adcx %%rax, %%r11;" " movq %%r11, 96(%0);" 683 " adcx %%rcx, %%rbx;" " movq %%rbx, 104(%0);" 684 " movq 56(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ 685 " adcx %%rax, %%r13;" " movq %%r13, 112(%0);" 686 " adcx %%rcx, %%r14;" " movq %%r14, 120(%0);" 687 688 /* Line up pointers */ 689 " mov %0, %1;" 690 " mov %2, %0;" 691 692 /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ 693 " mov $38, %%rdx;" 694 " mulxq 32(%1), %%r8, %%r13;" 695 " xor %%rcx, %%rcx;" 696 " adoxq 0(%1), %%r8;" 697 " mulxq 40(%1), %%r9, %%rbx;" 698 " adcx %%r13, %%r9;" 699 " adoxq 8(%1), %%r9;" 700 " mulxq 48(%1), %%r10, %%r13;" 701 " adcx %%rbx, %%r10;" 702 " adoxq 16(%1), %%r10;" 703 " mulxq 56(%1), %%r11, %%rax;" 704 " adcx %%r13, %%r11;" 705 " adoxq 24(%1), %%r11;" 706 " adcx %%rcx, %%rax;" 707 " adox %%rcx, %%rax;" 708 " imul %%rdx, %%rax;" 709 710 /* Step 2: Fold the carry back into dst */ 711 " add %%rax, %%r8;" 712 " adcx %%rcx, %%r9;" 713 " movq %%r9, 8(%0);" 714 " adcx %%rcx, %%r10;" 715 " movq %%r10, 16(%0);" 716 " adcx %%rcx, %%r11;" 717 " movq %%r11, 24(%0);" 718 719 /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ 720 " mov $0, %%rax;" 721 " cmovc %%rdx, %%rax;" 722 " add %%rax, %%r8;" 723 " movq %%r8, 0(%0);" 724 725 /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ 726 " mov $38, %%rdx;" 727 " mulxq 96(%1), %%r8, %%r13;" 728 " xor %%rcx, %%rcx;" 729 " adoxq 64(%1), 
%%r8;" 730 " mulxq 104(%1), %%r9, %%rbx;" 731 " adcx %%r13, %%r9;" 732 " adoxq 72(%1), %%r9;" 733 " mulxq 112(%1), %%r10, %%r13;" 734 " adcx %%rbx, %%r10;" 735 " adoxq 80(%1), %%r10;" 736 " mulxq 120(%1), %%r11, %%rax;" 737 " adcx %%r13, %%r11;" 738 " adoxq 88(%1), %%r11;" 739 " adcx %%rcx, %%rax;" 740 " adox %%rcx, %%rax;" 741 " imul %%rdx, %%rax;" 742 743 /* Step 2: Fold the carry back into dst */ 744 " add %%rax, %%r8;" 745 " adcx %%rcx, %%r9;" 746 " movq %%r9, 40(%0);" 747 " adcx %%rcx, %%r10;" 748 " movq %%r10, 48(%0);" 749 " adcx %%rcx, %%r11;" 750 " movq %%r11, 56(%0);" 751 752 /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ 753 " mov $0, %%rax;" 754 " cmovc %%rdx, %%rax;" 755 " add %%rax, %%r8;" 756 " movq %%r8, 32(%0);" 757 : "+&r" (tmp), "+&r" (f), "+&r" (out) 758 : 759 : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc" 760 ); 761 } 762 763 static void point_add_and_double(u64 *q, u64 *p01_tmp1, u64 *tmp2) 764 { 765 u64 *nq = p01_tmp1; 766 u64 *nq_p1 = p01_tmp1 + (u32)8U; 767 u64 *tmp1 = p01_tmp1 + (u32)16U; 768 u64 *x1 = q; 769 u64 *x2 = nq; 770 u64 *z2 = nq + (u32)4U; 771 u64 *z3 = nq_p1 + (u32)4U; 772 u64 *a = tmp1; 773 u64 *b = tmp1 + (u32)4U; 774 u64 *ab = tmp1; 775 u64 *dc = tmp1 + (u32)8U; 776 u64 *x3; 777 u64 *z31; 778 u64 *d0; 779 u64 *c0; 780 u64 *a1; 781 u64 *b1; 782 u64 *d; 783 u64 *c; 784 u64 *ab1; 785 u64 *dc1; 786 fadd(a, x2, z2); 787 fsub(b, x2, z2); 788 x3 = nq_p1; 789 z31 = nq_p1 + (u32)4U; 790 d0 = dc; 791 c0 = dc + (u32)4U; 792 fadd(c0, x3, z31); 793 fsub(d0, x3, z31); 794 fmul2(dc, dc, ab, tmp2); 795 fadd(x3, d0, c0); 796 fsub(z31, d0, c0); 797 a1 = tmp1; 798 b1 = tmp1 + (u32)4U; 799 d = tmp1 + (u32)8U; 800 c = tmp1 + (u32)12U; 801 ab1 = tmp1; 802 dc1 = tmp1 + (u32)8U; 803 fsqr2(dc1, ab1, tmp2); 804 fsqr2(nq_p1, nq_p1, tmp2); 805 a1[0U] = c[0U]; 806 a1[1U] = c[1U]; 807 a1[2U] = c[2U]; 808 a1[3U] = c[3U]; 809 fsub(c, d, c); 810 fmul_scalar(b1, c, 
(u64)121665U); 811 fadd(b1, b1, d); 812 fmul2(nq, dc1, ab1, tmp2); 813 fmul(z3, z3, x1, tmp2); 814 } 815 816 static void point_double(u64 *nq, u64 *tmp1, u64 *tmp2) 817 { 818 u64 *x2 = nq; 819 u64 *z2 = nq + (u32)4U; 820 u64 *a = tmp1; 821 u64 *b = tmp1 + (u32)4U; 822 u64 *d = tmp1 + (u32)8U; 823 u64 *c = tmp1 + (u32)12U; 824 u64 *ab = tmp1; 825 u64 *dc = tmp1 + (u32)8U; 826 fadd(a, x2, z2); 827 fsub(b, x2, z2); 828 fsqr2(dc, ab, tmp2); 829 a[0U] = c[0U]; 830 a[1U] = c[1U]; 831 a[2U] = c[2U]; 832 a[3U] = c[3U]; 833 fsub(c, d, c); 834 fmul_scalar(b, c, (u64)121665U); 835 fadd(b, b, d); 836 fmul2(nq, dc, ab, tmp2); 837 } 838 839 static void montgomery_ladder(u64 *out, const u8 *key, u64 *init1) 840 { 841 u64 tmp2[16U] = { 0U }; 842 u64 p01_tmp1_swap[33U] = { 0U }; 843 u64 *p0 = p01_tmp1_swap; 844 u64 *p01 = p01_tmp1_swap; 845 u64 *p03 = p01; 846 u64 *p11 = p01 + (u32)8U; 847 u64 *x0; 848 u64 *z0; 849 u64 *p01_tmp1; 850 u64 *p01_tmp11; 851 u64 *nq10; 852 u64 *nq_p11; 853 u64 *swap1; 854 u64 sw0; 855 u64 *nq1; 856 u64 *tmp1; 857 memcpy(p11, init1, (u32)8U * sizeof(init1[0U])); 858 x0 = p03; 859 z0 = p03 + (u32)4U; 860 x0[0U] = (u64)1U; 861 x0[1U] = (u64)0U; 862 x0[2U] = (u64)0U; 863 x0[3U] = (u64)0U; 864 z0[0U] = (u64)0U; 865 z0[1U] = (u64)0U; 866 z0[2U] = (u64)0U; 867 z0[3U] = (u64)0U; 868 p01_tmp1 = p01_tmp1_swap; 869 p01_tmp11 = p01_tmp1_swap; 870 nq10 = p01_tmp1_swap; 871 nq_p11 = p01_tmp1_swap + (u32)8U; 872 swap1 = p01_tmp1_swap + (u32)32U; 873 cswap2((u64)1U, nq10, nq_p11); 874 point_add_and_double(init1, p01_tmp11, tmp2); 875 swap1[0U] = (u64)1U; 876 { 877 u32 i; 878 for (i = (u32)0U; i < (u32)251U; i = i + (u32)1U) { 879 u64 *p01_tmp12 = p01_tmp1_swap; 880 u64 *swap2 = p01_tmp1_swap + (u32)32U; 881 u64 *nq2 = p01_tmp12; 882 u64 *nq_p12 = p01_tmp12 + (u32)8U; 883 u64 bit = (u64)(key[((u32)253U - i) / (u32)8U] >> ((u32)253U - i) % (u32)8U & (u8)1U); 884 u64 sw = swap2[0U] ^ bit; 885 cswap2(sw, nq2, nq_p12); 886 point_add_and_double(init1, p01_tmp12, tmp2); 887 
swap2[0U] = bit; 888 } 889 } 890 sw0 = swap1[0U]; 891 cswap2(sw0, nq10, nq_p11); 892 nq1 = p01_tmp1; 893 tmp1 = p01_tmp1 + (u32)16U; 894 point_double(nq1, tmp1, tmp2); 895 point_double(nq1, tmp1, tmp2); 896 point_double(nq1, tmp1, tmp2); 897 memcpy(out, p0, (u32)8U * sizeof(p0[0U])); 898 899 memzero_explicit(tmp2, sizeof(tmp2)); 900 memzero_explicit(p01_tmp1_swap, sizeof(p01_tmp1_swap)); 901 } 902 903 static void fsquare_times(u64 *o, const u64 *inp, u64 *tmp, u32 n1) 904 { 905 u32 i; 906 fsqr(o, inp, tmp); 907 for (i = (u32)0U; i < n1 - (u32)1U; i = i + (u32)1U) 908 fsqr(o, o, tmp); 909 } 910 911 static void finv(u64 *o, const u64 *i, u64 *tmp) 912 { 913 u64 t1[16U] = { 0U }; 914 u64 *a0 = t1; 915 u64 *b = t1 + (u32)4U; 916 u64 *c = t1 + (u32)8U; 917 u64 *t00 = t1 + (u32)12U; 918 u64 *tmp1 = tmp; 919 u64 *a; 920 u64 *t0; 921 fsquare_times(a0, i, tmp1, (u32)1U); 922 fsquare_times(t00, a0, tmp1, (u32)2U); 923 fmul(b, t00, i, tmp); 924 fmul(a0, b, a0, tmp); 925 fsquare_times(t00, a0, tmp1, (u32)1U); 926 fmul(b, t00, b, tmp); 927 fsquare_times(t00, b, tmp1, (u32)5U); 928 fmul(b, t00, b, tmp); 929 fsquare_times(t00, b, tmp1, (u32)10U); 930 fmul(c, t00, b, tmp); 931 fsquare_times(t00, c, tmp1, (u32)20U); 932 fmul(t00, t00, c, tmp); 933 fsquare_times(t00, t00, tmp1, (u32)10U); 934 fmul(b, t00, b, tmp); 935 fsquare_times(t00, b, tmp1, (u32)50U); 936 fmul(c, t00, b, tmp); 937 fsquare_times(t00, c, tmp1, (u32)100U); 938 fmul(t00, t00, c, tmp); 939 fsquare_times(t00, t00, tmp1, (u32)50U); 940 fmul(t00, t00, b, tmp); 941 fsquare_times(t00, t00, tmp1, (u32)5U); 942 a = t1; 943 t0 = t1 + (u32)12U; 944 fmul(o, t0, a, tmp); 945 } 946 947 static void store_felem(u64 *b, u64 *f) 948 { 949 u64 f30 = f[3U]; 950 u64 top_bit0 = f30 >> (u32)63U; 951 u64 f31; 952 u64 top_bit; 953 u64 f0; 954 u64 f1; 955 u64 f2; 956 u64 f3; 957 u64 m0; 958 u64 m1; 959 u64 m2; 960 u64 m3; 961 u64 mask; 962 u64 f0_; 963 u64 f1_; 964 u64 f2_; 965 u64 f3_; 966 u64 o0; 967 u64 o1; 968 u64 o2; 969 u64 o3; 970 
f[3U] = f30 & (u64)0x7fffffffffffffffU; 971 add_scalar(f, f, (u64)19U * top_bit0); 972 f31 = f[3U]; 973 top_bit = f31 >> (u32)63U; 974 f[3U] = f31 & (u64)0x7fffffffffffffffU; 975 add_scalar(f, f, (u64)19U * top_bit); 976 f0 = f[0U]; 977 f1 = f[1U]; 978 f2 = f[2U]; 979 f3 = f[3U]; 980 m0 = gte_mask(f0, (u64)0xffffffffffffffedU); 981 m1 = eq_mask(f1, (u64)0xffffffffffffffffU); 982 m2 = eq_mask(f2, (u64)0xffffffffffffffffU); 983 m3 = eq_mask(f3, (u64)0x7fffffffffffffffU); 984 mask = ((m0 & m1) & m2) & m3; 985 f0_ = f0 - (mask & (u64)0xffffffffffffffedU); 986 f1_ = f1 - (mask & (u64)0xffffffffffffffffU); 987 f2_ = f2 - (mask & (u64)0xffffffffffffffffU); 988 f3_ = f3 - (mask & (u64)0x7fffffffffffffffU); 989 o0 = f0_; 990 o1 = f1_; 991 o2 = f2_; 992 o3 = f3_; 993 b[0U] = o0; 994 b[1U] = o1; 995 b[2U] = o2; 996 b[3U] = o3; 997 } 998 999 static void encode_point(u8 *o, const u64 *i) 1000 { 1001 const u64 *x = i; 1002 const u64 *z = i + (u32)4U; 1003 u64 tmp[4U] = { 0U }; 1004 u64 tmp_w[16U] = { 0U }; 1005 finv(tmp, z, tmp_w); 1006 fmul(tmp, tmp, x, tmp_w); 1007 store_felem((u64 *)o, tmp); 1008 } 1009 1010 static void curve25519_ever64(u8 *out, const u8 *priv, const u8 *pub) 1011 { 1012 u64 init1[8U] = { 0U }; 1013 u64 tmp[4U] = { 0U }; 1014 u64 tmp3; 1015 u64 *x; 1016 u64 *z; 1017 { 1018 u32 i; 1019 for (i = (u32)0U; i < (u32)4U; i = i + (u32)1U) { 1020 u64 *os = tmp; 1021 const u8 *bj = pub + i * (u32)8U; 1022 u64 u = *(u64 *)bj; 1023 u64 r = u; 1024 u64 x0 = r; 1025 os[i] = x0; 1026 } 1027 } 1028 tmp3 = tmp[3U]; 1029 tmp[3U] = tmp3 & (u64)0x7fffffffffffffffU; 1030 x = init1; 1031 z = init1 + (u32)4U; 1032 z[0U] = (u64)1U; 1033 z[1U] = (u64)0U; 1034 z[2U] = (u64)0U; 1035 z[3U] = (u64)0U; 1036 x[0U] = tmp[0U]; 1037 x[1U] = tmp[1U]; 1038 x[2U] = tmp[2U]; 1039 x[3U] = tmp[3U]; 1040 montgomery_ladder(init1, priv, init1); 1041 encode_point(out, init1); 1042 } 1043 1044 /* The below constants were generated using this sage script: 1045 * 1046 * #!/usr/bin/env sage 1047 * import 
sys 1048 * from sage.all import * 1049 * def limbs(n): 1050 * n = int(n) 1051 * l = ((n >> 0) % 2^64, (n >> 64) % 2^64, (n >> 128) % 2^64, (n >> 192) % 2^64) 1052 * return "0x%016xULL, 0x%016xULL, 0x%016xULL, 0x%016xULL" % l 1053 * ec = EllipticCurve(GF(2^255 - 19), [0, 486662, 0, 1, 0]) 1054 * p_minus_s = (ec.lift_x(9) - ec.lift_x(1))[0] 1055 * print("static const u64 p_minus_s[] = { %s };\n" % limbs(p_minus_s)) 1056 * print("static const u64 table_ladder[] = {") 1057 * p = ec.lift_x(9) 1058 * for i in range(252): 1059 * l = (p[0] + p[2]) / (p[0] - p[2]) 1060 * print(("\t%s" + ("," if i != 251 else "")) % limbs(l)) 1061 * p = p * 2 1062 * print("};") 1063 * 1064 */ 1065 1066 static const u64 p_minus_s[] = { 0x816b1e0137d48290ULL, 0x440f6a51eb4d1207ULL, 0x52385f46dca2b71dULL, 0x215132111d8354cbULL }; 1067 1068 static const u64 table_ladder[] = { 1069 0xfffffffffffffff3ULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x5fffffffffffffffULL, 1070 0x6b8220f416aafe96ULL, 0x82ebeb2b4f566a34ULL, 0xd5a9a5b075a5950fULL, 0x5142b2cf4b2488f4ULL, 1071 0x6aaebc750069680cULL, 0x89cf7820a0f99c41ULL, 0x2a58d9183b56d0f4ULL, 0x4b5aca80e36011a4ULL, 1072 0x329132348c29745dULL, 0xf4a2e616e1642fd7ULL, 0x1e45bb03ff67bc34ULL, 0x306912d0f42a9b4aULL, 1073 0xff886507e6af7154ULL, 0x04f50e13dfeec82fULL, 0xaa512fe82abab5ceULL, 0x174e251a68d5f222ULL, 1074 0xcf96700d82028898ULL, 0x1743e3370a2c02c5ULL, 0x379eec98b4e86eaaULL, 0x0c59888a51e0482eULL, 1075 0xfbcbf1d699b5d189ULL, 0xacaef0d58e9fdc84ULL, 0xc1c20d06231f7614ULL, 0x2938218da274f972ULL, 1076 0xf6af49beff1d7f18ULL, 0xcc541c22387ac9c2ULL, 0x96fcc9ef4015c56bULL, 0x69c1627c690913a9ULL, 1077 0x7a86fd2f4733db0eULL, 0xfdb8c4f29e087de9ULL, 0x095e4b1a8ea2a229ULL, 0x1ad7a7c829b37a79ULL, 1078 0x342d89cad17ea0c0ULL, 0x67bedda6cced2051ULL, 0x19ca31bf2bb42f74ULL, 0x3df7b4c84980acbbULL, 1079 0xa8c6444dc80ad883ULL, 0xb91e440366e3ab85ULL, 0xc215cda00164f6d8ULL, 0x3d867c6ef247e668ULL, 1080 0xc7dd582bcc3e658cULL, 0xfd2c4748ee0e5528ULL, 0xa0fd9b95cc9f4f71ULL, 
0x7529d871b0675ddfULL, 1081 0xb8f568b42d3cbd78ULL, 0x1233011b91f3da82ULL, 0x2dce6ccd4a7c3b62ULL, 0x75e7fc8e9e498603ULL, 1082 0x2f4f13f1fcd0b6ecULL, 0xf1a8ca1f29ff7a45ULL, 0xc249c1a72981e29bULL, 0x6ebe0dbb8c83b56aULL, 1083 0x7114fa8d170bb222ULL, 0x65a2dcd5bf93935fULL, 0xbdc41f68b59c979aULL, 0x2f0eef79a2ce9289ULL, 1084 0x42ecbf0c083c37ceULL, 0x2930bc09ec496322ULL, 0xf294b0c19cfeac0dULL, 0x3780aa4bedfabb80ULL, 1085 0x56c17d3e7cead929ULL, 0xe7cb4beb2e5722c5ULL, 0x0ce931732dbfe15aULL, 0x41b883c7621052f8ULL, 1086 0xdbf75ca0c3d25350ULL, 0x2936be086eb1e351ULL, 0xc936e03cb4a9b212ULL, 0x1d45bf82322225aaULL, 1087 0xe81ab1036a024cc5ULL, 0xe212201c304c9a72ULL, 0xc5d73fba6832b1fcULL, 0x20ffdb5a4d839581ULL, 1088 0xa283d367be5d0fadULL, 0x6c2b25ca8b164475ULL, 0x9d4935467caaf22eULL, 0x5166408eee85ff49ULL, 1089 0x3c67baa2fab4e361ULL, 0xb3e433c67ef35cefULL, 0x5259729241159b1cULL, 0x6a621892d5b0ab33ULL, 1090 0x20b74a387555cdcbULL, 0x532aa10e1208923fULL, 0xeaa17b7762281dd1ULL, 0x61ab3443f05c44bfULL, 1091 0x257a6c422324def8ULL, 0x131c6c1017e3cf7fULL, 0x23758739f630a257ULL, 0x295a407a01a78580ULL, 1092 0xf8c443246d5da8d9ULL, 0x19d775450c52fa5dULL, 0x2afcfc92731bf83dULL, 0x7d10c8e81b2b4700ULL, 1093 0xc8e0271f70baa20bULL, 0x993748867ca63957ULL, 0x5412efb3cb7ed4bbULL, 0x3196d36173e62975ULL, 1094 0xde5bcad141c7dffcULL, 0x47cc8cd2b395c848ULL, 0xa34cd942e11af3cbULL, 0x0256dbf2d04ecec2ULL, 1095 0x875ab7e94b0e667fULL, 0xcad4dd83c0850d10ULL, 0x47f12e8f4e72c79fULL, 0x5f1a87bb8c85b19bULL, 1096 0x7ae9d0b6437f51b8ULL, 0x12c7ce5518879065ULL, 0x2ade09fe5cf77aeeULL, 0x23a05a2f7d2c5627ULL, 1097 0x5908e128f17c169aULL, 0xf77498dd8ad0852dULL, 0x74b4c4ceab102f64ULL, 0x183abadd10139845ULL, 1098 0xb165ba8daa92aaacULL, 0xd5c5ef9599386705ULL, 0xbe2f8f0cf8fc40d1ULL, 0x2701e635ee204514ULL, 1099 0x629fa80020156514ULL, 0xf223868764a8c1ceULL, 0x5b894fff0b3f060eULL, 0x60d9944cf708a3faULL, 1100 0xaeea001a1c7a201fULL, 0xebf16a633ee2ce63ULL, 0x6f7709594c7a07e1ULL, 0x79b958150d0208cbULL, 1101 0x24b55e5301d410e7ULL, 
0xe3a34edff3fdc84dULL, 0xd88768e4904032d8ULL, 0x131384427b3aaeecULL, 1102 0x8405e51286234f14ULL, 0x14dc4739adb4c529ULL, 0xb8a2b5b250634ffdULL, 0x2fe2a94ad8a7ff93ULL, 1103 0xec5c57efe843faddULL, 0x2843ce40f0bb9918ULL, 0xa4b561d6cf3d6305ULL, 0x743629bde8fb777eULL, 1104 0x343edd46bbaf738fULL, 0xed981828b101a651ULL, 0xa401760b882c797aULL, 0x1fc223e28dc88730ULL, 1105 0x48604e91fc0fba0eULL, 0xb637f78f052c6fa4ULL, 0x91ccac3d09e9239cULL, 0x23f7eed4437a687cULL, 1106 0x5173b1118d9bd800ULL, 0x29d641b63189d4a7ULL, 0xfdbf177988bbc586ULL, 0x2959894fcad81df5ULL, 1107 0xaebc8ef3b4bbc899ULL, 0x4148995ab26992b9ULL, 0x24e20b0134f92cfbULL, 0x40d158894a05dee8ULL, 1108 0x46b00b1185af76f6ULL, 0x26bac77873187a79ULL, 0x3dc0bf95ab8fff5fULL, 0x2a608bd8945524d7ULL, 1109 0x26449588bd446302ULL, 0x7c4bc21c0388439cULL, 0x8e98a4f383bd11b2ULL, 0x26218d7bc9d876b9ULL, 1110 0xe3081542997c178aULL, 0x3c2d29a86fb6606fULL, 0x5c217736fa279374ULL, 0x7dde05734afeb1faULL, 1111 0x3bf10e3906d42babULL, 0xe4f7803e1980649cULL, 0xe6053bf89595bf7aULL, 0x394faf38da245530ULL, 1112 0x7a8efb58896928f4ULL, 0xfbc778e9cc6a113cULL, 0x72670ce330af596fULL, 0x48f222a81d3d6cf7ULL, 1113 0xf01fce410d72caa7ULL, 0x5a20ecc7213b5595ULL, 0x7bc21165c1fa1483ULL, 0x07f89ae31da8a741ULL, 1114 0x05d2c2b4c6830ff9ULL, 0xd43e330fc6316293ULL, 0xa5a5590a96d3a904ULL, 0x705edb91a65333b6ULL, 1115 0x048ee15e0bb9a5f7ULL, 0x3240cfca9e0aaf5dULL, 0x8f4b71ceedc4a40bULL, 0x621c0da3de544a6dULL, 1116 0x92872836a08c4091ULL, 0xce8375b010c91445ULL, 0x8a72eb524f276394ULL, 0x2667fcfa7ec83635ULL, 1117 0x7f4c173345e8752aULL, 0x061b47feee7079a5ULL, 0x25dd9afa9f86ff34ULL, 0x3780cef5425dc89cULL, 1118 0x1a46035a513bb4e9ULL, 0x3e1ef379ac575adaULL, 0xc78c5f1c5fa24b50ULL, 0x321a967634fd9f22ULL, 1119 0x946707b8826e27faULL, 0x3dca84d64c506fd0ULL, 0xc189218075e91436ULL, 0x6d9284169b3b8484ULL, 1120 0x3a67e840383f2ddfULL, 0x33eec9a30c4f9b75ULL, 0x3ec7c86fa783ef47ULL, 0x26ec449fbac9fbc4ULL, 1121 0x5c0f38cba09b9e7dULL, 0x81168cc762a3478cULL, 0x3e23b0d306fc121cULL, 
0x5a238aa0a5efdcddULL, 1122 0x1ba26121c4ea43ffULL, 0x36f8c77f7c8832b5ULL, 0x88fbea0b0adcf99aULL, 0x5ca9938ec25bebf9ULL, 1123 0xd5436a5e51fccda0ULL, 0x1dbc4797c2cd893bULL, 0x19346a65d3224a08ULL, 0x0f5034e49b9af466ULL, 1124 0xf23c3967a1e0b96eULL, 0xe58b08fa867a4d88ULL, 0xfb2fabc6a7341679ULL, 0x2a75381eb6026946ULL, 1125 0xc80a3be4c19420acULL, 0x66b1f6c681f2b6dcULL, 0x7cf7036761e93388ULL, 0x25abbbd8a660a4c4ULL, 1126 0x91ea12ba14fd5198ULL, 0x684950fc4a3cffa9ULL, 0xf826842130f5ad28ULL, 0x3ea988f75301a441ULL, 1127 0xc978109a695f8c6fULL, 0x1746eb4a0530c3f3ULL, 0x444d6d77b4459995ULL, 0x75952b8c054e5cc7ULL, 1128 0xa3703f7915f4d6aaULL, 0x66c346202f2647d8ULL, 0xd01469df811d644bULL, 0x77fea47d81a5d71fULL, 1129 0xc5e9529ef57ca381ULL, 0x6eeeb4b9ce2f881aULL, 0xb6e91a28e8009bd6ULL, 0x4b80be3e9afc3fecULL, 1130 0x7e3773c526aed2c5ULL, 0x1b4afcb453c9a49dULL, 0xa920bdd7baffb24dULL, 0x7c54699f122d400eULL, 1131 0xef46c8e14fa94bc8ULL, 0xe0b074ce2952ed5eULL, 0xbea450e1dbd885d5ULL, 0x61b68649320f712cULL, 1132 0x8a485f7309ccbdd1ULL, 0xbd06320d7d4d1a2dULL, 0x25232973322dbef4ULL, 0x445dc4758c17f770ULL, 1133 0xdb0434177cc8933cULL, 0xed6fe82175ea059fULL, 0x1efebefdc053db34ULL, 0x4adbe867c65daf99ULL, 1134 0x3acd71a2a90609dfULL, 0xe5e991856dd04050ULL, 0x1ec69b688157c23cULL, 0x697427f6885cfe4dULL, 1135 0xd7be7b9b65e1a851ULL, 0xa03d28d522c536ddULL, 0x28399d658fd2b645ULL, 0x49e5b7e17c2641e1ULL, 1136 0x6f8c3a98700457a4ULL, 0x5078f0a25ebb6778ULL, 0xd13c3ccbc382960fULL, 0x2e003258a7df84b1ULL, 1137 0x8ad1f39be6296a1cULL, 0xc1eeaa652a5fbfb2ULL, 0x33ee0673fd26f3cbULL, 0x59256173a69d2cccULL, 1138 0x41ea07aa4e18fc41ULL, 0xd9fc19527c87a51eULL, 0xbdaacb805831ca6fULL, 0x445b652dc916694fULL, 1139 0xce92a3a7f2172315ULL, 0x1edc282de11b9964ULL, 0xa1823aafe04c314aULL, 0x790a2d94437cf586ULL, 1140 0x71c447fb93f6e009ULL, 0x8922a56722845276ULL, 0xbf70903b204f5169ULL, 0x2f7a89891ba319feULL, 1141 0x02a08eb577e2140cULL, 0xed9a4ed4427bdcf4ULL, 0x5253ec44e4323cd1ULL, 0x3e88363c14e9355bULL, 1142 0xaa66c14277110b8cULL, 
0x1ae0391610a23390ULL, 0x2030bd12c93fc2a2ULL, 0x3ee141579555c7abULL, 1143 0x9214de3a6d6e7d41ULL, 0x3ccdd88607f17efeULL, 0x674f1288f8e11217ULL, 0x5682250f329f93d0ULL, 1144 0x6cf00b136d2e396eULL, 0x6e4cf86f1014debfULL, 0x5930b1b5bfcc4e83ULL, 0x047069b48aba16b6ULL, 1145 0x0d4ce4ab69b20793ULL, 0xb24db91a97d0fb9eULL, 0xcdfa50f54e00d01dULL, 0x221b1085368bddb5ULL, 1146 0xe7e59468b1e3d8d2ULL, 0x53c56563bd122f93ULL, 0xeee8a903e0663f09ULL, 0x61efa662cbbe3d42ULL, 1147 0x2cf8ddddde6eab2aULL, 0x9bf80ad51435f231ULL, 0x5deadacec9f04973ULL, 0x29275b5d41d29b27ULL, 1148 0xcfde0f0895ebf14fULL, 0xb9aab96b054905a7ULL, 0xcae80dd9a1c420fdULL, 0x0a63bf2f1673bbc7ULL, 1149 0x092f6e11958fbc8cULL, 0x672a81e804822fadULL, 0xcac8351560d52517ULL, 0x6f3f7722c8f192f8ULL, 1150 0xf8ba90ccc2e894b7ULL, 0x2c7557a438ff9f0dULL, 0x894d1d855ae52359ULL, 0x68e122157b743d69ULL, 1151 0xd87e5570cfb919f3ULL, 0x3f2cdecd95798db9ULL, 0x2121154710c0a2ceULL, 0x3c66a115246dc5b2ULL, 1152 0xcbedc562294ecb72ULL, 0xba7143c36a280b16ULL, 0x9610c2efd4078b67ULL, 0x6144735d946a4b1eULL, 1153 0x536f111ed75b3350ULL, 0x0211db8c2041d81bULL, 0xf93cb1000e10413cULL, 0x149dfd3c039e8876ULL, 1154 0xd479dde46b63155bULL, 0xb66e15e93c837976ULL, 0xdafde43b1f13e038ULL, 0x5fafda1a2e4b0b35ULL, 1155 0x3600bbdf17197581ULL, 0x3972050bbe3cd2c2ULL, 0x5938906dbdd5be86ULL, 0x34fce5e43f9b860fULL, 1156 0x75a8a4cd42d14d02ULL, 0x828dabc53441df65ULL, 0x33dcabedd2e131d3ULL, 0x3ebad76fb814d25fULL, 1157 0xd4906f566f70e10fULL, 0x5d12f7aa51690f5aULL, 0x45adb16e76cefcf2ULL, 0x01f768aead232999ULL, 1158 0x2b6cc77b6248febdULL, 0x3cd30628ec3aaffdULL, 0xce1c0b80d4ef486aULL, 0x4c3bff2ea6f66c23ULL, 1159 0x3f2ec4094aeaeb5fULL, 0x61b19b286e372ca7ULL, 0x5eefa966de2a701dULL, 0x23b20565de55e3efULL, 1160 0xe301ca5279d58557ULL, 0x07b2d4ce27c2874fULL, 0xa532cd8a9dcf1d67ULL, 0x2a52fee23f2bff56ULL, 1161 0x8624efb37cd8663dULL, 0xbbc7ac20ffbd7594ULL, 0x57b85e9c82d37445ULL, 0x7b3052cb86a6ec66ULL, 1162 0x3482f0ad2525e91eULL, 0x2cb68043d28edca0ULL, 0xaf4f6d052e1b003aULL, 
0x185f8c2529781b0aULL, 1163 0xaa41de5bd80ce0d6ULL, 0x9407b2416853e9d6ULL, 0x563ec36e357f4c3aULL, 0x4cc4b8dd0e297bceULL, 1164 0xa2fc1a52ffb8730eULL, 0x1811f16e67058e37ULL, 0x10f9a366cddf4ee1ULL, 0x72f4a0c4a0b9f099ULL, 1165 0x8c16c06f663f4ea7ULL, 0x693b3af74e970fbaULL, 0x2102e7f1d69ec345ULL, 0x0ba53cbc968a8089ULL, 1166 0xca3d9dc7fea15537ULL, 0x4c6824bb51536493ULL, 0xb9886314844006b1ULL, 0x40d2a72ab454cc60ULL, 1167 0x5936a1b712570975ULL, 0x91b9d648debda657ULL, 0x3344094bb64330eaULL, 0x006ba10d12ee51d0ULL, 1168 0x19228468f5de5d58ULL, 0x0eb12f4c38cc05b0ULL, 0xa1039f9dd5601990ULL, 0x4502d4ce4fff0e0bULL, 1169 0xeb2054106837c189ULL, 0xd0f6544c6dd3b93cULL, 0x40727064c416d74fULL, 0x6e15c6114b502ef0ULL, 1170 0x4df2a398cfb1a76bULL, 0x11256c7419f2f6b1ULL, 0x4a497962066e6043ULL, 0x705b3aab41355b44ULL, 1171 0x365ef536d797b1d8ULL, 0x00076bd622ddf0dbULL, 0x3bbf33b0e0575a88ULL, 0x3777aa05c8e4ca4dULL, 1172 0x392745c85578db5fULL, 0x6fda4149dbae5ae2ULL, 0xb1f0b00b8adc9867ULL, 0x09963437d36f1da3ULL, 1173 0x7e824e90a5dc3853ULL, 0xccb5f6641f135cbdULL, 0x6736d86c87ce8fccULL, 0x625f3ce26604249fULL, 1174 0xaf8ac8059502f63fULL, 0x0c05e70a2e351469ULL, 0x35292e9c764b6305ULL, 0x1a394360c7e23ac3ULL, 1175 0xd5c6d53251183264ULL, 0x62065abd43c2b74fULL, 0xb5fbf5d03b973f9bULL, 0x13a3da3661206e5eULL, 1176 0xc6bd5837725d94e5ULL, 0x18e30912205016c5ULL, 0x2088ce1570033c68ULL, 0x7fba1f495c837987ULL, 1177 0x5a8c7423f2f9079dULL, 0x1735157b34023fc5ULL, 0xe4f9b49ad2fab351ULL, 0x6691ff72c878e33cULL, 1178 0x122c2adedc5eff3eULL, 0xf8dd4bf1d8956cf4ULL, 0xeb86205d9e9e5bdaULL, 0x049b92b9d975c743ULL, 1179 0xa5379730b0f6c05aULL, 0x72a0ffacc6f3a553ULL, 0xb0032c34b20dcd6dULL, 0x470e9dbc88d5164aULL, 1180 0xb19cf10ca237c047ULL, 0xb65466711f6c81a2ULL, 0xb3321bd16dd80b43ULL, 0x48c14f600c5fbe8eULL, 1181 0x66451c264aa6c803ULL, 0xb66e3904a4fa7da6ULL, 0xd45f19b0b3128395ULL, 0x31602627c3c9bc10ULL, 1182 0x3120dc4832e4e10dULL, 0xeb20c46756c717f7ULL, 0x00f52e3f67280294ULL, 0x566d4fc14730c509ULL, 1183 0x7e3a5d40fd837206ULL, 
0xc1e926dc7159547aULL, 0x216730fba68d6095ULL, 0x22e8c3843f69cea7ULL, 1184 0x33d074e8930e4b2bULL, 0xb6e4350e84d15816ULL, 0x5534c26ad6ba2365ULL, 0x7773c12f89f1f3f3ULL, 1185 0x8cba404da57962aaULL, 0x5b9897a81999ce56ULL, 0x508e862f121692fcULL, 0x3a81907fa093c291ULL, 1186 0x0dded0ff4725a510ULL, 0x10d8cc10673fc503ULL, 0x5b9d151c9f1f4e89ULL, 0x32a5c1d5cb09a44cULL, 1187 0x1e0aa442b90541fbULL, 0x5f85eb7cc1b485dbULL, 0xbee595ce8a9df2e5ULL, 0x25e496c722422236ULL, 1188 0x5edf3c46cd0fe5b9ULL, 0x34e75a7ed2a43388ULL, 0xe488de11d761e352ULL, 0x0e878a01a085545cULL, 1189 0xba493c77e021bb04ULL, 0x2b4d1843c7df899aULL, 0x9ea37a487ae80d67ULL, 0x67a9958011e41794ULL, 1190 0x4b58051a6697b065ULL, 0x47e33f7d8d6ba6d4ULL, 0xbb4da8d483ca46c1ULL, 0x68becaa181c2db0dULL, 1191 0x8d8980e90b989aa5ULL, 0xf95eb14a2c93c99bULL, 0x51c6c7c4796e73a2ULL, 0x6e228363b5efb569ULL, 1192 0xc6bbc0b02dd624c8ULL, 0x777eb47dec8170eeULL, 0x3cde15a004cfafa9ULL, 0x1dc6bc087160bf9bULL, 1193 0x2e07e043eec34002ULL, 0x18e9fc677a68dc7fULL, 0xd8da03188bd15b9aULL, 0x48fbc3bb00568253ULL, 1194 0x57547d4cfb654ce1ULL, 0xd3565b82a058e2adULL, 0xf63eaf0bbf154478ULL, 0x47531ef114dfbb18ULL, 1195 0xe1ec630a4278c587ULL, 0x5507d546ca8e83f3ULL, 0x85e135c63adc0c2bULL, 0x0aa7efa85682844eULL, 1196 0x72691ba8b3e1f615ULL, 0x32b4e9701fbe3ffaULL, 0x97b6d92e39bb7868ULL, 0x2cfe53dea02e39e8ULL, 1197 0x687392cd85cd52b0ULL, 0x27ff66c910e29831ULL, 0x97134556a9832d06ULL, 0x269bb0360a84f8a0ULL, 1198 0x706e55457643f85cULL, 0x3734a48c9b597d1bULL, 0x7aee91e8c6efa472ULL, 0x5cd6abc198a9d9e0ULL, 1199 0x0e04de06cb3ce41aULL, 0xd8c6eb893402e138ULL, 0x904659bb686e3772ULL, 0x7215c371746ba8c8ULL, 1200 0xfd12a97eeae4a2d9ULL, 0x9514b7516394f2c5ULL, 0x266fd5809208f294ULL, 0x5c847085619a26b9ULL, 1201 0x52985410fed694eaULL, 0x3c905b934a2ed254ULL, 0x10bb47692d3be467ULL, 0x063b3d2d69e5e9e1ULL, 1202 0x472726eedda57debULL, 0xefb6c4ae10f41891ULL, 0x2b1641917b307614ULL, 0x117c554fc4f45b7cULL, 1203 0xc07cf3118f9d8812ULL, 0x01dbd82050017939ULL, 0xd7e803f4171b2827ULL, 
0x1015e87487d225eaULL, 1204 0xc58de3fed23acc4dULL, 0x50db91c294a7be2dULL, 0x0b94d43d1c9cf457ULL, 0x6b1640fa6e37524aULL, 1205 0x692f346c5fda0d09ULL, 0x200b1c59fa4d3151ULL, 0xb8c46f760777a296ULL, 0x4b38395f3ffdfbcfULL, 1206 0x18d25e00be54d671ULL, 0x60d50582bec8aba6ULL, 0x87ad8f263b78b982ULL, 0x50fdf64e9cda0432ULL, 1207 0x90f567aac578dcf0ULL, 0xef1e9b0ef2a3133bULL, 0x0eebba9242d9de71ULL, 0x15473c9bf03101c7ULL, 1208 0x7c77e8ae56b78095ULL, 0xb678e7666e6f078eULL, 0x2da0b9615348ba1fULL, 0x7cf931c1ff733f0bULL, 1209 0x26b357f50a0a366cULL, 0xe9708cf42b87d732ULL, 0xc13aeea5f91cb2c0ULL, 0x35d90c991143bb4cULL, 1210 0x47c1c404a9a0d9dcULL, 0x659e58451972d251ULL, 0x3875a8c473b38c31ULL, 0x1fbd9ed379561f24ULL, 1211 0x11fabc6fd41ec28dULL, 0x7ef8dfe3cd2a2dcaULL, 0x72e73b5d8c404595ULL, 0x6135fa4954b72f27ULL, 1212 0xccfc32a2de24b69cULL, 0x3f55698c1f095d88ULL, 0xbe3350ed5ac3f929ULL, 0x5e9bf806ca477eebULL, 1213 0xe9ce8fb63c309f68ULL, 0x5376f63565e1f9f4ULL, 0xd1afcfb35a6393f1ULL, 0x6632a1ede5623506ULL, 1214 0x0b7d6c390c2ded4cULL, 0x56cb3281df04cb1fULL, 0x66305a1249ecc3c7ULL, 0x5d588b60a38ca72aULL, 1215 0xa6ecbf78e8e5f42dULL, 0x86eeb44b3c8a3eecULL, 0xec219c48fbd21604ULL, 0x1aaf1af517c36731ULL, 1216 0xc306a2836769bde7ULL, 0x208280622b1e2adbULL, 0x8027f51ffbff94a6ULL, 0x76cfa1ce1124f26bULL, 1217 0x18eb00562422abb6ULL, 0xf377c4d58f8c29c3ULL, 0x4dbbc207f531561aULL, 0x0253b7f082128a27ULL, 1218 0x3d1f091cb62c17e0ULL, 0x4860e1abd64628a9ULL, 0x52d17436309d4253ULL, 0x356f97e13efae576ULL, 1219 0xd351e11aa150535bULL, 0x3e6b45bb1dd878ccULL, 0x0c776128bed92c98ULL, 0x1d34ae93032885b8ULL, 1220 0x4ba0488ca85ba4c3ULL, 0x985348c33c9ce6ceULL, 0x66124c6f97bda770ULL, 0x0f81a0290654124aULL, 1221 0x9ed09ca6569b86fdULL, 0x811009fd18af9a2dULL, 0xff08d03f93d8c20aULL, 0x52a148199faef26bULL, 1222 0x3e03f9dc2d8d1b73ULL, 0x4205801873961a70ULL, 0xc0d987f041a35970ULL, 0x07aa1f15a1c0d549ULL, 1223 0xdfd46ce08cd27224ULL, 0x6d0a024f934e4239ULL, 0x808a7a6399897b59ULL, 0x0a4556e9e13d95a2ULL, 1224 0xd21a991fe9c13045ULL, 
0x9b0e8548fe7751b8ULL, 0x5da643cb4bf30035ULL, 0x77db28d63940f721ULL, 1225 0xfc5eeb614adc9011ULL, 0x5229419ae8c411ebULL, 0x9ec3e7787d1dcf74ULL, 0x340d053e216e4cb5ULL, 1226 0xcac7af39b48df2b4ULL, 0xc0faec2871a10a94ULL, 0x140a69245ca575edULL, 0x0cf1c37134273a4cULL, 1227 0xc8ee306ac224b8a5ULL, 0x57eaee7ccb4930b0ULL, 0xa1e806bdaacbe74fULL, 0x7d9a62742eeb657dULL, 1228 0x9eb6b6ef546c4830ULL, 0x885cca1fddb36e2eULL, 0xe6b9f383ef0d7105ULL, 0x58654fef9d2e0412ULL, 1229 0xa905c4ffbe0e8e26ULL, 0x942de5df9b31816eULL, 0x497d723f802e88e1ULL, 0x30684dea602f408dULL, 1230 0x21e5a278a3e6cb34ULL, 0xaefb6e6f5b151dc4ULL, 0xb30b8e049d77ca15ULL, 0x28c3c9cf53b98981ULL, 1231 0x287fb721556cdd2aULL, 0x0d317ca897022274ULL, 0x7468c7423a543258ULL, 0x4a7f11464eb5642fULL, 1232 0xa237a4774d193aa6ULL, 0xd865986ea92129a1ULL, 0x24c515ecf87c1a88ULL, 0x604003575f39f5ebULL, 1233 0x47b9f189570a9b27ULL, 0x2b98cede465e4b78ULL, 0x026df551dbb85c20ULL, 0x74fcd91047e21901ULL, 1234 0x13e2a90a23c1bfa3ULL, 0x0cb0074e478519f6ULL, 0x5ff1cbbe3af6cf44ULL, 0x67fe5438be812dbeULL, 1235 0xd13cf64fa40f05b0ULL, 0x054dfb2f32283787ULL, 0x4173915b7f0d2aeaULL, 0x482f144f1f610d4eULL, 1236 0xf6210201b47f8234ULL, 0x5d0ae1929e70b990ULL, 0xdcd7f455b049567cULL, 0x7e93d0f1f0916f01ULL, 1237 0xdd79cbf18a7db4faULL, 0xbe8391bf6f74c62fULL, 0x027145d14b8291bdULL, 0x585a73ea2cbf1705ULL, 1238 0x485ca03e928a0db2ULL, 0x10fc01a5742857e7ULL, 0x2f482edbd6d551a7ULL, 0x0f0433b5048fdb8aULL, 1239 0x60da2e8dd7dc6247ULL, 0x88b4c9d38cd4819aULL, 0x13033ac001f66697ULL, 0x273b24fe3b367d75ULL, 1240 0xc6e8f66a31b3b9d4ULL, 0x281514a494df49d5ULL, 0xd1726fdfc8b23da7ULL, 0x4b3ae7d103dee548ULL, 1241 0xc6256e19ce4b9d7eULL, 0xff5c5cf186e3c61cULL, 0xacc63ca34b8ec145ULL, 0x74621888fee66574ULL, 1242 0x956f409645290a1eULL, 0xef0bf8e3263a962eULL, 0xed6a50eb5ec2647bULL, 0x0694283a9dca7502ULL, 1243 0x769b963643a2dcd1ULL, 0x42b7c8ea09fc5353ULL, 0x4f002aee13397eabULL, 0x63005e2c19b7d63aULL, 1244 0xca6736da63023beaULL, 0x966c7f6db12a99b7ULL, 0xace09390c537c5e1ULL, 
0x0b696063a1aa89eeULL, 1245 0xebb03e97288c56e5ULL, 0x432a9f9f938c8be8ULL, 0xa6a5a93d5b717f71ULL, 0x1a5fb4c3e18f9d97ULL, 1246 0x1c94e7ad1c60cdceULL, 0xee202a43fc02c4a0ULL, 0x8dafe4d867c46a20ULL, 0x0a10263c8ac27b58ULL, 1247 0xd0dea9dfe4432a4aULL, 0x856af87bbe9277c5ULL, 0xce8472acc212c71aULL, 0x6f151b6d9bbb1e91ULL, 1248 0x26776c527ceed56aULL, 0x7d211cb7fbf8faecULL, 0x37ae66a6fd4609ccULL, 0x1f81b702d2770c42ULL, 1249 0x2fb0b057eac58392ULL, 0xe1dd89fe29744e9dULL, 0xc964f8eb17beb4f8ULL, 0x29571073c9a2d41eULL, 1250 0xa948a18981c0e254ULL, 0x2df6369b65b22830ULL, 0xa33eb2d75fcfd3c6ULL, 0x078cd6ec4199a01fULL, 1251 0x4a584a41ad900d2fULL, 0x32142b78e2c74c52ULL, 0x68c4e8338431c978ULL, 0x7f69ea9008689fc2ULL, 1252 0x52f2c81e46a38265ULL, 0xfd78072d04a832fdULL, 0x8cd7d5fa25359e94ULL, 0x4de71b7454cc29d2ULL, 1253 0x42eb60ad1eda6ac9ULL, 0x0aad37dfdbc09c3aULL, 0x81004b71e33cc191ULL, 0x44e6be345122803cULL, 1254 0x03fe8388ba1920dbULL, 0xf5d57c32150db008ULL, 0x49c8c4281af60c29ULL, 0x21edb518de701aeeULL, 1255 0x7fb63e418f06dc99ULL, 0xa4460d99c166d7b8ULL, 0x24dd5248ce520a83ULL, 0x5ec3ad712b928358ULL, 1256 0x15022a5fbd17930fULL, 0xa4f64a77d82570e3ULL, 0x12bc8d6915783712ULL, 0x498194c0fc620abbULL, 1257 0x38a2d9d255686c82ULL, 0x785c6bd9193e21f0ULL, 0xe4d5c81ab24a5484ULL, 0x56307860b2e20989ULL, 1258 0x429d55f78b4d74c4ULL, 0x22f1834643350131ULL, 0x1e60c24598c71fffULL, 0x59f2f014979983efULL, 1259 0x46a47d56eb494a44ULL, 0x3e22a854d636a18eULL, 0xb346e15274491c3bULL, 0x2ceafd4e5390cde7ULL, 1260 0xba8a8538be0d6675ULL, 0x4b9074bb50818e23ULL, 0xcbdab89085d304c3ULL, 0x61a24fe0e56192c4ULL, 1261 0xcb7615e6db525bcbULL, 0xdd7d8c35a567e4caULL, 0xe6b4153acafcdd69ULL, 0x2d668e097f3c9766ULL, 1262 0xa57e7e265ce55ef0ULL, 0x5d9f4e527cd4b967ULL, 0xfbc83606492fd1e5ULL, 0x090d52beb7c3f7aeULL, 1263 0x09b9515a1e7b4d7cULL, 0x1f266a2599da44c0ULL, 0xa1c49548e2c55504ULL, 0x7ef04287126f15ccULL, 1264 0xfed1659dbd30ef15ULL, 0x8b4ab9eec4e0277bULL, 0x884d6236a5df3291ULL, 0x1fd96ea6bf5cf788ULL, 1265 0x42a161981f190d9aULL, 
0x61d849507e6052c1ULL, 0x9fe113bf285a2cd5ULL, 0x7c22d676dbad85d8ULL, 1266 0x82e770ed2bfbd27dULL, 0x4c05b2ece996f5a5ULL, 0xcd40a9c2b0900150ULL, 0x5895319213d9bf64ULL, 1267 0xe7cc5d703fea2e08ULL, 0xb50c491258e2188cULL, 0xcce30baa48205bf0ULL, 0x537c659ccfa32d62ULL, 1268 0x37b6623a98cfc088ULL, 0xfe9bed1fa4d6aca4ULL, 0x04d29b8e56a8d1b0ULL, 0x725f71c40b519575ULL, 1269 0x28c7f89cd0339ce6ULL, 0x8367b14469ddc18bULL, 0x883ada83a6a1652cULL, 0x585f1974034d6c17ULL, 1270 0x89cfb266f1b19188ULL, 0xe63b4863e7c35217ULL, 0xd88c9da6b4c0526aULL, 0x3e035c9df0954635ULL, 1271 0xdd9d5412fb45de9dULL, 0xdd684532e4cff40dULL, 0x4b5c999b151d671cULL, 0x2d8c2cc811e7f690ULL, 1272 0x7f54be1d90055d40ULL, 0xa464c5df464aaf40ULL, 0x33979624f0e917beULL, 0x2c018dc527356b30ULL, 1273 0xa5415024e330b3d4ULL, 0x73ff3d96691652d3ULL, 0x94ec42c4ef9b59f1ULL, 0x0747201618d08e5aULL, 1274 0x4d6ca48aca411c53ULL, 0x66415f2fcfa66119ULL, 0x9c4dd40051e227ffULL, 0x59810bc09a02f7ebULL, 1275 0x2a7eb171b3dc101dULL, 0x441c5ab99ffef68eULL, 0x32025c9b93b359eaULL, 0x5e8ce0a71e9d112fULL, 1276 0xbfcccb92429503fdULL, 0xd271ba752f095d55ULL, 0x345ead5e972d091eULL, 0x18c8df11a83103baULL, 1277 0x90cd949a9aed0f4cULL, 0xc5d1f4cb6660e37eULL, 0xb8cac52d56c52e0bULL, 0x6e42e400c5808e0dULL, 1278 0xa3b46966eeaefd23ULL, 0x0c4f1f0be39ecdcaULL, 0x189dc8c9d683a51dULL, 0x51f27f054c09351bULL, 1279 0x4c487ccd2a320682ULL, 0x587ea95bb3df1c96ULL, 0xc8ccf79e555cb8e8ULL, 0x547dc829a206d73dULL, 1280 0xb822a6cd80c39b06ULL, 0xe96d54732000d4c6ULL, 0x28535b6f91463b4dULL, 0x228f4660e2486e1dULL, 1281 0x98799538de8d3abfULL, 0x8cd8330045ebca6eULL, 0x79952a008221e738ULL, 0x4322e1a7535cd2bbULL, 1282 0xb114c11819d1801cULL, 0x2016e4d84f3f5ec7ULL, 0xdd0e2df409260f4cULL, 0x5ec362c0ae5f7266ULL, 1283 0xc0462b18b8b2b4eeULL, 0x7cc8d950274d1afbULL, 0xf25f7105436b02d2ULL, 0x43bbf8dcbff9ccd3ULL, 1284 0xb6ad1767a039e9dfULL, 0xb0714da8f69d3583ULL, 0x5e55fa18b42931f5ULL, 0x4ed5558f33c60961ULL, 1285 0x1fe37901c647a5ddULL, 0x593ddf1f8081d357ULL, 0x0249a4fd813fd7a6ULL, 
0x69acca274e9caf61ULL, 1286 0x047ba3ea330721c9ULL, 0x83423fc20e7e1ea0ULL, 0x1df4c0af01314a60ULL, 0x09a62dab89289527ULL, 1287 0xa5b325a49cc6cb00ULL, 0xe94b5dc654b56cb6ULL, 0x3be28779adc994a0ULL, 0x4296e8f8ba3a4aadULL, 1288 0x328689761e451eabULL, 0x2e4d598bff59594aULL, 0x49b96853d7a7084aULL, 0x4980a319601420a8ULL, 1289 0x9565b9e12f552c42ULL, 0x8a5318db7100fe96ULL, 0x05c90b4d43add0d7ULL, 0x538b4cd66a5d4edaULL, 1290 0xf4e94fc3e89f039fULL, 0x592c9af26f618045ULL, 0x08a36eb5fd4b9550ULL, 0x25fffaf6c2ed1419ULL, 1291 0x34434459cc79d354ULL, 0xeeecbfb4b1d5476bULL, 0xddeb34a061615d99ULL, 0x5129cecceb64b773ULL, 1292 0xee43215894993520ULL, 0x772f9c7cf14c0b3bULL, 0xd2e2fce306bedad5ULL, 0x715f42b546f06a97ULL, 1293 0x434ecdceda5b5f1aULL, 0x0da17115a49741a9ULL, 0x680bd77c73edad2eULL, 0x487c02354edd9041ULL, 1294 0xb8efeff3a70ed9c4ULL, 0x56a32aa3e857e302ULL, 0xdf3a68bd48a2a5a0ULL, 0x07f650b73176c444ULL, 1295 0xe38b9b1626e0ccb1ULL, 0x79e053c18b09fb36ULL, 0x56d90319c9f94964ULL, 0x1ca941e7ac9ff5c4ULL, 1296 0x49c4df29162fa0bbULL, 0x8488cf3282b33305ULL, 0x95dfda14cabb437dULL, 0x3391f78264d5ad86ULL, 1297 0x729ae06ae2b5095dULL, 0xd58a58d73259a946ULL, 0xe9834262d13921edULL, 0x27fedafaa54bb592ULL, 1298 0xa99dc5b829ad48bbULL, 0x5f025742499ee260ULL, 0x802c8ecd5d7513fdULL, 0x78ceb3ef3f6dd938ULL, 1299 0xc342f44f8a135d94ULL, 0x7b9edb44828cdda3ULL, 0x9436d11a0537cfe7ULL, 0x5064b164ec1ab4c8ULL, 1300 0x7020eccfd37eb2fcULL, 0x1f31ea3ed90d25fcULL, 0x1b930d7bdfa1bb34ULL, 0x5344467a48113044ULL, 1301 0x70073170f25e6dfbULL, 0xe385dc1a50114cc8ULL, 0x2348698ac8fc4f00ULL, 0x2a77a55284dd40d8ULL, 1302 0xfe06afe0c98c6ce4ULL, 0xc235df96dddfd6e4ULL, 0x1428d01e33bf1ed3ULL, 0x785768ec9300bdafULL, 1303 0x9702e57a91deb63bULL, 0x61bdb8bfe5ce8b80ULL, 0x645b426f3d1d58acULL, 0x4804a82227a557bcULL, 1304 0x8e57048ab44d2601ULL, 0x68d6501a4b3a6935ULL, 0xc39c9ec3f9e1c293ULL, 0x4172f257d4de63e2ULL, 1305 0xd368b450330c6401ULL, 0x040d3017418f2391ULL, 0x2c34bb6090b7d90dULL, 0x16f649228fdfd51fULL, 1306 0xbea6818e2b928ef5ULL, 
0xe28ccf91cdc11e72ULL, 0x594aaa68e77a36cdULL, 0x313034806c7ffd0fULL, 1307 0x8a9d27ac2249bd65ULL, 0x19a3b464018e9512ULL, 0xc26ccff352b37ec7ULL, 0x056f68341d797b21ULL, 1308 0x5e79d6757efd2327ULL, 0xfabdbcb6553afe15ULL, 0xd3e7222c6eaf5a60ULL, 0x7046c76d4dae743bULL, 1309 0x660be872b18d4a55ULL, 0x19992518574e1496ULL, 0xc103053a302bdcbbULL, 0x3ed8e9800b218e8eULL, 1310 0x7b0b9239fa75e03eULL, 0xefe9fb684633c083ULL, 0x98a35fbe391a7793ULL, 0x6065510fe2d0fe34ULL, 1311 0x55cb668548abad0cULL, 0xb4584548da87e527ULL, 0x2c43ecea0107c1ddULL, 0x526028809372de35ULL, 1312 0x3415c56af9213b1fULL, 0x5bee1a4d017e98dbULL, 0x13f6b105b5cf709bULL, 0x5ff20e3482b29ab6ULL, 1313 0x0aa29c75cc2e6c90ULL, 0xfc7d73ca3a70e206ULL, 0x899fc38fc4b5c515ULL, 0x250386b124ffc207ULL, 1314 0x54ea28d5ae3d2b56ULL, 0x9913149dd6de60ceULL, 0x16694fc58f06d6c1ULL, 0x46b23975eb018fc7ULL, 1315 0x470a6a0fb4b7b4e2ULL, 0x5d92475a8f7253deULL, 0xabeee5b52fbd3adbULL, 0x7fa20801a0806968ULL, 1316 0x76f3faf19f7714d2ULL, 0xb3e840c12f4660c3ULL, 0x0fb4cd8df212744eULL, 0x4b065a251d3a2dd2ULL, 1317 0x5cebde383d77cd4aULL, 0x6adf39df882c9cb1ULL, 0xa2dd242eb09af759ULL, 0x3147c0e50e5f6422ULL, 1318 0x164ca5101d1350dbULL, 0xf8d13479c33fc962ULL, 0xe640ce4d13e5da08ULL, 0x4bdee0c45061f8baULL, 1319 0xd7c46dc1a4edb1c9ULL, 0x5514d7b6437fd98aULL, 0x58942f6bb2a1c00bULL, 0x2dffb2ab1d70710eULL, 1320 0xccdfcf2fc18b6d68ULL, 0xa8ebcba8b7806167ULL, 0x980697f95e2937e3ULL, 0x02fbba1cd0126e8cULL 1321 }; 1322 1323 static void curve25519_ever64_base(u8 *out, const u8 *priv) 1324 { 1325 u64 swap = 1; 1326 int i, j, k; 1327 u64 tmp[16 + 32 + 4]; 1328 u64 *x1 = &tmp[0]; 1329 u64 *z1 = &tmp[4]; 1330 u64 *x2 = &tmp[8]; 1331 u64 *z2 = &tmp[12]; 1332 u64 *xz1 = &tmp[0]; 1333 u64 *xz2 = &tmp[8]; 1334 u64 *a = &tmp[0 + 16]; 1335 u64 *b = &tmp[4 + 16]; 1336 u64 *c = &tmp[8 + 16]; 1337 u64 *ab = &tmp[0 + 16]; 1338 u64 *abcd = &tmp[0 + 16]; 1339 u64 *ef = &tmp[16 + 16]; 1340 u64 *efgh = &tmp[16 + 16]; 1341 u64 *key = &tmp[0 + 16 + 32]; 1342 1343 memcpy(key, priv, 32); 
1344 ((u8 *)key)[0] &= 248; 1345 ((u8 *)key)[31] = (((u8 *)key)[31] & 127) | 64; 1346 1347 x1[0] = 1, x1[1] = x1[2] = x1[3] = 0; 1348 z1[0] = 1, z1[1] = z1[2] = z1[3] = 0; 1349 z2[0] = 1, z2[1] = z2[2] = z2[3] = 0; 1350 memcpy(x2, p_minus_s, sizeof(p_minus_s)); 1351 1352 j = 3; 1353 for (i = 0; i < 4; ++i) { 1354 while (j < (const int[]){ 64, 64, 64, 63 }[i]) { 1355 u64 bit = (key[i] >> j) & 1; 1356 k = (64 * i + j - 3); 1357 swap = swap ^ bit; 1358 cswap2(swap, xz1, xz2); 1359 swap = bit; 1360 fsub(b, x1, z1); 1361 fadd(a, x1, z1); 1362 fmul(c, &table_ladder[4 * k], b, ef); 1363 fsub(b, a, c); 1364 fadd(a, a, c); 1365 fsqr2(ab, ab, efgh); 1366 fmul2(xz1, xz2, ab, efgh); 1367 ++j; 1368 } 1369 j = 0; 1370 } 1371 1372 point_double(xz1, abcd, efgh); 1373 point_double(xz1, abcd, efgh); 1374 point_double(xz1, abcd, efgh); 1375 encode_point(out, xz1); 1376 1377 memzero_explicit(tmp, sizeof(tmp)); 1378 } 1379 1380 static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2_adx); 1381 1382 void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], 1383 const u8 secret[CURVE25519_KEY_SIZE], 1384 const u8 basepoint[CURVE25519_KEY_SIZE]) 1385 { 1386 if (static_branch_likely(&curve25519_use_bmi2_adx)) 1387 curve25519_ever64(mypublic, secret, basepoint); 1388 else 1389 curve25519_generic(mypublic, secret, basepoint); 1390 } 1391 EXPORT_SYMBOL(curve25519_arch); 1392 1393 void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], 1394 const u8 secret[CURVE25519_KEY_SIZE]) 1395 { 1396 if (static_branch_likely(&curve25519_use_bmi2_adx)) 1397 curve25519_ever64_base(pub, secret); 1398 else 1399 curve25519_generic(pub, secret, curve25519_base_point); 1400 } 1401 EXPORT_SYMBOL(curve25519_base_arch); 1402 1403 static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf, 1404 unsigned int len) 1405 { 1406 u8 *secret = kpp_tfm_ctx(tfm); 1407 1408 if (!len) 1409 curve25519_generate_secret(secret); 1410 else if (len == CURVE25519_KEY_SIZE && 1411 crypto_memneq(buf, 
curve25519_null_point, CURVE25519_KEY_SIZE)) 1412 memcpy(secret, buf, CURVE25519_KEY_SIZE); 1413 else 1414 return -EINVAL; 1415 return 0; 1416 } 1417 1418 static int curve25519_generate_public_key(struct kpp_request *req) 1419 { 1420 struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); 1421 const u8 *secret = kpp_tfm_ctx(tfm); 1422 u8 buf[CURVE25519_KEY_SIZE]; 1423 int copied, nbytes; 1424 1425 if (req->src) 1426 return -EINVAL; 1427 1428 curve25519_base_arch(buf, secret); 1429 1430 /* might want less than we've got */ 1431 nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); 1432 copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, 1433 nbytes), 1434 buf, nbytes); 1435 if (copied != nbytes) 1436 return -EINVAL; 1437 return 0; 1438 } 1439 1440 static int curve25519_compute_shared_secret(struct kpp_request *req) 1441 { 1442 struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); 1443 const u8 *secret = kpp_tfm_ctx(tfm); 1444 u8 public_key[CURVE25519_KEY_SIZE]; 1445 u8 buf[CURVE25519_KEY_SIZE]; 1446 int copied, nbytes; 1447 1448 if (!req->src) 1449 return -EINVAL; 1450 1451 copied = sg_copy_to_buffer(req->src, 1452 sg_nents_for_len(req->src, 1453 CURVE25519_KEY_SIZE), 1454 public_key, CURVE25519_KEY_SIZE); 1455 if (copied != CURVE25519_KEY_SIZE) 1456 return -EINVAL; 1457 1458 curve25519_arch(buf, secret, public_key); 1459 1460 /* might want less than we've got */ 1461 nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); 1462 copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, 1463 nbytes), 1464 buf, nbytes); 1465 if (copied != nbytes) 1466 return -EINVAL; 1467 return 0; 1468 } 1469 1470 static unsigned int curve25519_max_size(struct crypto_kpp *tfm) 1471 { 1472 return CURVE25519_KEY_SIZE; 1473 } 1474 1475 static struct kpp_alg curve25519_alg = { 1476 .base.cra_name = "curve25519", 1477 .base.cra_driver_name = "curve25519-x86", 1478 .base.cra_priority = 200, 1479 .base.cra_module = THIS_MODULE, 1480 .base.cra_ctxsize = CURVE25519_KEY_SIZE, 
1481 1482 .set_secret = curve25519_set_secret, 1483 .generate_public_key = curve25519_generate_public_key, 1484 .compute_shared_secret = curve25519_compute_shared_secret, 1485 .max_size = curve25519_max_size, 1486 }; 1487 1488 1489 static int __init curve25519_mod_init(void) 1490 { 1491 if (boot_cpu_has(X86_FEATURE_BMI2) && boot_cpu_has(X86_FEATURE_ADX)) 1492 static_branch_enable(&curve25519_use_bmi2_adx); 1493 else 1494 return 0; 1495 return IS_REACHABLE(CONFIG_CRYPTO_KPP) ? 1496 crypto_register_kpp(&curve25519_alg) : 0; 1497 } 1498 1499 static void __exit curve25519_mod_exit(void) 1500 { 1501 if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && 1502 (boot_cpu_has(X86_FEATURE_BMI2) || boot_cpu_has(X86_FEATURE_ADX))) 1503 crypto_unregister_kpp(&curve25519_alg); 1504 } 1505 1506 module_init(curve25519_mod_init); 1507 module_exit(curve25519_mod_exit); 1508 1509 MODULE_ALIAS_CRYPTO("curve25519"); 1510 MODULE_ALIAS_CRYPTO("curve25519-x86"); 1511 MODULE_LICENSE("GPL v2"); 1512 MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); 1513