/*
 * Implement AES algorithm in Intel AES-NI instructions.
 *
 * The white paper of AES-NI instructions can be downloaded from:
 *   http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
 *
 * Copyright (C) 2008, Intel Corp.
 *    Author: Huang Ying <ying.huang@intel.com>
 *            Vinodh Gopal <vinodh.gopal@intel.com>
 *            Kahraman Akdemir
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

/*
 * Conventions used throughout this file:
 *   - AT&T syntax, x86-64 only (64-bit registers, %xmm8/%xmm9).
 *   - Exported functions take their arguments in the registers named by
 *     KEYP/OUTP/INP/LEN/IVP below (first five integer argument registers).
 *   - The context is accessed with movaps, so it must be 16-byte aligned;
 *     data buffers and the IV are accessed with movups.
 *   - AES-NI instructions are emitted as raw bytes through the macros
 *     below, so the file assembles without AES-NI mnemonic support.
 */

#include <linux/linkage.h>

.text

#define STATE1	%xmm0
#define STATE2	%xmm4
#define STATE3	%xmm5
#define STATE4	%xmm6
#define STATE	STATE1
#define IN1	%xmm1
#define IN2	%xmm7
#define IN3	%xmm8
#define IN4	%xmm9
#define IN	IN1
#define KEY	%xmm2
#define IV	%xmm3

#define KEYP	%rdi
#define OUTP	%rsi
#define INP	%rdx
#define LEN	%rcx
#define IVP	%r8
#define KLEN	%r9d
#define T1	%r10
#define TKEYP	T1
#define T2	%r11

/*
 * Context layout as used by this file: encryption round keys start at
 * offset 0, decryption round keys at CTX_DEC_KEYS, and the 32-bit key
 * length (in bytes) is stored at CTX_KEY_LEN.
 */
#define CTX_DEC_KEYS	240
#define CTX_KEY_LEN	480

/* Third opcode byte of the hand-encoded 66 0f 38 xx AES round instructions. */
#define OPC_AESENC	0xdc
#define OPC_AESENCLAST	0xdd
#define OPC_AESDEC	0xde
#define OPC_AESDECLAST	0xdf

/* aeskeygenassist $rcon, %xmm0, %xmm1 */
.macro KEYGENASSIST_XMM0 rcon
	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, \rcon
.endm

/* aeskeygenassist $rcon, %xmm2, %xmm1 */
.macro KEYGENASSIST_XMM2 rcon
	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, \rcon
.endm

/*
 * Load the round key at off(TKEYP) into KEY and apply the AES instruction
 * selected by opc (one of OPC_*) to STATE.  Clobbers KEY.
 */
.macro AES_ROUND1 opc, off
	movaps \off(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, \opc, 0xc2	# <op> KEY, STATE
.endm

/*
 * Same as AES_ROUND1, but applies the instruction to STATE1..STATE4 with
 * a single round-key load.  Clobbers KEY.
 */
.macro AES_ROUND4 opc, off
	movaps \off(TKEYP), KEY
	.byte 0x66, 0x0f, 0x38, \opc, 0xc2	# <op> KEY, STATE1
	.byte 0x66, 0x0f, 0x38, \opc, 0xe2	# <op> KEY, STATE2
	.byte 0x66, 0x0f, 0x38, \opc, 0xea	# <op> KEY, STATE3
	.byte 0x66, 0x0f, 0x38, \opc, 0xf2	# <op> KEY, STATE4
.endm

/*
 * _key_expansion_128 / _key_expansion_256a: internal ABI
 * input:
 *	%xmm0:	previous round key
 *	%xmm1:	aeskeygenassist result for this round
 *	%xmm4:	scratch, low dword must be 0 on entry (set by aesni_set_key)
 *	%rcx:	address at which the new round key is stored
 * output:
 *	%xmm0:	new round key, also stored at (%rcx)
 *	%rcx:	advanced by 16
 * changed:
 *	%xmm1, %xmm4
 */
_key_expansion_128:
_key_expansion_256a:
	pshufd $0b11111111, %xmm1, %xmm1	# broadcast dword 3 of the assist value
	/* the two shufps/pxor pairs leave the running XOR of the dwords in xmm0 */
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0
	movaps %xmm0, (%rcx)
	add $0x10, %rcx
	ret

/*
 * _key_expansion_192a: internal ABI
 * input:
 *	%xmm0, %xmm2:	current 192-bit key state
 *	%xmm1:		aeskeygenassist result for this round
 *	%xmm4:		scratch, low dword must be 0 on entry
 *	%rcx:		store address
 * output:
 *	%xmm0, %xmm2:	updated key state
 *	32 bytes of key schedule stored at (%rcx); %rcx advanced by 32
 * changed:
 *	%xmm1, %xmm3, %xmm4, %xmm5, %xmm6
 */
_key_expansion_192a:
	pshufd $0b01010101, %xmm1, %xmm1	# broadcast dword 1 of the assist value
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0

	movaps %xmm2, %xmm5
	movaps %xmm2, %xmm6			# keep the old xmm2 for the first store
	pslldq $4, %xmm5
	pshufd $0b11111111, %xmm0, %xmm3
	pxor %xmm3, %xmm2
	pxor %xmm5, %xmm2

	movaps %xmm0, %xmm1
	shufps $0b01000100, %xmm0, %xmm6	# old xmm2 low half : new xmm0 low half
	movaps %xmm6, (%rcx)
	shufps $0b01001110, %xmm2, %xmm1	# new xmm0 high half : new xmm2 low half
	movaps %xmm1, 16(%rcx)
	add $0x20, %rcx
	ret

/*
 * _key_expansion_192b: internal ABI
 * Same inputs and key-state update as _key_expansion_192a, but stores only
 * the new %xmm0 (16 bytes) at (%rcx) and advances %rcx by 16.
 * changed:
 *	%xmm1, %xmm3, %xmm4, %xmm5
 */
_key_expansion_192b:
	pshufd $0b01010101, %xmm1, %xmm1
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0

	movaps %xmm2, %xmm5
	pslldq $4, %xmm5
	pshufd $0b11111111, %xmm0, %xmm3
	pxor %xmm3, %xmm2
	pxor %xmm5, %xmm2

	movaps %xmm0, (%rcx)
	add $0x10, %rcx
	ret

/*
 * _key_expansion_256b: internal ABI
 * Counterpart of _key_expansion_256a for the second key half: uses dword 2
 * of the assist value in %xmm1 and updates/stores %xmm2 instead of %xmm0.
 * %rcx is advanced by 16.
 * changed:
 *	%xmm1, %xmm4
 */
_key_expansion_256b:
	pshufd $0b10101010, %xmm1, %xmm1
	shufps $0b00010000, %xmm2, %xmm4
	pxor %xmm4, %xmm2
	shufps $0b10001100, %xmm2, %xmm4
	pxor %xmm4, %xmm2
	pxor %xmm1, %xmm2
	movaps %xmm2, (%rcx)
	add $0x10, %rcx
	ret

/*
 * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
 *                   unsigned int key_len)
 *
 * Expands in_key into the encryption round keys at ctx + 0, stores key_len
 * at ctx + CTX_KEY_LEN and derives the decryption round keys at
 * ctx + CTX_DEC_KEYS.  Always returns 0.
 *
 * key_len is not validated here: values below 24 take the 128-bit path,
 * 24 the 192-bit path and anything else the 256-bit path.
 *
 * register use:
 *	%rdi:	ctx (walks the encryption keys in the final loop)
 *	%rsi:	in_key (reused as decryption-key store pointer)
 *	%edx:	key_len
 *	%rcx:	address of the next round key to be written
 */
ENTRY(aesni_set_key)
	movups (%rsi), %xmm0		# user key (first 16 bytes)
	movaps %xmm0, (%rdi)
	lea 0x10(%rdi), %rcx		# key addr
	movl %edx, CTX_KEY_LEN(%rdi)
	pxor %xmm4, %xmm4		# xmm4 is assumed 0 in _key_expansion_x
	cmp $24, %dl
	jb .Lenc_key128
	je .Lenc_key192
	movups 0x10(%rsi), %xmm2	# other user key
	movaps %xmm2, (%rcx)
	add $0x10, %rcx
	KEYGENASSIST_XMM2 0x01		# round 1
	call _key_expansion_256a
	KEYGENASSIST_XMM0 0x01
	call _key_expansion_256b
	KEYGENASSIST_XMM2 0x02		# round 2
	call _key_expansion_256a
	KEYGENASSIST_XMM0 0x02
	call _key_expansion_256b
	KEYGENASSIST_XMM2 0x04		# round 3
	call _key_expansion_256a
	KEYGENASSIST_XMM0 0x04
	call _key_expansion_256b
	KEYGENASSIST_XMM2 0x08		# round 4
	call _key_expansion_256a
	KEYGENASSIST_XMM0 0x08
	call _key_expansion_256b
	KEYGENASSIST_XMM2 0x10		# round 5
	call _key_expansion_256a
	KEYGENASSIST_XMM0 0x10
	call _key_expansion_256b
	KEYGENASSIST_XMM2 0x20		# round 6
	call _key_expansion_256a
	KEYGENASSIST_XMM0 0x20
	call _key_expansion_256b
	KEYGENASSIST_XMM2 0x40		# round 7
	call _key_expansion_256a
	jmp .Ldec_key
.Lenc_key192:
	movq 0x10(%rsi), %xmm2		# other user key
	KEYGENASSIST_XMM2 0x01		# round 1
	call _key_expansion_192a
	KEYGENASSIST_XMM2 0x02		# round 2
	call _key_expansion_192b
	KEYGENASSIST_XMM2 0x04		# round 3
	call _key_expansion_192a
	KEYGENASSIST_XMM2 0x08		# round 4
	call _key_expansion_192b
	KEYGENASSIST_XMM2 0x10		# round 5
	call _key_expansion_192a
	KEYGENASSIST_XMM2 0x20		# round 6
	call _key_expansion_192b
	KEYGENASSIST_XMM2 0x40		# round 7
	call _key_expansion_192a
	KEYGENASSIST_XMM2 0x80		# round 8
	call _key_expansion_192b
	jmp .Ldec_key
.Lenc_key128:
	KEYGENASSIST_XMM0 0x01		# round 1
	call _key_expansion_128
	KEYGENASSIST_XMM0 0x02		# round 2
	call _key_expansion_128
	KEYGENASSIST_XMM0 0x04		# round 3
	call _key_expansion_128
	KEYGENASSIST_XMM0 0x08		# round 4
	call _key_expansion_128
	KEYGENASSIST_XMM0 0x10		# round 5
	call _key_expansion_128
	KEYGENASSIST_XMM0 0x20		# round 6
	call _key_expansion_128
	KEYGENASSIST_XMM0 0x40		# round 7
	call _key_expansion_128
	KEYGENASSIST_XMM0 0x80		# round 8
	call _key_expansion_128
	KEYGENASSIST_XMM0 0x1b		# round 9
	call _key_expansion_128
	KEYGENASSIST_XMM0 0x36		# round 10
	call _key_expansion_128
.Ldec_key:
	/*
	 * Build the decryption schedule: the encryption keys in reverse
	 * order, with aesimc applied to every key except the first and
	 * the last, which are copied unchanged.
	 */
	sub $0x10, %rcx			# rcx = last encryption round key
	movaps (%rdi), %xmm0
	movaps (%rcx), %xmm1
	movaps %xmm0, CTX_DEC_KEYS(%rcx)	# first enc key -> last dec key
	movaps %xmm1, CTX_DEC_KEYS(%rdi)	# last enc key -> first dec key
	add $0x10, %rdi
	lea CTX_DEC_KEYS-16(%rcx), %rsi
.align 4
.Ldec_key_loop:
	movaps (%rdi), %xmm0
	.byte 0x66, 0x0f, 0x38, 0xdb, 0xc8	# aesimc %xmm0, %xmm1
	movaps %xmm1, (%rsi)
	add $0x10, %rdi			# read pointer walks up
	sub $0x10, %rsi			# write pointer walks down
	cmp %rcx, %rdi
	jb .Ldec_key_loop
	xor %eax, %eax			# return 0 (32-bit write clears all of rax)
	ret

/*
 * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
 *
 * Encrypts the single 16-byte block at src into dst.
 */
ENTRY(aesni_enc)
	movl CTX_KEY_LEN(KEYP), KLEN	# key length
	movups (INP), STATE		# input
	call _aesni_enc1
	movups STATE, (OUTP)		# output
	ret

/*
 * _aesni_enc1:		internal ABI
 * input:
 *	KEYP:		pointer to the encryption round keys
 *	KLEN:		key length in bytes, as stored by aesni_set_key
 *			(below 24: 10 rounds, 24: 12 rounds, above: 14 rounds)
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
_aesni_enc1:
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE			# round 0
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .Lenc128
	lea 0x20(TKEYP), TKEYP		# lea leaves the cmp flags intact for the je
	je .Lenc192
	add $0x20, TKEYP
	AES_ROUND1 OPC_AESENC, -0x60
	AES_ROUND1 OPC_AESENC, -0x50
.align 4
.Lenc192:
	AES_ROUND1 OPC_AESENC, -0x40
	AES_ROUND1 OPC_AESENC, -0x30
.align 4
.Lenc128:
	AES_ROUND1 OPC_AESENC, -0x20
	AES_ROUND1 OPC_AESENC, -0x10
	AES_ROUND1 OPC_AESENC, 0
	AES_ROUND1 OPC_AESENC, 0x10
	AES_ROUND1 OPC_AESENC, 0x20
	AES_ROUND1 OPC_AESENC, 0x30
	AES_ROUND1 OPC_AESENC, 0x40
	AES_ROUND1 OPC_AESENC, 0x50
	AES_ROUND1 OPC_AESENC, 0x60
	AES_ROUND1 OPC_AESENCLAST, 0x70	# last round
	ret

/*
 * _aesni_enc4:		internal ABI
 * input:
 *	KEYP:		pointer to the encryption round keys
 *	KLEN:		key length in bytes, as stored by aesni_set_key
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
_aesni_enc4:
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE1		# round 0
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .L4enc128
	lea 0x20(TKEYP), TKEYP		# lea leaves the cmp flags intact for the je
	je .L4enc192
	add $0x20, TKEYP
	AES_ROUND4 OPC_AESENC, -0x60
	AES_ROUND4 OPC_AESENC, -0x50
/* no .align before this label in the 4-way encrypt path */
.L4enc192:
	AES_ROUND4 OPC_AESENC, -0x40
	AES_ROUND4 OPC_AESENC, -0x30
/* no .align before this label in the 4-way encrypt path */
.L4enc128:
	AES_ROUND4 OPC_AESENC, -0x20
	AES_ROUND4 OPC_AESENC, -0x10
	AES_ROUND4 OPC_AESENC, 0
	AES_ROUND4 OPC_AESENC, 0x10
	AES_ROUND4 OPC_AESENC, 0x20
	AES_ROUND4 OPC_AESENC, 0x30
	AES_ROUND4 OPC_AESENC, 0x40
	AES_ROUND4 OPC_AESENC, 0x50
	AES_ROUND4 OPC_AESENC, 0x60
	AES_ROUND4 OPC_AESENCLAST, 0x70	# last round
	ret

/*
 * void aesni_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
 *
 * Decrypts the single 16-byte block at src into dst.
 */
ENTRY(aesni_dec)
	mov CTX_KEY_LEN(KEYP), KLEN	# key length
	add $CTX_DEC_KEYS, KEYP		# switch to the decryption round keys
	movups (INP), STATE		# input
	call _aesni_dec1
	movups STATE, (OUTP)		# output
	ret

/*
 * _aesni_dec1:		internal ABI
 * input:
 *	KEYP:		pointer to the decryption round keys
 *	KLEN:		key length in bytes, as stored by aesni_set_key
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
_aesni_dec1:
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE			# round 0
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .Ldec128
	lea 0x20(TKEYP), TKEYP		# lea leaves the cmp flags intact for the je
	je .Ldec192
	add $0x20, TKEYP
	AES_ROUND1 OPC_AESDEC, -0x60
	AES_ROUND1 OPC_AESDEC, -0x50
.align 4
.Ldec192:
	AES_ROUND1 OPC_AESDEC, -0x40
	AES_ROUND1 OPC_AESDEC, -0x30
.align 4
.Ldec128:
	AES_ROUND1 OPC_AESDEC, -0x20
	AES_ROUND1 OPC_AESDEC, -0x10
	AES_ROUND1 OPC_AESDEC, 0
	AES_ROUND1 OPC_AESDEC, 0x10
	AES_ROUND1 OPC_AESDEC, 0x20
	AES_ROUND1 OPC_AESDEC, 0x30
	AES_ROUND1 OPC_AESDEC, 0x40
	AES_ROUND1 OPC_AESDEC, 0x50
	AES_ROUND1 OPC_AESDEC, 0x60
	AES_ROUND1 OPC_AESDECLAST, 0x70	# last round
	ret

/*
 * _aesni_dec4:		internal ABI
 * input:
 *	KEYP:		pointer to the decryption round keys
 *	KLEN:		key length in bytes, as stored by aesni_set_key
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
_aesni_dec4:
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE1		# round 0
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .L4dec128
	lea 0x20(TKEYP), TKEYP		# lea leaves the cmp flags intact for the je
	je .L4dec192
	add $0x20, TKEYP
	AES_ROUND4 OPC_AESDEC, -0x60
	AES_ROUND4 OPC_AESDEC, -0x50
.align 4
.L4dec192:
	AES_ROUND4 OPC_AESDEC, -0x40
	AES_ROUND4 OPC_AESDEC, -0x30
.align 4
.L4dec128:
	AES_ROUND4 OPC_AESDEC, -0x20
	AES_ROUND4 OPC_AESDEC, -0x10
	AES_ROUND4 OPC_AESDEC, 0
	AES_ROUND4 OPC_AESDEC, 0x10
	AES_ROUND4 OPC_AESDEC, 0x20
	AES_ROUND4 OPC_AESDEC, 0x30
	AES_ROUND4 OPC_AESDEC, 0x40
	AES_ROUND4 OPC_AESDEC, 0x50
	AES_ROUND4 OPC_AESDEC, 0x60
	AES_ROUND4 OPC_AESDECLAST, 0x70	# last round
	ret

/*
 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len)
 *
 * ECB-encrypts every whole 16-byte block of src into dst, four blocks at a
 * time while at least 64 bytes remain.  A trailing partial block (len % 16)
 * is ignored.
 */
ENTRY(aesni_ecb_enc)
	test LEN, LEN			# check length
	jz .Lecb_enc_ret
	mov CTX_KEY_LEN(KEYP), KLEN
	cmp $16, LEN
	jb .Lecb_enc_ret
	cmp $64, LEN
	jb .Lecb_enc_loop1
.align 4
.Lecb_enc_loop4:
	movups (INP), STATE1
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_enc4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lecb_enc_loop4		# unsigned: the length is a size_t
	cmp $16, LEN
	jb .Lecb_enc_ret
.align 4
.Lecb_enc_loop1:
	movups (INP), STATE1
	call _aesni_enc1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lecb_enc_loop1
.Lecb_enc_ret:
	ret

/*
 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len);
 *
 * ECB-decrypts every whole 16-byte block of src into dst, four blocks at a
 * time while at least 64 bytes remain.  A trailing partial block (len % 16)
 * is ignored.
 */
ENTRY(aesni_ecb_dec)
	test LEN, LEN
	jz .Lecb_dec_ret
	mov CTX_KEY_LEN(KEYP), KLEN
	add $CTX_DEC_KEYS, KEYP		# switch to the decryption round keys
	cmp $16, LEN
	jb .Lecb_dec_ret
	cmp $64, LEN
	jb .Lecb_dec_loop1
.align 4
.Lecb_dec_loop4:
	movups (INP), STATE1
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_dec4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lecb_dec_loop4		# unsigned: the length is a size_t
	cmp $16, LEN
	jb .Lecb_dec_ret
.align 4
.Lecb_dec_loop1:
	movups (INP), STATE1
	call _aesni_dec1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lecb_dec_loop1
.Lecb_dec_ret:
	ret

/*
 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-encrypts every whole 16-byte block of src into dst.  On return *iv
 * holds the last ciphertext block so a following call can continue the
 * chain.  When len is below 16 nothing is done and *iv is left untouched.
 */
ENTRY(aesni_cbc_enc)
	cmp $16, LEN
	jb .Lcbc_enc_ret
	mov CTX_KEY_LEN(KEYP), KLEN
	movups (IVP), STATE		# load iv as initial state
.align 4
.Lcbc_enc_loop:
	movups (INP), IN		# load input
	pxor IN, STATE			# chain: plaintext xor previous ciphertext
	call _aesni_enc1
	movups STATE, (OUTP)		# store output
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lcbc_enc_loop		# unsigned: the length is a size_t
	movups STATE, (IVP)		# save the chaining value
.Lcbc_enc_ret:
	ret

/*
 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-decrypts every whole 16-byte block of src into dst, four blocks at a
 * time while at least 64 bytes remain.  On return *iv holds the last
 * ciphertext block that was consumed.  When len is below 16 nothing is
 * done and *iv is left untouched.
 *
 * The chaining value is written back on every path that decrypted at
 * least one block, including the case where the 4-block loop consumed
 * all whole blocks and the single-block loop is skipped.
 */
ENTRY(aesni_cbc_dec)
	cmp $16, LEN
	jb .Lcbc_dec_just_ret		# nothing decrypted: leave *iv alone
	mov CTX_KEY_LEN(KEYP), KLEN
	add $CTX_DEC_KEYS, KEYP		# switch to the decryption round keys
	movups (IVP), IV
	cmp $64, LEN
	jb .Lcbc_dec_loop1
.align 4
.Lcbc_dec_loop4:
	/* keep a copy of each ciphertext block: it chains into the next one */
	movups (INP), IN1
	movaps IN1, STATE1
	movups 0x10(INP), IN2
	movaps IN2, STATE2
	movups 0x20(INP), IN3
	movaps IN3, STATE3
	movups 0x30(INP), IN4
	movaps IN4, STATE4
	call _aesni_dec4
	pxor IV, STATE1
	pxor IN1, STATE2
	pxor IN2, STATE3
	pxor IN3, STATE4
	movaps IN4, IV			# last ciphertext block is the next chaining value
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lcbc_dec_loop4		# unsigned: the length is a size_t
	cmp $16, LEN
	jb .Lcbc_dec_ret		# no whole block left, but still save the chaining value
.align 4
.Lcbc_dec_loop1:
	movups (INP), IN
	movaps IN, STATE
	call _aesni_dec1
	pxor IV, STATE
	movups STATE, (OUTP)
	movaps IN, IV
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lcbc_dec_loop1
.Lcbc_dec_ret:
	movups IV, (IVP)		# save the chaining value
.Lcbc_dec_just_ret:
	ret