/*
 * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch	armv8-a+crypto

	/*
	 * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
	 *			     u32 *macp, u8 const rk[], u32 rounds);
	 *
	 * Fold abytes bytes of input into the running CBC-MAC.
	 *
	 * In:	x0 = mac (16 bytes, updated in place)
	 *	x1 = in
	 *	w2 = abytes
	 *	x3 = macp; *macp is the number of input bytes already
	 *	     xor'ed into the current, not yet encrypted, block.
	 *	     It is read on entry and rewritten on exit.
	 *	x4 = rk (round keys, 16 bytes each)
	 *	w5 = rounds
	 *
	 * Clobbers: x0-x2, x6-x8, v0, v1, v3-v5, condition flags.
	 *
	 * Note: when *macp is non-zero the byte loop loads a byte before
	 * it tests the remaining length, so abytes must be non-zero then.
	 */
ENTRY(ce_aes_ccm_auth_data)
	ldr	w8, [x3]			/* leftover from prev round? */
	ld1	{v0.16b}, [x0]			/* load mac */
	cbz	w8, 1f
	sub	w8, w8, #16			/* w8 = -(bytes missing from block) */
	eor	v1.16b, v1.16b, v1.16b
0:	ldrb	w7, [x1], #1			/* get 1 byte of input */
	subs	w2, w2, #1
	add	w8, w8, #1
	ins	v1.b[0], w7
	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
	beq	8f				/* out of input? */
	cbnz	w8, 0b
	eor	v0.16b, v0.16b, v1.16b		/* block complete: fold into mac */

	/*
	 * Encrypt the mac. The comparison of the round count against 12
	 * selects where the 3-rounds-per-iteration loop is entered:
	 * below 12 at label 3, equal to 12 at label 4, above 12 at label 5.
	 */
1:	ld1	{v3.16b}, [x4]			/* load first round key */
	prfm	pldl1strm, [x1]
	cmp	w5, #12				/* which key size? */
	add	x6, x4, #16
	sub	w7, w5, #2			/* modified # of rounds */
	bmi	2f
	bne	5f
	mov	v5.16b, v3.16b
	b	4f
2:	mov	v4.16b, v3.16b
	ld1	{v5.16b}, [x6], #16		/* load 2nd round key */
3:	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
4:	ld1	{v3.16b}, [x6], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
5:	ld1	{v4.16b}, [x6], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	ld1	{v5.16b}, [x6], #16		/* load next round key */
	bpl	3b
	aese	v0.16b, v4.16b
	subs	w2, w2, #16			/* last data? */
	eor	v0.16b, v0.16b, v5.16b		/* final round */
	bmi	6f
	ld1	{v1.16b}, [x1], #16		/* load next input block */
	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
	bne	1b

	/*
	 * The flags still come from the 'subs w2, w2, #16' above: Z means
	 * the input ended on a block boundary, N means fewer than 16 bytes
	 * were left and w2 holds (remaining - 16).
	 */
6:	st1	{v0.16b}, [x0]			/* store mac */
	beq	10f
	adds	w2, w2, #16			/* w2 = remaining tail bytes */
	beq	10f
	mov	w8, w2				/* tail length becomes new *macp */
7:	ldrb	w7, [x1], #1			/* xor tail bytes into stored mac */
	umov	w6, v0.b[0]
	eor	w6, w6, w7
	strb	w6, [x0], #1
	subs	w2, w2, #1
	beq	10f
	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
	b	7b

	/*
	 * Out of input while completing a partial block; w8 is minus the
	 * number of bytes still missing. If it is zero the input ended
	 * exactly when the block filled up: v1 is already in position, so
	 * skip the alignment loop (which counts w7 up to zero and would
	 * otherwise only terminate after wrapping around) and leave *macp
	 * at 0 so the next call starts by encrypting the mac.
	 */
8:	cbz	w8, 11f
	mov	w7, w8
	add	w8, w8, #16			/* new *macp = bytes now in block */
9:	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate bytes to their offset */
	adds	w7, w7, #1
	bne	9b
11:	eor	v0.16b, v0.16b, v1.16b
	st1	{v0.16b}, [x0]
10:	str	w8, [x3]
	ret
ENDPROC(ce_aes_ccm_auth_data)

	/*
	 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
	 * 			 u32 rounds);
	 *
	 * Encrypt the mac and the counter block at ctr with the same round
	 * keys, two blocks interleaved, and store their xor back into mac.
	 *
	 * In:	x0 = mac (16 bytes, updated in place)
	 *	x1 = ctr (16 bytes, read only)
	 *	x2 = rk
	 *	w3 = rounds
	 *
	 * Clobbers: x2, w3, v0, v1, v3-v5, condition flags.
	 */
ENTRY(ce_aes_ccm_final)
	ld1	{v3.16b}, [x2], #16		/* load first round key */
	ld1	{v0.16b}, [x0]			/* load mac */
	cmp	w3, #12				/* which key size? */
	sub	w3, w3, #2			/* modified # of rounds */
	ld1	{v1.16b}, [x1]			/* load 1st ctriv */
	bmi	0f
	bne	3f
	mov	v5.16b, v3.16b
	b	2f
0:	mov	v4.16b, v3.16b
1:	ld1	{v5.16b}, [x2], #16		/* load next round key */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
2:	ld1	{v3.16b}, [x2], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v4.16b}, [x2], #16		/* load next round key */
	subs	w3, w3, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	bpl	1b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
	st1	{v0.16b}, [x0]			/* store result */
	ret
ENDPROC(ce_aes_ccm_final)

	/*
	 * aes_ccm_do_crypt enc - body shared by ce_aes_ccm_encrypt (enc == 1)
	 * and ce_aes_ccm_decrypt (enc == 0).
	 *
	 * Each outer iteration increments the low 64 bits of the counter,
	 * encrypts the mac and the counter block interleaved, then combines
	 * the results with one 16-byte input block. A tail shorter than
	 * 16 bytes is handled byte by byte at label 6.
	 *
	 * In:	x0 = out, x1 = in, w2 = cbytes, x3 = rk, w4 = rounds,
	 *	x5 = mac, x6 = ctr
	 *
	 * The mac is written back through x5 on both exits. The low counter
	 * word is written back to ctr only when the input ends on a block
	 * boundary; the partial-tail path returns without storing it.
	 *
	 * Clobbers: x0-x2, x7-x10, v0-v5, condition flags; the partial-tail
	 * path additionally overwrites w6 and advances x5.
	 */
	.macro	aes_ccm_do_crypt,enc
	ldr	x8, [x6, #8]			/* load lower ctr */
	ld1	{v0.16b}, [x5]			/* load mac */
CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
0:	/* outer loop */
	ld1	{v1.8b}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1
	rev	x9, x8
	cmp	w4, #12				/* which key size? */
	sub	w7, w4, #2			/* get modified # of rounds */
	ins	v1.d[1], x9			/* no carry in lower ctr */
	ld1	{v3.16b}, [x3]			/* load first round key */
	add	x10, x3, #16
	bmi	1f
	bne	4f
	mov	v5.16b, v3.16b
	b	3f
1:	mov	v4.16b, v3.16b
	ld1	{v5.16b}, [x10], #16		/* load 2nd round key */
2:	/* inner loop: 3 rounds, 2x interleaved */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v3.16b}, [x10], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
4:	ld1	{v4.16b}, [x10], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	ld1	{v5.16b}, [x10], #16		/* load next round key */
	bpl	2b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	subs	w2, w2, #16
	bmi	6f				/* partial block? */
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v1.16b}, [x0], #16		/* write output block */
	bne	0b				/* flags from 'subs w2' above */
CPU_LE(	rev	x8, x8			)
	st1	{v0.16b}, [x5]			/* store mac */
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
5:	ret

6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
	st1	{v0.16b}, [x5]			/* store mac */
	add	w2, w2, #16			/* process partial tail block */
7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
	umov	w6, v1.b[0]			/* get top crypted ctr byte */
	umov	w7, v0.b[0]			/* get top mac byte */
	.if	\enc == 1
	eor	w7, w7, w9			/* mac ^= input byte */
	eor	w9, w9, w6			/* out = input ^ ctr byte */
	.else
	eor	w9, w9, w6			/* out = input ^ ctr byte */
	eor	w7, w7, w9			/* mac ^= output byte */
	.endif
	strb	w9, [x0], #1			/* store out byte */
	strb	w7, [x5], #1			/* store mac byte */
	subs	w2, w2, #1
	beq	5b
	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
	b	7b
	.endm

	/*
	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
	 * 			   u8 const rk[], u32 rounds, u8 mac[],
	 * 			   u8 ctr[]);
	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
	 * 			   u8 const rk[], u32 rounds, u8 mac[],
	 * 			   u8 ctr[]);
	 */
ENTRY(ce_aes_ccm_encrypt)
	aes_ccm_do_crypt	1
ENDPROC(ce_aes_ccm_encrypt)

ENTRY(ce_aes_ccm_decrypt)
	aes_ccm_do_crypt	0
ENDPROC(ce_aes_ccm_decrypt)