/*
 * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch	armv8-a+crypto

	/*
	 * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
	 *			     u32 *macp, u8 const rk[], u32 rounds);
	 *
	 * Fold abytes bytes of input into the 16-byte MAC at mac[].
	 *
	 * Register use:
	 *   x0 = mac     loaded into v0, written back before returning
	 *   x1 = in      post-incremented as input is consumed
	 *   w2 = abytes  number of input bytes still to process
	 *   x3 = macp    *macp is loaded into w8 on entry and w8 is
	 *                stored back to *macp on every exit path
	 *   x4 = rk      round keys, read 16 bytes at a time
	 *   w5 = rounds  compared against 12 to pick the loop entry point
	 *   x6, w7, w8, v1, v3-v5 and the condition flags are scratch.
	 *
	 * A non-zero *macp is taken as the number of bytes of a partially
	 * filled block left over from an earlier call; input bytes are
	 * first collected in v1 until that block is complete or the input
	 * runs out.  On exit *macp holds the number of bytes in the
	 * partial block that is still pending (0 if there is none).
	 *
	 * The MAC is encrypted at label 1 before each full input block is
	 * xored in, so the value stored at mac[] after a full block is the
	 * xored, not yet encrypted, state.
	 *
	 * NOTE(review): the byte loop at label 0 tests abytes only after
	 * consuming a byte, so a call with *macp != 0 and abytes == 0 is
	 * presumably never made by the caller - confirm.
	 */
ENTRY(ce_aes_ccm_auth_data)
	ldr	w8, [x3]			/* leftover from prev round? */
	ld1	{v0.16b}, [x0]			/* load mac */
	cbz	w8, 1f
	sub	w8, w8, #16			/* w8 = -(bytes missing from block) */
	eor	v1.16b, v1.16b, v1.16b		/* v1 = 0, collects the input bytes */
0:	ldrb	w7, [x1], #1			/* get 1 byte of input */
	subs	w2, w2, #1
	add	w8, w8, #1
	ins	v1.b[0], w7
	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
	beq	8f				/* out of input? */
	cbnz	w8, 0b
	eor	v0.16b, v0.16b, v1.16b		/* block complete: xor into mac */
	/*
	 * Encrypt v0.  The loop at 3 performs three rounds per iteration
	 * with the keys rotating through v4, v5 and v3.  rounds < 12
	 * enters at 2 (all three rounds), rounds == 12 enters at 4 and
	 * rounds > 12 enters at 5, so that the first round key in v3 is
	 * consumed by the first round that is actually executed.
	 */
1:	ld1	{v3.4s}, [x4]			/* load first round key */
	prfm	pldl1strm, [x1]
	cmp	w5, #12				/* which key size? */
	add	x6, x4, #16
	sub	w7, w5, #2			/* modified # of rounds */
	bmi	2f
	bne	5f
	mov	v5.16b, v3.16b
	b	4f
2:	mov	v4.16b, v3.16b
	ld1	{v5.4s}, [x6], #16		/* load 2nd round key */
3:	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
4:	ld1	{v3.4s}, [x6], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
5:	ld1	{v4.4s}, [x6], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	ld1	{v5.4s}, [x6], #16		/* load next round key */
	bpl	3b
	aese	v0.16b, v4.16b
	subs	w2, w2, #16			/* last data? */
	eor	v0.16b, v0.16b, v5.16b		/* final round */
	bmi	6f
	ld1	{v1.16b}, [x1], #16		/* load next input block */
	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
	bne	1b
	/*
	 * The flags tested by the first beq below are still those of the
	 * subs w2, w2, #16 above: neither eor, ld1 nor st1 sets flags.
	 */
6:	st1	{v0.16b}, [x0]			/* store mac */
	beq	10f				/* input ended on a block boundary */
	adds	w2, w2, #16			/* w2 = bytes left, fewer than 16 */
	beq	10f
	mov	w8, w2				/* new *macp = size of the tail */
	/* xor the tail into mac[] in memory, one byte at a time */
7:	ldrb	w7, [x1], #1
	umov	w6, v0.b[0]
	eor	w6, w6, w7
	strb	w6, [x0], #1
	subs	w2, w2, #1
	beq	10f
	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
	b	7b
	/*
	 * Input ran out while collecting bytes in v1.  If w8 is zero the
	 * block was completed exactly and v1 is already aligned.
	 * Otherwise rotate v1 by the -w8 positions that are still missing
	 * so that the collected bytes line up with their offsets in the
	 * block, and turn w8 back into a byte count for *macp.
	 */
8:	cbz	w8, 91f
	mov	w7, w8
	add	w8, w8, #16
9:	ext	v1.16b, v1.16b, v1.16b, #1
	adds	w7, w7, #1
	bne	9b
91:	eor	v0.16b, v0.16b, v1.16b
	st1	{v0.16b}, [x0]
10:	str	w8, [x3]			/* update *macp */
	ret
ENDPROC(ce_aes_ccm_auth_data)

	/*
	 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
	 * 			 u32 rounds);
	 *
	 * Run the MAC (v0) and the counter block at ctr[] (v1) through
	 * the same AES rounds, two-way interleaved, and store the xor of
	 * the two results back to mac[].
	 *
	 * Register use:
	 *   x0 = mac     read into v0, result written back
	 *   x1 = ctr     read only
	 *   x2 = rk      post-incremented while round keys are loaded
	 *   w3 = rounds  compared against 12, then used as loop counter
	 *   v3-v5 and the condition flags are scratch.
	 *
	 * The last round key is never loaded: it would be xored into both
	 * v0 and v1 and therefore drops out of v0 ^ v1.
	 */
ENTRY(ce_aes_ccm_final)
	ld1	{v3.4s}, [x2], #16		/* load first round key */
	ld1	{v0.16b}, [x0]			/* load mac */
	cmp	w3, #12				/* which key size? */
	sub	w3, w3, #2			/* modified # of rounds */
	ld1	{v1.16b}, [x1]			/* load 1st ctriv */
	bmi	0f
	bne	3f
	mov	v5.16b, v3.16b
	b	2f
0:	mov	v4.16b, v3.16b
1:	ld1	{v5.4s}, [x2], #16		/* load next round key */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
2:	ld1	{v3.4s}, [x2], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v4.4s}, [x2], #16		/* load next round key */
	subs	w3, w3, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	bpl	1b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
	st1	{v0.16b}, [x0]			/* store result */
	ret
ENDPROC(ce_aes_ccm_final)

	/*
	 * aes_ccm_do_crypt enc
	 *
	 * Body shared by ce_aes_ccm_encrypt (enc == 1) and
	 * ce_aes_ccm_decrypt (enc == 0).  For every 16 bytes of input the
	 * MAC (v0) and the incremented counter block (v1) go through the
	 * AES rounds two-way interleaved; the input is then xored with
	 * the encrypted counter to form the output, and the plaintext
	 * side of that operation is xored into the MAC.
	 *
	 * Register use:
	 *   x0 = out     post-incremented
	 *   x1 = in      post-incremented
	 *   w2 = cbytes  bytes still to process
	 *   x3 = rk      round keys
	 *   w4 = rounds  compared against 12 to pick the loop entry point
	 *   x5 = mac     read into v0; written back before returning
	 *   x6 = ctr     bytes 0..7 are reloaded for every block, bytes
	 *                8..15 are kept in x8 as the running counter
	 *   w6, w7, x8, x9, x10, v2-v5 and the condition flags are
	 *   scratch.  w6 is overwritten in the partial-block tail, after
	 *   the last use of x6 as a pointer.
	 *
	 * The counter is written back to ctr[8..15] only when the input
	 * ends on a block boundary; the partial-block path at label 6
	 * returns without storing it.
	 *
	 * NOTE(review): there is no check for cbytes == 0; the byte loop
	 * at label 7 tests the count only after processing a byte, so
	 * callers presumably never pass 0 - confirm.
	 */
	.macro	aes_ccm_do_crypt,enc
	ldr	x8, [x6, #8]			/* load lower ctr */
	ld1	{v0.16b}, [x5]			/* load mac */
CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
0:	/* outer loop */
	ld1	{v1.8b}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1
	rev	x9, x8
	cmp	w4, #12				/* which key size? */
	sub	w7, w4, #2			/* get modified # of rounds */
	ins	v1.d[1], x9			/* no carry in lower ctr */
	ld1	{v3.4s}, [x3]			/* load first round key */
	add	x10, x3, #16
	bmi	1f
	bne	4f
	mov	v5.16b, v3.16b
	b	3f
1:	mov	v4.16b, v3.16b
	ld1	{v5.4s}, [x10], #16		/* load 2nd round key */
2:	/* inner loop: 3 rounds, 2x interleaved */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v3.4s}, [x10], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
4:	ld1	{v4.4s}, [x10], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	ld1	{v5.4s}, [x10], #16		/* load next round key */
	bpl	2b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	/*
	 * v0 and v1 both still lack the final round key, which is in v5.
	 * It is applied below through v2, so that a single xor covers
	 * both the output and the MAC update.
	 */
	subs	w2, w2, #16
	bmi	6f				/* partial block? */
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v1.16b}, [x0], #16		/* write output block */
	bne	0b				/* flags from subs w2 above */
CPU_LE(	rev	x8, x8			)
	st1	{v0.16b}, [x5]			/* store mac */
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
5:	ret

	/*
	 * Fewer than 16 bytes left: finish both AES results, store the
	 * MAC, then handle the tail one byte at a time, updating out[]
	 * and mac[] in memory.
	 */
6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
	st1	{v0.16b}, [x5]			/* store mac */
	add	w2, w2, #16			/* process partial tail block */
7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
	umov	w6, v1.b[0]			/* get top crypted ctr byte */
	umov	w7, v0.b[0]			/* get top mac byte */
	.if	\enc == 1
	eor	w7, w7, w9			/* mac byte ^= input byte */
	eor	w9, w9, w6			/* out byte = input ^ ctr byte */
	.else
	eor	w9, w9, w6			/* out byte = input ^ ctr byte */
	eor	w7, w7, w9			/* mac byte ^= output byte */
	.endif
	strb	w9, [x0], #1			/* store out byte */
	strb	w7, [x5], #1			/* store mac byte */
	subs	w2, w2, #1
	beq	5b
	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
	b	7b
	.endm

	/*
	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
	 * 			   u8 const rk[], u32 rounds, u8 mac[],
	 * 			   u8 ctr[]);
	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
	 * 			   u8 const rk[], u32 rounds, u8 mac[],
	 * 			   u8 ctr[]);
	 */
ENTRY(ce_aes_ccm_encrypt)
	aes_ccm_do_crypt	1
ENDPROC(ce_aes_ccm_encrypt)

ENTRY(ce_aes_ccm_decrypt)
	aes_ccm_do_crypt	0
ENDPROC(ce_aes_ccm_decrypt)