/*
 * Accelerated GHASH implementation with ARMv8 PMULL instructions.
 *
 * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	SHASH		.req	v0
	SHASH2		.req	v1
	T1		.req	v2
	T2		.req	v3
	MASK		.req	v4
	XL		.req	v5
	XM		.req	v6
	XH		.req	v7
	IN1		.req	v7

	.text
	.arch		armv8-a+crypto

	/*
	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
	 *			   struct ghash_key const *k, const char *head)
	 */
ENTRY(pmull_ghash_update)
	ld1		{SHASH.2d}, [x3]
	ld1		{XL.2d}, [x1]
	movi		MASK.16b, #0xe1
	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
	shl		MASK.2d, MASK.2d, #57
	eor		SHASH2.16b, SHASH2.16b, SHASH.16b

	/* do the head block first, if supplied */
	cbz		x4, 0f
	ld1		{T1.2d}, [x4]
	b		1f

0:	ld1		{T1.2d}, [x2], #16
	sub		w0, w0, #1

1:	/* multiply XL by SHASH in GF(2^128) */
CPU_LE(	rev64		T1.16b, T1.16b	)

	ext		T2.16b, XL.16b, XL.16b, #8
	ext		IN1.16b, T1.16b, T1.16b, #8
	eor		T1.16b, T1.16b, T2.16b
	eor		XL.16b, XL.16b, IN1.16b

	pmull2		XH.1q, SHASH.2d, XL.2d		// a1 * b1
	eor		T1.16b, T1.16b, XL.16b
	pmull		XL.1q, SHASH.1d, XL.1d		// a0 * b0
	pmull		XM.1q, SHASH2.1d, T1.1d		// (a1 + a0)(b1 + b0)

	ext		T1.16b, XL.16b, XH.16b, #8
	eor		T2.16b, XL.16b, XH.16b
	eor		XM.16b, XM.16b, T1.16b
	eor		XM.16b, XM.16b, T2.16b
	pmull		T2.1q, XL.1d, MASK.1d

	mov		XH.d[0], XM.d[1]
	mov		XM.d[1], XL.d[0]

	eor		XL.16b, XM.16b, T2.16b
	ext		T2.16b, XL.16b, XL.16b, #8
	pmull		XL.1q, XL.1d, MASK.1d
	eor		T2.16b, T2.16b, XH.16b
	eor		XL.16b, XL.16b, T2.16b

	cbnz		w0, 0b

	st1		{XL.2d}, [x1]
	ret
ENDPROC(pmull_ghash_update)

	KS		.req	v8
	CTR		.req	v9
	INP		.req	v10

	.macro		load_round_keys, rounds, rk
	cmp		\rounds, #12
	blo		2222f		/* 128 bits */
	beq		1111f		/* 192 bits */
	ld1		{v17.4s-v18.4s}, [\rk], #32
1111:	ld1		{v19.4s-v20.4s}, [\rk], #32
2222:	ld1		{v21.4s-v24.4s}, [\rk], #64
	ld1		{v25.4s-v28.4s}, [\rk], #64
	ld1		{v29.4s-v31.4s}, [\rk]
	.endm

	.macro		enc_round, state, key
	aese		\state\().16b, \key\().16b
	aesmc		\state\().16b, \state\().16b
	.endm

	.macro		enc_block, state, rounds
	cmp		\rounds, #12
	b.lo		2222f		/* 128 bits */
	b.eq		1111f		/* 192 bits */
	enc_round	\state, v17
	enc_round	\state, v18
1111:	enc_round	\state, v19
	enc_round	\state, v20
2222:	.irp		key, v21, v22, v23, v24, v25, v26, v27, v28, v29
	enc_round	\state, \key
	.endr
	aese		\state\().16b, v30.16b
	eor		\state\().16b, \state\().16b, v31.16b
	.endm

	.macro		pmull_gcm_do_crypt, enc
	ld1		{SHASH.2d}, [x4]
	ld1		{XL.2d}, [x1]
	ldr		x8, [x5, #8]			// load lower counter

	movi		MASK.16b, #0xe1
	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
CPU_LE(	rev		x8, x8		)
	shl		MASK.2d, MASK.2d, #57
	eor		SHASH2.16b, SHASH2.16b, SHASH.16b

	.if		\enc == 1
	ld1		{KS.16b}, [x7]
	.endif

0:	ld1		{CTR.8b}, [x5]			// load upper counter
	ld1		{INP.16b}, [x3], #16
	rev		x9, x8
	add		x8, x8, #1
	sub		w0, w0, #1
	ins		CTR.d[1], x9			// set lower counter

	.if		\enc == 1
	eor		INP.16b, INP.16b, KS.16b	// encrypt input
	st1		{INP.16b}, [x2], #16
	.endif

	rev64		T1.16b, INP.16b

	cmp		w6, #12
	b.ge		2f				// AES-192/256?
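	/*
	 * The ciphertext block is folded into the GHASH accumulator XL and
	 * multiplied by the hash key in GF(2^128), interleaved with the AES
	 * rounds that encrypt this block's counter. The extra rounds needed
	 * for AES-192/256 (v17-v20) are handled by the tail at label 2 below,
	 * which branches back to label 1.
	 */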

1:	enc_round	CTR, v21

	ext		T2.16b, XL.16b, XL.16b, #8
	ext		IN1.16b, T1.16b, T1.16b, #8

	enc_round	CTR, v22

	eor		T1.16b, T1.16b, T2.16b
	eor		XL.16b, XL.16b, IN1.16b

	enc_round	CTR, v23

	pmull2		XH.1q, SHASH.2d, XL.2d		// a1 * b1
	eor		T1.16b, T1.16b, XL.16b

	enc_round	CTR, v24

	pmull		XL.1q, SHASH.1d, XL.1d		// a0 * b0
	pmull		XM.1q, SHASH2.1d, T1.1d		// (a1 + a0)(b1 + b0)

	enc_round	CTR, v25

	ext		T1.16b, XL.16b, XH.16b, #8
	eor		T2.16b, XL.16b, XH.16b
	eor		XM.16b, XM.16b, T1.16b

	enc_round	CTR, v26

	eor		XM.16b, XM.16b, T2.16b
	pmull		T2.1q, XL.1d, MASK.1d

	enc_round	CTR, v27

	mov		XH.d[0], XM.d[1]
	mov		XM.d[1], XL.d[0]

	enc_round	CTR, v28

	eor		XL.16b, XM.16b, T2.16b

	enc_round	CTR, v29

	ext		T2.16b, XL.16b, XL.16b, #8

	aese		CTR.16b, v30.16b

	pmull		XL.1q, XL.1d, MASK.1d
	eor		T2.16b, T2.16b, XH.16b

	eor		KS.16b, CTR.16b, v31.16b

	eor		XL.16b, XL.16b, T2.16b

	.if		\enc == 0
	eor		INP.16b, INP.16b, KS.16b
	st1		{INP.16b}, [x2], #16
	.endif

	cbnz		w0, 0b

CPU_LE(	rev		x8, x8		)
	st1		{XL.2d}, [x1]
	str		x8, [x5, #8]			// store lower counter

	.if		\enc == 1
	st1		{KS.16b}, [x7]
	.endif

	ret

2:	b.eq		3f				// AES-192?
	enc_round	CTR, v17
	enc_round	CTR, v18
3:	enc_round	CTR, v19
	enc_round	CTR, v20
	b		1b
	.endm

	/*
	 * void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
	 *			  struct ghash_key const *k, u8 ctr[],
	 *			  int rounds, u8 ks[])
	 */
ENTRY(pmull_gcm_encrypt)
	pmull_gcm_do_crypt	1
ENDPROC(pmull_gcm_encrypt)

	/*
	 * void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
	 *			  struct ghash_key const *k, u8 ctr[],
	 *			  int rounds)
	 */
ENTRY(pmull_gcm_decrypt)
	pmull_gcm_do_crypt	0
ENDPROC(pmull_gcm_decrypt)

	/*
	 * void pmull_gcm_encrypt_block(u8 dst[], u8 src[], u8 rk[], int rounds)
	 */
ENTRY(pmull_gcm_encrypt_block)
	cbz		x2, 0f
	load_round_keys	w3, x2
0:	ld1		{v0.16b}, [x1]
	enc_block	v0, w3
	st1		{v0.16b}, [x0]
	ret
ENDPROC(pmull_gcm_encrypt_block)
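As a usage sketch only (this is not the kernel's actual glue code; the wrapper
name ghash_blocks is made up for illustration and struct ghash_key is left
opaque), the routines above would be driven from C with the NEON unit
explicitly claimed, since kernel code may only touch the FP/SIMD registers
between kernel_neon_begin() and kernel_neon_end():

#include <linux/types.h>
#include <linux/linkage.h>
#include <asm/neon.h>

struct ghash_key;	/* layout is defined by the accompanying C glue code */

/* prototype as documented in the comment block above pmull_ghash_update */
asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
				   struct ghash_key const *k,
				   const char *head);

/* illustrative wrapper: fold 'blocks' 16-byte blocks of 'src' into dg[] */
static void ghash_blocks(int blocks, u64 dg[2], const char *src,
			 struct ghash_key const *key, const char *head)
{
	kernel_neon_begin();		/* claim the FP/SIMD register file */
	pmull_ghash_update(blocks, dg, src, key, head);
	kernel_neon_end();
}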