/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * SM4-CCM AEAD Algorithm using ARMv8 Crypto Extensions
 * as specified in rfc8998
 * https://datatracker.ietf.org/doc/html/rfc8998
 *
 * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include "sm4-ce-asm.h"

.arch	armv8-a+crypto

.irp b, 0, 1, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 25, 26, 27, 28, 29, 30, 31
	.set .Lv\b\().4s, \b
.endr

.macro sm4e, vd, vn
	.inst 0xcec08400 | (.L\vn << 5) | .L\vd
.endm

/* Register macros */

#define RMAC	v16

/* Helper macros. */

#define inc_le128(vctr)					\
		mov		vctr.d[1], x8;		\
		mov		vctr.d[0], x7;		\
		adds		x8, x8, #1;		\
		rev64		vctr.16b, vctr.16b;	\
		adc		x7, x7, xzr;


.align 3
SYM_FUNC_START(sm4_ce_cbcmac_update)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: mac
	 *   x2: src
	 *   w3: nblocks
	 */
	SM4_PREPARE(x0)

	ld1		{RMAC.16b}, [x1]

.Lcbcmac_loop_4x:
	cmp		w3, #4
	blt		.Lcbcmac_loop_1x

	sub		w3, w3, #4

	ld1		{v0.16b-v3.16b}, [x2], #64

	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v0.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v1.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v2.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v3.16b

	cbz		w3, .Lcbcmac_end
	b		.Lcbcmac_loop_4x

.Lcbcmac_loop_1x:
	sub		w3, w3, #1

	ld1		{v0.16b}, [x2], #16

	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v0.16b

	cbnz		w3, .Lcbcmac_loop_1x

.Lcbcmac_end:
	st1		{RMAC.16b}, [x1]
	ret
SYM_FUNC_END(sm4_ce_cbcmac_update)

.align 3
SYM_FUNC_START(sm4_ce_ccm_final)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: ctr0 (big endian, 128 bit)
	 *   x2: mac
	 */
	SM4_PREPARE(x0)

	ld1		{RMAC.16b}, [x2]
	ld1		{v0.16b}, [x1]

	SM4_CRYPT_BLK2(RMAC, v0)

	/* en-/decrypt the mac with ctr0 */
	eor		RMAC.16b, RMAC.16b, v0.16b
	st1		{RMAC.16b}, [x2]

	ret
SYM_FUNC_END(sm4_ce_ccm_final)

.align 3
SYM_FUNC_START(sm4_ce_ccm_enc)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: ctr (big endian, 128 bit)
	 *   w4: nbytes
	 *   x5: mac
	 */
	SM4_PREPARE(x0)

	ldp		x7, x8, [x3]
	rev		x7, x7
	rev		x8, x8

	ld1		{RMAC.16b}, [x5]

.Lccm_enc_loop_4x:
	cmp		w4, #(4 * 16)
	blt		.Lccm_enc_loop_1x

	sub		w4, w4, #(4 * 16)

	/* construct CTRs */
	inc_le128(v8)			/* +0 */
	inc_le128(v9)			/* +1 */
	inc_le128(v10)			/* +2 */
	inc_le128(v11)			/* +3 */

	ld1		{v0.16b-v3.16b}, [x2], #64

	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v0.16b
	SM4_CRYPT_BLK2(v9, RMAC)
	eor		v9.16b, v9.16b, v1.16b
	eor		RMAC.16b, RMAC.16b, v1.16b
	SM4_CRYPT_BLK2(v10, RMAC)
	eor		v10.16b, v10.16b, v2.16b
	eor		RMAC.16b, RMAC.16b, v2.16b
	SM4_CRYPT_BLK2(v11, RMAC)
	eor		v11.16b, v11.16b, v3.16b
	eor		RMAC.16b, RMAC.16b, v3.16b

	st1		{v8.16b-v11.16b}, [x1], #64

	cbz		w4, .Lccm_enc_end
	b		.Lccm_enc_loop_4x

.Lccm_enc_loop_1x:
	cmp		w4, #16
	blt		.Lccm_enc_tail

	sub		w4, w4, #16

	/* construct CTRs */
	inc_le128(v8)

	ld1		{v0.16b}, [x2], #16

	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v0.16b

	st1		{v8.16b}, [x1], #16

	cbz		w4, .Lccm_enc_end
	b		.Lccm_enc_loop_1x

.Lccm_enc_tail:
	/* construct CTRs */
	inc_le128(v8)

	SM4_CRYPT_BLK2(RMAC, v8)
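	/*
	 * Partial final block (< 16 bytes): the running MAC and the
	 * keystream block v8 were encrypted together above.  The full MAC
	 * is stored first, then each remaining input byte is XORed with the
	 * leading keystream byte to produce the output byte and XORed into
	 * the corresponding MAC byte in memory; both vectors are rotated by
	 * one byte per iteration so the next byte lands in lane 0.  The MAC
	 * bytes beyond nbytes stay untouched, which matches CCM's
	 * zero-padding of the final CBC-MAC block.
	 */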

	/* store new MAC */
	st1		{RMAC.16b}, [x5]

.Lccm_enc_tail_loop:
	ldrb		w0, [x2], #1		/* get 1 byte from input */
	umov		w9, v8.b[0]		/* get top crypted CTR byte */
	umov		w6, RMAC.b[0]		/* get top MAC byte */

	eor		w9, w9, w0		/* w9 = CTR ^ input */
	eor		w6, w6, w0		/* w6 = MAC ^ input */

	strb		w9, [x1], #1		/* store out byte */
	strb		w6, [x5], #1		/* store MAC byte */

	subs		w4, w4, #1
	beq		.Lccm_enc_ret

	/* shift out one byte */
	ext		RMAC.16b, RMAC.16b, RMAC.16b, #1
	ext		v8.16b, v8.16b, v8.16b, #1

	b		.Lccm_enc_tail_loop

.Lccm_enc_end:
	/* store new MAC */
	st1		{RMAC.16b}, [x5]

	/* store new CTR */
	rev		x7, x7
	rev		x8, x8
	stp		x7, x8, [x3]

.Lccm_enc_ret:
	ret
SYM_FUNC_END(sm4_ce_ccm_enc)

.align 3
SYM_FUNC_START(sm4_ce_ccm_dec)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: ctr (big endian, 128 bit)
	 *   w4: nbytes
	 *   x5: mac
	 */
	SM4_PREPARE(x0)

	ldp		x7, x8, [x3]
	rev		x7, x7
	rev		x8, x8

	ld1		{RMAC.16b}, [x5]

.Lccm_dec_loop_4x:
	cmp		w4, #(4 * 16)
	blt		.Lccm_dec_loop_1x

	sub		w4, w4, #(4 * 16)

	/* construct CTRs */
	inc_le128(v8)			/* +0 */
	inc_le128(v9)			/* +1 */
	inc_le128(v10)			/* +2 */
	inc_le128(v11)			/* +3 */

	ld1		{v0.16b-v3.16b}, [x2], #64

	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v8.16b
	SM4_CRYPT_BLK2(v9, RMAC)
	eor		v9.16b, v9.16b, v1.16b
	eor		RMAC.16b, RMAC.16b, v9.16b
	SM4_CRYPT_BLK2(v10, RMAC)
	eor		v10.16b, v10.16b, v2.16b
	eor		RMAC.16b, RMAC.16b, v10.16b
	SM4_CRYPT_BLK2(v11, RMAC)
	eor		v11.16b, v11.16b, v3.16b
	eor		RMAC.16b, RMAC.16b, v11.16b

	st1		{v8.16b-v11.16b}, [x1], #64

	cbz		w4, .Lccm_dec_end
	b		.Lccm_dec_loop_4x

.Lccm_dec_loop_1x:
	cmp		w4, #16
	blt		.Lccm_dec_tail

	sub		w4, w4, #16

	/* construct CTRs */
	inc_le128(v8)

	ld1		{v0.16b}, [x2], #16

	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v8.16b

	st1		{v8.16b}, [x1], #16

	cbz		w4, .Lccm_dec_end
	b		.Lccm_dec_loop_1x

.Lccm_dec_tail:
	/* construct CTRs */
	inc_le128(v8)

	SM4_CRYPT_BLK2(RMAC, v8)

	/* store new MAC */
	st1		{RMAC.16b}, [x5]

.Lccm_dec_tail_loop:
	ldrb		w0, [x2], #1		/* get 1 byte from input */
	umov		w9, v8.b[0]		/* get top crypted CTR byte */
	umov		w6, RMAC.b[0]		/* get top MAC byte */

	eor		w9, w9, w0		/* w9 = CTR ^ input */
	eor		w6, w6, w9		/* w6 = MAC ^ output */

	strb		w9, [x1], #1		/* store out byte */
	strb		w6, [x5], #1		/* store MAC byte */

	subs		w4, w4, #1
	beq		.Lccm_dec_ret

	/* shift out one byte */
	ext		RMAC.16b, RMAC.16b, RMAC.16b, #1
	ext		v8.16b, v8.16b, v8.16b, #1

	b		.Lccm_dec_tail_loop

.Lccm_dec_end:
	/* store new MAC */
	st1		{RMAC.16b}, [x5]

	/* store new CTR */
	rev		x7, x7
	rev		x8, x8
	stp		x7, x8, [x3]

.Lccm_dec_ret:
	ret
SYM_FUNC_END(sm4_ce_ccm_dec)
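
/*
 * Usage sketch (a comment only, not part of the build): approximate C-side
 * prototypes implied by the register comments above, assuming the arguments
 * are passed in order per the AArch64 calling convention.  The exact types
 * and declarations are assumptions; the authoritative ones live in the C
 * glue code (e.g. sm4-ce-ccm-glue.c).
 *
 *   asmlinkage void sm4_ce_cbcmac_update(const u32 *rkey, u8 *mac,
 *                                        const u8 *src, unsigned int nblocks);
 *   asmlinkage void sm4_ce_ccm_final(const u32 *rkey, u8 *ctr0, u8 *mac);
 *   asmlinkage void sm4_ce_ccm_enc(const u32 *rkey, u8 *dst, const u8 *src,
 *                                  u8 *ctr, unsigned int nbytes, u8 *mac);
 *   asmlinkage void sm4_ce_ccm_dec(const u32 *rkey, u8 *dst, const u8 *src,
 *                                  u8 *ctr, unsigned int nbytes, u8 *mac);
 */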