1/*
2 * Accelerated GHASH implementation with ARMv8 PMULL instructions.
3 *
4 * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
5 *
6 * Based on arch/x86/crypto/ghash-pmullni-intel_asm.S
7 *
8 * Copyright (c) 2009 Intel Corp.
9 *   Author: Huang Ying <ying.huang@intel.com>
10 *           Vinodh Gopal
11 *           Erdinc Ozturk
12 *           Deniz Karakoyunlu
13 *
14 * This program is free software; you can redistribute it and/or modify it
15 * under the terms of the GNU General Public License version 2 as published
16 * by the Free Software Foundation.
17 */
18
19#include <linux/linkage.h>
20#include <asm/assembler.h>
21
	/*
	 * NEON register aliases used by pmull_ghash_update.
	 *
	 * IN1 and T1 deliberately name the same register (v2): the input
	 * block is folded into DATA before T1 is first written, so the two
	 * values are never live at the same time.
	 */
	DATA	.req	v0	// running digest, loaded from and stored to dg
	SHASH	.req	v1	// hash key loaded from the ghash_key argument
	IN1	.req	v2	// current input block (head or next src block)
	T1	.req	v2	// high 128 bits of the 256-bit product
	T2	.req	v3	// scratch
	T3	.req	v4	// scratch
	VZR	.req	v5	// zeroed on function entry, used as a zero vector

	.text
	.arch		armv8-a+crypto	// allow the PMULL/PMULL2 instructions
32
33	/*
34	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
35	 *			   struct ghash_key const *k, const char *head)
36	 */
ENTRY(pmull_ghash_update)
	/*
	 * Register usage (AAPCS64 argument order):
	 *   w0 - blocks: number of 16-byte blocks to read from src
	 *   x1 - dg:     digest, two u64s, read on entry and written on exit
	 *   x2 - src:    input, advanced by 16 for every block consumed
	 *   x3 - k:      hash key, 16 bytes loaded into SHASH
	 *   x4 - head:   optional extra block that is hashed before src;
	 *                may be NULL, and does not count against blocks
	 *
	 * Clobbers w0, x2 and v0-v5.
	 *
	 * At least one block must be supplied: with head == NULL and
	 * blocks == 0 the counter would wrap and the loop would run past
	 * the end of src.
	 *
	 * dg is an array of u64, so it is loaded and stored as two 64-bit
	 * lanes (.2d) rather than as sixteen bytes (.16b). On little endian
	 * both forms produce the same register contents; on big endian only
	 * the .2d form keeps the bytes inside each 64-bit lane in the order
	 * that the lane-wise shifts and PMULL below expect. The key is loaded
	 * the same way on the assumption that struct ghash_key also holds two
	 * native-endian u64s (NOTE(review): confirm against the glue code).
	 */
	ld1		{DATA.2d}, [x1]
	ld1		{SHASH.2d}, [x3]
	eor		VZR.16b, VZR.16b, VZR.16b	// VZR = 0

	/* do the head block first, if supplied */
	cbz		x4, 0f
	ld1		{IN1.2d}, [x4]
	b		1f

	/* main loop: fetch the next block from src and count it */
0:	ld1		{IN1.2d}, [x2], #16
	sub		w0, w0, #1

	/*
	 * Swap the two 64-bit halves of the block and, on little endian
	 * only, reverse the bytes inside each half, then fold the block
	 * into the running digest.
	 */
1:	ext		IN1.16b, IN1.16b, IN1.16b, #8
CPU_LE(	rev64		IN1.16b, IN1.16b	)
	eor		DATA.16b, DATA.16b, IN1.16b

	/*
	 * multiply DATA by SHASH in GF(2^128)
	 *
	 * Karatsuba: three 64x64 carry-less multiplies instead of four.
	 * T2 and T3 get (hi ^ lo) of DATA and SHASH respectively.
	 */
	ext		T2.16b, DATA.16b, DATA.16b, #8
	ext		T3.16b, SHASH.16b, SHASH.16b, #8
	eor		T2.16b, T2.16b, DATA.16b
	eor		T3.16b, T3.16b, SHASH.16b

	/* IN1 is dead from here on, so v2 is reused as T1 */
	pmull2		T1.1q, SHASH.2d, DATA.2d	// a1 * b1
	pmull		DATA.1q, SHASH.1d, DATA.1d	// a0 * b0
	pmull		T2.1q, T2.1d, T3.1d		// (a1 + a0)(b1 + b0)
	eor		T2.16b, T2.16b, T1.16b		// (a0 * b1) + (a1 * b0)
	eor		T2.16b, T2.16b, DATA.16b

	/* add the middle term, shifted by 64 bits, into <T1:DATA> */
	ext		T3.16b, VZR.16b, T2.16b, #8	// low half of T2 -> high lane
	ext		T2.16b, T2.16b, VZR.16b, #8	// high half of T2 -> low lane
	eor		DATA.16b, DATA.16b, T3.16b
	eor		T1.16b, T1.16b, T2.16b	// <T1:DATA> is result of
						// carry-less multiplication

	/*
	 * first phase of the reduction
	 *
	 * Per 64-bit lane: T3 = (DATA << 63) ^ (DATA << 62) ^ (DATA << 57),
	 * built from successive shifts by 1, 5 and 57. The result is then
	 * moved up by 64 bits and folded into <T1:DATA>.
	 */
	shl		T3.2d, DATA.2d, #1
	eor		T3.16b, T3.16b, DATA.16b
	shl		T3.2d, T3.2d, #5
	eor		T3.16b, T3.16b, DATA.16b
	shl		T3.2d, T3.2d, #57
	ext		T2.16b, VZR.16b, T3.16b, #8
	ext		T3.16b, T3.16b, VZR.16b, #8
	eor		DATA.16b, DATA.16b, T2.16b
	eor		T1.16b, T1.16b, T3.16b

	/*
	 * second phase of the reduction
	 *
	 * Per 64-bit lane: T2 = (DATA >> 1) ^ (DATA >> 2) ^ (DATA >> 7),
	 * built from successive shifts by 5, 1 and 1. Folding T2 and T1
	 * into DATA leaves the reduced 128-bit digest in DATA.
	 */
	ushr		T2.2d, DATA.2d, #5
	eor		T2.16b, T2.16b, DATA.16b
	ushr		T2.2d, T2.2d, #1
	eor		T2.16b, T2.16b, DATA.16b
	ushr		T2.2d, T2.2d, #1
	eor		T1.16b, T1.16b, T2.16b
	eor		DATA.16b, DATA.16b, T1.16b

	/* keep going while there are blocks left in src */
	cbnz		w0, 0b

	/* write the digest back as two u64s, matching the load above */
	st1		{DATA.2d}, [x1]
	ret
ENDPROC(pmull_ghash_update)
96