/*
 * Intel SHA Extensions optimized implementation of a SHA-1 update function
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Contact Information:
 *	Sean Gulley <sean.m.gulley@intel.com>
 *	Tim Chen <tim.c.chen@linux.intel.com>
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *	* Redistributions of source code must retain the above copyright
 *	  notice, this list of conditions and the following disclaimer.
 *	* Redistributions in binary form must reproduce the above copyright
 *	  notice, this list of conditions and the following disclaimer in
 *	  the documentation and/or other materials provided with the
 *	  distribution.
 *	* Neither the name of Intel Corporation nor the names of its
 *	  contributors may be used to endorse or promote products derived
 *	  from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/linkage.h>
#include <linux/cfi_types.h>

#define DIGEST_PTR	%rdi	/* 1st arg */
#define DATA_PTR	%rsi	/* 2nd arg */
#define NUM_BLKS	%rdx	/* 3rd arg */
#define NUM_BLKS_32	%edx	/* low 32 bits of the 3rd arg */

/* gcc conversion */
#define FRAME_SIZE	32	/* space for 2x16 bytes */

#define ABCD		%xmm0
#define E0		%xmm1	/* Need two E's b/c they ping pong */
#define E1		%xmm2
#define MSG0		%xmm3
#define MSG1		%xmm4
#define MSG2		%xmm5
#define MSG3		%xmm6
#define SHUF_MASK	%xmm7


/*
 * Intel SHA Extensions optimized implementation of a SHA-1 update function
 *
 * The function takes a pointer to the current hash values, a pointer to the
 * input data, and a number of 64 byte blocks to process.  Once all blocks have
 * been processed, the memory the digest pointer refers to is updated with the
 * resulting hash value.  The function only processes complete blocks, there is
 * no functionality to store partial blocks.  All message padding and hash
 * value initialization must be done outside the update function.
 *
 * The indented lines in the loop are instructions related to rounds processing.
 * The non-indented lines are instructions related to the message schedule.
 *
 * void sha1_ni_transform(uint32_t *digest, const void *data,
 *			  uint32_t numBlocks)
 * digest : pointer to digest
 * data: pointer to input data
 * numBlocks: Number of blocks to process
 *
 * Register usage (arguments arrive in %rdi, %rsi, %rdx as aliased above):
 *   DATA_PTR is advanced by 64 bytes per block and NUM_BLKS is overwritten
 *   with the end-of-data pointer, so neither argument register is preserved.
 *   %xmm0-%xmm7 and the flags are clobbered.  %rbp and %rsp are restored
 *   before returning.
 *
 * Stack usage: a FRAME_SIZE byte scratch area, aligned down to 16 bytes,
 *   holds the hash state saved at the top of every block iteration.
 *
 * When numBlocks is zero the digest is neither read nor written.
 */
.text
SYM_TYPED_FUNC_START(sha1_ni_transform)
	push		%rbp
	mov		%rsp, %rbp
	sub		$FRAME_SIZE, %rsp
	and		$~0xF, %rsp		/* movdqa below needs 16-byte alignment */

	/*
	 * numBlocks is a 32-bit argument, so bits 63:32 of NUM_BLKS are not
	 * guaranteed to be zero on entry.  Writing the 32-bit register clears
	 * them, making the 64-bit shift/add/compare below well defined.
	 */
	mov		NUM_BLKS_32, NUM_BLKS_32
	shl		$6, NUM_BLKS		/*  convert to bytes */
	jz		.Ldone_hash
	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */

	/*
	 * load initial hash values: E goes into the top dword of E0 (the
	 * remaining dwords are cleared by the mask), and the dword order of
	 * the first four state words is reversed for the SHA instructions.
	 */
	pinsrd		$3, 1*16(DIGEST_PTR), E0
	movdqu		0*16(DIGEST_PTR), ABCD
	pand		UPPER_WORD_MASK(%rip), E0
	pshufd		$0x1B, ABCD, ABCD

	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK

.Lloop0:
	/* Save hash values for addition after rounds */
	movdqa		E0, (0*16)(%rsp)
	movdqa		ABCD, (1*16)(%rsp)

	/* Rounds 0-3 */
	movdqu		0*16(DATA_PTR), MSG0
	pshufb		SHUF_MASK, MSG0
		paddd		MSG0, E0
		movdqa		ABCD, E1
		sha1rnds4	$0, E0, ABCD

	/* Rounds 4-7 */
	movdqu		1*16(DATA_PTR), MSG1
	pshufb		SHUF_MASK, MSG1
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
		sha1rnds4	$0, E1, ABCD
	sha1msg1	MSG1, MSG0

	/* Rounds 8-11 */
	movdqu		2*16(DATA_PTR), MSG2
	pshufb		SHUF_MASK, MSG2
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
		sha1rnds4	$0, E0, ABCD
	sha1msg1	MSG2, MSG1
	pxor		MSG2, MSG0

	/* Rounds 12-15 */
	movdqu		3*16(DATA_PTR), MSG3
	pshufb		SHUF_MASK, MSG3
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
	sha1msg2	MSG3, MSG0
		sha1rnds4	$0, E1, ABCD
	sha1msg1	MSG3, MSG2
	pxor		MSG3, MSG1

	/* Rounds 16-19 */
		sha1nexte	MSG0, E0
		movdqa		ABCD, E1
	sha1msg2	MSG0, MSG1
		sha1rnds4	$0, E0, ABCD
	sha1msg1	MSG0, MSG3
	pxor		MSG0, MSG2

	/* Rounds 20-23 */
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
	sha1msg2	MSG1, MSG2
		sha1rnds4	$1, E1, ABCD
	sha1msg1	MSG1, MSG0
	pxor		MSG1, MSG3

	/* Rounds 24-27 */
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
	sha1msg2	MSG2, MSG3
		sha1rnds4	$1, E0, ABCD
	sha1msg1	MSG2, MSG1
	pxor		MSG2, MSG0

	/* Rounds 28-31 */
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
	sha1msg2	MSG3, MSG0
		sha1rnds4	$1, E1, ABCD
	sha1msg1	MSG3, MSG2
	pxor		MSG3, MSG1

	/* Rounds 32-35 */
		sha1nexte	MSG0, E0
		movdqa		ABCD, E1
	sha1msg2	MSG0, MSG1
		sha1rnds4	$1, E0, ABCD
	sha1msg1	MSG0, MSG3
	pxor		MSG0, MSG2

	/* Rounds 36-39 */
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
	sha1msg2	MSG1, MSG2
		sha1rnds4	$1, E1, ABCD
	sha1msg1	MSG1, MSG0
	pxor		MSG1, MSG3

	/* Rounds 40-43 */
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
	sha1msg2	MSG2, MSG3
		sha1rnds4	$2, E0, ABCD
	sha1msg1	MSG2, MSG1
	pxor		MSG2, MSG0

	/* Rounds 44-47 */
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
	sha1msg2	MSG3, MSG0
		sha1rnds4	$2, E1, ABCD
	sha1msg1	MSG3, MSG2
	pxor		MSG3, MSG1

	/* Rounds 48-51 */
		sha1nexte	MSG0, E0
		movdqa		ABCD, E1
	sha1msg2	MSG0, MSG1
		sha1rnds4	$2, E0, ABCD
	sha1msg1	MSG0, MSG3
	pxor		MSG0, MSG2

	/* Rounds 52-55 */
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
	sha1msg2	MSG1, MSG2
		sha1rnds4	$2, E1, ABCD
	sha1msg1	MSG1, MSG0
	pxor		MSG1, MSG3

	/* Rounds 56-59 */
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
	sha1msg2	MSG2, MSG3
		sha1rnds4	$2, E0, ABCD
	sha1msg1	MSG2, MSG1
	pxor		MSG2, MSG0

	/* Rounds 60-63 */
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
	sha1msg2	MSG3, MSG0
		sha1rnds4	$3, E1, ABCD
	sha1msg1	MSG3, MSG2
	pxor		MSG3, MSG1

	/* Rounds 64-67 */
		sha1nexte	MSG0, E0
		movdqa		ABCD, E1
	sha1msg2	MSG0, MSG1
		sha1rnds4	$3, E0, ABCD
	sha1msg1	MSG0, MSG3
	pxor		MSG0, MSG2

	/* Rounds 68-71 */
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
	sha1msg2	MSG1, MSG2
		sha1rnds4	$3, E1, ABCD
	pxor		MSG1, MSG3

	/* Rounds 72-75 */
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
	sha1msg2	MSG2, MSG3
		sha1rnds4	$3, E0, ABCD

	/* Rounds 76-79 */
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
		sha1rnds4	$3, E1, ABCD

	/* Add current hash values with previously saved */
	sha1nexte	(0*16)(%rsp), E0
	paddd		(1*16)(%rsp), ABCD

	/* Increment data pointer and loop if more to process */
	add		$64, DATA_PTR
	cmp		NUM_BLKS, DATA_PTR
	jne		.Lloop0

	/* Write hash values back in the correct order */
	pshufd		$0x1B, ABCD, ABCD
	movdqu		ABCD, 0*16(DIGEST_PTR)
	pextrd		$3, E0, 1*16(DIGEST_PTR)

.Ldone_hash:
	/* Drop the aligned scratch area and restore the caller's frame */
	mov		%rbp, %rsp
	pop		%rbp

	RET
SYM_FUNC_END(sha1_ni_transform)

/* pshufb control mask that reverses the order of all 16 bytes in a register */
.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x000102030405060708090a0b0c0d0e0f

/* pand mask that keeps only the most significant dword (bits 127:96) */
.section	.rodata.cst16.UPPER_WORD_MASK, "aM", @progbits, 16
.align 16
UPPER_WORD_MASK:
	.octa 0xFFFFFFFF000000000000000000000000