| 1 | /* | 
|---|
| 2 | * Intel SHA Extensions optimized implementation of a SHA-1 update function | 
|---|
| 3 | * | 
|---|
| 4 | * This file is provided under a dual BSD/GPLv2 license.  When using or | 
|---|
| 5 | * redistributing this file, you may do so under either license. | 
|---|
| 6 | * | 
|---|
| 7 | * GPL LICENSE SUMMARY | 
|---|
| 8 | * | 
|---|
| 9 | * Copyright(c) 2015 Intel Corporation. | 
|---|
| 10 | * | 
|---|
| 11 | * This program is free software; you can redistribute it and/or modify | 
|---|
| 12 | * it under the terms of version 2 of the GNU General Public License as | 
|---|
| 13 | * published by the Free Software Foundation. | 
|---|
| 14 | * | 
|---|
| 15 | * This program is distributed in the hope that it will be useful, but | 
|---|
| 16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|---|
| 18 | * General Public License for more details. | 
|---|
| 19 | * | 
|---|
| 20 | * Contact Information: | 
|---|
| 21 | * 	Sean Gulley <sean.m.gulley@intel.com> | 
|---|
| 22 | * 	Tim Chen <tim.c.chen@linux.intel.com> | 
|---|
| 23 | * | 
|---|
| 24 | * BSD LICENSE | 
|---|
| 25 | * | 
|---|
| 26 | * Copyright(c) 2015 Intel Corporation. | 
|---|
| 27 | * | 
|---|
| 28 | * Redistribution and use in source and binary forms, with or without | 
|---|
| 29 | * modification, are permitted provided that the following conditions | 
|---|
| 30 | * are met: | 
|---|
| 31 | * | 
|---|
| 32 | * 	* Redistributions of source code must retain the above copyright | 
|---|
| 33 | * 	  notice, this list of conditions and the following disclaimer. | 
|---|
| 34 | * 	* Redistributions in binary form must reproduce the above copyright | 
|---|
| 35 | * 	  notice, this list of conditions and the following disclaimer in | 
|---|
| 36 | * 	  the documentation and/or other materials provided with the | 
|---|
| 37 | * 	  distribution. | 
|---|
| 38 | * 	* Neither the name of Intel Corporation nor the names of its | 
|---|
| 39 | * 	  contributors may be used to endorse or promote products derived | 
|---|
| 40 | * 	  from this software without specific prior written permission. | 
|---|
| 41 | * | 
|---|
| 42 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 
|---|
| 43 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
|---|
| 44 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 
|---|
| 45 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 
|---|
| 46 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 
|---|
| 47 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 
|---|
| 48 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 
|---|
| 49 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 
|---|
| 50 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
|---|
| 51 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 
|---|
| 52 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
|---|
| 53 | * | 
|---|
| 54 | */ | 
|---|
| 55 |  | 
|---|
| 56 | #include <linux/linkage.h> | 
|---|
| 57 |  | 
|---|
/* Function arguments (first three integer argument registers). */
#define STATE_PTR	%rdi	/* arg 1: pointer to the SHA-1 state */
#define DATA_PTR	%rsi	/* arg 2: pointer to the input data */
#define NUM_BLKS	%rdx	/* arg 3: count of 64-byte blocks */

/* Working state.  E0 and E1 alternate roles from one 4-round group to the next. */
#define ABCD		%xmm0
#define E0		%xmm1
#define E1		%xmm2

/* Four message registers, rotated through by successive 4-round groups. */
#define MSG0		%xmm3
#define MSG1		%xmm4
#define MSG2		%xmm5
#define MSG3		%xmm6

/* pshufb control loaded from PSHUFFLE_BYTE_FLIP_MASK. */
#define SHUF_MASK	%xmm7

/* Copies of the state taken at the top of each block. */
#define ABCD_SAVED	%xmm8
#define E0_SAVED	%xmm9
|---|
| 72 |  | 
|---|
/*
 * do_4rounds - emit the code for SHA-1 rounds \i through \i + 3.
 *
 *	i	number of the first round in the group
 *	m0	message register consumed by this group
 *	m1-m3	the remaining message registers, updated for later groups
 *	e0	E operand passed to sha1rnds4
 *	e1	receives a copy of ABCD taken before sha1rnds4 updates it
 *
 * Uses DATA_PTR and SHUF_MASK (only when \i < 16) and updates ABCD.
 * The instruction order below is intentional; keep it as is.
 */
.macro do_4rounds	i, m0, m1, m2, m3, e0, e1
.if \i < 16
	/* Rounds 0-15 take their message words straight from the input. */
	movdqu		\i*4(DATA_PTR), \m0
	pshufb		SHUF_MASK, \m0
.endif
.if \i != 0
	/* \e0 = \m0 with rol(\e0's top dword, 30) added into the top dword. */
	sha1nexte	\m0, \e0
.else
	/* First group of a block: plain dword add of the message into \e0. */
	paddd		\m0, \e0
.endif
	/* Snapshot ABCD before the rounds overwrite it. */
	movdqa		ABCD, \e1
.if (\i < 76) && (\i >= 12)
	sha1msg2	\m0, \m1
.endif
	/* The immediate picks the round function and constant: \i / 20. */
	sha1rnds4	$\i / 20, \e0, ABCD
.if (\i < 68) && (\i >= 4)
	sha1msg1	\m0, \m3
.endif
.if (\i < 72) && (\i >= 8)
	pxor		\m0, \m2
.endif
.endm
|---|
| 95 |  | 
|---|
/*
 * sha1_ni_transform - SHA-1 block function built on the Intel SHA Extensions
 *
 * void sha1_ni_transform(struct sha1_block_state *state,
 *			  const u8 *data, size_t nblocks)
 *
 * Runs the SHA-1 rounds over nblocks consecutive 64-byte blocks starting at
 * data.  The state is loaded from *state once before the first block and
 * stored back once after the last block.
 *
 * nblocks must be nonzero: the block counter is decremented before it is
 * tested, so there is no check for zero.  Only whole blocks are handled here;
 * state initialization, buffering of partial blocks, and digest finalization
 * are expected to be done elsewhere.
 */
.text
SYM_FUNC_START(sha1_ni_transform)

	/* Byte-reversal control for pshufb, applied to each 16 bytes of input. */
	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK

	/* ABCD = first 16 bytes of the state with the dword order reversed. */
	movdqu		(STATE_PTR), ABCD
	pshufd		$0x1B, ABCD, ABCD

	/* E0 = dword at state offset 16 in the top lane, zeros elsewhere. */
	pxor		E0, E0
	pinsrd		$3, 16(STATE_PTR), E0

.Lprocess_block:
	/* Remember the incoming state; it is added back after the rounds. */
	movdqa		ABCD, ABCD_SAVED
	movdqa		E0, E0_SAVED

	/* 80 rounds: five passes, each made of four 4-round groups. */
.irp i, 0, 16, 32, 48, 64
	do_4rounds	(\i + 0),  MSG0, MSG1, MSG2, MSG3, E0, E1
	do_4rounds	(\i + 4),  MSG1, MSG2, MSG3, MSG0, E1, E0
	do_4rounds	(\i + 8),  MSG2, MSG3, MSG0, MSG1, E0, E1
	do_4rounds	(\i + 12), MSG3, MSG0, MSG1, MSG2, E1, E0
.endr

	/* Fold the saved pre-round state into the post-round state. */
	paddd		ABCD_SAVED, ABCD
	sha1nexte	E0_SAVED, E0

	/* Move on to the following block and loop while any remain. */
	add		$64, DATA_PTR
	dec		NUM_BLKS
	jnz		.Lprocess_block

	/* Write the state back, restoring the original dword order. */
	pshufd		$0x1B, ABCD, ABCD
	movdqu		ABCD, (STATE_PTR)
	pextrd		$3, E0, 16(STATE_PTR)

	RET
SYM_FUNC_END(sha1_ni_transform)
|---|
| 148 |  | 
|---|
/*
 * pshufb control that reverses all 16 bytes of a register: stored
 * little-endian, byte 0 of the mask is 0x0f and byte 15 is 0x00.
 */
.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.p2align 4
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x000102030405060708090a0b0c0d0e0f
|---|
| 153 |  | 
|---|