168411521SHerbert Xu########################################################################
268411521SHerbert Xu# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
368411521SHerbert Xu#
468411521SHerbert Xu# Copyright (c) 2013, Intel Corporation
568411521SHerbert Xu#
668411521SHerbert Xu# Authors:
768411521SHerbert Xu#     Erdinc Ozturk <erdinc.ozturk@intel.com>
868411521SHerbert Xu#     Vinodh Gopal <vinodh.gopal@intel.com>
968411521SHerbert Xu#     James Guilford <james.guilford@intel.com>
1068411521SHerbert Xu#     Tim Chen <tim.c.chen@linux.intel.com>
1168411521SHerbert Xu#
1268411521SHerbert Xu# This software is available to you under a choice of one of two
1368411521SHerbert Xu# licenses.  You may choose to be licensed under the terms of the GNU
1468411521SHerbert Xu# General Public License (GPL) Version 2, available from the file
1568411521SHerbert Xu# COPYING in the main directory of this source tree, or the
1668411521SHerbert Xu# OpenIB.org BSD license below:
1768411521SHerbert Xu#
1868411521SHerbert Xu# Redistribution and use in source and binary forms, with or without
1968411521SHerbert Xu# modification, are permitted provided that the following conditions are
2068411521SHerbert Xu# met:
2168411521SHerbert Xu#
2268411521SHerbert Xu# * Redistributions of source code must retain the above copyright
2368411521SHerbert Xu#   notice, this list of conditions and the following disclaimer.
2468411521SHerbert Xu#
2568411521SHerbert Xu# * Redistributions in binary form must reproduce the above copyright
2668411521SHerbert Xu#   notice, this list of conditions and the following disclaimer in the
2768411521SHerbert Xu#   documentation and/or other materials provided with the
2868411521SHerbert Xu#   distribution.
2968411521SHerbert Xu#
3068411521SHerbert Xu# * Neither the name of the Intel Corporation nor the names of its
3168411521SHerbert Xu#   contributors may be used to endorse or promote products derived from
3268411521SHerbert Xu#   this software without specific prior written permission.
3368411521SHerbert Xu#
3468411521SHerbert Xu#
# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
3668411521SHerbert Xu# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
3768411521SHerbert Xu# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
3868411521SHerbert Xu# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
3968411521SHerbert Xu# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
4068411521SHerbert Xu# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
4168411521SHerbert Xu# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
4268411521SHerbert Xu# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
4368411521SHerbert Xu# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
4468411521SHerbert Xu# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
4568411521SHerbert Xu# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4668411521SHerbert Xu#
4768411521SHerbert Xu#       Reference paper titled "Fast CRC Computation for Generic
4868411521SHerbert Xu#	Polynomials Using PCLMULQDQ Instruction"
4968411521SHerbert Xu#       URL: http://www.intel.com/content/dam/www/public/us/en/documents
5068411521SHerbert Xu#  /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
5168411521SHerbert Xu#
5268411521SHerbert Xu
5368411521SHerbert Xu#include <linux/linkage.h>
5468411521SHerbert Xu
5568411521SHerbert Xu.text
5668411521SHerbert Xu
570974037fSEric Biggers#define		init_crc	%edi
580974037fSEric Biggers#define		buf		%rsi
590974037fSEric Biggers#define		len		%rdx
6068411521SHerbert Xu
610974037fSEric Biggers#define		FOLD_CONSTS	%xmm10
620974037fSEric Biggers#define		BSWAP_MASK	%xmm11
6368411521SHerbert Xu
640974037fSEric Biggers# Fold reg1, reg2 into the next 32 data bytes, storing the result back into
650974037fSEric Biggers# reg1, reg2.
660974037fSEric Biggers.macro	fold_32_bytes	offset, reg1, reg2
670974037fSEric Biggers	movdqu	\offset(buf), %xmm9
680974037fSEric Biggers	movdqu	\offset+16(buf), %xmm12
690974037fSEric Biggers	pshufb	BSWAP_MASK, %xmm9
700974037fSEric Biggers	pshufb	BSWAP_MASK, %xmm12
710974037fSEric Biggers	movdqa	\reg1, %xmm8
720974037fSEric Biggers	movdqa	\reg2, %xmm13
730974037fSEric Biggers	pclmulqdq	$0x00, FOLD_CONSTS, \reg1
740974037fSEric Biggers	pclmulqdq	$0x11, FOLD_CONSTS, %xmm8
750974037fSEric Biggers	pclmulqdq	$0x00, FOLD_CONSTS, \reg2
760974037fSEric Biggers	pclmulqdq	$0x11, FOLD_CONSTS, %xmm13
770974037fSEric Biggers	pxor	%xmm9 , \reg1
780974037fSEric Biggers	xorps	%xmm8 , \reg1
790974037fSEric Biggers	pxor	%xmm12, \reg2
800974037fSEric Biggers	xorps	%xmm13, \reg2
810974037fSEric Biggers.endm
820974037fSEric Biggers
830974037fSEric Biggers# Fold src_reg into dst_reg.
840974037fSEric Biggers.macro	fold_16_bytes	src_reg, dst_reg
850974037fSEric Biggers	movdqa	\src_reg, %xmm8
860974037fSEric Biggers	pclmulqdq	$0x11, FOLD_CONSTS, \src_reg
870974037fSEric Biggers	pclmulqdq	$0x00, FOLD_CONSTS, %xmm8
880974037fSEric Biggers	pxor	%xmm8, \dst_reg
890974037fSEric Biggers	xorps	\src_reg, \dst_reg
900974037fSEric Biggers.endm
910974037fSEric Biggers
920974037fSEric Biggers#
930974037fSEric Biggers# u16 crc_t10dif_pcl(u16 init_crc, const *u8 buf, size_t len);
940974037fSEric Biggers#
950974037fSEric Biggers# Assumes len >= 16.
960974037fSEric Biggers#
976dcc5627SJiri SlabySYM_FUNC_START(crc_t10dif_pcl)
9868411521SHerbert Xu
990974037fSEric Biggers	movdqa	.Lbswap_mask(%rip), BSWAP_MASK
10068411521SHerbert Xu
1010974037fSEric Biggers	# For sizes less than 256 bytes, we can't fold 128 bytes at a time.
1020974037fSEric Biggers	cmp	$256, len
1030974037fSEric Biggers	jl	.Lless_than_256_bytes
10468411521SHerbert Xu
1050974037fSEric Biggers	# Load the first 128 data bytes.  Byte swapping is necessary to make the
1060974037fSEric Biggers	# bit order match the polynomial coefficient order.
1070974037fSEric Biggers	movdqu	16*0(buf), %xmm0
1080974037fSEric Biggers	movdqu	16*1(buf), %xmm1
1090974037fSEric Biggers	movdqu	16*2(buf), %xmm2
1100974037fSEric Biggers	movdqu	16*3(buf), %xmm3
1110974037fSEric Biggers	movdqu	16*4(buf), %xmm4
1120974037fSEric Biggers	movdqu	16*5(buf), %xmm5
1130974037fSEric Biggers	movdqu	16*6(buf), %xmm6
1140974037fSEric Biggers	movdqu	16*7(buf), %xmm7
1150974037fSEric Biggers	add	$128, buf
1160974037fSEric Biggers	pshufb	BSWAP_MASK, %xmm0
1170974037fSEric Biggers	pshufb	BSWAP_MASK, %xmm1
1180974037fSEric Biggers	pshufb	BSWAP_MASK, %xmm2
1190974037fSEric Biggers	pshufb	BSWAP_MASK, %xmm3
1200974037fSEric Biggers	pshufb	BSWAP_MASK, %xmm4
1210974037fSEric Biggers	pshufb	BSWAP_MASK, %xmm5
1220974037fSEric Biggers	pshufb	BSWAP_MASK, %xmm6
1230974037fSEric Biggers	pshufb	BSWAP_MASK, %xmm7
12468411521SHerbert Xu
1250974037fSEric Biggers	# XOR the first 16 data *bits* with the initial CRC value.
1260974037fSEric Biggers	pxor	%xmm8, %xmm8
1270974037fSEric Biggers	pinsrw	$7, init_crc, %xmm8
1280974037fSEric Biggers	pxor	%xmm8, %xmm0
12968411521SHerbert Xu
1300974037fSEric Biggers	movdqa	.Lfold_across_128_bytes_consts(%rip), FOLD_CONSTS
13168411521SHerbert Xu
1320974037fSEric Biggers	# Subtract 128 for the 128 data bytes just consumed.  Subtract another
1330974037fSEric Biggers	# 128 to simplify the termination condition of the following loop.
1340974037fSEric Biggers	sub	$256, len
13568411521SHerbert Xu
1360974037fSEric Biggers	# While >= 128 data bytes remain (not counting xmm0-7), fold the 128
1370974037fSEric Biggers	# bytes xmm0-7 into them, storing the result back into xmm0-7.
1380974037fSEric Biggers.Lfold_128_bytes_loop:
1390974037fSEric Biggers	fold_32_bytes	0, %xmm0, %xmm1
1400974037fSEric Biggers	fold_32_bytes	32, %xmm2, %xmm3
1410974037fSEric Biggers	fold_32_bytes	64, %xmm4, %xmm5
1420974037fSEric Biggers	fold_32_bytes	96, %xmm6, %xmm7
1430974037fSEric Biggers	add	$128, buf
1440974037fSEric Biggers	sub	$128, len
1450974037fSEric Biggers	jge	.Lfold_128_bytes_loop
14668411521SHerbert Xu
1470974037fSEric Biggers	# Now fold the 112 bytes in xmm0-xmm6 into the 16 bytes in xmm7.
14868411521SHerbert Xu
1490974037fSEric Biggers	# Fold across 64 bytes.
1500974037fSEric Biggers	movdqa	.Lfold_across_64_bytes_consts(%rip), FOLD_CONSTS
1510974037fSEric Biggers	fold_16_bytes	%xmm0, %xmm4
1520974037fSEric Biggers	fold_16_bytes	%xmm1, %xmm5
1530974037fSEric Biggers	fold_16_bytes	%xmm2, %xmm6
1540974037fSEric Biggers	fold_16_bytes	%xmm3, %xmm7
1550974037fSEric Biggers	# Fold across 32 bytes.
1560974037fSEric Biggers	movdqa	.Lfold_across_32_bytes_consts(%rip), FOLD_CONSTS
1570974037fSEric Biggers	fold_16_bytes	%xmm4, %xmm6
1580974037fSEric Biggers	fold_16_bytes	%xmm5, %xmm7
1590974037fSEric Biggers	# Fold across 16 bytes.
1600974037fSEric Biggers	movdqa	.Lfold_across_16_bytes_consts(%rip), FOLD_CONSTS
1610974037fSEric Biggers	fold_16_bytes	%xmm6, %xmm7
16268411521SHerbert Xu
1630974037fSEric Biggers	# Add 128 to get the correct number of data bytes remaining in 0...127
1640974037fSEric Biggers	# (not counting xmm7), following the previous extra subtraction by 128.
1650974037fSEric Biggers	# Then subtract 16 to simplify the termination condition of the
1660974037fSEric Biggers	# following loop.
1670974037fSEric Biggers	add	$128-16, len
16868411521SHerbert Xu
1690974037fSEric Biggers	# While >= 16 data bytes remain (not counting xmm7), fold the 16 bytes
1700974037fSEric Biggers	# xmm7 into them, storing the result back into xmm7.
1710974037fSEric Biggers	jl	.Lfold_16_bytes_loop_done
1720974037fSEric Biggers.Lfold_16_bytes_loop:
17368411521SHerbert Xu	movdqa	%xmm7, %xmm8
1740974037fSEric Biggers	pclmulqdq	$0x11, FOLD_CONSTS, %xmm7
1750974037fSEric Biggers	pclmulqdq	$0x00, FOLD_CONSTS, %xmm8
17668411521SHerbert Xu	pxor	%xmm8, %xmm7
1770974037fSEric Biggers	movdqu	(buf), %xmm0
1780974037fSEric Biggers	pshufb	BSWAP_MASK, %xmm0
17968411521SHerbert Xu	pxor	%xmm0 , %xmm7
1800974037fSEric Biggers	add	$16, buf
1810974037fSEric Biggers	sub	$16, len
1820974037fSEric Biggers	jge	.Lfold_16_bytes_loop
18368411521SHerbert Xu
1840974037fSEric Biggers.Lfold_16_bytes_loop_done:
1850974037fSEric Biggers	# Add 16 to get the correct number of data bytes remaining in 0...15
1860974037fSEric Biggers	# (not counting xmm7), following the previous extra subtraction by 16.
1870974037fSEric Biggers	add	$16, len
1880974037fSEric Biggers	je	.Lreduce_final_16_bytes
18968411521SHerbert Xu
1900974037fSEric Biggers.Lhandle_partial_segment:
1910974037fSEric Biggers	# Reduce the last '16 + len' bytes where 1 <= len <= 15 and the first 16
1920974037fSEric Biggers	# bytes are in xmm7 and the rest are the remaining data in 'buf'.  To do
1930974037fSEric Biggers	# this without needing a fold constant for each possible 'len', redivide
1940974037fSEric Biggers	# the bytes into a first chunk of 'len' bytes and a second chunk of 16
1950974037fSEric Biggers	# bytes, then fold the first chunk into the second.
19668411521SHerbert Xu
19768411521SHerbert Xu	movdqa	%xmm7, %xmm2
19868411521SHerbert Xu
1990974037fSEric Biggers	# xmm1 = last 16 original data bytes
2000974037fSEric Biggers	movdqu	-16(buf, len), %xmm1
2010974037fSEric Biggers	pshufb	BSWAP_MASK, %xmm1
20268411521SHerbert Xu
2030974037fSEric Biggers	# xmm2 = high order part of second chunk: xmm7 left-shifted by 'len' bytes.
2040974037fSEric Biggers	lea	.Lbyteshift_table+16(%rip), %rax
2050974037fSEric Biggers	sub	len, %rax
20668411521SHerbert Xu	movdqu	(%rax), %xmm0
20768411521SHerbert Xu	pshufb	%xmm0, %xmm2
20868411521SHerbert Xu
2090974037fSEric Biggers	# xmm7 = first chunk: xmm7 right-shifted by '16-len' bytes.
2100974037fSEric Biggers	pxor	.Lmask1(%rip), %xmm0
21168411521SHerbert Xu	pshufb	%xmm0, %xmm7
2120974037fSEric Biggers
2130974037fSEric Biggers	# xmm1 = second chunk: 'len' bytes from xmm1 (low-order bytes),
2140974037fSEric Biggers	# then '16-len' bytes from xmm2 (high-order bytes).
21568411521SHerbert Xu	pblendvb	%xmm2, %xmm1	#xmm0 is implicit
21668411521SHerbert Xu
2170974037fSEric Biggers	# Fold the first chunk into the second chunk, storing the result in xmm7.
21868411521SHerbert Xu	movdqa	%xmm7, %xmm8
2190974037fSEric Biggers	pclmulqdq	$0x11, FOLD_CONSTS, %xmm7
2200974037fSEric Biggers	pclmulqdq	$0x00, FOLD_CONSTS, %xmm8
22168411521SHerbert Xu	pxor	%xmm8, %xmm7
2220974037fSEric Biggers	pxor	%xmm1, %xmm7
22368411521SHerbert Xu
2240974037fSEric Biggers.Lreduce_final_16_bytes:
2250974037fSEric Biggers	# Reduce the 128-bit value M(x), stored in xmm7, to the final 16-bit CRC
2260974037fSEric Biggers
2270974037fSEric Biggers	# Load 'x^48 * (x^48 mod G(x))' and 'x^48 * (x^80 mod G(x))'.
2280974037fSEric Biggers	movdqa	.Lfinal_fold_consts(%rip), FOLD_CONSTS
2290974037fSEric Biggers
2300974037fSEric Biggers	# Fold the high 64 bits into the low 64 bits, while also multiplying by
2310974037fSEric Biggers	# x^64.  This produces a 128-bit value congruent to x^64 * M(x) and
2320974037fSEric Biggers	# whose low 48 bits are 0.
23368411521SHerbert Xu	movdqa	%xmm7, %xmm0
2340974037fSEric Biggers	pclmulqdq	$0x11, FOLD_CONSTS, %xmm7 # high bits * x^48 * (x^80 mod G(x))
23568411521SHerbert Xu	pslldq	$8, %xmm0
2360974037fSEric Biggers	pxor	%xmm0, %xmm7			  # + low bits * x^64
23768411521SHerbert Xu
2380974037fSEric Biggers	# Fold the high 32 bits into the low 96 bits.  This produces a 96-bit
2390974037fSEric Biggers	# value congruent to x^64 * M(x) and whose low 48 bits are 0.
24068411521SHerbert Xu	movdqa	%xmm7, %xmm0
2410974037fSEric Biggers	pand	.Lmask2(%rip), %xmm0		  # zero high 32 bits
2420974037fSEric Biggers	psrldq	$12, %xmm7			  # extract high 32 bits
2430974037fSEric Biggers	pclmulqdq	$0x00, FOLD_CONSTS, %xmm7 # high 32 bits * x^48 * (x^48 mod G(x))
2440974037fSEric Biggers	pxor	%xmm0, %xmm7			  # + low bits
24568411521SHerbert Xu
2460974037fSEric Biggers	# Load G(x) and floor(x^48 / G(x)).
2470974037fSEric Biggers	movdqa	.Lbarrett_reduction_consts(%rip), FOLD_CONSTS
24868411521SHerbert Xu
2490974037fSEric Biggers	# Use Barrett reduction to compute the final CRC value.
25068411521SHerbert Xu	movdqa	%xmm7, %xmm0
2510974037fSEric Biggers	pclmulqdq	$0x11, FOLD_CONSTS, %xmm7 # high 32 bits * floor(x^48 / G(x))
2520974037fSEric Biggers	psrlq	$32, %xmm7			  # /= x^32
2530974037fSEric Biggers	pclmulqdq	$0x00, FOLD_CONSTS, %xmm7 # *= G(x)
2540974037fSEric Biggers	psrlq	$48, %xmm0
2550974037fSEric Biggers	pxor	%xmm7, %xmm0		     # + low 16 nonzero bits
2560974037fSEric Biggers	# Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of xmm0.
25768411521SHerbert Xu
2580974037fSEric Biggers	pextrw	$0, %xmm0, %eax
259*f94909ceSPeter Zijlstra	RET
26068411521SHerbert Xu
26168411521SHerbert Xu.align 16
2620974037fSEric Biggers.Lless_than_256_bytes:
2630974037fSEric Biggers	# Checksumming a buffer of length 16...255 bytes
26468411521SHerbert Xu
2650974037fSEric Biggers	# Load the first 16 data bytes.
2660974037fSEric Biggers	movdqu	(buf), %xmm7
2670974037fSEric Biggers	pshufb	BSWAP_MASK, %xmm7
2680974037fSEric Biggers	add	$16, buf
26968411521SHerbert Xu
2700974037fSEric Biggers	# XOR the first 16 data *bits* with the initial CRC value.
2710974037fSEric Biggers	pxor	%xmm0, %xmm0
2720974037fSEric Biggers	pinsrw	$7, init_crc, %xmm0
27368411521SHerbert Xu	pxor	%xmm0, %xmm7
27468411521SHerbert Xu
2750974037fSEric Biggers	movdqa	.Lfold_across_16_bytes_consts(%rip), FOLD_CONSTS
2760974037fSEric Biggers	cmp	$16, len
2770974037fSEric Biggers	je	.Lreduce_final_16_bytes		# len == 16
2780974037fSEric Biggers	sub	$32, len
2790974037fSEric Biggers	jge	.Lfold_16_bytes_loop		# 32 <= len <= 255
2800974037fSEric Biggers	add	$16, len
2810974037fSEric Biggers	jmp	.Lhandle_partial_segment	# 17 <= len <= 31
2826dcc5627SJiri SlabySYM_FUNC_END(crc_t10dif_pcl)
28368411521SHerbert Xu
284e183914aSDenys Vlasenko.section	.rodata, "a", @progbits
285e183914aSDenys Vlasenko.align 16
28668411521SHerbert Xu
2870974037fSEric Biggers# Fold constants precomputed from the polynomial 0x18bb7
2880974037fSEric Biggers# G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
2890974037fSEric Biggers.Lfold_across_128_bytes_consts:
2900974037fSEric Biggers	.quad		0x0000000000006123	# x^(8*128)	mod G(x)
2910974037fSEric Biggers	.quad		0x0000000000002295	# x^(8*128+64)	mod G(x)
2920974037fSEric Biggers.Lfold_across_64_bytes_consts:
2930974037fSEric Biggers	.quad		0x0000000000001069	# x^(4*128)	mod G(x)
2940974037fSEric Biggers	.quad		0x000000000000dd31	# x^(4*128+64)	mod G(x)
2950974037fSEric Biggers.Lfold_across_32_bytes_consts:
2960974037fSEric Biggers	.quad		0x000000000000857d	# x^(2*128)	mod G(x)
2970974037fSEric Biggers	.quad		0x0000000000007acc	# x^(2*128+64)	mod G(x)
2980974037fSEric Biggers.Lfold_across_16_bytes_consts:
2990974037fSEric Biggers	.quad		0x000000000000a010	# x^(1*128)	mod G(x)
3000974037fSEric Biggers	.quad		0x0000000000001faa	# x^(1*128+64)	mod G(x)
3010974037fSEric Biggers.Lfinal_fold_consts:
3020974037fSEric Biggers	.quad		0x1368000000000000	# x^48 * (x^48 mod G(x))
3030974037fSEric Biggers	.quad		0x2d56000000000000	# x^48 * (x^80 mod G(x))
3040974037fSEric Biggers.Lbarrett_reduction_consts:
3050974037fSEric Biggers	.quad		0x0000000000018bb7	# G(x)
3060974037fSEric Biggers	.quad		0x00000001f65a57f8	# floor(x^48 / G(x))
30768411521SHerbert Xu
308e183914aSDenys Vlasenko.section	.rodata.cst16.mask1, "aM", @progbits, 16
309e183914aSDenys Vlasenko.align 16
3100974037fSEric Biggers.Lmask1:
31168411521SHerbert Xu	.octa	0x80808080808080808080808080808080
312e183914aSDenys Vlasenko
313e183914aSDenys Vlasenko.section	.rodata.cst16.mask2, "aM", @progbits, 16
314e183914aSDenys Vlasenko.align 16
3150974037fSEric Biggers.Lmask2:
31668411521SHerbert Xu	.octa	0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
31768411521SHerbert Xu
3180974037fSEric Biggers.section	.rodata.cst16.bswap_mask, "aM", @progbits, 16
319e183914aSDenys Vlasenko.align 16
3200974037fSEric Biggers.Lbswap_mask:
32168411521SHerbert Xu	.octa	0x000102030405060708090A0B0C0D0E0F
32268411521SHerbert Xu
3230974037fSEric Biggers.section	.rodata.cst32.byteshift_table, "aM", @progbits, 32
3240974037fSEric Biggers.align 16
3250974037fSEric Biggers# For 1 <= len <= 15, the 16-byte vector beginning at &byteshift_table[16 - len]
3260974037fSEric Biggers# is the index vector to shift left by 'len' bytes, and is also {0x80, ...,
3270974037fSEric Biggers# 0x80} XOR the index vector to shift right by '16 - len' bytes.
3280974037fSEric Biggers.Lbyteshift_table:
3290974037fSEric Biggers	.byte		 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
3300974037fSEric Biggers	.byte		0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
3310974037fSEric Biggers	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
3320974037fSEric Biggers	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe , 0x0
333