/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_HASH_H
#define _ASM_HASH_H

/*
 * If CONFIG_M68000=y (original mc68000/010), this file is #included
 * to work around the lack of a MULU.L instruction.
 */

#define HAVE_ARCH__HASH_32 1
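/*
 * Defining HAVE_ARCH__HASH_32 tells <linux/hash.h> to use this
 * arch-specific __hash_32() in place of its generic multiply-based
 * version.
 */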
/*
 * While it would be legal to substitute a different hash operation
 * entirely, let's keep it simple and just use an optimized multiply
 * by GOLDEN_RATIO_32 = 0x61C88647.
 *
 * The best way to do that appears to be to multiply by 0x8647 with
 * shifts and adds, and use mulu.w to multiply the high half by 0x61C8.
 *
 * Because the 68000 has multi-cycle shifts, this addition chain is
 * chosen to minimise the shift distances.
 *
 * Despite every attempt to spoon-feed it simple operations, GCC
 * 6.1.1 doggedly insists on doing annoying things like converting
 * "lsl.l #2,<reg>" (12 cycles) to two adds (8+8 cycles).
 *
 * It also likes to notice two shifts in a row, like "a = x << 2" and
 * "a <<= 7", and convert that to "a = x << 9".  But shifts longer
 * than 8 bits are extra-slow on m68k, so that's a lose.
 *
 * Since the 68000 is a very simple in-order processor with no
 * instruction scheduling effects on execution time, we can safely
 * take it out of GCC's hands and write one big asm() block.
 *
 * Without calling overhead, this operation is 30 bytes (14 instructions
 * plus one immediate constant) and 166 cycles.
 *
 * (Because %2 is fetched twice, it can't be postincrement, and thus it
 * can't be a fully general "g" or "m".  Register is preferred, but
 * offsettable memory or immediate will work.)
 */
static inline u32 __attribute_const__ __hash_32(u32 x)
{
	u32 a, b;

	asm(   "move.l %2,%0"	/* a = x * 0x0001 */
	"\n	lsl.l #2,%0"	/* a = x * 0x0004 */
	"\n	move.l %0,%1"	/* b = x * 0x0004 */
	"\n	lsl.l #7,%0"	/* a = x * 0x0200 */
	"\n	add.l %2,%0"	/* a = x * 0x0201 */
	"\n	add.l %0,%1"	/* b = x * 0x0205 */
	"\n	add.l %0,%0"	/* a = x * 0x0402 */
	"\n	add.l %0,%1"	/* b = x * 0x0607 */
	"\n	lsl.l #5,%0"	/* a = x * 0x8040 */
	: "=&d,d" (a), "=&r,r" (b)
	: "r,roi?" (x));	/* a+b = x*0x8647 */

	return ((u16)(x*0x61c8) << 16) + a + b;
}
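
/*
 * Purely illustrative sketch (not part of the original header; the
 * helper name below is made up here): the same addition chain written
 * in portable C, handy for checking the asm against a plain multiply.
 * Modulo 2^32, x * 0x61C88647 == ((u16)(x * 0x61C8) << 16) + x * 0x8647,
 * and the chain below leaves a + b == x * 0x8647.
 */
static inline u32 __attribute_const__ __hash_32_c_model(u32 x)
{
	u32 a, b;

	a = x << 2;		/* a = x * 0x0004 */
	b = a;			/* b = x * 0x0004 */
	a <<= 7;		/* a = x * 0x0200 */
	a += x;			/* a = x * 0x0201 */
	b += a;			/* b = x * 0x0205 */
	a += a;			/* a = x * 0x0402 */
	b += a;			/* b = x * 0x0607 */
	a <<= 5;		/* a = x * 0x8040 */

	return ((u16)(x * 0x61c8) << 16) + a + b;
}

/*
 * Callers normally reach __hash_32() through hash_32(val, bits) in
 * <linux/hash.h>, which keeps the top bits of the product:
 * __hash_32(val) >> (32 - bits).
 */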

#endif	/* _ASM_HASH_H */