ldcw.h (cbecf716ca618fd44feda6bd9a64a8179d031fc5) | ldcw.h (914988e099fc658436fbd7b8f240160c352b6552) |
---|---|
1/* SPDX-License-Identifier: GPL-2.0 */ 2#ifndef __PARISC_LDCW_H 3#define __PARISC_LDCW_H 4 | 1/* SPDX-License-Identifier: GPL-2.0 */ 2#ifndef __PARISC_LDCW_H 3#define __PARISC_LDCW_H 4 |
5#ifndef CONFIG_PA20 | |
6/* Because kmalloc only guarantees 8-byte alignment for kmalloc'd data, 7 and GCC only guarantees 8-byte alignment for stack locals, we can't 8 be assured of 16-byte alignment for atomic lock data even if we 9 specify "__attribute ((aligned(16)))" in the type declaration. So, 10 we use a struct containing an array of four ints for the atomic lock 11 type and dynamically select the 16-byte aligned int from the array | 5/* Because kmalloc only guarantees 8-byte alignment for kmalloc'd data, 6 and GCC only guarantees 8-byte alignment for stack locals, we can't 7 be assured of 16-byte alignment for atomic lock data even if we 8 specify "__attribute ((aligned(16)))" in the type declaration. So, 9 we use a struct containing an array of four ints for the atomic lock 10 type and dynamically select the 16-byte aligned int from the array |
12 for the semaphore. */ | 11 for the semaphore. */ |
13 | 12 |
| | 13/* From: "Jim Hull" <jim.hull of hp.com> 14 I've attached a summary of the change, but basically, for PA 2.0, as 15 long as the ",CO" (coherent operation) completer is implemented, then the 16 16-byte alignment requirement for ldcw and ldcd is relaxed, and instead 17 they only require "natural" alignment (4-byte for ldcw, 8-byte for 18 ldcd). 19 20 Although the cache control hint is accepted by all PA 2.0 processors, 21 it is only implemented on PA8800/PA8900 CPUs. Prior PA8X00 CPUs still 22 require 16-byte alignment. If the address is unaligned, the operation 23 of the instruction is undefined. The ldcw instruction does not generate 24 unaligned data reference traps so misaligned accesses are not detected. 25 This hid the problem for years. So, restore the 16-byte alignment dropped 26 by Kyle McMartin in "Remove __ldcw_align for PA-RISC 2.0 processors". */ 27 |
14#define __PA_LDCW_ALIGNMENT 16 | 28#define __PA_LDCW_ALIGNMENT 16 |
15#define __PA_LDCW_ALIGN_ORDER 4 | |
16#define __ldcw_align(a) ({ \ 17 unsigned long __ret = (unsigned long) &(a)->lock[0]; \ 18 __ret = (__ret + __PA_LDCW_ALIGNMENT - 1) \ 19 & ~(__PA_LDCW_ALIGNMENT - 1); \ 20 (volatile unsigned int *) __ret; \ 21}) | 29#define __ldcw_align(a) ({ \ 30 unsigned long __ret = (unsigned long) &(a)->lock[0]; \ 31 __ret = (__ret + __PA_LDCW_ALIGNMENT - 1) \ 32 & ~(__PA_LDCW_ALIGNMENT - 1); \ 33 (volatile unsigned int *) __ret; \ 34}) |
22#define __LDCW "ldcw" | |
23 | 35 |
24#else /*CONFIG_PA20*/ 25/* From: "Jim Hull" <jim.hull of hp.com> 26 I've attached a summary of the change, but basically, for PA 2.0, as 27 long as the ",CO" (coherent operation) completer is specified, then the 28 16-byte alignment requirement for ldcw and ldcd is relaxed, and instead 29 they only require "natural" alignment (4-byte for ldcw, 8-byte for 30 ldcd). */ 31 32#define __PA_LDCW_ALIGNMENT 4 33#define __PA_LDCW_ALIGN_ORDER 2 34#define __ldcw_align(a) (&(a)->slock) | 36#ifdef CONFIG_PA20 |
35#define __LDCW "ldcw,co" | 37#define __LDCW "ldcw,co" |
| | 38#else 39#define __LDCW "ldcw" 40#endif |
36 | 41 |
37#endif /*!CONFIG_PA20*/ 38 | |
39/* LDCW, the only atomic read-write operation PA-RISC has. *sigh*. 40 We don't explicitly expose that "*a" may be written as reload 41 fails to find a register in class R1_REGS when "a" needs to be 42 reloaded when generating 64-bit PIC code. Instead, we clobber 43 memory to indicate to the compiler that the assembly code reads 44 or writes to items other than those listed in the input and output 45 operands. This may pessimize the code somewhat but __ldcw is 46 usually used within code blocks surrounded by memory barriers. */ --- 12 unchanged lines hidden --- | 42/* LDCW, the only atomic read-write operation PA-RISC has. *sigh*. 43 We don't explicitly expose that "*a" may be written as reload 44 fails to find a register in class R1_REGS when "a" needs to be 45 reloaded when generating 64-bit PIC code. Instead, we clobber 46 memory to indicate to the compiler that the assembly code reads 47 or writes to items other than those listed in the input and output 48 operands. This may pessimize the code somewhat but __ldcw is 49 usually used within code blocks surrounded by memory barriers. */ --- 12 unchanged lines hidden --- |