/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __PARISC_LDCW_H
#define __PARISC_LDCW_H

/* Because kmalloc only guarantees 8-byte alignment for kmalloc'd data,
   and GCC only guarantees 8-byte alignment for stack locals, we can't
   be assured of 16-byte alignment for atomic lock data even if we
   specify "__attribute ((aligned(16)))" in the type declaration. So,
   we use a struct containing an array of four ints for the atomic lock
   type and dynamically select the 16-byte aligned int from the array
   for the semaphore. */

/* From: "Jim Hull" <jim.hull of hp.com>
   I've attached a summary of the change, but basically, for PA 2.0, as
   long as the ",CO" (coherent operation) completer is implemented, then the
   16-byte alignment requirement for ldcw and ldcd is relaxed, and instead
   they only require "natural" alignment (4-byte for ldcw, 8-byte for
   ldcd).

   Although the cache control hint is accepted by all PA 2.0 processors,
   it is only implemented on PA8800/PA8900 CPUs. Prior PA8X00 CPUs still
   require 16-byte alignment. If the address is unaligned, the operation
   of the instruction is undefined. The ldcw instruction does not generate
   unaligned data reference traps so misaligned accesses are not detected.
   This hid the problem for years. So, restore the 16-byte alignment dropped
   by Kyle McMartin in "Remove __ldcw_align for PA-RISC 2.0 processors". */

#define __PA_LDCW_ALIGNMENT	16
#define __ldcw_align(a) ({					\
	unsigned long __ret = (unsigned long) &(a)->lock[0];	\
	__ret = (__ret + __PA_LDCW_ALIGNMENT - 1)		\
		& ~(__PA_LDCW_ALIGNMENT - 1);			\
	(volatile unsigned int *) __ret;			\
})

#ifdef CONFIG_PA20
#define __LDCW	"ldcw,co"
#else
#define __LDCW	"ldcw"
#endif

/* LDCW, the only atomic read-write operation PA-RISC has. *sigh*.
   We don't explicitly expose that "*a" may be written as reload
   fails to find a register in class R1_REGS when "a" needs to be
   reloaded when generating 64-bit PIC code. Instead, we clobber
   memory to indicate to the compiler that the assembly code reads
   or writes to items other than those listed in the input and output
   operands. This may pessimize the code somewhat but __ldcw is
   usually used within code blocks surrounded by memory barriers. */
#define __ldcw(a) ({						\
	unsigned __ret;						\
	__asm__ __volatile__(__LDCW " 0(%1),%0"			\
		: "=r" (__ret) : "r" (a) : "memory");		\
	__ret;							\
})
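
/*
 * Illustrative usage sketch, not part of this header's interface: the
 * names below ("__example_ldcw_lock_t", "__example_ldcw_acquire",
 * "__example_ldcw_release") are hypothetical and only show how
 * __ldcw_align()/__ldcw() are meant to be combined into a busy-wait
 * lock, assuming a lock type built around the four-int array that the
 * comment at the top of this file describes.  The real arch spinlock
 * implementation lives elsewhere.
 */
typedef struct {
	volatile unsigned int lock[4];	/* guarantees one 16-byte aligned word */
} __example_ldcw_lock_t;

static inline void __example_ldcw_acquire(__example_ldcw_lock_t *x)
{
	/* pick the 16-byte aligned word inside the array */
	volatile unsigned int *a = __ldcw_align(x);

	/* ldcw atomically loads the word and zeroes it: a non-zero
	   return value means the lock was free and is now ours */
	while (__ldcw(a) == 0)
		while (*a == 0)	/* spin on plain loads until it looks free */
			continue;
}

static inline void __example_ldcw_release(__example_ldcw_lock_t *x)
{
	volatile unsigned int *a = __ldcw_align(x);

	/* storing any non-zero value marks the lock free again */
	*a = 1;
}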

#ifdef CONFIG_SMP
# define __lock_aligned __section(".data..lock_aligned") __aligned(16)
#endif

#endif /* __PARISC_LDCW_H */