/*
 * Architecture-specific unaligned trap handling.
 *
 * Copyright (C) 1999-2002, 2004 Hewlett-Packard Co
 *	Stephane Eranian <eranian@hpl.hp.com>
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * 2002/12/09	Fix rotating register handling (off-by-1 error, missing fr-rotation).  Fix
 *		get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame
 *		stacked register returns an undefined value; it does NOT trigger a
 *		"rsvd register fault").
 * 2001/10/11	Fix unaligned access to rotating registers in s/w pipelined loops.
 * 2001/08/13	Correct size of extended floats (float_fsz) from 16 to 10 bytes.
 * 2001/01/17	Add support for emulation of unaligned kernel accesses.
 */
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/sched/signal.h>
#include <linux/tty.h>
#include <linux/extable.h>
#include <linux/ratelimit.h>
#include <linux/uaccess.h>

#include <asm/intrinsics.h>
#include <asm/processor.h>
#include <asm/rse.h>
#include <asm/exception.h>
#include <asm/unaligned.h>

extern int die_if_kernel(char *str, struct pt_regs *regs, long err);

#undef DEBUG_UNALIGNED_TRAP

#ifdef DEBUG_UNALIGNED_TRAP
# define DPRINT(a...)	do { printk("%s %u: ", __func__, __LINE__); printk (a); } while (0)
# define DDUMP(str,vp,len)	dump(str, vp, len)

static void
dump (const char *str, void *vp, size_t len)
{
	unsigned char *cp = vp;
	int i;

	printk("%s", str);
	for (i = 0; i < len; ++i)
		printk (" %02x", *cp++);
	printk("\n");
}
#else
# define DPRINT(a...)
# define DDUMP(str,vp,len)
#endif

#define IA64_FIRST_STACKED_GR	32
#define IA64_FIRST_ROTATING_FR	32
#define SIGN_EXT9		0xffffffffffffff00ul

/*
 * sysctl settable hook which tells the kernel whether to honor the
 * IA64_THREAD_UAC_NOPRINT prctl.  Because this is user settable, we want
 * to allow the super user to enable/disable this for security reasons
 * (i.e. don't allow an attacker to fill up the logs with unaligned accesses).
 */
int no_unaligned_warning;
int unaligned_dump_stack;

/*
 * For M-unit:
 *
 *  opcode |   m  |   x6    |
 * --------|------|---------|
 * [40-37] | [36] | [35:30] |
 * --------|------|---------|
 *     4   |   1  |    6    | = 11 bits
 * --------------------------
 * However bits [31:30] are not directly useful to distinguish between
 * load/store so we can use [35:32] instead, which gives the following
 * mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer
 * checking the m-bit until later in the load/store emulation.
 */
#define IA64_OPCODE_MASK	0x1ef
#define IA64_OPCODE_SHIFT	32

/*
 * Table C-28 Integer Load/Store
 *
 * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
 *
 * ld8.fill, st8.fill MUST be aligned because the RNATs are based on
 * the address (bits [8:3]), so we must fail.
 */
#define LD_OP            0x080
#define LDS_OP           0x081
#define LDA_OP           0x082
#define LDSA_OP          0x083
#define LDBIAS_OP        0x084
#define LDACQ_OP         0x085
/* 0x086, 0x087 are not relevant */
#define LDCCLR_OP        0x088
#define LDCNC_OP         0x089
#define LDCCLRACQ_OP     0x08a
#define ST_OP            0x08c
#define STREL_OP         0x08d
/* 0x08e,0x8f are not relevant */

/*
 * Table C-29 Integer Load +Reg
 *
 * we use the ld->m (bit [36:36]) field to determine whether or not we have
 * a load/store of this form.
 */

/*
 * Table C-30 Integer Load/Store +Imm
 *
 * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
 *
 * ld8.fill, st8.fill must be aligned because the NaT bits are based on
 * the address, so we must fail and the program must be fixed.
 */
#define LD_IMM_OP            0x0a0
#define LDS_IMM_OP           0x0a1
#define LDA_IMM_OP           0x0a2
#define LDSA_IMM_OP          0x0a3
#define LDBIAS_IMM_OP        0x0a4
#define LDACQ_IMM_OP         0x0a5
/* 0x0a6, 0xa7 are not relevant */
#define LDCCLR_IMM_OP        0x0a8
#define LDCNC_IMM_OP         0x0a9
#define LDCCLRACQ_IMM_OP     0x0aa
#define ST_IMM_OP            0x0ac
#define STREL_IMM_OP         0x0ad
/* 0x0ae,0xaf are not relevant */

/*
 * Table C-32 Floating-point Load/Store
 */
#define LDF_OP           0x0c0
#define LDFS_OP          0x0c1
#define LDFA_OP          0x0c2
#define LDFSA_OP         0x0c3
/* 0x0c6 is irrelevant */
#define LDFCCLR_OP       0x0c8
#define LDFCNC_OP        0x0c9
/* 0x0cb is irrelevant */
#define STF_OP           0x0cc

/*
 * Table C-33 Floating-point Load +Reg
 *
 * we use the ld->m (bit [36:36]) field to determine whether or not we have
 * a load/store of this form.
 */

/*
 * Table C-34 Floating-point Load/Store +Imm
 */
#define LDF_IMM_OP       0x0e0
#define LDFS_IMM_OP      0x0e1
#define LDFA_IMM_OP      0x0e2
#define LDFSA_IMM_OP     0x0e3
/* 0x0e6 is irrelevant */
#define LDFCCLR_IMM_OP   0x0e8
#define LDFCNC_IMM_OP    0x0e9
#define STF_IMM_OP       0x0ec

typedef struct {
	unsigned long qp:6;	/* [0:5]   */
	unsigned long r1:7;	/* [6:12]  */
	unsigned long imm:7;	/* [13:19] */
	unsigned long r3:7;	/* [20:26] */
	unsigned long x:1;	/* [27:27] */
	unsigned long hint:2;	/* [28:29] */
	unsigned long x6_sz:2;	/* [30:31] */
	unsigned long x6_op:4;	/* [32:35], x6 = x6_sz|x6_op */
	unsigned long m:1;	/* [36:36] */
	unsigned long op:4;	/* [37:40] */
	unsigned long pad:23;	/* [41:63] */
} load_store_t;


typedef enum {
	UPD_IMMEDIATE,	/* ldXZ r1=[r3],imm(9) */
	UPD_REG		/* ldXZ r1=[r3],r2     */
} update_t;

/*
 * We use tables to keep track of the offsets of registers in the saved state.
 * This way we save having big switch/case statements.
 *
 * We use bit 0 to indicate switch_stack or pt_regs.
 * The offset is simply shifted by 1 bit.
 * A 2-byte value should be enough to hold any kind of offset.
 *
 * In case the calling convention changes (and thus pt_regs/switch_stack)
 * simply use RSW instead of RPT or vice-versa.
 */
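/*
 * Illustrative note (added, not part of the original comment): with this
 * encoding, RPT(r8) is (offsetof(struct pt_regs, r8) << 1) with bit 0 clear,
 * while RSW(r4) is (offsetof(struct switch_stack, r4) << 1) | 1.  GR_OFFS()
 * and GR_IN_SW() below simply shift/mask a gr_info[] entry to recover the
 * byte offset and the pt_regs-vs-switch_stack flag.
 */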

#define RPO(x)		((size_t) &((struct pt_regs *)0)->x)
#define RSO(x)		((size_t) &((struct switch_stack *)0)->x)

#define RPT(x)		(RPO(x) << 1)
#define RSW(x)		(1| RSO(x)<<1)

#define GR_OFFS(x)	(gr_info[x]>>1)
#define GR_IN_SW(x)	(gr_info[x] & 0x1)

#define FR_OFFS(x)	(fr_info[x]>>1)
#define FR_IN_SW(x)	(fr_info[x] & 0x1)

static u16 gr_info[32]={
	0,			/* r0 is read-only : WE SHOULD NEVER GET THIS */

	RPT(r1), RPT(r2), RPT(r3),

	RSW(r4), RSW(r5), RSW(r6), RSW(r7),

	RPT(r8), RPT(r9), RPT(r10), RPT(r11),
	RPT(r12), RPT(r13), RPT(r14), RPT(r15),

	RPT(r16), RPT(r17), RPT(r18), RPT(r19),
	RPT(r20), RPT(r21), RPT(r22), RPT(r23),
	RPT(r24), RPT(r25), RPT(r26), RPT(r27),
	RPT(r28), RPT(r29), RPT(r30), RPT(r31)
};

static u16 fr_info[32]={
	0,			/* constant : WE SHOULD NEVER GET THIS */
	0,			/* constant : WE SHOULD NEVER GET THIS */

	RSW(f2), RSW(f3), RSW(f4), RSW(f5),

	RPT(f6), RPT(f7), RPT(f8), RPT(f9),
	RPT(f10), RPT(f11),

	RSW(f12), RSW(f13), RSW(f14),
	RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
	RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
	RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
	RSW(f30), RSW(f31)
};

/* Invalidate ALAT entry for integer register REGNO. */
static void
invala_gr (int regno)
{
#	define F(reg)	case reg: ia64_invala_gr(reg); break

	switch (regno) {
		F(  0); F(  1); F(  2); F(  3); F(  4); F(  5); F(  6); F(  7);
		F(  8); F(  9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
		F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
		F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
		F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
		F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
		F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
		F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
		F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
		F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
		F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
		F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
		F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
		F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
		F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
		F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
	}
#	undef F
}

/* Invalidate ALAT entry for floating-point register REGNO. */
static void
invala_fr (int regno)
{
#	define F(reg)	case reg: ia64_invala_fr(reg); break

	switch (regno) {
		F(  0); F(  1); F(  2); F(  3); F(  4); F(  5); F(  6); F(  7);
		F(  8); F(  9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
		F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
		F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
		F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
		F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
		F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
		F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
		F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
		F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
		F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
		F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
		F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
		F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
		F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
		F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
	}
#	undef F
}

static inline unsigned long
rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg)
{
	reg += rrb;
	if (reg >= sor)
		reg -= sor;
	return reg;
}

static void
set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
{
	struct switch_stack *sw = (struct switch_stack *) regs - 1;
	unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
	unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
	unsigned long rnats, nat_mask;
	unsigned long on_kbs;
	long sof = (regs->cr_ifs) & 0x7f;
	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
	long ridx = r1 - 32;

	if (ridx >= sof) {
		/* this should never happen, as the "rsvd register fault" has higher priority */
		DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
		return;
	}

	if (ridx < sor)
		ridx = rotate_reg(sor, rrb_gr, ridx);

	DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
	       r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);

	on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
	addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
	if (addr >= kbs) {
		/* the register is on the kernel backing store: easy... */
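		/*
		 * Clarifying note (added): on the register backing store, every
		 * 64th slot is an RNAT collection word holding the NaT bits for
		 * the 63 preceding stacked registers, so the NaT bit for *addr
		 * lives at bit ia64_rse_slot_num(addr) of *ia64_rse_rnat_addr(addr)
		 * rather than next to the value itself.
		 */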
		rnat_addr = ia64_rse_rnat_addr(addr);
		if ((unsigned long) rnat_addr >= sw->ar_bspstore)
			rnat_addr = &sw->ar_rnat;
		nat_mask = 1UL << ia64_rse_slot_num(addr);

		*addr = val;
		if (nat)
			*rnat_addr |= nat_mask;
		else
			*rnat_addr &= ~nat_mask;
		return;
	}

	if (!user_stack(current, regs)) {
		DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1);
		return;
	}

	bspstore = (unsigned long *)regs->ar_bspstore;
	ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
	bsp = ia64_rse_skip_regs(ubs_end, -sof);
	addr = ia64_rse_skip_regs(bsp, ridx);

	DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);

	ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);

	rnat_addr = ia64_rse_rnat_addr(addr);

	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
	DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n",
	       (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1);

	nat_mask = 1UL << ia64_rse_slot_num(addr);
	if (nat)
		rnats |= nat_mask;
	else
		rnats &= ~nat_mask;
	ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats);

	DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats);
}


static void
get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat)
{
	struct switch_stack *sw = (struct switch_stack *) regs - 1;
	unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore;
	unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
	unsigned long rnats, nat_mask;
	unsigned long on_kbs;
	long sof = (regs->cr_ifs) & 0x7f;
	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
	long ridx = r1 - 32;

	if (ridx >= sof) {
		/* read of an out-of-frame register returns an undefined value; 0 in our case. */
		DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
		goto fail;
	}

	if (ridx < sor)
		ridx = rotate_reg(sor, rrb_gr, ridx);

	DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
	       r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);

	on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
	addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
	if (addr >= kbs) {
		/* the register is on the kernel backing store: easy... */
		*val = *addr;
		if (nat) {
			rnat_addr = ia64_rse_rnat_addr(addr);
			if ((unsigned long) rnat_addr >= sw->ar_bspstore)
				rnat_addr = &sw->ar_rnat;
			nat_mask = 1UL << ia64_rse_slot_num(addr);
			*nat = (*rnat_addr & nat_mask) != 0;
		}
		return;
	}

	if (!user_stack(current, regs)) {
		DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1);
		goto fail;
	}

	bspstore = (unsigned long *)regs->ar_bspstore;
	ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
	bsp = ia64_rse_skip_regs(ubs_end, -sof);
	addr = ia64_rse_skip_regs(bsp, ridx);

	DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);

	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);

	if (nat) {
		rnat_addr = ia64_rse_rnat_addr(addr);
		nat_mask = 1UL << ia64_rse_slot_num(addr);

		ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);

		DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats);

		*nat = (rnats & nat_mask) != 0;
	}
	return;

  fail:
	*val = 0;
	if (nat)
		*nat = 0;
	return;
}


static void
setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
{
	struct switch_stack *sw = (struct switch_stack *) regs - 1;
	unsigned long addr;
	unsigned long bitmask;
	unsigned long *unat;

	/*
	 * First takes care of stacked registers
	 */
	if (regnum >= IA64_FIRST_STACKED_GR) {
		set_rse_reg(regs, regnum, val, nat);
		return;
	}

	/*
	 * Using r0 as a target raises a General Exception fault which has higher priority
	 * than the Unaligned Reference fault.
	 */

	/*
	 * Now look at registers in [0-31] range and init correct UNAT
	 */
	if (GR_IN_SW(regnum)) {
		addr = (unsigned long)sw;
		unat = &sw->ar_unat;
	} else {
		addr = (unsigned long)regs;
		unat = &sw->caller_unat;
	}
	DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
	       addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
	/*
	 * add offset from base of struct
	 * and do it !
	 */
	addr += GR_OFFS(regnum);

	*(unsigned long *)addr = val;

	/*
	 * We need to clear the corresponding UNAT bit to fully emulate the load
	 * UNAT bit_pos = GR[r3]{8:3} from EAS-2.4
	 */
	bitmask = 1UL << (addr >> 3 & 0x3f);
	DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat);
	if (nat) {
		*unat |= bitmask;
	} else {
		*unat &= ~bitmask;
	}
	DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat, *unat);
}

/*
 * Return the (rotated) index for floating point register REGNUM (REGNUM must be
 * in the range 32-127; the result is in the range 0-95).
 */
static inline unsigned long
fph_index (struct pt_regs *regs, long regnum)
{
	unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
	return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
}

static void
setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
{
	struct switch_stack *sw = (struct switch_stack *)regs - 1;
	unsigned long addr;

	/*
	 * From EAS-2.5: FPDisableFault has higher priority than Unaligned
	 * Fault. Thus, when we get here, we know the partition is enabled.
	 * To update f32-f127, there are three choices:
	 *
	 *	(1) save f32-f127 to thread.fph and update the values there
	 *	(2) use a gigantic switch statement to directly access the registers
	 *	(3) generate code on the fly to update the desired register
	 *
	 * For now, we are using approach (1).
	 */
	if (regnum >= IA64_FIRST_ROTATING_FR) {
		ia64_sync_fph(current);
		current->thread.fph[fph_index(regs, regnum)] = *fpval;
	} else {
		/*
		 * pt_regs or switch_stack ?
		 */
		if (FR_IN_SW(regnum)) {
			addr = (unsigned long)sw;
		} else {
			addr = (unsigned long)regs;
		}

		DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));

		addr += FR_OFFS(regnum);
		*(struct ia64_fpreg *)addr = *fpval;

		/*
		 * mark the low partition as being used now
		 *
		 * It is highly unlikely that this bit is not already set, but
		 * let's do it for safety.
		 */
		regs->cr_ipsr |= IA64_PSR_MFL;
	}
}

/*
 * These two inline functions generate the spilled versions of the constant
 * floating-point registers, which can be used with stfX.
 */
static inline void
float_spill_f0 (struct ia64_fpreg *final)
{
	ia64_stf_spill(final, 0);
}

static inline void
float_spill_f1 (struct ia64_fpreg *final)
{
	ia64_stf_spill(final, 1);
}

static void
getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
{
	struct switch_stack *sw = (struct switch_stack *) regs - 1;
	unsigned long addr;

	/*
	 * From EAS-2.5: FPDisableFault has higher priority than
	 * Unaligned Fault. Thus, when we get here, we know the partition is
	 * enabled.
	 *
	 * When regnum > 31, the register is still live and we need to force a save
	 * to current->thread.fph to get access to it.  See discussion in setfpreg()
	 * for reasons and other ways of doing this.
	 */
	if (regnum >= IA64_FIRST_ROTATING_FR) {
		ia64_flush_fph(current);
		*fpval = current->thread.fph[fph_index(regs, regnum)];
	} else {
		/*
		 * f0 = 0.0, f1 = 1.0. Those registers are constant and are thus
		 * not saved; we must generate their spilled form on the fly.
		 */
		switch(regnum) {
		case 0:
			float_spill_f0(fpval);
			break;
		case 1:
			float_spill_f1(fpval);
			break;
		default:
			/*
			 * pt_regs or switch_stack ?
			 */
			addr = FR_IN_SW(regnum) ?
				(unsigned long)sw : (unsigned long)regs;

			DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
			       FR_IN_SW(regnum), addr, FR_OFFS(regnum));

			addr += FR_OFFS(regnum);
			*fpval = *(struct ia64_fpreg *)addr;
		}
	}
}


static void
getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
{
	struct switch_stack *sw = (struct switch_stack *) regs - 1;
	unsigned long addr, *unat;

	if (regnum >= IA64_FIRST_STACKED_GR) {
		get_rse_reg(regs, regnum, val, nat);
		return;
	}

	/*
	 * take care of r0 (read-only always evaluate to 0)
	 */
	if (regnum == 0) {
		*val = 0;
		if (nat)
			*nat = 0;
		return;
	}

	/*
	 * Now look at registers in [0-31] range and init correct UNAT
	 */
	if (GR_IN_SW(regnum)) {
		addr = (unsigned long)sw;
		unat = &sw->ar_unat;
	} else {
		addr = (unsigned long)regs;
		unat = &sw->caller_unat;
	}

	DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum));

	addr += GR_OFFS(regnum);

	*val = *(unsigned long *)addr;

	/*
	 * do it only when requested
	 */
	if (nat)
		*nat = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
}

static void
emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
{
	/*
	 * IMPORTANT:
	 * Given the way we handle unaligned speculative loads, we should
	 * not get to this point in the code but we keep this sanity check,
	 * just in case.
	 */
	if (ld.x6_op == 1 || ld.x6_op == 3) {
		printk(KERN_ERR "%s: register update on speculative load, error\n", __func__);
		if (die_if_kernel("unaligned reference on speculative load with register update\n",
				  regs, 30))
			return;
	}


	/*
	 * at this point, we know that the base register to update is valid i.e.,
	 * it's not r0
	 */
	if (type == UPD_IMMEDIATE) {
		unsigned long imm;

		/*
		 * Load +Imm: ldXZ r1=[r3],imm(9)
		 *
		 *
		 * form imm9: [13:19] contain the first 7 bits
		 */
		imm = ld.x << 7 | ld.imm;

		/*
		 * sign extend (1+8bits) if m set
		 */
		if (ld.m) imm |= SIGN_EXT9;

		/*
		 * ifa == r3 and we know that the NaT bit on r3 was clear so
		 * we can directly use ifa.
		 */
		ifa += imm;

		setreg(ld.r3, ifa, 0, regs);

		DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);

	} else if (ld.m) {
		unsigned long r2;
		int nat_r2;

		/*
		 * Load +Reg Opcode: ldXZ r1=[r3],r2
		 *
		 * Note that we update r3 even in the case of ldfX.a
		 * (where the load does not happen)
		 *
		 * The way the load algorithm works, we know that r3 does not
		 * have its NaT bit set (would have gotten NaT consumption
		 * before getting the unaligned fault). So we can use ifa
		 * which equals r3 at this point.
		 *
		 * IMPORTANT:
		 * The above statement holds ONLY because we know that we
		 * never reach this code when trying to do a ldX.s.
		 * If we ever make it to here on an ldfX.s then the assumption
		 * that r3's NaT bit is clear no longer holds.
		 */
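		/*
		 * Clarifying note (added): in the +Reg form, r2 occupies the same
		 * bit positions ([13:19]) as imm7 in the +Imm form, so ld.imm here
		 * is really the r2 register number.
		 */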
		getreg(ld.imm, &r2, &nat_r2, regs);

		ifa += r2;

		/*
		 * propagate Nat r2 -> r3
		 */
		setreg(ld.r3, ifa, nat_r2, regs);

		DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n", ld.imm, r2, ifa, nat_r2);
	}
}


static int
emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
{
	unsigned int len = 1 << ld.x6_sz;
	unsigned long val = 0;

	/*
	 * r0, as target, doesn't need to be checked because Illegal Instruction
	 * faults have higher priority than unaligned faults.
	 *
	 * r0 cannot be found as the base as it would never generate an
	 * unaligned reference.
	 */

	/*
	 * ldX.a we will emulate load and also invalidate the ALAT entry.
	 * See comment below for explanation on how we handle ldX.a
	 */

	if (len != 2 && len != 4 && len != 8) {
		DPRINT("unknown size: x6=%d\n", ld.x6_sz);
		return -1;
	}
	/* this assumes little-endian byte-order: */
	if (copy_from_user(&val, (void __user *) ifa, len))
		return -1;
	setreg(ld.r1, val, 0, regs);

	/*
	 * check for updates on any kind of loads
	 */
	if (ld.op == 0x5 || ld.m)
		emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);

	/*
	 * handling of various loads (based on EAS2.4):
	 *
	 * ldX.acq (ordered load):
	 *	- acquire semantics would have been used, so force fence instead.
	 *
	 * ldX.c.clr (check load and clear):
	 *	- if we get to this handler, it's because the entry was not in the ALAT.
	 *	  Therefore the operation reverts to a normal load
	 *
	 * ldX.c.nc (check load no clear):
	 *	- same as previous one
	 *
	 * ldX.c.clr.acq (ordered check load and clear):
	 *	- same as above for c.clr part. The load needs to have acquire semantics. So
	 *	  we use the fence semantics which is stronger and thus ensures correctness.
	 *
	 * ldX.a (advanced load):
	 *	- suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
	 *	  address doesn't match requested size alignment. This means that we would
	 *	  possibly need more than one load to get the result.
	 *
	 *	  The load part can be handled just like a normal load, however the difficult
	 *	  part is to get the right thing into the ALAT. The critical piece of information
	 *	  is the base address of the load & its size. To do that, a ld.a must be executed;
	 *	  clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
	 *	  if we use the same target register, we will be okay for the check.a instruction.
	 *	  If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry
	 *	  which would overlap within [r3,r3+X] (the size of the load was stored in the
	 *	  ALAT). If such an entry is found the entry is invalidated. But this is not good
	 *	  enough, take the following example:
	 *		r3=3
	 *		ld4.a r1=[r3]
	 *
	 *	  Could be emulated by doing:
	 *		ld1.a r1=[r3],1
	 *		store to temporary;
	 *		ld1.a r1=[r3],1
	 *		store & shift to temporary;
	 *		ld1.a r1=[r3],1
	 *		store & shift to temporary;
	 *		ld1.a r1=[r3]
	 *		store & shift to temporary;
	 *		r1=temporary
	 *
	 *	  So in this case, you would get the right value in r1 but the wrong info in
	 *	  the ALAT.  Notice that you could do it in reverse to finish with address 3
	 *	  but you would still get the size wrong.  To get the size right, one needs to
	 *	  execute exactly the same kind of load.
	 *	  You could do it from an aligned temporary location, but you would get
	 *	  the address wrong.
	 *
	 *	  So no matter what, it is not possible to emulate an advanced load
	 *	  correctly. But is that really critical?
	 *
	 *	  We will always convert ld.a into a normal load with ALAT invalidated.  This
	 *	  will enable the compiler to do optimization where certain code path after ld.a
	 *	  is not required to have ld.c/chk.a, e.g., code path with no intervening stores.
	 *
	 *	  If there is a store after the advanced load, one must either do a ld.c.* or
	 *	  chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
	 *	  entry found in ALAT), and that's perfectly ok because:
	 *
	 *		- ld.c.*, if the entry is not present a normal load is executed
	 *		- chk.a.*, if the entry is not present, execution jumps to recovery code
	 *
	 *	  In either case, the load can be potentially retried in another form.
	 *
	 *	  ALAT must be invalidated for the register (so that chk.a or ld.c don't pick
	 *	  up a stale entry later). The register base update MUST also be performed.
	 */

	/*
	 * when the load has the .acq completer then
	 * use ordering fence.
	 */
	if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
		mb();

	/*
	 * invalidate ALAT entry in case of advanced load
	 */
	if (ld.x6_op == 0x2)
		invala_gr(ld.r1);

	return 0;
}

static int
emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
{
	unsigned long r2;
	unsigned int len = 1 << ld.x6_sz;

	/*
	 * if we get to this handler, NaT bits on both r3 and r2 have already
	 * been checked, so we don't need to do it again.
	 *
	 * extract the value to be stored
	 */
	getreg(ld.imm, &r2, NULL, regs);

	/*
	 * we rely on the macros in unaligned.h for now i.e.,
	 * we let the compiler figure out how to read memory gracefully.
	 *
	 * We need this switch/case because of the way the inline function
	 * works. The code is optimized by the compiler and looks like
	 * a single switch/case.
	 */
	DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);

	if (len != 2 && len != 4 && len != 8) {
		DPRINT("unknown size: x6=%d\n", ld.x6_sz);
		return -1;
	}

	/* this assumes little-endian byte-order: */
	if (copy_to_user((void __user *) ifa, &r2, len))
		return -1;

	/*
	 * stX [r3]=r2,imm(9)
	 *
	 * NOTE:
	 * ld.r3 can never be r0, because r0 would not generate an
	 * unaligned access.
	 */
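	/*
	 * Worked example (illustrative, not from the original): for
	 * st8 [r3]=r2,-16 the encoding has m=1, x=1 and 0x70 in the r1 field;
	 * (ld.x << 7 | ld.r1) gives 0xf0 and OR-ing in SIGN_EXT9 yields -16.
	 */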
	if (ld.op == 0x5) {
		unsigned long imm;

		/*
		 * form imm9: [12:6] contain first 7 bits
		 */
		imm = ld.x << 7 | ld.r1;
		/*
		 * sign extend (8bits) if m set
		 */
		if (ld.m) imm |= SIGN_EXT9;
		/*
		 * ifa == r3 (NaT is necessarily cleared)
		 */
		ifa += imm;

		DPRINT("imm=%lx r3=%lx\n", imm, ifa);

		setreg(ld.r3, ifa, 0, regs);
	}
	/*
	 * we don't have alat_invalidate_multiple() so we need
	 * to do the complete flush :-<<
	 */
	ia64_invala();

	/*
	 * stX.rel: use fence instead of release
	 */
	if (ld.x6_op == 0xd)
		mb();

	return 0;
}

/*
 * floating point operations sizes in bytes
 */
static const unsigned char float_fsz[4]={
	10, /* extended precision (e) */
	8,  /* integer (8)            */
	4,  /* single precision (s)   */
	8   /* double precision (d)   */
};

static inline void
mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldfe(6, init);
	ia64_stop();
	ia64_stf_spill(final, 6);
}

static inline void
mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldf8(6, init);
	ia64_stop();
	ia64_stf_spill(final, 6);
}

static inline void
mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldfs(6, init);
	ia64_stop();
	ia64_stf_spill(final, 6);
}

static inline void
mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldfd(6, init);
	ia64_stop();
	ia64_stf_spill(final, 6);
}

static inline void
float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldf_fill(6, init);
	ia64_stop();
	ia64_stfe(final, 6);
}

static inline void
float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldf_fill(6, init);
	ia64_stop();
	ia64_stf8(final, 6);
}

static inline void
float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldf_fill(6, init);
	ia64_stop();
	ia64_stfs(final, 6);
}

static inline void
float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldf_fill(6, init);
	ia64_stop();
	ia64_stfd(final, 6);
}

static int
emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
{
	struct ia64_fpreg fpr_init[2];
	struct ia64_fpreg fpr_final[2];
	unsigned long len = float_fsz[ld.x6_sz];

	/*
	 * fr0 & fr1 don't need to be checked because Illegal Instruction faults have
	 * higher priority than unaligned faults.
	 *
	 * r0 cannot be found as the base as it would never generate an unaligned
	 * reference.
	 */

	/*
	 * make sure we get clean buffers
	 */
	memset(&fpr_init, 0, sizeof(fpr_init));
	memset(&fpr_final, 0, sizeof(fpr_final));

	/*
	 * ldfpX.a: we don't try to emulate anything but we must
	 * invalidate the ALAT entry and execute updates, if any.
	 */
	if (ld.x6_op != 0x2) {
		/*
		 * This assumes little-endian byte-order.  Note that there is no "ldfpe"
		 * instruction:
		 */
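		/*
		 * Clarifying note (added): for ldfpX the second target register is
		 * encoded in the bits that hold imm7 for other forms, which is why
		 * ld.imm is used as a register number below.
		 */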
		if (copy_from_user(&fpr_init[0], (void __user *) ifa, len)
		    || copy_from_user(&fpr_init[1], (void __user *) (ifa + len), len))
			return -1;

		DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz);
		DDUMP("fpr_init =", &fpr_init, 2*len);
		/*
		 * XXX fixme
		 * Could optimize inlines by using ldfpX & 2 spills
		 */
		switch( ld.x6_sz ) {
		case 0:
			mem2float_extended(&fpr_init[0], &fpr_final[0]);
			mem2float_extended(&fpr_init[1], &fpr_final[1]);
			break;
		case 1:
			mem2float_integer(&fpr_init[0], &fpr_final[0]);
			mem2float_integer(&fpr_init[1], &fpr_final[1]);
			break;
		case 2:
			mem2float_single(&fpr_init[0], &fpr_final[0]);
			mem2float_single(&fpr_init[1], &fpr_final[1]);
			break;
		case 3:
			mem2float_double(&fpr_init[0], &fpr_final[0]);
			mem2float_double(&fpr_init[1], &fpr_final[1]);
			break;
		}
		DDUMP("fpr_final =", &fpr_final, 2*len);
		/*
		 * XXX fixme
		 *
		 * A possible optimization would be to drop fpr_final and directly
		 * use the storage from the saved context i.e., the actual final
		 * destination (pt_regs, switch_stack or thread structure).
		 */
		setfpreg(ld.r1, &fpr_final[0], regs);
		setfpreg(ld.imm, &fpr_final[1], regs);
	}

	/*
	 * Check for updates: only immediate updates are available for this
	 * instruction.
	 */
	if (ld.m) {
		/*
		 * the immediate is implicit given the ldsz of the operation:
		 * single: 8 (2x4) and for all others it's 16 (2x8)
		 */
		ifa += len<<1;

		/*
		 * IMPORTANT:
		 * the fact that we force the NaT of r3 to zero is ONLY valid
		 * as long as we don't come here with a ldfpX.s.
		 * For this reason we keep this sanity check
		 */
		if (ld.x6_op == 1 || ld.x6_op == 3)
			printk(KERN_ERR "%s: register update on speculative load pair, error\n",
			       __func__);

		setreg(ld.r3, ifa, 0, regs);
	}

	/*
	 * Invalidate ALAT entries, if any, for both registers.
	 */
	if (ld.x6_op == 0x2) {
		invala_fr(ld.r1);
		invala_fr(ld.imm);
	}
	return 0;
}


static int
emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
{
	struct ia64_fpreg fpr_init;
	struct ia64_fpreg fpr_final;
	unsigned long len = float_fsz[ld.x6_sz];

	/*
	 * fr0 & fr1 don't need to be checked because Illegal Instruction
	 * faults have higher priority than unaligned faults.
	 *
	 * r0 cannot be found as the base as it would never generate an
	 * unaligned reference.
	 */

	/*
	 * make sure we get clean buffers
	 */
	memset(&fpr_init, 0, sizeof(fpr_init));
	memset(&fpr_final, 0, sizeof(fpr_final));

	/*
	 * ldfX.a we don't try to emulate anything but we must
	 * invalidate the ALAT entry.
	 * See comments in ldX for descriptions on how the various loads are handled.
	 */
	if (ld.x6_op != 0x2) {
		if (copy_from_user(&fpr_init, (void __user *) ifa, len))
			return -1;

		DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
		DDUMP("fpr_init =", &fpr_init, len);
		/*
		 * we only do something for x6_op={0,8,9}
		 */
		switch( ld.x6_sz ) {
		case 0:
			mem2float_extended(&fpr_init, &fpr_final);
			break;
		case 1:
			mem2float_integer(&fpr_init, &fpr_final);
			break;
		case 2:
			mem2float_single(&fpr_init, &fpr_final);
			break;
		case 3:
			mem2float_double(&fpr_init, &fpr_final);
			break;
		}
		DDUMP("fpr_final =", &fpr_final, len);
		/*
		 * XXX fixme
		 *
		 * A possible optimization would be to drop fpr_final and directly
		 * use the storage from the saved context i.e., the actual final
		 * destination (pt_regs, switch_stack or thread structure).
		 */
		setfpreg(ld.r1, &fpr_final, regs);
	}

	/*
	 * check for updates on any loads
	 */
	if (ld.op == 0x7 || ld.m)
		emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);

	/*
	 * invalidate ALAT entry in case of advanced floating point loads
	 */
	if (ld.x6_op == 0x2)
		invala_fr(ld.r1);

	return 0;
}


static int
emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
{
	struct ia64_fpreg fpr_init;
	struct ia64_fpreg fpr_final;
	unsigned long len = float_fsz[ld.x6_sz];

	/*
	 * make sure we get clean buffers
	 */
	memset(&fpr_init, 0, sizeof(fpr_init));
	memset(&fpr_final, 0, sizeof(fpr_final));

	/*
	 * if we get to this handler, NaT bits on both r3 and r2 have already
	 * been checked, so we don't need to do it again.
	 *
	 * extract the value to be stored
	 */
	getfpreg(ld.imm, &fpr_init, regs);
	/*
	 * during this step, we extract the spilled registers from the saved
	 * context i.e., we refill. Then we store (no spill) to temporary
	 * aligned location
	 */
	switch( ld.x6_sz ) {
	case 0:
		float2mem_extended(&fpr_init, &fpr_final);
		break;
	case 1:
		float2mem_integer(&fpr_init, &fpr_final);
		break;
	case 2:
		float2mem_single(&fpr_init, &fpr_final);
		break;
	case 3:
		float2mem_double(&fpr_init, &fpr_final);
		break;
	}
	DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
	DDUMP("fpr_init =", &fpr_init, len);
	DDUMP("fpr_final =", &fpr_final, len);

	if (copy_to_user((void __user *) ifa, &fpr_final, len))
		return -1;

	/*
	 * stfX [r3]=r2,imm(9)
	 *
	 * NOTE:
	 * ld.r3 can never be r0, because r0 would not generate an
	 * unaligned access.
	 */
	if (ld.op == 0x7) {
		unsigned long imm;

		/*
		 * form imm9: [12:6] contain first 7 bits
		 */
		imm = ld.x << 7 | ld.r1;
		/*
		 * sign extend (8bits) if m set
		 */
		if (ld.m)
			imm |= SIGN_EXT9;
		/*
		 * ifa == r3 (NaT is necessarily cleared)
		 */
		ifa += imm;

		DPRINT("imm=%lx r3=%lx\n", imm, ifa);

		setreg(ld.r3, ifa, 0, regs);
	}
	/*
	 * we don't have alat_invalidate_multiple() so we need
	 * to do the complete flush :-<<
	 */
	ia64_invala();

	return 0;
}

/*
 * Make sure we log the unaligned access, so that user/sysadmin can notice it and
 * eventually fix the program.  However, we don't want to do that for every access so we
 * pace it with jiffies.
 */
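/*
 * Clarifying note (added): the parameters below allow a burst of at most 5
 * logged accesses per 5*HZ-jiffy (i.e. 5 second) window; __ratelimit()
 * suppresses further messages until the window expires.
 */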
static DEFINE_RATELIMIT_STATE(logging_rate_limit, 5 * HZ, 5);

void
ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
{
	struct ia64_psr *ipsr = ia64_psr(regs);
	mm_segment_t old_fs = get_fs();
	unsigned long bundle[2];
	unsigned long opcode;
	struct siginfo si;
	const struct exception_table_entry *eh = NULL;
	union {
		unsigned long l;
		load_store_t insn;
	} u;
	int ret = -1;

	if (ia64_psr(regs)->be) {
		/* we don't support big-endian accesses */
		if (die_if_kernel("big-endian unaligned accesses are not supported", regs, 0))
			return;
		goto force_sigbus;
	}

	/*
	 * Treat kernel accesses for which there is an exception handler entry the same as
	 * user-level unaligned accesses.  Otherwise, a clever program could trick this
	 * handler into reading arbitrary kernel addresses...
	 */
	if (!user_mode(regs))
		eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri);
	if (user_mode(regs) || eh) {
		if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
			goto force_sigbus;

		if (!no_unaligned_warning &&
		    !(current->thread.flags & IA64_THREAD_UAC_NOPRINT) &&
		    __ratelimit(&logging_rate_limit))
		{
			char buf[200];	/* comm[] is at most 16 bytes... */
			size_t len;

			len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
				      "ip=0x%016lx\n\r", current->comm,
				      task_pid_nr(current),
				      ifa, regs->cr_iip + ipsr->ri);
			/*
			 * Don't call tty_write_message() if we're in the kernel; we might
			 * be holding locks...
			 */
			if (user_mode(regs)) {
				struct tty_struct *tty = get_current_tty();
				tty_write_message(tty, buf);
				tty_kref_put(tty);
			}
			buf[len-1] = '\0';	/* drop '\r' */
			/* watch for command names containing %s */
			printk(KERN_WARNING "%s", buf);
		} else {
			if (no_unaligned_warning) {
				printk_once(KERN_WARNING "%s(%d) encountered an "
				       "unaligned exception which required\n"
				       "kernel assistance, which degrades "
				       "the performance of the application.\n"
				       "Unaligned exception warnings have "
				       "been disabled by the system "
				       "administrator\n"
				       "echo 0 > /proc/sys/kernel/ignore-"
				       "unaligned-usertrap to re-enable\n",
				       current->comm, task_pid_nr(current));
			}
		}
	} else {
		if (__ratelimit(&logging_rate_limit)) {
			printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
			       ifa, regs->cr_iip + ipsr->ri);
			if (unaligned_dump_stack)
				dump_stack();
		}
		set_fs(KERNEL_DS);
	}

	DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
	       regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);

	if (__copy_from_user(bundle, (void __user *) regs->cr_iip, 16))
		goto failure;

	/*
	 * extract the instruction from the bundle given the slot number
	 */
	switch (ipsr->ri) {
	default:
	case 0: u.l = (bundle[0] >>  5); break;
	case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break;
	case 2: u.l = (bundle[1] >> 23); break;
	}
	opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;

	DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
	       "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
	       u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);

	/*
	 * IMPORTANT:
	 * Notice that the switch statement DOES NOT cover all possible
	 * instructions that DO generate unaligned references.  This is done on purpose
	 * because for some instructions it DOES NOT make sense to try and emulate the
	 * access.  Sometimes it is WRONG to try and emulate.  Here is a list of
	 * instructions we don't emulate i.e., the program will get a signal and die:
	 *
	 *	load/store:
	 *		- ldX.spill
	 *		- stX.spill
	 *	Reason: RNATs are based on addresses
	 *		- ld16
	 *		- st16
	 *	Reason: ld16 and st16 are supposed to occur in a single
	 *		memory op
	 *
	 *	synchronization:
	 *		- cmpxchg
	 *		- fetchadd
	 *		- xchg
	 *	Reason: ATOMIC operations cannot be emulated properly using multiple
	 *		instructions.
	 *
	 *	speculative loads:
	 *		- ldX.sZ
	 *	Reason: side effects, code must be ready to deal with failure so simpler
	 *		to let the load fail.
	 * ---------------------------------------------------------------------------------
	 * XXX fixme
	 *
	 * I would like to get rid of this switch case and do something
	 * more elegant.
	 */
	switch (opcode) {
	case LDS_OP:
	case LDSA_OP:
		if (u.insn.x)
			/* oops, really a semaphore op (cmpxchg, etc) */
			goto failure;
		/* no break */
	case LDS_IMM_OP:
	case LDSA_IMM_OP:
	case LDFS_OP:
	case LDFSA_OP:
	case LDFS_IMM_OP:
		/*
		 * The instruction will be retried with deferred exceptions turned on, and
		 * we should get the NaT bit installed
		 *
		 * IMPORTANT: When PSR_ED is set, the register & immediate update forms
		 * are actually executed even though the operation failed. So we don't
		 * need to take care of this.
		 */
		DPRINT("forcing PSR_ED\n");
		regs->cr_ipsr |= IA64_PSR_ED;
		goto done;

	case LD_OP:
	case LDA_OP:
	case LDBIAS_OP:
	case LDACQ_OP:
	case LDCCLR_OP:
	case LDCNC_OP:
	case LDCCLRACQ_OP:
		if (u.insn.x)
			/* oops, really a semaphore op (cmpxchg, etc) */
			goto failure;
		/* no break */
	case LD_IMM_OP:
	case LDA_IMM_OP:
	case LDBIAS_IMM_OP:
	case LDACQ_IMM_OP:
	case LDCCLR_IMM_OP:
	case LDCNC_IMM_OP:
	case LDCCLRACQ_IMM_OP:
		ret = emulate_load_int(ifa, u.insn, regs);
		break;

	case ST_OP:
	case STREL_OP:
		if (u.insn.x)
			/* oops, really a semaphore op (cmpxchg, etc) */
			goto failure;
		/* no break */
	case ST_IMM_OP:
	case STREL_IMM_OP:
		ret = emulate_store_int(ifa, u.insn, regs);
		break;

	case LDF_OP:
	case LDFA_OP:
	case LDFCCLR_OP:
	case LDFCNC_OP:
		if (u.insn.x)
			ret = emulate_load_floatpair(ifa, u.insn, regs);
		else
			ret = emulate_load_float(ifa, u.insn, regs);
		break;

	case LDF_IMM_OP:
	case LDFA_IMM_OP:
	case LDFCCLR_IMM_OP:
	case LDFCNC_IMM_OP:
		ret = emulate_load_float(ifa, u.insn, regs);
		break;

	case STF_OP:
	case STF_IMM_OP:
		ret = emulate_store_float(ifa, u.insn, regs);
		break;

	default:
		goto failure;
	}
	DPRINT("ret=%d\n", ret);
	if (ret)
		goto failure;

	if (ipsr->ri == 2)
		/*
		 * given today's architecture this case is not likely to happen because a
		 * memory access instruction (M) can never be in the last slot of a
		 * bundle.  But let's keep it for now.
		 */
		regs->cr_iip += 16;
	ipsr->ri = (ipsr->ri + 1) & 0x3;

	DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
  done:
	set_fs(old_fs);		/* restore original address limit */
	return;

  failure:
	/* something went wrong... */
	if (!user_mode(regs)) {
		if (eh) {
			ia64_handle_exception(regs, eh);
			goto done;
		}
		if (die_if_kernel("error during unaligned kernel access\n", regs, ret))
			return;
		/* NOT_REACHED */
	}
  force_sigbus:
	si.si_signo = SIGBUS;
	si.si_errno = 0;
	si.si_code = BUS_ADRALN;
	si.si_addr = (void __user *) ifa;
	si.si_flags = 0;
	si.si_isr = 0;
	si.si_imm = 0;
	force_sig_info(SIGBUS, &si, current);
	goto done;
}