/* align.c - handle alignment exceptions for the Power PC.
 *
 * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
 * Copyright (c) 1998-1999 TiVo, Inc.
 *    PowerPC 403GCX modifications.
 * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
 *    PowerPC 403GCX/405GP modifications.
 * Copyright (c) 2001-2002 PPC64 team, IBM Corp
 *    64-bit and Power4 support
 * Copyright (c) 2005 Benjamin Herrenschmidt, IBM Corp
 *                    <benh@kernel.crashing.org>
 *    Merge ppc32 and ppc64 implementations
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/cache.h>
#include <asm/cputable.h>

struct aligninfo {
	unsigned char len;
	unsigned char flags;
};

#define IS_XFORM(inst)	(((inst) >> 26) == 31)
#define IS_DSFORM(inst)	(((inst) >> 26) >= 56)

#define INVALID	{ 0, 0 }

/* Bits in the flags field */
#define LD	0	/* load */
#define ST	1	/* store */
#define SE	2	/* sign-extend value */
#define F	4	/* to/from fp regs */
#define U	8	/* update index register */
#define M	0x10	/* multiple load/store */
#define SW	0x20	/* byte swap */
#define S	0x40	/* single-precision fp or... */
#define SX	0x40	/* ... byte count in XER */
#define HARD	0x80	/* string, stwcx. */

/* DSISR bits reported for a DCBZ instruction: */
#define DCBZ	0x5f	/* 8xx/82xx dcbz faults when cache not enabled */

#define SWAP(a, b)	(t = (a), (a) = (b), (b) = t)

/*
 * The PowerPC stores certain bits of the instruction that caused the
 * alignment exception in the DSISR register.  This array maps those
 * bits to information about the operand length and what the
 * instruction would do.
 */
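
/*
 * An illustrative reading of the table below: the lhau entry is
 * { 2, LD+SE+U }, i.e. a 2-byte load whose result is sign-extended
 * and which updates its index register with the effective address.
 */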
static struct aligninfo aligninfo[128] = {
	{ 4, LD },		/* 00 0 0000: lwz / lwarx */
	INVALID,		/* 00 0 0001 */
	{ 4, ST },		/* 00 0 0010: stw */
	INVALID,		/* 00 0 0011 */
	{ 2, LD },		/* 00 0 0100: lhz */
	{ 2, LD+SE },		/* 00 0 0101: lha */
	{ 2, ST },		/* 00 0 0110: sth */
	{ 4, LD+M },		/* 00 0 0111: lmw */
	{ 4, LD+F+S },		/* 00 0 1000: lfs */
	{ 8, LD+F },		/* 00 0 1001: lfd */
	{ 4, ST+F+S },		/* 00 0 1010: stfs */
	{ 8, ST+F },		/* 00 0 1011: stfd */
	INVALID,		/* 00 0 1100 */
	{ 8, LD },		/* 00 0 1101: ld/ldu/lwa */
	INVALID,		/* 00 0 1110 */
	{ 8, ST },		/* 00 0 1111: std/stdu */
	{ 4, LD+U },		/* 00 1 0000: lwzu */
	INVALID,		/* 00 1 0001 */
	{ 4, ST+U },		/* 00 1 0010: stwu */
	INVALID,		/* 00 1 0011 */
	{ 2, LD+U },		/* 00 1 0100: lhzu */
	{ 2, LD+SE+U },		/* 00 1 0101: lhau */
	{ 2, ST+U },		/* 00 1 0110: sthu */
	{ 4, ST+M },		/* 00 1 0111: stmw */
	{ 4, LD+F+S+U },	/* 00 1 1000: lfsu */
	{ 8, LD+F+U },		/* 00 1 1001: lfdu */
	{ 4, ST+F+S+U },	/* 00 1 1010: stfsu */
	{ 8, ST+F+U },		/* 00 1 1011: stfdu */
	INVALID,		/* 00 1 1100 */
	INVALID,		/* 00 1 1101 */
	INVALID,		/* 00 1 1110 */
	INVALID,		/* 00 1 1111 */
	{ 8, LD },		/* 01 0 0000: ldx */
	INVALID,		/* 01 0 0001 */
	{ 8, ST },		/* 01 0 0010: stdx */
	INVALID,		/* 01 0 0011 */
	INVALID,		/* 01 0 0100 */
	{ 4, LD+SE },		/* 01 0 0101: lwax */
	INVALID,		/* 01 0 0110 */
	INVALID,		/* 01 0 0111 */
	{ 4, LD+M+HARD+SX },	/* 01 0 1000: lswx */
	{ 4, LD+M+HARD },	/* 01 0 1001: lswi */
	{ 4, ST+M+HARD+SX },	/* 01 0 1010: stswx */
	{ 4, ST+M+HARD },	/* 01 0 1011: stswi */
	INVALID,		/* 01 0 1100 */
	{ 8, LD+U },		/* 01 0 1101: ldu */
	INVALID,		/* 01 0 1110 */
	{ 8, ST+U },		/* 01 0 1111: stdu */
	{ 8, LD+U },		/* 01 1 0000: ldux */
	INVALID,		/* 01 1 0001 */
	{ 8, ST+U },		/* 01 1 0010: stdux */
	INVALID,		/* 01 1 0011 */
	INVALID,		/* 01 1 0100 */
	{ 4, LD+SE+U },		/* 01 1 0101: lwaux */
	INVALID,		/* 01 1 0110 */
	INVALID,		/* 01 1 0111 */
	INVALID,		/* 01 1 1000 */
	INVALID,		/* 01 1 1001 */
	INVALID,		/* 01 1 1010 */
	INVALID,		/* 01 1 1011 */
	INVALID,		/* 01 1 1100 */
	INVALID,		/* 01 1 1101 */
	INVALID,		/* 01 1 1110 */
	INVALID,		/* 01 1 1111 */
	INVALID,		/* 10 0 0000 */
	INVALID,		/* 10 0 0001 */
	INVALID,		/* 10 0 0010: stwcx. */
	INVALID,		/* 10 0 0011 */
	INVALID,		/* 10 0 0100 */
	INVALID,		/* 10 0 0101 */
	INVALID,		/* 10 0 0110 */
	INVALID,		/* 10 0 0111 */
	{ 4, LD+SW },		/* 10 0 1000: lwbrx */
	INVALID,		/* 10 0 1001 */
	{ 4, ST+SW },		/* 10 0 1010: stwbrx */
	INVALID,		/* 10 0 1011 */
	{ 2, LD+SW },		/* 10 0 1100: lhbrx */
	{ 4, LD+SE },		/* 10 0 1101: lwa */
	{ 2, ST+SW },		/* 10 0 1110: sthbrx */
	INVALID,		/* 10 0 1111 */
	INVALID,		/* 10 1 0000 */
	INVALID,		/* 10 1 0001 */
	INVALID,		/* 10 1 0010 */
	INVALID,		/* 10 1 0011 */
	INVALID,		/* 10 1 0100 */
	INVALID,		/* 10 1 0101 */
	INVALID,		/* 10 1 0110 */
	INVALID,		/* 10 1 0111 */
	INVALID,		/* 10 1 1000 */
	INVALID,		/* 10 1 1001 */
	INVALID,		/* 10 1 1010 */
	INVALID,		/* 10 1 1011 */
	INVALID,		/* 10 1 1100 */
	INVALID,		/* 10 1 1101 */
	INVALID,		/* 10 1 1110 */
	{ 0, ST+HARD },		/* 10 1 1111: dcbz */
	{ 4, LD },		/* 11 0 0000: lwzx */
	INVALID,		/* 11 0 0001 */
	{ 4, ST },		/* 11 0 0010: stwx */
	INVALID,		/* 11 0 0011 */
	{ 2, LD },		/* 11 0 0100: lhzx */
	{ 2, LD+SE },		/* 11 0 0101: lhax */
	{ 2, ST },		/* 11 0 0110: sthx */
	INVALID,		/* 11 0 0111 */
	{ 4, LD+F+S },		/* 11 0 1000: lfsx */
	{ 8, LD+F },		/* 11 0 1001: lfdx */
	{ 4, ST+F+S },		/* 11 0 1010: stfsx */
	{ 8, ST+F },		/* 11 0 1011: stfdx */
	INVALID,		/* 11 0 1100 */
	{ 8, LD+M },		/* 11 0 1101: lmd */
	INVALID,		/* 11 0 1110 */
	{ 8, ST+M },		/* 11 0 1111: stmd */
	{ 4, LD+U },		/* 11 1 0000: lwzux */
	INVALID,		/* 11 1 0001 */
	{ 4, ST+U },		/* 11 1 0010: stwux */
	INVALID,		/* 11 1 0011 */
	{ 2, LD+U },		/* 11 1 0100: lhzux */
	{ 2, LD+SE+U },		/* 11 1 0101: lhaux */
	{ 2, ST+U },		/* 11 1 0110: sthux */
	INVALID,		/* 11 1 0111 */
	{ 4, LD+F+S+U },	/* 11 1 1000: lfsux */
	{ 8, LD+F+U },		/* 11 1 1001: lfdux */
	{ 4, ST+F+S+U },	/* 11 1 1010: stfsux */
	{ 8, ST+F+U },		/* 11 1 1011: stfdux */
	INVALID,		/* 11 1 1100 */
	INVALID,		/* 11 1 1101 */
	INVALID,		/* 11 1 1110 */
	INVALID,		/* 11 1 1111 */
};

/*
 * Create a DSISR value from the instruction
 */
static inline unsigned make_dsisr(unsigned instr)
{
	unsigned dsisr;

	/* bits 6:15 --> 22:31 */
	dsisr = (instr & 0x03ff0000) >> 16;

	if (IS_XFORM(instr)) {
		/* bits 29:30 --> 15:16 */
		dsisr |= (instr & 0x00000006) << 14;
		/* bit 25 --> 17 */
		dsisr |= (instr & 0x00000040) << 8;
		/* bits 21:24 --> 18:21 */
		dsisr |= (instr & 0x00000780) << 3;
	} else {
		/* bit 5 --> 17 */
		dsisr |= (instr & 0x04000000) >> 12;
		/* bits 1:4 --> 18:21 */
		dsisr |= (instr & 0x78000000) >> 17;
		/* bits 30:31 --> 12:13 */
		if (IS_DSFORM(instr))
			dsisr |= (instr & 0x00000003) << 18;
	}

	return dsisr;
}
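
/*
 * Worked example (assuming the standard X-form encoding of lwbrx:
 * primary opcode 31, extended opcode 534 = binary 10000 10110):
 * instruction bits 21:24 = 1000, bit 25 = 0 and bits 29:30 = 10 are
 * packed into DSISR bits 18:21, 17 and 15:16 above, so fix_alignment()
 * below reads back the 7-bit table index "10 0 1000", which is the
 * lwbrx entry in aligninfo[].
 */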

/*
 * The dcbz (data cache block zero) instruction
 * gives an alignment fault if used on non-cacheable
 * memory.  We handle the fault mainly for the
 * case when we are running with the cache disabled
 * for debugging.
 */
static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)
{
	long __user *p;
	int i, size;

#ifdef __powerpc64__
	size = ppc64_caches.dline_size;
#else
	size = L1_CACHE_BYTES;
#endif
	p = (long __user *) (regs->dar & -size);
	if (user_mode(regs) && !access_ok(VERIFY_WRITE, p, size))
		return -EFAULT;
	for (i = 0; i < size / sizeof(long); ++i)
		if (__put_user_inatomic(0, p+i))
			return -EFAULT;
	return 1;
}

/*
 * Emulate load & store multiple instructions
 * On 64-bit machines, these instructions only affect/use the
 * bottom 4 bytes of each register, and the loads clear the
 * top 4 bytes of the affected register.
 */
#ifdef CONFIG_PPC64
#define REG_BYTE(rp, i)		*((u8 *)((rp) + ((i) >> 2)) + ((i) & 3) + 4)
#else
#define REG_BYTE(rp, i)		*((u8 *)(rp) + (i))
#endif

#define SWIZ_PTR(p)		((unsigned char __user *)((p) ^ swiz))
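
/*
 * For illustration: on 64-bit, REG_BYTE(rp, 5) expands to byte
 * (5 & 3) + 4 == 5 of rp[5 >> 2] == rp[1], i.e. the second byte of
 * the low 32-bit word of the second register image.  This matches the
 * big-endian layout of a 64-bit GPR in memory and the "bottom 4 bytes
 * only" behaviour described above.
 */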

static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
			    unsigned int reg, unsigned int nb,
			    unsigned int flags, unsigned int instr,
			    unsigned long swiz)
{
	unsigned long *rptr;
	unsigned int nb0, i, bswiz;
	unsigned long p;

	/*
	 * We do not try to emulate 8-byte multiples, as they aren't really
	 * available in our operating environments, and we don't emulate
	 * multiple operations in kernel mode, as they should never be
	 * used/generated there, at least not on unaligned boundaries.
	 */
	if (unlikely((nb > 4) || !user_mode(regs)))
		return 0;

	/* lmw, stmw, lswi/x, stswi/x */
	nb0 = 0;
	if (flags & HARD) {
		if (flags & SX) {
			nb = regs->xer & 127;
			if (nb == 0)
				return 1;
		} else {
			unsigned long pc = regs->nip ^ (swiz & 4);

			if (__get_user_inatomic(instr,
						(unsigned int __user *)pc))
				return -EFAULT;
			if (swiz == 0 && (flags & SW))
				instr = cpu_to_le32(instr);
			nb = (instr >> 11) & 0x1f;
			if (nb == 0)
				nb = 32;
		}
		if (nb + reg * 4 > 128) {
			nb0 = nb + reg * 4 - 128;
			nb = 128 - reg * 4;
		}
	} else {
		/* lmw, stmw */
		nb = (32 - reg) * 4;
	}

	if (!access_ok((flags & ST ? VERIFY_WRITE : VERIFY_READ), addr, nb+nb0))
		return -EFAULT;	/* bad address */

	rptr = &regs->gpr[reg];
	p = (unsigned long) addr;
	bswiz = (flags & SW) ? 3 : 0;

	if (!(flags & ST)) {
		/*
		 * This zeroes the top 4 bytes of the affected registers
		 * in 64-bit mode, and also zeroes out any remaining
		 * bytes of the last register for lsw*.
		 */
		memset(rptr, 0, ((nb + 3) / 4) * sizeof(unsigned long));
		if (nb0 > 0)
			memset(&regs->gpr[0], 0,
			       ((nb0 + 3) / 4) * sizeof(unsigned long));

		for (i = 0; i < nb; ++i, ++p)
			if (__get_user_inatomic(REG_BYTE(rptr, i ^ bswiz),
						SWIZ_PTR(p)))
				return -EFAULT;
		if (nb0 > 0) {
			rptr = &regs->gpr[0];
			addr += nb;
			for (i = 0; i < nb0; ++i, ++p)
				if (__get_user_inatomic(REG_BYTE(rptr,
								 i ^ bswiz),
							SWIZ_PTR(p)))
					return -EFAULT;
		}

	} else {
		for (i = 0; i < nb; ++i, ++p)
			if (__put_user_inatomic(REG_BYTE(rptr, i ^ bswiz),
						SWIZ_PTR(p)))
				return -EFAULT;
		if (nb0 > 0) {
			rptr = &regs->gpr[0];
			addr += nb;
			for (i = 0; i < nb0; ++i, ++p)
				if (__put_user_inatomic(REG_BYTE(rptr,
								 i ^ bswiz),
							SWIZ_PTR(p)))
					return -EFAULT;
		}
	}
	return 1;
}

/*
 * Called on alignment exception.  Attempts to fix up the access.
 *
 * Return 1 on success
 * Return 0 if unable to handle the interrupt
 * Return -EFAULT if data address is bad
 */
int fix_alignment(struct pt_regs *regs)
{
	unsigned int instr, nb, flags;
	unsigned int reg, areg;
	unsigned int dsisr;
	unsigned char __user *addr;
	unsigned long p, swiz;
	int ret, t;
	union {
		u64 ll;
		double dd;
		unsigned char v[8];
		struct {
			unsigned hi32;
			int	 low32;
		} x32;
		struct {
			unsigned char hi48[6];
			short	 low16;
		} x16;
	} data;

	/*
	 * We require a complete register set; if not, then our assembly
	 * is broken.
	 */
	CHECK_FULL_REGS(regs);

	dsisr = regs->dsisr;

	/* Some processors don't provide us with a DSISR we can use here;
	 * construct one from the instruction instead.
	 */
	if (cpu_has_feature(CPU_FTR_NODSISRALIGN)) {
		unsigned long pc = regs->nip;

		if (cpu_has_feature(CPU_FTR_PPC_LE) && (regs->msr & MSR_LE))
			pc ^= 4;
		if (unlikely(__get_user_inatomic(instr,
						 (unsigned int __user *)pc)))
			return -EFAULT;
		if (cpu_has_feature(CPU_FTR_REAL_LE) && (regs->msr & MSR_LE))
			instr = cpu_to_le32(instr);
		dsisr = make_dsisr(instr);
	}

	/* extract the operation and registers from the dsisr */
	reg = (dsisr >> 5) & 0x1f;	/* source/dest register */
	areg = dsisr & 0x1f;		/* register to update */
	instr = (dsisr >> 10) & 0x7f;
	instr |= (dsisr >> 13) & 0x60;

	/* Lookup the operation in our table */
	nb = aligninfo[instr].len;
	flags = aligninfo[instr].flags;

	/* Byteswap little endian loads and stores */
	swiz = 0;
	if (regs->msr & MSR_LE) {
		flags ^= SW;
		/*
		 * So-called "PowerPC little endian" mode works by
		 * swizzling addresses rather than by actually doing
		 * any byte-swapping.  To emulate this, we XOR each
		 * byte address with 7.  We also byte-swap, because
		 * the processor's address swizzling depends on the
		 * operand size (it xors the address with 7 for bytes,
		 * 6 for halfwords, 4 for words, 0 for doublewords) but
		 * we will xor with 7 and load/store each byte separately.
		 */
		if (cpu_has_feature(CPU_FTR_PPC_LE))
			swiz = 7;
	}
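
	/*
	 * Worked example of the swizzle above: with swiz == 7, a
	 * single-byte access to 0x1003 in PPC little-endian mode is
	 * actually performed at 0x1003 ^ 7 == 0x1004 by SWIZ_PTR()
	 * below, and larger operands are rebuilt one byte at a time.
	 */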

	/* DAR has the operand effective address */
	addr = (unsigned char __user *)regs->dar;

	/* A size of 0 indicates an instruction we don't support, with
	 * the exception of DCBZ which is handled as a special case here
	 */
	if (instr == DCBZ)
		return emulate_dcbz(regs, addr);
	if (unlikely(nb == 0))
		return 0;

	/* Load/Store Multiple instructions are handled in their own
	 * function
	 */
	if (flags & M)
		return emulate_multiple(regs, addr, reg, nb,
					flags, instr, swiz);

	/* Verify the address of the operand */
	if (unlikely(user_mode(regs) &&
		     !access_ok((flags & ST ? VERIFY_WRITE : VERIFY_READ),
				addr, nb)))
		return -EFAULT;

	/* Force the fprs into the save area so we can reference them */
	if (flags & F) {
		/* userland only */
		if (unlikely(!user_mode(regs)))
			return 0;
		flush_fp_to_thread(current);
	}

	/* If we are loading, get the data from user space, else
	 * get it from register values
	 */
	if (!(flags & ST)) {
		data.ll = 0;
		ret = 0;
		p = (unsigned long) addr;
		switch (nb) {
		case 8:
			ret |= __get_user_inatomic(data.v[0], SWIZ_PTR(p++));
			ret |= __get_user_inatomic(data.v[1], SWIZ_PTR(p++));
			ret |= __get_user_inatomic(data.v[2], SWIZ_PTR(p++));
			ret |= __get_user_inatomic(data.v[3], SWIZ_PTR(p++));
			/* fall through */
		case 4:
			ret |= __get_user_inatomic(data.v[4], SWIZ_PTR(p++));
			ret |= __get_user_inatomic(data.v[5], SWIZ_PTR(p++));
			/* fall through */
		case 2:
			ret |= __get_user_inatomic(data.v[6], SWIZ_PTR(p++));
			ret |= __get_user_inatomic(data.v[7], SWIZ_PTR(p++));
			if (unlikely(ret))
				return -EFAULT;
		}
	} else if (flags & F) {
		data.dd = current->thread.fpr[reg];
		if (flags & S) {
			/* Single-precision FP store requires conversion... */
#ifdef CONFIG_PPC_FPU
			preempt_disable();
			enable_kernel_fp();
			cvt_df(&data.dd, (float *)&data.v[4],
			       &current->thread);
			preempt_enable();
#else
			return 0;
#endif
		}
	} else
		data.ll = regs->gpr[reg];

	if (flags & SW) {
		switch (nb) {
		case 8:
			SWAP(data.v[0], data.v[7]);
			SWAP(data.v[1], data.v[6]);
			SWAP(data.v[2], data.v[5]);
			SWAP(data.v[3], data.v[4]);
			break;
		case 4:
			SWAP(data.v[4], data.v[7]);
			SWAP(data.v[5], data.v[6]);
			break;
		case 2:
			SWAP(data.v[6], data.v[7]);
			break;
		}
	}

	/* Perform other misc operations like sign extension
	 * or floating point single precision conversion
	 */
	switch (flags & ~(U|SW)) {
	case LD+SE:	/* sign extend */
		if (nb == 2)
			data.ll = data.x16.low16;
		else	/* nb must be 4 */
			data.ll = data.x32.low32;
		break;

	/* Single-precision FP load requires conversion... */
	case LD+F+S:
#ifdef CONFIG_PPC_FPU
		preempt_disable();
		enable_kernel_fp();
		cvt_fd((float *)&data.v[4], &data.dd, &current->thread);
		preempt_enable();
#else
		return 0;
#endif
		break;
	}

	/* Store result to memory or update registers */
	if (flags & ST) {
		ret = 0;
		p = (unsigned long) addr;
		switch (nb) {
		case 8:
			ret |= __put_user_inatomic(data.v[0], SWIZ_PTR(p++));
			ret |= __put_user_inatomic(data.v[1], SWIZ_PTR(p++));
			ret |= __put_user_inatomic(data.v[2], SWIZ_PTR(p++));
			ret |= __put_user_inatomic(data.v[3], SWIZ_PTR(p++));
			/* fall through */
		case 4:
			ret |= __put_user_inatomic(data.v[4], SWIZ_PTR(p++));
			ret |= __put_user_inatomic(data.v[5], SWIZ_PTR(p++));
			/* fall through */
		case 2:
			ret |= __put_user_inatomic(data.v[6], SWIZ_PTR(p++));
			ret |= __put_user_inatomic(data.v[7], SWIZ_PTR(p++));
		}
		if (unlikely(ret))
			return -EFAULT;
	} else if (flags & F)
		current->thread.fpr[reg] = data.dd;
	else
		regs->gpr[reg] = data.ll;

	/* Update RA as needed */
	if (flags & U)
		regs->gpr[areg] = regs->dar;

	return 1;
}