1 /* 2 * S390 kdump implementation 3 * 4 * Copyright IBM Corp. 2011 5 * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> 6 */ 7 8 #include <linux/crash_dump.h> 9 #include <asm/lowcore.h> 10 #include <linux/kernel.h> 11 #include <linux/module.h> 12 #include <linux/gfp.h> 13 #include <linux/slab.h> 14 #include <linux/bootmem.h> 15 #include <linux/elf.h> 16 #include <linux/memblock.h> 17 #include <asm/os_info.h> 18 #include <asm/elf.h> 19 #include <asm/ipl.h> 20 #include <asm/sclp.h> 21 22 #define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) 23 #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) 24 #define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y)))) 25 26 static struct memblock_region oldmem_region; 27 28 static struct memblock_type oldmem_type = { 29 .cnt = 1, 30 .max = 1, 31 .total_size = 0, 32 .regions = &oldmem_region, 33 }; 34 35 #define for_each_dump_mem_range(i, nid, p_start, p_end, p_nid) \ 36 for (i = 0, __next_mem_range(&i, nid, &memblock.physmem, \ 37 &oldmem_type, p_start, \ 38 p_end, p_nid); \ 39 i != (u64)ULLONG_MAX; \ 40 __next_mem_range(&i, nid, &memblock.physmem, \ 41 &oldmem_type, \ 42 p_start, p_end, p_nid)) 43 44 struct dump_save_areas dump_save_areas; 45 46 /* 47 * Allocate and add a save area for a CPU 48 */ 49 struct save_area *dump_save_area_create(int cpu) 50 { 51 struct save_area **save_areas, *save_area; 52 53 save_area = kmalloc(sizeof(*save_area), GFP_KERNEL); 54 if (!save_area) 55 return NULL; 56 if (cpu + 1 > dump_save_areas.count) { 57 dump_save_areas.count = cpu + 1; 58 save_areas = krealloc(dump_save_areas.areas, 59 dump_save_areas.count * sizeof(void *), 60 GFP_KERNEL | __GFP_ZERO); 61 if (!save_areas) { 62 kfree(save_area); 63 return NULL; 64 } 65 dump_save_areas.areas = save_areas; 66 } 67 dump_save_areas.areas[cpu] = save_area; 68 return save_area; 69 } 70 71 /* 72 * Return physical address for virtual address 73 */ 74 static inline void *load_real_addr(void *addr) 75 { 76 unsigned long real_addr; 77 78 asm volatile( 79 " lra %0,0(%1)\n" 80 " jz 0f\n" 81 " la %0,0\n" 82 "0:" 83 : "=a" (real_addr) : "a" (addr) : "cc"); 84 return (void *)real_addr; 85 } 86 87 /* 88 * Copy real to virtual or real memory 89 */ 90 static int copy_from_realmem(void *dest, void *src, size_t count) 91 { 92 unsigned long size; 93 94 if (!count) 95 return 0; 96 if (!is_vmalloc_or_module_addr(dest)) 97 return memcpy_real(dest, src, count); 98 do { 99 size = min(count, PAGE_SIZE - (__pa(dest) & ~PAGE_MASK)); 100 if (memcpy_real(load_real_addr(dest), src, size)) 101 return -EFAULT; 102 count -= size; 103 dest += size; 104 src += size; 105 } while (count); 106 return 0; 107 } 108 109 /* 110 * Pointer to ELF header in new kernel 111 */ 112 static void *elfcorehdr_newmem; 113 114 /* 115 * Copy one page from zfcpdump "oldmem" 116 * 117 * For pages below HSA size memory from the HSA is copied. Otherwise 118 * real memory copy is used. 119 */ 120 static ssize_t copy_oldmem_page_zfcpdump(char *buf, size_t csize, 121 unsigned long src, int userbuf) 122 { 123 int rc; 124 125 if (src < sclp_get_hsa_size()) { 126 rc = memcpy_hsa(buf, src, csize, userbuf); 127 } else { 128 if (userbuf) 129 rc = copy_to_user_real((void __force __user *) buf, 130 (void *) src, csize); 131 else 132 rc = memcpy_real(buf, (void *) src, csize); 133 } 134 return rc ? rc : csize; 135 } 136 137 /* 138 * Copy one page from kdump "oldmem" 139 * 140 * For the kdump reserved memory this functions performs a swap operation: 141 * - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE]. 142 * - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] 143 */ 144 static ssize_t copy_oldmem_page_kdump(char *buf, size_t csize, 145 unsigned long src, int userbuf) 146 147 { 148 int rc; 149 150 if (src < OLDMEM_SIZE) 151 src += OLDMEM_BASE; 152 else if (src > OLDMEM_BASE && 153 src < OLDMEM_BASE + OLDMEM_SIZE) 154 src -= OLDMEM_BASE; 155 if (userbuf) 156 rc = copy_to_user_real((void __force __user *) buf, 157 (void *) src, csize); 158 else 159 rc = copy_from_realmem(buf, (void *) src, csize); 160 return (rc == 0) ? rc : csize; 161 } 162 163 /* 164 * Copy one page from "oldmem" 165 */ 166 ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, 167 unsigned long offset, int userbuf) 168 { 169 unsigned long src; 170 171 if (!csize) 172 return 0; 173 src = (pfn << PAGE_SHIFT) + offset; 174 if (OLDMEM_BASE) 175 return copy_oldmem_page_kdump(buf, csize, src, userbuf); 176 else 177 return copy_oldmem_page_zfcpdump(buf, csize, src, userbuf); 178 } 179 180 /* 181 * Remap "oldmem" for kdump 182 * 183 * For the kdump reserved memory this functions performs a swap operation: 184 * [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] 185 */ 186 static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma, 187 unsigned long from, unsigned long pfn, 188 unsigned long size, pgprot_t prot) 189 { 190 unsigned long size_old; 191 int rc; 192 193 if (pfn < OLDMEM_SIZE >> PAGE_SHIFT) { 194 size_old = min(size, OLDMEM_SIZE - (pfn << PAGE_SHIFT)); 195 rc = remap_pfn_range(vma, from, 196 pfn + (OLDMEM_BASE >> PAGE_SHIFT), 197 size_old, prot); 198 if (rc || size == size_old) 199 return rc; 200 size -= size_old; 201 from += size_old; 202 pfn += size_old >> PAGE_SHIFT; 203 } 204 return remap_pfn_range(vma, from, pfn, size, prot); 205 } 206 207 /* 208 * Remap "oldmem" for zfcpdump 209 * 210 * We only map available memory above HSA size. Memory below HSA size 211 * is read on demand using the copy_oldmem_page() function. 212 */ 213 static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma, 214 unsigned long from, 215 unsigned long pfn, 216 unsigned long size, pgprot_t prot) 217 { 218 unsigned long hsa_end = sclp_get_hsa_size(); 219 unsigned long size_hsa; 220 221 if (pfn < hsa_end >> PAGE_SHIFT) { 222 size_hsa = min(size, hsa_end - (pfn << PAGE_SHIFT)); 223 if (size == size_hsa) 224 return 0; 225 size -= size_hsa; 226 from += size_hsa; 227 pfn += size_hsa >> PAGE_SHIFT; 228 } 229 return remap_pfn_range(vma, from, pfn, size, prot); 230 } 231 232 /* 233 * Remap "oldmem" for kdump or zfcpdump 234 */ 235 int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from, 236 unsigned long pfn, unsigned long size, pgprot_t prot) 237 { 238 if (OLDMEM_BASE) 239 return remap_oldmem_pfn_range_kdump(vma, from, pfn, size, prot); 240 else 241 return remap_oldmem_pfn_range_zfcpdump(vma, from, pfn, size, 242 prot); 243 } 244 245 /* 246 * Copy memory from old kernel 247 */ 248 int copy_from_oldmem(void *dest, void *src, size_t count) 249 { 250 unsigned long copied = 0; 251 int rc; 252 253 if (OLDMEM_BASE) { 254 if ((unsigned long) src < OLDMEM_SIZE) { 255 copied = min(count, OLDMEM_SIZE - (unsigned long) src); 256 rc = copy_from_realmem(dest, src + OLDMEM_BASE, copied); 257 if (rc) 258 return rc; 259 } 260 } else { 261 unsigned long hsa_end = sclp_get_hsa_size(); 262 if ((unsigned long) src < hsa_end) { 263 copied = min(count, hsa_end - (unsigned long) src); 264 rc = memcpy_hsa(dest, (unsigned long) src, copied, 0); 265 if (rc) 266 return rc; 267 } 268 } 269 return copy_from_realmem(dest + copied, src + copied, count - copied); 270 } 271 272 /* 273 * Alloc memory and panic in case of ENOMEM 274 */ 275 static void *kzalloc_panic(int len) 276 { 277 void *rc; 278 279 rc = kzalloc(len, GFP_KERNEL); 280 if (!rc) 281 panic("s390 kdump kzalloc (%d) failed", len); 282 return rc; 283 } 284 285 /* 286 * Initialize ELF note 287 */ 288 static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len, 289 const char *name) 290 { 291 Elf64_Nhdr *note; 292 u64 len; 293 294 note = (Elf64_Nhdr *)buf; 295 note->n_namesz = strlen(name) + 1; 296 note->n_descsz = d_len; 297 note->n_type = type; 298 len = sizeof(Elf64_Nhdr); 299 300 memcpy(buf + len, name, note->n_namesz); 301 len = roundup(len + note->n_namesz, 4); 302 303 memcpy(buf + len, desc, note->n_descsz); 304 len = roundup(len + note->n_descsz, 4); 305 306 return PTR_ADD(buf, len); 307 } 308 309 /* 310 * Initialize prstatus note 311 */ 312 static void *nt_prstatus(void *ptr, struct save_area *sa) 313 { 314 struct elf_prstatus nt_prstatus; 315 static int cpu_nr = 1; 316 317 memset(&nt_prstatus, 0, sizeof(nt_prstatus)); 318 memcpy(&nt_prstatus.pr_reg.gprs, sa->gp_regs, sizeof(sa->gp_regs)); 319 memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw)); 320 memcpy(&nt_prstatus.pr_reg.acrs, sa->acc_regs, sizeof(sa->acc_regs)); 321 nt_prstatus.pr_pid = cpu_nr; 322 cpu_nr++; 323 324 return nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus), 325 "CORE"); 326 } 327 328 /* 329 * Initialize fpregset (floating point) note 330 */ 331 static void *nt_fpregset(void *ptr, struct save_area *sa) 332 { 333 elf_fpregset_t nt_fpregset; 334 335 memset(&nt_fpregset, 0, sizeof(nt_fpregset)); 336 memcpy(&nt_fpregset.fpc, &sa->fp_ctrl_reg, sizeof(sa->fp_ctrl_reg)); 337 memcpy(&nt_fpregset.fprs, &sa->fp_regs, sizeof(sa->fp_regs)); 338 339 return nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset), 340 "CORE"); 341 } 342 343 /* 344 * Initialize timer note 345 */ 346 static void *nt_s390_timer(void *ptr, struct save_area *sa) 347 { 348 return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer), 349 KEXEC_CORE_NOTE_NAME); 350 } 351 352 /* 353 * Initialize TOD clock comparator note 354 */ 355 static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa) 356 { 357 return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp, 358 sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME); 359 } 360 361 /* 362 * Initialize TOD programmable register note 363 */ 364 static void *nt_s390_tod_preg(void *ptr, struct save_area *sa) 365 { 366 return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg, 367 sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME); 368 } 369 370 /* 371 * Initialize control register note 372 */ 373 static void *nt_s390_ctrs(void *ptr, struct save_area *sa) 374 { 375 return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs, 376 sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME); 377 } 378 379 /* 380 * Initialize prefix register note 381 */ 382 static void *nt_s390_prefix(void *ptr, struct save_area *sa) 383 { 384 return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg, 385 sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME); 386 } 387 388 /* 389 * Fill ELF notes for one CPU with save area registers 390 */ 391 void *fill_cpu_elf_notes(void *ptr, struct save_area *sa) 392 { 393 ptr = nt_prstatus(ptr, sa); 394 ptr = nt_fpregset(ptr, sa); 395 ptr = nt_s390_timer(ptr, sa); 396 ptr = nt_s390_tod_cmp(ptr, sa); 397 ptr = nt_s390_tod_preg(ptr, sa); 398 ptr = nt_s390_ctrs(ptr, sa); 399 ptr = nt_s390_prefix(ptr, sa); 400 return ptr; 401 } 402 403 /* 404 * Initialize prpsinfo note (new kernel) 405 */ 406 static void *nt_prpsinfo(void *ptr) 407 { 408 struct elf_prpsinfo prpsinfo; 409 410 memset(&prpsinfo, 0, sizeof(prpsinfo)); 411 prpsinfo.pr_sname = 'R'; 412 strcpy(prpsinfo.pr_fname, "vmlinux"); 413 return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo), 414 KEXEC_CORE_NOTE_NAME); 415 } 416 417 /* 418 * Get vmcoreinfo using lowcore->vmcore_info (new kernel) 419 */ 420 static void *get_vmcoreinfo_old(unsigned long *size) 421 { 422 char nt_name[11], *vmcoreinfo; 423 Elf64_Nhdr note; 424 void *addr; 425 426 if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) 427 return NULL; 428 memset(nt_name, 0, sizeof(nt_name)); 429 if (copy_from_oldmem(¬e, addr, sizeof(note))) 430 return NULL; 431 if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1)) 432 return NULL; 433 if (strcmp(nt_name, "VMCOREINFO") != 0) 434 return NULL; 435 vmcoreinfo = kzalloc_panic(note.n_descsz); 436 if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz)) 437 return NULL; 438 *size = note.n_descsz; 439 return vmcoreinfo; 440 } 441 442 /* 443 * Initialize vmcoreinfo note (new kernel) 444 */ 445 static void *nt_vmcoreinfo(void *ptr) 446 { 447 unsigned long size; 448 void *vmcoreinfo; 449 450 vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size); 451 if (!vmcoreinfo) 452 vmcoreinfo = get_vmcoreinfo_old(&size); 453 if (!vmcoreinfo) 454 return ptr; 455 return nt_init(ptr, 0, vmcoreinfo, size, "VMCOREINFO"); 456 } 457 458 /* 459 * Initialize ELF header (new kernel) 460 */ 461 static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt) 462 { 463 memset(ehdr, 0, sizeof(*ehdr)); 464 memcpy(ehdr->e_ident, ELFMAG, SELFMAG); 465 ehdr->e_ident[EI_CLASS] = ELFCLASS64; 466 ehdr->e_ident[EI_DATA] = ELFDATA2MSB; 467 ehdr->e_ident[EI_VERSION] = EV_CURRENT; 468 memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD); 469 ehdr->e_type = ET_CORE; 470 ehdr->e_machine = EM_S390; 471 ehdr->e_version = EV_CURRENT; 472 ehdr->e_phoff = sizeof(Elf64_Ehdr); 473 ehdr->e_ehsize = sizeof(Elf64_Ehdr); 474 ehdr->e_phentsize = sizeof(Elf64_Phdr); 475 ehdr->e_phnum = mem_chunk_cnt + 1; 476 return ehdr + 1; 477 } 478 479 /* 480 * Return CPU count for ELF header (new kernel) 481 */ 482 static int get_cpu_cnt(void) 483 { 484 int i, cpus = 0; 485 486 for (i = 0; i < dump_save_areas.count; i++) { 487 if (dump_save_areas.areas[i]->pref_reg == 0) 488 continue; 489 cpus++; 490 } 491 return cpus; 492 } 493 494 /* 495 * Return memory chunk count for ELF header (new kernel) 496 */ 497 static int get_mem_chunk_cnt(void) 498 { 499 int cnt = 0; 500 u64 idx; 501 502 for_each_dump_mem_range(idx, NUMA_NO_NODE, NULL, NULL, NULL) 503 cnt++; 504 return cnt; 505 } 506 507 /* 508 * Initialize ELF loads (new kernel) 509 */ 510 static void loads_init(Elf64_Phdr *phdr, u64 loads_offset) 511 { 512 phys_addr_t start, end; 513 u64 idx; 514 515 for_each_dump_mem_range(idx, NUMA_NO_NODE, &start, &end, NULL) { 516 phdr->p_filesz = end - start; 517 phdr->p_type = PT_LOAD; 518 phdr->p_offset = start; 519 phdr->p_vaddr = start; 520 phdr->p_paddr = start; 521 phdr->p_memsz = end - start; 522 phdr->p_flags = PF_R | PF_W | PF_X; 523 phdr->p_align = PAGE_SIZE; 524 phdr++; 525 } 526 } 527 528 /* 529 * Initialize notes (new kernel) 530 */ 531 static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) 532 { 533 struct save_area *sa; 534 void *ptr_start = ptr; 535 int i; 536 537 ptr = nt_prpsinfo(ptr); 538 539 for (i = 0; i < dump_save_areas.count; i++) { 540 sa = dump_save_areas.areas[i]; 541 if (sa->pref_reg == 0) 542 continue; 543 ptr = fill_cpu_elf_notes(ptr, sa); 544 } 545 ptr = nt_vmcoreinfo(ptr); 546 memset(phdr, 0, sizeof(*phdr)); 547 phdr->p_type = PT_NOTE; 548 phdr->p_offset = notes_offset; 549 phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start); 550 phdr->p_memsz = phdr->p_filesz; 551 return ptr; 552 } 553 554 /* 555 * Create ELF core header (new kernel) 556 */ 557 int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) 558 { 559 Elf64_Phdr *phdr_notes, *phdr_loads; 560 int mem_chunk_cnt; 561 void *ptr, *hdr; 562 u32 alloc_size; 563 u64 hdr_off; 564 565 /* If we are not in kdump or zfcpdump mode return */ 566 if (!OLDMEM_BASE && ipl_info.type != IPL_TYPE_FCP_DUMP) 567 return 0; 568 /* If elfcorehdr= has been passed via cmdline, we use that one */ 569 if (elfcorehdr_addr != ELFCORE_ADDR_MAX) 570 return 0; 571 /* If we cannot get HSA size for zfcpdump return error */ 572 if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp_get_hsa_size()) 573 return -ENODEV; 574 575 /* For kdump, exclude previous crashkernel memory */ 576 if (OLDMEM_BASE) { 577 oldmem_region.base = OLDMEM_BASE; 578 oldmem_region.size = OLDMEM_SIZE; 579 oldmem_type.total_size = OLDMEM_SIZE; 580 } 581 582 mem_chunk_cnt = get_mem_chunk_cnt(); 583 584 alloc_size = 0x1000 + get_cpu_cnt() * 0x300 + 585 mem_chunk_cnt * sizeof(Elf64_Phdr); 586 hdr = kzalloc_panic(alloc_size); 587 /* Init elf header */ 588 ptr = ehdr_init(hdr, mem_chunk_cnt); 589 /* Init program headers */ 590 phdr_notes = ptr; 591 ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr)); 592 phdr_loads = ptr; 593 ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt); 594 /* Init notes */ 595 hdr_off = PTR_DIFF(ptr, hdr); 596 ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off); 597 /* Init loads */ 598 hdr_off = PTR_DIFF(ptr, hdr); 599 loads_init(phdr_loads, hdr_off); 600 *addr = (unsigned long long) hdr; 601 elfcorehdr_newmem = hdr; 602 *size = (unsigned long long) hdr_off; 603 BUG_ON(elfcorehdr_size > alloc_size); 604 return 0; 605 } 606 607 /* 608 * Free ELF core header (new kernel) 609 */ 610 void elfcorehdr_free(unsigned long long addr) 611 { 612 if (!elfcorehdr_newmem) 613 return; 614 kfree((void *)(unsigned long)addr); 615 } 616 617 /* 618 * Read from ELF header 619 */ 620 ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) 621 { 622 void *src = (void *)(unsigned long)*ppos; 623 624 src = elfcorehdr_newmem ? src : src - OLDMEM_BASE; 625 memcpy(buf, src, count); 626 *ppos += count; 627 return count; 628 } 629 630 /* 631 * Read from ELF notes data 632 */ 633 ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos) 634 { 635 void *src = (void *)(unsigned long)*ppos; 636 int rc; 637 638 if (elfcorehdr_newmem) { 639 memcpy(buf, src, count); 640 } else { 641 rc = copy_from_oldmem(buf, src, count); 642 if (rc) 643 return rc; 644 } 645 *ppos += count; 646 return count; 647 } 648