/*
 * Machine specific setup for xen
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */

#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/memblock.h>
#include <linux/cpuidle.h>
#include <linux/cpufreq.h>

#include <asm/elf.h>
#include <asm/vdso.h>
#include <asm/e820.h>
#include <asm/setup.h>
#include <asm/acpi.h>
#include <asm/numa.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/page.h>
#include <xen/interface/callback.h>
#include <xen/interface/memory.h>
#include <xen/interface/physdev.h>
#include <xen/features.h>
#include "xen-ops.h"
#include "vdso.h"
#include "p2m.h"

/* These are code, but not functions.  Defined in entry.S */
extern const char xen_hypervisor_callback[];
extern const char xen_failsafe_callback[];
#ifdef CONFIG_X86_64
extern asmlinkage void nmi(void);
#endif
extern void xen_sysenter_target(void);
extern void xen_syscall_target(void);
extern void xen_syscall32_target(void);

/* Amount of extra memory space we add to the e820 ranges */
struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;

/* Number of pages released from the initial allocation. */
unsigned long xen_released_pages;

/* Buffer used to remap identity mapped pages */
unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata;

/*
 * The maximum amount of extra memory compared to the base size.  The
 * main scaling factor is the size of struct page.  At extreme ratios
 * of base:extra, all the base memory can be filled with page
 * structures for the extra memory, leaving no space for anything
 * else.
 *
 * 10x seems like a reasonable balance between scaling flexibility and
 * leaving a practically usable system.
 */
#define EXTRA_MEM_RATIO		(10)
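
/*
 * Rough illustration of the ratio (assuming a 64-byte struct page and
 * 4 KiB pages, which is typical for x86-64 but not guaranteed): each
 * page of extra memory needs 64 bytes of base memory for its struct
 * page, i.e. 1/64 of a page.  At the 10x cap the page structures for
 * the extra memory therefore consume at most 10/64 (roughly 16%) of
 * the base allocation, leaving the rest usable.
 */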

static void __init xen_add_extra_mem(u64 start, u64 size)
{
        unsigned long pfn;
        int i;

        for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
                /* Add new region. */
                if (xen_extra_mem[i].size == 0) {
                        xen_extra_mem[i].start = start;
                        xen_extra_mem[i].size = size;
                        break;
                }
                /* Append to existing region. */
                if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) {
                        xen_extra_mem[i].size += size;
                        break;
                }
        }
        if (i == XEN_EXTRA_MEM_MAX_REGIONS)
                printk(KERN_WARNING "Warning: not enough extra memory regions\n");

        memblock_reserve(start, size);

        xen_max_p2m_pfn = PFN_DOWN(start + size);
        for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
                unsigned long mfn = pfn_to_mfn(pfn);

                if (WARN_ONCE(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn))
                        continue;
                WARN_ONCE(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n",
                          pfn, mfn);

                __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
        }
}

static unsigned long __init xen_do_chunk(unsigned long start,
                                         unsigned long end, bool release)
{
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
                .extent_order = 0,
                .domid        = DOMID_SELF
        };
        unsigned long len = 0;
        unsigned long pfn;
        int ret;

        for (pfn = start; pfn < end; pfn++) {
                unsigned long frame;
                unsigned long mfn = pfn_to_mfn(pfn);

                if (release) {
                        /* Make sure pfn exists to start with */
                        if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
                                continue;
                        frame = mfn;
                } else {
                        if (mfn != INVALID_P2M_ENTRY)
                                continue;
                        frame = pfn;
                }
                set_xen_guest_handle(reservation.extent_start, &frame);
                reservation.nr_extents = 1;

                ret = HYPERVISOR_memory_op(release ? XENMEM_decrease_reservation : XENMEM_populate_physmap,
                                           &reservation);
                WARN(ret != 1, "Failed to %s pfn %lx err=%d\n",
                     release ? "release" : "populate", pfn, ret);

                if (ret == 1) {
                        if (!early_set_phys_to_machine(pfn, release ? INVALID_P2M_ENTRY : frame)) {
                                if (release)
                                        break;
                                set_xen_guest_handle(reservation.extent_start, &frame);
                                reservation.nr_extents = 1;
                                ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
                                                           &reservation);
                                break;
                        }
                        len++;
                } else
                        break;
        }
        if (len)
                printk(KERN_INFO "%s %lx-%lx pfn range: %lu pages %s\n",
                       release ? "Freeing" : "Populating",
                       start, end, len,
                       release ? "freed" : "added");

        return len;
}

/*
 * Finds the next RAM pfn available in the E820 map after min_pfn.
 * This function updates min_pfn with the pfn found and returns
 * the size of that range or zero if not found.
 */
static unsigned long __init xen_find_pfn_range(
        const struct e820entry *list, size_t map_size,
        unsigned long *min_pfn)
{
        const struct e820entry *entry;
        unsigned int i;
        unsigned long done = 0;

        for (i = 0, entry = list; i < map_size; i++, entry++) {
                unsigned long s_pfn;
                unsigned long e_pfn;

                if (entry->type != E820_RAM)
                        continue;

                e_pfn = PFN_DOWN(entry->addr + entry->size);

                /* We only care about E820 after this */
                if (e_pfn < *min_pfn)
                        continue;

                s_pfn = PFN_UP(entry->addr);

                /* If min_pfn falls within the E820 entry, we want to start
                 * at the min_pfn PFN.
                 */
                if (s_pfn <= *min_pfn) {
                        done = e_pfn - *min_pfn;
                } else {
                        done = e_pfn - s_pfn;
                        *min_pfn = s_pfn;
                }
                break;
        }

        return done;
}
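
/*
 * Illustration of xen_find_pfn_range() (hypothetical E820 map, not taken
 * from real hardware): with a RAM entry covering 0x100000-0x40000000 and
 * *min_pfn == 0x200, the entry's pfn range is [0x100, 0x40000); since
 * 0x200 lies inside it, *min_pfn is left at 0x200 and 0x40000 - 0x200
 * pfns are returned.  Had *min_pfn been 0x80, *min_pfn would instead be
 * advanced to 0x100 and 0x40000 - 0x100 returned.
 */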

/*
 * This releases a chunk of memory and then does the identity map. It's
 * used as a fallback if the remapping fails.
 */
static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
        unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity,
        unsigned long *released)
{
        WARN_ON(start_pfn > end_pfn);

        /* Need to release pages first */
        *released += xen_do_chunk(start_pfn, min(end_pfn, nr_pages), true);
        *identity += set_phys_range_identity(start_pfn, end_pfn);
}

/*
 * Helper function to update both the p2m and m2p tables.
 */
static unsigned long __init xen_update_mem_tables(unsigned long pfn,
                                                  unsigned long mfn)
{
        struct mmu_update update = {
                .ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
                .val = pfn
        };

        /* Update p2m */
        if (!early_set_phys_to_machine(pfn, mfn)) {
                WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
                     pfn, mfn);
                return false;
        }

        /* Update m2p */
        if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
                WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
                     mfn, pfn);
                return false;
        }

        return true;
}
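
/*
 * Illustration of xen_update_mem_tables() (hypothetical frame numbers):
 * xen_update_mem_tables(0x48000, 0x1234) records pfn 0x48000 -> mfn 0x1234
 * in the guest's p2m and asks the hypervisor to record the reverse
 * mfn 0x1234 -> pfn 0x48000 in the m2p, so that pfn_to_mfn() and
 * mfn_to_pfn() round-trip consistently for the remapped page.
 */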

/*
 * This function updates the p2m and m2p tables with an identity map from
 * start_pfn to start_pfn+size and remaps the underlying RAM of the original
 * allocation at remap_pfn. It must do so carefully in P2M_PER_PAGE sized blocks
 * to not exhaust the reserved brk space. Doing it in properly aligned blocks
 * ensures we only allocate the minimum required leaf pages in the p2m table. It
 * copies the existing mfns from the p2m table under the 1:1 map, overwrites
 * them with the identity map and then updates the p2m and m2p tables with the
 * remapped memory.
 */
static unsigned long __init xen_do_set_identity_and_remap_chunk(
        unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
{
        unsigned long ident_pfn_iter, remap_pfn_iter;
        unsigned long ident_start_pfn_align, remap_start_pfn_align;
        unsigned long ident_end_pfn_align, remap_end_pfn_align;
        unsigned long ident_boundary_pfn, remap_boundary_pfn;
        unsigned long ident_cnt = 0;
        unsigned long remap_cnt = 0;
        unsigned long left = size;
        unsigned long mod;
        int i;

        WARN_ON(size == 0);

        BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));

        /*
         * Determine the proper alignment to remap memory in P2M_PER_PAGE sized
         * blocks. We need to keep track of both the existing pfn mapping and
         * the new pfn remapping.
         */
        mod = start_pfn % P2M_PER_PAGE;
        ident_start_pfn_align =
                mod ? (start_pfn - mod + P2M_PER_PAGE) : start_pfn;
        mod = remap_pfn % P2M_PER_PAGE;
        remap_start_pfn_align =
                mod ? (remap_pfn - mod + P2M_PER_PAGE) : remap_pfn;
        mod = (start_pfn + size) % P2M_PER_PAGE;
        ident_end_pfn_align = start_pfn + size - mod;
        mod = (remap_pfn + size) % P2M_PER_PAGE;
        remap_end_pfn_align = remap_pfn + size - mod;

        /* Iterate over each p2m leaf node in each range */
        for (ident_pfn_iter = ident_start_pfn_align, remap_pfn_iter = remap_start_pfn_align;
             ident_pfn_iter < ident_end_pfn_align && remap_pfn_iter < remap_end_pfn_align;
             ident_pfn_iter += P2M_PER_PAGE, remap_pfn_iter += P2M_PER_PAGE) {
                /* Check we aren't past the end */
                BUG_ON(ident_pfn_iter + P2M_PER_PAGE > start_pfn + size);
                BUG_ON(remap_pfn_iter + P2M_PER_PAGE > remap_pfn + size);

                /* Save p2m mappings */
                for (i = 0; i < P2M_PER_PAGE; i++)
                        xen_remap_buf[i] = pfn_to_mfn(ident_pfn_iter + i);

                /* Set identity map which will free a p2m leaf */
                ident_cnt += set_phys_range_identity(ident_pfn_iter,
                        ident_pfn_iter + P2M_PER_PAGE);

#ifdef DEBUG
                /* Helps verify a p2m leaf has been freed */
                for (i = 0; i < P2M_PER_PAGE; i++) {
                        unsigned int pfn = ident_pfn_iter + i;
                        BUG_ON(pfn_to_mfn(pfn) != pfn);
                }
#endif
                /* Now remap memory */
                for (i = 0; i < P2M_PER_PAGE; i++) {
                        unsigned long mfn = xen_remap_buf[i];

                        /* This will use the p2m leaf freed above */
                        if (!xen_update_mem_tables(remap_pfn_iter + i, mfn)) {
                                WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
                                        remap_pfn_iter + i, mfn);
                                return 0;
                        }

                        remap_cnt++;
                }

                left -= P2M_PER_PAGE;
        }

        /* Max boundary space possible */
        BUG_ON(left > (P2M_PER_PAGE - 1) * 2);

        /* Now handle the boundary conditions */
        ident_boundary_pfn = start_pfn;
        remap_boundary_pfn = remap_pfn;
        for (i = 0; i < left; i++) {
                unsigned long mfn;

                /* These two checks move from the start to end boundaries */
                if (ident_boundary_pfn == ident_start_pfn_align)
                        ident_boundary_pfn = ident_pfn_iter;
                if (remap_boundary_pfn == remap_start_pfn_align)
                        remap_boundary_pfn = remap_pfn_iter;

                /* Check we aren't past the end */
                BUG_ON(ident_boundary_pfn >= start_pfn + size);
                BUG_ON(remap_boundary_pfn >= remap_pfn + size);

                mfn = pfn_to_mfn(ident_boundary_pfn);

                if (!xen_update_mem_tables(remap_boundary_pfn, mfn)) {
                        WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
                                remap_pfn_iter + i, mfn);
                        return 0;
                }
                remap_cnt++;

                ident_boundary_pfn++;
                remap_boundary_pfn++;
        }

        /* Finish up the identity map */
        if (ident_start_pfn_align >= ident_end_pfn_align) {
                /*
                 * In this case we have an identity range which does not span an
                 * aligned block so everything needs to be identity mapped here.
                 * If we didn't check this we might remap too many pages since
                 * the align boundaries are not meaningful in this case.
                 */
                ident_cnt += set_phys_range_identity(start_pfn,
                        start_pfn + size);
        } else {
                /* Remapped above so check each end of the chunk */
                if (start_pfn < ident_start_pfn_align)
                        ident_cnt += set_phys_range_identity(start_pfn,
                                ident_start_pfn_align);
                if (start_pfn + size > ident_pfn_iter)
                        ident_cnt += set_phys_range_identity(ident_pfn_iter,
                                start_pfn + size);
        }

        BUG_ON(ident_cnt != size);
        BUG_ON(remap_cnt != size);

        return size;
}
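
/*
 * Worked example of the alignment handling above (assuming
 * P2M_PER_PAGE == 512 == 0x200, as with 4 KiB pages and 8-byte p2m
 * entries, and a remap_pfn with the same misalignment as start_pfn):
 * for start_pfn = 0x1234 and size = 0x1000, the aligned middle
 * [0x1400, 0x2200) is processed one full p2m leaf (512 pfns) at a time,
 * while the unaligned head [0x1234, 0x1400) and tail [0x2200, 0x2234)
 * (0x200 pfns in total) are left for the pfn-by-pfn boundary loop.
 */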

/*
 * This function takes a contiguous pfn range that needs to be identity mapped
 * and:
 *
 *  1) Finds a new range of pfns to use to remap based on E820 and remap_pfn.
 *  2) Calls the do_ function to actually do the mapping/remapping work.
 *
 * The goal is to not allocate additional memory but to remap the existing
 * pages. In the case of an error the underlying memory is simply released back
 * to Xen and not remapped.
 */
static unsigned long __init xen_set_identity_and_remap_chunk(
        const struct e820entry *list, size_t map_size, unsigned long start_pfn,
        unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn,
        unsigned long *identity, unsigned long *remapped,
        unsigned long *released)
{
        unsigned long pfn;
        unsigned long i = 0;
        unsigned long n = end_pfn - start_pfn;

        while (i < n) {
                unsigned long cur_pfn = start_pfn + i;
                unsigned long left = n - i;
                unsigned long size = left;
                unsigned long remap_range_size;

                /* Do not remap pages beyond the current allocation */
                if (cur_pfn >= nr_pages) {
                        /* Identity map remaining pages */
                        *identity += set_phys_range_identity(cur_pfn,
                                cur_pfn + size);
                        break;
                }
                if (cur_pfn + size > nr_pages)
                        size = nr_pages - cur_pfn;

                remap_range_size = xen_find_pfn_range(list, map_size,
                                                      &remap_pfn);
                if (!remap_range_size) {
                        pr_warning("Unable to find available pfn range, not remapping identity pages\n");
                        xen_set_identity_and_release_chunk(cur_pfn,
                                cur_pfn + left, nr_pages, identity, released);
                        break;
                }
                /* Adjust size to fit in current e820 RAM region */
                if (size > remap_range_size)
                        size = remap_range_size;

                if (!xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn)) {
                        WARN(1, "Failed to remap 1:1 memory cur_pfn=%ld size=%ld remap_pfn=%ld\n",
                                cur_pfn, size, remap_pfn);
                        xen_set_identity_and_release_chunk(cur_pfn,
                                cur_pfn + left, nr_pages, identity, released);
                        break;
                }

                /* Update variables to reflect new mappings. */
                i += size;
                remap_pfn += size;
                *identity += size;
                *remapped += size;
        }

        /*
         * If the PFNs are currently mapped, the VA mapping also needs
         * to be updated to be 1:1.
         */
        for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
                (void)HYPERVISOR_update_va_mapping(
                        (unsigned long)__va(pfn << PAGE_SHIFT),
                        mfn_pte(pfn, PAGE_KERNEL_IO), 0);

        return remap_pfn;
}
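
/*
 * Illustration of the chunk walk above (hypothetical numbers): for a
 * non-RAM pfn range [0xa0, 0x100) inside a domain with nr_pages = 0x40000,
 * xen_find_pfn_range() might locate free RAM at remap_pfn = 0x40000.
 * The 0x60 hole pfns then become 1:1 entries in the p2m, their former
 * mfns are re-attached at pfns 0x40000-0x4005f, and the advanced
 * remap_pfn (0x40060) is returned for the caller to continue from.
 */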

static unsigned long __init xen_set_identity_and_remap(
        const struct e820entry *list, size_t map_size, unsigned long nr_pages,
        unsigned long *released)
{
        phys_addr_t start = 0;
        unsigned long identity = 0;
        unsigned long remapped = 0;
        unsigned long last_pfn = nr_pages;
        const struct e820entry *entry;
        unsigned long num_released = 0;
        int i;

        /*
         * Combine non-RAM regions and gaps until a RAM region (or the
         * end of the map) is reached, then set the 1:1 map and
         * remap the memory in those non-RAM regions.
         *
         * The combined non-RAM regions are rounded to a whole number
         * of pages so any partial pages are accessible via the 1:1
         * mapping.  This is needed for some BIOSes that put (for
         * example) the DMI tables in a reserved region that begins on
         * a non-page boundary.
         */
        for (i = 0, entry = list; i < map_size; i++, entry++) {
                phys_addr_t end = entry->addr + entry->size;
                if (entry->type == E820_RAM || i == map_size - 1) {
                        unsigned long start_pfn = PFN_DOWN(start);
                        unsigned long end_pfn = PFN_UP(end);

                        if (entry->type == E820_RAM)
                                end_pfn = PFN_UP(entry->addr);

                        if (start_pfn < end_pfn)
                                last_pfn = xen_set_identity_and_remap_chunk(
                                                list, map_size, start_pfn,
                                                end_pfn, nr_pages, last_pfn,
                                                &identity, &remapped,
                                                &num_released);
                        start = end;
                }
        }

        *released = num_released;

        pr_info("Set %ld page(s) to 1-1 mapping\n", identity);
        pr_info("Remapped %ld page(s), last_pfn=%ld\n", remapped,
                last_pfn);
        pr_info("Released %ld page(s)\n", num_released);

        return last_pfn;
}
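
/*
 * Illustration of the combining loop above (hypothetical E820 map):
 * given "RAM 0-0x9fc00, reserved 0x9fc00-0x100000, RAM 0x100000-...",
 * the second RAM entry triggers a chunk covering PFN_DOWN(0x9fc00) = 0x9f
 * up to PFN_UP(0x100000) = 0x100, so the partial page at 0x9f000 (which
 * holds the start of the reserved region) stays reachable through the
 * 1:1 map.
 */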

static unsigned long __init xen_get_max_pages(void)
{
        unsigned long max_pages = MAX_DOMAIN_PAGES;
        domid_t domid = DOMID_SELF;
        int ret;

        /*
         * For the initial domain we use the maximum reservation as
         * the maximum page.
         *
         * For guest domains the current maximum reservation reflects
         * the current maximum rather than the static maximum. In this
         * case the e820 map provided to us will cover the static
         * maximum region.
         */
        if (xen_initial_domain()) {
                ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
                if (ret > 0)
                        max_pages = ret;
        }

        return min(max_pages, MAX_DOMAIN_PAGES);
}

static void xen_align_and_add_e820_region(u64 start, u64 size, int type)
{
        u64 end = start + size;

        /* Align RAM regions to page boundaries. */
        if (type == E820_RAM) {
                start = PAGE_ALIGN(start);
                end &= ~((u64)PAGE_SIZE - 1);
        }

        e820_add_region(start, end - start, type);
}

void xen_ignore_unusable(struct e820entry *list, size_t map_size)
{
        struct e820entry *entry;
        unsigned int i;

        for (i = 0, entry = list; i < map_size; i++, entry++) {
                if (entry->type == E820_UNUSABLE)
                        entry->type = E820_RAM;
        }
}

/**
 * machine_specific_memory_setup - Hook for machine specific memory setup.
 **/
char * __init xen_memory_setup(void)
{
        static struct e820entry map[E820MAX] __initdata;

        unsigned long max_pfn = xen_start_info->nr_pages;
        unsigned long long mem_end;
        int rc;
        struct xen_memory_map memmap;
        unsigned long max_pages;
        unsigned long last_pfn = 0;
        unsigned long extra_pages = 0;
        int i;
        int op;

        max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
        mem_end = PFN_PHYS(max_pfn);

        memmap.nr_entries = E820MAX;
        set_xen_guest_handle(memmap.buffer, map);

        op = xen_initial_domain() ?
                XENMEM_machine_memory_map :
                XENMEM_memory_map;
        rc = HYPERVISOR_memory_op(op, &memmap);
        if (rc == -ENOSYS) {
                BUG_ON(xen_initial_domain());
                memmap.nr_entries = 1;
                map[0].addr = 0ULL;
                map[0].size = mem_end;
                /* 8MB slack (to balance backend allocations). */
                map[0].size += 8ULL << 20;
                map[0].type = E820_RAM;
                rc = 0;
        }
        BUG_ON(rc);
        BUG_ON(memmap.nr_entries == 0);

        /*
         * Xen won't allow a 1:1 mapping to be created to UNUSABLE
         * regions, so if we're using the machine memory map leave the
         * region as RAM as it is in the pseudo-physical map.
         *
         * UNUSABLE regions in domUs are not handled and will need
         * a patch in the future.
         */
        if (xen_initial_domain())
                xen_ignore_unusable(map, memmap.nr_entries);

        /* Make sure the Xen-supplied memory map is well-ordered. */
        sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);

        max_pages = xen_get_max_pages();
        if (max_pages > max_pfn)
                extra_pages += max_pages - max_pfn;

        /*
         * Set identity map on non-RAM pages and remap the underlying RAM.
         */
        last_pfn = xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
                                              &xen_released_pages);

        extra_pages += xen_released_pages;

        if (last_pfn > max_pfn) {
                max_pfn = min(MAX_DOMAIN_PAGES, last_pfn);
                mem_end = PFN_PHYS(max_pfn);
        }
        /*
         * Clamp the amount of extra memory to an EXTRA_MEM_RATIO
         * factor of the base size.  On non-highmem systems, the base
         * size is the full initial memory allocation; on highmem it
         * is limited to the max size of lowmem, so that it doesn't
         * get completely filled.
         *
         * In principle there could be a problem in lowmem systems if
         * the initial memory is also very large with respect to
         * lowmem, but we won't try to deal with that here.
         */
        extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
                          extra_pages);
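
        /*
         * Worked example of the clamp (hypothetical 64-bit domain, where
         * MAXMEM far exceeds the allocation): with a 4 GiB initial
         * allocation, max_pfn = 0x100000, so extra_pages is capped at
         * 10 * 0x100000 pfns, i.e. at most 40 GiB of extra space is
         * represented in the e820 for later ballooning or hotplug.
         */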
        i = 0;
        while (i < memmap.nr_entries) {
                u64 addr = map[i].addr;
                u64 size = map[i].size;
                u32 type = map[i].type;

                if (type == E820_RAM) {
                        if (addr < mem_end) {
                                size = min(size, mem_end - addr);
                        } else if (extra_pages) {
                                size = min(size, (u64)extra_pages * PAGE_SIZE);
                                extra_pages -= size / PAGE_SIZE;
                                xen_add_extra_mem(addr, size);
                        } else
                                type = E820_UNUSABLE;
                }

                xen_align_and_add_e820_region(addr, size, type);

                map[i].addr += size;
                map[i].size -= size;
                if (map[i].size == 0)
                        i++;
        }

        /*
         * Set the rest as identity mapped, in case PCI BARs are
         * located here.
         *
         * PFNs above MAX_P2M_PFN are considered identity mapped as
         * well.
         */
        set_phys_range_identity(map[i-1].addr / PAGE_SIZE, ~0ul);

        /*
         * In domU, the ISA region is normal, usable memory, but we
         * reserve ISA memory anyway because too many things poke
         * about in there.
         */
        e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
                        E820_RESERVED);

        /*
         * Reserve Xen bits:
         *  - mfn_list
         *  - xen_start_info
         * See comment above "struct start_info" in <xen/interface/xen.h>
         * We tried to make the memblock_reserve more selective so
         * that it would be clear what region is reserved. Sadly we ran
         * into the problem wherein on a 64-bit hypervisor with a 32-bit
         * initial domain, the pt_base has the cr3 value which is not
         * necessarily where the pagetable starts! As Jan put it: "
         * Actually, the adjustment turns out to be correct: The page
         * tables for a 32-on-64 dom0 get allocated in the order "first L1",
         * "first L2", "first L3", so the offset to the page table base is
         * indeed 2. When reading xen/include/public/xen.h's comment
         * very strictly, this is not a violation (since there nothing is said
         * that the first thing in the page table space is pointed to by
         * pt_base; I admit that this seems to be implied though, namely
         * do I think that it is implied that the page table space is the
         * range [pt_base, pt_base + nr_pt_frames), whereas that
         * range here indeed is [pt_base - 2, pt_base - 2 + nr_pt_frames),
         * which - without a priori knowledge - the kernel would have
         * difficulty to figure out)." - so let's just fall back to the
         * easy way and reserve the whole region.
         */
        memblock_reserve(__pa(xen_start_info->mfn_list),
                         xen_start_info->pt_base - xen_start_info->mfn_list);

        sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);

        return "Xen";
}

/*
 * Machine specific memory setup for auto-translated guests.
 */
char * __init xen_auto_xlated_memory_setup(void)
{
        static struct e820entry map[E820MAX] __initdata;

        struct xen_memory_map memmap;
        int i;
        int rc;

        memmap.nr_entries = E820MAX;
        set_xen_guest_handle(memmap.buffer, map);

        rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
        if (rc < 0)
                panic("No memory map (%d)\n", rc);

        sanitize_e820_map(map, ARRAY_SIZE(map), &memmap.nr_entries);

        for (i = 0; i < memmap.nr_entries; i++)
                e820_add_region(map[i].addr, map[i].size, map[i].type);

        memblock_reserve(__pa(xen_start_info->mfn_list),
                         xen_start_info->pt_base - xen_start_info->mfn_list);

        return "Xen";
}

/*
 * Set the bit indicating "nosegneg" library variants should be used.
 * We only need to bother in pure 32-bit mode; compat 32-bit processes
 * can have un-truncated segments, so wrapping around is allowed.
 */
static void __init fiddle_vdso(void)
{
#ifdef CONFIG_X86_32
        /*
         * This could be called before selected_vdso32 is initialized, so
         * just fiddle with both possible images.  vdso_image_32_syscall
         * can't be selected, since it only exists on 64-bit systems.
         */
        u32 *mask;
        mask = vdso_image_32_int80.data +
                vdso_image_32_int80.sym_VDSO32_NOTE_MASK;
        *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
        mask = vdso_image_32_sysenter.data +
                vdso_image_32_sysenter.sym_VDSO32_NOTE_MASK;
        *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
#endif
}

static int register_callback(unsigned type, const void *func)
{
        struct callback_register callback = {
                .type = type,
                .address = XEN_CALLBACK(__KERNEL_CS, func),
                .flags = CALLBACKF_mask_events,
        };

        return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
}

void xen_enable_sysenter(void)
{
        int ret;
        unsigned sysenter_feature;

#ifdef CONFIG_X86_32
        sysenter_feature = X86_FEATURE_SEP;
#else
        sysenter_feature = X86_FEATURE_SYSENTER32;
#endif

        if (!boot_cpu_has(sysenter_feature))
                return;

        ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
        if (ret != 0)
                setup_clear_cpu_cap(sysenter_feature);
}

void xen_enable_syscall(void)
{
#ifdef CONFIG_X86_64
        int ret;

        ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
        if (ret != 0) {
                printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
                /* Pretty fatal; 64-bit userspace has no other
                   mechanism for syscalls. */
        }

        if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
                ret = register_callback(CALLBACKTYPE_syscall32,
                                        xen_syscall32_target);
                if (ret != 0)
                        setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
        }
#endif /* CONFIG_X86_64 */
}

void __init xen_pvmmu_arch_setup(void)
{
        HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
        HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);

        HYPERVISOR_vm_assist(VMASST_CMD_enable,
                             VMASST_TYPE_pae_extended_cr3);

        if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
            register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
                BUG();

        xen_enable_sysenter();
        xen_enable_syscall();
}

/* This function is not called for HVM domains */
void __init xen_arch_setup(void)
{
        xen_panic_handler_init();
        if (!xen_feature(XENFEAT_auto_translated_physmap))
                xen_pvmmu_arch_setup();

#ifdef CONFIG_ACPI
        if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
                printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
                disable_acpi();
        }
#endif

        memcpy(boot_command_line, xen_start_info->cmd_line,
               MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
               COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);

        /* Set up idle, making sure it calls safe_halt() pvop */
        disable_cpuidle();
        disable_cpufreq();
        WARN_ON(xen_set_default_idle());
        fiddle_vdso();
#ifdef CONFIG_NUMA
        numa_off = 1;
#endif
}