/*
 * Machine specific setup for xen
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */

#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/memblock.h>
#include <linux/cpuidle.h>
#include <linux/cpufreq.h>

#include <asm/elf.h>
#include <asm/vdso.h>
#include <asm/e820.h>
#include <asm/setup.h>
#include <asm/acpi.h>
#include <asm/numa.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/page.h>
#include <xen/interface/callback.h>
#include <xen/interface/memory.h>
#include <xen/interface/physdev.h>
#include <xen/features.h>
#include "xen-ops.h"
#include "vdso.h"
#include "p2m.h"
#include "mmu.h"

/* These are code, but not functions. Defined in entry.S */
extern const char xen_hypervisor_callback[];
extern const char xen_failsafe_callback[];
#ifdef CONFIG_X86_64
extern asmlinkage void nmi(void);
#endif
extern void xen_sysenter_target(void);
extern void xen_syscall_target(void);
extern void xen_syscall32_target(void);

/* Amount of extra memory space we add to the e820 ranges */
struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;

/* Number of pages released from the initial allocation. */
unsigned long xen_released_pages;

/*
 * Buffer used to remap identity mapped pages. We only need the virtual space.
 * The physical page behind this address is remapped as needed to different
 * buffer pages.
 */
#define REMAP_SIZE	(P2M_PER_PAGE - 3)
static struct {
	unsigned long next_area_mfn;
	unsigned long target_pfn;
	unsigned long size;
	unsigned long mfns[REMAP_SIZE];
} xen_remap_buf __initdata __aligned(PAGE_SIZE);
static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;

/*
 * The maximum amount of extra memory compared to the base size.  The
 * main scaling factor is the size of struct page.  At extreme ratios
 * of base:extra, all the base memory can be filled with page
 * structures for the extra memory, leaving no space for anything
 * else.
 *
 * 10x seems like a reasonable balance between scaling flexibility and
 * leaving a practically usable system.
 */
#define EXTRA_MEM_RATIO		(10)

static void __init xen_add_extra_mem(u64 start, u64 size)
{
	int i;

	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
		/* Add new region. */
		if (xen_extra_mem[i].size == 0) {
			xen_extra_mem[i].start = start;
			xen_extra_mem[i].size  = size;
			break;
		}
		/* Append to existing region. */
		if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) {
			xen_extra_mem[i].size += size;
			break;
		}
	}
	if (i == XEN_EXTRA_MEM_MAX_REGIONS)
		printk(KERN_WARNING "Warning: not enough extra memory regions\n");

	memblock_reserve(start, size);
}

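/*
 * Remove the range [start, start + size) from the extra memory regions,
 * splitting a region in two if the range lies in its middle, and free
 * the underlying range in memblock.
 */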
static void __init xen_del_extra_mem(u64 start, u64 size)
{
	int i;
	u64 start_r, size_r;

	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
		start_r = xen_extra_mem[i].start;
		size_r = xen_extra_mem[i].size;

		/* Start of region. */
		if (start_r == start) {
			BUG_ON(size > size_r);
			xen_extra_mem[i].start += size;
			xen_extra_mem[i].size -= size;
			break;
		}
		/* End of region. */
		if (start_r + size_r == start + size) {
			BUG_ON(size > size_r);
			xen_extra_mem[i].size -= size;
			break;
		}
		/* Mid of region. */
		if (start > start_r && start < start_r + size_r) {
			BUG_ON(start + size > start_r + size_r);
			xen_extra_mem[i].size = start - start_r;
			/* Calling memblock_reserve() again is okay. */
			xen_add_extra_mem(start + size, start_r + size_r -
					  (start + size));
			break;
		}
	}
	memblock_free(start, size);
}

/*
 * Called during boot before the p2m list can take entries beyond the
 * hypervisor supplied p2m list. Entries in extra mem are to be regarded as
 * invalid.
 */
unsigned long __ref xen_chk_extra_mem(unsigned long pfn)
{
	int i;
	phys_addr_t addr = PFN_PHYS(pfn);

	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
		if (addr >= xen_extra_mem[i].start &&
		    addr < xen_extra_mem[i].start + xen_extra_mem[i].size)
			return INVALID_P2M_ENTRY;
	}

	return IDENTITY_FRAME(pfn);
}

/*
 * Mark all pfns of extra mem as invalid in p2m list.
 */
void __init xen_inv_extra_mem(void)
{
	unsigned long pfn, pfn_s, pfn_e;
	int i;

	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
		if (!xen_extra_mem[i].size)
			continue;
		pfn_s = PFN_DOWN(xen_extra_mem[i].start);
		pfn_e = PFN_UP(xen_extra_mem[i].start + xen_extra_mem[i].size);
		for (pfn = pfn_s; pfn < pfn_e; pfn++)
			set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
	}
}

/*
 * Finds the next RAM pfn available in the E820 map after min_pfn.
 * This function updates min_pfn with the pfn found and returns
 * the size of that range or zero if not found.
 */
static unsigned long __init xen_find_pfn_range(
	const struct e820entry *list, size_t map_size,
	unsigned long *min_pfn)
{
	const struct e820entry *entry;
	unsigned int i;
	unsigned long done = 0;

	for (i = 0, entry = list; i < map_size; i++, entry++) {
		unsigned long s_pfn;
		unsigned long e_pfn;

		if (entry->type != E820_RAM)
			continue;

		e_pfn = PFN_DOWN(entry->addr + entry->size);

		/* We only care about E820 after this */
		if (e_pfn < *min_pfn)
			continue;

		s_pfn = PFN_UP(entry->addr);

		/* If min_pfn falls within the E820 entry, we want to start
		 * at the min_pfn PFN.
		 */
		if (s_pfn <= *min_pfn) {
			done = e_pfn - *min_pfn;
		} else {
			done = e_pfn - s_pfn;
			*min_pfn = s_pfn;
		}
		break;
	}

	return done;
}

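/*
 * Hand a single frame back to the hypervisor via XENMEM_decrease_reservation.
 * Returns the number of extents released, i.e. 1 on success.
 */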
static int __init xen_free_mfn(unsigned long mfn)
{
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	set_xen_guest_handle(reservation.extent_start, &mfn);
	reservation.nr_extents = 1;

	return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
}

/*
 * This releases a chunk of memory and then does the identity map. It's used
 * as a fallback if the remapping fails.
 */
static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
	unsigned long end_pfn, unsigned long nr_pages, unsigned long *released)
{
	unsigned long pfn, end;
	int ret;

	WARN_ON(start_pfn > end_pfn);

	/* Release pages first. */
	end = min(end_pfn, nr_pages);
	for (pfn = start_pfn; pfn < end; pfn++) {
		unsigned long mfn = pfn_to_mfn(pfn);

		/* Make sure pfn exists to start with */
		if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
			continue;

		ret = xen_free_mfn(mfn);
		WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);

		if (ret == 1) {
			(*released)++;
			if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY))
				break;
		} else
			break;
	}

	set_phys_range_identity(start_pfn, end_pfn);
}

/*
 * Helper function to update the p2m and m2p tables and kernel mapping.
 */
static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn)
{
	struct mmu_update update = {
		.ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
		.val = pfn
	};

	/* Update p2m */
	if (!set_phys_to_machine(pfn, mfn)) {
		WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
		     pfn, mfn);
		BUG();
	}

	/* Update m2p */
	if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
		WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
		     mfn, pfn);
		BUG();
	}

	/* Update kernel mapping, but not for highmem. */
	if (pfn >= PFN_UP(__pa(high_memory - 1)))
		return;

	if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT),
					 mfn_pte(mfn, PAGE_KERNEL), 0)) {
		WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n",
		     mfn, pfn);
		BUG();
	}
}

/*
 * This function updates the p2m and m2p tables with an identity map from
 * start_pfn to start_pfn+size and prepares remapping the underlying RAM of the
 * original allocation at remap_pfn. The information needed for remapping is
 * saved in the memory itself to avoid the need for allocating buffers. The
 * complete remap information is contained in a list of MFNs each containing
 * up to REMAP_SIZE MFNs and the start target PFN for doing the remap.
 * This enables us to preserve the original mfn sequence while doing the
 * remapping at a time when the memory management is capable of allocating
 * virtual and physical memory in arbitrary amounts, see 'xen_remap_memory' and
 * its callers.
 */
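/*
 * Roughly, the first frame of every chunk is reused to hold a copy of
 * xen_remap_buf:
 *
 *	next_area_mfn:	MFN of the previously queued chunk (list link)
 *	target_pfn:	first PFN this chunk's frames will be remapped to
 *	size:		number of frames in this chunk (at most REMAP_SIZE)
 *	mfns[]:		the original MFNs backing the chunk
 *
 * xen_remap_mfn always points to the most recently queued chunk, so the
 * list is walked newest-first by xen_remap_memory().
 */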
static void __init xen_do_set_identity_and_remap_chunk(
	unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
{
	unsigned long buf = (unsigned long)&xen_remap_buf;
	unsigned long mfn_save, mfn;
	unsigned long ident_pfn_iter, remap_pfn_iter;
	unsigned long ident_end_pfn = start_pfn + size;
	unsigned long left = size;
	unsigned int i, chunk;

	WARN_ON(size == 0);

	BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));

	mfn_save = virt_to_mfn(buf);

	for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn;
	     ident_pfn_iter < ident_end_pfn;
	     ident_pfn_iter += REMAP_SIZE, remap_pfn_iter += REMAP_SIZE) {
		chunk = (left < REMAP_SIZE) ? left : REMAP_SIZE;

		/* Map first pfn to xen_remap_buf */
		mfn = pfn_to_mfn(ident_pfn_iter);
		set_pte_mfn(buf, mfn, PAGE_KERNEL);

		/* Save mapping information in page */
		xen_remap_buf.next_area_mfn = xen_remap_mfn;
		xen_remap_buf.target_pfn = remap_pfn_iter;
		xen_remap_buf.size = chunk;
		for (i = 0; i < chunk; i++)
			xen_remap_buf.mfns[i] = pfn_to_mfn(ident_pfn_iter + i);

		/* Put remap buf into list. */
		xen_remap_mfn = mfn;

		/* Set identity map */
		set_phys_range_identity(ident_pfn_iter, ident_pfn_iter + chunk);

		left -= chunk;
	}

	/* Restore old xen_remap_buf mapping */
	set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
}

/*
 * This function takes a contiguous pfn range that needs to be identity mapped
 * and:
 *
 *  1) Finds a new range of pfns to use to remap based on E820 and remap_pfn.
 *  2) Calls the do_ function to actually do the mapping/remapping work.
 *
 * The goal is to not allocate additional memory but to remap the existing
 * pages. In the case of an error the underlying memory is simply released back
 * to Xen and not remapped.
 */
static unsigned long __init xen_set_identity_and_remap_chunk(
	const struct e820entry *list, size_t map_size, unsigned long start_pfn,
	unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn,
	unsigned long *released, unsigned long *remapped)
{
	unsigned long pfn;
	unsigned long i = 0;
	unsigned long n = end_pfn - start_pfn;

	while (i < n) {
		unsigned long cur_pfn = start_pfn + i;
		unsigned long left = n - i;
		unsigned long size = left;
		unsigned long remap_range_size;

		/* Do not remap pages beyond the current allocation */
		if (cur_pfn >= nr_pages) {
			/* Identity map remaining pages */
			set_phys_range_identity(cur_pfn, cur_pfn + size);
			break;
		}
		if (cur_pfn + size > nr_pages)
			size = nr_pages - cur_pfn;

		remap_range_size = xen_find_pfn_range(list, map_size,
						      &remap_pfn);
		if (!remap_range_size) {
			pr_warning("Unable to find available pfn range, not remapping identity pages\n");
			xen_set_identity_and_release_chunk(cur_pfn,
				cur_pfn + left, nr_pages, released);
			break;
		}
		/* Adjust size to fit in current e820 RAM region */
		if (size > remap_range_size)
			size = remap_range_size;

		xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn);

		/* Update variables to reflect new mappings. */
		i += size;
		remap_pfn += size;
		*remapped += size;
	}

	/*
	 * If the PFNs are currently mapped, the VA mapping also needs
	 * to be updated to be 1:1.
	 */
	for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
		(void)HYPERVISOR_update_va_mapping(
			(unsigned long)__va(pfn << PAGE_SHIFT),
			mfn_pte(pfn, PAGE_KERNEL_IO), 0);

	return remap_pfn;
}

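/*
 * Set the 1:1 map for all non-RAM regions and gaps in the e820 map and
 * prepare remapping of the RAM backing them. The number of pages released
 * (fallback path) and remapped is passed back via *released and *remapped.
 */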
static void __init xen_set_identity_and_remap(
	const struct e820entry *list, size_t map_size, unsigned long nr_pages,
	unsigned long *released, unsigned long *remapped)
{
	phys_addr_t start = 0;
	unsigned long last_pfn = nr_pages;
	const struct e820entry *entry;
	unsigned long num_released = 0;
	unsigned long num_remapped = 0;
	int i;

	/*
	 * Combine non-RAM regions and gaps until a RAM region (or the
	 * end of the map) is reached, then set the 1:1 map and
	 * remap the memory in those non-RAM regions.
	 *
	 * The combined non-RAM regions are rounded to a whole number
	 * of pages so any partial pages are accessible via the 1:1
	 * mapping. This is needed for some BIOSes that put (for
	 * example) the DMI tables in a reserved region that begins on
	 * a non-page boundary.
	 */
	for (i = 0, entry = list; i < map_size; i++, entry++) {
		phys_addr_t end = entry->addr + entry->size;
		if (entry->type == E820_RAM || i == map_size - 1) {
			unsigned long start_pfn = PFN_DOWN(start);
			unsigned long end_pfn = PFN_UP(end);

			if (entry->type == E820_RAM)
				end_pfn = PFN_UP(entry->addr);

			if (start_pfn < end_pfn)
				last_pfn = xen_set_identity_and_remap_chunk(
						list, map_size, start_pfn,
						end_pfn, nr_pages, last_pfn,
						&num_released, &num_remapped);
			start = end;
		}
	}

	*released = num_released;
	*remapped = num_remapped;

	pr_info("Released %ld page(s)\n", num_released);
}

/*
 * Remap the memory prepared in xen_do_set_identity_and_remap_chunk().
 * The remap information (which mfn is remapped to which pfn) is contained
 * in the memory to be remapped itself, in a linked list anchored at
 * xen_remap_mfn. This scheme allows the chunks to be remapped in arbitrary
 * order while the resulting mapping stays independent of that order.
 */
void __init xen_remap_memory(void)
{
	unsigned long buf = (unsigned long)&xen_remap_buf;
	unsigned long mfn_save, mfn, pfn;
	unsigned long remapped = 0;
	unsigned int i;
	unsigned long pfn_s = ~0UL;
	unsigned long len = 0;

	mfn_save = virt_to_mfn(buf);

	while (xen_remap_mfn != INVALID_P2M_ENTRY) {
		/* Map the remap information */
		set_pte_mfn(buf, xen_remap_mfn, PAGE_KERNEL);

		BUG_ON(xen_remap_mfn != xen_remap_buf.mfns[0]);

		pfn = xen_remap_buf.target_pfn;
		for (i = 0; i < xen_remap_buf.size; i++) {
			mfn = xen_remap_buf.mfns[i];
			xen_update_mem_tables(pfn, mfn);
			remapped++;
			pfn++;
		}
		if (pfn_s == ~0UL || pfn == pfn_s) {
			pfn_s = xen_remap_buf.target_pfn;
			len += xen_remap_buf.size;
		} else if (pfn_s + len == xen_remap_buf.target_pfn) {
			len += xen_remap_buf.size;
		} else {
			xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len));
			pfn_s = xen_remap_buf.target_pfn;
			len = xen_remap_buf.size;
		}

		mfn = xen_remap_mfn;
		xen_remap_mfn = xen_remap_buf.next_area_mfn;
	}

	if (pfn_s != ~0UL && len)
		xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len));

	set_pte_mfn(buf, mfn_save, PAGE_KERNEL);

	pr_info("Remapped %ld page(s)\n", remapped);
}

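/*
 * Return the domain's maximum number of pages, never exceeding
 * MAX_DOMAIN_PAGES.
 */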
static unsigned long __init xen_get_max_pages(void)
{
	unsigned long max_pages = MAX_DOMAIN_PAGES;
	domid_t domid = DOMID_SELF;
	int ret;

	/*
	 * For the initial domain we use the maximum reservation as
	 * the maximum page.
	 *
	 * For guest domains the current maximum reservation reflects
	 * the current maximum rather than the static maximum. In this
	 * case the e820 map provided to us will cover the static
	 * maximum region.
	 */
	if (xen_initial_domain()) {
		ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
		if (ret > 0)
			max_pages = ret;
	}

	return min(max_pages, MAX_DOMAIN_PAGES);
}

static void xen_align_and_add_e820_region(u64 start, u64 size, int type)
{
	u64 end = start + size;

	/* Align RAM regions to page boundaries. */
	if (type == E820_RAM) {
		start = PAGE_ALIGN(start);
		end &= ~((u64)PAGE_SIZE - 1);
	}

	e820_add_region(start, end - start, type);
}

void xen_ignore_unusable(struct e820entry *list, size_t map_size)
{
	struct e820entry *entry;
	unsigned int i;

	for (i = 0, entry = list; i < map_size; i++, entry++) {
		if (entry->type == E820_UNUSABLE)
			entry->type = E820_RAM;
	}
}

/**
 * machine_specific_memory_setup - Hook for machine specific memory setup.
 **/
char * __init xen_memory_setup(void)
{
	static struct e820entry map[E820MAX] __initdata;

	unsigned long max_pfn = xen_start_info->nr_pages;
	unsigned long long mem_end;
	int rc;
	struct xen_memory_map memmap;
	unsigned long max_pages;
	unsigned long extra_pages = 0;
	unsigned long remapped_pages;
	int i;
	int op;

	max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
	mem_end = PFN_PHYS(max_pfn);

	memmap.nr_entries = E820MAX;
	set_xen_guest_handle(memmap.buffer, map);

	op = xen_initial_domain() ?
		XENMEM_machine_memory_map :
		XENMEM_memory_map;
	rc = HYPERVISOR_memory_op(op, &memmap);
	if (rc == -ENOSYS) {
		BUG_ON(xen_initial_domain());
		memmap.nr_entries = 1;
		map[0].addr = 0ULL;
		map[0].size = mem_end;
		/* 8MB slack (to balance backend allocations). */
		map[0].size += 8ULL << 20;
		map[0].type = E820_RAM;
		rc = 0;
	}
	BUG_ON(rc);
	BUG_ON(memmap.nr_entries == 0);

	/*
	 * Xen won't allow a 1:1 mapping to be created to UNUSABLE
	 * regions, so if we're using the machine memory map leave the
	 * region as RAM as it is in the pseudo-physical map.
	 *
	 * UNUSABLE regions in domUs are not handled and will need
	 * a patch in the future.
	 */
	if (xen_initial_domain())
		xen_ignore_unusable(map, memmap.nr_entries);

	/* Make sure the Xen-supplied memory map is well-ordered. */
	sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);

	max_pages = xen_get_max_pages();
	if (max_pages > max_pfn)
		extra_pages += max_pages - max_pfn;

	/*
	 * Set identity map on non-RAM pages and prepare remapping the
	 * underlying RAM.
	 */
	xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
				   &xen_released_pages, &remapped_pages);

	extra_pages += xen_released_pages;
	extra_pages += remapped_pages;

	/*
	 * Clamp the amount of extra memory to an EXTRA_MEM_RATIO
	 * factor of the base size.  On non-highmem systems, the base
	 * size is the full initial memory allocation; on highmem it
	 * is limited to the max size of lowmem, so that it doesn't
	 * get completely filled.
	 *
	 * In principle there could be a problem in lowmem systems if
	 * the initial memory is also very large with respect to
	 * lowmem, but we won't try to deal with that here.
	 */
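	/*
	 * E.g. (assuming 4 KiB pages and no lowmem limit): a domain booted
	 * with a 1 GiB initial allocation (262144 pages) is granted at most
	 * 10 * 262144 pages = 10 GiB of extra memory ranges below; RAM in
	 * the e820 map beyond that budget is marked E820_UNUSABLE.
	 */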
	extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
			  extra_pages);
	i = 0;
	while (i < memmap.nr_entries) {
		u64 addr = map[i].addr;
		u64 size = map[i].size;
		u32 type = map[i].type;

		if (type == E820_RAM) {
			if (addr < mem_end) {
				size = min(size, mem_end - addr);
			} else if (extra_pages) {
				size = min(size, (u64)extra_pages * PAGE_SIZE);
				extra_pages -= size / PAGE_SIZE;
				xen_add_extra_mem(addr, size);
				xen_max_p2m_pfn = PFN_DOWN(addr + size);
			} else
				type = E820_UNUSABLE;
		}

		xen_align_and_add_e820_region(addr, size, type);

		map[i].addr += size;
		map[i].size -= size;
		if (map[i].size == 0)
			i++;
	}

	/*
	 * Set the rest as identity mapped, in case PCI BARs are
	 * located here.
	 *
	 * PFNs above MAX_P2M_PFN are considered identity mapped as
	 * well.
	 */
	set_phys_range_identity(map[i-1].addr / PAGE_SIZE, ~0ul);

	/*
	 * In domU, the ISA region is normal, usable memory, but we
	 * reserve ISA memory anyway because too many things poke
	 * about in there.
	 */
	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
			E820_RESERVED);

	/*
	 * Reserve Xen bits:
	 *  - mfn_list
	 *  - xen_start_info
	 * See comment above "struct start_info" in <xen/interface/xen.h>
	 * We tried to make the memblock_reserve more selective so
	 * that it would be clear what region is reserved. Sadly we ran
	 * into the problem wherein on a 64-bit hypervisor with a 32-bit
	 * initial domain, the pt_base has the cr3 value which is not
	 * necessarily where the pagetable starts! As Jan put it: "
	 * Actually, the adjustment turns out to be correct: The page
	 * tables for a 32-on-64 dom0 get allocated in the order "first L1",
	 * "first L2", "first L3", so the offset to the page table base is
	 * indeed 2. When reading xen/include/public/xen.h's comment
	 * very strictly, this is not a violation (since there nothing is said
	 * that the first thing in the page table space is pointed to by
	 * pt_base; I admit that this seems to be implied though, namely
	 * do I think that it is implied that the page table space is the
	 * range [pt_base, pt_base + nr_pt_frames), whereas that
	 * range here indeed is [pt_base - 2, pt_base - 2 + nr_pt_frames),
	 * which - without a priori knowledge - the kernel would have
	 * difficulty to figure out)." - so let's just fall back to the
	 * easy way and reserve the whole region.
	 */
	memblock_reserve(__pa(xen_start_info->mfn_list),
			 xen_start_info->pt_base - xen_start_info->mfn_list);

	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);

	return "Xen";
}

/*
 * Machine specific memory setup for auto-translated guests.
 */
char * __init xen_auto_xlated_memory_setup(void)
{
	static struct e820entry map[E820MAX] __initdata;

	struct xen_memory_map memmap;
	int i;
	int rc;

	memmap.nr_entries = E820MAX;
	set_xen_guest_handle(memmap.buffer, map);

	rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
	if (rc < 0)
		panic("No memory map (%d)\n", rc);

	sanitize_e820_map(map, ARRAY_SIZE(map), &memmap.nr_entries);

	for (i = 0; i < memmap.nr_entries; i++)
		e820_add_region(map[i].addr, map[i].size, map[i].type);

	memblock_reserve(__pa(xen_start_info->mfn_list),
			 xen_start_info->pt_base - xen_start_info->mfn_list);

	return "Xen";
}

/*
 * Set the bit indicating "nosegneg" library variants should be used.
 * We only need to bother in pure 32-bit mode; compat 32-bit processes
 * can have un-truncated segments, so wrapping around is allowed.
 */
static void __init fiddle_vdso(void)
{
#ifdef CONFIG_X86_32
	/*
	 * This could be called before selected_vdso32 is initialized, so
	 * just fiddle with both possible images. vdso_image_32_syscall
	 * can't be selected, since it only exists on 64-bit systems.
	 */
	u32 *mask;
	mask = vdso_image_32_int80.data +
		vdso_image_32_int80.sym_VDSO32_NOTE_MASK;
	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
	mask = vdso_image_32_sysenter.data +
		vdso_image_32_sysenter.sym_VDSO32_NOTE_MASK;
	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
#endif
}

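/*
 * Register a callback with the hypervisor. CALLBACKF_mask_events requests
 * that event delivery be masked while the callback is running.
 */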
static int register_callback(unsigned type, const void *func)
{
	struct callback_register callback = {
		.type = type,
		.address = XEN_CALLBACK(__KERNEL_CS, func),
		.flags = CALLBACKF_mask_events,
	};

	return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
}

void xen_enable_sysenter(void)
{
	int ret;
	unsigned sysenter_feature;

#ifdef CONFIG_X86_32
	sysenter_feature = X86_FEATURE_SEP;
#else
	sysenter_feature = X86_FEATURE_SYSENTER32;
#endif

	if (!boot_cpu_has(sysenter_feature))
		return;

	ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
	if (ret != 0)
		setup_clear_cpu_cap(sysenter_feature);
}

void xen_enable_syscall(void)
{
#ifdef CONFIG_X86_64
	int ret;

	ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
	if (ret != 0) {
		printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
		/* Pretty fatal; 64-bit userspace has no other
		   mechanism for syscalls. */
	}

	if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
		ret = register_callback(CALLBACKTYPE_syscall32,
					xen_syscall32_target);
		if (ret != 0)
			setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
	}
#endif /* CONFIG_X86_64 */
}

void __init xen_pvmmu_arch_setup(void)
{
	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);

	HYPERVISOR_vm_assist(VMASST_CMD_enable,
			     VMASST_TYPE_pae_extended_cr3);

	if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
	    register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
		BUG();

	xen_enable_sysenter();
	xen_enable_syscall();
}

/* This function is not called for HVM domains */
void __init xen_arch_setup(void)
{
	xen_panic_handler_init();
	if (!xen_feature(XENFEAT_auto_translated_physmap))
		xen_pvmmu_arch_setup();

#ifdef CONFIG_ACPI
	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
		printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
		disable_acpi();
	}
#endif

	memcpy(boot_command_line, xen_start_info->cmd_line,
	       MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
	       COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);

	/* Set up idle, making sure it calls safe_halt() pvop */
	disable_cpuidle();
	disable_cpufreq();
	WARN_ON(xen_set_default_idle());
	fiddle_vdso();
#ifdef CONFIG_NUMA
	numa_off = 1;
#endif
}