/*
 * Machine specific setup for xen
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */

#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/memblock.h>
#include <linux/cpuidle.h>
#include <linux/cpufreq.h>

#include <asm/elf.h>
#include <asm/vdso.h>
#include <asm/e820.h>
#include <asm/setup.h>
#include <asm/acpi.h>
#include <asm/numa.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/page.h>
#include <xen/interface/callback.h>
#include <xen/interface/memory.h>
#include <xen/interface/physdev.h>
#include <xen/features.h>
#include "xen-ops.h"
#include "vdso.h"
#include "p2m.h"
#include "mmu.h"

/* These are code, but not functions.  Defined in entry.S */
extern const char xen_hypervisor_callback[];
extern const char xen_failsafe_callback[];
#ifdef CONFIG_X86_64
extern asmlinkage void nmi(void);
#endif
extern void xen_sysenter_target(void);
extern void xen_syscall_target(void);
extern void xen_syscall32_target(void);

/* Amount of extra memory space we add to the e820 ranges */
struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;

/* Number of pages released from the initial allocation. */
unsigned long xen_released_pages;

/*
 * Buffer used to remap identity mapped pages. We only need the virtual space.
 * The physical page behind this address is remapped as needed to different
 * buffer pages.
 */
#define REMAP_SIZE	(P2M_PER_PAGE - 3)
static struct {
	unsigned long	next_area_mfn;
	unsigned long	target_pfn;
	unsigned long	size;
	unsigned long	mfns[REMAP_SIZE];
} xen_remap_buf __initdata __aligned(PAGE_SIZE);
static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;

/*
 * The maximum amount of extra memory compared to the base size.  The
 * main scaling factor is the size of struct page.  At extreme ratios
 * of base:extra, all the base memory can be filled with page
 * structures for the extra memory, leaving no space for anything
 * else.
 *
 * 10x seems like a reasonable balance between scaling flexibility and
 * leaving a practically usable system.
 */
#define EXTRA_MEM_RATIO		(10)
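
/*
 * Illustrative arithmetic (not taken from any particular configuration):
 * with a 1 GiB base allocation, i.e. 262144 4 KiB pages, the clamp in
 * xen_memory_setup() limits extra memory to at most
 * EXTRA_MEM_RATIO * 262144 = 2621440 pages (10 GiB), regardless of how
 * much memory the hypervisor could otherwise hand back to us.
 */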

static void __init xen_add_extra_mem(u64 start, u64 size)
{
	int i;

	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
		/* Add new region. */
		if (xen_extra_mem[i].size == 0) {
			xen_extra_mem[i].start = start;
			xen_extra_mem[i].size  = size;
			break;
		}
		/* Append to existing region. */
		if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) {
			xen_extra_mem[i].size += size;
			break;
		}
	}
	if (i == XEN_EXTRA_MEM_MAX_REGIONS)
		printk(KERN_WARNING "Warning: not enough extra memory regions\n");

	memblock_reserve(start, size);
}

static void __init xen_del_extra_mem(u64 start, u64 size)
{
	int i;
	u64 start_r, size_r;

	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
		start_r = xen_extra_mem[i].start;
		size_r = xen_extra_mem[i].size;

		/* Start of region. */
		if (start_r == start) {
			BUG_ON(size > size_r);
			xen_extra_mem[i].start += size;
			xen_extra_mem[i].size -= size;
			break;
		}
		/* End of region. */
		if (start_r + size_r == start + size) {
			BUG_ON(size > size_r);
			xen_extra_mem[i].size -= size;
			break;
		}
		/* Mid of region. */
		if (start > start_r && start < start_r + size_r) {
			BUG_ON(start + size > start_r + size_r);
			xen_extra_mem[i].size = start - start_r;
			/* Calling memblock_reserve() again is okay. */
			xen_add_extra_mem(start + size, start_r + size_r -
					  (start + size));
			break;
		}
	}
	memblock_free(start, size);
}

/*
 * Called during boot before the p2m list can take entries beyond the
 * hypervisor supplied p2m list. Entries in extra mem are to be regarded as
 * invalid.
 */
unsigned long __ref xen_chk_extra_mem(unsigned long pfn)
{
	int i;
	unsigned long addr = PFN_PHYS(pfn);

	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
		if (addr >= xen_extra_mem[i].start &&
		    addr < xen_extra_mem[i].start + xen_extra_mem[i].size)
			return INVALID_P2M_ENTRY;
	}

	return IDENTITY_FRAME(pfn);
}

/*
 * Mark all pfns of extra mem as invalid in p2m list.
 */
void __init xen_inv_extra_mem(void)
{
	unsigned long pfn, pfn_s, pfn_e;
	int i;

	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
		pfn_s = PFN_DOWN(xen_extra_mem[i].start);
		pfn_e = PFN_UP(xen_extra_mem[i].start + xen_extra_mem[i].size);
		for (pfn = pfn_s; pfn < pfn_e; pfn++)
			set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
	}
}

/*
 * Finds the next RAM pfn available in the E820 map after min_pfn.
 * This function updates min_pfn with the pfn found and returns
 * the size of that range or zero if not found.
 */
static unsigned long __init xen_find_pfn_range(
	const struct e820entry *list, size_t map_size,
	unsigned long *min_pfn)
{
	const struct e820entry *entry;
	unsigned int i;
	unsigned long done = 0;

	for (i = 0, entry = list; i < map_size; i++, entry++) {
		unsigned long s_pfn;
		unsigned long e_pfn;

		if (entry->type != E820_RAM)
			continue;

		e_pfn = PFN_DOWN(entry->addr + entry->size);

		/* We only care about E820 after this */
		if (e_pfn < *min_pfn)
			continue;

		s_pfn = PFN_UP(entry->addr);

		/* If min_pfn falls within the E820 entry, we want to start
		 * at the min_pfn PFN.
		 */
		if (s_pfn <= *min_pfn) {
			done = e_pfn - *min_pfn;
		} else {
			done = e_pfn - s_pfn;
			*min_pfn = s_pfn;
		}
		break;
	}

	return done;
}
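
/*
 * Illustrative walk-through of xen_find_pfn_range() (the numbers are
 * made up): given an E820 RAM entry covering [0x100000000, 0x140000000)
 * and *min_pfn == 0x90000, we get s_pfn == 0x100000 and
 * e_pfn == 0x140000.  Because s_pfn > *min_pfn, *min_pfn is advanced
 * to 0x100000 and 0x40000 (the number of usable pages) is returned.
 * The caller is expected to advance *min_pfn past whatever it actually
 * consumes before asking for the next range.
 */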

static int __init xen_free_mfn(unsigned long mfn)
{
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	set_xen_guest_handle(reservation.extent_start, &mfn);
	reservation.nr_extents = 1;

	return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
}

/*
 * This releases a chunk of memory and then does the identity map. It's used
 * as a fallback if the remapping fails.
 */
static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
	unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity,
	unsigned long *released)
{
	unsigned long len = 0;
	unsigned long pfn, end;
	int ret;

	WARN_ON(start_pfn > end_pfn);

	end = min(end_pfn, nr_pages);
	for (pfn = start_pfn; pfn < end; pfn++) {
		unsigned long mfn = pfn_to_mfn(pfn);

		/* Make sure pfn exists to start with */
		if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
			continue;

		ret = xen_free_mfn(mfn);
		WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);

		if (ret == 1) {
			if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY))
				break;
			len++;
		} else
			break;
	}

	/* Need to release pages first */
	*released += len;
	*identity += set_phys_range_identity(start_pfn, end_pfn);
}

/*
 * Helper function to update the p2m and m2p tables and kernel mapping.
 */
static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn)
{
	struct mmu_update update = {
		.ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
		.val = pfn
	};

	/* Update p2m */
	if (!set_phys_to_machine(pfn, mfn)) {
		WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
		     pfn, mfn);
		BUG();
	}

	/* Update m2p */
	if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
		WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
		     mfn, pfn);
		BUG();
	}

	/* Update kernel mapping, but not for highmem. */
	if ((pfn << PAGE_SHIFT) >= __pa(high_memory))
		return;

	if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT),
					 mfn_pte(mfn, PAGE_KERNEL), 0)) {
		WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n",
		     mfn, pfn);
		BUG();
	}
}

/*
 * This function updates the p2m and m2p tables with an identity map from
 * start_pfn to start_pfn+size and prepares remapping the underlying RAM of the
 * original allocation at remap_pfn. The information needed for remapping is
 * saved in the memory itself to avoid the need for allocating buffers. The
 * complete remap information is contained in a list of MFNs each containing
 * up to REMAP_SIZE MFNs and the start target PFN for doing the remap.
 * This enables us to preserve the original mfn sequence while doing the
 * remapping at a time when the memory management is capable of allocating
 * virtual and physical memory in arbitrary amounts, see 'xen_remap_memory' and
 * its callers.
 */
static void __init xen_do_set_identity_and_remap_chunk(
	unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
{
	unsigned long buf = (unsigned long)&xen_remap_buf;
	unsigned long mfn_save, mfn;
	unsigned long ident_pfn_iter, remap_pfn_iter;
	unsigned long ident_end_pfn = start_pfn + size;
	unsigned long left = size;
	unsigned long ident_cnt = 0;
	unsigned int i, chunk;

	WARN_ON(size == 0);

	BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));

	mfn_save = virt_to_mfn(buf);

	for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn;
	     ident_pfn_iter < ident_end_pfn;
	     ident_pfn_iter += REMAP_SIZE, remap_pfn_iter += REMAP_SIZE) {
		chunk = (left < REMAP_SIZE) ? left : REMAP_SIZE;

		/* Map first pfn to xen_remap_buf */
		mfn = pfn_to_mfn(ident_pfn_iter);
		set_pte_mfn(buf, mfn, PAGE_KERNEL);

		/* Save mapping information in page */
		xen_remap_buf.next_area_mfn = xen_remap_mfn;
		xen_remap_buf.target_pfn = remap_pfn_iter;
		xen_remap_buf.size = chunk;
		for (i = 0; i < chunk; i++)
			xen_remap_buf.mfns[i] = pfn_to_mfn(ident_pfn_iter + i);

		/* Put remap buf into list. */
		xen_remap_mfn = mfn;

		/* Set identity map */
		ident_cnt += set_phys_range_identity(ident_pfn_iter,
			ident_pfn_iter + chunk);

		left -= chunk;
	}

	/* Restore old xen_remap_buf mapping */
	set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
}
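
/*
 * Illustrative sketch of the remap list built above (the frame numbers
 * are invented for the example).  Each chunk's bookkeeping lives in the
 * first frame of the chunk itself and points back at the previously
 * saved chunk via next_area_mfn:
 *
 *   xen_remap_mfn -> MFN 0x2000: { next_area_mfn = 0x1000,
 *                                  target_pfn    = 0x90000,
 *                                  size          = REMAP_SIZE,
 *                                  mfns[]        = 0x2000, 0x2001, ... }
 *                    MFN 0x1000: { next_area_mfn = INVALID_P2M_ENTRY,
 *                                  target_pfn    = 0x88000,
 *                                  size          = 42,
 *                                  mfns[]        = 0x1000, 0x1001, ... }
 *
 * xen_remap_memory() later walks this chain, so no additional buffer
 * needs to be allocated while the identity map is being set up.
 */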

/*
 * This function takes a contiguous pfn range that needs to be identity mapped
 * and:
 *
 *  1) Finds a new range of pfns to use to remap based on E820 and remap_pfn.
 *  2) Calls the do_ function to actually do the mapping/remapping work.
 *
 * The goal is to not allocate additional memory but to remap the existing
 * pages. In the case of an error the underlying memory is simply released back
 * to Xen and not remapped.
 */
static unsigned long __init xen_set_identity_and_remap_chunk(
	const struct e820entry *list, size_t map_size, unsigned long start_pfn,
	unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn,
	unsigned long *identity, unsigned long *released)
{
	unsigned long pfn;
	unsigned long i = 0;
	unsigned long n = end_pfn - start_pfn;

	while (i < n) {
		unsigned long cur_pfn = start_pfn + i;
		unsigned long left = n - i;
		unsigned long size = left;
		unsigned long remap_range_size;

		/* Do not remap pages beyond the current allocation */
		if (cur_pfn >= nr_pages) {
			/* Identity map remaining pages */
			*identity += set_phys_range_identity(cur_pfn,
				cur_pfn + size);
			break;
		}
		if (cur_pfn + size > nr_pages)
			size = nr_pages - cur_pfn;

		remap_range_size = xen_find_pfn_range(list, map_size,
						      &remap_pfn);
		if (!remap_range_size) {
			pr_warning("Unable to find available pfn range, not remapping identity pages\n");
			xen_set_identity_and_release_chunk(cur_pfn,
				cur_pfn + left, nr_pages, identity, released);
			break;
		}
		/* Adjust size to fit in current e820 RAM region */
		if (size > remap_range_size)
			size = remap_range_size;

		xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn);

		/* Update variables to reflect new mappings. */
		i += size;
		remap_pfn += size;
		*identity += size;
	}

	/*
	 * If the PFNs are currently mapped, the VA mapping also needs
	 * to be updated to be 1:1.
	 */
	for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
		(void)HYPERVISOR_update_va_mapping(
			(unsigned long)__va(pfn << PAGE_SHIFT),
			mfn_pte(pfn, PAGE_KERNEL_IO), 0);

	return remap_pfn;
}

static void __init xen_set_identity_and_remap(
	const struct e820entry *list, size_t map_size, unsigned long nr_pages,
	unsigned long *released)
{
	phys_addr_t start = 0;
	unsigned long identity = 0;
	unsigned long last_pfn = nr_pages;
	const struct e820entry *entry;
	unsigned long num_released = 0;
	int i;

	/*
	 * Combine non-RAM regions and gaps until a RAM region (or the
	 * end of the map) is reached, then set the 1:1 map and
	 * remap the memory in those non-RAM regions.
	 *
	 * The combined non-RAM regions are rounded to a whole number
	 * of pages so any partial pages are accessible via the 1:1
	 * mapping.  This is needed for some BIOSes that put (for
	 * example) the DMI tables in a reserved region that begins on
	 * a non-page boundary.
	 */
	for (i = 0, entry = list; i < map_size; i++, entry++) {
		phys_addr_t end = entry->addr + entry->size;
		if (entry->type == E820_RAM || i == map_size - 1) {
			unsigned long start_pfn = PFN_DOWN(start);
			unsigned long end_pfn = PFN_UP(end);

			if (entry->type == E820_RAM)
				end_pfn = PFN_UP(entry->addr);

			if (start_pfn < end_pfn)
				last_pfn = xen_set_identity_and_remap_chunk(
						list, map_size, start_pfn,
						end_pfn, nr_pages, last_pfn,
						&identity, &num_released);
			start = end;
		}
	}

	*released = num_released;

	pr_info("Set %ld page(s) to 1-1 mapping\n", identity);
	pr_info("Released %ld page(s)\n", num_released);
}
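
/*
 * Illustrative example of the pass above (all numbers invented): assume
 * nr_pages == 0x80000 (a 2 GiB initial allocation) and an E820 map of
 *
 *   [0x00000000, 0x000a0000)  RAM
 *   [0x000a0000, 0x00100000)  reserved
 *   [0x00100000, 0xc0000000)  RAM
 *   [0xc0000000, 0x100000000) reserved
 *
 * The hole [0xa0000, 0x100000) lies below nr_pages, so its 96 pfns are
 * set to a 1:1 mapping and the RAM frames currently backing them are
 * queued to be remapped at pfns starting from last_pfn (initially
 * nr_pages, i.e. 0x80000, which xen_find_pfn_range() confirms is RAM).
 * The reserved range at 3 GiB starts above nr_pages, so it is only
 * identity mapped; there is no allocation behind it to move.
 */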

/*
 * Remap the memory prepared in xen_do_set_identity_and_remap_chunk().
 * The remap information (which mfn is remapped to which pfn) is contained
 * in the memory to be remapped itself, in a linked list anchored at
 * xen_remap_mfn.
 * This scheme allows the different chunks to be remapped in arbitrary
 * order while the resulting mapping stays independent of that order.
 */
void __init xen_remap_memory(void)
{
	unsigned long buf = (unsigned long)&xen_remap_buf;
	unsigned long mfn_save, mfn, pfn;
	unsigned long remapped = 0;
	unsigned int i;
	unsigned long pfn_s = ~0UL;
	unsigned long len = 0;

	mfn_save = virt_to_mfn(buf);

	while (xen_remap_mfn != INVALID_P2M_ENTRY) {
		/* Map the remap information */
		set_pte_mfn(buf, xen_remap_mfn, PAGE_KERNEL);

		BUG_ON(xen_remap_mfn != xen_remap_buf.mfns[0]);

		pfn = xen_remap_buf.target_pfn;
		for (i = 0; i < xen_remap_buf.size; i++) {
			mfn = xen_remap_buf.mfns[i];
			xen_update_mem_tables(pfn, mfn);
			remapped++;
			pfn++;
		}
		if (pfn_s == ~0UL || pfn == pfn_s) {
			pfn_s = xen_remap_buf.target_pfn;
			len += xen_remap_buf.size;
		} else if (pfn_s + len == xen_remap_buf.target_pfn) {
			len += xen_remap_buf.size;
		} else {
			xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len));
			pfn_s = xen_remap_buf.target_pfn;
			len = xen_remap_buf.size;
		}

		mfn = xen_remap_mfn;
		xen_remap_mfn = xen_remap_buf.next_area_mfn;
	}

	if (pfn_s != ~0UL && len)
		xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len));

	set_pte_mfn(buf, mfn_save, PAGE_KERNEL);

	pr_info("Remapped %ld page(s)\n", remapped);
}

static unsigned long __init xen_get_max_pages(void)
{
	unsigned long max_pages = MAX_DOMAIN_PAGES;
	domid_t domid = DOMID_SELF;
	int ret;

	/*
	 * For the initial domain we use the maximum reservation as
	 * the maximum number of pages.
	 *
	 * For guest domains the current maximum reservation reflects
	 * the current maximum rather than the static maximum. In this
	 * case the e820 map provided to us will cover the static
	 * maximum region.
	 */
	if (xen_initial_domain()) {
		ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
		if (ret > 0)
			max_pages = ret;
	}

	return min(max_pages, MAX_DOMAIN_PAGES);
}

static void xen_align_and_add_e820_region(u64 start, u64 size, int type)
{
	u64 end = start + size;

	/* Align RAM regions to page boundaries. */
	if (type == E820_RAM) {
		start = PAGE_ALIGN(start);
		end &= ~((u64)PAGE_SIZE - 1);
	}

	e820_add_region(start, end - start, type);
}
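
/*
 * Example of the rounding above (addresses invented): a RAM region
 * passed in as [0x10000200, 0x10003200) is trimmed to the fully
 * page-backed range [0x10001000, 0x10003000) before being added,
 * while non-RAM regions are added unchanged.
 */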

void xen_ignore_unusable(struct e820entry *list, size_t map_size)
{
	struct e820entry *entry;
	unsigned int i;

	for (i = 0, entry = list; i < map_size; i++, entry++) {
		if (entry->type == E820_UNUSABLE)
			entry->type = E820_RAM;
	}
}

/**
 * machine_specific_memory_setup - Hook for machine specific memory setup.
 **/
char * __init xen_memory_setup(void)
{
	static struct e820entry map[E820MAX] __initdata;

	unsigned long max_pfn = xen_start_info->nr_pages;
	unsigned long long mem_end;
	int rc;
	struct xen_memory_map memmap;
	unsigned long max_pages;
	unsigned long extra_pages = 0;
	int i;
	int op;

	max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
	mem_end = PFN_PHYS(max_pfn);

	memmap.nr_entries = E820MAX;
	set_xen_guest_handle(memmap.buffer, map);

	op = xen_initial_domain() ?
		XENMEM_machine_memory_map :
		XENMEM_memory_map;
	rc = HYPERVISOR_memory_op(op, &memmap);
	if (rc == -ENOSYS) {
		BUG_ON(xen_initial_domain());
		memmap.nr_entries = 1;
		map[0].addr = 0ULL;
		map[0].size = mem_end;
		/* 8MB slack (to balance backend allocations). */
		map[0].size += 8ULL << 20;
		map[0].type = E820_RAM;
		rc = 0;
	}
	BUG_ON(rc);
	BUG_ON(memmap.nr_entries == 0);

	/*
	 * Xen won't allow a 1:1 mapping to be created to UNUSABLE
	 * regions, so if we're using the machine memory map leave the
	 * region as RAM as it is in the pseudo-physical map.
	 *
	 * UNUSABLE regions in domUs are not handled and will need
	 * a patch in the future.
	 */
	if (xen_initial_domain())
		xen_ignore_unusable(map, memmap.nr_entries);

	/* Make sure the Xen-supplied memory map is well-ordered. */
	sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);

	max_pages = xen_get_max_pages();
	if (max_pages > max_pfn)
		extra_pages += max_pages - max_pfn;

	/*
	 * Set identity map on non-RAM pages and prepare remapping the
	 * underlying RAM.
	 */
	xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
				   &xen_released_pages);

	extra_pages += xen_released_pages;

	/*
	 * Clamp the amount of extra memory to an EXTRA_MEM_RATIO
	 * factor of the base size.  On non-highmem systems, the base
	 * size is the full initial memory allocation; on highmem it
	 * is limited to the max size of lowmem, so that it doesn't
	 * get completely filled.
	 *
	 * In principle there could be a problem in lowmem systems if
	 * the initial memory is also very large with respect to
	 * lowmem, but we won't try to deal with that here.
	 */
	extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
			  extra_pages);
	i = 0;
	while (i < memmap.nr_entries) {
		u64 addr = map[i].addr;
		u64 size = map[i].size;
		u32 type = map[i].type;

		if (type == E820_RAM) {
			if (addr < mem_end) {
				size = min(size, mem_end - addr);
			} else if (extra_pages) {
				size = min(size, (u64)extra_pages * PAGE_SIZE);
				extra_pages -= size / PAGE_SIZE;
				xen_add_extra_mem(addr, size);
				xen_max_p2m_pfn = PFN_DOWN(addr + size);
			} else
				type = E820_UNUSABLE;
		}

		xen_align_and_add_e820_region(addr, size, type);

		map[i].addr += size;
		map[i].size -= size;
		if (map[i].size == 0)
			i++;
	}

	/*
	 * Set the rest as identity mapped, in case PCI BARs are
	 * located here.
	 *
	 * PFNs above MAX_P2M_PFN are considered identity mapped as
	 * well.
	 */
	set_phys_range_identity(map[i-1].addr / PAGE_SIZE, ~0ul);

	/*
	 * In domU, the ISA region is normal, usable memory, but we
	 * reserve ISA memory anyway because too many things poke
	 * about in there.
	 */
	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
			E820_RESERVED);

	/*
	 * Reserve Xen bits:
	 *  - mfn_list
	 *  - xen_start_info
	 * See comment above "struct start_info" in <xen/interface/xen.h>
	 * We tried to make the memblock_reserve more selective so
	 * that it would be clear what region is reserved. Sadly we ran
	 * into the problem wherein on a 64-bit hypervisor with a 32-bit
	 * initial domain, the pt_base has the cr3 value which is not
	 * necessarily where the pagetable starts! As Jan put it: "
	 * Actually, the adjustment turns out to be correct: The page
	 * tables for a 32-on-64 dom0 get allocated in the order "first L1",
	 * "first L2", "first L3", so the offset to the page table base is
	 * indeed 2. When reading xen/include/public/xen.h's comment
	 * very strictly, this is not a violation (since there nothing is said
	 * that the first thing in the page table space is pointed to by
	 * pt_base; I admit that this seems to be implied though, namely
	 * do I think that it is implied that the page table space is the
	 * range [pt_base, pt_base + nr_pt_frames), whereas that
	 * range here indeed is [pt_base - 2, pt_base - 2 + nr_pt_frames),
	 * which - without a priori knowledge - the kernel would have
	 * difficulty to figure out)." - so let's just fall back to the
	 * easy way and reserve the whole region.
	 */
	memblock_reserve(__pa(xen_start_info->mfn_list),
			 xen_start_info->pt_base - xen_start_info->mfn_list);

	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);

	return "Xen";
}
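
/*
 * Illustrative example of the splitting loop in xen_memory_setup()
 * (numbers invented): with mem_end == 0x40000000 (1 GiB) and a RAM
 * entry [0x00100000, 0x80000000), the first iteration clips size to
 * mem_end - addr and adds [0x00100000, 0x40000000) as usable RAM.  The
 * entry is then advanced to [0x40000000, 0x80000000); as long as
 * extra_pages remain, that tail is registered via xen_add_extra_mem()
 * (up to extra_pages * PAGE_SIZE of it), and anything left once the
 * budget is exhausted ends up as E820_UNUSABLE.
 */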

/*
 * Machine specific memory setup for auto-translated guests.
 */
char * __init xen_auto_xlated_memory_setup(void)
{
	static struct e820entry map[E820MAX] __initdata;

	struct xen_memory_map memmap;
	int i;
	int rc;

	memmap.nr_entries = E820MAX;
	set_xen_guest_handle(memmap.buffer, map);

	rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
	if (rc < 0)
		panic("No memory map (%d)\n", rc);

	sanitize_e820_map(map, ARRAY_SIZE(map), &memmap.nr_entries);

	for (i = 0; i < memmap.nr_entries; i++)
		e820_add_region(map[i].addr, map[i].size, map[i].type);

	memblock_reserve(__pa(xen_start_info->mfn_list),
			 xen_start_info->pt_base - xen_start_info->mfn_list);

	return "Xen";
}

/*
 * Set the bit indicating "nosegneg" library variants should be used.
 * We only need to bother in pure 32-bit mode; compat 32-bit processes
 * can have un-truncated segments, so wrapping around is allowed.
 */
static void __init fiddle_vdso(void)
{
#ifdef CONFIG_X86_32
	/*
	 * This could be called before selected_vdso32 is initialized, so
	 * just fiddle with both possible images.  vdso_image_32_syscall
	 * can't be selected, since it only exists on 64-bit systems.
	 */
	u32 *mask;
	mask = vdso_image_32_int80.data +
		vdso_image_32_int80.sym_VDSO32_NOTE_MASK;
	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
	mask = vdso_image_32_sysenter.data +
		vdso_image_32_sysenter.sym_VDSO32_NOTE_MASK;
	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
#endif
}

static int register_callback(unsigned type, const void *func)
{
	struct callback_register callback = {
		.type = type,
		.address = XEN_CALLBACK(__KERNEL_CS, func),
		.flags = CALLBACKF_mask_events,
	};

	return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
}

void xen_enable_sysenter(void)
{
	int ret;
	unsigned sysenter_feature;

#ifdef CONFIG_X86_32
	sysenter_feature = X86_FEATURE_SEP;
#else
	sysenter_feature = X86_FEATURE_SYSENTER32;
#endif

	if (!boot_cpu_has(sysenter_feature))
		return;

	ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
	if (ret != 0)
		setup_clear_cpu_cap(sysenter_feature);
}

void xen_enable_syscall(void)
{
#ifdef CONFIG_X86_64
	int ret;

	ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
	if (ret != 0) {
		printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
		/* Pretty fatal; 64-bit userspace has no other
		   mechanism for syscalls. */
	}

	if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
		ret = register_callback(CALLBACKTYPE_syscall32,
					xen_syscall32_target);
		if (ret != 0)
			setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
	}
#endif /* CONFIG_X86_64 */
}

void __init xen_pvmmu_arch_setup(void)
{
	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);

	HYPERVISOR_vm_assist(VMASST_CMD_enable,
			     VMASST_TYPE_pae_extended_cr3);

	if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
	    register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
		BUG();

	xen_enable_sysenter();
	xen_enable_syscall();
}

/* This function is not called for HVM domains */
void __init xen_arch_setup(void)
{
	xen_panic_handler_init();
	if (!xen_feature(XENFEAT_auto_translated_physmap))
		xen_pvmmu_arch_setup();

#ifdef CONFIG_ACPI
	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
		printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
		disable_acpi();
	}
#endif

	memcpy(boot_command_line, xen_start_info->cmd_line,
	       MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
	       COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);

	/* Set up idle, making sure it calls safe_halt() pvop */
	disable_cpuidle();
	disable_cpufreq();
	WARN_ON(xen_set_default_idle());
	fiddle_vdso();
#ifdef CONFIG_NUMA
	numa_off = 1;
#endif
}