/*
 * linux/mm/page_isolation.c
 */

#include <linux/mm.h>
#include <linux/page-isolation.h>
#include <linux/pageblock-flags.h>
#include <linux/memory.h>
#include <linux/hugetlb.h>
#include <linux/page_owner.h>
#include "internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/page_isolation.h>

static int set_migratetype_isolate(struct page *page,
				bool skip_hwpoisoned_pages)
{
	struct zone *zone;
	unsigned long flags, pfn;
	struct memory_isolate_notify arg;
	int notifier_ret;
	int ret = -EBUSY;

	zone = page_zone(page);

	spin_lock_irqsave(&zone->lock, flags);

	pfn = page_to_pfn(page);
	arg.start_pfn = pfn;
	arg.nr_pages = pageblock_nr_pages;
	arg.pages_found = 0;

	/*
	 * It may be possible to isolate a pageblock even if the
	 * migratetype is not MIGRATE_MOVABLE. The memory isolation
	 * notifier chain is used by balloon drivers to return the
	 * number of pages in a range that are held by the balloon
	 * driver to shrink memory. If all the pages are accounted for
	 * by balloons, are free, or on the LRU, isolation can continue.
	 * Later, for example, when the memory hotplug notifier runs,
	 * the pages reported as "can be isolated" should be isolated
	 * (freed) by the balloon driver through the memory notifier chain.
	 */
	notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
	notifier_ret = notifier_to_errno(notifier_ret);
	if (notifier_ret)
		goto out;
	/*
	 * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself.
	 * We just check MOVABLE pages.
	 */
	if (!has_unmovable_pages(zone, page, arg.pages_found,
				 skip_hwpoisoned_pages))
		ret = 0;

	/*
	 * "Immobile" means "not on the LRU" pages. If there are more
	 * immobile pages than removable-by-driver pages reported by the
	 * notifier, we will fail.
	 */

out:
	if (!ret) {
		unsigned long nr_pages;
		int migratetype = get_pageblock_migratetype(page);

		set_pageblock_migratetype(page, MIGRATE_ISOLATE);
		zone->nr_isolate_pageblock++;
		nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE);

		__mod_zone_freepage_state(zone, -nr_pages, migratetype);
	}

	spin_unlock_irqrestore(&zone->lock, flags);
	if (!ret)
		drain_all_pages(zone);
	return ret;
}
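
/*
 * Illustrative sketch (not part of this file): a balloon driver that wants
 * set_migratetype_isolate() to succeed on pageblocks it has inflated into
 * would hook the memory isolate notifier chain roughly as below.
 * my_balloon_isolate_cb(), my_count_balloon_pages() and
 * my_balloon_isolate_nb are hypothetical names; the in-tree user of this
 * chain is the pseries collaborative memory manager
 * (arch/powerpc/platforms/pseries/cmm.c).
 *
 *	static int my_balloon_isolate_cb(struct notifier_block *nb,
 *					 unsigned long action, void *data)
 *	{
 *		struct memory_isolate_notify *arg = data;
 *
 *		if (action == MEM_ISOLATE_COUNT)
 *			arg->pages_found +=
 *				my_count_balloon_pages(arg->start_pfn,
 *						       arg->nr_pages);
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_balloon_isolate_nb = {
 *		.notifier_call = my_balloon_isolate_cb,
 *	};
 *
 *	register_memory_isolate_notifier(&my_balloon_isolate_nb);
 */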

static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
{
	struct zone *zone;
	unsigned long flags, nr_pages;
	bool isolated_page = false;
	unsigned int order;
	unsigned long page_idx, buddy_idx;
	struct page *buddy;

	zone = page_zone(page);
	spin_lock_irqsave(&zone->lock, flags);
	if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
		goto out;

	/*
	 * Because a free page larger than pageblock_order on an isolated
	 * pageblock is prevented from merging due to the freepage counting
	 * problem, it is possible that a free buddy page exists here.
	 * move_freepages_block() does not handle merging, so we need a
	 * different approach: isolating the page and freeing it again
	 * makes the merge happen.
	 */
	if (PageBuddy(page)) {
		order = page_order(page);
		if (order >= pageblock_order) {
			page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
			buddy_idx = __find_buddy_index(page_idx, order);
			buddy = page + (buddy_idx - page_idx);

			if (pfn_valid_within(page_to_pfn(buddy)) &&
			    !is_migrate_isolate_page(buddy)) {
				__isolate_free_page(page, order);
				isolated_page = true;
			}
		}
	}

	/*
	 * If we isolated a free page of more than pageblock_order above,
	 * there should be no other free page left in the range, so we can
	 * avoid the costly pageblock scan for moving free pages.
	 */
	if (!isolated_page) {
		nr_pages = move_freepages_block(zone, page, migratetype);
		__mod_zone_freepage_state(zone, nr_pages, migratetype);
	}
	set_pageblock_migratetype(page, migratetype);
	zone->nr_isolate_pageblock--;
out:
	spin_unlock_irqrestore(&zone->lock, flags);
	if (isolated_page) {
		post_alloc_hook(page, order, __GFP_MOVABLE);
		__free_pages(page, order);
	}
}

static inline struct page *
__first_valid_page(unsigned long pfn, unsigned long nr_pages)
{
	int i;

	/* Return the first valid page in [pfn, pfn + nr_pages), or NULL. */
	for (i = 0; i < nr_pages; i++)
		if (pfn_valid_within(pfn + i))
			break;
	if (unlikely(i == nr_pages))
		return NULL;
	return pfn_to_page(pfn + i);
}

/*
 * start_isolate_page_range() -- make the page allocation type of a range
 * of pages MIGRATE_ISOLATE.
 * @start_pfn: The lower PFN of the range to be isolated.
 * @end_pfn: The upper PFN of the range to be isolated.
 * @migratetype: migrate type to set in error recovery.
 *
 * Making the page allocation type MIGRATE_ISOLATE means that free pages in
 * the range will never be allocated, and pages freed in the range later will
 * not be handed out again either.
 *
 * start_pfn/end_pfn must be aligned to pageblock_order.
 * Returns 0 on success and -EBUSY if any part of the range cannot be
 * isolated.
 */
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
			     unsigned migratetype, bool skip_hwpoisoned_pages)
{
	unsigned long pfn;
	unsigned long undo_pfn;
	struct page *page;

	BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages));
	BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages));

	for (pfn = start_pfn;
	     pfn < end_pfn;
	     pfn += pageblock_nr_pages) {
		page = __first_valid_page(pfn, pageblock_nr_pages);
		if (page &&
		    set_migratetype_isolate(page, skip_hwpoisoned_pages)) {
			undo_pfn = pfn;
			goto undo;
		}
	}
	return 0;
undo:
	for (pfn = start_pfn;
	     pfn < undo_pfn;
	     pfn += pageblock_nr_pages)
		unset_migratetype_isolate(pfn_to_page(pfn), migratetype);

	return -EBUSY;
}

/*
 * Make isolated pages available again.
 */
int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
			    unsigned migratetype)
{
	unsigned long pfn;
	struct page *page;

	BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages));
	BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages));

	for (pfn = start_pfn;
	     pfn < end_pfn;
	     pfn += pageblock_nr_pages) {
		page = __first_valid_page(pfn, pageblock_nr_pages);
		if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
			continue;
		unset_migratetype_isolate(page, migratetype);
	}
	return 0;
}
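
/*
 * Illustrative sketch (not part of this file): the expected calling sequence
 * for start_isolate_page_range()/undo_isolate_page_range() above together
 * with test_pages_isolated() below, roughly as alloc_contig_range() in
 * mm/page_alloc.c and memory hot-remove use them.  my_migrate_range() stands
 * in for the caller's own loop that migrates pages still in use and is
 * hypothetical.
 *
 *	ret = start_isolate_page_range(start_pfn, end_pfn,
 *				       MIGRATE_MOVABLE, false);
 *	if (ret)
 *		return ret;
 *
 *	ret = my_migrate_range(start_pfn, end_pfn);
 *	if (!ret)
 *		ret = test_pages_isolated(start_pfn, end_pfn, false);
 *
 *	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
 *	return ret;
 */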

/*
 * Test whether all pages in the range are free (i.e. isolated).
 * All pages in [start_pfn...end_pfn) must be in the same zone, and
 * zone->lock must be held before calling this.
 *
 * Returns the last tested pfn.
 */
static unsigned long
__test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn,
				  bool skip_hwpoisoned_pages)
{
	struct page *page;

	while (pfn < end_pfn) {
		if (!pfn_valid_within(pfn)) {
			pfn++;
			continue;
		}
		page = pfn_to_page(pfn);
		if (PageBuddy(page))
			/*
			 * If the page is on a free list, it has to be on
			 * the correct MIGRATE_ISOLATE freelist. There is no
			 * simple way to verify that as VM_BUG_ON(), though.
			 */
			pfn += 1 << page_order(page);
		else if (skip_hwpoisoned_pages && PageHWPoison(page))
			/* A HWPoisoned page cannot also be PageBuddy */
			pfn++;
		else
			break;
	}

	return pfn;
}

/* Caller should ensure that the requested range is in a single zone */
int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
			bool skip_hwpoisoned_pages)
{
	unsigned long pfn, flags;
	struct page *page;
	struct zone *zone;

	/*
	 * Note: pageblock_nr_pages != MAX_ORDER, so chunks of free pages
	 * are not necessarily aligned to pageblock_nr_pages.
	 * We therefore just check the pageblock migratetype first.
	 */
	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		page = __first_valid_page(pfn, pageblock_nr_pages);
		if (page && get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
			break;
	}
	page = __first_valid_page(start_pfn, end_pfn - start_pfn);
	if ((pfn < end_pfn) || !page)
		return -EBUSY;
	/* Check that all pages are either free or marked MIGRATE_ISOLATE */
	zone = page_zone(page);
	spin_lock_irqsave(&zone->lock, flags);
	pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn,
						skip_hwpoisoned_pages);
	spin_unlock_irqrestore(&zone->lock, flags);

	trace_test_pages_isolated(start_pfn, end_pfn, pfn);

	return pfn < end_pfn ? -EBUSY : 0;
}

struct page *alloc_migrate_target(struct page *page, unsigned long private,
				  int **resultp)
{
	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;

	/*
	 * TODO: allocate a destination hugepage from the nearest neighbour
	 * node, in accordance with the memory policy of the user process,
	 * if possible. For now, as a simple work-around, we use the next
	 * node as the destination.
	 */
	if (PageHuge(page))
		return alloc_huge_page_node(page_hstate(compound_head(page)),
					    next_node_in(page_to_nid(page),
							 node_online_map));

	if (PageHighMem(page))
		gfp_mask |= __GFP_HIGHMEM;

	return alloc_page(gfp_mask);
}
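
/*
 * Illustrative sketch (not part of this file): alloc_migrate_target()
 * matches the new_page_t callback type, so it is meant to be handed to
 * migrate_pages() once the pages to be moved have been collected on a list,
 * roughly as do_migrate_range() in mm/memory_hotplug.c does when draining a
 * to-be-offlined range.  The fragment below is hypothetical and assumes
 * pagelist has been filled by isolate_lru_page() or a similar helper.
 *
 *	LIST_HEAD(pagelist);
 *	int ret;
 *
 *	... isolate the pages to be moved onto &pagelist ...
 *
 *	ret = migrate_pages(&pagelist, alloc_migrate_target, NULL, 0,
 *			    MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
 *	if (ret)
 *		putback_movable_pages(&pagelist);
 */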