/*
 * linux/mm/page_isolation.c
 */

#include <linux/mm.h>
#include <linux/page-isolation.h>
#include <linux/pageblock-flags.h>
#include <linux/memory.h>
#include <linux/hugetlb.h>
#include <linux/page_owner.h>
#include <linux/migrate.h>
#include "internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/page_isolation.h>

/*
 * Mark the pageblock containing @page as MIGRATE_ISOLATE and move its free
 * pages to the MIGRATE_ISOLATE freelist. Returns 0 on success, -EBUSY if
 * the pageblock contains pages that cannot be isolated.
 */
static int set_migratetype_isolate(struct page *page,
				bool skip_hwpoisoned_pages)
{
	struct zone *zone;
	unsigned long flags, pfn;
	struct memory_isolate_notify arg;
	int notifier_ret;
	int ret = -EBUSY;

	zone = page_zone(page);

	spin_lock_irqsave(&zone->lock, flags);

	pfn = page_to_pfn(page);
	arg.start_pfn = pfn;
	arg.nr_pages = pageblock_nr_pages;
	arg.pages_found = 0;

	/*
	 * It may be possible to isolate a pageblock even if the
	 * migratetype is not MIGRATE_MOVABLE. The memory isolation
	 * notifier chain is used by balloon drivers to return the
	 * number of pages in a range that are held by the balloon
	 * driver to shrink memory. If all the pages are accounted for
	 * by balloons, are free, or on the LRU, isolation can continue.
	 * Later, for example, when the memory hotplug notifier runs, these
	 * pages reported as "can be isolated" should be isolated (freed)
	 * by the balloon driver through the memory notifier chain.
	 */
	notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
	notifier_ret = notifier_to_errno(notifier_ret);
	if (notifier_ret)
		goto out;
	/*
	 * FIXME: memory hotplug does not call shrink_slab() by itself yet,
	 * so we only check MOVABLE pages here.
	 */
	if (!has_unmovable_pages(zone, page, arg.pages_found,
				 skip_hwpoisoned_pages))
		ret = 0;

	/*
	 * Here, "immobile" means pages that are not on the LRU. If there are
	 * more immobile pages than removable-by-driver pages reported by the
	 * notifier, isolation fails.
	 */

out:
	if (!ret) {
		unsigned long nr_pages;
		int migratetype = get_pageblock_migratetype(page);

		set_pageblock_migratetype(page, MIGRATE_ISOLATE);
		zone->nr_isolate_pageblock++;
		nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE,
						NULL);

		__mod_zone_freepage_state(zone, -nr_pages, migratetype);
	}

	spin_unlock_irqrestore(&zone->lock, flags);
	if (!ret)
		drain_all_pages(zone);
	return ret;
}
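
/*
 * Undo set_migratetype_isolate(): return the pageblock containing @page to
 * @migratetype. Free pages in the block are moved back to the matching
 * freelist; a free page that grew beyond pageblock_order while the block
 * was isolated is pulled off the free list and freed again so that it can
 * merge with its buddies.
 */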
static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
{
	struct zone *zone;
	unsigned long flags, nr_pages;
	bool isolated_page = false;
	unsigned int order;
	unsigned long pfn, buddy_pfn;
	struct page *buddy;

	zone = page_zone(page);
	spin_lock_irqsave(&zone->lock, flags);
	if (!is_migrate_isolate_page(page))
		goto out;

	/*
	 * Because a free page with order above pageblock_order is not
	 * allowed to merge while its pageblock is isolated (that would break
	 * the freepage accounting), there may be an unmerged free buddy page
	 * here. move_freepages_block() does not handle merging, so we need
	 * another approach: isolating the free page and freeing it again
	 * lets the buddies merge.
	 */
	if (PageBuddy(page)) {
		order = page_order(page);
		if (order >= pageblock_order) {
			pfn = page_to_pfn(page);
			buddy_pfn = __find_buddy_pfn(pfn, order);
			buddy = page + (buddy_pfn - pfn);

			if (pfn_valid_within(buddy_pfn) &&
			    !is_migrate_isolate_page(buddy)) {
				__isolate_free_page(page, order);
				isolated_page = true;
			}
		}
	}

	/*
	 * If we isolated a free page with order above pageblock_order, there
	 * is no free page left in this pageblock, so we can skip the costly
	 * pageblock scan for freepage moving.
	 */
	if (!isolated_page) {
		nr_pages = move_freepages_block(zone, page, migratetype, NULL);
		__mod_zone_freepage_state(zone, nr_pages, migratetype);
	}
	set_pageblock_migratetype(page, migratetype);
	zone->nr_isolate_pageblock--;
out:
	spin_unlock_irqrestore(&zone->lock, flags);
	if (isolated_page) {
		post_alloc_hook(page, order, __GFP_MOVABLE);
		__free_pages(page, order);
	}
}

/* Return the first online page in the pfn range, or NULL if there is none. */
static inline struct page *
__first_valid_page(unsigned long pfn, unsigned long nr_pages)
{
	int i;

	for (i = 0; i < nr_pages; i++) {
		struct page *page;

		if (!pfn_valid_within(pfn + i))
			continue;
		page = pfn_to_online_page(pfn + i);
		if (!page)
			continue;
		return page;
	}
	return NULL;
}
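
/*
 * Illustrative sketch (not part of this file) of how a caller is expected
 * to combine the entry points below, roughly following what
 * alloc_contig_range() does; start_pfn/end_pfn are assumed to be
 * pageblock-aligned:
 *
 *	if (start_isolate_page_range(start_pfn, end_pfn,
 *				     MIGRATE_MOVABLE, false))
 *		return -EBUSY;
 *	... migrate or reclaim all used pages in [start_pfn, end_pfn) ...
 *	ret = test_pages_isolated(start_pfn, end_pfn, false);
 *	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
 */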

/*
 * start_isolate_page_range() -- set the migratetype of a range of pageblocks
 * to MIGRATE_ISOLATE.
 * @start_pfn:		The lower PFN of the range to be isolated.
 * @end_pfn:		The upper PFN of the range to be isolated.
 * @migratetype:	The migratetype to restore if isolation fails partway.
 * @skip_hwpoisoned_pages: If true, HWPoisoned pages do not prevent isolation.
 *
 * Making the page allocation type MIGRATE_ISOLATE means that free pages in
 * the range will never be allocated. Any free pages and pages freed in the
 * future will not be allocated again.
 *
 * start_pfn/end_pfn must be aligned to pageblock_order.
 * Returns 0 on success and -EBUSY if any part of the range cannot be
 * isolated.
 */
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
			     unsigned migratetype, bool skip_hwpoisoned_pages)
{
	unsigned long pfn;
	unsigned long undo_pfn;
	struct page *page;

	BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages));
	BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages));

	for (pfn = start_pfn;
	     pfn < end_pfn;
	     pfn += pageblock_nr_pages) {
		page = __first_valid_page(pfn, pageblock_nr_pages);
		if (page &&
		    set_migratetype_isolate(page, skip_hwpoisoned_pages)) {
			undo_pfn = pfn;
			goto undo;
		}
	}
	return 0;
undo:
	for (pfn = start_pfn;
	     pfn < undo_pfn;
	     pfn += pageblock_nr_pages) {
		struct page *page = pfn_to_online_page(pfn);
		if (!page)
			continue;
		unset_migratetype_isolate(page, migratetype);
	}

	return -EBUSY;
}

/*
 * Make isolated pages available again.
 */
int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
			    unsigned migratetype)
{
	unsigned long pfn;
	struct page *page;

	BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages));
	BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages));

	for (pfn = start_pfn;
	     pfn < end_pfn;
	     pfn += pageblock_nr_pages) {
		page = __first_valid_page(pfn, pageblock_nr_pages);
		if (!page || !is_migrate_isolate_page(page))
			continue;
		unset_migratetype_isolate(page, migratetype);
	}
	return 0;
}

/*
 * Test whether all pages in the range are free (i.e. isolated).
 * All pages in [start_pfn...end_pfn) must be in the same zone.
 * zone->lock must be held before calling this.
 *
 * Returns the last tested pfn.
 */
static unsigned long
__test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn,
				  bool skip_hwpoisoned_pages)
{
	struct page *page;

	while (pfn < end_pfn) {
		if (!pfn_valid_within(pfn)) {
			pfn++;
			continue;
		}
		page = pfn_to_page(pfn);
		if (PageBuddy(page))
			/*
			 * If the page is on a free list, it has to be on
			 * the correct MIGRATE_ISOLATE freelist. There is no
			 * simple way to verify that with a VM_BUG_ON(),
			 * though.
			 */
			pfn += 1 << page_order(page);
		else if (skip_hwpoisoned_pages && PageHWPoison(page))
			/* A HWPoisoned page cannot also be PageBuddy */
			pfn++;
		else
			break;
	}

	return pfn;
}

/* Caller should ensure that the requested range is in a single zone */
int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
			bool skip_hwpoisoned_pages)
{
	unsigned long pfn, flags;
	struct page *page;
	struct zone *zone;

	/*
	 * Note: pageblock_nr_pages != MAX_ORDER, so chunks of free pages are
	 * not necessarily aligned to pageblock_nr_pages. Therefore check the
	 * migratetype of each pageblock first.
	 */
	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		page = __first_valid_page(pfn, pageblock_nr_pages);
		if (page && !is_migrate_isolate_page(page))
			break;
	}
	page = __first_valid_page(start_pfn, end_pfn - start_pfn);
	if ((pfn < end_pfn) || !page)
		return -EBUSY;
	/* Check all pages are free or marked as ISOLATED */
	zone = page_zone(page);
	spin_lock_irqsave(&zone->lock, flags);
	pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn,
						skip_hwpoisoned_pages);
	spin_unlock_irqrestore(&zone->lock, flags);

	trace_test_pages_isolated(start_pfn, end_pfn, pfn);

	return pfn < end_pfn ? -EBUSY : 0;
}

/* Allocate a target page for migration, preferring the local node. */
struct page *alloc_migrate_target(struct page *page, unsigned long private,
				  int **resultp)
{
	return new_page_nodemask(page, numa_node_id(), &node_states[N_MEMORY]);
}