xref: /openbmc/linux/mm/highmem.c (revision 1b69c6d0ae90b7f1a4f61d5c8209d5cb7a55f849)
/*
 * High memory handling common code and variables.
 *
 * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
 *          Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
 *
 *
 * Redesigned the x86 32-bit VM architecture to deal with
 * 64-bit physical space. With current x86 CPUs this
 * means up to 64 Gigabytes physical RAM.
 *
 * Rewrote high memory support to move the page cache into
 * high memory. Implemented permanent (schedulable) kmaps
 * based on Linus' idea.
 *
 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
 */

#include <linux/mm.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/pagemap.h>
#include <linux/mempool.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/highmem.h>
#include <linux/kgdb.h>
#include <asm/tlbflush.h>


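/*
 * Per-CPU nesting index used by the kmap_atomic() machinery to select the
 * fixmap slot for atomic kmaps on this CPU.
 */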
#if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
DEFINE_PER_CPU(int, __kmap_atomic_idx);
#endif

/*
 * The pkmap_count entries are not pure "counts":
 *  0 means the slot is not mapped, and has not been mapped
 *    since the last TLB flush - it is usable.
 *  1 means there are no users, but the slot has been mapped
 *    since the last TLB flush - so we can't reuse it yet.
 *  n means there are (n-1) current users of the slot.
 */
#ifdef CONFIG_HIGHMEM

/*
 * Architectures with an aliasing data cache may define the following family
 * of helper functions in their asm/highmem.h to control the cache color of
 * the virtual addresses where physical memory pages are mapped by kmap.
 */
#ifndef get_pkmap_color

/*
 * Determine the color of the virtual address where the page should be mapped.
 */
static inline unsigned int get_pkmap_color(struct page *page)
{
	return 0;
}
#define get_pkmap_color get_pkmap_color

/*
 * Get the next index for a mapping of the given color inside the PKMAP region.
 */
static inline unsigned int get_next_pkmap_nr(unsigned int color)
{
	static unsigned int last_pkmap_nr;

	last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK;
	return last_pkmap_nr;
}

/*
 * Determine whether the page index inside the PKMAP region (pkmap_nr) of the
 * given color has wrapped around the end of the region.  When this happens,
 * an attempt is made to flush all unused PKMAP slots.
 */
static inline int no_more_pkmaps(unsigned int pkmap_nr, unsigned int color)
{
	return pkmap_nr == 0;
}

/*
 * Get the number of PKMAP entries of the given color.  If no free slot is
 * found after checking that many entries, kmap will sleep waiting for
 * someone to call kunmap and free a PKMAP slot.
 */
static inline int get_pkmap_entries_count(unsigned int color)
{
	return LAST_PKMAP;
}

/*
 * Get the head of the wait queue for PKMAP entries of the given color.
 * Wait queues for different mapping colors should be independent, to avoid
 * unnecessary wakeups caused by the freeing of slots of other colors.
 */
static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color)
{
	static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait);

	return &pkmap_map_wait;
}
#endif
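/*
 * Illustrative sketch only (not part of this file): an architecture with a
 * virtually indexed, aliasing D-cache might override the helpers above in
 * its asm/highmem.h along these lines, deriving the color from the page's
 * physical frame so that the kmap alias shares the cache color of the
 * page's other mappings:
 *
 *	static inline unsigned int get_pkmap_color(struct page *page)
 *	{
 *		return page_to_pfn(page) & (PKMAP_COLORS - 1);
 *	}
 *
 * where PKMAP_COLORS would be a hypothetical arch-specific constant giving
 * the number of cache colors the PKMAP window has to distinguish.
 */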

unsigned long totalhigh_pages __read_mostly;
EXPORT_SYMBOL(totalhigh_pages);


EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx);

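/*
 * Return the total number of free highmem pages: the free pages of
 * ZONE_HIGHMEM on every online node, plus those of ZONE_MOVABLE when that
 * zone is backed by highmem.
 */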
unsigned int nr_free_highpages(void)
{
	pg_data_t *pgdat;
	unsigned int pages = 0;

	for_each_online_pgdat(pgdat) {
		pages += zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
			NR_FREE_PAGES);
		if (zone_movable_is_highmem())
			pages += zone_page_state(
					&pgdat->node_zones[ZONE_MOVABLE],
					NR_FREE_PAGES);
	}

	return pages;
}

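/*
 * Per-slot reference counts for the PKMAP window (semantics described in
 * the comment near the top of the file).  Both pkmap_count[] and
 * pkmap_page_table are protected by kmap_lock.
 */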
static int pkmap_count[LAST_PKMAP];
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);

pte_t *pkmap_page_table;

/*
 * Most architectures have no use for kmap_high_get(), so let's abstract
 * the IRQ disabling out of the locking in that case, to avoid potentially
 * useless overhead.
 */
#ifdef ARCH_NEEDS_KMAP_HIGH_GET
#define lock_kmap()             spin_lock_irq(&kmap_lock)
#define unlock_kmap()           spin_unlock_irq(&kmap_lock)
#define lock_kmap_any(flags)    spin_lock_irqsave(&kmap_lock, flags)
#define unlock_kmap_any(flags)  spin_unlock_irqrestore(&kmap_lock, flags)
#else
#define lock_kmap()             spin_lock(&kmap_lock)
#define unlock_kmap()           spin_unlock(&kmap_lock)
#define lock_kmap_any(flags)    \
		do { spin_lock(&kmap_lock); (void)(flags); } while (0)
#define unlock_kmap_any(flags)  \
		do { spin_unlock(&kmap_lock); (void)(flags); } while (0)
#endif

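/*
 * Translate a kernel virtual address back to its struct page.  Addresses
 * inside the PKMAP window are resolved through pkmap_page_table; any other
 * address is assumed to be a lowmem address and handled by virt_to_page().
 */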
struct page *kmap_to_page(void *vaddr)
{
	unsigned long addr = (unsigned long)vaddr;

	if (addr >= PKMAP_ADDR(0) && addr < PKMAP_ADDR(LAST_PKMAP)) {
		int i = PKMAP_NR(addr);
		return pte_page(pkmap_page_table[i]);
	}

	return virt_to_page(addr);
}
EXPORT_SYMBOL(kmap_to_page);

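/*
 * Unmap every PKMAP slot whose count has dropped to 1 (mapped but unused)
 * and then, if any were cleared, flush the TLB once for the whole PKMAP
 * range.  Called with kmap_lock held.
 */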
static void flush_all_zero_pkmaps(void)
{
	int i;
	int need_flush = 0;

	flush_cache_kmaps();

	for (i = 0; i < LAST_PKMAP; i++) {
		struct page *page;

		/*
		 * zero means we don't have anything to do,
		 * >1 means that it is still in use. Only
		 * a count of 1 means that it is free but
		 * needs to be unmapped.
		 */
		if (pkmap_count[i] != 1)
			continue;
		pkmap_count[i] = 0;

		/* sanity check */
		BUG_ON(pte_none(pkmap_page_table[i]));

		/*
		 * Don't need an atomic fetch-and-clear op here;
		 * no-one has the page mapped, and cannot get at
		 * its virtual address (and hence PTE) without first
		 * getting the kmap_lock (which is held here).
		 * So no dangers, even with speculative execution.
		 */
		page = pte_page(pkmap_page_table[i]);
		pte_clear(&init_mm, PKMAP_ADDR(i), &pkmap_page_table[i]);

		set_page_address(page, NULL);
		need_flush = 1;
	}
	if (need_flush)
		flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP));
}

/**
 * kmap_flush_unused - flush all unused kmap mappings in order to remove stray mappings
 */
void kmap_flush_unused(void)
{
	lock_kmap();
	flush_all_zero_pkmaps();
	unlock_kmap();
}

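/*
 * Find a free PKMAP slot for @page and install its pte.  Called with
 * kmap_lock held; may drop the lock and sleep until another task frees a
 * slot via kunmap_high().
 */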
static inline unsigned long map_new_virtual(struct page *page)
{
	unsigned long vaddr;
	int count;
	unsigned int last_pkmap_nr;
	unsigned int color = get_pkmap_color(page);

start:
	count = get_pkmap_entries_count(color);
	/* Find an empty entry */
	for (;;) {
		last_pkmap_nr = get_next_pkmap_nr(color);
		if (no_more_pkmaps(last_pkmap_nr, color)) {
			flush_all_zero_pkmaps();
			count = get_pkmap_entries_count(color);
		}
		if (!pkmap_count[last_pkmap_nr])
			break;	/* Found a usable entry */
		if (--count)
			continue;

		/*
		 * Sleep until somebody else unmaps their entries
		 */
		{
			DECLARE_WAITQUEUE(wait, current);
			wait_queue_head_t *pkmap_map_wait =
				get_pkmap_wait_queue_head(color);

			__set_current_state(TASK_UNINTERRUPTIBLE);
			add_wait_queue(pkmap_map_wait, &wait);
			unlock_kmap();
			schedule();
			remove_wait_queue(pkmap_map_wait, &wait);
			lock_kmap();

			/* Somebody else might have mapped it while we slept */
			if (page_address(page))
				return (unsigned long)page_address(page);

			/* Re-start */
			goto start;
		}
	}
	vaddr = PKMAP_ADDR(last_pkmap_nr);
	set_pte_at(&init_mm, vaddr,
		   &(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));

	pkmap_count[last_pkmap_nr] = 1;
	set_page_address(page, (void *)vaddr);

	return vaddr;
}

/**
 * kmap_high - map a highmem page into memory
 * @page: &struct page to map
 *
 * Returns the page's virtual memory address.
 *
 * We cannot call this from interrupts, as it may block.
 */
void *kmap_high(struct page *page)
{
	unsigned long vaddr;

	/*
	 * For highmem pages, we can't trust the page's cached virtual
	 * address until after we have taken the lock.
	 */
	lock_kmap();
	vaddr = (unsigned long)page_address(page);
	if (!vaddr)
		vaddr = map_new_virtual(page);
	pkmap_count[PKMAP_NR(vaddr)]++;
	BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 2);
	unlock_kmap();
	return (void *)vaddr;
}

EXPORT_SYMBOL(kmap_high);
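/*
 * Typical usage sketch (not taken from this file): callers normally reach
 * kmap_high()/kunmap_high() through the kmap()/kunmap() wrappers and keep
 * the mapping only for as long as they touch the page:
 *
 *	void *addr = kmap(page);
 *	memcpy(addr, buffer, PAGE_SIZE);	// "buffer" is illustrative
 *	kunmap(page);
 */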

#ifdef ARCH_NEEDS_KMAP_HIGH_GET
/**
 * kmap_high_get - pin a highmem page into memory
 * @page: &struct page to pin
 *
 * Returns the page's current virtual memory address, or NULL if no mapping
 * exists.  If and only if a non-NULL address is returned then a
 * matching call to kunmap_high() is necessary.
 *
 * This can be called from any context.
 */
void *kmap_high_get(struct page *page)
{
	unsigned long vaddr, flags;

	lock_kmap_any(flags);
	vaddr = (unsigned long)page_address(page);
	if (vaddr) {
		BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 1);
		pkmap_count[PKMAP_NR(vaddr)]++;
	}
	unlock_kmap_any(flags);
	return (void *)vaddr;
}
#endif

/**
 * kunmap_high - unmap a highmem page
 * @page: &struct page to unmap
 *
 * If ARCH_NEEDS_KMAP_HIGH_GET is not defined then this may be called
 * only from user context.
 */
void kunmap_high(struct page *page)
{
	unsigned long vaddr;
	unsigned long nr;
	unsigned long flags;
	int need_wakeup;
	unsigned int color = get_pkmap_color(page);
	wait_queue_head_t *pkmap_map_wait;

	lock_kmap_any(flags);
	vaddr = (unsigned long)page_address(page);
	BUG_ON(!vaddr);
	nr = PKMAP_NR(vaddr);

	/*
	 * A count must never go down to zero
	 * without a TLB flush!
	 */
	need_wakeup = 0;
	switch (--pkmap_count[nr]) {
	case 0:
		BUG();
	case 1:
		/*
		 * Avoid an unnecessary wake_up() function call.
		 * The common case is pkmap_count[] == 1, but
		 * no waiters.
		 * The tasks queued in the wait-queue are guarded
		 * by both the lock in the wait-queue-head and by
		 * the kmap_lock.  As the kmap_lock is held here,
		 * no need for the wait-queue-head's lock.  Simply
		 * test if the queue is empty.
		 */
		pkmap_map_wait = get_pkmap_wait_queue_head(color);
		need_wakeup = waitqueue_active(pkmap_map_wait);
	}
	unlock_kmap_any(flags);

	/* do wake-up, if needed, race-free outside of the spin lock */
	if (need_wakeup)
		wake_up(pkmap_map_wait);
}

EXPORT_SYMBOL(kunmap_high);
#endif	/* CONFIG_HIGHMEM */

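/*
 * With HASHED_PAGE_VIRTUAL (set when CONFIG_HIGHMEM is enabled and the
 * architecture does not define WANT_PAGE_VIRTUAL), struct page carries no
 * cached virtual pointer, so page_address() for highmem pages is resolved
 * through the small hash table below, keyed by the struct page pointer.
 */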
#if defined(HASHED_PAGE_VIRTUAL)

#define PA_HASH_ORDER	7

/*
 * Describes one page->virtual association
 */
struct page_address_map {
	struct page *page;
	void *virtual;
	struct list_head list;
};

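/*
 * One statically allocated map entry per PKMAP slot; set_page_address()
 * selects the entry via PKMAP_NR() of the mapping's virtual address.
 */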
static struct page_address_map page_address_maps[LAST_PKMAP];

/*
 * Hash table bucket
 */
static struct page_address_slot {
	struct list_head lh;			/* List of page_address_maps */
	spinlock_t lock;			/* Protect this bucket's list */
} ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER];

static struct page_address_slot *page_slot(const struct page *page)
{
	return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)];
}

/**
 * page_address - get the mapped virtual address of a page
 * @page: &struct page to get the virtual address of
 *
 * Returns the page's virtual address.
 */
void *page_address(const struct page *page)
{
	unsigned long flags;
	void *ret;
	struct page_address_slot *pas;

	if (!PageHighMem(page))
		return lowmem_page_address(page);

	pas = page_slot(page);
	ret = NULL;
	spin_lock_irqsave(&pas->lock, flags);
	if (!list_empty(&pas->lh)) {
		struct page_address_map *pam;

		list_for_each_entry(pam, &pas->lh, list) {
			if (pam->page == page) {
				ret = pam->virtual;
				goto done;
			}
		}
	}
done:
	spin_unlock_irqrestore(&pas->lock, flags);
	return ret;
}

EXPORT_SYMBOL(page_address);

/**
 * set_page_address - set a page's virtual address
 * @page: &struct page to set
 * @virtual: virtual address to use
 */
void set_page_address(struct page *page, void *virtual)
{
	unsigned long flags;
	struct page_address_slot *pas;
	struct page_address_map *pam;

	BUG_ON(!PageHighMem(page));

	pas = page_slot(page);
	if (virtual) {		/* Add */
		pam = &page_address_maps[PKMAP_NR((unsigned long)virtual)];
		pam->page = page;
		pam->virtual = virtual;

		spin_lock_irqsave(&pas->lock, flags);
		list_add_tail(&pam->list, &pas->lh);
		spin_unlock_irqrestore(&pas->lock, flags);
	} else {		/* Remove */
		spin_lock_irqsave(&pas->lock, flags);
		list_for_each_entry(pam, &pas->lh, list) {
			if (pam->page == page) {
				list_del(&pam->list);
				spin_unlock_irqrestore(&pas->lock, flags);
				goto done;
			}
		}
		spin_unlock_irqrestore(&pas->lock, flags);
	}
done:
	return;
}

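/*
 * Initialise every bucket of the page->virtual hash table.  Runs once
 * during early boot, before the first highmem page can be kmapped.
 */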
void __init page_address_init(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(page_address_htable); i++) {
		INIT_LIST_HEAD(&page_address_htable[i].lh);
		spin_lock_init(&page_address_htable[i].lock);
	}
}

#endif	/* defined(HASHED_PAGE_VIRTUAL) */
489