xref: /openbmc/linux/drivers/xen/grant-table.c (revision 089a49b6)
1 /******************************************************************************
2  * grant_table.c
3  *
4  * Granting foreign access to our memory reservation.
5  *
6  * Copyright (c) 2005-2006, Christopher Clark
7  * Copyright (c) 2004-2005, K A Fraser
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License version 2
11  * as published by the Free Software Foundation; or, when distributed
12  * separately from the Linux kernel or incorporated into other
13  * software packages, subject to the following license:
14  *
15  * Permission is hereby granted, free of charge, to any person obtaining a copy
16  * of this source file (the "Software"), to deal in the Software without
17  * restriction, including without limitation the rights to use, copy, modify,
18  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19  * and to permit persons to whom the Software is furnished to do so, subject to
20  * the following conditions:
21  *
22  * The above copyright notice and this permission notice shall be included in
23  * all copies or substantial portions of the Software.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31  * IN THE SOFTWARE.
32  */
33 
34 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
35 
36 #include <linux/module.h>
37 #include <linux/sched.h>
38 #include <linux/mm.h>
39 #include <linux/slab.h>
40 #include <linux/vmalloc.h>
41 #include <linux/uaccess.h>
42 #include <linux/io.h>
43 #include <linux/delay.h>
44 #include <linux/hardirq.h>
45 
46 #include <xen/xen.h>
47 #include <xen/interface/xen.h>
48 #include <xen/page.h>
49 #include <xen/grant_table.h>
50 #include <xen/interface/memory.h>
51 #include <xen/hvc-console.h>
52 #include <asm/xen/hypercall.h>
53 #include <asm/xen/interface.h>
54 
55 #include <asm/pgtable.h>
56 #include <asm/sync_bitops.h>
57 
58 /* External tools reserve first few grant table entries. */
59 #define NR_RESERVED_ENTRIES 8
60 #define GNTTAB_LIST_END 0xffffffff
61 
62 static grant_ref_t **gnttab_list;
63 static unsigned int nr_grant_frames;
64 static unsigned int boot_max_nr_grant_frames;
65 static int gnttab_free_count;
66 static grant_ref_t gnttab_free_head;
67 static DEFINE_SPINLOCK(gnttab_list_lock);
68 unsigned long xen_hvm_resume_frames;
69 EXPORT_SYMBOL_GPL(xen_hvm_resume_frames);
70 
71 static union {
72 	struct grant_entry_v1 *v1;
73 	union grant_entry_v2 *v2;
74 	void *addr;
75 } gnttab_shared;
76 
77 /*This is a structure of function pointers for grant table*/
78 struct gnttab_ops {
79 	/*
80 	 * Mapping a list of frames for storing grant entries. Frames parameter
81 	 * is used to store grant table address when grant table being setup,
82 	 * nr_gframes is the number of frames to map grant table. Returning
83 	 * GNTST_okay means success and negative value means failure.
84 	 */
85 	int (*map_frames)(xen_pfn_t *frames, unsigned int nr_gframes);
86 	/*
87 	 * Release a list of frames which are mapped in map_frames for grant
88 	 * entry status.
89 	 */
90 	void (*unmap_frames)(void);
91 	/*
92 	 * Introducing a valid entry into the grant table, granting the frame of
93 	 * this grant entry to domain for accessing or transfering. Ref
94 	 * parameter is reference of this introduced grant entry, domid is id of
95 	 * granted domain, frame is the page frame to be granted, and flags is
96 	 * status of the grant entry to be updated.
97 	 */
98 	void (*update_entry)(grant_ref_t ref, domid_t domid,
99 			     unsigned long frame, unsigned flags);
100 	/*
101 	 * Stop granting a grant entry to domain for accessing. Ref parameter is
102 	 * reference of a grant entry whose grant access will be stopped,
103 	 * readonly is not in use in this function. If the grant entry is
104 	 * currently mapped for reading or writing, just return failure(==0)
105 	 * directly and don't tear down the grant access. Otherwise, stop grant
106 	 * access for this entry and return success(==1).
107 	 */
108 	int (*end_foreign_access_ref)(grant_ref_t ref, int readonly);
109 	/*
110 	 * Stop granting a grant entry to domain for transfer. Ref parameter is
111 	 * reference of a grant entry whose grant transfer will be stopped. If
112 	 * tranfer has not started, just reclaim the grant entry and return
113 	 * failure(==0). Otherwise, wait for the transfer to complete and then
114 	 * return the frame.
115 	 */
116 	unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref);
117 	/*
118 	 * Query the status of a grant entry. Ref parameter is reference of
119 	 * queried grant entry, return value is the status of queried entry.
120 	 * Detailed status(writing/reading) can be gotten from the return value
121 	 * by bit operations.
122 	 */
123 	int (*query_foreign_access)(grant_ref_t ref);
124 	/*
125 	 * Grant a domain to access a range of bytes within the page referred by
126 	 * an available grant entry. Ref parameter is reference of a grant entry
127 	 * which will be sub-page accessed, domid is id of grantee domain, frame
128 	 * is frame address of subpage grant, flags is grant type and flag
129 	 * information, page_off is offset of the range of bytes, and length is
130 	 * length of bytes to be accessed.
131 	 */
132 	void (*update_subpage_entry)(grant_ref_t ref, domid_t domid,
133 				     unsigned long frame, int flags,
134 				     unsigned page_off, unsigned length);
135 	/*
136 	 * Redirect an available grant entry on domain A to another grant
137 	 * reference of domain B, then allow domain C to use grant reference
138 	 * of domain B transitively. Ref parameter is an available grant entry
139 	 * reference on domain A, domid is id of domain C which accesses grant
140 	 * entry transitively, flags is grant type and flag information,
141 	 * trans_domid is id of domain B whose grant entry is finally accessed
142 	 * transitively, trans_gref is grant entry transitive reference of
143 	 * domain B.
144 	 */
145 	void (*update_trans_entry)(grant_ref_t ref, domid_t domid, int flags,
146 				   domid_t trans_domid, grant_ref_t trans_gref);
147 };
148 
149 static struct gnttab_ops *gnttab_interface;
150 
151 /*This reflects status of grant entries, so act as a global value*/
152 static grant_status_t *grstatus;
153 
154 static int grant_table_version;
155 static int grefs_per_grant_frame;
156 
157 static struct gnttab_free_callback *gnttab_free_callback_list;
158 
159 static int gnttab_expand(unsigned int req_entries);
160 
161 #define RPP (PAGE_SIZE / sizeof(grant_ref_t))
162 #define SPP (PAGE_SIZE / sizeof(grant_status_t))
163 
164 static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
165 {
166 	return &gnttab_list[(entry) / RPP][(entry) % RPP];
167 }
168 /* This can be used as an l-value */
169 #define gnttab_entry(entry) (*__gnttab_entry(entry))
170 
171 static int get_free_entries(unsigned count)
172 {
173 	unsigned long flags;
174 	int ref, rc = 0;
175 	grant_ref_t head;
176 
177 	spin_lock_irqsave(&gnttab_list_lock, flags);
178 
179 	if ((gnttab_free_count < count) &&
180 	    ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
181 		spin_unlock_irqrestore(&gnttab_list_lock, flags);
182 		return rc;
183 	}
184 
185 	ref = head = gnttab_free_head;
186 	gnttab_free_count -= count;
187 	while (count-- > 1)
188 		head = gnttab_entry(head);
189 	gnttab_free_head = gnttab_entry(head);
190 	gnttab_entry(head) = GNTTAB_LIST_END;
191 
192 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
193 
194 	return ref;
195 }
196 
197 static void do_free_callbacks(void)
198 {
199 	struct gnttab_free_callback *callback, *next;
200 
201 	callback = gnttab_free_callback_list;
202 	gnttab_free_callback_list = NULL;
203 
204 	while (callback != NULL) {
205 		next = callback->next;
206 		if (gnttab_free_count >= callback->count) {
207 			callback->next = NULL;
208 			callback->fn(callback->arg);
209 		} else {
210 			callback->next = gnttab_free_callback_list;
211 			gnttab_free_callback_list = callback;
212 		}
213 		callback = next;
214 	}
215 }
216 
217 static inline void check_free_callbacks(void)
218 {
219 	if (unlikely(gnttab_free_callback_list))
220 		do_free_callbacks();
221 }
222 
223 static void put_free_entry(grant_ref_t ref)
224 {
225 	unsigned long flags;
226 	spin_lock_irqsave(&gnttab_list_lock, flags);
227 	gnttab_entry(ref) = gnttab_free_head;
228 	gnttab_free_head = ref;
229 	gnttab_free_count++;
230 	check_free_callbacks();
231 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
232 }
233 
234 /*
235  * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2.
236  * Introducing a valid entry into the grant table:
237  *  1. Write ent->domid.
238  *  2. Write ent->frame:
239  *      GTF_permit_access:   Frame to which access is permitted.
240  *      GTF_accept_transfer: Pseudo-phys frame slot being filled by new
241  *                           frame, or zero if none.
242  *  3. Write memory barrier (WMB).
243  *  4. Write ent->flags, inc. valid type.
244  */
245 static void gnttab_update_entry_v1(grant_ref_t ref, domid_t domid,
246 				   unsigned long frame, unsigned flags)
247 {
248 	gnttab_shared.v1[ref].domid = domid;
249 	gnttab_shared.v1[ref].frame = frame;
250 	wmb();
251 	gnttab_shared.v1[ref].flags = flags;
252 }
253 
254 static void gnttab_update_entry_v2(grant_ref_t ref, domid_t domid,
255 				   unsigned long frame, unsigned flags)
256 {
257 	gnttab_shared.v2[ref].hdr.domid = domid;
258 	gnttab_shared.v2[ref].full_page.frame = frame;
259 	wmb();
260 	gnttab_shared.v2[ref].hdr.flags = GTF_permit_access | flags;
261 }
262 
263 /*
264  * Public grant-issuing interface functions
265  */
266 void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
267 				     unsigned long frame, int readonly)
268 {
269 	gnttab_interface->update_entry(ref, domid, frame,
270 			   GTF_permit_access | (readonly ? GTF_readonly : 0));
271 }
272 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
273 
274 int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
275 				int readonly)
276 {
277 	int ref;
278 
279 	ref = get_free_entries(1);
280 	if (unlikely(ref < 0))
281 		return -ENOSPC;
282 
283 	gnttab_grant_foreign_access_ref(ref, domid, frame, readonly);
284 
285 	return ref;
286 }
287 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
288 
289 static void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid,
290 					   unsigned long frame, int flags,
291 					   unsigned page_off, unsigned length)
292 {
293 	gnttab_shared.v2[ref].sub_page.frame = frame;
294 	gnttab_shared.v2[ref].sub_page.page_off = page_off;
295 	gnttab_shared.v2[ref].sub_page.length = length;
296 	gnttab_shared.v2[ref].hdr.domid = domid;
297 	wmb();
298 	gnttab_shared.v2[ref].hdr.flags =
299 				GTF_permit_access | GTF_sub_page | flags;
300 }
301 
302 int gnttab_grant_foreign_access_subpage_ref(grant_ref_t ref, domid_t domid,
303 					    unsigned long frame, int flags,
304 					    unsigned page_off,
305 					    unsigned length)
306 {
307 	if (flags & (GTF_accept_transfer | GTF_reading |
308 		     GTF_writing | GTF_transitive))
309 		return -EPERM;
310 
311 	if (gnttab_interface->update_subpage_entry == NULL)
312 		return -ENOSYS;
313 
314 	gnttab_interface->update_subpage_entry(ref, domid, frame, flags,
315 					       page_off, length);
316 
317 	return 0;
318 }
319 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage_ref);
320 
321 int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame,
322 					int flags, unsigned page_off,
323 					unsigned length)
324 {
325 	int ref, rc;
326 
327 	ref = get_free_entries(1);
328 	if (unlikely(ref < 0))
329 		return -ENOSPC;
330 
331 	rc = gnttab_grant_foreign_access_subpage_ref(ref, domid, frame, flags,
332 						     page_off, length);
333 	if (rc < 0) {
334 		put_free_entry(ref);
335 		return rc;
336 	}
337 
338 	return ref;
339 }
340 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage);
341 
342 bool gnttab_subpage_grants_available(void)
343 {
344 	return gnttab_interface->update_subpage_entry != NULL;
345 }
346 EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available);
347 
348 static void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid,
349 					 int flags, domid_t trans_domid,
350 					 grant_ref_t trans_gref)
351 {
352 	gnttab_shared.v2[ref].transitive.trans_domid = trans_domid;
353 	gnttab_shared.v2[ref].transitive.gref = trans_gref;
354 	gnttab_shared.v2[ref].hdr.domid = domid;
355 	wmb();
356 	gnttab_shared.v2[ref].hdr.flags =
357 				GTF_permit_access | GTF_transitive | flags;
358 }
359 
360 int gnttab_grant_foreign_access_trans_ref(grant_ref_t ref, domid_t domid,
361 					  int flags, domid_t trans_domid,
362 					  grant_ref_t trans_gref)
363 {
364 	if (flags & (GTF_accept_transfer | GTF_reading |
365 		     GTF_writing | GTF_sub_page))
366 		return -EPERM;
367 
368 	if (gnttab_interface->update_trans_entry == NULL)
369 		return -ENOSYS;
370 
371 	gnttab_interface->update_trans_entry(ref, domid, flags, trans_domid,
372 					     trans_gref);
373 
374 	return 0;
375 }
376 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans_ref);
377 
378 int gnttab_grant_foreign_access_trans(domid_t domid, int flags,
379 				      domid_t trans_domid,
380 				      grant_ref_t trans_gref)
381 {
382 	int ref, rc;
383 
384 	ref = get_free_entries(1);
385 	if (unlikely(ref < 0))
386 		return -ENOSPC;
387 
388 	rc = gnttab_grant_foreign_access_trans_ref(ref, domid, flags,
389 						   trans_domid, trans_gref);
390 	if (rc < 0) {
391 		put_free_entry(ref);
392 		return rc;
393 	}
394 
395 	return ref;
396 }
397 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans);
398 
399 bool gnttab_trans_grants_available(void)
400 {
401 	return gnttab_interface->update_trans_entry != NULL;
402 }
403 EXPORT_SYMBOL_GPL(gnttab_trans_grants_available);
404 
405 static int gnttab_query_foreign_access_v1(grant_ref_t ref)
406 {
407 	return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing);
408 }
409 
410 static int gnttab_query_foreign_access_v2(grant_ref_t ref)
411 {
412 	return grstatus[ref] & (GTF_reading|GTF_writing);
413 }
414 
415 int gnttab_query_foreign_access(grant_ref_t ref)
416 {
417 	return gnttab_interface->query_foreign_access(ref);
418 }
419 EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
420 
421 static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly)
422 {
423 	u16 flags, nflags;
424 	u16 *pflags;
425 
426 	pflags = &gnttab_shared.v1[ref].flags;
427 	nflags = *pflags;
428 	do {
429 		flags = nflags;
430 		if (flags & (GTF_reading|GTF_writing))
431 			return 0;
432 	} while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags);
433 
434 	return 1;
435 }
436 
437 static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref, int readonly)
438 {
439 	gnttab_shared.v2[ref].hdr.flags = 0;
440 	mb();
441 	if (grstatus[ref] & (GTF_reading|GTF_writing)) {
442 		return 0;
443 	} else {
444 		/* The read of grstatus needs to have acquire
445 		semantics.  On x86, reads already have
446 		that, and we just need to protect against
447 		compiler reorderings.  On other
448 		architectures we may need a full
449 		barrier. */
450 #ifdef CONFIG_X86
451 		barrier();
452 #else
453 		mb();
454 #endif
455 	}
456 
457 	return 1;
458 }
459 
460 static inline int _gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
461 {
462 	return gnttab_interface->end_foreign_access_ref(ref, readonly);
463 }
464 
465 int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
466 {
467 	if (_gnttab_end_foreign_access_ref(ref, readonly))
468 		return 1;
469 	pr_warn("WARNING: g.e. %#x still in use!\n", ref);
470 	return 0;
471 }
472 EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
473 
474 struct deferred_entry {
475 	struct list_head list;
476 	grant_ref_t ref;
477 	bool ro;
478 	uint16_t warn_delay;
479 	struct page *page;
480 };
481 static LIST_HEAD(deferred_list);
482 static void gnttab_handle_deferred(unsigned long);
483 static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred, 0, 0);
484 
485 static void gnttab_handle_deferred(unsigned long unused)
486 {
487 	unsigned int nr = 10;
488 	struct deferred_entry *first = NULL;
489 	unsigned long flags;
490 
491 	spin_lock_irqsave(&gnttab_list_lock, flags);
492 	while (nr--) {
493 		struct deferred_entry *entry
494 			= list_first_entry(&deferred_list,
495 					   struct deferred_entry, list);
496 
497 		if (entry == first)
498 			break;
499 		list_del(&entry->list);
500 		spin_unlock_irqrestore(&gnttab_list_lock, flags);
501 		if (_gnttab_end_foreign_access_ref(entry->ref, entry->ro)) {
502 			put_free_entry(entry->ref);
503 			if (entry->page) {
504 				pr_debug("freeing g.e. %#x (pfn %#lx)\n",
505 					 entry->ref, page_to_pfn(entry->page));
506 				__free_page(entry->page);
507 			} else
508 				pr_info("freeing g.e. %#x\n", entry->ref);
509 			kfree(entry);
510 			entry = NULL;
511 		} else {
512 			if (!--entry->warn_delay)
513 				pr_info("g.e. %#x still pending\n", entry->ref);
514 			if (!first)
515 				first = entry;
516 		}
517 		spin_lock_irqsave(&gnttab_list_lock, flags);
518 		if (entry)
519 			list_add_tail(&entry->list, &deferred_list);
520 		else if (list_empty(&deferred_list))
521 			break;
522 	}
523 	if (!list_empty(&deferred_list) && !timer_pending(&deferred_timer)) {
524 		deferred_timer.expires = jiffies + HZ;
525 		add_timer(&deferred_timer);
526 	}
527 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
528 }
529 
530 static void gnttab_add_deferred(grant_ref_t ref, bool readonly,
531 				struct page *page)
532 {
533 	struct deferred_entry *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
534 	const char *what = KERN_WARNING "leaking";
535 
536 	if (entry) {
537 		unsigned long flags;
538 
539 		entry->ref = ref;
540 		entry->ro = readonly;
541 		entry->page = page;
542 		entry->warn_delay = 60;
543 		spin_lock_irqsave(&gnttab_list_lock, flags);
544 		list_add_tail(&entry->list, &deferred_list);
545 		if (!timer_pending(&deferred_timer)) {
546 			deferred_timer.expires = jiffies + HZ;
547 			add_timer(&deferred_timer);
548 		}
549 		spin_unlock_irqrestore(&gnttab_list_lock, flags);
550 		what = KERN_DEBUG "deferring";
551 	}
552 	printk("%s g.e. %#x (pfn %#lx)\n",
553 	       what, ref, page ? page_to_pfn(page) : -1);
554 }
555 
556 void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
557 			       unsigned long page)
558 {
559 	if (gnttab_end_foreign_access_ref(ref, readonly)) {
560 		put_free_entry(ref);
561 		if (page != 0)
562 			free_page(page);
563 	} else
564 		gnttab_add_deferred(ref, readonly,
565 				    page ? virt_to_page(page) : NULL);
566 }
567 EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
568 
569 int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
570 {
571 	int ref;
572 
573 	ref = get_free_entries(1);
574 	if (unlikely(ref < 0))
575 		return -ENOSPC;
576 	gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
577 
578 	return ref;
579 }
580 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);
581 
582 void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
583 				       unsigned long pfn)
584 {
585 	gnttab_interface->update_entry(ref, domid, pfn, GTF_accept_transfer);
586 }
587 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);
588 
589 static unsigned long gnttab_end_foreign_transfer_ref_v1(grant_ref_t ref)
590 {
591 	unsigned long frame;
592 	u16           flags;
593 	u16          *pflags;
594 
595 	pflags = &gnttab_shared.v1[ref].flags;
596 
597 	/*
598 	 * If a transfer is not even yet started, try to reclaim the grant
599 	 * reference and return failure (== 0).
600 	 */
601 	while (!((flags = *pflags) & GTF_transfer_committed)) {
602 		if (sync_cmpxchg(pflags, flags, 0) == flags)
603 			return 0;
604 		cpu_relax();
605 	}
606 
607 	/* If a transfer is in progress then wait until it is completed. */
608 	while (!(flags & GTF_transfer_completed)) {
609 		flags = *pflags;
610 		cpu_relax();
611 	}
612 
613 	rmb();	/* Read the frame number /after/ reading completion status. */
614 	frame = gnttab_shared.v1[ref].frame;
615 	BUG_ON(frame == 0);
616 
617 	return frame;
618 }
619 
620 static unsigned long gnttab_end_foreign_transfer_ref_v2(grant_ref_t ref)
621 {
622 	unsigned long frame;
623 	u16           flags;
624 	u16          *pflags;
625 
626 	pflags = &gnttab_shared.v2[ref].hdr.flags;
627 
628 	/*
629 	 * If a transfer is not even yet started, try to reclaim the grant
630 	 * reference and return failure (== 0).
631 	 */
632 	while (!((flags = *pflags) & GTF_transfer_committed)) {
633 		if (sync_cmpxchg(pflags, flags, 0) == flags)
634 			return 0;
635 		cpu_relax();
636 	}
637 
638 	/* If a transfer is in progress then wait until it is completed. */
639 	while (!(flags & GTF_transfer_completed)) {
640 		flags = *pflags;
641 		cpu_relax();
642 	}
643 
644 	rmb();  /* Read the frame number /after/ reading completion status. */
645 	frame = gnttab_shared.v2[ref].full_page.frame;
646 	BUG_ON(frame == 0);
647 
648 	return frame;
649 }
650 
651 unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
652 {
653 	return gnttab_interface->end_foreign_transfer_ref(ref);
654 }
655 EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref);
656 
657 unsigned long gnttab_end_foreign_transfer(grant_ref_t ref)
658 {
659 	unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
660 	put_free_entry(ref);
661 	return frame;
662 }
663 EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer);
664 
665 void gnttab_free_grant_reference(grant_ref_t ref)
666 {
667 	put_free_entry(ref);
668 }
669 EXPORT_SYMBOL_GPL(gnttab_free_grant_reference);
670 
671 void gnttab_free_grant_references(grant_ref_t head)
672 {
673 	grant_ref_t ref;
674 	unsigned long flags;
675 	int count = 1;
676 	if (head == GNTTAB_LIST_END)
677 		return;
678 	spin_lock_irqsave(&gnttab_list_lock, flags);
679 	ref = head;
680 	while (gnttab_entry(ref) != GNTTAB_LIST_END) {
681 		ref = gnttab_entry(ref);
682 		count++;
683 	}
684 	gnttab_entry(ref) = gnttab_free_head;
685 	gnttab_free_head = head;
686 	gnttab_free_count += count;
687 	check_free_callbacks();
688 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
689 }
690 EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
691 
692 int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
693 {
694 	int h = get_free_entries(count);
695 
696 	if (h < 0)
697 		return -ENOSPC;
698 
699 	*head = h;
700 
701 	return 0;
702 }
703 EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
704 
705 int gnttab_empty_grant_references(const grant_ref_t *private_head)
706 {
707 	return (*private_head == GNTTAB_LIST_END);
708 }
709 EXPORT_SYMBOL_GPL(gnttab_empty_grant_references);
710 
711 int gnttab_claim_grant_reference(grant_ref_t *private_head)
712 {
713 	grant_ref_t g = *private_head;
714 	if (unlikely(g == GNTTAB_LIST_END))
715 		return -ENOSPC;
716 	*private_head = gnttab_entry(g);
717 	return g;
718 }
719 EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);
720 
721 void gnttab_release_grant_reference(grant_ref_t *private_head,
722 				    grant_ref_t release)
723 {
724 	gnttab_entry(release) = *private_head;
725 	*private_head = release;
726 }
727 EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);
728 
729 void gnttab_request_free_callback(struct gnttab_free_callback *callback,
730 				  void (*fn)(void *), void *arg, u16 count)
731 {
732 	unsigned long flags;
733 	struct gnttab_free_callback *cb;
734 
735 	spin_lock_irqsave(&gnttab_list_lock, flags);
736 
737 	/* Check if the callback is already on the list */
738 	cb = gnttab_free_callback_list;
739 	while (cb) {
740 		if (cb == callback)
741 			goto out;
742 		cb = cb->next;
743 	}
744 
745 	callback->fn = fn;
746 	callback->arg = arg;
747 	callback->count = count;
748 	callback->next = gnttab_free_callback_list;
749 	gnttab_free_callback_list = callback;
750 	check_free_callbacks();
751 out:
752 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
753 }
754 EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
755 
756 void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
757 {
758 	struct gnttab_free_callback **pcb;
759 	unsigned long flags;
760 
761 	spin_lock_irqsave(&gnttab_list_lock, flags);
762 	for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
763 		if (*pcb == callback) {
764 			*pcb = callback->next;
765 			break;
766 		}
767 	}
768 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
769 }
770 EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
771 
772 static int grow_gnttab_list(unsigned int more_frames)
773 {
774 	unsigned int new_nr_grant_frames, extra_entries, i;
775 	unsigned int nr_glist_frames, new_nr_glist_frames;
776 
777 	BUG_ON(grefs_per_grant_frame == 0);
778 
779 	new_nr_grant_frames = nr_grant_frames + more_frames;
780 	extra_entries       = more_frames * grefs_per_grant_frame;
781 
782 	nr_glist_frames = (nr_grant_frames * grefs_per_grant_frame + RPP - 1) / RPP;
783 	new_nr_glist_frames =
784 		(new_nr_grant_frames * grefs_per_grant_frame + RPP - 1) / RPP;
785 	for (i = nr_glist_frames; i < new_nr_glist_frames; i++) {
786 		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
787 		if (!gnttab_list[i])
788 			goto grow_nomem;
789 	}
790 
791 
792 	for (i = grefs_per_grant_frame * nr_grant_frames;
793 	     i < grefs_per_grant_frame * new_nr_grant_frames - 1; i++)
794 		gnttab_entry(i) = i + 1;
795 
796 	gnttab_entry(i) = gnttab_free_head;
797 	gnttab_free_head = grefs_per_grant_frame * nr_grant_frames;
798 	gnttab_free_count += extra_entries;
799 
800 	nr_grant_frames = new_nr_grant_frames;
801 
802 	check_free_callbacks();
803 
804 	return 0;
805 
806 grow_nomem:
807 	for ( ; i >= nr_glist_frames; i--)
808 		free_page((unsigned long) gnttab_list[i]);
809 	return -ENOMEM;
810 }
811 
812 static unsigned int __max_nr_grant_frames(void)
813 {
814 	struct gnttab_query_size query;
815 	int rc;
816 
817 	query.dom = DOMID_SELF;
818 
819 	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
820 	if ((rc < 0) || (query.status != GNTST_okay))
821 		return 4; /* Legacy max supported number of frames */
822 
823 	return query.max_nr_frames;
824 }
825 
826 unsigned int gnttab_max_grant_frames(void)
827 {
828 	unsigned int xen_max = __max_nr_grant_frames();
829 
830 	if (xen_max > boot_max_nr_grant_frames)
831 		return boot_max_nr_grant_frames;
832 	return xen_max;
833 }
834 EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
835 
836 /* Handling of paged out grant targets (GNTST_eagain) */
837 #define MAX_DELAY 256
838 static inline void
839 gnttab_retry_eagain_gop(unsigned int cmd, void *gop, int16_t *status,
840 						const char *func)
841 {
842 	unsigned delay = 1;
843 
844 	do {
845 		BUG_ON(HYPERVISOR_grant_table_op(cmd, gop, 1));
846 		if (*status == GNTST_eagain)
847 			msleep(delay++);
848 	} while ((*status == GNTST_eagain) && (delay < MAX_DELAY));
849 
850 	if (delay >= MAX_DELAY) {
851 		pr_err("%s: %s eagain grant\n", func, current->comm);
852 		*status = GNTST_bad_page;
853 	}
854 }
855 
856 void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count)
857 {
858 	struct gnttab_map_grant_ref *op;
859 
860 	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, batch, count))
861 		BUG();
862 	for (op = batch; op < batch + count; op++)
863 		if (op->status == GNTST_eagain)
864 			gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, op,
865 						&op->status, __func__);
866 }
867 EXPORT_SYMBOL_GPL(gnttab_batch_map);
868 
869 void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count)
870 {
871 	struct gnttab_copy *op;
872 
873 	if (HYPERVISOR_grant_table_op(GNTTABOP_copy, batch, count))
874 		BUG();
875 	for (op = batch; op < batch + count; op++)
876 		if (op->status == GNTST_eagain)
877 			gnttab_retry_eagain_gop(GNTTABOP_copy, op,
878 						&op->status, __func__);
879 }
880 EXPORT_SYMBOL_GPL(gnttab_batch_copy);
881 
882 int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
883 		    struct gnttab_map_grant_ref *kmap_ops,
884 		    struct page **pages, unsigned int count)
885 {
886 	int i, ret;
887 	bool lazy = false;
888 	pte_t *pte;
889 	unsigned long mfn;
890 
891 	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map_ops, count);
892 	if (ret)
893 		return ret;
894 
895 	/* Retry eagain maps */
896 	for (i = 0; i < count; i++)
897 		if (map_ops[i].status == GNTST_eagain)
898 			gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i,
899 						&map_ops[i].status, __func__);
900 
901 	if (xen_feature(XENFEAT_auto_translated_physmap))
902 		return ret;
903 
904 	if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
905 		arch_enter_lazy_mmu_mode();
906 		lazy = true;
907 	}
908 
909 	for (i = 0; i < count; i++) {
910 		/* Do not add to override if the map failed. */
911 		if (map_ops[i].status)
912 			continue;
913 
914 		if (map_ops[i].flags & GNTMAP_contains_pte) {
915 			pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
916 				(map_ops[i].host_addr & ~PAGE_MASK));
917 			mfn = pte_mfn(*pte);
918 		} else {
919 			mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
920 		}
921 		ret = m2p_add_override(mfn, pages[i], kmap_ops ?
922 				       &kmap_ops[i] : NULL);
923 		if (ret)
924 			return ret;
925 	}
926 
927 	if (lazy)
928 		arch_leave_lazy_mmu_mode();
929 
930 	return ret;
931 }
932 EXPORT_SYMBOL_GPL(gnttab_map_refs);
933 
934 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
935 		      struct gnttab_map_grant_ref *kmap_ops,
936 		      struct page **pages, unsigned int count)
937 {
938 	int i, ret;
939 	bool lazy = false;
940 
941 	ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count);
942 	if (ret)
943 		return ret;
944 
945 	if (xen_feature(XENFEAT_auto_translated_physmap))
946 		return ret;
947 
948 	if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
949 		arch_enter_lazy_mmu_mode();
950 		lazy = true;
951 	}
952 
953 	for (i = 0; i < count; i++) {
954 		ret = m2p_remove_override(pages[i], kmap_ops ?
955 				       &kmap_ops[i] : NULL);
956 		if (ret)
957 			return ret;
958 	}
959 
960 	if (lazy)
961 		arch_leave_lazy_mmu_mode();
962 
963 	return ret;
964 }
965 EXPORT_SYMBOL_GPL(gnttab_unmap_refs);
966 
967 static unsigned nr_status_frames(unsigned nr_grant_frames)
968 {
969 	BUG_ON(grefs_per_grant_frame == 0);
970 	return (nr_grant_frames * grefs_per_grant_frame + SPP - 1) / SPP;
971 }
972 
973 static int gnttab_map_frames_v1(xen_pfn_t *frames, unsigned int nr_gframes)
974 {
975 	int rc;
976 
977 	rc = arch_gnttab_map_shared(frames, nr_gframes,
978 				    gnttab_max_grant_frames(),
979 				    &gnttab_shared.addr);
980 	BUG_ON(rc);
981 
982 	return 0;
983 }
984 
985 static void gnttab_unmap_frames_v1(void)
986 {
987 	arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
988 }
989 
990 static int gnttab_map_frames_v2(xen_pfn_t *frames, unsigned int nr_gframes)
991 {
992 	uint64_t *sframes;
993 	unsigned int nr_sframes;
994 	struct gnttab_get_status_frames getframes;
995 	int rc;
996 
997 	nr_sframes = nr_status_frames(nr_gframes);
998 
999 	/* No need for kzalloc as it is initialized in following hypercall
1000 	 * GNTTABOP_get_status_frames.
1001 	 */
1002 	sframes = kmalloc(nr_sframes  * sizeof(uint64_t), GFP_ATOMIC);
1003 	if (!sframes)
1004 		return -ENOMEM;
1005 
1006 	getframes.dom        = DOMID_SELF;
1007 	getframes.nr_frames  = nr_sframes;
1008 	set_xen_guest_handle(getframes.frame_list, sframes);
1009 
1010 	rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
1011 				       &getframes, 1);
1012 	if (rc == -ENOSYS) {
1013 		kfree(sframes);
1014 		return -ENOSYS;
1015 	}
1016 
1017 	BUG_ON(rc || getframes.status);
1018 
1019 	rc = arch_gnttab_map_status(sframes, nr_sframes,
1020 				    nr_status_frames(gnttab_max_grant_frames()),
1021 				    &grstatus);
1022 	BUG_ON(rc);
1023 	kfree(sframes);
1024 
1025 	rc = arch_gnttab_map_shared(frames, nr_gframes,
1026 				    gnttab_max_grant_frames(),
1027 				    &gnttab_shared.addr);
1028 	BUG_ON(rc);
1029 
1030 	return 0;
1031 }
1032 
1033 static void gnttab_unmap_frames_v2(void)
1034 {
1035 	arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
1036 	arch_gnttab_unmap(grstatus, nr_status_frames(nr_grant_frames));
1037 }
1038 
1039 static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
1040 {
1041 	struct gnttab_setup_table setup;
1042 	xen_pfn_t *frames;
1043 	unsigned int nr_gframes = end_idx + 1;
1044 	int rc;
1045 
1046 	if (xen_hvm_domain()) {
1047 		struct xen_add_to_physmap xatp;
1048 		unsigned int i = end_idx;
1049 		rc = 0;
1050 		/*
1051 		 * Loop backwards, so that the first hypercall has the largest
1052 		 * index, ensuring that the table will grow only once.
1053 		 */
1054 		do {
1055 			xatp.domid = DOMID_SELF;
1056 			xatp.idx = i;
1057 			xatp.space = XENMAPSPACE_grant_table;
1058 			xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i;
1059 			rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
1060 			if (rc != 0) {
1061 				pr_warn("grant table add_to_physmap failed, err=%d\n",
1062 					rc);
1063 				break;
1064 			}
1065 		} while (i-- > start_idx);
1066 
1067 		return rc;
1068 	}
1069 
1070 	/* No need for kzalloc as it is initialized in following hypercall
1071 	 * GNTTABOP_setup_table.
1072 	 */
1073 	frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
1074 	if (!frames)
1075 		return -ENOMEM;
1076 
1077 	setup.dom        = DOMID_SELF;
1078 	setup.nr_frames  = nr_gframes;
1079 	set_xen_guest_handle(setup.frame_list, frames);
1080 
1081 	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
1082 	if (rc == -ENOSYS) {
1083 		kfree(frames);
1084 		return -ENOSYS;
1085 	}
1086 
1087 	BUG_ON(rc || setup.status);
1088 
1089 	rc = gnttab_interface->map_frames(frames, nr_gframes);
1090 
1091 	kfree(frames);
1092 
1093 	return rc;
1094 }
1095 
1096 static struct gnttab_ops gnttab_v1_ops = {
1097 	.map_frames			= gnttab_map_frames_v1,
1098 	.unmap_frames			= gnttab_unmap_frames_v1,
1099 	.update_entry			= gnttab_update_entry_v1,
1100 	.end_foreign_access_ref		= gnttab_end_foreign_access_ref_v1,
1101 	.end_foreign_transfer_ref	= gnttab_end_foreign_transfer_ref_v1,
1102 	.query_foreign_access		= gnttab_query_foreign_access_v1,
1103 };
1104 
1105 static struct gnttab_ops gnttab_v2_ops = {
1106 	.map_frames			= gnttab_map_frames_v2,
1107 	.unmap_frames			= gnttab_unmap_frames_v2,
1108 	.update_entry			= gnttab_update_entry_v2,
1109 	.end_foreign_access_ref		= gnttab_end_foreign_access_ref_v2,
1110 	.end_foreign_transfer_ref	= gnttab_end_foreign_transfer_ref_v2,
1111 	.query_foreign_access		= gnttab_query_foreign_access_v2,
1112 	.update_subpage_entry		= gnttab_update_subpage_entry_v2,
1113 	.update_trans_entry		= gnttab_update_trans_entry_v2,
1114 };
1115 
1116 static void gnttab_request_version(void)
1117 {
1118 	int rc;
1119 	struct gnttab_set_version gsv;
1120 
1121 	if (xen_hvm_domain())
1122 		gsv.version = 1;
1123 	else
1124 		gsv.version = 2;
1125 	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
1126 	if (rc == 0 && gsv.version == 2) {
1127 		grant_table_version = 2;
1128 		grefs_per_grant_frame = PAGE_SIZE / sizeof(union grant_entry_v2);
1129 		gnttab_interface = &gnttab_v2_ops;
1130 	} else if (grant_table_version == 2) {
1131 		/*
1132 		 * If we've already used version 2 features,
1133 		 * but then suddenly discover that they're not
1134 		 * available (e.g. migrating to an older
1135 		 * version of Xen), almost unbounded badness
1136 		 * can happen.
1137 		 */
1138 		panic("we need grant tables version 2, but only version 1 is available");
1139 	} else {
1140 		grant_table_version = 1;
1141 		grefs_per_grant_frame = PAGE_SIZE / sizeof(struct grant_entry_v1);
1142 		gnttab_interface = &gnttab_v1_ops;
1143 	}
1144 	pr_info("Grant tables using version %d layout\n", grant_table_version);
1145 }
1146 
1147 static int gnttab_setup(void)
1148 {
1149 	unsigned int max_nr_gframes;
1150 
1151 	max_nr_gframes = gnttab_max_grant_frames();
1152 	if (max_nr_gframes < nr_grant_frames)
1153 		return -ENOSYS;
1154 
1155 	if (xen_pv_domain())
1156 		return gnttab_map(0, nr_grant_frames - 1);
1157 
1158 	if (gnttab_shared.addr == NULL) {
1159 		gnttab_shared.addr = xen_remap(xen_hvm_resume_frames,
1160 						PAGE_SIZE * max_nr_gframes);
1161 		if (gnttab_shared.addr == NULL) {
1162 			pr_warn("Failed to ioremap gnttab share frames!\n");
1163 			return -ENOMEM;
1164 		}
1165 	}
1166 
1167 	gnttab_map(0, nr_grant_frames - 1);
1168 
1169 	return 0;
1170 }
1171 
/*
 * Re-negotiate the grant table version and set the grant frames up
 * again.  Returns whatever gnttab_setup() returns.
 */
int gnttab_resume(void)
{
	int rc;

	gnttab_request_version();
	rc = gnttab_setup();

	return rc;
}
1177 
1178 int gnttab_suspend(void)
1179 {
1180 	gnttab_interface->unmap_frames();
1181 	return 0;
1182 }
1183 
1184 static int gnttab_expand(unsigned int req_entries)
1185 {
1186 	int rc;
1187 	unsigned int cur, extra;
1188 
1189 	BUG_ON(grefs_per_grant_frame == 0);
1190 	cur = nr_grant_frames;
1191 	extra = ((req_entries + (grefs_per_grant_frame-1)) /
1192 		 grefs_per_grant_frame);
1193 	if (cur + extra > gnttab_max_grant_frames())
1194 		return -ENOSPC;
1195 
1196 	rc = gnttab_map(cur, cur + extra - 1);
1197 	if (rc == 0)
1198 		rc = grow_gnttab_list(extra);
1199 
1200 	return rc;
1201 }
1202 
1203 int gnttab_init(void)
1204 {
1205 	int i;
1206 	unsigned int max_nr_glist_frames, nr_glist_frames;
1207 	unsigned int nr_init_grefs;
1208 	int ret;
1209 
1210 	gnttab_request_version();
1211 	nr_grant_frames = 1;
1212 	boot_max_nr_grant_frames = __max_nr_grant_frames();
1213 
1214 	/* Determine the maximum number of frames required for the
1215 	 * grant reference free list on the current hypervisor.
1216 	 */
1217 	BUG_ON(grefs_per_grant_frame == 0);
1218 	max_nr_glist_frames = (boot_max_nr_grant_frames *
1219 			       grefs_per_grant_frame / RPP);
1220 
1221 	gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
1222 			      GFP_KERNEL);
1223 	if (gnttab_list == NULL)
1224 		return -ENOMEM;
1225 
1226 	nr_glist_frames = (nr_grant_frames * grefs_per_grant_frame + RPP - 1) / RPP;
1227 	for (i = 0; i < nr_glist_frames; i++) {
1228 		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
1229 		if (gnttab_list[i] == NULL) {
1230 			ret = -ENOMEM;
1231 			goto ini_nomem;
1232 		}
1233 	}
1234 
1235 	if (gnttab_setup() < 0) {
1236 		ret = -ENODEV;
1237 		goto ini_nomem;
1238 	}
1239 
1240 	nr_init_grefs = nr_grant_frames * grefs_per_grant_frame;
1241 
1242 	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
1243 		gnttab_entry(i) = i + 1;
1244 
1245 	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
1246 	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
1247 	gnttab_free_head  = NR_RESERVED_ENTRIES;
1248 
1249 	printk("Grant table initialized\n");
1250 	return 0;
1251 
1252  ini_nomem:
1253 	for (i--; i >= 0; i--)
1254 		free_page((unsigned long)gnttab_list[i]);
1255 	kfree(gnttab_list);
1256 	return ret;
1257 }
1258 EXPORT_SYMBOL_GPL(gnttab_init);
1259 
1260 static int __gnttab_init(void)
1261 {
1262 	/* Delay grant-table initialization in the PV on HVM case */
1263 	if (xen_hvm_domain())
1264 		return 0;
1265 
1266 	if (!xen_pv_domain())
1267 		return -ENODEV;
1268 
1269 	return gnttab_init();
1270 }
1271 
1272 core_initcall(__gnttab_init);
1273