xref: /openbmc/linux/drivers/xen/grant-table.c (revision 9d749629)
1 /******************************************************************************
2  * grant_table.c
3  *
4  * Granting foreign access to our memory reservation.
5  *
6  * Copyright (c) 2005-2006, Christopher Clark
7  * Copyright (c) 2004-2005, K A Fraser
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License version 2
11  * as published by the Free Software Foundation; or, when distributed
12  * separately from the Linux kernel or incorporated into other
13  * software packages, subject to the following license:
14  *
15  * Permission is hereby granted, free of charge, to any person obtaining a copy
16  * of this source file (the "Software"), to deal in the Software without
17  * restriction, including without limitation the rights to use, copy, modify,
18  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19  * and to permit persons to whom the Software is furnished to do so, subject to
20  * the following conditions:
21  *
22  * The above copyright notice and this permission notice shall be included in
23  * all copies or substantial portions of the Software.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31  * IN THE SOFTWARE.
32  */
33 
34 #include <linux/module.h>
35 #include <linux/sched.h>
36 #include <linux/mm.h>
37 #include <linux/slab.h>
38 #include <linux/vmalloc.h>
39 #include <linux/uaccess.h>
40 #include <linux/io.h>
41 #include <linux/delay.h>
42 #include <linux/hardirq.h>
43 
44 #include <xen/xen.h>
45 #include <xen/interface/xen.h>
46 #include <xen/page.h>
47 #include <xen/grant_table.h>
48 #include <xen/interface/memory.h>
49 #include <xen/hvc-console.h>
50 #include <asm/xen/hypercall.h>
51 #include <asm/xen/interface.h>
52 
53 #include <asm/pgtable.h>
54 #include <asm/sync_bitops.h>
55 
/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8
/* Terminator stored in the last link of a chain of grant references. */
#define GNTTAB_LIST_END 0xffffffff

/*
 * Free-list storage: an array of pages of grant_ref_t, indexed through
 * __gnttab_entry(). The slot for a reference holds the next reference in
 * the chain that reference currently belongs to.
 */
static grant_ref_t **gnttab_list;
/* Number of grant frames covered by the free list (see grow_gnttab_list()). */
static unsigned int nr_grant_frames;
/* Upper bound gnttab_max_grant_frames() applies to the hypervisor's limit. */
static unsigned int boot_max_nr_grant_frames;
/* Number of references currently on the global free list. */
static int gnttab_free_count;
/* First reference on the global free list. */
static grant_ref_t gnttab_free_head;
/* Taken around free-list, free-callback-list and deferred-list updates. */
static DEFINE_SPINLOCK(gnttab_list_lock);
/* Base address from which gnttab_map() derives the gpfns used on HVM. */
unsigned long xen_hvm_resume_frames;
EXPORT_SYMBOL_GPL(xen_hvm_resume_frames);
68 
/*
 * The mapped shared grant table. One address, three views: version 1
 * entries, version 2 entries, or the untyped pointer that is handed to the
 * arch_gnttab_map_shared()/arch_gnttab_unmap() helpers.
 */
static union {
	struct grant_entry_v1 *v1;
	union grant_entry_v2 *v2;
	void *addr;
} gnttab_shared;
74 
/*
 * Table of function pointers implementing the grant table operations whose
 * behaviour depends on the grant table version in use.
 */
struct gnttab_ops {
	/*
	 * Mapping a list of frames for storing grant entries. Frames parameter
	 * is used to store grant table address when grant table being setup,
	 * nr_gframes is the number of frames to map grant table. Returning
	 * GNTST_okay means success and negative value means failure.
	 */
	int (*map_frames)(xen_pfn_t *frames, unsigned int nr_gframes);
	/*
	 * Release a list of frames which are mapped in map_frames for grant
	 * entry status.
	 */
	void (*unmap_frames)(void);
	/*
	 * Introducing a valid entry into the grant table, granting the frame of
	 * this grant entry to domain for accessing or transferring. Ref
	 * parameter is reference of this introduced grant entry, domid is id of
	 * granted domain, frame is the page frame to be granted, and flags is
	 * status of the grant entry to be updated.
	 */
	void (*update_entry)(grant_ref_t ref, domid_t domid,
			     unsigned long frame, unsigned flags);
	/*
	 * Stop granting a grant entry to domain for accessing. Ref parameter is
	 * reference of a grant entry whose grant access will be stopped,
	 * readonly is not in use in this function. If the grant entry is
	 * currently mapped for reading or writing, just return failure(==0)
	 * directly and don't tear down the grant access. Otherwise, stop grant
	 * access for this entry and return success(==1).
	 */
	int (*end_foreign_access_ref)(grant_ref_t ref, int readonly);
	/*
	 * Stop granting a grant entry to domain for transfer. Ref parameter is
	 * reference of a grant entry whose grant transfer will be stopped. If
	 * transfer has not started, just reclaim the grant entry and return
	 * failure(==0). Otherwise, wait for the transfer to complete and then
	 * return the frame.
	 */
	unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref);
	/*
	 * Query the status of a grant entry. Ref parameter is reference of
	 * queried grant entry, return value is the status of queried entry.
	 * Detailed status(writing/reading) can be gotten from the return value
	 * by bit operations.
	 */
	int (*query_foreign_access)(grant_ref_t ref);
	/*
	 * Grant a domain to access a range of bytes within the page referred by
	 * an available grant entry. Ref parameter is reference of a grant entry
	 * which will be sub-page accessed, domid is id of grantee domain, frame
	 * is frame address of subpage grant, flags is grant type and flag
	 * information, page_off is offset of the range of bytes, and length is
	 * length of bytes to be accessed.
	 * May be NULL; callers check for that and report -ENOSYS.
	 */
	void (*update_subpage_entry)(grant_ref_t ref, domid_t domid,
				     unsigned long frame, int flags,
				     unsigned page_off, unsigned length);
	/*
	 * Redirect an available grant entry on domain A to another grant
	 * reference of domain B, then allow domain C to use grant reference
	 * of domain B transitively. Ref parameter is an available grant entry
	 * reference on domain A, domid is id of domain C which accesses grant
	 * entry transitively, flags is grant type and flag information,
	 * trans_domid is id of domain B whose grant entry is finally accessed
	 * transitively, trans_gref is grant entry transitive reference of
	 * domain B.
	 * May be NULL; callers check for that and report -ENOSYS.
	 */
	void (*update_trans_entry)(grant_ref_t ref, domid_t domid, int flags,
				   domid_t trans_domid, grant_ref_t trans_gref);
};
146 
/* Operations table every version-dependent call in this file goes through. */
static struct gnttab_ops *gnttab_interface;

/*This reflects status of grant entries, so act as a global value*/
static grant_status_t *grstatus;

static int grant_table_version;
/* Grant entries per grant frame; users BUG() if this is still zero. */
static int grefs_per_grant_frame;

/* Singly linked list of callbacks waiting for enough free grant entries. */
static struct gnttab_free_callback *gnttab_free_callback_list;

static int gnttab_expand(unsigned int req_entries);

/* Number of grant_ref_t links that fit in one page of gnttab_list. */
#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
/* Number of grant_status_t entries that fit in one page. */
#define SPP (PAGE_SIZE / sizeof(grant_status_t))
161 
162 static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
163 {
164 	return &gnttab_list[(entry) / RPP][(entry) % RPP];
165 }
166 /* This can be used as an l-value */
167 #define gnttab_entry(entry) (*__gnttab_entry(entry))
168 
169 static int get_free_entries(unsigned count)
170 {
171 	unsigned long flags;
172 	int ref, rc = 0;
173 	grant_ref_t head;
174 
175 	spin_lock_irqsave(&gnttab_list_lock, flags);
176 
177 	if ((gnttab_free_count < count) &&
178 	    ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
179 		spin_unlock_irqrestore(&gnttab_list_lock, flags);
180 		return rc;
181 	}
182 
183 	ref = head = gnttab_free_head;
184 	gnttab_free_count -= count;
185 	while (count-- > 1)
186 		head = gnttab_entry(head);
187 	gnttab_free_head = gnttab_entry(head);
188 	gnttab_entry(head) = GNTTAB_LIST_END;
189 
190 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
191 
192 	return ref;
193 }
194 
195 static void do_free_callbacks(void)
196 {
197 	struct gnttab_free_callback *callback, *next;
198 
199 	callback = gnttab_free_callback_list;
200 	gnttab_free_callback_list = NULL;
201 
202 	while (callback != NULL) {
203 		next = callback->next;
204 		if (gnttab_free_count >= callback->count) {
205 			callback->next = NULL;
206 			callback->fn(callback->arg);
207 		} else {
208 			callback->next = gnttab_free_callback_list;
209 			gnttab_free_callback_list = callback;
210 		}
211 		callback = next;
212 	}
213 }
214 
215 static inline void check_free_callbacks(void)
216 {
217 	if (unlikely(gnttab_free_callback_list))
218 		do_free_callbacks();
219 }
220 
221 static void put_free_entry(grant_ref_t ref)
222 {
223 	unsigned long flags;
224 	spin_lock_irqsave(&gnttab_list_lock, flags);
225 	gnttab_entry(ref) = gnttab_free_head;
226 	gnttab_free_head = ref;
227 	gnttab_free_count++;
228 	check_free_callbacks();
229 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
230 }
231 
232 /*
233  * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2.
234  * Introducing a valid entry into the grant table:
235  *  1. Write ent->domid.
236  *  2. Write ent->frame:
237  *      GTF_permit_access:   Frame to which access is permitted.
238  *      GTF_accept_transfer: Pseudo-phys frame slot being filled by new
239  *                           frame, or zero if none.
240  *  3. Write memory barrier (WMB).
241  *  4. Write ent->flags, inc. valid type.
242  */
243 static void gnttab_update_entry_v1(grant_ref_t ref, domid_t domid,
244 				   unsigned long frame, unsigned flags)
245 {
246 	gnttab_shared.v1[ref].domid = domid;
247 	gnttab_shared.v1[ref].frame = frame;
248 	wmb();
249 	gnttab_shared.v1[ref].flags = flags;
250 }
251 
252 static void gnttab_update_entry_v2(grant_ref_t ref, domid_t domid,
253 				   unsigned long frame, unsigned flags)
254 {
255 	gnttab_shared.v2[ref].hdr.domid = domid;
256 	gnttab_shared.v2[ref].full_page.frame = frame;
257 	wmb();
258 	gnttab_shared.v2[ref].hdr.flags = GTF_permit_access | flags;
259 }
260 
261 /*
262  * Public grant-issuing interface functions
263  */
264 void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
265 				     unsigned long frame, int readonly)
266 {
267 	gnttab_interface->update_entry(ref, domid, frame,
268 			   GTF_permit_access | (readonly ? GTF_readonly : 0));
269 }
270 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
271 
272 int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
273 				int readonly)
274 {
275 	int ref;
276 
277 	ref = get_free_entries(1);
278 	if (unlikely(ref < 0))
279 		return -ENOSPC;
280 
281 	gnttab_grant_foreign_access_ref(ref, domid, frame, readonly);
282 
283 	return ref;
284 }
285 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
286 
287 static void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid,
288 					   unsigned long frame, int flags,
289 					   unsigned page_off, unsigned length)
290 {
291 	gnttab_shared.v2[ref].sub_page.frame = frame;
292 	gnttab_shared.v2[ref].sub_page.page_off = page_off;
293 	gnttab_shared.v2[ref].sub_page.length = length;
294 	gnttab_shared.v2[ref].hdr.domid = domid;
295 	wmb();
296 	gnttab_shared.v2[ref].hdr.flags =
297 				GTF_permit_access | GTF_sub_page | flags;
298 }
299 
300 int gnttab_grant_foreign_access_subpage_ref(grant_ref_t ref, domid_t domid,
301 					    unsigned long frame, int flags,
302 					    unsigned page_off,
303 					    unsigned length)
304 {
305 	if (flags & (GTF_accept_transfer | GTF_reading |
306 		     GTF_writing | GTF_transitive))
307 		return -EPERM;
308 
309 	if (gnttab_interface->update_subpage_entry == NULL)
310 		return -ENOSYS;
311 
312 	gnttab_interface->update_subpage_entry(ref, domid, frame, flags,
313 					       page_off, length);
314 
315 	return 0;
316 }
317 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage_ref);
318 
319 int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame,
320 					int flags, unsigned page_off,
321 					unsigned length)
322 {
323 	int ref, rc;
324 
325 	ref = get_free_entries(1);
326 	if (unlikely(ref < 0))
327 		return -ENOSPC;
328 
329 	rc = gnttab_grant_foreign_access_subpage_ref(ref, domid, frame, flags,
330 						     page_off, length);
331 	if (rc < 0) {
332 		put_free_entry(ref);
333 		return rc;
334 	}
335 
336 	return ref;
337 }
338 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage);
339 
340 bool gnttab_subpage_grants_available(void)
341 {
342 	return gnttab_interface->update_subpage_entry != NULL;
343 }
344 EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available);
345 
346 static void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid,
347 					 int flags, domid_t trans_domid,
348 					 grant_ref_t trans_gref)
349 {
350 	gnttab_shared.v2[ref].transitive.trans_domid = trans_domid;
351 	gnttab_shared.v2[ref].transitive.gref = trans_gref;
352 	gnttab_shared.v2[ref].hdr.domid = domid;
353 	wmb();
354 	gnttab_shared.v2[ref].hdr.flags =
355 				GTF_permit_access | GTF_transitive | flags;
356 }
357 
358 int gnttab_grant_foreign_access_trans_ref(grant_ref_t ref, domid_t domid,
359 					  int flags, domid_t trans_domid,
360 					  grant_ref_t trans_gref)
361 {
362 	if (flags & (GTF_accept_transfer | GTF_reading |
363 		     GTF_writing | GTF_sub_page))
364 		return -EPERM;
365 
366 	if (gnttab_interface->update_trans_entry == NULL)
367 		return -ENOSYS;
368 
369 	gnttab_interface->update_trans_entry(ref, domid, flags, trans_domid,
370 					     trans_gref);
371 
372 	return 0;
373 }
374 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans_ref);
375 
376 int gnttab_grant_foreign_access_trans(domid_t domid, int flags,
377 				      domid_t trans_domid,
378 				      grant_ref_t trans_gref)
379 {
380 	int ref, rc;
381 
382 	ref = get_free_entries(1);
383 	if (unlikely(ref < 0))
384 		return -ENOSPC;
385 
386 	rc = gnttab_grant_foreign_access_trans_ref(ref, domid, flags,
387 						   trans_domid, trans_gref);
388 	if (rc < 0) {
389 		put_free_entry(ref);
390 		return rc;
391 	}
392 
393 	return ref;
394 }
395 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans);
396 
397 bool gnttab_trans_grants_available(void)
398 {
399 	return gnttab_interface->update_trans_entry != NULL;
400 }
401 EXPORT_SYMBOL_GPL(gnttab_trans_grants_available);
402 
403 static int gnttab_query_foreign_access_v1(grant_ref_t ref)
404 {
405 	return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing);
406 }
407 
408 static int gnttab_query_foreign_access_v2(grant_ref_t ref)
409 {
410 	return grstatus[ref] & (GTF_reading|GTF_writing);
411 }
412 
413 int gnttab_query_foreign_access(grant_ref_t ref)
414 {
415 	return gnttab_interface->query_foreign_access(ref);
416 }
417 EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
418 
419 static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly)
420 {
421 	u16 flags, nflags;
422 	u16 *pflags;
423 
424 	pflags = &gnttab_shared.v1[ref].flags;
425 	nflags = *pflags;
426 	do {
427 		flags = nflags;
428 		if (flags & (GTF_reading|GTF_writing))
429 			return 0;
430 	} while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags);
431 
432 	return 1;
433 }
434 
435 static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref, int readonly)
436 {
437 	gnttab_shared.v2[ref].hdr.flags = 0;
438 	mb();
439 	if (grstatus[ref] & (GTF_reading|GTF_writing)) {
440 		return 0;
441 	} else {
442 		/* The read of grstatus needs to have acquire
443 		semantics.  On x86, reads already have
444 		that, and we just need to protect against
445 		compiler reorderings.  On other
446 		architectures we may need a full
447 		barrier. */
448 #ifdef CONFIG_X86
449 		barrier();
450 #else
451 		mb();
452 #endif
453 	}
454 
455 	return 1;
456 }
457 
458 static inline int _gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
459 {
460 	return gnttab_interface->end_foreign_access_ref(ref, readonly);
461 }
462 
463 int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
464 {
465 	if (_gnttab_end_foreign_access_ref(ref, readonly))
466 		return 1;
467 	pr_warn("WARNING: g.e. %#x still in use!\n", ref);
468 	return 0;
469 }
470 EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
471 
/*
 * A grant reference whose foreign access could not be ended immediately and
 * which is retried later from the deferred timer.
 */
struct deferred_entry {
	struct list_head list;	/* link in deferred_list */
	grant_ref_t ref;	/* grant reference to retry */
	bool ro;		/* readonly argument passed on each retry */
	uint16_t warn_delay;	/* retries left before "still pending" is logged */
	struct page *page;	/* page freed once access has ended, or NULL */
};
/* Pending deferred entries; updated under gnttab_list_lock. */
static LIST_HEAD(deferred_list);
static void gnttab_handle_deferred(unsigned long);
/* Timer that runs gnttab_handle_deferred() while deferred_list is non-empty. */
static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred, 0, 0);
482 
/*
 * Timer callback: retry ending foreign access for entries on deferred_list.
 *
 * At most 10 entries are examined per invocation. Entries whose access can
 * now be ended are released (grant reference back on the free list, page
 * freed if one was attached); the others are moved to the tail of the list.
 * If entries remain afterwards the timer is re-armed for HZ jiffies later.
 */
static void gnttab_handle_deferred(unsigned long unused)
{
	unsigned int nr = 10;
	/* First entry re-queued in this run; seeing it again means one full lap. */
	struct deferred_entry *first = NULL;
	unsigned long flags;

	spin_lock_irqsave(&gnttab_list_lock, flags);
	while (nr--) {
		struct deferred_entry *entry
			= list_first_entry(&deferred_list,
					   struct deferred_entry, list);

		if (entry == first)
			break;
		list_del(&entry->list);
		/*
		 * Drop the lock for the retry: put_free_entry() below takes
		 * gnttab_list_lock itself.
		 */
		spin_unlock_irqrestore(&gnttab_list_lock, flags);
		if (_gnttab_end_foreign_access_ref(entry->ref, entry->ro)) {
			put_free_entry(entry->ref);
			if (entry->page) {
				pr_debug("freeing g.e. %#x (pfn %#lx)\n",
					 entry->ref, page_to_pfn(entry->page));
				__free_page(entry->page);
			} else
				pr_info("freeing g.e. %#x\n", entry->ref);
			kfree(entry);
			/* NULL marks the entry as done, so it is not re-queued. */
			entry = NULL;
		} else {
			/* Log once, when the countdown reaches zero. */
			if (!--entry->warn_delay)
				pr_info("g.e. %#x still pending\n",
					entry->ref);
			if (!first)
				first = entry;
		}
		spin_lock_irqsave(&gnttab_list_lock, flags);
		if (entry)
			list_add_tail(&entry->list, &deferred_list);
		else if (list_empty(&deferred_list))
			break;
	}
	/* Entries left over: make sure another run is scheduled. */
	if (!list_empty(&deferred_list) && !timer_pending(&deferred_timer)) {
		deferred_timer.expires = jiffies + HZ;
		add_timer(&deferred_timer);
	}
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
528 
529 static void gnttab_add_deferred(grant_ref_t ref, bool readonly,
530 				struct page *page)
531 {
532 	struct deferred_entry *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
533 	const char *what = KERN_WARNING "leaking";
534 
535 	if (entry) {
536 		unsigned long flags;
537 
538 		entry->ref = ref;
539 		entry->ro = readonly;
540 		entry->page = page;
541 		entry->warn_delay = 60;
542 		spin_lock_irqsave(&gnttab_list_lock, flags);
543 		list_add_tail(&entry->list, &deferred_list);
544 		if (!timer_pending(&deferred_timer)) {
545 			deferred_timer.expires = jiffies + HZ;
546 			add_timer(&deferred_timer);
547 		}
548 		spin_unlock_irqrestore(&gnttab_list_lock, flags);
549 		what = KERN_DEBUG "deferring";
550 	}
551 	printk("%s g.e. %#x (pfn %#lx)\n",
552 	       what, ref, page ? page_to_pfn(page) : -1);
553 }
554 
555 void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
556 			       unsigned long page)
557 {
558 	if (gnttab_end_foreign_access_ref(ref, readonly)) {
559 		put_free_entry(ref);
560 		if (page != 0)
561 			free_page(page);
562 	} else
563 		gnttab_add_deferred(ref, readonly,
564 				    page ? virt_to_page(page) : NULL);
565 }
566 EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
567 
568 int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
569 {
570 	int ref;
571 
572 	ref = get_free_entries(1);
573 	if (unlikely(ref < 0))
574 		return -ENOSPC;
575 	gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
576 
577 	return ref;
578 }
579 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);
580 
581 void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
582 				       unsigned long pfn)
583 {
584 	gnttab_interface->update_entry(ref, domid, pfn, GTF_accept_transfer);
585 }
586 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);
587 
588 static unsigned long gnttab_end_foreign_transfer_ref_v1(grant_ref_t ref)
589 {
590 	unsigned long frame;
591 	u16           flags;
592 	u16          *pflags;
593 
594 	pflags = &gnttab_shared.v1[ref].flags;
595 
596 	/*
597 	 * If a transfer is not even yet started, try to reclaim the grant
598 	 * reference and return failure (== 0).
599 	 */
600 	while (!((flags = *pflags) & GTF_transfer_committed)) {
601 		if (sync_cmpxchg(pflags, flags, 0) == flags)
602 			return 0;
603 		cpu_relax();
604 	}
605 
606 	/* If a transfer is in progress then wait until it is completed. */
607 	while (!(flags & GTF_transfer_completed)) {
608 		flags = *pflags;
609 		cpu_relax();
610 	}
611 
612 	rmb();	/* Read the frame number /after/ reading completion status. */
613 	frame = gnttab_shared.v1[ref].frame;
614 	BUG_ON(frame == 0);
615 
616 	return frame;
617 }
618 
619 static unsigned long gnttab_end_foreign_transfer_ref_v2(grant_ref_t ref)
620 {
621 	unsigned long frame;
622 	u16           flags;
623 	u16          *pflags;
624 
625 	pflags = &gnttab_shared.v2[ref].hdr.flags;
626 
627 	/*
628 	 * If a transfer is not even yet started, try to reclaim the grant
629 	 * reference and return failure (== 0).
630 	 */
631 	while (!((flags = *pflags) & GTF_transfer_committed)) {
632 		if (sync_cmpxchg(pflags, flags, 0) == flags)
633 			return 0;
634 		cpu_relax();
635 	}
636 
637 	/* If a transfer is in progress then wait until it is completed. */
638 	while (!(flags & GTF_transfer_completed)) {
639 		flags = *pflags;
640 		cpu_relax();
641 	}
642 
643 	rmb();  /* Read the frame number /after/ reading completion status. */
644 	frame = gnttab_shared.v2[ref].full_page.frame;
645 	BUG_ON(frame == 0);
646 
647 	return frame;
648 }
649 
650 unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
651 {
652 	return gnttab_interface->end_foreign_transfer_ref(ref);
653 }
654 EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref);
655 
656 unsigned long gnttab_end_foreign_transfer(grant_ref_t ref)
657 {
658 	unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
659 	put_free_entry(ref);
660 	return frame;
661 }
662 EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer);
663 
/*
 * Return the single grant reference @ref to the global free list. The grant
 * table entry itself is not touched here.
 */
void gnttab_free_grant_reference(grant_ref_t ref)
{
	put_free_entry(ref);
}
EXPORT_SYMBOL_GPL(gnttab_free_grant_reference);
669 
670 void gnttab_free_grant_references(grant_ref_t head)
671 {
672 	grant_ref_t ref;
673 	unsigned long flags;
674 	int count = 1;
675 	if (head == GNTTAB_LIST_END)
676 		return;
677 	spin_lock_irqsave(&gnttab_list_lock, flags);
678 	ref = head;
679 	while (gnttab_entry(ref) != GNTTAB_LIST_END) {
680 		ref = gnttab_entry(ref);
681 		count++;
682 	}
683 	gnttab_entry(ref) = gnttab_free_head;
684 	gnttab_free_head = head;
685 	gnttab_free_count += count;
686 	check_free_callbacks();
687 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
688 }
689 EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
690 
691 int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
692 {
693 	int h = get_free_entries(count);
694 
695 	if (h < 0)
696 		return -ENOSPC;
697 
698 	*head = h;
699 
700 	return 0;
701 }
702 EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
703 
704 int gnttab_empty_grant_references(const grant_ref_t *private_head)
705 {
706 	return (*private_head == GNTTAB_LIST_END);
707 }
708 EXPORT_SYMBOL_GPL(gnttab_empty_grant_references);
709 
710 int gnttab_claim_grant_reference(grant_ref_t *private_head)
711 {
712 	grant_ref_t g = *private_head;
713 	if (unlikely(g == GNTTAB_LIST_END))
714 		return -ENOSPC;
715 	*private_head = gnttab_entry(g);
716 	return g;
717 }
718 EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);
719 
720 void gnttab_release_grant_reference(grant_ref_t *private_head,
721 				    grant_ref_t release)
722 {
723 	gnttab_entry(release) = *private_head;
724 	*private_head = release;
725 }
726 EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);
727 
728 void gnttab_request_free_callback(struct gnttab_free_callback *callback,
729 				  void (*fn)(void *), void *arg, u16 count)
730 {
731 	unsigned long flags;
732 	spin_lock_irqsave(&gnttab_list_lock, flags);
733 	if (callback->next)
734 		goto out;
735 	callback->fn = fn;
736 	callback->arg = arg;
737 	callback->count = count;
738 	callback->next = gnttab_free_callback_list;
739 	gnttab_free_callback_list = callback;
740 	check_free_callbacks();
741 out:
742 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
743 }
744 EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
745 
746 void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
747 {
748 	struct gnttab_free_callback **pcb;
749 	unsigned long flags;
750 
751 	spin_lock_irqsave(&gnttab_list_lock, flags);
752 	for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
753 		if (*pcb == callback) {
754 			*pcb = callback->next;
755 			break;
756 		}
757 	}
758 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
759 }
760 EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
761 
762 static int grow_gnttab_list(unsigned int more_frames)
763 {
764 	unsigned int new_nr_grant_frames, extra_entries, i;
765 	unsigned int nr_glist_frames, new_nr_glist_frames;
766 
767 	BUG_ON(grefs_per_grant_frame == 0);
768 
769 	new_nr_grant_frames = nr_grant_frames + more_frames;
770 	extra_entries       = more_frames * grefs_per_grant_frame;
771 
772 	nr_glist_frames = (nr_grant_frames * grefs_per_grant_frame + RPP - 1) / RPP;
773 	new_nr_glist_frames =
774 		(new_nr_grant_frames * grefs_per_grant_frame + RPP - 1) / RPP;
775 	for (i = nr_glist_frames; i < new_nr_glist_frames; i++) {
776 		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
777 		if (!gnttab_list[i])
778 			goto grow_nomem;
779 	}
780 
781 
782 	for (i = grefs_per_grant_frame * nr_grant_frames;
783 	     i < grefs_per_grant_frame * new_nr_grant_frames - 1; i++)
784 		gnttab_entry(i) = i + 1;
785 
786 	gnttab_entry(i) = gnttab_free_head;
787 	gnttab_free_head = grefs_per_grant_frame * nr_grant_frames;
788 	gnttab_free_count += extra_entries;
789 
790 	nr_grant_frames = new_nr_grant_frames;
791 
792 	check_free_callbacks();
793 
794 	return 0;
795 
796 grow_nomem:
797 	for ( ; i >= nr_glist_frames; i--)
798 		free_page((unsigned long) gnttab_list[i]);
799 	return -ENOMEM;
800 }
801 
802 static unsigned int __max_nr_grant_frames(void)
803 {
804 	struct gnttab_query_size query;
805 	int rc;
806 
807 	query.dom = DOMID_SELF;
808 
809 	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
810 	if ((rc < 0) || (query.status != GNTST_okay))
811 		return 4; /* Legacy max supported number of frames */
812 
813 	return query.max_nr_frames;
814 }
815 
816 unsigned int gnttab_max_grant_frames(void)
817 {
818 	unsigned int xen_max = __max_nr_grant_frames();
819 
820 	if (xen_max > boot_max_nr_grant_frames)
821 		return boot_max_nr_grant_frames;
822 	return xen_max;
823 }
824 EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
825 
826 /* Handling of paged out grant targets (GNTST_eagain) */
827 #define MAX_DELAY 256
828 static inline void
829 gnttab_retry_eagain_gop(unsigned int cmd, void *gop, int16_t *status,
830 						const char *func)
831 {
832 	unsigned delay = 1;
833 
834 	do {
835 		BUG_ON(HYPERVISOR_grant_table_op(cmd, gop, 1));
836 		if (*status == GNTST_eagain)
837 			msleep(delay++);
838 	} while ((*status == GNTST_eagain) && (delay < MAX_DELAY));
839 
840 	if (delay >= MAX_DELAY) {
841 		printk(KERN_ERR "%s: %s eagain grant\n", func, current->comm);
842 		*status = GNTST_bad_page;
843 	}
844 }
845 
846 void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count)
847 {
848 	struct gnttab_map_grant_ref *op;
849 
850 	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, batch, count))
851 		BUG();
852 	for (op = batch; op < batch + count; op++)
853 		if (op->status == GNTST_eagain)
854 			gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, op,
855 						&op->status, __func__);
856 }
857 EXPORT_SYMBOL_GPL(gnttab_batch_map);
858 
859 void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count)
860 {
861 	struct gnttab_copy *op;
862 
863 	if (HYPERVISOR_grant_table_op(GNTTABOP_copy, batch, count))
864 		BUG();
865 	for (op = batch; op < batch + count; op++)
866 		if (op->status == GNTST_eagain)
867 			gnttab_retry_eagain_gop(GNTTABOP_copy, op,
868 						&op->status, __func__);
869 }
870 EXPORT_SYMBOL_GPL(gnttab_batch_copy);
871 
872 int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
873 		    struct gnttab_map_grant_ref *kmap_ops,
874 		    struct page **pages, unsigned int count)
875 {
876 	int i, ret;
877 	bool lazy = false;
878 	pte_t *pte;
879 	unsigned long mfn;
880 
881 	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map_ops, count);
882 	if (ret)
883 		return ret;
884 
885 	/* Retry eagain maps */
886 	for (i = 0; i < count; i++)
887 		if (map_ops[i].status == GNTST_eagain)
888 			gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i,
889 						&map_ops[i].status, __func__);
890 
891 	if (xen_feature(XENFEAT_auto_translated_physmap))
892 		return ret;
893 
894 	if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
895 		arch_enter_lazy_mmu_mode();
896 		lazy = true;
897 	}
898 
899 	for (i = 0; i < count; i++) {
900 		/* Do not add to override if the map failed. */
901 		if (map_ops[i].status)
902 			continue;
903 
904 		if (map_ops[i].flags & GNTMAP_contains_pte) {
905 			pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
906 				(map_ops[i].host_addr & ~PAGE_MASK));
907 			mfn = pte_mfn(*pte);
908 		} else {
909 			mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
910 		}
911 		ret = m2p_add_override(mfn, pages[i], kmap_ops ?
912 				       &kmap_ops[i] : NULL);
913 		if (ret)
914 			return ret;
915 	}
916 
917 	if (lazy)
918 		arch_leave_lazy_mmu_mode();
919 
920 	return ret;
921 }
922 EXPORT_SYMBOL_GPL(gnttab_map_refs);
923 
924 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
925 		      struct gnttab_map_grant_ref *kmap_ops,
926 		      struct page **pages, unsigned int count)
927 {
928 	int i, ret;
929 	bool lazy = false;
930 
931 	ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count);
932 	if (ret)
933 		return ret;
934 
935 	if (xen_feature(XENFEAT_auto_translated_physmap))
936 		return ret;
937 
938 	if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
939 		arch_enter_lazy_mmu_mode();
940 		lazy = true;
941 	}
942 
943 	for (i = 0; i < count; i++) {
944 		ret = m2p_remove_override(pages[i], kmap_ops ?
945 				       &kmap_ops[i] : NULL);
946 		if (ret)
947 			return ret;
948 	}
949 
950 	if (lazy)
951 		arch_leave_lazy_mmu_mode();
952 
953 	return ret;
954 }
955 EXPORT_SYMBOL_GPL(gnttab_unmap_refs);
956 
957 static unsigned nr_status_frames(unsigned nr_grant_frames)
958 {
959 	BUG_ON(grefs_per_grant_frame == 0);
960 	return (nr_grant_frames * grefs_per_grant_frame + SPP - 1) / SPP;
961 }
962 
963 static int gnttab_map_frames_v1(xen_pfn_t *frames, unsigned int nr_gframes)
964 {
965 	int rc;
966 
967 	rc = arch_gnttab_map_shared(frames, nr_gframes,
968 				    gnttab_max_grant_frames(),
969 				    &gnttab_shared.addr);
970 	BUG_ON(rc);
971 
972 	return 0;
973 }
974 
975 static void gnttab_unmap_frames_v1(void)
976 {
977 	arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
978 }
979 
980 static int gnttab_map_frames_v2(xen_pfn_t *frames, unsigned int nr_gframes)
981 {
982 	uint64_t *sframes;
983 	unsigned int nr_sframes;
984 	struct gnttab_get_status_frames getframes;
985 	int rc;
986 
987 	nr_sframes = nr_status_frames(nr_gframes);
988 
989 	/* No need for kzalloc as it is initialized in following hypercall
990 	 * GNTTABOP_get_status_frames.
991 	 */
992 	sframes = kmalloc(nr_sframes  * sizeof(uint64_t), GFP_ATOMIC);
993 	if (!sframes)
994 		return -ENOMEM;
995 
996 	getframes.dom        = DOMID_SELF;
997 	getframes.nr_frames  = nr_sframes;
998 	set_xen_guest_handle(getframes.frame_list, sframes);
999 
1000 	rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
1001 				       &getframes, 1);
1002 	if (rc == -ENOSYS) {
1003 		kfree(sframes);
1004 		return -ENOSYS;
1005 	}
1006 
1007 	BUG_ON(rc || getframes.status);
1008 
1009 	rc = arch_gnttab_map_status(sframes, nr_sframes,
1010 				    nr_status_frames(gnttab_max_grant_frames()),
1011 				    &grstatus);
1012 	BUG_ON(rc);
1013 	kfree(sframes);
1014 
1015 	rc = arch_gnttab_map_shared(frames, nr_gframes,
1016 				    gnttab_max_grant_frames(),
1017 				    &gnttab_shared.addr);
1018 	BUG_ON(rc);
1019 
1020 	return 0;
1021 }
1022 
1023 static void gnttab_unmap_frames_v2(void)
1024 {
1025 	arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
1026 	arch_gnttab_unmap(grstatus, nr_status_frames(nr_grant_frames));
1027 }
1028 
1029 static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
1030 {
1031 	struct gnttab_setup_table setup;
1032 	xen_pfn_t *frames;
1033 	unsigned int nr_gframes = end_idx + 1;
1034 	int rc;
1035 
1036 	if (xen_hvm_domain()) {
1037 		struct xen_add_to_physmap xatp;
1038 		unsigned int i = end_idx;
1039 		rc = 0;
1040 		/*
1041 		 * Loop backwards, so that the first hypercall has the largest
1042 		 * index, ensuring that the table will grow only once.
1043 		 */
1044 		do {
1045 			xatp.domid = DOMID_SELF;
1046 			xatp.idx = i;
1047 			xatp.space = XENMAPSPACE_grant_table;
1048 			xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i;
1049 			rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
1050 			if (rc != 0) {
1051 				printk(KERN_WARNING
1052 						"grant table add_to_physmap failed, err=%d\n", rc);
1053 				break;
1054 			}
1055 		} while (i-- > start_idx);
1056 
1057 		return rc;
1058 	}
1059 
1060 	/* No need for kzalloc as it is initialized in following hypercall
1061 	 * GNTTABOP_setup_table.
1062 	 */
1063 	frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
1064 	if (!frames)
1065 		return -ENOMEM;
1066 
1067 	setup.dom        = DOMID_SELF;
1068 	setup.nr_frames  = nr_gframes;
1069 	set_xen_guest_handle(setup.frame_list, frames);
1070 
1071 	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
1072 	if (rc == -ENOSYS) {
1073 		kfree(frames);
1074 		return -ENOSYS;
1075 	}
1076 
1077 	BUG_ON(rc || setup.status);
1078 
1079 	rc = gnttab_interface->map_frames(frames, nr_gframes);
1080 
1081 	kfree(frames);
1082 
1083 	return rc;
1084 }
1085 
1086 static struct gnttab_ops gnttab_v1_ops = {
1087 	.map_frames			= gnttab_map_frames_v1,
1088 	.unmap_frames			= gnttab_unmap_frames_v1,
1089 	.update_entry			= gnttab_update_entry_v1,
1090 	.end_foreign_access_ref		= gnttab_end_foreign_access_ref_v1,
1091 	.end_foreign_transfer_ref	= gnttab_end_foreign_transfer_ref_v1,
1092 	.query_foreign_access		= gnttab_query_foreign_access_v1,
1093 };
1094 
1095 static struct gnttab_ops gnttab_v2_ops = {
1096 	.map_frames			= gnttab_map_frames_v2,
1097 	.unmap_frames			= gnttab_unmap_frames_v2,
1098 	.update_entry			= gnttab_update_entry_v2,
1099 	.end_foreign_access_ref		= gnttab_end_foreign_access_ref_v2,
1100 	.end_foreign_transfer_ref	= gnttab_end_foreign_transfer_ref_v2,
1101 	.query_foreign_access		= gnttab_query_foreign_access_v2,
1102 	.update_subpage_entry		= gnttab_update_subpage_entry_v2,
1103 	.update_trans_entry		= gnttab_update_trans_entry_v2,
1104 };
1105 
1106 static void gnttab_request_version(void)
1107 {
1108 	int rc;
1109 	struct gnttab_set_version gsv;
1110 
1111 	if (xen_hvm_domain())
1112 		gsv.version = 1;
1113 	else
1114 		gsv.version = 2;
1115 	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
1116 	if (rc == 0 && gsv.version == 2) {
1117 		grant_table_version = 2;
1118 		grefs_per_grant_frame = PAGE_SIZE / sizeof(union grant_entry_v2);
1119 		gnttab_interface = &gnttab_v2_ops;
1120 	} else if (grant_table_version == 2) {
1121 		/*
1122 		 * If we've already used version 2 features,
1123 		 * but then suddenly discover that they're not
1124 		 * available (e.g. migrating to an older
1125 		 * version of Xen), almost unbounded badness
1126 		 * can happen.
1127 		 */
1128 		panic("we need grant tables version 2, but only version 1 is available");
1129 	} else {
1130 		grant_table_version = 1;
1131 		grefs_per_grant_frame = PAGE_SIZE / sizeof(struct grant_entry_v1);
1132 		gnttab_interface = &gnttab_v1_ops;
1133 	}
1134 	printk(KERN_INFO "Grant tables using version %d layout.\n",
1135 		grant_table_version);
1136 }
1137 
1138 static int gnttab_setup(void)
1139 {
1140 	unsigned int max_nr_gframes;
1141 
1142 	max_nr_gframes = gnttab_max_grant_frames();
1143 	if (max_nr_gframes < nr_grant_frames)
1144 		return -ENOSYS;
1145 
1146 	if (xen_pv_domain())
1147 		return gnttab_map(0, nr_grant_frames - 1);
1148 
1149 	if (gnttab_shared.addr == NULL) {
1150 		gnttab_shared.addr = ioremap(xen_hvm_resume_frames,
1151 						PAGE_SIZE * max_nr_gframes);
1152 		if (gnttab_shared.addr == NULL) {
1153 			printk(KERN_WARNING
1154 					"Failed to ioremap gnttab share frames!");
1155 			return -ENOMEM;
1156 		}
1157 	}
1158 
1159 	gnttab_map(0, nr_grant_frames - 1);
1160 
1161 	return 0;
1162 }
1163 
/*
 * Re-establish the grant table: renegotiate the layout version, then
 * map the frames again.  Returns the result of gnttab_setup().
 */
int gnttab_resume(void)
{
	gnttab_request_version();

	return gnttab_setup();
}
1169 
1170 int gnttab_suspend(void)
1171 {
1172 	gnttab_interface->unmap_frames();
1173 	return 0;
1174 }
1175 
1176 static int gnttab_expand(unsigned int req_entries)
1177 {
1178 	int rc;
1179 	unsigned int cur, extra;
1180 
1181 	BUG_ON(grefs_per_grant_frame == 0);
1182 	cur = nr_grant_frames;
1183 	extra = ((req_entries + (grefs_per_grant_frame-1)) /
1184 		 grefs_per_grant_frame);
1185 	if (cur + extra > gnttab_max_grant_frames())
1186 		return -ENOSPC;
1187 
1188 	rc = gnttab_map(cur, cur + extra - 1);
1189 	if (rc == 0)
1190 		rc = grow_gnttab_list(extra);
1191 
1192 	return rc;
1193 }
1194 
1195 int gnttab_init(void)
1196 {
1197 	int i;
1198 	unsigned int max_nr_glist_frames, nr_glist_frames;
1199 	unsigned int nr_init_grefs;
1200 	int ret;
1201 
1202 	gnttab_request_version();
1203 	nr_grant_frames = 1;
1204 	boot_max_nr_grant_frames = __max_nr_grant_frames();
1205 
1206 	/* Determine the maximum number of frames required for the
1207 	 * grant reference free list on the current hypervisor.
1208 	 */
1209 	BUG_ON(grefs_per_grant_frame == 0);
1210 	max_nr_glist_frames = (boot_max_nr_grant_frames *
1211 			       grefs_per_grant_frame / RPP);
1212 
1213 	gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
1214 			      GFP_KERNEL);
1215 	if (gnttab_list == NULL)
1216 		return -ENOMEM;
1217 
1218 	nr_glist_frames = (nr_grant_frames * grefs_per_grant_frame + RPP - 1) / RPP;
1219 	for (i = 0; i < nr_glist_frames; i++) {
1220 		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
1221 		if (gnttab_list[i] == NULL) {
1222 			ret = -ENOMEM;
1223 			goto ini_nomem;
1224 		}
1225 	}
1226 
1227 	if (gnttab_setup() < 0) {
1228 		ret = -ENODEV;
1229 		goto ini_nomem;
1230 	}
1231 
1232 	nr_init_grefs = nr_grant_frames * grefs_per_grant_frame;
1233 
1234 	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
1235 		gnttab_entry(i) = i + 1;
1236 
1237 	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
1238 	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
1239 	gnttab_free_head  = NR_RESERVED_ENTRIES;
1240 
1241 	printk("Grant table initialized\n");
1242 	return 0;
1243 
1244  ini_nomem:
1245 	for (i--; i >= 0; i--)
1246 		free_page((unsigned long)gnttab_list[i]);
1247 	kfree(gnttab_list);
1248 	return ret;
1249 }
1250 EXPORT_SYMBOL_GPL(gnttab_init);
1251 
1252 static int __gnttab_init(void)
1253 {
1254 	/* Delay grant-table initialization in the PV on HVM case */
1255 	if (xen_hvm_domain())
1256 		return 0;
1257 
1258 	if (!xen_pv_domain())
1259 		return -ENODEV;
1260 
1261 	return gnttab_init();
1262 }
1263 
1264 core_initcall(__gnttab_init);
1265