1 /******************************************************************************
2  * Client-facing interface for the Xenbus driver.  In other words, the
3  * interface between the Xenbus and the device-specific code, be it the
4  * frontend or the backend of that driver.
5  *
6  * Copyright (C) 2005 XenSource Ltd
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License version 2
10  * as published by the Free Software Foundation; or, when distributed
11  * separately from the Linux kernel or incorporated into other
12  * software packages, subject to the following license:
13  *
14  * Permission is hereby granted, free of charge, to any person obtaining a copy
15  * of this source file (the "Software"), to deal in the Software without
16  * restriction, including without limitation the rights to use, copy, modify,
17  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18  * and to permit persons to whom the Software is furnished to do so, subject to
19  * the following conditions:
20  *
21  * The above copyright notice and this permission notice shall be included in
22  * all copies or substantial portions of the Software.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30  * IN THE SOFTWARE.
31  */
32 
33 #include <linux/mm.h>
34 #include <linux/slab.h>
35 #include <linux/types.h>
36 #include <linux/spinlock.h>
37 #include <linux/vmalloc.h>
38 #include <linux/export.h>
39 #include <asm/xen/hypervisor.h>
40 #include <xen/page.h>
41 #include <xen/interface/xen.h>
42 #include <xen/interface/event_channel.h>
43 #include <xen/balloon.h>
44 #include <xen/events.h>
45 #include <xen/grant_table.h>
46 #include <xen/xenbus.h>
47 #include <xen/xen.h>
48 #include <xen/features.h>
49 
50 #include "xenbus.h"
51 
/*
 * Number of pages needed to cover @_grants grants: the grant count is
 * divided by XEN_PFN_PER_PAGE, rounding up.
 */
#define XENBUS_PAGES(_grants)	(DIV_ROUND_UP((_grants), XEN_PFN_PER_PAGE))

/* Page count corresponding to the largest ring, XENBUS_MAX_RING_GRANTS. */
#define XENBUS_MAX_RING_PAGES	(XENBUS_PAGES(XENBUS_MAX_RING_GRANTS))
55 
56 struct xenbus_map_node {
57 	struct list_head next;
58 	union {
59 		struct {
60 			struct vm_struct *area;
61 		} pv;
62 		struct {
63 			struct page *pages[XENBUS_MAX_RING_PAGES];
64 			unsigned long addrs[XENBUS_MAX_RING_GRANTS];
65 			void *addr;
66 		} hvm;
67 	};
68 	grant_handle_t handles[XENBUS_MAX_RING_GRANTS];
69 	unsigned int   nr_handles;
70 };
71 
72 struct map_ring_valloc {
73 	struct xenbus_map_node *node;
74 
75 	/* Why do we need two arrays? See comment of __xenbus_map_ring */
76 	unsigned long addrs[XENBUS_MAX_RING_GRANTS];
77 	phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
78 
79 	struct gnttab_map_grant_ref map[XENBUS_MAX_RING_GRANTS];
80 	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
81 
82 	unsigned int idx;
83 };
84 
/*
 * List of xenbus_map_node entries for the currently mapped rings.  The
 * map and unmap operations take xenbus_valloc_lock around every list
 * insertion, lookup and removal.
 */
static LIST_HEAD(xenbus_valloc_pages);
static DEFINE_SPINLOCK(xenbus_valloc_lock);
87 
88 struct xenbus_ring_ops {
89 	int (*map)(struct xenbus_device *dev, struct map_ring_valloc *info,
90 		   grant_ref_t *gnt_refs, unsigned int nr_grefs,
91 		   void **vaddr);
92 	int (*unmap)(struct xenbus_device *dev, void *vaddr);
93 };
94 
95 static const struct xenbus_ring_ops *ring_ops __read_mostly;
96 
97 const char *xenbus_strstate(enum xenbus_state state)
98 {
99 	static const char *const name[] = {
100 		[ XenbusStateUnknown      ] = "Unknown",
101 		[ XenbusStateInitialising ] = "Initialising",
102 		[ XenbusStateInitWait     ] = "InitWait",
103 		[ XenbusStateInitialised  ] = "Initialised",
104 		[ XenbusStateConnected    ] = "Connected",
105 		[ XenbusStateClosing      ] = "Closing",
106 		[ XenbusStateClosed	  ] = "Closed",
107 		[XenbusStateReconfiguring] = "Reconfiguring",
108 		[XenbusStateReconfigured] = "Reconfigured",
109 	};
110 	return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
111 }
112 EXPORT_SYMBOL_GPL(xenbus_strstate);
113 
114 /**
115  * xenbus_watch_path - register a watch
116  * @dev: xenbus device
117  * @path: path to watch
118  * @watch: watch to register
119  * @callback: callback to register
120  *
121  * Register a @watch on the given path, using the given xenbus_watch structure
122  * for storage, and the given @callback function as the callback.  Return 0 on
123  * success, or -errno on error.  On success, the given @path will be saved as
124  * @watch->node, and remains the caller's to free.  On error, @watch->node will
125  * be NULL, the device will switch to %XenbusStateClosing, and the error will
126  * be saved in the store.
127  */
128 int xenbus_watch_path(struct xenbus_device *dev, const char *path,
129 		      struct xenbus_watch *watch,
130 		      bool (*will_handle)(struct xenbus_watch *,
131 					  const char *, const char *),
132 		      void (*callback)(struct xenbus_watch *,
133 				       const char *, const char *))
134 {
135 	int err;
136 
137 	watch->node = path;
138 	watch->will_handle = will_handle;
139 	watch->callback = callback;
140 
141 	err = register_xenbus_watch(watch);
142 
143 	if (err) {
144 		watch->node = NULL;
145 		watch->will_handle = NULL;
146 		watch->callback = NULL;
147 		xenbus_dev_fatal(dev, err, "adding watch on %s", path);
148 	}
149 
150 	return err;
151 }
152 EXPORT_SYMBOL_GPL(xenbus_watch_path);
153 
154 
155 /**
156  * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path
157  * @dev: xenbus device
158  * @watch: watch to register
159  * @callback: callback to register
160  * @pathfmt: format of path to watch
161  *
162  * Register a watch on the given @path, using the given xenbus_watch
163  * structure for storage, and the given @callback function as the callback.
164  * Return 0 on success, or -errno on error.  On success, the watched path
165  * (@path/@path2) will be saved as @watch->node, and becomes the caller's to
166  * kfree().  On error, watch->node will be NULL, so the caller has nothing to
167  * free, the device will switch to %XenbusStateClosing, and the error will be
168  * saved in the store.
169  */
170 int xenbus_watch_pathfmt(struct xenbus_device *dev,
171 			 struct xenbus_watch *watch,
172 			 bool (*will_handle)(struct xenbus_watch *,
173 					const char *, const char *),
174 			 void (*callback)(struct xenbus_watch *,
175 					  const char *, const char *),
176 			 const char *pathfmt, ...)
177 {
178 	int err;
179 	va_list ap;
180 	char *path;
181 
182 	va_start(ap, pathfmt);
183 	path = kvasprintf(GFP_NOIO | __GFP_HIGH, pathfmt, ap);
184 	va_end(ap);
185 
186 	if (!path) {
187 		xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch");
188 		return -ENOMEM;
189 	}
190 	err = xenbus_watch_path(dev, path, watch, will_handle, callback);
191 
192 	if (err)
193 		kfree(path);
194 	return err;
195 }
196 EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
197 
198 static void xenbus_switch_fatal(struct xenbus_device *, int, int,
199 				const char *, ...);
200 
201 static int
202 __xenbus_switch_state(struct xenbus_device *dev,
203 		      enum xenbus_state state, int depth)
204 {
205 	/* We check whether the state is currently set to the given value, and
206 	   if not, then the state is set.  We don't want to unconditionally
207 	   write the given state, because we don't want to fire watches
208 	   unnecessarily.  Furthermore, if the node has gone, we don't write
209 	   to it, as the device will be tearing down, and we don't want to
210 	   resurrect that directory.
211 
212 	   Note that, because of this cached value of our state, this
213 	   function will not take a caller's Xenstore transaction
214 	   (something it was trying to in the past) because dev->state
215 	   would not get reset if the transaction was aborted.
216 	 */
217 
218 	struct xenbus_transaction xbt;
219 	int current_state;
220 	int err, abort;
221 
222 	if (state == dev->state)
223 		return 0;
224 
225 again:
226 	abort = 1;
227 
228 	err = xenbus_transaction_start(&xbt);
229 	if (err) {
230 		xenbus_switch_fatal(dev, depth, err, "starting transaction");
231 		return 0;
232 	}
233 
234 	err = xenbus_scanf(xbt, dev->nodename, "state", "%d", &current_state);
235 	if (err != 1)
236 		goto abort;
237 
238 	err = xenbus_printf(xbt, dev->nodename, "state", "%d", state);
239 	if (err) {
240 		xenbus_switch_fatal(dev, depth, err, "writing new state");
241 		goto abort;
242 	}
243 
244 	abort = 0;
245 abort:
246 	err = xenbus_transaction_end(xbt, abort);
247 	if (err) {
248 		if (err == -EAGAIN && !abort)
249 			goto again;
250 		xenbus_switch_fatal(dev, depth, err, "ending transaction");
251 	} else
252 		dev->state = state;
253 
254 	return 0;
255 }
256 
257 /**
258  * xenbus_switch_state
259  * @dev: xenbus device
260  * @state: new state
261  *
262  * Advertise in the store a change of the given driver to the given new_state.
263  * Return 0 on success, or -errno on error.  On error, the device will switch
264  * to XenbusStateClosing, and the error will be saved in the store.
265  */
266 int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
267 {
268 	return __xenbus_switch_state(dev, state, 0);
269 }
270 
271 EXPORT_SYMBOL_GPL(xenbus_switch_state);
272 
273 int xenbus_frontend_closed(struct xenbus_device *dev)
274 {
275 	xenbus_switch_state(dev, XenbusStateClosed);
276 	complete(&dev->down);
277 	return 0;
278 }
279 EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
280 
281 static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
282 				const char *fmt, va_list ap)
283 {
284 	unsigned int len;
285 	char *printf_buffer;
286 	char *path_buffer;
287 
288 #define PRINTF_BUFFER_SIZE 4096
289 
290 	printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
291 	if (!printf_buffer)
292 		return;
293 
294 	len = sprintf(printf_buffer, "%i ", -err);
295 	vsnprintf(printf_buffer + len, PRINTF_BUFFER_SIZE - len, fmt, ap);
296 
297 	dev_err(&dev->dev, "%s\n", printf_buffer);
298 
299 	path_buffer = kasprintf(GFP_KERNEL, "error/%s", dev->nodename);
300 	if (path_buffer)
301 		xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer);
302 
303 	kfree(printf_buffer);
304 	kfree(path_buffer);
305 }
306 
307 /**
308  * xenbus_dev_error
309  * @dev: xenbus device
310  * @err: error to report
311  * @fmt: error message format
312  *
313  * Report the given negative errno into the store, along with the given
314  * formatted message.
315  */
316 void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...)
317 {
318 	va_list ap;
319 
320 	va_start(ap, fmt);
321 	xenbus_va_dev_error(dev, err, fmt, ap);
322 	va_end(ap);
323 }
324 EXPORT_SYMBOL_GPL(xenbus_dev_error);
325 
326 /**
327  * xenbus_dev_fatal
328  * @dev: xenbus device
329  * @err: error to report
330  * @fmt: error message format
331  *
332  * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
333  * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly
334  * closedown of this driver and its peer.
335  */
336 
337 void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...)
338 {
339 	va_list ap;
340 
341 	va_start(ap, fmt);
342 	xenbus_va_dev_error(dev, err, fmt, ap);
343 	va_end(ap);
344 
345 	xenbus_switch_state(dev, XenbusStateClosing);
346 }
347 EXPORT_SYMBOL_GPL(xenbus_dev_fatal);
348 
349 /**
350  * Equivalent to xenbus_dev_fatal(dev, err, fmt, args), but helps
351  * avoiding recursion within xenbus_switch_state.
352  */
353 static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
354 				const char *fmt, ...)
355 {
356 	va_list ap;
357 
358 	va_start(ap, fmt);
359 	xenbus_va_dev_error(dev, err, fmt, ap);
360 	va_end(ap);
361 
362 	if (!depth)
363 		__xenbus_switch_state(dev, XenbusStateClosing, 1);
364 }
365 
366 /*
367  * xenbus_setup_ring
368  * @dev: xenbus device
369  * @vaddr: pointer to starting virtual address of the ring
370  * @nr_pages: number of pages to be granted
371  * @grefs: grant reference array to be filled in
372  *
373  * Allocate physically contiguous pages for a shared ring buffer and grant it
374  * to the peer of the given device. The ring buffer is initially filled with
375  * zeroes. The virtual address of the ring is stored at @vaddr and the
376  * grant references are stored in the @grefs array. In case of error @vaddr
377  * will be set to NULL and @grefs will be filled with INVALID_GRANT_REF.
378  */
379 int xenbus_setup_ring(struct xenbus_device *dev, gfp_t gfp, void **vaddr,
380 		      unsigned int nr_pages, grant_ref_t *grefs)
381 {
382 	unsigned long ring_size = nr_pages * XEN_PAGE_SIZE;
383 	grant_ref_t gref_head;
384 	unsigned int i;
385 	void *addr;
386 	int ret;
387 
388 	addr = *vaddr = alloc_pages_exact(ring_size, gfp | __GFP_ZERO);
389 	if (!*vaddr) {
390 		ret = -ENOMEM;
391 		goto err;
392 	}
393 
394 	ret = gnttab_alloc_grant_references(nr_pages, &gref_head);
395 	if (ret) {
396 		xenbus_dev_fatal(dev, ret, "granting access to %u ring pages",
397 				 nr_pages);
398 		goto err;
399 	}
400 
401 	for (i = 0; i < nr_pages; i++) {
402 		unsigned long gfn;
403 
404 		if (is_vmalloc_addr(*vaddr))
405 			gfn = pfn_to_gfn(vmalloc_to_pfn(addr));
406 		else
407 			gfn = virt_to_gfn(addr);
408 
409 		grefs[i] = gnttab_claim_grant_reference(&gref_head);
410 		gnttab_grant_foreign_access_ref(grefs[i], dev->otherend_id,
411 						gfn, 0);
412 
413 		addr += XEN_PAGE_SIZE;
414 	}
415 
416 	return 0;
417 
418  err:
419 	if (*vaddr)
420 		free_pages_exact(*vaddr, ring_size);
421 	for (i = 0; i < nr_pages; i++)
422 		grefs[i] = INVALID_GRANT_REF;
423 	*vaddr = NULL;
424 
425 	return ret;
426 }
427 EXPORT_SYMBOL_GPL(xenbus_setup_ring);
428 
429 /*
430  * xenbus_teardown_ring
431  * @vaddr: starting virtual address of the ring
432  * @nr_pages: number of pages
433  * @grefs: grant reference array
434  *
435  * Remove grants for the shared ring buffer and free the associated memory.
436  * On return the grant reference array is filled with INVALID_GRANT_REF.
437  */
438 void xenbus_teardown_ring(void **vaddr, unsigned int nr_pages,
439 			  grant_ref_t *grefs)
440 {
441 	unsigned int i;
442 
443 	for (i = 0; i < nr_pages; i++) {
444 		if (grefs[i] != INVALID_GRANT_REF) {
445 			gnttab_end_foreign_access(grefs[i], NULL);
446 			grefs[i] = INVALID_GRANT_REF;
447 		}
448 	}
449 
450 	if (*vaddr)
451 		free_pages_exact(*vaddr, nr_pages * XEN_PAGE_SIZE);
452 	*vaddr = NULL;
453 }
454 EXPORT_SYMBOL_GPL(xenbus_teardown_ring);
455 
456 /**
457  * Allocate an event channel for the given xenbus_device, assigning the newly
458  * created local port to *port.  Return 0 on success, or -errno on error.  On
459  * error, the device will switch to XenbusStateClosing, and the error will be
460  * saved in the store.
461  */
462 int xenbus_alloc_evtchn(struct xenbus_device *dev, evtchn_port_t *port)
463 {
464 	struct evtchn_alloc_unbound alloc_unbound;
465 	int err;
466 
467 	alloc_unbound.dom = DOMID_SELF;
468 	alloc_unbound.remote_dom = dev->otherend_id;
469 
470 	err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
471 					  &alloc_unbound);
472 	if (err)
473 		xenbus_dev_fatal(dev, err, "allocating event channel");
474 	else
475 		*port = alloc_unbound.port;
476 
477 	return err;
478 }
479 EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn);
480 
481 
482 /**
483  * Free an existing event channel. Returns 0 on success or -errno on error.
484  */
485 int xenbus_free_evtchn(struct xenbus_device *dev, evtchn_port_t port)
486 {
487 	struct evtchn_close close;
488 	int err;
489 
490 	close.port = port;
491 
492 	err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
493 	if (err)
494 		xenbus_dev_error(dev, err, "freeing event channel %u", port);
495 
496 	return err;
497 }
498 EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
499 
500 
501 /**
502  * xenbus_map_ring_valloc
503  * @dev: xenbus device
504  * @gnt_refs: grant reference array
505  * @nr_grefs: number of grant references
506  * @vaddr: pointer to address to be filled out by mapping
507  *
508  * Map @nr_grefs pages of memory into this domain from another
509  * domain's grant table.  xenbus_map_ring_valloc allocates @nr_grefs
510  * pages of virtual address space, maps the pages to that address, and
511  * sets *vaddr to that address.  Returns 0 on success, and -errno on
512  * error. If an error is returned, device will switch to
513  * XenbusStateClosing and the error message will be saved in XenStore.
514  */
515 int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs,
516 			   unsigned int nr_grefs, void **vaddr)
517 {
518 	int err;
519 	struct map_ring_valloc *info;
520 
521 	*vaddr = NULL;
522 
523 	if (nr_grefs > XENBUS_MAX_RING_GRANTS)
524 		return -EINVAL;
525 
526 	info = kzalloc(sizeof(*info), GFP_KERNEL);
527 	if (!info)
528 		return -ENOMEM;
529 
530 	info->node = kzalloc(sizeof(*info->node), GFP_KERNEL);
531 	if (!info->node)
532 		err = -ENOMEM;
533 	else
534 		err = ring_ops->map(dev, info, gnt_refs, nr_grefs, vaddr);
535 
536 	kfree(info->node);
537 	kfree(info);
538 	return err;
539 }
540 EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
541 
542 /* N.B. sizeof(phys_addr_t) doesn't always equal to sizeof(unsigned
543  * long), e.g. 32-on-64.  Caller is responsible for preparing the
544  * right array to feed into this function */
545 static int __xenbus_map_ring(struct xenbus_device *dev,
546 			     grant_ref_t *gnt_refs,
547 			     unsigned int nr_grefs,
548 			     grant_handle_t *handles,
549 			     struct map_ring_valloc *info,
550 			     unsigned int flags,
551 			     bool *leaked)
552 {
553 	int i, j;
554 
555 	if (nr_grefs > XENBUS_MAX_RING_GRANTS)
556 		return -EINVAL;
557 
558 	for (i = 0; i < nr_grefs; i++) {
559 		gnttab_set_map_op(&info->map[i], info->phys_addrs[i], flags,
560 				  gnt_refs[i], dev->otherend_id);
561 		handles[i] = INVALID_GRANT_HANDLE;
562 	}
563 
564 	gnttab_batch_map(info->map, i);
565 
566 	for (i = 0; i < nr_grefs; i++) {
567 		if (info->map[i].status != GNTST_okay) {
568 			xenbus_dev_fatal(dev, info->map[i].status,
569 					 "mapping in shared page %d from domain %d",
570 					 gnt_refs[i], dev->otherend_id);
571 			goto fail;
572 		} else
573 			handles[i] = info->map[i].handle;
574 	}
575 
576 	return 0;
577 
578  fail:
579 	for (i = j = 0; i < nr_grefs; i++) {
580 		if (handles[i] != INVALID_GRANT_HANDLE) {
581 			gnttab_set_unmap_op(&info->unmap[j],
582 					    info->phys_addrs[i],
583 					    GNTMAP_host_map, handles[i]);
584 			j++;
585 		}
586 	}
587 
588 	BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, info->unmap, j));
589 
590 	*leaked = false;
591 	for (i = 0; i < j; i++) {
592 		if (info->unmap[i].status != GNTST_okay) {
593 			*leaked = true;
594 			break;
595 		}
596 	}
597 
598 	return -ENOENT;
599 }
600 
601 /**
602  * xenbus_unmap_ring
603  * @dev: xenbus device
604  * @handles: grant handle array
605  * @nr_handles: number of handles in the array
606  * @vaddrs: addresses to unmap
607  *
608  * Unmap memory in this domain that was imported from another domain.
609  * Returns 0 on success and returns GNTST_* on error
610  * (see xen/include/interface/grant_table.h).
611  */
612 static int xenbus_unmap_ring(struct xenbus_device *dev, grant_handle_t *handles,
613 			     unsigned int nr_handles, unsigned long *vaddrs)
614 {
615 	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
616 	int i;
617 	int err;
618 
619 	if (nr_handles > XENBUS_MAX_RING_GRANTS)
620 		return -EINVAL;
621 
622 	for (i = 0; i < nr_handles; i++)
623 		gnttab_set_unmap_op(&unmap[i], vaddrs[i],
624 				    GNTMAP_host_map, handles[i]);
625 
626 	BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i));
627 
628 	err = GNTST_okay;
629 	for (i = 0; i < nr_handles; i++) {
630 		if (unmap[i].status != GNTST_okay) {
631 			xenbus_dev_error(dev, unmap[i].status,
632 					 "unmapping page at handle %d error %d",
633 					 handles[i], unmap[i].status);
634 			err = unmap[i].status;
635 			break;
636 		}
637 	}
638 
639 	return err;
640 }
641 
642 static void xenbus_map_ring_setup_grant_hvm(unsigned long gfn,
643 					    unsigned int goffset,
644 					    unsigned int len,
645 					    void *data)
646 {
647 	struct map_ring_valloc *info = data;
648 	unsigned long vaddr = (unsigned long)gfn_to_virt(gfn);
649 
650 	info->phys_addrs[info->idx] = vaddr;
651 	info->addrs[info->idx] = vaddr;
652 
653 	info->idx++;
654 }
655 
656 static int xenbus_map_ring_hvm(struct xenbus_device *dev,
657 			       struct map_ring_valloc *info,
658 			       grant_ref_t *gnt_ref,
659 			       unsigned int nr_grefs,
660 			       void **vaddr)
661 {
662 	struct xenbus_map_node *node = info->node;
663 	int err;
664 	void *addr;
665 	bool leaked = false;
666 	unsigned int nr_pages = XENBUS_PAGES(nr_grefs);
667 
668 	err = xen_alloc_unpopulated_pages(nr_pages, node->hvm.pages);
669 	if (err)
670 		goto out_err;
671 
672 	gnttab_foreach_grant(node->hvm.pages, nr_grefs,
673 			     xenbus_map_ring_setup_grant_hvm,
674 			     info);
675 
676 	err = __xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handles,
677 				info, GNTMAP_host_map, &leaked);
678 	node->nr_handles = nr_grefs;
679 
680 	if (err)
681 		goto out_free_ballooned_pages;
682 
683 	addr = vmap(node->hvm.pages, nr_pages, VM_MAP | VM_IOREMAP,
684 		    PAGE_KERNEL);
685 	if (!addr) {
686 		err = -ENOMEM;
687 		goto out_xenbus_unmap_ring;
688 	}
689 
690 	node->hvm.addr = addr;
691 
692 	spin_lock(&xenbus_valloc_lock);
693 	list_add(&node->next, &xenbus_valloc_pages);
694 	spin_unlock(&xenbus_valloc_lock);
695 
696 	*vaddr = addr;
697 	info->node = NULL;
698 
699 	return 0;
700 
701  out_xenbus_unmap_ring:
702 	if (!leaked)
703 		xenbus_unmap_ring(dev, node->handles, nr_grefs, info->addrs);
704 	else
705 		pr_alert("leaking %p size %u page(s)",
706 			 addr, nr_pages);
707  out_free_ballooned_pages:
708 	if (!leaked)
709 		xen_free_unpopulated_pages(nr_pages, node->hvm.pages);
710  out_err:
711 	return err;
712 }
713 
714 /**
715  * xenbus_unmap_ring_vfree
716  * @dev: xenbus device
717  * @vaddr: addr to unmap
718  *
719  * Based on Rusty Russell's skeleton driver's unmap_page.
720  * Unmap a page of memory in this domain that was imported from another domain.
721  * Use xenbus_unmap_ring_vfree if you mapped in your memory with
722  * xenbus_map_ring_valloc (it will free the virtual address space).
723  * Returns 0 on success and returns GNTST_* on error
724  * (see xen/include/interface/grant_table.h).
725  */
726 int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
727 {
728 	return ring_ops->unmap(dev, vaddr);
729 }
730 EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
731 
732 #ifdef CONFIG_XEN_PV
733 static int map_ring_apply(pte_t *pte, unsigned long addr, void *data)
734 {
735 	struct map_ring_valloc *info = data;
736 
737 	info->phys_addrs[info->idx++] = arbitrary_virt_to_machine(pte).maddr;
738 	return 0;
739 }
740 
741 static int xenbus_map_ring_pv(struct xenbus_device *dev,
742 			      struct map_ring_valloc *info,
743 			      grant_ref_t *gnt_refs,
744 			      unsigned int nr_grefs,
745 			      void **vaddr)
746 {
747 	struct xenbus_map_node *node = info->node;
748 	struct vm_struct *area;
749 	bool leaked = false;
750 	int err = -ENOMEM;
751 
752 	area = get_vm_area(XEN_PAGE_SIZE * nr_grefs, VM_IOREMAP);
753 	if (!area)
754 		return -ENOMEM;
755 	if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
756 				XEN_PAGE_SIZE * nr_grefs, map_ring_apply, info))
757 		goto failed;
758 	err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles,
759 				info, GNTMAP_host_map | GNTMAP_contains_pte,
760 				&leaked);
761 	if (err)
762 		goto failed;
763 
764 	node->nr_handles = nr_grefs;
765 	node->pv.area = area;
766 
767 	spin_lock(&xenbus_valloc_lock);
768 	list_add(&node->next, &xenbus_valloc_pages);
769 	spin_unlock(&xenbus_valloc_lock);
770 
771 	*vaddr = area->addr;
772 	info->node = NULL;
773 
774 	return 0;
775 
776 failed:
777 	if (!leaked)
778 		free_vm_area(area);
779 	else
780 		pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs);
781 
782 	return err;
783 }
784 
785 static int xenbus_unmap_ring_pv(struct xenbus_device *dev, void *vaddr)
786 {
787 	struct xenbus_map_node *node;
788 	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
789 	unsigned int level;
790 	int i;
791 	bool leaked = false;
792 	int err;
793 
794 	spin_lock(&xenbus_valloc_lock);
795 	list_for_each_entry(node, &xenbus_valloc_pages, next) {
796 		if (node->pv.area->addr == vaddr) {
797 			list_del(&node->next);
798 			goto found;
799 		}
800 	}
801 	node = NULL;
802  found:
803 	spin_unlock(&xenbus_valloc_lock);
804 
805 	if (!node) {
806 		xenbus_dev_error(dev, -ENOENT,
807 				 "can't find mapped virtual address %p", vaddr);
808 		return GNTST_bad_virt_addr;
809 	}
810 
811 	for (i = 0; i < node->nr_handles; i++) {
812 		unsigned long addr;
813 
814 		memset(&unmap[i], 0, sizeof(unmap[i]));
815 		addr = (unsigned long)vaddr + (XEN_PAGE_SIZE * i);
816 		unmap[i].host_addr = arbitrary_virt_to_machine(
817 			lookup_address(addr, &level)).maddr;
818 		unmap[i].dev_bus_addr = 0;
819 		unmap[i].handle = node->handles[i];
820 	}
821 
822 	BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i));
823 
824 	err = GNTST_okay;
825 	leaked = false;
826 	for (i = 0; i < node->nr_handles; i++) {
827 		if (unmap[i].status != GNTST_okay) {
828 			leaked = true;
829 			xenbus_dev_error(dev, unmap[i].status,
830 					 "unmapping page at handle %d error %d",
831 					 node->handles[i], unmap[i].status);
832 			err = unmap[i].status;
833 			break;
834 		}
835 	}
836 
837 	if (!leaked)
838 		free_vm_area(node->pv.area);
839 	else
840 		pr_alert("leaking VM area %p size %u page(s)",
841 			 node->pv.area, node->nr_handles);
842 
843 	kfree(node);
844 	return err;
845 }
846 
847 static const struct xenbus_ring_ops ring_ops_pv = {
848 	.map = xenbus_map_ring_pv,
849 	.unmap = xenbus_unmap_ring_pv,
850 };
851 #endif
852 
853 struct unmap_ring_hvm
854 {
855 	unsigned int idx;
856 	unsigned long addrs[XENBUS_MAX_RING_GRANTS];
857 };
858 
859 static void xenbus_unmap_ring_setup_grant_hvm(unsigned long gfn,
860 					      unsigned int goffset,
861 					      unsigned int len,
862 					      void *data)
863 {
864 	struct unmap_ring_hvm *info = data;
865 
866 	info->addrs[info->idx] = (unsigned long)gfn_to_virt(gfn);
867 
868 	info->idx++;
869 }
870 
871 static int xenbus_unmap_ring_hvm(struct xenbus_device *dev, void *vaddr)
872 {
873 	int rv;
874 	struct xenbus_map_node *node;
875 	void *addr;
876 	struct unmap_ring_hvm info = {
877 		.idx = 0,
878 	};
879 	unsigned int nr_pages;
880 
881 	spin_lock(&xenbus_valloc_lock);
882 	list_for_each_entry(node, &xenbus_valloc_pages, next) {
883 		addr = node->hvm.addr;
884 		if (addr == vaddr) {
885 			list_del(&node->next);
886 			goto found;
887 		}
888 	}
889 	node = addr = NULL;
890  found:
891 	spin_unlock(&xenbus_valloc_lock);
892 
893 	if (!node) {
894 		xenbus_dev_error(dev, -ENOENT,
895 				 "can't find mapped virtual address %p", vaddr);
896 		return GNTST_bad_virt_addr;
897 	}
898 
899 	nr_pages = XENBUS_PAGES(node->nr_handles);
900 
901 	gnttab_foreach_grant(node->hvm.pages, node->nr_handles,
902 			     xenbus_unmap_ring_setup_grant_hvm,
903 			     &info);
904 
905 	rv = xenbus_unmap_ring(dev, node->handles, node->nr_handles,
906 			       info.addrs);
907 	if (!rv) {
908 		vunmap(vaddr);
909 		xen_free_unpopulated_pages(nr_pages, node->hvm.pages);
910 	}
911 	else
912 		WARN(1, "Leaking %p, size %u page(s)\n", vaddr, nr_pages);
913 
914 	kfree(node);
915 	return rv;
916 }
917 
918 /**
919  * xenbus_read_driver_state
920  * @path: path for driver
921  *
922  * Return the state of the driver rooted at the given store path, or
923  * XenbusStateUnknown if no state can be read.
924  */
925 enum xenbus_state xenbus_read_driver_state(const char *path)
926 {
927 	enum xenbus_state result;
928 	int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL);
929 	if (err)
930 		result = XenbusStateUnknown;
931 
932 	return result;
933 }
934 EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
935 
936 static const struct xenbus_ring_ops ring_ops_hvm = {
937 	.map = xenbus_map_ring_hvm,
938 	.unmap = xenbus_unmap_ring_hvm,
939 };
940 
941 void __init xenbus_ring_ops_init(void)
942 {
943 #ifdef CONFIG_XEN_PV
944 	if (!xen_feature(XENFEAT_auto_translated_physmap))
945 		ring_ops = &ring_ops_pv;
946 	else
947 #endif
948 		ring_ops = &ring_ops_hvm;
949 }
950