1 /******************************************************************************
2  * Client-facing interface for the Xenbus driver.  In other words, the
3  * interface between the Xenbus and the device-specific code, be it the
4  * frontend or the backend of that driver.
5  *
6  * Copyright (C) 2005 XenSource Ltd
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License version 2
10  * as published by the Free Software Foundation; or, when distributed
11  * separately from the Linux kernel or incorporated into other
12  * software packages, subject to the following license:
13  *
14  * Permission is hereby granted, free of charge, to any person obtaining a copy
15  * of this source file (the "Software"), to deal in the Software without
16  * restriction, including without limitation the rights to use, copy, modify,
17  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18  * and to permit persons to whom the Software is furnished to do so, subject to
19  * the following conditions:
20  *
21  * The above copyright notice and this permission notice shall be included in
22  * all copies or substantial portions of the Software.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30  * IN THE SOFTWARE.
31  */
32 
33 #include <linux/mm.h>
34 #include <linux/slab.h>
35 #include <linux/types.h>
36 #include <linux/spinlock.h>
37 #include <linux/vmalloc.h>
38 #include <linux/export.h>
39 #include <asm/xen/hypervisor.h>
40 #include <xen/page.h>
41 #include <xen/interface/xen.h>
42 #include <xen/interface/event_channel.h>
43 #include <xen/balloon.h>
44 #include <xen/events.h>
45 #include <xen/grant_table.h>
46 #include <xen/xenbus.h>
47 #include <xen/xen.h>
48 #include <xen/features.h>
49 
50 #include "xenbus_probe.h"
51 
52 #define XENBUS_PAGES(_grants)	(DIV_ROUND_UP(_grants, XEN_PFN_PER_PAGE))
53 
54 #define XENBUS_MAX_RING_PAGES	(XENBUS_PAGES(XENBUS_MAX_RING_GRANTS))
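
/*
 * XEN_PFN_PER_PAGE is the number of Xen-sized (XEN_PAGE_SIZE) frames per
 * kernel page, so XENBUS_PAGES converts a grant count into a kernel-page
 * count.  For example, on arm64 with 64 KiB kernel pages and 4 KiB Xen
 * pages, XEN_PFN_PER_PAGE is 16 and XENBUS_PAGES(16) evaluates to 1; with
 * 4 KiB kernel pages the macro is simply the identity.
 */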
55 
56 struct xenbus_map_node {
57 	struct list_head next;
58 	union {
59 		struct {
60 			struct vm_struct *area;
61 		} pv;
62 		struct {
63 			struct page *pages[XENBUS_MAX_RING_PAGES];
64 			unsigned long addrs[XENBUS_MAX_RING_GRANTS];
65 			void *addr;
66 		} hvm;
67 	};
68 	grant_handle_t handles[XENBUS_MAX_RING_GRANTS];
69 	unsigned int   nr_handles;
70 };
71 
72 static DEFINE_SPINLOCK(xenbus_valloc_lock);
73 static LIST_HEAD(xenbus_valloc_pages);
74 
75 struct xenbus_ring_ops {
76 	int (*map)(struct xenbus_device *dev,
77 		   grant_ref_t *gnt_refs, unsigned int nr_grefs,
78 		   void **vaddr);
79 	int (*unmap)(struct xenbus_device *dev, void *vaddr);
80 };
81 
82 static const struct xenbus_ring_ops *ring_ops __read_mostly;
83 
84 const char *xenbus_strstate(enum xenbus_state state)
85 {
86 	static const char *const name[] = {
		[ XenbusStateUnknown       ] = "Unknown",
		[ XenbusStateInitialising  ] = "Initialising",
		[ XenbusStateInitWait      ] = "InitWait",
		[ XenbusStateInitialised   ] = "Initialised",
		[ XenbusStateConnected     ] = "Connected",
		[ XenbusStateClosing       ] = "Closing",
		[ XenbusStateClosed        ] = "Closed",
		[ XenbusStateReconfiguring ] = "Reconfiguring",
		[ XenbusStateReconfigured  ] = "Reconfigured",
96 	};
97 	return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
98 }
99 EXPORT_SYMBOL_GPL(xenbus_strstate);
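
/*
 * Example (sketch): xenbus_strstate is mainly useful for logging state
 * transitions, e.g. from a driver's "otherend changed" handler:
 *
 *	dev_dbg(&dev->dev, "backend state %s\n",
 *		xenbus_strstate(xenbus_read_driver_state(dev->otherend)));
 */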
100 
101 /**
102  * xenbus_watch_path - register a watch
103  * @dev: xenbus device
104  * @path: path to watch
105  * @watch: watch to register
106  * @callback: callback to register
107  *
108  * Register a @watch on the given path, using the given xenbus_watch structure
109  * for storage, and the given @callback function as the callback.  Return 0 on
110  * success, or -errno on error.  On success, the given @path will be saved as
111  * @watch->node, and remains the caller's to free.  On error, @watch->node will
112  * be NULL, the device will switch to %XenbusStateClosing, and the error will
113  * be saved in the store.
114  */
115 int xenbus_watch_path(struct xenbus_device *dev, const char *path,
116 		      struct xenbus_watch *watch,
117 		      void (*callback)(struct xenbus_watch *,
118 				       const char **, unsigned int))
119 {
120 	int err;
121 
122 	watch->node = path;
123 	watch->callback = callback;
124 
125 	err = register_xenbus_watch(watch);
126 
127 	if (err) {
128 		watch->node = NULL;
129 		watch->callback = NULL;
130 		xenbus_dev_fatal(dev, err, "adding watch on %s", path);
131 	}
132 
133 	return err;
134 }
135 EXPORT_SYMBOL_GPL(xenbus_watch_path);
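
/*
 * Example (sketch; "backend_changed" and "info" are hypothetical driver
 * names): a frontend typically watches its backend's subtree, using a
 * xenbus_watch embedded in its per-device state:
 *
 *	static void backend_changed(struct xenbus_watch *watch,
 *				    const char **vec, unsigned int len)
 *	{
 *		...
 *	}
 *
 *	err = xenbus_watch_path(dev, dev->otherend, &info->watch,
 *				backend_changed);
 */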
136 
137 
138 /**
139  * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path
140  * @dev: xenbus device
141  * @watch: watch to register
142  * @callback: callback to register
143  * @pathfmt: format of path to watch
144  *
 * Register a watch on a path built from the given format string, using the
 * given xenbus_watch structure for storage, and the given @callback function
 * as the callback.  Return 0 on success, or -errno on error.  On success, the
 * formatted path will be saved as @watch->node, and becomes the caller's to
 * kfree().  On error, watch->node will be NULL, so the caller has nothing to
 * free, the device will switch to %XenbusStateClosing, and the error will be
 * saved in the store.
152  */
153 int xenbus_watch_pathfmt(struct xenbus_device *dev,
154 			 struct xenbus_watch *watch,
155 			 void (*callback)(struct xenbus_watch *,
156 					const char **, unsigned int),
157 			 const char *pathfmt, ...)
158 {
159 	int err;
160 	va_list ap;
161 	char *path;
162 
163 	va_start(ap, pathfmt);
164 	path = kvasprintf(GFP_NOIO | __GFP_HIGH, pathfmt, ap);
165 	va_end(ap);
166 
167 	if (!path) {
168 		xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch");
169 		return -ENOMEM;
170 	}
171 	err = xenbus_watch_path(dev, path, watch, callback);
172 
173 	if (err)
174 		kfree(path);
175 	return err;
176 }
177 EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
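
/*
 * Example (sketch; "be" and "hotplug_changed" are hypothetical): a backend
 * watching a node whose path must be assembled first.  The formatted path
 * is saved in the watch and kfree()d by the caller once the watch has been
 * unregistered:
 *
 *	err = xenbus_watch_pathfmt(dev, &be->watch, hotplug_changed,
 *				   "%s/%s", dev->nodename, "hotplug-status");
 */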
178 
179 static void xenbus_switch_fatal(struct xenbus_device *, int, int,
180 				const char *, ...);
181 
182 static int
183 __xenbus_switch_state(struct xenbus_device *dev,
184 		      enum xenbus_state state, int depth)
185 {
186 	/* We check whether the state is currently set to the given value, and
187 	   if not, then the state is set.  We don't want to unconditionally
188 	   write the given state, because we don't want to fire watches
189 	   unnecessarily.  Furthermore, if the node has gone, we don't write
190 	   to it, as the device will be tearing down, and we don't want to
191 	   resurrect that directory.
192 
	   Note that, because of this cached value of our state, this
	   function will not take a caller's Xenstore transaction
	   (something it attempted in the past) because dev->state
	   would not get reset if the transaction was aborted.
197 	 */
198 
199 	struct xenbus_transaction xbt;
200 	int current_state;
201 	int err, abort;
202 
203 	if (state == dev->state)
204 		return 0;
205 
206 again:
207 	abort = 1;
208 
209 	err = xenbus_transaction_start(&xbt);
210 	if (err) {
211 		xenbus_switch_fatal(dev, depth, err, "starting transaction");
212 		return 0;
213 	}
214 
215 	err = xenbus_scanf(xbt, dev->nodename, "state", "%d", &current_state);
216 	if (err != 1)
217 		goto abort;
218 
219 	err = xenbus_printf(xbt, dev->nodename, "state", "%d", state);
220 	if (err) {
221 		xenbus_switch_fatal(dev, depth, err, "writing new state");
222 		goto abort;
223 	}
224 
225 	abort = 0;
226 abort:
227 	err = xenbus_transaction_end(xbt, abort);
228 	if (err) {
229 		if (err == -EAGAIN && !abort)
230 			goto again;
231 		xenbus_switch_fatal(dev, depth, err, "ending transaction");
232 	} else
233 		dev->state = state;
234 
235 	return 0;
236 }
237 
238 /**
239  * xenbus_switch_state
240  * @dev: xenbus device
241  * @state: new state
242  *
 * Advertise in the store a change of the given driver to the given @state.
244  * Return 0 on success, or -errno on error.  On error, the device will switch
245  * to XenbusStateClosing, and the error will be saved in the store.
246  */
247 int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
248 {
249 	return __xenbus_switch_state(dev, state, 0);
250 }
251 
252 EXPORT_SYMBOL_GPL(xenbus_switch_state);
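
/*
 * Example (sketch): a typical frontend moves to Initialised once its ring
 * and event channel are advertised, then to Connected when the backend is
 * ready:
 *
 *	err = xenbus_switch_state(dev, XenbusStateInitialised);
 *	if (err)
 *		goto fail;
 */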
253 
254 int xenbus_frontend_closed(struct xenbus_device *dev)
255 {
256 	xenbus_switch_state(dev, XenbusStateClosed);
257 	complete(&dev->down);
258 	return 0;
259 }
260 EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
261 
262 /**
263  * Return the path to the error node for the given device, or NULL on failure.
264  * If the value returned is non-NULL, then it is the caller's to kfree.
265  */
266 static char *error_path(struct xenbus_device *dev)
267 {
268 	return kasprintf(GFP_KERNEL, "error/%s", dev->nodename);
269 }
270 
271 
272 static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
273 				const char *fmt, va_list ap)
274 {
275 	unsigned int len;
276 	char *printf_buffer = NULL;
277 	char *path_buffer = NULL;
278 
279 #define PRINTF_BUFFER_SIZE 4096
280 	printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
281 	if (printf_buffer == NULL)
282 		goto fail;
283 
284 	len = sprintf(printf_buffer, "%i ", -err);
285 	vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap);
286 
287 	dev_err(&dev->dev, "%s\n", printf_buffer);
288 
289 	path_buffer = error_path(dev);
290 
291 	if (path_buffer == NULL) {
292 		dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
293 		       dev->nodename, printf_buffer);
294 		goto fail;
295 	}
296 
297 	if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) {
298 		dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
299 		       dev->nodename, printf_buffer);
300 		goto fail;
301 	}
302 
303 fail:
304 	kfree(printf_buffer);
305 	kfree(path_buffer);
306 }
307 
308 
309 /**
310  * xenbus_dev_error
311  * @dev: xenbus device
312  * @err: error to report
313  * @fmt: error message format
314  *
315  * Report the given negative errno into the store, along with the given
316  * formatted message.
317  */
318 void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...)
319 {
320 	va_list ap;
321 
322 	va_start(ap, fmt);
323 	xenbus_va_dev_error(dev, err, fmt, ap);
324 	va_end(ap);
325 }
326 EXPORT_SYMBOL_GPL(xenbus_dev_error);
327 
328 /**
329  * xenbus_dev_fatal
330  * @dev: xenbus device
331  * @err: error to report
332  * @fmt: error message format
333  *
334  * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
335  * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly
336  * closedown of this driver and its peer.
337  */
338 
339 void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...)
340 {
341 	va_list ap;
342 
343 	va_start(ap, fmt);
344 	xenbus_va_dev_error(dev, err, fmt, ap);
345 	va_end(ap);
346 
347 	xenbus_switch_state(dev, XenbusStateClosing);
348 }
349 EXPORT_SYMBOL_GPL(xenbus_dev_fatal);
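
/*
 * Example (sketch): reporting a failed allocation during ring setup; the
 * message lands in the device's error node and the device is moved to
 * Closing:
 *
 *	ring = (void *)__get_free_page(GFP_NOIO | __GFP_HIGH);
 *	if (!ring) {
 *		xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
 *		return -ENOMEM;
 *	}
 */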
350 
351 /**
352  * Equivalent to xenbus_dev_fatal(dev, err, fmt, args), but helps
353  * avoiding recursion within xenbus_switch_state.
354  */
355 static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
356 				const char *fmt, ...)
357 {
358 	va_list ap;
359 
360 	va_start(ap, fmt);
361 	xenbus_va_dev_error(dev, err, fmt, ap);
362 	va_end(ap);
363 
364 	if (!depth)
365 		__xenbus_switch_state(dev, XenbusStateClosing, 1);
366 }
367 
368 /**
369  * xenbus_grant_ring
370  * @dev: xenbus device
371  * @vaddr: starting virtual address of the ring
 * @nr_pages: number of (XEN_PAGE_SIZE sized) pages to be granted
 * @grefs: grant reference array to be filled in; must have @nr_pages entries
374  *
375  * Grant access to the given @vaddr to the peer of the given device.
376  * Then fill in @grefs with grant references.  Return 0 on success, or
377  * -errno on error.  On error, the device will switch to
378  * XenbusStateClosing, and the error will be saved in the store.
379  */
380 int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
381 		      unsigned int nr_pages, grant_ref_t *grefs)
382 {
383 	int err;
384 	int i, j;
385 
386 	for (i = 0; i < nr_pages; i++) {
387 		err = gnttab_grant_foreign_access(dev->otherend_id,
388 						  virt_to_gfn(vaddr), 0);
389 		if (err < 0) {
390 			xenbus_dev_fatal(dev, err,
391 					 "granting access to ring page");
392 			goto fail;
393 		}
394 		grefs[i] = err;
395 
396 		vaddr = vaddr + XEN_PAGE_SIZE;
397 	}
398 
399 	return 0;
400 
401 fail:
402 	for (j = 0; j < i; j++)
403 		gnttab_end_foreign_access_ref(grefs[j], 0);
404 	return err;
405 }
406 EXPORT_SYMBOL_GPL(xenbus_grant_ring);
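
/*
 * Example (sketch; "ring-ref" is the key conventionally used by several
 * frontends): granting a single-page ring to the peer and publishing the
 * reference in the store:
 *
 *	grant_ref_t gref;
 *
 *	err = xenbus_grant_ring(dev, ring, 1, &gref);
 *	if (err)
 *		return err;
 *	err = xenbus_printf(XBT_NIL, dev->nodename, "ring-ref", "%u", gref);
 */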
407 
408 
409 /**
410  * Allocate an event channel for the given xenbus_device, assigning the newly
411  * created local port to *port.  Return 0 on success, or -errno on error.  On
412  * error, the device will switch to XenbusStateClosing, and the error will be
413  * saved in the store.
414  */
415 int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port)
416 {
417 	struct evtchn_alloc_unbound alloc_unbound;
418 	int err;
419 
420 	alloc_unbound.dom = DOMID_SELF;
421 	alloc_unbound.remote_dom = dev->otherend_id;
422 
423 	err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
424 					  &alloc_unbound);
425 	if (err)
426 		xenbus_dev_fatal(dev, err, "allocating event channel");
427 	else
428 		*port = alloc_unbound.port;
429 
430 	return err;
431 }
432 EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn);
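
/*
 * Example (sketch; the handler and "info" cookie are hypothetical):
 * allocating an unbound channel for the peer, then binding the local port
 * to an interrupt handler:
 *
 *	int evtchn;
 *
 *	err = xenbus_alloc_evtchn(dev, &evtchn);
 *	if (err)
 *		return err;
 *	err = bind_evtchn_to_irqhandler(evtchn, ring_interrupt, 0,
 *					"example-ring", info);
 */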
433 
434 
435 /**
436  * Free an existing event channel. Returns 0 on success or -errno on error.
437  */
438 int xenbus_free_evtchn(struct xenbus_device *dev, int port)
439 {
440 	struct evtchn_close close;
441 	int err;
442 
443 	close.port = port;
444 
445 	err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
446 	if (err)
447 		xenbus_dev_error(dev, err, "freeing event channel %d", port);
448 
449 	return err;
450 }
451 EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
452 
453 
454 /**
455  * xenbus_map_ring_valloc
456  * @dev: xenbus device
457  * @gnt_refs: grant reference array
458  * @nr_grefs: number of grant references
459  * @vaddr: pointer to address to be filled out by mapping
460  *
461  * Map @nr_grefs pages of memory into this domain from another
462  * domain's grant table.  xenbus_map_ring_valloc allocates @nr_grefs
463  * pages of virtual address space, maps the pages to that address, and
 * sets *vaddr to that address.  Returns 0 on success, or a GNTST_*
 * status (see xen/include/interface/grant_table.h), -ENOMEM, or
 * -EINVAL on error.  If an error is returned, the device will switch
 * to XenbusStateClosing and the error message will be saved in XenStore.
468  */
469 int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs,
470 			   unsigned int nr_grefs, void **vaddr)
471 {
472 	return ring_ops->map(dev, gnt_refs, nr_grefs, vaddr);
473 }
474 EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
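
/*
 * Example (sketch; "ring-ref" and the error handling are illustrative): a
 * backend reading the frontend's grant reference and mapping the ring:
 *
 *	grant_ref_t ring_ref;
 *	void *addr;
 *
 *	err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref", "%u",
 *			   &ring_ref);
 *	if (err != 1)
 *		return -EINVAL;
 *	err = xenbus_map_ring_valloc(dev, &ring_ref, 1, &addr);
 */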
475 
/* N.B. sizeof(phys_addr_t) doesn't always equal sizeof(unsigned long),
 * e.g. for a 32-bit guest on a 64-bit hypervisor (32-on-64).  The
 * caller is responsible for preparing the right array to feed into
 * this function. */
479 static int __xenbus_map_ring(struct xenbus_device *dev,
480 			     grant_ref_t *gnt_refs,
481 			     unsigned int nr_grefs,
482 			     grant_handle_t *handles,
483 			     phys_addr_t *addrs,
484 			     unsigned int flags,
485 			     bool *leaked)
486 {
487 	struct gnttab_map_grant_ref map[XENBUS_MAX_RING_GRANTS];
488 	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
489 	int i, j;
490 	int err = GNTST_okay;
491 
492 	if (nr_grefs > XENBUS_MAX_RING_GRANTS)
493 		return -EINVAL;
494 
495 	for (i = 0; i < nr_grefs; i++) {
496 		memset(&map[i], 0, sizeof(map[i]));
497 		gnttab_set_map_op(&map[i], addrs[i], flags, gnt_refs[i],
498 				  dev->otherend_id);
499 		handles[i] = INVALID_GRANT_HANDLE;
500 	}
501 
502 	gnttab_batch_map(map, i);
503 
504 	for (i = 0; i < nr_grefs; i++) {
505 		if (map[i].status != GNTST_okay) {
506 			err = map[i].status;
507 			xenbus_dev_fatal(dev, map[i].status,
508 					 "mapping in shared page %d from domain %d",
509 					 gnt_refs[i], dev->otherend_id);
510 			goto fail;
511 		} else
512 			handles[i] = map[i].handle;
513 	}
514 
515 	return GNTST_okay;
516 
517  fail:
518 	for (i = j = 0; i < nr_grefs; i++) {
519 		if (handles[i] != INVALID_GRANT_HANDLE) {
520 			memset(&unmap[j], 0, sizeof(unmap[j]));
521 			gnttab_set_unmap_op(&unmap[j], (phys_addr_t)addrs[i],
522 					    GNTMAP_host_map, handles[i]);
523 			j++;
524 		}
525 	}
526 
527 	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, j))
528 		BUG();
529 
530 	*leaked = false;
531 	for (i = 0; i < j; i++) {
532 		if (unmap[i].status != GNTST_okay) {
533 			*leaked = true;
534 			break;
535 		}
536 	}
537 
538 	return err;
539 }
540 
541 static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
542 				     grant_ref_t *gnt_refs,
543 				     unsigned int nr_grefs,
544 				     void **vaddr)
545 {
546 	struct xenbus_map_node *node;
547 	struct vm_struct *area;
548 	pte_t *ptes[XENBUS_MAX_RING_GRANTS];
549 	phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
550 	int err = GNTST_okay;
551 	int i;
552 	bool leaked;
553 
554 	*vaddr = NULL;
555 
556 	if (nr_grefs > XENBUS_MAX_RING_GRANTS)
557 		return -EINVAL;
558 
559 	node = kzalloc(sizeof(*node), GFP_KERNEL);
560 	if (!node)
561 		return -ENOMEM;
562 
563 	area = alloc_vm_area(XEN_PAGE_SIZE * nr_grefs, ptes);
564 	if (!area) {
565 		kfree(node);
566 		return -ENOMEM;
567 	}
568 
569 	for (i = 0; i < nr_grefs; i++)
570 		phys_addrs[i] = arbitrary_virt_to_machine(ptes[i]).maddr;
571 
572 	err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles,
573 				phys_addrs,
574 				GNTMAP_host_map | GNTMAP_contains_pte,
575 				&leaked);
576 	if (err)
577 		goto failed;
578 
579 	node->nr_handles = nr_grefs;
580 	node->pv.area = area;
581 
582 	spin_lock(&xenbus_valloc_lock);
583 	list_add(&node->next, &xenbus_valloc_pages);
584 	spin_unlock(&xenbus_valloc_lock);
585 
586 	*vaddr = area->addr;
587 	return 0;
588 
589 failed:
590 	if (!leaked)
591 		free_vm_area(area);
592 	else
593 		pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs);
594 
595 	kfree(node);
596 	return err;
597 }
598 
struct map_ring_valloc_hvm {
601 	unsigned int idx;
602 
	/* Why do we need two arrays? See the comment above __xenbus_map_ring. */
604 	phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
605 	unsigned long addrs[XENBUS_MAX_RING_GRANTS];
606 };
607 
608 static void xenbus_map_ring_setup_grant_hvm(unsigned long gfn,
609 					    unsigned int goffset,
610 					    unsigned int len,
611 					    void *data)
612 {
613 	struct map_ring_valloc_hvm *info = data;
614 	unsigned long vaddr = (unsigned long)gfn_to_virt(gfn);
615 
616 	info->phys_addrs[info->idx] = vaddr;
617 	info->addrs[info->idx] = vaddr;
618 
619 	info->idx++;
620 }
621 
622 static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
623 				      grant_ref_t *gnt_ref,
624 				      unsigned int nr_grefs,
625 				      void **vaddr)
626 {
627 	struct xenbus_map_node *node;
628 	int err;
629 	void *addr;
630 	bool leaked = false;
631 	struct map_ring_valloc_hvm info = {
632 		.idx = 0,
633 	};
634 	unsigned int nr_pages = XENBUS_PAGES(nr_grefs);
635 
636 	if (nr_grefs > XENBUS_MAX_RING_GRANTS)
637 		return -EINVAL;
638 
639 	*vaddr = NULL;
640 
641 	node = kzalloc(sizeof(*node), GFP_KERNEL);
642 	if (!node)
643 		return -ENOMEM;
644 
645 	err = alloc_xenballooned_pages(nr_pages, node->hvm.pages);
646 	if (err)
647 		goto out_err;
648 
649 	gnttab_foreach_grant(node->hvm.pages, nr_grefs,
650 			     xenbus_map_ring_setup_grant_hvm,
651 			     &info);
652 
653 	err = __xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handles,
654 				info.phys_addrs, GNTMAP_host_map, &leaked);
655 	node->nr_handles = nr_grefs;
656 
657 	if (err)
658 		goto out_free_ballooned_pages;
659 
660 	addr = vmap(node->hvm.pages, nr_pages, VM_MAP | VM_IOREMAP,
661 		    PAGE_KERNEL);
662 	if (!addr) {
663 		err = -ENOMEM;
664 		goto out_xenbus_unmap_ring;
665 	}
666 
667 	node->hvm.addr = addr;
668 
669 	spin_lock(&xenbus_valloc_lock);
670 	list_add(&node->next, &xenbus_valloc_pages);
671 	spin_unlock(&xenbus_valloc_lock);
672 
673 	*vaddr = addr;
674 	return 0;
675 
676  out_xenbus_unmap_ring:
677 	if (!leaked)
678 		xenbus_unmap_ring(dev, node->handles, nr_grefs, info.addrs);
679 	else
680 		pr_alert("leaking %p size %u page(s)",
681 			 addr, nr_pages);
682  out_free_ballooned_pages:
683 	if (!leaked)
684 		free_xenballooned_pages(nr_pages, node->hvm.pages);
685  out_err:
686 	kfree(node);
687 	return err;
688 }
689 
690 
691 /**
692  * xenbus_map_ring
693  * @dev: xenbus device
694  * @gnt_refs: grant reference array
 * @nr_grefs: number of grant references
 * @handles: pointer to grant handle array to be filled in
 * @vaddrs: addresses the pages are to be mapped to
 * @leaked: set to true when cleanup after a failed map also failed
 *
 * Map pages of memory into this domain from another domain's grant table.
 * xenbus_map_ring does not allocate the virtual address space (you must do
 * this yourself!). It only maps in the pages to the specified address.
 * Returns 0 on success, or a GNTST_* status (see
 * xen/include/interface/grant_table.h), -ENOMEM, or -EINVAL on error. If an
 * error is returned, the device will switch to XenbusStateClosing and the
 * first error message will be saved in XenStore.  Furthermore, if the map
 * fails, the caller should check @leaked: if it is set, xenbus_map_ring
 * failed to clean up after itself and the caller must not free the address
 * space backing @vaddrs.
709  */
710 int xenbus_map_ring(struct xenbus_device *dev, grant_ref_t *gnt_refs,
711 		    unsigned int nr_grefs, grant_handle_t *handles,
712 		    unsigned long *vaddrs, bool *leaked)
713 {
714 	phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
715 	int i;
716 
717 	if (nr_grefs > XENBUS_MAX_RING_GRANTS)
718 		return -EINVAL;
719 
720 	for (i = 0; i < nr_grefs; i++)
721 		phys_addrs[i] = (unsigned long)vaddrs[i];
722 
723 	return __xenbus_map_ring(dev, gnt_refs, nr_grefs, handles,
724 				 phys_addrs, GNTMAP_host_map, leaked);
725 }
726 EXPORT_SYMBOL_GPL(xenbus_map_ring);
727 
728 
729 /**
730  * xenbus_unmap_ring_vfree
731  * @dev: xenbus device
732  * @vaddr: addr to unmap
733  *
734  * Based on Rusty Russell's skeleton driver's unmap_page.
735  * Unmap a page of memory in this domain that was imported from another domain.
736  * Use xenbus_unmap_ring_vfree if you mapped in your memory with
737  * xenbus_map_ring_valloc (it will free the virtual address space).
 * Returns 0 on success, or a GNTST_* status on error
 * (see xen/include/interface/grant_table.h).
740  */
741 int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
742 {
743 	return ring_ops->unmap(dev, vaddr);
744 }
745 EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
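
/*
 * Example (sketch; "info->ring_addr" is a hypothetical per-device field):
 * tearing down a mapping made with xenbus_map_ring_valloc during
 * disconnect:
 *
 *	if (info->ring_addr) {
 *		xenbus_unmap_ring_vfree(dev, info->ring_addr);
 *		info->ring_addr = NULL;
 *	}
 */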
746 
747 static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
748 {
749 	struct xenbus_map_node *node;
750 	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
751 	unsigned int level;
752 	int i;
753 	bool leaked = false;
754 	int err;
755 
756 	spin_lock(&xenbus_valloc_lock);
757 	list_for_each_entry(node, &xenbus_valloc_pages, next) {
758 		if (node->pv.area->addr == vaddr) {
759 			list_del(&node->next);
760 			goto found;
761 		}
762 	}
763 	node = NULL;
764  found:
765 	spin_unlock(&xenbus_valloc_lock);
766 
767 	if (!node) {
768 		xenbus_dev_error(dev, -ENOENT,
769 				 "can't find mapped virtual address %p", vaddr);
770 		return GNTST_bad_virt_addr;
771 	}
772 
773 	for (i = 0; i < node->nr_handles; i++) {
774 		unsigned long addr;
775 
776 		memset(&unmap[i], 0, sizeof(unmap[i]));
777 		addr = (unsigned long)vaddr + (XEN_PAGE_SIZE * i);
778 		unmap[i].host_addr = arbitrary_virt_to_machine(
779 			lookup_address(addr, &level)).maddr;
780 		unmap[i].dev_bus_addr = 0;
781 		unmap[i].handle = node->handles[i];
782 	}
783 
784 	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i))
785 		BUG();
786 
787 	err = GNTST_okay;
788 	leaked = false;
789 	for (i = 0; i < node->nr_handles; i++) {
790 		if (unmap[i].status != GNTST_okay) {
791 			leaked = true;
792 			xenbus_dev_error(dev, unmap[i].status,
793 					 "unmapping page at handle %d error %d",
794 					 node->handles[i], unmap[i].status);
795 			err = unmap[i].status;
796 			break;
797 		}
798 	}
799 
800 	if (!leaked)
801 		free_vm_area(node->pv.area);
802 	else
803 		pr_alert("leaking VM area %p size %u page(s)",
804 			 node->pv.area, node->nr_handles);
805 
806 	kfree(node);
807 	return err;
808 }
809 
struct unmap_ring_vfree_hvm {
812 	unsigned int idx;
813 	unsigned long addrs[XENBUS_MAX_RING_GRANTS];
814 };
815 
816 static void xenbus_unmap_ring_setup_grant_hvm(unsigned long gfn,
817 					      unsigned int goffset,
818 					      unsigned int len,
819 					      void *data)
820 {
821 	struct unmap_ring_vfree_hvm *info = data;
822 
823 	info->addrs[info->idx] = (unsigned long)gfn_to_virt(gfn);
824 
825 	info->idx++;
826 }
827 
828 static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
829 {
830 	int rv;
831 	struct xenbus_map_node *node;
832 	void *addr;
833 	struct unmap_ring_vfree_hvm info = {
834 		.idx = 0,
835 	};
836 	unsigned int nr_pages;
837 
838 	spin_lock(&xenbus_valloc_lock);
839 	list_for_each_entry(node, &xenbus_valloc_pages, next) {
840 		addr = node->hvm.addr;
841 		if (addr == vaddr) {
842 			list_del(&node->next);
843 			goto found;
844 		}
845 	}
846 	node = addr = NULL;
847  found:
848 	spin_unlock(&xenbus_valloc_lock);
849 
850 	if (!node) {
851 		xenbus_dev_error(dev, -ENOENT,
852 				 "can't find mapped virtual address %p", vaddr);
853 		return GNTST_bad_virt_addr;
854 	}
855 
856 	nr_pages = XENBUS_PAGES(node->nr_handles);
857 
858 	gnttab_foreach_grant(node->hvm.pages, node->nr_handles,
859 			     xenbus_unmap_ring_setup_grant_hvm,
860 			     &info);
861 
862 	rv = xenbus_unmap_ring(dev, node->handles, node->nr_handles,
863 			       info.addrs);
	if (!rv) {
		vunmap(vaddr);
		free_xenballooned_pages(nr_pages, node->hvm.pages);
	} else
		WARN(1, "Leaking %p, size %u page(s)\n", vaddr, nr_pages);
870 
871 	kfree(node);
872 	return rv;
873 }
874 
875 /**
876  * xenbus_unmap_ring
877  * @dev: xenbus device
878  * @handles: grant handle array
879  * @nr_handles: number of handles in the array
880  * @vaddrs: addresses to unmap
881  *
882  * Unmap memory in this domain that was imported from another domain.
 * Returns 0 on success, or a GNTST_* status on error
 * (see xen/include/interface/grant_table.h).
885  */
886 int xenbus_unmap_ring(struct xenbus_device *dev,
887 		      grant_handle_t *handles, unsigned int nr_handles,
888 		      unsigned long *vaddrs)
889 {
890 	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
891 	int i;
892 	int err;
893 
894 	if (nr_handles > XENBUS_MAX_RING_GRANTS)
895 		return -EINVAL;
896 
897 	for (i = 0; i < nr_handles; i++)
898 		gnttab_set_unmap_op(&unmap[i], vaddrs[i],
899 				    GNTMAP_host_map, handles[i]);
900 
901 	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i))
902 		BUG();
903 
904 	err = GNTST_okay;
905 	for (i = 0; i < nr_handles; i++) {
906 		if (unmap[i].status != GNTST_okay) {
907 			xenbus_dev_error(dev, unmap[i].status,
908 					 "unmapping page at handle %d error %d",
909 					 handles[i], unmap[i].status);
910 			err = unmap[i].status;
911 			break;
912 		}
913 	}
914 
915 	return err;
916 }
917 EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
918 
919 
920 /**
921  * xenbus_read_driver_state
922  * @path: path for driver
923  *
924  * Return the state of the driver rooted at the given store path, or
925  * XenbusStateUnknown if no state can be read.
926  */
927 enum xenbus_state xenbus_read_driver_state(const char *path)
928 {
929 	enum xenbus_state result;
	int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL);

	if (err)
932 		result = XenbusStateUnknown;
933 
934 	return result;
935 }
936 EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
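
/*
 * Example (sketch): polling the peer's state, e.g. while waiting for the
 * backend to acknowledge a close:
 *
 *	if (xenbus_read_driver_state(dev->otherend) == XenbusStateClosed)
 *		break;
 */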
937 
938 static const struct xenbus_ring_ops ring_ops_pv = {
939 	.map = xenbus_map_ring_valloc_pv,
940 	.unmap = xenbus_unmap_ring_vfree_pv,
941 };
942 
943 static const struct xenbus_ring_ops ring_ops_hvm = {
944 	.map = xenbus_map_ring_valloc_hvm,
945 	.unmap = xenbus_unmap_ring_vfree_hvm,
946 };
947 
948 void __init xenbus_ring_ops_init(void)
949 {
950 	if (!xen_feature(XENFEAT_auto_translated_physmap))
951 		ring_ops = &ring_ops_pv;
952 	else
953 		ring_ops = &ring_ops_hvm;
954 }
955