1 /******************************************************************************
2  * Client-facing interface for the Xenbus driver.  In other words, the
3  * interface between the Xenbus and the device-specific code, be it the
4  * frontend or the backend of that driver.
5  *
6  * Copyright (C) 2005 XenSource Ltd
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License version 2
10  * as published by the Free Software Foundation; or, when distributed
11  * separately from the Linux kernel or incorporated into other
12  * software packages, subject to the following license:
13  *
14  * Permission is hereby granted, free of charge, to any person obtaining a copy
15  * of this source file (the "Software"), to deal in the Software without
16  * restriction, including without limitation the rights to use, copy, modify,
17  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18  * and to permit persons to whom the Software is furnished to do so, subject to
19  * the following conditions:
20  *
21  * The above copyright notice and this permission notice shall be included in
22  * all copies or substantial portions of the Software.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30  * IN THE SOFTWARE.
31  */
32 
33 #include <linux/mm.h>
34 #include <linux/slab.h>
35 #include <linux/types.h>
36 #include <linux/spinlock.h>
37 #include <linux/vmalloc.h>
38 #include <linux/export.h>
39 #include <asm/xen/hypervisor.h>
40 #include <xen/page.h>
41 #include <xen/interface/xen.h>
42 #include <xen/interface/event_channel.h>
43 #include <xen/balloon.h>
44 #include <xen/events.h>
45 #include <xen/grant_table.h>
46 #include <xen/xenbus.h>
47 #include <xen/xen.h>
48 #include <xen/features.h>
49 
50 #include "xenbus_probe.h"
51 
/*
 * Bookkeeping record for one ring mapping created by the
 * xenbus_map_ring_valloc_*() helpers in this file.  The record is looked up
 * again by mapped address when the ring is unmapped.
 */
struct xenbus_map_node {
	/* Link in the xenbus_valloc_pages list. */
	struct list_head next;
	union {
		/* Filled in by the PV map path. */
		struct {
			/* VM area obtained from alloc_vm_area(). */
			struct vm_struct *area;
		} pv;
		/* Filled in by the HVM map path. */
		struct {
			/* Ballooned pages the grants are mapped onto. */
			struct page *pages[XENBUS_MAX_RING_PAGES];
			/* Address returned by vmap() for @pages. */
			void *addr;
		} hvm;
	};
	/* One grant handle per mapped page. */
	grant_handle_t handles[XENBUS_MAX_RING_PAGES];
	/* Number of entries of @handles that are in use. */
	unsigned int   nr_handles;
};
66 
/* Serialises additions to, searches of and removals from xenbus_valloc_pages. */
static DEFINE_SPINLOCK(xenbus_valloc_lock);
/* List of struct xenbus_map_node, one per mapping that is currently live. */
static LIST_HEAD(xenbus_valloc_pages);
69 
/*
 * Mapping backend used by xenbus_map_ring_valloc() and
 * xenbus_unmap_ring_vfree().  This file provides a PV and an HVM
 * implementation.
 */
struct xenbus_ring_ops {
	/* Map @nr_grefs grant references and store the address in *@vaddr. */
	int (*map)(struct xenbus_device *dev,
		   grant_ref_t *gnt_refs, unsigned int nr_grefs,
		   void **vaddr);
	/* Tear down the mapping that lives at @vaddr. */
	int (*unmap)(struct xenbus_device *dev, void *vaddr);
};

/* Backend in use; assigned by xenbus_ring_ops_init(). */
static const struct xenbus_ring_ops *ring_ops __read_mostly;
78 
79 const char *xenbus_strstate(enum xenbus_state state)
80 {
81 	static const char *const name[] = {
82 		[ XenbusStateUnknown      ] = "Unknown",
83 		[ XenbusStateInitialising ] = "Initialising",
84 		[ XenbusStateInitWait     ] = "InitWait",
85 		[ XenbusStateInitialised  ] = "Initialised",
86 		[ XenbusStateConnected    ] = "Connected",
87 		[ XenbusStateClosing      ] = "Closing",
88 		[ XenbusStateClosed	  ] = "Closed",
89 		[XenbusStateReconfiguring] = "Reconfiguring",
90 		[XenbusStateReconfigured] = "Reconfigured",
91 	};
92 	return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
93 }
94 EXPORT_SYMBOL_GPL(xenbus_strstate);
95 
96 /**
97  * xenbus_watch_path - register a watch
98  * @dev: xenbus device
99  * @path: path to watch
100  * @watch: watch to register
101  * @callback: callback to register
102  *
103  * Register a @watch on the given path, using the given xenbus_watch structure
104  * for storage, and the given @callback function as the callback.  Return 0 on
105  * success, or -errno on error.  On success, the given @path will be saved as
106  * @watch->node, and remains the caller's to free.  On error, @watch->node will
107  * be NULL, the device will switch to %XenbusStateClosing, and the error will
108  * be saved in the store.
109  */
110 int xenbus_watch_path(struct xenbus_device *dev, const char *path,
111 		      struct xenbus_watch *watch,
112 		      void (*callback)(struct xenbus_watch *,
113 				       const char **, unsigned int))
114 {
115 	int err;
116 
117 	watch->node = path;
118 	watch->callback = callback;
119 
120 	err = register_xenbus_watch(watch);
121 
122 	if (err) {
123 		watch->node = NULL;
124 		watch->callback = NULL;
125 		xenbus_dev_fatal(dev, err, "adding watch on %s", path);
126 	}
127 
128 	return err;
129 }
130 EXPORT_SYMBOL_GPL(xenbus_watch_path);
131 
132 
133 /**
134  * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path
135  * @dev: xenbus device
136  * @watch: watch to register
137  * @callback: callback to register
138  * @pathfmt: format of path to watch
139  *
140  * Register a watch on the given @path, using the given xenbus_watch
141  * structure for storage, and the given @callback function as the callback.
142  * Return 0 on success, or -errno on error.  On success, the watched path
143  * (@path/@path2) will be saved as @watch->node, and becomes the caller's to
144  * kfree().  On error, watch->node will be NULL, so the caller has nothing to
145  * free, the device will switch to %XenbusStateClosing, and the error will be
146  * saved in the store.
147  */
148 int xenbus_watch_pathfmt(struct xenbus_device *dev,
149 			 struct xenbus_watch *watch,
150 			 void (*callback)(struct xenbus_watch *,
151 					const char **, unsigned int),
152 			 const char *pathfmt, ...)
153 {
154 	int err;
155 	va_list ap;
156 	char *path;
157 
158 	va_start(ap, pathfmt);
159 	path = kvasprintf(GFP_NOIO | __GFP_HIGH, pathfmt, ap);
160 	va_end(ap);
161 
162 	if (!path) {
163 		xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch");
164 		return -ENOMEM;
165 	}
166 	err = xenbus_watch_path(dev, path, watch, callback);
167 
168 	if (err)
169 		kfree(path);
170 	return err;
171 }
172 EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
173 
/*
 * Forward declaration: __xenbus_switch_state() reports its failures through
 * xenbus_switch_fatal(), which in turn calls back into it.
 */
static void xenbus_switch_fatal(struct xenbus_device *, int, int,
				const char *, ...);

/*
 * Write @state to the "state" node of @dev inside a Xenstore transaction and
 * update the cached dev->state.
 *
 * @depth is handed on to xenbus_switch_fatal(), which only attempts the
 * switch to XenbusStateClosing when @depth is zero; this bounds the mutual
 * recursion to one level.
 *
 * Always returns 0; failures are reported via xenbus_switch_fatal() only.
 */
static int
__xenbus_switch_state(struct xenbus_device *dev,
		      enum xenbus_state state, int depth)
{
	/* We check whether the state is currently set to the given value, and
	   if not, then the state is set.  We don't want to unconditionally
	   write the given state, because we don't want to fire watches
	   unnecessarily.  Furthermore, if the node has gone, we don't write
	   to it, as the device will be tearing down, and we don't want to
	   resurrect that directory.

	   Note that, because of this cached value of our state, this
	   function will not take a caller's Xenstore transaction
	   (something it was trying to in the past) because dev->state
	   would not get reset if the transaction was aborted.
	 */

	struct xenbus_transaction xbt;
	int current_state;
	int err, abort;

	/* Nothing to do if the cached state already matches. */
	if (state == dev->state)
		return 0;

again:
	/* Assume failure until the new state has been written. */
	abort = 1;

	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_switch_fatal(dev, depth, err, "starting transaction");
		return 0;
	}

	/*
	 * The value read is not used; the read only establishes that the
	 * "state" node can still be read.  If it cannot, the transaction is
	 * aborted without reporting an error.
	 */
	err = xenbus_scanf(xbt, dev->nodename, "state", "%d", &current_state);
	if (err != 1)
		goto abort;

	err = xenbus_printf(xbt, dev->nodename, "state", "%d", state);
	if (err) {
		xenbus_switch_fatal(dev, depth, err, "writing new state");
		goto abort;
	}

	abort = 0;
abort:
	err = xenbus_transaction_end(xbt, abort);
	if (err) {
		/* Retry the whole transaction only when a commit hit -EAGAIN. */
		if (err == -EAGAIN && !abort)
			goto again;
		xenbus_switch_fatal(dev, depth, err, "ending transaction");
	} else
		/*
		 * NOTE(review): this is also reached when the transaction was
		 * aborted and ending it succeeded, so the cached state is
		 * updated even though nothing was written - confirm intent.
		 */
		dev->state = state;

	return 0;
}
232 
/**
 * xenbus_switch_state
 * @dev: xenbus device
 * @state: new state
 *
 * Advertise in the store a change of the given driver to the given new_state.
 * Returns whatever __xenbus_switch_state() returns when called with a depth
 * of 0; as written, that helper always returns 0 and reports failures by
 * saving the error in the store and attempting a switch to
 * XenbusStateClosing.
 */
int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
{
	return __xenbus_switch_state(dev, state, 0);
}

EXPORT_SYMBOL_GPL(xenbus_switch_state);
248 
/**
 * xenbus_frontend_closed
 * @dev: xenbus device
 *
 * Switch @dev to XenbusStateClosed and then complete @dev->down.  The result
 * of the state switch is not checked.  Always returns 0.
 */
int xenbus_frontend_closed(struct xenbus_device *dev)
{
	xenbus_switch_state(dev, XenbusStateClosed);
	complete(&dev->down);
	return 0;
}
EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
256 
/*
 * Build the store path of the error node for @dev, i.e. "error/" followed by
 * the device's nodename.  Returns NULL if the allocation fails; a non-NULL
 * result is the caller's to kfree().
 */
static char *error_path(struct xenbus_device *dev)
{
	return kasprintf(GFP_KERNEL, "error/%s", dev->nodename);
}
265 
266 
267 static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
268 				const char *fmt, va_list ap)
269 {
270 	unsigned int len;
271 	char *printf_buffer = NULL;
272 	char *path_buffer = NULL;
273 
274 #define PRINTF_BUFFER_SIZE 4096
275 	printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
276 	if (printf_buffer == NULL)
277 		goto fail;
278 
279 	len = sprintf(printf_buffer, "%i ", -err);
280 	vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap);
281 
282 	dev_err(&dev->dev, "%s\n", printf_buffer);
283 
284 	path_buffer = error_path(dev);
285 
286 	if (path_buffer == NULL) {
287 		dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
288 		       dev->nodename, printf_buffer);
289 		goto fail;
290 	}
291 
292 	if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) {
293 		dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
294 		       dev->nodename, printf_buffer);
295 		goto fail;
296 	}
297 
298 fail:
299 	kfree(printf_buffer);
300 	kfree(path_buffer);
301 }
302 
303 
/**
 * xenbus_dev_error
 * @dev: xenbus device
 * @err: error to report
 * @fmt: error message format
 *
 * Report the given negative errno into the store, along with the given
 * formatted message.  The device state is left untouched; use
 * xenbus_dev_fatal() to also start closing the device.
 */
void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...)
{
	va_list ap;

	/* Collect the variadic arguments and hand them to the common reporter. */
	va_start(ap, fmt);
	xenbus_va_dev_error(dev, err, fmt, ap);
	va_end(ap);
}
EXPORT_SYMBOL_GPL(xenbus_dev_error);
322 
/**
 * xenbus_dev_fatal
 * @dev: xenbus device
 * @err: error to report
 * @fmt: error message format
 *
 * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
 * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly
 * closedown of this driver and its peer.
 */

void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...)
{
	va_list ap;

	/* Report first ... */
	va_start(ap, fmt);
	xenbus_va_dev_error(dev, err, fmt, ap);
	va_end(ap);

	/* ... then start closing; the result of the switch is not checked. */
	xenbus_switch_state(dev, XenbusStateClosing);
}
EXPORT_SYMBOL_GPL(xenbus_dev_fatal);
345 
/*
 * Equivalent to xenbus_dev_fatal(dev, err, fmt, args), but helps
 * avoiding recursion within xenbus_switch_state.
 *
 * The error is always reported.  The switch to XenbusStateClosing is only
 * attempted when @depth is zero, and that nested call is made with a depth
 * of 1, so a failure inside it cannot trigger yet another switch.
 */
static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
				const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	xenbus_va_dev_error(dev, err, fmt, ap);
	va_end(ap);

	/* Only the outermost caller may try to close the device. */
	if (!depth)
		__xenbus_switch_state(dev, XenbusStateClosing, 1);
}
362 
363 /**
364  * xenbus_grant_ring
365  * @dev: xenbus device
366  * @vaddr: starting virtual address of the ring
367  * @nr_pages: number of pages to be granted
368  * @grefs: grant reference array to be filled in
369  *
370  * Grant access to the given @vaddr to the peer of the given device.
371  * Then fill in @grefs with grant references.  Return 0 on success, or
372  * -errno on error.  On error, the device will switch to
373  * XenbusStateClosing, and the error will be saved in the store.
374  */
375 int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
376 		      unsigned int nr_pages, grant_ref_t *grefs)
377 {
378 	int err;
379 	int i, j;
380 
381 	for (i = 0; i < nr_pages; i++) {
382 		err = gnttab_grant_foreign_access(dev->otherend_id,
383 						  virt_to_gfn(vaddr), 0);
384 		if (err < 0) {
385 			xenbus_dev_fatal(dev, err,
386 					 "granting access to ring page");
387 			goto fail;
388 		}
389 		grefs[i] = err;
390 
391 		vaddr = vaddr + PAGE_SIZE;
392 	}
393 
394 	return 0;
395 
396 fail:
397 	for (j = 0; j < i; j++)
398 		gnttab_end_foreign_access_ref(grefs[j], 0);
399 	return err;
400 }
401 EXPORT_SYMBOL_GPL(xenbus_grant_ring);
402 
403 
404 /**
405  * Allocate an event channel for the given xenbus_device, assigning the newly
406  * created local port to *port.  Return 0 on success, or -errno on error.  On
407  * error, the device will switch to XenbusStateClosing, and the error will be
408  * saved in the store.
409  */
410 int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port)
411 {
412 	struct evtchn_alloc_unbound alloc_unbound;
413 	int err;
414 
415 	alloc_unbound.dom = DOMID_SELF;
416 	alloc_unbound.remote_dom = dev->otherend_id;
417 
418 	err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
419 					  &alloc_unbound);
420 	if (err)
421 		xenbus_dev_fatal(dev, err, "allocating event channel");
422 	else
423 		*port = alloc_unbound.port;
424 
425 	return err;
426 }
427 EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn);
428 
429 
430 /**
431  * Free an existing event channel. Returns 0 on success or -errno on error.
432  */
433 int xenbus_free_evtchn(struct xenbus_device *dev, int port)
434 {
435 	struct evtchn_close close;
436 	int err;
437 
438 	close.port = port;
439 
440 	err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
441 	if (err)
442 		xenbus_dev_error(dev, err, "freeing event channel %d", port);
443 
444 	return err;
445 }
446 EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
447 
448 
/**
 * xenbus_map_ring_valloc
 * @dev: xenbus device
 * @gnt_refs: grant reference array
 * @nr_grefs: number of grant references
 * @vaddr: pointer to address to be filled out by mapping
 *
 * Map @nr_grefs pages of memory into this domain from another
 * domain's grant table.  xenbus_map_ring_valloc allocates @nr_grefs
 * pages of virtual address space, maps the pages to that address, and
 * sets *vaddr to that address.  Returns 0 on success, and GNTST_*
 * (see xen/include/interface/grant_table.h) or -ENOMEM / -EINVAL on
 * error. If a grant mapping error is returned, device will switch to
 * XenbusStateClosing and the error message will be saved in XenStore.
 *
 * The work is done by the ->map() hook of the ring_ops backend.
 */
int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs,
			   unsigned int nr_grefs, void **vaddr)
{
	return ring_ops->map(dev, gnt_refs, nr_grefs, vaddr);
}
EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
470 
471 /* N.B. sizeof(phys_addr_t) doesn't always equal to sizeof(unsigned
472  * long), e.g. 32-on-64.  Caller is responsible for preparing the
473  * right array to feed into this function */
474 static int __xenbus_map_ring(struct xenbus_device *dev,
475 			     grant_ref_t *gnt_refs,
476 			     unsigned int nr_grefs,
477 			     grant_handle_t *handles,
478 			     phys_addr_t *addrs,
479 			     unsigned int flags,
480 			     bool *leaked)
481 {
482 	struct gnttab_map_grant_ref map[XENBUS_MAX_RING_PAGES];
483 	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES];
484 	int i, j;
485 	int err = GNTST_okay;
486 
487 	if (nr_grefs > XENBUS_MAX_RING_PAGES)
488 		return -EINVAL;
489 
490 	for (i = 0; i < nr_grefs; i++) {
491 		memset(&map[i], 0, sizeof(map[i]));
492 		gnttab_set_map_op(&map[i], addrs[i], flags, gnt_refs[i],
493 				  dev->otherend_id);
494 		handles[i] = INVALID_GRANT_HANDLE;
495 	}
496 
497 	gnttab_batch_map(map, i);
498 
499 	for (i = 0; i < nr_grefs; i++) {
500 		if (map[i].status != GNTST_okay) {
501 			err = map[i].status;
502 			xenbus_dev_fatal(dev, map[i].status,
503 					 "mapping in shared page %d from domain %d",
504 					 gnt_refs[i], dev->otherend_id);
505 			goto fail;
506 		} else
507 			handles[i] = map[i].handle;
508 	}
509 
510 	return GNTST_okay;
511 
512  fail:
513 	for (i = j = 0; i < nr_grefs; i++) {
514 		if (handles[i] != INVALID_GRANT_HANDLE) {
515 			memset(&unmap[j], 0, sizeof(unmap[j]));
516 			gnttab_set_unmap_op(&unmap[j], (phys_addr_t)addrs[i],
517 					    GNTMAP_host_map, handles[i]);
518 			j++;
519 		}
520 	}
521 
522 	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, j))
523 		BUG();
524 
525 	*leaked = false;
526 	for (i = 0; i < j; i++) {
527 		if (unmap[i].status != GNTST_okay) {
528 			*leaked = true;
529 			break;
530 		}
531 	}
532 
533 	return err;
534 }
535 
536 static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
537 				     grant_ref_t *gnt_refs,
538 				     unsigned int nr_grefs,
539 				     void **vaddr)
540 {
541 	struct xenbus_map_node *node;
542 	struct vm_struct *area;
543 	pte_t *ptes[XENBUS_MAX_RING_PAGES];
544 	phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES];
545 	int err = GNTST_okay;
546 	int i;
547 	bool leaked;
548 
549 	*vaddr = NULL;
550 
551 	if (nr_grefs > XENBUS_MAX_RING_PAGES)
552 		return -EINVAL;
553 
554 	node = kzalloc(sizeof(*node), GFP_KERNEL);
555 	if (!node)
556 		return -ENOMEM;
557 
558 	area = alloc_vm_area(PAGE_SIZE * nr_grefs, ptes);
559 	if (!area) {
560 		kfree(node);
561 		return -ENOMEM;
562 	}
563 
564 	for (i = 0; i < nr_grefs; i++)
565 		phys_addrs[i] = arbitrary_virt_to_machine(ptes[i]).maddr;
566 
567 	err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles,
568 				phys_addrs,
569 				GNTMAP_host_map | GNTMAP_contains_pte,
570 				&leaked);
571 	if (err)
572 		goto failed;
573 
574 	node->nr_handles = nr_grefs;
575 	node->pv.area = area;
576 
577 	spin_lock(&xenbus_valloc_lock);
578 	list_add(&node->next, &xenbus_valloc_pages);
579 	spin_unlock(&xenbus_valloc_lock);
580 
581 	*vaddr = area->addr;
582 	return 0;
583 
584 failed:
585 	if (!leaked)
586 		free_vm_area(area);
587 	else
588 		pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs);
589 
590 	kfree(node);
591 	return err;
592 }
593 
594 static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
595 				      grant_ref_t *gnt_ref,
596 				      unsigned int nr_grefs,
597 				      void **vaddr)
598 {
599 	struct xenbus_map_node *node;
600 	int i;
601 	int err;
602 	void *addr;
603 	bool leaked = false;
604 	/* Why do we need two arrays? See comment of __xenbus_map_ring */
605 	phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES];
606 	unsigned long addrs[XENBUS_MAX_RING_PAGES];
607 
608 	if (nr_grefs > XENBUS_MAX_RING_PAGES)
609 		return -EINVAL;
610 
611 	*vaddr = NULL;
612 
613 	node = kzalloc(sizeof(*node), GFP_KERNEL);
614 	if (!node)
615 		return -ENOMEM;
616 
617 	err = alloc_xenballooned_pages(nr_grefs, node->hvm.pages,
618 				       false /* lowmem */);
619 	if (err)
620 		goto out_err;
621 
622 	for (i = 0; i < nr_grefs; i++) {
623 		unsigned long pfn = page_to_pfn(node->hvm.pages[i]);
624 		phys_addrs[i] = (unsigned long)pfn_to_kaddr(pfn);
625 		addrs[i] = (unsigned long)pfn_to_kaddr(pfn);
626 	}
627 
628 	err = __xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handles,
629 				phys_addrs, GNTMAP_host_map, &leaked);
630 	node->nr_handles = nr_grefs;
631 
632 	if (err)
633 		goto out_free_ballooned_pages;
634 
635 	addr = vmap(node->hvm.pages, nr_grefs, VM_MAP | VM_IOREMAP,
636 		    PAGE_KERNEL);
637 	if (!addr) {
638 		err = -ENOMEM;
639 		goto out_xenbus_unmap_ring;
640 	}
641 
642 	node->hvm.addr = addr;
643 
644 	spin_lock(&xenbus_valloc_lock);
645 	list_add(&node->next, &xenbus_valloc_pages);
646 	spin_unlock(&xenbus_valloc_lock);
647 
648 	*vaddr = addr;
649 	return 0;
650 
651  out_xenbus_unmap_ring:
652 	if (!leaked)
653 		xenbus_unmap_ring(dev, node->handles, node->nr_handles,
654 				  addrs);
655 	else
656 		pr_alert("leaking %p size %u page(s)",
657 			 addr, nr_grefs);
658  out_free_ballooned_pages:
659 	if (!leaked)
660 		free_xenballooned_pages(nr_grefs, node->hvm.pages);
661  out_err:
662 	kfree(node);
663 	return err;
664 }
665 
666 
667 /**
668  * xenbus_map_ring
669  * @dev: xenbus device
670  * @gnt_refs: grant reference array
671  * @nr_grefs: number of grant reference
672  * @handles: pointer to grant handle to be filled
673  * @vaddrs: addresses to be mapped to
674  * @leaked: fail to clean up a failed map, caller should not free vaddr
675  *
676  * Map pages of memory into this domain from another domain's grant table.
677  * xenbus_map_ring does not allocate the virtual address space (you must do
678  * this yourself!). It only maps in the pages to the specified address.
679  * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
680  * or -ENOMEM / -EINVAL on error. If an error is returned, device will switch to
681  * XenbusStateClosing and the first error message will be saved in XenStore.
682  * Further more if we fail to map the ring, caller should check @leaked.
683  * If @leaked is not zero it means xenbus_map_ring fails to clean up, caller
684  * should not free the address space of @vaddr.
685  */
686 int xenbus_map_ring(struct xenbus_device *dev, grant_ref_t *gnt_refs,
687 		    unsigned int nr_grefs, grant_handle_t *handles,
688 		    unsigned long *vaddrs, bool *leaked)
689 {
690 	phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES];
691 	int i;
692 
693 	if (nr_grefs > XENBUS_MAX_RING_PAGES)
694 		return -EINVAL;
695 
696 	for (i = 0; i < nr_grefs; i++)
697 		phys_addrs[i] = (unsigned long)vaddrs[i];
698 
699 	return __xenbus_map_ring(dev, gnt_refs, nr_grefs, handles,
700 				 phys_addrs, GNTMAP_host_map, leaked);
701 }
702 EXPORT_SYMBOL_GPL(xenbus_map_ring);
703 
704 
/**
 * xenbus_unmap_ring_vfree
 * @dev: xenbus device
 * @vaddr: addr to unmap
 *
 * Based on Rusty Russell's skeleton driver's unmap_page.
 * Unmap memory in this domain that was imported from another domain.
 * Use xenbus_unmap_ring_vfree if you mapped in your memory with
 * xenbus_map_ring_valloc (it will free the virtual address space).
 * Returns 0 on success and returns GNTST_* on error
 * (see xen/include/interface/grant_table.h).
 *
 * The work is done by the ->unmap() hook of the ring_ops backend.
 */
int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
{
	return ring_ops->unmap(dev, vaddr);
}
EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
722 
723 static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
724 {
725 	struct xenbus_map_node *node;
726 	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES];
727 	unsigned int level;
728 	int i;
729 	bool leaked = false;
730 	int err;
731 
732 	spin_lock(&xenbus_valloc_lock);
733 	list_for_each_entry(node, &xenbus_valloc_pages, next) {
734 		if (node->pv.area->addr == vaddr) {
735 			list_del(&node->next);
736 			goto found;
737 		}
738 	}
739 	node = NULL;
740  found:
741 	spin_unlock(&xenbus_valloc_lock);
742 
743 	if (!node) {
744 		xenbus_dev_error(dev, -ENOENT,
745 				 "can't find mapped virtual address %p", vaddr);
746 		return GNTST_bad_virt_addr;
747 	}
748 
749 	for (i = 0; i < node->nr_handles; i++) {
750 		unsigned long addr;
751 
752 		memset(&unmap[i], 0, sizeof(unmap[i]));
753 		addr = (unsigned long)vaddr + (PAGE_SIZE * i);
754 		unmap[i].host_addr = arbitrary_virt_to_machine(
755 			lookup_address(addr, &level)).maddr;
756 		unmap[i].dev_bus_addr = 0;
757 		unmap[i].handle = node->handles[i];
758 	}
759 
760 	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i))
761 		BUG();
762 
763 	err = GNTST_okay;
764 	leaked = false;
765 	for (i = 0; i < node->nr_handles; i++) {
766 		if (unmap[i].status != GNTST_okay) {
767 			leaked = true;
768 			xenbus_dev_error(dev, unmap[i].status,
769 					 "unmapping page at handle %d error %d",
770 					 node->handles[i], unmap[i].status);
771 			err = unmap[i].status;
772 			break;
773 		}
774 	}
775 
776 	if (!leaked)
777 		free_vm_area(node->pv.area);
778 	else
779 		pr_alert("leaking VM area %p size %u page(s)",
780 			 node->pv.area, node->nr_handles);
781 
782 	kfree(node);
783 	return err;
784 }
785 
786 static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
787 {
788 	int rv;
789 	struct xenbus_map_node *node;
790 	void *addr;
791 	unsigned long addrs[XENBUS_MAX_RING_PAGES];
792 	int i;
793 
794 	spin_lock(&xenbus_valloc_lock);
795 	list_for_each_entry(node, &xenbus_valloc_pages, next) {
796 		addr = node->hvm.addr;
797 		if (addr == vaddr) {
798 			list_del(&node->next);
799 			goto found;
800 		}
801 	}
802 	node = addr = NULL;
803  found:
804 	spin_unlock(&xenbus_valloc_lock);
805 
806 	if (!node) {
807 		xenbus_dev_error(dev, -ENOENT,
808 				 "can't find mapped virtual address %p", vaddr);
809 		return GNTST_bad_virt_addr;
810 	}
811 
812 	for (i = 0; i < node->nr_handles; i++)
813 		addrs[i] = (unsigned long)pfn_to_kaddr(page_to_pfn(node->hvm.pages[i]));
814 
815 	rv = xenbus_unmap_ring(dev, node->handles, node->nr_handles,
816 			       addrs);
817 	if (!rv) {
818 		vunmap(vaddr);
819 		free_xenballooned_pages(node->nr_handles, node->hvm.pages);
820 	}
821 	else
822 		WARN(1, "Leaking %p, size %u page(s)\n", vaddr,
823 		     node->nr_handles);
824 
825 	kfree(node);
826 	return rv;
827 }
828 
829 /**
830  * xenbus_unmap_ring
831  * @dev: xenbus device
832  * @handles: grant handle array
833  * @nr_handles: number of handles in the array
834  * @vaddrs: addresses to unmap
835  *
836  * Unmap memory in this domain that was imported from another domain.
837  * Returns 0 on success and returns GNTST_* on error
838  * (see xen/include/interface/grant_table.h).
839  */
840 int xenbus_unmap_ring(struct xenbus_device *dev,
841 		      grant_handle_t *handles, unsigned int nr_handles,
842 		      unsigned long *vaddrs)
843 {
844 	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES];
845 	int i;
846 	int err;
847 
848 	if (nr_handles > XENBUS_MAX_RING_PAGES)
849 		return -EINVAL;
850 
851 	for (i = 0; i < nr_handles; i++)
852 		gnttab_set_unmap_op(&unmap[i], vaddrs[i],
853 				    GNTMAP_host_map, handles[i]);
854 
855 	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i))
856 		BUG();
857 
858 	err = GNTST_okay;
859 	for (i = 0; i < nr_handles; i++) {
860 		if (unmap[i].status != GNTST_okay) {
861 			xenbus_dev_error(dev, unmap[i].status,
862 					 "unmapping page at handle %d error %d",
863 					 handles[i], unmap[i].status);
864 			err = unmap[i].status;
865 			break;
866 		}
867 	}
868 
869 	return err;
870 }
871 EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
872 
873 
874 /**
875  * xenbus_read_driver_state
876  * @path: path for driver
877  *
878  * Return the state of the driver rooted at the given store path, or
879  * XenbusStateUnknown if no state can be read.
880  */
881 enum xenbus_state xenbus_read_driver_state(const char *path)
882 {
883 	enum xenbus_state result;
884 	int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL);
885 	if (err)
886 		result = XenbusStateUnknown;
887 
888 	return result;
889 }
890 EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
891 
/* Ring mapping backend selected when XENFEAT_auto_translated_physmap is off. */
static const struct xenbus_ring_ops ring_ops_pv = {
	.map = xenbus_map_ring_valloc_pv,
	.unmap = xenbus_unmap_ring_vfree_pv,
};

/* Ring mapping backend selected when XENFEAT_auto_translated_physmap is on. */
static const struct xenbus_ring_ops ring_ops_hvm = {
	.map = xenbus_map_ring_valloc_hvm,
	.unmap = xenbus_unmap_ring_vfree_hvm,
};
901 
902 void __init xenbus_ring_ops_init(void)
903 {
904 	if (!xen_feature(XENFEAT_auto_translated_physmap))
905 		ring_ops = &ring_ops_pv;
906 	else
907 		ring_ops = &ring_ops_hvm;
908 }
909