/******************************************************************************
 * xenbus_comms.c
 *
 * Low level code to talk to Xen Store: ring buffer and event channel.
 *
 * Copyright (C) 2005 Rusty Russell, IBM Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/wait.h>
#include <linux/interrupt.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>
#include <xen/xenbus.h>
#include <asm/xen/hypervisor.h>
#include <xen/events.h>
#include <xen/page.h>
#include "xenbus.h"

/* A list of replies. Currently only one will ever be outstanding. */
LIST_HEAD(xs_reply_list);

/* A list of write requests. */
LIST_HEAD(xb_write_list);
DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
DEFINE_MUTEX(xb_write_mutex);

/* Protect xenbus reader thread against save/restore. */
DEFINE_MUTEX(xs_response_mutex);

static int xenbus_irq;
static struct task_struct *xenbus_task;

static DECLARE_WORK(probe_work, xenbus_probe);

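/*
 * Interrupt handler for the xenstore event channel: on the first
 * notification mark xenstored as ready and schedule the initial bus
 * probe, then wake anyone waiting for ring activity.
 */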
static irqreturn_t wake_waiting(int irq, void *unused)
{
	if (unlikely(xenstored_ready == 0)) {
		xenstored_ready = 1;
		schedule_work(&probe_work);
	}

	wake_up(&xb_waitq);
	return IRQ_HANDLED;
}

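/*
 * Ring indexes are free-running; they are sane as long as the producer
 * leads the consumer by no more than one full ring.
 */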
static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
{
	return ((prod - cons) <= XENSTORE_RING_SIZE);
}

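/*
 * Return the start of the largest contiguous chunk writable at @prod,
 * limited by the ring wrap-around point and by the space the consumer
 * has not yet freed.
 */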
static void *get_output_chunk(XENSTORE_RING_IDX cons,
			      XENSTORE_RING_IDX prod,
			      char *buf, uint32_t *len)
{
	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
	if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
		*len = XENSTORE_RING_SIZE - (prod - cons);
	return buf + MASK_XENSTORE_IDX(prod);
}

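/*
 * Return the start of the largest contiguous chunk readable at @cons,
 * limited by the ring wrap-around point and by how far the producer
 * has advanced.
 */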
static const void *get_input_chunk(XENSTORE_RING_IDX cons,
				   XENSTORE_RING_IDX prod,
				   const char *buf, uint32_t *len)
{
	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
	if ((prod - cons) < *len)
		*len = prod - cons;
	return buf + MASK_XENSTORE_IDX(cons);
}

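/* True if a request is queued and the request ring has space left. */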
static int xb_data_to_write(void)
{
	struct xenstore_domain_interface *intf = xen_store_interface;

	return (intf->req_prod - intf->req_cons) != XENSTORE_RING_SIZE &&
		!list_empty(&xb_write_list);
}

/**
 * xb_write - low level write
 * @data: buffer to send
 * @len: length of buffer
 *
 * Returns number of bytes written or -err.
 */
static int xb_write(const void *data, unsigned int len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	unsigned int bytes = 0;

	while (len != 0) {
		void *dst;
		unsigned int avail;

		/* Read indexes, then verify. */
		cons = intf->req_cons;
		prod = intf->req_prod;
		if (!check_indexes(cons, prod)) {
			intf->req_cons = intf->req_prod = 0;
			return -EIO;
		}
		if (!xb_data_to_write())
			return bytes;

		/* Must write data /after/ reading the consumer index. */
		virt_mb();

		dst = get_output_chunk(cons, prod, intf->req, &avail);
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;

		memcpy(dst, data, avail);
		data += avail;
		len -= avail;
		bytes += avail;

		/* Other side must not see new producer until data is there. */
		virt_wmb();
		intf->req_prod += avail;

		/* Implies mb(): other side will see the updated producer. */
		if (prod <= intf->req_cons)
			notify_remote_via_evtchn(xen_store_evtchn);
	}

	return bytes;
}

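/* The response ring has data as soon as the indexes differ. */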
static int xb_data_to_read(void)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	return (intf->rsp_cons != intf->rsp_prod);
}

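/**
 * xb_read - low level read
 * @data: buffer to fill
 * @len: length of buffer
 *
 * Returns number of bytes read or -err.
 */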
static int xb_read(void *data, unsigned int len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	unsigned int bytes = 0;

	while (len != 0) {
		unsigned int avail;
		const char *src;

		/* Read indexes, then verify. */
		cons = intf->rsp_cons;
		prod = intf->rsp_prod;
		if (cons == prod)
			return bytes;

		if (!check_indexes(cons, prod)) {
			intf->rsp_cons = intf->rsp_prod = 0;
			return -EIO;
		}

		src = get_input_chunk(cons, prod, intf->rsp, &avail);
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;

		/* Must read data /after/ reading the producer index. */
		virt_rmb();

		memcpy(data, src, avail);
		data += avail;
		len -= avail;
		bytes += avail;

		/* Other side must not see free space until we've copied out */
		virt_mb();
		intf->rsp_cons += avail;

		/* Implies mb(): other side will see the updated consumer. */
		if (intf->rsp_prod - cons >= XENSTORE_RING_SIZE)
			notify_remote_via_evtchn(xen_store_evtchn);
	}

	return bytes;
}

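/*
 * Read and process one message from the response ring.  All state is
 * kept in a static struct so a partially read message survives across
 * calls; a short xb_read() just means we resume later.  Watch events
 * are handed to xs_watch_msg(); replies are matched by req_id against
 * xs_reply_list and the waiting request is completed via its callback.
 */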
static int process_msg(void)
{
	static struct {
		struct xsd_sockmsg msg;
		char *body;
		union {
			void *alloc;
			struct xs_watch_event *watch;
		};
		bool in_msg;
		bool in_hdr;
		unsigned int read;
	} state;
	struct xb_req_data *req;
	int err;
	unsigned int len;

	if (!state.in_msg) {
		state.in_msg = true;
		state.in_hdr = true;
		state.read = 0;

		/*
		 * We must disallow save/restore while reading a message.
		 * A partial read across s/r leaves us out of sync with
		 * xenstored.
		 * xs_response_mutex is locked as long as we are processing one
		 * message. state.in_msg will be true as long as we are holding
		 * the lock here.
		 */
		mutex_lock(&xs_response_mutex);

		if (!xb_data_to_read()) {
			/* We raced with save/restore: pending data 'gone'. */
			mutex_unlock(&xs_response_mutex);
			state.in_msg = false;
			return 0;
		}
	}

	if (state.in_hdr) {
		if (state.read != sizeof(state.msg)) {
			err = xb_read((void *)&state.msg + state.read,
				      sizeof(state.msg) - state.read);
			if (err < 0)
				goto out;
			state.read += err;
			if (state.read != sizeof(state.msg))
				return 0;
			if (state.msg.len > XENSTORE_PAYLOAD_MAX) {
				err = -EINVAL;
				goto out;
			}
		}

		len = state.msg.len + 1;
		if (state.msg.type == XS_WATCH_EVENT)
			len += sizeof(*state.watch);

		state.alloc = kmalloc(len, GFP_NOIO | __GFP_HIGH);
		if (!state.alloc)
			return -ENOMEM;

		if (state.msg.type == XS_WATCH_EVENT)
			state.body = state.watch->body;
		else
			state.body = state.alloc;
		state.in_hdr = false;
		state.read = 0;
	}

	err = xb_read(state.body + state.read, state.msg.len - state.read);
	if (err < 0)
		goto out;

	state.read += err;
	if (state.read != state.msg.len)
		return 0;

	state.body[state.msg.len] = '\0';

	if (state.msg.type == XS_WATCH_EVENT) {
		state.watch->len = state.msg.len;
		err = xs_watch_msg(state.watch);
	} else {
		err = -ENOENT;
		mutex_lock(&xb_write_mutex);
		list_for_each_entry(req, &xs_reply_list, list) {
			if (req->msg.req_id == state.msg.req_id) {
				list_del(&req->list);
				err = 0;
				break;
			}
		}
		mutex_unlock(&xb_write_mutex);
		if (err)
			goto out;

		if (req->state == xb_req_state_wait_reply) {
			req->msg.req_id = req->caller_req_id;
			req->msg.type = state.msg.type;
			req->msg.len = state.msg.len;
			req->body = state.body;
			/* write body, then update state */
			virt_wmb();
			req->state = xb_req_state_got_reply;
			req->cb(req);
		} else
			kfree(req);
	}

	mutex_unlock(&xs_response_mutex);

	state.in_msg = false;
	state.alloc = NULL;
	return err;

 out:
	mutex_unlock(&xs_response_mutex);
	state.in_msg = false;
	kfree(state.alloc);
	state.alloc = NULL;
	return err;
}

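/*
 * Push the request at the head of xb_write_list into the request ring:
 * first the message header, then each iovec of the payload.  Static
 * state lets a partial write resume on the next call.  On completion
 * the request moves to xs_reply_list to await its reply; on error the
 * requester is woken (or an already-aborted request is freed).
 */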
static int process_writes(void)
{
	static struct {
		struct xb_req_data *req;
		int idx;
		unsigned int written;
	} state;
	void *base;
	unsigned int len;
	int err = 0;

	if (!xb_data_to_write())
		return 0;

	mutex_lock(&xb_write_mutex);

	if (!state.req) {
		state.req = list_first_entry(&xb_write_list,
					     struct xb_req_data, list);
		state.idx = -1;
		state.written = 0;
	}

	if (state.req->state == xb_req_state_aborted)
		goto out_err;

	while (state.idx < state.req->num_vecs) {
		if (state.idx < 0) {
			base = &state.req->msg;
			len = sizeof(state.req->msg);
		} else {
			base = state.req->vec[state.idx].iov_base;
			len = state.req->vec[state.idx].iov_len;
		}
		err = xb_write(base + state.written, len - state.written);
		if (err < 0)
			goto out_err;
		state.written += err;
		if (state.written != len)
			goto out;

		state.idx++;
		state.written = 0;
	}

	list_del(&state.req->list);
	state.req->state = xb_req_state_wait_reply;
	list_add_tail(&state.req->list, &xs_reply_list);
	state.req = NULL;

 out:
	mutex_unlock(&xb_write_mutex);

	return 0;

 out_err:
	state.req->msg.type = XS_ERROR;
	state.req->err = err;
	list_del(&state.req->list);
	if (state.req->state == xb_req_state_aborted)
		kfree(state.req);
	else {
		/* write err, then update state */
		virt_wmb();
		state.req->state = xb_req_state_got_reply;
		wake_up(&state.req->wq);
	}

	mutex_unlock(&xb_write_mutex);

	state.req = NULL;

	return err;
}

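/* Any work pending for the xenbus thread? */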
static int xb_thread_work(void)
{
	return xb_data_to_read() || xb_data_to_write();
}

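/*
 * Main loop of the xenbus kthread: wait until the interrupt handler
 * signals ring activity, then shuttle responses in and requests out.
 * -ENOMEM from process_msg() is transient, so yield and retry on the
 * next iteration; other errors are logged (ratelimited) and processing
 * continues.
 */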
static int xenbus_thread(void *unused)
{
	int err;

	while (!kthread_should_stop()) {
		if (wait_event_interruptible(xb_waitq, xb_thread_work()))
			continue;

		err = process_msg();
		if (err == -ENOMEM)
			schedule();
		else if (err)
			pr_warn_ratelimited("error %d while reading message\n",
					    err);

		err = process_writes();
		if (err)
			pr_warn_ratelimited("error %d while writing message\n",
					    err);
	}

	xenbus_task = NULL;
	return 0;
}

/**
 * xb_init_comms - Set up interrupt handler off store event channel.
 */
int xb_init_comms(void)
{
	struct xenstore_domain_interface *intf = xen_store_interface;

	if (intf->req_prod != intf->req_cons)
		pr_err("request ring is not quiescent (%08x:%08x)!\n",
		       intf->req_cons, intf->req_prod);

	if (intf->rsp_prod != intf->rsp_cons) {
		pr_warn("response ring is not quiescent (%08x:%08x): fixing up\n",
			intf->rsp_cons, intf->rsp_prod);
		/* breaks kdump */
		if (!reset_devices)
			intf->rsp_cons = intf->rsp_prod;
	}

	if (xenbus_irq) {
		/* Already have an irq; assume we're resuming */
		rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
	} else {
		int err;

		err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
						0, "xenbus", &xb_waitq);
		if (err < 0) {
			pr_err("request irq failed %i\n", err);
			return err;
		}

		xenbus_irq = err;

		if (!xenbus_task) {
			xenbus_task = kthread_run(xenbus_thread, NULL,
						  "xenbus");
			if (IS_ERR(xenbus_task))
				return PTR_ERR(xenbus_task);
		}
	}

	return 0;
}

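/* Tear down the event-channel interrupt set up by xb_init_comms(). */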
void xb_deinit_comms(void)
{
	unbind_from_irqhandler(xenbus_irq, &xb_waitq);
	xenbus_irq = 0;
}