1 /******************************************************************************
2  * xenbus_comms.c
3  *
 * Low level code to talk to Xen Store: ringbuffer and event channel.
5  *
6  * Copyright (C) 2005 Rusty Russell, IBM Corporation
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License version 2
10  * as published by the Free Software Foundation; or, when distributed
11  * separately from the Linux kernel or incorporated into other
12  * software packages, subject to the following license:
13  *
14  * Permission is hereby granted, free of charge, to any person obtaining a copy
15  * of this source file (the "Software"), to deal in the Software without
16  * restriction, including without limitation the rights to use, copy, modify,
17  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18  * and to permit persons to whom the Software is furnished to do so, subject to
19  * the following conditions:
20  *
21  * The above copyright notice and this permission notice shall be included in
22  * all copies or substantial portions of the Software.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30  * IN THE SOFTWARE.
31  */
32 
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34 
35 #include <linux/wait.h>
36 #include <linux/interrupt.h>
37 #include <linux/kthread.h>
38 #include <linux/sched.h>
39 #include <linux/err.h>
40 #include <xen/xenbus.h>
41 #include <asm/xen/hypervisor.h>
42 #include <xen/events.h>
43 #include <xen/page.h>
44 #include "xenbus.h"
45 
/*
 * A list of replies: requests that have been completely written to the
 * request ring and are waiting for their answer.
 * Currently only one will ever be outstanding.
 */
LIST_HEAD(xs_reply_list);

/* A list of write requests still waiting to be copied into the ring. */
LIST_HEAD(xb_write_list);
/* Woken by the store event channel interrupt; the xenbus thread sleeps here. */
DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
/* Held while entries are moved on or off xb_write_list and xs_reply_list. */
DEFINE_MUTEX(xb_write_mutex);

/* Protect xenbus reader thread against save/restore. */
DEFINE_MUTEX(xs_response_mutex);

/* IRQ bound to the store event channel; 0 while no handler is bound. */
static int xenbus_irq;
/* The "xenbus" kernel thread running xenbus_thread(), once started. */
static struct task_struct *xenbus_task;
59 
/*
 * Interrupt handler for the store event channel: wake everybody sleeping on
 * xb_waitq so the ring state gets re-examined. Both arguments are unused.
 */
static irqreturn_t wake_waiting(int irq, void *unused)
{
	wake_up(&xb_waitq);
	return IRQ_HANDLED;
}
65 
66 static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
67 {
68 	return ((prod - cons) <= XENSTORE_RING_SIZE);
69 }
70 
71 static void *get_output_chunk(XENSTORE_RING_IDX cons,
72 			      XENSTORE_RING_IDX prod,
73 			      char *buf, uint32_t *len)
74 {
75 	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
76 	if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
77 		*len = XENSTORE_RING_SIZE - (prod - cons);
78 	return buf + MASK_XENSTORE_IDX(prod);
79 }
80 
81 static const void *get_input_chunk(XENSTORE_RING_IDX cons,
82 				   XENSTORE_RING_IDX prod,
83 				   const char *buf, uint32_t *len)
84 {
85 	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
86 	if ((prod - cons) < *len)
87 		*len = prod - cons;
88 	return buf + MASK_XENSTORE_IDX(cons);
89 }
90 
91 static int xb_data_to_write(void)
92 {
93 	struct xenstore_domain_interface *intf = xen_store_interface;
94 
95 	return (intf->req_prod - intf->req_cons) != XENSTORE_RING_SIZE &&
96 		!list_empty(&xb_write_list);
97 }
98 
/**
 * xb_write - low level write
 * @data: buffer to send
 * @len: length of buffer
 *
 * Copies as much of @data as currently fits into the request ring of the
 * shared xenstore interface, one contiguous chunk at a time, publishing each
 * chunk by advancing req_prod.
 *
 * Returns number of bytes written or -err. The count may be smaller than
 * @len (including 0) when xb_data_to_write() reports that no further progress
 * is possible. -EIO is returned when the ring indexes are inconsistent; both
 * request indexes are reset to 0 in that case.
 */
static int xb_write(const void *data, unsigned int len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	unsigned int bytes = 0;

	while (len != 0) {
		void *dst;
		unsigned int avail;

		/* Read indexes, then verify. */
		cons = intf->req_cons;
		prod = intf->req_prod;
		if (!check_indexes(cons, prod)) {
			/* Indexes are corrupt: reset the ring and give up. */
			intf->req_cons = intf->req_prod = 0;
			return -EIO;
		}
		/* Ring full (or nothing queued): report the partial count. */
		if (!xb_data_to_write())
			return bytes;

		/* Must write data /after/ reading the consumer index. */
		virt_mb();

		dst = get_output_chunk(cons, prod, intf->req, &avail);
		if (avail == 0)
			continue;
		/* Never copy more than the caller asked for. */
		if (avail > len)
			avail = len;

		memcpy(dst, data, avail);
		data += avail;
		len -= avail;
		bytes += avail;

		/* Other side must not see new producer until data is there. */
		virt_wmb();
		intf->req_prod += avail;

		/* Implies mb(): other side will see the updated producer. */
		/*
		 * Notify only when the consumer index has reached (or passed)
		 * the producer value sampled before this chunk was added.
		 */
		if (prod <= intf->req_cons)
			notify_remote_via_evtchn(xen_store_evtchn);
	}

	return bytes;
}
151 
152 static int xb_data_to_read(void)
153 {
154 	struct xenstore_domain_interface *intf = xen_store_interface;
155 	return (intf->rsp_cons != intf->rsp_prod);
156 }
157 
/*
 * xb_read - low level read
 * @data: buffer to fill
 * @len: number of bytes wanted
 *
 * Copies up to @len bytes out of the response ring of the shared xenstore
 * interface, one contiguous chunk at a time, releasing each chunk by
 * advancing rsp_cons.
 *
 * Returns the number of bytes read, which may be smaller than @len
 * (including 0) when the ring runs empty, or -EIO when the ring indexes are
 * inconsistent; both response indexes are reset to 0 in that case.
 */
static int xb_read(void *data, unsigned int len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	unsigned int bytes = 0;

	while (len != 0) {
		unsigned int avail;
		const char *src;

		/* Read indexes, then verify. */
		cons = intf->rsp_cons;
		prod = intf->rsp_prod;
		/* Nothing (more) to read: report the partial count. */
		if (cons == prod)
			return bytes;

		if (!check_indexes(cons, prod)) {
			/* Indexes are corrupt: reset the ring and give up. */
			intf->rsp_cons = intf->rsp_prod = 0;
			return -EIO;
		}

		src = get_input_chunk(cons, prod, intf->rsp, &avail);
		if (avail == 0)
			continue;
		/* Never copy more than the caller asked for. */
		if (avail > len)
			avail = len;

		/* Must read data /after/ reading the producer index. */
		virt_rmb();

		memcpy(data, src, avail);
		data += avail;
		len -= avail;
		bytes += avail;

		/* Other side must not see free space until we've copied out */
		virt_mb();
		intf->rsp_cons += avail;

		/* Implies mb(): other side will see the updated consumer. */
		/*
		 * Notify only when the ring was full relative to the consumer
		 * value sampled before this chunk was released.
		 */
		if (intf->rsp_prod - cons >= XENSTORE_RING_SIZE)
			notify_remote_via_evtchn(xen_store_evtchn);
	}

	return bytes;
}
204 
205 static int process_msg(void)
206 {
207 	static struct {
208 		struct xsd_sockmsg msg;
209 		char *body;
210 		union {
211 			void *alloc;
212 			struct xs_watch_event *watch;
213 		};
214 		bool in_msg;
215 		bool in_hdr;
216 		unsigned int read;
217 	} state;
218 	struct xb_req_data *req;
219 	int err;
220 	unsigned int len;
221 
222 	if (!state.in_msg) {
223 		state.in_msg = true;
224 		state.in_hdr = true;
225 		state.read = 0;
226 
227 		/*
228 		 * We must disallow save/restore while reading a message.
229 		 * A partial read across s/r leaves us out of sync with
230 		 * xenstored.
231 		 * xs_response_mutex is locked as long as we are processing one
232 		 * message. state.in_msg will be true as long as we are holding
233 		 * the lock here.
234 		 */
235 		mutex_lock(&xs_response_mutex);
236 
237 		if (!xb_data_to_read()) {
238 			/* We raced with save/restore: pending data 'gone'. */
239 			mutex_unlock(&xs_response_mutex);
240 			state.in_msg = false;
241 			return 0;
242 		}
243 	}
244 
245 	if (state.in_hdr) {
246 		if (state.read != sizeof(state.msg)) {
247 			err = xb_read((void *)&state.msg + state.read,
248 				      sizeof(state.msg) - state.read);
249 			if (err < 0)
250 				goto out;
251 			state.read += err;
252 			if (state.read != sizeof(state.msg))
253 				return 0;
254 			if (state.msg.len > XENSTORE_PAYLOAD_MAX) {
255 				err = -EINVAL;
256 				goto out;
257 			}
258 		}
259 
260 		len = state.msg.len + 1;
261 		if (state.msg.type == XS_WATCH_EVENT)
262 			len += sizeof(*state.watch);
263 
264 		state.alloc = kmalloc(len, GFP_NOIO | __GFP_HIGH);
265 		if (!state.alloc)
266 			return -ENOMEM;
267 
268 		if (state.msg.type == XS_WATCH_EVENT)
269 			state.body = state.watch->body;
270 		else
271 			state.body = state.alloc;
272 		state.in_hdr = false;
273 		state.read = 0;
274 	}
275 
276 	err = xb_read(state.body + state.read, state.msg.len - state.read);
277 	if (err < 0)
278 		goto out;
279 
280 	state.read += err;
281 	if (state.read != state.msg.len)
282 		return 0;
283 
284 	state.body[state.msg.len] = '\0';
285 
286 	if (state.msg.type == XS_WATCH_EVENT) {
287 		state.watch->len = state.msg.len;
288 		err = xs_watch_msg(state.watch);
289 	} else {
290 		err = -ENOENT;
291 		mutex_lock(&xb_write_mutex);
292 		list_for_each_entry(req, &xs_reply_list, list) {
293 			if (req->msg.req_id == state.msg.req_id) {
294 				list_del(&req->list);
295 				err = 0;
296 				break;
297 			}
298 		}
299 		mutex_unlock(&xb_write_mutex);
300 		if (err)
301 			goto out;
302 
303 		if (req->state == xb_req_state_wait_reply) {
304 			req->msg.req_id = req->caller_req_id;
305 			req->msg.type = state.msg.type;
306 			req->msg.len = state.msg.len;
307 			req->body = state.body;
308 			/* write body, then update state */
309 			virt_wmb();
310 			req->state = xb_req_state_got_reply;
311 			req->cb(req);
312 		} else
313 			kfree(req);
314 	}
315 
316 	mutex_unlock(&xs_response_mutex);
317 
318 	state.in_msg = false;
319 	state.alloc = NULL;
320 	return err;
321 
322  out:
323 	mutex_unlock(&xs_response_mutex);
324 	state.in_msg = false;
325 	kfree(state.alloc);
326 	state.alloc = NULL;
327 	return err;
328 }
329 
330 static int process_writes(void)
331 {
332 	static struct {
333 		struct xb_req_data *req;
334 		int idx;
335 		unsigned int written;
336 	} state;
337 	void *base;
338 	unsigned int len;
339 	int err = 0;
340 
341 	if (!xb_data_to_write())
342 		return 0;
343 
344 	mutex_lock(&xb_write_mutex);
345 
346 	if (!state.req) {
347 		state.req = list_first_entry(&xb_write_list,
348 					     struct xb_req_data, list);
349 		state.idx = -1;
350 		state.written = 0;
351 	}
352 
353 	if (state.req->state == xb_req_state_aborted)
354 		goto out_err;
355 
356 	while (state.idx < state.req->num_vecs) {
357 		if (state.idx < 0) {
358 			base = &state.req->msg;
359 			len = sizeof(state.req->msg);
360 		} else {
361 			base = state.req->vec[state.idx].iov_base;
362 			len = state.req->vec[state.idx].iov_len;
363 		}
364 		err = xb_write(base + state.written, len - state.written);
365 		if (err < 0)
366 			goto out_err;
367 		state.written += err;
368 		if (state.written != len)
369 			goto out;
370 
371 		state.idx++;
372 		state.written = 0;
373 	}
374 
375 	list_del(&state.req->list);
376 	state.req->state = xb_req_state_wait_reply;
377 	list_add_tail(&state.req->list, &xs_reply_list);
378 	state.req = NULL;
379 
380  out:
381 	mutex_unlock(&xb_write_mutex);
382 
383 	return 0;
384 
385  out_err:
386 	state.req->msg.type = XS_ERROR;
387 	state.req->err = err;
388 	list_del(&state.req->list);
389 	if (state.req->state == xb_req_state_aborted)
390 		kfree(state.req);
391 	else {
392 		/* write err, then update state */
393 		virt_wmb();
394 		state.req->state = xb_req_state_got_reply;
395 		wake_up(&state.req->wq);
396 	}
397 
398 	mutex_unlock(&xb_write_mutex);
399 
400 	state.req = NULL;
401 
402 	return err;
403 }
404 
/*
 * Wake-up condition of the xenbus thread: returns 1 when there is data to
 * read from the response ring or a request that can be written, else 0.
 */
static int xb_thread_work(void)
{
	if (xb_data_to_read())
		return 1;

	return xb_data_to_write() != 0;
}
409 
410 static int xenbus_thread(void *unused)
411 {
412 	int err;
413 
414 	while (!kthread_should_stop()) {
415 		if (wait_event_interruptible(xb_waitq, xb_thread_work()))
416 			continue;
417 
418 		err = process_msg();
419 		if (err == -ENOMEM)
420 			schedule();
421 		else if (err)
422 			pr_warn_ratelimited("error %d while reading message\n",
423 					    err);
424 
425 		err = process_writes();
426 		if (err)
427 			pr_warn_ratelimited("error %d while writing message\n",
428 					    err);
429 	}
430 
431 	xenbus_task = NULL;
432 	return 0;
433 }
434 
435 /**
436  * xb_init_comms - Set up interrupt handler off store event channel.
437  */
438 int xb_init_comms(void)
439 {
440 	struct xenstore_domain_interface *intf = xen_store_interface;
441 
442 	if (intf->req_prod != intf->req_cons)
443 		pr_err("request ring is not quiescent (%08x:%08x)!\n",
444 		       intf->req_cons, intf->req_prod);
445 
446 	if (intf->rsp_prod != intf->rsp_cons) {
447 		pr_warn("response ring is not quiescent (%08x:%08x): fixing up\n",
448 			intf->rsp_cons, intf->rsp_prod);
449 		/* breaks kdump */
450 		if (!reset_devices)
451 			intf->rsp_cons = intf->rsp_prod;
452 	}
453 
454 	if (xenbus_irq) {
455 		/* Already have an irq; assume we're resuming */
456 		rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
457 	} else {
458 		int err;
459 
460 		err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
461 						0, "xenbus", &xb_waitq);
462 		if (err < 0) {
463 			pr_err("request irq failed %i\n", err);
464 			return err;
465 		}
466 
467 		xenbus_irq = err;
468 
469 		if (!xenbus_task) {
470 			xenbus_task = kthread_run(xenbus_thread, NULL,
471 						  "xenbus");
472 			if (IS_ERR(xenbus_task))
473 				return PTR_ERR(xenbus_task);
474 		}
475 	}
476 
477 	return 0;
478 }
479 
/*
 * xb_deinit_comms - release the interrupt handler of the store event channel.
 *
 * Unbinds the handler installed by xb_init_comms() and clears xenbus_irq so
 * that a later xb_init_comms() binds a fresh IRQ instead of rebinding.
 */
void xb_deinit_comms(void)
{
	unbind_from_irqhandler(xenbus_irq, &xb_waitq);
	xenbus_irq = 0;
}
485