1 /******************************************************************************
2  * xenbus_comms.c
3  *
 * Low-level code to talk to Xen Store: ring buffer and event channel.
5  *
6  * Copyright (C) 2005 Rusty Russell, IBM Corporation
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License version 2
10  * as published by the Free Software Foundation; or, when distributed
11  * separately from the Linux kernel or incorporated into other
12  * software packages, subject to the following license:
13  *
14  * Permission is hereby granted, free of charge, to any person obtaining a copy
15  * of this source file (the "Software"), to deal in the Software without
16  * restriction, including without limitation the rights to use, copy, modify,
17  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18  * and to permit persons to whom the Software is furnished to do so, subject to
19  * the following conditions:
20  *
21  * The above copyright notice and this permission notice shall be included in
22  * all copies or substantial portions of the Software.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30  * IN THE SOFTWARE.
31  */
32 
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34 
35 #include <linux/wait.h>
36 #include <linux/interrupt.h>
37 #include <linux/kthread.h>
38 #include <linux/sched.h>
39 #include <linux/err.h>
40 #include <xen/xenbus.h>
41 #include <asm/xen/hypervisor.h>
42 #include <xen/events.h>
43 #include <xen/page.h>
44 #include "xenbus.h"
45 
/* A list of replies. Currently only one will ever be outstanding. */
LIST_HEAD(xs_reply_list);

/* A list of write requests. */
LIST_HEAD(xb_write_list);
/* Woken by wake_waiting(); xenbus_thread() sleeps on it until there is work. */
DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
/* Held in this file while xb_write_list / xs_reply_list are walked or changed. */
DEFINE_MUTEX(xb_write_mutex);

/* Protect xenbus reader thread against save/restore. */
DEFINE_MUTEX(xs_response_mutex);

/* IRQ returned by bind_evtchn_to_irqhandler(); 0 while no IRQ is bound. */
static int xenbus_irq;
/* The "xenbus" kthread running xenbus_thread(); reset to NULL when it exits. */
static struct task_struct *xenbus_task;

/* Work item that runs xenbus_probe(); queued from wake_waiting(). */
static DECLARE_WORK(probe_work, xenbus_probe);
61 
62 
63 static irqreturn_t wake_waiting(int irq, void *unused)
64 {
65 	if (unlikely(xenstored_ready == 0)) {
66 		xenstored_ready = 1;
67 		schedule_work(&probe_work);
68 	}
69 
70 	wake_up(&xb_waitq);
71 	return IRQ_HANDLED;
72 }
73 
74 static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
75 {
76 	return ((prod - cons) <= XENSTORE_RING_SIZE);
77 }
78 
79 static void *get_output_chunk(XENSTORE_RING_IDX cons,
80 			      XENSTORE_RING_IDX prod,
81 			      char *buf, uint32_t *len)
82 {
83 	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
84 	if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
85 		*len = XENSTORE_RING_SIZE - (prod - cons);
86 	return buf + MASK_XENSTORE_IDX(prod);
87 }
88 
89 static const void *get_input_chunk(XENSTORE_RING_IDX cons,
90 				   XENSTORE_RING_IDX prod,
91 				   const char *buf, uint32_t *len)
92 {
93 	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
94 	if ((prod - cons) < *len)
95 		*len = prod - cons;
96 	return buf + MASK_XENSTORE_IDX(cons);
97 }
98 
99 static int xb_data_to_write(void)
100 {
101 	struct xenstore_domain_interface *intf = xen_store_interface;
102 
103 	return (intf->req_prod - intf->req_cons) != XENSTORE_RING_SIZE &&
104 		!list_empty(&xb_write_list);
105 }
106 
/**
 * xb_write - low level write
 * @data: buffer to send
 * @len: length of buffer
 *
 * Copies @data into the request ring of the shared xenstore interface, one
 * contiguous chunk at a time, advancing req_prod after each chunk.
 *
 * The loop stops early, returning the bytes copied so far, as soon as
 * xb_data_to_write() reports that no write can proceed. If the ring indexes
 * are inconsistent, both are reset to 0 and -EIO is returned.
 *
 * Returns number of bytes written or -err.
 */
static int xb_write(const void *data, unsigned int len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	unsigned int bytes = 0;

	while (len != 0) {
		void *dst;
		unsigned int avail;

		/* Read indexes, then verify. */
		cons = intf->req_cons;
		prod = intf->req_prod;
		if (!check_indexes(cons, prod)) {
			/* Indexes are inconsistent: reset the request ring. */
			intf->req_cons = intf->req_prod = 0;
			return -EIO;
		}
		/* Report a short write instead of waiting for space. */
		if (!xb_data_to_write())
			return bytes;

		/* Must write data /after/ reading the consumer index. */
		virt_mb();

		dst = get_output_chunk(cons, prod, intf->req, &avail);
		/* No room with this snapshot of the indexes: re-read them. */
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;

		memcpy(dst, data, avail);
		data += avail;
		len -= avail;
		bytes += avail;

		/* Other side must not see new producer until data is there. */
		virt_wmb();
		intf->req_prod += avail;

		/*
		 * Implies mb(): other side will see the updated producer.
		 * 'prod' is the producer index from before this chunk; the
		 * event is only sent when the consumer index is at or beyond
		 * it (presumably the other end had drained the ring and may
		 * be idle -- confirm against the ring protocol).
		 */
		if (prod <= intf->req_cons)
			notify_remote_via_evtchn(xen_store_evtchn);
	}

	return bytes;
}
159 
160 static int xb_data_to_read(void)
161 {
162 	struct xenstore_domain_interface *intf = xen_store_interface;
163 	return (intf->rsp_cons != intf->rsp_prod);
164 }
165 
/*
 * xb_read - low level read
 * @data: destination buffer
 * @len: maximum number of bytes to read
 *
 * Copies up to @len bytes out of the response ring of the shared xenstore
 * interface, one contiguous chunk at a time, advancing rsp_cons after each
 * chunk. Does not wait: when the ring runs empty the bytes copied so far
 * are returned (possibly 0). If the ring indexes are inconsistent, both are
 * reset to 0 and -EIO is returned.
 *
 * Returns number of bytes read or -err.
 */
static int xb_read(void *data, unsigned int len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	unsigned int bytes = 0;

	while (len != 0) {
		unsigned int avail;
		const char *src;

		/* Read indexes, then verify. */
		cons = intf->rsp_cons;
		prod = intf->rsp_prod;
		/* Ring is empty: return what has been read so far. */
		if (cons == prod)
			return bytes;

		if (!check_indexes(cons, prod)) {
			/* Indexes are inconsistent: reset the response ring. */
			intf->rsp_cons = intf->rsp_prod = 0;
			return -EIO;
		}

		src = get_input_chunk(cons, prod, intf->rsp, &avail);
		/* Nothing usable with this snapshot: re-read the indexes. */
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;

		/* Must read data /after/ reading the producer index. */
		virt_rmb();

		memcpy(data, src, avail);
		data += avail;
		len -= avail;
		bytes += avail;

		/* Other side must not see free space until we've copied out */
		virt_mb();
		intf->rsp_cons += avail;

		/*
		 * Implies mb(): other side will see the updated consumer.
		 * 'cons' is the consumer index from before this chunk; the
		 * event is only sent when the ring was full relative to it
		 * (presumably the producer may be waiting for space --
		 * confirm against the ring protocol).
		 */
		if (intf->rsp_prod - cons >= XENSTORE_RING_SIZE)
			notify_remote_via_evtchn(xen_store_evtchn);
	}

	return bytes;
}
212 
213 static int process_msg(void)
214 {
215 	static struct {
216 		struct xsd_sockmsg msg;
217 		char *body;
218 		union {
219 			void *alloc;
220 			struct xs_watch_event *watch;
221 		};
222 		bool in_msg;
223 		bool in_hdr;
224 		unsigned int read;
225 	} state;
226 	struct xb_req_data *req;
227 	int err;
228 	unsigned int len;
229 
230 	if (!state.in_msg) {
231 		state.in_msg = true;
232 		state.in_hdr = true;
233 		state.read = 0;
234 
235 		/*
236 		 * We must disallow save/restore while reading a message.
237 		 * A partial read across s/r leaves us out of sync with
238 		 * xenstored.
239 		 * xs_response_mutex is locked as long as we are processing one
240 		 * message. state.in_msg will be true as long as we are holding
241 		 * the lock here.
242 		 */
243 		mutex_lock(&xs_response_mutex);
244 
245 		if (!xb_data_to_read()) {
246 			/* We raced with save/restore: pending data 'gone'. */
247 			mutex_unlock(&xs_response_mutex);
248 			state.in_msg = false;
249 			return 0;
250 		}
251 	}
252 
253 	if (state.in_hdr) {
254 		if (state.read != sizeof(state.msg)) {
255 			err = xb_read((void *)&state.msg + state.read,
256 				      sizeof(state.msg) - state.read);
257 			if (err < 0)
258 				goto out;
259 			state.read += err;
260 			if (state.read != sizeof(state.msg))
261 				return 0;
262 			if (state.msg.len > XENSTORE_PAYLOAD_MAX) {
263 				err = -EINVAL;
264 				goto out;
265 			}
266 		}
267 
268 		len = state.msg.len + 1;
269 		if (state.msg.type == XS_WATCH_EVENT)
270 			len += sizeof(*state.watch);
271 
272 		state.alloc = kmalloc(len, GFP_NOIO | __GFP_HIGH);
273 		if (!state.alloc)
274 			return -ENOMEM;
275 
276 		if (state.msg.type == XS_WATCH_EVENT)
277 			state.body = state.watch->body;
278 		else
279 			state.body = state.alloc;
280 		state.in_hdr = false;
281 		state.read = 0;
282 	}
283 
284 	err = xb_read(state.body + state.read, state.msg.len - state.read);
285 	if (err < 0)
286 		goto out;
287 
288 	state.read += err;
289 	if (state.read != state.msg.len)
290 		return 0;
291 
292 	state.body[state.msg.len] = '\0';
293 
294 	if (state.msg.type == XS_WATCH_EVENT) {
295 		state.watch->len = state.msg.len;
296 		err = xs_watch_msg(state.watch);
297 	} else {
298 		err = -ENOENT;
299 		mutex_lock(&xb_write_mutex);
300 		list_for_each_entry(req, &xs_reply_list, list) {
301 			if (req->msg.req_id == state.msg.req_id) {
302 				list_del(&req->list);
303 				err = 0;
304 				break;
305 			}
306 		}
307 		mutex_unlock(&xb_write_mutex);
308 		if (err)
309 			goto out;
310 
311 		if (req->state == xb_req_state_wait_reply) {
312 			req->msg.req_id = req->caller_req_id;
313 			req->msg.type = state.msg.type;
314 			req->msg.len = state.msg.len;
315 			req->body = state.body;
316 			req->state = xb_req_state_got_reply;
317 			req->cb(req);
318 		} else
319 			kfree(req);
320 	}
321 
322 	mutex_unlock(&xs_response_mutex);
323 
324 	state.in_msg = false;
325 	state.alloc = NULL;
326 	return err;
327 
328  out:
329 	mutex_unlock(&xs_response_mutex);
330 	state.in_msg = false;
331 	kfree(state.alloc);
332 	state.alloc = NULL;
333 	return err;
334 }
335 
336 static int process_writes(void)
337 {
338 	static struct {
339 		struct xb_req_data *req;
340 		int idx;
341 		unsigned int written;
342 	} state;
343 	void *base;
344 	unsigned int len;
345 	int err = 0;
346 
347 	if (!xb_data_to_write())
348 		return 0;
349 
350 	mutex_lock(&xb_write_mutex);
351 
352 	if (!state.req) {
353 		state.req = list_first_entry(&xb_write_list,
354 					     struct xb_req_data, list);
355 		state.idx = -1;
356 		state.written = 0;
357 	}
358 
359 	if (state.req->state == xb_req_state_aborted)
360 		goto out_err;
361 
362 	while (state.idx < state.req->num_vecs) {
363 		if (state.idx < 0) {
364 			base = &state.req->msg;
365 			len = sizeof(state.req->msg);
366 		} else {
367 			base = state.req->vec[state.idx].iov_base;
368 			len = state.req->vec[state.idx].iov_len;
369 		}
370 		err = xb_write(base + state.written, len - state.written);
371 		if (err < 0)
372 			goto out_err;
373 		state.written += err;
374 		if (state.written != len)
375 			goto out;
376 
377 		state.idx++;
378 		state.written = 0;
379 	}
380 
381 	list_del(&state.req->list);
382 	state.req->state = xb_req_state_wait_reply;
383 	list_add_tail(&state.req->list, &xs_reply_list);
384 	state.req = NULL;
385 
386  out:
387 	mutex_unlock(&xb_write_mutex);
388 
389 	return 0;
390 
391  out_err:
392 	state.req->msg.type = XS_ERROR;
393 	state.req->err = err;
394 	list_del(&state.req->list);
395 	if (state.req->state == xb_req_state_aborted)
396 		kfree(state.req);
397 	else {
398 		state.req->state = xb_req_state_got_reply;
399 		wake_up(&state.req->wq);
400 	}
401 
402 	mutex_unlock(&xb_write_mutex);
403 
404 	state.req = NULL;
405 
406 	return err;
407 }
408 
/*
 * Wake-up condition for xenbus_thread(): returns 1 when there is response
 * data to read or a queued request that can be written, 0 otherwise.
 */
static int xb_thread_work(void)
{
	if (xb_data_to_read())
		return 1;

	return xb_data_to_write() != 0;
}
413 
414 static int xenbus_thread(void *unused)
415 {
416 	int err;
417 
418 	while (!kthread_should_stop()) {
419 		if (wait_event_interruptible(xb_waitq, xb_thread_work()))
420 			continue;
421 
422 		err = process_msg();
423 		if (err == -ENOMEM)
424 			schedule();
425 		else if (err)
426 			pr_warn_ratelimited("error %d while reading message\n",
427 					    err);
428 
429 		err = process_writes();
430 		if (err)
431 			pr_warn_ratelimited("error %d while writing message\n",
432 					    err);
433 	}
434 
435 	xenbus_task = NULL;
436 	return 0;
437 }
438 
439 /**
440  * xb_init_comms - Set up interrupt handler off store event channel.
441  */
442 int xb_init_comms(void)
443 {
444 	struct xenstore_domain_interface *intf = xen_store_interface;
445 
446 	if (intf->req_prod != intf->req_cons)
447 		pr_err("request ring is not quiescent (%08x:%08x)!\n",
448 		       intf->req_cons, intf->req_prod);
449 
450 	if (intf->rsp_prod != intf->rsp_cons) {
451 		pr_warn("response ring is not quiescent (%08x:%08x): fixing up\n",
452 			intf->rsp_cons, intf->rsp_prod);
453 		/* breaks kdump */
454 		if (!reset_devices)
455 			intf->rsp_cons = intf->rsp_prod;
456 	}
457 
458 	if (xenbus_irq) {
459 		/* Already have an irq; assume we're resuming */
460 		rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
461 	} else {
462 		int err;
463 
464 		err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
465 						0, "xenbus", &xb_waitq);
466 		if (err < 0) {
467 			pr_err("request irq failed %i\n", err);
468 			return err;
469 		}
470 
471 		xenbus_irq = err;
472 
473 		if (!xenbus_task) {
474 			xenbus_task = kthread_run(xenbus_thread, NULL,
475 						  "xenbus");
476 			if (IS_ERR(xenbus_task))
477 				return PTR_ERR(xenbus_task);
478 		}
479 	}
480 
481 	return 0;
482 }
483 
484 void xb_deinit_comms(void)
485 {
486 	unbind_from_irqhandler(xenbus_irq, &xb_waitq);
487 	xenbus_irq = 0;
488 }
489