/******************************************************************************
 * xenbus_comms.c
 *
 * Low level code to talk to Xen Store: ringbuffer and event channel.
 *
 * Copyright (C) 2005 Rusty Russell, IBM Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/wait.h>
#include <linux/interrupt.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>
#include <xen/xenbus.h>
#include <asm/xen/hypervisor.h>
#include <xen/events.h>
#include <xen/page.h>
#include "xenbus.h"

/* A list of replies. Currently only one will ever be outstanding. */
LIST_HEAD(xs_reply_list);

/* A list of write requests. */
LIST_HEAD(xb_write_list);
DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
DEFINE_MUTEX(xb_write_mutex);

/* Protect xenbus reader thread against save/restore. */
DEFINE_MUTEX(xs_response_mutex);
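
/*
 * A note on the ring protocol used by the helpers below: the shared
 * page at xen_store_interface carries two producer/consumer rings,
 * req (requests towards xenstored) and rsp (replies and watch events
 * back). The indices are free-running counters; MASK_XENSTORE_IDX()
 * reduces an index to an offset inside the ring, so (prod - cons) is
 * the number of unconsumed bytes and check_indexes() treats anything
 * beyond XENSTORE_RING_SIZE as corruption. Worked example, assuming
 * the canonical XENSTORE_RING_SIZE of 1024: cons = 1020, prod = 1030
 * means 10 bytes are pending, returned as a 4-byte chunk up to the
 * end of the buffer followed by a 6-byte chunk from its start.
 */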

static int xenbus_irq;
static struct task_struct *xenbus_task;

static DECLARE_WORK(probe_work, xenbus_probe);


static irqreturn_t wake_waiting(int irq, void *unused)
{
	if (unlikely(xenstored_ready == 0)) {
		xenstored_ready = 1;
		schedule_work(&probe_work);
	}

	wake_up(&xb_waitq);
	return IRQ_HANDLED;
}

static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
{
	return ((prod - cons) <= XENSTORE_RING_SIZE);
}

static void *get_output_chunk(XENSTORE_RING_IDX cons,
			      XENSTORE_RING_IDX prod,
			      char *buf, uint32_t *len)
{
	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
	if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
		*len = XENSTORE_RING_SIZE - (prod - cons);
	return buf + MASK_XENSTORE_IDX(prod);
}

static const void *get_input_chunk(XENSTORE_RING_IDX cons,
				   XENSTORE_RING_IDX prod,
				   const char *buf, uint32_t *len)
{
	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
	if ((prod - cons) < *len)
		*len = prod - cons;
	return buf + MASK_XENSTORE_IDX(cons);
}

static int xb_data_to_write(void)
{
	struct xenstore_domain_interface *intf = xen_store_interface;

	return (intf->req_prod - intf->req_cons) != XENSTORE_RING_SIZE &&
		!list_empty(&xb_write_list);
}

/**
 * xb_write - low level write
 * @data: buffer to send
 * @len: length of buffer
 *
 * Returns number of bytes written or -err.
 */
static int xb_write(const void *data, unsigned int len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	unsigned int bytes = 0;

	while (len != 0) {
		void *dst;
		unsigned int avail;

		/* Read indexes, then verify. */
		cons = intf->req_cons;
		prod = intf->req_prod;
		if (!check_indexes(cons, prod)) {
			intf->req_cons = intf->req_prod = 0;
			return -EIO;
		}
		if (!xb_data_to_write())
			return bytes;

		/* Must write data /after/ reading the consumer index. */
		virt_mb();

		dst = get_output_chunk(cons, prod, intf->req, &avail);
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;

		memcpy(dst, data, avail);
		data += avail;
		len -= avail;
		bytes += avail;

		/* Other side must not see new producer until data is there. */
		virt_wmb();
		intf->req_prod += avail;

		/* Implies mb(): other side will see the updated producer. */
		if (prod <= intf->req_cons)
			notify_remote_via_evtchn(xen_store_evtchn);
	}

	return bytes;
}

static int xb_data_to_read(void)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	return (intf->rsp_cons != intf->rsp_prod);
}

static int xb_read(void *data, unsigned int len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	unsigned int bytes = 0;

	while (len != 0) {
		unsigned int avail;
		const char *src;

		/* Read indexes, then verify. */
		cons = intf->rsp_cons;
		prod = intf->rsp_prod;
		if (cons == prod)
			return bytes;

		if (!check_indexes(cons, prod)) {
			intf->rsp_cons = intf->rsp_prod = 0;
			return -EIO;
		}

		src = get_input_chunk(cons, prod, intf->rsp, &avail);
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;

		/* Must read data /after/ reading the producer index. */
		virt_rmb();

		memcpy(data, src, avail);
		data += avail;
		len -= avail;
		bytes += avail;

		/* Other side must not see free space until we've copied out */
		virt_mb();
		intf->rsp_cons += avail;

		/* Implies mb(): other side will see the updated consumer. */
		if (intf->rsp_prod - cons >= XENSTORE_RING_SIZE)
			notify_remote_via_evtchn(xen_store_evtchn);
	}

	return bytes;
}
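
/*
 * process_msg() below is a resumable state machine: its state is
 * static, so a partially received message survives an early return
 * when the response ring drains. Each message on the wire is a
 * struct xsd_sockmsg header followed by msg.len payload bytes;
 * replies are matched back to their request via req_id, while
 * XS_WATCH_EVENT payloads are wrapped in a struct xs_watch_event and
 * handed to xs_watch_msg().
 */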

static int process_msg(void)
{
	static struct {
		struct xsd_sockmsg msg;
		char *body;
		union {
			void *alloc;
			struct xs_watch_event *watch;
		};
		bool in_msg;
		bool in_hdr;
		unsigned int read;
	} state;
	struct xb_req_data *req;
	int err;
	unsigned int len;

	if (!state.in_msg) {
		state.in_msg = true;
		state.in_hdr = true;
		state.read = 0;

		/*
		 * We must disallow save/restore while reading a message.
		 * A partial read across s/r leaves us out of sync with
		 * xenstored.
		 * xs_response_mutex is locked as long as we are processing one
		 * message. state.in_msg will be true as long as we are holding
		 * the lock here.
		 */
		mutex_lock(&xs_response_mutex);

		if (!xb_data_to_read()) {
			/* We raced with save/restore: pending data 'gone'. */
			mutex_unlock(&xs_response_mutex);
			state.in_msg = false;
			return 0;
		}
	}

	if (state.in_hdr) {
		if (state.read != sizeof(state.msg)) {
			err = xb_read((void *)&state.msg + state.read,
				      sizeof(state.msg) - state.read);
			if (err < 0)
				goto out;
			state.read += err;
			if (state.read != sizeof(state.msg))
				return 0;
			if (state.msg.len > XENSTORE_PAYLOAD_MAX) {
				err = -EINVAL;
				goto out;
			}
		}

		len = state.msg.len + 1;
		if (state.msg.type == XS_WATCH_EVENT)
			len += sizeof(*state.watch);

		state.alloc = kmalloc(len, GFP_NOIO | __GFP_HIGH);
		if (!state.alloc)
			return -ENOMEM;

		if (state.msg.type == XS_WATCH_EVENT)
			state.body = state.watch->body;
		else
			state.body = state.alloc;
		state.in_hdr = false;
		state.read = 0;
	}

	err = xb_read(state.body + state.read, state.msg.len - state.read);
	if (err < 0)
		goto out;

	state.read += err;
	if (state.read != state.msg.len)
		return 0;

	state.body[state.msg.len] = '\0';

	if (state.msg.type == XS_WATCH_EVENT) {
		state.watch->len = state.msg.len;
		err = xs_watch_msg(state.watch);
	} else {
		err = -ENOENT;
		mutex_lock(&xb_write_mutex);
		list_for_each_entry(req, &xs_reply_list, list) {
			if (req->msg.req_id == state.msg.req_id) {
				list_del(&req->list);
				err = 0;
				break;
			}
		}
		mutex_unlock(&xb_write_mutex);
		if (err)
			goto out;

		if (req->state == xb_req_state_wait_reply) {
			req->msg.req_id = req->caller_req_id;
			req->msg.type = state.msg.type;
			req->msg.len = state.msg.len;
			req->body = state.body;
			/* write body, then update state */
			virt_wmb();
			req->state = xb_req_state_got_reply;
			req->cb(req);
		} else
			kfree(req);
	}

	mutex_unlock(&xs_response_mutex);

	state.in_msg = false;
	state.alloc = NULL;
	return err;

out:
	mutex_unlock(&xs_response_mutex);
	state.in_msg = false;
	kfree(state.alloc);
	state.alloc = NULL;
	return err;
}

static int process_writes(void)
{
	static struct {
		struct xb_req_data *req;
		int idx;
		unsigned int written;
	} state;
	void *base;
	unsigned int len;
	int err = 0;
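
	/*
	 * As in process_msg(), state is static so that a request cut
	 * short by a full request ring resumes where it left off on the
	 * next call: idx == -1 means the struct xsd_sockmsg header is
	 * still being sent, and state.written tracks progress through
	 * the current segment.
	 */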

	if (!xb_data_to_write())
		return 0;

	mutex_lock(&xb_write_mutex);

	if (!state.req) {
		state.req = list_first_entry(&xb_write_list,
					     struct xb_req_data, list);
		state.idx = -1;
		state.written = 0;
	}

	if (state.req->state == xb_req_state_aborted)
		goto out_err;

	while (state.idx < state.req->num_vecs) {
		if (state.idx < 0) {
			base = &state.req->msg;
			len = sizeof(state.req->msg);
		} else {
			base = state.req->vec[state.idx].iov_base;
			len = state.req->vec[state.idx].iov_len;
		}
		err = xb_write(base + state.written, len - state.written);
		if (err < 0)
			goto out_err;
		state.written += err;
		if (state.written != len)
			goto out;

		state.idx++;
		state.written = 0;
	}

	list_del(&state.req->list);
	state.req->state = xb_req_state_wait_reply;
	list_add_tail(&state.req->list, &xs_reply_list);
	state.req = NULL;

out:
	mutex_unlock(&xb_write_mutex);

	return 0;

out_err:
	state.req->msg.type = XS_ERROR;
	state.req->err = err;
	list_del(&state.req->list);
	if (state.req->state == xb_req_state_aborted)
		kfree(state.req);
	else {
		/* write err, then update state */
		virt_wmb();
		state.req->state = xb_req_state_got_reply;
		wake_up(&state.req->wq);
	}

	mutex_unlock(&xb_write_mutex);

	state.req = NULL;

	return err;
}

static int xb_thread_work(void)
{
	return xb_data_to_read() || xb_data_to_write();
}

static int xenbus_thread(void *unused)
{
	int err;

	while (!kthread_should_stop()) {
		if (wait_event_interruptible(xb_waitq, xb_thread_work()))
			continue;

		err = process_msg();
		if (err == -ENOMEM)
			schedule();
		else if (err)
			pr_warn_ratelimited("error %d while reading message\n",
					    err);

		err = process_writes();
		if (err)
			pr_warn_ratelimited("error %d while writing message\n",
					    err);
	}

	xenbus_task = NULL;
	return 0;
}

/**
 * xb_init_comms - Set up interrupt handler off store event channel.
 */
int xb_init_comms(void)
{
	struct xenstore_domain_interface *intf = xen_store_interface;

	if (intf->req_prod != intf->req_cons)
		pr_err("request ring is not quiescent (%08x:%08x)!\n",
		       intf->req_cons, intf->req_prod);

	if (intf->rsp_prod != intf->rsp_cons) {
		pr_warn("response ring is not quiescent (%08x:%08x): fixing up\n",
			intf->rsp_cons, intf->rsp_prod);
		/* breaks kdump */
		if (!reset_devices)
			intf->rsp_cons = intf->rsp_prod;
	}

	if (xenbus_irq) {
		/* Already have an irq; assume we're resuming */
		rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
	} else {
		int err;

		err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
						0, "xenbus", &xb_waitq);
		if (err < 0) {
			pr_err("request irq failed %i\n", err);
			return err;
		}

		xenbus_irq = err;

		if (!xenbus_task) {
			xenbus_task = kthread_run(xenbus_thread, NULL,
						  "xenbus");
			if (IS_ERR(xenbus_task))
				return PTR_ERR(xenbus_task);
		}
	}

	return 0;
}

void xb_deinit_comms(void)
{
	unbind_from_irqhandler(xenbus_irq, &xb_waitq);
	xenbus_irq = 0;
}
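
/*
 * For orientation, a summary of the flow implemented above (no
 * additional behaviour): callers queue a struct xb_req_data on
 * xb_write_list and wake xb_waitq; xenbus_thread() picks it up,
 * process_writes() pushes the header and iovecs through xb_write()
 * and moves the request to xs_reply_list in state
 * xb_req_state_wait_reply; when xenstored answers, process_msg()
 * matches the reply by req_id, attaches the body, sets
 * xb_req_state_got_reply and invokes req->cb(). Watch events bypass
 * the reply list and go straight to xs_watch_msg().
 */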