1 /****************************************************************************** 2 * xenbus_comms.c 3 * 4 * Low level code to talks to Xen Store: ringbuffer and event channel. 5 * 6 * Copyright (C) 2005 Rusty Russell, IBM Corporation 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License version 2 10 * as published by the Free Software Foundation; or, when distributed 11 * separately from the Linux kernel or incorporated into other 12 * software packages, subject to the following license: 13 * 14 * Permission is hereby granted, free of charge, to any person obtaining a copy 15 * of this source file (the "Software"), to deal in the Software without 16 * restriction, including without limitation the rights to use, copy, modify, 17 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 18 * and to permit persons to whom the Software is furnished to do so, subject to 19 * the following conditions: 20 * 21 * The above copyright notice and this permission notice shall be included in 22 * all copies or substantial portions of the Software. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 27 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 29 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 30 * IN THE SOFTWARE. 31 */ 32 33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 34 35 #include <linux/wait.h> 36 #include <linux/interrupt.h> 37 #include <linux/kthread.h> 38 #include <linux/sched.h> 39 #include <linux/err.h> 40 #include <xen/xenbus.h> 41 #include <asm/xen/hypervisor.h> 42 #include <xen/events.h> 43 #include <xen/page.h> 44 #include "xenbus.h" 45 46 /* A list of replies. Currently only one will ever be outstanding. */ 47 LIST_HEAD(xs_reply_list); 48 49 /* A list of write requests. */ 50 LIST_HEAD(xb_write_list); 51 DECLARE_WAIT_QUEUE_HEAD(xb_waitq); 52 DEFINE_MUTEX(xb_write_mutex); 53 54 /* Protect xenbus reader thread against save/restore. */ 55 DEFINE_MUTEX(xs_response_mutex); 56 57 static int xenbus_irq; 58 static struct task_struct *xenbus_task; 59 60 static DECLARE_WORK(probe_work, xenbus_probe); 61 62 63 static irqreturn_t wake_waiting(int irq, void *unused) 64 { 65 if (unlikely(xenstored_ready == 0)) { 66 xenstored_ready = 1; 67 schedule_work(&probe_work); 68 } 69 70 wake_up(&xb_waitq); 71 return IRQ_HANDLED; 72 } 73 74 static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod) 75 { 76 return ((prod - cons) <= XENSTORE_RING_SIZE); 77 } 78 79 static void *get_output_chunk(XENSTORE_RING_IDX cons, 80 XENSTORE_RING_IDX prod, 81 char *buf, uint32_t *len) 82 { 83 *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod); 84 if ((XENSTORE_RING_SIZE - (prod - cons)) < *len) 85 *len = XENSTORE_RING_SIZE - (prod - cons); 86 return buf + MASK_XENSTORE_IDX(prod); 87 } 88 89 static const void *get_input_chunk(XENSTORE_RING_IDX cons, 90 XENSTORE_RING_IDX prod, 91 const char *buf, uint32_t *len) 92 { 93 *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons); 94 if ((prod - cons) < *len) 95 *len = prod - cons; 96 return buf + MASK_XENSTORE_IDX(cons); 97 } 98 99 static int xb_data_to_write(void) 100 { 101 struct xenstore_domain_interface *intf = xen_store_interface; 102 103 return (intf->req_prod - intf->req_cons) != XENSTORE_RING_SIZE && 104 !list_empty(&xb_write_list); 105 } 106 107 /** 108 * xb_write - low level write 109 * @data: buffer to send 110 * @len: length of buffer 111 * 112 * Returns number of bytes written or -err. 113 */ 114 static int xb_write(const void *data, unsigned int len) 115 { 116 struct xenstore_domain_interface *intf = xen_store_interface; 117 XENSTORE_RING_IDX cons, prod; 118 unsigned int bytes = 0; 119 120 while (len != 0) { 121 void *dst; 122 unsigned int avail; 123 124 /* Read indexes, then verify. */ 125 cons = intf->req_cons; 126 prod = intf->req_prod; 127 if (!check_indexes(cons, prod)) { 128 intf->req_cons = intf->req_prod = 0; 129 return -EIO; 130 } 131 if (!xb_data_to_write()) 132 return bytes; 133 134 /* Must write data /after/ reading the consumer index. */ 135 virt_mb(); 136 137 dst = get_output_chunk(cons, prod, intf->req, &avail); 138 if (avail == 0) 139 continue; 140 if (avail > len) 141 avail = len; 142 143 memcpy(dst, data, avail); 144 data += avail; 145 len -= avail; 146 bytes += avail; 147 148 /* Other side must not see new producer until data is there. */ 149 virt_wmb(); 150 intf->req_prod += avail; 151 152 /* Implies mb(): other side will see the updated producer. */ 153 if (prod <= intf->req_cons) 154 notify_remote_via_evtchn(xen_store_evtchn); 155 } 156 157 return bytes; 158 } 159 160 static int xb_data_to_read(void) 161 { 162 struct xenstore_domain_interface *intf = xen_store_interface; 163 return (intf->rsp_cons != intf->rsp_prod); 164 } 165 166 static int xb_read(void *data, unsigned int len) 167 { 168 struct xenstore_domain_interface *intf = xen_store_interface; 169 XENSTORE_RING_IDX cons, prod; 170 unsigned int bytes = 0; 171 172 while (len != 0) { 173 unsigned int avail; 174 const char *src; 175 176 /* Read indexes, then verify. */ 177 cons = intf->rsp_cons; 178 prod = intf->rsp_prod; 179 if (cons == prod) 180 return bytes; 181 182 if (!check_indexes(cons, prod)) { 183 intf->rsp_cons = intf->rsp_prod = 0; 184 return -EIO; 185 } 186 187 src = get_input_chunk(cons, prod, intf->rsp, &avail); 188 if (avail == 0) 189 continue; 190 if (avail > len) 191 avail = len; 192 193 /* Must read data /after/ reading the producer index. */ 194 virt_rmb(); 195 196 memcpy(data, src, avail); 197 data += avail; 198 len -= avail; 199 bytes += avail; 200 201 /* Other side must not see free space until we've copied out */ 202 virt_mb(); 203 intf->rsp_cons += avail; 204 205 /* Implies mb(): other side will see the updated consumer. */ 206 if (intf->rsp_prod - cons >= XENSTORE_RING_SIZE) 207 notify_remote_via_evtchn(xen_store_evtchn); 208 } 209 210 return bytes; 211 } 212 213 static int process_msg(void) 214 { 215 static struct { 216 struct xsd_sockmsg msg; 217 char *body; 218 union { 219 void *alloc; 220 struct xs_watch_event *watch; 221 }; 222 bool in_msg; 223 bool in_hdr; 224 unsigned int read; 225 } state; 226 struct xb_req_data *req; 227 int err; 228 unsigned int len; 229 230 if (!state.in_msg) { 231 state.in_msg = true; 232 state.in_hdr = true; 233 state.read = 0; 234 235 /* 236 * We must disallow save/restore while reading a message. 237 * A partial read across s/r leaves us out of sync with 238 * xenstored. 239 * xs_response_mutex is locked as long as we are processing one 240 * message. state.in_msg will be true as long as we are holding 241 * the lock here. 242 */ 243 mutex_lock(&xs_response_mutex); 244 245 if (!xb_data_to_read()) { 246 /* We raced with save/restore: pending data 'gone'. */ 247 mutex_unlock(&xs_response_mutex); 248 state.in_msg = false; 249 return 0; 250 } 251 } 252 253 if (state.in_hdr) { 254 if (state.read != sizeof(state.msg)) { 255 err = xb_read((void *)&state.msg + state.read, 256 sizeof(state.msg) - state.read); 257 if (err < 0) 258 goto out; 259 state.read += err; 260 if (state.read != sizeof(state.msg)) 261 return 0; 262 if (state.msg.len > XENSTORE_PAYLOAD_MAX) { 263 err = -EINVAL; 264 goto out; 265 } 266 } 267 268 len = state.msg.len + 1; 269 if (state.msg.type == XS_WATCH_EVENT) 270 len += sizeof(*state.watch); 271 272 state.alloc = kmalloc(len, GFP_NOIO | __GFP_HIGH); 273 if (!state.alloc) 274 return -ENOMEM; 275 276 if (state.msg.type == XS_WATCH_EVENT) 277 state.body = state.watch->body; 278 else 279 state.body = state.alloc; 280 state.in_hdr = false; 281 state.read = 0; 282 } 283 284 err = xb_read(state.body + state.read, state.msg.len - state.read); 285 if (err < 0) 286 goto out; 287 288 state.read += err; 289 if (state.read != state.msg.len) 290 return 0; 291 292 state.body[state.msg.len] = '\0'; 293 294 if (state.msg.type == XS_WATCH_EVENT) { 295 state.watch->len = state.msg.len; 296 err = xs_watch_msg(state.watch); 297 } else { 298 err = -ENOENT; 299 mutex_lock(&xb_write_mutex); 300 list_for_each_entry(req, &xs_reply_list, list) { 301 if (req->msg.req_id == state.msg.req_id) { 302 if (req->state == xb_req_state_wait_reply) { 303 req->msg.type = state.msg.type; 304 req->msg.len = state.msg.len; 305 req->body = state.body; 306 req->state = xb_req_state_got_reply; 307 list_del(&req->list); 308 req->cb(req); 309 } else { 310 list_del(&req->list); 311 kfree(req); 312 } 313 err = 0; 314 break; 315 } 316 } 317 mutex_unlock(&xb_write_mutex); 318 if (err) 319 goto out; 320 } 321 322 mutex_unlock(&xs_response_mutex); 323 324 state.in_msg = false; 325 state.alloc = NULL; 326 return err; 327 328 out: 329 mutex_unlock(&xs_response_mutex); 330 state.in_msg = false; 331 kfree(state.alloc); 332 state.alloc = NULL; 333 return err; 334 } 335 336 static int process_writes(void) 337 { 338 static struct { 339 struct xb_req_data *req; 340 int idx; 341 unsigned int written; 342 } state; 343 void *base; 344 unsigned int len; 345 int err = 0; 346 347 if (!xb_data_to_write()) 348 return 0; 349 350 mutex_lock(&xb_write_mutex); 351 352 if (!state.req) { 353 state.req = list_first_entry(&xb_write_list, 354 struct xb_req_data, list); 355 state.idx = -1; 356 state.written = 0; 357 } 358 359 if (state.req->state == xb_req_state_aborted) 360 goto out_err; 361 362 while (state.idx < state.req->num_vecs) { 363 if (state.idx < 0) { 364 base = &state.req->msg; 365 len = sizeof(state.req->msg); 366 } else { 367 base = state.req->vec[state.idx].iov_base; 368 len = state.req->vec[state.idx].iov_len; 369 } 370 err = xb_write(base + state.written, len - state.written); 371 if (err < 0) 372 goto out_err; 373 state.written += err; 374 if (state.written != len) 375 goto out; 376 377 state.idx++; 378 state.written = 0; 379 } 380 381 list_del(&state.req->list); 382 state.req->state = xb_req_state_wait_reply; 383 list_add_tail(&state.req->list, &xs_reply_list); 384 state.req = NULL; 385 386 out: 387 mutex_unlock(&xb_write_mutex); 388 389 return 0; 390 391 out_err: 392 state.req->msg.type = XS_ERROR; 393 state.req->err = err; 394 list_del(&state.req->list); 395 if (state.req->state == xb_req_state_aborted) 396 kfree(state.req); 397 else { 398 state.req->state = xb_req_state_got_reply; 399 wake_up(&state.req->wq); 400 } 401 402 mutex_unlock(&xb_write_mutex); 403 404 state.req = NULL; 405 406 return err; 407 } 408 409 static int xb_thread_work(void) 410 { 411 return xb_data_to_read() || xb_data_to_write(); 412 } 413 414 static int xenbus_thread(void *unused) 415 { 416 int err; 417 418 while (!kthread_should_stop()) { 419 if (wait_event_interruptible(xb_waitq, xb_thread_work())) 420 continue; 421 422 err = process_msg(); 423 if (err == -ENOMEM) 424 schedule(); 425 else if (err) 426 pr_warn_ratelimited("error %d while reading message\n", 427 err); 428 429 err = process_writes(); 430 if (err) 431 pr_warn_ratelimited("error %d while writing message\n", 432 err); 433 } 434 435 xenbus_task = NULL; 436 return 0; 437 } 438 439 /** 440 * xb_init_comms - Set up interrupt handler off store event channel. 441 */ 442 int xb_init_comms(void) 443 { 444 struct xenstore_domain_interface *intf = xen_store_interface; 445 446 if (intf->req_prod != intf->req_cons) 447 pr_err("request ring is not quiescent (%08x:%08x)!\n", 448 intf->req_cons, intf->req_prod); 449 450 if (intf->rsp_prod != intf->rsp_cons) { 451 pr_warn("response ring is not quiescent (%08x:%08x): fixing up\n", 452 intf->rsp_cons, intf->rsp_prod); 453 /* breaks kdump */ 454 if (!reset_devices) 455 intf->rsp_cons = intf->rsp_prod; 456 } 457 458 if (xenbus_irq) { 459 /* Already have an irq; assume we're resuming */ 460 rebind_evtchn_irq(xen_store_evtchn, xenbus_irq); 461 } else { 462 int err; 463 464 err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting, 465 0, "xenbus", &xb_waitq); 466 if (err < 0) { 467 pr_err("request irq failed %i\n", err); 468 return err; 469 } 470 471 xenbus_irq = err; 472 473 if (!xenbus_task) { 474 xenbus_task = kthread_run(xenbus_thread, NULL, 475 "xenbus"); 476 if (IS_ERR(xenbus_task)) 477 return PTR_ERR(xenbus_task); 478 } 479 } 480 481 return 0; 482 } 483 484 void xb_deinit_comms(void) 485 { 486 unbind_from_irqhandler(xenbus_irq, &xb_waitq); 487 xenbus_irq = 0; 488 } 489