/*
 * vhost-user.c
 *
 * Copyright (c) 2013 Virtual Open Systems Sarl.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "clients.h"
#include "net/vhost_net.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-user.h"
#include "standard-headers/linux/virtio_net.h"
#include "chardev/char-fe.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-net.h"
#include "qapi/qapi-events-net.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "qemu/option.h"
#include "trace.h"

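/*
 * Virtio(-net) feature bits that may be negotiated with the vhost-user
 * backend.  The list must be terminated with VHOST_INVALID_FEATURE_BIT.
 */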
static const int user_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_F_NOTIFICATION_DATA,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,

    VIRTIO_F_ANY_LAYOUT,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_CSUM,
    VIRTIO_NET_F_GUEST_CSUM,
    VIRTIO_NET_F_GSO,
    VIRTIO_NET_F_GUEST_TSO4,
    VIRTIO_NET_F_GUEST_TSO6,
    VIRTIO_NET_F_GUEST_ECN,
    VIRTIO_NET_F_GUEST_UFO,
    VIRTIO_NET_F_HOST_TSO4,
    VIRTIO_NET_F_HOST_TSO6,
    VIRTIO_NET_F_HOST_ECN,
    VIRTIO_NET_F_HOST_UFO,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_NET_F_MTU,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_PACKED,
    VIRTIO_F_RING_RESET,
    VIRTIO_F_IN_ORDER,
    VIRTIO_NET_F_RSS,
    VIRTIO_NET_F_RSC_EXT,
    VIRTIO_NET_F_HASH_REPORT,
    VIRTIO_NET_F_GUEST_USO4,
    VIRTIO_NET_F_GUEST_USO6,
    VIRTIO_NET_F_HOST_USO,

    /* This bit implies RARP isn't sent by QEMU out of band */
    VIRTIO_NET_F_GUEST_ANNOUNCE,

    VIRTIO_NET_F_MQ,

    VHOST_INVALID_FEATURE_BIT
};

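/*
 * Per-queue state of a vhost-user network client.  The chardev and the
 * VhostUserState are owned by queue index 0 and shared by the other queues.
 */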
typedef struct NetVhostUserState {
    NetClientState nc;
    CharBackend chr; /* only queue index 0 */
    VhostUserState *vhost_user;
    VHostNetState *vhost_net;
    guint watch;
    uint64_t acked_features;
    bool started;
} NetVhostUserState;

static struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc)
{
    NetVhostUserState *s = DO_UPCAST(NetVhostUserState, nc, nc);
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER);
    return s->vhost_net;
}

static uint64_t vhost_user_get_acked_features(NetClientState *nc)
{
    NetVhostUserState *s = DO_UPCAST(NetVhostUserState, nc, nc);
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER);
    return s->acked_features;
}

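/*
 * Cache the features acked by the guest so that they survive a backend
 * disconnect: vhost_net is torn down on CHR_EVENT_CLOSED and the cached
 * value is handed back via vhost_user_get_acked_features() when the
 * backend reconnects.
 */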
static void vhost_user_save_acked_features(NetClientState *nc)
{
    NetVhostUserState *s;

    s = DO_UPCAST(NetVhostUserState, nc, nc);
    if (s->vhost_net) {
        uint64_t features = vhost_net_get_acked_features(s->vhost_net);
        if (features) {
            s->acked_features = features;
        }
    }
}

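/*
 * Clean up the vhost_net instances of the first 'queues' clients, saving
 * the acked features first so they can be restored on reconnect.
 */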
static void vhost_user_stop(int queues, NetClientState *ncs[])
{
    int i;
    NetVhostUserState *s;

    for (i = 0; i < queues; i++) {
        assert(ncs[i]->info->type == NET_CLIENT_DRIVER_VHOST_USER);

        s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);

        if (s->vhost_net) {
            vhost_user_save_acked_features(ncs[i]);
            vhost_net_cleanup(s->vhost_net);
        }
    }
}

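/*
 * Create a vhost_net instance for each of the 'queues' client states and
 * attach it to the shared vhost-user backend.  Fails if the backend
 * supports fewer queue pairs than requested; on error, the instances set
 * up so far are torn down again via vhost_user_stop().
 */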
static int vhost_user_start(int queues, NetClientState *ncs[],
                            VhostUserState *be)
{
    VhostNetOptions options;
    struct vhost_net *net = NULL;
    NetVhostUserState *s;
    int max_queues;
    int i;

    options.backend_type = VHOST_BACKEND_TYPE_USER;

    for (i = 0; i < queues; i++) {
        assert(ncs[i]->info->type == NET_CLIENT_DRIVER_VHOST_USER);

        s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);

        options.net_backend = ncs[i];
        options.opaque = be;
        options.busyloop_timeout = 0;
        options.nvqs = 2;
        options.feature_bits = user_feature_bits;
        options.max_tx_queue_size = VIRTQUEUE_MAX_SIZE;
        options.get_acked_features = vhost_user_get_acked_features;
        options.save_acked_features = vhost_user_save_acked_features;
        options.is_vhost_user = true;

        net = vhost_net_init(&options);
        if (!net) {
            error_report("failed to init vhost_net for queue %d", i);
            goto err;
        }

        if (i == 0) {
            max_queues = vhost_net_get_max_queues(net);
            if (queues > max_queues) {
                error_report("requested more queues than supported: %d",
                             max_queues);
                goto err;
            }
        }

        if (s->vhost_net) {
            vhost_net_cleanup(s->vhost_net);
            g_free(s->vhost_net);
        }
        s->vhost_net = net;
    }

    return 0;

err:
    if (net) {
        vhost_net_cleanup(net);
        g_free(net);
    }
    vhost_user_stop(i, ncs);
    return -1;
}

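/*
 * With vhost-user the data path is handled entirely by the backend process,
 * so the packets reaching this callback are normally just the 60-byte
 * self-announce RARP frames QEMU emits after migration.  They are turned
 * into a migration-done notification so the backend can announce the guest
 * on its own (needed for guests lacking GUEST_ANNOUNCE).
 */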
static ssize_t vhost_user_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    /*
     * In case of RARP (message size is 60) notify the backend to send a
     * fake RARP.  This fake RARP will be sent by the backend only for
     * guests without the GUEST_ANNOUNCE capability.
     */
    if (size == 60) {
        NetVhostUserState *s = DO_UPCAST(NetVhostUserState, nc, nc);
        int r;
        static int display_rarp_failure = 1;
        char mac_addr[6];

        /* extract guest mac address from the RARP message */
        memcpy(mac_addr, &buf[6], 6);

        r = vhost_net_notify_migration_done(s->vhost_net, mac_addr);

        if ((r != 0) && (display_rarp_failure)) {
            fprintf(stderr,
                    "Vhost user backend failed to broadcast fake RARP\n");
            fflush(stderr);
            display_rarp_failure = 0;
        }
    }

    return size;
}

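/*
 * NetClientInfo.cleanup: release the per-queue vhost_net instance; the
 * shared chardev and VhostUserState are released only when queue 0 is
 * destroyed.
 */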
static void net_vhost_user_cleanup(NetClientState *nc)
{
    NetVhostUserState *s = DO_UPCAST(NetVhostUserState, nc, nc);

    if (s->vhost_net) {
        vhost_net_cleanup(s->vhost_net);
        g_free(s->vhost_net);
        s->vhost_net = NULL;
    }
    if (nc->queue_index == 0) {
        if (s->watch) {
            g_source_remove(s->watch);
            s->watch = 0;
        }
        qemu_chr_fe_deinit(&s->chr, true);
        if (s->vhost_user) {
            vhost_user_cleanup(s->vhost_user);
            g_free(s->vhost_user);
            s->vhost_user = NULL;
        }
    }

    qemu_purge_queued_packets(nc);
}

static int vhost_user_set_vnet_endianness(NetClientState *nc,
                                          bool enable)
{
    /* Nothing to do. If the server supports
     * VHOST_USER_PROTOCOL_F_CROSS_ENDIAN, it will get the
     * vnet header endianness from there. If it doesn't, negotiation
     * fails.
     */
    return 0;
}

static bool vhost_user_has_vnet_hdr(NetClientState *nc)
{
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER);

    return true;
}

static bool vhost_user_has_ufo(NetClientState *nc)
{
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER);

    return true;
}

static bool vhost_user_check_peer_type(NetClientState *nc, ObjectClass *oc,
                                       Error **errp)
{
    const char *driver = object_class_get_name(oc);

    if (!g_str_has_prefix(driver, "virtio-net-")) {
        error_setg(errp, "vhost-user requires frontend driver virtio-net-*");
        return false;
    }

    return true;
}

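/* NetClientInfo callbacks for the vhost-user network backend */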
static NetClientInfo net_vhost_user_info = {
    .type = NET_CLIENT_DRIVER_VHOST_USER,
    .size = sizeof(NetVhostUserState),
    .receive = vhost_user_receive,
    .cleanup = net_vhost_user_cleanup,
    .has_vnet_hdr = vhost_user_has_vnet_hdr,
    .has_ufo = vhost_user_has_ufo,
    .set_vnet_be = vhost_user_set_vnet_endianness,
    .set_vnet_le = vhost_user_set_vnet_endianness,
    .check_peer_type = vhost_user_check_peer_type,
    .get_vhost_net = vhost_user_get_vhost_net,
};

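/*
 * G_IO_HUP watch on the chardev: the backend hung up, so force a
 * disconnect and let the CHR_EVENT_CLOSED handling run.
 */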
static gboolean net_vhost_user_watch(void *do_not_use, GIOCondition cond,
                                     void *opaque)
{
    NetVhostUserState *s = opaque;

    qemu_chr_fe_disconnect(&s->chr);

    return G_SOURCE_CONTINUE;
}

static void net_vhost_user_event(void *opaque, QEMUChrEvent event);

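/*
 * Bottom half run after the chardev closes: save the acked features for
 * every queue, mark the link down, re-register the event handler, and emit
 * the NETDEV_VHOST_USER_DISCONNECTED QAPI event.
 */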
static void chr_closed_bh(void *opaque)
{
    const char *name = opaque;
    NetClientState *ncs[MAX_QUEUE_NUM];
    NetVhostUserState *s;
    int queues, i;

    queues = qemu_find_net_clients_except(name, ncs,
                                          NET_CLIENT_DRIVER_NIC,
                                          MAX_QUEUE_NUM);
    assert(queues < MAX_QUEUE_NUM);

    s = DO_UPCAST(NetVhostUserState, nc, ncs[0]);

    for (i = queues - 1; i >= 0; i--) {
        vhost_user_save_acked_features(ncs[i]);
    }

    net_client_set_link(ncs, queues, false);

    qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, net_vhost_user_event,
                             NULL, opaque, NULL, true);

    qapi_event_send_netdev_vhost_user_disconnected(name);
}

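/*
 * Chardev event handler.  On CHR_EVENT_OPENED the vhost devices are
 * (re)started and the link is brought up; on CHR_EVENT_CLOSED the teardown
 * is deferred to chr_closed_bh() because the vhost code expects the
 * vhost_dev to stay set up while a request may still be in flight.
 */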
static void net_vhost_user_event(void *opaque, QEMUChrEvent event)
{
    const char *name = opaque;
    NetClientState *ncs[MAX_QUEUE_NUM];
    NetVhostUserState *s;
    Chardev *chr;
    int queues;

    queues = qemu_find_net_clients_except(name, ncs,
                                          NET_CLIENT_DRIVER_NIC,
                                          MAX_QUEUE_NUM);
    assert(queues < MAX_QUEUE_NUM);

    s = DO_UPCAST(NetVhostUserState, nc, ncs[0]);
    chr = qemu_chr_fe_get_driver(&s->chr);
    trace_vhost_user_event(chr->label, event);
    switch (event) {
    case CHR_EVENT_OPENED:
        if (vhost_user_start(queues, ncs, s->vhost_user) < 0) {
            qemu_chr_fe_disconnect(&s->chr);
            return;
        }
        s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP,
                                         net_vhost_user_watch, s);
        net_client_set_link(ncs, queues, true);
        s->started = true;
        qapi_event_send_netdev_vhost_user_connected(name, chr->label);
        break;
    case CHR_EVENT_CLOSED:
        /* A close event may happen during a read/write, but the vhost
         * code assumes the vhost_dev remains set up, so delay the stop
         * and clear until idle.
         * FIXME: better handle failure in vhost code, remove bh
         */
        if (s->watch) {
            AioContext *ctx = qemu_get_current_aio_context();

            g_source_remove(s->watch);
            s->watch = 0;
            qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, NULL, NULL,
                                     NULL, NULL, false);

            aio_bh_schedule_oneshot(ctx, chr_closed_bh, opaque);
        }
        break;
    case CHR_EVENT_BREAK:
    case CHR_EVENT_MUX_IN:
    case CHR_EVENT_MUX_OUT:
        /* Ignore */
        break;
    }
}

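/*
 * Create 'queues' vhost-user net clients sharing one chardev, then block
 * until the backend has connected and the first CHR_EVENT_OPENED has been
 * handled (s->started), which guarantees s->vhost_net is initialized (see
 * the assert below).
 */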
static int net_vhost_user_init(NetClientState *peer, const char *device,
                               const char *name, Chardev *chr,
                               int queues)
{
    Error *err = NULL;
    NetClientState *nc, *nc0 = NULL;
    NetVhostUserState *s = NULL;
    VhostUserState *user;
    int i;

    assert(name);
    assert(queues > 0);

    user = g_new0(struct VhostUserState, 1);
    for (i = 0; i < queues; i++) {
        nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
        qemu_set_info_str(nc, "vhost-user%d to %s", i, chr->label);
        nc->queue_index = i;
        if (!nc0) {
            nc0 = nc;
            s = DO_UPCAST(NetVhostUserState, nc, nc);
            if (!qemu_chr_fe_init(&s->chr, chr, &err) ||
                !vhost_user_init(user, &s->chr, &err)) {
                error_report_err(err);
                goto err;
            }
        }
        s = DO_UPCAST(NetVhostUserState, nc, nc);
        s->vhost_user = user;
    }

    s = DO_UPCAST(NetVhostUserState, nc, nc0);
    do {
        if (qemu_chr_fe_wait_connected(&s->chr, &err) < 0) {
            error_report_err(err);
            goto err;
        }
        qemu_chr_fe_set_handlers(&s->chr, NULL, NULL,
                                 net_vhost_user_event, NULL, nc0->name, NULL,
                                 true);
    } while (!s->started);

    assert(s->vhost_net);

    return 0;

err:
    if (user) {
        vhost_user_cleanup(user);
        g_free(user);
        if (s) {
            s->vhost_user = NULL;
        }
    }
    if (nc0) {
        qemu_del_net_client(nc0);
    }

    return -1;
}

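/*
 * Look up the chardev named in the options and check that it is usable for
 * vhost-user: it must support reconnection and file-descriptor passing,
 * which in practice means a socket chardev.
 */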
static Chardev *net_vhost_claim_chardev(
    const NetdevVhostUserOptions *opts, Error **errp)
{
    Chardev *chr = qemu_chr_find(opts->chardev);

    if (chr == NULL) {
        error_setg(errp, "chardev \"%s\" not found", opts->chardev);
        return NULL;
    }

    if (!qemu_chr_has_feature(chr, QEMU_CHAR_FEATURE_RECONNECTABLE)) {
        error_setg(errp, "chardev \"%s\" is not reconnectable",
                   opts->chardev);
        return NULL;
    }
    if (!qemu_chr_has_feature(chr, QEMU_CHAR_FEATURE_FD_PASS)) {
        error_setg(errp, "chardev \"%s\" does not support FD passing",
                   opts->chardev);
        return NULL;
    }

    return chr;
}

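/*
 * Entry point for "-netdev vhost-user".  Validates the chardev and the
 * requested queue count, then creates the net clients.  Illustrative
 * (not exhaustive) command line, with example id/path values:
 *
 *   -chardev socket,id=chr0,path=/tmp/vhost-user.sock
 *   -netdev vhost-user,id=net0,chardev=chr0,queues=2
 *   -device virtio-net-pci,netdev=net0,mq=on
 */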
int net_init_vhost_user(const Netdev *netdev, const char *name,
                        NetClientState *peer, Error **errp)
{
    int queues;
    const NetdevVhostUserOptions *vhost_user_opts;
    Chardev *chr;

    assert(netdev->type == NET_CLIENT_DRIVER_VHOST_USER);
    vhost_user_opts = &netdev->u.vhost_user;

    chr = net_vhost_claim_chardev(vhost_user_opts, errp);
    if (!chr) {
        return -1;
    }

    queues = vhost_user_opts->has_queues ? vhost_user_opts->queues : 1;
    if (queues < 1 || queues > MAX_QUEUE_NUM) {
        error_setg(errp,
                   "vhost-user number of queues must be in range [1, %d]",
                   MAX_QUEUE_NUM);
        return -1;
    }

    return net_vhost_user_init(peer, "vhost_user", name, chr, queues);
}