xref: /openbmc/linux/net/9p/trans_fd.c (revision 9b005ce9)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Fd transport layer.  Includes deprecated socket layer.
4  *
5  *  Copyright (C) 2006 by Russ Cox <rsc@swtch.com>
6  *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
7  *  Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com>
8  *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
9  */
10 
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12 
13 #include <linux/in.h>
14 #include <linux/module.h>
15 #include <linux/net.h>
16 #include <linux/ipv6.h>
17 #include <linux/kthread.h>
18 #include <linux/errno.h>
19 #include <linux/kernel.h>
20 #include <linux/un.h>
21 #include <linux/uaccess.h>
22 #include <linux/inet.h>
23 #include <linux/idr.h>
24 #include <linux/file.h>
25 #include <linux/parser.h>
26 #include <linux/slab.h>
27 #include <linux/seq_file.h>
28 #include <net/9p/9p.h>
29 #include <net/9p/client.h>
30 #include <net/9p/transport.h>
31 
32 #include <linux/syscalls.h> /* killme */
33 
/* Default TCP port; parse_opts() stores it in p9_fd_opts.port before parsing. */
#define P9_PORT 564
/* Value advertised as .maxsize by every transport module in this file. */
#define MAX_SOCK_BUF (1024*1024)
/* Number of wait-queue slots per connection (size of p9_conn.poll_wait[]). */
#define MAXPOLLWADDR	2

/*
 * Forward declarations: p9_fd_show_options() compares clnt->trans_mod
 * against these before their definitions appear further down.
 */
static struct p9_trans_module p9_tcp_trans;
static struct p9_trans_module p9_fd_trans;
40 
/**
 * struct p9_fd_opts - per-transport options
 * @rfd: file descriptor for reading (trans=fd); ~0 while unset
 * @wfd: file descriptor for writing (trans=fd); ~0 while unset
 * @port: port to connect to (trans=tcp); defaults to P9_PORT
 * @privport: when true, the tcp transport binds the local end of the
 *            socket to a reserved port before connecting
 *
 * Filled in by parse_opts(), which first installs the defaults above.
 */

struct p9_fd_opts {
	int rfd;
	int wfd;
	u16 port;
	bool privport;
};
55 
/*
 * Option Parsing (code inspired by NFS code)
 *  - a little lazy - parse all fd-transport options
 *
 * Token identifiers returned by match_token() against the tokens[] table.
 */

enum {
	/*
	 * Options that take integer arguments.
	 * Opt_err sits in this group but is the "no pattern matched" sentinel
	 * (its tokens[] entry has a NULL pattern); parse_opts() skips integer
	 * parsing for it.
	 */
	Opt_port, Opt_rfdno, Opt_wfdno, Opt_err,
	/* Options that take no arguments */
	Opt_privport,
};
67 
/*
 * Mount-option patterns recognised by parse_opts().  The final entry, with
 * a NULL pattern, terminates the table and carries Opt_err.
 */
static const match_table_t tokens = {
	{Opt_port, "port=%u"},
	{Opt_rfdno, "rfdno=%u"},
	{Opt_wfdno, "wfdno=%u"},
	{Opt_privport, "privport"},
	{Opt_err, NULL},
};
75 
/*
 * Scheduling state kept in p9_conn.wsched.  These values are passed as the
 * bit *number* to set_bit()/clear_bit()/test_and_set_bit(), so they select
 * bits 1, 2, 4 and 8 of wsched; they are not OR-able masks.
 */
enum {
	Rworksched = 1,		/* read work scheduled or running */
	Rpending = 2,		/* can read */
	Wworksched = 4,		/* write work scheduled or running */
	Wpending = 8,		/* can write */
};
82 
/*
 * One registration of a connection on a file's wait queue.
 * Filled in by p9_pollwait() and torn down by p9_mux_poll_stop().
 */
struct p9_poll_wait {
	struct p9_conn *conn;		/* owning connection, read by p9_pollwake() */
	wait_queue_entry_t wait;	/* entry queued on *wait_addr */
	wait_queue_head_t *wait_addr;	/* queue we are on; NULL means slot is free */
};
88 
/**
 * struct p9_conn - fd mux connection state information
 * @mux_list: list link for mux to manage multiple connections (?);
 *            only initialised and printed in this file
 * @client: reference to client instance for this connection
 * @err: error state; once non-zero, p9_conn_cancel() becomes a no-op and
 *       the work functions bail out early
 * @req_list: accounting for requests which have been sent
 * @unsent_req_list: accounting for requests that haven't been sent
 * @rreq: request whose reply is currently being read (if any)
 * @wreq: request currently being written (if any)
 * @tmp_buf: temporary buffer to read in the 7-byte header
 * @rc: temporary fcall for reading current frame
 * @wpos: write position for current frame
 * @wsize: amount of data to write for current frame; 0 when idle
 * @wbuf: current write buffer
 * @poll_pending_link: link on the global p9_poll_pending_list
 * @poll_wait: array of wait_q's for various worker threads
 * @pt: poll state
 * @rq: current read work
 * @wq: current write work
 * @wsched: scheduling bits (Rworksched, Rpending, Wworksched, Wpending)
 *
 */

struct p9_conn {
	struct list_head mux_list;
	struct p9_client *client;
	int err;
	struct list_head req_list;
	struct list_head unsent_req_list;
	struct p9_req_t *rreq;
	struct p9_req_t *wreq;
	char tmp_buf[7];
	struct p9_fcall rc;
	int wpos;
	int wsize;
	char *wbuf;
	struct list_head poll_pending_link;
	struct p9_poll_wait poll_wait[MAXPOLLWADDR];
	poll_table pt;
	struct work_struct rq;
	struct work_struct wq;
	unsigned long wsched;
};
133 
/**
 * struct p9_trans_fd - transport state
 * @rd: reference to file to read from
 * @wr: reference of file to write to (the same file as @rd for sockets)
 * @conn: connection state, embedded in this structure
 *
 * Allocated by p9_fd_open()/p9_socket_open(), stored in client->trans and
 * freed by p9_fd_close().
 */

struct p9_trans_fd {
	struct file *rd;
	struct file *wr;
	struct p9_conn conn;
};
147 
static void p9_poll_workfn(struct work_struct *work);

/*
 * Global poll dispatch state: p9_pollwake() queues connections on
 * p9_poll_pending_list (under p9_poll_lock) and kicks p9_poll_work, whose
 * handler p9_poll_workfn() drains the list.
 */
static DEFINE_SPINLOCK(p9_poll_lock);
static LIST_HEAD(p9_poll_pending_list);
static DECLARE_WORK(p9_poll_work, p9_poll_workfn);

/* Inclusive local port range scanned, from max down to min, by p9_bind_privport(). */
static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT;
static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT;
156 
157 static void p9_mux_poll_stop(struct p9_conn *m)
158 {
159 	unsigned long flags;
160 	int i;
161 
162 	for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
163 		struct p9_poll_wait *pwait = &m->poll_wait[i];
164 
165 		if (pwait->wait_addr) {
166 			remove_wait_queue(pwait->wait_addr, &pwait->wait);
167 			pwait->wait_addr = NULL;
168 		}
169 	}
170 
171 	spin_lock_irqsave(&p9_poll_lock, flags);
172 	list_del_init(&m->poll_pending_link);
173 	spin_unlock_irqrestore(&p9_poll_lock, flags);
174 
175 	flush_work(&p9_poll_work);
176 }
177 
178 /**
179  * p9_conn_cancel - cancel all pending requests with error
180  * @m: mux data
181  * @err: error code
182  *
183  */
184 
185 static void p9_conn_cancel(struct p9_conn *m, int err)
186 {
187 	struct p9_req_t *req, *rtmp;
188 	LIST_HEAD(cancel_list);
189 
190 	p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err);
191 
192 	spin_lock(&m->client->lock);
193 
194 	if (m->err) {
195 		spin_unlock(&m->client->lock);
196 		return;
197 	}
198 
199 	m->err = err;
200 
201 	list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
202 		list_move(&req->req_list, &cancel_list);
203 	}
204 	list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
205 		list_move(&req->req_list, &cancel_list);
206 	}
207 
208 	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
209 		p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req);
210 		list_del(&req->req_list);
211 		if (!req->t_err)
212 			req->t_err = err;
213 		p9_client_cb(m->client, req, REQ_STATUS_ERROR);
214 	}
215 	spin_unlock(&m->client->lock);
216 }
217 
218 static __poll_t
219 p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err)
220 {
221 	__poll_t ret;
222 	struct p9_trans_fd *ts = NULL;
223 
224 	if (client && client->status == Connected)
225 		ts = client->trans;
226 
227 	if (!ts) {
228 		if (err)
229 			*err = -EREMOTEIO;
230 		return EPOLLERR;
231 	}
232 
233 	ret = vfs_poll(ts->rd, pt);
234 	if (ts->rd != ts->wr)
235 		ret = (ret & ~EPOLLOUT) | (vfs_poll(ts->wr, pt) & ~EPOLLIN);
236 	return ret;
237 }
238 
239 /**
240  * p9_fd_read- read from a fd
241  * @client: client instance
242  * @v: buffer to receive data into
243  * @len: size of receive buffer
244  *
245  */
246 
247 static int p9_fd_read(struct p9_client *client, void *v, int len)
248 {
249 	int ret;
250 	struct p9_trans_fd *ts = NULL;
251 	loff_t pos;
252 
253 	if (client && client->status != Disconnected)
254 		ts = client->trans;
255 
256 	if (!ts)
257 		return -EREMOTEIO;
258 
259 	if (!(ts->rd->f_flags & O_NONBLOCK))
260 		p9_debug(P9_DEBUG_ERROR, "blocking read ...\n");
261 
262 	pos = ts->rd->f_pos;
263 	ret = kernel_read(ts->rd, v, len, &pos);
264 	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
265 		client->status = Disconnected;
266 	return ret;
267 }
268 
269 /**
270  * p9_read_work - called when there is some data to be read from a transport
271  * @work: container of work to be done
272  *
273  */
274 
275 static void p9_read_work(struct work_struct *work)
276 {
277 	__poll_t n;
278 	int err;
279 	struct p9_conn *m;
280 
281 	m = container_of(work, struct p9_conn, rq);
282 
283 	if (m->err < 0)
284 		return;
285 
286 	p9_debug(P9_DEBUG_TRANS, "start mux %p pos %zd\n", m, m->rc.offset);
287 
288 	if (!m->rc.sdata) {
289 		m->rc.sdata = m->tmp_buf;
290 		m->rc.offset = 0;
291 		m->rc.capacity = 7; /* start by reading header */
292 	}
293 
294 	clear_bit(Rpending, &m->wsched);
295 	p9_debug(P9_DEBUG_TRANS, "read mux %p pos %zd size: %zd = %zd\n",
296 		 m, m->rc.offset, m->rc.capacity,
297 		 m->rc.capacity - m->rc.offset);
298 	err = p9_fd_read(m->client, m->rc.sdata + m->rc.offset,
299 			 m->rc.capacity - m->rc.offset);
300 	p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err);
301 	if (err == -EAGAIN)
302 		goto end_clear;
303 
304 	if (err <= 0)
305 		goto error;
306 
307 	m->rc.offset += err;
308 
309 	/* header read in */
310 	if ((!m->rreq) && (m->rc.offset == m->rc.capacity)) {
311 		p9_debug(P9_DEBUG_TRANS, "got new header\n");
312 
313 		/* Header size */
314 		m->rc.size = 7;
315 		err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0);
316 		if (err) {
317 			p9_debug(P9_DEBUG_ERROR,
318 				 "error parsing header: %d\n", err);
319 			goto error;
320 		}
321 
322 		if (m->rc.size >= m->client->msize) {
323 			p9_debug(P9_DEBUG_ERROR,
324 				 "requested packet size too big: %d\n",
325 				 m->rc.size);
326 			err = -EIO;
327 			goto error;
328 		}
329 
330 		p9_debug(P9_DEBUG_TRANS,
331 			 "mux %p pkt: size: %d bytes tag: %d\n",
332 			 m, m->rc.size, m->rc.tag);
333 
334 		m->rreq = p9_tag_lookup(m->client, m->rc.tag);
335 		if (!m->rreq || (m->rreq->status != REQ_STATUS_SENT)) {
336 			p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
337 				 m->rc.tag);
338 			err = -EIO;
339 			goto error;
340 		}
341 
342 		if (!m->rreq->rc.sdata) {
343 			p9_debug(P9_DEBUG_ERROR,
344 				 "No recv fcall for tag %d (req %p), disconnecting!\n",
345 				 m->rc.tag, m->rreq);
346 			m->rreq = NULL;
347 			err = -EIO;
348 			goto error;
349 		}
350 		m->rc.sdata = m->rreq->rc.sdata;
351 		memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity);
352 		m->rc.capacity = m->rc.size;
353 	}
354 
355 	/* packet is read in
356 	 * not an else because some packets (like clunk) have no payload
357 	 */
358 	if ((m->rreq) && (m->rc.offset == m->rc.capacity)) {
359 		p9_debug(P9_DEBUG_TRANS, "got new packet\n");
360 		m->rreq->rc.size = m->rc.offset;
361 		spin_lock(&m->client->lock);
362 		if (m->rreq->status == REQ_STATUS_SENT) {
363 			list_del(&m->rreq->req_list);
364 			p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD);
365 		} else if (m->rreq->status == REQ_STATUS_FLSHD) {
366 			/* Ignore replies associated with a cancelled request. */
367 			p9_debug(P9_DEBUG_TRANS,
368 				 "Ignore replies associated with a cancelled request\n");
369 		} else {
370 			spin_unlock(&m->client->lock);
371 			p9_debug(P9_DEBUG_ERROR,
372 				 "Request tag %d errored out while we were reading the reply\n",
373 				 m->rc.tag);
374 			err = -EIO;
375 			goto error;
376 		}
377 		spin_unlock(&m->client->lock);
378 		m->rc.sdata = NULL;
379 		m->rc.offset = 0;
380 		m->rc.capacity = 0;
381 		p9_req_put(m->rreq);
382 		m->rreq = NULL;
383 	}
384 
385 end_clear:
386 	clear_bit(Rworksched, &m->wsched);
387 
388 	if (!list_empty(&m->req_list)) {
389 		if (test_and_clear_bit(Rpending, &m->wsched))
390 			n = EPOLLIN;
391 		else
392 			n = p9_fd_poll(m->client, NULL, NULL);
393 
394 		if ((n & EPOLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) {
395 			p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m);
396 			schedule_work(&m->rq);
397 		}
398 	}
399 
400 	return;
401 error:
402 	p9_conn_cancel(m, err);
403 	clear_bit(Rworksched, &m->wsched);
404 }
405 
406 /**
407  * p9_fd_write - write to a socket
408  * @client: client instance
409  * @v: buffer to send data from
410  * @len: size of send buffer
411  *
412  */
413 
414 static int p9_fd_write(struct p9_client *client, void *v, int len)
415 {
416 	ssize_t ret;
417 	struct p9_trans_fd *ts = NULL;
418 
419 	if (client && client->status != Disconnected)
420 		ts = client->trans;
421 
422 	if (!ts)
423 		return -EREMOTEIO;
424 
425 	if (!(ts->wr->f_flags & O_NONBLOCK))
426 		p9_debug(P9_DEBUG_ERROR, "blocking write ...\n");
427 
428 	ret = kernel_write(ts->wr, v, len, &ts->wr->f_pos);
429 	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
430 		client->status = Disconnected;
431 	return ret;
432 }
433 
/**
 * p9_write_work - called when a transport can send some data
 * @work: container for work to be done
 *
 * Performs one write attempt per invocation.  If no frame is in progress
 * (m->wsize == 0) the first unsent request is moved to the sent list and
 * becomes the current frame.  On a partial write the position is kept in
 * m->wpos and the work reschedules itself while the file stays writable.
 * Any write error cancels the whole connection via p9_conn_cancel().
 */

static void p9_write_work(struct work_struct *work)
{
	__poll_t n;
	int err;
	struct p9_conn *m;
	struct p9_req_t *req;

	m = container_of(work, struct p9_conn, wq);

	/* connection already failed: just release the "scheduled" bit */
	if (m->err < 0) {
		clear_bit(Wworksched, &m->wsched);
		return;
	}

	/* no frame in flight: pick the next unsent request, if any */
	if (!m->wsize) {
		spin_lock(&m->client->lock);
		if (list_empty(&m->unsent_req_list)) {
			clear_bit(Wworksched, &m->wsched);
			spin_unlock(&m->client->lock);
			return;
		}

		req = list_entry(m->unsent_req_list.next, struct p9_req_t,
			       req_list);
		/* marked SENT before any byte is written */
		req->status = REQ_STATUS_SENT;
		p9_debug(P9_DEBUG_TRANS, "move req %p\n", req);
		list_move_tail(&req->req_list, &m->req_list);

		m->wbuf = req->tc.sdata;
		m->wsize = req->tc.size;
		m->wpos = 0;
		/* m->wreq keeps its own reference while the frame is written */
		p9_req_get(req);
		m->wreq = req;
		spin_unlock(&m->client->lock);
	}

	p9_debug(P9_DEBUG_TRANS, "mux %p pos %d size %d\n",
		 m, m->wpos, m->wsize);
	/* cleared before writing so a wakeup during the write is not lost */
	clear_bit(Wpending, &m->wsched);
	err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos);
	p9_debug(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err);
	if (err == -EAGAIN)
		goto end_clear;


	if (err < 0)
		goto error;
	else if (err == 0) {
		/* a zero-length write is treated as a dead peer */
		err = -EREMOTEIO;
		goto error;
	}

	m->wpos += err;
	/* whole frame written: drop our reference and go idle */
	if (m->wpos == m->wsize) {
		m->wpos = m->wsize = 0;
		p9_req_put(m->wreq);
		m->wreq = NULL;
	}

end_clear:
	clear_bit(Wworksched, &m->wsched);

	/* still have data or requests to send: reschedule if writable */
	if (m->wsize || !list_empty(&m->unsent_req_list)) {
		if (test_and_clear_bit(Wpending, &m->wsched))
			n = EPOLLOUT;
		else
			n = p9_fd_poll(m->client, NULL, NULL);

		if ((n & EPOLLOUT) &&
		   !test_and_set_bit(Wworksched, &m->wsched)) {
			p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m);
			schedule_work(&m->wq);
		}
	}

	return;

error:
	p9_conn_cancel(m, err);
	clear_bit(Wworksched, &m->wsched);
}
521 
522 static int p9_pollwake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
523 {
524 	struct p9_poll_wait *pwait =
525 		container_of(wait, struct p9_poll_wait, wait);
526 	struct p9_conn *m = pwait->conn;
527 	unsigned long flags;
528 
529 	spin_lock_irqsave(&p9_poll_lock, flags);
530 	if (list_empty(&m->poll_pending_link))
531 		list_add_tail(&m->poll_pending_link, &p9_poll_pending_list);
532 	spin_unlock_irqrestore(&p9_poll_lock, flags);
533 
534 	schedule_work(&p9_poll_work);
535 	return 1;
536 }
537 
538 /**
539  * p9_pollwait - add poll task to the wait queue
540  * @filp: file pointer being polled
541  * @wait_address: wait_q to block on
542  * @p: poll state
543  *
544  * called by files poll operation to add v9fs-poll task to files wait queue
545  */
546 
547 static void
548 p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
549 {
550 	struct p9_conn *m = container_of(p, struct p9_conn, pt);
551 	struct p9_poll_wait *pwait = NULL;
552 	int i;
553 
554 	for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
555 		if (m->poll_wait[i].wait_addr == NULL) {
556 			pwait = &m->poll_wait[i];
557 			break;
558 		}
559 	}
560 
561 	if (!pwait) {
562 		p9_debug(P9_DEBUG_ERROR, "not enough wait_address slots\n");
563 		return;
564 	}
565 
566 	pwait->conn = m;
567 	pwait->wait_addr = wait_address;
568 	init_waitqueue_func_entry(&pwait->wait, p9_pollwake);
569 	add_wait_queue(wait_address, &pwait->wait);
570 }
571 
572 /**
573  * p9_conn_create - initialize the per-session mux data
574  * @client: client instance
575  *
576  * Note: Creates the polling task if this is the first session.
577  */
578 
579 static void p9_conn_create(struct p9_client *client)
580 {
581 	__poll_t n;
582 	struct p9_trans_fd *ts = client->trans;
583 	struct p9_conn *m = &ts->conn;
584 
585 	p9_debug(P9_DEBUG_TRANS, "client %p msize %d\n", client, client->msize);
586 
587 	INIT_LIST_HEAD(&m->mux_list);
588 	m->client = client;
589 
590 	INIT_LIST_HEAD(&m->req_list);
591 	INIT_LIST_HEAD(&m->unsent_req_list);
592 	INIT_WORK(&m->rq, p9_read_work);
593 	INIT_WORK(&m->wq, p9_write_work);
594 	INIT_LIST_HEAD(&m->poll_pending_link);
595 	init_poll_funcptr(&m->pt, p9_pollwait);
596 
597 	n = p9_fd_poll(client, &m->pt, NULL);
598 	if (n & EPOLLIN) {
599 		p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);
600 		set_bit(Rpending, &m->wsched);
601 	}
602 
603 	if (n & EPOLLOUT) {
604 		p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m);
605 		set_bit(Wpending, &m->wsched);
606 	}
607 }
608 
609 /**
610  * p9_poll_mux - polls a mux and schedules read or write works if necessary
611  * @m: connection to poll
612  *
613  */
614 
615 static void p9_poll_mux(struct p9_conn *m)
616 {
617 	__poll_t n;
618 	int err = -ECONNRESET;
619 
620 	if (m->err < 0)
621 		return;
622 
623 	n = p9_fd_poll(m->client, NULL, &err);
624 	if (n & (EPOLLERR | EPOLLHUP | EPOLLNVAL)) {
625 		p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n);
626 		p9_conn_cancel(m, err);
627 	}
628 
629 	if (n & EPOLLIN) {
630 		set_bit(Rpending, &m->wsched);
631 		p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);
632 		if (!test_and_set_bit(Rworksched, &m->wsched)) {
633 			p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m);
634 			schedule_work(&m->rq);
635 		}
636 	}
637 
638 	if (n & EPOLLOUT) {
639 		set_bit(Wpending, &m->wsched);
640 		p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m);
641 		if ((m->wsize || !list_empty(&m->unsent_req_list)) &&
642 		    !test_and_set_bit(Wworksched, &m->wsched)) {
643 			p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m);
644 			schedule_work(&m->wq);
645 		}
646 	}
647 }
648 
649 /**
650  * p9_fd_request - send 9P request
651  * The function can sleep until the request is scheduled for sending.
652  * The function can be interrupted. Return from the function is not
653  * a guarantee that the request is sent successfully.
654  *
655  * @client: client instance
656  * @req: request to be sent
657  *
658  */
659 
660 static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
661 {
662 	__poll_t n;
663 	struct p9_trans_fd *ts = client->trans;
664 	struct p9_conn *m = &ts->conn;
665 
666 	p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n",
667 		 m, current, &req->tc, req->tc.id);
668 	if (m->err < 0)
669 		return m->err;
670 
671 	spin_lock(&client->lock);
672 	req->status = REQ_STATUS_UNSENT;
673 	list_add_tail(&req->req_list, &m->unsent_req_list);
674 	spin_unlock(&client->lock);
675 
676 	if (test_and_clear_bit(Wpending, &m->wsched))
677 		n = EPOLLOUT;
678 	else
679 		n = p9_fd_poll(m->client, NULL, NULL);
680 
681 	if (n & EPOLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
682 		schedule_work(&m->wq);
683 
684 	return 0;
685 }
686 
687 static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
688 {
689 	int ret = 1;
690 
691 	p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
692 
693 	spin_lock(&client->lock);
694 
695 	if (req->status == REQ_STATUS_UNSENT) {
696 		list_del(&req->req_list);
697 		req->status = REQ_STATUS_FLSHD;
698 		p9_req_put(req);
699 		ret = 0;
700 	}
701 	spin_unlock(&client->lock);
702 
703 	return ret;
704 }
705 
706 static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
707 {
708 	p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
709 
710 	spin_lock(&client->lock);
711 	/* Ignore cancelled request if message has been received
712 	 * before lock.
713 	 */
714 	if (req->status == REQ_STATUS_RCVD) {
715 		spin_unlock(&client->lock);
716 		return 0;
717 	}
718 
719 	/* we haven't received a response for oldreq,
720 	 * remove it from the list.
721 	 */
722 	list_del(&req->req_list);
723 	req->status = REQ_STATUS_FLSHD;
724 	spin_unlock(&client->lock);
725 	p9_req_put(req);
726 
727 	return 0;
728 }
729 
730 static int p9_fd_show_options(struct seq_file *m, struct p9_client *clnt)
731 {
732 	if (clnt->trans_mod == &p9_tcp_trans) {
733 		if (clnt->trans_opts.tcp.port != P9_PORT)
734 			seq_printf(m, ",port=%u", clnt->trans_opts.tcp.port);
735 	} else if (clnt->trans_mod == &p9_fd_trans) {
736 		if (clnt->trans_opts.fd.rfd != ~0)
737 			seq_printf(m, ",rfd=%u", clnt->trans_opts.fd.rfd);
738 		if (clnt->trans_opts.fd.wfd != ~0)
739 			seq_printf(m, ",wfd=%u", clnt->trans_opts.fd.wfd);
740 	}
741 	return 0;
742 }
743 
744 /**
745  * parse_opts - parse mount options into p9_fd_opts structure
746  * @params: options string passed from mount
747  * @opts: fd transport-specific structure to parse options into
748  *
749  * Returns 0 upon success, -ERRNO upon failure
750  */
751 
752 static int parse_opts(char *params, struct p9_fd_opts *opts)
753 {
754 	char *p;
755 	substring_t args[MAX_OPT_ARGS];
756 	int option;
757 	char *options, *tmp_options;
758 
759 	opts->port = P9_PORT;
760 	opts->rfd = ~0;
761 	opts->wfd = ~0;
762 	opts->privport = false;
763 
764 	if (!params)
765 		return 0;
766 
767 	tmp_options = kstrdup(params, GFP_KERNEL);
768 	if (!tmp_options) {
769 		p9_debug(P9_DEBUG_ERROR,
770 			 "failed to allocate copy of option string\n");
771 		return -ENOMEM;
772 	}
773 	options = tmp_options;
774 
775 	while ((p = strsep(&options, ",")) != NULL) {
776 		int token;
777 		int r;
778 		if (!*p)
779 			continue;
780 		token = match_token(p, tokens, args);
781 		if ((token != Opt_err) && (token != Opt_privport)) {
782 			r = match_int(&args[0], &option);
783 			if (r < 0) {
784 				p9_debug(P9_DEBUG_ERROR,
785 					 "integer field, but no integer?\n");
786 				continue;
787 			}
788 		}
789 		switch (token) {
790 		case Opt_port:
791 			opts->port = option;
792 			break;
793 		case Opt_rfdno:
794 			opts->rfd = option;
795 			break;
796 		case Opt_wfdno:
797 			opts->wfd = option;
798 			break;
799 		case Opt_privport:
800 			opts->privport = true;
801 			break;
802 		default:
803 			continue;
804 		}
805 	}
806 
807 	kfree(tmp_options);
808 	return 0;
809 }
810 
811 static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
812 {
813 	struct p9_trans_fd *ts = kzalloc(sizeof(struct p9_trans_fd),
814 					   GFP_KERNEL);
815 	if (!ts)
816 		return -ENOMEM;
817 
818 	ts->rd = fget(rfd);
819 	if (!ts->rd)
820 		goto out_free_ts;
821 	if (!(ts->rd->f_mode & FMODE_READ))
822 		goto out_put_rd;
823 	ts->wr = fget(wfd);
824 	if (!ts->wr)
825 		goto out_put_rd;
826 	if (!(ts->wr->f_mode & FMODE_WRITE))
827 		goto out_put_wr;
828 
829 	client->trans = ts;
830 	client->status = Connected;
831 
832 	return 0;
833 
834 out_put_wr:
835 	fput(ts->wr);
836 out_put_rd:
837 	fput(ts->rd);
838 out_free_ts:
839 	kfree(ts);
840 	return -EIO;
841 }
842 
843 static int p9_socket_open(struct p9_client *client, struct socket *csocket)
844 {
845 	struct p9_trans_fd *p;
846 	struct file *file;
847 
848 	p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL);
849 	if (!p)
850 		return -ENOMEM;
851 
852 	csocket->sk->sk_allocation = GFP_NOIO;
853 	file = sock_alloc_file(csocket, 0, NULL);
854 	if (IS_ERR(file)) {
855 		pr_err("%s (%d): failed to map fd\n",
856 		       __func__, task_pid_nr(current));
857 		kfree(p);
858 		return PTR_ERR(file);
859 	}
860 
861 	get_file(file);
862 	p->wr = p->rd = file;
863 	client->trans = p;
864 	client->status = Connected;
865 
866 	p->rd->f_flags |= O_NONBLOCK;
867 
868 	p9_conn_create(client);
869 	return 0;
870 }
871 
/**
 * p9_conn_destroy - cancels all pending requests of mux
 * @m: mux to destroy
 *
 * Teardown order: stop poll wakeups first so no new work gets scheduled,
 * then wait for the read and write work items and drop the request
 * reference each may still hold, and finally fail whatever is left on
 * the request lists with -ECONNRESET.
 */

static void p9_conn_destroy(struct p9_conn *m)
{
	p9_debug(P9_DEBUG_TRANS, "mux %p prev %p next %p\n",
		 m, m->mux_list.prev, m->mux_list.next);

	p9_mux_poll_stop(m);
	cancel_work_sync(&m->rq);
	/* reply that was being read when the work was stopped */
	if (m->rreq) {
		p9_req_put(m->rreq);
		m->rreq = NULL;
	}
	cancel_work_sync(&m->wq);
	/* frame that was being written when the work was stopped */
	if (m->wreq) {
		p9_req_put(m->wreq);
		m->wreq = NULL;
	}

	/* no-op if the connection had already been cancelled with an error */
	p9_conn_cancel(m, -ECONNRESET);

	m->client = NULL;
}
899 
900 /**
901  * p9_fd_close - shutdown file descriptor transport
902  * @client: client instance
903  *
904  */
905 
906 static void p9_fd_close(struct p9_client *client)
907 {
908 	struct p9_trans_fd *ts;
909 
910 	if (!client)
911 		return;
912 
913 	ts = client->trans;
914 	if (!ts)
915 		return;
916 
917 	client->status = Disconnected;
918 
919 	p9_conn_destroy(&ts->conn);
920 
921 	if (ts->rd)
922 		fput(ts->rd);
923 	if (ts->wr)
924 		fput(ts->wr);
925 
926 	kfree(ts);
927 }
928 
/*
 * stolen from NFS - maybe should be made a generic function?
 *
 * valid_ipaddr4 - check that a string starts with a dotted-quad IPv4 address
 * @buf: NUL-terminated string to check
 *
 * Requires four decimal fields separated by dots, each in the range
 * 0..255.  "%d" also accepts a leading sign, so negative fields must be
 * rejected explicitly.  Characters after the fourth field are not examined.
 *
 * Returns 0 if the address is acceptable, -EINVAL otherwise.
 */
static inline int valid_ipaddr4(const char *buf)
{
	int rc, count, in[4];

	rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]);
	if (rc != 4)
		return -EINVAL;
	for (count = 0; count < 4; count++) {
		if (in[count] < 0 || in[count] > 255)
			return -EINVAL;
	}
	return 0;
}
945 
946 static int p9_bind_privport(struct socket *sock)
947 {
948 	struct sockaddr_in cl;
949 	int port, err = -EINVAL;
950 
951 	memset(&cl, 0, sizeof(cl));
952 	cl.sin_family = AF_INET;
953 	cl.sin_addr.s_addr = htonl(INADDR_ANY);
954 	for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) {
955 		cl.sin_port = htons((ushort)port);
956 		err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl));
957 		if (err != -EADDRINUSE)
958 			break;
959 	}
960 	return err;
961 }
962 
963 
964 static int
965 p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
966 {
967 	int err;
968 	struct socket *csocket;
969 	struct sockaddr_in sin_server;
970 	struct p9_fd_opts opts;
971 
972 	err = parse_opts(args, &opts);
973 	if (err < 0)
974 		return err;
975 
976 	if (addr == NULL || valid_ipaddr4(addr) < 0)
977 		return -EINVAL;
978 
979 	csocket = NULL;
980 
981 	client->trans_opts.tcp.port = opts.port;
982 	client->trans_opts.tcp.privport = opts.privport;
983 	sin_server.sin_family = AF_INET;
984 	sin_server.sin_addr.s_addr = in_aton(addr);
985 	sin_server.sin_port = htons(opts.port);
986 	err = __sock_create(current->nsproxy->net_ns, PF_INET,
987 			    SOCK_STREAM, IPPROTO_TCP, &csocket, 1);
988 	if (err) {
989 		pr_err("%s (%d): problem creating socket\n",
990 		       __func__, task_pid_nr(current));
991 		return err;
992 	}
993 
994 	if (opts.privport) {
995 		err = p9_bind_privport(csocket);
996 		if (err < 0) {
997 			pr_err("%s (%d): problem binding to privport\n",
998 			       __func__, task_pid_nr(current));
999 			sock_release(csocket);
1000 			return err;
1001 		}
1002 	}
1003 
1004 	err = csocket->ops->connect(csocket,
1005 				    (struct sockaddr *)&sin_server,
1006 				    sizeof(struct sockaddr_in), 0);
1007 	if (err < 0) {
1008 		pr_err("%s (%d): problem connecting socket to %s\n",
1009 		       __func__, task_pid_nr(current), addr);
1010 		sock_release(csocket);
1011 		return err;
1012 	}
1013 
1014 	return p9_socket_open(client, csocket);
1015 }
1016 
1017 static int
1018 p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)
1019 {
1020 	int err;
1021 	struct socket *csocket;
1022 	struct sockaddr_un sun_server;
1023 
1024 	csocket = NULL;
1025 
1026 	if (!addr || !strlen(addr))
1027 		return -EINVAL;
1028 
1029 	if (strlen(addr) >= UNIX_PATH_MAX) {
1030 		pr_err("%s (%d): address too long: %s\n",
1031 		       __func__, task_pid_nr(current), addr);
1032 		return -ENAMETOOLONG;
1033 	}
1034 
1035 	sun_server.sun_family = PF_UNIX;
1036 	strcpy(sun_server.sun_path, addr);
1037 	err = __sock_create(current->nsproxy->net_ns, PF_UNIX,
1038 			    SOCK_STREAM, 0, &csocket, 1);
1039 	if (err < 0) {
1040 		pr_err("%s (%d): problem creating socket\n",
1041 		       __func__, task_pid_nr(current));
1042 
1043 		return err;
1044 	}
1045 	err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server,
1046 			sizeof(struct sockaddr_un) - 1, 0);
1047 	if (err < 0) {
1048 		pr_err("%s (%d): problem connecting socket: %s: %d\n",
1049 		       __func__, task_pid_nr(current), addr, err);
1050 		sock_release(csocket);
1051 		return err;
1052 	}
1053 
1054 	return p9_socket_open(client, csocket);
1055 }
1056 
1057 static int
1058 p9_fd_create(struct p9_client *client, const char *addr, char *args)
1059 {
1060 	int err;
1061 	struct p9_fd_opts opts;
1062 
1063 	parse_opts(args, &opts);
1064 	client->trans_opts.fd.rfd = opts.rfd;
1065 	client->trans_opts.fd.wfd = opts.wfd;
1066 
1067 	if (opts.rfd == ~0 || opts.wfd == ~0) {
1068 		pr_err("Insufficient options for proto=fd\n");
1069 		return -ENOPROTOOPT;
1070 	}
1071 
1072 	err = p9_fd_open(client, opts.rfd, opts.wfd);
1073 	if (err < 0)
1074 		return err;
1075 
1076 	p9_conn_create(client);
1077 
1078 	return 0;
1079 }
1080 
/*
 * "tcp" transport: connects to an IPv4 server address.  Differs from the
 * other transports in this file only in its .create handler.
 */
static struct p9_trans_module p9_tcp_trans = {
	.name = "tcp",
	.maxsize = MAX_SOCK_BUF,
	.def = 0,
	.create = p9_fd_create_tcp,
	.close = p9_fd_close,
	.request = p9_fd_request,
	.cancel = p9_fd_cancel,
	.cancelled = p9_fd_cancelled,
	.show_options = p9_fd_show_options,
	.owner = THIS_MODULE,
};
1093 
/*
 * "unix" transport: connects to a UNIX-domain socket path.  Differs from
 * the other transports in this file only in its .create handler.
 */
static struct p9_trans_module p9_unix_trans = {
	.name = "unix",
	.maxsize = MAX_SOCK_BUF,
	.def = 0,
	.create = p9_fd_create_unix,
	.close = p9_fd_close,
	.request = p9_fd_request,
	.cancel = p9_fd_cancel,
	.cancelled = p9_fd_cancelled,
	.show_options = p9_fd_show_options,
	.owner = THIS_MODULE,
};
1106 
/*
 * "fd" transport: uses a pair of already-open descriptors passed via the
 * rfdno=/wfdno= options.  Differs from the other transports in this file
 * only in its .create handler.
 */
static struct p9_trans_module p9_fd_trans = {
	.name = "fd",
	.maxsize = MAX_SOCK_BUF,
	.def = 0,
	.create = p9_fd_create,
	.close = p9_fd_close,
	.request = p9_fd_request,
	.cancel = p9_fd_cancel,
	.cancelled = p9_fd_cancelled,
	.show_options = p9_fd_show_options,
	.owner = THIS_MODULE,
};
1119 
1120 /**
1121  * p9_poll_workfn - poll worker thread
1122  * @work: work queue
1123  *
1124  * polls all v9fs transports for new events and queues the appropriate
1125  * work to the work queue
1126  *
1127  */
1128 
1129 static void p9_poll_workfn(struct work_struct *work)
1130 {
1131 	unsigned long flags;
1132 
1133 	p9_debug(P9_DEBUG_TRANS, "start %p\n", current);
1134 
1135 	spin_lock_irqsave(&p9_poll_lock, flags);
1136 	while (!list_empty(&p9_poll_pending_list)) {
1137 		struct p9_conn *conn = list_first_entry(&p9_poll_pending_list,
1138 							struct p9_conn,
1139 							poll_pending_link);
1140 		list_del_init(&conn->poll_pending_link);
1141 		spin_unlock_irqrestore(&p9_poll_lock, flags);
1142 
1143 		p9_poll_mux(conn);
1144 
1145 		spin_lock_irqsave(&p9_poll_lock, flags);
1146 	}
1147 	spin_unlock_irqrestore(&p9_poll_lock, flags);
1148 
1149 	p9_debug(P9_DEBUG_TRANS, "finish\n");
1150 }
1151 
1152 int p9_trans_fd_init(void)
1153 {
1154 	v9fs_register_trans(&p9_tcp_trans);
1155 	v9fs_register_trans(&p9_unix_trans);
1156 	v9fs_register_trans(&p9_fd_trans);
1157 
1158 	return 0;
1159 }
1160 
1161 void p9_trans_fd_exit(void)
1162 {
1163 	flush_work(&p9_poll_work);
1164 	v9fs_unregister_trans(&p9_tcp_trans);
1165 	v9fs_unregister_trans(&p9_unix_trans);
1166 	v9fs_unregister_trans(&p9_fd_trans);
1167 }
1168