xref: /openbmc/linux/net/9p/trans_fd.c (revision e2eb96ae)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * Fd transport layer.  Includes deprecated socket layer.
4   *
5   *  Copyright (C) 2006 by Russ Cox <rsc@swtch.com>
6   *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
7   *  Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com>
8   *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
9   */
10  
11  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12  
13  #include <linux/in.h>
14  #include <linux/module.h>
15  #include <linux/net.h>
16  #include <linux/ipv6.h>
17  #include <linux/kthread.h>
18  #include <linux/errno.h>
19  #include <linux/kernel.h>
20  #include <linux/un.h>
21  #include <linux/uaccess.h>
22  #include <linux/inet.h>
23  #include <linux/file.h>
24  #include <linux/parser.h>
25  #include <linux/slab.h>
26  #include <linux/seq_file.h>
27  #include <net/9p/9p.h>
28  #include <net/9p/client.h>
29  #include <net/9p/transport.h>
30  
31  #include <linux/syscalls.h> /* killme */
32  
/* Default TCP port; parse_opts() falls back to it when no port= is given. */
#define P9_PORT 564
/* Value advertised as .maxsize by the three transports defined in this file. */
#define MAX_SOCK_BUF (1024*1024)
/* Number of wait-queue slots kept per connection (see p9_conn.poll_wait). */
#define MAXPOLLWADDR	2

/*
 * Forward declarations: p9_fd_show_options() compares a client's transport
 * against these before their definitions appear further down.
 */
static struct p9_trans_module p9_tcp_trans;
static struct p9_trans_module p9_fd_trans;
39  
/**
 * struct p9_fd_opts - per-transport options
 * @rfd: file descriptor for reading (trans=fd); ~0 when not supplied
 * @wfd: file descriptor for writing (trans=fd); ~0 when not supplied
 * @port: port to connect to (trans=tcp); defaults to P9_PORT
 * @privport: bind the local socket to a reserved port before connecting
 *            (trans=tcp, see p9_bind_privport())
 */

struct p9_fd_opts {
	int rfd;
	int wfd;
	u16 port;
	bool privport;
};
54  
55  /*
56    * Option Parsing (code inspired by NFS code)
57    *  - a little lazy - parse all fd-transport options
58    */
59  
enum {
	/*
	 * Options that take integer arguments.  Opt_err is the exception: it
	 * is the catch-all entry of the token table and parse_opts() never
	 * reads an integer for it.
	 */
	Opt_port, Opt_rfdno, Opt_wfdno, Opt_err,
	/* Options that take no arguments */
	Opt_privport,
};
66  
/*
 * Patterns handed to match_token() by parse_opts().  The NULL-pattern entry
 * terminates the table and yields Opt_err for anything not listed here.
 */
static const match_table_t tokens = {
	{Opt_port, "port=%u"},
	{Opt_rfdno, "rfdno=%u"},
	{Opt_wfdno, "wfdno=%u"},
	{Opt_privport, "privport"},
	{Opt_err, NULL},
};
74  
/*
 * State flags kept in p9_conn.wsched.  They are passed to set_bit(),
 * clear_bit() and test_and_set_bit() in this file, i.e. they are used as
 * bit *numbers* (bits 1, 2, 4 and 8 of wsched), not as masks.
 */
enum {
	Rworksched = 1,		/* read work scheduled or running */
	Rpending = 2,		/* can read */
	Wworksched = 4,		/* write work scheduled or running */
	Wpending = 8,		/* can write */
};
81  
/**
 * struct p9_poll_wait - one wait-queue registration of a connection
 * @conn: connection this entry belongs to (read back in p9_pollwake())
 * @wait: wait-queue entry whose wake function is p9_pollwake()
 * @wait_addr: wait-queue head @wait was added to; NULL while the slot is free
 */
struct p9_poll_wait {
	struct p9_conn *conn;
	wait_queue_entry_t wait;
	wait_queue_head_t *wait_addr;
};
87  
/**
 * struct p9_conn - fd mux connection state information
 * @mux_list: list link for mux to manage multiple connections (?)
 * @client: reference to client instance for this connection
 * @err: error state; set once by p9_conn_cancel(), workers bail out when it
 *       is negative
 * @req_lock: lock protecting req_list and requests statuses
 * @req_list: accounting for requests which have been sent
 * @unsent_req_list: accounting for requests that haven't been sent
 * @rreq: request whose reply is currently being read (if any)
 * @wreq: request currently being written (if any)
 * @tmp_buf: temporary buffer to read in header
 * @rc: temporary fcall for reading current frame
 * @wpos: write position for current frame
 * @wsize: amount of data to write for current frame
 * @wbuf: current write buffer
 * @poll_pending_link: link on p9_poll_pending_list while a poll is pending
 * @poll_wait: array of wait_q's for various worker threads
 * @pt: poll state
 * @rq: current read work
 * @wq: current write work
 * @wsched: scheduling state; holds the Rworksched/Rpending/Wworksched/
 *          Wpending bits
 *
 */

struct p9_conn {
	struct list_head mux_list;
	struct p9_client *client;
	int err;
	spinlock_t req_lock;
	struct list_head req_list;
	struct list_head unsent_req_list;
	struct p9_req_t *rreq;
	struct p9_req_t *wreq;
	char tmp_buf[P9_HDRSZ];
	struct p9_fcall rc;
	int wpos;
	int wsize;
	char *wbuf;
	struct list_head poll_pending_link;
	struct p9_poll_wait poll_wait[MAXPOLLWADDR];
	poll_table pt;
	struct work_struct rq;
	struct work_struct wq;
	unsigned long wsched;
};
134  
/**
 * struct p9_trans_fd - transport state
 * @rd: reference to file to read from
 * @wr: reference of file to write to (the same file as @rd for sockets,
 *      see p9_socket_open())
 * @conn: connection state, embedded in this structure
 *
 */

struct p9_trans_fd {
	struct file *rd;
	struct file *wr;
	struct p9_conn conn;
};
148  
static void p9_poll_workfn(struct work_struct *work);

/* p9_poll_lock protects p9_poll_pending_list and every poll_pending_link. */
static DEFINE_SPINLOCK(p9_poll_lock);
/* Connections queued by p9_pollwake() and drained by p9_poll_workfn(). */
static LIST_HEAD(p9_poll_pending_list);
/* Single work item shared by all connections of this transport. */
static DECLARE_WORK(p9_poll_work, p9_poll_workfn);

/* Inclusive port range walked (from max down to min) by p9_bind_privport(). */
static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT;
static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT;
157  
158  static void p9_mux_poll_stop(struct p9_conn *m)
159  {
160  	unsigned long flags;
161  	int i;
162  
163  	for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
164  		struct p9_poll_wait *pwait = &m->poll_wait[i];
165  
166  		if (pwait->wait_addr) {
167  			remove_wait_queue(pwait->wait_addr, &pwait->wait);
168  			pwait->wait_addr = NULL;
169  		}
170  	}
171  
172  	spin_lock_irqsave(&p9_poll_lock, flags);
173  	list_del_init(&m->poll_pending_link);
174  	spin_unlock_irqrestore(&p9_poll_lock, flags);
175  
176  	flush_work(&p9_poll_work);
177  }
178  
179  /**
180   * p9_conn_cancel - cancel all pending requests with error
181   * @m: mux data
182   * @err: error code
183   *
184   */
185  
186  static void p9_conn_cancel(struct p9_conn *m, int err)
187  {
188  	struct p9_req_t *req, *rtmp;
189  	LIST_HEAD(cancel_list);
190  
191  	p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err);
192  
193  	spin_lock(&m->req_lock);
194  
195  	if (m->err) {
196  		spin_unlock(&m->req_lock);
197  		return;
198  	}
199  
200  	m->err = err;
201  
202  	list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
203  		list_move(&req->req_list, &cancel_list);
204  		WRITE_ONCE(req->status, REQ_STATUS_ERROR);
205  	}
206  	list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
207  		list_move(&req->req_list, &cancel_list);
208  		WRITE_ONCE(req->status, REQ_STATUS_ERROR);
209  	}
210  
211  	spin_unlock(&m->req_lock);
212  
213  	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
214  		p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req);
215  		list_del(&req->req_list);
216  		if (!req->t_err)
217  			req->t_err = err;
218  		p9_client_cb(m->client, req, REQ_STATUS_ERROR);
219  	}
220  }
221  
222  static __poll_t
223  p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err)
224  {
225  	__poll_t ret;
226  	struct p9_trans_fd *ts = NULL;
227  
228  	if (client && client->status == Connected)
229  		ts = client->trans;
230  
231  	if (!ts) {
232  		if (err)
233  			*err = -EREMOTEIO;
234  		return EPOLLERR;
235  	}
236  
237  	ret = vfs_poll(ts->rd, pt);
238  	if (ts->rd != ts->wr)
239  		ret = (ret & ~EPOLLOUT) | (vfs_poll(ts->wr, pt) & ~EPOLLIN);
240  	return ret;
241  }
242  
243  /**
244   * p9_fd_read- read from a fd
245   * @client: client instance
246   * @v: buffer to receive data into
247   * @len: size of receive buffer
248   *
249   */
250  
251  static int p9_fd_read(struct p9_client *client, void *v, int len)
252  {
253  	int ret;
254  	struct p9_trans_fd *ts = NULL;
255  	loff_t pos;
256  
257  	if (client && client->status != Disconnected)
258  		ts = client->trans;
259  
260  	if (!ts)
261  		return -EREMOTEIO;
262  
263  	if (!(ts->rd->f_flags & O_NONBLOCK))
264  		p9_debug(P9_DEBUG_ERROR, "blocking read ...\n");
265  
266  	pos = ts->rd->f_pos;
267  	ret = kernel_read(ts->rd, v, len, &pos);
268  	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
269  		client->status = Disconnected;
270  	return ret;
271  }
272  
273  /**
274   * p9_read_work - called when there is some data to be read from a transport
275   * @work: container of work to be done
276   *
277   */
278  
279  static void p9_read_work(struct work_struct *work)
280  {
281  	__poll_t n;
282  	int err;
283  	struct p9_conn *m;
284  
285  	m = container_of(work, struct p9_conn, rq);
286  
287  	if (m->err < 0)
288  		return;
289  
290  	p9_debug(P9_DEBUG_TRANS, "start mux %p pos %zd\n", m, m->rc.offset);
291  
292  	if (!m->rc.sdata) {
293  		m->rc.sdata = m->tmp_buf;
294  		m->rc.offset = 0;
295  		m->rc.capacity = P9_HDRSZ; /* start by reading header */
296  	}
297  
298  	clear_bit(Rpending, &m->wsched);
299  	p9_debug(P9_DEBUG_TRANS, "read mux %p pos %zd size: %zd = %zd\n",
300  		 m, m->rc.offset, m->rc.capacity,
301  		 m->rc.capacity - m->rc.offset);
302  	err = p9_fd_read(m->client, m->rc.sdata + m->rc.offset,
303  			 m->rc.capacity - m->rc.offset);
304  	p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err);
305  	if (err == -EAGAIN)
306  		goto end_clear;
307  
308  	if (err <= 0)
309  		goto error;
310  
311  	m->rc.offset += err;
312  
313  	/* header read in */
314  	if ((!m->rreq) && (m->rc.offset == m->rc.capacity)) {
315  		p9_debug(P9_DEBUG_TRANS, "got new header\n");
316  
317  		/* Header size */
318  		m->rc.size = P9_HDRSZ;
319  		err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0);
320  		if (err) {
321  			p9_debug(P9_DEBUG_ERROR,
322  				 "error parsing header: %d\n", err);
323  			goto error;
324  		}
325  
326  		p9_debug(P9_DEBUG_TRANS,
327  			 "mux %p pkt: size: %d bytes tag: %d\n",
328  			 m, m->rc.size, m->rc.tag);
329  
330  		m->rreq = p9_tag_lookup(m->client, m->rc.tag);
331  		if (!m->rreq || (m->rreq->status != REQ_STATUS_SENT)) {
332  			p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
333  				 m->rc.tag);
334  			err = -EIO;
335  			goto error;
336  		}
337  
338  		if (m->rc.size > m->rreq->rc.capacity) {
339  			p9_debug(P9_DEBUG_ERROR,
340  				 "requested packet size too big: %d for tag %d with capacity %zd\n",
341  				 m->rc.size, m->rc.tag, m->rreq->rc.capacity);
342  			err = -EIO;
343  			goto error;
344  		}
345  
346  		if (!m->rreq->rc.sdata) {
347  			p9_debug(P9_DEBUG_ERROR,
348  				 "No recv fcall for tag %d (req %p), disconnecting!\n",
349  				 m->rc.tag, m->rreq);
350  			p9_req_put(m->client, m->rreq);
351  			m->rreq = NULL;
352  			err = -EIO;
353  			goto error;
354  		}
355  		m->rc.sdata = m->rreq->rc.sdata;
356  		memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity);
357  		m->rc.capacity = m->rc.size;
358  	}
359  
360  	/* packet is read in
361  	 * not an else because some packets (like clunk) have no payload
362  	 */
363  	if ((m->rreq) && (m->rc.offset == m->rc.capacity)) {
364  		p9_debug(P9_DEBUG_TRANS, "got new packet\n");
365  		m->rreq->rc.size = m->rc.offset;
366  		spin_lock(&m->req_lock);
367  		if (m->rreq->status == REQ_STATUS_SENT) {
368  			list_del(&m->rreq->req_list);
369  			p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD);
370  		} else if (m->rreq->status == REQ_STATUS_FLSHD) {
371  			/* Ignore replies associated with a cancelled request. */
372  			p9_debug(P9_DEBUG_TRANS,
373  				 "Ignore replies associated with a cancelled request\n");
374  		} else {
375  			spin_unlock(&m->req_lock);
376  			p9_debug(P9_DEBUG_ERROR,
377  				 "Request tag %d errored out while we were reading the reply\n",
378  				 m->rc.tag);
379  			err = -EIO;
380  			goto error;
381  		}
382  		spin_unlock(&m->req_lock);
383  		m->rc.sdata = NULL;
384  		m->rc.offset = 0;
385  		m->rc.capacity = 0;
386  		p9_req_put(m->client, m->rreq);
387  		m->rreq = NULL;
388  	}
389  
390  end_clear:
391  	clear_bit(Rworksched, &m->wsched);
392  
393  	if (!list_empty(&m->req_list)) {
394  		if (test_and_clear_bit(Rpending, &m->wsched))
395  			n = EPOLLIN;
396  		else
397  			n = p9_fd_poll(m->client, NULL, NULL);
398  
399  		if ((n & EPOLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) {
400  			p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m);
401  			schedule_work(&m->rq);
402  		}
403  	}
404  
405  	return;
406  error:
407  	p9_conn_cancel(m, err);
408  	clear_bit(Rworksched, &m->wsched);
409  }
410  
411  /**
412   * p9_fd_write - write to a socket
413   * @client: client instance
414   * @v: buffer to send data from
415   * @len: size of send buffer
416   *
417   */
418  
419  static int p9_fd_write(struct p9_client *client, void *v, int len)
420  {
421  	ssize_t ret;
422  	struct p9_trans_fd *ts = NULL;
423  
424  	if (client && client->status != Disconnected)
425  		ts = client->trans;
426  
427  	if (!ts)
428  		return -EREMOTEIO;
429  
430  	if (!(ts->wr->f_flags & O_NONBLOCK))
431  		p9_debug(P9_DEBUG_ERROR, "blocking write ...\n");
432  
433  	ret = kernel_write(ts->wr, v, len, &ts->wr->f_pos);
434  	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
435  		client->status = Disconnected;
436  	return ret;
437  }
438  
439  /**
440   * p9_write_work - called when a transport can send some data
441   * @work: container for work to be done
442   *
443   */
444  
445  static void p9_write_work(struct work_struct *work)
446  {
447  	__poll_t n;
448  	int err;
449  	struct p9_conn *m;
450  	struct p9_req_t *req;
451  
452  	m = container_of(work, struct p9_conn, wq);
453  
454  	if (m->err < 0) {
455  		clear_bit(Wworksched, &m->wsched);
456  		return;
457  	}
458  
459  	if (!m->wsize) {
460  		spin_lock(&m->req_lock);
461  		if (list_empty(&m->unsent_req_list)) {
462  			clear_bit(Wworksched, &m->wsched);
463  			spin_unlock(&m->req_lock);
464  			return;
465  		}
466  
467  		req = list_entry(m->unsent_req_list.next, struct p9_req_t,
468  			       req_list);
469  		WRITE_ONCE(req->status, REQ_STATUS_SENT);
470  		p9_debug(P9_DEBUG_TRANS, "move req %p\n", req);
471  		list_move_tail(&req->req_list, &m->req_list);
472  
473  		m->wbuf = req->tc.sdata;
474  		m->wsize = req->tc.size;
475  		m->wpos = 0;
476  		p9_req_get(req);
477  		m->wreq = req;
478  		spin_unlock(&m->req_lock);
479  	}
480  
481  	p9_debug(P9_DEBUG_TRANS, "mux %p pos %d size %d\n",
482  		 m, m->wpos, m->wsize);
483  	clear_bit(Wpending, &m->wsched);
484  	err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos);
485  	p9_debug(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err);
486  	if (err == -EAGAIN)
487  		goto end_clear;
488  
489  
490  	if (err < 0)
491  		goto error;
492  	else if (err == 0) {
493  		err = -EREMOTEIO;
494  		goto error;
495  	}
496  
497  	m->wpos += err;
498  	if (m->wpos == m->wsize) {
499  		m->wpos = m->wsize = 0;
500  		p9_req_put(m->client, m->wreq);
501  		m->wreq = NULL;
502  	}
503  
504  end_clear:
505  	clear_bit(Wworksched, &m->wsched);
506  
507  	if (m->wsize || !list_empty(&m->unsent_req_list)) {
508  		if (test_and_clear_bit(Wpending, &m->wsched))
509  			n = EPOLLOUT;
510  		else
511  			n = p9_fd_poll(m->client, NULL, NULL);
512  
513  		if ((n & EPOLLOUT) &&
514  		   !test_and_set_bit(Wworksched, &m->wsched)) {
515  			p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m);
516  			schedule_work(&m->wq);
517  		}
518  	}
519  
520  	return;
521  
522  error:
523  	p9_conn_cancel(m, err);
524  	clear_bit(Wworksched, &m->wsched);
525  }
526  
527  static int p9_pollwake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
528  {
529  	struct p9_poll_wait *pwait =
530  		container_of(wait, struct p9_poll_wait, wait);
531  	struct p9_conn *m = pwait->conn;
532  	unsigned long flags;
533  
534  	spin_lock_irqsave(&p9_poll_lock, flags);
535  	if (list_empty(&m->poll_pending_link))
536  		list_add_tail(&m->poll_pending_link, &p9_poll_pending_list);
537  	spin_unlock_irqrestore(&p9_poll_lock, flags);
538  
539  	schedule_work(&p9_poll_work);
540  	return 1;
541  }
542  
543  /**
544   * p9_pollwait - add poll task to the wait queue
545   * @filp: file pointer being polled
546   * @wait_address: wait_q to block on
547   * @p: poll state
548   *
549   * called by files poll operation to add v9fs-poll task to files wait queue
550   */
551  
552  static void
553  p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
554  {
555  	struct p9_conn *m = container_of(p, struct p9_conn, pt);
556  	struct p9_poll_wait *pwait = NULL;
557  	int i;
558  
559  	for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) {
560  		if (m->poll_wait[i].wait_addr == NULL) {
561  			pwait = &m->poll_wait[i];
562  			break;
563  		}
564  	}
565  
566  	if (!pwait) {
567  		p9_debug(P9_DEBUG_ERROR, "not enough wait_address slots\n");
568  		return;
569  	}
570  
571  	pwait->conn = m;
572  	pwait->wait_addr = wait_address;
573  	init_waitqueue_func_entry(&pwait->wait, p9_pollwake);
574  	add_wait_queue(wait_address, &pwait->wait);
575  }
576  
577  /**
578   * p9_conn_create - initialize the per-session mux data
579   * @client: client instance
580   *
581   * Note: Creates the polling task if this is the first session.
582   */
583  
584  static void p9_conn_create(struct p9_client *client)
585  {
586  	__poll_t n;
587  	struct p9_trans_fd *ts = client->trans;
588  	struct p9_conn *m = &ts->conn;
589  
590  	p9_debug(P9_DEBUG_TRANS, "client %p msize %d\n", client, client->msize);
591  
592  	INIT_LIST_HEAD(&m->mux_list);
593  	m->client = client;
594  
595  	spin_lock_init(&m->req_lock);
596  	INIT_LIST_HEAD(&m->req_list);
597  	INIT_LIST_HEAD(&m->unsent_req_list);
598  	INIT_WORK(&m->rq, p9_read_work);
599  	INIT_WORK(&m->wq, p9_write_work);
600  	INIT_LIST_HEAD(&m->poll_pending_link);
601  	init_poll_funcptr(&m->pt, p9_pollwait);
602  
603  	n = p9_fd_poll(client, &m->pt, NULL);
604  	if (n & EPOLLIN) {
605  		p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);
606  		set_bit(Rpending, &m->wsched);
607  	}
608  
609  	if (n & EPOLLOUT) {
610  		p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m);
611  		set_bit(Wpending, &m->wsched);
612  	}
613  }
614  
615  /**
616   * p9_poll_mux - polls a mux and schedules read or write works if necessary
617   * @m: connection to poll
618   *
619   */
620  
621  static void p9_poll_mux(struct p9_conn *m)
622  {
623  	__poll_t n;
624  	int err = -ECONNRESET;
625  
626  	if (m->err < 0)
627  		return;
628  
629  	n = p9_fd_poll(m->client, NULL, &err);
630  	if (n & (EPOLLERR | EPOLLHUP | EPOLLNVAL)) {
631  		p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n);
632  		p9_conn_cancel(m, err);
633  	}
634  
635  	if (n & EPOLLIN) {
636  		set_bit(Rpending, &m->wsched);
637  		p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);
638  		if (!test_and_set_bit(Rworksched, &m->wsched)) {
639  			p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m);
640  			schedule_work(&m->rq);
641  		}
642  	}
643  
644  	if (n & EPOLLOUT) {
645  		set_bit(Wpending, &m->wsched);
646  		p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m);
647  		if ((m->wsize || !list_empty(&m->unsent_req_list)) &&
648  		    !test_and_set_bit(Wworksched, &m->wsched)) {
649  			p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m);
650  			schedule_work(&m->wq);
651  		}
652  	}
653  }
654  
655  /**
656   * p9_fd_request - send 9P request
657   * The function can sleep until the request is scheduled for sending.
658   * The function can be interrupted. Return from the function is not
659   * a guarantee that the request is sent successfully.
660   *
661   * @client: client instance
662   * @req: request to be sent
663   *
664   */
665  
666  static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
667  {
668  	__poll_t n;
669  	struct p9_trans_fd *ts = client->trans;
670  	struct p9_conn *m = &ts->conn;
671  
672  	p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n",
673  		 m, current, &req->tc, req->tc.id);
674  	if (m->err < 0)
675  		return m->err;
676  
677  	spin_lock(&m->req_lock);
678  	WRITE_ONCE(req->status, REQ_STATUS_UNSENT);
679  	list_add_tail(&req->req_list, &m->unsent_req_list);
680  	spin_unlock(&m->req_lock);
681  
682  	if (test_and_clear_bit(Wpending, &m->wsched))
683  		n = EPOLLOUT;
684  	else
685  		n = p9_fd_poll(m->client, NULL, NULL);
686  
687  	if (n & EPOLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
688  		schedule_work(&m->wq);
689  
690  	return 0;
691  }
692  
693  static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
694  {
695  	struct p9_trans_fd *ts = client->trans;
696  	struct p9_conn *m = &ts->conn;
697  	int ret = 1;
698  
699  	p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
700  
701  	spin_lock(&m->req_lock);
702  
703  	if (req->status == REQ_STATUS_UNSENT) {
704  		list_del(&req->req_list);
705  		WRITE_ONCE(req->status, REQ_STATUS_FLSHD);
706  		p9_req_put(client, req);
707  		ret = 0;
708  	}
709  	spin_unlock(&m->req_lock);
710  
711  	return ret;
712  }
713  
714  static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
715  {
716  	struct p9_trans_fd *ts = client->trans;
717  	struct p9_conn *m = &ts->conn;
718  
719  	p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
720  
721  	spin_lock(&m->req_lock);
722  	/* Ignore cancelled request if message has been received
723  	 * before lock.
724  	 */
725  	if (req->status == REQ_STATUS_RCVD) {
726  		spin_unlock(&m->req_lock);
727  		return 0;
728  	}
729  
730  	/* we haven't received a response for oldreq,
731  	 * remove it from the list.
732  	 */
733  	list_del(&req->req_list);
734  	WRITE_ONCE(req->status, REQ_STATUS_FLSHD);
735  	spin_unlock(&m->req_lock);
736  
737  	p9_req_put(client, req);
738  
739  	return 0;
740  }
741  
742  static int p9_fd_show_options(struct seq_file *m, struct p9_client *clnt)
743  {
744  	if (clnt->trans_mod == &p9_tcp_trans) {
745  		if (clnt->trans_opts.tcp.port != P9_PORT)
746  			seq_printf(m, ",port=%u", clnt->trans_opts.tcp.port);
747  	} else if (clnt->trans_mod == &p9_fd_trans) {
748  		if (clnt->trans_opts.fd.rfd != ~0)
749  			seq_printf(m, ",rfd=%u", clnt->trans_opts.fd.rfd);
750  		if (clnt->trans_opts.fd.wfd != ~0)
751  			seq_printf(m, ",wfd=%u", clnt->trans_opts.fd.wfd);
752  	}
753  	return 0;
754  }
755  
756  /**
757   * parse_opts - parse mount options into p9_fd_opts structure
758   * @params: options string passed from mount
759   * @opts: fd transport-specific structure to parse options into
760   *
761   * Returns 0 upon success, -ERRNO upon failure
762   */
763  
764  static int parse_opts(char *params, struct p9_fd_opts *opts)
765  {
766  	char *p;
767  	substring_t args[MAX_OPT_ARGS];
768  	int option;
769  	char *options, *tmp_options;
770  
771  	opts->port = P9_PORT;
772  	opts->rfd = ~0;
773  	opts->wfd = ~0;
774  	opts->privport = false;
775  
776  	if (!params)
777  		return 0;
778  
779  	tmp_options = kstrdup(params, GFP_KERNEL);
780  	if (!tmp_options) {
781  		p9_debug(P9_DEBUG_ERROR,
782  			 "failed to allocate copy of option string\n");
783  		return -ENOMEM;
784  	}
785  	options = tmp_options;
786  
787  	while ((p = strsep(&options, ",")) != NULL) {
788  		int token;
789  		int r;
790  		if (!*p)
791  			continue;
792  		token = match_token(p, tokens, args);
793  		if ((token != Opt_err) && (token != Opt_privport)) {
794  			r = match_int(&args[0], &option);
795  			if (r < 0) {
796  				p9_debug(P9_DEBUG_ERROR,
797  					 "integer field, but no integer?\n");
798  				continue;
799  			}
800  		}
801  		switch (token) {
802  		case Opt_port:
803  			opts->port = option;
804  			break;
805  		case Opt_rfdno:
806  			opts->rfd = option;
807  			break;
808  		case Opt_wfdno:
809  			opts->wfd = option;
810  			break;
811  		case Opt_privport:
812  			opts->privport = true;
813  			break;
814  		default:
815  			continue;
816  		}
817  	}
818  
819  	kfree(tmp_options);
820  	return 0;
821  }
822  
823  static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
824  {
825  	struct p9_trans_fd *ts = kzalloc(sizeof(struct p9_trans_fd),
826  					   GFP_KERNEL);
827  	if (!ts)
828  		return -ENOMEM;
829  
830  	ts->rd = fget(rfd);
831  	if (!ts->rd)
832  		goto out_free_ts;
833  	if (!(ts->rd->f_mode & FMODE_READ))
834  		goto out_put_rd;
835  	/* prevent workers from hanging on IO when fd is a pipe */
836  	ts->rd->f_flags |= O_NONBLOCK;
837  	ts->wr = fget(wfd);
838  	if (!ts->wr)
839  		goto out_put_rd;
840  	if (!(ts->wr->f_mode & FMODE_WRITE))
841  		goto out_put_wr;
842  	ts->wr->f_flags |= O_NONBLOCK;
843  
844  	client->trans = ts;
845  	client->status = Connected;
846  
847  	return 0;
848  
849  out_put_wr:
850  	fput(ts->wr);
851  out_put_rd:
852  	fput(ts->rd);
853  out_free_ts:
854  	kfree(ts);
855  	return -EIO;
856  }
857  
858  static int p9_socket_open(struct p9_client *client, struct socket *csocket)
859  {
860  	struct p9_trans_fd *p;
861  	struct file *file;
862  
863  	p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL);
864  	if (!p) {
865  		sock_release(csocket);
866  		return -ENOMEM;
867  	}
868  
869  	csocket->sk->sk_allocation = GFP_NOIO;
870  	csocket->sk->sk_use_task_frag = false;
871  	file = sock_alloc_file(csocket, 0, NULL);
872  	if (IS_ERR(file)) {
873  		pr_err("%s (%d): failed to map fd\n",
874  		       __func__, task_pid_nr(current));
875  		kfree(p);
876  		return PTR_ERR(file);
877  	}
878  
879  	get_file(file);
880  	p->wr = p->rd = file;
881  	client->trans = p;
882  	client->status = Connected;
883  
884  	p->rd->f_flags |= O_NONBLOCK;
885  
886  	p9_conn_create(client);
887  	return 0;
888  }
889  
890  /**
891   * p9_conn_destroy - cancels all pending requests of mux
892   * @m: mux to destroy
893   *
894   */
895  
896  static void p9_conn_destroy(struct p9_conn *m)
897  {
898  	p9_debug(P9_DEBUG_TRANS, "mux %p prev %p next %p\n",
899  		 m, m->mux_list.prev, m->mux_list.next);
900  
901  	p9_mux_poll_stop(m);
902  	cancel_work_sync(&m->rq);
903  	if (m->rreq) {
904  		p9_req_put(m->client, m->rreq);
905  		m->rreq = NULL;
906  	}
907  	cancel_work_sync(&m->wq);
908  	if (m->wreq) {
909  		p9_req_put(m->client, m->wreq);
910  		m->wreq = NULL;
911  	}
912  
913  	p9_conn_cancel(m, -ECONNRESET);
914  
915  	m->client = NULL;
916  }
917  
918  /**
919   * p9_fd_close - shutdown file descriptor transport
920   * @client: client instance
921   *
922   */
923  
924  static void p9_fd_close(struct p9_client *client)
925  {
926  	struct p9_trans_fd *ts;
927  
928  	if (!client)
929  		return;
930  
931  	ts = client->trans;
932  	if (!ts)
933  		return;
934  
935  	client->status = Disconnected;
936  
937  	p9_conn_destroy(&ts->conn);
938  
939  	if (ts->rd)
940  		fput(ts->rd);
941  	if (ts->wr)
942  		fput(ts->wr);
943  
944  	kfree(ts);
945  }
946  
/*
 * valid_ipaddr4 - check that a string starts with a dotted-quad IPv4 address
 * @buf: NUL-terminated string to check
 *
 * Accepts four decimal fields separated by dots, each in the range 0..255.
 * Negative fields are rejected as well: "%d" happily parses a leading minus
 * sign, and only the upper bound used to be checked.
 *
 * Returns 0 if the address is valid, -EINVAL otherwise.
 *
 * stolen from NFS - maybe should be made a generic function?
 */
static inline int valid_ipaddr4(const char *buf)
{
	int rc, count, in[4];

	rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]);
	if (rc != 4)
		return -EINVAL;
	for (count = 0; count < 4; count++) {
		if (in[count] < 0 || in[count] > 255)
			return -EINVAL;
	}
	return 0;
}
963  
964  static int p9_bind_privport(struct socket *sock)
965  {
966  	struct sockaddr_in cl;
967  	int port, err = -EINVAL;
968  
969  	memset(&cl, 0, sizeof(cl));
970  	cl.sin_family = AF_INET;
971  	cl.sin_addr.s_addr = htonl(INADDR_ANY);
972  	for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) {
973  		cl.sin_port = htons((ushort)port);
974  		err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl));
975  		if (err != -EADDRINUSE)
976  			break;
977  	}
978  	return err;
979  }
980  
981  
982  static int
983  p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
984  {
985  	int err;
986  	struct socket *csocket;
987  	struct sockaddr_in sin_server;
988  	struct p9_fd_opts opts;
989  
990  	err = parse_opts(args, &opts);
991  	if (err < 0)
992  		return err;
993  
994  	if (addr == NULL || valid_ipaddr4(addr) < 0)
995  		return -EINVAL;
996  
997  	csocket = NULL;
998  
999  	client->trans_opts.tcp.port = opts.port;
1000  	client->trans_opts.tcp.privport = opts.privport;
1001  	sin_server.sin_family = AF_INET;
1002  	sin_server.sin_addr.s_addr = in_aton(addr);
1003  	sin_server.sin_port = htons(opts.port);
1004  	err = __sock_create(current->nsproxy->net_ns, PF_INET,
1005  			    SOCK_STREAM, IPPROTO_TCP, &csocket, 1);
1006  	if (err) {
1007  		pr_err("%s (%d): problem creating socket\n",
1008  		       __func__, task_pid_nr(current));
1009  		return err;
1010  	}
1011  
1012  	if (opts.privport) {
1013  		err = p9_bind_privport(csocket);
1014  		if (err < 0) {
1015  			pr_err("%s (%d): problem binding to privport\n",
1016  			       __func__, task_pid_nr(current));
1017  			sock_release(csocket);
1018  			return err;
1019  		}
1020  	}
1021  
1022  	err = READ_ONCE(csocket->ops)->connect(csocket,
1023  				    (struct sockaddr *)&sin_server,
1024  				    sizeof(struct sockaddr_in), 0);
1025  	if (err < 0) {
1026  		pr_err("%s (%d): problem connecting socket to %s\n",
1027  		       __func__, task_pid_nr(current), addr);
1028  		sock_release(csocket);
1029  		return err;
1030  	}
1031  
1032  	return p9_socket_open(client, csocket);
1033  }
1034  
1035  static int
1036  p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)
1037  {
1038  	int err;
1039  	struct socket *csocket;
1040  	struct sockaddr_un sun_server;
1041  
1042  	csocket = NULL;
1043  
1044  	if (!addr || !strlen(addr))
1045  		return -EINVAL;
1046  
1047  	if (strlen(addr) >= UNIX_PATH_MAX) {
1048  		pr_err("%s (%d): address too long: %s\n",
1049  		       __func__, task_pid_nr(current), addr);
1050  		return -ENAMETOOLONG;
1051  	}
1052  
1053  	sun_server.sun_family = PF_UNIX;
1054  	strcpy(sun_server.sun_path, addr);
1055  	err = __sock_create(current->nsproxy->net_ns, PF_UNIX,
1056  			    SOCK_STREAM, 0, &csocket, 1);
1057  	if (err < 0) {
1058  		pr_err("%s (%d): problem creating socket\n",
1059  		       __func__, task_pid_nr(current));
1060  
1061  		return err;
1062  	}
1063  	err = READ_ONCE(csocket->ops)->connect(csocket, (struct sockaddr *)&sun_server,
1064  			sizeof(struct sockaddr_un) - 1, 0);
1065  	if (err < 0) {
1066  		pr_err("%s (%d): problem connecting socket: %s: %d\n",
1067  		       __func__, task_pid_nr(current), addr, err);
1068  		sock_release(csocket);
1069  		return err;
1070  	}
1071  
1072  	return p9_socket_open(client, csocket);
1073  }
1074  
1075  static int
1076  p9_fd_create(struct p9_client *client, const char *addr, char *args)
1077  {
1078  	int err;
1079  	struct p9_fd_opts opts;
1080  
1081  	err = parse_opts(args, &opts);
1082  	if (err < 0)
1083  		return err;
1084  	client->trans_opts.fd.rfd = opts.rfd;
1085  	client->trans_opts.fd.wfd = opts.wfd;
1086  
1087  	if (opts.rfd == ~0 || opts.wfd == ~0) {
1088  		pr_err("Insufficient options for proto=fd\n");
1089  		return -ENOPROTOOPT;
1090  	}
1091  
1092  	err = p9_fd_open(client, opts.rfd, opts.wfd);
1093  	if (err < 0)
1094  		return err;
1095  
1096  	p9_conn_create(client);
1097  
1098  	return 0;
1099  }
1100  
/*
 * Transport descriptors.  The three transports differ only in their name and
 * in the create hook; close, request, cancel, cancelled and show_options are
 * shared.
 */

/* trans=tcp: connect to an IPv4 address (see p9_fd_create_tcp()). */
static struct p9_trans_module p9_tcp_trans = {
	.name = "tcp",
	.maxsize = MAX_SOCK_BUF,
	.pooled_rbuffers = false,
	.def = 0,
	.create = p9_fd_create_tcp,
	.close = p9_fd_close,
	.request = p9_fd_request,
	.cancel = p9_fd_cancel,
	.cancelled = p9_fd_cancelled,
	.show_options = p9_fd_show_options,
	.owner = THIS_MODULE,
};
MODULE_ALIAS_9P("tcp");

/* trans=unix: connect to a unix socket path (see p9_fd_create_unix()). */
static struct p9_trans_module p9_unix_trans = {
	.name = "unix",
	.maxsize = MAX_SOCK_BUF,
	.def = 0,
	.create = p9_fd_create_unix,
	.close = p9_fd_close,
	.request = p9_fd_request,
	.cancel = p9_fd_cancel,
	.cancelled = p9_fd_cancelled,
	.show_options = p9_fd_show_options,
	.owner = THIS_MODULE,
};
MODULE_ALIAS_9P("unix");

/* trans=fd: use descriptors passed via rfdno=/wfdno= (see p9_fd_create()). */
static struct p9_trans_module p9_fd_trans = {
	.name = "fd",
	.maxsize = MAX_SOCK_BUF,
	.def = 0,
	.create = p9_fd_create,
	.close = p9_fd_close,
	.request = p9_fd_request,
	.cancel = p9_fd_cancel,
	.cancelled = p9_fd_cancelled,
	.show_options = p9_fd_show_options,
	.owner = THIS_MODULE,
};
MODULE_ALIAS_9P("fd");
1143  
1144  /**
1145   * p9_poll_workfn - poll worker thread
1146   * @work: work queue
1147   *
1148   * polls all v9fs transports for new events and queues the appropriate
1149   * work to the work queue
1150   *
1151   */
1152  
1153  static void p9_poll_workfn(struct work_struct *work)
1154  {
1155  	unsigned long flags;
1156  
1157  	p9_debug(P9_DEBUG_TRANS, "start %p\n", current);
1158  
1159  	spin_lock_irqsave(&p9_poll_lock, flags);
1160  	while (!list_empty(&p9_poll_pending_list)) {
1161  		struct p9_conn *conn = list_first_entry(&p9_poll_pending_list,
1162  							struct p9_conn,
1163  							poll_pending_link);
1164  		list_del_init(&conn->poll_pending_link);
1165  		spin_unlock_irqrestore(&p9_poll_lock, flags);
1166  
1167  		p9_poll_mux(conn);
1168  
1169  		spin_lock_irqsave(&p9_poll_lock, flags);
1170  	}
1171  	spin_unlock_irqrestore(&p9_poll_lock, flags);
1172  
1173  	p9_debug(P9_DEBUG_TRANS, "finish\n");
1174  }
1175  
1176  static int __init p9_trans_fd_init(void)
1177  {
1178  	v9fs_register_trans(&p9_tcp_trans);
1179  	v9fs_register_trans(&p9_unix_trans);
1180  	v9fs_register_trans(&p9_fd_trans);
1181  
1182  	return 0;
1183  }
1184  
1185  static void __exit p9_trans_fd_exit(void)
1186  {
1187  	flush_work(&p9_poll_work);
1188  	v9fs_unregister_trans(&p9_tcp_trans);
1189  	v9fs_unregister_trans(&p9_unix_trans);
1190  	v9fs_unregister_trans(&p9_fd_trans);
1191  }
1192  
/* Module entry points and metadata. */
module_init(p9_trans_fd_init);
module_exit(p9_trans_fd_exit);

MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
MODULE_DESCRIPTION("Filedescriptor Transport for 9P");
MODULE_LICENSE("GPL");
1199