1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * linux/drivers/misc/xillybus_core.c
4  *
5  * Copyright 2011 Xillybus Ltd, http://xillybus.com
6  *
7  * Driver for the Xillybus FPGA/host framework.
8  *
9  * This driver interfaces with a special IP core in an FPGA, setting up
10  * a pipe between a hardware FIFO in the programmable logic and a device
11  * file in the host. The number of such pipes and their attributes are
12  * set up on the logic. This driver detects these automatically and
13  * creates the device files accordingly.
14  */
15 
16 #include <linux/list.h>
17 #include <linux/device.h>
18 #include <linux/module.h>
19 #include <linux/io.h>
20 #include <linux/dma-mapping.h>
21 #include <linux/interrupt.h>
22 #include <linux/sched.h>
23 #include <linux/fs.h>
24 #include <linux/spinlock.h>
25 #include <linux/mutex.h>
26 #include <linux/crc32.h>
27 #include <linux/poll.h>
28 #include <linux/delay.h>
29 #include <linux/slab.h>
30 #include <linux/workqueue.h>
31 #include "xillybus.h"
32 #include "xillybus_class.h"
33 
34 MODULE_DESCRIPTION("Xillybus core functions");
35 MODULE_AUTHOR("Eli Billauer, Xillybus Ltd.");
36 MODULE_ALIAS("xillybus_core");
37 MODULE_LICENSE("GPL v2");
38 
39 /* General timeout is 100 ms, rx timeout is 10 ms */
40 #define XILLY_RX_TIMEOUT (10*HZ/1000)
41 #define XILLY_TIMEOUT (100*HZ/1000)
42 
43 #define fpga_msg_ctrl_reg              0x0008
44 #define fpga_dma_control_reg           0x0020
45 #define fpga_dma_bufno_reg             0x0024
46 #define fpga_dma_bufaddr_lowaddr_reg   0x0028
47 #define fpga_dma_bufaddr_highaddr_reg  0x002c
48 #define fpga_buf_ctrl_reg              0x0030
49 #define fpga_buf_offset_reg            0x0034
50 #define fpga_endian_reg                0x0040
51 
52 #define XILLYMSG_OPCODE_RELEASEBUF 1
53 #define XILLYMSG_OPCODE_QUIESCEACK 2
54 #define XILLYMSG_OPCODE_FIFOEOF 3
55 #define XILLYMSG_OPCODE_FATAL_ERROR 4
56 #define XILLYMSG_OPCODE_NONEMPTY 5
57 
58 static const char xillyname[] = "xillybus";
59 
60 static struct workqueue_struct *xillybus_wq;
61 
62 /*
63  * Locking scheme: Mutexes protect invocations of character device methods.
64  * If both locks are taken, wr_mutex is taken first, rd_mutex second.
65  *
66  * wr_spinlock protects wr_*_buf_idx, wr_empty, wr_sleepy, wr_ready and the
67  * buffers' end_offset fields against changes made by IRQ handler (and in
68  * theory, other file request handlers, but the mutex handles that). Nothing
69  * else.
70  * They are held for short direct memory manipulations. Needless to say,
71  * no mutex locking is allowed when a spinlock is held.
72  *
73  * rd_spinlock does the same with rd_*_buf_idx, rd_empty and end_offset.
74  *
75  * register_mutex is endpoint-specific, and is held when non-atomic
76  * register operations are performed. wr_mutex and rd_mutex may be
77  * held when register_mutex is taken, but none of the spinlocks. Note that
78  * register_mutex doesn't protect against sporadic buf_ctrl_reg writes
79  * which are unrelated to buf_offset_reg, since they are harmless.
80  *
81  * Blocking on the wait queues is allowed with mutexes held, but not with
82  * spinlocks.
83  *
84  * Only interruptible blocking is allowed on mutexes and wait queues.
85  *
86  * All in all, the locking order goes (with skips allowed, of course):
87  * wr_mutex -> rd_mutex -> register_mutex -> wr_spinlock -> rd_spinlock
88  */
89 
90 static void malformed_message(struct xilly_endpoint *endpoint, u32 *buf)
91 {
92 	int opcode;
93 	int msg_channel, msg_bufno, msg_data, msg_dir;
94 
95 	opcode = (buf[0] >> 24) & 0xff;
96 	msg_dir = buf[0] & 1;
97 	msg_channel = (buf[0] >> 1) & 0x7ff;
98 	msg_bufno = (buf[0] >> 12) & 0x3ff;
99 	msg_data = buf[1] & 0xfffffff;
100 
101 	dev_warn(endpoint->dev,
102 		 "Malformed message (skipping): opcode=%d, channel=%03x, dir=%d, bufno=%03x, data=%07x\n",
103 		 opcode, msg_channel, msg_dir, msg_bufno, msg_data);
104 }
105 
106 /*
107  * xillybus_isr assumes the interrupt is allocated exclusively to it,
108  * which is the natural case MSI and several other hardware-oriented
109  * interrupts. Sharing is not allowed.
110  */
111 
irqreturn_t xillybus_isr(int irq, void *data)
{
	struct xilly_endpoint *ep = data;
	u32 *buf;
	unsigned int buf_size;
	int i;
	int opcode;
	unsigned int msg_channel, msg_bufno, msg_data, msg_dir;
	struct xilly_channel *channel;

	buf = ep->msgbuf_addr;
	buf_size = ep->msg_buf_size/sizeof(u32);	/* In 32-bit words */

	/* Hand the message buffer to the CPU before reading it */
	ep->ephw->hw_sync_sgl_for_cpu(ep,
				      ep->msgbuf_dma_addr,
				      ep->msg_buf_size,
				      DMA_FROM_DEVICE);

	/*
	 * First pass: validate the 4-bit message counter on each two-word
	 * message, and find the "last message" flag. A counter mismatch is
	 * NACKed to the FPGA (so it resends), unless it has happened too
	 * many times in a row, in which case we give up.
	 */
	for (i = 0; i < buf_size; i += 2) {
		if (((buf[i+1] >> 28) & 0xf) != ep->msg_counter) {
			malformed_message(ep, &buf[i]);
			dev_warn(ep->dev,
				 "Sending a NACK on counter %x (instead of %x) on entry %d\n",
				 ((buf[i+1] >> 28) & 0xf),
				 ep->msg_counter,
				 i/2);

			if (++ep->failed_messages > 10) {
				dev_err(ep->dev,
					"Lost sync with interrupt messages. Stopping.\n");
			} else {
				/* Return the buffer to the device, then NACK */
				ep->ephw->hw_sync_sgl_for_device(
					ep,
					ep->msgbuf_dma_addr,
					ep->msg_buf_size,
					DMA_FROM_DEVICE);

				iowrite32(0x01,  /* Message NACK */
					  ep->registers + fpga_msg_ctrl_reg);
			}
			return IRQ_HANDLED;
		} else if (buf[i] & (1 << 22)) /* Last message */
			break;
	}

	/* Loop fell through without seeing a "last message" flag */
	if (i >= buf_size) {
		dev_err(ep->dev, "Bad interrupt message. Stopping.\n");
		return IRQ_HANDLED;
	}

	buf_size = i + 2;	/* Words actually occupied by messages */

	for (i = 0; i < buf_size; i += 2) { /* Scan through messages */
		opcode = (buf[i] >> 24) & 0xff;

		msg_dir = buf[i] & 1;
		msg_channel = (buf[i] >> 1) & 0x7ff;
		msg_bufno = (buf[i] >> 12) & 0x3ff;
		msg_data = buf[i+1] & 0xfffffff;

		switch (opcode) {
		case XILLYMSG_OPCODE_RELEASEBUF:
			/* FPGA released a DMA buffer back to the host */
			if ((msg_channel > ep->num_channels) ||
			    (msg_channel == 0)) {
				malformed_message(ep, &buf[i]);
				break;
			}

			channel = ep->channels[msg_channel];

			if (msg_dir) { /* Write channel */
				if (msg_bufno >= channel->num_wr_buffers) {
					malformed_message(ep, &buf[i]);
					break;
				}
				spin_lock(&channel->wr_spinlock);
				channel->wr_buffers[msg_bufno]->end_offset =
					msg_data;
				channel->wr_fpga_buf_idx = msg_bufno;
				channel->wr_empty = 0;
				channel->wr_sleepy = 0;
				spin_unlock(&channel->wr_spinlock);

				wake_up_interruptible(&channel->wr_wait);

			} else {
				/* Read channel */

				if (msg_bufno >= channel->num_rd_buffers) {
					malformed_message(ep, &buf[i]);
					break;
				}

				spin_lock(&channel->rd_spinlock);
				channel->rd_fpga_buf_idx = msg_bufno;
				channel->rd_full = 0;
				spin_unlock(&channel->rd_spinlock);

				wake_up_interruptible(&channel->rd_wait);
				if (!channel->rd_synchronous)
					/* Schedule a delayed autoflush */
					queue_delayed_work(
						xillybus_wq,
						&channel->rd_workitem,
						XILLY_RX_TIMEOUT);
			}

			break;
		case XILLYMSG_OPCODE_NONEMPTY:
			/* FPGA reports data pending in a write-channel buffer */
			if ((msg_channel > ep->num_channels) ||
			    (msg_channel == 0) || (!msg_dir) ||
			    !ep->channels[msg_channel]->wr_supports_nonempty) {
				malformed_message(ep, &buf[i]);
				break;
			}

			channel = ep->channels[msg_channel];

			if (msg_bufno >= channel->num_wr_buffers) {
				malformed_message(ep, &buf[i]);
				break;
			}
			spin_lock(&channel->wr_spinlock);
			if (msg_bufno == channel->wr_host_buf_idx)
				channel->wr_ready = 1;
			spin_unlock(&channel->wr_spinlock);

			wake_up_interruptible(&channel->wr_ready_wait);

			break;
		case XILLYMSG_OPCODE_QUIESCEACK:
			/* Quiesce acknowledged; msg_data carries the IDT length */
			ep->idtlen = msg_data;
			wake_up_interruptible(&ep->ep_wait);

			break;
		case XILLYMSG_OPCODE_FIFOEOF:
			/* EOF on a write channel, effective at buffer msg_bufno */
			if ((msg_channel > ep->num_channels) ||
			    (msg_channel == 0) || (!msg_dir) ||
			    !ep->channels[msg_channel]->num_wr_buffers) {
				malformed_message(ep, &buf[i]);
				break;
			}
			channel = ep->channels[msg_channel];
			spin_lock(&channel->wr_spinlock);
			channel->wr_eof = msg_bufno;
			channel->wr_sleepy = 0;

			channel->wr_hangup = channel->wr_empty &&
				(channel->wr_host_buf_idx == msg_bufno);

			spin_unlock(&channel->wr_spinlock);

			wake_up_interruptible(&channel->wr_wait);

			break;
		case XILLYMSG_OPCODE_FATAL_ERROR:
			ep->fatal_error = 1;
			wake_up_interruptible(&ep->ep_wait); /* For select() */
			dev_err(ep->dev,
				"FPGA reported a fatal error. This means that the low-level communication with the device has failed. This hardware problem is most likely unrelated to Xillybus (neither kernel module nor FPGA core), but reports are still welcome. All I/O is aborted.\n");
			break;
		default:
			malformed_message(ep, &buf[i]);
			break;
		}
	}

	/* Give the message buffer back to the device, then ACK the batch */
	ep->ephw->hw_sync_sgl_for_device(ep,
					 ep->msgbuf_dma_addr,
					 ep->msg_buf_size,
					 DMA_FROM_DEVICE);

	ep->msg_counter = (ep->msg_counter + 1) & 0xf;
	ep->failed_messages = 0;
	iowrite32(0x03, ep->registers + fpga_msg_ctrl_reg); /* Message ACK */

	return IRQ_HANDLED;
}
EXPORT_SYMBOL(xillybus_isr);
290 
291 /*
292  * A few trivial memory management functions.
293  * NOTE: These functions are used only on probe and remove, and therefore
294  * no locks are applied!
295  */
296 
/* Work handler for delayed autoflush of read channels (defined below) */
static void xillybus_autoflush(struct work_struct *work);

/*
 * Allocation state for carving per-channel DMA buffers out of larger
 * page allocations ("salamis"). One instance per transfer direction.
 */
struct xilly_alloc_state {
	void *salami;		/* Current allocation being sliced up */
	int left_of_salami;	/* Bytes not yet handed out from *salami */
	int nbuffer;		/* Next buffer number to announce to the FPGA */
	enum dma_data_direction direction;
	u32 regdirection;	/* Direction flag OR'd into fpga_dma_bufno_reg */
};
306 
/*
 * xilly_get_dma_buffers() - Allocate, map and register DMA buffers
 *
 * Carves @bufnum buffers of @bytebufsize bytes each out of the allocation
 * state @s, maps each for DMA, and announces its bus address and number to
 * the FPGA through the dma_bufaddr/bufno registers. With @buffers == NULL
 * the single buffer becomes the endpoint's message buffer instead.
 *
 * Returns 0 on success, a negative error code otherwise. All allocations
 * are devres-managed, so nothing is freed explicitly on failure.
 */
static int xilly_get_dma_buffers(struct xilly_endpoint *ep,
				 struct xilly_alloc_state *s,
				 struct xilly_buffer **buffers,
				 int bufnum, int bytebufsize)
{
	int i, rc;
	dma_addr_t dma_addr;
	struct device *dev = ep->dev;
	struct xilly_buffer *this_buffer = NULL; /* Init to silence warning */

	if (buffers) { /* Not the message buffer */
		this_buffer = devm_kcalloc(dev, bufnum,
					   sizeof(struct xilly_buffer),
					   GFP_KERNEL);
		if (!this_buffer)
			return -ENOMEM;
	}

	for (i = 0; i < bufnum; i++) {
		/*
		 * Buffers are expected in descending size order, so there
		 * is either enough space for this buffer or none at all.
		 */

		if ((s->left_of_salami < bytebufsize) &&
		    (s->left_of_salami > 0)) {
			dev_err(ep->dev,
				"Corrupt buffer allocation in IDT. Aborting.\n");
			return -ENODEV;
		}

		if (s->left_of_salami == 0) {
			int allocorder, allocsize;

			/* Smallest page order that fits one buffer */
			allocsize = PAGE_SIZE;
			allocorder = 0;
			while (bytebufsize > allocsize) {
				allocsize *= 2;
				allocorder++;
			}

			s->salami = (void *) devm_get_free_pages(
				dev,
				GFP_KERNEL | __GFP_DMA32 | __GFP_ZERO,
				allocorder);
			if (!s->salami)
				return -ENOMEM;

			s->left_of_salami = allocsize;
		}

		rc = ep->ephw->map_single(ep, s->salami,
					  bytebufsize, s->direction,
					  &dma_addr);
		if (rc)
			return rc;

		/* Write the bus address first; bufno write latches it */
		iowrite32((u32) (dma_addr & 0xffffffff),
			  ep->registers + fpga_dma_bufaddr_lowaddr_reg);
		iowrite32(((u32) ((((u64) dma_addr) >> 32) & 0xffffffff)),
			  ep->registers + fpga_dma_bufaddr_highaddr_reg);

		if (buffers) { /* Not the message buffer */
			this_buffer->addr = s->salami;
			this_buffer->dma_addr = dma_addr;
			buffers[i] = this_buffer++;

			iowrite32(s->regdirection | s->nbuffer++,
				  ep->registers + fpga_dma_bufno_reg);
		} else {
			/* Buffer number 0 is the message buffer */
			ep->msgbuf_addr = s->salami;
			ep->msgbuf_dma_addr = dma_addr;
			ep->msg_buf_size = bytebufsize;

			iowrite32(s->regdirection,
				  ep->registers + fpga_dma_bufno_reg);
		}

		s->left_of_salami -= bytebufsize;
		s->salami += bytebufsize;
	}
	return 0;
}
390 
391 static int xilly_setupchannels(struct xilly_endpoint *ep,
392 			       unsigned char *chandesc,
393 			       int entries)
394 {
395 	struct device *dev = ep->dev;
396 	int i, entry, rc;
397 	struct xilly_channel *channel;
398 	int channelnum, bufnum, bufsize, format, is_writebuf;
399 	int bytebufsize;
400 	int synchronous, allowpartial, exclusive_open, seekable;
401 	int supports_nonempty;
402 	int msg_buf_done = 0;
403 
404 	struct xilly_alloc_state rd_alloc = {
405 		.salami = NULL,
406 		.left_of_salami = 0,
407 		.nbuffer = 1,
408 		.direction = DMA_TO_DEVICE,
409 		.regdirection = 0,
410 	};
411 
412 	struct xilly_alloc_state wr_alloc = {
413 		.salami = NULL,
414 		.left_of_salami = 0,
415 		.nbuffer = 1,
416 		.direction = DMA_FROM_DEVICE,
417 		.regdirection = 0x80000000,
418 	};
419 
420 	channel = devm_kcalloc(dev, ep->num_channels,
421 			       sizeof(struct xilly_channel), GFP_KERNEL);
422 	if (!channel)
423 		return -ENOMEM;
424 
425 	ep->channels = devm_kcalloc(dev, ep->num_channels + 1,
426 				    sizeof(struct xilly_channel *),
427 				    GFP_KERNEL);
428 	if (!ep->channels)
429 		return -ENOMEM;
430 
431 	ep->channels[0] = NULL; /* Channel 0 is message buf. */
432 
433 	/* Initialize all channels with defaults */
434 
435 	for (i = 1; i <= ep->num_channels; i++) {
436 		channel->wr_buffers = NULL;
437 		channel->rd_buffers = NULL;
438 		channel->num_wr_buffers = 0;
439 		channel->num_rd_buffers = 0;
440 		channel->wr_fpga_buf_idx = -1;
441 		channel->wr_host_buf_idx = 0;
442 		channel->wr_host_buf_pos = 0;
443 		channel->wr_empty = 1;
444 		channel->wr_ready = 0;
445 		channel->wr_sleepy = 1;
446 		channel->rd_fpga_buf_idx = 0;
447 		channel->rd_host_buf_idx = 0;
448 		channel->rd_host_buf_pos = 0;
449 		channel->rd_full = 0;
450 		channel->wr_ref_count = 0;
451 		channel->rd_ref_count = 0;
452 
453 		spin_lock_init(&channel->wr_spinlock);
454 		spin_lock_init(&channel->rd_spinlock);
455 		mutex_init(&channel->wr_mutex);
456 		mutex_init(&channel->rd_mutex);
457 		init_waitqueue_head(&channel->rd_wait);
458 		init_waitqueue_head(&channel->wr_wait);
459 		init_waitqueue_head(&channel->wr_ready_wait);
460 
461 		INIT_DELAYED_WORK(&channel->rd_workitem, xillybus_autoflush);
462 
463 		channel->endpoint = ep;
464 		channel->chan_num = i;
465 
466 		channel->log2_element_size = 0;
467 
468 		ep->channels[i] = channel++;
469 	}
470 
471 	for (entry = 0; entry < entries; entry++, chandesc += 4) {
472 		struct xilly_buffer **buffers = NULL;
473 
474 		is_writebuf = chandesc[0] & 0x01;
475 		channelnum = (chandesc[0] >> 1) | ((chandesc[1] & 0x0f) << 7);
476 		format = (chandesc[1] >> 4) & 0x03;
477 		allowpartial = (chandesc[1] >> 6) & 0x01;
478 		synchronous = (chandesc[1] >> 7) & 0x01;
479 		bufsize = 1 << (chandesc[2] & 0x1f);
480 		bufnum = 1 << (chandesc[3] & 0x0f);
481 		exclusive_open = (chandesc[2] >> 7) & 0x01;
482 		seekable = (chandesc[2] >> 6) & 0x01;
483 		supports_nonempty = (chandesc[2] >> 5) & 0x01;
484 
485 		if ((channelnum > ep->num_channels) ||
486 		    ((channelnum == 0) && !is_writebuf)) {
487 			dev_err(ep->dev,
488 				"IDT requests channel out of range. Aborting.\n");
489 			return -ENODEV;
490 		}
491 
492 		channel = ep->channels[channelnum]; /* NULL for msg channel */
493 
494 		if (!is_writebuf || channelnum > 0) {
495 			channel->log2_element_size = ((format > 2) ?
496 						      2 : format);
497 
498 			bytebufsize = bufsize *
499 				(1 << channel->log2_element_size);
500 
501 			buffers = devm_kcalloc(dev, bufnum,
502 					       sizeof(struct xilly_buffer *),
503 					       GFP_KERNEL);
504 			if (!buffers)
505 				return -ENOMEM;
506 		} else {
507 			bytebufsize = bufsize << 2;
508 		}
509 
510 		if (!is_writebuf) {
511 			channel->num_rd_buffers = bufnum;
512 			channel->rd_buf_size = bytebufsize;
513 			channel->rd_allow_partial = allowpartial;
514 			channel->rd_synchronous = synchronous;
515 			channel->rd_exclusive_open = exclusive_open;
516 			channel->seekable = seekable;
517 
518 			channel->rd_buffers = buffers;
519 			rc = xilly_get_dma_buffers(ep, &rd_alloc, buffers,
520 						   bufnum, bytebufsize);
521 		} else if (channelnum > 0) {
522 			channel->num_wr_buffers = bufnum;
523 			channel->wr_buf_size = bytebufsize;
524 
525 			channel->seekable = seekable;
526 			channel->wr_supports_nonempty = supports_nonempty;
527 
528 			channel->wr_allow_partial = allowpartial;
529 			channel->wr_synchronous = synchronous;
530 			channel->wr_exclusive_open = exclusive_open;
531 
532 			channel->wr_buffers = buffers;
533 			rc = xilly_get_dma_buffers(ep, &wr_alloc, buffers,
534 						   bufnum, bytebufsize);
535 		} else {
536 			rc = xilly_get_dma_buffers(ep, &wr_alloc, NULL,
537 						   bufnum, bytebufsize);
538 			msg_buf_done++;
539 		}
540 
541 		if (rc)
542 			return -ENOMEM;
543 	}
544 
545 	if (!msg_buf_done) {
546 		dev_err(ep->dev,
547 			"Corrupt IDT: No message buffer. Aborting.\n");
548 		return -ENODEV;
549 	}
550 	return 0;
551 }
552 
/*
 * xilly_scan_idt() - Locate the name list and channel descriptors in the IDT
 *
 * Walks the NUL-terminated device name strings that follow the IDT's
 * version byte, counting them to obtain the number of channels, and fills
 * in @idt_handle with pointers to the name list and to the channel
 * descriptor array (4 bytes per entry) that follows it.
 *
 * Returns 0 on success, -ENODEV if the IDT layout is inconsistent.
 */
static int xilly_scan_idt(struct xilly_endpoint *endpoint,
			  struct xilly_idt_handle *idt_handle)
{
	int count = 0;
	unsigned char *idt = endpoint->channels[1]->wr_buffers[0]->addr;
	unsigned char *end_of_idt = idt + endpoint->idtlen - 4;
	unsigned char *scan;
	int len;

	scan = idt + 1;		/* Skip the version byte */
	idt_handle->names = scan;

	/* Each iteration consumes one NUL-terminated name string */
	while ((scan <= end_of_idt) && *scan) {
		while ((scan <= end_of_idt) && *scan++)
			/* Do nothing, just scan thru string */;
		count++;
	}

	idt_handle->names_len = scan - idt_handle->names;

	scan++;		/* Skip the empty string terminating the list */

	if (scan > end_of_idt) {
		dev_err(endpoint->dev,
			"IDT device name list overflow. Aborting.\n");
		return -ENODEV;
	}
	idt_handle->chandesc = scan;

	/* Remaining bytes minus trailing overhead — presumably the CRC
	 * bytes checked in xilly_obtain_idt(); TODO confirm the "3".
	 */
	len = endpoint->idtlen - (3 + ((int) (scan - idt)));

	if (len & 0x03) {
		dev_err(endpoint->dev,
			"Corrupt IDT device name list. Aborting.\n");
		return -ENODEV;
	}

	idt_handle->entries = len >> 2;	/* 4 bytes per descriptor */
	endpoint->num_channels = count;

	return 0;
}
595 
/*
 * xilly_obtain_idt() - Request the IDT from the FPGA and validate it
 *
 * Asks the FPGA (opcode 3 on channel 0) to send the IDT into channel 1's
 * first write buffer, waits up to XILLY_TIMEOUT for it to arrive, and then
 * checks its length against endpoint->idtlen, its CRC32, and its version
 * byte (anything above 0x82 is rejected).
 *
 * Returns 0 on success, -EIO on a reported fatal error, -ENODEV otherwise.
 */
static int xilly_obtain_idt(struct xilly_endpoint *endpoint)
{
	struct xilly_channel *channel;
	unsigned char *version;
	long t;

	channel = endpoint->channels[1]; /* This should be generated ad-hoc */

	channel->wr_sleepy = 1;	/* Cleared by the ISR on buffer release */

	iowrite32(1 |
		  (3 << 24), /* Opcode 3 for channel 0 = Send IDT */
		  endpoint->registers + fpga_buf_ctrl_reg);

	t = wait_event_interruptible_timeout(channel->wr_wait,
					     (!channel->wr_sleepy),
					     XILLY_TIMEOUT);

	if (t <= 0) {
		dev_err(endpoint->dev, "Failed to obtain IDT. Aborting.\n");

		if (endpoint->fatal_error)
			return -EIO;

		return -ENODEV;
	}

	/* Make the DMA'd IDT visible to the CPU before inspecting it */
	endpoint->ephw->hw_sync_sgl_for_cpu(
		channel->endpoint,
		channel->wr_buffers[0]->dma_addr,
		channel->wr_buf_size,
		DMA_FROM_DEVICE);

	if (channel->wr_buffers[0]->end_offset != endpoint->idtlen) {
		dev_err(endpoint->dev,
			"IDT length mismatch (%d != %d). Aborting.\n",
			channel->wr_buffers[0]->end_offset, endpoint->idtlen);
		return -ENODEV;
	}

	/* CRC over payload plus appended CRC must come out zero */
	if (crc32_le(~0, channel->wr_buffers[0]->addr,
		     endpoint->idtlen+1) != 0) {
		dev_err(endpoint->dev, "IDT failed CRC check. Aborting.\n");
		return -ENODEV;
	}

	version = channel->wr_buffers[0]->addr;

	/* Check version number. Reject anything above 0x82. */
	if (*version > 0x82) {
		dev_err(endpoint->dev,
			"No support for IDT version 0x%02x. Maybe the xillybus driver needs an upgrade. Aborting.\n",
			*version);
		return -ENODEV;
	}

	return 0;
}
654 
/*
 * xillybus_read() - read() method for a Xillybus device file
 *
 * Data arrives from the FPGA through the channel's ring of wr_* DMA
 * buffers. This loop copies whatever is available to userspace; when the
 * buffers are exhausted it nudges the FPGA (offset-limit or flush-all
 * commands) and sleeps on wr_wait until enough data arrives, EOF is
 * reached, or the RX deadline expires. The wr_mutex is dropped and
 * retaken around indefinite sleeps.
 */
static ssize_t xillybus_read(struct file *filp, char __user *userbuf,
			     size_t count, loff_t *f_pos)
{
	ssize_t rc;
	unsigned long flags;
	int bytes_done = 0;
	int no_time_left = 0;	/* Set once the RX deadline has passed */
	long deadline, left_to_sleep;
	struct xilly_channel *channel = filp->private_data;

	int empty, reached_eof, exhausted, ready;
	/* Initializations are there only to silence warnings */

	int howmany = 0, bufpos = 0, bufidx = 0, bufferdone = 0;
	int waiting_bufidx;

	if (channel->endpoint->fatal_error)
		return -EIO;

	deadline = jiffies + 1 + XILLY_RX_TIMEOUT;

	rc = mutex_lock_interruptible(&channel->wr_mutex);
	if (rc)
		return rc;

	while (1) { /* Note that we may drop mutex within this loop */
		int bytes_to_do = count - bytes_done;

		/* Snapshot and update ring state under the spinlock */
		spin_lock_irqsave(&channel->wr_spinlock, flags);

		empty = channel->wr_empty;
		ready = !empty || channel->wr_ready;

		if (!empty) {
			bufidx = channel->wr_host_buf_idx;
			bufpos = channel->wr_host_buf_pos;
			howmany = ((channel->wr_buffers[bufidx]->end_offset
				    + 1) << channel->log2_element_size)
				- bufpos;

			/* Update wr_host_* to its post-operation state */
			if (howmany > bytes_to_do) {
				bufferdone = 0;

				howmany = bytes_to_do;
				channel->wr_host_buf_pos += howmany;
			} else {
				bufferdone = 1;

				channel->wr_host_buf_pos = 0;

				if (bufidx == channel->wr_fpga_buf_idx) {
					channel->wr_empty = 1;
					channel->wr_sleepy = 1;
					channel->wr_ready = 0;
				}

				if (bufidx >= (channel->num_wr_buffers - 1))
					channel->wr_host_buf_idx = 0;
				else
					channel->wr_host_buf_idx++;
			}
		}

		/*
		 * Marking our situation after the possible changes above,
		 * for use after releasing the spinlock.
		 *
		 * empty = empty before change
		 * exhausted = empty after possible change
		 */

		reached_eof = channel->wr_empty &&
			(channel->wr_host_buf_idx == channel->wr_eof);
		channel->wr_hangup = reached_eof;
		exhausted = channel->wr_empty;
		waiting_bufidx = channel->wr_host_buf_idx;

		spin_unlock_irqrestore(&channel->wr_spinlock, flags);

		if (!empty) { /* Go on, now without the spinlock */

			if (bufpos == 0) /* Position zero means it's virgin */
				channel->endpoint->ephw->hw_sync_sgl_for_cpu(
					channel->endpoint,
					channel->wr_buffers[bufidx]->dma_addr,
					channel->wr_buf_size,
					DMA_FROM_DEVICE);

			if (copy_to_user(
				    userbuf,
				    channel->wr_buffers[bufidx]->addr
				    + bufpos, howmany))
				rc = -EFAULT;

			userbuf += howmany;
			bytes_done += howmany;

			if (bufferdone) {
				/* Hand the drained buffer back to the device */
				channel->endpoint->ephw->hw_sync_sgl_for_device(
					channel->endpoint,
					channel->wr_buffers[bufidx]->dma_addr,
					channel->wr_buf_size,
					DMA_FROM_DEVICE);

				/*
				 * Tell FPGA the buffer is done with. It's an
				 * atomic operation to the FPGA, so what
				 * happens with other channels doesn't matter,
				 * and the certain channel is protected with
				 * the channel-specific mutex.
				 */

				iowrite32(1 | (channel->chan_num << 1) |
					  (bufidx << 12),
					  channel->endpoint->registers +
					  fpga_buf_ctrl_reg);
			}

			if (rc) {
				mutex_unlock(&channel->wr_mutex);
				return rc;
			}
		}

		/* This includes a zero-count return = EOF */
		if ((bytes_done >= count) || reached_eof)
			break;

		if (!exhausted)
			continue; /* More in RAM buffer(s)? Just go on. */

		if ((bytes_done > 0) &&
		    (no_time_left ||
		     (channel->wr_synchronous && channel->wr_allow_partial)))
			break;

		/*
		 * Nonblocking read: The "ready" flag tells us that the FPGA
		 * has data to send. In non-blocking mode, if it isn't on,
		 * just return. But if there is, we jump directly to the point
		 * where we ask for the FPGA to send all it has, and wait
		 * until that data arrives. So in a sense, we *do* block in
		 * nonblocking mode, but only for a very short time.
		 */

		if (!no_time_left && (filp->f_flags & O_NONBLOCK)) {
			if (bytes_done > 0)
				break;

			if (ready)
				goto desperate;

			rc = -EAGAIN;
			break;
		}

		if (!no_time_left || (bytes_done > 0)) {
			/*
			 * Note that in case of an element-misaligned read
			 * request, offsetlimit will include the last element,
			 * which will be partially read from.
			 */
			int offsetlimit = ((count - bytes_done) - 1) >>
				channel->log2_element_size;
			int buf_elements = channel->wr_buf_size >>
				channel->log2_element_size;

			/*
			 * In synchronous mode, always send an offset limit.
			 * Just don't send a value too big.
			 */

			if (channel->wr_synchronous) {
				/* Don't request more than one buffer */
				if (channel->wr_allow_partial &&
				    (offsetlimit >= buf_elements))
					offsetlimit = buf_elements - 1;

				/* Don't request more than all buffers */
				if (!channel->wr_allow_partial &&
				    (offsetlimit >=
				     (buf_elements * channel->num_wr_buffers)))
					offsetlimit = buf_elements *
						channel->num_wr_buffers - 1;
			}

			/*
			 * In asynchronous mode, force early flush of a buffer
			 * only if that will allow returning a full count. The
			 * "offsetlimit < ( ... )" rather than "<=" excludes
			 * requesting a full buffer, which would obviously
			 * cause a buffer transmission anyhow
			 */

			if (channel->wr_synchronous ||
			    (offsetlimit < (buf_elements - 1))) {
				mutex_lock(&channel->endpoint->register_mutex);

				iowrite32(offsetlimit,
					  channel->endpoint->registers +
					  fpga_buf_offset_reg);

				iowrite32(1 | (channel->chan_num << 1) |
					  (2 << 24) |  /* 2 = offset limit */
					  (waiting_bufidx << 12),
					  channel->endpoint->registers +
					  fpga_buf_ctrl_reg);

				mutex_unlock(&channel->endpoint->
					     register_mutex);
			}
		}

		/*
		 * If partial completion is disallowed, there is no point in
		 * timeout sleeping. Neither if no_time_left is set and
		 * there's no data.
		 */

		if (!channel->wr_allow_partial ||
		    (no_time_left && (bytes_done == 0))) {
			/*
			 * This do-loop will run more than once if another
			 * thread reasserted wr_sleepy before we got the mutex
			 * back, so we try again.
			 */

			do {
				mutex_unlock(&channel->wr_mutex);

				if (wait_event_interruptible(
					    channel->wr_wait,
					    (!channel->wr_sleepy)))
					goto interrupted;

				if (mutex_lock_interruptible(
					    &channel->wr_mutex))
					goto interrupted;
			} while (channel->wr_sleepy);

			continue;

interrupted: /* Mutex is not held if got here */
			if (channel->endpoint->fatal_error)
				return -EIO;
			if (bytes_done)
				return bytes_done;
			if (filp->f_flags & O_NONBLOCK)
				return -EAGAIN; /* Don't admit snoozing */
			return -EINTR;
		}

		left_to_sleep = deadline - ((long) jiffies);

		/*
		 * If our time is out, skip the waiting. We may miss wr_sleepy
		 * being deasserted but hey, almost missing the train is like
		 * missing it.
		 */

		if (left_to_sleep > 0) {
			left_to_sleep =
				wait_event_interruptible_timeout(
					channel->wr_wait,
					(!channel->wr_sleepy),
					left_to_sleep);

			if (left_to_sleep > 0) /* wr_sleepy deasserted */
				continue;

			if (left_to_sleep < 0) { /* Interrupt */
				mutex_unlock(&channel->wr_mutex);
				if (channel->endpoint->fatal_error)
					return -EIO;
				if (bytes_done)
					return bytes_done;
				return -EINTR;
			}
		}

desperate:
		no_time_left = 1; /* We're out of sleeping time. Desperate! */

		if (bytes_done == 0) {
			/*
			 * Reaching here means that we allow partial return,
			 * that we've run out of time, and that we have
			 * nothing to return.
			 * So tell the FPGA to send anything it has or gets.
			 */

			iowrite32(1 | (channel->chan_num << 1) |
				  (3 << 24) |  /* Opcode 3, flush it all! */
				  (waiting_bufidx << 12),
				  channel->endpoint->registers +
				  fpga_buf_ctrl_reg);
		}

		/*
		 * Reaching here means that we *do* have data in the buffer,
		 * but the "partial" flag disallows returning less than
		 * required. And we don't have as much. So loop again,
		 * which is likely to end up blocking indefinitely until
		 * enough data has arrived.
		 */
	}

	mutex_unlock(&channel->wr_mutex);

	if (channel->endpoint->fatal_error)
		return -EIO;

	if (rc)
		return rc;

	return bytes_done;
}
973 
974 /*
975  * The timeout argument takes values as follows:
976  *  >0 : Flush with timeout
977  * ==0 : Flush, and wait idefinitely for the flush to complete
978  *  <0 : Autoflush: Flush only if there's a single buffer occupied
979  */
980 
981 static int xillybus_myflush(struct xilly_channel *channel, long timeout)
982 {
983 	int rc;
984 	unsigned long flags;
985 
986 	int end_offset_plus1;
987 	int bufidx, bufidx_minus1;
988 	int i;
989 	int empty;
990 	int new_rd_host_buf_pos;
991 
992 	if (channel->endpoint->fatal_error)
993 		return -EIO;
994 	rc = mutex_lock_interruptible(&channel->rd_mutex);
995 	if (rc)
996 		return rc;
997 
998 	/*
999 	 * Don't flush a closed channel. This can happen when the work queued
1000 	 * autoflush thread fires off after the file has closed. This is not
1001 	 * an error, just something to dismiss.
1002 	 */
1003 
1004 	if (!channel->rd_ref_count)
1005 		goto done;
1006 
1007 	bufidx = channel->rd_host_buf_idx;
1008 
1009 	bufidx_minus1 = (bufidx == 0) ?
1010 		channel->num_rd_buffers - 1 :
1011 		bufidx - 1;
1012 
1013 	end_offset_plus1 = channel->rd_host_buf_pos >>
1014 		channel->log2_element_size;
1015 
1016 	new_rd_host_buf_pos = channel->rd_host_buf_pos -
1017 		(end_offset_plus1 << channel->log2_element_size);
1018 
1019 	/* Submit the current buffer if it's nonempty */
1020 	if (end_offset_plus1) {
1021 		unsigned char *tail = channel->rd_buffers[bufidx]->addr +
1022 			(end_offset_plus1 << channel->log2_element_size);
1023 
1024 		/* Copy  unflushed data, so we can put it in next buffer */
1025 		for (i = 0; i < new_rd_host_buf_pos; i++)
1026 			channel->rd_leftovers[i] = *tail++;
1027 
1028 		spin_lock_irqsave(&channel->rd_spinlock, flags);
1029 
1030 		/* Autoflush only if a single buffer is occupied */
1031 
1032 		if ((timeout < 0) &&
1033 		    (channel->rd_full ||
1034 		     (bufidx_minus1 != channel->rd_fpga_buf_idx))) {
1035 			spin_unlock_irqrestore(&channel->rd_spinlock, flags);
1036 			/*
1037 			 * A new work item may be queued by the ISR exactly
1038 			 * now, since the execution of a work item allows the
1039 			 * queuing of a new one while it's running.
1040 			 */
1041 			goto done;
1042 		}
1043 
1044 		/* The 4th element is never needed for data, so it's a flag */
1045 		channel->rd_leftovers[3] = (new_rd_host_buf_pos != 0);
1046 
1047 		/* Set up rd_full to reflect a certain moment's state */
1048 
1049 		if (bufidx == channel->rd_fpga_buf_idx)
1050 			channel->rd_full = 1;
1051 		spin_unlock_irqrestore(&channel->rd_spinlock, flags);
1052 
1053 		if (bufidx >= (channel->num_rd_buffers - 1))
1054 			channel->rd_host_buf_idx = 0;
1055 		else
1056 			channel->rd_host_buf_idx++;
1057 
1058 		channel->endpoint->ephw->hw_sync_sgl_for_device(
1059 			channel->endpoint,
1060 			channel->rd_buffers[bufidx]->dma_addr,
1061 			channel->rd_buf_size,
1062 			DMA_TO_DEVICE);
1063 
1064 		mutex_lock(&channel->endpoint->register_mutex);
1065 
1066 		iowrite32(end_offset_plus1 - 1,
1067 			  channel->endpoint->registers + fpga_buf_offset_reg);
1068 
1069 		iowrite32((channel->chan_num << 1) | /* Channel ID */
1070 			  (2 << 24) |  /* Opcode 2, submit buffer */
1071 			  (bufidx << 12),
1072 			  channel->endpoint->registers + fpga_buf_ctrl_reg);
1073 
1074 		mutex_unlock(&channel->endpoint->register_mutex);
1075 	} else if (bufidx == 0) {
1076 		bufidx = channel->num_rd_buffers - 1;
1077 	} else {
1078 		bufidx--;
1079 	}
1080 
1081 	channel->rd_host_buf_pos = new_rd_host_buf_pos;
1082 
1083 	if (timeout < 0)
1084 		goto done; /* Autoflush */
1085 
1086 	/*
1087 	 * bufidx is now the last buffer written to (or equal to
1088 	 * rd_fpga_buf_idx if buffer was never written to), and
1089 	 * channel->rd_host_buf_idx the one after it.
1090 	 *
1091 	 * If bufidx == channel->rd_fpga_buf_idx we're either empty or full.
1092 	 */
1093 
1094 	while (1) { /* Loop waiting for draining of buffers */
1095 		spin_lock_irqsave(&channel->rd_spinlock, flags);
1096 
1097 		if (bufidx != channel->rd_fpga_buf_idx)
1098 			channel->rd_full = 1; /*
1099 					       * Not really full,
1100 					       * but needs waiting.
1101 					       */
1102 
1103 		empty = !channel->rd_full;
1104 
1105 		spin_unlock_irqrestore(&channel->rd_spinlock, flags);
1106 
1107 		if (empty)
1108 			break;
1109 
1110 		/*
1111 		 * Indefinite sleep with mutex taken. With data waiting for
1112 		 * flushing user should not be surprised if open() for write
1113 		 * sleeps.
1114 		 */
1115 		if (timeout == 0)
1116 			wait_event_interruptible(channel->rd_wait,
1117 						 (!channel->rd_full));
1118 
1119 		else if (wait_event_interruptible_timeout(
1120 				 channel->rd_wait,
1121 				 (!channel->rd_full),
1122 				 timeout) == 0) {
1123 			dev_warn(channel->endpoint->dev,
1124 				 "Timed out while flushing. Output data may be lost.\n");
1125 
1126 			rc = -ETIMEDOUT;
1127 			break;
1128 		}
1129 
1130 		if (channel->rd_full) {
1131 			rc = -EINTR;
1132 			break;
1133 		}
1134 	}
1135 
1136 done:
1137 	mutex_unlock(&channel->rd_mutex);
1138 
1139 	if (channel->endpoint->fatal_error)
1140 		return -EIO;
1141 
1142 	return rc;
1143 }
1144 
1145 static int xillybus_flush(struct file *filp, fl_owner_t id)
1146 {
1147 	if (!(filp->f_mode & FMODE_WRITE))
1148 		return 0;
1149 
1150 	return xillybus_myflush(filp->private_data, HZ); /* 1 second timeout */
1151 }
1152 
1153 static void xillybus_autoflush(struct work_struct *work)
1154 {
1155 	struct delayed_work *workitem = container_of(
1156 		work, struct delayed_work, work);
1157 	struct xilly_channel *channel = container_of(
1158 		workitem, struct xilly_channel, rd_workitem);
1159 	int rc;
1160 
1161 	rc = xillybus_myflush(channel, -1);
1162 	if (rc == -EINTR)
1163 		dev_warn(channel->endpoint->dev,
1164 			 "Autoflush failed because work queue thread got a signal.\n");
1165 	else if (rc)
1166 		dev_err(channel->endpoint->dev,
1167 			"Autoflush failed under weird circumstances.\n");
1168 }
1169 
/*
 * write() for a Xillybus channel: copy user data into the channel's host
 * DMA buffers and submit completed buffers to the FPGA. The rd_* fields
 * belong to the host-to-FPGA direction (the FPGA "reads"), as the
 * DMA_TO_DEVICE syncs below show.
 *
 * Returns the number of bytes consumed or a negative error. count == 0 is
 * meaningful: it commits whole elements of the current buffer without
 * necessarily blocking, stashing any sub-element remainder in
 * rd_leftovers[] for the next buffer.
 */
static ssize_t xillybus_write(struct file *filp, const char __user *userbuf,
			      size_t count, loff_t *f_pos)
{
	ssize_t rc;
	unsigned long flags;
	int bytes_done = 0;
	struct xilly_channel *channel = filp->private_data;

	int full, exhausted;
	/* Initializations are there only to silence warnings */

	int howmany = 0, bufpos = 0, bufidx = 0, bufferdone = 0;
	int end_offset_plus1 = 0;

	if (channel->endpoint->fatal_error)
		return -EIO;

	rc = mutex_lock_interruptible(&channel->rd_mutex);
	if (rc)
		return rc;

	while (1) { /* Each iteration fills (part of) one host buffer */
		int bytes_to_do = count - bytes_done;

		spin_lock_irqsave(&channel->rd_spinlock, flags);

		full = channel->rd_full;

		if (!full) {
			bufidx = channel->rd_host_buf_idx;
			bufpos = channel->rd_host_buf_pos;
			howmany = channel->rd_buf_size - bufpos;

			/*
			 * Update rd_host_* to its state after this operation.
			 * count=0 means committing the buffer immediately,
			 * which is like flushing, but not necessarily block.
			 */

			if ((howmany > bytes_to_do) &&
			    (count ||
			     ((bufpos >> channel->log2_element_size) == 0))) {
				/* Room remains; buffer not submitted yet */
				bufferdone = 0;

				howmany = bytes_to_do;
				channel->rd_host_buf_pos += howmany;
			} else {
				bufferdone = 1;

				if (count) {
					/* Buffer is filled to the brim */
					end_offset_plus1 =
						channel->rd_buf_size >>
						channel->log2_element_size;
					channel->rd_host_buf_pos = 0;
				} else {
					/*
					 * count=0: commit whole elements only,
					 * and copy the sub-element remainder
					 * into rd_leftovers[] so it lands at
					 * the head of the next buffer.
					 */
					unsigned char *tail;
					int i;

					howmany = 0;

					end_offset_plus1 = bufpos >>
						channel->log2_element_size;

					channel->rd_host_buf_pos -=
						end_offset_plus1 <<
						channel->log2_element_size;

					tail = channel->
						rd_buffers[bufidx]->addr +
						(end_offset_plus1 <<
						 channel->log2_element_size);

					for (i = 0;
					     i < channel->rd_host_buf_pos;
					     i++)
						channel->rd_leftovers[i] =
							*tail++;
				}

				if (bufidx == channel->rd_fpga_buf_idx)
					channel->rd_full = 1;

				if (bufidx >= (channel->num_rd_buffers - 1))
					channel->rd_host_buf_idx = 0;
				else
					channel->rd_host_buf_idx++;
			}
		}

		/*
		 * Marking our situation after the possible changes above,
		 * for use after releasing the spinlock.
		 *
		 * full = full before change
		 * exhausted = full after possible change
		 */

		exhausted = channel->rd_full;

		spin_unlock_irqrestore(&channel->rd_spinlock, flags);

		if (!full) { /* Go on, now without the spinlock */
			unsigned char *head =
				channel->rd_buffers[bufidx]->addr;
			int i;

			/*
			 * rd_leftovers[3] doubles as a flag: nonzero means
			 * leftover bytes from a previous partial flush are
			 * due at the head of this (virgin) buffer.
			 */
			if ((bufpos == 0) || /* Zero means it's virgin */
			    (channel->rd_leftovers[3] != 0)) {
				channel->endpoint->ephw->hw_sync_sgl_for_cpu(
					channel->endpoint,
					channel->rd_buffers[bufidx]->dma_addr,
					channel->rd_buf_size,
					DMA_TO_DEVICE);

				/* Virgin, but leftovers are due */
				for (i = 0; i < bufpos; i++)
					*head++ = channel->rd_leftovers[i];

				channel->rd_leftovers[3] = 0; /* Clear flag */
			}

			if (copy_from_user(
				    channel->rd_buffers[bufidx]->addr + bufpos,
				    userbuf, howmany))
				rc = -EFAULT;

			userbuf += howmany;
			bytes_done += howmany;

			/*
			 * Note: a completed buffer is still handed to the
			 * FPGA even when copy_from_user() failed above; the
			 * -EFAULT is returned in the rc check below.
			 */
			if (bufferdone) {
				channel->endpoint->ephw->hw_sync_sgl_for_device(
					channel->endpoint,
					channel->rd_buffers[bufidx]->dma_addr,
					channel->rd_buf_size,
					DMA_TO_DEVICE);

				mutex_lock(&channel->endpoint->register_mutex);

				iowrite32(end_offset_plus1 - 1,
					  channel->endpoint->registers +
					  fpga_buf_offset_reg);

				iowrite32((channel->chan_num << 1) |
					  (2 << 24) |  /* 2 = submit buffer */
					  (bufidx << 12),
					  channel->endpoint->registers +
					  fpga_buf_ctrl_reg);

				mutex_unlock(&channel->endpoint->
					     register_mutex);

				channel->rd_leftovers[3] =
					(channel->rd_host_buf_pos != 0);
			}

			if (rc) {
				mutex_unlock(&channel->rd_mutex);

				if (channel->endpoint->fatal_error)
					return -EIO;

				if (!channel->rd_synchronous)
					queue_delayed_work(
						xillybus_wq,
						&channel->rd_workitem,
						XILLY_RX_TIMEOUT);

				return rc;
			}
		}

		if (bytes_done >= count)
			break;

		if (!exhausted)
			continue; /* If there's more space, just go on */

		if ((bytes_done > 0) && channel->rd_allow_partial)
			break;

		/*
		 * Indefinite sleep with mutex taken. With data waiting for
		 * flushing, user should not be surprised if open() for write
		 * sleeps.
		 */

		if (filp->f_flags & O_NONBLOCK) {
			rc = -EAGAIN;
			break;
		}

		if (wait_event_interruptible(channel->rd_wait,
					     (!channel->rd_full))) {
			mutex_unlock(&channel->rd_mutex);

			if (channel->endpoint->fatal_error)
				return -EIO;

			if (bytes_done)
				return bytes_done;
			return -EINTR;
		}
	}

	mutex_unlock(&channel->rd_mutex);

	/* Async channel: schedule autoflush so a partial buffer gets out */
	if (!channel->rd_synchronous)
		queue_delayed_work(xillybus_wq,
				   &channel->rd_workitem,
				   XILLY_RX_TIMEOUT);

	if (channel->endpoint->fatal_error)
		return -EIO;

	if (rc)
		return rc;

	/* Synchronous channel: don't return before data reached the FPGA */
	if ((channel->rd_synchronous) && (bytes_done > 0)) {
		rc = xillybus_myflush(filp->private_data, 0); /* No timeout */

		if (rc && (rc != -EINTR))
			return rc;
	}

	return bytes_done;
}
1396 
/*
 * open() for a Xillybus device file.
 *
 * The inode maps to a channel via xillybus_find_inode(); channels[] is
 * indexed with 1 + index. Each direction is handled independently
 * according to f_mode: FMODE_READ pairs with the wr_* fields (FPGA to
 * host) and FMODE_WRITE with the rd_* fields (host to FPGA). On the
 * first open of a direction, host-side state is reset and an
 * open-channel command (opcode 4) is sent to the FPGA.
 */
static int xillybus_open(struct inode *inode, struct file *filp)
{
	int rc;
	unsigned long flags;
	struct xilly_endpoint *endpoint;
	struct xilly_channel *channel;
	int index;

	rc = xillybus_find_inode(inode, (void **)&endpoint, &index);
	if (rc)
		return rc;

	if (endpoint->fatal_error)
		return -EIO;

	channel = endpoint->channels[1 + index];
	filp->private_data = channel;

	/*
	 * It gets complicated because:
	 * 1. We don't want to take a mutex we don't have to
	 * 2. We don't want to open one direction if the other will fail.
	 */

	/* A direction with no buffers isn't implemented on the logic */
	if ((filp->f_mode & FMODE_READ) && (!channel->num_wr_buffers))
		return -ENODEV;

	if ((filp->f_mode & FMODE_WRITE) && (!channel->num_rd_buffers))
		return -ENODEV;

	if ((filp->f_mode & FMODE_READ) && (filp->f_flags & O_NONBLOCK) &&
	    (channel->wr_synchronous || !channel->wr_allow_partial ||
	     !channel->wr_supports_nonempty)) {
		dev_err(endpoint->dev,
			"open() failed: O_NONBLOCK not allowed for read on this device\n");
		return -ENODEV;
	}

	if ((filp->f_mode & FMODE_WRITE) && (filp->f_flags & O_NONBLOCK) &&
	    (channel->rd_synchronous || !channel->rd_allow_partial)) {
		dev_err(endpoint->dev,
			"open() failed: O_NONBLOCK not allowed for write on this device\n");
		return -ENODEV;
	}

	/*
	 * Note: open() may block on getting mutexes despite O_NONBLOCK.
	 * This shouldn't occur normally, since multiple open of the same
	 * file descriptor is almost always prohibited anyhow
	 * (*_exclusive_open is normally set in real-life systems).
	 */

	if (filp->f_mode & FMODE_READ) {
		rc = mutex_lock_interruptible(&channel->wr_mutex);
		if (rc)
			return rc;
	}

	if (filp->f_mode & FMODE_WRITE) {
		rc = mutex_lock_interruptible(&channel->rd_mutex);
		if (rc)
			goto unlock_wr;
	}

	/* Exclusive-open channels allow only one open per direction */
	if ((filp->f_mode & FMODE_READ) &&
	    (channel->wr_ref_count != 0) &&
	    (channel->wr_exclusive_open)) {
		rc = -EBUSY;
		goto unlock;
	}

	if ((filp->f_mode & FMODE_WRITE) &&
	    (channel->rd_ref_count != 0) &&
	    (channel->rd_exclusive_open)) {
		rc = -EBUSY;
		goto unlock;
	}

	if (filp->f_mode & FMODE_READ) {
		if (channel->wr_ref_count == 0) { /* First open of file */
			/* Move the host to first buffer */
			spin_lock_irqsave(&channel->wr_spinlock, flags);
			channel->wr_host_buf_idx = 0;
			channel->wr_host_buf_pos = 0;
			channel->wr_fpga_buf_idx = -1;
			channel->wr_empty = 1;
			channel->wr_ready = 0;
			channel->wr_sleepy = 1;
			channel->wr_eof = -1; /* Negative means no EOF yet */
			channel->wr_hangup = 0;

			spin_unlock_irqrestore(&channel->wr_spinlock, flags);

			/* LSB set: wr direction (cf. rd open below, no LSB) */
			iowrite32(1 | (channel->chan_num << 1) |
				  (4 << 24) |  /* Opcode 4, open channel */
				  ((channel->wr_synchronous & 1) << 23),
				  channel->endpoint->registers +
				  fpga_buf_ctrl_reg);
		}

		channel->wr_ref_count++;
	}

	if (filp->f_mode & FMODE_WRITE) {
		if (channel->rd_ref_count == 0) { /* First open of file */
			/* Move the host to first buffer */
			spin_lock_irqsave(&channel->rd_spinlock, flags);
			channel->rd_host_buf_idx = 0;
			channel->rd_host_buf_pos = 0;
			channel->rd_leftovers[3] = 0; /* No leftovers. */
			channel->rd_fpga_buf_idx = channel->num_rd_buffers - 1;
			channel->rd_full = 0;

			spin_unlock_irqrestore(&channel->rd_spinlock, flags);

			iowrite32((channel->chan_num << 1) |
				  (4 << 24),   /* Opcode 4, open channel */
				  channel->endpoint->registers +
				  fpga_buf_ctrl_reg);
		}

		channel->rd_ref_count++;
	}

unlock:
	if (filp->f_mode & FMODE_WRITE)
		mutex_unlock(&channel->rd_mutex);
unlock_wr:
	if (filp->f_mode & FMODE_READ)
		mutex_unlock(&channel->wr_mutex);

	if (!rc && (!channel->seekable))
		return nonseekable_open(inode, filp);

	return rc;
}
1533 
/*
 * release() for a Xillybus device file: drop each opened direction's
 * reference count and, on the last close of a direction, send the FPGA a
 * close-channel command (opcode 5). For the read (wr_*) direction, also
 * wait until the EOF points exactly one past the last buffer the FPGA
 * submitted, so no buffer is left pending on wrap-up.
 */
static int xillybus_release(struct inode *inode, struct file *filp)
{
	unsigned long flags;
	struct xilly_channel *channel = filp->private_data;

	int buf_idx;
	int eof;

	if (channel->endpoint->fatal_error)
		return -EIO;

	if (filp->f_mode & FMODE_WRITE) {
		mutex_lock(&channel->rd_mutex);

		channel->rd_ref_count--;

		if (channel->rd_ref_count == 0) {
			/*
			 * We rely on the kernel calling flush()
			 * before we get here.
			 */

			iowrite32((channel->chan_num << 1) | /* Channel ID */
				  (5 << 24),  /* Opcode 5, close channel */
				  channel->endpoint->registers +
				  fpga_buf_ctrl_reg);
		}
		mutex_unlock(&channel->rd_mutex);
	}

	if (filp->f_mode & FMODE_READ) {
		mutex_lock(&channel->wr_mutex);

		channel->wr_ref_count--;

		if (channel->wr_ref_count == 0) {
			/* LSB set selects the wr direction of the channel */
			iowrite32(1 | (channel->chan_num << 1) |
				  (5 << 24),  /* Opcode 5, close channel */
				  channel->endpoint->registers +
				  fpga_buf_ctrl_reg);

			/*
			 * This is crazily cautious: We make sure that not
			 * only that we got an EOF (be it because we closed
			 * the channel or because of a user's EOF), but verify
			 * that it's one beyond the last buffer arrived, so
			 * we have no leftover buffers pending before wrapping
			 * up (which can only happen in asynchronous channels,
			 * BTW)
			 */

			while (1) {
				spin_lock_irqsave(&channel->wr_spinlock,
						  flags);
				buf_idx = channel->wr_fpga_buf_idx;
				eof = channel->wr_eof;
				channel->wr_sleepy = 1;
				spin_unlock_irqrestore(&channel->wr_spinlock,
						       flags);

				/*
				 * Check if eof points at the buffer after
				 * the last one the FPGA submitted. Note that
				 * no EOF is marked by negative eof.
				 */

				buf_idx++;
				if (buf_idx == channel->num_wr_buffers)
					buf_idx = 0;

				if (buf_idx == eof)
					break;

				/*
				 * Steal extra 100 ms if awaken by interrupt.
				 * This is a simple workaround for an
				 * interrupt pending when entering, which would
				 * otherwise result in declaring the hardware
				 * non-responsive.
				 */

				if (wait_event_interruptible(
					    channel->wr_wait,
					    (!channel->wr_sleepy)))
					msleep(100);

				/* Still sleepy: hardware never answered */
				if (channel->wr_sleepy) {
					mutex_unlock(&channel->wr_mutex);
					dev_warn(channel->endpoint->dev,
						 "Hardware failed to respond to close command, therefore left in messy state.\n");
					return -EINTR;
				}
			}
		}

		mutex_unlock(&channel->wr_mutex);
	}

	return 0;
}
1634 
/*
 * llseek() for seekable Xillybus channels: compute the new position,
 * require it to be element-aligned, and inform the FPGA with a
 * set-address command (opcode 6) in element units. SEEK_END is treated
 * as seeking from the beginning (see comment below).
 */
static loff_t xillybus_llseek(struct file *filp, loff_t offset, int whence)
{
	struct xilly_channel *channel = filp->private_data;
	loff_t pos = filp->f_pos;
	int rc = 0;

	/*
	 * Take both mutexes not allowing interrupts, since it seems like
	 * common applications don't expect an -EINTR here. Besides, multiple
	 * access to a single file descriptor on seekable devices is a mess
	 * anyhow.
	 */

	if (channel->endpoint->fatal_error)
		return -EIO;

	mutex_lock(&channel->wr_mutex);
	mutex_lock(&channel->rd_mutex);

	switch (whence) {
	case SEEK_SET:
		pos = offset;
		break;
	case SEEK_CUR:
		pos += offset;
		break;
	case SEEK_END:
		pos = offset; /* Going to the end => to the beginning */
		break;
	default:
		rc = -EINVAL;
		goto end;
	}

	/* In any case, we must finish on an element boundary */
	if (pos & ((1 << channel->log2_element_size) - 1)) {
		rc = -EINVAL;
		goto end;
	}

	/* register_mutex serializes the offset + control register pair */
	mutex_lock(&channel->endpoint->register_mutex);

	iowrite32(pos >> channel->log2_element_size,
		  channel->endpoint->registers + fpga_buf_offset_reg);

	iowrite32((channel->chan_num << 1) |
		  (6 << 24),  /* Opcode 6, set address */
		  channel->endpoint->registers + fpga_buf_ctrl_reg);

	mutex_unlock(&channel->endpoint->register_mutex);

end:
	mutex_unlock(&channel->rd_mutex);
	mutex_unlock(&channel->wr_mutex);

	if (rc) /* Return error after releasing mutexes */
		return rc;

	filp->f_pos = pos;

	/*
	 * Since seekable devices are allowed only when the channel is
	 * synchronous, we assume that there is no data pending in either
	 * direction (which holds true as long as no concurrent access on the
	 * file descriptor takes place).
	 * The only thing we may need to throw away is leftovers from partial
	 * write() flush.
	 */

	channel->rd_leftovers[3] = 0;

	return pos;
}
1708 
1709 static __poll_t xillybus_poll(struct file *filp, poll_table *wait)
1710 {
1711 	struct xilly_channel *channel = filp->private_data;
1712 	__poll_t mask = 0;
1713 	unsigned long flags;
1714 
1715 	poll_wait(filp, &channel->endpoint->ep_wait, wait);
1716 
1717 	/*
1718 	 * poll() won't play ball regarding read() channels which
1719 	 * aren't asynchronous and support the nonempty message. Allowing
1720 	 * that will create situations where data has been delivered at
1721 	 * the FPGA, and users expecting select() to wake up, which it may
1722 	 * not.
1723 	 */
1724 
1725 	if (!channel->wr_synchronous && channel->wr_supports_nonempty) {
1726 		poll_wait(filp, &channel->wr_wait, wait);
1727 		poll_wait(filp, &channel->wr_ready_wait, wait);
1728 
1729 		spin_lock_irqsave(&channel->wr_spinlock, flags);
1730 		if (!channel->wr_empty || channel->wr_ready)
1731 			mask |= EPOLLIN | EPOLLRDNORM;
1732 
1733 		if (channel->wr_hangup)
1734 			/*
1735 			 * Not EPOLLHUP, because its behavior is in the
1736 			 * mist, and EPOLLIN does what we want: Wake up
1737 			 * the read file descriptor so it sees EOF.
1738 			 */
1739 			mask |=  EPOLLIN | EPOLLRDNORM;
1740 		spin_unlock_irqrestore(&channel->wr_spinlock, flags);
1741 	}
1742 
1743 	/*
1744 	 * If partial data write is disallowed on a write() channel,
1745 	 * it's pointless to ever signal OK to write, because is could
1746 	 * block despite some space being available.
1747 	 */
1748 
1749 	if (channel->rd_allow_partial) {
1750 		poll_wait(filp, &channel->rd_wait, wait);
1751 
1752 		spin_lock_irqsave(&channel->rd_spinlock, flags);
1753 		if (!channel->rd_full)
1754 			mask |= EPOLLOUT | EPOLLWRNORM;
1755 		spin_unlock_irqrestore(&channel->rd_spinlock, flags);
1756 	}
1757 
1758 	if (channel->endpoint->fatal_error)
1759 		mask |= EPOLLERR;
1760 
1761 	return mask;
1762 }
1763 
/* File operations shared by all Xillybus pipe device files */
static const struct file_operations xillybus_fops = {
	.owner      = THIS_MODULE,
	.read       = xillybus_read,
	.write      = xillybus_write,
	.open       = xillybus_open,
	.flush      = xillybus_flush,
	.release    = xillybus_release,
	.llseek     = xillybus_llseek,
	.poll       = xillybus_poll,
};
1774 
1775 struct xilly_endpoint *xillybus_init_endpoint(struct pci_dev *pdev,
1776 					      struct device *dev,
1777 					      struct xilly_endpoint_hardware
1778 					      *ephw)
1779 {
1780 	struct xilly_endpoint *endpoint;
1781 
1782 	endpoint = devm_kzalloc(dev, sizeof(*endpoint), GFP_KERNEL);
1783 	if (!endpoint)
1784 		return NULL;
1785 
1786 	endpoint->pdev = pdev;
1787 	endpoint->dev = dev;
1788 	endpoint->ephw = ephw;
1789 	endpoint->msg_counter = 0x0b;
1790 	endpoint->failed_messages = 0;
1791 	endpoint->fatal_error = 0;
1792 
1793 	init_waitqueue_head(&endpoint->ep_wait);
1794 	mutex_init(&endpoint->register_mutex);
1795 
1796 	return endpoint;
1797 }
1798 EXPORT_SYMBOL(xillybus_init_endpoint);
1799 
1800 static int xilly_quiesce(struct xilly_endpoint *endpoint)
1801 {
1802 	long t;
1803 
1804 	endpoint->idtlen = -1;
1805 
1806 	iowrite32((u32) (endpoint->dma_using_dac & 0x0001),
1807 		  endpoint->registers + fpga_dma_control_reg);
1808 
1809 	t = wait_event_interruptible_timeout(endpoint->ep_wait,
1810 					     (endpoint->idtlen >= 0),
1811 					     XILLY_TIMEOUT);
1812 	if (t <= 0) {
1813 		dev_err(endpoint->dev,
1814 			"Failed to quiesce the device on exit.\n");
1815 		return -ENODEV;
1816 	}
1817 	return 0;
1818 }
1819 
/*
 * Bootstrap an endpoint: obtain the IDT (channel description table) from
 * the FPGA in three phases, allocate the DMA buffers it describes, and
 * register the device files. The phase I/II allocations live in a devres
 * group that is released once the real channel setup has succeeded.
 *
 * Returns 0 on success or a negative error; on failure after phase I,
 * the hardware is quiesced before returning.
 */
int xillybus_endpoint_discovery(struct xilly_endpoint *endpoint)
{
	int rc;
	long t;

	void *bootstrap_resources;
	int idtbuffersize = (1 << PAGE_SHIFT);
	struct device *dev = endpoint->dev;

	/*
	 * The bogus IDT is used during bootstrap for allocating the initial
	 * message buffer, and then the message buffer and space for the IDT
	 * itself. The initial message buffer is of a single page's size, but
	 * it's soon replaced with a more modest one (and memory is freed).
	 */

	unsigned char bogus_idt[8] = { 1, 224, (PAGE_SHIFT)-2, 0,
				       3, 192, PAGE_SHIFT, 0 };
	struct xilly_idt_handle idt_handle;

	/*
	 * Writing the value 0x00000001 to Endianness register signals which
	 * endianness this processor is using, so the FPGA can swap words as
	 * necessary.
	 */

	iowrite32(1, endpoint->registers + fpga_endian_reg);

	/* Bootstrap phase I: Allocate temporary message buffer */

	bootstrap_resources = devres_open_group(dev, NULL, GFP_KERNEL);
	if (!bootstrap_resources)
		return -ENOMEM;

	endpoint->num_channels = 0;

	rc = xilly_setupchannels(endpoint, bogus_idt, 1);
	if (rc)
		return rc;

	/* Clear the message subsystem (and counter in particular) */
	iowrite32(0x04, endpoint->registers + fpga_msg_ctrl_reg);

	endpoint->idtlen = -1; /* Becomes >= 0 when the FPGA responds */

	/*
	 * Set DMA 32/64 bit mode, quiesce the device (?!) and get IDT
	 * buffer size.
	 */
	iowrite32((u32) (endpoint->dma_using_dac & 0x0001),
		  endpoint->registers + fpga_dma_control_reg);

	t = wait_event_interruptible_timeout(endpoint->ep_wait,
					     (endpoint->idtlen >= 0),
					     XILLY_TIMEOUT);
	if (t <= 0) {
		dev_err(endpoint->dev, "No response from FPGA. Aborting.\n");
		return -ENODEV;
	}

	/* Enable DMA */
	iowrite32((u32) (0x0002 | (endpoint->dma_using_dac & 0x0001)),
		  endpoint->registers + fpga_dma_control_reg);

	/* Bootstrap phase II: Allocate buffer for IDT and obtain it */

	/* Grow the (power of two) buffer until the IDT fits in it */
	while (endpoint->idtlen >= idtbuffersize) {
		idtbuffersize *= 2;
		bogus_idt[6]++;
	}

	endpoint->num_channels = 1;

	rc = xilly_setupchannels(endpoint, bogus_idt, 2);
	if (rc)
		goto failed_idt;

	rc = xilly_obtain_idt(endpoint);
	if (rc)
		goto failed_idt;

	rc = xilly_scan_idt(endpoint, &idt_handle);
	if (rc)
		goto failed_idt;

	devres_close_group(dev, bootstrap_resources);

	/* Bootstrap phase III: Allocate buffers according to IDT */

	rc = xilly_setupchannels(endpoint,
				 idt_handle.chandesc,
				 idt_handle.entries);
	if (rc)
		goto failed_idt;

	rc = xillybus_init_chrdev(dev, &xillybus_fops,
				  endpoint->ephw->owner, endpoint,
				  idt_handle.names,
				  idt_handle.names_len,
				  endpoint->num_channels,
				  xillyname, false);

	if (rc)
		goto failed_idt;

	/* Bootstrap-only resources are no longer needed */
	devres_release_group(dev, bootstrap_resources);

	return 0;

failed_idt:
	xilly_quiesce(endpoint);
	flush_workqueue(xillybus_wq);

	return rc;
}
EXPORT_SYMBOL(xillybus_endpoint_discovery);
1935 
/*
 * Tear down an endpoint: remove its device files, quiesce the FPGA, and
 * drain the work queue. The order matters; see the comment below.
 */
void xillybus_endpoint_remove(struct xilly_endpoint *endpoint)
{
	xillybus_cleanup_chrdev(endpoint, endpoint->dev);

	xilly_quiesce(endpoint);

	/*
	 * Flushing is done upon endpoint release to prevent access to memory
	 * just about to be released. This makes the quiesce complete.
	 */
	flush_workqueue(xillybus_wq);
}
EXPORT_SYMBOL(xillybus_endpoint_remove);
1949 
1950 static int __init xillybus_init(void)
1951 {
1952 	xillybus_wq = alloc_workqueue(xillyname, 0, 0);
1953 	if (!xillybus_wq)
1954 		return -ENOMEM;
1955 
1956 	return 0;
1957 }
1958 
/* Module teardown: destroy the workqueue, which is empty by now */
static void __exit xillybus_exit(void)
{
	/* flush_workqueue() was called for each endpoint released */
	destroy_workqueue(xillybus_wq);
}
1964 
1965 module_init(xillybus_init);
1966 module_exit(xillybus_exit);
1967