xref: /openbmc/linux/drivers/char/xillybus/xillybus_core.c (revision 31ab09b4218879bc394c9faa6da983a82a694600)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * linux/drivers/misc/xillybus_core.c
4  *
5  * Copyright 2011 Xillybus Ltd, http://xillybus.com
6  *
7  * Driver for the Xillybus FPGA/host framework.
8  *
9  * This driver interfaces with a special IP core in an FPGA, setting up
10  * a pipe between a hardware FIFO in the programmable logic and a device
11  * file in the host. The number of such pipes and their attributes are
12  * set up on the logic. This driver detects these automatically and
13  * creates the device files accordingly.
14  */
15 
16 #include <linux/list.h>
17 #include <linux/device.h>
18 #include <linux/module.h>
19 #include <linux/io.h>
20 #include <linux/dma-mapping.h>
21 #include <linux/interrupt.h>
22 #include <linux/sched.h>
23 #include <linux/fs.h>
24 #include <linux/spinlock.h>
25 #include <linux/mutex.h>
26 #include <linux/crc32.h>
27 #include <linux/poll.h>
28 #include <linux/delay.h>
29 #include <linux/slab.h>
30 #include <linux/workqueue.h>
31 #include "xillybus.h"
32 #include "xillybus_class.h"
33 
MODULE_DESCRIPTION("Xillybus core functions");
MODULE_AUTHOR("Eli Billauer, Xillybus Ltd.");
MODULE_ALIAS("xillybus_core");
MODULE_LICENSE("GPL v2");

/*
 * General timeout is 100 ms, rx timeout is 10 ms. Both are expressed in
 * jiffies and are computed with integer arithmetic on HZ.
 */
#define XILLY_RX_TIMEOUT (10*HZ/1000)
#define XILLY_TIMEOUT (100*HZ/1000)

/*
 * Offsets of the FPGA's registers, added to the endpoint's "registers"
 * base address on each access.
 *
 * As used in this file: fpga_msg_ctrl_reg takes the ACK / NACK of an
 * interrupt message batch, the fpga_dma_buf* registers announce a DMA
 * buffer's address and number to the FPGA, fpga_buf_ctrl_reg takes
 * per-channel buffer commands, and fpga_buf_offset_reg holds the offset
 * that accompanies such a command.
 */
#define fpga_msg_ctrl_reg              0x0008
#define fpga_dma_control_reg           0x0020
#define fpga_dma_bufno_reg             0x0024
#define fpga_dma_bufaddr_lowaddr_reg   0x0028
#define fpga_dma_bufaddr_highaddr_reg  0x002c
#define fpga_buf_ctrl_reg              0x0030
#define fpga_buf_offset_reg            0x0034
#define fpga_endian_reg                0x0040

/*
 * Opcodes of the messages arriving from the FPGA. xillybus_isr() takes
 * the opcode from bits 31:24 of a message's first word.
 */
#define XILLYMSG_OPCODE_RELEASEBUF 1
#define XILLYMSG_OPCODE_QUIESCEACK 2
#define XILLYMSG_OPCODE_FIFOEOF 3
#define XILLYMSG_OPCODE_FATAL_ERROR 4
#define XILLYMSG_OPCODE_NONEMPTY 5

static const char xillyname[] = "xillybus";

/* Work queue on which xillybus_isr() queues the channels' rd_workitem */
static struct workqueue_struct *xillybus_wq;
61 
62 /*
63  * Locking scheme: Mutexes protect invocations of character device methods.
64  * If both locks are taken, wr_mutex is taken first, rd_mutex second.
65  *
66  * wr_spinlock protects wr_*_buf_idx, wr_empty, wr_sleepy, wr_ready and the
67  * buffers' end_offset fields against changes made by IRQ handler (and in
68  * theory, other file request handlers, but the mutex handles that). Nothing
69  * else.
70  * They are held for short direct memory manipulations. Needless to say,
71  * no mutex locking is allowed when a spinlock is held.
72  *
73  * rd_spinlock does the same with rd_*_buf_idx, rd_empty and end_offset.
74  *
75  * register_mutex is endpoint-specific, and is held when non-atomic
76  * register operations are performed. wr_mutex and rd_mutex may be
77  * held when register_mutex is taken, but none of the spinlocks. Note that
78  * register_mutex doesn't protect against sporadic buf_ctrl_reg writes
79  * which are unrelated to buf_offset_reg, since they are harmless.
80  *
81  * Blocking on the wait queues is allowed with mutexes held, but not with
82  * spinlocks.
83  *
84  * Only interruptible blocking is allowed on mutexes and wait queues.
85  *
86  * All in all, the locking order goes (with skips allowed, of course):
87  * wr_mutex -> rd_mutex -> register_mutex -> wr_spinlock -> rd_spinlock
88  */
89 
90 static void malformed_message(struct xilly_endpoint *endpoint, u32 *buf)
91 {
92 	int opcode;
93 	int msg_channel, msg_bufno, msg_data, msg_dir;
94 
95 	opcode = (buf[0] >> 24) & 0xff;
96 	msg_dir = buf[0] & 1;
97 	msg_channel = (buf[0] >> 1) & 0x7ff;
98 	msg_bufno = (buf[0] >> 12) & 0x3ff;
99 	msg_data = buf[1] & 0xfffffff;
100 
101 	dev_warn(endpoint->dev,
102 		 "Malformed message (skipping): opcode=%d, channel=%03x, dir=%d, bufno=%03x, data=%07x\n",
103 		 opcode, msg_channel, msg_dir, msg_bufno, msg_data);
104 }
105 
/*
 * xillybus_isr assumes the interrupt is allocated exclusively to it,
 * which is the natural case for MSI and several other hardware-oriented
 * interrupts. Sharing is not allowed.
 */
111 
/*
 * Interrupt handler: process the batch of messages that the FPGA has
 * written into the endpoint's message buffer.
 *
 * @irq:  interrupt number (not used by the handler)
 * @data: the struct xilly_endpoint that the interrupt belongs to
 *
 * Each message consists of two 32-bit words. The handler first walks the
 * buffer to verify that every message carries the expected 4-bit counter
 * (bits 31:28 of the second word), up to the message flagged as the last
 * one (bit 22 of the first word). Only then are the messages dispatched
 * by opcode, after which the batch is acknowledged to the FPGA.
 *
 * Always returns IRQ_HANDLED.
 */
irqreturn_t xillybus_isr(int irq, void *data)
{
	struct xilly_endpoint *ep = data;
	u32 *buf;
	unsigned int buf_size;
	int i;
	int opcode;
	unsigned int msg_channel, msg_bufno, msg_data, msg_dir;
	struct xilly_channel *channel;

	buf = ep->msgbuf_addr;
	buf_size = ep->msg_buf_size/sizeof(u32); /* Size in 32-bit words */

	dma_sync_single_for_cpu(ep->dev, ep->msgbuf_dma_addr,
				ep->msg_buf_size, DMA_FROM_DEVICE);

	/* Pass 1: Validate the counters and locate the last message */
	for (i = 0; i < buf_size; i += 2) {
		if (((buf[i+1] >> 28) & 0xf) != ep->msg_counter) {
			malformed_message(ep, &buf[i]);
			dev_warn(ep->dev,
				 "Sending a NACK on counter %x (instead of %x) on entry %d\n",
				 ((buf[i+1] >> 28) & 0xf),
				 ep->msg_counter,
				 i/2);

			/*
			 * After more than 10 consecutive failures, nothing is
			 * written to the FPGA anymore: Neither NACK nor ACK.
			 */
			if (++ep->failed_messages > 10) {
				dev_err(ep->dev,
					"Lost sync with interrupt messages. Stopping.\n");
			} else {
				dma_sync_single_for_device(ep->dev,
							   ep->msgbuf_dma_addr,
							   ep->msg_buf_size,
							   DMA_FROM_DEVICE);

				iowrite32(0x01,  /* Message NACK */
					  ep->registers + fpga_msg_ctrl_reg);
			}
			return IRQ_HANDLED;
		} else if (buf[i] & (1 << 22)) /* Last message */
			break;
	}

	/* The loop ran to the buffer's end without finding a last message */
	if (i >= buf_size) {
		dev_err(ep->dev, "Bad interrupt message. Stopping.\n");
		return IRQ_HANDLED;
	}

	/* Limit the second pass to the messages up to and including the last */
	buf_size = i + 2;

	for (i = 0; i < buf_size; i += 2) { /* Scan through messages */
		opcode = (buf[i] >> 24) & 0xff;

		msg_dir = buf[i] & 1;
		msg_channel = (buf[i] >> 1) & 0x7ff;
		msg_bufno = (buf[i] >> 12) & 0x3ff;
		msg_data = buf[i+1] & 0xfffffff;

		switch (opcode) {
		case XILLYMSG_OPCODE_RELEASEBUF:
			/* Valid channel numbers are 1 to num_channels */
			if ((msg_channel > ep->num_channels) ||
			    (msg_channel == 0)) {
				malformed_message(ep, &buf[i]);
				break;
			}

			channel = ep->channels[msg_channel];

			if (msg_dir) { /* Write channel */
				if (msg_bufno >= channel->num_wr_buffers) {
					malformed_message(ep, &buf[i]);
					break;
				}
				/*
				 * Record the buffer's end offset and mark the
				 * channel as non-empty, then wake up whoever
				 * waits for data on wr_wait.
				 */
				spin_lock(&channel->wr_spinlock);
				channel->wr_buffers[msg_bufno]->end_offset =
					msg_data;
				channel->wr_fpga_buf_idx = msg_bufno;
				channel->wr_empty = 0;
				channel->wr_sleepy = 0;
				spin_unlock(&channel->wr_spinlock);

				wake_up_interruptible(&channel->wr_wait);

			} else {
				/* Read channel */

				if (msg_bufno >= channel->num_rd_buffers) {
					malformed_message(ep, &buf[i]);
					break;
				}

				spin_lock(&channel->rd_spinlock);
				channel->rd_fpga_buf_idx = msg_bufno;
				channel->rd_full = 0;
				spin_unlock(&channel->rd_spinlock);

				wake_up_interruptible(&channel->rd_wait);
				/* Non-synchronous channels get a delayed flush */
				if (!channel->rd_synchronous)
					queue_delayed_work(
						xillybus_wq,
						&channel->rd_workitem,
						XILLY_RX_TIMEOUT);
			}

			break;
		case XILLYMSG_OPCODE_NONEMPTY:
			/*
			 * Accepted only in the "write" direction, and only on
			 * channels that declared support for this message.
			 */
			if ((msg_channel > ep->num_channels) ||
			    (msg_channel == 0) || (!msg_dir) ||
			    !ep->channels[msg_channel]->wr_supports_nonempty) {
				malformed_message(ep, &buf[i]);
				break;
			}

			channel = ep->channels[msg_channel];

			if (msg_bufno >= channel->num_wr_buffers) {
				malformed_message(ep, &buf[i]);
				break;
			}
			/* Relevant only if it's about the buffer the host is at */
			spin_lock(&channel->wr_spinlock);
			if (msg_bufno == channel->wr_host_buf_idx)
				channel->wr_ready = 1;
			spin_unlock(&channel->wr_spinlock);

			wake_up_interruptible(&channel->wr_ready_wait);

			break;
		case XILLYMSG_OPCODE_QUIESCEACK:
			/* The message's data field is stored as the IDT's length */
			ep->idtlen = msg_data;
			wake_up_interruptible(&ep->ep_wait);

			break;
		case XILLYMSG_OPCODE_FIFOEOF:
			/* Rejected for channels without "write" buffers */
			if ((msg_channel > ep->num_channels) ||
			    (msg_channel == 0) || (!msg_dir) ||
			    !ep->channels[msg_channel]->num_wr_buffers) {
				malformed_message(ep, &buf[i]);
				break;
			}
			channel = ep->channels[msg_channel];
			spin_lock(&channel->wr_spinlock);
			channel->wr_eof = msg_bufno;
			channel->wr_sleepy = 0;

			/* Hang up at once if the host has already reached the EOF */
			channel->wr_hangup = channel->wr_empty &&
				(channel->wr_host_buf_idx == msg_bufno);

			spin_unlock(&channel->wr_spinlock);

			wake_up_interruptible(&channel->wr_wait);

			break;
		case XILLYMSG_OPCODE_FATAL_ERROR:
			ep->fatal_error = 1;
			wake_up_interruptible(&ep->ep_wait); /* For select() */
			dev_err(ep->dev,
				"FPGA reported a fatal error. This means that the low-level communication with the device has failed. This hardware problem is most likely unrelated to Xillybus (neither kernel module nor FPGA core), but reports are still welcome. All I/O is aborted.\n");
			break;
		default:
			malformed_message(ep, &buf[i]);
			break;
		}
	}

	/* Hand the message buffer back to the device before acknowledging */
	dma_sync_single_for_device(ep->dev, ep->msgbuf_dma_addr,
				   ep->msg_buf_size, DMA_FROM_DEVICE);

	/* The counter expected in the next batch wraps around at 4 bits */
	ep->msg_counter = (ep->msg_counter + 1) & 0xf;
	ep->failed_messages = 0;
	iowrite32(0x03, ep->registers + fpga_msg_ctrl_reg); /* Message ACK */

	return IRQ_HANDLED;
}
EXPORT_SYMBOL(xillybus_isr);
285 
286 /*
287  * A few trivial memory management functions.
288  * NOTE: These functions are used only on probe and remove, and therefore
289  * no locks are applied!
290  */
291 
/* Forward declaration: This is the handler of each channel's rd_workitem */
static void xillybus_autoflush(struct work_struct *work);

/*
 * Bookkeeping for carving DMA buffers out of page allocations (the
 * "salami"), one slice at a time. xilly_setupchannels() keeps one such
 * state for each DMA direction, and xilly_get_dma_buffers() updates it.
 */
struct xilly_alloc_state {
	void *salami; /* Start of the yet unused part of the allocation */
	int left_of_salami; /* Number of bytes still unused in the allocation */
	int nbuffer; /* Number given to the next buffer in fpga_dma_bufno_reg */
	enum dma_data_direction direction; /* Direction for the DMA mappings */
	u32 regdirection; /* ORed into the value written to fpga_dma_bufno_reg */
};
301 
302 static void xilly_unmap(void *ptr)
303 {
304 	struct xilly_mapping *data = ptr;
305 
306 	dma_unmap_single(data->device, data->dma_addr,
307 			 data->size, data->direction);
308 
309 	kfree(ptr);
310 }
311 
312 static int xilly_map_single(struct xilly_endpoint *ep,
313 			    void *ptr,
314 			    size_t size,
315 			    int direction,
316 			    dma_addr_t *ret_dma_handle
317 	)
318 {
319 	dma_addr_t addr;
320 	struct xilly_mapping *this;
321 
322 	this = kzalloc(sizeof(*this), GFP_KERNEL);
323 	if (!this)
324 		return -ENOMEM;
325 
326 	addr = dma_map_single(ep->dev, ptr, size, direction);
327 
328 	if (dma_mapping_error(ep->dev, addr)) {
329 		kfree(this);
330 		return -ENODEV;
331 	}
332 
333 	this->device = ep->dev;
334 	this->dma_addr = addr;
335 	this->size = size;
336 	this->direction = direction;
337 
338 	*ret_dma_handle = addr;
339 
340 	return devm_add_action_or_reset(ep->dev, xilly_unmap, this);
341 }
342 
343 static int xilly_get_dma_buffers(struct xilly_endpoint *ep,
344 				 struct xilly_alloc_state *s,
345 				 struct xilly_buffer **buffers,
346 				 int bufnum, int bytebufsize)
347 {
348 	int i, rc;
349 	dma_addr_t dma_addr;
350 	struct device *dev = ep->dev;
351 	struct xilly_buffer *this_buffer = NULL; /* Init to silence warning */
352 
353 	if (buffers) { /* Not the message buffer */
354 		this_buffer = devm_kcalloc(dev, bufnum,
355 					   sizeof(struct xilly_buffer),
356 					   GFP_KERNEL);
357 		if (!this_buffer)
358 			return -ENOMEM;
359 	}
360 
361 	for (i = 0; i < bufnum; i++) {
362 		/*
363 		 * Buffers are expected in descending size order, so there
364 		 * is either enough space for this buffer or none at all.
365 		 */
366 
367 		if ((s->left_of_salami < bytebufsize) &&
368 		    (s->left_of_salami > 0)) {
369 			dev_err(ep->dev,
370 				"Corrupt buffer allocation in IDT. Aborting.\n");
371 			return -ENODEV;
372 		}
373 
374 		if (s->left_of_salami == 0) {
375 			int allocorder, allocsize;
376 
377 			allocsize = PAGE_SIZE;
378 			allocorder = 0;
379 			while (bytebufsize > allocsize) {
380 				allocsize *= 2;
381 				allocorder++;
382 			}
383 
384 			s->salami = (void *) devm_get_free_pages(
385 				dev,
386 				GFP_KERNEL | __GFP_DMA32 | __GFP_ZERO,
387 				allocorder);
388 			if (!s->salami)
389 				return -ENOMEM;
390 
391 			s->left_of_salami = allocsize;
392 		}
393 
394 		rc = xilly_map_single(ep, s->salami,
395 				      bytebufsize, s->direction,
396 				      &dma_addr);
397 		if (rc)
398 			return rc;
399 
400 		iowrite32((u32) (dma_addr & 0xffffffff),
401 			  ep->registers + fpga_dma_bufaddr_lowaddr_reg);
402 		iowrite32(((u32) ((((u64) dma_addr) >> 32) & 0xffffffff)),
403 			  ep->registers + fpga_dma_bufaddr_highaddr_reg);
404 
405 		if (buffers) { /* Not the message buffer */
406 			this_buffer->addr = s->salami;
407 			this_buffer->dma_addr = dma_addr;
408 			buffers[i] = this_buffer++;
409 
410 			iowrite32(s->regdirection | s->nbuffer++,
411 				  ep->registers + fpga_dma_bufno_reg);
412 		} else {
413 			ep->msgbuf_addr = s->salami;
414 			ep->msgbuf_dma_addr = dma_addr;
415 			ep->msg_buf_size = bytebufsize;
416 
417 			iowrite32(s->regdirection,
418 				  ep->registers + fpga_dma_bufno_reg);
419 		}
420 
421 		s->left_of_salami -= bytebufsize;
422 		s->salami += bytebufsize;
423 	}
424 	return 0;
425 }
426 
427 static int xilly_setupchannels(struct xilly_endpoint *ep,
428 			       unsigned char *chandesc,
429 			       int entries)
430 {
431 	struct device *dev = ep->dev;
432 	int i, entry, rc;
433 	struct xilly_channel *channel;
434 	int channelnum, bufnum, bufsize, format, is_writebuf;
435 	int bytebufsize;
436 	int synchronous, allowpartial, exclusive_open, seekable;
437 	int supports_nonempty;
438 	int msg_buf_done = 0;
439 
440 	struct xilly_alloc_state rd_alloc = {
441 		.salami = NULL,
442 		.left_of_salami = 0,
443 		.nbuffer = 1,
444 		.direction = DMA_TO_DEVICE,
445 		.regdirection = 0,
446 	};
447 
448 	struct xilly_alloc_state wr_alloc = {
449 		.salami = NULL,
450 		.left_of_salami = 0,
451 		.nbuffer = 1,
452 		.direction = DMA_FROM_DEVICE,
453 		.regdirection = 0x80000000,
454 	};
455 
456 	channel = devm_kcalloc(dev, ep->num_channels,
457 			       sizeof(struct xilly_channel), GFP_KERNEL);
458 	if (!channel)
459 		return -ENOMEM;
460 
461 	ep->channels = devm_kcalloc(dev, ep->num_channels + 1,
462 				    sizeof(struct xilly_channel *),
463 				    GFP_KERNEL);
464 	if (!ep->channels)
465 		return -ENOMEM;
466 
467 	ep->channels[0] = NULL; /* Channel 0 is message buf. */
468 
469 	/* Initialize all channels with defaults */
470 
471 	for (i = 1; i <= ep->num_channels; i++) {
472 		channel->wr_buffers = NULL;
473 		channel->rd_buffers = NULL;
474 		channel->num_wr_buffers = 0;
475 		channel->num_rd_buffers = 0;
476 		channel->wr_fpga_buf_idx = -1;
477 		channel->wr_host_buf_idx = 0;
478 		channel->wr_host_buf_pos = 0;
479 		channel->wr_empty = 1;
480 		channel->wr_ready = 0;
481 		channel->wr_sleepy = 1;
482 		channel->rd_fpga_buf_idx = 0;
483 		channel->rd_host_buf_idx = 0;
484 		channel->rd_host_buf_pos = 0;
485 		channel->rd_full = 0;
486 		channel->wr_ref_count = 0;
487 		channel->rd_ref_count = 0;
488 
489 		spin_lock_init(&channel->wr_spinlock);
490 		spin_lock_init(&channel->rd_spinlock);
491 		mutex_init(&channel->wr_mutex);
492 		mutex_init(&channel->rd_mutex);
493 		init_waitqueue_head(&channel->rd_wait);
494 		init_waitqueue_head(&channel->wr_wait);
495 		init_waitqueue_head(&channel->wr_ready_wait);
496 
497 		INIT_DELAYED_WORK(&channel->rd_workitem, xillybus_autoflush);
498 
499 		channel->endpoint = ep;
500 		channel->chan_num = i;
501 
502 		channel->log2_element_size = 0;
503 
504 		ep->channels[i] = channel++;
505 	}
506 
507 	for (entry = 0; entry < entries; entry++, chandesc += 4) {
508 		struct xilly_buffer **buffers = NULL;
509 
510 		is_writebuf = chandesc[0] & 0x01;
511 		channelnum = (chandesc[0] >> 1) | ((chandesc[1] & 0x0f) << 7);
512 		format = (chandesc[1] >> 4) & 0x03;
513 		allowpartial = (chandesc[1] >> 6) & 0x01;
514 		synchronous = (chandesc[1] >> 7) & 0x01;
515 		bufsize = 1 << (chandesc[2] & 0x1f);
516 		bufnum = 1 << (chandesc[3] & 0x0f);
517 		exclusive_open = (chandesc[2] >> 7) & 0x01;
518 		seekable = (chandesc[2] >> 6) & 0x01;
519 		supports_nonempty = (chandesc[2] >> 5) & 0x01;
520 
521 		if ((channelnum > ep->num_channels) ||
522 		    ((channelnum == 0) && !is_writebuf)) {
523 			dev_err(ep->dev,
524 				"IDT requests channel out of range. Aborting.\n");
525 			return -ENODEV;
526 		}
527 
528 		channel = ep->channels[channelnum]; /* NULL for msg channel */
529 
530 		if (!is_writebuf || channelnum > 0) {
531 			channel->log2_element_size = ((format > 2) ?
532 						      2 : format);
533 
534 			bytebufsize = bufsize *
535 				(1 << channel->log2_element_size);
536 
537 			buffers = devm_kcalloc(dev, bufnum,
538 					       sizeof(struct xilly_buffer *),
539 					       GFP_KERNEL);
540 			if (!buffers)
541 				return -ENOMEM;
542 		} else {
543 			bytebufsize = bufsize << 2;
544 		}
545 
546 		if (!is_writebuf) {
547 			channel->num_rd_buffers = bufnum;
548 			channel->rd_buf_size = bytebufsize;
549 			channel->rd_allow_partial = allowpartial;
550 			channel->rd_synchronous = synchronous;
551 			channel->rd_exclusive_open = exclusive_open;
552 			channel->seekable = seekable;
553 
554 			channel->rd_buffers = buffers;
555 			rc = xilly_get_dma_buffers(ep, &rd_alloc, buffers,
556 						   bufnum, bytebufsize);
557 		} else if (channelnum > 0) {
558 			channel->num_wr_buffers = bufnum;
559 			channel->wr_buf_size = bytebufsize;
560 
561 			channel->seekable = seekable;
562 			channel->wr_supports_nonempty = supports_nonempty;
563 
564 			channel->wr_allow_partial = allowpartial;
565 			channel->wr_synchronous = synchronous;
566 			channel->wr_exclusive_open = exclusive_open;
567 
568 			channel->wr_buffers = buffers;
569 			rc = xilly_get_dma_buffers(ep, &wr_alloc, buffers,
570 						   bufnum, bytebufsize);
571 		} else {
572 			rc = xilly_get_dma_buffers(ep, &wr_alloc, NULL,
573 						   bufnum, bytebufsize);
574 			msg_buf_done++;
575 		}
576 
577 		if (rc)
578 			return -ENOMEM;
579 	}
580 
581 	if (!msg_buf_done) {
582 		dev_err(ep->dev,
583 			"Corrupt IDT: No message buffer. Aborting.\n");
584 		return -ENODEV;
585 	}
586 	return 0;
587 }
588 
589 static int xilly_scan_idt(struct xilly_endpoint *endpoint,
590 			  struct xilly_idt_handle *idt_handle)
591 {
592 	int count = 0;
593 	unsigned char *idt = endpoint->channels[1]->wr_buffers[0]->addr;
594 	unsigned char *end_of_idt = idt + endpoint->idtlen - 4;
595 	unsigned char *scan;
596 	int len;
597 
598 	scan = idt + 1;
599 	idt_handle->names = scan;
600 
601 	while ((scan <= end_of_idt) && *scan) {
602 		while ((scan <= end_of_idt) && *scan++)
603 			/* Do nothing, just scan thru string */;
604 		count++;
605 	}
606 
607 	idt_handle->names_len = scan - idt_handle->names;
608 
609 	scan++;
610 
611 	if (scan > end_of_idt) {
612 		dev_err(endpoint->dev,
613 			"IDT device name list overflow. Aborting.\n");
614 		return -ENODEV;
615 	}
616 	idt_handle->chandesc = scan;
617 
618 	len = endpoint->idtlen - (3 + ((int) (scan - idt)));
619 
620 	if (len & 0x03) {
621 		dev_err(endpoint->dev,
622 			"Corrupt IDT device name list. Aborting.\n");
623 		return -ENODEV;
624 	}
625 
626 	idt_handle->entries = len >> 2;
627 	endpoint->num_channels = count;
628 
629 	return 0;
630 }
631 
632 static int xilly_obtain_idt(struct xilly_endpoint *endpoint)
633 {
634 	struct xilly_channel *channel;
635 	unsigned char *version;
636 	long t;
637 
638 	channel = endpoint->channels[1]; /* This should be generated ad-hoc */
639 
640 	channel->wr_sleepy = 1;
641 
642 	iowrite32(1 |
643 		  (3 << 24), /* Opcode 3 for channel 0 = Send IDT */
644 		  endpoint->registers + fpga_buf_ctrl_reg);
645 
646 	t = wait_event_interruptible_timeout(channel->wr_wait,
647 					     (!channel->wr_sleepy),
648 					     XILLY_TIMEOUT);
649 
650 	if (t <= 0) {
651 		dev_err(endpoint->dev, "Failed to obtain IDT. Aborting.\n");
652 
653 		if (endpoint->fatal_error)
654 			return -EIO;
655 
656 		return -ENODEV;
657 	}
658 
659 	dma_sync_single_for_cpu(channel->endpoint->dev,
660 				channel->wr_buffers[0]->dma_addr,
661 				channel->wr_buf_size,
662 				DMA_FROM_DEVICE);
663 
664 	if (channel->wr_buffers[0]->end_offset != endpoint->idtlen) {
665 		dev_err(endpoint->dev,
666 			"IDT length mismatch (%d != %d). Aborting.\n",
667 			channel->wr_buffers[0]->end_offset, endpoint->idtlen);
668 		return -ENODEV;
669 	}
670 
671 	if (crc32_le(~0, channel->wr_buffers[0]->addr,
672 		     endpoint->idtlen+1) != 0) {
673 		dev_err(endpoint->dev, "IDT failed CRC check. Aborting.\n");
674 		return -ENODEV;
675 	}
676 
677 	version = channel->wr_buffers[0]->addr;
678 
679 	/* Check version number. Reject anything above 0x82. */
680 	if (*version > 0x82) {
681 		dev_err(endpoint->dev,
682 			"No support for IDT version 0x%02x. Maybe the xillybus driver needs an upgrade. Aborting.\n",
683 			*version);
684 		return -ENODEV;
685 	}
686 
687 	return 0;
688 }
689 
/*
 * read() method: Copy data that has arrived in the channel's "write"
 * (FPGA to host) DMA buffers to user space.
 *
 * @filp:    open file; its private_data is the struct xilly_channel
 * @userbuf: user-space destination buffer
 * @count:   number of bytes requested
 * @f_pos:   file position (not used by this function)
 *
 * The loop consumes whatever is available in the buffers, returning each
 * fully consumed buffer to the FPGA. When the buffers are exhausted before
 * @count bytes were copied, it requests the FPGA to send data and waits
 * for it, as controlled by the channel's wr_synchronous and
 * wr_allow_partial flags, O_NONBLOCK and a deadline that is set slightly
 * more than XILLY_RX_TIMEOUT jiffies after the call's start.
 *
 * Returns the number of bytes copied (zero on EOF), or a negative error
 * code: -EIO if the endpoint has flagged a fatal error, -EFAULT if
 * copying to user space failed, -EAGAIN if nothing is available for a
 * non-blocking read, -EINTR if interrupted before any data was copied, or
 * the error returned by the initial mutex_lock_interruptible().
 */
static ssize_t xillybus_read(struct file *filp, char __user *userbuf,
			     size_t count, loff_t *f_pos)
{
	ssize_t rc;
	unsigned long flags;
	int bytes_done = 0;
	int no_time_left = 0;
	long deadline, left_to_sleep;
	struct xilly_channel *channel = filp->private_data;

	int empty, reached_eof, exhausted, ready;
	/* Initializations are there only to silence warnings */

	int howmany = 0, bufpos = 0, bufidx = 0, bufferdone = 0;
	int waiting_bufidx;

	if (channel->endpoint->fatal_error)
		return -EIO;

	deadline = jiffies + 1 + XILLY_RX_TIMEOUT;

	rc = mutex_lock_interruptible(&channel->wr_mutex);
	if (rc)
		return rc;

	while (1) { /* Note that we may drop mutex within this loop */
		int bytes_to_do = count - bytes_done;

		spin_lock_irqsave(&channel->wr_spinlock, flags);

		empty = channel->wr_empty;
		ready = !empty || channel->wr_ready;

		if (!empty) {
			bufidx = channel->wr_host_buf_idx;
			bufpos = channel->wr_host_buf_pos;
			/* Number of unread bytes left in this buffer */
			howmany = ((channel->wr_buffers[bufidx]->end_offset
				    + 1) << channel->log2_element_size)
				- bufpos;

			/* Update wr_host_* to its post-operation state */
			if (howmany > bytes_to_do) {
				bufferdone = 0;

				howmany = bytes_to_do;
				channel->wr_host_buf_pos += howmany;
			} else {
				bufferdone = 1;

				channel->wr_host_buf_pos = 0;

				/* That was the last buffer the FPGA filled */
				if (bufidx == channel->wr_fpga_buf_idx) {
					channel->wr_empty = 1;
					channel->wr_sleepy = 1;
					channel->wr_ready = 0;
				}

				/* Advance to the next buffer, with wraparound */
				if (bufidx >= (channel->num_wr_buffers - 1))
					channel->wr_host_buf_idx = 0;
				else
					channel->wr_host_buf_idx++;
			}
		}

		/*
		 * Marking our situation after the possible changes above,
		 * for use after releasing the spinlock.
		 *
		 * empty = empty before change
		 * exhausted = empty after possible change
		 */

		reached_eof = channel->wr_empty &&
			(channel->wr_host_buf_idx == channel->wr_eof);
		channel->wr_hangup = reached_eof;
		exhausted = channel->wr_empty;
		waiting_bufidx = channel->wr_host_buf_idx;

		spin_unlock_irqrestore(&channel->wr_spinlock, flags);

		if (!empty) { /* Go on, now without the spinlock */

			if (bufpos == 0) /* Position zero means it's virgin */
				dma_sync_single_for_cpu(channel->endpoint->dev,
							channel->wr_buffers[bufidx]->dma_addr,
							channel->wr_buf_size,
							DMA_FROM_DEVICE);

			/*
			 * A failure is only recorded here, so the buffer is
			 * still handed back to the FPGA below if it's done.
			 */
			if (copy_to_user(
				    userbuf,
				    channel->wr_buffers[bufidx]->addr
				    + bufpos, howmany))
				rc = -EFAULT;

			userbuf += howmany;
			bytes_done += howmany;

			if (bufferdone) {
				dma_sync_single_for_device(channel->endpoint->dev,
							   channel->wr_buffers[bufidx]->dma_addr,
							   channel->wr_buf_size,
							   DMA_FROM_DEVICE);

				/*
				 * Tell FPGA the buffer is done with. It's an
				 * atomic operation to the FPGA, so what
				 * happens with other channels doesn't matter,
				 * and the certain channel is protected with
				 * the channel-specific mutex.
				 */

				iowrite32(1 | (channel->chan_num << 1) |
					  (bufidx << 12),
					  channel->endpoint->registers +
					  fpga_buf_ctrl_reg);
			}

			if (rc) {
				mutex_unlock(&channel->wr_mutex);
				return rc;
			}
		}

		/* This includes a zero-count return = EOF */
		if ((bytes_done >= count) || reached_eof)
			break;

		if (!exhausted)
			continue; /* More in RAM buffer(s)? Just go on. */

		/* Partial data suffices if time is out or the channel allows it */
		if ((bytes_done > 0) &&
		    (no_time_left ||
		     (channel->wr_synchronous && channel->wr_allow_partial)))
			break;

		/*
		 * Nonblocking read: The "ready" flag tells us that the FPGA
		 * has data to send. In non-blocking mode, if it isn't on,
		 * just return. But if there is, we jump directly to the point
		 * where we ask for the FPGA to send all it has, and wait
		 * until that data arrives. So in a sense, we *do* block in
		 * nonblocking mode, but only for a very short time.
		 */

		if (!no_time_left && (filp->f_flags & O_NONBLOCK)) {
			if (bytes_done > 0)
				break;

			if (ready)
				goto desperate;

			rc = -EAGAIN;
			break;
		}

		if (!no_time_left || (bytes_done > 0)) {
			/*
			 * Note that in case of an element-misaligned read
			 * request, offsetlimit will include the last element,
			 * which will be partially read from.
			 */
			int offsetlimit = ((count - bytes_done) - 1) >>
				channel->log2_element_size;
			int buf_elements = channel->wr_buf_size >>
				channel->log2_element_size;

			/*
			 * In synchronous mode, always send an offset limit.
			 * Just don't send a value too big.
			 */

			if (channel->wr_synchronous) {
				/* Don't request more than one buffer */
				if (channel->wr_allow_partial &&
				    (offsetlimit >= buf_elements))
					offsetlimit = buf_elements - 1;

				/* Don't request more than all buffers */
				if (!channel->wr_allow_partial &&
				    (offsetlimit >=
				     (buf_elements * channel->num_wr_buffers)))
					offsetlimit = buf_elements *
						channel->num_wr_buffers - 1;
			}

			/*
			 * In asynchronous mode, force early flush of a buffer
			 * only if that will allow returning a full count. The
			 * "offsetlimit < ( ... )" rather than "<=" excludes
			 * requesting a full buffer, which would obviously
			 * cause a buffer transmission anyhow
			 */

			if (channel->wr_synchronous ||
			    (offsetlimit < (buf_elements - 1))) {
				/* The two register writes must not be split */
				mutex_lock(&channel->endpoint->register_mutex);

				iowrite32(offsetlimit,
					  channel->endpoint->registers +
					  fpga_buf_offset_reg);

				iowrite32(1 | (channel->chan_num << 1) |
					  (2 << 24) |  /* 2 = offset limit */
					  (waiting_bufidx << 12),
					  channel->endpoint->registers +
					  fpga_buf_ctrl_reg);

				mutex_unlock(&channel->endpoint->
					     register_mutex);
			}
		}

		/*
		 * If partial completion is disallowed, there is no point in
		 * timeout sleeping. Neither if no_time_left is set and
		 * there's no data.
		 */

		if (!channel->wr_allow_partial ||
		    (no_time_left && (bytes_done == 0))) {
			/*
			 * This do-loop will run more than once if another
			 * thread reasserted wr_sleepy before we got the mutex
			 * back, so we try again.
			 */

			do {
				mutex_unlock(&channel->wr_mutex);

				if (wait_event_interruptible(
					    channel->wr_wait,
					    (!channel->wr_sleepy)))
					goto interrupted;

				if (mutex_lock_interruptible(
					    &channel->wr_mutex))
					goto interrupted;
			} while (channel->wr_sleepy);

			continue;

interrupted: /* Mutex is not held if got here */
			if (channel->endpoint->fatal_error)
				return -EIO;
			if (bytes_done)
				return bytes_done;
			if (filp->f_flags & O_NONBLOCK)
				return -EAGAIN; /* Don't admit snoozing */
			return -EINTR;
		}

		left_to_sleep = deadline - ((long) jiffies);

		/*
		 * If our time is out, skip the waiting. We may miss wr_sleepy
		 * being deasserted but hey, almost missing the train is like
		 * missing it.
		 */

		if (left_to_sleep > 0) {
			/* Unlike the wait above, the mutex is held here */
			left_to_sleep =
				wait_event_interruptible_timeout(
					channel->wr_wait,
					(!channel->wr_sleepy),
					left_to_sleep);

			if (left_to_sleep > 0) /* wr_sleepy deasserted */
				continue;

			if (left_to_sleep < 0) { /* Interrupt */
				mutex_unlock(&channel->wr_mutex);
				if (channel->endpoint->fatal_error)
					return -EIO;
				if (bytes_done)
					return bytes_done;
				return -EINTR;
			}
		}

desperate:
		no_time_left = 1; /* We're out of sleeping time. Desperate! */

		if (bytes_done == 0) {
			/*
			 * Reaching here means that we allow partial return,
			 * that we've run out of time, and that we have
			 * nothing to return.
			 * So tell the FPGA to send anything it has or gets.
			 */

			iowrite32(1 | (channel->chan_num << 1) |
				  (3 << 24) |  /* Opcode 3, flush it all! */
				  (waiting_bufidx << 12),
				  channel->endpoint->registers +
				  fpga_buf_ctrl_reg);
		}

		/*
		 * Reaching here means that we *do* have data in the buffer,
		 * but the "partial" flag disallows returning less than
		 * required. And we don't have as much. So loop again,
		 * which is likely to end up blocking indefinitely until
		 * enough data has arrived.
		 */
	}

	mutex_unlock(&channel->wr_mutex);

	/* A fatal error takes precedence over any other outcome */
	if (channel->endpoint->fatal_error)
		return -EIO;

	if (rc)
		return rc;

	return bytes_done;
}
1006 
/*
 * The timeout argument takes values as follows:
 *  >0 : Flush with timeout
 * ==0 : Flush, and wait indefinitely for the flush to complete
 *  <0 : Autoflush: Flush only if there's a single buffer occupied
 */
1013 
/*
 * Submit the whole elements pending in the current rd_* buffer to the FPGA
 * and, when timeout >= 0, wait until rd_full is cleared.
 *
 * Bytes beyond the last whole element are not submitted; they are saved in
 * channel->rd_leftovers and rd_host_buf_pos is set to their count.
 *
 * Returns 0 on success (also when the channel has no open writers, or when
 * an autoflush is dismissed), -EIO if the endpoint has fatal_error set, the
 * error from mutex_lock_interruptible() if rd_mutex could not be taken,
 * -ETIMEDOUT if a positive timeout expired, or -EINTR if the wait ended
 * while rd_full was still set.
 */
static int xillybus_myflush(struct xilly_channel *channel, long timeout)
{
	int rc;
	unsigned long flags;

	int end_offset_plus1;
	int bufidx, bufidx_minus1;
	int i;
	int empty;
	int new_rd_host_buf_pos;

	if (channel->endpoint->fatal_error)
		return -EIO;
	rc = mutex_lock_interruptible(&channel->rd_mutex);
	if (rc)
		return rc;

	/*
	 * Don't flush a closed channel. This can happen when the work queued
	 * autoflush thread fires off after the file has closed. This is not
	 * an error, just something to dismiss.
	 */

	if (!channel->rd_ref_count)
		goto done;

	bufidx = channel->rd_host_buf_idx;

	/* Index of the buffer preceding the current one, with wraparound */
	bufidx_minus1 = (bufidx == 0) ?
		channel->num_rd_buffers - 1 :
		bufidx - 1;

	/* Number of whole elements present in the current buffer */
	end_offset_plus1 = channel->rd_host_buf_pos >>
		channel->log2_element_size;

	/* Number of trailing bytes that don't make up a whole element */
	new_rd_host_buf_pos = channel->rd_host_buf_pos -
		(end_offset_plus1 << channel->log2_element_size);

	/* Submit the current buffer if it's nonempty */
	if (end_offset_plus1) {
		unsigned char *tail = channel->rd_buffers[bufidx]->addr +
			(end_offset_plus1 << channel->log2_element_size);

		/* Copy unflushed data, so we can put it in the next buffer */
		for (i = 0; i < new_rd_host_buf_pos; i++)
			channel->rd_leftovers[i] = *tail++;

		spin_lock_irqsave(&channel->rd_spinlock, flags);

		/* Autoflush only if a single buffer is occupied */

		if ((timeout < 0) &&
		    (channel->rd_full ||
		     (bufidx_minus1 != channel->rd_fpga_buf_idx))) {
			spin_unlock_irqrestore(&channel->rd_spinlock, flags);
			/*
			 * A new work item may be queued by the ISR exactly
			 * now, since the execution of a work item allows the
			 * queuing of a new one while it's running.
			 */
			goto done;
		}

		/* The 4th element is never needed for data, so it's a flag */
		channel->rd_leftovers[3] = (new_rd_host_buf_pos != 0);

		/* Set up rd_full to reflect a certain moment's state */

		if (bufidx == channel->rd_fpga_buf_idx)
			channel->rd_full = 1;
		spin_unlock_irqrestore(&channel->rd_spinlock, flags);

		/* Advance the host to the next buffer, with wraparound */
		if (bufidx >= (channel->num_rd_buffers - 1))
			channel->rd_host_buf_idx = 0;
		else
			channel->rd_host_buf_idx++;

		dma_sync_single_for_device(channel->endpoint->dev,
					   channel->rd_buffers[bufidx]->dma_addr,
					   channel->rd_buf_size,
					   DMA_TO_DEVICE);

		/*
		 * The offset and control registers are written as a pair,
		 * so both writes are done under register_mutex.
		 */
		mutex_lock(&channel->endpoint->register_mutex);

		iowrite32(end_offset_plus1 - 1,
			  channel->endpoint->registers + fpga_buf_offset_reg);

		iowrite32((channel->chan_num << 1) | /* Channel ID */
			  (2 << 24) |  /* Opcode 2, submit buffer */
			  (bufidx << 12),
			  channel->endpoint->registers + fpga_buf_ctrl_reg);

		mutex_unlock(&channel->endpoint->register_mutex);
	} else if (bufidx == 0) {
		/* Nothing submitted: step bufidx back to the previous buffer */
		bufidx = channel->num_rd_buffers - 1;
	} else {
		bufidx--;
	}

	channel->rd_host_buf_pos = new_rd_host_buf_pos;

	if (timeout < 0)
		goto done; /* Autoflush */

	/*
	 * bufidx is now the last buffer written to (or equal to
	 * rd_fpga_buf_idx if buffer was never written to), and
	 * channel->rd_host_buf_idx the one after it.
	 *
	 * If bufidx == channel->rd_fpga_buf_idx we're either empty or full.
	 */

	while (1) { /* Loop waiting for draining of buffers */
		spin_lock_irqsave(&channel->rd_spinlock, flags);

		if (bufidx != channel->rd_fpga_buf_idx)
			channel->rd_full = 1; /*
					       * Not really full,
					       * but needs waiting.
					       */

		empty = !channel->rd_full;

		spin_unlock_irqrestore(&channel->rd_spinlock, flags);

		if (empty)
			break;

		/*
		 * Indefinite sleep with mutex taken. With data waiting for
		 * flushing user should not be surprised if open() for write
		 * sleeps.
		 */
		if (timeout == 0)
			wait_event_interruptible(channel->rd_wait,
						 (!channel->rd_full));

		else if (wait_event_interruptible_timeout(
				 channel->rd_wait,
				 (!channel->rd_full),
				 timeout) == 0) {
			dev_warn(channel->endpoint->dev,
				 "Timed out while flushing. Output data may be lost.\n");

			rc = -ETIMEDOUT;
			break;
		}

		/* Woken up with rd_full still set: treat as interrupted */
		if (channel->rd_full) {
			rc = -EINTR;
			break;
		}
	}

done:
	mutex_unlock(&channel->rd_mutex);

	/* A fatal error flagged meanwhile overrides any other result */
	if (channel->endpoint->fatal_error)
		return -EIO;

	return rc;
}
1176 
1177 static int xillybus_flush(struct file *filp, fl_owner_t id)
1178 {
1179 	if (!(filp->f_mode & FMODE_WRITE))
1180 		return 0;
1181 
1182 	return xillybus_myflush(filp->private_data, HZ); /* 1 second timeout */
1183 }
1184 
1185 static void xillybus_autoflush(struct work_struct *work)
1186 {
1187 	struct delayed_work *workitem = container_of(
1188 		work, struct delayed_work, work);
1189 	struct xilly_channel *channel = container_of(
1190 		workitem, struct xilly_channel, rd_workitem);
1191 	int rc;
1192 
1193 	rc = xillybus_myflush(channel, -1);
1194 	if (rc == -EINTR)
1195 		dev_warn(channel->endpoint->dev,
1196 			 "Autoflush failed because work queue thread got a signal.\n");
1197 	else if (rc)
1198 		dev_err(channel->endpoint->dev,
1199 			"Autoflush failed under weird circumstances.\n");
1200 }
1201 
/*
 * write() file operation: copy user data into the channel's rd_* buffers
 * (mapped DMA_TO_DEVICE) and submit each buffer to the FPGA once it has
 * been filled.
 *
 * A call with count == 0 commits the current, partially filled buffer
 * immediately (whole elements only; the remaining bytes are kept as
 * leftovers for the next buffer).
 *
 * Unless the channel is rd_synchronous, a delayed autoflush work item is
 * queued on the way out. On rd_synchronous channels, a successful write of
 * at least one byte is followed by a flush without timeout.
 *
 * Returns the number of bytes accepted, or a negative value: -EIO if the
 * endpoint has fatal_error set, -EFAULT if copy_from_user() failed, -EAGAIN
 * if O_NONBLOCK is set and no buffer space is available, -EINTR if the wait
 * for buffer space was interrupted before any byte was accepted, or an error
 * from mutex_lock_interruptible() / xillybus_myflush().
 */
static ssize_t xillybus_write(struct file *filp, const char __user *userbuf,
			      size_t count, loff_t *f_pos)
{
	ssize_t rc;
	unsigned long flags;
	int bytes_done = 0;
	struct xilly_channel *channel = filp->private_data;

	int full, exhausted;
	/* Initializations are there only to silence warnings */

	int howmany = 0, bufpos = 0, bufidx = 0, bufferdone = 0;
	int end_offset_plus1 = 0;

	if (channel->endpoint->fatal_error)
		return -EIO;

	rc = mutex_lock_interruptible(&channel->rd_mutex);
	if (rc)
		return rc;

	while (1) {
		int bytes_to_do = count - bytes_done;

		spin_lock_irqsave(&channel->rd_spinlock, flags);

		full = channel->rd_full;

		if (!full) {
			bufidx = channel->rd_host_buf_idx;
			bufpos = channel->rd_host_buf_pos;
			/* Room left in the current buffer */
			howmany = channel->rd_buf_size - bufpos;

			/*
			 * Update rd_host_* to its state after this operation.
			 * count=0 means committing the buffer immediately,
			 * which is like flushing, but not necessarily block.
			 */

			if ((howmany > bytes_to_do) &&
			    (count ||
			     ((bufpos >> channel->log2_element_size) == 0))) {
				/* Data fits without filling the buffer */
				bufferdone = 0;

				howmany = bytes_to_do;
				channel->rd_host_buf_pos += howmany;
			} else {
				bufferdone = 1;

				if (count) {
					/* Buffer is filled to its end */
					end_offset_plus1 =
						channel->rd_buf_size >>
						channel->log2_element_size;
					channel->rd_host_buf_pos = 0;
				} else {
					unsigned char *tail;
					int i;

					/* count == 0: nothing to copy in */
					howmany = 0;

					end_offset_plus1 = bufpos >>
						channel->log2_element_size;

					channel->rd_host_buf_pos -=
						end_offset_plus1 <<
						channel->log2_element_size;

					tail = channel->
						rd_buffers[bufidx]->addr +
						(end_offset_plus1 <<
						 channel->log2_element_size);

					/* Save bytes past the last whole element */
					for (i = 0;
					     i < channel->rd_host_buf_pos;
					     i++)
						channel->rd_leftovers[i] =
							*tail++;
				}

				if (bufidx == channel->rd_fpga_buf_idx)
					channel->rd_full = 1;

				/* Advance to the next buffer, with wraparound */
				if (bufidx >= (channel->num_rd_buffers - 1))
					channel->rd_host_buf_idx = 0;
				else
					channel->rd_host_buf_idx++;
			}
		}

		/*
		 * Marking our situation after the possible changes above,
		 * for use after releasing the spinlock.
		 *
		 * full = full before change
		 * exhausted = full after possible change
		 */

		exhausted = channel->rd_full;

		spin_unlock_irqrestore(&channel->rd_spinlock, flags);

		if (!full) { /* Go on, now without the spinlock */
			unsigned char *head =
				channel->rd_buffers[bufidx]->addr;
			int i;

			if ((bufpos == 0) || /* Zero means it's virgin */
			    (channel->rd_leftovers[3] != 0)) {
				dma_sync_single_for_cpu(channel->endpoint->dev,
							channel->rd_buffers[bufidx]->dma_addr,
							channel->rd_buf_size,
							DMA_TO_DEVICE);

				/* Virgin, but leftovers are due */
				for (i = 0; i < bufpos; i++)
					*head++ = channel->rd_leftovers[i];

				channel->rd_leftovers[3] = 0; /* Clear flag */
			}

			/*
			 * A failed copy is recorded in rc, but the
			 * bookkeeping below is carried out regardless, in
			 * line with the rd_host_* state already updated
			 * above.
			 */
			if (copy_from_user(
				    channel->rd_buffers[bufidx]->addr + bufpos,
				    userbuf, howmany))
				rc = -EFAULT;

			userbuf += howmany;
			bytes_done += howmany;

			if (bufferdone) {
				dma_sync_single_for_device(channel->endpoint->dev,
							   channel->rd_buffers[bufidx]->dma_addr,
							   channel->rd_buf_size,
							   DMA_TO_DEVICE);

				/*
				 * The offset and control registers are
				 * written as a pair under register_mutex.
				 */
				mutex_lock(&channel->endpoint->register_mutex);

				iowrite32(end_offset_plus1 - 1,
					  channel->endpoint->registers +
					  fpga_buf_offset_reg);

				iowrite32((channel->chan_num << 1) |
					  (2 << 24) |  /* 2 = submit buffer */
					  (bufidx << 12),
					  channel->endpoint->registers +
					  fpga_buf_ctrl_reg);

				mutex_unlock(&channel->endpoint->
					     register_mutex);

				/* Flag whether leftovers await the next buffer */
				channel->rd_leftovers[3] =
					(channel->rd_host_buf_pos != 0);
			}

			if (rc) {
				mutex_unlock(&channel->rd_mutex);

				if (channel->endpoint->fatal_error)
					return -EIO;

				if (!channel->rd_synchronous)
					queue_delayed_work(
						xillybus_wq,
						&channel->rd_workitem,
						XILLY_RX_TIMEOUT);

				return rc;
			}
		}

		if (bytes_done >= count)
			break;

		if (!exhausted)
			continue; /* If there's more space, just go on */

		if ((bytes_done > 0) && channel->rd_allow_partial)
			break;

		/*
		 * Indefinite sleep with mutex taken. With data waiting for
		 * flushing, user should not be surprised if open() for write
		 * sleeps.
		 */

		if (filp->f_flags & O_NONBLOCK) {
			rc = -EAGAIN;
			break;
		}

		if (wait_event_interruptible(channel->rd_wait,
					     (!channel->rd_full))) {
			mutex_unlock(&channel->rd_mutex);

			if (channel->endpoint->fatal_error)
				return -EIO;

			if (bytes_done)
				return bytes_done;
			return -EINTR;
		}
	}

	mutex_unlock(&channel->rd_mutex);

	/* Schedule an autoflush for data left in a partially filled buffer */
	if (!channel->rd_synchronous)
		queue_delayed_work(xillybus_wq,
				   &channel->rd_workitem,
				   XILLY_RX_TIMEOUT);

	if (channel->endpoint->fatal_error)
		return -EIO;

	if (rc)
		return rc;

	if ((channel->rd_synchronous) && (bytes_done > 0)) {
		rc = xillybus_myflush(filp->private_data, 0); /* No timeout */

		/* An interrupted flush still reports the bytes written */
		if (rc && (rc != -EINTR))
			return rc;
	}

	return bytes_done;
}
1426 
1427 static int xillybus_open(struct inode *inode, struct file *filp)
1428 {
1429 	int rc;
1430 	unsigned long flags;
1431 	struct xilly_endpoint *endpoint;
1432 	struct xilly_channel *channel;
1433 	int index;
1434 
1435 	rc = xillybus_find_inode(inode, (void **)&endpoint, &index);
1436 	if (rc)
1437 		return rc;
1438 
1439 	if (endpoint->fatal_error)
1440 		return -EIO;
1441 
1442 	channel = endpoint->channels[1 + index];
1443 	filp->private_data = channel;
1444 
1445 	/*
1446 	 * It gets complicated because:
1447 	 * 1. We don't want to take a mutex we don't have to
1448 	 * 2. We don't want to open one direction if the other will fail.
1449 	 */
1450 
1451 	if ((filp->f_mode & FMODE_READ) && (!channel->num_wr_buffers))
1452 		return -ENODEV;
1453 
1454 	if ((filp->f_mode & FMODE_WRITE) && (!channel->num_rd_buffers))
1455 		return -ENODEV;
1456 
1457 	if ((filp->f_mode & FMODE_READ) && (filp->f_flags & O_NONBLOCK) &&
1458 	    (channel->wr_synchronous || !channel->wr_allow_partial ||
1459 	     !channel->wr_supports_nonempty)) {
1460 		dev_err(endpoint->dev,
1461 			"open() failed: O_NONBLOCK not allowed for read on this device\n");
1462 		return -ENODEV;
1463 	}
1464 
1465 	if ((filp->f_mode & FMODE_WRITE) && (filp->f_flags & O_NONBLOCK) &&
1466 	    (channel->rd_synchronous || !channel->rd_allow_partial)) {
1467 		dev_err(endpoint->dev,
1468 			"open() failed: O_NONBLOCK not allowed for write on this device\n");
1469 		return -ENODEV;
1470 	}
1471 
1472 	/*
1473 	 * Note: open() may block on getting mutexes despite O_NONBLOCK.
1474 	 * This shouldn't occur normally, since multiple open of the same
1475 	 * file descriptor is almost always prohibited anyhow
1476 	 * (*_exclusive_open is normally set in real-life systems).
1477 	 */
1478 
1479 	if (filp->f_mode & FMODE_READ) {
1480 		rc = mutex_lock_interruptible(&channel->wr_mutex);
1481 		if (rc)
1482 			return rc;
1483 	}
1484 
1485 	if (filp->f_mode & FMODE_WRITE) {
1486 		rc = mutex_lock_interruptible(&channel->rd_mutex);
1487 		if (rc)
1488 			goto unlock_wr;
1489 	}
1490 
1491 	if ((filp->f_mode & FMODE_READ) &&
1492 	    (channel->wr_ref_count != 0) &&
1493 	    (channel->wr_exclusive_open)) {
1494 		rc = -EBUSY;
1495 		goto unlock;
1496 	}
1497 
1498 	if ((filp->f_mode & FMODE_WRITE) &&
1499 	    (channel->rd_ref_count != 0) &&
1500 	    (channel->rd_exclusive_open)) {
1501 		rc = -EBUSY;
1502 		goto unlock;
1503 	}
1504 
1505 	if (filp->f_mode & FMODE_READ) {
1506 		if (channel->wr_ref_count == 0) { /* First open of file */
1507 			/* Move the host to first buffer */
1508 			spin_lock_irqsave(&channel->wr_spinlock, flags);
1509 			channel->wr_host_buf_idx = 0;
1510 			channel->wr_host_buf_pos = 0;
1511 			channel->wr_fpga_buf_idx = -1;
1512 			channel->wr_empty = 1;
1513 			channel->wr_ready = 0;
1514 			channel->wr_sleepy = 1;
1515 			channel->wr_eof = -1;
1516 			channel->wr_hangup = 0;
1517 
1518 			spin_unlock_irqrestore(&channel->wr_spinlock, flags);
1519 
1520 			iowrite32(1 | (channel->chan_num << 1) |
1521 				  (4 << 24) |  /* Opcode 4, open channel */
1522 				  ((channel->wr_synchronous & 1) << 23),
1523 				  channel->endpoint->registers +
1524 				  fpga_buf_ctrl_reg);
1525 		}
1526 
1527 		channel->wr_ref_count++;
1528 	}
1529 
1530 	if (filp->f_mode & FMODE_WRITE) {
1531 		if (channel->rd_ref_count == 0) { /* First open of file */
1532 			/* Move the host to first buffer */
1533 			spin_lock_irqsave(&channel->rd_spinlock, flags);
1534 			channel->rd_host_buf_idx = 0;
1535 			channel->rd_host_buf_pos = 0;
1536 			channel->rd_leftovers[3] = 0; /* No leftovers. */
1537 			channel->rd_fpga_buf_idx = channel->num_rd_buffers - 1;
1538 			channel->rd_full = 0;
1539 
1540 			spin_unlock_irqrestore(&channel->rd_spinlock, flags);
1541 
1542 			iowrite32((channel->chan_num << 1) |
1543 				  (4 << 24),   /* Opcode 4, open channel */
1544 				  channel->endpoint->registers +
1545 				  fpga_buf_ctrl_reg);
1546 		}
1547 
1548 		channel->rd_ref_count++;
1549 	}
1550 
1551 unlock:
1552 	if (filp->f_mode & FMODE_WRITE)
1553 		mutex_unlock(&channel->rd_mutex);
1554 unlock_wr:
1555 	if (filp->f_mode & FMODE_READ)
1556 		mutex_unlock(&channel->wr_mutex);
1557 
1558 	if (!rc && (!channel->seekable))
1559 		return nonseekable_open(inode, filp);
1560 
1561 	return rc;
1562 }
1563 
/*
 * release() file operation: drop the reference counts taken by open() and,
 * when a direction's count reaches zero, send the "close channel" command
 * (opcode 5) for that direction to the FPGA.
 *
 * For the read direction, the function then waits until wr_eof equals the
 * index following wr_fpga_buf_idx.
 *
 * Returns 0 on success, -EIO if the endpoint has fatal_error set (in which
 * case nothing else is done), or -EINTR if wr_sleepy was still set after
 * waiting for the hardware's response to the close command.
 */
static int xillybus_release(struct inode *inode, struct file *filp)
{
	unsigned long flags;
	struct xilly_channel *channel = filp->private_data;

	int buf_idx;
	int eof;

	if (channel->endpoint->fatal_error)
		return -EIO;

	if (filp->f_mode & FMODE_WRITE) {
		mutex_lock(&channel->rd_mutex);

		channel->rd_ref_count--;

		if (channel->rd_ref_count == 0) {
			/*
			 * We rely on the kernel calling flush()
			 * before we get here.
			 */

			iowrite32((channel->chan_num << 1) | /* Channel ID */
				  (5 << 24),  /* Opcode 5, close channel */
				  channel->endpoint->registers +
				  fpga_buf_ctrl_reg);
		}
		mutex_unlock(&channel->rd_mutex);
	}

	if (filp->f_mode & FMODE_READ) {
		mutex_lock(&channel->wr_mutex);

		channel->wr_ref_count--;

		if (channel->wr_ref_count == 0) {
			iowrite32(1 | (channel->chan_num << 1) |
				  (5 << 24),  /* Opcode 5, close channel */
				  channel->endpoint->registers +
				  fpga_buf_ctrl_reg);

			/*
			 * This is crazily cautious: We make sure that not
			 * only that we got an EOF (be it because we closed
			 * the channel or because of a user's EOF), but verify
			 * that it's one beyond the last buffer arrived, so
			 * we have no leftover buffers pending before wrapping
			 * up (which can only happen in asynchronous channels,
			 * BTW)
			 */

			while (1) {
				/*
				 * Take a consistent snapshot and set
				 * wr_sleepy again before the next wait.
				 */
				spin_lock_irqsave(&channel->wr_spinlock,
						  flags);
				buf_idx = channel->wr_fpga_buf_idx;
				eof = channel->wr_eof;
				channel->wr_sleepy = 1;
				spin_unlock_irqrestore(&channel->wr_spinlock,
						       flags);

				/*
				 * Check if eof points at the buffer after
				 * the last one the FPGA submitted. Note that
				 * no EOF is marked by negative eof.
				 */

				buf_idx++;
				if (buf_idx == channel->num_wr_buffers)
					buf_idx = 0;

				if (buf_idx == eof)
					break;

				/*
				 * Steal extra 100 ms if awaken by interrupt.
				 * This is a simple workaround for an
				 * interrupt pending when entering, which would
				 * otherwise result in declaring the hardware
				 * non-responsive.
				 */

				if (wait_event_interruptible(
					    channel->wr_wait,
					    (!channel->wr_sleepy)))
					msleep(100);

				/* wr_sleepy still set: no response arrived */
				if (channel->wr_sleepy) {
					mutex_unlock(&channel->wr_mutex);
					dev_warn(channel->endpoint->dev,
						 "Hardware failed to respond to close command, therefore left in messy state.\n");
					return -EINTR;
				}
			}
		}

		mutex_unlock(&channel->wr_mutex);
	}

	return 0;
}
1664 
1665 static loff_t xillybus_llseek(struct file *filp, loff_t offset, int whence)
1666 {
1667 	struct xilly_channel *channel = filp->private_data;
1668 	loff_t pos = filp->f_pos;
1669 	int rc = 0;
1670 
1671 	/*
1672 	 * Take both mutexes not allowing interrupts, since it seems like
1673 	 * common applications don't expect an -EINTR here. Besides, multiple
1674 	 * access to a single file descriptor on seekable devices is a mess
1675 	 * anyhow.
1676 	 */
1677 
1678 	if (channel->endpoint->fatal_error)
1679 		return -EIO;
1680 
1681 	mutex_lock(&channel->wr_mutex);
1682 	mutex_lock(&channel->rd_mutex);
1683 
1684 	switch (whence) {
1685 	case SEEK_SET:
1686 		pos = offset;
1687 		break;
1688 	case SEEK_CUR:
1689 		pos += offset;
1690 		break;
1691 	case SEEK_END:
1692 		pos = offset; /* Going to the end => to the beginning */
1693 		break;
1694 	default:
1695 		rc = -EINVAL;
1696 		goto end;
1697 	}
1698 
1699 	/* In any case, we must finish on an element boundary */
1700 	if (pos & ((1 << channel->log2_element_size) - 1)) {
1701 		rc = -EINVAL;
1702 		goto end;
1703 	}
1704 
1705 	mutex_lock(&channel->endpoint->register_mutex);
1706 
1707 	iowrite32(pos >> channel->log2_element_size,
1708 		  channel->endpoint->registers + fpga_buf_offset_reg);
1709 
1710 	iowrite32((channel->chan_num << 1) |
1711 		  (6 << 24),  /* Opcode 6, set address */
1712 		  channel->endpoint->registers + fpga_buf_ctrl_reg);
1713 
1714 	mutex_unlock(&channel->endpoint->register_mutex);
1715 
1716 end:
1717 	mutex_unlock(&channel->rd_mutex);
1718 	mutex_unlock(&channel->wr_mutex);
1719 
1720 	if (rc) /* Return error after releasing mutexes */
1721 		return rc;
1722 
1723 	filp->f_pos = pos;
1724 
1725 	/*
1726 	 * Since seekable devices are allowed only when the channel is
1727 	 * synchronous, we assume that there is no data pending in either
1728 	 * direction (which holds true as long as no concurrent access on the
1729 	 * file descriptor takes place).
1730 	 * The only thing we may need to throw away is leftovers from partial
1731 	 * write() flush.
1732 	 */
1733 
1734 	channel->rd_leftovers[3] = 0;
1735 
1736 	return pos;
1737 }
1738 
1739 static __poll_t xillybus_poll(struct file *filp, poll_table *wait)
1740 {
1741 	struct xilly_channel *channel = filp->private_data;
1742 	__poll_t mask = 0;
1743 	unsigned long flags;
1744 
1745 	poll_wait(filp, &channel->endpoint->ep_wait, wait);
1746 
1747 	/*
1748 	 * poll() won't play ball regarding read() channels which
1749 	 * aren't asynchronous and support the nonempty message. Allowing
1750 	 * that will create situations where data has been delivered at
1751 	 * the FPGA, and users expecting select() to wake up, which it may
1752 	 * not.
1753 	 */
1754 
1755 	if (!channel->wr_synchronous && channel->wr_supports_nonempty) {
1756 		poll_wait(filp, &channel->wr_wait, wait);
1757 		poll_wait(filp, &channel->wr_ready_wait, wait);
1758 
1759 		spin_lock_irqsave(&channel->wr_spinlock, flags);
1760 		if (!channel->wr_empty || channel->wr_ready)
1761 			mask |= EPOLLIN | EPOLLRDNORM;
1762 
1763 		if (channel->wr_hangup)
1764 			/*
1765 			 * Not EPOLLHUP, because its behavior is in the
1766 			 * mist, and EPOLLIN does what we want: Wake up
1767 			 * the read file descriptor so it sees EOF.
1768 			 */
1769 			mask |=  EPOLLIN | EPOLLRDNORM;
1770 		spin_unlock_irqrestore(&channel->wr_spinlock, flags);
1771 	}
1772 
1773 	/*
1774 	 * If partial data write is disallowed on a write() channel,
1775 	 * it's pointless to ever signal OK to write, because is could
1776 	 * block despite some space being available.
1777 	 */
1778 
1779 	if (channel->rd_allow_partial) {
1780 		poll_wait(filp, &channel->rd_wait, wait);
1781 
1782 		spin_lock_irqsave(&channel->rd_spinlock, flags);
1783 		if (!channel->rd_full)
1784 			mask |= EPOLLOUT | EPOLLWRNORM;
1785 		spin_unlock_irqrestore(&channel->rd_spinlock, flags);
1786 	}
1787 
1788 	if (channel->endpoint->fatal_error)
1789 		mask |= EPOLLERR;
1790 
1791 	return mask;
1792 }
1793 
1794 static const struct file_operations xillybus_fops = {
1795 	.owner      = THIS_MODULE,
1796 	.read       = xillybus_read,
1797 	.write      = xillybus_write,
1798 	.open       = xillybus_open,
1799 	.flush      = xillybus_flush,
1800 	.release    = xillybus_release,
1801 	.llseek     = xillybus_llseek,
1802 	.poll       = xillybus_poll,
1803 };
1804 
1805 struct xilly_endpoint *xillybus_init_endpoint(struct device *dev)
1806 {
1807 	struct xilly_endpoint *endpoint;
1808 
1809 	endpoint = devm_kzalloc(dev, sizeof(*endpoint), GFP_KERNEL);
1810 	if (!endpoint)
1811 		return NULL;
1812 
1813 	endpoint->dev = dev;
1814 	endpoint->msg_counter = 0x0b;
1815 	endpoint->failed_messages = 0;
1816 	endpoint->fatal_error = 0;
1817 
1818 	init_waitqueue_head(&endpoint->ep_wait);
1819 	mutex_init(&endpoint->register_mutex);
1820 
1821 	return endpoint;
1822 }
1823 EXPORT_SYMBOL(xillybus_init_endpoint);
1824 
1825 static int xilly_quiesce(struct xilly_endpoint *endpoint)
1826 {
1827 	long t;
1828 
1829 	endpoint->idtlen = -1;
1830 
1831 	iowrite32((u32) (endpoint->dma_using_dac & 0x0001),
1832 		  endpoint->registers + fpga_dma_control_reg);
1833 
1834 	t = wait_event_interruptible_timeout(endpoint->ep_wait,
1835 					     (endpoint->idtlen >= 0),
1836 					     XILLY_TIMEOUT);
1837 	if (t <= 0) {
1838 		dev_err(endpoint->dev,
1839 			"Failed to quiesce the device on exit.\n");
1840 		return -ENODEV;
1841 	}
1842 	return 0;
1843 }
1844 
/*
 * Bootstrap the endpoint in three phases: set up a temporary message
 * buffer, obtain and scan the IDT from the FPGA, and then set up the
 * channels described by the IDT and register the character devices.
 *
 * The allocations of the first two phases are collected in a devres group,
 * which is released once the function has succeeded.
 *
 * Returns 0 on success, -ENOMEM if the devres group couldn't be opened,
 * -ENODEV if the FPGA didn't respond within XILLY_TIMEOUT, or the error
 * returned by one of the setup steps.
 */
int xillybus_endpoint_discovery(struct xilly_endpoint *endpoint)
{
	int rc;
	long t;

	void *bootstrap_resources;
	int idtbuffersize = (1 << PAGE_SHIFT);
	struct device *dev = endpoint->dev;

	/*
	 * The bogus IDT is used during bootstrap for allocating the initial
	 * message buffer, and then the message buffer and space for the IDT
	 * itself. The initial message buffer is of a single page's size, but
	 * it's soon replaced with a more modest one (and memory is freed).
	 */

	unsigned char bogus_idt[8] = { 1, 224, (PAGE_SHIFT)-2, 0,
				       3, 192, PAGE_SHIFT, 0 };
	struct xilly_idt_handle idt_handle;

	/*
	 * Writing the value 0x00000001 to Endianness register signals which
	 * endianness this processor is using, so the FPGA can swap words as
	 * necessary.
	 */

	iowrite32(1, endpoint->registers + fpga_endian_reg);

	/* Bootstrap phase I: Allocate temporary message buffer */

	bootstrap_resources = devres_open_group(dev, NULL, GFP_KERNEL);
	if (!bootstrap_resources)
		return -ENOMEM;

	endpoint->num_channels = 0;

	/* Only the first entry of the bogus IDT is used in this phase */
	rc = xilly_setupchannels(endpoint, bogus_idt, 1);
	if (rc)
		return rc;

	/* Clear the message subsystem (and counter in particular) */
	iowrite32(0x04, endpoint->registers + fpga_msg_ctrl_reg);

	/* Invalidate idtlen, so an update to it can be detected below */
	endpoint->idtlen = -1;

	/*
	 * Set DMA 32/64 bit mode, quiesce the device (?!) and get IDT
	 * buffer size.
	 */
	iowrite32((u32) (endpoint->dma_using_dac & 0x0001),
		  endpoint->registers + fpga_dma_control_reg);

	t = wait_event_interruptible_timeout(endpoint->ep_wait,
					     (endpoint->idtlen >= 0),
					     XILLY_TIMEOUT);
	/* Both a timeout and an interrupted wait end up here */
	if (t <= 0) {
		dev_err(endpoint->dev, "No response from FPGA. Aborting.\n");
		return -ENODEV;
	}

	/* Enable DMA */
	iowrite32((u32) (0x0002 | (endpoint->dma_using_dac & 0x0001)),
		  endpoint->registers + fpga_dma_control_reg);

	/* Bootstrap phase II: Allocate buffer for IDT and obtain it */

	/*
	 * Double the IDT buffer size until it exceeds idtlen, incrementing
	 * the second bogus IDT entry's third byte (initially PAGE_SHIFT)
	 * once per doubling.
	 */
	while (endpoint->idtlen >= idtbuffersize) {
		idtbuffersize *= 2;
		bogus_idt[6]++;
	}

	endpoint->num_channels = 1;

	rc = xilly_setupchannels(endpoint, bogus_idt, 2);
	if (rc)
		goto failed_idt;

	rc = xilly_obtain_idt(endpoint);
	if (rc)
		goto failed_idt;

	rc = xilly_scan_idt(endpoint, &idt_handle);
	if (rc)
		goto failed_idt;

	/* Allocations made from here on don't belong to the bootstrap group */
	devres_close_group(dev, bootstrap_resources);

	/* Bootstrap phase III: Allocate buffers according to IDT */

	rc = xilly_setupchannels(endpoint,
				 idt_handle.chandesc,
				 idt_handle.entries);
	if (rc)
		goto failed_idt;

	rc = xillybus_init_chrdev(dev, &xillybus_fops,
				  endpoint->owner, endpoint,
				  idt_handle.names,
				  idt_handle.names_len,
				  endpoint->num_channels,
				  xillyname, false);

	if (rc)
		goto failed_idt;

	/* The bootstrap buffers aren't needed anymore */
	devres_release_group(dev, bootstrap_resources);

	return 0;

failed_idt:
	/*
	 * NOTE(review): the failure returns taken before DMA is enabled
	 * above don't pass through here.
	 */
	xilly_quiesce(endpoint);
	flush_workqueue(xillybus_wq);

	return rc;
}
EXPORT_SYMBOL(xillybus_endpoint_discovery);
1960 
1961 void xillybus_endpoint_remove(struct xilly_endpoint *endpoint)
1962 {
1963 	xillybus_cleanup_chrdev(endpoint, endpoint->dev);
1964 
1965 	xilly_quiesce(endpoint);
1966 
1967 	/*
1968 	 * Flushing is done upon endpoint release to prevent access to memory
1969 	 * just about to be released. This makes the quiesce complete.
1970 	 */
1971 	flush_workqueue(xillybus_wq);
1972 }
1973 EXPORT_SYMBOL(xillybus_endpoint_remove);
1974 
1975 static int __init xillybus_init(void)
1976 {
1977 	xillybus_wq = alloc_workqueue(xillyname, 0, 0);
1978 	if (!xillybus_wq)
1979 		return -ENOMEM;
1980 
1981 	return 0;
1982 }
1983 
/* Module exit: destroy the work queue allocated by xillybus_init() */
static void __exit xillybus_exit(void)
{
	/* flush_workqueue() was called for each endpoint released */
	destroy_workqueue(xillybus_wq);
}
1989 
/* Register the module's initialization and exit functions */
module_init(xillybus_init);
module_exit(xillybus_exit);
1992