xref: /openbmc/linux/block/bsg.c (revision f7d84fa7)
1 /*
2  * bsg.c - block layer implementation of the sg v4 interface
3  *
4  * Copyright (C) 2004 Jens Axboe <axboe@suse.de> SUSE Labs
5  * Copyright (C) 2004 Peter M. Jones <pjones@redhat.com>
6  *
7  *  This file is subject to the terms and conditions of the GNU General Public
8  *  License version 2.  See the file "COPYING" in the main directory of this
9  *  archive for more details.
10  *
11  */
12 #include <linux/module.h>
13 #include <linux/init.h>
14 #include <linux/file.h>
15 #include <linux/blkdev.h>
16 #include <linux/poll.h>
17 #include <linux/cdev.h>
18 #include <linux/jiffies.h>
19 #include <linux/percpu.h>
20 #include <linux/uio.h>
21 #include <linux/idr.h>
22 #include <linux/bsg.h>
23 #include <linux/slab.h>
24 
25 #include <scsi/scsi.h>
26 #include <scsi/scsi_ioctl.h>
27 #include <scsi/scsi_cmnd.h>
28 #include <scsi/scsi_device.h>
29 #include <scsi/scsi_driver.h>
30 #include <scsi/sg.h>
31 
/* Human-readable driver description; used for MODULE_DESCRIPTION and the load banner. */
#define BSG_DESCRIPTION	"Block layer SCSI generic (bsg) driver"
/* Driver version string printed in the load banner by bsg_init(). */
#define BSG_VERSION	"0.4"
34 
35 struct bsg_device {
36 	struct request_queue *queue;
37 	spinlock_t lock;
38 	struct list_head busy_list;
39 	struct list_head done_list;
40 	struct hlist_node dev_list;
41 	atomic_t ref_count;
42 	int queued_cmds;
43 	int done_cmds;
44 	wait_queue_head_t wq_done;
45 	wait_queue_head_t wq_free;
46 	char name[20];
47 	int max_queue;
48 	unsigned long flags;
49 };
50 
/* Bit numbers used with set_bit()/clear_bit()/test_bit() on bsg_device.flags. */
enum {
	BSG_F_BLOCK		= 1,	/* callers may sleep waiting for completions (see bsg_set_block()) */
};
54 
/* Initial value of bsg_device.max_queue (adjustable via SG_SET_COMMAND_Q). */
#define BSG_DEFAULT_CMDS	64
/* Size of the minor range reserved for bsg character devices. */
#define BSG_MAX_DEVS		32768

/* Debug output is compiled out; define BSG_DEBUG instead to enable dprintk(). */
#undef BSG_DEBUG

#ifdef BSG_DEBUG
#define dprintk(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ##args)
#else
#define dprintk(fmt, args...)
#endif
65 
/* Held around bsg_minor_idr and bsg_device_list accesses and queue (un)registration. */
static DEFINE_MUTEX(bsg_mutex);
/* Maps a minor number to the bsg_class_device registered for it. */
static DEFINE_IDR(bsg_minor_idr);

/* Hash of live bsg_device instances, bucketed by minor (see bsg_dev_idx_hash()). */
#define BSG_LIST_ARRAY_SIZE	8
static struct hlist_head bsg_device_list[BSG_LIST_ARRAY_SIZE];

static struct class *bsg_class;
static int bsg_major;	/* major number obtained in bsg_init() */

/* Slab cache backing struct bsg_command allocations. */
static struct kmem_cache *bsg_cmd_cachep;
76 
77 /*
78  * our internal command type
79  */
80 struct bsg_command {
81 	struct bsg_device *bd;
82 	struct list_head list;
83 	struct request *rq;
84 	struct bio *bio;
85 	struct bio *bidi_bio;
86 	int err;
87 	struct sg_io_v4 hdr;
88 };
89 
90 static void bsg_free_command(struct bsg_command *bc)
91 {
92 	struct bsg_device *bd = bc->bd;
93 	unsigned long flags;
94 
95 	kmem_cache_free(bsg_cmd_cachep, bc);
96 
97 	spin_lock_irqsave(&bd->lock, flags);
98 	bd->queued_cmds--;
99 	spin_unlock_irqrestore(&bd->lock, flags);
100 
101 	wake_up(&bd->wq_free);
102 }
103 
104 static struct bsg_command *bsg_alloc_command(struct bsg_device *bd)
105 {
106 	struct bsg_command *bc = ERR_PTR(-EINVAL);
107 
108 	spin_lock_irq(&bd->lock);
109 
110 	if (bd->queued_cmds >= bd->max_queue)
111 		goto out;
112 
113 	bd->queued_cmds++;
114 	spin_unlock_irq(&bd->lock);
115 
116 	bc = kmem_cache_zalloc(bsg_cmd_cachep, GFP_KERNEL);
117 	if (unlikely(!bc)) {
118 		spin_lock_irq(&bd->lock);
119 		bd->queued_cmds--;
120 		bc = ERR_PTR(-ENOMEM);
121 		goto out;
122 	}
123 
124 	bc->bd = bd;
125 	INIT_LIST_HEAD(&bc->list);
126 	dprintk("%s: returning free cmd %p\n", bd->name, bc);
127 	return bc;
128 out:
129 	spin_unlock_irq(&bd->lock);
130 	return bc;
131 }
132 
133 static inline struct hlist_head *bsg_dev_idx_hash(int index)
134 {
135 	return &bsg_device_list[index & (BSG_LIST_ARRAY_SIZE - 1)];
136 }
137 
138 static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
139 				struct sg_io_v4 *hdr, struct bsg_device *bd,
140 				fmode_t has_write_perm)
141 {
142 	struct scsi_request *req = scsi_req(rq);
143 
144 	if (hdr->request_len > BLK_MAX_CDB) {
145 		req->cmd = kzalloc(hdr->request_len, GFP_KERNEL);
146 		if (!req->cmd)
147 			return -ENOMEM;
148 	}
149 
150 	if (copy_from_user(req->cmd, (void __user *)(unsigned long)hdr->request,
151 			   hdr->request_len))
152 		return -EFAULT;
153 
154 	if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
155 		if (blk_verify_command(req->cmd, has_write_perm))
156 			return -EPERM;
157 	} else if (!capable(CAP_SYS_RAWIO))
158 		return -EPERM;
159 
160 	/*
161 	 * fill in request structure
162 	 */
163 	req->cmd_len = hdr->request_len;
164 
165 	rq->timeout = msecs_to_jiffies(hdr->timeout);
166 	if (!rq->timeout)
167 		rq->timeout = q->sg_timeout;
168 	if (!rq->timeout)
169 		rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
170 	if (rq->timeout < BLK_MIN_SG_TIMEOUT)
171 		rq->timeout = BLK_MIN_SG_TIMEOUT;
172 
173 	return 0;
174 }
175 
176 /*
177  * Check if sg_io_v4 from user is allowed and valid
178  */
179 static int
180 bsg_validate_sgv4_hdr(struct sg_io_v4 *hdr, int *op)
181 {
182 	int ret = 0;
183 
184 	if (hdr->guard != 'Q')
185 		return -EINVAL;
186 
187 	switch (hdr->protocol) {
188 	case BSG_PROTOCOL_SCSI:
189 		switch (hdr->subprotocol) {
190 		case BSG_SUB_PROTOCOL_SCSI_CMD:
191 		case BSG_SUB_PROTOCOL_SCSI_TRANSPORT:
192 			break;
193 		default:
194 			ret = -EINVAL;
195 		}
196 		break;
197 	default:
198 		ret = -EINVAL;
199 	}
200 
201 	*op = hdr->dout_xfer_len ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN;
202 	return ret;
203 }
204 
205 /*
206  * map sg_io_v4 to a request.
207  */
208 static struct request *
209 bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm)
210 {
211 	struct request_queue *q = bd->queue;
212 	struct request *rq, *next_rq = NULL;
213 	int ret;
214 	unsigned int op, dxfer_len;
215 	void __user *dxferp = NULL;
216 	struct bsg_class_device *bcd = &q->bsg_dev;
217 
218 	/* if the LLD has been removed then the bsg_unregister_queue will
219 	 * eventually be called and the class_dev was freed, so we can no
220 	 * longer use this request_queue. Return no such address.
221 	 */
222 	if (!bcd->class_dev)
223 		return ERR_PTR(-ENXIO);
224 
225 	dprintk("map hdr %llx/%u %llx/%u\n", (unsigned long long) hdr->dout_xferp,
226 		hdr->dout_xfer_len, (unsigned long long) hdr->din_xferp,
227 		hdr->din_xfer_len);
228 
229 	ret = bsg_validate_sgv4_hdr(hdr, &op);
230 	if (ret)
231 		return ERR_PTR(ret);
232 
233 	/*
234 	 * map scatter-gather elements separately and string them to request
235 	 */
236 	rq = blk_get_request(q, op, GFP_KERNEL);
237 	if (IS_ERR(rq))
238 		return rq;
239 	scsi_req_init(rq);
240 
241 	ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm);
242 	if (ret)
243 		goto out;
244 
245 	if (op == REQ_OP_SCSI_OUT && hdr->din_xfer_len) {
246 		if (!test_bit(QUEUE_FLAG_BIDI, &q->queue_flags)) {
247 			ret = -EOPNOTSUPP;
248 			goto out;
249 		}
250 
251 		next_rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL);
252 		if (IS_ERR(next_rq)) {
253 			ret = PTR_ERR(next_rq);
254 			next_rq = NULL;
255 			goto out;
256 		}
257 		rq->next_rq = next_rq;
258 
259 		dxferp = (void __user *)(unsigned long)hdr->din_xferp;
260 		ret =  blk_rq_map_user(q, next_rq, NULL, dxferp,
261 				       hdr->din_xfer_len, GFP_KERNEL);
262 		if (ret)
263 			goto out;
264 	}
265 
266 	if (hdr->dout_xfer_len) {
267 		dxfer_len = hdr->dout_xfer_len;
268 		dxferp = (void __user *)(unsigned long)hdr->dout_xferp;
269 	} else if (hdr->din_xfer_len) {
270 		dxfer_len = hdr->din_xfer_len;
271 		dxferp = (void __user *)(unsigned long)hdr->din_xferp;
272 	} else
273 		dxfer_len = 0;
274 
275 	if (dxfer_len) {
276 		ret = blk_rq_map_user(q, rq, NULL, dxferp, dxfer_len,
277 				      GFP_KERNEL);
278 		if (ret)
279 			goto out;
280 	}
281 
282 	return rq;
283 out:
284 	scsi_req_free_cmd(scsi_req(rq));
285 	blk_put_request(rq);
286 	if (next_rq) {
287 		blk_rq_unmap_user(next_rq->bio);
288 		blk_put_request(next_rq);
289 	}
290 	return ERR_PTR(ret);
291 }
292 
293 /*
294  * async completion call-back from the block layer, when scsi/ide/whatever
295  * calls end_that_request_last() on a request
296  */
297 static void bsg_rq_end_io(struct request *rq, int uptodate)
298 {
299 	struct bsg_command *bc = rq->end_io_data;
300 	struct bsg_device *bd = bc->bd;
301 	unsigned long flags;
302 
303 	dprintk("%s: finished rq %p bc %p, bio %p stat %d\n",
304 		bd->name, rq, bc, bc->bio, uptodate);
305 
306 	bc->hdr.duration = jiffies_to_msecs(jiffies - bc->hdr.duration);
307 
308 	spin_lock_irqsave(&bd->lock, flags);
309 	list_move_tail(&bc->list, &bd->done_list);
310 	bd->done_cmds++;
311 	spin_unlock_irqrestore(&bd->lock, flags);
312 
313 	wake_up(&bd->wq_done);
314 }
315 
316 /*
317  * do final setup of a 'bc' and submit the matching 'rq' to the block
318  * layer for io
319  */
320 static void bsg_add_command(struct bsg_device *bd, struct request_queue *q,
321 			    struct bsg_command *bc, struct request *rq)
322 {
323 	int at_head = (0 == (bc->hdr.flags & BSG_FLAG_Q_AT_TAIL));
324 
325 	/*
326 	 * add bc command to busy queue and submit rq for io
327 	 */
328 	bc->rq = rq;
329 	bc->bio = rq->bio;
330 	if (rq->next_rq)
331 		bc->bidi_bio = rq->next_rq->bio;
332 	bc->hdr.duration = jiffies;
333 	spin_lock_irq(&bd->lock);
334 	list_add_tail(&bc->list, &bd->busy_list);
335 	spin_unlock_irq(&bd->lock);
336 
337 	dprintk("%s: queueing rq %p, bc %p\n", bd->name, rq, bc);
338 
339 	rq->end_io_data = bc;
340 	blk_execute_rq_nowait(q, NULL, rq, at_head, bsg_rq_end_io);
341 }
342 
343 static struct bsg_command *bsg_next_done_cmd(struct bsg_device *bd)
344 {
345 	struct bsg_command *bc = NULL;
346 
347 	spin_lock_irq(&bd->lock);
348 	if (bd->done_cmds) {
349 		bc = list_first_entry(&bd->done_list, struct bsg_command, list);
350 		list_del(&bc->list);
351 		bd->done_cmds--;
352 	}
353 	spin_unlock_irq(&bd->lock);
354 
355 	return bc;
356 }
357 
358 /*
359  * Get a finished command from the done list
360  */
361 static struct bsg_command *bsg_get_done_cmd(struct bsg_device *bd)
362 {
363 	struct bsg_command *bc;
364 	int ret;
365 
366 	do {
367 		bc = bsg_next_done_cmd(bd);
368 		if (bc)
369 			break;
370 
371 		if (!test_bit(BSG_F_BLOCK, &bd->flags)) {
372 			bc = ERR_PTR(-EAGAIN);
373 			break;
374 		}
375 
376 		ret = wait_event_interruptible(bd->wq_done, bd->done_cmds);
377 		if (ret) {
378 			bc = ERR_PTR(-ERESTARTSYS);
379 			break;
380 		}
381 	} while (1);
382 
383 	dprintk("%s: returning done %p\n", bd->name, bc);
384 
385 	return bc;
386 }
387 
388 static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
389 				    struct bio *bio, struct bio *bidi_bio)
390 {
391 	struct scsi_request *req = scsi_req(rq);
392 	int ret = 0;
393 
394 	dprintk("rq %p bio %p 0x%x\n", rq, bio, req->result);
395 	/*
396 	 * fill in all the output members
397 	 */
398 	hdr->device_status = req->result & 0xff;
399 	hdr->transport_status = host_byte(req->result);
400 	hdr->driver_status = driver_byte(req->result);
401 	hdr->info = 0;
402 	if (hdr->device_status || hdr->transport_status || hdr->driver_status)
403 		hdr->info |= SG_INFO_CHECK;
404 	hdr->response_len = 0;
405 
406 	if (req->sense_len && hdr->response) {
407 		int len = min_t(unsigned int, hdr->max_response_len,
408 					req->sense_len);
409 
410 		ret = copy_to_user((void __user *)(unsigned long)hdr->response,
411 				   req->sense, len);
412 		if (!ret)
413 			hdr->response_len = len;
414 		else
415 			ret = -EFAULT;
416 	}
417 
418 	if (rq->next_rq) {
419 		hdr->dout_resid = req->resid_len;
420 		hdr->din_resid = scsi_req(rq->next_rq)->resid_len;
421 		blk_rq_unmap_user(bidi_bio);
422 		blk_put_request(rq->next_rq);
423 	} else if (rq_data_dir(rq) == READ)
424 		hdr->din_resid = req->resid_len;
425 	else
426 		hdr->dout_resid = req->resid_len;
427 
428 	/*
429 	 * If the request generated a negative error number, return it
430 	 * (providing we aren't already returning an error); if it's
431 	 * just a protocol response (i.e. non negative), that gets
432 	 * processed above.
433 	 */
434 	if (!ret && req->result < 0)
435 		ret = req->result;
436 
437 	blk_rq_unmap_user(bio);
438 	scsi_req_free_cmd(req);
439 	blk_put_request(rq);
440 
441 	return ret;
442 }
443 
444 static bool bsg_complete(struct bsg_device *bd)
445 {
446 	bool ret = false;
447 	bool spin;
448 
449 	do {
450 		spin_lock_irq(&bd->lock);
451 
452 		BUG_ON(bd->done_cmds > bd->queued_cmds);
453 
454 		/*
455 		 * All commands consumed.
456 		 */
457 		if (bd->done_cmds == bd->queued_cmds)
458 			ret = true;
459 
460 		spin = !test_bit(BSG_F_BLOCK, &bd->flags);
461 
462 		spin_unlock_irq(&bd->lock);
463 	} while (!ret && spin);
464 
465 	return ret;
466 }
467 
468 static int bsg_complete_all_commands(struct bsg_device *bd)
469 {
470 	struct bsg_command *bc;
471 	int ret, tret;
472 
473 	dprintk("%s: entered\n", bd->name);
474 
475 	/*
476 	 * wait for all commands to complete
477 	 */
478 	io_wait_event(bd->wq_done, bsg_complete(bd));
479 
480 	/*
481 	 * discard done commands
482 	 */
483 	ret = 0;
484 	do {
485 		spin_lock_irq(&bd->lock);
486 		if (!bd->queued_cmds) {
487 			spin_unlock_irq(&bd->lock);
488 			break;
489 		}
490 		spin_unlock_irq(&bd->lock);
491 
492 		bc = bsg_get_done_cmd(bd);
493 		if (IS_ERR(bc))
494 			break;
495 
496 		tret = blk_complete_sgv4_hdr_rq(bc->rq, &bc->hdr, bc->bio,
497 						bc->bidi_bio);
498 		if (!ret)
499 			ret = tret;
500 
501 		bsg_free_command(bc);
502 	} while (1);
503 
504 	return ret;
505 }
506 
507 static int
508 __bsg_read(char __user *buf, size_t count, struct bsg_device *bd,
509 	   const struct iovec *iov, ssize_t *bytes_read)
510 {
511 	struct bsg_command *bc;
512 	int nr_commands, ret;
513 
514 	if (count % sizeof(struct sg_io_v4))
515 		return -EINVAL;
516 
517 	ret = 0;
518 	nr_commands = count / sizeof(struct sg_io_v4);
519 	while (nr_commands) {
520 		bc = bsg_get_done_cmd(bd);
521 		if (IS_ERR(bc)) {
522 			ret = PTR_ERR(bc);
523 			break;
524 		}
525 
526 		/*
527 		 * this is the only case where we need to copy data back
528 		 * after completing the request. so do that here,
529 		 * bsg_complete_work() cannot do that for us
530 		 */
531 		ret = blk_complete_sgv4_hdr_rq(bc->rq, &bc->hdr, bc->bio,
532 					       bc->bidi_bio);
533 
534 		if (copy_to_user(buf, &bc->hdr, sizeof(bc->hdr)))
535 			ret = -EFAULT;
536 
537 		bsg_free_command(bc);
538 
539 		if (ret)
540 			break;
541 
542 		buf += sizeof(struct sg_io_v4);
543 		*bytes_read += sizeof(struct sg_io_v4);
544 		nr_commands--;
545 	}
546 
547 	return ret;
548 }
549 
550 static inline void bsg_set_block(struct bsg_device *bd, struct file *file)
551 {
552 	if (file->f_flags & O_NONBLOCK)
553 		clear_bit(BSG_F_BLOCK, &bd->flags);
554 	else
555 		set_bit(BSG_F_BLOCK, &bd->flags);
556 }
557 
/*
 * Decide whether @ret is a "real" error that must be reported to the
 * caller. Success (0) and -ENOSPC, -ENODATA and -EAGAIN are not; anything
 * else is. Returns 1 for a real error, 0 otherwise.
 */
static inline int err_block_err(int ret)
{
	switch (ret) {
	case 0:
	case -ENOSPC:
	case -ENODATA:
	case -EAGAIN:
		return 0;
	default:
		return 1;
	}
}
568 
569 static ssize_t
570 bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
571 {
572 	struct bsg_device *bd = file->private_data;
573 	int ret;
574 	ssize_t bytes_read;
575 
576 	dprintk("%s: read %zd bytes\n", bd->name, count);
577 
578 	bsg_set_block(bd, file);
579 
580 	bytes_read = 0;
581 	ret = __bsg_read(buf, count, bd, NULL, &bytes_read);
582 	*ppos = bytes_read;
583 
584 	if (!bytes_read || err_block_err(ret))
585 		bytes_read = ret;
586 
587 	return bytes_read;
588 }
589 
590 static int __bsg_write(struct bsg_device *bd, const char __user *buf,
591 		       size_t count, ssize_t *bytes_written,
592 		       fmode_t has_write_perm)
593 {
594 	struct bsg_command *bc;
595 	struct request *rq;
596 	int ret, nr_commands;
597 
598 	if (count % sizeof(struct sg_io_v4))
599 		return -EINVAL;
600 
601 	nr_commands = count / sizeof(struct sg_io_v4);
602 	rq = NULL;
603 	bc = NULL;
604 	ret = 0;
605 	while (nr_commands) {
606 		struct request_queue *q = bd->queue;
607 
608 		bc = bsg_alloc_command(bd);
609 		if (IS_ERR(bc)) {
610 			ret = PTR_ERR(bc);
611 			bc = NULL;
612 			break;
613 		}
614 
615 		if (copy_from_user(&bc->hdr, buf, sizeof(bc->hdr))) {
616 			ret = -EFAULT;
617 			break;
618 		}
619 
620 		/*
621 		 * get a request, fill in the blanks, and add to request queue
622 		 */
623 		rq = bsg_map_hdr(bd, &bc->hdr, has_write_perm);
624 		if (IS_ERR(rq)) {
625 			ret = PTR_ERR(rq);
626 			rq = NULL;
627 			break;
628 		}
629 
630 		bsg_add_command(bd, q, bc, rq);
631 		bc = NULL;
632 		rq = NULL;
633 		nr_commands--;
634 		buf += sizeof(struct sg_io_v4);
635 		*bytes_written += sizeof(struct sg_io_v4);
636 	}
637 
638 	if (bc)
639 		bsg_free_command(bc);
640 
641 	return ret;
642 }
643 
644 static ssize_t
645 bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
646 {
647 	struct bsg_device *bd = file->private_data;
648 	ssize_t bytes_written;
649 	int ret;
650 
651 	dprintk("%s: write %zd bytes\n", bd->name, count);
652 
653 	if (unlikely(uaccess_kernel()))
654 		return -EINVAL;
655 
656 	bsg_set_block(bd, file);
657 
658 	bytes_written = 0;
659 	ret = __bsg_write(bd, buf, count, &bytes_written,
660 			  file->f_mode & FMODE_WRITE);
661 
662 	*ppos = bytes_written;
663 
664 	/*
665 	 * return bytes written on non-fatal errors
666 	 */
667 	if (!bytes_written || err_block_err(ret))
668 		bytes_written = ret;
669 
670 	dprintk("%s: returning %zd\n", bd->name, bytes_written);
671 	return bytes_written;
672 }
673 
674 static struct bsg_device *bsg_alloc_device(void)
675 {
676 	struct bsg_device *bd;
677 
678 	bd = kzalloc(sizeof(struct bsg_device), GFP_KERNEL);
679 	if (unlikely(!bd))
680 		return NULL;
681 
682 	spin_lock_init(&bd->lock);
683 
684 	bd->max_queue = BSG_DEFAULT_CMDS;
685 
686 	INIT_LIST_HEAD(&bd->busy_list);
687 	INIT_LIST_HEAD(&bd->done_list);
688 	INIT_HLIST_NODE(&bd->dev_list);
689 
690 	init_waitqueue_head(&bd->wq_free);
691 	init_waitqueue_head(&bd->wq_done);
692 	return bd;
693 }
694 
695 static void bsg_kref_release_function(struct kref *kref)
696 {
697 	struct bsg_class_device *bcd =
698 		container_of(kref, struct bsg_class_device, ref);
699 	struct device *parent = bcd->parent;
700 
701 	if (bcd->release)
702 		bcd->release(bcd->parent);
703 
704 	put_device(parent);
705 }
706 
707 static int bsg_put_device(struct bsg_device *bd)
708 {
709 	int ret = 0, do_free;
710 	struct request_queue *q = bd->queue;
711 
712 	mutex_lock(&bsg_mutex);
713 
714 	do_free = atomic_dec_and_test(&bd->ref_count);
715 	if (!do_free) {
716 		mutex_unlock(&bsg_mutex);
717 		goto out;
718 	}
719 
720 	hlist_del(&bd->dev_list);
721 	mutex_unlock(&bsg_mutex);
722 
723 	dprintk("%s: tearing down\n", bd->name);
724 
725 	/*
726 	 * close can always block
727 	 */
728 	set_bit(BSG_F_BLOCK, &bd->flags);
729 
730 	/*
731 	 * correct error detection baddies here again. it's the responsibility
732 	 * of the app to properly reap commands before close() if it wants
733 	 * fool-proof error detection
734 	 */
735 	ret = bsg_complete_all_commands(bd);
736 
737 	kfree(bd);
738 out:
739 	kref_put(&q->bsg_dev.ref, bsg_kref_release_function);
740 	if (do_free)
741 		blk_put_queue(q);
742 	return ret;
743 }
744 
745 static struct bsg_device *bsg_add_device(struct inode *inode,
746 					 struct request_queue *rq,
747 					 struct file *file)
748 {
749 	struct bsg_device *bd;
750 #ifdef BSG_DEBUG
751 	unsigned char buf[32];
752 #endif
753 	if (!blk_get_queue(rq))
754 		return ERR_PTR(-ENXIO);
755 
756 	bd = bsg_alloc_device();
757 	if (!bd) {
758 		blk_put_queue(rq);
759 		return ERR_PTR(-ENOMEM);
760 	}
761 
762 	bd->queue = rq;
763 
764 	bsg_set_block(bd, file);
765 
766 	atomic_set(&bd->ref_count, 1);
767 	mutex_lock(&bsg_mutex);
768 	hlist_add_head(&bd->dev_list, bsg_dev_idx_hash(iminor(inode)));
769 
770 	strncpy(bd->name, dev_name(rq->bsg_dev.class_dev), sizeof(bd->name) - 1);
771 	dprintk("bound to <%s>, max queue %d\n",
772 		format_dev_t(buf, inode->i_rdev), bd->max_queue);
773 
774 	mutex_unlock(&bsg_mutex);
775 	return bd;
776 }
777 
778 static struct bsg_device *__bsg_get_device(int minor, struct request_queue *q)
779 {
780 	struct bsg_device *bd;
781 
782 	mutex_lock(&bsg_mutex);
783 
784 	hlist_for_each_entry(bd, bsg_dev_idx_hash(minor), dev_list) {
785 		if (bd->queue == q) {
786 			atomic_inc(&bd->ref_count);
787 			goto found;
788 		}
789 	}
790 	bd = NULL;
791 found:
792 	mutex_unlock(&bsg_mutex);
793 	return bd;
794 }
795 
796 static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file)
797 {
798 	struct bsg_device *bd;
799 	struct bsg_class_device *bcd;
800 
801 	/*
802 	 * find the class device
803 	 */
804 	mutex_lock(&bsg_mutex);
805 	bcd = idr_find(&bsg_minor_idr, iminor(inode));
806 	if (bcd)
807 		kref_get(&bcd->ref);
808 	mutex_unlock(&bsg_mutex);
809 
810 	if (!bcd)
811 		return ERR_PTR(-ENODEV);
812 
813 	bd = __bsg_get_device(iminor(inode), bcd->queue);
814 	if (bd)
815 		return bd;
816 
817 	bd = bsg_add_device(inode, bcd->queue, file);
818 	if (IS_ERR(bd))
819 		kref_put(&bcd->ref, bsg_kref_release_function);
820 
821 	return bd;
822 }
823 
824 static int bsg_open(struct inode *inode, struct file *file)
825 {
826 	struct bsg_device *bd;
827 
828 	bd = bsg_get_device(inode, file);
829 
830 	if (IS_ERR(bd))
831 		return PTR_ERR(bd);
832 
833 	file->private_data = bd;
834 	return 0;
835 }
836 
837 static int bsg_release(struct inode *inode, struct file *file)
838 {
839 	struct bsg_device *bd = file->private_data;
840 
841 	file->private_data = NULL;
842 	return bsg_put_device(bd);
843 }
844 
845 static unsigned int bsg_poll(struct file *file, poll_table *wait)
846 {
847 	struct bsg_device *bd = file->private_data;
848 	unsigned int mask = 0;
849 
850 	poll_wait(file, &bd->wq_done, wait);
851 	poll_wait(file, &bd->wq_free, wait);
852 
853 	spin_lock_irq(&bd->lock);
854 	if (!list_empty(&bd->done_list))
855 		mask |= POLLIN | POLLRDNORM;
856 	if (bd->queued_cmds < bd->max_queue)
857 		mask |= POLLOUT;
858 	spin_unlock_irq(&bd->lock);
859 
860 	return mask;
861 }
862 
863 static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
864 {
865 	struct bsg_device *bd = file->private_data;
866 	int __user *uarg = (int __user *) arg;
867 	int ret;
868 
869 	switch (cmd) {
870 		/*
871 		 * our own ioctls
872 		 */
873 	case SG_GET_COMMAND_Q:
874 		return put_user(bd->max_queue, uarg);
875 	case SG_SET_COMMAND_Q: {
876 		int queue;
877 
878 		if (get_user(queue, uarg))
879 			return -EFAULT;
880 		if (queue < 1)
881 			return -EINVAL;
882 
883 		spin_lock_irq(&bd->lock);
884 		bd->max_queue = queue;
885 		spin_unlock_irq(&bd->lock);
886 		return 0;
887 	}
888 
889 	/*
890 	 * SCSI/sg ioctls
891 	 */
892 	case SG_GET_VERSION_NUM:
893 	case SCSI_IOCTL_GET_IDLUN:
894 	case SCSI_IOCTL_GET_BUS_NUMBER:
895 	case SG_SET_TIMEOUT:
896 	case SG_GET_TIMEOUT:
897 	case SG_GET_RESERVED_SIZE:
898 	case SG_SET_RESERVED_SIZE:
899 	case SG_EMULATED_HOST:
900 	case SCSI_IOCTL_SEND_COMMAND: {
901 		void __user *uarg = (void __user *) arg;
902 		return scsi_cmd_ioctl(bd->queue, NULL, file->f_mode, cmd, uarg);
903 	}
904 	case SG_IO: {
905 		struct request *rq;
906 		struct bio *bio, *bidi_bio = NULL;
907 		struct sg_io_v4 hdr;
908 		int at_head;
909 
910 		if (copy_from_user(&hdr, uarg, sizeof(hdr)))
911 			return -EFAULT;
912 
913 		rq = bsg_map_hdr(bd, &hdr, file->f_mode & FMODE_WRITE);
914 		if (IS_ERR(rq))
915 			return PTR_ERR(rq);
916 
917 		bio = rq->bio;
918 		if (rq->next_rq)
919 			bidi_bio = rq->next_rq->bio;
920 
921 		at_head = (0 == (hdr.flags & BSG_FLAG_Q_AT_TAIL));
922 		blk_execute_rq(bd->queue, NULL, rq, at_head);
923 		ret = blk_complete_sgv4_hdr_rq(rq, &hdr, bio, bidi_bio);
924 
925 		if (copy_to_user(uarg, &hdr, sizeof(hdr)))
926 			return -EFAULT;
927 
928 		return ret;
929 	}
930 	/*
931 	 * block device ioctls
932 	 */
933 	default:
934 #if 0
935 		return ioctl_by_bdev(bd->bdev, cmd, arg);
936 #else
937 		return -ENOTTY;
938 #endif
939 	}
940 }
941 
942 static const struct file_operations bsg_fops = {
943 	.read		=	bsg_read,
944 	.write		=	bsg_write,
945 	.poll		=	bsg_poll,
946 	.open		=	bsg_open,
947 	.release	=	bsg_release,
948 	.unlocked_ioctl	=	bsg_ioctl,
949 	.owner		=	THIS_MODULE,
950 	.llseek		=	default_llseek,
951 };
952 
953 void bsg_unregister_queue(struct request_queue *q)
954 {
955 	struct bsg_class_device *bcd = &q->bsg_dev;
956 
957 	if (!bcd->class_dev)
958 		return;
959 
960 	mutex_lock(&bsg_mutex);
961 	idr_remove(&bsg_minor_idr, bcd->minor);
962 	if (q->kobj.sd)
963 		sysfs_remove_link(&q->kobj, "bsg");
964 	device_unregister(bcd->class_dev);
965 	bcd->class_dev = NULL;
966 	kref_put(&bcd->ref, bsg_kref_release_function);
967 	mutex_unlock(&bsg_mutex);
968 }
969 EXPORT_SYMBOL_GPL(bsg_unregister_queue);
970 
971 int bsg_register_queue(struct request_queue *q, struct device *parent,
972 		       const char *name, void (*release)(struct device *))
973 {
974 	struct bsg_class_device *bcd;
975 	dev_t dev;
976 	int ret;
977 	struct device *class_dev = NULL;
978 	const char *devname;
979 
980 	if (name)
981 		devname = name;
982 	else
983 		devname = dev_name(parent);
984 
985 	/*
986 	 * we need a proper transport to send commands, not a stacked device
987 	 */
988 	if (!queue_is_rq_based(q))
989 		return 0;
990 
991 	bcd = &q->bsg_dev;
992 	memset(bcd, 0, sizeof(*bcd));
993 
994 	mutex_lock(&bsg_mutex);
995 
996 	ret = idr_alloc(&bsg_minor_idr, bcd, 0, BSG_MAX_DEVS, GFP_KERNEL);
997 	if (ret < 0) {
998 		if (ret == -ENOSPC) {
999 			printk(KERN_ERR "bsg: too many bsg devices\n");
1000 			ret = -EINVAL;
1001 		}
1002 		goto unlock;
1003 	}
1004 
1005 	bcd->minor = ret;
1006 	bcd->queue = q;
1007 	bcd->parent = get_device(parent);
1008 	bcd->release = release;
1009 	kref_init(&bcd->ref);
1010 	dev = MKDEV(bsg_major, bcd->minor);
1011 	class_dev = device_create(bsg_class, parent, dev, NULL, "%s", devname);
1012 	if (IS_ERR(class_dev)) {
1013 		ret = PTR_ERR(class_dev);
1014 		goto put_dev;
1015 	}
1016 	bcd->class_dev = class_dev;
1017 
1018 	if (q->kobj.sd) {
1019 		ret = sysfs_create_link(&q->kobj, &bcd->class_dev->kobj, "bsg");
1020 		if (ret)
1021 			goto unregister_class_dev;
1022 	}
1023 
1024 	mutex_unlock(&bsg_mutex);
1025 	return 0;
1026 
1027 unregister_class_dev:
1028 	device_unregister(class_dev);
1029 put_dev:
1030 	put_device(parent);
1031 	idr_remove(&bsg_minor_idr, bcd->minor);
1032 unlock:
1033 	mutex_unlock(&bsg_mutex);
1034 	return ret;
1035 }
1036 EXPORT_SYMBOL_GPL(bsg_register_queue);
1037 
/* The single cdev covering all BSG_MAX_DEVS minors; set up in bsg_init(). */
static struct cdev bsg_cdev;
1039 
1040 static char *bsg_devnode(struct device *dev, umode_t *mode)
1041 {
1042 	return kasprintf(GFP_KERNEL, "bsg/%s", dev_name(dev));
1043 }
1044 
1045 static int __init bsg_init(void)
1046 {
1047 	int ret, i;
1048 	dev_t devid;
1049 
1050 	bsg_cmd_cachep = kmem_cache_create("bsg_cmd",
1051 				sizeof(struct bsg_command), 0, 0, NULL);
1052 	if (!bsg_cmd_cachep) {
1053 		printk(KERN_ERR "bsg: failed creating slab cache\n");
1054 		return -ENOMEM;
1055 	}
1056 
1057 	for (i = 0; i < BSG_LIST_ARRAY_SIZE; i++)
1058 		INIT_HLIST_HEAD(&bsg_device_list[i]);
1059 
1060 	bsg_class = class_create(THIS_MODULE, "bsg");
1061 	if (IS_ERR(bsg_class)) {
1062 		ret = PTR_ERR(bsg_class);
1063 		goto destroy_kmemcache;
1064 	}
1065 	bsg_class->devnode = bsg_devnode;
1066 
1067 	ret = alloc_chrdev_region(&devid, 0, BSG_MAX_DEVS, "bsg");
1068 	if (ret)
1069 		goto destroy_bsg_class;
1070 
1071 	bsg_major = MAJOR(devid);
1072 
1073 	cdev_init(&bsg_cdev, &bsg_fops);
1074 	ret = cdev_add(&bsg_cdev, MKDEV(bsg_major, 0), BSG_MAX_DEVS);
1075 	if (ret)
1076 		goto unregister_chrdev;
1077 
1078 	printk(KERN_INFO BSG_DESCRIPTION " version " BSG_VERSION
1079 	       " loaded (major %d)\n", bsg_major);
1080 	return 0;
1081 unregister_chrdev:
1082 	unregister_chrdev_region(MKDEV(bsg_major, 0), BSG_MAX_DEVS);
1083 destroy_bsg_class:
1084 	class_destroy(bsg_class);
1085 destroy_kmemcache:
1086 	kmem_cache_destroy(bsg_cmd_cachep);
1087 	return ret;
1088 }
1089 
/* Module metadata. */
MODULE_AUTHOR("Jens Axboe");
MODULE_DESCRIPTION(BSG_DESCRIPTION);
MODULE_LICENSE("GPL");

/* bsg_init() is run as a device-level initcall. */
device_initcall(bsg_init);
1095