xref: /openbmc/linux/arch/um/drivers/ubd_kern.c (revision 4ed91d48259d9ddd378424d008f2e6559f7e78f8)
1 /*
2  * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
3  * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
4  * Licensed under the GPL
5  */
6 
7 /* 2001-09-28...2002-04-17
8  * Partition stuff by James_McMechan@hotmail.com
9  * old style ubd by setting UBD_SHIFT to 0
10  * 2002-09-27...2002-10-18 massive tinkering for 2.5
11  * partitions have changed in 2.5
12  * 2003-01-29 more tinkering for 2.5.59-1
13  * This should now address the sysfs problems and has
14  * the symlink for devfs to allow for booting with
15  * the common /dev/ubd/discX/... names rather than
16  * only /dev/ubdN/discN this version also has lots of
17  * clean ups preparing for ubd-many.
18  * James McMechan
19  */
20 
21 #define UBD_SHIFT 4
22 
23 #include <linux/module.h>
24 #include <linux/init.h>
25 #include <linux/blkdev.h>
26 #include <linux/ata.h>
27 #include <linux/hdreg.h>
28 #include <linux/cdrom.h>
29 #include <linux/proc_fs.h>
30 #include <linux/seq_file.h>
31 #include <linux/ctype.h>
32 #include <linux/slab.h>
33 #include <linux/vmalloc.h>
34 #include <linux/platform_device.h>
35 #include <linux/scatterlist.h>
36 #include <asm/tlbflush.h>
37 #include <kern_util.h>
38 #include "mconsole_kern.h"
39 #include <init.h>
40 #include <irq_kern.h>
41 #include "ubd.h"
42 #include <os.h>
43 #include "cow.h"
44 
/* Kinds of operation an io_thread_req can describe. */
enum ubd_req {
	UBD_READ,
	UBD_WRITE,
	UBD_FLUSH
};

/*
 * Descriptor for one I/O operation.  ubd_handler() reads pointers to these
 * back from thread_fd, completes ->req for ->length bytes and then kfree()s
 * the descriptor.
 */
struct io_thread_req {
	struct request *req;		/* block-layer request to complete */
	enum ubd_req op;
	int fds[2];
	unsigned long offsets[2];
	unsigned long long offset;
	unsigned long length;		/* byte count given to blk_end_request() */
	char *buffer;
	int sectorsize;
	unsigned long sector_mask;
	unsigned long long cow_offset;
	unsigned long bitmap_words[2];
	int error;
};
61 
62 
63 static struct io_thread_req * (*irq_req_buffer)[];
64 static struct io_thread_req *irq_remainder;
65 static int irq_remainder_size;
66 
67 static struct io_thread_req * (*io_req_buffer)[];
68 static struct io_thread_req *io_remainder;
69 static int io_remainder_size;
70 
71 
72 
/*
 * Return non-zero if bit number @bit is set in the byte array @data.
 * Bits are numbered from the least significant bit of data[0] upwards.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	const __u64 index = bit / width;
	const int shift = bit % width;

	return (data[index] & (1 << shift)) != 0;
}
83 
/*
 * Set bit number @bit in the byte array @data.
 * Bits are numbered from the least significant bit of data[0] upwards.
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	const __u64 index = bit / width;
	const int shift = bit % width;

	data[index] |= (1 << shift);
}
94 /*End stuff from ubd_user.h*/
95 
96 #define DRIVER_NAME "uml-blkdev"
97 
98 static DEFINE_MUTEX(ubd_lock);
99 static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
100 
101 static int ubd_open(struct block_device *bdev, fmode_t mode);
102 static void ubd_release(struct gendisk *disk, fmode_t mode);
103 static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
104 		     unsigned int cmd, unsigned long arg);
105 static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
106 
107 #define MAX_DEV (16)
108 
109 static const struct block_device_operations ubd_blops = {
110         .owner		= THIS_MODULE,
111         .open		= ubd_open,
112         .release	= ubd_release,
113         .ioctl		= ubd_ioctl,
114 	.getgeo		= ubd_getgeo,
115 };
116 
117 /* Protected by ubd_lock */
118 static int fake_major = UBD_MAJOR;
119 static struct gendisk *ubd_gendisk[MAX_DEV];
120 static struct gendisk *fake_gendisk[MAX_DEV];
121 
122 #ifdef CONFIG_BLK_DEV_UBD_SYNC
123 #define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
124 					 .cl = 1 })
125 #else
126 #define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
127 					 .cl = 1 })
128 #endif
129 static struct openflags global_openflags = OPEN_FLAGS;
130 
/* Copy-on-write state kept for each ubd device. */
struct cow {
	char *file;			/* backing file name */
	int fd;				/* backing file fd */
	unsigned long *bitmap;		/* vmalloc()ed by ubd_open_dev() */
	unsigned long bitmap_len;	/* size used to allocate/read bitmap */
	int bitmap_offset;
	int data_offset;
};
141 
142 #define MAX_SG 64
143 
144 struct ubd {
145 	struct list_head restart;
146 	/* name (and fd, below) of the file opened for writing, either the
147 	 * backing or the cow file. */
148 	char *file;
149 	int count;
150 	int fd;
151 	__u64 size;
152 	struct openflags boot_openflags;
153 	struct openflags openflags;
154 	unsigned shared:1;
155 	unsigned no_cow:1;
156 	struct cow cow;
157 	struct platform_device pdev;
158 	struct request_queue *queue;
159 	spinlock_t lock;
160 	struct scatterlist sg[MAX_SG];
161 	struct request *request;
162 	int start_sg, end_sg;
163 	sector_t rq_pos;
164 };
165 
166 #define DEFAULT_COW { \
167 	.file =			NULL, \
168 	.fd =			-1,	\
169 	.bitmap =		NULL, \
170 	.bitmap_offset =	0, \
171 	.data_offset =		0, \
172 }
173 
174 #define DEFAULT_UBD { \
175 	.file = 		NULL, \
176 	.count =		0, \
177 	.fd =			-1, \
178 	.size =			-1, \
179 	.boot_openflags =	OPEN_FLAGS, \
180 	.openflags =		OPEN_FLAGS, \
181 	.no_cow =               0, \
182 	.shared =		0, \
183 	.cow =			DEFAULT_COW, \
184 	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
185 	.request =		NULL, \
186 	.start_sg =		0, \
187 	.end_sg =		0, \
188 	.rq_pos =		0, \
189 }
190 
191 /* Protected by ubd_lock */
192 static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
193 
194 /* Only changed by fake_ide_setup which is a setup */
195 static int fake_ide = 0;
196 static struct proc_dir_entry *proc_ide_root = NULL;
197 static struct proc_dir_entry *proc_ide = NULL;
198 
199 static void make_proc_ide(void)
200 {
201 	proc_ide_root = proc_mkdir("ide", NULL);
202 	proc_ide = proc_mkdir("ide0", proc_ide_root);
203 }
204 
/* seq_file show callback: the media type is always reported as "disk". */
static int fake_ide_media_proc_show(struct seq_file *m, void *v)
{
	seq_puts(m, "disk\n");

	return 0;
}
210 
211 static int fake_ide_media_proc_open(struct inode *inode, struct file *file)
212 {
213 	return single_open(file, fake_ide_media_proc_show, NULL);
214 }
215 
216 static const struct file_operations fake_ide_media_proc_fops = {
217 	.owner		= THIS_MODULE,
218 	.open		= fake_ide_media_proc_open,
219 	.read		= seq_read,
220 	.llseek		= seq_lseek,
221 	.release	= single_release,
222 };
223 
224 static void make_ide_entries(const char *dev_name)
225 {
226 	struct proc_dir_entry *dir, *ent;
227 	char name[64];
228 
229 	if(proc_ide_root == NULL) make_proc_ide();
230 
231 	dir = proc_mkdir(dev_name, proc_ide);
232 	if(!dir) return;
233 
234 	ent = proc_create("media", S_IRUGO, dir, &fake_ide_media_proc_fops);
235 	if(!ent) return;
236 	snprintf(name, sizeof(name), "ide0/%s", dev_name);
237 	proc_symlink(dev_name, proc_ide_root, name);
238 }
239 
240 static int fake_ide_setup(char *str)
241 {
242 	fake_ide = 1;
243 	return 1;
244 }
245 
246 __setup("fake_ide", fake_ide_setup);
247 
248 __uml_help(fake_ide_setup,
249 "fake_ide\n"
250 "    Create ide0 entries that map onto ubd devices.\n\n"
251 );
252 
/*
 * Parse a device unit at *ptr: either a number (any base understood by
 * simple_strtoul() with base 0) or a single letter 'a'..'z' meaning 0..25.
 * On success *ptr is advanced past the unit and the unit is returned;
 * otherwise -1 is returned and *ptr is left alone.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur < 'a') || (*cur > 'z'))
		return -1;

	*ptr = cur + 1;
	return *cur - 'a';
}
271 
272 /* If *index_out == -1 at exit, the passed option was a general one;
273  * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
274  * should not be freed on exit.
275  */
276 static int ubd_setup_common(char *str, int *index_out, char **error_out)
277 {
278 	struct ubd *ubd_dev;
279 	struct openflags flags = global_openflags;
280 	char *backing_file;
281 	int n, err = 0, i;
282 
283 	if(index_out) *index_out = -1;
284 	n = *str;
285 	if(n == '='){
286 		char *end;
287 		int major;
288 
289 		str++;
290 		if(!strcmp(str, "sync")){
291 			global_openflags = of_sync(global_openflags);
292 			goto out1;
293 		}
294 
295 		err = -EINVAL;
296 		major = simple_strtoul(str, &end, 0);
297 		if((*end != '\0') || (end == str)){
298 			*error_out = "Didn't parse major number";
299 			goto out1;
300 		}
301 
302 		mutex_lock(&ubd_lock);
303 		if (fake_major != UBD_MAJOR) {
304 			*error_out = "Can't assign a fake major twice";
305 			goto out1;
306 		}
307 
308 		fake_major = major;
309 
310 		printk(KERN_INFO "Setting extra ubd major number to %d\n",
311 		       major);
312 		err = 0;
313 	out1:
314 		mutex_unlock(&ubd_lock);
315 		return err;
316 	}
317 
318 	n = parse_unit(&str);
319 	if(n < 0){
320 		*error_out = "Couldn't parse device number";
321 		return -EINVAL;
322 	}
323 	if(n >= MAX_DEV){
324 		*error_out = "Device number out of range";
325 		return 1;
326 	}
327 
328 	err = -EBUSY;
329 	mutex_lock(&ubd_lock);
330 
331 	ubd_dev = &ubd_devs[n];
332 	if(ubd_dev->file != NULL){
333 		*error_out = "Device is already configured";
334 		goto out;
335 	}
336 
337 	if (index_out)
338 		*index_out = n;
339 
340 	err = -EINVAL;
341 	for (i = 0; i < sizeof("rscd="); i++) {
342 		switch (*str) {
343 		case 'r':
344 			flags.w = 0;
345 			break;
346 		case 's':
347 			flags.s = 1;
348 			break;
349 		case 'd':
350 			ubd_dev->no_cow = 1;
351 			break;
352 		case 'c':
353 			ubd_dev->shared = 1;
354 			break;
355 		case '=':
356 			str++;
357 			goto break_loop;
358 		default:
359 			*error_out = "Expected '=' or flag letter "
360 				"(r, s, c, or d)";
361 			goto out;
362 		}
363 		str++;
364 	}
365 
366 	if (*str == '=')
367 		*error_out = "Too many flags specified";
368 	else
369 		*error_out = "Missing '='";
370 	goto out;
371 
372 break_loop:
373 	backing_file = strchr(str, ',');
374 
375 	if (backing_file == NULL)
376 		backing_file = strchr(str, ':');
377 
378 	if(backing_file != NULL){
379 		if(ubd_dev->no_cow){
380 			*error_out = "Can't specify both 'd' and a cow file";
381 			goto out;
382 		}
383 		else {
384 			*backing_file = '\0';
385 			backing_file++;
386 		}
387 	}
388 	err = 0;
389 	ubd_dev->file = str;
390 	ubd_dev->cow.file = backing_file;
391 	ubd_dev->boot_openflags = flags;
392 out:
393 	mutex_unlock(&ubd_lock);
394 	return err;
395 }
396 
397 static int ubd_setup(char *str)
398 {
399 	char *error;
400 	int err;
401 
402 	err = ubd_setup_common(str, NULL, &error);
403 	if(err)
404 		printk(KERN_ERR "Failed to initialize device with \"%s\" : "
405 		       "%s\n", str, error);
406 	return 1;
407 }
408 
409 __setup("ubd", ubd_setup);
410 __uml_help(ubd_setup,
411 "ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
412 "    This is used to associate a device with a file in the underlying\n"
413 "    filesystem. When specifying two filenames, the first one is the\n"
414 "    COW name and the second is the backing file name. As separator you can\n"
415 "    use either a ':' or a ',': the first one allows writing things like;\n"
416 "	ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
417 "    while with a ',' the shell would not expand the 2nd '~'.\n"
418 "    When using only one filename, UML will detect whether to treat it like\n"
419 "    a COW file or a backing file. To override this detection, add the 'd'\n"
420 "    flag:\n"
421 "	ubd0d=BackingFile\n"
422 "    Usually, there is a filesystem in the file, but \n"
423 "    that's not required. Swap devices containing swap files can be\n"
424 "    specified like this. Also, a file which doesn't contain a\n"
425 "    filesystem can have its contents read in the virtual \n"
426 "    machine by running 'dd' on the device. <n> must be in the range\n"
427 "    0 to 7. Appending an 'r' to the number will cause that device\n"
428 "    to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
429 "    an 's' will cause data to be written to disk on the host immediately.\n"
430 "    'c' will cause the device to be treated as being shared between multiple\n"
431 "    UMLs and file locking will be turned off - this is appropriate for a\n"
432 "    cluster filesystem and inappropriate at almost all other times.\n\n"
433 );
434 
435 static int udb_setup(char *str)
436 {
437 	printk("udb%s specified on command line is almost certainly a ubd -> "
438 	       "udb TYPO\n", str);
439 	return 1;
440 }
441 
442 __setup("udb", udb_setup);
443 __uml_help(udb_setup,
444 "udb\n"
445 "    This option is here solely to catch ubd -> udb typos, which can be\n"
446 "    to impossible to catch visually unless you specifically look for\n"
447 "    them.  The only result of any option starting with 'udb' is an error\n"
448 "    in the boot output.\n\n"
449 );
450 
451 static void do_ubd_request(struct request_queue * q);
452 
453 /* Only changed by ubd_init, which is an initcall. */
454 static int thread_fd = -1;
455 static LIST_HEAD(restart);
456 
/*
 * Read several request pointers at a time from @fd into @request_buffer,
 * handling fractional reads if (and as) needed.
 *
 * @remainder is used as raw storage for the bytes of a trailing, incomplete
 * pointer and @remainder_size holds how many such bytes are pending; they
 * are prepended to the data read by the next call.  At most @max_recs
 * pointers' worth of bytes end up in @request_buffer.
 *
 * Returns the number of bytes of complete pointers now in @request_buffer,
 * or the os_read_file() result unchanged if that was <= 0 (pending bytes are
 * then kept for the next call).
 */
static int bulk_req_safe_read(
	int fd,
	struct io_thread_req * (*request_buffer)[],
	struct io_thread_req **remainder,
	int *remainder_size,
	int max_recs
	)
{
	const int rec_size = sizeof(struct io_thread_req *);
	char *buf = (char *) request_buffer;
	int carried = *remainder_size;
	int n, res, partial;

	/* Put the fragment left over from the previous read first. */
	if (carried > 0)
		memmove(buf, (char *) remainder, carried);

	res = os_read_file(fd, buf + carried, rec_size * max_recs - carried);
	if (res <= 0)
		return res;

	n = carried + res;
	partial = n % rec_size;

	/*
	 * Always record the new fragment size, including zero: the carried
	 * bytes have been consumed and must not be prepended again.
	 */
	*remainder_size = partial;
	if (partial > 0) {
		/*
		 * Read somehow returned not a multiple of the pointer size;
		 * theoretically possible, but never observed in the wild, so
		 * the read routine must be able to handle it.
		 */
		WARN(1, "UBD IPC read returned a partial result");
		n -= partial;
		memmove(remainder, buf + n, partial);
	}
	return n;
}
509 
510 /* Called without dev->lock held, and only in interrupt context. */
511 static void ubd_handler(void)
512 {
513 	struct ubd *ubd;
514 	struct list_head *list, *next_ele;
515 	unsigned long flags;
516 	int n;
517 	int count;
518 
519 	while(1){
520 		n = bulk_req_safe_read(
521 			thread_fd,
522 			irq_req_buffer,
523 			&irq_remainder,
524 			&irq_remainder_size,
525 			UBD_REQ_BUFFER_SIZE
526 		);
527 		if (n < 0) {
528 			if(n == -EAGAIN)
529 				break;
530 			printk(KERN_ERR "spurious interrupt in ubd_handler, "
531 			       "err = %d\n", -n);
532 			return;
533 		}
534 		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
535 			blk_end_request(
536 				(*irq_req_buffer)[count]->req,
537 				0,
538 				(*irq_req_buffer)[count]->length
539 			);
540 			kfree((*irq_req_buffer)[count]);
541 		}
542 	}
543 	reactivate_fd(thread_fd, UBD_IRQ);
544 
545 	list_for_each_safe(list, next_ele, &restart){
546 		ubd = container_of(list, struct ubd, restart);
547 		list_del_init(&ubd->restart);
548 		spin_lock_irqsave(&ubd->lock, flags);
549 		do_ubd_request(ubd->queue);
550 		spin_unlock_irqrestore(&ubd->lock, flags);
551 	}
552 }
553 
554 static irqreturn_t ubd_intr(int irq, void *dev)
555 {
556 	ubd_handler();
557 	return IRQ_HANDLED;
558 }
559 
560 /* Only changed by ubd_init, which is an initcall. */
561 static int io_pid = -1;
562 
563 static void kill_io_thread(void)
564 {
565 	if(io_pid != -1)
566 		os_kill_process(io_pid, 1);
567 }
568 
569 __uml_exitcall(kill_io_thread);
570 
571 static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
572 {
573 	char *file;
574 	int fd;
575 	int err;
576 
577 	__u32 version;
578 	__u32 align;
579 	char *backing_file;
580 	time_t mtime;
581 	unsigned long long size;
582 	int sector_size;
583 	int bitmap_offset;
584 
585 	if (ubd_dev->file && ubd_dev->cow.file) {
586 		file = ubd_dev->cow.file;
587 
588 		goto out;
589 	}
590 
591 	fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
592 	if (fd < 0)
593 		return fd;
594 
595 	err = read_cow_header(file_reader, &fd, &version, &backing_file, \
596 		&mtime, &size, &sector_size, &align, &bitmap_offset);
597 	os_close_file(fd);
598 
599 	if(err == -EINVAL)
600 		file = ubd_dev->file;
601 	else
602 		file = backing_file;
603 
604 out:
605 	return os_file_size(file, size_out);
606 }
607 
/*
 * Read @len bytes at @offset of @fd into @buf.
 * Returns 0 on success or the negative os_pread_file() error.
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_pread_file(fd, buf, len, offset);

	return (ret < 0) ? ret : 0;
}
618 
619 static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
620 {
621 	unsigned long modtime;
622 	unsigned long long actual;
623 	int err;
624 
625 	err = os_file_modtime(file, &modtime);
626 	if (err < 0) {
627 		printk(KERN_ERR "Failed to get modification time of backing "
628 		       "file \"%s\", err = %d\n", file, -err);
629 		return err;
630 	}
631 
632 	err = os_file_size(file, &actual);
633 	if (err < 0) {
634 		printk(KERN_ERR "Failed to get size of backing file \"%s\", "
635 		       "err = %d\n", file, -err);
636 		return err;
637 	}
638 
639 	if (actual != size) {
640 		/*__u64 can be a long on AMD64 and with %lu GCC complains; so
641 		 * the typecast.*/
642 		printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
643 		       "vs backing file\n", (unsigned long long) size, actual);
644 		return -EINVAL;
645 	}
646 	if (modtime != mtime) {
647 		printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
648 		       "backing file\n", mtime, modtime);
649 		return -EINVAL;
650 	}
651 	return 0;
652 }
653 
654 static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
655 {
656 	struct uml_stat buf1, buf2;
657 	int err;
658 
659 	if (from_cmdline == NULL)
660 		return 0;
661 	if (!strcmp(from_cmdline, from_cow))
662 		return 0;
663 
664 	err = os_stat_file(from_cmdline, &buf1);
665 	if (err < 0) {
666 		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
667 		       -err);
668 		return 0;
669 	}
670 	err = os_stat_file(from_cow, &buf2);
671 	if (err < 0) {
672 		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
673 		       -err);
674 		return 1;
675 	}
676 	if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
677 		return 0;
678 
679 	printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
680 	       "\"%s\" specified in COW header of \"%s\"\n",
681 	       from_cmdline, from_cow, cow);
682 	return 1;
683 }
684 
685 static int open_ubd_file(char *file, struct openflags *openflags, int shared,
686 		  char **backing_file_out, int *bitmap_offset_out,
687 		  unsigned long *bitmap_len_out, int *data_offset_out,
688 		  int *create_cow_out)
689 {
690 	time_t mtime;
691 	unsigned long long size;
692 	__u32 version, align;
693 	char *backing_file;
694 	int fd, err, sectorsize, asked_switch, mode = 0644;
695 
696 	fd = os_open_file(file, *openflags, mode);
697 	if (fd < 0) {
698 		if ((fd == -ENOENT) && (create_cow_out != NULL))
699 			*create_cow_out = 1;
700 		if (!openflags->w ||
701 		    ((fd != -EROFS) && (fd != -EACCES)))
702 			return fd;
703 		openflags->w = 0;
704 		fd = os_open_file(file, *openflags, mode);
705 		if (fd < 0)
706 			return fd;
707 	}
708 
709 	if (shared)
710 		printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
711 	else {
712 		err = os_lock_file(fd, openflags->w);
713 		if (err < 0) {
714 			printk(KERN_ERR "Failed to lock '%s', err = %d\n",
715 			       file, -err);
716 			goto out_close;
717 		}
718 	}
719 
720 	/* Successful return case! */
721 	if (backing_file_out == NULL)
722 		return fd;
723 
724 	err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
725 			      &size, &sectorsize, &align, bitmap_offset_out);
726 	if (err && (*backing_file_out != NULL)) {
727 		printk(KERN_ERR "Failed to read COW header from COW file "
728 		       "\"%s\", errno = %d\n", file, -err);
729 		goto out_close;
730 	}
731 	if (err)
732 		return fd;
733 
734 	asked_switch = path_requires_switch(*backing_file_out, backing_file,
735 					    file);
736 
737 	/* Allow switching only if no mismatch. */
738 	if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
739 						   mtime)) {
740 		printk(KERN_ERR "Switching backing file to '%s'\n",
741 		       *backing_file_out);
742 		err = write_cow_header(file, fd, *backing_file_out,
743 				       sectorsize, align, &size);
744 		if (err) {
745 			printk(KERN_ERR "Switch failed, errno = %d\n", -err);
746 			goto out_close;
747 		}
748 	} else {
749 		*backing_file_out = backing_file;
750 		err = backing_file_mismatch(*backing_file_out, size, mtime);
751 		if (err)
752 			goto out_close;
753 	}
754 
755 	cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
756 		  bitmap_len_out, data_offset_out);
757 
758 	return fd;
759  out_close:
760 	os_close_file(fd);
761 	return err;
762 }
763 
764 static int create_cow_file(char *cow_file, char *backing_file,
765 		    struct openflags flags,
766 		    int sectorsize, int alignment, int *bitmap_offset_out,
767 		    unsigned long *bitmap_len_out, int *data_offset_out)
768 {
769 	int err, fd;
770 
771 	flags.c = 1;
772 	fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
773 	if (fd < 0) {
774 		err = fd;
775 		printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
776 		       cow_file, -err);
777 		goto out;
778 	}
779 
780 	err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
781 			    bitmap_offset_out, bitmap_len_out,
782 			    data_offset_out);
783 	if (!err)
784 		return fd;
785 	os_close_file(fd);
786  out:
787 	return err;
788 }
789 
790 static void ubd_close_dev(struct ubd *ubd_dev)
791 {
792 	os_close_file(ubd_dev->fd);
793 	if(ubd_dev->cow.file == NULL)
794 		return;
795 
796 	os_close_file(ubd_dev->cow.fd);
797 	vfree(ubd_dev->cow.bitmap);
798 	ubd_dev->cow.bitmap = NULL;
799 }
800 
801 static int ubd_open_dev(struct ubd *ubd_dev)
802 {
803 	struct openflags flags;
804 	char **back_ptr;
805 	int err, create_cow, *create_ptr;
806 	int fd;
807 
808 	ubd_dev->openflags = ubd_dev->boot_openflags;
809 	create_cow = 0;
810 	create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
811 	back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
812 
813 	fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
814 				back_ptr, &ubd_dev->cow.bitmap_offset,
815 				&ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
816 				create_ptr);
817 
818 	if((fd == -ENOENT) && create_cow){
819 		fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
820 					  ubd_dev->openflags, 1 << 9, PAGE_SIZE,
821 					  &ubd_dev->cow.bitmap_offset,
822 					  &ubd_dev->cow.bitmap_len,
823 					  &ubd_dev->cow.data_offset);
824 		if(fd >= 0){
825 			printk(KERN_INFO "Creating \"%s\" as COW file for "
826 			       "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
827 		}
828 	}
829 
830 	if(fd < 0){
831 		printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
832 		       -fd);
833 		return fd;
834 	}
835 	ubd_dev->fd = fd;
836 
837 	if(ubd_dev->cow.file != NULL){
838 		blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
839 
840 		err = -ENOMEM;
841 		ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
842 		if(ubd_dev->cow.bitmap == NULL){
843 			printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
844 			goto error;
845 		}
846 		flush_tlb_kernel_vm();
847 
848 		err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
849 				      ubd_dev->cow.bitmap_offset,
850 				      ubd_dev->cow.bitmap_len);
851 		if(err < 0)
852 			goto error;
853 
854 		flags = ubd_dev->openflags;
855 		flags.w = 0;
856 		err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
857 				    NULL, NULL, NULL, NULL);
858 		if(err < 0) goto error;
859 		ubd_dev->cow.fd = err;
860 	}
861 	return 0;
862  error:
863 	os_close_file(ubd_dev->fd);
864 	return err;
865 }
866 
867 static void ubd_device_release(struct device *dev)
868 {
869 	struct ubd *ubd_dev = dev_get_drvdata(dev);
870 
871 	blk_cleanup_queue(ubd_dev->queue);
872 	*ubd_dev = ((struct ubd) DEFAULT_UBD);
873 }
874 
875 static int ubd_disk_register(int major, u64 size, int unit,
876 			     struct gendisk **disk_out)
877 {
878 	struct device *parent = NULL;
879 	struct gendisk *disk;
880 
881 	disk = alloc_disk(1 << UBD_SHIFT);
882 	if(disk == NULL)
883 		return -ENOMEM;
884 
885 	disk->major = major;
886 	disk->first_minor = unit << UBD_SHIFT;
887 	disk->fops = &ubd_blops;
888 	set_capacity(disk, size / 512);
889 	if (major == UBD_MAJOR)
890 		sprintf(disk->disk_name, "ubd%c", 'a' + unit);
891 	else
892 		sprintf(disk->disk_name, "ubd_fake%d", unit);
893 
894 	/* sysfs register (not for ide fake devices) */
895 	if (major == UBD_MAJOR) {
896 		ubd_devs[unit].pdev.id   = unit;
897 		ubd_devs[unit].pdev.name = DRIVER_NAME;
898 		ubd_devs[unit].pdev.dev.release = ubd_device_release;
899 		dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
900 		platform_device_register(&ubd_devs[unit].pdev);
901 		parent = &ubd_devs[unit].pdev.dev;
902 	}
903 
904 	disk->private_data = &ubd_devs[unit];
905 	disk->queue = ubd_devs[unit].queue;
906 	device_add_disk(parent, disk);
907 
908 	*disk_out = disk;
909 	return 0;
910 }
911 
/*
 * Round n up to the next multiple of 512 (1 << 9).  The argument is
 * parenthesised so expressions can be passed safely, and the mask is built
 * with ~ rather than by left-shifting -1, which is undefined behaviour.
 */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & ~((1 << 9) - 1))
913 
914 static int ubd_add(int n, char **error_out)
915 {
916 	struct ubd *ubd_dev = &ubd_devs[n];
917 	int err = 0;
918 
919 	if(ubd_dev->file == NULL)
920 		goto out;
921 
922 	err = ubd_file_size(ubd_dev, &ubd_dev->size);
923 	if(err < 0){
924 		*error_out = "Couldn't determine size of device's file";
925 		goto out;
926 	}
927 
928 	ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
929 
930 	INIT_LIST_HEAD(&ubd_dev->restart);
931 	sg_init_table(ubd_dev->sg, MAX_SG);
932 
933 	err = -ENOMEM;
934 	ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
935 	if (ubd_dev->queue == NULL) {
936 		*error_out = "Failed to initialize device queue";
937 		goto out;
938 	}
939 	ubd_dev->queue->queuedata = ubd_dev;
940 	blk_queue_write_cache(ubd_dev->queue, true, false);
941 
942 	blk_queue_max_segments(ubd_dev->queue, MAX_SG);
943 	err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
944 	if(err){
945 		*error_out = "Failed to register device";
946 		goto out_cleanup;
947 	}
948 
949 	if (fake_major != UBD_MAJOR)
950 		ubd_disk_register(fake_major, ubd_dev->size, n,
951 				  &fake_gendisk[n]);
952 
953 	/*
954 	 * Perhaps this should also be under the "if (fake_major)" above
955 	 * using the fake_disk->disk_name
956 	 */
957 	if (fake_ide)
958 		make_ide_entries(ubd_gendisk[n]->disk_name);
959 
960 	err = 0;
961 out:
962 	return err;
963 
964 out_cleanup:
965 	blk_cleanup_queue(ubd_dev->queue);
966 	goto out;
967 }
968 
969 static int ubd_config(char *str, char **error_out)
970 {
971 	int n, ret;
972 
973 	/* This string is possibly broken up and stored, so it's only
974 	 * freed if ubd_setup_common fails, or if only general options
975 	 * were set.
976 	 */
977 	str = kstrdup(str, GFP_KERNEL);
978 	if (str == NULL) {
979 		*error_out = "Failed to allocate memory";
980 		return -ENOMEM;
981 	}
982 
983 	ret = ubd_setup_common(str, &n, error_out);
984 	if (ret)
985 		goto err_free;
986 
987 	if (n == -1) {
988 		ret = 0;
989 		goto err_free;
990 	}
991 
992 	mutex_lock(&ubd_lock);
993 	ret = ubd_add(n, error_out);
994 	if (ret)
995 		ubd_devs[n].file = NULL;
996 	mutex_unlock(&ubd_lock);
997 
998 out:
999 	return ret;
1000 
1001 err_free:
1002 	kfree(str);
1003 	goto out;
1004 }
1005 
1006 static int ubd_get_config(char *name, char *str, int size, char **error_out)
1007 {
1008 	struct ubd *ubd_dev;
1009 	int n, len = 0;
1010 
1011 	n = parse_unit(&name);
1012 	if((n >= MAX_DEV) || (n < 0)){
1013 		*error_out = "ubd_get_config : device number out of range";
1014 		return -1;
1015 	}
1016 
1017 	ubd_dev = &ubd_devs[n];
1018 	mutex_lock(&ubd_lock);
1019 
1020 	if(ubd_dev->file == NULL){
1021 		CONFIG_CHUNK(str, size, len, "", 1);
1022 		goto out;
1023 	}
1024 
1025 	CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1026 
1027 	if(ubd_dev->cow.file != NULL){
1028 		CONFIG_CHUNK(str, size, len, ",", 0);
1029 		CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1030 	}
1031 	else CONFIG_CHUNK(str, size, len, "", 1);
1032 
1033  out:
1034 	mutex_unlock(&ubd_lock);
1035 	return len;
1036 }
1037 
1038 static int ubd_id(char **str, int *start_out, int *end_out)
1039 {
1040 	int n;
1041 
1042 	n = parse_unit(str);
1043 	*start_out = 0;
1044 	*end_out = MAX_DEV - 1;
1045 	return n;
1046 }
1047 
1048 static int ubd_remove(int n, char **error_out)
1049 {
1050 	struct gendisk *disk = ubd_gendisk[n];
1051 	struct ubd *ubd_dev;
1052 	int err = -ENODEV;
1053 
1054 	mutex_lock(&ubd_lock);
1055 
1056 	ubd_dev = &ubd_devs[n];
1057 
1058 	if(ubd_dev->file == NULL)
1059 		goto out;
1060 
1061 	/* you cannot remove a open disk */
1062 	err = -EBUSY;
1063 	if(ubd_dev->count > 0)
1064 		goto out;
1065 
1066 	ubd_gendisk[n] = NULL;
1067 	if(disk != NULL){
1068 		del_gendisk(disk);
1069 		put_disk(disk);
1070 	}
1071 
1072 	if(fake_gendisk[n] != NULL){
1073 		del_gendisk(fake_gendisk[n]);
1074 		put_disk(fake_gendisk[n]);
1075 		fake_gendisk[n] = NULL;
1076 	}
1077 
1078 	err = 0;
1079 	platform_device_unregister(&ubd_dev->pdev);
1080 out:
1081 	mutex_unlock(&ubd_lock);
1082 	return err;
1083 }
1084 
1085 /* All these are called by mconsole in process context and without
1086  * ubd-specific locks.  The structure itself is const except for .list.
1087  */
1088 static struct mc_device ubd_mc = {
1089 	.list		= LIST_HEAD_INIT(ubd_mc.list),
1090 	.name		= "ubd",
1091 	.config		= ubd_config,
1092 	.get_config	= ubd_get_config,
1093 	.id		= ubd_id,
1094 	.remove		= ubd_remove,
1095 };
1096 
1097 static int __init ubd_mc_init(void)
1098 {
1099 	mconsole_register_dev(&ubd_mc);
1100 	return 0;
1101 }
1102 
1103 __initcall(ubd_mc_init);
1104 
1105 static int __init ubd0_init(void)
1106 {
1107 	struct ubd *ubd_dev = &ubd_devs[0];
1108 
1109 	mutex_lock(&ubd_lock);
1110 	if(ubd_dev->file == NULL)
1111 		ubd_dev->file = "root_fs";
1112 	mutex_unlock(&ubd_lock);
1113 
1114 	return 0;
1115 }
1116 
1117 __initcall(ubd0_init);
1118 
1119 /* Used in ubd_init, which is an initcall */
1120 static struct platform_driver ubd_driver = {
1121 	.driver = {
1122 		.name  = DRIVER_NAME,
1123 	},
1124 };
1125 
1126 static int __init ubd_init(void)
1127 {
1128 	char *error;
1129 	int i, err;
1130 
1131 	if (register_blkdev(UBD_MAJOR, "ubd"))
1132 		return -1;
1133 
1134 	if (fake_major != UBD_MAJOR) {
1135 		char name[sizeof("ubd_nnn\0")];
1136 
1137 		snprintf(name, sizeof(name), "ubd_%d", fake_major);
1138 		if (register_blkdev(fake_major, "ubd"))
1139 			return -1;
1140 	}
1141 
1142 	irq_req_buffer = kmalloc(
1143 			sizeof(struct io_thread_req *) * UBD_REQ_BUFFER_SIZE,
1144 			GFP_KERNEL
1145 		);
1146 	irq_remainder = 0;
1147 
1148 	if (irq_req_buffer == NULL) {
1149 		printk(KERN_ERR "Failed to initialize ubd buffering\n");
1150 		return -1;
1151 	}
1152 	io_req_buffer = kmalloc(
1153 			sizeof(struct io_thread_req *) * UBD_REQ_BUFFER_SIZE,
1154 			GFP_KERNEL
1155 		);
1156 
1157 	io_remainder = 0;
1158 
1159 	if (io_req_buffer == NULL) {
1160 		printk(KERN_ERR "Failed to initialize ubd buffering\n");
1161 		return -1;
1162 	}
1163 	platform_driver_register(&ubd_driver);
1164 	mutex_lock(&ubd_lock);
1165 	for (i = 0; i < MAX_DEV; i++){
1166 		err = ubd_add(i, &error);
1167 		if(err)
1168 			printk(KERN_ERR "Failed to initialize ubd device %d :"
1169 			       "%s\n", i, error);
1170 	}
1171 	mutex_unlock(&ubd_lock);
1172 	return 0;
1173 }
1174 
1175 late_initcall(ubd_init);
1176 
1177 static int __init ubd_driver_init(void){
1178 	unsigned long stack;
1179 	int err;
1180 
1181 	/* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1182 	if(global_openflags.s){
1183 		printk(KERN_INFO "ubd: Synchronous mode\n");
1184 		/* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1185 		 * enough. So use anyway the io thread. */
1186 	}
1187 	stack = alloc_stack(0, 0);
1188 	io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
1189 				 &thread_fd);
1190 	if(io_pid < 0){
1191 		printk(KERN_ERR
1192 		       "ubd : Failed to start I/O thread (errno = %d) - "
1193 		       "falling back to synchronous I/O\n", -io_pid);
1194 		io_pid = -1;
1195 		return 0;
1196 	}
1197 	err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1198 			     0, "ubd", ubd_devs);
1199 	if(err != 0)
1200 		printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1201 	return 0;
1202 }
1203 
1204 device_initcall(ubd_driver_init);
1205 
1206 static int ubd_open(struct block_device *bdev, fmode_t mode)
1207 {
1208 	struct gendisk *disk = bdev->bd_disk;
1209 	struct ubd *ubd_dev = disk->private_data;
1210 	int err = 0;
1211 
1212 	mutex_lock(&ubd_mutex);
1213 	if(ubd_dev->count == 0){
1214 		err = ubd_open_dev(ubd_dev);
1215 		if(err){
1216 			printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1217 			       disk->disk_name, ubd_dev->file, -err);
1218 			goto out;
1219 		}
1220 	}
1221 	ubd_dev->count++;
1222 	set_disk_ro(disk, !ubd_dev->openflags.w);
1223 
1224 	/* This should no more be needed. And it didn't work anyway to exclude
1225 	 * read-write remounting of filesystems.*/
1226 	/*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1227 	        if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1228 	        err = -EROFS;
1229 	}*/
1230 out:
1231 	mutex_unlock(&ubd_mutex);
1232 	return err;
1233 }
1234 
1235 static void ubd_release(struct gendisk *disk, fmode_t mode)
1236 {
1237 	struct ubd *ubd_dev = disk->private_data;
1238 
1239 	mutex_lock(&ubd_mutex);
1240 	if(--ubd_dev->count == 0)
1241 		ubd_close_dev(ubd_dev);
1242 	mutex_unlock(&ubd_mutex);
1243 }
1244 
1245 static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1246 			  __u64 *cow_offset, unsigned long *bitmap,
1247 			  __u64 bitmap_offset, unsigned long *bitmap_words,
1248 			  __u64 bitmap_len)
1249 {
1250 	__u64 sector = io_offset >> 9;
1251 	int i, update_bitmap = 0;
1252 
1253 	for(i = 0; i < length >> 9; i++){
1254 		if(cow_mask != NULL)
1255 			ubd_set_bit(i, (unsigned char *) cow_mask);
1256 		if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1257 			continue;
1258 
1259 		update_bitmap = 1;
1260 		ubd_set_bit(sector + i, (unsigned char *) bitmap);
1261 	}
1262 
1263 	if(!update_bitmap)
1264 		return;
1265 
1266 	*cow_offset = sector / (sizeof(unsigned long) * 8);
1267 
1268 	/* This takes care of the case where we're exactly at the end of the
1269 	 * device, and *cow_offset + 1 is off the end.  So, just back it up
1270 	 * by one word.  Thanks to Lynn Kerby for the fix and James McMechan
1271 	 * for the original diagnosis.
1272 	 */
1273 	if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1274 					 sizeof(unsigned long)) - 1))
1275 		(*cow_offset)--;
1276 
1277 	bitmap_words[0] = bitmap[*cow_offset];
1278 	bitmap_words[1] = bitmap[*cow_offset + 1];
1279 
1280 	*cow_offset *= sizeof(unsigned long);
1281 	*cow_offset += bitmap_offset;
1282 }
1283 
1284 static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1285 		       __u64 bitmap_offset, __u64 bitmap_len)
1286 {
1287 	__u64 sector = req->offset >> 9;
1288 	int i;
1289 
1290 	if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1291 		panic("Operation too long");
1292 
1293 	if(req->op == UBD_READ) {
1294 		for(i = 0; i < req->length >> 9; i++){
1295 			if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1296 				ubd_set_bit(i, (unsigned char *)
1297 					    &req->sector_mask);
1298 		}
1299 	}
1300 	else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1301 			   &req->cow_offset, bitmap, bitmap_offset,
1302 			   req->bitmap_words, bitmap_len);
1303 }
1304 
1305 /* Called with dev->lock held */
1306 static void prepare_request(struct request *req, struct io_thread_req *io_req,
1307 			    unsigned long long offset, int page_offset,
1308 			    int len, struct page *page)
1309 {
1310 	struct gendisk *disk = req->rq_disk;
1311 	struct ubd *ubd_dev = disk->private_data;
1312 
1313 	io_req->req = req;
1314 	io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1315 		ubd_dev->fd;
1316 	io_req->fds[1] = ubd_dev->fd;
1317 	io_req->cow_offset = -1;
1318 	io_req->offset = offset;
1319 	io_req->length = len;
1320 	io_req->error = 0;
1321 	io_req->sector_mask = 0;
1322 
1323 	io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1324 	io_req->offsets[0] = 0;
1325 	io_req->offsets[1] = ubd_dev->cow.data_offset;
1326 	io_req->buffer = page_address(page) + page_offset;
1327 	io_req->sectorsize = 1 << 9;
1328 
1329 	if(ubd_dev->cow.file != NULL)
1330 		cowify_req(io_req, ubd_dev->cow.bitmap,
1331 			   ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
1332 
1333 }
1334 
1335 /* Called with dev->lock held */
1336 static void prepare_flush_request(struct request *req,
1337 				  struct io_thread_req *io_req)
1338 {
1339 	struct gendisk *disk = req->rq_disk;
1340 	struct ubd *ubd_dev = disk->private_data;
1341 
1342 	io_req->req = req;
1343 	io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1344 		ubd_dev->fd;
1345 	io_req->op = UBD_FLUSH;
1346 }
1347 
1348 static bool submit_request(struct io_thread_req *io_req, struct ubd *dev)
1349 {
1350 	int n = os_write_file(thread_fd, &io_req,
1351 			     sizeof(io_req));
1352 	if (n != sizeof(io_req)) {
1353 		if (n != -EAGAIN)
1354 			printk("write to io thread failed, "
1355 			       "errno = %d\n", -n);
1356 		else if (list_empty(&dev->restart))
1357 			list_add(&dev->restart, &restart);
1358 
1359 		kfree(io_req);
1360 		return false;
1361 	}
1362 	return true;
1363 }
1364 
1365 /* Called with dev->lock held */
1366 static void do_ubd_request(struct request_queue *q)
1367 {
1368 	struct io_thread_req *io_req;
1369 	struct request *req;
1370 
1371 	while(1){
1372 		struct ubd *dev = q->queuedata;
1373 		if(dev->request == NULL){
1374 			struct request *req = blk_fetch_request(q);
1375 			if(req == NULL)
1376 				return;
1377 
1378 			dev->request = req;
1379 			dev->rq_pos = blk_rq_pos(req);
1380 			dev->start_sg = 0;
1381 			dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1382 		}
1383 
1384 		req = dev->request;
1385 
1386 		if (req_op(req) == REQ_OP_FLUSH) {
1387 			io_req = kmalloc(sizeof(struct io_thread_req),
1388 					 GFP_ATOMIC);
1389 			if (io_req == NULL) {
1390 				if (list_empty(&dev->restart))
1391 					list_add(&dev->restart, &restart);
1392 				return;
1393 			}
1394 			prepare_flush_request(req, io_req);
1395 			if (submit_request(io_req, dev) == false)
1396 				return;
1397 		}
1398 
1399 		while(dev->start_sg < dev->end_sg){
1400 			struct scatterlist *sg = &dev->sg[dev->start_sg];
1401 
1402 			io_req = kmalloc(sizeof(struct io_thread_req),
1403 					 GFP_ATOMIC);
1404 			if(io_req == NULL){
1405 				if(list_empty(&dev->restart))
1406 					list_add(&dev->restart, &restart);
1407 				return;
1408 			}
1409 			prepare_request(req, io_req,
1410 					(unsigned long long)dev->rq_pos << 9,
1411 					sg->offset, sg->length, sg_page(sg));
1412 
1413 			if (submit_request(io_req, dev) == false)
1414 				return;
1415 
1416 			dev->rq_pos += sg->length >> 9;
1417 			dev->start_sg++;
1418 		}
1419 		dev->end_sg = 0;
1420 		dev->request = NULL;
1421 	}
1422 }
1423 
1424 static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1425 {
1426 	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1427 
1428 	geo->heads = 128;
1429 	geo->sectors = 32;
1430 	geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1431 	return 0;
1432 }
1433 
1434 static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1435 		     unsigned int cmd, unsigned long arg)
1436 {
1437 	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1438 	u16 ubd_id[ATA_ID_WORDS];
1439 
1440 	switch (cmd) {
1441 		struct cdrom_volctrl volume;
1442 	case HDIO_GET_IDENTITY:
1443 		memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1444 		ubd_id[ATA_ID_CYLS]	= ubd_dev->size / (128 * 32 * 512);
1445 		ubd_id[ATA_ID_HEADS]	= 128;
1446 		ubd_id[ATA_ID_SECTORS]	= 32;
1447 		if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1448 				 sizeof(ubd_id)))
1449 			return -EFAULT;
1450 		return 0;
1451 
1452 	case CDROMVOLREAD:
1453 		if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1454 			return -EFAULT;
1455 		volume.channel0 = 255;
1456 		volume.channel1 = 255;
1457 		volume.channel2 = 255;
1458 		volume.channel3 = 255;
1459 		if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1460 			return -EFAULT;
1461 		return 0;
1462 	}
1463 	return -EINVAL;
1464 }
1465 
1466 static int update_bitmap(struct io_thread_req *req)
1467 {
1468 	int n;
1469 
1470 	if(req->cow_offset == -1)
1471 		return 0;
1472 
1473 	n = os_pwrite_file(req->fds[1], &req->bitmap_words,
1474 			  sizeof(req->bitmap_words), req->cow_offset);
1475 	if(n != sizeof(req->bitmap_words)){
1476 		printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1477 		       req->fds[1]);
1478 		return 1;
1479 	}
1480 
1481 	return 0;
1482 }
1483 
1484 static void do_io(struct io_thread_req *req)
1485 {
1486 	char *buf;
1487 	unsigned long len;
1488 	int n, nsectors, start, end, bit;
1489 	__u64 off;
1490 
1491 	if (req->op == UBD_FLUSH) {
1492 		/* fds[0] is always either the rw image or our cow file */
1493 		n = os_sync_file(req->fds[0]);
1494 		if (n != 0) {
1495 			printk("do_io - sync failed err = %d "
1496 			       "fd = %d\n", -n, req->fds[0]);
1497 			req->error = 1;
1498 		}
1499 		return;
1500 	}
1501 
1502 	nsectors = req->length / req->sectorsize;
1503 	start = 0;
1504 	do {
1505 		bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1506 		end = start;
1507 		while((end < nsectors) &&
1508 		      (ubd_test_bit(end, (unsigned char *)
1509 				    &req->sector_mask) == bit))
1510 			end++;
1511 
1512 		off = req->offset + req->offsets[bit] +
1513 			start * req->sectorsize;
1514 		len = (end - start) * req->sectorsize;
1515 		buf = &req->buffer[start * req->sectorsize];
1516 
1517 		if(req->op == UBD_READ){
1518 			n = 0;
1519 			do {
1520 				buf = &buf[n];
1521 				len -= n;
1522 				n = os_pread_file(req->fds[bit], buf, len, off);
1523 				if (n < 0) {
1524 					printk("do_io - read failed, err = %d "
1525 					       "fd = %d\n", -n, req->fds[bit]);
1526 					req->error = 1;
1527 					return;
1528 				}
1529 			} while((n < len) && (n != 0));
1530 			if (n < len) memset(&buf[n], 0, len - n);
1531 		} else {
1532 			n = os_pwrite_file(req->fds[bit], buf, len, off);
1533 			if(n != len){
1534 				printk("do_io - write failed err = %d "
1535 				       "fd = %d\n", -n, req->fds[bit]);
1536 				req->error = 1;
1537 				return;
1538 			}
1539 		}
1540 
1541 		start = end;
1542 	} while(start < nsectors);
1543 
1544 	req->error = update_bitmap(req);
1545 }
1546 
/*
 * Descriptor the I/O thread reads requests from and writes completions to.
 * Per the original note it is changed in start_io_thread(); in this file
 * that function is called only from ubd_driver_init(), which runs as an
 * initcall.
 */
int kernel_fd = -1;

/* Incremented by the I/O thread for every request it handles; never read. */
static int io_count;
1554 
1555 int io_thread(void *arg)
1556 {
1557 	int n, count, written, res;
1558 
1559 	os_fix_helper_signals();
1560 
1561 	while(1){
1562 		n = bulk_req_safe_read(
1563 			kernel_fd,
1564 			io_req_buffer,
1565 			&io_remainder,
1566 			&io_remainder_size,
1567 			UBD_REQ_BUFFER_SIZE
1568 		);
1569 		if (n < 0) {
1570 			if (n == -EAGAIN) {
1571 				ubd_read_poll(-1);
1572 				continue;
1573 			} else {
1574 				printk("io_thread - read failed, fd = %d, "
1575 				       "err = %d,"
1576 				       "reminder = %d\n",
1577 				       kernel_fd, -n, io_remainder_size);
1578 			}
1579 		}
1580 
1581 		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
1582 			io_count++;
1583 			do_io((*io_req_buffer)[count]);
1584 		}
1585 
1586 		written = 0;
1587 
1588 		do {
1589 			res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n);
1590 			if (res > 0) {
1591 				written += res;
1592 			} else {
1593 				if (res != -EAGAIN) {
1594 					printk("io_thread - read failed, fd = %d, "
1595 					       "err = %d\n", kernel_fd, -n);
1596 				}
1597 			}
1598 			if (written < n) {
1599 				ubd_write_poll(-1);
1600 			}
1601 		} while (written < n);
1602 	}
1603 
1604 	return 0;
1605 }
1606