1 /*
2  * Copyright (C) 2006-2009 Red Hat, Inc.
3  *
4  * This file is released under the LGPL.
5  */
6 
7 #include <linux/bio.h>
8 #include <linux/dm-dirty-log.h>
9 #include <linux/device-mapper.h>
10 #include <linux/dm-log-userspace.h>
11 
12 #include "dm-log-userspace-transfer.h"
13 
/*
 * One queued mark/clear request.  Entries are allocated from
 * flush_entry_pool, linked onto log_c.flush_list and sent to the
 * userspace log server by userspace_flush().
 */
struct flush_entry {
	int type;		/* DM_ULOG_MARK_REGION or DM_ULOG_CLEAR_REGION */
	region_t region;	/* region the request applies to */
	struct list_head list;	/* link in log_c.flush_list */
};
19 
/*
 * Per-log context.  Allocated by userspace_ctr(), stored in
 * dm_dirty_log->context and released by userspace_dtr().
 */
struct log_c {
	struct dm_target *ti;
	uint32_t region_size;	/* answer to DM_ULOG_GET_REGION_SIZE */
	region_t region_count;	/* dm_sector_div_up(ti->len, region_size) */
	char uuid[DM_UUID_LEN];	/* first constructor argument */

	char *usr_argv_str;	/* string sent with DM_ULOG_CTR */
	uint32_t usr_argc;	/* argc as passed to userspace_ctr() */

	/*
	 * in_sync_hint gets set when doing is_remote_recovering.  It
	 * represents the first region that needs recovery.  IOW, the
	 * first zero bit of sync_bits.  This can be useful to limit
	 * traffic for calls like is_remote_recovering and get_resync_work,
	 * but take care in its use for anything else.
	 */
	uint64_t in_sync_hint;

	spinlock_t flush_lock;	/* taken around every flush_list update */
	struct list_head flush_list;  /* only for clear and mark requests */
};
41 
/*
 * Pool of struct flush_entry shared by every log instance; created in
 * userspace_dirty_log_init() and destroyed in userspace_dirty_log_exit().
 */
static mempool_t *flush_entry_pool;
43 
44 static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data)
45 {
46 	return kmalloc(sizeof(struct flush_entry), gfp_mask);
47 }
48 
/*
 * Release callback for flush_entry_pool: frees an element obtained
 * from flush_entry_alloc() (pool_data is unused).
 */
static void flush_entry_free(void *element, void *pool_data)
{
	struct flush_entry *entry = element;

	kfree(entry);
}
53 
54 static int userspace_do_request(struct log_c *lc, const char *uuid,
55 				int request_type, char *data, size_t data_size,
56 				char *rdata, size_t *rdata_size)
57 {
58 	int r;
59 
60 	/*
61 	 * If the server isn't there, -ESRCH is returned,
62 	 * and we must keep trying until the server is
63 	 * restored.
64 	 */
65 retry:
66 	r = dm_consult_userspace(uuid, request_type, data,
67 				 data_size, rdata, rdata_size);
68 
69 	if (r != -ESRCH)
70 		return r;
71 
72 	DMERR(" Userspace log server not found.");
73 	while (1) {
74 		set_current_state(TASK_INTERRUPTIBLE);
75 		schedule_timeout(2*HZ);
76 		DMWARN("Attempting to contact userspace log server...");
77 		r = dm_consult_userspace(uuid, DM_ULOG_CTR, lc->usr_argv_str,
78 					 strlen(lc->usr_argv_str) + 1,
79 					 NULL, NULL);
80 		if (!r)
81 			break;
82 	}
83 	DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete");
84 	r = dm_consult_userspace(uuid, DM_ULOG_RESUME, NULL,
85 				 0, NULL, NULL);
86 	if (!r)
87 		goto retry;
88 
89 	DMERR("Error trying to resume userspace log: %d", r);
90 
91 	return -ESRCH;
92 }
93 
94 static int build_constructor_string(struct dm_target *ti,
95 				    unsigned argc, char **argv,
96 				    char **ctr_str)
97 {
98 	int i, str_size;
99 	char *str = NULL;
100 
101 	*ctr_str = NULL;
102 
103 	for (i = 0, str_size = 0; i < argc; i++)
104 		str_size += strlen(argv[i]) + 1; /* +1 for space between args */
105 
106 	str_size += 20; /* Max number of chars in a printed u64 number */
107 
108 	str = kzalloc(str_size, GFP_KERNEL);
109 	if (!str) {
110 		DMWARN("Unable to allocate memory for constructor string");
111 		return -ENOMEM;
112 	}
113 
114 	for (i = 0, str_size = 0; i < argc; i++)
115 		str_size += sprintf(str + str_size, "%s ", argv[i]);
116 	str_size += sprintf(str + str_size, "%llu",
117 			    (unsigned long long)ti->len);
118 
119 	*ctr_str = str;
120 	return str_size;
121 }
122 
123 /*
124  * userspace_ctr
125  *
126  * argv contains:
127  *	<UUID> <other args>
128  * Where 'other args' is the userspace implementation specific log
129  * arguments.  An example might be:
130  *	<UUID> clustered_disk <arg count> <log dev> <region_size> [[no]sync]
131  *
132  * So, this module will strip off the <UUID> for identification purposes
133  * when communicating with userspace about a log; but will pass on everything
134  * else.
135  */
136 static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
137 			 unsigned argc, char **argv)
138 {
139 	int r = 0;
140 	int str_size;
141 	char *ctr_str = NULL;
142 	struct log_c *lc = NULL;
143 	uint64_t rdata;
144 	size_t rdata_size = sizeof(rdata);
145 
146 	if (argc < 3) {
147 		DMWARN("Too few arguments to userspace dirty log");
148 		return -EINVAL;
149 	}
150 
151 	lc = kmalloc(sizeof(*lc), GFP_KERNEL);
152 	if (!lc) {
153 		DMWARN("Unable to allocate userspace log context.");
154 		return -ENOMEM;
155 	}
156 
157 	lc->ti = ti;
158 
159 	if (strlen(argv[0]) > (DM_UUID_LEN - 1)) {
160 		DMWARN("UUID argument too long.");
161 		kfree(lc);
162 		return -EINVAL;
163 	}
164 
165 	strncpy(lc->uuid, argv[0], DM_UUID_LEN);
166 	spin_lock_init(&lc->flush_lock);
167 	INIT_LIST_HEAD(&lc->flush_list);
168 
169 	str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str);
170 	if (str_size < 0) {
171 		kfree(lc);
172 		return str_size;
173 	}
174 
175 	/* Send table string */
176 	r = dm_consult_userspace(lc->uuid, DM_ULOG_CTR,
177 				 ctr_str, str_size, NULL, NULL);
178 
179 	if (r == -ESRCH) {
180 		DMERR("Userspace log server not found");
181 		goto out;
182 	}
183 
184 	/* Since the region size does not change, get it now */
185 	rdata_size = sizeof(rdata);
186 	r = dm_consult_userspace(lc->uuid, DM_ULOG_GET_REGION_SIZE,
187 				 NULL, 0, (char *)&rdata, &rdata_size);
188 
189 	if (r) {
190 		DMERR("Failed to get region size of dirty log");
191 		goto out;
192 	}
193 
194 	lc->region_size = (uint32_t)rdata;
195 	lc->region_count = dm_sector_div_up(ti->len, lc->region_size);
196 
197 out:
198 	if (r) {
199 		kfree(lc);
200 		kfree(ctr_str);
201 	} else {
202 		lc->usr_argv_str = ctr_str;
203 		lc->usr_argc = argc;
204 		log->context = lc;
205 	}
206 
207 	return r;
208 }
209 
210 static void userspace_dtr(struct dm_dirty_log *log)
211 {
212 	int r;
213 	struct log_c *lc = log->context;
214 
215 	r = dm_consult_userspace(lc->uuid, DM_ULOG_DTR,
216 				 NULL, 0,
217 				 NULL, NULL);
218 
219 	kfree(lc->usr_argv_str);
220 	kfree(lc);
221 
222 	return;
223 }
224 
225 static int userspace_presuspend(struct dm_dirty_log *log)
226 {
227 	int r;
228 	struct log_c *lc = log->context;
229 
230 	r = dm_consult_userspace(lc->uuid, DM_ULOG_PRESUSPEND,
231 				 NULL, 0,
232 				 NULL, NULL);
233 
234 	return r;
235 }
236 
237 static int userspace_postsuspend(struct dm_dirty_log *log)
238 {
239 	int r;
240 	struct log_c *lc = log->context;
241 
242 	r = dm_consult_userspace(lc->uuid, DM_ULOG_POSTSUSPEND,
243 				 NULL, 0,
244 				 NULL, NULL);
245 
246 	return r;
247 }
248 
249 static int userspace_resume(struct dm_dirty_log *log)
250 {
251 	int r;
252 	struct log_c *lc = log->context;
253 
254 	lc->in_sync_hint = 0;
255 	r = dm_consult_userspace(lc->uuid, DM_ULOG_RESUME,
256 				 NULL, 0,
257 				 NULL, NULL);
258 
259 	return r;
260 }
261 
262 static uint32_t userspace_get_region_size(struct dm_dirty_log *log)
263 {
264 	struct log_c *lc = log->context;
265 
266 	return lc->region_size;
267 }
268 
269 /*
270  * userspace_is_clean
271  *
272  * Check whether a region is clean.  If there is any sort of
273  * failure when consulting the server, we return not clean.
274  *
275  * Returns: 1 if clean, 0 otherwise
276  */
277 static int userspace_is_clean(struct dm_dirty_log *log, region_t region)
278 {
279 	int r;
280 	uint64_t region64 = (uint64_t)region;
281 	int64_t is_clean;
282 	size_t rdata_size;
283 	struct log_c *lc = log->context;
284 
285 	rdata_size = sizeof(is_clean);
286 	r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN,
287 				 (char *)&region64, sizeof(region64),
288 				 (char *)&is_clean, &rdata_size);
289 
290 	return (r) ? 0 : (int)is_clean;
291 }
292 
293 /*
294  * userspace_in_sync
295  *
296  * Check if the region is in-sync.  If there is any sort
297  * of failure when consulting the server, we assume that
298  * the region is not in sync.
299  *
300  * If 'can_block' is set, return immediately
301  *
302  * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK
303  */
304 static int userspace_in_sync(struct dm_dirty_log *log, region_t region,
305 			     int can_block)
306 {
307 	int r;
308 	uint64_t region64 = region;
309 	int64_t in_sync;
310 	size_t rdata_size;
311 	struct log_c *lc = log->context;
312 
313 	/*
314 	 * We can never respond directly - even if in_sync_hint is
315 	 * set.  This is because another machine could see a device
316 	 * failure and mark the region out-of-sync.  If we don't go
317 	 * to userspace to ask, we might think the region is in-sync
318 	 * and allow a read to pick up data that is stale.  (This is
319 	 * very unlikely if a device actually fails; but it is very
320 	 * likely if a connection to one device from one machine fails.)
321 	 *
322 	 * There still might be a problem if the mirror caches the region
323 	 * state as in-sync... but then this call would not be made.  So,
324 	 * that is a mirror problem.
325 	 */
326 	if (!can_block)
327 		return -EWOULDBLOCK;
328 
329 	rdata_size = sizeof(in_sync);
330 	r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC,
331 				 (char *)&region64, sizeof(region64),
332 				 (char *)&in_sync, &rdata_size);
333 	return (r) ? 0 : (int)in_sync;
334 }
335 
336 /*
337  * userspace_flush
338  *
339  * This function is ok to block.
340  * The flush happens in two stages.  First, it sends all
341  * clear/mark requests that are on the list.  Then it
342  * tells the server to commit them.  This gives the
343  * server a chance to optimise the commit, instead of
344  * doing it for every request.
345  *
346  * Additionally, we could implement another thread that
347  * sends the requests up to the server - reducing the
348  * load on flush.  Then the flush would have less in
349  * the list and be responsible for the finishing commit.
350  *
351  * Returns: 0 on success, < 0 on failure
352  */
353 static int userspace_flush(struct dm_dirty_log *log)
354 {
355 	int r = 0;
356 	unsigned long flags;
357 	struct log_c *lc = log->context;
358 	LIST_HEAD(flush_list);
359 	struct flush_entry *fe, *tmp_fe;
360 
361 	spin_lock_irqsave(&lc->flush_lock, flags);
362 	list_splice_init(&lc->flush_list, &flush_list);
363 	spin_unlock_irqrestore(&lc->flush_lock, flags);
364 
365 	if (list_empty(&flush_list))
366 		return 0;
367 
368 	/*
369 	 * FIXME: Count up requests, group request types,
370 	 * allocate memory to stick all requests in and
371 	 * send to server in one go.  Failing the allocation,
372 	 * do it one by one.
373 	 */
374 
375 	list_for_each_entry(fe, &flush_list, list) {
376 		r = userspace_do_request(lc, lc->uuid, fe->type,
377 					 (char *)&fe->region,
378 					 sizeof(fe->region),
379 					 NULL, NULL);
380 		if (r)
381 			goto fail;
382 	}
383 
384 	r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
385 				 NULL, 0, NULL, NULL);
386 
387 fail:
388 	/*
389 	 * We can safely remove these entries, even if failure.
390 	 * Calling code will receive an error and will know that
391 	 * the log facility has failed.
392 	 */
393 	list_for_each_entry_safe(fe, tmp_fe, &flush_list, list) {
394 		list_del(&fe->list);
395 		mempool_free(fe, flush_entry_pool);
396 	}
397 
398 	if (r)
399 		dm_table_event(lc->ti->table);
400 
401 	return r;
402 }
403 
404 /*
405  * userspace_mark_region
406  *
407  * This function should avoid blocking unless absolutely required.
408  * (Memory allocation is valid for blocking.)
409  */
410 static void userspace_mark_region(struct dm_dirty_log *log, region_t region)
411 {
412 	unsigned long flags;
413 	struct log_c *lc = log->context;
414 	struct flush_entry *fe;
415 
416 	/* Wait for an allocation, but _never_ fail */
417 	fe = mempool_alloc(flush_entry_pool, GFP_NOIO);
418 	BUG_ON(!fe);
419 
420 	spin_lock_irqsave(&lc->flush_lock, flags);
421 	fe->type = DM_ULOG_MARK_REGION;
422 	fe->region = region;
423 	list_add(&fe->list, &lc->flush_list);
424 	spin_unlock_irqrestore(&lc->flush_lock, flags);
425 
426 	return;
427 }
428 
429 /*
430  * userspace_clear_region
431  *
432  * This function must not block.
433  * So, the alloc can't block.  In the worst case, it is ok to
434  * fail.  It would simply mean we can't clear the region.
435  * Does nothing to current sync context, but does mean
436  * the region will be re-sync'ed on a reload of the mirror
437  * even though it is in-sync.
438  */
439 static void userspace_clear_region(struct dm_dirty_log *log, region_t region)
440 {
441 	unsigned long flags;
442 	struct log_c *lc = log->context;
443 	struct flush_entry *fe;
444 
445 	/*
446 	 * If we fail to allocate, we skip the clearing of
447 	 * the region.  This doesn't hurt us in any way, except
448 	 * to cause the region to be resync'ed when the
449 	 * device is activated next time.
450 	 */
451 	fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC);
452 	if (!fe) {
453 		DMERR("Failed to allocate memory to clear region.");
454 		return;
455 	}
456 
457 	spin_lock_irqsave(&lc->flush_lock, flags);
458 	fe->type = DM_ULOG_CLEAR_REGION;
459 	fe->region = region;
460 	list_add(&fe->list, &lc->flush_list);
461 	spin_unlock_irqrestore(&lc->flush_lock, flags);
462 
463 	return;
464 }
465 
466 /*
467  * userspace_get_resync_work
468  *
469  * Get a region that needs recovery.  It is valid to return
470  * an error for this function.
471  *
472  * Returns: 1 if region filled, 0 if no work, <0 on error
473  */
474 static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region)
475 {
476 	int r;
477 	size_t rdata_size;
478 	struct log_c *lc = log->context;
479 	struct {
480 		int64_t i; /* 64-bit for mix arch compatibility */
481 		region_t r;
482 	} pkg;
483 
484 	if (lc->in_sync_hint >= lc->region_count)
485 		return 0;
486 
487 	rdata_size = sizeof(pkg);
488 	r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK,
489 				 NULL, 0,
490 				 (char *)&pkg, &rdata_size);
491 
492 	*region = pkg.r;
493 	return (r) ? r : (int)pkg.i;
494 }
495 
496 /*
497  * userspace_set_region_sync
498  *
499  * Set the sync status of a given region.  This function
500  * must not fail.
501  */
502 static void userspace_set_region_sync(struct dm_dirty_log *log,
503 				      region_t region, int in_sync)
504 {
505 	int r;
506 	struct log_c *lc = log->context;
507 	struct {
508 		region_t r;
509 		int64_t i;
510 	} pkg;
511 
512 	pkg.r = region;
513 	pkg.i = (int64_t)in_sync;
514 
515 	r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC,
516 				 (char *)&pkg, sizeof(pkg),
517 				 NULL, NULL);
518 
519 	/*
520 	 * It would be nice to be able to report failures.
521 	 * However, it is easy emough to detect and resolve.
522 	 */
523 	return;
524 }
525 
526 /*
527  * userspace_get_sync_count
528  *
529  * If there is any sort of failure when consulting the server,
530  * we assume that the sync count is zero.
531  *
532  * Returns: sync count on success, 0 on failure
533  */
534 static region_t userspace_get_sync_count(struct dm_dirty_log *log)
535 {
536 	int r;
537 	size_t rdata_size;
538 	uint64_t sync_count;
539 	struct log_c *lc = log->context;
540 
541 	rdata_size = sizeof(sync_count);
542 	r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT,
543 				 NULL, 0,
544 				 (char *)&sync_count, &rdata_size);
545 
546 	if (r)
547 		return 0;
548 
549 	if (sync_count >= lc->region_count)
550 		lc->in_sync_hint = lc->region_count;
551 
552 	return (region_t)sync_count;
553 }
554 
555 /*
556  * userspace_status
557  *
558  * Returns: amount of space consumed
559  */
560 static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
561 			    char *result, unsigned maxlen)
562 {
563 	int r = 0;
564 	size_t sz = (size_t)maxlen;
565 	struct log_c *lc = log->context;
566 
567 	switch (status_type) {
568 	case STATUSTYPE_INFO:
569 		r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO,
570 					 NULL, 0,
571 					 result, &sz);
572 
573 		if (r) {
574 			sz = 0;
575 			DMEMIT("%s 1 COM_FAILURE", log->type->name);
576 		}
577 		break;
578 	case STATUSTYPE_TABLE:
579 		sz = 0;
580 		DMEMIT("%s %u %s %s", log->type->name, lc->usr_argc + 1,
581 		       lc->uuid, lc->usr_argv_str);
582 		break;
583 	}
584 	return (r) ? 0 : (int)sz;
585 }
586 
587 /*
588  * userspace_is_remote_recovering
589  *
590  * Returns: 1 if region recovering, 0 otherwise
591  */
592 static int userspace_is_remote_recovering(struct dm_dirty_log *log,
593 					  region_t region)
594 {
595 	int r;
596 	uint64_t region64 = region;
597 	struct log_c *lc = log->context;
598 	static unsigned long long limit;
599 	struct {
600 		int64_t is_recovering;
601 		uint64_t in_sync_hint;
602 	} pkg;
603 	size_t rdata_size = sizeof(pkg);
604 
605 	/*
606 	 * Once the mirror has been reported to be in-sync,
607 	 * it will never again ask for recovery work.  So,
608 	 * we can safely say there is not a remote machine
609 	 * recovering if the device is in-sync.  (in_sync_hint
610 	 * must be reset at resume time.)
611 	 */
612 	if (region < lc->in_sync_hint)
613 		return 0;
614 	else if (jiffies < limit)
615 		return 1;
616 
617 	limit = jiffies + (HZ / 4);
618 	r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING,
619 				 (char *)&region64, sizeof(region64),
620 				 (char *)&pkg, &rdata_size);
621 	if (r)
622 		return 1;
623 
624 	lc->in_sync_hint = pkg.in_sync_hint;
625 
626 	return (int)pkg.is_recovering;
627 }
628 
/*
 * Operations table for the "userspace" dirty log type.  Registered in
 * userspace_dirty_log_init() and unregistered in
 * userspace_dirty_log_exit().
 */
static struct dm_dirty_log_type _userspace_type = {
	.name = "userspace",
	.module = THIS_MODULE,
	.ctr = userspace_ctr,
	.dtr = userspace_dtr,
	.presuspend = userspace_presuspend,
	.postsuspend = userspace_postsuspend,
	.resume = userspace_resume,
	.get_region_size = userspace_get_region_size,
	.is_clean = userspace_is_clean,
	.in_sync = userspace_in_sync,
	.flush = userspace_flush,
	.mark_region = userspace_mark_region,
	.clear_region = userspace_clear_region,
	.get_resync_work = userspace_get_resync_work,
	.set_region_sync = userspace_set_region_sync,
	.get_sync_count = userspace_get_sync_count,
	.status = userspace_status,
	.is_remote_recovering = userspace_is_remote_recovering,
};
649 
650 static int __init userspace_dirty_log_init(void)
651 {
652 	int r = 0;
653 
654 	flush_entry_pool = mempool_create(100, flush_entry_alloc,
655 					  flush_entry_free, NULL);
656 
657 	if (!flush_entry_pool) {
658 		DMWARN("Unable to create flush_entry_pool:  No memory.");
659 		return -ENOMEM;
660 	}
661 
662 	r = dm_ulog_tfr_init();
663 	if (r) {
664 		DMWARN("Unable to initialize userspace log communications");
665 		mempool_destroy(flush_entry_pool);
666 		return r;
667 	}
668 
669 	r = dm_dirty_log_type_register(&_userspace_type);
670 	if (r) {
671 		DMWARN("Couldn't register userspace dirty log type");
672 		dm_ulog_tfr_exit();
673 		mempool_destroy(flush_entry_pool);
674 		return r;
675 	}
676 
677 	DMINFO("version 1.0.0 loaded");
678 	return 0;
679 }
680 
681 static void __exit userspace_dirty_log_exit(void)
682 {
683 	dm_dirty_log_type_unregister(&_userspace_type);
684 	dm_ulog_tfr_exit();
685 	mempool_destroy(flush_entry_pool);
686 
687 	DMINFO("version 1.0.0 unloaded");
688 	return;
689 }
690 
/*
 * Module entry/exit hooks and module metadata.
 * NOTE(review): the file header says the file is released under the
 * LGPL while MODULE_LICENSE declares "GPL" - confirm this is intended.
 */
module_init(userspace_dirty_log_init);
module_exit(userspace_dirty_log_exit);

MODULE_DESCRIPTION(DM_NAME " userspace dirty log link");
MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");
697