xref: /openbmc/linux/drivers/nvdimm/core.c (revision 78700c0a)
1 /*
2  * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of version 2 of the GNU General Public License as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  */
13 #include <linux/libnvdimm.h>
14 #include <linux/badblocks.h>
15 #include <linux/export.h>
16 #include <linux/module.h>
17 #include <linux/blkdev.h>
18 #include <linux/device.h>
19 #include <linux/ctype.h>
20 #include <linux/ndctl.h>
21 #include <linux/mutex.h>
22 #include <linux/slab.h>
23 #include "nd-core.h"
24 #include "nd.h"
25 
/* Every registered nvdimm bus is linked here via nvdimm_bus->list */
LIST_HEAD(nvdimm_bus_list);
/* Held around additions to and removals from nvdimm_bus_list */
DEFINE_MUTEX(nvdimm_bus_list_mutex);
/* Source of the per-bus id that is used in the "ndbus%d" device name */
static DEFINE_IDA(nd_ida);
29 
30 void nvdimm_bus_lock(struct device *dev)
31 {
32 	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
33 
34 	if (!nvdimm_bus)
35 		return;
36 	mutex_lock(&nvdimm_bus->reconfig_mutex);
37 }
38 EXPORT_SYMBOL(nvdimm_bus_lock);
39 
40 void nvdimm_bus_unlock(struct device *dev)
41 {
42 	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
43 
44 	if (!nvdimm_bus)
45 		return;
46 	mutex_unlock(&nvdimm_bus->reconfig_mutex);
47 }
48 EXPORT_SYMBOL(nvdimm_bus_unlock);
49 
50 bool is_nvdimm_bus_locked(struct device *dev)
51 {
52 	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
53 
54 	if (!nvdimm_bus)
55 		return false;
56 	return mutex_is_locked(&nvdimm_bus->reconfig_mutex);
57 }
58 EXPORT_SYMBOL(is_nvdimm_bus_locked);
59 
60 u64 nd_fletcher64(void *addr, size_t len, bool le)
61 {
62 	u32 *buf = addr;
63 	u32 lo32 = 0;
64 	u64 hi32 = 0;
65 	int i;
66 
67 	for (i = 0; i < len / sizeof(u32); i++) {
68 		lo32 += le ? le32_to_cpu((__le32) buf[i]) : buf[i];
69 		hi32 += lo32;
70 	}
71 
72 	return hi32 << 32 | lo32;
73 }
74 EXPORT_SYMBOL_GPL(nd_fletcher64);
75 
76 static void nvdimm_bus_release(struct device *dev)
77 {
78 	struct nvdimm_bus *nvdimm_bus;
79 
80 	nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
81 	ida_simple_remove(&nd_ida, nvdimm_bus->id);
82 	kfree(nvdimm_bus);
83 }
84 
85 struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
86 {
87 	struct nvdimm_bus *nvdimm_bus;
88 
89 	nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
90 	WARN_ON(nvdimm_bus->dev.release != nvdimm_bus_release);
91 	return nvdimm_bus;
92 }
93 EXPORT_SYMBOL_GPL(to_nvdimm_bus);
94 
95 struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus)
96 {
97 	/* struct nvdimm_bus definition is private to libnvdimm */
98 	return nvdimm_bus->nd_desc;
99 }
100 EXPORT_SYMBOL_GPL(to_nd_desc);
101 
102 struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
103 {
104 	struct device *dev;
105 
106 	for (dev = nd_dev; dev; dev = dev->parent)
107 		if (dev->release == nvdimm_bus_release)
108 			break;
109 	dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n");
110 	if (dev)
111 		return to_nvdimm_bus(dev);
112 	return NULL;
113 }
114 
/* True for the characters allowed between hex byte pairs of a uuid string */
static bool is_uuid_sep(char sep)
{
	switch (sep) {
	case '\n':
	case '-':
	case ':':
	case '\0':
		return true;
	default:
		return false;
	}
}
121 
122 static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf,
123 		size_t len)
124 {
125 	const char *str = buf;
126 	u8 uuid[16];
127 	int i;
128 
129 	for (i = 0; i < 16; i++) {
130 		if (!isxdigit(str[0]) || !isxdigit(str[1])) {
131 			dev_dbg(dev, "%s: pos: %d buf[%zd]: %c buf[%zd]: %c\n",
132 					__func__, i, str - buf, str[0],
133 					str + 1 - buf, str[1]);
134 			return -EINVAL;
135 		}
136 
137 		uuid[i] = (hex_to_bin(str[0]) << 4) | hex_to_bin(str[1]);
138 		str += 2;
139 		if (is_uuid_sep(*str))
140 			str++;
141 	}
142 
143 	memcpy(uuid_out, uuid, sizeof(uuid));
144 	return 0;
145 }
146 
147 /**
148  * nd_uuid_store: common implementation for writing 'uuid' sysfs attributes
149  * @dev: container device for the uuid property
150  * @uuid_out: uuid buffer to replace
151  * @buf: raw sysfs buffer to parse
152  *
153  * Enforce that uuids can only be changed while the device is disabled
154  * (driver detached)
155  * LOCKING: expects device_lock() is held on entry
156  */
157 int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
158 		size_t len)
159 {
160 	u8 uuid[16];
161 	int rc;
162 
163 	if (dev->driver)
164 		return -EBUSY;
165 
166 	rc = nd_uuid_parse(dev, uuid, buf, len);
167 	if (rc)
168 		return rc;
169 
170 	kfree(*uuid_out);
171 	*uuid_out = kmemdup(uuid, sizeof(uuid), GFP_KERNEL);
172 	if (!(*uuid_out))
173 		return -ENOMEM;
174 
175 	return 0;
176 }
177 
/**
 * nd_sector_size_show() - format the list of supported sector sizes
 * @current_lbasize: currently selected size; its entry is printed in brackets
 * @supported: zero-terminated array of supported sizes
 * @buf: output buffer; the caller must provide enough room for every entry
 *
 * Each size is followed by a single space and the line ends with a
 * newline, e.g. "512 [4096] \n".
 *
 * Return: number of characters written to @buf, excluding the final NUL.
 */
ssize_t nd_sector_size_show(unsigned long current_lbasize,
		const unsigned long *supported, char *buf)
{
	ssize_t len = 0;
	int i;

	for (i = 0; supported[i]; i++) {
		/* the values are unsigned long, so print them with %lu */
		if (current_lbasize == supported[i])
			len += sprintf(buf + len, "[%lu] ", supported[i]);
		else
			len += sprintf(buf + len, "%lu ", supported[i]);
	}
	len += sprintf(buf + len, "\n");
	return len;
}
192 
193 ssize_t nd_sector_size_store(struct device *dev, const char *buf,
194 		unsigned long *current_lbasize, const unsigned long *supported)
195 {
196 	unsigned long lbasize;
197 	int rc, i;
198 
199 	if (dev->driver)
200 		return -EBUSY;
201 
202 	rc = kstrtoul(buf, 0, &lbasize);
203 	if (rc)
204 		return rc;
205 
206 	for (i = 0; supported[i]; i++)
207 		if (lbasize == supported[i])
208 			break;
209 
210 	if (supported[i]) {
211 		*current_lbasize = lbasize;
212 		return 0;
213 	} else {
214 		return -EINVAL;
215 	}
216 }
217 
218 void __nd_iostat_start(struct bio *bio, unsigned long *start)
219 {
220 	struct gendisk *disk = bio->bi_bdev->bd_disk;
221 	const int rw = bio_data_dir(bio);
222 	int cpu = part_stat_lock();
223 
224 	*start = jiffies;
225 	part_round_stats(cpu, &disk->part0);
226 	part_stat_inc(cpu, &disk->part0, ios[rw]);
227 	part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio));
228 	part_inc_in_flight(&disk->part0, rw);
229 	part_stat_unlock();
230 }
231 EXPORT_SYMBOL(__nd_iostat_start);
232 
233 void nd_iostat_end(struct bio *bio, unsigned long start)
234 {
235 	struct gendisk *disk = bio->bi_bdev->bd_disk;
236 	unsigned long duration = jiffies - start;
237 	const int rw = bio_data_dir(bio);
238 	int cpu = part_stat_lock();
239 
240 	part_stat_add(cpu, &disk->part0, ticks[rw], duration);
241 	part_round_stats(cpu, &disk->part0);
242 	part_dec_in_flight(&disk->part0, rw);
243 	part_stat_unlock();
244 }
245 EXPORT_SYMBOL(nd_iostat_end);
246 
247 static ssize_t commands_show(struct device *dev,
248 		struct device_attribute *attr, char *buf)
249 {
250 	int cmd, len = 0;
251 	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
252 	struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
253 
254 	for_each_set_bit(cmd, &nd_desc->cmd_mask, BITS_PER_LONG)
255 		len += sprintf(buf + len, "%s ", nvdimm_bus_cmd_name(cmd));
256 	len += sprintf(buf + len, "\n");
257 	return len;
258 }
259 static DEVICE_ATTR_RO(commands);
260 
261 static const char *nvdimm_bus_provider(struct nvdimm_bus *nvdimm_bus)
262 {
263 	struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
264 	struct device *parent = nvdimm_bus->dev.parent;
265 
266 	if (nd_desc->provider_name)
267 		return nd_desc->provider_name;
268 	else if (parent)
269 		return dev_name(parent);
270 	else
271 		return "unknown";
272 }
273 
274 static ssize_t provider_show(struct device *dev,
275 		struct device_attribute *attr, char *buf)
276 {
277 	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
278 
279 	return sprintf(buf, "%s\n", nvdimm_bus_provider(nvdimm_bus));
280 }
281 static DEVICE_ATTR_RO(provider);
282 
/*
 * device_for_each_child() callback: take and immediately release the
 * device lock of @dev. Nothing is done while the lock is held; presumably
 * the point is to wait for whoever currently owns it (e.g. an in-flight
 * probe) — confirm against the callers of wait_probe. @data is unused.
 * Always returns 0 so the iteration continues.
 */
static int flush_namespaces(struct device *dev, void *data)
{
	device_lock(dev);
	device_unlock(dev);
	return 0;
}
289 
/*
 * device_for_each_child() callback for the direct children of a bus:
 * cycle the device lock of @dev, then do the same for each of its
 * children via flush_namespaces(). @data is unused. Always returns 0 so
 * the iteration continues.
 */
static int flush_regions_dimms(struct device *dev, void *data)
{
	device_lock(dev);
	device_unlock(dev);
	/* descend one level: cycle the lock of every child of @dev */
	device_for_each_child(dev, NULL, flush_namespaces);
	return 0;
}
297 
298 static ssize_t wait_probe_show(struct device *dev,
299 		struct device_attribute *attr, char *buf)
300 {
301 	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
302 	struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
303 	int rc;
304 
305 	if (nd_desc->flush_probe) {
306 		rc = nd_desc->flush_probe(nd_desc);
307 		if (rc)
308 			return rc;
309 	}
310 	nd_synchronize();
311 	device_for_each_child(dev, NULL, flush_regions_dimms);
312 	return sprintf(buf, "1\n");
313 }
314 static DEVICE_ATTR_RO(wait_probe);
315 
/* NULL-terminated list of the bus-level sysfs attributes defined above */
static struct attribute *nvdimm_bus_attributes[] = {
	&dev_attr_commands.attr,
	&dev_attr_wait_probe.attr,
	&dev_attr_provider.attr,
	NULL,
};
322 
/*
 * Exported attribute group wrapping nvdimm_bus_attributes. Nothing in this
 * file attaches it to a device; __nvdimm_bus_register() uses whatever
 * groups the descriptor supplies in nd_desc->attr_groups.
 */
struct attribute_group nvdimm_bus_attribute_group = {
	.attrs = nvdimm_bus_attributes,
};
EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group);
327 
328 struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
329 		struct nvdimm_bus_descriptor *nd_desc, struct module *module)
330 {
331 	struct nvdimm_bus *nvdimm_bus;
332 	int rc;
333 
334 	nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL);
335 	if (!nvdimm_bus)
336 		return NULL;
337 	INIT_LIST_HEAD(&nvdimm_bus->list);
338 	INIT_LIST_HEAD(&nvdimm_bus->poison_list);
339 	init_waitqueue_head(&nvdimm_bus->probe_wait);
340 	nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
341 	mutex_init(&nvdimm_bus->reconfig_mutex);
342 	if (nvdimm_bus->id < 0) {
343 		kfree(nvdimm_bus);
344 		return NULL;
345 	}
346 	nvdimm_bus->nd_desc = nd_desc;
347 	nvdimm_bus->module = module;
348 	nvdimm_bus->dev.parent = parent;
349 	nvdimm_bus->dev.release = nvdimm_bus_release;
350 	nvdimm_bus->dev.groups = nd_desc->attr_groups;
351 	dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
352 	rc = device_register(&nvdimm_bus->dev);
353 	if (rc) {
354 		dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
355 		goto err;
356 	}
357 
358 	rc = nvdimm_bus_create_ndctl(nvdimm_bus);
359 	if (rc)
360 		goto err;
361 
362 	mutex_lock(&nvdimm_bus_list_mutex);
363 	list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
364 	mutex_unlock(&nvdimm_bus_list_mutex);
365 
366 	return nvdimm_bus;
367  err:
368 	put_device(&nvdimm_bus->dev);
369 	return NULL;
370 }
371 EXPORT_SYMBOL_GPL(__nvdimm_bus_register);
372 
373 static void set_badblock(struct badblocks *bb, sector_t s, int num)
374 {
375 	dev_dbg(bb->dev, "Found a poison range (0x%llx, 0x%llx)\n",
376 			(u64) s * 512, (u64) num * 512);
377 	/* this isn't an error as the hardware will still throw an exception */
378 	if (badblocks_set(bb, s, num, 1))
379 		dev_info_once(bb->dev, "%s: failed for sector %llx\n",
380 				__func__, (u64) s);
381 }
382 
383 /**
384  * __add_badblock_range() - Convert a physical address range to bad sectors
385  * @bb:		badblocks instance to populate
386  * @ns_offset:	namespace offset where the error range begins (in bytes)
387  * @len:	number of bytes of poison to be added
388  *
389  * This assumes that the range provided with (ns_offset, len) is within
390  * the bounds of physical addresses for this namespace, i.e. lies in the
391  * interval [ns_start, ns_start + ns_size)
392  */
393 static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len)
394 {
395 	const unsigned int sector_size = 512;
396 	sector_t start_sector;
397 	u64 num_sectors;
398 	u32 rem;
399 
400 	start_sector = div_u64(ns_offset, sector_size);
401 	num_sectors = div_u64_rem(len, sector_size, &rem);
402 	if (rem)
403 		num_sectors++;
404 
405 	if (unlikely(num_sectors > (u64)INT_MAX)) {
406 		u64 remaining = num_sectors;
407 		sector_t s = start_sector;
408 
409 		while (remaining) {
410 			int done = min_t(u64, remaining, INT_MAX);
411 
412 			set_badblock(bb, s, done);
413 			remaining -= done;
414 			s += done;
415 		}
416 	} else
417 		set_badblock(bb, start_sector, num_sectors);
418 }
419 
420 static void badblocks_populate(struct list_head *poison_list,
421 		struct badblocks *bb, const struct resource *res)
422 {
423 	struct nd_poison *pl;
424 
425 	if (list_empty(poison_list))
426 		return;
427 
428 	list_for_each_entry(pl, poison_list, list) {
429 		u64 pl_end = pl->start + pl->length - 1;
430 
431 		/* Discard intervals with no intersection */
432 		if (pl_end < res->start)
433 			continue;
434 		if (pl->start >  res->end)
435 			continue;
436 		/* Deal with any overlap after start of the namespace */
437 		if (pl->start >= res->start) {
438 			u64 start = pl->start;
439 			u64 len;
440 
441 			if (pl_end <= res->end)
442 				len = pl->length;
443 			else
444 				len = res->start + resource_size(res)
445 					- pl->start;
446 			__add_badblock_range(bb, start - res->start, len);
447 			continue;
448 		}
449 		/* Deal with overlap for poison starting before the namespace */
450 		if (pl->start < res->start) {
451 			u64 len;
452 
453 			if (pl_end < res->end)
454 				len = pl->start + pl->length - res->start;
455 			else
456 				len = resource_size(res);
457 			__add_badblock_range(bb, 0, len);
458 		}
459 	}
460 }
461 
462 /**
463  * nvdimm_badblocks_populate() - Convert a list of poison ranges to badblocks
464  * @region: parent region of the range to interrogate
465  * @bb: badblocks instance to populate
466  * @res: resource range to consider
467  *
468  * The poison list generated during bus initialization may contain
469  * multiple, possibly overlapping physical address ranges.  Compare each
470  * of these ranges to the resource range currently being initialized,
471  * and add badblocks entries for all matching sub-ranges
472  */
473 void nvdimm_badblocks_populate(struct nd_region *nd_region,
474 		struct badblocks *bb, const struct resource *res)
475 {
476 	struct nvdimm_bus *nvdimm_bus;
477 	struct list_head *poison_list;
478 
479 	if (!is_nd_pmem(&nd_region->dev)) {
480 		dev_WARN_ONCE(&nd_region->dev, 1,
481 				"%s only valid for pmem regions\n", __func__);
482 		return;
483 	}
484 	nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
485 	poison_list = &nvdimm_bus->poison_list;
486 
487 	nvdimm_bus_lock(&nvdimm_bus->dev);
488 	badblocks_populate(poison_list, bb, res);
489 	nvdimm_bus_unlock(&nvdimm_bus->dev);
490 }
491 EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate);
492 
493 static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
494 {
495 	struct nd_poison *pl;
496 
497 	pl = kzalloc(sizeof(*pl), GFP_KERNEL);
498 	if (!pl)
499 		return -ENOMEM;
500 
501 	pl->start = addr;
502 	pl->length = length;
503 	list_add_tail(&pl->list, &nvdimm_bus->poison_list);
504 
505 	return 0;
506 }
507 
508 static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
509 {
510 	struct nd_poison *pl;
511 
512 	if (list_empty(&nvdimm_bus->poison_list))
513 		return add_poison(nvdimm_bus, addr, length);
514 
515 	/*
516 	 * There is a chance this is a duplicate, check for those first.
517 	 * This will be the common case as ARS_STATUS returns all known
518 	 * errors in the SPA space, and we can't query it per region
519 	 */
520 	list_for_each_entry(pl, &nvdimm_bus->poison_list, list)
521 		if (pl->start == addr) {
522 			/* If length has changed, update this list entry */
523 			if (pl->length != length)
524 				pl->length = length;
525 			return 0;
526 		}
527 
528 	/*
529 	 * If not a duplicate or a simple length update, add the entry as is,
530 	 * as any overlapping ranges will get resolved when the list is consumed
531 	 * and converted to badblocks
532 	 */
533 	return add_poison(nvdimm_bus, addr, length);
534 }
535 
536 int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
537 {
538 	int rc;
539 
540 	nvdimm_bus_lock(&nvdimm_bus->dev);
541 	rc = bus_add_poison(nvdimm_bus, addr, length);
542 	nvdimm_bus_unlock(&nvdimm_bus->dev);
543 
544 	return rc;
545 }
546 EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);
547 
548 static void free_poison_list(struct list_head *poison_list)
549 {
550 	struct nd_poison *pl, *next;
551 
552 	list_for_each_entry_safe(pl, next, poison_list, list) {
553 		list_del(&pl->list);
554 		kfree(pl);
555 	}
556 	list_del_init(poison_list);
557 }
558 
559 static int child_unregister(struct device *dev, void *data)
560 {
561 	/*
562 	 * the singular ndctl class device per bus needs to be
563 	 * "device_destroy"ed, so skip it here
564 	 *
565 	 * i.e. remove classless children
566 	 */
567 	if (dev->class)
568 		/* pass */;
569 	else
570 		nd_device_unregister(dev, ND_SYNC);
571 	return 0;
572 }
573 
/**
 * nvdimm_bus_unregister() - tear down a bus
 * @nvdimm_bus: bus to remove; NULL is accepted and ignored
 */
void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
{
	if (!nvdimm_bus)
		return;

	/* take the bus off the global list before dismantling it */
	mutex_lock(&nvdimm_bus_list_mutex);
	list_del_init(&nvdimm_bus->list);
	mutex_unlock(&nvdimm_bus_list_mutex);

	nd_synchronize();
	/* unregister the classless children; see child_unregister() */
	device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);

	/* the poison list is only modified with the bus lock held */
	nvdimm_bus_lock(&nvdimm_bus->dev);
	free_poison_list(&nvdimm_bus->poison_list);
	nvdimm_bus_unlock(&nvdimm_bus->dev);

	/* the ndctl class device was skipped by child_unregister() */
	nvdimm_bus_destroy_ndctl(nvdimm_bus);

	device_unregister(&nvdimm_bus->dev);
}
EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);
595 
#ifdef CONFIG_BLK_DEV_INTEGRITY
/**
 * nd_integrity_init() - register an integrity template for a disk
 * @disk: disk to register the template on
 * @meta_size: size used for both the integrity tuple and the tag; when 0
 *	nothing is registered
 *
 * Return: always 0.
 */
int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
{
	struct blk_integrity bi;

	if (meta_size == 0)
		return 0;

	/*
	 * Zero the whole template first: only a few members are assigned
	 * below, and the others must not reach blk_integrity_register()
	 * holding whatever happened to be on the stack.
	 */
	memset(&bi, 0, sizeof(bi));

	bi.profile = NULL;
	bi.tuple_size = meta_size;
	bi.tag_size = meta_size;

	blk_integrity_register(disk, &bi);
	blk_queue_max_integrity_segments(disk->queue, 1);

	return 0;
}
EXPORT_SYMBOL(nd_integrity_init);

#else /* CONFIG_BLK_DEV_INTEGRITY */
/* Without block integrity support there is nothing to set up */
int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
{
	return 0;
}
EXPORT_SYMBOL(nd_integrity_init);

#endif
623 
624 static __init int libnvdimm_init(void)
625 {
626 	int rc;
627 
628 	rc = nvdimm_bus_init();
629 	if (rc)
630 		return rc;
631 	rc = nvdimm_init();
632 	if (rc)
633 		goto err_dimm;
634 	rc = nd_region_init();
635 	if (rc)
636 		goto err_region;
637 	return 0;
638  err_region:
639 	nvdimm_exit();
640  err_dimm:
641 	nvdimm_bus_exit();
642 	return rc;
643 }
644 
/*
 * Module teardown: warn if any bus is still linked on nvdimm_bus_list,
 * undo the three init steps in reverse order, run the region and dimm
 * "devs" exit hooks, and finally destroy the bus id allocator.
 */
static __exit void libnvdimm_exit(void)
{
	/* every bus should have been unregistered by now */
	WARN_ON(!list_empty(&nvdimm_bus_list));
	nd_region_exit();
	nvdimm_exit();
	nvdimm_bus_exit();
	nd_region_devs_exit();
	nvdimm_devs_exit();
	ida_destroy(&nd_ida);
}
655 
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
/* libnvdimm_init() runs as a subsys initcall, libnvdimm_exit() on unload */
subsys_initcall(libnvdimm_init);
module_exit(libnvdimm_exit);
660