xref: /openbmc/linux/drivers/nvdimm/pmem.c (revision 8571e645)
1 /*
2  * Persistent Memory Driver
3  *
4  * Copyright (c) 2014-2015, Intel Corporation.
5  * Copyright (c) 2015, Christoph Hellwig <hch@lst.de>.
6  * Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  */
17 
18 #include <asm/cacheflush.h>
19 #include <linux/blkdev.h>
20 #include <linux/hdreg.h>
21 #include <linux/init.h>
22 #include <linux/platform_device.h>
23 #include <linux/module.h>
24 #include <linux/moduleparam.h>
25 #include <linux/badblocks.h>
26 #include <linux/memremap.h>
27 #include <linux/vmalloc.h>
28 #include <linux/pfn_t.h>
29 #include <linux/slab.h>
30 #include <linux/pmem.h>
31 #include <linux/nd.h>
32 #include "pfn.h"
33 #include "nd.h"
34 
35 struct pmem_device {
36 	struct request_queue	*pmem_queue;
37 	struct gendisk		*pmem_disk;
38 	struct nd_namespace_common *ndns;
39 
40 	/* One contiguous memory region per device */
41 	phys_addr_t		phys_addr;
42 	/* when non-zero this device is hosting a 'pfn' instance */
43 	phys_addr_t		data_offset;
44 	u64			pfn_flags;
45 	void __pmem		*virt_addr;
46 	/* immutable base size of the namespace */
47 	size_t			size;
48 	/* trim size when namespace capacity has been section aligned */
49 	u32			pfn_pad;
50 	struct badblocks	bb;
51 };
52 
53 static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len)
54 {
55 	if (bb->count) {
56 		sector_t first_bad;
57 		int num_bad;
58 
59 		return !!badblocks_check(bb, sector, len / 512, &first_bad,
60 				&num_bad);
61 	}
62 
63 	return false;
64 }
65 
66 static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
67 		unsigned int len)
68 {
69 	struct device *dev = disk_to_dev(pmem->pmem_disk);
70 	sector_t sector;
71 	long cleared;
72 
73 	sector = (offset - pmem->data_offset) / 512;
74 	cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
75 
76 	if (cleared > 0 && cleared / 512) {
77 		dev_dbg(dev, "%s: %llx clear %ld sector%s\n",
78 				__func__, (unsigned long long) sector,
79 				cleared / 512, cleared / 512 > 1 ? "s" : "");
80 		badblocks_clear(&pmem->bb, sector, cleared / 512);
81 	}
82 	invalidate_pmem(pmem->virt_addr + offset, len);
83 }
84 
85 static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
86 			unsigned int len, unsigned int off, int rw,
87 			sector_t sector)
88 {
89 	int rc = 0;
90 	bool bad_pmem = false;
91 	void *mem = kmap_atomic(page);
92 	phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
93 	void __pmem *pmem_addr = pmem->virt_addr + pmem_off;
94 
95 	if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
96 		bad_pmem = true;
97 
98 	if (rw == READ) {
99 		if (unlikely(bad_pmem))
100 			rc = -EIO;
101 		else {
102 			rc = memcpy_from_pmem(mem + off, pmem_addr, len);
103 			flush_dcache_page(page);
104 		}
105 	} else {
106 		/*
107 		 * Note that we write the data both before and after
108 		 * clearing poison.  The write before clear poison
109 		 * handles situations where the latest written data is
110 		 * preserved and the clear poison operation simply marks
111 		 * the address range as valid without changing the data.
112 		 * In this case application software can assume that an
113 		 * interrupted write will either return the new good
114 		 * data or an error.
115 		 *
116 		 * However, if pmem_clear_poison() leaves the data in an
117 		 * indeterminate state we need to perform the write
118 		 * after clear poison.
119 		 */
120 		flush_dcache_page(page);
121 		memcpy_to_pmem(pmem_addr, mem + off, len);
122 		if (unlikely(bad_pmem)) {
123 			pmem_clear_poison(pmem, pmem_off, len);
124 			memcpy_to_pmem(pmem_addr, mem + off, len);
125 		}
126 	}
127 
128 	kunmap_atomic(mem);
129 	return rc;
130 }
131 
132 static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
133 {
134 	int rc = 0;
135 	bool do_acct;
136 	unsigned long start;
137 	struct bio_vec bvec;
138 	struct bvec_iter iter;
139 	struct block_device *bdev = bio->bi_bdev;
140 	struct pmem_device *pmem = bdev->bd_disk->private_data;
141 
142 	do_acct = nd_iostat_start(bio, &start);
143 	bio_for_each_segment(bvec, bio, iter) {
144 		rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
145 				bvec.bv_offset, bio_data_dir(bio),
146 				iter.bi_sector);
147 		if (rc) {
148 			bio->bi_error = rc;
149 			break;
150 		}
151 	}
152 	if (do_acct)
153 		nd_iostat_end(bio, start);
154 
155 	if (bio_data_dir(bio))
156 		wmb_pmem();
157 
158 	bio_endio(bio);
159 	return BLK_QC_T_NONE;
160 }
161 
162 static int pmem_rw_page(struct block_device *bdev, sector_t sector,
163 		       struct page *page, int rw)
164 {
165 	struct pmem_device *pmem = bdev->bd_disk->private_data;
166 	int rc;
167 
168 	rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector);
169 	if (rw & WRITE)
170 		wmb_pmem();
171 
172 	/*
173 	 * The ->rw_page interface is subtle and tricky.  The core
174 	 * retries on any error, so we can only invoke page_endio() in
175 	 * the successful completion case.  Otherwise, we'll see crashes
176 	 * caused by double completion.
177 	 */
178 	if (rc == 0)
179 		page_endio(page, rw & WRITE, 0);
180 
181 	return rc;
182 }
183 
184 static long pmem_direct_access(struct block_device *bdev, sector_t sector,
185 		      void __pmem **kaddr, pfn_t *pfn)
186 {
187 	struct pmem_device *pmem = bdev->bd_disk->private_data;
188 	resource_size_t offset = sector * 512 + pmem->data_offset;
189 
190 	*kaddr = pmem->virt_addr + offset;
191 	*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
192 
193 	return pmem->size - pmem->pfn_pad - offset;
194 }
195 
196 static const struct block_device_operations pmem_fops = {
197 	.owner =		THIS_MODULE,
198 	.rw_page =		pmem_rw_page,
199 	.direct_access =	pmem_direct_access,
200 	.revalidate_disk =	nvdimm_revalidate_disk,
201 };
202 
203 static struct pmem_device *pmem_alloc(struct device *dev,
204 		struct resource *res, int id)
205 {
206 	struct pmem_device *pmem;
207 	struct request_queue *q;
208 
209 	pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
210 	if (!pmem)
211 		return ERR_PTR(-ENOMEM);
212 
213 	pmem->phys_addr = res->start;
214 	pmem->size = resource_size(res);
215 	if (!arch_has_wmb_pmem())
216 		dev_warn(dev, "unable to guarantee persistence of writes\n");
217 
218 	if (!devm_request_mem_region(dev, pmem->phys_addr, pmem->size,
219 			dev_name(dev))) {
220 		dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n",
221 				&pmem->phys_addr, pmem->size);
222 		return ERR_PTR(-EBUSY);
223 	}
224 
225 	q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev));
226 	if (!q)
227 		return ERR_PTR(-ENOMEM);
228 
229 	pmem->pfn_flags = PFN_DEV;
230 	if (pmem_should_map_pages(dev)) {
231 		pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res,
232 				&q->q_usage_counter, NULL);
233 		pmem->pfn_flags |= PFN_MAP;
234 	} else
235 		pmem->virt_addr = (void __pmem *) devm_memremap(dev,
236 				pmem->phys_addr, pmem->size,
237 				ARCH_MEMREMAP_PMEM);
238 
239 	if (IS_ERR(pmem->virt_addr)) {
240 		blk_cleanup_queue(q);
241 		return (void __force *) pmem->virt_addr;
242 	}
243 
244 	pmem->pmem_queue = q;
245 	return pmem;
246 }
247 
248 static void pmem_detach_disk(struct pmem_device *pmem)
249 {
250 	if (!pmem->pmem_disk)
251 		return;
252 
253 	del_gendisk(pmem->pmem_disk);
254 	put_disk(pmem->pmem_disk);
255 	blk_cleanup_queue(pmem->pmem_queue);
256 }
257 
258 static int pmem_attach_disk(struct device *dev,
259 		struct nd_namespace_common *ndns, struct pmem_device *pmem)
260 {
261 	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
262 	int nid = dev_to_node(dev);
263 	struct resource bb_res;
264 	struct gendisk *disk;
265 
266 	blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
267 	blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE);
268 	blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX);
269 	blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY);
270 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue);
271 
272 	disk = alloc_disk_node(0, nid);
273 	if (!disk) {
274 		blk_cleanup_queue(pmem->pmem_queue);
275 		return -ENOMEM;
276 	}
277 
278 	disk->fops		= &pmem_fops;
279 	disk->private_data	= pmem;
280 	disk->queue		= pmem->pmem_queue;
281 	disk->flags		= GENHD_FL_EXT_DEVT;
282 	nvdimm_namespace_disk_name(ndns, disk->disk_name);
283 	disk->driverfs_dev = dev;
284 	set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset)
285 			/ 512);
286 	pmem->pmem_disk = disk;
287 	devm_exit_badblocks(dev, &pmem->bb);
288 	if (devm_init_badblocks(dev, &pmem->bb))
289 		return -ENOMEM;
290 	bb_res.start = nsio->res.start + pmem->data_offset;
291 	bb_res.end = nsio->res.end;
292 	if (is_nd_pfn(dev)) {
293 		struct nd_pfn *nd_pfn = to_nd_pfn(dev);
294 		struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
295 
296 		bb_res.start += __le32_to_cpu(pfn_sb->start_pad);
297 		bb_res.end -= __le32_to_cpu(pfn_sb->end_trunc);
298 	}
299 	nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb,
300 			&bb_res);
301 	disk->bb = &pmem->bb;
302 	add_disk(disk);
303 	revalidate_disk(disk);
304 
305 	return 0;
306 }
307 
308 static int pmem_rw_bytes(struct nd_namespace_common *ndns,
309 		resource_size_t offset, void *buf, size_t size, int rw)
310 {
311 	struct pmem_device *pmem = dev_get_drvdata(ndns->claim);
312 
313 	if (unlikely(offset + size > pmem->size)) {
314 		dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
315 		return -EFAULT;
316 	}
317 
318 	if (rw == READ) {
319 		unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
320 
321 		if (unlikely(is_bad_pmem(&pmem->bb, offset / 512, sz_align)))
322 			return -EIO;
323 		return memcpy_from_pmem(buf, pmem->virt_addr + offset, size);
324 	} else {
325 		memcpy_to_pmem(pmem->virt_addr + offset, buf, size);
326 		wmb_pmem();
327 	}
328 
329 	return 0;
330 }
331 
332 static int nd_pfn_init(struct nd_pfn *nd_pfn)
333 {
334 	struct nd_pfn_sb *pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL);
335 	struct pmem_device *pmem = dev_get_drvdata(&nd_pfn->dev);
336 	struct nd_namespace_common *ndns = nd_pfn->ndns;
337 	u32 start_pad = 0, end_trunc = 0;
338 	resource_size_t start, size;
339 	struct nd_namespace_io *nsio;
340 	struct nd_region *nd_region;
341 	unsigned long npfns;
342 	phys_addr_t offset;
343 	u64 checksum;
344 	int rc;
345 
346 	if (!pfn_sb)
347 		return -ENOMEM;
348 
349 	nd_pfn->pfn_sb = pfn_sb;
350 	rc = nd_pfn_validate(nd_pfn);
351 	if (rc == -ENODEV)
352 		/* no info block, do init */;
353 	else
354 		return rc;
355 
356 	nd_region = to_nd_region(nd_pfn->dev.parent);
357 	if (nd_region->ro) {
358 		dev_info(&nd_pfn->dev,
359 				"%s is read-only, unable to init metadata\n",
360 				dev_name(&nd_region->dev));
361 		goto err;
362 	}
363 
364 	memset(pfn_sb, 0, sizeof(*pfn_sb));
365 
366 	/*
367 	 * Check if pmem collides with 'System RAM' when section aligned and
368 	 * trim it accordingly
369 	 */
370 	nsio = to_nd_namespace_io(&ndns->dev);
371 	start = PHYS_SECTION_ALIGN_DOWN(nsio->res.start);
372 	size = resource_size(&nsio->res);
373 	if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
374 				IORES_DESC_NONE) == REGION_MIXED) {
375 
376 		start = nsio->res.start;
377 		start_pad = PHYS_SECTION_ALIGN_UP(start) - start;
378 	}
379 
380 	start = nsio->res.start;
381 	size = PHYS_SECTION_ALIGN_UP(start + size) - start;
382 	if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
383 				IORES_DESC_NONE) == REGION_MIXED) {
384 		size = resource_size(&nsio->res);
385 		end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size);
386 	}
387 
388 	if (start_pad + end_trunc)
389 		dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n",
390 				dev_name(&ndns->dev), start_pad + end_trunc);
391 
392 	/*
393 	 * Note, we use 64 here for the standard size of struct page,
394 	 * debugging options may cause it to be larger in which case the
395 	 * implementation will limit the pfns advertised through
396 	 * ->direct_access() to those that are included in the memmap.
397 	 */
398 	start += start_pad;
399 	npfns = (pmem->size - start_pad - end_trunc - SZ_8K) / SZ_4K;
400 	if (nd_pfn->mode == PFN_MODE_PMEM) {
401 		unsigned long memmap_size;
402 
403 		/*
404 		 * vmemmap_populate_hugepages() allocates the memmap array in
405 		 * HPAGE_SIZE chunks.
406 		 */
407 		memmap_size = ALIGN(64 * npfns, HPAGE_SIZE);
408 		offset = ALIGN(start + SZ_8K + memmap_size, nd_pfn->align)
409 			- start;
410 	} else if (nd_pfn->mode == PFN_MODE_RAM)
411 		offset = ALIGN(start + SZ_8K, nd_pfn->align) - start;
412 	else
413 		goto err;
414 
415 	if (offset + start_pad + end_trunc >= pmem->size) {
416 		dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n",
417 				dev_name(&ndns->dev));
418 		goto err;
419 	}
420 
421 	npfns = (pmem->size - offset - start_pad - end_trunc) / SZ_4K;
422 	pfn_sb->mode = cpu_to_le32(nd_pfn->mode);
423 	pfn_sb->dataoff = cpu_to_le64(offset);
424 	pfn_sb->npfns = cpu_to_le64(npfns);
425 	memcpy(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN);
426 	memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
427 	memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
428 	pfn_sb->version_major = cpu_to_le16(1);
429 	pfn_sb->version_minor = cpu_to_le16(1);
430 	pfn_sb->start_pad = cpu_to_le32(start_pad);
431 	pfn_sb->end_trunc = cpu_to_le32(end_trunc);
432 	checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
433 	pfn_sb->checksum = cpu_to_le64(checksum);
434 
435 	rc = nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb));
436 	if (rc)
437 		goto err;
438 
439 	return 0;
440  err:
441 	nd_pfn->pfn_sb = NULL;
442 	kfree(pfn_sb);
443 	return -ENXIO;
444 }
445 
446 static int nvdimm_namespace_detach_pfn(struct nd_namespace_common *ndns)
447 {
448 	struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim);
449 	struct pmem_device *pmem;
450 
451 	/* free pmem disk */
452 	pmem = dev_get_drvdata(&nd_pfn->dev);
453 	pmem_detach_disk(pmem);
454 
455 	/* release nd_pfn resources */
456 	kfree(nd_pfn->pfn_sb);
457 	nd_pfn->pfn_sb = NULL;
458 
459 	return 0;
460 }
461 
462 /*
463  * We hotplug memory at section granularity, pad the reserved area from
464  * the previous section base to the namespace base address.
465  */
466 static unsigned long init_altmap_base(resource_size_t base)
467 {
468 	unsigned long base_pfn = PHYS_PFN(base);
469 
470 	return PFN_SECTION_ALIGN_DOWN(base_pfn);
471 }
472 
473 static unsigned long init_altmap_reserve(resource_size_t base)
474 {
475 	unsigned long reserve = PHYS_PFN(SZ_8K);
476 	unsigned long base_pfn = PHYS_PFN(base);
477 
478 	reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn);
479 	return reserve;
480 }
481 
482 static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn)
483 {
484 	int rc;
485 	struct resource res;
486 	struct request_queue *q;
487 	struct pmem_device *pmem;
488 	struct vmem_altmap *altmap;
489 	struct device *dev = &nd_pfn->dev;
490 	struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
491 	struct nd_namespace_common *ndns = nd_pfn->ndns;
492 	u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
493 	u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
494 	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
495 	resource_size_t base = nsio->res.start + start_pad;
496 	struct vmem_altmap __altmap = {
497 		.base_pfn = init_altmap_base(base),
498 		.reserve = init_altmap_reserve(base),
499 	};
500 
501 	pmem = dev_get_drvdata(dev);
502 	pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
503 	pmem->pfn_pad = start_pad + end_trunc;
504 	nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode);
505 	if (nd_pfn->mode == PFN_MODE_RAM) {
506 		if (pmem->data_offset < SZ_8K)
507 			return -EINVAL;
508 		nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
509 		altmap = NULL;
510 	} else if (nd_pfn->mode == PFN_MODE_PMEM) {
511 		nd_pfn->npfns = (pmem->size - pmem->pfn_pad - pmem->data_offset)
512 			/ PAGE_SIZE;
513 		if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns)
514 			dev_info(&nd_pfn->dev,
515 					"number of pfns truncated from %lld to %ld\n",
516 					le64_to_cpu(nd_pfn->pfn_sb->npfns),
517 					nd_pfn->npfns);
518 		altmap = & __altmap;
519 		altmap->free = PHYS_PFN(pmem->data_offset - SZ_8K);
520 		altmap->alloc = 0;
521 	} else {
522 		rc = -ENXIO;
523 		goto err;
524 	}
525 
526 	/* establish pfn range for lookup, and switch to direct map */
527 	q = pmem->pmem_queue;
528 	memcpy(&res, &nsio->res, sizeof(res));
529 	res.start += start_pad;
530 	res.end -= end_trunc;
531 	devm_memunmap(dev, (void __force *) pmem->virt_addr);
532 	pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &res,
533 			&q->q_usage_counter, altmap);
534 	pmem->pfn_flags |= PFN_MAP;
535 	if (IS_ERR(pmem->virt_addr)) {
536 		rc = PTR_ERR(pmem->virt_addr);
537 		goto err;
538 	}
539 
540 	/* attach pmem disk in "pfn-mode" */
541 	rc = pmem_attach_disk(dev, ndns, pmem);
542 	if (rc)
543 		goto err;
544 
545 	return rc;
546  err:
547 	nvdimm_namespace_detach_pfn(ndns);
548 	return rc;
549 
550 }
551 
552 static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
553 {
554 	struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim);
555 	int rc;
556 
557 	if (!nd_pfn->uuid || !nd_pfn->ndns)
558 		return -ENODEV;
559 
560 	rc = nd_pfn_init(nd_pfn);
561 	if (rc)
562 		return rc;
563 	/* we need a valid pfn_sb before we can init a vmem_altmap */
564 	return __nvdimm_namespace_attach_pfn(nd_pfn);
565 }
566 
567 static int nd_pmem_probe(struct device *dev)
568 {
569 	struct nd_region *nd_region = to_nd_region(dev->parent);
570 	struct nd_namespace_common *ndns;
571 	struct nd_namespace_io *nsio;
572 	struct pmem_device *pmem;
573 
574 	ndns = nvdimm_namespace_common_probe(dev);
575 	if (IS_ERR(ndns))
576 		return PTR_ERR(ndns);
577 
578 	nsio = to_nd_namespace_io(&ndns->dev);
579 	pmem = pmem_alloc(dev, &nsio->res, nd_region->id);
580 	if (IS_ERR(pmem))
581 		return PTR_ERR(pmem);
582 
583 	pmem->ndns = ndns;
584 	dev_set_drvdata(dev, pmem);
585 	ndns->rw_bytes = pmem_rw_bytes;
586 	if (devm_init_badblocks(dev, &pmem->bb))
587 		return -ENOMEM;
588 	nvdimm_badblocks_populate(nd_region, &pmem->bb, &nsio->res);
589 
590 	if (is_nd_btt(dev)) {
591 		/* btt allocates its own request_queue */
592 		blk_cleanup_queue(pmem->pmem_queue);
593 		pmem->pmem_queue = NULL;
594 		return nvdimm_namespace_attach_btt(ndns);
595 	}
596 
597 	if (is_nd_pfn(dev))
598 		return nvdimm_namespace_attach_pfn(ndns);
599 
600 	if (nd_btt_probe(ndns, pmem) == 0 || nd_pfn_probe(ndns, pmem) == 0) {
601 		/*
602 		 * We'll come back as either btt-pmem, or pfn-pmem, so
603 		 * drop the queue allocation for now.
604 		 */
605 		blk_cleanup_queue(pmem->pmem_queue);
606 		return -ENXIO;
607 	}
608 
609 	return pmem_attach_disk(dev, ndns, pmem);
610 }
611 
612 static int nd_pmem_remove(struct device *dev)
613 {
614 	struct pmem_device *pmem = dev_get_drvdata(dev);
615 
616 	if (is_nd_btt(dev))
617 		nvdimm_namespace_detach_btt(pmem->ndns);
618 	else if (is_nd_pfn(dev))
619 		nvdimm_namespace_detach_pfn(pmem->ndns);
620 	else
621 		pmem_detach_disk(pmem);
622 
623 	return 0;
624 }
625 
626 static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
627 {
628 	struct pmem_device *pmem = dev_get_drvdata(dev);
629 	struct nd_namespace_common *ndns = pmem->ndns;
630 	struct nd_region *nd_region = to_nd_region(dev->parent);
631 	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
632 	struct resource res = {
633 		.start = nsio->res.start + pmem->data_offset,
634 		.end = nsio->res.end,
635 	};
636 
637 	if (event != NVDIMM_REVALIDATE_POISON)
638 		return;
639 
640 	if (is_nd_pfn(dev)) {
641 		struct nd_pfn *nd_pfn = to_nd_pfn(dev);
642 		struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
643 
644 		res.start += __le32_to_cpu(pfn_sb->start_pad);
645 		res.end -= __le32_to_cpu(pfn_sb->end_trunc);
646 	}
647 
648 	nvdimm_badblocks_populate(nd_region, &pmem->bb, &res);
649 }
650 
651 MODULE_ALIAS("pmem");
652 MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO);
653 MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM);
654 static struct nd_device_driver nd_pmem_driver = {
655 	.probe = nd_pmem_probe,
656 	.remove = nd_pmem_remove,
657 	.notify = nd_pmem_notify,
658 	.drv = {
659 		.name = "nd_pmem",
660 	},
661 	.type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM,
662 };
663 
664 static int __init pmem_init(void)
665 {
666 	return nd_driver_register(&nd_pmem_driver);
667 }
668 module_init(pmem_init);
669 
670 static void pmem_exit(void)
671 {
672 	driver_unregister(&nd_pmem_driver.drv);
673 }
674 module_exit(pmem_exit);
675 
676 MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
677 MODULE_LICENSE("GPL v2");
678