12025cf9eSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 25212e11fSVishal Verma /* 35212e11fSVishal Verma * Block Translation Table 45212e11fSVishal Verma * Copyright (c) 2014-2015, Intel Corporation. 55212e11fSVishal Verma */ 65212e11fSVishal Verma #include <linux/highmem.h> 75212e11fSVishal Verma #include <linux/debugfs.h> 85212e11fSVishal Verma #include <linux/blkdev.h> 95212e11fSVishal Verma #include <linux/module.h> 105212e11fSVishal Verma #include <linux/device.h> 115212e11fSVishal Verma #include <linux/mutex.h> 125212e11fSVishal Verma #include <linux/hdreg.h> 135212e11fSVishal Verma #include <linux/genhd.h> 145212e11fSVishal Verma #include <linux/sizes.h> 155212e11fSVishal Verma #include <linux/ndctl.h> 165212e11fSVishal Verma #include <linux/fs.h> 175212e11fSVishal Verma #include <linux/nd.h> 1823c47d2aSMinchan Kim #include <linux/backing-dev.h> 195212e11fSVishal Verma #include "btt.h" 205212e11fSVishal Verma #include "nd.h" 215212e11fSVishal Verma 225212e11fSVishal Verma enum log_ent_request { 235212e11fSVishal Verma LOG_NEW_ENT = 0, 245212e11fSVishal Verma LOG_OLD_ENT 255212e11fSVishal Verma }; 265212e11fSVishal Verma 2786652d2eSVishal Verma static struct device *to_dev(struct arena_info *arena) 2886652d2eSVishal Verma { 2986652d2eSVishal Verma return &arena->nd_btt->dev; 3086652d2eSVishal Verma } 3186652d2eSVishal Verma 32d9b83c75SVishal Verma static u64 adjust_initial_offset(struct nd_btt *nd_btt, u64 offset) 33d9b83c75SVishal Verma { 34d9b83c75SVishal Verma return offset + nd_btt->initial_offset; 35d9b83c75SVishal Verma } 36d9b83c75SVishal Verma 375212e11fSVishal Verma static int arena_read_bytes(struct arena_info *arena, resource_size_t offset, 383ae3d67bSVishal Verma void *buf, size_t n, unsigned long flags) 395212e11fSVishal Verma { 405212e11fSVishal Verma struct nd_btt *nd_btt = arena->nd_btt; 415212e11fSVishal Verma struct nd_namespace_common *ndns = nd_btt->ndns; 425212e11fSVishal Verma 4314e49454SVishal Verma /* arena offsets may be shifted from the base of the device */ 44d9b83c75SVishal Verma offset = adjust_initial_offset(nd_btt, offset); 453ae3d67bSVishal Verma return nvdimm_read_bytes(ndns, offset, buf, n, flags); 465212e11fSVishal Verma } 475212e11fSVishal Verma 485212e11fSVishal Verma static int arena_write_bytes(struct arena_info *arena, resource_size_t offset, 493ae3d67bSVishal Verma void *buf, size_t n, unsigned long flags) 505212e11fSVishal Verma { 515212e11fSVishal Verma struct nd_btt *nd_btt = arena->nd_btt; 525212e11fSVishal Verma struct nd_namespace_common *ndns = nd_btt->ndns; 535212e11fSVishal Verma 5414e49454SVishal Verma /* arena offsets may be shifted from the base of the device */ 55d9b83c75SVishal Verma offset = adjust_initial_offset(nd_btt, offset); 563ae3d67bSVishal Verma return nvdimm_write_bytes(ndns, offset, buf, n, flags); 575212e11fSVishal Verma } 585212e11fSVishal Verma 595212e11fSVishal Verma static int btt_info_write(struct arena_info *arena, struct btt_sb *super) 605212e11fSVishal Verma { 615212e11fSVishal Verma int ret; 625212e11fSVishal Verma 63b177fe85SVishal Verma /* 64b177fe85SVishal Verma * infooff and info2off should always be at least 512B aligned. 65b177fe85SVishal Verma * We rely on that to make sure rw_bytes does error clearing 66b177fe85SVishal Verma * correctly, so make sure that is the case. 67b177fe85SVishal Verma */ 6886652d2eSVishal Verma dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->infooff, 512), 6986652d2eSVishal Verma "arena->infooff: %#llx is unaligned\n", arena->infooff); 7086652d2eSVishal Verma dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->info2off, 512), 7186652d2eSVishal Verma "arena->info2off: %#llx is unaligned\n", arena->info2off); 72b177fe85SVishal Verma 735212e11fSVishal Verma ret = arena_write_bytes(arena, arena->info2off, super, 743ae3d67bSVishal Verma sizeof(struct btt_sb), 0); 755212e11fSVishal Verma if (ret) 765212e11fSVishal Verma return ret; 775212e11fSVishal Verma 785212e11fSVishal Verma return arena_write_bytes(arena, arena->infooff, super, 793ae3d67bSVishal Verma sizeof(struct btt_sb), 0); 805212e11fSVishal Verma } 815212e11fSVishal Verma 825212e11fSVishal Verma static int btt_info_read(struct arena_info *arena, struct btt_sb *super) 835212e11fSVishal Verma { 845212e11fSVishal Verma return arena_read_bytes(arena, arena->infooff, super, 853ae3d67bSVishal Verma sizeof(struct btt_sb), 0); 865212e11fSVishal Verma } 875212e11fSVishal Verma 885212e11fSVishal Verma /* 895212e11fSVishal Verma * 'raw' version of btt_map write 905212e11fSVishal Verma * Assumptions: 915212e11fSVishal Verma * mapping is in little-endian 925212e11fSVishal Verma * mapping contains 'E' and 'Z' flags as desired 935212e11fSVishal Verma */ 943ae3d67bSVishal Verma static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping, 953ae3d67bSVishal Verma unsigned long flags) 965212e11fSVishal Verma { 975212e11fSVishal Verma u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE); 985212e11fSVishal Verma 9986652d2eSVishal Verma if (unlikely(lba >= arena->external_nlba)) 10086652d2eSVishal Verma dev_err_ratelimited(to_dev(arena), 10186652d2eSVishal Verma "%s: lba %#x out of range (max: %#x)\n", 10286652d2eSVishal Verma __func__, lba, arena->external_nlba); 1033ae3d67bSVishal Verma return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE, flags); 1045212e11fSVishal Verma } 1055212e11fSVishal Verma 1065212e11fSVishal Verma static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping, 1073ae3d67bSVishal Verma u32 z_flag, u32 e_flag, unsigned long rwb_flags) 1085212e11fSVishal Verma { 1095212e11fSVishal Verma u32 ze; 1105212e11fSVishal Verma __le32 mapping_le; 1115212e11fSVishal Verma 1125212e11fSVishal Verma /* 1135212e11fSVishal Verma * This 'mapping' is supposed to be just the LBA mapping, without 1145212e11fSVishal Verma * any flags set, so strip the flag bits. 1155212e11fSVishal Verma */ 1160595d539SVishal Verma mapping = ent_lba(mapping); 1175212e11fSVishal Verma 1185212e11fSVishal Verma ze = (z_flag << 1) + e_flag; 1195212e11fSVishal Verma switch (ze) { 1205212e11fSVishal Verma case 0: 1215212e11fSVishal Verma /* 1225212e11fSVishal Verma * We want to set neither of the Z or E flags, and 1235212e11fSVishal Verma * in the actual layout, this means setting the bit 1245212e11fSVishal Verma * positions of both to '1' to indicate a 'normal' 1255212e11fSVishal Verma * map entry 1265212e11fSVishal Verma */ 1275212e11fSVishal Verma mapping |= MAP_ENT_NORMAL; 1285212e11fSVishal Verma break; 1295212e11fSVishal Verma case 1: 1305212e11fSVishal Verma mapping |= (1 << MAP_ERR_SHIFT); 1315212e11fSVishal Verma break; 1325212e11fSVishal Verma case 2: 1335212e11fSVishal Verma mapping |= (1 << MAP_TRIM_SHIFT); 1345212e11fSVishal Verma break; 1355212e11fSVishal Verma default: 1365212e11fSVishal Verma /* 1375212e11fSVishal Verma * The case where Z and E are both sent in as '1' could be 1385212e11fSVishal Verma * construed as a valid 'normal' case, but we decide not to, 1395212e11fSVishal Verma * to avoid confusion 1405212e11fSVishal Verma */ 14186652d2eSVishal Verma dev_err_ratelimited(to_dev(arena), 14286652d2eSVishal Verma "Invalid use of Z and E flags\n"); 1435212e11fSVishal Verma return -EIO; 1445212e11fSVishal Verma } 1455212e11fSVishal Verma 1465212e11fSVishal Verma mapping_le = cpu_to_le32(mapping); 1473ae3d67bSVishal Verma return __btt_map_write(arena, lba, mapping_le, rwb_flags); 1485212e11fSVishal Verma } 1495212e11fSVishal Verma 1505212e11fSVishal Verma static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping, 1513ae3d67bSVishal Verma int *trim, int *error, unsigned long rwb_flags) 1525212e11fSVishal Verma { 1535212e11fSVishal Verma int ret; 1545212e11fSVishal Verma __le32 in; 1555212e11fSVishal Verma u32 raw_mapping, postmap, ze, z_flag, e_flag; 1565212e11fSVishal Verma u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE); 1575212e11fSVishal Verma 15886652d2eSVishal Verma if (unlikely(lba >= arena->external_nlba)) 15986652d2eSVishal Verma dev_err_ratelimited(to_dev(arena), 16086652d2eSVishal Verma "%s: lba %#x out of range (max: %#x)\n", 16186652d2eSVishal Verma __func__, lba, arena->external_nlba); 1625212e11fSVishal Verma 1633ae3d67bSVishal Verma ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE, rwb_flags); 1645212e11fSVishal Verma if (ret) 1655212e11fSVishal Verma return ret; 1665212e11fSVishal Verma 1675212e11fSVishal Verma raw_mapping = le32_to_cpu(in); 1685212e11fSVishal Verma 1690595d539SVishal Verma z_flag = ent_z_flag(raw_mapping); 1700595d539SVishal Verma e_flag = ent_e_flag(raw_mapping); 1715212e11fSVishal Verma ze = (z_flag << 1) + e_flag; 1720595d539SVishal Verma postmap = ent_lba(raw_mapping); 1735212e11fSVishal Verma 1745212e11fSVishal Verma /* Reuse the {z,e}_flag variables for *trim and *error */ 1755212e11fSVishal Verma z_flag = 0; 1765212e11fSVishal Verma e_flag = 0; 1775212e11fSVishal Verma 1785212e11fSVishal Verma switch (ze) { 1795212e11fSVishal Verma case 0: 1805212e11fSVishal Verma /* Initial state. Return postmap = premap */ 1815212e11fSVishal Verma *mapping = lba; 1825212e11fSVishal Verma break; 1835212e11fSVishal Verma case 1: 1845212e11fSVishal Verma *mapping = postmap; 1855212e11fSVishal Verma e_flag = 1; 1865212e11fSVishal Verma break; 1875212e11fSVishal Verma case 2: 1885212e11fSVishal Verma *mapping = postmap; 1895212e11fSVishal Verma z_flag = 1; 1905212e11fSVishal Verma break; 1915212e11fSVishal Verma case 3: 1925212e11fSVishal Verma *mapping = postmap; 1935212e11fSVishal Verma break; 1945212e11fSVishal Verma default: 1955212e11fSVishal Verma return -EIO; 1965212e11fSVishal Verma } 1975212e11fSVishal Verma 1985212e11fSVishal Verma if (trim) 1995212e11fSVishal Verma *trim = z_flag; 2005212e11fSVishal Verma if (error) 2015212e11fSVishal Verma *error = e_flag; 2025212e11fSVishal Verma 2035212e11fSVishal Verma return ret; 2045212e11fSVishal Verma } 2055212e11fSVishal Verma 20624e3a7fbSVishal Verma static int btt_log_group_read(struct arena_info *arena, u32 lane, 20724e3a7fbSVishal Verma struct log_group *log) 2085212e11fSVishal Verma { 2095212e11fSVishal Verma return arena_read_bytes(arena, 21024e3a7fbSVishal Verma arena->logoff + (lane * LOG_GRP_SIZE), log, 21124e3a7fbSVishal Verma LOG_GRP_SIZE, 0); 2125212e11fSVishal Verma } 2135212e11fSVishal Verma 2145212e11fSVishal Verma static struct dentry *debugfs_root; 2155212e11fSVishal Verma 2165212e11fSVishal Verma static void arena_debugfs_init(struct arena_info *a, struct dentry *parent, 2175212e11fSVishal Verma int idx) 2185212e11fSVishal Verma { 2195212e11fSVishal Verma char dirname[32]; 2205212e11fSVishal Verma struct dentry *d; 2215212e11fSVishal Verma 2225212e11fSVishal Verma /* If for some reason, parent bttN was not created, exit */ 2235212e11fSVishal Verma if (!parent) 2245212e11fSVishal Verma return; 2255212e11fSVishal Verma 2265212e11fSVishal Verma snprintf(dirname, 32, "arena%d", idx); 2275212e11fSVishal Verma d = debugfs_create_dir(dirname, parent); 2285212e11fSVishal Verma if (IS_ERR_OR_NULL(d)) 2295212e11fSVishal Verma return; 2305212e11fSVishal Verma a->debugfs_dir = d; 2315212e11fSVishal Verma 2325212e11fSVishal Verma debugfs_create_x64("size", S_IRUGO, d, &a->size); 2335212e11fSVishal Verma debugfs_create_x64("external_lba_start", S_IRUGO, d, 2345212e11fSVishal Verma &a->external_lba_start); 2355212e11fSVishal Verma debugfs_create_x32("internal_nlba", S_IRUGO, d, &a->internal_nlba); 2365212e11fSVishal Verma debugfs_create_u32("internal_lbasize", S_IRUGO, d, 2375212e11fSVishal Verma &a->internal_lbasize); 2385212e11fSVishal Verma debugfs_create_x32("external_nlba", S_IRUGO, d, &a->external_nlba); 2395212e11fSVishal Verma debugfs_create_u32("external_lbasize", S_IRUGO, d, 2405212e11fSVishal Verma &a->external_lbasize); 2415212e11fSVishal Verma debugfs_create_u32("nfree", S_IRUGO, d, &a->nfree); 2425212e11fSVishal Verma debugfs_create_u16("version_major", S_IRUGO, d, &a->version_major); 2435212e11fSVishal Verma debugfs_create_u16("version_minor", S_IRUGO, d, &a->version_minor); 2445212e11fSVishal Verma debugfs_create_x64("nextoff", S_IRUGO, d, &a->nextoff); 2455212e11fSVishal Verma debugfs_create_x64("infooff", S_IRUGO, d, &a->infooff); 2465212e11fSVishal Verma debugfs_create_x64("dataoff", S_IRUGO, d, &a->dataoff); 2475212e11fSVishal Verma debugfs_create_x64("mapoff", S_IRUGO, d, &a->mapoff); 2485212e11fSVishal Verma debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff); 2495212e11fSVishal Verma debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off); 2505212e11fSVishal Verma debugfs_create_x32("flags", S_IRUGO, d, &a->flags); 25124e3a7fbSVishal Verma debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]); 25224e3a7fbSVishal Verma debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]); 2535212e11fSVishal Verma } 2545212e11fSVishal Verma 2555212e11fSVishal Verma static void btt_debugfs_init(struct btt *btt) 2565212e11fSVishal Verma { 2575212e11fSVishal Verma int i = 0; 2585212e11fSVishal Verma struct arena_info *arena; 2595212e11fSVishal Verma 2605212e11fSVishal Verma btt->debugfs_dir = debugfs_create_dir(dev_name(&btt->nd_btt->dev), 2615212e11fSVishal Verma debugfs_root); 2625212e11fSVishal Verma if (IS_ERR_OR_NULL(btt->debugfs_dir)) 2635212e11fSVishal Verma return; 2645212e11fSVishal Verma 2655212e11fSVishal Verma list_for_each_entry(arena, &btt->arena_list, list) { 2665212e11fSVishal Verma arena_debugfs_init(arena, btt->debugfs_dir, i); 2675212e11fSVishal Verma i++; 2685212e11fSVishal Verma } 2695212e11fSVishal Verma } 2705212e11fSVishal Verma 27124e3a7fbSVishal Verma static u32 log_seq(struct log_group *log, int log_idx) 27224e3a7fbSVishal Verma { 27324e3a7fbSVishal Verma return le32_to_cpu(log->ent[log_idx].seq); 27424e3a7fbSVishal Verma } 27524e3a7fbSVishal Verma 2765212e11fSVishal Verma /* 2775212e11fSVishal Verma * This function accepts two log entries, and uses the 2785212e11fSVishal Verma * sequence number to find the 'older' entry. 2795212e11fSVishal Verma * It also updates the sequence number in this old entry to 2805212e11fSVishal Verma * make it the 'new' one if the mark_flag is set. 2815212e11fSVishal Verma * Finally, it returns which of the entries was the older one. 2825212e11fSVishal Verma * 2835212e11fSVishal Verma * TODO The logic feels a bit kludge-y. make it better.. 2845212e11fSVishal Verma */ 28524e3a7fbSVishal Verma static int btt_log_get_old(struct arena_info *a, struct log_group *log) 2865212e11fSVishal Verma { 28724e3a7fbSVishal Verma int idx0 = a->log_index[0]; 28824e3a7fbSVishal Verma int idx1 = a->log_index[1]; 2895212e11fSVishal Verma int old; 2905212e11fSVishal Verma 2915212e11fSVishal Verma /* 2925212e11fSVishal Verma * the first ever time this is seen, the entry goes into [0] 2935212e11fSVishal Verma * the next time, the following logic works out to put this 2945212e11fSVishal Verma * (next) entry into [1] 2955212e11fSVishal Verma */ 29624e3a7fbSVishal Verma if (log_seq(log, idx0) == 0) { 29724e3a7fbSVishal Verma log->ent[idx0].seq = cpu_to_le32(1); 2985212e11fSVishal Verma return 0; 2995212e11fSVishal Verma } 3005212e11fSVishal Verma 30124e3a7fbSVishal Verma if (log_seq(log, idx0) == log_seq(log, idx1)) 3025212e11fSVishal Verma return -EINVAL; 30324e3a7fbSVishal Verma if (log_seq(log, idx0) + log_seq(log, idx1) > 5) 3045212e11fSVishal Verma return -EINVAL; 3055212e11fSVishal Verma 30624e3a7fbSVishal Verma if (log_seq(log, idx0) < log_seq(log, idx1)) { 30724e3a7fbSVishal Verma if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1) 3085212e11fSVishal Verma old = 0; 3095212e11fSVishal Verma else 3105212e11fSVishal Verma old = 1; 3115212e11fSVishal Verma } else { 31224e3a7fbSVishal Verma if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1) 3135212e11fSVishal Verma old = 1; 3145212e11fSVishal Verma else 3155212e11fSVishal Verma old = 0; 3165212e11fSVishal Verma } 3175212e11fSVishal Verma 3185212e11fSVishal Verma return old; 3195212e11fSVishal Verma } 3205212e11fSVishal Verma 3215212e11fSVishal Verma /* 3225212e11fSVishal Verma * This function copies the desired (old/new) log entry into ent if 3235212e11fSVishal Verma * it is not NULL. It returns the sub-slot number (0 or 1) 3245212e11fSVishal Verma * where the desired log entry was found. Negative return values 3255212e11fSVishal Verma * indicate errors. 3265212e11fSVishal Verma */ 3275212e11fSVishal Verma static int btt_log_read(struct arena_info *arena, u32 lane, 3285212e11fSVishal Verma struct log_entry *ent, int old_flag) 3295212e11fSVishal Verma { 3305212e11fSVishal Verma int ret; 3315212e11fSVishal Verma int old_ent, ret_ent; 33224e3a7fbSVishal Verma struct log_group log; 3335212e11fSVishal Verma 33424e3a7fbSVishal Verma ret = btt_log_group_read(arena, lane, &log); 3355212e11fSVishal Verma if (ret) 3365212e11fSVishal Verma return -EIO; 3375212e11fSVishal Verma 33824e3a7fbSVishal Verma old_ent = btt_log_get_old(arena, &log); 3395212e11fSVishal Verma if (old_ent < 0 || old_ent > 1) { 340e6be2dcbSVishal Verma dev_err(to_dev(arena), 3415212e11fSVishal Verma "log corruption (%d): lane %d seq [%d, %d]\n", 34224e3a7fbSVishal Verma old_ent, lane, log.ent[arena->log_index[0]].seq, 34324e3a7fbSVishal Verma log.ent[arena->log_index[1]].seq); 3445212e11fSVishal Verma /* TODO set error state? */ 3455212e11fSVishal Verma return -EIO; 3465212e11fSVishal Verma } 3475212e11fSVishal Verma 3485212e11fSVishal Verma ret_ent = (old_flag ? old_ent : (1 - old_ent)); 3495212e11fSVishal Verma 3505212e11fSVishal Verma if (ent != NULL) 35124e3a7fbSVishal Verma memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE); 3525212e11fSVishal Verma 3535212e11fSVishal Verma return ret_ent; 3545212e11fSVishal Verma } 3555212e11fSVishal Verma 3565212e11fSVishal Verma /* 3575212e11fSVishal Verma * This function commits a log entry to media 3585212e11fSVishal Verma * It does _not_ prepare the freelist entry for the next write 3595212e11fSVishal Verma * btt_flog_write is the wrapper for updating the freelist elements 3605212e11fSVishal Verma */ 3615212e11fSVishal Verma static int __btt_log_write(struct arena_info *arena, u32 lane, 3623ae3d67bSVishal Verma u32 sub, struct log_entry *ent, unsigned long flags) 3635212e11fSVishal Verma { 3645212e11fSVishal Verma int ret; 36524e3a7fbSVishal Verma u32 group_slot = arena->log_index[sub]; 36624e3a7fbSVishal Verma unsigned int log_half = LOG_ENT_SIZE / 2; 3675212e11fSVishal Verma void *src = ent; 36824e3a7fbSVishal Verma u64 ns_off; 3695212e11fSVishal Verma 37024e3a7fbSVishal Verma ns_off = arena->logoff + (lane * LOG_GRP_SIZE) + 37124e3a7fbSVishal Verma (group_slot * LOG_ENT_SIZE); 3725212e11fSVishal Verma /* split the 16B write into atomic, durable halves */ 3733ae3d67bSVishal Verma ret = arena_write_bytes(arena, ns_off, src, log_half, flags); 3745212e11fSVishal Verma if (ret) 3755212e11fSVishal Verma return ret; 3765212e11fSVishal Verma 3775212e11fSVishal Verma ns_off += log_half; 3785212e11fSVishal Verma src += log_half; 3793ae3d67bSVishal Verma return arena_write_bytes(arena, ns_off, src, log_half, flags); 3805212e11fSVishal Verma } 3815212e11fSVishal Verma 3825212e11fSVishal Verma static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub, 3835212e11fSVishal Verma struct log_entry *ent) 3845212e11fSVishal Verma { 3855212e11fSVishal Verma int ret; 3865212e11fSVishal Verma 3873ae3d67bSVishal Verma ret = __btt_log_write(arena, lane, sub, ent, NVDIMM_IO_ATOMIC); 3885212e11fSVishal Verma if (ret) 3895212e11fSVishal Verma return ret; 3905212e11fSVishal Verma 3915212e11fSVishal Verma /* prepare the next free entry */ 3925212e11fSVishal Verma arena->freelist[lane].sub = 1 - arena->freelist[lane].sub; 3935212e11fSVishal Verma if (++(arena->freelist[lane].seq) == 4) 3945212e11fSVishal Verma arena->freelist[lane].seq = 1; 395*86aa6668SAneesh Kumar K.V if (ent_e_flag(le32_to_cpu(ent->old_map))) 396d9b83c75SVishal Verma arena->freelist[lane].has_err = 1; 397*86aa6668SAneesh Kumar K.V arena->freelist[lane].block = ent_lba(le32_to_cpu(ent->old_map)); 3985212e11fSVishal Verma 3995212e11fSVishal Verma return ret; 4005212e11fSVishal Verma } 4015212e11fSVishal Verma 4025212e11fSVishal Verma /* 4035212e11fSVishal Verma * This function initializes the BTT map to the initial state, which is 4045212e11fSVishal Verma * all-zeroes, and indicates an identity mapping 4055212e11fSVishal Verma */ 4065212e11fSVishal Verma static int btt_map_init(struct arena_info *arena) 4075212e11fSVishal Verma { 4085212e11fSVishal Verma int ret = -EINVAL; 4095212e11fSVishal Verma void *zerobuf; 4105212e11fSVishal Verma size_t offset = 0; 4115212e11fSVishal Verma size_t chunk_size = SZ_2M; 4125212e11fSVishal Verma size_t mapsize = arena->logoff - arena->mapoff; 4135212e11fSVishal Verma 4145212e11fSVishal Verma zerobuf = kzalloc(chunk_size, GFP_KERNEL); 4155212e11fSVishal Verma if (!zerobuf) 4165212e11fSVishal Verma return -ENOMEM; 4175212e11fSVishal Verma 418b177fe85SVishal Verma /* 419b177fe85SVishal Verma * mapoff should always be at least 512B aligned. We rely on that to 420b177fe85SVishal Verma * make sure rw_bytes does error clearing correctly, so make sure that 421b177fe85SVishal Verma * is the case. 422b177fe85SVishal Verma */ 42386652d2eSVishal Verma dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->mapoff, 512), 42486652d2eSVishal Verma "arena->mapoff: %#llx is unaligned\n", arena->mapoff); 425b177fe85SVishal Verma 4265212e11fSVishal Verma while (mapsize) { 4275212e11fSVishal Verma size_t size = min(mapsize, chunk_size); 4285212e11fSVishal Verma 42986652d2eSVishal Verma dev_WARN_ONCE(to_dev(arena), size < 512, 43004c3c982SRandy Dunlap "chunk size: %#zx is unaligned\n", size); 4315212e11fSVishal Verma ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf, 4323ae3d67bSVishal Verma size, 0); 4335212e11fSVishal Verma if (ret) 4345212e11fSVishal Verma goto free; 4355212e11fSVishal Verma 4365212e11fSVishal Verma offset += size; 4375212e11fSVishal Verma mapsize -= size; 4385212e11fSVishal Verma cond_resched(); 4395212e11fSVishal Verma } 4405212e11fSVishal Verma 4415212e11fSVishal Verma free: 4425212e11fSVishal Verma kfree(zerobuf); 4435212e11fSVishal Verma return ret; 4445212e11fSVishal Verma } 4455212e11fSVishal Verma 4465212e11fSVishal Verma /* 4475212e11fSVishal Verma * This function initializes the BTT log with 'fake' entries pointing 4485212e11fSVishal Verma * to the initial reserved set of blocks as being free 4495212e11fSVishal Verma */ 4505212e11fSVishal Verma static int btt_log_init(struct arena_info *arena) 4515212e11fSVishal Verma { 452b177fe85SVishal Verma size_t logsize = arena->info2off - arena->logoff; 453b177fe85SVishal Verma size_t chunk_size = SZ_4K, offset = 0; 45424e3a7fbSVishal Verma struct log_entry ent; 455b177fe85SVishal Verma void *zerobuf; 4565212e11fSVishal Verma int ret; 4575212e11fSVishal Verma u32 i; 4585212e11fSVishal Verma 459b177fe85SVishal Verma zerobuf = kzalloc(chunk_size, GFP_KERNEL); 460b177fe85SVishal Verma if (!zerobuf) 461b177fe85SVishal Verma return -ENOMEM; 462b177fe85SVishal Verma /* 463b177fe85SVishal Verma * logoff should always be at least 512B aligned. We rely on that to 464b177fe85SVishal Verma * make sure rw_bytes does error clearing correctly, so make sure that 465b177fe85SVishal Verma * is the case. 466b177fe85SVishal Verma */ 46786652d2eSVishal Verma dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->logoff, 512), 46886652d2eSVishal Verma "arena->logoff: %#llx is unaligned\n", arena->logoff); 469b177fe85SVishal Verma 470b177fe85SVishal Verma while (logsize) { 471b177fe85SVishal Verma size_t size = min(logsize, chunk_size); 472b177fe85SVishal Verma 47386652d2eSVishal Verma dev_WARN_ONCE(to_dev(arena), size < 512, 47404c3c982SRandy Dunlap "chunk size: %#zx is unaligned\n", size); 475b177fe85SVishal Verma ret = arena_write_bytes(arena, arena->logoff + offset, zerobuf, 476b177fe85SVishal Verma size, 0); 477b177fe85SVishal Verma if (ret) 478b177fe85SVishal Verma goto free; 479b177fe85SVishal Verma 480b177fe85SVishal Verma offset += size; 481b177fe85SVishal Verma logsize -= size; 482b177fe85SVishal Verma cond_resched(); 483b177fe85SVishal Verma } 4845212e11fSVishal Verma 4855212e11fSVishal Verma for (i = 0; i < arena->nfree; i++) { 48624e3a7fbSVishal Verma ent.lba = cpu_to_le32(i); 48724e3a7fbSVishal Verma ent.old_map = cpu_to_le32(arena->external_nlba + i); 48824e3a7fbSVishal Verma ent.new_map = cpu_to_le32(arena->external_nlba + i); 48924e3a7fbSVishal Verma ent.seq = cpu_to_le32(LOG_SEQ_INIT); 49024e3a7fbSVishal Verma ret = __btt_log_write(arena, i, 0, &ent, 0); 4915212e11fSVishal Verma if (ret) 492b177fe85SVishal Verma goto free; 4935212e11fSVishal Verma } 4945212e11fSVishal Verma 495b177fe85SVishal Verma free: 496b177fe85SVishal Verma kfree(zerobuf); 497b177fe85SVishal Verma return ret; 4985212e11fSVishal Verma } 4995212e11fSVishal Verma 500d9b83c75SVishal Verma static u64 to_namespace_offset(struct arena_info *arena, u64 lba) 501d9b83c75SVishal Verma { 502d9b83c75SVishal Verma return arena->dataoff + ((u64)lba * arena->internal_lbasize); 503d9b83c75SVishal Verma } 504d9b83c75SVishal Verma 505d9b83c75SVishal Verma static int arena_clear_freelist_error(struct arena_info *arena, u32 lane) 506d9b83c75SVishal Verma { 507d9b83c75SVishal Verma int ret = 0; 508d9b83c75SVishal Verma 509d9b83c75SVishal Verma if (arena->freelist[lane].has_err) { 510d9b83c75SVishal Verma void *zero_page = page_address(ZERO_PAGE(0)); 511d9b83c75SVishal Verma u32 lba = arena->freelist[lane].block; 512d9b83c75SVishal Verma u64 nsoff = to_namespace_offset(arena, lba); 513d9b83c75SVishal Verma unsigned long len = arena->sector_size; 514d9b83c75SVishal Verma 515d9b83c75SVishal Verma mutex_lock(&arena->err_lock); 516d9b83c75SVishal Verma 517d9b83c75SVishal Verma while (len) { 518d9b83c75SVishal Verma unsigned long chunk = min(len, PAGE_SIZE); 519d9b83c75SVishal Verma 520d9b83c75SVishal Verma ret = arena_write_bytes(arena, nsoff, zero_page, 521d9b83c75SVishal Verma chunk, 0); 522d9b83c75SVishal Verma if (ret) 523d9b83c75SVishal Verma break; 524d9b83c75SVishal Verma len -= chunk; 525d9b83c75SVishal Verma nsoff += chunk; 526d9b83c75SVishal Verma if (len == 0) 527d9b83c75SVishal Verma arena->freelist[lane].has_err = 0; 528d9b83c75SVishal Verma } 529d9b83c75SVishal Verma mutex_unlock(&arena->err_lock); 530d9b83c75SVishal Verma } 531d9b83c75SVishal Verma return ret; 532d9b83c75SVishal Verma } 533d9b83c75SVishal Verma 5345212e11fSVishal Verma static int btt_freelist_init(struct arena_info *arena) 5355212e11fSVishal Verma { 5362f8c9011SVishal Verma int new, ret; 5372f8c9011SVishal Verma struct log_entry log_new; 5389dedc73aSVishal Verma u32 i, map_entry, log_oldmap, log_newmap; 5395212e11fSVishal Verma 5405212e11fSVishal Verma arena->freelist = kcalloc(arena->nfree, sizeof(struct free_entry), 5415212e11fSVishal Verma GFP_KERNEL); 5425212e11fSVishal Verma if (!arena->freelist) 5435212e11fSVishal Verma return -ENOMEM; 5445212e11fSVishal Verma 5455212e11fSVishal Verma for (i = 0; i < arena->nfree; i++) { 5465212e11fSVishal Verma new = btt_log_read(arena, i, &log_new, LOG_NEW_ENT); 5475212e11fSVishal Verma if (new < 0) 5485212e11fSVishal Verma return new; 5495212e11fSVishal Verma 5509dedc73aSVishal Verma /* old and new map entries with any flags stripped out */ 5519dedc73aSVishal Verma log_oldmap = ent_lba(le32_to_cpu(log_new.old_map)); 5529dedc73aSVishal Verma log_newmap = ent_lba(le32_to_cpu(log_new.new_map)); 5539dedc73aSVishal Verma 5545212e11fSVishal Verma /* sub points to the next one to be overwritten */ 5555212e11fSVishal Verma arena->freelist[i].sub = 1 - new; 5565212e11fSVishal Verma arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq)); 5579dedc73aSVishal Verma arena->freelist[i].block = log_oldmap; 5585212e11fSVishal Verma 559d9b83c75SVishal Verma /* 560d9b83c75SVishal Verma * FIXME: if error clearing fails during init, we want to make 561d9b83c75SVishal Verma * the BTT read-only 562d9b83c75SVishal Verma */ 563*86aa6668SAneesh Kumar K.V if (ent_e_flag(le32_to_cpu(log_new.old_map)) && 564*86aa6668SAneesh Kumar K.V !ent_normal(le32_to_cpu(log_new.old_map))) { 5659dedc73aSVishal Verma arena->freelist[i].has_err = 1; 566d9b83c75SVishal Verma ret = arena_clear_freelist_error(arena, i); 567d9b83c75SVishal Verma if (ret) 56886652d2eSVishal Verma dev_err_ratelimited(to_dev(arena), 56986652d2eSVishal Verma "Unable to clear known errors\n"); 570d9b83c75SVishal Verma } 571d9b83c75SVishal Verma 5725212e11fSVishal Verma /* This implies a newly created or untouched flog entry */ 5739dedc73aSVishal Verma if (log_oldmap == log_newmap) 5745212e11fSVishal Verma continue; 5755212e11fSVishal Verma 5765212e11fSVishal Verma /* Check if map recovery is needed */ 5775212e11fSVishal Verma ret = btt_map_read(arena, le32_to_cpu(log_new.lba), &map_entry, 5783ae3d67bSVishal Verma NULL, NULL, 0); 5795212e11fSVishal Verma if (ret) 5805212e11fSVishal Verma return ret; 5819dedc73aSVishal Verma 5829dedc73aSVishal Verma /* 5839dedc73aSVishal Verma * The map_entry from btt_read_map is stripped of any flag bits, 5849dedc73aSVishal Verma * so use the stripped out versions from the log as well for 5859dedc73aSVishal Verma * testing whether recovery is needed. For restoration, use the 5869dedc73aSVishal Verma * 'raw' version of the log entries as that captured what we 5879dedc73aSVishal Verma * were going to write originally. 5889dedc73aSVishal Verma */ 5899dedc73aSVishal Verma if ((log_newmap != map_entry) && (log_oldmap == map_entry)) { 5905212e11fSVishal Verma /* 5915212e11fSVishal Verma * Last transaction wrote the flog, but wasn't able 5925212e11fSVishal Verma * to complete the map write. So fix up the map. 5935212e11fSVishal Verma */ 5945212e11fSVishal Verma ret = btt_map_write(arena, le32_to_cpu(log_new.lba), 5953ae3d67bSVishal Verma le32_to_cpu(log_new.new_map), 0, 0, 0); 5965212e11fSVishal Verma if (ret) 5975212e11fSVishal Verma return ret; 5985212e11fSVishal Verma } 5995212e11fSVishal Verma } 6005212e11fSVishal Verma 6015212e11fSVishal Verma return 0; 6025212e11fSVishal Verma } 6035212e11fSVishal Verma 60424e3a7fbSVishal Verma static bool ent_is_padding(struct log_entry *ent) 60524e3a7fbSVishal Verma { 60624e3a7fbSVishal Verma return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0) 60724e3a7fbSVishal Verma && (ent->seq == 0); 60824e3a7fbSVishal Verma } 60924e3a7fbSVishal Verma 61024e3a7fbSVishal Verma /* 61124e3a7fbSVishal Verma * Detecting valid log indices: We read a log group (see the comments in btt.h 61224e3a7fbSVishal Verma * for a description of a 'log_group' and its 'slots'), and iterate over its 61324e3a7fbSVishal Verma * four slots. We expect that a padding slot will be all-zeroes, and use this 61424e3a7fbSVishal Verma * to detect a padding slot vs. an actual entry. 61524e3a7fbSVishal Verma * 61624e3a7fbSVishal Verma * If a log_group is in the initial state, i.e. hasn't been used since the 61724e3a7fbSVishal Verma * creation of this BTT layout, it will have three of the four slots with 61824e3a7fbSVishal Verma * zeroes. We skip over these log_groups for the detection of log_index. If 61924e3a7fbSVishal Verma * all log_groups are in the initial state (i.e. the BTT has never been 62024e3a7fbSVishal Verma * written to), it is safe to assume the 'new format' of log entries in slots 62124e3a7fbSVishal Verma * (0, 1). 62224e3a7fbSVishal Verma */ 62324e3a7fbSVishal Verma static int log_set_indices(struct arena_info *arena) 62424e3a7fbSVishal Verma { 62524e3a7fbSVishal Verma bool idx_set = false, initial_state = true; 62624e3a7fbSVishal Verma int ret, log_index[2] = {-1, -1}; 62724e3a7fbSVishal Verma u32 i, j, next_idx = 0; 62824e3a7fbSVishal Verma struct log_group log; 62924e3a7fbSVishal Verma u32 pad_count = 0; 63024e3a7fbSVishal Verma 63124e3a7fbSVishal Verma for (i = 0; i < arena->nfree; i++) { 63224e3a7fbSVishal Verma ret = btt_log_group_read(arena, i, &log); 63324e3a7fbSVishal Verma if (ret < 0) 63424e3a7fbSVishal Verma return ret; 63524e3a7fbSVishal Verma 63624e3a7fbSVishal Verma for (j = 0; j < 4; j++) { 63724e3a7fbSVishal Verma if (!idx_set) { 63824e3a7fbSVishal Verma if (ent_is_padding(&log.ent[j])) { 63924e3a7fbSVishal Verma pad_count++; 64024e3a7fbSVishal Verma continue; 64124e3a7fbSVishal Verma } else { 64224e3a7fbSVishal Verma /* Skip if index has been recorded */ 64324e3a7fbSVishal Verma if ((next_idx == 1) && 64424e3a7fbSVishal Verma (j == log_index[0])) 64524e3a7fbSVishal Verma continue; 64624e3a7fbSVishal Verma /* valid entry, record index */ 64724e3a7fbSVishal Verma log_index[next_idx] = j; 64824e3a7fbSVishal Verma next_idx++; 64924e3a7fbSVishal Verma } 65024e3a7fbSVishal Verma if (next_idx == 2) { 65124e3a7fbSVishal Verma /* two valid entries found */ 65224e3a7fbSVishal Verma idx_set = true; 65324e3a7fbSVishal Verma } else if (next_idx > 2) { 65424e3a7fbSVishal Verma /* too many valid indices */ 65524e3a7fbSVishal Verma return -ENXIO; 65624e3a7fbSVishal Verma } 65724e3a7fbSVishal Verma } else { 65824e3a7fbSVishal Verma /* 65924e3a7fbSVishal Verma * once the indices have been set, just verify 66024e3a7fbSVishal Verma * that all subsequent log groups are either in 66124e3a7fbSVishal Verma * their initial state or follow the same 66224e3a7fbSVishal Verma * indices. 66324e3a7fbSVishal Verma */ 66424e3a7fbSVishal Verma if (j == log_index[0]) { 66524e3a7fbSVishal Verma /* entry must be 'valid' */ 66624e3a7fbSVishal Verma if (ent_is_padding(&log.ent[j])) 66724e3a7fbSVishal Verma return -ENXIO; 66824e3a7fbSVishal Verma } else if (j == log_index[1]) { 66924e3a7fbSVishal Verma ; 67024e3a7fbSVishal Verma /* 67124e3a7fbSVishal Verma * log_index[1] can be padding if the 67224e3a7fbSVishal Verma * lane never got used and it is still 67324e3a7fbSVishal Verma * in the initial state (three 'padding' 67424e3a7fbSVishal Verma * entries) 67524e3a7fbSVishal Verma */ 67624e3a7fbSVishal Verma } else { 67724e3a7fbSVishal Verma /* entry must be invalid (padding) */ 67824e3a7fbSVishal Verma if (!ent_is_padding(&log.ent[j])) 67924e3a7fbSVishal Verma return -ENXIO; 68024e3a7fbSVishal Verma } 68124e3a7fbSVishal Verma } 68224e3a7fbSVishal Verma } 68324e3a7fbSVishal Verma /* 68424e3a7fbSVishal Verma * If any of the log_groups have more than one valid, 68524e3a7fbSVishal Verma * non-padding entry, then the we are no longer in the 68624e3a7fbSVishal Verma * initial_state 68724e3a7fbSVishal Verma */ 68824e3a7fbSVishal Verma if (pad_count < 3) 68924e3a7fbSVishal Verma initial_state = false; 69024e3a7fbSVishal Verma pad_count = 0; 69124e3a7fbSVishal Verma } 69224e3a7fbSVishal Verma 69324e3a7fbSVishal Verma if (!initial_state && !idx_set) 69424e3a7fbSVishal Verma return -ENXIO; 69524e3a7fbSVishal Verma 69624e3a7fbSVishal Verma /* 69724e3a7fbSVishal Verma * If all the entries in the log were in the initial state, 69824e3a7fbSVishal Verma * assume new padding scheme 69924e3a7fbSVishal Verma */ 70024e3a7fbSVishal Verma if (initial_state) 70124e3a7fbSVishal Verma log_index[1] = 1; 70224e3a7fbSVishal Verma 70324e3a7fbSVishal Verma /* 70424e3a7fbSVishal Verma * Only allow the known permutations of log/padding indices, 70524e3a7fbSVishal Verma * i.e. (0, 1), and (0, 2) 70624e3a7fbSVishal Verma */ 70724e3a7fbSVishal Verma if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2))) 70824e3a7fbSVishal Verma ; /* known index possibilities */ 70924e3a7fbSVishal Verma else { 71024e3a7fbSVishal Verma dev_err(to_dev(arena), "Found an unknown padding scheme\n"); 71124e3a7fbSVishal Verma return -ENXIO; 71224e3a7fbSVishal Verma } 71324e3a7fbSVishal Verma 71424e3a7fbSVishal Verma arena->log_index[0] = log_index[0]; 71524e3a7fbSVishal Verma arena->log_index[1] = log_index[1]; 71624e3a7fbSVishal Verma dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]); 71724e3a7fbSVishal Verma dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]); 71824e3a7fbSVishal Verma return 0; 71924e3a7fbSVishal Verma } 72024e3a7fbSVishal Verma 7215212e11fSVishal Verma static int btt_rtt_init(struct arena_info *arena) 7225212e11fSVishal Verma { 7235212e11fSVishal Verma arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL); 7245212e11fSVishal Verma if (arena->rtt == NULL) 7255212e11fSVishal Verma return -ENOMEM; 7265212e11fSVishal Verma 7275212e11fSVishal Verma return 0; 7285212e11fSVishal Verma } 7295212e11fSVishal Verma 7305212e11fSVishal Verma static int btt_maplocks_init(struct arena_info *arena) 7315212e11fSVishal Verma { 7325212e11fSVishal Verma u32 i; 7335212e11fSVishal Verma 7345212e11fSVishal Verma arena->map_locks = kcalloc(arena->nfree, sizeof(struct aligned_lock), 7355212e11fSVishal Verma GFP_KERNEL); 7365212e11fSVishal Verma if (!arena->map_locks) 7375212e11fSVishal Verma return -ENOMEM; 7385212e11fSVishal Verma 7395212e11fSVishal Verma for (i = 0; i < arena->nfree; i++) 7405212e11fSVishal Verma spin_lock_init(&arena->map_locks[i].lock); 7415212e11fSVishal Verma 7425212e11fSVishal Verma return 0; 7435212e11fSVishal Verma } 7445212e11fSVishal Verma 7455212e11fSVishal Verma static struct arena_info *alloc_arena(struct btt *btt, size_t size, 7465212e11fSVishal Verma size_t start, size_t arena_off) 7475212e11fSVishal Verma { 7485212e11fSVishal Verma struct arena_info *arena; 7495212e11fSVishal Verma u64 logsize, mapsize, datasize; 7505212e11fSVishal Verma u64 available = size; 7515212e11fSVishal Verma 7525212e11fSVishal Verma arena = kzalloc(sizeof(struct arena_info), GFP_KERNEL); 7535212e11fSVishal Verma if (!arena) 7545212e11fSVishal Verma return NULL; 7555212e11fSVishal Verma arena->nd_btt = btt->nd_btt; 75675892004SVishal Verma arena->sector_size = btt->sector_size; 757d08cd5e0SJeff Moyer mutex_init(&arena->err_lock); 7585212e11fSVishal Verma 7595212e11fSVishal Verma if (!size) 7605212e11fSVishal Verma return arena; 7615212e11fSVishal Verma 7625212e11fSVishal Verma arena->size = size; 7635212e11fSVishal Verma arena->external_lba_start = start; 7645212e11fSVishal Verma arena->external_lbasize = btt->lbasize; 7655212e11fSVishal Verma arena->internal_lbasize = roundup(arena->external_lbasize, 7665212e11fSVishal Verma INT_LBASIZE_ALIGNMENT); 7675212e11fSVishal Verma arena->nfree = BTT_DEFAULT_NFREE; 76814e49454SVishal Verma arena->version_major = btt->nd_btt->version_major; 76914e49454SVishal Verma arena->version_minor = btt->nd_btt->version_minor; 7705212e11fSVishal Verma 7715212e11fSVishal Verma if (available % BTT_PG_SIZE) 7725212e11fSVishal Verma available -= (available % BTT_PG_SIZE); 7735212e11fSVishal Verma 7745212e11fSVishal Verma /* Two pages are reserved for the super block and its copy */ 7755212e11fSVishal Verma available -= 2 * BTT_PG_SIZE; 7765212e11fSVishal Verma 7775212e11fSVishal Verma /* The log takes a fixed amount of space based on nfree */ 77824e3a7fbSVishal Verma logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE); 7795212e11fSVishal Verma available -= logsize; 7805212e11fSVishal Verma 7815212e11fSVishal Verma /* Calculate optimal split between map and data area */ 7825212e11fSVishal Verma arena->internal_nlba = div_u64(available - BTT_PG_SIZE, 7835212e11fSVishal Verma arena->internal_lbasize + MAP_ENT_SIZE); 7845212e11fSVishal Verma arena->external_nlba = arena->internal_nlba - arena->nfree; 7855212e11fSVishal Verma 7865212e11fSVishal Verma mapsize = roundup((arena->external_nlba * MAP_ENT_SIZE), BTT_PG_SIZE); 7875212e11fSVishal Verma datasize = available - mapsize; 7885212e11fSVishal Verma 7895212e11fSVishal Verma /* 'Absolute' values, relative to start of storage space */ 7905212e11fSVishal Verma arena->infooff = arena_off; 7915212e11fSVishal Verma arena->dataoff = arena->infooff + BTT_PG_SIZE; 7925212e11fSVishal Verma arena->mapoff = arena->dataoff + datasize; 7935212e11fSVishal Verma arena->logoff = arena->mapoff + mapsize; 7945212e11fSVishal Verma arena->info2off = arena->logoff + logsize; 79524e3a7fbSVishal Verma 79624e3a7fbSVishal Verma /* Default log indices are (0,1) */ 79724e3a7fbSVishal Verma arena->log_index[0] = 0; 79824e3a7fbSVishal Verma arena->log_index[1] = 1; 7995212e11fSVishal Verma return arena; 8005212e11fSVishal Verma } 8015212e11fSVishal Verma 8025212e11fSVishal Verma static void free_arenas(struct btt *btt) 8035212e11fSVishal Verma { 8045212e11fSVishal Verma struct arena_info *arena, *next; 8055212e11fSVishal Verma 8065212e11fSVishal Verma list_for_each_entry_safe(arena, next, &btt->arena_list, list) { 8075212e11fSVishal Verma list_del(&arena->list); 8085212e11fSVishal Verma kfree(arena->rtt); 8095212e11fSVishal Verma kfree(arena->map_locks); 8105212e11fSVishal Verma kfree(arena->freelist); 8115212e11fSVishal Verma debugfs_remove_recursive(arena->debugfs_dir); 8125212e11fSVishal Verma kfree(arena); 8135212e11fSVishal Verma } 8145212e11fSVishal Verma } 8155212e11fSVishal Verma 8165212e11fSVishal Verma /* 8175212e11fSVishal Verma * This function reads an existing valid btt superblock and 8185212e11fSVishal Verma * populates the corresponding arena_info struct 8195212e11fSVishal Verma */ 8205212e11fSVishal Verma static void parse_arena_meta(struct arena_info *arena, struct btt_sb *super, 8215212e11fSVishal Verma u64 arena_off) 8225212e11fSVishal Verma { 8235212e11fSVishal Verma arena->internal_nlba = le32_to_cpu(super->internal_nlba); 8245212e11fSVishal Verma arena->internal_lbasize = le32_to_cpu(super->internal_lbasize); 8255212e11fSVishal Verma arena->external_nlba = le32_to_cpu(super->external_nlba); 8265212e11fSVishal Verma arena->external_lbasize = le32_to_cpu(super->external_lbasize); 8275212e11fSVishal Verma arena->nfree = le32_to_cpu(super->nfree); 8285212e11fSVishal Verma arena->version_major = le16_to_cpu(super->version_major); 8295212e11fSVishal Verma arena->version_minor = le16_to_cpu(super->version_minor); 8305212e11fSVishal Verma 8315212e11fSVishal Verma arena->nextoff = (super->nextoff == 0) ? 0 : (arena_off + 8325212e11fSVishal Verma le64_to_cpu(super->nextoff)); 8335212e11fSVishal Verma arena->infooff = arena_off; 8345212e11fSVishal Verma arena->dataoff = arena_off + le64_to_cpu(super->dataoff); 8355212e11fSVishal Verma arena->mapoff = arena_off + le64_to_cpu(super->mapoff); 8365212e11fSVishal Verma arena->logoff = arena_off + le64_to_cpu(super->logoff); 8375212e11fSVishal Verma arena->info2off = arena_off + le64_to_cpu(super->info2off); 8385212e11fSVishal Verma 8395e329406SDan Williams arena->size = (le64_to_cpu(super->nextoff) > 0) 8405e329406SDan Williams ? (le64_to_cpu(super->nextoff)) 8415e329406SDan Williams : (arena->info2off - arena->infooff + BTT_PG_SIZE); 8425212e11fSVishal Verma 8435212e11fSVishal Verma arena->flags = le32_to_cpu(super->flags); 8445212e11fSVishal Verma } 8455212e11fSVishal Verma 8465212e11fSVishal Verma static int discover_arenas(struct btt *btt) 8475212e11fSVishal Verma { 8485212e11fSVishal Verma int ret = 0; 8495212e11fSVishal Verma struct arena_info *arena; 8505212e11fSVishal Verma struct btt_sb *super; 8515212e11fSVishal Verma size_t remaining = btt->rawsize; 8525212e11fSVishal Verma u64 cur_nlba = 0; 8535212e11fSVishal Verma size_t cur_off = 0; 8545212e11fSVishal Verma int num_arenas = 0; 8555212e11fSVishal Verma 8565212e11fSVishal Verma super = kzalloc(sizeof(*super), GFP_KERNEL); 8575212e11fSVishal Verma if (!super) 8585212e11fSVishal Verma return -ENOMEM; 8595212e11fSVishal Verma 8605212e11fSVishal Verma while (remaining) { 8615212e11fSVishal Verma /* Alloc memory for arena */ 8625212e11fSVishal Verma arena = alloc_arena(btt, 0, 0, 0); 8635212e11fSVishal Verma if (!arena) { 8645212e11fSVishal Verma ret = -ENOMEM; 8655212e11fSVishal Verma goto out_super; 8665212e11fSVishal Verma } 8675212e11fSVishal Verma 8685212e11fSVishal Verma arena->infooff = cur_off; 8695212e11fSVishal Verma ret = btt_info_read(arena, super); 8705212e11fSVishal Verma if (ret) 8715212e11fSVishal Verma goto out; 8725212e11fSVishal Verma 873ab45e763SVishal Verma if (!nd_btt_arena_is_valid(btt->nd_btt, super)) { 8745212e11fSVishal Verma if (remaining == btt->rawsize) { 8755212e11fSVishal Verma btt->init_state = INIT_NOTFOUND; 8765212e11fSVishal Verma dev_info(to_dev(arena), "No existing arenas\n"); 8775212e11fSVishal Verma goto out; 8785212e11fSVishal Verma } else { 879e6be2dcbSVishal Verma dev_err(to_dev(arena), 8805212e11fSVishal Verma "Found corrupted metadata!\n"); 8815212e11fSVishal Verma ret = -ENODEV; 8825212e11fSVishal Verma goto out; 8835212e11fSVishal Verma } 8845212e11fSVishal Verma } 8855212e11fSVishal Verma 8865212e11fSVishal Verma arena->external_lba_start = cur_nlba; 8875212e11fSVishal Verma parse_arena_meta(arena, super, cur_off); 8885212e11fSVishal Verma 88924e3a7fbSVishal Verma ret = log_set_indices(arena); 89024e3a7fbSVishal Verma if (ret) { 89124e3a7fbSVishal Verma dev_err(to_dev(arena), 89224e3a7fbSVishal Verma "Unable to deduce log/padding indices\n"); 89324e3a7fbSVishal Verma goto out; 89424e3a7fbSVishal Verma } 89524e3a7fbSVishal Verma 8965212e11fSVishal Verma ret = btt_freelist_init(arena); 8975212e11fSVishal Verma if (ret) 8985212e11fSVishal Verma goto out; 8995212e11fSVishal Verma 9005212e11fSVishal Verma ret = btt_rtt_init(arena); 9015212e11fSVishal Verma if (ret) 9025212e11fSVishal Verma goto out; 9035212e11fSVishal Verma 9045212e11fSVishal Verma ret = btt_maplocks_init(arena); 9055212e11fSVishal Verma if (ret) 9065212e11fSVishal Verma goto out; 9075212e11fSVishal Verma 9085212e11fSVishal Verma list_add_tail(&arena->list, &btt->arena_list); 9095212e11fSVishal Verma 9105212e11fSVishal Verma remaining -= arena->size; 9115212e11fSVishal Verma cur_off += arena->size; 9125212e11fSVishal Verma cur_nlba += arena->external_nlba; 9135212e11fSVishal Verma num_arenas++; 9145212e11fSVishal Verma 9155212e11fSVishal Verma if (arena->nextoff == 0) 9165212e11fSVishal Verma break; 9175212e11fSVishal Verma } 9185212e11fSVishal Verma btt->num_arenas = num_arenas; 9195212e11fSVishal Verma btt->nlba = cur_nlba; 9205212e11fSVishal Verma btt->init_state = INIT_READY; 9215212e11fSVishal Verma 9225212e11fSVishal Verma kfree(super); 9235212e11fSVishal Verma return ret; 9245212e11fSVishal Verma 9255212e11fSVishal Verma out: 9265212e11fSVishal Verma kfree(arena); 9275212e11fSVishal Verma free_arenas(btt); 9285212e11fSVishal Verma out_super: 9295212e11fSVishal Verma kfree(super); 9305212e11fSVishal Verma return ret; 9315212e11fSVishal Verma } 9325212e11fSVishal Verma 9335212e11fSVishal Verma static int create_arenas(struct btt *btt) 9345212e11fSVishal Verma { 9355212e11fSVishal Verma size_t remaining = btt->rawsize; 9365212e11fSVishal Verma size_t cur_off = 0; 9375212e11fSVishal Verma 9385212e11fSVishal Verma while (remaining) { 9395212e11fSVishal Verma struct arena_info *arena; 9405212e11fSVishal Verma size_t arena_size = min_t(u64, ARENA_MAX_SIZE, remaining); 9415212e11fSVishal Verma 9425212e11fSVishal Verma remaining -= arena_size; 9435212e11fSVishal Verma if (arena_size < ARENA_MIN_SIZE) 9445212e11fSVishal Verma break; 9455212e11fSVishal Verma 9465212e11fSVishal Verma arena = alloc_arena(btt, arena_size, btt->nlba, cur_off); 9475212e11fSVishal Verma if (!arena) { 9485212e11fSVishal Verma free_arenas(btt); 9495212e11fSVishal Verma return -ENOMEM; 9505212e11fSVishal Verma } 9515212e11fSVishal Verma btt->nlba += arena->external_nlba; 9525212e11fSVishal Verma if (remaining >= ARENA_MIN_SIZE) 9535212e11fSVishal Verma arena->nextoff = arena->size; 9545212e11fSVishal Verma else 9555212e11fSVishal Verma arena->nextoff = 0; 9565212e11fSVishal Verma cur_off += arena_size; 9575212e11fSVishal Verma list_add_tail(&arena->list, &btt->arena_list); 9585212e11fSVishal Verma } 9595212e11fSVishal Verma 9605212e11fSVishal Verma return 0; 9615212e11fSVishal Verma } 9625212e11fSVishal Verma 9635212e11fSVishal Verma /* 9645212e11fSVishal Verma * This function completes arena initialization by writing 9655212e11fSVishal Verma * all the metadata. 9665212e11fSVishal Verma * It is only called for an uninitialized arena when a write 9675212e11fSVishal Verma * to that arena occurs for the first time. 9685212e11fSVishal Verma */ 969fbde1414SVishal Verma static int btt_arena_write_layout(struct arena_info *arena) 9705212e11fSVishal Verma { 9715212e11fSVishal Verma int ret; 972e1455744SDan Williams u64 sum; 9735212e11fSVishal Verma struct btt_sb *super; 974fbde1414SVishal Verma struct nd_btt *nd_btt = arena->nd_btt; 9756ec68954SVishal Verma const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev); 9765212e11fSVishal Verma 9775212e11fSVishal Verma ret = btt_map_init(arena); 9785212e11fSVishal Verma if (ret) 9795212e11fSVishal Verma return ret; 9805212e11fSVishal Verma 9815212e11fSVishal Verma ret = btt_log_init(arena); 9825212e11fSVishal Verma if (ret) 9835212e11fSVishal Verma return ret; 9845212e11fSVishal Verma 9855212e11fSVishal Verma super = kzalloc(sizeof(struct btt_sb), GFP_NOIO); 9865212e11fSVishal Verma if (!super) 9875212e11fSVishal Verma return -ENOMEM; 9885212e11fSVishal Verma 9895212e11fSVishal Verma strncpy(super->signature, BTT_SIG, BTT_SIG_LEN); 990fbde1414SVishal Verma memcpy(super->uuid, nd_btt->uuid, 16); 9916ec68954SVishal Verma memcpy(super->parent_uuid, parent_uuid, 16); 9925212e11fSVishal Verma super->flags = cpu_to_le32(arena->flags); 9935212e11fSVishal Verma super->version_major = cpu_to_le16(arena->version_major); 9945212e11fSVishal Verma super->version_minor = cpu_to_le16(arena->version_minor); 9955212e11fSVishal Verma super->external_lbasize = cpu_to_le32(arena->external_lbasize); 9965212e11fSVishal Verma super->external_nlba = cpu_to_le32(arena->external_nlba); 9975212e11fSVishal Verma super->internal_lbasize = cpu_to_le32(arena->internal_lbasize); 9985212e11fSVishal Verma super->internal_nlba = cpu_to_le32(arena->internal_nlba); 9995212e11fSVishal Verma super->nfree = cpu_to_le32(arena->nfree); 10005212e11fSVishal Verma super->infosize = cpu_to_le32(sizeof(struct btt_sb)); 10015212e11fSVishal Verma super->nextoff = cpu_to_le64(arena->nextoff); 10025212e11fSVishal Verma /* 10035212e11fSVishal Verma * Subtract arena->infooff (arena start) so numbers are relative 10045212e11fSVishal Verma * to 'this' arena 10055212e11fSVishal Verma */ 10065212e11fSVishal Verma super->dataoff = cpu_to_le64(arena->dataoff - arena->infooff); 10075212e11fSVishal Verma super->mapoff = cpu_to_le64(arena->mapoff - arena->infooff); 10085212e11fSVishal Verma super->logoff = cpu_to_le64(arena->logoff - arena->infooff); 10095212e11fSVishal Verma super->info2off = cpu_to_le64(arena->info2off - arena->infooff); 10105212e11fSVishal Verma 10115212e11fSVishal Verma super->flags = 0; 1012e1455744SDan Williams sum = nd_sb_checksum((struct nd_gen_sb *) super); 1013e1455744SDan Williams super->checksum = cpu_to_le64(sum); 10145212e11fSVishal Verma 10155212e11fSVishal Verma ret = btt_info_write(arena, super); 10165212e11fSVishal Verma 10175212e11fSVishal Verma kfree(super); 10185212e11fSVishal Verma return ret; 10195212e11fSVishal Verma } 10205212e11fSVishal Verma 10215212e11fSVishal Verma /* 10225212e11fSVishal Verma * This function completes the initialization for the BTT namespace 10235212e11fSVishal Verma * such that it is ready to accept IOs 10245212e11fSVishal Verma */ 10255212e11fSVishal Verma static int btt_meta_init(struct btt *btt) 10265212e11fSVishal Verma { 10275212e11fSVishal Verma int ret = 0; 10285212e11fSVishal Verma struct arena_info *arena; 10295212e11fSVishal Verma 10305212e11fSVishal Verma mutex_lock(&btt->init_lock); 10315212e11fSVishal Verma list_for_each_entry(arena, &btt->arena_list, list) { 1032fbde1414SVishal Verma ret = btt_arena_write_layout(arena); 10335212e11fSVishal Verma if (ret) 10345212e11fSVishal Verma goto unlock; 10355212e11fSVishal Verma 10365212e11fSVishal Verma ret = btt_freelist_init(arena); 10375212e11fSVishal Verma if (ret) 10385212e11fSVishal Verma goto unlock; 10395212e11fSVishal Verma 10405212e11fSVishal Verma ret = btt_rtt_init(arena); 10415212e11fSVishal Verma if (ret) 10425212e11fSVishal Verma goto unlock; 10435212e11fSVishal Verma 10445212e11fSVishal Verma ret = btt_maplocks_init(arena); 10455212e11fSVishal Verma if (ret) 10465212e11fSVishal Verma goto unlock; 10475212e11fSVishal Verma } 10485212e11fSVishal Verma 10495212e11fSVishal Verma btt->init_state = INIT_READY; 10505212e11fSVishal Verma 10515212e11fSVishal Verma unlock: 10525212e11fSVishal Verma mutex_unlock(&btt->init_lock); 10535212e11fSVishal Verma return ret; 10545212e11fSVishal Verma } 10555212e11fSVishal Verma 105641cd8b70SVishal Verma static u32 btt_meta_size(struct btt *btt) 105741cd8b70SVishal Verma { 105841cd8b70SVishal Verma return btt->lbasize - btt->sector_size; 105941cd8b70SVishal Verma } 106041cd8b70SVishal Verma 10615212e11fSVishal Verma /* 10625212e11fSVishal Verma * This function calculates the arena in which the given LBA lies 10635212e11fSVishal Verma * by doing a linear walk. This is acceptable since we expect only 10645212e11fSVishal Verma * a few arenas. If we have backing devices that get much larger, 10655212e11fSVishal Verma * we can construct a balanced binary tree of arenas at init time 10665212e11fSVishal Verma * so that this range search becomes faster. 10675212e11fSVishal Verma */ 10685212e11fSVishal Verma static int lba_to_arena(struct btt *btt, sector_t sector, __u32 *premap, 10695212e11fSVishal Verma struct arena_info **arena) 10705212e11fSVishal Verma { 10715212e11fSVishal Verma struct arena_info *arena_list; 10725212e11fSVishal Verma __u64 lba = div_u64(sector << SECTOR_SHIFT, btt->sector_size); 10735212e11fSVishal Verma 10745212e11fSVishal Verma list_for_each_entry(arena_list, &btt->arena_list, list) { 10755212e11fSVishal Verma if (lba < arena_list->external_nlba) { 10765212e11fSVishal Verma *arena = arena_list; 10775212e11fSVishal Verma *premap = lba; 10785212e11fSVishal Verma return 0; 10795212e11fSVishal Verma } 10805212e11fSVishal Verma lba -= arena_list->external_nlba; 10815212e11fSVishal Verma } 10825212e11fSVishal Verma 10835212e11fSVishal Verma return -EIO; 10845212e11fSVishal Verma } 10855212e11fSVishal Verma 10865212e11fSVishal Verma /* 10875212e11fSVishal Verma * The following (lock_map, unlock_map) are mostly just to improve 10885212e11fSVishal Verma * readability, since they index into an array of locks 10895212e11fSVishal Verma */ 10905212e11fSVishal Verma static void lock_map(struct arena_info *arena, u32 premap) 10915212e11fSVishal Verma __acquires(&arena->map_locks[idx].lock) 10925212e11fSVishal Verma { 10935212e11fSVishal Verma u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree; 10945212e11fSVishal Verma 10955212e11fSVishal Verma spin_lock(&arena->map_locks[idx].lock); 10965212e11fSVishal Verma } 10975212e11fSVishal Verma 10985212e11fSVishal Verma static void unlock_map(struct arena_info *arena, u32 premap) 10995212e11fSVishal Verma __releases(&arena->map_locks[idx].lock) 11005212e11fSVishal Verma { 11015212e11fSVishal Verma u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree; 11025212e11fSVishal Verma 11035212e11fSVishal Verma spin_unlock(&arena->map_locks[idx].lock); 11045212e11fSVishal Verma } 11055212e11fSVishal Verma 11065212e11fSVishal Verma static int btt_data_read(struct arena_info *arena, struct page *page, 11075212e11fSVishal Verma unsigned int off, u32 lba, u32 len) 11085212e11fSVishal Verma { 11095212e11fSVishal Verma int ret; 11105212e11fSVishal Verma u64 nsoff = to_namespace_offset(arena, lba); 11115212e11fSVishal Verma void *mem = kmap_atomic(page); 11125212e11fSVishal Verma 11133ae3d67bSVishal Verma ret = arena_read_bytes(arena, nsoff, mem + off, len, NVDIMM_IO_ATOMIC); 11145212e11fSVishal Verma kunmap_atomic(mem); 11155212e11fSVishal Verma 11165212e11fSVishal Verma return ret; 11175212e11fSVishal Verma } 11185212e11fSVishal Verma 11195212e11fSVishal Verma static int btt_data_write(struct arena_info *arena, u32 lba, 11205212e11fSVishal Verma struct page *page, unsigned int off, u32 len) 11215212e11fSVishal Verma { 11225212e11fSVishal Verma int ret; 11235212e11fSVishal Verma u64 nsoff = to_namespace_offset(arena, lba); 11245212e11fSVishal Verma void *mem = kmap_atomic(page); 11255212e11fSVishal Verma 11263ae3d67bSVishal Verma ret = arena_write_bytes(arena, nsoff, mem + off, len, NVDIMM_IO_ATOMIC); 11275212e11fSVishal Verma kunmap_atomic(mem); 11285212e11fSVishal Verma 11295212e11fSVishal Verma return ret; 11305212e11fSVishal Verma } 11315212e11fSVishal Verma 11325212e11fSVishal Verma static void zero_fill_data(struct page *page, unsigned int off, u32 len) 11335212e11fSVishal Verma { 11345212e11fSVishal Verma void *mem = kmap_atomic(page); 11355212e11fSVishal Verma 11365212e11fSVishal Verma memset(mem + off, 0, len); 11375212e11fSVishal Verma kunmap_atomic(mem); 11385212e11fSVishal Verma } 11395212e11fSVishal Verma 114041cd8b70SVishal Verma #ifdef CONFIG_BLK_DEV_INTEGRITY 114141cd8b70SVishal Verma static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip, 114241cd8b70SVishal Verma struct arena_info *arena, u32 postmap, int rw) 114341cd8b70SVishal Verma { 114441cd8b70SVishal Verma unsigned int len = btt_meta_size(btt); 114541cd8b70SVishal Verma u64 meta_nsoff; 114641cd8b70SVishal Verma int ret = 0; 114741cd8b70SVishal Verma 114841cd8b70SVishal Verma if (bip == NULL) 114941cd8b70SVishal Verma return 0; 115041cd8b70SVishal Verma 115141cd8b70SVishal Verma meta_nsoff = to_namespace_offset(arena, postmap) + btt->sector_size; 115241cd8b70SVishal Verma 115341cd8b70SVishal Verma while (len) { 115441cd8b70SVishal Verma unsigned int cur_len; 115541cd8b70SVishal Verma struct bio_vec bv; 115641cd8b70SVishal Verma void *mem; 115741cd8b70SVishal Verma 115841cd8b70SVishal Verma bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter); 115941cd8b70SVishal Verma /* 116041cd8b70SVishal Verma * The 'bv' obtained from bvec_iter_bvec has its .bv_len and 116141cd8b70SVishal Verma * .bv_offset already adjusted for iter->bi_bvec_done, and we 116241cd8b70SVishal Verma * can use those directly 116341cd8b70SVishal Verma */ 116441cd8b70SVishal Verma 116541cd8b70SVishal Verma cur_len = min(len, bv.bv_len); 116641cd8b70SVishal Verma mem = kmap_atomic(bv.bv_page); 116741cd8b70SVishal Verma if (rw) 116841cd8b70SVishal Verma ret = arena_write_bytes(arena, meta_nsoff, 11693ae3d67bSVishal Verma mem + bv.bv_offset, cur_len, 11703ae3d67bSVishal Verma NVDIMM_IO_ATOMIC); 117141cd8b70SVishal Verma else 117241cd8b70SVishal Verma ret = arena_read_bytes(arena, meta_nsoff, 11733ae3d67bSVishal Verma mem + bv.bv_offset, cur_len, 11743ae3d67bSVishal Verma NVDIMM_IO_ATOMIC); 117541cd8b70SVishal Verma 117641cd8b70SVishal Verma kunmap_atomic(mem); 117741cd8b70SVishal Verma if (ret) 117841cd8b70SVishal Verma return ret; 117941cd8b70SVishal Verma 118041cd8b70SVishal Verma len -= cur_len; 118141cd8b70SVishal Verma meta_nsoff += cur_len; 1182b1fb2c52SDmitry Monakhov if (!bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len)) 1183b1fb2c52SDmitry Monakhov return -EIO; 118441cd8b70SVishal Verma } 118541cd8b70SVishal Verma 118641cd8b70SVishal Verma return ret; 118741cd8b70SVishal Verma } 118841cd8b70SVishal Verma 118941cd8b70SVishal Verma #else /* CONFIG_BLK_DEV_INTEGRITY */ 119041cd8b70SVishal Verma static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip, 119141cd8b70SVishal Verma struct arena_info *arena, u32 postmap, int rw) 119241cd8b70SVishal Verma { 119341cd8b70SVishal Verma return 0; 119441cd8b70SVishal Verma } 119541cd8b70SVishal Verma #endif 119641cd8b70SVishal Verma 119741cd8b70SVishal Verma static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip, 119841cd8b70SVishal Verma struct page *page, unsigned int off, sector_t sector, 119941cd8b70SVishal Verma unsigned int len) 12005212e11fSVishal Verma { 12015212e11fSVishal Verma int ret = 0; 12025212e11fSVishal Verma int t_flag, e_flag; 12035212e11fSVishal Verma struct arena_info *arena = NULL; 12045212e11fSVishal Verma u32 lane = 0, premap, postmap; 12055212e11fSVishal Verma 12065212e11fSVishal Verma while (len) { 12075212e11fSVishal Verma u32 cur_len; 12085212e11fSVishal Verma 12095212e11fSVishal Verma lane = nd_region_acquire_lane(btt->nd_region); 12105212e11fSVishal Verma 12115212e11fSVishal Verma ret = lba_to_arena(btt, sector, &premap, &arena); 12125212e11fSVishal Verma if (ret) 12135212e11fSVishal Verma goto out_lane; 12145212e11fSVishal Verma 12155212e11fSVishal Verma cur_len = min(btt->sector_size, len); 12165212e11fSVishal Verma 12173ae3d67bSVishal Verma ret = btt_map_read(arena, premap, &postmap, &t_flag, &e_flag, 12183ae3d67bSVishal Verma NVDIMM_IO_ATOMIC); 12195212e11fSVishal Verma if (ret) 12205212e11fSVishal Verma goto out_lane; 12215212e11fSVishal Verma 12225212e11fSVishal Verma /* 12235212e11fSVishal Verma * We loop to make sure that the post map LBA didn't change 12245212e11fSVishal Verma * from under us between writing the RTT and doing the actual 12255212e11fSVishal Verma * read. 12265212e11fSVishal Verma */ 12275212e11fSVishal Verma while (1) { 12285212e11fSVishal Verma u32 new_map; 12291398199dSVishal Verma int new_t, new_e; 12305212e11fSVishal Verma 12315212e11fSVishal Verma if (t_flag) { 12325212e11fSVishal Verma zero_fill_data(page, off, cur_len); 12335212e11fSVishal Verma goto out_lane; 12345212e11fSVishal Verma } 12355212e11fSVishal Verma 12365212e11fSVishal Verma if (e_flag) { 12375212e11fSVishal Verma ret = -EIO; 12385212e11fSVishal Verma goto out_lane; 12395212e11fSVishal Verma } 12405212e11fSVishal Verma 12415212e11fSVishal Verma arena->rtt[lane] = RTT_VALID | postmap; 12425212e11fSVishal Verma /* 12435212e11fSVishal Verma * Barrier to make sure this write is not reordered 12445212e11fSVishal Verma * to do the verification map_read before the RTT store 12455212e11fSVishal Verma */ 12465212e11fSVishal Verma barrier(); 12475212e11fSVishal Verma 12481398199dSVishal Verma ret = btt_map_read(arena, premap, &new_map, &new_t, 12491398199dSVishal Verma &new_e, NVDIMM_IO_ATOMIC); 12505212e11fSVishal Verma if (ret) 12515212e11fSVishal Verma goto out_rtt; 12525212e11fSVishal Verma 12531398199dSVishal Verma if ((postmap == new_map) && (t_flag == new_t) && 12541398199dSVishal Verma (e_flag == new_e)) 12555212e11fSVishal Verma break; 12565212e11fSVishal Verma 12575212e11fSVishal Verma postmap = new_map; 12581398199dSVishal Verma t_flag = new_t; 12591398199dSVishal Verma e_flag = new_e; 12605212e11fSVishal Verma } 12615212e11fSVishal Verma 12625212e11fSVishal Verma ret = btt_data_read(arena, page, off, postmap, cur_len); 1263d9b83c75SVishal Verma if (ret) { 1264d9b83c75SVishal Verma int rc; 1265d9b83c75SVishal Verma 1266d9b83c75SVishal Verma /* Media error - set the e_flag */ 1267d9b83c75SVishal Verma rc = btt_map_write(arena, premap, postmap, 0, 1, 1268d9b83c75SVishal Verma NVDIMM_IO_ATOMIC); 12695212e11fSVishal Verma goto out_rtt; 1270d9b83c75SVishal Verma } 12715212e11fSVishal Verma 127241cd8b70SVishal Verma if (bip) { 127341cd8b70SVishal Verma ret = btt_rw_integrity(btt, bip, arena, postmap, READ); 127441cd8b70SVishal Verma if (ret) 127541cd8b70SVishal Verma goto out_rtt; 127641cd8b70SVishal Verma } 127741cd8b70SVishal Verma 12785212e11fSVishal Verma arena->rtt[lane] = RTT_INVALID; 12795212e11fSVishal Verma nd_region_release_lane(btt->nd_region, lane); 12805212e11fSVishal Verma 12815212e11fSVishal Verma len -= cur_len; 12825212e11fSVishal Verma off += cur_len; 12835212e11fSVishal Verma sector += btt->sector_size >> SECTOR_SHIFT; 12845212e11fSVishal Verma } 12855212e11fSVishal Verma 12865212e11fSVishal Verma return 0; 12875212e11fSVishal Verma 12885212e11fSVishal Verma out_rtt: 12895212e11fSVishal Verma arena->rtt[lane] = RTT_INVALID; 12905212e11fSVishal Verma out_lane: 12915212e11fSVishal Verma nd_region_release_lane(btt->nd_region, lane); 12925212e11fSVishal Verma return ret; 12935212e11fSVishal Verma } 12945212e11fSVishal Verma 1295d9b83c75SVishal Verma /* 1296d9b83c75SVishal Verma * Normally, arena_{read,write}_bytes will take care of the initial offset 1297d9b83c75SVishal Verma * adjustment, but in the case of btt_is_badblock, where we query is_bad_pmem, 1298d9b83c75SVishal Verma * we need the final, raw namespace offset here 1299d9b83c75SVishal Verma */ 1300d9b83c75SVishal Verma static bool btt_is_badblock(struct btt *btt, struct arena_info *arena, 1301d9b83c75SVishal Verma u32 postmap) 1302d9b83c75SVishal Verma { 1303d9b83c75SVishal Verma u64 nsoff = adjust_initial_offset(arena->nd_btt, 1304d9b83c75SVishal Verma to_namespace_offset(arena, postmap)); 1305d9b83c75SVishal Verma sector_t phys_sector = nsoff >> 9; 1306d9b83c75SVishal Verma 1307d9b83c75SVishal Verma return is_bad_pmem(btt->phys_bb, phys_sector, arena->internal_lbasize); 1308d9b83c75SVishal Verma } 1309d9b83c75SVishal Verma 131041cd8b70SVishal Verma static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, 131141cd8b70SVishal Verma sector_t sector, struct page *page, unsigned int off, 131241cd8b70SVishal Verma unsigned int len) 13135212e11fSVishal Verma { 13145212e11fSVishal Verma int ret = 0; 13155212e11fSVishal Verma struct arena_info *arena = NULL; 13165212e11fSVishal Verma u32 premap = 0, old_postmap, new_postmap, lane = 0, i; 13175212e11fSVishal Verma struct log_entry log; 13185212e11fSVishal Verma int sub; 13195212e11fSVishal Verma 13205212e11fSVishal Verma while (len) { 13215212e11fSVishal Verma u32 cur_len; 1322d9b83c75SVishal Verma int e_flag; 13235212e11fSVishal Verma 1324d9b83c75SVishal Verma retry: 13255212e11fSVishal Verma lane = nd_region_acquire_lane(btt->nd_region); 13265212e11fSVishal Verma 13275212e11fSVishal Verma ret = lba_to_arena(btt, sector, &premap, &arena); 13285212e11fSVishal Verma if (ret) 13295212e11fSVishal Verma goto out_lane; 13305212e11fSVishal Verma cur_len = min(btt->sector_size, len); 13315212e11fSVishal Verma 13325212e11fSVishal Verma if ((arena->flags & IB_FLAG_ERROR_MASK) != 0) { 13335212e11fSVishal Verma ret = -EIO; 13345212e11fSVishal Verma goto out_lane; 13355212e11fSVishal Verma } 13365212e11fSVishal Verma 1337d9b83c75SVishal Verma if (btt_is_badblock(btt, arena, arena->freelist[lane].block)) 1338d9b83c75SVishal Verma arena->freelist[lane].has_err = 1; 1339d9b83c75SVishal Verma 1340d9b83c75SVishal Verma if (mutex_is_locked(&arena->err_lock) 1341d9b83c75SVishal Verma || arena->freelist[lane].has_err) { 1342d9b83c75SVishal Verma nd_region_release_lane(btt->nd_region, lane); 1343d9b83c75SVishal Verma 1344d9b83c75SVishal Verma ret = arena_clear_freelist_error(arena, lane); 1345d9b83c75SVishal Verma if (ret) 1346d9b83c75SVishal Verma return ret; 1347d9b83c75SVishal Verma 1348d9b83c75SVishal Verma /* OK to acquire a different lane/free block */ 1349d9b83c75SVishal Verma goto retry; 1350d9b83c75SVishal Verma } 1351d9b83c75SVishal Verma 13525212e11fSVishal Verma new_postmap = arena->freelist[lane].block; 13535212e11fSVishal Verma 13545212e11fSVishal Verma /* Wait if the new block is being read from */ 13555212e11fSVishal Verma for (i = 0; i < arena->nfree; i++) 13565212e11fSVishal Verma while (arena->rtt[i] == (RTT_VALID | new_postmap)) 13575212e11fSVishal Verma cpu_relax(); 13585212e11fSVishal Verma 13595212e11fSVishal Verma 13605212e11fSVishal Verma if (new_postmap >= arena->internal_nlba) { 13615212e11fSVishal Verma ret = -EIO; 13625212e11fSVishal Verma goto out_lane; 136341cd8b70SVishal Verma } 136441cd8b70SVishal Verma 136541cd8b70SVishal Verma ret = btt_data_write(arena, new_postmap, page, off, cur_len); 13665212e11fSVishal Verma if (ret) 13675212e11fSVishal Verma goto out_lane; 13685212e11fSVishal Verma 136941cd8b70SVishal Verma if (bip) { 137041cd8b70SVishal Verma ret = btt_rw_integrity(btt, bip, arena, new_postmap, 137141cd8b70SVishal Verma WRITE); 137241cd8b70SVishal Verma if (ret) 137341cd8b70SVishal Verma goto out_lane; 137441cd8b70SVishal Verma } 137541cd8b70SVishal Verma 13765212e11fSVishal Verma lock_map(arena, premap); 1377d9b83c75SVishal Verma ret = btt_map_read(arena, premap, &old_postmap, NULL, &e_flag, 13783ae3d67bSVishal Verma NVDIMM_IO_ATOMIC); 13795212e11fSVishal Verma if (ret) 13805212e11fSVishal Verma goto out_map; 13815212e11fSVishal Verma if (old_postmap >= arena->internal_nlba) { 13825212e11fSVishal Verma ret = -EIO; 13835212e11fSVishal Verma goto out_map; 13845212e11fSVishal Verma } 1385d9b83c75SVishal Verma if (e_flag) 1386d9b83c75SVishal Verma set_e_flag(old_postmap); 13875212e11fSVishal Verma 13885212e11fSVishal Verma log.lba = cpu_to_le32(premap); 13895212e11fSVishal Verma log.old_map = cpu_to_le32(old_postmap); 13905212e11fSVishal Verma log.new_map = cpu_to_le32(new_postmap); 13915212e11fSVishal Verma log.seq = cpu_to_le32(arena->freelist[lane].seq); 13925212e11fSVishal Verma sub = arena->freelist[lane].sub; 13935212e11fSVishal Verma ret = btt_flog_write(arena, lane, sub, &log); 13945212e11fSVishal Verma if (ret) 13955212e11fSVishal Verma goto out_map; 13965212e11fSVishal Verma 13971db1f3ceSVishal Verma ret = btt_map_write(arena, premap, new_postmap, 0, 0, 13981db1f3ceSVishal Verma NVDIMM_IO_ATOMIC); 13995212e11fSVishal Verma if (ret) 14005212e11fSVishal Verma goto out_map; 14015212e11fSVishal Verma 14025212e11fSVishal Verma unlock_map(arena, premap); 14035212e11fSVishal Verma nd_region_release_lane(btt->nd_region, lane); 14045212e11fSVishal Verma 1405d9b83c75SVishal Verma if (e_flag) { 1406d9b83c75SVishal Verma ret = arena_clear_freelist_error(arena, lane); 1407d9b83c75SVishal Verma if (ret) 1408d9b83c75SVishal Verma return ret; 1409d9b83c75SVishal Verma } 1410d9b83c75SVishal Verma 14115212e11fSVishal Verma len -= cur_len; 14125212e11fSVishal Verma off += cur_len; 14135212e11fSVishal Verma sector += btt->sector_size >> SECTOR_SHIFT; 14145212e11fSVishal Verma } 14155212e11fSVishal Verma 14165212e11fSVishal Verma return 0; 14175212e11fSVishal Verma 14185212e11fSVishal Verma out_map: 14195212e11fSVishal Verma unlock_map(arena, premap); 14205212e11fSVishal Verma out_lane: 14215212e11fSVishal Verma nd_region_release_lane(btt->nd_region, lane); 14225212e11fSVishal Verma return ret; 14235212e11fSVishal Verma } 14245212e11fSVishal Verma 142541cd8b70SVishal Verma static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip, 142641cd8b70SVishal Verma struct page *page, unsigned int len, unsigned int off, 14273f289dcbSTejun Heo unsigned int op, sector_t sector) 14285212e11fSVishal Verma { 14295212e11fSVishal Verma int ret; 14305212e11fSVishal Verma 14313f289dcbSTejun Heo if (!op_is_write(op)) { 143241cd8b70SVishal Verma ret = btt_read_pg(btt, bip, page, off, sector, len); 14335212e11fSVishal Verma flush_dcache_page(page); 14345212e11fSVishal Verma } else { 14355212e11fSVishal Verma flush_dcache_page(page); 143641cd8b70SVishal Verma ret = btt_write_pg(btt, bip, sector, page, off, len); 14375212e11fSVishal Verma } 14385212e11fSVishal Verma 14395212e11fSVishal Verma return ret; 14405212e11fSVishal Verma } 14415212e11fSVishal Verma 1442dece1635SJens Axboe static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) 14435212e11fSVishal Verma { 144441cd8b70SVishal Verma struct bio_integrity_payload *bip = bio_integrity(bio); 14455212e11fSVishal Verma struct btt *btt = q->queuedata; 14465212e11fSVishal Verma struct bvec_iter iter; 1447f0dc089cSDan Williams unsigned long start; 14485212e11fSVishal Verma struct bio_vec bvec; 1449abf54548SMike Christie int err = 0; 1450f0dc089cSDan Williams bool do_acct; 14515212e11fSVishal Verma 1452e23947bdSDmitry Monakhov if (!bio_integrity_prep(bio)) 1453e23947bdSDmitry Monakhov return BLK_QC_T_NONE; 145441cd8b70SVishal Verma 1455f0dc089cSDan Williams do_acct = nd_iostat_start(bio, &start); 14565212e11fSVishal Verma bio_for_each_segment(bvec, bio, iter) { 14575212e11fSVishal Verma unsigned int len = bvec.bv_len; 14585212e11fSVishal Verma 145986652d2eSVishal Verma if (len > PAGE_SIZE || len < btt->sector_size || 146086652d2eSVishal Verma len % btt->sector_size) { 146186652d2eSVishal Verma dev_err_ratelimited(&btt->nd_btt->dev, 146286652d2eSVishal Verma "unaligned bio segment (len: %d)\n", len); 146386652d2eSVishal Verma bio->bi_status = BLK_STS_IOERR; 146486652d2eSVishal Verma break; 146586652d2eSVishal Verma } 14665212e11fSVishal Verma 146741cd8b70SVishal Verma err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset, 14683f289dcbSTejun Heo bio_op(bio), iter.bi_sector); 14695212e11fSVishal Verma if (err) { 1470e6be2dcbSVishal Verma dev_err(&btt->nd_btt->dev, 14715212e11fSVishal Verma "io error in %s sector %lld, len %d,\n", 1472abf54548SMike Christie (op_is_write(bio_op(bio))) ? "WRITE" : 1473abf54548SMike Christie "READ", 14745212e11fSVishal Verma (unsigned long long) iter.bi_sector, len); 14754e4cbee9SChristoph Hellwig bio->bi_status = errno_to_blk_status(err); 1476f0dc089cSDan Williams break; 14775212e11fSVishal Verma } 14785212e11fSVishal Verma } 1479f0dc089cSDan Williams if (do_acct) 1480f0dc089cSDan Williams nd_iostat_end(bio, start); 14815212e11fSVishal Verma 14824246a0b6SChristoph Hellwig bio_endio(bio); 1483dece1635SJens Axboe return BLK_QC_T_NONE; 14845212e11fSVishal Verma } 14855212e11fSVishal Verma 14865212e11fSVishal Verma static int btt_rw_page(struct block_device *bdev, sector_t sector, 14873f289dcbSTejun Heo struct page *page, unsigned int op) 14885212e11fSVishal Verma { 14895212e11fSVishal Verma struct btt *btt = bdev->bd_disk->private_data; 1490c13c43d5SVishal Verma int rc; 149198cc093cSHuang Ying unsigned int len; 14925212e11fSVishal Verma 149398cc093cSHuang Ying len = hpage_nr_pages(page) * PAGE_SIZE; 14943f289dcbSTejun Heo rc = btt_do_bvec(btt, NULL, page, len, 0, op, sector); 1495c13c43d5SVishal Verma if (rc == 0) 14963f289dcbSTejun Heo page_endio(page, op_is_write(op), 0); 1497c13c43d5SVishal Verma 1498c13c43d5SVishal Verma return rc; 14995212e11fSVishal Verma } 15005212e11fSVishal Verma 15015212e11fSVishal Verma 15025212e11fSVishal Verma static int btt_getgeo(struct block_device *bd, struct hd_geometry *geo) 15035212e11fSVishal Verma { 15045212e11fSVishal Verma /* some standard values */ 15055212e11fSVishal Verma geo->heads = 1 << 6; 15065212e11fSVishal Verma geo->sectors = 1 << 5; 15075212e11fSVishal Verma geo->cylinders = get_capacity(bd->bd_disk) >> 11; 15085212e11fSVishal Verma return 0; 15095212e11fSVishal Verma } 15105212e11fSVishal Verma 15115212e11fSVishal Verma static const struct block_device_operations btt_fops = { 15125212e11fSVishal Verma .owner = THIS_MODULE, 15135212e11fSVishal Verma .rw_page = btt_rw_page, 15145212e11fSVishal Verma .getgeo = btt_getgeo, 151558138820SDan Williams .revalidate_disk = nvdimm_revalidate_disk, 15165212e11fSVishal Verma }; 15175212e11fSVishal Verma 15185212e11fSVishal Verma static int btt_blk_init(struct btt *btt) 15195212e11fSVishal Verma { 15205212e11fSVishal Verma struct nd_btt *nd_btt = btt->nd_btt; 15215212e11fSVishal Verma struct nd_namespace_common *ndns = nd_btt->ndns; 15225212e11fSVishal Verma 15235212e11fSVishal Verma /* create a new disk and request queue for btt */ 15245212e11fSVishal Verma btt->btt_queue = blk_alloc_queue(GFP_KERNEL); 15255212e11fSVishal Verma if (!btt->btt_queue) 15265212e11fSVishal Verma return -ENOMEM; 15275212e11fSVishal Verma 15285212e11fSVishal Verma btt->btt_disk = alloc_disk(0); 15295212e11fSVishal Verma if (!btt->btt_disk) { 15305212e11fSVishal Verma blk_cleanup_queue(btt->btt_queue); 15315212e11fSVishal Verma return -ENOMEM; 15325212e11fSVishal Verma } 15335212e11fSVishal Verma 15345212e11fSVishal Verma nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name); 15355212e11fSVishal Verma btt->btt_disk->first_minor = 0; 15365212e11fSVishal Verma btt->btt_disk->fops = &btt_fops; 15375212e11fSVishal Verma btt->btt_disk->private_data = btt; 15385212e11fSVishal Verma btt->btt_disk->queue = btt->btt_queue; 15395212e11fSVishal Verma btt->btt_disk->flags = GENHD_FL_EXT_DEVT; 154023c47d2aSMinchan Kim btt->btt_disk->queue->backing_dev_info->capabilities |= 154123c47d2aSMinchan Kim BDI_CAP_SYNCHRONOUS_IO; 15425212e11fSVishal Verma 15435212e11fSVishal Verma blk_queue_make_request(btt->btt_queue, btt_make_request); 15445212e11fSVishal Verma blk_queue_logical_block_size(btt->btt_queue, btt->sector_size); 15455212e11fSVishal Verma blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX); 15468b904b5bSBart Van Assche blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_queue); 15475212e11fSVishal Verma btt->btt_queue->queuedata = btt; 15485212e11fSVishal Verma 154941cd8b70SVishal Verma if (btt_meta_size(btt)) { 155041cd8b70SVishal Verma int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt)); 155141cd8b70SVishal Verma 155241cd8b70SVishal Verma if (rc) { 155341cd8b70SVishal Verma del_gendisk(btt->btt_disk); 155441cd8b70SVishal Verma put_disk(btt->btt_disk); 155541cd8b70SVishal Verma blk_cleanup_queue(btt->btt_queue); 155641cd8b70SVishal Verma return rc; 155741cd8b70SVishal Verma } 155841cd8b70SVishal Verma } 155941cd8b70SVishal Verma set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9); 1560fef912bfSHannes Reinecke device_add_disk(&btt->nd_btt->dev, btt->btt_disk, NULL); 1561abe8b4e3SVishal Verma btt->nd_btt->size = btt->nlba * (u64)btt->sector_size; 156258138820SDan Williams revalidate_disk(btt->btt_disk); 15635212e11fSVishal Verma 15645212e11fSVishal Verma return 0; 15655212e11fSVishal Verma } 15665212e11fSVishal Verma 15675212e11fSVishal Verma static void btt_blk_cleanup(struct btt *btt) 15685212e11fSVishal Verma { 15695212e11fSVishal Verma del_gendisk(btt->btt_disk); 15705212e11fSVishal Verma put_disk(btt->btt_disk); 15715212e11fSVishal Verma blk_cleanup_queue(btt->btt_queue); 15725212e11fSVishal Verma } 15735212e11fSVishal Verma 15745212e11fSVishal Verma /** 15755212e11fSVishal Verma * btt_init - initialize a block translation table for the given device 15765212e11fSVishal Verma * @nd_btt: device with BTT geometry and backing device info 15775212e11fSVishal Verma * @rawsize: raw size in bytes of the backing device 15785212e11fSVishal Verma * @lbasize: lba size of the backing device 15795212e11fSVishal Verma * @uuid: A uuid for the backing device - this is stored on media 15805212e11fSVishal Verma * @maxlane: maximum number of parallel requests the device can handle 15815212e11fSVishal Verma * 15825212e11fSVishal Verma * Initialize a Block Translation Table on a backing device to provide 15835212e11fSVishal Verma * single sector power fail atomicity. 15845212e11fSVishal Verma * 15855212e11fSVishal Verma * Context: 15865212e11fSVishal Verma * Might sleep. 15875212e11fSVishal Verma * 15885212e11fSVishal Verma * Returns: 15895212e11fSVishal Verma * Pointer to a new struct btt on success, NULL on failure. 15905212e11fSVishal Verma */ 15915212e11fSVishal Verma static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize, 15925212e11fSVishal Verma u32 lbasize, u8 *uuid, struct nd_region *nd_region) 15935212e11fSVishal Verma { 15945212e11fSVishal Verma int ret; 15955212e11fSVishal Verma struct btt *btt; 1596d9b83c75SVishal Verma struct nd_namespace_io *nsio; 15975212e11fSVishal Verma struct device *dev = &nd_btt->dev; 15985212e11fSVishal Verma 1599e32bc729SDan Williams btt = devm_kzalloc(dev, sizeof(struct btt), GFP_KERNEL); 16005212e11fSVishal Verma if (!btt) 16015212e11fSVishal Verma return NULL; 16025212e11fSVishal Verma 16035212e11fSVishal Verma btt->nd_btt = nd_btt; 16045212e11fSVishal Verma btt->rawsize = rawsize; 16055212e11fSVishal Verma btt->lbasize = lbasize; 16065212e11fSVishal Verma btt->sector_size = ((lbasize >= 4096) ? 4096 : 512); 16075212e11fSVishal Verma INIT_LIST_HEAD(&btt->arena_list); 16085212e11fSVishal Verma mutex_init(&btt->init_lock); 16095212e11fSVishal Verma btt->nd_region = nd_region; 1610d9b83c75SVishal Verma nsio = to_nd_namespace_io(&nd_btt->ndns->dev); 1611d9b83c75SVishal Verma btt->phys_bb = &nsio->bb; 16125212e11fSVishal Verma 16135212e11fSVishal Verma ret = discover_arenas(btt); 16145212e11fSVishal Verma if (ret) { 16155212e11fSVishal Verma dev_err(dev, "init: error in arena_discover: %d\n", ret); 1616e32bc729SDan Williams return NULL; 16175212e11fSVishal Verma } 16185212e11fSVishal Verma 161958138820SDan Williams if (btt->init_state != INIT_READY && nd_region->ro) { 1620e6be2dcbSVishal Verma dev_warn(dev, "%s is read-only, unable to init btt metadata\n", 162158138820SDan Williams dev_name(&nd_region->dev)); 1622e32bc729SDan Williams return NULL; 162358138820SDan Williams } else if (btt->init_state != INIT_READY) { 16245212e11fSVishal Verma btt->num_arenas = (rawsize / ARENA_MAX_SIZE) + 16255212e11fSVishal Verma ((rawsize % ARENA_MAX_SIZE) ? 1 : 0); 16265212e11fSVishal Verma dev_dbg(dev, "init: %d arenas for %llu rawsize\n", 16275212e11fSVishal Verma btt->num_arenas, rawsize); 16285212e11fSVishal Verma 16295212e11fSVishal Verma ret = create_arenas(btt); 16305212e11fSVishal Verma if (ret) { 16315212e11fSVishal Verma dev_info(dev, "init: create_arenas: %d\n", ret); 1632e32bc729SDan Williams return NULL; 16335212e11fSVishal Verma } 16345212e11fSVishal Verma 16355212e11fSVishal Verma ret = btt_meta_init(btt); 16365212e11fSVishal Verma if (ret) { 16375212e11fSVishal Verma dev_err(dev, "init: error in meta_init: %d\n", ret); 1638e32bc729SDan Williams return NULL; 16395212e11fSVishal Verma } 16405212e11fSVishal Verma } 16415212e11fSVishal Verma 16425212e11fSVishal Verma ret = btt_blk_init(btt); 16435212e11fSVishal Verma if (ret) { 16445212e11fSVishal Verma dev_err(dev, "init: error in blk_init: %d\n", ret); 1645e32bc729SDan Williams return NULL; 16465212e11fSVishal Verma } 16475212e11fSVishal Verma 16485212e11fSVishal Verma btt_debugfs_init(btt); 16495212e11fSVishal Verma 16505212e11fSVishal Verma return btt; 16515212e11fSVishal Verma } 16525212e11fSVishal Verma 16535212e11fSVishal Verma /** 16545212e11fSVishal Verma * btt_fini - de-initialize a BTT 16555212e11fSVishal Verma * @btt: the BTT handle that was generated by btt_init 16565212e11fSVishal Verma * 16575212e11fSVishal Verma * De-initialize a Block Translation Table on device removal 16585212e11fSVishal Verma * 16595212e11fSVishal Verma * Context: 16605212e11fSVishal Verma * Might sleep. 16615212e11fSVishal Verma */ 16625212e11fSVishal Verma static void btt_fini(struct btt *btt) 16635212e11fSVishal Verma { 16645212e11fSVishal Verma if (btt) { 16655212e11fSVishal Verma btt_blk_cleanup(btt); 16665212e11fSVishal Verma free_arenas(btt); 16675212e11fSVishal Verma debugfs_remove_recursive(btt->debugfs_dir); 16685212e11fSVishal Verma } 16695212e11fSVishal Verma } 16705212e11fSVishal Verma 16715212e11fSVishal Verma int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns) 16725212e11fSVishal Verma { 16735212e11fSVishal Verma struct nd_btt *nd_btt = to_nd_btt(ndns->claim); 16745212e11fSVishal Verma struct nd_region *nd_region; 167514e49454SVishal Verma struct btt_sb *btt_sb; 16765212e11fSVishal Verma struct btt *btt; 16775212e11fSVishal Verma size_t rawsize; 16785212e11fSVishal Verma 16799dec4892SDan Williams if (!nd_btt->uuid || !nd_btt->ndns || !nd_btt->lbasize) { 16809dec4892SDan Williams dev_dbg(&nd_btt->dev, "incomplete btt configuration\n"); 16815212e11fSVishal Verma return -ENODEV; 16829dec4892SDan Williams } 16835212e11fSVishal Verma 168414e49454SVishal Verma btt_sb = devm_kzalloc(&nd_btt->dev, sizeof(*btt_sb), GFP_KERNEL); 1685ed36b4dbSChristophe Jaillet if (!btt_sb) 1686ed36b4dbSChristophe Jaillet return -ENOMEM; 168714e49454SVishal Verma 168814e49454SVishal Verma /* 168914e49454SVishal Verma * If this returns < 0, that is ok as it just means there wasn't 169014e49454SVishal Verma * an existing BTT, and we're creating a new one. We still need to 169114e49454SVishal Verma * call this as we need the version dependent fields in nd_btt to be 169214e49454SVishal Verma * set correctly based on the holder class 169314e49454SVishal Verma */ 169414e49454SVishal Verma nd_btt_version(nd_btt, ndns, btt_sb); 169514e49454SVishal Verma 169614e49454SVishal Verma rawsize = nvdimm_namespace_capacity(ndns) - nd_btt->initial_offset; 16975212e11fSVishal Verma if (rawsize < ARENA_MIN_SIZE) { 16989dec4892SDan Williams dev_dbg(&nd_btt->dev, "%s must be at least %ld bytes\n", 169914e49454SVishal Verma dev_name(&ndns->dev), 170014e49454SVishal Verma ARENA_MIN_SIZE + nd_btt->initial_offset); 17015212e11fSVishal Verma return -ENXIO; 17025212e11fSVishal Verma } 17035212e11fSVishal Verma nd_region = to_nd_region(nd_btt->dev.parent); 17045212e11fSVishal Verma btt = btt_init(nd_btt, rawsize, nd_btt->lbasize, nd_btt->uuid, 17055212e11fSVishal Verma nd_region); 17065212e11fSVishal Verma if (!btt) 17075212e11fSVishal Verma return -ENOMEM; 17085212e11fSVishal Verma nd_btt->btt = btt; 17095212e11fSVishal Verma 17105212e11fSVishal Verma return 0; 17115212e11fSVishal Verma } 17125212e11fSVishal Verma EXPORT_SYMBOL(nvdimm_namespace_attach_btt); 17135212e11fSVishal Verma 1714298f2bc5SDan Williams int nvdimm_namespace_detach_btt(struct nd_btt *nd_btt) 17155212e11fSVishal Verma { 17165212e11fSVishal Verma struct btt *btt = nd_btt->btt; 17175212e11fSVishal Verma 17185212e11fSVishal Verma btt_fini(btt); 17195212e11fSVishal Verma nd_btt->btt = NULL; 17205212e11fSVishal Verma 17215212e11fSVishal Verma return 0; 17225212e11fSVishal Verma } 17235212e11fSVishal Verma EXPORT_SYMBOL(nvdimm_namespace_detach_btt); 17245212e11fSVishal Verma 17255212e11fSVishal Verma static int __init nd_btt_init(void) 17265212e11fSVishal Verma { 1727ff8e92d5SNeilBrown int rc = 0; 17285212e11fSVishal Verma 17295212e11fSVishal Verma debugfs_root = debugfs_create_dir("btt", NULL); 1730ff8e92d5SNeilBrown if (IS_ERR_OR_NULL(debugfs_root)) 17315212e11fSVishal Verma rc = -ENXIO; 17325212e11fSVishal Verma 17335212e11fSVishal Verma return rc; 17345212e11fSVishal Verma } 17355212e11fSVishal Verma 17365212e11fSVishal Verma static void __exit nd_btt_exit(void) 17375212e11fSVishal Verma { 17385212e11fSVishal Verma debugfs_remove_recursive(debugfs_root); 17395212e11fSVishal Verma } 17405212e11fSVishal Verma 17415212e11fSVishal Verma MODULE_ALIAS_ND_DEVICE(ND_DEVICE_BTT); 17425212e11fSVishal Verma MODULE_AUTHOR("Vishal Verma <vishal.l.verma@linux.intel.com>"); 17435212e11fSVishal Verma MODULE_LICENSE("GPL v2"); 17445212e11fSVishal Verma module_init(nd_btt_init); 17455212e11fSVishal Verma module_exit(nd_btt_exit); 1746