15212e11fSVishal Verma /* 25212e11fSVishal Verma * Block Translation Table 35212e11fSVishal Verma * Copyright (c) 2014-2015, Intel Corporation. 45212e11fSVishal Verma * 55212e11fSVishal Verma * This program is free software; you can redistribute it and/or modify it 65212e11fSVishal Verma * under the terms and conditions of the GNU General Public License, 75212e11fSVishal Verma * version 2, as published by the Free Software Foundation. 85212e11fSVishal Verma * 95212e11fSVishal Verma * This program is distributed in the hope it will be useful, but WITHOUT 105212e11fSVishal Verma * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 115212e11fSVishal Verma * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 125212e11fSVishal Verma * more details. 135212e11fSVishal Verma */ 145212e11fSVishal Verma #include <linux/highmem.h> 155212e11fSVishal Verma #include <linux/debugfs.h> 165212e11fSVishal Verma #include <linux/blkdev.h> 175212e11fSVishal Verma #include <linux/module.h> 185212e11fSVishal Verma #include <linux/device.h> 195212e11fSVishal Verma #include <linux/mutex.h> 205212e11fSVishal Verma #include <linux/hdreg.h> 215212e11fSVishal Verma #include <linux/genhd.h> 225212e11fSVishal Verma #include <linux/sizes.h> 235212e11fSVishal Verma #include <linux/ndctl.h> 245212e11fSVishal Verma #include <linux/fs.h> 255212e11fSVishal Verma #include <linux/nd.h> 265212e11fSVishal Verma #include "btt.h" 275212e11fSVishal Verma #include "nd.h" 285212e11fSVishal Verma 295212e11fSVishal Verma enum log_ent_request { 305212e11fSVishal Verma LOG_NEW_ENT = 0, 315212e11fSVishal Verma LOG_OLD_ENT 325212e11fSVishal Verma }; 335212e11fSVishal Verma 345212e11fSVishal Verma static int btt_major; 355212e11fSVishal Verma 365212e11fSVishal Verma static int arena_read_bytes(struct arena_info *arena, resource_size_t offset, 375212e11fSVishal Verma void *buf, size_t n) 385212e11fSVishal Verma { 395212e11fSVishal Verma struct nd_btt *nd_btt = arena->nd_btt; 405212e11fSVishal Verma struct nd_namespace_common *ndns = nd_btt->ndns; 415212e11fSVishal Verma 425212e11fSVishal Verma /* arena offsets are 4K from the base of the device */ 435212e11fSVishal Verma offset += SZ_4K; 445212e11fSVishal Verma return nvdimm_read_bytes(ndns, offset, buf, n); 455212e11fSVishal Verma } 465212e11fSVishal Verma 475212e11fSVishal Verma static int arena_write_bytes(struct arena_info *arena, resource_size_t offset, 485212e11fSVishal Verma void *buf, size_t n) 495212e11fSVishal Verma { 505212e11fSVishal Verma struct nd_btt *nd_btt = arena->nd_btt; 515212e11fSVishal Verma struct nd_namespace_common *ndns = nd_btt->ndns; 525212e11fSVishal Verma 535212e11fSVishal Verma /* arena offsets are 4K from the base of the device */ 545212e11fSVishal Verma offset += SZ_4K; 555212e11fSVishal Verma return nvdimm_write_bytes(ndns, offset, buf, n); 565212e11fSVishal Verma } 575212e11fSVishal Verma 585212e11fSVishal Verma static int btt_info_write(struct arena_info *arena, struct btt_sb *super) 595212e11fSVishal Verma { 605212e11fSVishal Verma int ret; 615212e11fSVishal Verma 625212e11fSVishal Verma ret = arena_write_bytes(arena, arena->info2off, super, 635212e11fSVishal Verma sizeof(struct btt_sb)); 645212e11fSVishal Verma if (ret) 655212e11fSVishal Verma return ret; 665212e11fSVishal Verma 675212e11fSVishal Verma return arena_write_bytes(arena, arena->infooff, super, 685212e11fSVishal Verma sizeof(struct btt_sb)); 695212e11fSVishal Verma } 705212e11fSVishal Verma 715212e11fSVishal Verma static int btt_info_read(struct arena_info *arena, struct btt_sb *super) 725212e11fSVishal Verma { 735212e11fSVishal Verma WARN_ON(!super); 745212e11fSVishal Verma return arena_read_bytes(arena, arena->infooff, super, 755212e11fSVishal Verma sizeof(struct btt_sb)); 765212e11fSVishal Verma } 775212e11fSVishal Verma 785212e11fSVishal Verma /* 795212e11fSVishal Verma * 'raw' version of btt_map write 805212e11fSVishal Verma * Assumptions: 815212e11fSVishal Verma * mapping is in little-endian 825212e11fSVishal Verma * mapping contains 'E' and 'Z' flags as desired 835212e11fSVishal Verma */ 845212e11fSVishal Verma static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping) 855212e11fSVishal Verma { 865212e11fSVishal Verma u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE); 875212e11fSVishal Verma 885212e11fSVishal Verma WARN_ON(lba >= arena->external_nlba); 895212e11fSVishal Verma return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE); 905212e11fSVishal Verma } 915212e11fSVishal Verma 925212e11fSVishal Verma static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping, 935212e11fSVishal Verma u32 z_flag, u32 e_flag) 945212e11fSVishal Verma { 955212e11fSVishal Verma u32 ze; 965212e11fSVishal Verma __le32 mapping_le; 975212e11fSVishal Verma 985212e11fSVishal Verma /* 995212e11fSVishal Verma * This 'mapping' is supposed to be just the LBA mapping, without 1005212e11fSVishal Verma * any flags set, so strip the flag bits. 1015212e11fSVishal Verma */ 1025212e11fSVishal Verma mapping &= MAP_LBA_MASK; 1035212e11fSVishal Verma 1045212e11fSVishal Verma ze = (z_flag << 1) + e_flag; 1055212e11fSVishal Verma switch (ze) { 1065212e11fSVishal Verma case 0: 1075212e11fSVishal Verma /* 1085212e11fSVishal Verma * We want to set neither of the Z or E flags, and 1095212e11fSVishal Verma * in the actual layout, this means setting the bit 1105212e11fSVishal Verma * positions of both to '1' to indicate a 'normal' 1115212e11fSVishal Verma * map entry 1125212e11fSVishal Verma */ 1135212e11fSVishal Verma mapping |= MAP_ENT_NORMAL; 1145212e11fSVishal Verma break; 1155212e11fSVishal Verma case 1: 1165212e11fSVishal Verma mapping |= (1 << MAP_ERR_SHIFT); 1175212e11fSVishal Verma break; 1185212e11fSVishal Verma case 2: 1195212e11fSVishal Verma mapping |= (1 << MAP_TRIM_SHIFT); 1205212e11fSVishal Verma break; 1215212e11fSVishal Verma default: 1225212e11fSVishal Verma /* 1235212e11fSVishal Verma * The case where Z and E are both sent in as '1' could be 1245212e11fSVishal Verma * construed as a valid 'normal' case, but we decide not to, 1255212e11fSVishal Verma * to avoid confusion 1265212e11fSVishal Verma */ 1275212e11fSVishal Verma WARN_ONCE(1, "Invalid use of Z and E flags\n"); 1285212e11fSVishal Verma return -EIO; 1295212e11fSVishal Verma } 1305212e11fSVishal Verma 1315212e11fSVishal Verma mapping_le = cpu_to_le32(mapping); 1325212e11fSVishal Verma return __btt_map_write(arena, lba, mapping_le); 1335212e11fSVishal Verma } 1345212e11fSVishal Verma 1355212e11fSVishal Verma static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping, 1365212e11fSVishal Verma int *trim, int *error) 1375212e11fSVishal Verma { 1385212e11fSVishal Verma int ret; 1395212e11fSVishal Verma __le32 in; 1405212e11fSVishal Verma u32 raw_mapping, postmap, ze, z_flag, e_flag; 1415212e11fSVishal Verma u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE); 1425212e11fSVishal Verma 1435212e11fSVishal Verma WARN_ON(lba >= arena->external_nlba); 1445212e11fSVishal Verma 1455212e11fSVishal Verma ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE); 1465212e11fSVishal Verma if (ret) 1475212e11fSVishal Verma return ret; 1485212e11fSVishal Verma 1495212e11fSVishal Verma raw_mapping = le32_to_cpu(in); 1505212e11fSVishal Verma 1515212e11fSVishal Verma z_flag = (raw_mapping & MAP_TRIM_MASK) >> MAP_TRIM_SHIFT; 1525212e11fSVishal Verma e_flag = (raw_mapping & MAP_ERR_MASK) >> MAP_ERR_SHIFT; 1535212e11fSVishal Verma ze = (z_flag << 1) + e_flag; 1545212e11fSVishal Verma postmap = raw_mapping & MAP_LBA_MASK; 1555212e11fSVishal Verma 1565212e11fSVishal Verma /* Reuse the {z,e}_flag variables for *trim and *error */ 1575212e11fSVishal Verma z_flag = 0; 1585212e11fSVishal Verma e_flag = 0; 1595212e11fSVishal Verma 1605212e11fSVishal Verma switch (ze) { 1615212e11fSVishal Verma case 0: 1625212e11fSVishal Verma /* Initial state. Return postmap = premap */ 1635212e11fSVishal Verma *mapping = lba; 1645212e11fSVishal Verma break; 1655212e11fSVishal Verma case 1: 1665212e11fSVishal Verma *mapping = postmap; 1675212e11fSVishal Verma e_flag = 1; 1685212e11fSVishal Verma break; 1695212e11fSVishal Verma case 2: 1705212e11fSVishal Verma *mapping = postmap; 1715212e11fSVishal Verma z_flag = 1; 1725212e11fSVishal Verma break; 1735212e11fSVishal Verma case 3: 1745212e11fSVishal Verma *mapping = postmap; 1755212e11fSVishal Verma break; 1765212e11fSVishal Verma default: 1775212e11fSVishal Verma return -EIO; 1785212e11fSVishal Verma } 1795212e11fSVishal Verma 1805212e11fSVishal Verma if (trim) 1815212e11fSVishal Verma *trim = z_flag; 1825212e11fSVishal Verma if (error) 1835212e11fSVishal Verma *error = e_flag; 1845212e11fSVishal Verma 1855212e11fSVishal Verma return ret; 1865212e11fSVishal Verma } 1875212e11fSVishal Verma 1885212e11fSVishal Verma static int btt_log_read_pair(struct arena_info *arena, u32 lane, 1895212e11fSVishal Verma struct log_entry *ent) 1905212e11fSVishal Verma { 1915212e11fSVishal Verma WARN_ON(!ent); 1925212e11fSVishal Verma return arena_read_bytes(arena, 1935212e11fSVishal Verma arena->logoff + (2 * lane * LOG_ENT_SIZE), ent, 1945212e11fSVishal Verma 2 * LOG_ENT_SIZE); 1955212e11fSVishal Verma } 1965212e11fSVishal Verma 1975212e11fSVishal Verma static struct dentry *debugfs_root; 1985212e11fSVishal Verma 1995212e11fSVishal Verma static void arena_debugfs_init(struct arena_info *a, struct dentry *parent, 2005212e11fSVishal Verma int idx) 2015212e11fSVishal Verma { 2025212e11fSVishal Verma char dirname[32]; 2035212e11fSVishal Verma struct dentry *d; 2045212e11fSVishal Verma 2055212e11fSVishal Verma /* If for some reason, parent bttN was not created, exit */ 2065212e11fSVishal Verma if (!parent) 2075212e11fSVishal Verma return; 2085212e11fSVishal Verma 2095212e11fSVishal Verma snprintf(dirname, 32, "arena%d", idx); 2105212e11fSVishal Verma d = debugfs_create_dir(dirname, parent); 2115212e11fSVishal Verma if (IS_ERR_OR_NULL(d)) 2125212e11fSVishal Verma return; 2135212e11fSVishal Verma a->debugfs_dir = d; 2145212e11fSVishal Verma 2155212e11fSVishal Verma debugfs_create_x64("size", S_IRUGO, d, &a->size); 2165212e11fSVishal Verma debugfs_create_x64("external_lba_start", S_IRUGO, d, 2175212e11fSVishal Verma &a->external_lba_start); 2185212e11fSVishal Verma debugfs_create_x32("internal_nlba", S_IRUGO, d, &a->internal_nlba); 2195212e11fSVishal Verma debugfs_create_u32("internal_lbasize", S_IRUGO, d, 2205212e11fSVishal Verma &a->internal_lbasize); 2215212e11fSVishal Verma debugfs_create_x32("external_nlba", S_IRUGO, d, &a->external_nlba); 2225212e11fSVishal Verma debugfs_create_u32("external_lbasize", S_IRUGO, d, 2235212e11fSVishal Verma &a->external_lbasize); 2245212e11fSVishal Verma debugfs_create_u32("nfree", S_IRUGO, d, &a->nfree); 2255212e11fSVishal Verma debugfs_create_u16("version_major", S_IRUGO, d, &a->version_major); 2265212e11fSVishal Verma debugfs_create_u16("version_minor", S_IRUGO, d, &a->version_minor); 2275212e11fSVishal Verma debugfs_create_x64("nextoff", S_IRUGO, d, &a->nextoff); 2285212e11fSVishal Verma debugfs_create_x64("infooff", S_IRUGO, d, &a->infooff); 2295212e11fSVishal Verma debugfs_create_x64("dataoff", S_IRUGO, d, &a->dataoff); 2305212e11fSVishal Verma debugfs_create_x64("mapoff", S_IRUGO, d, &a->mapoff); 2315212e11fSVishal Verma debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff); 2325212e11fSVishal Verma debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off); 2335212e11fSVishal Verma debugfs_create_x32("flags", S_IRUGO, d, &a->flags); 2345212e11fSVishal Verma } 2355212e11fSVishal Verma 2365212e11fSVishal Verma static void btt_debugfs_init(struct btt *btt) 2375212e11fSVishal Verma { 2385212e11fSVishal Verma int i = 0; 2395212e11fSVishal Verma struct arena_info *arena; 2405212e11fSVishal Verma 2415212e11fSVishal Verma btt->debugfs_dir = debugfs_create_dir(dev_name(&btt->nd_btt->dev), 2425212e11fSVishal Verma debugfs_root); 2435212e11fSVishal Verma if (IS_ERR_OR_NULL(btt->debugfs_dir)) 2445212e11fSVishal Verma return; 2455212e11fSVishal Verma 2465212e11fSVishal Verma list_for_each_entry(arena, &btt->arena_list, list) { 2475212e11fSVishal Verma arena_debugfs_init(arena, btt->debugfs_dir, i); 2485212e11fSVishal Verma i++; 2495212e11fSVishal Verma } 2505212e11fSVishal Verma } 2515212e11fSVishal Verma 2525212e11fSVishal Verma /* 2535212e11fSVishal Verma * This function accepts two log entries, and uses the 2545212e11fSVishal Verma * sequence number to find the 'older' entry. 2555212e11fSVishal Verma * It also updates the sequence number in this old entry to 2565212e11fSVishal Verma * make it the 'new' one if the mark_flag is set. 2575212e11fSVishal Verma * Finally, it returns which of the entries was the older one. 2585212e11fSVishal Verma * 2595212e11fSVishal Verma * TODO The logic feels a bit kludge-y. make it better.. 2605212e11fSVishal Verma */ 2615212e11fSVishal Verma static int btt_log_get_old(struct log_entry *ent) 2625212e11fSVishal Verma { 2635212e11fSVishal Verma int old; 2645212e11fSVishal Verma 2655212e11fSVishal Verma /* 2665212e11fSVishal Verma * the first ever time this is seen, the entry goes into [0] 2675212e11fSVishal Verma * the next time, the following logic works out to put this 2685212e11fSVishal Verma * (next) entry into [1] 2695212e11fSVishal Verma */ 2705212e11fSVishal Verma if (ent[0].seq == 0) { 2715212e11fSVishal Verma ent[0].seq = cpu_to_le32(1); 2725212e11fSVishal Verma return 0; 2735212e11fSVishal Verma } 2745212e11fSVishal Verma 2755212e11fSVishal Verma if (ent[0].seq == ent[1].seq) 2765212e11fSVishal Verma return -EINVAL; 2775212e11fSVishal Verma if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5) 2785212e11fSVishal Verma return -EINVAL; 2795212e11fSVishal Verma 2805212e11fSVishal Verma if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) { 2815212e11fSVishal Verma if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1) 2825212e11fSVishal Verma old = 0; 2835212e11fSVishal Verma else 2845212e11fSVishal Verma old = 1; 2855212e11fSVishal Verma } else { 2865212e11fSVishal Verma if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1) 2875212e11fSVishal Verma old = 1; 2885212e11fSVishal Verma else 2895212e11fSVishal Verma old = 0; 2905212e11fSVishal Verma } 2915212e11fSVishal Verma 2925212e11fSVishal Verma return old; 2935212e11fSVishal Verma } 2945212e11fSVishal Verma 2955212e11fSVishal Verma static struct device *to_dev(struct arena_info *arena) 2965212e11fSVishal Verma { 2975212e11fSVishal Verma return &arena->nd_btt->dev; 2985212e11fSVishal Verma } 2995212e11fSVishal Verma 3005212e11fSVishal Verma /* 3015212e11fSVishal Verma * This function copies the desired (old/new) log entry into ent if 3025212e11fSVishal Verma * it is not NULL. It returns the sub-slot number (0 or 1) 3035212e11fSVishal Verma * where the desired log entry was found. Negative return values 3045212e11fSVishal Verma * indicate errors. 3055212e11fSVishal Verma */ 3065212e11fSVishal Verma static int btt_log_read(struct arena_info *arena, u32 lane, 3075212e11fSVishal Verma struct log_entry *ent, int old_flag) 3085212e11fSVishal Verma { 3095212e11fSVishal Verma int ret; 3105212e11fSVishal Verma int old_ent, ret_ent; 3115212e11fSVishal Verma struct log_entry log[2]; 3125212e11fSVishal Verma 3135212e11fSVishal Verma ret = btt_log_read_pair(arena, lane, log); 3145212e11fSVishal Verma if (ret) 3155212e11fSVishal Verma return -EIO; 3165212e11fSVishal Verma 3175212e11fSVishal Verma old_ent = btt_log_get_old(log); 3185212e11fSVishal Verma if (old_ent < 0 || old_ent > 1) { 3195212e11fSVishal Verma dev_info(to_dev(arena), 3205212e11fSVishal Verma "log corruption (%d): lane %d seq [%d, %d]\n", 3215212e11fSVishal Verma old_ent, lane, log[0].seq, log[1].seq); 3225212e11fSVishal Verma /* TODO set error state? */ 3235212e11fSVishal Verma return -EIO; 3245212e11fSVishal Verma } 3255212e11fSVishal Verma 3265212e11fSVishal Verma ret_ent = (old_flag ? old_ent : (1 - old_ent)); 3275212e11fSVishal Verma 3285212e11fSVishal Verma if (ent != NULL) 3295212e11fSVishal Verma memcpy(ent, &log[ret_ent], LOG_ENT_SIZE); 3305212e11fSVishal Verma 3315212e11fSVishal Verma return ret_ent; 3325212e11fSVishal Verma } 3335212e11fSVishal Verma 3345212e11fSVishal Verma /* 3355212e11fSVishal Verma * This function commits a log entry to media 3365212e11fSVishal Verma * It does _not_ prepare the freelist entry for the next write 3375212e11fSVishal Verma * btt_flog_write is the wrapper for updating the freelist elements 3385212e11fSVishal Verma */ 3395212e11fSVishal Verma static int __btt_log_write(struct arena_info *arena, u32 lane, 3405212e11fSVishal Verma u32 sub, struct log_entry *ent) 3415212e11fSVishal Verma { 3425212e11fSVishal Verma int ret; 3435212e11fSVishal Verma /* 3445212e11fSVishal Verma * Ignore the padding in log_entry for calculating log_half. 3455212e11fSVishal Verma * The entry is 'committed' when we write the sequence number, 3465212e11fSVishal Verma * and we want to ensure that that is the last thing written. 3475212e11fSVishal Verma * We don't bother writing the padding as that would be extra 3485212e11fSVishal Verma * media wear and write amplification 3495212e11fSVishal Verma */ 3505212e11fSVishal Verma unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2; 3515212e11fSVishal Verma u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE); 3525212e11fSVishal Verma void *src = ent; 3535212e11fSVishal Verma 3545212e11fSVishal Verma /* split the 16B write into atomic, durable halves */ 3555212e11fSVishal Verma ret = arena_write_bytes(arena, ns_off, src, log_half); 3565212e11fSVishal Verma if (ret) 3575212e11fSVishal Verma return ret; 3585212e11fSVishal Verma 3595212e11fSVishal Verma ns_off += log_half; 3605212e11fSVishal Verma src += log_half; 3615212e11fSVishal Verma return arena_write_bytes(arena, ns_off, src, log_half); 3625212e11fSVishal Verma } 3635212e11fSVishal Verma 3645212e11fSVishal Verma static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub, 3655212e11fSVishal Verma struct log_entry *ent) 3665212e11fSVishal Verma { 3675212e11fSVishal Verma int ret; 3685212e11fSVishal Verma 3695212e11fSVishal Verma ret = __btt_log_write(arena, lane, sub, ent); 3705212e11fSVishal Verma if (ret) 3715212e11fSVishal Verma return ret; 3725212e11fSVishal Verma 3735212e11fSVishal Verma /* prepare the next free entry */ 3745212e11fSVishal Verma arena->freelist[lane].sub = 1 - arena->freelist[lane].sub; 3755212e11fSVishal Verma if (++(arena->freelist[lane].seq) == 4) 3765212e11fSVishal Verma arena->freelist[lane].seq = 1; 3775212e11fSVishal Verma arena->freelist[lane].block = le32_to_cpu(ent->old_map); 3785212e11fSVishal Verma 3795212e11fSVishal Verma return ret; 3805212e11fSVishal Verma } 3815212e11fSVishal Verma 3825212e11fSVishal Verma /* 3835212e11fSVishal Verma * This function initializes the BTT map to the initial state, which is 3845212e11fSVishal Verma * all-zeroes, and indicates an identity mapping 3855212e11fSVishal Verma */ 3865212e11fSVishal Verma static int btt_map_init(struct arena_info *arena) 3875212e11fSVishal Verma { 3885212e11fSVishal Verma int ret = -EINVAL; 3895212e11fSVishal Verma void *zerobuf; 3905212e11fSVishal Verma size_t offset = 0; 3915212e11fSVishal Verma size_t chunk_size = SZ_2M; 3925212e11fSVishal Verma size_t mapsize = arena->logoff - arena->mapoff; 3935212e11fSVishal Verma 3945212e11fSVishal Verma zerobuf = kzalloc(chunk_size, GFP_KERNEL); 3955212e11fSVishal Verma if (!zerobuf) 3965212e11fSVishal Verma return -ENOMEM; 3975212e11fSVishal Verma 3985212e11fSVishal Verma while (mapsize) { 3995212e11fSVishal Verma size_t size = min(mapsize, chunk_size); 4005212e11fSVishal Verma 4015212e11fSVishal Verma ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf, 4025212e11fSVishal Verma size); 4035212e11fSVishal Verma if (ret) 4045212e11fSVishal Verma goto free; 4055212e11fSVishal Verma 4065212e11fSVishal Verma offset += size; 4075212e11fSVishal Verma mapsize -= size; 4085212e11fSVishal Verma cond_resched(); 4095212e11fSVishal Verma } 4105212e11fSVishal Verma 4115212e11fSVishal Verma free: 4125212e11fSVishal Verma kfree(zerobuf); 4135212e11fSVishal Verma return ret; 4145212e11fSVishal Verma } 4155212e11fSVishal Verma 4165212e11fSVishal Verma /* 4175212e11fSVishal Verma * This function initializes the BTT log with 'fake' entries pointing 4185212e11fSVishal Verma * to the initial reserved set of blocks as being free 4195212e11fSVishal Verma */ 4205212e11fSVishal Verma static int btt_log_init(struct arena_info *arena) 4215212e11fSVishal Verma { 4225212e11fSVishal Verma int ret; 4235212e11fSVishal Verma u32 i; 4245212e11fSVishal Verma struct log_entry log, zerolog; 4255212e11fSVishal Verma 4265212e11fSVishal Verma memset(&zerolog, 0, sizeof(zerolog)); 4275212e11fSVishal Verma 4285212e11fSVishal Verma for (i = 0; i < arena->nfree; i++) { 4295212e11fSVishal Verma log.lba = cpu_to_le32(i); 4305212e11fSVishal Verma log.old_map = cpu_to_le32(arena->external_nlba + i); 4315212e11fSVishal Verma log.new_map = cpu_to_le32(arena->external_nlba + i); 4325212e11fSVishal Verma log.seq = cpu_to_le32(LOG_SEQ_INIT); 4335212e11fSVishal Verma ret = __btt_log_write(arena, i, 0, &log); 4345212e11fSVishal Verma if (ret) 4355212e11fSVishal Verma return ret; 4365212e11fSVishal Verma ret = __btt_log_write(arena, i, 1, &zerolog); 4375212e11fSVishal Verma if (ret) 4385212e11fSVishal Verma return ret; 4395212e11fSVishal Verma } 4405212e11fSVishal Verma 4415212e11fSVishal Verma return 0; 4425212e11fSVishal Verma } 4435212e11fSVishal Verma 4445212e11fSVishal Verma static int btt_freelist_init(struct arena_info *arena) 4455212e11fSVishal Verma { 4465212e11fSVishal Verma int old, new, ret; 4475212e11fSVishal Verma u32 i, map_entry; 4485212e11fSVishal Verma struct log_entry log_new, log_old; 4495212e11fSVishal Verma 4505212e11fSVishal Verma arena->freelist = kcalloc(arena->nfree, sizeof(struct free_entry), 4515212e11fSVishal Verma GFP_KERNEL); 4525212e11fSVishal Verma if (!arena->freelist) 4535212e11fSVishal Verma return -ENOMEM; 4545212e11fSVishal Verma 4555212e11fSVishal Verma for (i = 0; i < arena->nfree; i++) { 4565212e11fSVishal Verma old = btt_log_read(arena, i, &log_old, LOG_OLD_ENT); 4575212e11fSVishal Verma if (old < 0) 4585212e11fSVishal Verma return old; 4595212e11fSVishal Verma 4605212e11fSVishal Verma new = btt_log_read(arena, i, &log_new, LOG_NEW_ENT); 4615212e11fSVishal Verma if (new < 0) 4625212e11fSVishal Verma return new; 4635212e11fSVishal Verma 4645212e11fSVishal Verma /* sub points to the next one to be overwritten */ 4655212e11fSVishal Verma arena->freelist[i].sub = 1 - new; 4665212e11fSVishal Verma arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq)); 4675212e11fSVishal Verma arena->freelist[i].block = le32_to_cpu(log_new.old_map); 4685212e11fSVishal Verma 4695212e11fSVishal Verma /* This implies a newly created or untouched flog entry */ 4705212e11fSVishal Verma if (log_new.old_map == log_new.new_map) 4715212e11fSVishal Verma continue; 4725212e11fSVishal Verma 4735212e11fSVishal Verma /* Check if map recovery is needed */ 4745212e11fSVishal Verma ret = btt_map_read(arena, le32_to_cpu(log_new.lba), &map_entry, 4755212e11fSVishal Verma NULL, NULL); 4765212e11fSVishal Verma if (ret) 4775212e11fSVishal Verma return ret; 4785212e11fSVishal Verma if ((le32_to_cpu(log_new.new_map) != map_entry) && 4795212e11fSVishal Verma (le32_to_cpu(log_new.old_map) == map_entry)) { 4805212e11fSVishal Verma /* 4815212e11fSVishal Verma * Last transaction wrote the flog, but wasn't able 4825212e11fSVishal Verma * to complete the map write. So fix up the map. 4835212e11fSVishal Verma */ 4845212e11fSVishal Verma ret = btt_map_write(arena, le32_to_cpu(log_new.lba), 4855212e11fSVishal Verma le32_to_cpu(log_new.new_map), 0, 0); 4865212e11fSVishal Verma if (ret) 4875212e11fSVishal Verma return ret; 4885212e11fSVishal Verma } 4895212e11fSVishal Verma 4905212e11fSVishal Verma } 4915212e11fSVishal Verma 4925212e11fSVishal Verma return 0; 4935212e11fSVishal Verma } 4945212e11fSVishal Verma 4955212e11fSVishal Verma static int btt_rtt_init(struct arena_info *arena) 4965212e11fSVishal Verma { 4975212e11fSVishal Verma arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL); 4985212e11fSVishal Verma if (arena->rtt == NULL) 4995212e11fSVishal Verma return -ENOMEM; 5005212e11fSVishal Verma 5015212e11fSVishal Verma return 0; 5025212e11fSVishal Verma } 5035212e11fSVishal Verma 5045212e11fSVishal Verma static int btt_maplocks_init(struct arena_info *arena) 5055212e11fSVishal Verma { 5065212e11fSVishal Verma u32 i; 5075212e11fSVishal Verma 5085212e11fSVishal Verma arena->map_locks = kcalloc(arena->nfree, sizeof(struct aligned_lock), 5095212e11fSVishal Verma GFP_KERNEL); 5105212e11fSVishal Verma if (!arena->map_locks) 5115212e11fSVishal Verma return -ENOMEM; 5125212e11fSVishal Verma 5135212e11fSVishal Verma for (i = 0; i < arena->nfree; i++) 5145212e11fSVishal Verma spin_lock_init(&arena->map_locks[i].lock); 5155212e11fSVishal Verma 5165212e11fSVishal Verma return 0; 5175212e11fSVishal Verma } 5185212e11fSVishal Verma 5195212e11fSVishal Verma static struct arena_info *alloc_arena(struct btt *btt, size_t size, 5205212e11fSVishal Verma size_t start, size_t arena_off) 5215212e11fSVishal Verma { 5225212e11fSVishal Verma struct arena_info *arena; 5235212e11fSVishal Verma u64 logsize, mapsize, datasize; 5245212e11fSVishal Verma u64 available = size; 5255212e11fSVishal Verma 5265212e11fSVishal Verma arena = kzalloc(sizeof(struct arena_info), GFP_KERNEL); 5275212e11fSVishal Verma if (!arena) 5285212e11fSVishal Verma return NULL; 5295212e11fSVishal Verma arena->nd_btt = btt->nd_btt; 5305212e11fSVishal Verma 5315212e11fSVishal Verma if (!size) 5325212e11fSVishal Verma return arena; 5335212e11fSVishal Verma 5345212e11fSVishal Verma arena->size = size; 5355212e11fSVishal Verma arena->external_lba_start = start; 5365212e11fSVishal Verma arena->external_lbasize = btt->lbasize; 5375212e11fSVishal Verma arena->internal_lbasize = roundup(arena->external_lbasize, 5385212e11fSVishal Verma INT_LBASIZE_ALIGNMENT); 5395212e11fSVishal Verma arena->nfree = BTT_DEFAULT_NFREE; 5405212e11fSVishal Verma arena->version_major = 1; 5415212e11fSVishal Verma arena->version_minor = 1; 5425212e11fSVishal Verma 5435212e11fSVishal Verma if (available % BTT_PG_SIZE) 5445212e11fSVishal Verma available -= (available % BTT_PG_SIZE); 5455212e11fSVishal Verma 5465212e11fSVishal Verma /* Two pages are reserved for the super block and its copy */ 5475212e11fSVishal Verma available -= 2 * BTT_PG_SIZE; 5485212e11fSVishal Verma 5495212e11fSVishal Verma /* The log takes a fixed amount of space based on nfree */ 5505212e11fSVishal Verma logsize = roundup(2 * arena->nfree * sizeof(struct log_entry), 5515212e11fSVishal Verma BTT_PG_SIZE); 5525212e11fSVishal Verma available -= logsize; 5535212e11fSVishal Verma 5545212e11fSVishal Verma /* Calculate optimal split between map and data area */ 5555212e11fSVishal Verma arena->internal_nlba = div_u64(available - BTT_PG_SIZE, 5565212e11fSVishal Verma arena->internal_lbasize + MAP_ENT_SIZE); 5575212e11fSVishal Verma arena->external_nlba = arena->internal_nlba - arena->nfree; 5585212e11fSVishal Verma 5595212e11fSVishal Verma mapsize = roundup((arena->external_nlba * MAP_ENT_SIZE), BTT_PG_SIZE); 5605212e11fSVishal Verma datasize = available - mapsize; 5615212e11fSVishal Verma 5625212e11fSVishal Verma /* 'Absolute' values, relative to start of storage space */ 5635212e11fSVishal Verma arena->infooff = arena_off; 5645212e11fSVishal Verma arena->dataoff = arena->infooff + BTT_PG_SIZE; 5655212e11fSVishal Verma arena->mapoff = arena->dataoff + datasize; 5665212e11fSVishal Verma arena->logoff = arena->mapoff + mapsize; 5675212e11fSVishal Verma arena->info2off = arena->logoff + logsize; 5685212e11fSVishal Verma return arena; 5695212e11fSVishal Verma } 5705212e11fSVishal Verma 5715212e11fSVishal Verma static void free_arenas(struct btt *btt) 5725212e11fSVishal Verma { 5735212e11fSVishal Verma struct arena_info *arena, *next; 5745212e11fSVishal Verma 5755212e11fSVishal Verma list_for_each_entry_safe(arena, next, &btt->arena_list, list) { 5765212e11fSVishal Verma list_del(&arena->list); 5775212e11fSVishal Verma kfree(arena->rtt); 5785212e11fSVishal Verma kfree(arena->map_locks); 5795212e11fSVishal Verma kfree(arena->freelist); 5805212e11fSVishal Verma debugfs_remove_recursive(arena->debugfs_dir); 5815212e11fSVishal Verma kfree(arena); 5825212e11fSVishal Verma } 5835212e11fSVishal Verma } 5845212e11fSVishal Verma 5855212e11fSVishal Verma /* 5865212e11fSVishal Verma * This function reads an existing valid btt superblock and 5875212e11fSVishal Verma * populates the corresponding arena_info struct 5885212e11fSVishal Verma */ 5895212e11fSVishal Verma static void parse_arena_meta(struct arena_info *arena, struct btt_sb *super, 5905212e11fSVishal Verma u64 arena_off) 5915212e11fSVishal Verma { 5925212e11fSVishal Verma arena->internal_nlba = le32_to_cpu(super->internal_nlba); 5935212e11fSVishal Verma arena->internal_lbasize = le32_to_cpu(super->internal_lbasize); 5945212e11fSVishal Verma arena->external_nlba = le32_to_cpu(super->external_nlba); 5955212e11fSVishal Verma arena->external_lbasize = le32_to_cpu(super->external_lbasize); 5965212e11fSVishal Verma arena->nfree = le32_to_cpu(super->nfree); 5975212e11fSVishal Verma arena->version_major = le16_to_cpu(super->version_major); 5985212e11fSVishal Verma arena->version_minor = le16_to_cpu(super->version_minor); 5995212e11fSVishal Verma 6005212e11fSVishal Verma arena->nextoff = (super->nextoff == 0) ? 0 : (arena_off + 6015212e11fSVishal Verma le64_to_cpu(super->nextoff)); 6025212e11fSVishal Verma arena->infooff = arena_off; 6035212e11fSVishal Verma arena->dataoff = arena_off + le64_to_cpu(super->dataoff); 6045212e11fSVishal Verma arena->mapoff = arena_off + le64_to_cpu(super->mapoff); 6055212e11fSVishal Verma arena->logoff = arena_off + le64_to_cpu(super->logoff); 6065212e11fSVishal Verma arena->info2off = arena_off + le64_to_cpu(super->info2off); 6075212e11fSVishal Verma 6085e329406SDan Williams arena->size = (le64_to_cpu(super->nextoff) > 0) 6095e329406SDan Williams ? (le64_to_cpu(super->nextoff)) 6105e329406SDan Williams : (arena->info2off - arena->infooff + BTT_PG_SIZE); 6115212e11fSVishal Verma 6125212e11fSVishal Verma arena->flags = le32_to_cpu(super->flags); 6135212e11fSVishal Verma } 6145212e11fSVishal Verma 6155212e11fSVishal Verma static int discover_arenas(struct btt *btt) 6165212e11fSVishal Verma { 6175212e11fSVishal Verma int ret = 0; 6185212e11fSVishal Verma struct arena_info *arena; 6195212e11fSVishal Verma struct btt_sb *super; 6205212e11fSVishal Verma size_t remaining = btt->rawsize; 6215212e11fSVishal Verma u64 cur_nlba = 0; 6225212e11fSVishal Verma size_t cur_off = 0; 6235212e11fSVishal Verma int num_arenas = 0; 6245212e11fSVishal Verma 6255212e11fSVishal Verma super = kzalloc(sizeof(*super), GFP_KERNEL); 6265212e11fSVishal Verma if (!super) 6275212e11fSVishal Verma return -ENOMEM; 6285212e11fSVishal Verma 6295212e11fSVishal Verma while (remaining) { 6305212e11fSVishal Verma /* Alloc memory for arena */ 6315212e11fSVishal Verma arena = alloc_arena(btt, 0, 0, 0); 6325212e11fSVishal Verma if (!arena) { 6335212e11fSVishal Verma ret = -ENOMEM; 6345212e11fSVishal Verma goto out_super; 6355212e11fSVishal Verma } 6365212e11fSVishal Verma 6375212e11fSVishal Verma arena->infooff = cur_off; 6385212e11fSVishal Verma ret = btt_info_read(arena, super); 6395212e11fSVishal Verma if (ret) 6405212e11fSVishal Verma goto out; 6415212e11fSVishal Verma 642ab45e763SVishal Verma if (!nd_btt_arena_is_valid(btt->nd_btt, super)) { 6435212e11fSVishal Verma if (remaining == btt->rawsize) { 6445212e11fSVishal Verma btt->init_state = INIT_NOTFOUND; 6455212e11fSVishal Verma dev_info(to_dev(arena), "No existing arenas\n"); 6465212e11fSVishal Verma goto out; 6475212e11fSVishal Verma } else { 6485212e11fSVishal Verma dev_info(to_dev(arena), 6495212e11fSVishal Verma "Found corrupted metadata!\n"); 6505212e11fSVishal Verma ret = -ENODEV; 6515212e11fSVishal Verma goto out; 6525212e11fSVishal Verma } 6535212e11fSVishal Verma } 6545212e11fSVishal Verma 6555212e11fSVishal Verma arena->external_lba_start = cur_nlba; 6565212e11fSVishal Verma parse_arena_meta(arena, super, cur_off); 6575212e11fSVishal Verma 6585212e11fSVishal Verma ret = btt_freelist_init(arena); 6595212e11fSVishal Verma if (ret) 6605212e11fSVishal Verma goto out; 6615212e11fSVishal Verma 6625212e11fSVishal Verma ret = btt_rtt_init(arena); 6635212e11fSVishal Verma if (ret) 6645212e11fSVishal Verma goto out; 6655212e11fSVishal Verma 6665212e11fSVishal Verma ret = btt_maplocks_init(arena); 6675212e11fSVishal Verma if (ret) 6685212e11fSVishal Verma goto out; 6695212e11fSVishal Verma 6705212e11fSVishal Verma list_add_tail(&arena->list, &btt->arena_list); 6715212e11fSVishal Verma 6725212e11fSVishal Verma remaining -= arena->size; 6735212e11fSVishal Verma cur_off += arena->size; 6745212e11fSVishal Verma cur_nlba += arena->external_nlba; 6755212e11fSVishal Verma num_arenas++; 6765212e11fSVishal Verma 6775212e11fSVishal Verma if (arena->nextoff == 0) 6785212e11fSVishal Verma break; 6795212e11fSVishal Verma } 6805212e11fSVishal Verma btt->num_arenas = num_arenas; 6815212e11fSVishal Verma btt->nlba = cur_nlba; 6825212e11fSVishal Verma btt->init_state = INIT_READY; 6835212e11fSVishal Verma 6845212e11fSVishal Verma kfree(super); 6855212e11fSVishal Verma return ret; 6865212e11fSVishal Verma 6875212e11fSVishal Verma out: 6885212e11fSVishal Verma kfree(arena); 6895212e11fSVishal Verma free_arenas(btt); 6905212e11fSVishal Verma out_super: 6915212e11fSVishal Verma kfree(super); 6925212e11fSVishal Verma return ret; 6935212e11fSVishal Verma } 6945212e11fSVishal Verma 6955212e11fSVishal Verma static int create_arenas(struct btt *btt) 6965212e11fSVishal Verma { 6975212e11fSVishal Verma size_t remaining = btt->rawsize; 6985212e11fSVishal Verma size_t cur_off = 0; 6995212e11fSVishal Verma 7005212e11fSVishal Verma while (remaining) { 7015212e11fSVishal Verma struct arena_info *arena; 7025212e11fSVishal Verma size_t arena_size = min_t(u64, ARENA_MAX_SIZE, remaining); 7035212e11fSVishal Verma 7045212e11fSVishal Verma remaining -= arena_size; 7055212e11fSVishal Verma if (arena_size < ARENA_MIN_SIZE) 7065212e11fSVishal Verma break; 7075212e11fSVishal Verma 7085212e11fSVishal Verma arena = alloc_arena(btt, arena_size, btt->nlba, cur_off); 7095212e11fSVishal Verma if (!arena) { 7105212e11fSVishal Verma free_arenas(btt); 7115212e11fSVishal Verma return -ENOMEM; 7125212e11fSVishal Verma } 7135212e11fSVishal Verma btt->nlba += arena->external_nlba; 7145212e11fSVishal Verma if (remaining >= ARENA_MIN_SIZE) 7155212e11fSVishal Verma arena->nextoff = arena->size; 7165212e11fSVishal Verma else 7175212e11fSVishal Verma arena->nextoff = 0; 7185212e11fSVishal Verma cur_off += arena_size; 7195212e11fSVishal Verma list_add_tail(&arena->list, &btt->arena_list); 7205212e11fSVishal Verma } 7215212e11fSVishal Verma 7225212e11fSVishal Verma return 0; 7235212e11fSVishal Verma } 7245212e11fSVishal Verma 7255212e11fSVishal Verma /* 7265212e11fSVishal Verma * This function completes arena initialization by writing 7275212e11fSVishal Verma * all the metadata. 7285212e11fSVishal Verma * It is only called for an uninitialized arena when a write 7295212e11fSVishal Verma * to that arena occurs for the first time. 7305212e11fSVishal Verma */ 731fbde1414SVishal Verma static int btt_arena_write_layout(struct arena_info *arena) 7325212e11fSVishal Verma { 7335212e11fSVishal Verma int ret; 734*e1455744SDan Williams u64 sum; 7355212e11fSVishal Verma struct btt_sb *super; 736fbde1414SVishal Verma struct nd_btt *nd_btt = arena->nd_btt; 7376ec68954SVishal Verma const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev); 7385212e11fSVishal Verma 7395212e11fSVishal Verma ret = btt_map_init(arena); 7405212e11fSVishal Verma if (ret) 7415212e11fSVishal Verma return ret; 7425212e11fSVishal Verma 7435212e11fSVishal Verma ret = btt_log_init(arena); 7445212e11fSVishal Verma if (ret) 7455212e11fSVishal Verma return ret; 7465212e11fSVishal Verma 7475212e11fSVishal Verma super = kzalloc(sizeof(struct btt_sb), GFP_NOIO); 7485212e11fSVishal Verma if (!super) 7495212e11fSVishal Verma return -ENOMEM; 7505212e11fSVishal Verma 7515212e11fSVishal Verma strncpy(super->signature, BTT_SIG, BTT_SIG_LEN); 752fbde1414SVishal Verma memcpy(super->uuid, nd_btt->uuid, 16); 7536ec68954SVishal Verma memcpy(super->parent_uuid, parent_uuid, 16); 7545212e11fSVishal Verma super->flags = cpu_to_le32(arena->flags); 7555212e11fSVishal Verma super->version_major = cpu_to_le16(arena->version_major); 7565212e11fSVishal Verma super->version_minor = cpu_to_le16(arena->version_minor); 7575212e11fSVishal Verma super->external_lbasize = cpu_to_le32(arena->external_lbasize); 7585212e11fSVishal Verma super->external_nlba = cpu_to_le32(arena->external_nlba); 7595212e11fSVishal Verma super->internal_lbasize = cpu_to_le32(arena->internal_lbasize); 7605212e11fSVishal Verma super->internal_nlba = cpu_to_le32(arena->internal_nlba); 7615212e11fSVishal Verma super->nfree = cpu_to_le32(arena->nfree); 7625212e11fSVishal Verma super->infosize = cpu_to_le32(sizeof(struct btt_sb)); 7635212e11fSVishal Verma super->nextoff = cpu_to_le64(arena->nextoff); 7645212e11fSVishal Verma /* 7655212e11fSVishal Verma * Subtract arena->infooff (arena start) so numbers are relative 7665212e11fSVishal Verma * to 'this' arena 7675212e11fSVishal Verma */ 7685212e11fSVishal Verma super->dataoff = cpu_to_le64(arena->dataoff - arena->infooff); 7695212e11fSVishal Verma super->mapoff = cpu_to_le64(arena->mapoff - arena->infooff); 7705212e11fSVishal Verma super->logoff = cpu_to_le64(arena->logoff - arena->infooff); 7715212e11fSVishal Verma super->info2off = cpu_to_le64(arena->info2off - arena->infooff); 7725212e11fSVishal Verma 7735212e11fSVishal Verma super->flags = 0; 774*e1455744SDan Williams sum = nd_sb_checksum((struct nd_gen_sb *) super); 775*e1455744SDan Williams super->checksum = cpu_to_le64(sum); 7765212e11fSVishal Verma 7775212e11fSVishal Verma ret = btt_info_write(arena, super); 7785212e11fSVishal Verma 7795212e11fSVishal Verma kfree(super); 7805212e11fSVishal Verma return ret; 7815212e11fSVishal Verma } 7825212e11fSVishal Verma 7835212e11fSVishal Verma /* 7845212e11fSVishal Verma * This function completes the initialization for the BTT namespace 7855212e11fSVishal Verma * such that it is ready to accept IOs 7865212e11fSVishal Verma */ 7875212e11fSVishal Verma static int btt_meta_init(struct btt *btt) 7885212e11fSVishal Verma { 7895212e11fSVishal Verma int ret = 0; 7905212e11fSVishal Verma struct arena_info *arena; 7915212e11fSVishal Verma 7925212e11fSVishal Verma mutex_lock(&btt->init_lock); 7935212e11fSVishal Verma list_for_each_entry(arena, &btt->arena_list, list) { 794fbde1414SVishal Verma ret = btt_arena_write_layout(arena); 7955212e11fSVishal Verma if (ret) 7965212e11fSVishal Verma goto unlock; 7975212e11fSVishal Verma 7985212e11fSVishal Verma ret = btt_freelist_init(arena); 7995212e11fSVishal Verma if (ret) 8005212e11fSVishal Verma goto unlock; 8015212e11fSVishal Verma 8025212e11fSVishal Verma ret = btt_rtt_init(arena); 8035212e11fSVishal Verma if (ret) 8045212e11fSVishal Verma goto unlock; 8055212e11fSVishal Verma 8065212e11fSVishal Verma ret = btt_maplocks_init(arena); 8075212e11fSVishal Verma if (ret) 8085212e11fSVishal Verma goto unlock; 8095212e11fSVishal Verma } 8105212e11fSVishal Verma 8115212e11fSVishal Verma btt->init_state = INIT_READY; 8125212e11fSVishal Verma 8135212e11fSVishal Verma unlock: 8145212e11fSVishal Verma mutex_unlock(&btt->init_lock); 8155212e11fSVishal Verma return ret; 8165212e11fSVishal Verma } 8175212e11fSVishal Verma 81841cd8b70SVishal Verma static u32 btt_meta_size(struct btt *btt) 81941cd8b70SVishal Verma { 82041cd8b70SVishal Verma return btt->lbasize - btt->sector_size; 82141cd8b70SVishal Verma } 82241cd8b70SVishal Verma 8235212e11fSVishal Verma /* 8245212e11fSVishal Verma * This function calculates the arena in which the given LBA lies 8255212e11fSVishal Verma * by doing a linear walk. This is acceptable since we expect only 8265212e11fSVishal Verma * a few arenas. If we have backing devices that get much larger, 8275212e11fSVishal Verma * we can construct a balanced binary tree of arenas at init time 8285212e11fSVishal Verma * so that this range search becomes faster. 8295212e11fSVishal Verma */ 8305212e11fSVishal Verma static int lba_to_arena(struct btt *btt, sector_t sector, __u32 *premap, 8315212e11fSVishal Verma struct arena_info **arena) 8325212e11fSVishal Verma { 8335212e11fSVishal Verma struct arena_info *arena_list; 8345212e11fSVishal Verma __u64 lba = div_u64(sector << SECTOR_SHIFT, btt->sector_size); 8355212e11fSVishal Verma 8365212e11fSVishal Verma list_for_each_entry(arena_list, &btt->arena_list, list) { 8375212e11fSVishal Verma if (lba < arena_list->external_nlba) { 8385212e11fSVishal Verma *arena = arena_list; 8395212e11fSVishal Verma *premap = lba; 8405212e11fSVishal Verma return 0; 8415212e11fSVishal Verma } 8425212e11fSVishal Verma lba -= arena_list->external_nlba; 8435212e11fSVishal Verma } 8445212e11fSVishal Verma 8455212e11fSVishal Verma return -EIO; 8465212e11fSVishal Verma } 8475212e11fSVishal Verma 8485212e11fSVishal Verma /* 8495212e11fSVishal Verma * The following (lock_map, unlock_map) are mostly just to improve 8505212e11fSVishal Verma * readability, since they index into an array of locks 8515212e11fSVishal Verma */ 8525212e11fSVishal Verma static void lock_map(struct arena_info *arena, u32 premap) 8535212e11fSVishal Verma __acquires(&arena->map_locks[idx].lock) 8545212e11fSVishal Verma { 8555212e11fSVishal Verma u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree; 8565212e11fSVishal Verma 8575212e11fSVishal Verma spin_lock(&arena->map_locks[idx].lock); 8585212e11fSVishal Verma } 8595212e11fSVishal Verma 8605212e11fSVishal Verma static void unlock_map(struct arena_info *arena, u32 premap) 8615212e11fSVishal Verma __releases(&arena->map_locks[idx].lock) 8625212e11fSVishal Verma { 8635212e11fSVishal Verma u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree; 8645212e11fSVishal Verma 8655212e11fSVishal Verma spin_unlock(&arena->map_locks[idx].lock); 8665212e11fSVishal Verma } 8675212e11fSVishal Verma 8685212e11fSVishal Verma static u64 to_namespace_offset(struct arena_info *arena, u64 lba) 8695212e11fSVishal Verma { 8705212e11fSVishal Verma return arena->dataoff + ((u64)lba * arena->internal_lbasize); 8715212e11fSVishal Verma } 8725212e11fSVishal Verma 8735212e11fSVishal Verma static int btt_data_read(struct arena_info *arena, struct page *page, 8745212e11fSVishal Verma unsigned int off, u32 lba, u32 len) 8755212e11fSVishal Verma { 8765212e11fSVishal Verma int ret; 8775212e11fSVishal Verma u64 nsoff = to_namespace_offset(arena, lba); 8785212e11fSVishal Verma void *mem = kmap_atomic(page); 8795212e11fSVishal Verma 8805212e11fSVishal Verma ret = arena_read_bytes(arena, nsoff, mem + off, len); 8815212e11fSVishal Verma kunmap_atomic(mem); 8825212e11fSVishal Verma 8835212e11fSVishal Verma return ret; 8845212e11fSVishal Verma } 8855212e11fSVishal Verma 8865212e11fSVishal Verma static int btt_data_write(struct arena_info *arena, u32 lba, 8875212e11fSVishal Verma struct page *page, unsigned int off, u32 len) 8885212e11fSVishal Verma { 8895212e11fSVishal Verma int ret; 8905212e11fSVishal Verma u64 nsoff = to_namespace_offset(arena, lba); 8915212e11fSVishal Verma void *mem = kmap_atomic(page); 8925212e11fSVishal Verma 8935212e11fSVishal Verma ret = arena_write_bytes(arena, nsoff, mem + off, len); 8945212e11fSVishal Verma kunmap_atomic(mem); 8955212e11fSVishal Verma 8965212e11fSVishal Verma return ret; 8975212e11fSVishal Verma } 8985212e11fSVishal Verma 8995212e11fSVishal Verma static void zero_fill_data(struct page *page, unsigned int off, u32 len) 9005212e11fSVishal Verma { 9015212e11fSVishal Verma void *mem = kmap_atomic(page); 9025212e11fSVishal Verma 9035212e11fSVishal Verma memset(mem + off, 0, len); 9045212e11fSVishal Verma kunmap_atomic(mem); 9055212e11fSVishal Verma } 9065212e11fSVishal Verma 90741cd8b70SVishal Verma #ifdef CONFIG_BLK_DEV_INTEGRITY 90841cd8b70SVishal Verma static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip, 90941cd8b70SVishal Verma struct arena_info *arena, u32 postmap, int rw) 91041cd8b70SVishal Verma { 91141cd8b70SVishal Verma unsigned int len = btt_meta_size(btt); 91241cd8b70SVishal Verma u64 meta_nsoff; 91341cd8b70SVishal Verma int ret = 0; 91441cd8b70SVishal Verma 91541cd8b70SVishal Verma if (bip == NULL) 91641cd8b70SVishal Verma return 0; 91741cd8b70SVishal Verma 91841cd8b70SVishal Verma meta_nsoff = to_namespace_offset(arena, postmap) + btt->sector_size; 91941cd8b70SVishal Verma 92041cd8b70SVishal Verma while (len) { 92141cd8b70SVishal Verma unsigned int cur_len; 92241cd8b70SVishal Verma struct bio_vec bv; 92341cd8b70SVishal Verma void *mem; 92441cd8b70SVishal Verma 92541cd8b70SVishal Verma bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter); 92641cd8b70SVishal Verma /* 92741cd8b70SVishal Verma * The 'bv' obtained from bvec_iter_bvec has its .bv_len and 92841cd8b70SVishal Verma * .bv_offset already adjusted for iter->bi_bvec_done, and we 92941cd8b70SVishal Verma * can use those directly 93041cd8b70SVishal Verma */ 93141cd8b70SVishal Verma 93241cd8b70SVishal Verma cur_len = min(len, bv.bv_len); 93341cd8b70SVishal Verma mem = kmap_atomic(bv.bv_page); 93441cd8b70SVishal Verma if (rw) 93541cd8b70SVishal Verma ret = arena_write_bytes(arena, meta_nsoff, 93641cd8b70SVishal Verma mem + bv.bv_offset, cur_len); 93741cd8b70SVishal Verma else 93841cd8b70SVishal Verma ret = arena_read_bytes(arena, meta_nsoff, 93941cd8b70SVishal Verma mem + bv.bv_offset, cur_len); 94041cd8b70SVishal Verma 94141cd8b70SVishal Verma kunmap_atomic(mem); 94241cd8b70SVishal Verma if (ret) 94341cd8b70SVishal Verma return ret; 94441cd8b70SVishal Verma 94541cd8b70SVishal Verma len -= cur_len; 94641cd8b70SVishal Verma meta_nsoff += cur_len; 94741cd8b70SVishal Verma bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len); 94841cd8b70SVishal Verma } 94941cd8b70SVishal Verma 95041cd8b70SVishal Verma return ret; 95141cd8b70SVishal Verma } 95241cd8b70SVishal Verma 95341cd8b70SVishal Verma #else /* CONFIG_BLK_DEV_INTEGRITY */ 95441cd8b70SVishal Verma static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip, 95541cd8b70SVishal Verma struct arena_info *arena, u32 postmap, int rw) 95641cd8b70SVishal Verma { 95741cd8b70SVishal Verma return 0; 95841cd8b70SVishal Verma } 95941cd8b70SVishal Verma #endif 96041cd8b70SVishal Verma 96141cd8b70SVishal Verma static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip, 96241cd8b70SVishal Verma struct page *page, unsigned int off, sector_t sector, 96341cd8b70SVishal Verma unsigned int len) 9645212e11fSVishal Verma { 9655212e11fSVishal Verma int ret = 0; 9665212e11fSVishal Verma int t_flag, e_flag; 9675212e11fSVishal Verma struct arena_info *arena = NULL; 9685212e11fSVishal Verma u32 lane = 0, premap, postmap; 9695212e11fSVishal Verma 9705212e11fSVishal Verma while (len) { 9715212e11fSVishal Verma u32 cur_len; 9725212e11fSVishal Verma 9735212e11fSVishal Verma lane = nd_region_acquire_lane(btt->nd_region); 9745212e11fSVishal Verma 9755212e11fSVishal Verma ret = lba_to_arena(btt, sector, &premap, &arena); 9765212e11fSVishal Verma if (ret) 9775212e11fSVishal Verma goto out_lane; 9785212e11fSVishal Verma 9795212e11fSVishal Verma cur_len = min(btt->sector_size, len); 9805212e11fSVishal Verma 9815212e11fSVishal Verma ret = btt_map_read(arena, premap, &postmap, &t_flag, &e_flag); 9825212e11fSVishal Verma if (ret) 9835212e11fSVishal Verma goto out_lane; 9845212e11fSVishal Verma 9855212e11fSVishal Verma /* 9865212e11fSVishal Verma * We loop to make sure that the post map LBA didn't change 9875212e11fSVishal Verma * from under us between writing the RTT and doing the actual 9885212e11fSVishal Verma * read. 9895212e11fSVishal Verma */ 9905212e11fSVishal Verma while (1) { 9915212e11fSVishal Verma u32 new_map; 9925212e11fSVishal Verma 9935212e11fSVishal Verma if (t_flag) { 9945212e11fSVishal Verma zero_fill_data(page, off, cur_len); 9955212e11fSVishal Verma goto out_lane; 9965212e11fSVishal Verma } 9975212e11fSVishal Verma 9985212e11fSVishal Verma if (e_flag) { 9995212e11fSVishal Verma ret = -EIO; 10005212e11fSVishal Verma goto out_lane; 10015212e11fSVishal Verma } 10025212e11fSVishal Verma 10035212e11fSVishal Verma arena->rtt[lane] = RTT_VALID | postmap; 10045212e11fSVishal Verma /* 10055212e11fSVishal Verma * Barrier to make sure this write is not reordered 10065212e11fSVishal Verma * to do the verification map_read before the RTT store 10075212e11fSVishal Verma */ 10085212e11fSVishal Verma barrier(); 10095212e11fSVishal Verma 10105212e11fSVishal Verma ret = btt_map_read(arena, premap, &new_map, &t_flag, 10115212e11fSVishal Verma &e_flag); 10125212e11fSVishal Verma if (ret) 10135212e11fSVishal Verma goto out_rtt; 10145212e11fSVishal Verma 10155212e11fSVishal Verma if (postmap == new_map) 10165212e11fSVishal Verma break; 10175212e11fSVishal Verma 10185212e11fSVishal Verma postmap = new_map; 10195212e11fSVishal Verma } 10205212e11fSVishal Verma 10215212e11fSVishal Verma ret = btt_data_read(arena, page, off, postmap, cur_len); 10225212e11fSVishal Verma if (ret) 10235212e11fSVishal Verma goto out_rtt; 10245212e11fSVishal Verma 102541cd8b70SVishal Verma if (bip) { 102641cd8b70SVishal Verma ret = btt_rw_integrity(btt, bip, arena, postmap, READ); 102741cd8b70SVishal Verma if (ret) 102841cd8b70SVishal Verma goto out_rtt; 102941cd8b70SVishal Verma } 103041cd8b70SVishal Verma 10315212e11fSVishal Verma arena->rtt[lane] = RTT_INVALID; 10325212e11fSVishal Verma nd_region_release_lane(btt->nd_region, lane); 10335212e11fSVishal Verma 10345212e11fSVishal Verma len -= cur_len; 10355212e11fSVishal Verma off += cur_len; 10365212e11fSVishal Verma sector += btt->sector_size >> SECTOR_SHIFT; 10375212e11fSVishal Verma } 10385212e11fSVishal Verma 10395212e11fSVishal Verma return 0; 10405212e11fSVishal Verma 10415212e11fSVishal Verma out_rtt: 10425212e11fSVishal Verma arena->rtt[lane] = RTT_INVALID; 10435212e11fSVishal Verma out_lane: 10445212e11fSVishal Verma nd_region_release_lane(btt->nd_region, lane); 10455212e11fSVishal Verma return ret; 10465212e11fSVishal Verma } 10475212e11fSVishal Verma 104841cd8b70SVishal Verma static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, 104941cd8b70SVishal Verma sector_t sector, struct page *page, unsigned int off, 105041cd8b70SVishal Verma unsigned int len) 10515212e11fSVishal Verma { 10525212e11fSVishal Verma int ret = 0; 10535212e11fSVishal Verma struct arena_info *arena = NULL; 10545212e11fSVishal Verma u32 premap = 0, old_postmap, new_postmap, lane = 0, i; 10555212e11fSVishal Verma struct log_entry log; 10565212e11fSVishal Verma int sub; 10575212e11fSVishal Verma 10585212e11fSVishal Verma while (len) { 10595212e11fSVishal Verma u32 cur_len; 10605212e11fSVishal Verma 10615212e11fSVishal Verma lane = nd_region_acquire_lane(btt->nd_region); 10625212e11fSVishal Verma 10635212e11fSVishal Verma ret = lba_to_arena(btt, sector, &premap, &arena); 10645212e11fSVishal Verma if (ret) 10655212e11fSVishal Verma goto out_lane; 10665212e11fSVishal Verma cur_len = min(btt->sector_size, len); 10675212e11fSVishal Verma 10685212e11fSVishal Verma if ((arena->flags & IB_FLAG_ERROR_MASK) != 0) { 10695212e11fSVishal Verma ret = -EIO; 10705212e11fSVishal Verma goto out_lane; 10715212e11fSVishal Verma } 10725212e11fSVishal Verma 10735212e11fSVishal Verma new_postmap = arena->freelist[lane].block; 10745212e11fSVishal Verma 10755212e11fSVishal Verma /* Wait if the new block is being read from */ 10765212e11fSVishal Verma for (i = 0; i < arena->nfree; i++) 10775212e11fSVishal Verma while (arena->rtt[i] == (RTT_VALID | new_postmap)) 10785212e11fSVishal Verma cpu_relax(); 10795212e11fSVishal Verma 10805212e11fSVishal Verma 10815212e11fSVishal Verma if (new_postmap >= arena->internal_nlba) { 10825212e11fSVishal Verma ret = -EIO; 10835212e11fSVishal Verma goto out_lane; 108441cd8b70SVishal Verma } 108541cd8b70SVishal Verma 108641cd8b70SVishal Verma ret = btt_data_write(arena, new_postmap, page, off, cur_len); 10875212e11fSVishal Verma if (ret) 10885212e11fSVishal Verma goto out_lane; 10895212e11fSVishal Verma 109041cd8b70SVishal Verma if (bip) { 109141cd8b70SVishal Verma ret = btt_rw_integrity(btt, bip, arena, new_postmap, 109241cd8b70SVishal Verma WRITE); 109341cd8b70SVishal Verma if (ret) 109441cd8b70SVishal Verma goto out_lane; 109541cd8b70SVishal Verma } 109641cd8b70SVishal Verma 10975212e11fSVishal Verma lock_map(arena, premap); 10985212e11fSVishal Verma ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL); 10995212e11fSVishal Verma if (ret) 11005212e11fSVishal Verma goto out_map; 11015212e11fSVishal Verma if (old_postmap >= arena->internal_nlba) { 11025212e11fSVishal Verma ret = -EIO; 11035212e11fSVishal Verma goto out_map; 11045212e11fSVishal Verma } 11055212e11fSVishal Verma 11065212e11fSVishal Verma log.lba = cpu_to_le32(premap); 11075212e11fSVishal Verma log.old_map = cpu_to_le32(old_postmap); 11085212e11fSVishal Verma log.new_map = cpu_to_le32(new_postmap); 11095212e11fSVishal Verma log.seq = cpu_to_le32(arena->freelist[lane].seq); 11105212e11fSVishal Verma sub = arena->freelist[lane].sub; 11115212e11fSVishal Verma ret = btt_flog_write(arena, lane, sub, &log); 11125212e11fSVishal Verma if (ret) 11135212e11fSVishal Verma goto out_map; 11145212e11fSVishal Verma 11155212e11fSVishal Verma ret = btt_map_write(arena, premap, new_postmap, 0, 0); 11165212e11fSVishal Verma if (ret) 11175212e11fSVishal Verma goto out_map; 11185212e11fSVishal Verma 11195212e11fSVishal Verma unlock_map(arena, premap); 11205212e11fSVishal Verma nd_region_release_lane(btt->nd_region, lane); 11215212e11fSVishal Verma 11225212e11fSVishal Verma len -= cur_len; 11235212e11fSVishal Verma off += cur_len; 11245212e11fSVishal Verma sector += btt->sector_size >> SECTOR_SHIFT; 11255212e11fSVishal Verma } 11265212e11fSVishal Verma 11275212e11fSVishal Verma return 0; 11285212e11fSVishal Verma 11295212e11fSVishal Verma out_map: 11305212e11fSVishal Verma unlock_map(arena, premap); 11315212e11fSVishal Verma out_lane: 11325212e11fSVishal Verma nd_region_release_lane(btt->nd_region, lane); 11335212e11fSVishal Verma return ret; 11345212e11fSVishal Verma } 11355212e11fSVishal Verma 113641cd8b70SVishal Verma static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip, 113741cd8b70SVishal Verma struct page *page, unsigned int len, unsigned int off, 113841cd8b70SVishal Verma int rw, sector_t sector) 11395212e11fSVishal Verma { 11405212e11fSVishal Verma int ret; 11415212e11fSVishal Verma 11425212e11fSVishal Verma if (rw == READ) { 114341cd8b70SVishal Verma ret = btt_read_pg(btt, bip, page, off, sector, len); 11445212e11fSVishal Verma flush_dcache_page(page); 11455212e11fSVishal Verma } else { 11465212e11fSVishal Verma flush_dcache_page(page); 114741cd8b70SVishal Verma ret = btt_write_pg(btt, bip, sector, page, off, len); 11485212e11fSVishal Verma } 11495212e11fSVishal Verma 11505212e11fSVishal Verma return ret; 11515212e11fSVishal Verma } 11525212e11fSVishal Verma 11535212e11fSVishal Verma static void btt_make_request(struct request_queue *q, struct bio *bio) 11545212e11fSVishal Verma { 115541cd8b70SVishal Verma struct bio_integrity_payload *bip = bio_integrity(bio); 11565212e11fSVishal Verma struct btt *btt = q->queuedata; 11575212e11fSVishal Verma struct bvec_iter iter; 1158f0dc089cSDan Williams unsigned long start; 11595212e11fSVishal Verma struct bio_vec bvec; 11605212e11fSVishal Verma int err = 0, rw; 1161f0dc089cSDan Williams bool do_acct; 11625212e11fSVishal Verma 116341cd8b70SVishal Verma /* 116441cd8b70SVishal Verma * bio_integrity_enabled also checks if the bio already has an 116541cd8b70SVishal Verma * integrity payload attached. If it does, we *don't* do a 116641cd8b70SVishal Verma * bio_integrity_prep here - the payload has been generated by 116741cd8b70SVishal Verma * another kernel subsystem, and we just pass it through. 116841cd8b70SVishal Verma */ 116941cd8b70SVishal Verma if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { 117041cd8b70SVishal Verma err = -EIO; 117141cd8b70SVishal Verma goto out; 117241cd8b70SVishal Verma } 117341cd8b70SVishal Verma 1174f0dc089cSDan Williams do_acct = nd_iostat_start(bio, &start); 11755212e11fSVishal Verma rw = bio_data_dir(bio); 11765212e11fSVishal Verma bio_for_each_segment(bvec, bio, iter) { 11775212e11fSVishal Verma unsigned int len = bvec.bv_len; 11785212e11fSVishal Verma 11795212e11fSVishal Verma BUG_ON(len > PAGE_SIZE); 11805212e11fSVishal Verma /* Make sure len is in multiples of sector size. */ 11815212e11fSVishal Verma /* XXX is this right? */ 11825212e11fSVishal Verma BUG_ON(len < btt->sector_size); 11835212e11fSVishal Verma BUG_ON(len % btt->sector_size); 11845212e11fSVishal Verma 118541cd8b70SVishal Verma err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset, 11865212e11fSVishal Verma rw, iter.bi_sector); 11875212e11fSVishal Verma if (err) { 11885212e11fSVishal Verma dev_info(&btt->nd_btt->dev, 11895212e11fSVishal Verma "io error in %s sector %lld, len %d,\n", 11905212e11fSVishal Verma (rw == READ) ? "READ" : "WRITE", 11915212e11fSVishal Verma (unsigned long long) iter.bi_sector, len); 1192f0dc089cSDan Williams break; 11935212e11fSVishal Verma } 11945212e11fSVishal Verma } 1195f0dc089cSDan Williams if (do_acct) 1196f0dc089cSDan Williams nd_iostat_end(bio, start); 11975212e11fSVishal Verma 11985212e11fSVishal Verma out: 11995212e11fSVishal Verma bio_endio(bio, err); 12005212e11fSVishal Verma } 12015212e11fSVishal Verma 12025212e11fSVishal Verma static int btt_rw_page(struct block_device *bdev, sector_t sector, 12035212e11fSVishal Verma struct page *page, int rw) 12045212e11fSVishal Verma { 12055212e11fSVishal Verma struct btt *btt = bdev->bd_disk->private_data; 12065212e11fSVishal Verma 120741cd8b70SVishal Verma btt_do_bvec(btt, NULL, page, PAGE_CACHE_SIZE, 0, rw, sector); 12085212e11fSVishal Verma page_endio(page, rw & WRITE, 0); 12095212e11fSVishal Verma return 0; 12105212e11fSVishal Verma } 12115212e11fSVishal Verma 12125212e11fSVishal Verma 12135212e11fSVishal Verma static int btt_getgeo(struct block_device *bd, struct hd_geometry *geo) 12145212e11fSVishal Verma { 12155212e11fSVishal Verma /* some standard values */ 12165212e11fSVishal Verma geo->heads = 1 << 6; 12175212e11fSVishal Verma geo->sectors = 1 << 5; 12185212e11fSVishal Verma geo->cylinders = get_capacity(bd->bd_disk) >> 11; 12195212e11fSVishal Verma return 0; 12205212e11fSVishal Verma } 12215212e11fSVishal Verma 12225212e11fSVishal Verma static const struct block_device_operations btt_fops = { 12235212e11fSVishal Verma .owner = THIS_MODULE, 12245212e11fSVishal Verma .rw_page = btt_rw_page, 12255212e11fSVishal Verma .getgeo = btt_getgeo, 122658138820SDan Williams .revalidate_disk = nvdimm_revalidate_disk, 12275212e11fSVishal Verma }; 12285212e11fSVishal Verma 12295212e11fSVishal Verma static int btt_blk_init(struct btt *btt) 12305212e11fSVishal Verma { 12315212e11fSVishal Verma struct nd_btt *nd_btt = btt->nd_btt; 12325212e11fSVishal Verma struct nd_namespace_common *ndns = nd_btt->ndns; 12335212e11fSVishal Verma 12345212e11fSVishal Verma /* create a new disk and request queue for btt */ 12355212e11fSVishal Verma btt->btt_queue = blk_alloc_queue(GFP_KERNEL); 12365212e11fSVishal Verma if (!btt->btt_queue) 12375212e11fSVishal Verma return -ENOMEM; 12385212e11fSVishal Verma 12395212e11fSVishal Verma btt->btt_disk = alloc_disk(0); 12405212e11fSVishal Verma if (!btt->btt_disk) { 12415212e11fSVishal Verma blk_cleanup_queue(btt->btt_queue); 12425212e11fSVishal Verma return -ENOMEM; 12435212e11fSVishal Verma } 12445212e11fSVishal Verma 12455212e11fSVishal Verma nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name); 12465212e11fSVishal Verma btt->btt_disk->driverfs_dev = &btt->nd_btt->dev; 12475212e11fSVishal Verma btt->btt_disk->major = btt_major; 12485212e11fSVishal Verma btt->btt_disk->first_minor = 0; 12495212e11fSVishal Verma btt->btt_disk->fops = &btt_fops; 12505212e11fSVishal Verma btt->btt_disk->private_data = btt; 12515212e11fSVishal Verma btt->btt_disk->queue = btt->btt_queue; 12525212e11fSVishal Verma btt->btt_disk->flags = GENHD_FL_EXT_DEVT; 12535212e11fSVishal Verma 12545212e11fSVishal Verma blk_queue_make_request(btt->btt_queue, btt_make_request); 12555212e11fSVishal Verma blk_queue_logical_block_size(btt->btt_queue, btt->sector_size); 12565212e11fSVishal Verma blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX); 12575212e11fSVishal Verma blk_queue_bounce_limit(btt->btt_queue, BLK_BOUNCE_ANY); 12585212e11fSVishal Verma queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue); 12595212e11fSVishal Verma btt->btt_queue->queuedata = btt; 12605212e11fSVishal Verma 126141cd8b70SVishal Verma set_capacity(btt->btt_disk, 0); 12625212e11fSVishal Verma add_disk(btt->btt_disk); 126341cd8b70SVishal Verma if (btt_meta_size(btt)) { 126441cd8b70SVishal Verma int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt)); 126541cd8b70SVishal Verma 126641cd8b70SVishal Verma if (rc) { 126741cd8b70SVishal Verma del_gendisk(btt->btt_disk); 126841cd8b70SVishal Verma put_disk(btt->btt_disk); 126941cd8b70SVishal Verma blk_cleanup_queue(btt->btt_queue); 127041cd8b70SVishal Verma return rc; 127141cd8b70SVishal Verma } 127241cd8b70SVishal Verma } 127341cd8b70SVishal Verma set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9); 127458138820SDan Williams revalidate_disk(btt->btt_disk); 12755212e11fSVishal Verma 12765212e11fSVishal Verma return 0; 12775212e11fSVishal Verma } 12785212e11fSVishal Verma 12795212e11fSVishal Verma static void btt_blk_cleanup(struct btt *btt) 12805212e11fSVishal Verma { 128141cd8b70SVishal Verma blk_integrity_unregister(btt->btt_disk); 12825212e11fSVishal Verma del_gendisk(btt->btt_disk); 12835212e11fSVishal Verma put_disk(btt->btt_disk); 12845212e11fSVishal Verma blk_cleanup_queue(btt->btt_queue); 12855212e11fSVishal Verma } 12865212e11fSVishal Verma 12875212e11fSVishal Verma /** 12885212e11fSVishal Verma * btt_init - initialize a block translation table for the given device 12895212e11fSVishal Verma * @nd_btt: device with BTT geometry and backing device info 12905212e11fSVishal Verma * @rawsize: raw size in bytes of the backing device 12915212e11fSVishal Verma * @lbasize: lba size of the backing device 12925212e11fSVishal Verma * @uuid: A uuid for the backing device - this is stored on media 12935212e11fSVishal Verma * @maxlane: maximum number of parallel requests the device can handle 12945212e11fSVishal Verma * 12955212e11fSVishal Verma * Initialize a Block Translation Table on a backing device to provide 12965212e11fSVishal Verma * single sector power fail atomicity. 12975212e11fSVishal Verma * 12985212e11fSVishal Verma * Context: 12995212e11fSVishal Verma * Might sleep. 13005212e11fSVishal Verma * 13015212e11fSVishal Verma * Returns: 13025212e11fSVishal Verma * Pointer to a new struct btt on success, NULL on failure. 13035212e11fSVishal Verma */ 13045212e11fSVishal Verma static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize, 13055212e11fSVishal Verma u32 lbasize, u8 *uuid, struct nd_region *nd_region) 13065212e11fSVishal Verma { 13075212e11fSVishal Verma int ret; 13085212e11fSVishal Verma struct btt *btt; 13095212e11fSVishal Verma struct device *dev = &nd_btt->dev; 13105212e11fSVishal Verma 13115212e11fSVishal Verma btt = kzalloc(sizeof(struct btt), GFP_KERNEL); 13125212e11fSVishal Verma if (!btt) 13135212e11fSVishal Verma return NULL; 13145212e11fSVishal Verma 13155212e11fSVishal Verma btt->nd_btt = nd_btt; 13165212e11fSVishal Verma btt->rawsize = rawsize; 13175212e11fSVishal Verma btt->lbasize = lbasize; 13185212e11fSVishal Verma btt->sector_size = ((lbasize >= 4096) ? 4096 : 512); 13195212e11fSVishal Verma INIT_LIST_HEAD(&btt->arena_list); 13205212e11fSVishal Verma mutex_init(&btt->init_lock); 13215212e11fSVishal Verma btt->nd_region = nd_region; 13225212e11fSVishal Verma 13235212e11fSVishal Verma ret = discover_arenas(btt); 13245212e11fSVishal Verma if (ret) { 13255212e11fSVishal Verma dev_err(dev, "init: error in arena_discover: %d\n", ret); 13265212e11fSVishal Verma goto out_free; 13275212e11fSVishal Verma } 13285212e11fSVishal Verma 132958138820SDan Williams if (btt->init_state != INIT_READY && nd_region->ro) { 133058138820SDan Williams dev_info(dev, "%s is read-only, unable to init btt metadata\n", 133158138820SDan Williams dev_name(&nd_region->dev)); 133258138820SDan Williams goto out_free; 133358138820SDan Williams } else if (btt->init_state != INIT_READY) { 13345212e11fSVishal Verma btt->num_arenas = (rawsize / ARENA_MAX_SIZE) + 13355212e11fSVishal Verma ((rawsize % ARENA_MAX_SIZE) ? 1 : 0); 13365212e11fSVishal Verma dev_dbg(dev, "init: %d arenas for %llu rawsize\n", 13375212e11fSVishal Verma btt->num_arenas, rawsize); 13385212e11fSVishal Verma 13395212e11fSVishal Verma ret = create_arenas(btt); 13405212e11fSVishal Verma if (ret) { 13415212e11fSVishal Verma dev_info(dev, "init: create_arenas: %d\n", ret); 13425212e11fSVishal Verma goto out_free; 13435212e11fSVishal Verma } 13445212e11fSVishal Verma 13455212e11fSVishal Verma ret = btt_meta_init(btt); 13465212e11fSVishal Verma if (ret) { 13475212e11fSVishal Verma dev_err(dev, "init: error in meta_init: %d\n", ret); 134858138820SDan Williams goto out_free; 13495212e11fSVishal Verma } 13505212e11fSVishal Verma } 13515212e11fSVishal Verma 13525212e11fSVishal Verma ret = btt_blk_init(btt); 13535212e11fSVishal Verma if (ret) { 13545212e11fSVishal Verma dev_err(dev, "init: error in blk_init: %d\n", ret); 13555212e11fSVishal Verma goto out_free; 13565212e11fSVishal Verma } 13575212e11fSVishal Verma 13585212e11fSVishal Verma btt_debugfs_init(btt); 13595212e11fSVishal Verma 13605212e11fSVishal Verma return btt; 13615212e11fSVishal Verma 13625212e11fSVishal Verma out_free: 13635212e11fSVishal Verma kfree(btt); 13645212e11fSVishal Verma return NULL; 13655212e11fSVishal Verma } 13665212e11fSVishal Verma 13675212e11fSVishal Verma /** 13685212e11fSVishal Verma * btt_fini - de-initialize a BTT 13695212e11fSVishal Verma * @btt: the BTT handle that was generated by btt_init 13705212e11fSVishal Verma * 13715212e11fSVishal Verma * De-initialize a Block Translation Table on device removal 13725212e11fSVishal Verma * 13735212e11fSVishal Verma * Context: 13745212e11fSVishal Verma * Might sleep. 13755212e11fSVishal Verma */ 13765212e11fSVishal Verma static void btt_fini(struct btt *btt) 13775212e11fSVishal Verma { 13785212e11fSVishal Verma if (btt) { 13795212e11fSVishal Verma btt_blk_cleanup(btt); 13805212e11fSVishal Verma free_arenas(btt); 13815212e11fSVishal Verma debugfs_remove_recursive(btt->debugfs_dir); 13825212e11fSVishal Verma kfree(btt); 13835212e11fSVishal Verma } 13845212e11fSVishal Verma } 13855212e11fSVishal Verma 13865212e11fSVishal Verma int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns) 13875212e11fSVishal Verma { 13885212e11fSVishal Verma struct nd_btt *nd_btt = to_nd_btt(ndns->claim); 13895212e11fSVishal Verma struct nd_region *nd_region; 13905212e11fSVishal Verma struct btt *btt; 13915212e11fSVishal Verma size_t rawsize; 13925212e11fSVishal Verma 13935212e11fSVishal Verma if (!nd_btt->uuid || !nd_btt->ndns || !nd_btt->lbasize) 13945212e11fSVishal Verma return -ENODEV; 13955212e11fSVishal Verma 13965212e11fSVishal Verma rawsize = nvdimm_namespace_capacity(ndns) - SZ_4K; 13975212e11fSVishal Verma if (rawsize < ARENA_MIN_SIZE) { 13985212e11fSVishal Verma return -ENXIO; 13995212e11fSVishal Verma } 14005212e11fSVishal Verma nd_region = to_nd_region(nd_btt->dev.parent); 14015212e11fSVishal Verma btt = btt_init(nd_btt, rawsize, nd_btt->lbasize, nd_btt->uuid, 14025212e11fSVishal Verma nd_region); 14035212e11fSVishal Verma if (!btt) 14045212e11fSVishal Verma return -ENOMEM; 14055212e11fSVishal Verma nd_btt->btt = btt; 14065212e11fSVishal Verma 14075212e11fSVishal Verma return 0; 14085212e11fSVishal Verma } 14095212e11fSVishal Verma EXPORT_SYMBOL(nvdimm_namespace_attach_btt); 14105212e11fSVishal Verma 14115212e11fSVishal Verma int nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns) 14125212e11fSVishal Verma { 14135212e11fSVishal Verma struct nd_btt *nd_btt = to_nd_btt(ndns->claim); 14145212e11fSVishal Verma struct btt *btt = nd_btt->btt; 14155212e11fSVishal Verma 14165212e11fSVishal Verma btt_fini(btt); 14175212e11fSVishal Verma nd_btt->btt = NULL; 14185212e11fSVishal Verma 14195212e11fSVishal Verma return 0; 14205212e11fSVishal Verma } 14215212e11fSVishal Verma EXPORT_SYMBOL(nvdimm_namespace_detach_btt); 14225212e11fSVishal Verma 14235212e11fSVishal Verma static int __init nd_btt_init(void) 14245212e11fSVishal Verma { 14255212e11fSVishal Verma int rc; 14265212e11fSVishal Verma 14275212e11fSVishal Verma btt_major = register_blkdev(0, "btt"); 14285212e11fSVishal Verma if (btt_major < 0) 14295212e11fSVishal Verma return btt_major; 14305212e11fSVishal Verma 14315212e11fSVishal Verma debugfs_root = debugfs_create_dir("btt", NULL); 14325212e11fSVishal Verma if (IS_ERR_OR_NULL(debugfs_root)) { 14335212e11fSVishal Verma rc = -ENXIO; 14345212e11fSVishal Verma goto err_debugfs; 14355212e11fSVishal Verma } 14365212e11fSVishal Verma 14375212e11fSVishal Verma return 0; 14385212e11fSVishal Verma 14395212e11fSVishal Verma err_debugfs: 14405212e11fSVishal Verma unregister_blkdev(btt_major, "btt"); 14415212e11fSVishal Verma 14425212e11fSVishal Verma return rc; 14435212e11fSVishal Verma } 14445212e11fSVishal Verma 14455212e11fSVishal Verma static void __exit nd_btt_exit(void) 14465212e11fSVishal Verma { 14475212e11fSVishal Verma debugfs_remove_recursive(debugfs_root); 14485212e11fSVishal Verma unregister_blkdev(btt_major, "btt"); 14495212e11fSVishal Verma } 14505212e11fSVishal Verma 14515212e11fSVishal Verma MODULE_ALIAS_ND_DEVICE(ND_DEVICE_BTT); 14525212e11fSVishal Verma MODULE_AUTHOR("Vishal Verma <vishal.l.verma@linux.intel.com>"); 14535212e11fSVishal Verma MODULE_LICENSE("GPL v2"); 14545212e11fSVishal Verma module_init(nd_btt_init); 14555212e11fSVishal Verma module_exit(nd_btt_exit); 1456