1*5212e11fSVishal Verma /* 2*5212e11fSVishal Verma * Block Translation Table 3*5212e11fSVishal Verma * Copyright (c) 2014-2015, Intel Corporation. 4*5212e11fSVishal Verma * 5*5212e11fSVishal Verma * This program is free software; you can redistribute it and/or modify it 6*5212e11fSVishal Verma * under the terms and conditions of the GNU General Public License, 7*5212e11fSVishal Verma * version 2, as published by the Free Software Foundation. 8*5212e11fSVishal Verma * 9*5212e11fSVishal Verma * This program is distributed in the hope it will be useful, but WITHOUT 10*5212e11fSVishal Verma * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11*5212e11fSVishal Verma * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 12*5212e11fSVishal Verma * more details. 13*5212e11fSVishal Verma */ 14*5212e11fSVishal Verma #include <linux/highmem.h> 15*5212e11fSVishal Verma #include <linux/debugfs.h> 16*5212e11fSVishal Verma #include <linux/blkdev.h> 17*5212e11fSVishal Verma #include <linux/module.h> 18*5212e11fSVishal Verma #include <linux/device.h> 19*5212e11fSVishal Verma #include <linux/mutex.h> 20*5212e11fSVishal Verma #include <linux/hdreg.h> 21*5212e11fSVishal Verma #include <linux/genhd.h> 22*5212e11fSVishal Verma #include <linux/sizes.h> 23*5212e11fSVishal Verma #include <linux/ndctl.h> 24*5212e11fSVishal Verma #include <linux/fs.h> 25*5212e11fSVishal Verma #include <linux/nd.h> 26*5212e11fSVishal Verma #include "btt.h" 27*5212e11fSVishal Verma #include "nd.h" 28*5212e11fSVishal Verma 29*5212e11fSVishal Verma enum log_ent_request { 30*5212e11fSVishal Verma LOG_NEW_ENT = 0, 31*5212e11fSVishal Verma LOG_OLD_ENT 32*5212e11fSVishal Verma }; 33*5212e11fSVishal Verma 34*5212e11fSVishal Verma static int btt_major; 35*5212e11fSVishal Verma 36*5212e11fSVishal Verma static int arena_read_bytes(struct arena_info *arena, resource_size_t offset, 37*5212e11fSVishal Verma void *buf, size_t n) 38*5212e11fSVishal Verma { 39*5212e11fSVishal Verma struct nd_btt *nd_btt = arena->nd_btt; 40*5212e11fSVishal Verma struct nd_namespace_common *ndns = nd_btt->ndns; 41*5212e11fSVishal Verma 42*5212e11fSVishal Verma /* arena offsets are 4K from the base of the device */ 43*5212e11fSVishal Verma offset += SZ_4K; 44*5212e11fSVishal Verma return nvdimm_read_bytes(ndns, offset, buf, n); 45*5212e11fSVishal Verma } 46*5212e11fSVishal Verma 47*5212e11fSVishal Verma static int arena_write_bytes(struct arena_info *arena, resource_size_t offset, 48*5212e11fSVishal Verma void *buf, size_t n) 49*5212e11fSVishal Verma { 50*5212e11fSVishal Verma struct nd_btt *nd_btt = arena->nd_btt; 51*5212e11fSVishal Verma struct nd_namespace_common *ndns = nd_btt->ndns; 52*5212e11fSVishal Verma 53*5212e11fSVishal Verma /* arena offsets are 4K from the base of the device */ 54*5212e11fSVishal Verma offset += SZ_4K; 55*5212e11fSVishal Verma return nvdimm_write_bytes(ndns, offset, buf, n); 56*5212e11fSVishal Verma } 57*5212e11fSVishal Verma 58*5212e11fSVishal Verma static int btt_info_write(struct arena_info *arena, struct btt_sb *super) 59*5212e11fSVishal Verma { 60*5212e11fSVishal Verma int ret; 61*5212e11fSVishal Verma 62*5212e11fSVishal Verma ret = arena_write_bytes(arena, arena->info2off, super, 63*5212e11fSVishal Verma sizeof(struct btt_sb)); 64*5212e11fSVishal Verma if (ret) 65*5212e11fSVishal Verma return ret; 66*5212e11fSVishal Verma 67*5212e11fSVishal Verma return arena_write_bytes(arena, arena->infooff, super, 68*5212e11fSVishal Verma sizeof(struct btt_sb)); 69*5212e11fSVishal Verma } 70*5212e11fSVishal Verma 71*5212e11fSVishal Verma static int btt_info_read(struct arena_info *arena, struct btt_sb *super) 72*5212e11fSVishal Verma { 73*5212e11fSVishal Verma WARN_ON(!super); 74*5212e11fSVishal Verma return arena_read_bytes(arena, arena->infooff, super, 75*5212e11fSVishal Verma sizeof(struct btt_sb)); 76*5212e11fSVishal Verma } 77*5212e11fSVishal Verma 78*5212e11fSVishal Verma /* 79*5212e11fSVishal Verma * 'raw' version of btt_map write 80*5212e11fSVishal Verma * Assumptions: 81*5212e11fSVishal Verma * mapping is in little-endian 82*5212e11fSVishal Verma * mapping contains 'E' and 'Z' flags as desired 83*5212e11fSVishal Verma */ 84*5212e11fSVishal Verma static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping) 85*5212e11fSVishal Verma { 86*5212e11fSVishal Verma u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE); 87*5212e11fSVishal Verma 88*5212e11fSVishal Verma WARN_ON(lba >= arena->external_nlba); 89*5212e11fSVishal Verma return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE); 90*5212e11fSVishal Verma } 91*5212e11fSVishal Verma 92*5212e11fSVishal Verma static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping, 93*5212e11fSVishal Verma u32 z_flag, u32 e_flag) 94*5212e11fSVishal Verma { 95*5212e11fSVishal Verma u32 ze; 96*5212e11fSVishal Verma __le32 mapping_le; 97*5212e11fSVishal Verma 98*5212e11fSVishal Verma /* 99*5212e11fSVishal Verma * This 'mapping' is supposed to be just the LBA mapping, without 100*5212e11fSVishal Verma * any flags set, so strip the flag bits. 101*5212e11fSVishal Verma */ 102*5212e11fSVishal Verma mapping &= MAP_LBA_MASK; 103*5212e11fSVishal Verma 104*5212e11fSVishal Verma ze = (z_flag << 1) + e_flag; 105*5212e11fSVishal Verma switch (ze) { 106*5212e11fSVishal Verma case 0: 107*5212e11fSVishal Verma /* 108*5212e11fSVishal Verma * We want to set neither of the Z or E flags, and 109*5212e11fSVishal Verma * in the actual layout, this means setting the bit 110*5212e11fSVishal Verma * positions of both to '1' to indicate a 'normal' 111*5212e11fSVishal Verma * map entry 112*5212e11fSVishal Verma */ 113*5212e11fSVishal Verma mapping |= MAP_ENT_NORMAL; 114*5212e11fSVishal Verma break; 115*5212e11fSVishal Verma case 1: 116*5212e11fSVishal Verma mapping |= (1 << MAP_ERR_SHIFT); 117*5212e11fSVishal Verma break; 118*5212e11fSVishal Verma case 2: 119*5212e11fSVishal Verma mapping |= (1 << MAP_TRIM_SHIFT); 120*5212e11fSVishal Verma break; 121*5212e11fSVishal Verma default: 122*5212e11fSVishal Verma /* 123*5212e11fSVishal Verma * The case where Z and E are both sent in as '1' could be 124*5212e11fSVishal Verma * construed as a valid 'normal' case, but we decide not to, 125*5212e11fSVishal Verma * to avoid confusion 126*5212e11fSVishal Verma */ 127*5212e11fSVishal Verma WARN_ONCE(1, "Invalid use of Z and E flags\n"); 128*5212e11fSVishal Verma return -EIO; 129*5212e11fSVishal Verma } 130*5212e11fSVishal Verma 131*5212e11fSVishal Verma mapping_le = cpu_to_le32(mapping); 132*5212e11fSVishal Verma return __btt_map_write(arena, lba, mapping_le); 133*5212e11fSVishal Verma } 134*5212e11fSVishal Verma 135*5212e11fSVishal Verma static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping, 136*5212e11fSVishal Verma int *trim, int *error) 137*5212e11fSVishal Verma { 138*5212e11fSVishal Verma int ret; 139*5212e11fSVishal Verma __le32 in; 140*5212e11fSVishal Verma u32 raw_mapping, postmap, ze, z_flag, e_flag; 141*5212e11fSVishal Verma u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE); 142*5212e11fSVishal Verma 143*5212e11fSVishal Verma WARN_ON(lba >= arena->external_nlba); 144*5212e11fSVishal Verma 145*5212e11fSVishal Verma ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE); 146*5212e11fSVishal Verma if (ret) 147*5212e11fSVishal Verma return ret; 148*5212e11fSVishal Verma 149*5212e11fSVishal Verma raw_mapping = le32_to_cpu(in); 150*5212e11fSVishal Verma 151*5212e11fSVishal Verma z_flag = (raw_mapping & MAP_TRIM_MASK) >> MAP_TRIM_SHIFT; 152*5212e11fSVishal Verma e_flag = (raw_mapping & MAP_ERR_MASK) >> MAP_ERR_SHIFT; 153*5212e11fSVishal Verma ze = (z_flag << 1) + e_flag; 154*5212e11fSVishal Verma postmap = raw_mapping & MAP_LBA_MASK; 155*5212e11fSVishal Verma 156*5212e11fSVishal Verma /* Reuse the {z,e}_flag variables for *trim and *error */ 157*5212e11fSVishal Verma z_flag = 0; 158*5212e11fSVishal Verma e_flag = 0; 159*5212e11fSVishal Verma 160*5212e11fSVishal Verma switch (ze) { 161*5212e11fSVishal Verma case 0: 162*5212e11fSVishal Verma /* Initial state. Return postmap = premap */ 163*5212e11fSVishal Verma *mapping = lba; 164*5212e11fSVishal Verma break; 165*5212e11fSVishal Verma case 1: 166*5212e11fSVishal Verma *mapping = postmap; 167*5212e11fSVishal Verma e_flag = 1; 168*5212e11fSVishal Verma break; 169*5212e11fSVishal Verma case 2: 170*5212e11fSVishal Verma *mapping = postmap; 171*5212e11fSVishal Verma z_flag = 1; 172*5212e11fSVishal Verma break; 173*5212e11fSVishal Verma case 3: 174*5212e11fSVishal Verma *mapping = postmap; 175*5212e11fSVishal Verma break; 176*5212e11fSVishal Verma default: 177*5212e11fSVishal Verma return -EIO; 178*5212e11fSVishal Verma } 179*5212e11fSVishal Verma 180*5212e11fSVishal Verma if (trim) 181*5212e11fSVishal Verma *trim = z_flag; 182*5212e11fSVishal Verma if (error) 183*5212e11fSVishal Verma *error = e_flag; 184*5212e11fSVishal Verma 185*5212e11fSVishal Verma return ret; 186*5212e11fSVishal Verma } 187*5212e11fSVishal Verma 188*5212e11fSVishal Verma static int btt_log_read_pair(struct arena_info *arena, u32 lane, 189*5212e11fSVishal Verma struct log_entry *ent) 190*5212e11fSVishal Verma { 191*5212e11fSVishal Verma WARN_ON(!ent); 192*5212e11fSVishal Verma return arena_read_bytes(arena, 193*5212e11fSVishal Verma arena->logoff + (2 * lane * LOG_ENT_SIZE), ent, 194*5212e11fSVishal Verma 2 * LOG_ENT_SIZE); 195*5212e11fSVishal Verma } 196*5212e11fSVishal Verma 197*5212e11fSVishal Verma static struct dentry *debugfs_root; 198*5212e11fSVishal Verma 199*5212e11fSVishal Verma static void arena_debugfs_init(struct arena_info *a, struct dentry *parent, 200*5212e11fSVishal Verma int idx) 201*5212e11fSVishal Verma { 202*5212e11fSVishal Verma char dirname[32]; 203*5212e11fSVishal Verma struct dentry *d; 204*5212e11fSVishal Verma 205*5212e11fSVishal Verma /* If for some reason, parent bttN was not created, exit */ 206*5212e11fSVishal Verma if (!parent) 207*5212e11fSVishal Verma return; 208*5212e11fSVishal Verma 209*5212e11fSVishal Verma snprintf(dirname, 32, "arena%d", idx); 210*5212e11fSVishal Verma d = debugfs_create_dir(dirname, parent); 211*5212e11fSVishal Verma if (IS_ERR_OR_NULL(d)) 212*5212e11fSVishal Verma return; 213*5212e11fSVishal Verma a->debugfs_dir = d; 214*5212e11fSVishal Verma 215*5212e11fSVishal Verma debugfs_create_x64("size", S_IRUGO, d, &a->size); 216*5212e11fSVishal Verma debugfs_create_x64("external_lba_start", S_IRUGO, d, 217*5212e11fSVishal Verma &a->external_lba_start); 218*5212e11fSVishal Verma debugfs_create_x32("internal_nlba", S_IRUGO, d, &a->internal_nlba); 219*5212e11fSVishal Verma debugfs_create_u32("internal_lbasize", S_IRUGO, d, 220*5212e11fSVishal Verma &a->internal_lbasize); 221*5212e11fSVishal Verma debugfs_create_x32("external_nlba", S_IRUGO, d, &a->external_nlba); 222*5212e11fSVishal Verma debugfs_create_u32("external_lbasize", S_IRUGO, d, 223*5212e11fSVishal Verma &a->external_lbasize); 224*5212e11fSVishal Verma debugfs_create_u32("nfree", S_IRUGO, d, &a->nfree); 225*5212e11fSVishal Verma debugfs_create_u16("version_major", S_IRUGO, d, &a->version_major); 226*5212e11fSVishal Verma debugfs_create_u16("version_minor", S_IRUGO, d, &a->version_minor); 227*5212e11fSVishal Verma debugfs_create_x64("nextoff", S_IRUGO, d, &a->nextoff); 228*5212e11fSVishal Verma debugfs_create_x64("infooff", S_IRUGO, d, &a->infooff); 229*5212e11fSVishal Verma debugfs_create_x64("dataoff", S_IRUGO, d, &a->dataoff); 230*5212e11fSVishal Verma debugfs_create_x64("mapoff", S_IRUGO, d, &a->mapoff); 231*5212e11fSVishal Verma debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff); 232*5212e11fSVishal Verma debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off); 233*5212e11fSVishal Verma debugfs_create_x32("flags", S_IRUGO, d, &a->flags); 234*5212e11fSVishal Verma } 235*5212e11fSVishal Verma 236*5212e11fSVishal Verma static void btt_debugfs_init(struct btt *btt) 237*5212e11fSVishal Verma { 238*5212e11fSVishal Verma int i = 0; 239*5212e11fSVishal Verma struct arena_info *arena; 240*5212e11fSVishal Verma 241*5212e11fSVishal Verma btt->debugfs_dir = debugfs_create_dir(dev_name(&btt->nd_btt->dev), 242*5212e11fSVishal Verma debugfs_root); 243*5212e11fSVishal Verma if (IS_ERR_OR_NULL(btt->debugfs_dir)) 244*5212e11fSVishal Verma return; 245*5212e11fSVishal Verma 246*5212e11fSVishal Verma list_for_each_entry(arena, &btt->arena_list, list) { 247*5212e11fSVishal Verma arena_debugfs_init(arena, btt->debugfs_dir, i); 248*5212e11fSVishal Verma i++; 249*5212e11fSVishal Verma } 250*5212e11fSVishal Verma } 251*5212e11fSVishal Verma 252*5212e11fSVishal Verma /* 253*5212e11fSVishal Verma * This function accepts two log entries, and uses the 254*5212e11fSVishal Verma * sequence number to find the 'older' entry. 255*5212e11fSVishal Verma * It also updates the sequence number in this old entry to 256*5212e11fSVishal Verma * make it the 'new' one if the mark_flag is set. 257*5212e11fSVishal Verma * Finally, it returns which of the entries was the older one. 258*5212e11fSVishal Verma * 259*5212e11fSVishal Verma * TODO The logic feels a bit kludge-y. make it better.. 260*5212e11fSVishal Verma */ 261*5212e11fSVishal Verma static int btt_log_get_old(struct log_entry *ent) 262*5212e11fSVishal Verma { 263*5212e11fSVishal Verma int old; 264*5212e11fSVishal Verma 265*5212e11fSVishal Verma /* 266*5212e11fSVishal Verma * the first ever time this is seen, the entry goes into [0] 267*5212e11fSVishal Verma * the next time, the following logic works out to put this 268*5212e11fSVishal Verma * (next) entry into [1] 269*5212e11fSVishal Verma */ 270*5212e11fSVishal Verma if (ent[0].seq == 0) { 271*5212e11fSVishal Verma ent[0].seq = cpu_to_le32(1); 272*5212e11fSVishal Verma return 0; 273*5212e11fSVishal Verma } 274*5212e11fSVishal Verma 275*5212e11fSVishal Verma if (ent[0].seq == ent[1].seq) 276*5212e11fSVishal Verma return -EINVAL; 277*5212e11fSVishal Verma if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5) 278*5212e11fSVishal Verma return -EINVAL; 279*5212e11fSVishal Verma 280*5212e11fSVishal Verma if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) { 281*5212e11fSVishal Verma if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1) 282*5212e11fSVishal Verma old = 0; 283*5212e11fSVishal Verma else 284*5212e11fSVishal Verma old = 1; 285*5212e11fSVishal Verma } else { 286*5212e11fSVishal Verma if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1) 287*5212e11fSVishal Verma old = 1; 288*5212e11fSVishal Verma else 289*5212e11fSVishal Verma old = 0; 290*5212e11fSVishal Verma } 291*5212e11fSVishal Verma 292*5212e11fSVishal Verma return old; 293*5212e11fSVishal Verma } 294*5212e11fSVishal Verma 295*5212e11fSVishal Verma static struct device *to_dev(struct arena_info *arena) 296*5212e11fSVishal Verma { 297*5212e11fSVishal Verma return &arena->nd_btt->dev; 298*5212e11fSVishal Verma } 299*5212e11fSVishal Verma 300*5212e11fSVishal Verma /* 301*5212e11fSVishal Verma * This function copies the desired (old/new) log entry into ent if 302*5212e11fSVishal Verma * it is not NULL. It returns the sub-slot number (0 or 1) 303*5212e11fSVishal Verma * where the desired log entry was found. Negative return values 304*5212e11fSVishal Verma * indicate errors. 305*5212e11fSVishal Verma */ 306*5212e11fSVishal Verma static int btt_log_read(struct arena_info *arena, u32 lane, 307*5212e11fSVishal Verma struct log_entry *ent, int old_flag) 308*5212e11fSVishal Verma { 309*5212e11fSVishal Verma int ret; 310*5212e11fSVishal Verma int old_ent, ret_ent; 311*5212e11fSVishal Verma struct log_entry log[2]; 312*5212e11fSVishal Verma 313*5212e11fSVishal Verma ret = btt_log_read_pair(arena, lane, log); 314*5212e11fSVishal Verma if (ret) 315*5212e11fSVishal Verma return -EIO; 316*5212e11fSVishal Verma 317*5212e11fSVishal Verma old_ent = btt_log_get_old(log); 318*5212e11fSVishal Verma if (old_ent < 0 || old_ent > 1) { 319*5212e11fSVishal Verma dev_info(to_dev(arena), 320*5212e11fSVishal Verma "log corruption (%d): lane %d seq [%d, %d]\n", 321*5212e11fSVishal Verma old_ent, lane, log[0].seq, log[1].seq); 322*5212e11fSVishal Verma /* TODO set error state? */ 323*5212e11fSVishal Verma return -EIO; 324*5212e11fSVishal Verma } 325*5212e11fSVishal Verma 326*5212e11fSVishal Verma ret_ent = (old_flag ? old_ent : (1 - old_ent)); 327*5212e11fSVishal Verma 328*5212e11fSVishal Verma if (ent != NULL) 329*5212e11fSVishal Verma memcpy(ent, &log[ret_ent], LOG_ENT_SIZE); 330*5212e11fSVishal Verma 331*5212e11fSVishal Verma return ret_ent; 332*5212e11fSVishal Verma } 333*5212e11fSVishal Verma 334*5212e11fSVishal Verma /* 335*5212e11fSVishal Verma * This function commits a log entry to media 336*5212e11fSVishal Verma * It does _not_ prepare the freelist entry for the next write 337*5212e11fSVishal Verma * btt_flog_write is the wrapper for updating the freelist elements 338*5212e11fSVishal Verma */ 339*5212e11fSVishal Verma static int __btt_log_write(struct arena_info *arena, u32 lane, 340*5212e11fSVishal Verma u32 sub, struct log_entry *ent) 341*5212e11fSVishal Verma { 342*5212e11fSVishal Verma int ret; 343*5212e11fSVishal Verma /* 344*5212e11fSVishal Verma * Ignore the padding in log_entry for calculating log_half. 345*5212e11fSVishal Verma * The entry is 'committed' when we write the sequence number, 346*5212e11fSVishal Verma * and we want to ensure that that is the last thing written. 347*5212e11fSVishal Verma * We don't bother writing the padding as that would be extra 348*5212e11fSVishal Verma * media wear and write amplification 349*5212e11fSVishal Verma */ 350*5212e11fSVishal Verma unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2; 351*5212e11fSVishal Verma u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE); 352*5212e11fSVishal Verma void *src = ent; 353*5212e11fSVishal Verma 354*5212e11fSVishal Verma /* split the 16B write into atomic, durable halves */ 355*5212e11fSVishal Verma ret = arena_write_bytes(arena, ns_off, src, log_half); 356*5212e11fSVishal Verma if (ret) 357*5212e11fSVishal Verma return ret; 358*5212e11fSVishal Verma 359*5212e11fSVishal Verma ns_off += log_half; 360*5212e11fSVishal Verma src += log_half; 361*5212e11fSVishal Verma return arena_write_bytes(arena, ns_off, src, log_half); 362*5212e11fSVishal Verma } 363*5212e11fSVishal Verma 364*5212e11fSVishal Verma static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub, 365*5212e11fSVishal Verma struct log_entry *ent) 366*5212e11fSVishal Verma { 367*5212e11fSVishal Verma int ret; 368*5212e11fSVishal Verma 369*5212e11fSVishal Verma ret = __btt_log_write(arena, lane, sub, ent); 370*5212e11fSVishal Verma if (ret) 371*5212e11fSVishal Verma return ret; 372*5212e11fSVishal Verma 373*5212e11fSVishal Verma /* prepare the next free entry */ 374*5212e11fSVishal Verma arena->freelist[lane].sub = 1 - arena->freelist[lane].sub; 375*5212e11fSVishal Verma if (++(arena->freelist[lane].seq) == 4) 376*5212e11fSVishal Verma arena->freelist[lane].seq = 1; 377*5212e11fSVishal Verma arena->freelist[lane].block = le32_to_cpu(ent->old_map); 378*5212e11fSVishal Verma 379*5212e11fSVishal Verma return ret; 380*5212e11fSVishal Verma } 381*5212e11fSVishal Verma 382*5212e11fSVishal Verma /* 383*5212e11fSVishal Verma * This function initializes the BTT map to the initial state, which is 384*5212e11fSVishal Verma * all-zeroes, and indicates an identity mapping 385*5212e11fSVishal Verma */ 386*5212e11fSVishal Verma static int btt_map_init(struct arena_info *arena) 387*5212e11fSVishal Verma { 388*5212e11fSVishal Verma int ret = -EINVAL; 389*5212e11fSVishal Verma void *zerobuf; 390*5212e11fSVishal Verma size_t offset = 0; 391*5212e11fSVishal Verma size_t chunk_size = SZ_2M; 392*5212e11fSVishal Verma size_t mapsize = arena->logoff - arena->mapoff; 393*5212e11fSVishal Verma 394*5212e11fSVishal Verma zerobuf = kzalloc(chunk_size, GFP_KERNEL); 395*5212e11fSVishal Verma if (!zerobuf) 396*5212e11fSVishal Verma return -ENOMEM; 397*5212e11fSVishal Verma 398*5212e11fSVishal Verma while (mapsize) { 399*5212e11fSVishal Verma size_t size = min(mapsize, chunk_size); 400*5212e11fSVishal Verma 401*5212e11fSVishal Verma ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf, 402*5212e11fSVishal Verma size); 403*5212e11fSVishal Verma if (ret) 404*5212e11fSVishal Verma goto free; 405*5212e11fSVishal Verma 406*5212e11fSVishal Verma offset += size; 407*5212e11fSVishal Verma mapsize -= size; 408*5212e11fSVishal Verma cond_resched(); 409*5212e11fSVishal Verma } 410*5212e11fSVishal Verma 411*5212e11fSVishal Verma free: 412*5212e11fSVishal Verma kfree(zerobuf); 413*5212e11fSVishal Verma return ret; 414*5212e11fSVishal Verma } 415*5212e11fSVishal Verma 416*5212e11fSVishal Verma /* 417*5212e11fSVishal Verma * This function initializes the BTT log with 'fake' entries pointing 418*5212e11fSVishal Verma * to the initial reserved set of blocks as being free 419*5212e11fSVishal Verma */ 420*5212e11fSVishal Verma static int btt_log_init(struct arena_info *arena) 421*5212e11fSVishal Verma { 422*5212e11fSVishal Verma int ret; 423*5212e11fSVishal Verma u32 i; 424*5212e11fSVishal Verma struct log_entry log, zerolog; 425*5212e11fSVishal Verma 426*5212e11fSVishal Verma memset(&zerolog, 0, sizeof(zerolog)); 427*5212e11fSVishal Verma 428*5212e11fSVishal Verma for (i = 0; i < arena->nfree; i++) { 429*5212e11fSVishal Verma log.lba = cpu_to_le32(i); 430*5212e11fSVishal Verma log.old_map = cpu_to_le32(arena->external_nlba + i); 431*5212e11fSVishal Verma log.new_map = cpu_to_le32(arena->external_nlba + i); 432*5212e11fSVishal Verma log.seq = cpu_to_le32(LOG_SEQ_INIT); 433*5212e11fSVishal Verma ret = __btt_log_write(arena, i, 0, &log); 434*5212e11fSVishal Verma if (ret) 435*5212e11fSVishal Verma return ret; 436*5212e11fSVishal Verma ret = __btt_log_write(arena, i, 1, &zerolog); 437*5212e11fSVishal Verma if (ret) 438*5212e11fSVishal Verma return ret; 439*5212e11fSVishal Verma } 440*5212e11fSVishal Verma 441*5212e11fSVishal Verma return 0; 442*5212e11fSVishal Verma } 443*5212e11fSVishal Verma 444*5212e11fSVishal Verma static int btt_freelist_init(struct arena_info *arena) 445*5212e11fSVishal Verma { 446*5212e11fSVishal Verma int old, new, ret; 447*5212e11fSVishal Verma u32 i, map_entry; 448*5212e11fSVishal Verma struct log_entry log_new, log_old; 449*5212e11fSVishal Verma 450*5212e11fSVishal Verma arena->freelist = kcalloc(arena->nfree, sizeof(struct free_entry), 451*5212e11fSVishal Verma GFP_KERNEL); 452*5212e11fSVishal Verma if (!arena->freelist) 453*5212e11fSVishal Verma return -ENOMEM; 454*5212e11fSVishal Verma 455*5212e11fSVishal Verma for (i = 0; i < arena->nfree; i++) { 456*5212e11fSVishal Verma old = btt_log_read(arena, i, &log_old, LOG_OLD_ENT); 457*5212e11fSVishal Verma if (old < 0) 458*5212e11fSVishal Verma return old; 459*5212e11fSVishal Verma 460*5212e11fSVishal Verma new = btt_log_read(arena, i, &log_new, LOG_NEW_ENT); 461*5212e11fSVishal Verma if (new < 0) 462*5212e11fSVishal Verma return new; 463*5212e11fSVishal Verma 464*5212e11fSVishal Verma /* sub points to the next one to be overwritten */ 465*5212e11fSVishal Verma arena->freelist[i].sub = 1 - new; 466*5212e11fSVishal Verma arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq)); 467*5212e11fSVishal Verma arena->freelist[i].block = le32_to_cpu(log_new.old_map); 468*5212e11fSVishal Verma 469*5212e11fSVishal Verma /* This implies a newly created or untouched flog entry */ 470*5212e11fSVishal Verma if (log_new.old_map == log_new.new_map) 471*5212e11fSVishal Verma continue; 472*5212e11fSVishal Verma 473*5212e11fSVishal Verma /* Check if map recovery is needed */ 474*5212e11fSVishal Verma ret = btt_map_read(arena, le32_to_cpu(log_new.lba), &map_entry, 475*5212e11fSVishal Verma NULL, NULL); 476*5212e11fSVishal Verma if (ret) 477*5212e11fSVishal Verma return ret; 478*5212e11fSVishal Verma if ((le32_to_cpu(log_new.new_map) != map_entry) && 479*5212e11fSVishal Verma (le32_to_cpu(log_new.old_map) == map_entry)) { 480*5212e11fSVishal Verma /* 481*5212e11fSVishal Verma * Last transaction wrote the flog, but wasn't able 482*5212e11fSVishal Verma * to complete the map write. So fix up the map. 483*5212e11fSVishal Verma */ 484*5212e11fSVishal Verma ret = btt_map_write(arena, le32_to_cpu(log_new.lba), 485*5212e11fSVishal Verma le32_to_cpu(log_new.new_map), 0, 0); 486*5212e11fSVishal Verma if (ret) 487*5212e11fSVishal Verma return ret; 488*5212e11fSVishal Verma } 489*5212e11fSVishal Verma 490*5212e11fSVishal Verma } 491*5212e11fSVishal Verma 492*5212e11fSVishal Verma return 0; 493*5212e11fSVishal Verma } 494*5212e11fSVishal Verma 495*5212e11fSVishal Verma static int btt_rtt_init(struct arena_info *arena) 496*5212e11fSVishal Verma { 497*5212e11fSVishal Verma arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL); 498*5212e11fSVishal Verma if (arena->rtt == NULL) 499*5212e11fSVishal Verma return -ENOMEM; 500*5212e11fSVishal Verma 501*5212e11fSVishal Verma return 0; 502*5212e11fSVishal Verma } 503*5212e11fSVishal Verma 504*5212e11fSVishal Verma static int btt_maplocks_init(struct arena_info *arena) 505*5212e11fSVishal Verma { 506*5212e11fSVishal Verma u32 i; 507*5212e11fSVishal Verma 508*5212e11fSVishal Verma arena->map_locks = kcalloc(arena->nfree, sizeof(struct aligned_lock), 509*5212e11fSVishal Verma GFP_KERNEL); 510*5212e11fSVishal Verma if (!arena->map_locks) 511*5212e11fSVishal Verma return -ENOMEM; 512*5212e11fSVishal Verma 513*5212e11fSVishal Verma for (i = 0; i < arena->nfree; i++) 514*5212e11fSVishal Verma spin_lock_init(&arena->map_locks[i].lock); 515*5212e11fSVishal Verma 516*5212e11fSVishal Verma return 0; 517*5212e11fSVishal Verma } 518*5212e11fSVishal Verma 519*5212e11fSVishal Verma static struct arena_info *alloc_arena(struct btt *btt, size_t size, 520*5212e11fSVishal Verma size_t start, size_t arena_off) 521*5212e11fSVishal Verma { 522*5212e11fSVishal Verma struct arena_info *arena; 523*5212e11fSVishal Verma u64 logsize, mapsize, datasize; 524*5212e11fSVishal Verma u64 available = size; 525*5212e11fSVishal Verma 526*5212e11fSVishal Verma arena = kzalloc(sizeof(struct arena_info), GFP_KERNEL); 527*5212e11fSVishal Verma if (!arena) 528*5212e11fSVishal Verma return NULL; 529*5212e11fSVishal Verma arena->nd_btt = btt->nd_btt; 530*5212e11fSVishal Verma 531*5212e11fSVishal Verma if (!size) 532*5212e11fSVishal Verma return arena; 533*5212e11fSVishal Verma 534*5212e11fSVishal Verma arena->size = size; 535*5212e11fSVishal Verma arena->external_lba_start = start; 536*5212e11fSVishal Verma arena->external_lbasize = btt->lbasize; 537*5212e11fSVishal Verma arena->internal_lbasize = roundup(arena->external_lbasize, 538*5212e11fSVishal Verma INT_LBASIZE_ALIGNMENT); 539*5212e11fSVishal Verma arena->nfree = BTT_DEFAULT_NFREE; 540*5212e11fSVishal Verma arena->version_major = 1; 541*5212e11fSVishal Verma arena->version_minor = 1; 542*5212e11fSVishal Verma 543*5212e11fSVishal Verma if (available % BTT_PG_SIZE) 544*5212e11fSVishal Verma available -= (available % BTT_PG_SIZE); 545*5212e11fSVishal Verma 546*5212e11fSVishal Verma /* Two pages are reserved for the super block and its copy */ 547*5212e11fSVishal Verma available -= 2 * BTT_PG_SIZE; 548*5212e11fSVishal Verma 549*5212e11fSVishal Verma /* The log takes a fixed amount of space based on nfree */ 550*5212e11fSVishal Verma logsize = roundup(2 * arena->nfree * sizeof(struct log_entry), 551*5212e11fSVishal Verma BTT_PG_SIZE); 552*5212e11fSVishal Verma available -= logsize; 553*5212e11fSVishal Verma 554*5212e11fSVishal Verma /* Calculate optimal split between map and data area */ 555*5212e11fSVishal Verma arena->internal_nlba = div_u64(available - BTT_PG_SIZE, 556*5212e11fSVishal Verma arena->internal_lbasize + MAP_ENT_SIZE); 557*5212e11fSVishal Verma arena->external_nlba = arena->internal_nlba - arena->nfree; 558*5212e11fSVishal Verma 559*5212e11fSVishal Verma mapsize = roundup((arena->external_nlba * MAP_ENT_SIZE), BTT_PG_SIZE); 560*5212e11fSVishal Verma datasize = available - mapsize; 561*5212e11fSVishal Verma 562*5212e11fSVishal Verma /* 'Absolute' values, relative to start of storage space */ 563*5212e11fSVishal Verma arena->infooff = arena_off; 564*5212e11fSVishal Verma arena->dataoff = arena->infooff + BTT_PG_SIZE; 565*5212e11fSVishal Verma arena->mapoff = arena->dataoff + datasize; 566*5212e11fSVishal Verma arena->logoff = arena->mapoff + mapsize; 567*5212e11fSVishal Verma arena->info2off = arena->logoff + logsize; 568*5212e11fSVishal Verma return arena; 569*5212e11fSVishal Verma } 570*5212e11fSVishal Verma 571*5212e11fSVishal Verma static void free_arenas(struct btt *btt) 572*5212e11fSVishal Verma { 573*5212e11fSVishal Verma struct arena_info *arena, *next; 574*5212e11fSVishal Verma 575*5212e11fSVishal Verma list_for_each_entry_safe(arena, next, &btt->arena_list, list) { 576*5212e11fSVishal Verma list_del(&arena->list); 577*5212e11fSVishal Verma kfree(arena->rtt); 578*5212e11fSVishal Verma kfree(arena->map_locks); 579*5212e11fSVishal Verma kfree(arena->freelist); 580*5212e11fSVishal Verma debugfs_remove_recursive(arena->debugfs_dir); 581*5212e11fSVishal Verma kfree(arena); 582*5212e11fSVishal Verma } 583*5212e11fSVishal Verma } 584*5212e11fSVishal Verma 585*5212e11fSVishal Verma /* 586*5212e11fSVishal Verma * This function checks if the metadata layout is valid and error free 587*5212e11fSVishal Verma */ 588*5212e11fSVishal Verma static int arena_is_valid(struct arena_info *arena, struct btt_sb *super, 589*5212e11fSVishal Verma u8 *uuid, u32 lbasize) 590*5212e11fSVishal Verma { 591*5212e11fSVishal Verma u64 checksum; 592*5212e11fSVishal Verma 593*5212e11fSVishal Verma if (memcmp(super->uuid, uuid, 16)) 594*5212e11fSVishal Verma return 0; 595*5212e11fSVishal Verma 596*5212e11fSVishal Verma checksum = le64_to_cpu(super->checksum); 597*5212e11fSVishal Verma super->checksum = 0; 598*5212e11fSVishal Verma if (checksum != nd_btt_sb_checksum(super)) 599*5212e11fSVishal Verma return 0; 600*5212e11fSVishal Verma super->checksum = cpu_to_le64(checksum); 601*5212e11fSVishal Verma 602*5212e11fSVishal Verma if (lbasize != le32_to_cpu(super->external_lbasize)) 603*5212e11fSVishal Verma return 0; 604*5212e11fSVishal Verma 605*5212e11fSVishal Verma /* TODO: figure out action for this */ 606*5212e11fSVishal Verma if ((le32_to_cpu(super->flags) & IB_FLAG_ERROR_MASK) != 0) 607*5212e11fSVishal Verma dev_info(to_dev(arena), "Found arena with an error flag\n"); 608*5212e11fSVishal Verma 609*5212e11fSVishal Verma return 1; 610*5212e11fSVishal Verma } 611*5212e11fSVishal Verma 612*5212e11fSVishal Verma /* 613*5212e11fSVishal Verma * This function reads an existing valid btt superblock and 614*5212e11fSVishal Verma * populates the corresponding arena_info struct 615*5212e11fSVishal Verma */ 616*5212e11fSVishal Verma static void parse_arena_meta(struct arena_info *arena, struct btt_sb *super, 617*5212e11fSVishal Verma u64 arena_off) 618*5212e11fSVishal Verma { 619*5212e11fSVishal Verma arena->internal_nlba = le32_to_cpu(super->internal_nlba); 620*5212e11fSVishal Verma arena->internal_lbasize = le32_to_cpu(super->internal_lbasize); 621*5212e11fSVishal Verma arena->external_nlba = le32_to_cpu(super->external_nlba); 622*5212e11fSVishal Verma arena->external_lbasize = le32_to_cpu(super->external_lbasize); 623*5212e11fSVishal Verma arena->nfree = le32_to_cpu(super->nfree); 624*5212e11fSVishal Verma arena->version_major = le16_to_cpu(super->version_major); 625*5212e11fSVishal Verma arena->version_minor = le16_to_cpu(super->version_minor); 626*5212e11fSVishal Verma 627*5212e11fSVishal Verma arena->nextoff = (super->nextoff == 0) ? 0 : (arena_off + 628*5212e11fSVishal Verma le64_to_cpu(super->nextoff)); 629*5212e11fSVishal Verma arena->infooff = arena_off; 630*5212e11fSVishal Verma arena->dataoff = arena_off + le64_to_cpu(super->dataoff); 631*5212e11fSVishal Verma arena->mapoff = arena_off + le64_to_cpu(super->mapoff); 632*5212e11fSVishal Verma arena->logoff = arena_off + le64_to_cpu(super->logoff); 633*5212e11fSVishal Verma arena->info2off = arena_off + le64_to_cpu(super->info2off); 634*5212e11fSVishal Verma 635*5212e11fSVishal Verma arena->size = (super->nextoff > 0) ? (le64_to_cpu(super->nextoff)) : 636*5212e11fSVishal Verma (arena->info2off - arena->infooff + BTT_PG_SIZE); 637*5212e11fSVishal Verma 638*5212e11fSVishal Verma arena->flags = le32_to_cpu(super->flags); 639*5212e11fSVishal Verma } 640*5212e11fSVishal Verma 641*5212e11fSVishal Verma static int discover_arenas(struct btt *btt) 642*5212e11fSVishal Verma { 643*5212e11fSVishal Verma int ret = 0; 644*5212e11fSVishal Verma struct arena_info *arena; 645*5212e11fSVishal Verma struct btt_sb *super; 646*5212e11fSVishal Verma size_t remaining = btt->rawsize; 647*5212e11fSVishal Verma u64 cur_nlba = 0; 648*5212e11fSVishal Verma size_t cur_off = 0; 649*5212e11fSVishal Verma int num_arenas = 0; 650*5212e11fSVishal Verma 651*5212e11fSVishal Verma super = kzalloc(sizeof(*super), GFP_KERNEL); 652*5212e11fSVishal Verma if (!super) 653*5212e11fSVishal Verma return -ENOMEM; 654*5212e11fSVishal Verma 655*5212e11fSVishal Verma while (remaining) { 656*5212e11fSVishal Verma /* Alloc memory for arena */ 657*5212e11fSVishal Verma arena = alloc_arena(btt, 0, 0, 0); 658*5212e11fSVishal Verma if (!arena) { 659*5212e11fSVishal Verma ret = -ENOMEM; 660*5212e11fSVishal Verma goto out_super; 661*5212e11fSVishal Verma } 662*5212e11fSVishal Verma 663*5212e11fSVishal Verma arena->infooff = cur_off; 664*5212e11fSVishal Verma ret = btt_info_read(arena, super); 665*5212e11fSVishal Verma if (ret) 666*5212e11fSVishal Verma goto out; 667*5212e11fSVishal Verma 668*5212e11fSVishal Verma if (!arena_is_valid(arena, super, btt->nd_btt->uuid, 669*5212e11fSVishal Verma btt->lbasize)) { 670*5212e11fSVishal Verma if (remaining == btt->rawsize) { 671*5212e11fSVishal Verma btt->init_state = INIT_NOTFOUND; 672*5212e11fSVishal Verma dev_info(to_dev(arena), "No existing arenas\n"); 673*5212e11fSVishal Verma goto out; 674*5212e11fSVishal Verma } else { 675*5212e11fSVishal Verma dev_info(to_dev(arena), 676*5212e11fSVishal Verma "Found corrupted metadata!\n"); 677*5212e11fSVishal Verma ret = -ENODEV; 678*5212e11fSVishal Verma goto out; 679*5212e11fSVishal Verma } 680*5212e11fSVishal Verma } 681*5212e11fSVishal Verma 682*5212e11fSVishal Verma arena->external_lba_start = cur_nlba; 683*5212e11fSVishal Verma parse_arena_meta(arena, super, cur_off); 684*5212e11fSVishal Verma 685*5212e11fSVishal Verma ret = btt_freelist_init(arena); 686*5212e11fSVishal Verma if (ret) 687*5212e11fSVishal Verma goto out; 688*5212e11fSVishal Verma 689*5212e11fSVishal Verma ret = btt_rtt_init(arena); 690*5212e11fSVishal Verma if (ret) 691*5212e11fSVishal Verma goto out; 692*5212e11fSVishal Verma 693*5212e11fSVishal Verma ret = btt_maplocks_init(arena); 694*5212e11fSVishal Verma if (ret) 695*5212e11fSVishal Verma goto out; 696*5212e11fSVishal Verma 697*5212e11fSVishal Verma list_add_tail(&arena->list, &btt->arena_list); 698*5212e11fSVishal Verma 699*5212e11fSVishal Verma remaining -= arena->size; 700*5212e11fSVishal Verma cur_off += arena->size; 701*5212e11fSVishal Verma cur_nlba += arena->external_nlba; 702*5212e11fSVishal Verma num_arenas++; 703*5212e11fSVishal Verma 704*5212e11fSVishal Verma if (arena->nextoff == 0) 705*5212e11fSVishal Verma break; 706*5212e11fSVishal Verma } 707*5212e11fSVishal Verma btt->num_arenas = num_arenas; 708*5212e11fSVishal Verma btt->nlba = cur_nlba; 709*5212e11fSVishal Verma btt->init_state = INIT_READY; 710*5212e11fSVishal Verma 711*5212e11fSVishal Verma kfree(super); 712*5212e11fSVishal Verma return ret; 713*5212e11fSVishal Verma 714*5212e11fSVishal Verma out: 715*5212e11fSVishal Verma kfree(arena); 716*5212e11fSVishal Verma free_arenas(btt); 717*5212e11fSVishal Verma out_super: 718*5212e11fSVishal Verma kfree(super); 719*5212e11fSVishal Verma return ret; 720*5212e11fSVishal Verma } 721*5212e11fSVishal Verma 722*5212e11fSVishal Verma static int create_arenas(struct btt *btt) 723*5212e11fSVishal Verma { 724*5212e11fSVishal Verma size_t remaining = btt->rawsize; 725*5212e11fSVishal Verma size_t cur_off = 0; 726*5212e11fSVishal Verma 727*5212e11fSVishal Verma while (remaining) { 728*5212e11fSVishal Verma struct arena_info *arena; 729*5212e11fSVishal Verma size_t arena_size = min_t(u64, ARENA_MAX_SIZE, remaining); 730*5212e11fSVishal Verma 731*5212e11fSVishal Verma remaining -= arena_size; 732*5212e11fSVishal Verma if (arena_size < ARENA_MIN_SIZE) 733*5212e11fSVishal Verma break; 734*5212e11fSVishal Verma 735*5212e11fSVishal Verma arena = alloc_arena(btt, arena_size, btt->nlba, cur_off); 736*5212e11fSVishal Verma if (!arena) { 737*5212e11fSVishal Verma free_arenas(btt); 738*5212e11fSVishal Verma return -ENOMEM; 739*5212e11fSVishal Verma } 740*5212e11fSVishal Verma btt->nlba += arena->external_nlba; 741*5212e11fSVishal Verma if (remaining >= ARENA_MIN_SIZE) 742*5212e11fSVishal Verma arena->nextoff = arena->size; 743*5212e11fSVishal Verma else 744*5212e11fSVishal Verma arena->nextoff = 0; 745*5212e11fSVishal Verma cur_off += arena_size; 746*5212e11fSVishal Verma list_add_tail(&arena->list, &btt->arena_list); 747*5212e11fSVishal Verma } 748*5212e11fSVishal Verma 749*5212e11fSVishal Verma return 0; 750*5212e11fSVishal Verma } 751*5212e11fSVishal Verma 752*5212e11fSVishal Verma /* 753*5212e11fSVishal Verma * This function completes arena initialization by writing 754*5212e11fSVishal Verma * all the metadata. 755*5212e11fSVishal Verma * It is only called for an uninitialized arena when a write 756*5212e11fSVishal Verma * to that arena occurs for the first time. 757*5212e11fSVishal Verma */ 758*5212e11fSVishal Verma static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid) 759*5212e11fSVishal Verma { 760*5212e11fSVishal Verma int ret; 761*5212e11fSVishal Verma struct btt_sb *super; 762*5212e11fSVishal Verma 763*5212e11fSVishal Verma ret = btt_map_init(arena); 764*5212e11fSVishal Verma if (ret) 765*5212e11fSVishal Verma return ret; 766*5212e11fSVishal Verma 767*5212e11fSVishal Verma ret = btt_log_init(arena); 768*5212e11fSVishal Verma if (ret) 769*5212e11fSVishal Verma return ret; 770*5212e11fSVishal Verma 771*5212e11fSVishal Verma super = kzalloc(sizeof(struct btt_sb), GFP_NOIO); 772*5212e11fSVishal Verma if (!super) 773*5212e11fSVishal Verma return -ENOMEM; 774*5212e11fSVishal Verma 775*5212e11fSVishal Verma strncpy(super->signature, BTT_SIG, BTT_SIG_LEN); 776*5212e11fSVishal Verma memcpy(super->uuid, uuid, 16); 777*5212e11fSVishal Verma super->flags = cpu_to_le32(arena->flags); 778*5212e11fSVishal Verma super->version_major = cpu_to_le16(arena->version_major); 779*5212e11fSVishal Verma super->version_minor = cpu_to_le16(arena->version_minor); 780*5212e11fSVishal Verma super->external_lbasize = cpu_to_le32(arena->external_lbasize); 781*5212e11fSVishal Verma super->external_nlba = cpu_to_le32(arena->external_nlba); 782*5212e11fSVishal Verma super->internal_lbasize = cpu_to_le32(arena->internal_lbasize); 783*5212e11fSVishal Verma super->internal_nlba = cpu_to_le32(arena->internal_nlba); 784*5212e11fSVishal Verma super->nfree = cpu_to_le32(arena->nfree); 785*5212e11fSVishal Verma super->infosize = cpu_to_le32(sizeof(struct btt_sb)); 786*5212e11fSVishal Verma super->nextoff = cpu_to_le64(arena->nextoff); 787*5212e11fSVishal Verma /* 788*5212e11fSVishal Verma * Subtract arena->infooff (arena start) so numbers are relative 789*5212e11fSVishal Verma * to 'this' arena 790*5212e11fSVishal Verma */ 791*5212e11fSVishal Verma super->dataoff = cpu_to_le64(arena->dataoff - arena->infooff); 792*5212e11fSVishal Verma super->mapoff = cpu_to_le64(arena->mapoff - arena->infooff); 793*5212e11fSVishal Verma super->logoff = cpu_to_le64(arena->logoff - arena->infooff); 794*5212e11fSVishal Verma super->info2off = cpu_to_le64(arena->info2off - arena->infooff); 795*5212e11fSVishal Verma 796*5212e11fSVishal Verma super->flags = 0; 797*5212e11fSVishal Verma super->checksum = cpu_to_le64(nd_btt_sb_checksum(super)); 798*5212e11fSVishal Verma 799*5212e11fSVishal Verma ret = btt_info_write(arena, super); 800*5212e11fSVishal Verma 801*5212e11fSVishal Verma kfree(super); 802*5212e11fSVishal Verma return ret; 803*5212e11fSVishal Verma } 804*5212e11fSVishal Verma 805*5212e11fSVishal Verma /* 806*5212e11fSVishal Verma * This function completes the initialization for the BTT namespace 807*5212e11fSVishal Verma * such that it is ready to accept IOs 808*5212e11fSVishal Verma */ 809*5212e11fSVishal Verma static int btt_meta_init(struct btt *btt) 810*5212e11fSVishal Verma { 811*5212e11fSVishal Verma int ret = 0; 812*5212e11fSVishal Verma struct arena_info *arena; 813*5212e11fSVishal Verma 814*5212e11fSVishal Verma mutex_lock(&btt->init_lock); 815*5212e11fSVishal Verma list_for_each_entry(arena, &btt->arena_list, list) { 816*5212e11fSVishal Verma ret = btt_arena_write_layout(arena, btt->nd_btt->uuid); 817*5212e11fSVishal Verma if (ret) 818*5212e11fSVishal Verma goto unlock; 819*5212e11fSVishal Verma 820*5212e11fSVishal Verma ret = btt_freelist_init(arena); 821*5212e11fSVishal Verma if (ret) 822*5212e11fSVishal Verma goto unlock; 823*5212e11fSVishal Verma 824*5212e11fSVishal Verma ret = btt_rtt_init(arena); 825*5212e11fSVishal Verma if (ret) 826*5212e11fSVishal Verma goto unlock; 827*5212e11fSVishal Verma 828*5212e11fSVishal Verma ret = btt_maplocks_init(arena); 829*5212e11fSVishal Verma if (ret) 830*5212e11fSVishal Verma goto unlock; 831*5212e11fSVishal Verma } 832*5212e11fSVishal Verma 833*5212e11fSVishal Verma btt->init_state = INIT_READY; 834*5212e11fSVishal Verma 835*5212e11fSVishal Verma unlock: 836*5212e11fSVishal Verma mutex_unlock(&btt->init_lock); 837*5212e11fSVishal Verma return ret; 838*5212e11fSVishal Verma } 839*5212e11fSVishal Verma 840*5212e11fSVishal Verma /* 841*5212e11fSVishal Verma * This function calculates the arena in which the given LBA lies 842*5212e11fSVishal Verma * by doing a linear walk. This is acceptable since we expect only 843*5212e11fSVishal Verma * a few arenas. If we have backing devices that get much larger, 844*5212e11fSVishal Verma * we can construct a balanced binary tree of arenas at init time 845*5212e11fSVishal Verma * so that this range search becomes faster. 846*5212e11fSVishal Verma */ 847*5212e11fSVishal Verma static int lba_to_arena(struct btt *btt, sector_t sector, __u32 *premap, 848*5212e11fSVishal Verma struct arena_info **arena) 849*5212e11fSVishal Verma { 850*5212e11fSVishal Verma struct arena_info *arena_list; 851*5212e11fSVishal Verma __u64 lba = div_u64(sector << SECTOR_SHIFT, btt->sector_size); 852*5212e11fSVishal Verma 853*5212e11fSVishal Verma list_for_each_entry(arena_list, &btt->arena_list, list) { 854*5212e11fSVishal Verma if (lba < arena_list->external_nlba) { 855*5212e11fSVishal Verma *arena = arena_list; 856*5212e11fSVishal Verma *premap = lba; 857*5212e11fSVishal Verma return 0; 858*5212e11fSVishal Verma } 859*5212e11fSVishal Verma lba -= arena_list->external_nlba; 860*5212e11fSVishal Verma } 861*5212e11fSVishal Verma 862*5212e11fSVishal Verma return -EIO; 863*5212e11fSVishal Verma } 864*5212e11fSVishal Verma 865*5212e11fSVishal Verma /* 866*5212e11fSVishal Verma * The following (lock_map, unlock_map) are mostly just to improve 867*5212e11fSVishal Verma * readability, since they index into an array of locks 868*5212e11fSVishal Verma */ 869*5212e11fSVishal Verma static void lock_map(struct arena_info *arena, u32 premap) 870*5212e11fSVishal Verma __acquires(&arena->map_locks[idx].lock) 871*5212e11fSVishal Verma { 872*5212e11fSVishal Verma u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree; 873*5212e11fSVishal Verma 874*5212e11fSVishal Verma spin_lock(&arena->map_locks[idx].lock); 875*5212e11fSVishal Verma } 876*5212e11fSVishal Verma 877*5212e11fSVishal Verma static void unlock_map(struct arena_info *arena, u32 premap) 878*5212e11fSVishal Verma __releases(&arena->map_locks[idx].lock) 879*5212e11fSVishal Verma { 880*5212e11fSVishal Verma u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree; 881*5212e11fSVishal Verma 882*5212e11fSVishal Verma spin_unlock(&arena->map_locks[idx].lock); 883*5212e11fSVishal Verma } 884*5212e11fSVishal Verma 885*5212e11fSVishal Verma static u64 to_namespace_offset(struct arena_info *arena, u64 lba) 886*5212e11fSVishal Verma { 887*5212e11fSVishal Verma return arena->dataoff + ((u64)lba * arena->internal_lbasize); 888*5212e11fSVishal Verma } 889*5212e11fSVishal Verma 890*5212e11fSVishal Verma static int btt_data_read(struct arena_info *arena, struct page *page, 891*5212e11fSVishal Verma unsigned int off, u32 lba, u32 len) 892*5212e11fSVishal Verma { 893*5212e11fSVishal Verma int ret; 894*5212e11fSVishal Verma u64 nsoff = to_namespace_offset(arena, lba); 895*5212e11fSVishal Verma void *mem = kmap_atomic(page); 896*5212e11fSVishal Verma 897*5212e11fSVishal Verma ret = arena_read_bytes(arena, nsoff, mem + off, len); 898*5212e11fSVishal Verma kunmap_atomic(mem); 899*5212e11fSVishal Verma 900*5212e11fSVishal Verma return ret; 901*5212e11fSVishal Verma } 902*5212e11fSVishal Verma 903*5212e11fSVishal Verma static int btt_data_write(struct arena_info *arena, u32 lba, 904*5212e11fSVishal Verma struct page *page, unsigned int off, u32 len) 905*5212e11fSVishal Verma { 906*5212e11fSVishal Verma int ret; 907*5212e11fSVishal Verma u64 nsoff = to_namespace_offset(arena, lba); 908*5212e11fSVishal Verma void *mem = kmap_atomic(page); 909*5212e11fSVishal Verma 910*5212e11fSVishal Verma ret = arena_write_bytes(arena, nsoff, mem + off, len); 911*5212e11fSVishal Verma kunmap_atomic(mem); 912*5212e11fSVishal Verma 913*5212e11fSVishal Verma return ret; 914*5212e11fSVishal Verma } 915*5212e11fSVishal Verma 916*5212e11fSVishal Verma static void zero_fill_data(struct page *page, unsigned int off, u32 len) 917*5212e11fSVishal Verma { 918*5212e11fSVishal Verma void *mem = kmap_atomic(page); 919*5212e11fSVishal Verma 920*5212e11fSVishal Verma memset(mem + off, 0, len); 921*5212e11fSVishal Verma kunmap_atomic(mem); 922*5212e11fSVishal Verma } 923*5212e11fSVishal Verma 924*5212e11fSVishal Verma static int btt_read_pg(struct btt *btt, struct page *page, unsigned int off, 925*5212e11fSVishal Verma sector_t sector, unsigned int len) 926*5212e11fSVishal Verma { 927*5212e11fSVishal Verma int ret = 0; 928*5212e11fSVishal Verma int t_flag, e_flag; 929*5212e11fSVishal Verma struct arena_info *arena = NULL; 930*5212e11fSVishal Verma u32 lane = 0, premap, postmap; 931*5212e11fSVishal Verma 932*5212e11fSVishal Verma while (len) { 933*5212e11fSVishal Verma u32 cur_len; 934*5212e11fSVishal Verma 935*5212e11fSVishal Verma lane = nd_region_acquire_lane(btt->nd_region); 936*5212e11fSVishal Verma 937*5212e11fSVishal Verma ret = lba_to_arena(btt, sector, &premap, &arena); 938*5212e11fSVishal Verma if (ret) 939*5212e11fSVishal Verma goto out_lane; 940*5212e11fSVishal Verma 941*5212e11fSVishal Verma cur_len = min(btt->sector_size, len); 942*5212e11fSVishal Verma 943*5212e11fSVishal Verma ret = btt_map_read(arena, premap, &postmap, &t_flag, &e_flag); 944*5212e11fSVishal Verma if (ret) 945*5212e11fSVishal Verma goto out_lane; 946*5212e11fSVishal Verma 947*5212e11fSVishal Verma /* 948*5212e11fSVishal Verma * We loop to make sure that the post map LBA didn't change 949*5212e11fSVishal Verma * from under us between writing the RTT and doing the actual 950*5212e11fSVishal Verma * read. 951*5212e11fSVishal Verma */ 952*5212e11fSVishal Verma while (1) { 953*5212e11fSVishal Verma u32 new_map; 954*5212e11fSVishal Verma 955*5212e11fSVishal Verma if (t_flag) { 956*5212e11fSVishal Verma zero_fill_data(page, off, cur_len); 957*5212e11fSVishal Verma goto out_lane; 958*5212e11fSVishal Verma } 959*5212e11fSVishal Verma 960*5212e11fSVishal Verma if (e_flag) { 961*5212e11fSVishal Verma ret = -EIO; 962*5212e11fSVishal Verma goto out_lane; 963*5212e11fSVishal Verma } 964*5212e11fSVishal Verma 965*5212e11fSVishal Verma arena->rtt[lane] = RTT_VALID | postmap; 966*5212e11fSVishal Verma /* 967*5212e11fSVishal Verma * Barrier to make sure this write is not reordered 968*5212e11fSVishal Verma * to do the verification map_read before the RTT store 969*5212e11fSVishal Verma */ 970*5212e11fSVishal Verma barrier(); 971*5212e11fSVishal Verma 972*5212e11fSVishal Verma ret = btt_map_read(arena, premap, &new_map, &t_flag, 973*5212e11fSVishal Verma &e_flag); 974*5212e11fSVishal Verma if (ret) 975*5212e11fSVishal Verma goto out_rtt; 976*5212e11fSVishal Verma 977*5212e11fSVishal Verma if (postmap == new_map) 978*5212e11fSVishal Verma break; 979*5212e11fSVishal Verma 980*5212e11fSVishal Verma postmap = new_map; 981*5212e11fSVishal Verma } 982*5212e11fSVishal Verma 983*5212e11fSVishal Verma ret = btt_data_read(arena, page, off, postmap, cur_len); 984*5212e11fSVishal Verma if (ret) 985*5212e11fSVishal Verma goto out_rtt; 986*5212e11fSVishal Verma 987*5212e11fSVishal Verma arena->rtt[lane] = RTT_INVALID; 988*5212e11fSVishal Verma nd_region_release_lane(btt->nd_region, lane); 989*5212e11fSVishal Verma 990*5212e11fSVishal Verma len -= cur_len; 991*5212e11fSVishal Verma off += cur_len; 992*5212e11fSVishal Verma sector += btt->sector_size >> SECTOR_SHIFT; 993*5212e11fSVishal Verma } 994*5212e11fSVishal Verma 995*5212e11fSVishal Verma return 0; 996*5212e11fSVishal Verma 997*5212e11fSVishal Verma out_rtt: 998*5212e11fSVishal Verma arena->rtt[lane] = RTT_INVALID; 999*5212e11fSVishal Verma out_lane: 1000*5212e11fSVishal Verma nd_region_release_lane(btt->nd_region, lane); 1001*5212e11fSVishal Verma return ret; 1002*5212e11fSVishal Verma } 1003*5212e11fSVishal Verma 1004*5212e11fSVishal Verma static int btt_write_pg(struct btt *btt, sector_t sector, struct page *page, 1005*5212e11fSVishal Verma unsigned int off, unsigned int len) 1006*5212e11fSVishal Verma { 1007*5212e11fSVishal Verma int ret = 0; 1008*5212e11fSVishal Verma struct arena_info *arena = NULL; 1009*5212e11fSVishal Verma u32 premap = 0, old_postmap, new_postmap, lane = 0, i; 1010*5212e11fSVishal Verma struct log_entry log; 1011*5212e11fSVishal Verma int sub; 1012*5212e11fSVishal Verma 1013*5212e11fSVishal Verma while (len) { 1014*5212e11fSVishal Verma u32 cur_len; 1015*5212e11fSVishal Verma 1016*5212e11fSVishal Verma lane = nd_region_acquire_lane(btt->nd_region); 1017*5212e11fSVishal Verma 1018*5212e11fSVishal Verma ret = lba_to_arena(btt, sector, &premap, &arena); 1019*5212e11fSVishal Verma if (ret) 1020*5212e11fSVishal Verma goto out_lane; 1021*5212e11fSVishal Verma cur_len = min(btt->sector_size, len); 1022*5212e11fSVishal Verma 1023*5212e11fSVishal Verma if ((arena->flags & IB_FLAG_ERROR_MASK) != 0) { 1024*5212e11fSVishal Verma ret = -EIO; 1025*5212e11fSVishal Verma goto out_lane; 1026*5212e11fSVishal Verma } 1027*5212e11fSVishal Verma 1028*5212e11fSVishal Verma new_postmap = arena->freelist[lane].block; 1029*5212e11fSVishal Verma 1030*5212e11fSVishal Verma /* Wait if the new block is being read from */ 1031*5212e11fSVishal Verma for (i = 0; i < arena->nfree; i++) 1032*5212e11fSVishal Verma while (arena->rtt[i] == (RTT_VALID | new_postmap)) 1033*5212e11fSVishal Verma cpu_relax(); 1034*5212e11fSVishal Verma 1035*5212e11fSVishal Verma 1036*5212e11fSVishal Verma if (new_postmap >= arena->internal_nlba) { 1037*5212e11fSVishal Verma ret = -EIO; 1038*5212e11fSVishal Verma goto out_lane; 1039*5212e11fSVishal Verma } else 1040*5212e11fSVishal Verma ret = btt_data_write(arena, new_postmap, page, 1041*5212e11fSVishal Verma off, cur_len); 1042*5212e11fSVishal Verma if (ret) 1043*5212e11fSVishal Verma goto out_lane; 1044*5212e11fSVishal Verma 1045*5212e11fSVishal Verma lock_map(arena, premap); 1046*5212e11fSVishal Verma ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL); 1047*5212e11fSVishal Verma if (ret) 1048*5212e11fSVishal Verma goto out_map; 1049*5212e11fSVishal Verma if (old_postmap >= arena->internal_nlba) { 1050*5212e11fSVishal Verma ret = -EIO; 1051*5212e11fSVishal Verma goto out_map; 1052*5212e11fSVishal Verma } 1053*5212e11fSVishal Verma 1054*5212e11fSVishal Verma log.lba = cpu_to_le32(premap); 1055*5212e11fSVishal Verma log.old_map = cpu_to_le32(old_postmap); 1056*5212e11fSVishal Verma log.new_map = cpu_to_le32(new_postmap); 1057*5212e11fSVishal Verma log.seq = cpu_to_le32(arena->freelist[lane].seq); 1058*5212e11fSVishal Verma sub = arena->freelist[lane].sub; 1059*5212e11fSVishal Verma ret = btt_flog_write(arena, lane, sub, &log); 1060*5212e11fSVishal Verma if (ret) 1061*5212e11fSVishal Verma goto out_map; 1062*5212e11fSVishal Verma 1063*5212e11fSVishal Verma ret = btt_map_write(arena, premap, new_postmap, 0, 0); 1064*5212e11fSVishal Verma if (ret) 1065*5212e11fSVishal Verma goto out_map; 1066*5212e11fSVishal Verma 1067*5212e11fSVishal Verma unlock_map(arena, premap); 1068*5212e11fSVishal Verma nd_region_release_lane(btt->nd_region, lane); 1069*5212e11fSVishal Verma 1070*5212e11fSVishal Verma len -= cur_len; 1071*5212e11fSVishal Verma off += cur_len; 1072*5212e11fSVishal Verma sector += btt->sector_size >> SECTOR_SHIFT; 1073*5212e11fSVishal Verma } 1074*5212e11fSVishal Verma 1075*5212e11fSVishal Verma return 0; 1076*5212e11fSVishal Verma 1077*5212e11fSVishal Verma out_map: 1078*5212e11fSVishal Verma unlock_map(arena, premap); 1079*5212e11fSVishal Verma out_lane: 1080*5212e11fSVishal Verma nd_region_release_lane(btt->nd_region, lane); 1081*5212e11fSVishal Verma return ret; 1082*5212e11fSVishal Verma } 1083*5212e11fSVishal Verma 1084*5212e11fSVishal Verma static int btt_do_bvec(struct btt *btt, struct page *page, 1085*5212e11fSVishal Verma unsigned int len, unsigned int off, int rw, 1086*5212e11fSVishal Verma sector_t sector) 1087*5212e11fSVishal Verma { 1088*5212e11fSVishal Verma int ret; 1089*5212e11fSVishal Verma 1090*5212e11fSVishal Verma if (rw == READ) { 1091*5212e11fSVishal Verma ret = btt_read_pg(btt, page, off, sector, len); 1092*5212e11fSVishal Verma flush_dcache_page(page); 1093*5212e11fSVishal Verma } else { 1094*5212e11fSVishal Verma flush_dcache_page(page); 1095*5212e11fSVishal Verma ret = btt_write_pg(btt, sector, page, off, len); 1096*5212e11fSVishal Verma } 1097*5212e11fSVishal Verma 1098*5212e11fSVishal Verma return ret; 1099*5212e11fSVishal Verma } 1100*5212e11fSVishal Verma 1101*5212e11fSVishal Verma static void btt_make_request(struct request_queue *q, struct bio *bio) 1102*5212e11fSVishal Verma { 1103*5212e11fSVishal Verma struct btt *btt = q->queuedata; 1104*5212e11fSVishal Verma struct bvec_iter iter; 1105*5212e11fSVishal Verma struct bio_vec bvec; 1106*5212e11fSVishal Verma int err = 0, rw; 1107*5212e11fSVishal Verma 1108*5212e11fSVishal Verma rw = bio_data_dir(bio); 1109*5212e11fSVishal Verma bio_for_each_segment(bvec, bio, iter) { 1110*5212e11fSVishal Verma unsigned int len = bvec.bv_len; 1111*5212e11fSVishal Verma 1112*5212e11fSVishal Verma BUG_ON(len > PAGE_SIZE); 1113*5212e11fSVishal Verma /* Make sure len is in multiples of sector size. */ 1114*5212e11fSVishal Verma /* XXX is this right? */ 1115*5212e11fSVishal Verma BUG_ON(len < btt->sector_size); 1116*5212e11fSVishal Verma BUG_ON(len % btt->sector_size); 1117*5212e11fSVishal Verma 1118*5212e11fSVishal Verma err = btt_do_bvec(btt, bvec.bv_page, len, bvec.bv_offset, 1119*5212e11fSVishal Verma rw, iter.bi_sector); 1120*5212e11fSVishal Verma if (err) { 1121*5212e11fSVishal Verma dev_info(&btt->nd_btt->dev, 1122*5212e11fSVishal Verma "io error in %s sector %lld, len %d,\n", 1123*5212e11fSVishal Verma (rw == READ) ? "READ" : "WRITE", 1124*5212e11fSVishal Verma (unsigned long long) iter.bi_sector, len); 1125*5212e11fSVishal Verma goto out; 1126*5212e11fSVishal Verma } 1127*5212e11fSVishal Verma } 1128*5212e11fSVishal Verma 1129*5212e11fSVishal Verma out: 1130*5212e11fSVishal Verma bio_endio(bio, err); 1131*5212e11fSVishal Verma } 1132*5212e11fSVishal Verma 1133*5212e11fSVishal Verma static int btt_rw_page(struct block_device *bdev, sector_t sector, 1134*5212e11fSVishal Verma struct page *page, int rw) 1135*5212e11fSVishal Verma { 1136*5212e11fSVishal Verma struct btt *btt = bdev->bd_disk->private_data; 1137*5212e11fSVishal Verma 1138*5212e11fSVishal Verma btt_do_bvec(btt, page, PAGE_CACHE_SIZE, 0, rw, sector); 1139*5212e11fSVishal Verma page_endio(page, rw & WRITE, 0); 1140*5212e11fSVishal Verma return 0; 1141*5212e11fSVishal Verma } 1142*5212e11fSVishal Verma 1143*5212e11fSVishal Verma 1144*5212e11fSVishal Verma static int btt_getgeo(struct block_device *bd, struct hd_geometry *geo) 1145*5212e11fSVishal Verma { 1146*5212e11fSVishal Verma /* some standard values */ 1147*5212e11fSVishal Verma geo->heads = 1 << 6; 1148*5212e11fSVishal Verma geo->sectors = 1 << 5; 1149*5212e11fSVishal Verma geo->cylinders = get_capacity(bd->bd_disk) >> 11; 1150*5212e11fSVishal Verma return 0; 1151*5212e11fSVishal Verma } 1152*5212e11fSVishal Verma 1153*5212e11fSVishal Verma static const struct block_device_operations btt_fops = { 1154*5212e11fSVishal Verma .owner = THIS_MODULE, 1155*5212e11fSVishal Verma .rw_page = btt_rw_page, 1156*5212e11fSVishal Verma .getgeo = btt_getgeo, 1157*5212e11fSVishal Verma }; 1158*5212e11fSVishal Verma 1159*5212e11fSVishal Verma static int btt_blk_init(struct btt *btt) 1160*5212e11fSVishal Verma { 1161*5212e11fSVishal Verma struct nd_btt *nd_btt = btt->nd_btt; 1162*5212e11fSVishal Verma struct nd_namespace_common *ndns = nd_btt->ndns; 1163*5212e11fSVishal Verma 1164*5212e11fSVishal Verma /* create a new disk and request queue for btt */ 1165*5212e11fSVishal Verma btt->btt_queue = blk_alloc_queue(GFP_KERNEL); 1166*5212e11fSVishal Verma if (!btt->btt_queue) 1167*5212e11fSVishal Verma return -ENOMEM; 1168*5212e11fSVishal Verma 1169*5212e11fSVishal Verma btt->btt_disk = alloc_disk(0); 1170*5212e11fSVishal Verma if (!btt->btt_disk) { 1171*5212e11fSVishal Verma blk_cleanup_queue(btt->btt_queue); 1172*5212e11fSVishal Verma return -ENOMEM; 1173*5212e11fSVishal Verma } 1174*5212e11fSVishal Verma 1175*5212e11fSVishal Verma nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name); 1176*5212e11fSVishal Verma btt->btt_disk->driverfs_dev = &btt->nd_btt->dev; 1177*5212e11fSVishal Verma btt->btt_disk->major = btt_major; 1178*5212e11fSVishal Verma btt->btt_disk->first_minor = 0; 1179*5212e11fSVishal Verma btt->btt_disk->fops = &btt_fops; 1180*5212e11fSVishal Verma btt->btt_disk->private_data = btt; 1181*5212e11fSVishal Verma btt->btt_disk->queue = btt->btt_queue; 1182*5212e11fSVishal Verma btt->btt_disk->flags = GENHD_FL_EXT_DEVT; 1183*5212e11fSVishal Verma 1184*5212e11fSVishal Verma blk_queue_make_request(btt->btt_queue, btt_make_request); 1185*5212e11fSVishal Verma blk_queue_logical_block_size(btt->btt_queue, btt->sector_size); 1186*5212e11fSVishal Verma blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX); 1187*5212e11fSVishal Verma blk_queue_bounce_limit(btt->btt_queue, BLK_BOUNCE_ANY); 1188*5212e11fSVishal Verma queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue); 1189*5212e11fSVishal Verma btt->btt_queue->queuedata = btt; 1190*5212e11fSVishal Verma 1191*5212e11fSVishal Verma set_capacity(btt->btt_disk, 1192*5212e11fSVishal Verma btt->nlba * btt->sector_size >> SECTOR_SHIFT); 1193*5212e11fSVishal Verma add_disk(btt->btt_disk); 1194*5212e11fSVishal Verma 1195*5212e11fSVishal Verma return 0; 1196*5212e11fSVishal Verma } 1197*5212e11fSVishal Verma 1198*5212e11fSVishal Verma static void btt_blk_cleanup(struct btt *btt) 1199*5212e11fSVishal Verma { 1200*5212e11fSVishal Verma del_gendisk(btt->btt_disk); 1201*5212e11fSVishal Verma put_disk(btt->btt_disk); 1202*5212e11fSVishal Verma blk_cleanup_queue(btt->btt_queue); 1203*5212e11fSVishal Verma } 1204*5212e11fSVishal Verma 1205*5212e11fSVishal Verma /** 1206*5212e11fSVishal Verma * btt_init - initialize a block translation table for the given device 1207*5212e11fSVishal Verma * @nd_btt: device with BTT geometry and backing device info 1208*5212e11fSVishal Verma * @rawsize: raw size in bytes of the backing device 1209*5212e11fSVishal Verma * @lbasize: lba size of the backing device 1210*5212e11fSVishal Verma * @uuid: A uuid for the backing device - this is stored on media 1211*5212e11fSVishal Verma * @maxlane: maximum number of parallel requests the device can handle 1212*5212e11fSVishal Verma * 1213*5212e11fSVishal Verma * Initialize a Block Translation Table on a backing device to provide 1214*5212e11fSVishal Verma * single sector power fail atomicity. 1215*5212e11fSVishal Verma * 1216*5212e11fSVishal Verma * Context: 1217*5212e11fSVishal Verma * Might sleep. 1218*5212e11fSVishal Verma * 1219*5212e11fSVishal Verma * Returns: 1220*5212e11fSVishal Verma * Pointer to a new struct btt on success, NULL on failure. 1221*5212e11fSVishal Verma */ 1222*5212e11fSVishal Verma static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize, 1223*5212e11fSVishal Verma u32 lbasize, u8 *uuid, struct nd_region *nd_region) 1224*5212e11fSVishal Verma { 1225*5212e11fSVishal Verma int ret; 1226*5212e11fSVishal Verma struct btt *btt; 1227*5212e11fSVishal Verma struct device *dev = &nd_btt->dev; 1228*5212e11fSVishal Verma 1229*5212e11fSVishal Verma btt = kzalloc(sizeof(struct btt), GFP_KERNEL); 1230*5212e11fSVishal Verma if (!btt) 1231*5212e11fSVishal Verma return NULL; 1232*5212e11fSVishal Verma 1233*5212e11fSVishal Verma btt->nd_btt = nd_btt; 1234*5212e11fSVishal Verma btt->rawsize = rawsize; 1235*5212e11fSVishal Verma btt->lbasize = lbasize; 1236*5212e11fSVishal Verma btt->sector_size = ((lbasize >= 4096) ? 4096 : 512); 1237*5212e11fSVishal Verma INIT_LIST_HEAD(&btt->arena_list); 1238*5212e11fSVishal Verma mutex_init(&btt->init_lock); 1239*5212e11fSVishal Verma btt->nd_region = nd_region; 1240*5212e11fSVishal Verma 1241*5212e11fSVishal Verma ret = discover_arenas(btt); 1242*5212e11fSVishal Verma if (ret) { 1243*5212e11fSVishal Verma dev_err(dev, "init: error in arena_discover: %d\n", ret); 1244*5212e11fSVishal Verma goto out_free; 1245*5212e11fSVishal Verma } 1246*5212e11fSVishal Verma 1247*5212e11fSVishal Verma if (btt->init_state != INIT_READY) { 1248*5212e11fSVishal Verma btt->num_arenas = (rawsize / ARENA_MAX_SIZE) + 1249*5212e11fSVishal Verma ((rawsize % ARENA_MAX_SIZE) ? 1 : 0); 1250*5212e11fSVishal Verma dev_dbg(dev, "init: %d arenas for %llu rawsize\n", 1251*5212e11fSVishal Verma btt->num_arenas, rawsize); 1252*5212e11fSVishal Verma 1253*5212e11fSVishal Verma ret = create_arenas(btt); 1254*5212e11fSVishal Verma if (ret) { 1255*5212e11fSVishal Verma dev_info(dev, "init: create_arenas: %d\n", ret); 1256*5212e11fSVishal Verma goto out_free; 1257*5212e11fSVishal Verma } 1258*5212e11fSVishal Verma 1259*5212e11fSVishal Verma ret = btt_meta_init(btt); 1260*5212e11fSVishal Verma if (ret) { 1261*5212e11fSVishal Verma dev_err(dev, "init: error in meta_init: %d\n", ret); 1262*5212e11fSVishal Verma return NULL; 1263*5212e11fSVishal Verma } 1264*5212e11fSVishal Verma } 1265*5212e11fSVishal Verma 1266*5212e11fSVishal Verma ret = btt_blk_init(btt); 1267*5212e11fSVishal Verma if (ret) { 1268*5212e11fSVishal Verma dev_err(dev, "init: error in blk_init: %d\n", ret); 1269*5212e11fSVishal Verma goto out_free; 1270*5212e11fSVishal Verma } 1271*5212e11fSVishal Verma 1272*5212e11fSVishal Verma btt_debugfs_init(btt); 1273*5212e11fSVishal Verma 1274*5212e11fSVishal Verma return btt; 1275*5212e11fSVishal Verma 1276*5212e11fSVishal Verma out_free: 1277*5212e11fSVishal Verma kfree(btt); 1278*5212e11fSVishal Verma return NULL; 1279*5212e11fSVishal Verma } 1280*5212e11fSVishal Verma 1281*5212e11fSVishal Verma /** 1282*5212e11fSVishal Verma * btt_fini - de-initialize a BTT 1283*5212e11fSVishal Verma * @btt: the BTT handle that was generated by btt_init 1284*5212e11fSVishal Verma * 1285*5212e11fSVishal Verma * De-initialize a Block Translation Table on device removal 1286*5212e11fSVishal Verma * 1287*5212e11fSVishal Verma * Context: 1288*5212e11fSVishal Verma * Might sleep. 1289*5212e11fSVishal Verma */ 1290*5212e11fSVishal Verma static void btt_fini(struct btt *btt) 1291*5212e11fSVishal Verma { 1292*5212e11fSVishal Verma if (btt) { 1293*5212e11fSVishal Verma btt_blk_cleanup(btt); 1294*5212e11fSVishal Verma free_arenas(btt); 1295*5212e11fSVishal Verma debugfs_remove_recursive(btt->debugfs_dir); 1296*5212e11fSVishal Verma kfree(btt); 1297*5212e11fSVishal Verma } 1298*5212e11fSVishal Verma } 1299*5212e11fSVishal Verma 1300*5212e11fSVishal Verma int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns) 1301*5212e11fSVishal Verma { 1302*5212e11fSVishal Verma struct nd_btt *nd_btt = to_nd_btt(ndns->claim); 1303*5212e11fSVishal Verma struct nd_region *nd_region; 1304*5212e11fSVishal Verma struct btt *btt; 1305*5212e11fSVishal Verma size_t rawsize; 1306*5212e11fSVishal Verma 1307*5212e11fSVishal Verma if (!nd_btt->uuid || !nd_btt->ndns || !nd_btt->lbasize) 1308*5212e11fSVishal Verma return -ENODEV; 1309*5212e11fSVishal Verma 1310*5212e11fSVishal Verma rawsize = nvdimm_namespace_capacity(ndns) - SZ_4K; 1311*5212e11fSVishal Verma if (rawsize < ARENA_MIN_SIZE) { 1312*5212e11fSVishal Verma return -ENXIO; 1313*5212e11fSVishal Verma } 1314*5212e11fSVishal Verma nd_region = to_nd_region(nd_btt->dev.parent); 1315*5212e11fSVishal Verma btt = btt_init(nd_btt, rawsize, nd_btt->lbasize, nd_btt->uuid, 1316*5212e11fSVishal Verma nd_region); 1317*5212e11fSVishal Verma if (!btt) 1318*5212e11fSVishal Verma return -ENOMEM; 1319*5212e11fSVishal Verma nd_btt->btt = btt; 1320*5212e11fSVishal Verma 1321*5212e11fSVishal Verma return 0; 1322*5212e11fSVishal Verma } 1323*5212e11fSVishal Verma EXPORT_SYMBOL(nvdimm_namespace_attach_btt); 1324*5212e11fSVishal Verma 1325*5212e11fSVishal Verma int nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns) 1326*5212e11fSVishal Verma { 1327*5212e11fSVishal Verma struct nd_btt *nd_btt = to_nd_btt(ndns->claim); 1328*5212e11fSVishal Verma struct btt *btt = nd_btt->btt; 1329*5212e11fSVishal Verma 1330*5212e11fSVishal Verma btt_fini(btt); 1331*5212e11fSVishal Verma nd_btt->btt = NULL; 1332*5212e11fSVishal Verma 1333*5212e11fSVishal Verma return 0; 1334*5212e11fSVishal Verma } 1335*5212e11fSVishal Verma EXPORT_SYMBOL(nvdimm_namespace_detach_btt); 1336*5212e11fSVishal Verma 1337*5212e11fSVishal Verma static int __init nd_btt_init(void) 1338*5212e11fSVishal Verma { 1339*5212e11fSVishal Verma int rc; 1340*5212e11fSVishal Verma 1341*5212e11fSVishal Verma BUILD_BUG_ON(sizeof(struct btt_sb) != SZ_4K); 1342*5212e11fSVishal Verma 1343*5212e11fSVishal Verma btt_major = register_blkdev(0, "btt"); 1344*5212e11fSVishal Verma if (btt_major < 0) 1345*5212e11fSVishal Verma return btt_major; 1346*5212e11fSVishal Verma 1347*5212e11fSVishal Verma debugfs_root = debugfs_create_dir("btt", NULL); 1348*5212e11fSVishal Verma if (IS_ERR_OR_NULL(debugfs_root)) { 1349*5212e11fSVishal Verma rc = -ENXIO; 1350*5212e11fSVishal Verma goto err_debugfs; 1351*5212e11fSVishal Verma } 1352*5212e11fSVishal Verma 1353*5212e11fSVishal Verma return 0; 1354*5212e11fSVishal Verma 1355*5212e11fSVishal Verma err_debugfs: 1356*5212e11fSVishal Verma unregister_blkdev(btt_major, "btt"); 1357*5212e11fSVishal Verma 1358*5212e11fSVishal Verma return rc; 1359*5212e11fSVishal Verma } 1360*5212e11fSVishal Verma 1361*5212e11fSVishal Verma static void __exit nd_btt_exit(void) 1362*5212e11fSVishal Verma { 1363*5212e11fSVishal Verma debugfs_remove_recursive(debugfs_root); 1364*5212e11fSVishal Verma unregister_blkdev(btt_major, "btt"); 1365*5212e11fSVishal Verma } 1366*5212e11fSVishal Verma 1367*5212e11fSVishal Verma MODULE_ALIAS_ND_DEVICE(ND_DEVICE_BTT); 1368*5212e11fSVishal Verma MODULE_AUTHOR("Vishal Verma <vishal.l.verma@linux.intel.com>"); 1369*5212e11fSVishal Verma MODULE_LICENSE("GPL v2"); 1370*5212e11fSVishal Verma module_init(nd_btt_init); 1371*5212e11fSVishal Verma module_exit(nd_btt_exit); 1372