/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */
/*
 * aoedev.c
 * AoE device utility functions; maintains device list.
 */

#include <linux/hdreg.h>
#include <linux/blkdev.h>
#include <linux/netdevice.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
#include <linux/kdev_t.h>
#include <linux/moduleparam.h>
#include "aoe.h"

static void dummy_timer(ulong);
static void aoedev_freedev(struct aoedev *);
static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);

static int aoe_dyndevs = 1;
module_param(aoe_dyndevs, int, 0644);
MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");

static struct aoedev *devlist;
static DEFINE_SPINLOCK(devlist_lock);

/* Because some systems will have one, many, or no
 *   - partitions,
 *   - slots per shelf,
 *   - or shelves,
 * we need some flexibility in the way the minor numbers
 * are allocated. So they are dynamic.
 */
#define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)

static DEFINE_SPINLOCK(used_minors_lock);
static DECLARE_BITMAP(used_minors, N_DEVS);

static int
minor_get_dyn(ulong *sysminor)
{
	ulong flags;
	ulong n;
	int error = 0;

	spin_lock_irqsave(&used_minors_lock, flags);
	n = find_first_zero_bit(used_minors, N_DEVS);
	if (n < N_DEVS)
		set_bit(n, used_minors);
	else
		error = -1;
	spin_unlock_irqrestore(&used_minors_lock, flags);

	*sysminor = n * AOE_PARTITIONS;
	return error;
}

static int
minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
{
	ulong flags;
	ulong n;
	int error = 0;
	enum {
		/* for backwards compatibility when !aoe_dyndevs,
		 * a static number of supported slots per shelf */
		NPERSHELF = 16,
	};

	if (aoemin >= NPERSHELF) {
		pr_err("aoe: %s %d slots per shelf\n",
			"static minor device numbers support only",
			NPERSHELF);
		error = -1;
		goto out;
	}

	n = aoemaj * NPERSHELF + aoemin;
	if (n >= N_DEVS) {
		pr_err("aoe: %s with e%ld.%d\n",
			"cannot use static minor device numbers",
			aoemaj, aoemin);
		error = -1;
		goto out;
	}

	spin_lock_irqsave(&used_minors_lock, flags);
	if (test_bit(n, used_minors)) {
		pr_err("aoe: %s %lu\n",
			"existing device already has static minor number",
			n);
		error = -1;
	} else
		set_bit(n, used_minors);
	spin_unlock_irqrestore(&used_minors_lock, flags);
	*sysminor = n;
out:
	return error;
}

static int
minor_get(ulong *sysminor, ulong aoemaj, int aoemin)
{
	if (aoe_dyndevs)
		return minor_get_dyn(sysminor);
	else
		return minor_get_static(sysminor, aoemaj, aoemin);
}

static void
minor_free(ulong minor)
{
	ulong flags;

	minor /= AOE_PARTITIONS;
	BUG_ON(minor >= N_DEVS);

	spin_lock_irqsave(&used_minors_lock, flags);
	BUG_ON(!test_bit(minor, used_minors));
	clear_bit(minor, used_minors);
	spin_unlock_irqrestore(&used_minors_lock, flags);
}

/*
 * Users who grab a pointer to the device with aoedev_by_aoeaddr
 * automatically get a reference count and must be responsible
 * for performing an aoedev_put. With the addition of async
 * kthread processing I'm no longer confident that we can
 * guarantee consistency in the face of device flushes.
 *
 * For the time being, we only bother to add extra references for
 * frames sitting on the iocq. When the kthreads finish processing
 * these frames, they will aoedev_put the device.
 */
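
/* Illustrative sketch only (not one of the driver's real call sites):
 * a user of this interface looks a device up, which takes a reference,
 * and drops that reference once it is done with the device:
 *
 *	d = aoedev_by_aoeaddr(maj, min, 0);
 *	if (d == NULL)
 *		return;
 *	... use d ...
 *	aoedev_put(d);
 */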

void
aoedev_put(struct aoedev *d)
{
	ulong flags;

	spin_lock_irqsave(&devlist_lock, flags);
	d->ref--;
	spin_unlock_irqrestore(&devlist_lock, flags);
}

static void
dummy_timer(ulong vp)
{
	struct aoedev *d;

	d = (struct aoedev *)vp;
	if (d->flags & DEVFL_TKILL)
		return;
	d->timer.expires = jiffies + HZ;
	add_timer(&d->timer);
}

static void
aoe_failip(struct aoedev *d)
{
	struct request *rq;
	struct bio *bio;
	unsigned long n;

	aoe_failbuf(d, d->ip.buf);

	rq = d->ip.rq;
	if (rq == NULL)
		return;
	while ((bio = d->ip.nxbio)) {
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
		d->ip.nxbio = bio->bi_next;
		n = (unsigned long) rq->special;
		rq->special = (void *) --n;
	}
	if ((unsigned long) rq->special == 0)
		aoe_end_request(d, rq, 0);
}

static void
downdev_frame(struct list_head *pos)
{
	struct frame *f;

	f = list_entry(pos, struct frame, head);
	list_del(pos);
	if (f->buf) {
		f->buf->nframesout--;
		aoe_failbuf(f->t->d, f->buf);
	}
	aoe_freetframe(f);
}

void
aoedev_downdev(struct aoedev *d)
{
	struct aoetgt *t, **tt, **te;
	struct list_head *head, *pos, *nx;
	struct request *rq;
	int i;

	d->flags &= ~DEVFL_UP;

	/* clean out active and to-be-retransmitted buffers */
	for (i = 0; i < NFACTIVE; i++) {
		head = &d->factive[i];
		list_for_each_safe(pos, nx, head)
			downdev_frame(pos);
	}
	head = &d->rexmitq;
	list_for_each_safe(pos, nx, head)
		downdev_frame(pos);

	/* reset window dressings */
	tt = d->targets;
	te = tt + NTARGETS;
	for (; tt < te && (t = *tt); tt++) {
		aoecmd_wreset(t);
		t->nout = 0;
	}

	/* clean out the in-process request (if any) */
	aoe_failip(d);
	d->htgt = NULL;

	/* fast fail all pending I/O */
	if (d->blkq) {
		while ((rq = blk_peek_request(d->blkq))) {
			blk_start_request(rq);
			aoe_end_request(d, rq, 1);
		}
	}

	if (d->gd)
		set_capacity(d->gd, 0);
}

static void
aoedev_freedev(struct aoedev *d)
{
	struct aoetgt **t, **e;

	cancel_work_sync(&d->work);
	if (d->gd) {
		aoedisk_rm_sysfs(d);
		del_gendisk(d->gd);
		put_disk(d->gd);
		blk_cleanup_queue(d->blkq);
	}
	t = d->targets;
	e = t + NTARGETS;
	for (; t < e && *t; t++)
		freetgt(d, *t);
	if (d->bufpool)
		mempool_destroy(d->bufpool);
	skbpoolfree(d);
	minor_free(d->sysminor);
	kfree(d);
}

/* return whether the user asked for this particular
 * device to be flushed
 */
static int
user_req(char *s, size_t slen, struct aoedev *d)
{
	char *p;
	size_t lim;

	if (!d->gd)
		return 0;
	p = strrchr(d->gd->disk_name, '/');
	if (!p)
		p = d->gd->disk_name;
	else
		p += 1;
	lim = sizeof(d->gd->disk_name);
	lim -= p - d->gd->disk_name;
	if (slen < lim)
		lim = slen;

	return !strncmp(s, p, lim);
}

int
aoedev_flush(const char __user *str, size_t cnt)
{
	ulong flags;
	struct aoedev *d, **dd;
	struct aoedev *rmd = NULL;
	char buf[16];
	int all = 0;
	int specified = 0;	/* flush a specific device */

	if (cnt >= 3) {
		if (cnt > sizeof buf)
			cnt = sizeof buf;
		if (copy_from_user(buf, str, cnt))
			return -EFAULT;
		all = !strncmp(buf, "all", 3);
		if (!all)
			specified = 1;
	}

	spin_lock_irqsave(&devlist_lock, flags);
	dd = &devlist;
	while ((d = *dd)) {
		spin_lock(&d->lock);
		if (specified) {
			if (!user_req(buf, cnt, d))
				goto skip;
		} else if ((!all && (d->flags & DEVFL_UP))
		|| (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
		|| d->nopen
		|| d->ref)
			goto skip;

		*dd = d->next;
		aoedev_downdev(d);
		d->flags |= DEVFL_TKILL;
		spin_unlock(&d->lock);
		d->next = rmd;
		rmd = d;
		continue;
skip:
		spin_unlock(&d->lock);
		dd = &d->next;
	}
	spin_unlock_irqrestore(&devlist_lock, flags);
	while ((d = rmd)) {
		rmd = d->next;
		del_timer_sync(&d->timer);
		aoedev_freedev(d);	/* must be able to sleep */
	}
	return 0;
}

/* This has been confirmed to occur once with Tms=3*1000 due to the
 * driver changing link and not processing its transmit ring. The
 * problem is hard enough to solve by returning an error that I'm
 * still punting on "solving" this.
 */
static void
skbfree(struct sk_buff *skb)
{
	enum { Sms = 250, Tms = 30 * 1000};
	int i = Tms / Sms;

	if (skb == NULL)
		return;
	while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
		msleep(Sms);
	if (i < 0) {
		printk(KERN_ERR
			"aoe: %s holds ref: %s\n",
			skb->dev ? skb->dev->name : "netif",
			"cannot free skb -- memory leaked.");
		return;
	}
	skb->truesize -= skb->data_len;
	skb_shinfo(skb)->nr_frags = skb->data_len = 0;
	skb_trim(skb, 0);
	dev_kfree_skb(skb);
}

static void
skbpoolfree(struct aoedev *d)
{
	struct sk_buff *skb, *tmp;

	skb_queue_walk_safe(&d->skbpool, skb, tmp)
		skbfree(skb);

	__skb_queue_head_init(&d->skbpool);
}

/* find it or allocate it */
struct aoedev *
aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
{
	struct aoedev *d;
	int i;
	ulong flags;
	ulong sysminor;

	spin_lock_irqsave(&devlist_lock, flags);

	for (d=devlist; d; d=d->next)
		if (d->aoemajor == maj && d->aoeminor == min) {
			d->ref++;
			break;
		}
	if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
		goto out;
	d = kcalloc(1, sizeof *d, GFP_ATOMIC);
	if (!d)
		goto out;
	INIT_WORK(&d->work, aoecmd_sleepwork);
	spin_lock_init(&d->lock);
	skb_queue_head_init(&d->skbpool);
	init_timer(&d->timer);
	d->timer.data = (ulong) d;
	d->timer.function = dummy_timer;
	d->timer.expires = jiffies + HZ;
	add_timer(&d->timer);
	d->bufpool = NULL;	/* defer to aoeblk_gdalloc */
	d->tgt = d->targets;
	d->ref = 1;
	for (i = 0; i < NFACTIVE; i++)
		INIT_LIST_HEAD(&d->factive[i]);
	INIT_LIST_HEAD(&d->rexmitq);
	d->sysminor = sysminor;
	d->aoemajor = maj;
	d->aoeminor = min;
	d->rttavg = RTTAVG_INIT;
	d->rttdev = RTTDEV_INIT;
	d->next = devlist;
	devlist = d;
out:
	spin_unlock_irqrestore(&devlist_lock, flags);
	return d;
}

static void
freetgt(struct aoedev *d, struct aoetgt *t)
{
	struct frame *f;
	struct list_head *pos, *nx, *head;
	struct aoeif *ifp;

	for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
		if (!ifp->nd)
			break;
		dev_put(ifp->nd);
	}

	head = &t->ffree;
	list_for_each_safe(pos, nx, head) {
		list_del(pos);
		f = list_entry(pos, struct frame, head);
		skbfree(f->skb);
		kfree(f);
	}
	kfree(t);
}

void
aoedev_exit(void)
{
	struct aoedev *d;
	ulong flags;

	aoe_flush_iocq();
	while ((d = devlist)) {
		devlist = d->next;

		spin_lock_irqsave(&d->lock, flags);
		aoedev_downdev(d);
		d->flags |= DEVFL_TKILL;
		spin_unlock_irqrestore(&d->lock, flags);

		del_timer_sync(&d->timer);
		aoedev_freedev(d);
	}
}

int __init
aoedev_init(void)
{
	return 0;
}