1*298fb372SMike Snitzer /* 2*298fb372SMike Snitzer * Copyright (C) 2004-2005 IBM Corp. All Rights Reserved. 3*298fb372SMike Snitzer * Copyright (C) 2006-2009 NEC Corporation. 4*298fb372SMike Snitzer * 5*298fb372SMike Snitzer * dm-queue-length.c 6*298fb372SMike Snitzer * 7*298fb372SMike Snitzer * Module Author: Stefan Bader, IBM 8*298fb372SMike Snitzer * Modified by: Kiyoshi Ueda, NEC 9*298fb372SMike Snitzer * 10*298fb372SMike Snitzer * This file is released under the GPL. 11*298fb372SMike Snitzer * 12*298fb372SMike Snitzer * queue-length path selector - choose a path with the least number of 13*298fb372SMike Snitzer * in-flight I/Os. 14*298fb372SMike Snitzer */ 15*298fb372SMike Snitzer 16*298fb372SMike Snitzer #include "dm.h" 17*298fb372SMike Snitzer #include "dm-path-selector.h" 18*298fb372SMike Snitzer 19*298fb372SMike Snitzer #include <linux/slab.h> 20*298fb372SMike Snitzer #include <linux/ctype.h> 21*298fb372SMike Snitzer #include <linux/errno.h> 22*298fb372SMike Snitzer #include <linux/module.h> 23*298fb372SMike Snitzer #include <linux/atomic.h> 24*298fb372SMike Snitzer 25*298fb372SMike Snitzer #define DM_MSG_PREFIX "multipath queue-length" 26*298fb372SMike Snitzer #define QL_MIN_IO 1 27*298fb372SMike Snitzer #define QL_VERSION "0.2.0" 28*298fb372SMike Snitzer 29*298fb372SMike Snitzer struct selector { 30*298fb372SMike Snitzer struct list_head valid_paths; 31*298fb372SMike Snitzer struct list_head failed_paths; 32*298fb372SMike Snitzer spinlock_t lock; 33*298fb372SMike Snitzer }; 34*298fb372SMike Snitzer 35*298fb372SMike Snitzer struct path_info { 36*298fb372SMike Snitzer struct list_head list; 37*298fb372SMike Snitzer struct dm_path *path; 38*298fb372SMike Snitzer unsigned repeat_count; 39*298fb372SMike Snitzer atomic_t qlen; /* the number of in-flight I/Os */ 40*298fb372SMike Snitzer }; 41*298fb372SMike Snitzer 42*298fb372SMike Snitzer static struct selector *alloc_selector(void) 43*298fb372SMike Snitzer { 44*298fb372SMike Snitzer struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL); 45*298fb372SMike Snitzer 46*298fb372SMike Snitzer if (s) { 47*298fb372SMike Snitzer INIT_LIST_HEAD(&s->valid_paths); 48*298fb372SMike Snitzer INIT_LIST_HEAD(&s->failed_paths); 49*298fb372SMike Snitzer spin_lock_init(&s->lock); 50*298fb372SMike Snitzer } 51*298fb372SMike Snitzer 52*298fb372SMike Snitzer return s; 53*298fb372SMike Snitzer } 54*298fb372SMike Snitzer 55*298fb372SMike Snitzer static int ql_create(struct path_selector *ps, unsigned argc, char **argv) 56*298fb372SMike Snitzer { 57*298fb372SMike Snitzer struct selector *s = alloc_selector(); 58*298fb372SMike Snitzer 59*298fb372SMike Snitzer if (!s) 60*298fb372SMike Snitzer return -ENOMEM; 61*298fb372SMike Snitzer 62*298fb372SMike Snitzer ps->context = s; 63*298fb372SMike Snitzer return 0; 64*298fb372SMike Snitzer } 65*298fb372SMike Snitzer 66*298fb372SMike Snitzer static void ql_free_paths(struct list_head *paths) 67*298fb372SMike Snitzer { 68*298fb372SMike Snitzer struct path_info *pi, *next; 69*298fb372SMike Snitzer 70*298fb372SMike Snitzer list_for_each_entry_safe(pi, next, paths, list) { 71*298fb372SMike Snitzer list_del(&pi->list); 72*298fb372SMike Snitzer kfree(pi); 73*298fb372SMike Snitzer } 74*298fb372SMike Snitzer } 75*298fb372SMike Snitzer 76*298fb372SMike Snitzer static void ql_destroy(struct path_selector *ps) 77*298fb372SMike Snitzer { 78*298fb372SMike Snitzer struct selector *s = ps->context; 79*298fb372SMike Snitzer 80*298fb372SMike Snitzer ql_free_paths(&s->valid_paths); 81*298fb372SMike Snitzer ql_free_paths(&s->failed_paths); 82*298fb372SMike Snitzer kfree(s); 83*298fb372SMike Snitzer ps->context = NULL; 84*298fb372SMike Snitzer } 85*298fb372SMike Snitzer 86*298fb372SMike Snitzer static int ql_status(struct path_selector *ps, struct dm_path *path, 87*298fb372SMike Snitzer status_type_t type, char *result, unsigned maxlen) 88*298fb372SMike Snitzer { 89*298fb372SMike Snitzer unsigned sz = 0; 90*298fb372SMike Snitzer struct path_info *pi; 91*298fb372SMike Snitzer 92*298fb372SMike Snitzer /* When called with NULL path, return selector status/args. */ 93*298fb372SMike Snitzer if (!path) 94*298fb372SMike Snitzer DMEMIT("0 "); 95*298fb372SMike Snitzer else { 96*298fb372SMike Snitzer pi = path->pscontext; 97*298fb372SMike Snitzer 98*298fb372SMike Snitzer switch (type) { 99*298fb372SMike Snitzer case STATUSTYPE_INFO: 100*298fb372SMike Snitzer DMEMIT("%d ", atomic_read(&pi->qlen)); 101*298fb372SMike Snitzer break; 102*298fb372SMike Snitzer case STATUSTYPE_TABLE: 103*298fb372SMike Snitzer DMEMIT("%u ", pi->repeat_count); 104*298fb372SMike Snitzer break; 105*298fb372SMike Snitzer } 106*298fb372SMike Snitzer } 107*298fb372SMike Snitzer 108*298fb372SMike Snitzer return sz; 109*298fb372SMike Snitzer } 110*298fb372SMike Snitzer 111*298fb372SMike Snitzer static int ql_add_path(struct path_selector *ps, struct dm_path *path, 112*298fb372SMike Snitzer int argc, char **argv, char **error) 113*298fb372SMike Snitzer { 114*298fb372SMike Snitzer struct selector *s = ps->context; 115*298fb372SMike Snitzer struct path_info *pi; 116*298fb372SMike Snitzer unsigned repeat_count = QL_MIN_IO; 117*298fb372SMike Snitzer char dummy; 118*298fb372SMike Snitzer unsigned long flags; 119*298fb372SMike Snitzer 120*298fb372SMike Snitzer /* 121*298fb372SMike Snitzer * Arguments: [<repeat_count>] 122*298fb372SMike Snitzer * <repeat_count>: The number of I/Os before switching path. 123*298fb372SMike Snitzer * If not given, default (QL_MIN_IO) is used. 124*298fb372SMike Snitzer */ 125*298fb372SMike Snitzer if (argc > 1) { 126*298fb372SMike Snitzer *error = "queue-length ps: incorrect number of arguments"; 127*298fb372SMike Snitzer return -EINVAL; 128*298fb372SMike Snitzer } 129*298fb372SMike Snitzer 130*298fb372SMike Snitzer if ((argc == 1) && (sscanf(argv[0], "%u%c", &repeat_count, &dummy) != 1)) { 131*298fb372SMike Snitzer *error = "queue-length ps: invalid repeat count"; 132*298fb372SMike Snitzer return -EINVAL; 133*298fb372SMike Snitzer } 134*298fb372SMike Snitzer 135*298fb372SMike Snitzer if (repeat_count > 1) { 136*298fb372SMike Snitzer DMWARN_LIMIT("repeat_count > 1 is deprecated, using 1 instead"); 137*298fb372SMike Snitzer repeat_count = 1; 138*298fb372SMike Snitzer } 139*298fb372SMike Snitzer 140*298fb372SMike Snitzer /* Allocate the path information structure */ 141*298fb372SMike Snitzer pi = kmalloc(sizeof(*pi), GFP_KERNEL); 142*298fb372SMike Snitzer if (!pi) { 143*298fb372SMike Snitzer *error = "queue-length ps: Error allocating path information"; 144*298fb372SMike Snitzer return -ENOMEM; 145*298fb372SMike Snitzer } 146*298fb372SMike Snitzer 147*298fb372SMike Snitzer pi->path = path; 148*298fb372SMike Snitzer pi->repeat_count = repeat_count; 149*298fb372SMike Snitzer atomic_set(&pi->qlen, 0); 150*298fb372SMike Snitzer 151*298fb372SMike Snitzer path->pscontext = pi; 152*298fb372SMike Snitzer 153*298fb372SMike Snitzer spin_lock_irqsave(&s->lock, flags); 154*298fb372SMike Snitzer list_add_tail(&pi->list, &s->valid_paths); 155*298fb372SMike Snitzer spin_unlock_irqrestore(&s->lock, flags); 156*298fb372SMike Snitzer 157*298fb372SMike Snitzer return 0; 158*298fb372SMike Snitzer } 159*298fb372SMike Snitzer 160*298fb372SMike Snitzer static void ql_fail_path(struct path_selector *ps, struct dm_path *path) 161*298fb372SMike Snitzer { 162*298fb372SMike Snitzer struct selector *s = ps->context; 163*298fb372SMike Snitzer struct path_info *pi = path->pscontext; 164*298fb372SMike Snitzer unsigned long flags; 165*298fb372SMike Snitzer 166*298fb372SMike Snitzer spin_lock_irqsave(&s->lock, flags); 167*298fb372SMike Snitzer list_move(&pi->list, &s->failed_paths); 168*298fb372SMike Snitzer spin_unlock_irqrestore(&s->lock, flags); 169*298fb372SMike Snitzer } 170*298fb372SMike Snitzer 171*298fb372SMike Snitzer static int ql_reinstate_path(struct path_selector *ps, struct dm_path *path) 172*298fb372SMike Snitzer { 173*298fb372SMike Snitzer struct selector *s = ps->context; 174*298fb372SMike Snitzer struct path_info *pi = path->pscontext; 175*298fb372SMike Snitzer unsigned long flags; 176*298fb372SMike Snitzer 177*298fb372SMike Snitzer spin_lock_irqsave(&s->lock, flags); 178*298fb372SMike Snitzer list_move_tail(&pi->list, &s->valid_paths); 179*298fb372SMike Snitzer spin_unlock_irqrestore(&s->lock, flags); 180*298fb372SMike Snitzer 181*298fb372SMike Snitzer return 0; 182*298fb372SMike Snitzer } 183*298fb372SMike Snitzer 184*298fb372SMike Snitzer /* 185*298fb372SMike Snitzer * Select a path having the minimum number of in-flight I/Os 186*298fb372SMike Snitzer */ 187*298fb372SMike Snitzer static struct dm_path *ql_select_path(struct path_selector *ps, size_t nr_bytes) 188*298fb372SMike Snitzer { 189*298fb372SMike Snitzer struct selector *s = ps->context; 190*298fb372SMike Snitzer struct path_info *pi = NULL, *best = NULL; 191*298fb372SMike Snitzer struct dm_path *ret = NULL; 192*298fb372SMike Snitzer unsigned long flags; 193*298fb372SMike Snitzer 194*298fb372SMike Snitzer spin_lock_irqsave(&s->lock, flags); 195*298fb372SMike Snitzer if (list_empty(&s->valid_paths)) 196*298fb372SMike Snitzer goto out; 197*298fb372SMike Snitzer 198*298fb372SMike Snitzer list_for_each_entry(pi, &s->valid_paths, list) { 199*298fb372SMike Snitzer if (!best || 200*298fb372SMike Snitzer (atomic_read(&pi->qlen) < atomic_read(&best->qlen))) 201*298fb372SMike Snitzer best = pi; 202*298fb372SMike Snitzer 203*298fb372SMike Snitzer if (!atomic_read(&best->qlen)) 204*298fb372SMike Snitzer break; 205*298fb372SMike Snitzer } 206*298fb372SMike Snitzer 207*298fb372SMike Snitzer if (!best) 208*298fb372SMike Snitzer goto out; 209*298fb372SMike Snitzer 210*298fb372SMike Snitzer /* Move most recently used to least preferred to evenly balance. */ 211*298fb372SMike Snitzer list_move_tail(&best->list, &s->valid_paths); 212*298fb372SMike Snitzer 213*298fb372SMike Snitzer ret = best->path; 214*298fb372SMike Snitzer out: 215*298fb372SMike Snitzer spin_unlock_irqrestore(&s->lock, flags); 216*298fb372SMike Snitzer return ret; 217*298fb372SMike Snitzer } 218*298fb372SMike Snitzer 219*298fb372SMike Snitzer static int ql_start_io(struct path_selector *ps, struct dm_path *path, 220*298fb372SMike Snitzer size_t nr_bytes) 221*298fb372SMike Snitzer { 222*298fb372SMike Snitzer struct path_info *pi = path->pscontext; 223*298fb372SMike Snitzer 224*298fb372SMike Snitzer atomic_inc(&pi->qlen); 225*298fb372SMike Snitzer 226*298fb372SMike Snitzer return 0; 227*298fb372SMike Snitzer } 228*298fb372SMike Snitzer 229*298fb372SMike Snitzer static int ql_end_io(struct path_selector *ps, struct dm_path *path, 230*298fb372SMike Snitzer size_t nr_bytes, u64 start_time) 231*298fb372SMike Snitzer { 232*298fb372SMike Snitzer struct path_info *pi = path->pscontext; 233*298fb372SMike Snitzer 234*298fb372SMike Snitzer atomic_dec(&pi->qlen); 235*298fb372SMike Snitzer 236*298fb372SMike Snitzer return 0; 237*298fb372SMike Snitzer } 238*298fb372SMike Snitzer 239*298fb372SMike Snitzer static struct path_selector_type ql_ps = { 240*298fb372SMike Snitzer .name = "queue-length", 241*298fb372SMike Snitzer .module = THIS_MODULE, 242*298fb372SMike Snitzer .table_args = 1, 243*298fb372SMike Snitzer .info_args = 1, 244*298fb372SMike Snitzer .create = ql_create, 245*298fb372SMike Snitzer .destroy = ql_destroy, 246*298fb372SMike Snitzer .status = ql_status, 247*298fb372SMike Snitzer .add_path = ql_add_path, 248*298fb372SMike Snitzer .fail_path = ql_fail_path, 249*298fb372SMike Snitzer .reinstate_path = ql_reinstate_path, 250*298fb372SMike Snitzer .select_path = ql_select_path, 251*298fb372SMike Snitzer .start_io = ql_start_io, 252*298fb372SMike Snitzer .end_io = ql_end_io, 253*298fb372SMike Snitzer }; 254*298fb372SMike Snitzer 255*298fb372SMike Snitzer static int __init dm_ql_init(void) 256*298fb372SMike Snitzer { 257*298fb372SMike Snitzer int r = dm_register_path_selector(&ql_ps); 258*298fb372SMike Snitzer 259*298fb372SMike Snitzer if (r < 0) 260*298fb372SMike Snitzer DMERR("register failed %d", r); 261*298fb372SMike Snitzer 262*298fb372SMike Snitzer DMINFO("version " QL_VERSION " loaded"); 263*298fb372SMike Snitzer 264*298fb372SMike Snitzer return r; 265*298fb372SMike Snitzer } 266*298fb372SMike Snitzer 267*298fb372SMike Snitzer static void __exit dm_ql_exit(void) 268*298fb372SMike Snitzer { 269*298fb372SMike Snitzer int r = dm_unregister_path_selector(&ql_ps); 270*298fb372SMike Snitzer 271*298fb372SMike Snitzer if (r < 0) 272*298fb372SMike Snitzer DMERR("unregister failed %d", r); 273*298fb372SMike Snitzer } 274*298fb372SMike Snitzer 275*298fb372SMike Snitzer module_init(dm_ql_init); 276*298fb372SMike Snitzer module_exit(dm_ql_exit); 277*298fb372SMike Snitzer 278*298fb372SMike Snitzer MODULE_AUTHOR("Stefan Bader <Stefan.Bader at de.ibm.com>"); 279*298fb372SMike Snitzer MODULE_DESCRIPTION( 280*298fb372SMike Snitzer "(C) Copyright IBM Corp. 2004,2005 All Rights Reserved.\n" 281*298fb372SMike Snitzer DM_NAME " path selector to balance the number of in-flight I/Os" 282*298fb372SMike Snitzer ); 283*298fb372SMike Snitzer MODULE_LICENSE("GPL"); 284