1298fb372SMike Snitzer // SPDX-License-Identifier: GPL-2.0-only
2298fb372SMike Snitzer /*
3298fb372SMike Snitzer * Copyright (C) 2020 Oracle Corporation
4298fb372SMike Snitzer *
5298fb372SMike Snitzer * Module Author: Mike Christie
6298fb372SMike Snitzer */
7298fb372SMike Snitzer #include "dm-path-selector.h"
8298fb372SMike Snitzer
9298fb372SMike Snitzer #include <linux/device-mapper.h>
10298fb372SMike Snitzer #include <linux/module.h>
11298fb372SMike Snitzer
12298fb372SMike Snitzer #define DM_MSG_PREFIX "multipath io-affinity"
13298fb372SMike Snitzer
14298fb372SMike Snitzer struct path_info {
15298fb372SMike Snitzer struct dm_path *path;
16298fb372SMike Snitzer cpumask_var_t cpumask;
17298fb372SMike Snitzer refcount_t refcount;
18298fb372SMike Snitzer bool failed;
19298fb372SMike Snitzer };
20298fb372SMike Snitzer
21298fb372SMike Snitzer struct selector {
22298fb372SMike Snitzer struct path_info **path_map;
23298fb372SMike Snitzer cpumask_var_t path_mask;
24298fb372SMike Snitzer atomic_t map_misses;
25298fb372SMike Snitzer };
26298fb372SMike Snitzer
ioa_free_path(struct selector * s,unsigned int cpu)27298fb372SMike Snitzer static void ioa_free_path(struct selector *s, unsigned int cpu)
28298fb372SMike Snitzer {
29298fb372SMike Snitzer struct path_info *pi = s->path_map[cpu];
30298fb372SMike Snitzer
31298fb372SMike Snitzer if (!pi)
32298fb372SMike Snitzer return;
33298fb372SMike Snitzer
34298fb372SMike Snitzer if (refcount_dec_and_test(&pi->refcount)) {
35298fb372SMike Snitzer cpumask_clear_cpu(cpu, s->path_mask);
36298fb372SMike Snitzer free_cpumask_var(pi->cpumask);
37298fb372SMike Snitzer kfree(pi);
38298fb372SMike Snitzer
39298fb372SMike Snitzer s->path_map[cpu] = NULL;
40298fb372SMike Snitzer }
41298fb372SMike Snitzer }
42298fb372SMike Snitzer
ioa_add_path(struct path_selector * ps,struct dm_path * path,int argc,char ** argv,char ** error)43298fb372SMike Snitzer static int ioa_add_path(struct path_selector *ps, struct dm_path *path,
44298fb372SMike Snitzer int argc, char **argv, char **error)
45298fb372SMike Snitzer {
46298fb372SMike Snitzer struct selector *s = ps->context;
47298fb372SMike Snitzer struct path_info *pi = NULL;
48298fb372SMike Snitzer unsigned int cpu;
49298fb372SMike Snitzer int ret;
50298fb372SMike Snitzer
51298fb372SMike Snitzer if (argc != 1) {
52298fb372SMike Snitzer *error = "io-affinity ps: invalid number of arguments";
53298fb372SMike Snitzer return -EINVAL;
54298fb372SMike Snitzer }
55298fb372SMike Snitzer
56298fb372SMike Snitzer pi = kzalloc(sizeof(*pi), GFP_KERNEL);
57298fb372SMike Snitzer if (!pi) {
58298fb372SMike Snitzer *error = "io-affinity ps: Error allocating path context";
59298fb372SMike Snitzer return -ENOMEM;
60298fb372SMike Snitzer }
61298fb372SMike Snitzer
62298fb372SMike Snitzer pi->path = path;
63298fb372SMike Snitzer path->pscontext = pi;
64298fb372SMike Snitzer refcount_set(&pi->refcount, 1);
65298fb372SMike Snitzer
66298fb372SMike Snitzer if (!zalloc_cpumask_var(&pi->cpumask, GFP_KERNEL)) {
67298fb372SMike Snitzer *error = "io-affinity ps: Error allocating cpumask context";
68298fb372SMike Snitzer ret = -ENOMEM;
69298fb372SMike Snitzer goto free_pi;
70298fb372SMike Snitzer }
71298fb372SMike Snitzer
72298fb372SMike Snitzer ret = cpumask_parse(argv[0], pi->cpumask);
73298fb372SMike Snitzer if (ret) {
74298fb372SMike Snitzer *error = "io-affinity ps: invalid cpumask";
75298fb372SMike Snitzer ret = -EINVAL;
76298fb372SMike Snitzer goto free_mask;
77298fb372SMike Snitzer }
78298fb372SMike Snitzer
79298fb372SMike Snitzer for_each_cpu(cpu, pi->cpumask) {
80298fb372SMike Snitzer if (cpu >= nr_cpu_ids) {
81298fb372SMike Snitzer DMWARN_LIMIT("Ignoring mapping for CPU %u. Max CPU is %u",
82298fb372SMike Snitzer cpu, nr_cpu_ids);
83298fb372SMike Snitzer break;
84298fb372SMike Snitzer }
85298fb372SMike Snitzer
86298fb372SMike Snitzer if (s->path_map[cpu]) {
87298fb372SMike Snitzer DMWARN("CPU mapping for %u exists. Ignoring.", cpu);
88298fb372SMike Snitzer continue;
89298fb372SMike Snitzer }
90298fb372SMike Snitzer
91298fb372SMike Snitzer cpumask_set_cpu(cpu, s->path_mask);
92298fb372SMike Snitzer s->path_map[cpu] = pi;
93298fb372SMike Snitzer refcount_inc(&pi->refcount);
94298fb372SMike Snitzer }
95298fb372SMike Snitzer
96298fb372SMike Snitzer if (refcount_dec_and_test(&pi->refcount)) {
97298fb372SMike Snitzer *error = "io-affinity ps: No new/valid CPU mapping found";
98298fb372SMike Snitzer ret = -EINVAL;
99298fb372SMike Snitzer goto free_mask;
100298fb372SMike Snitzer }
101298fb372SMike Snitzer
102298fb372SMike Snitzer return 0;
103298fb372SMike Snitzer
104298fb372SMike Snitzer free_mask:
105298fb372SMike Snitzer free_cpumask_var(pi->cpumask);
106298fb372SMike Snitzer free_pi:
107298fb372SMike Snitzer kfree(pi);
108298fb372SMike Snitzer return ret;
109298fb372SMike Snitzer }
110298fb372SMike Snitzer
ioa_create(struct path_selector * ps,unsigned int argc,char ** argv)111*86a3238cSHeinz Mauelshagen static int ioa_create(struct path_selector *ps, unsigned int argc, char **argv)
112298fb372SMike Snitzer {
113298fb372SMike Snitzer struct selector *s;
114298fb372SMike Snitzer
115298fb372SMike Snitzer s = kmalloc(sizeof(*s), GFP_KERNEL);
116298fb372SMike Snitzer if (!s)
117298fb372SMike Snitzer return -ENOMEM;
118298fb372SMike Snitzer
119298fb372SMike Snitzer s->path_map = kzalloc(nr_cpu_ids * sizeof(struct path_info *),
120298fb372SMike Snitzer GFP_KERNEL);
121298fb372SMike Snitzer if (!s->path_map)
122298fb372SMike Snitzer goto free_selector;
123298fb372SMike Snitzer
124298fb372SMike Snitzer if (!zalloc_cpumask_var(&s->path_mask, GFP_KERNEL))
125298fb372SMike Snitzer goto free_map;
126298fb372SMike Snitzer
127298fb372SMike Snitzer atomic_set(&s->map_misses, 0);
128298fb372SMike Snitzer ps->context = s;
129298fb372SMike Snitzer return 0;
130298fb372SMike Snitzer
131298fb372SMike Snitzer free_map:
132298fb372SMike Snitzer kfree(s->path_map);
133298fb372SMike Snitzer free_selector:
134298fb372SMike Snitzer kfree(s);
135298fb372SMike Snitzer return -ENOMEM;
136298fb372SMike Snitzer }
137298fb372SMike Snitzer
ioa_destroy(struct path_selector * ps)138298fb372SMike Snitzer static void ioa_destroy(struct path_selector *ps)
139298fb372SMike Snitzer {
140298fb372SMike Snitzer struct selector *s = ps->context;
141*86a3238cSHeinz Mauelshagen unsigned int cpu;
142298fb372SMike Snitzer
143298fb372SMike Snitzer for_each_cpu(cpu, s->path_mask)
144298fb372SMike Snitzer ioa_free_path(s, cpu);
145298fb372SMike Snitzer
146298fb372SMike Snitzer free_cpumask_var(s->path_mask);
147298fb372SMike Snitzer kfree(s->path_map);
148298fb372SMike Snitzer kfree(s);
149298fb372SMike Snitzer
150298fb372SMike Snitzer ps->context = NULL;
151298fb372SMike Snitzer }
152298fb372SMike Snitzer
/*
 * Emit status text into @result (at most @maxlen bytes) and return the
 * number of bytes written.
 *
 * With no @path, a bare "0 " is emitted.  For a path, STATUSTYPE_INFO emits
 * the selector's map-miss counter, STATUSTYPE_TABLE emits the path's cpumask
 * as a bitmap string, and STATUSTYPE_IMA yields an empty string.
 *
 * The locals sz, result and maxlen are referenced by name by DMEMIT().
 */
static int ioa_status(struct path_selector *ps, struct dm_path *path,
		      status_type_t type, char *result, unsigned int maxlen)
{
	struct selector *s = ps->context;
	int sz = 0;

	if (!path) {
		DMEMIT("0 ");
		return sz;
	}

	if (type == STATUSTYPE_INFO) {
		DMEMIT("%d ", atomic_read(&s->map_misses));
	} else if (type == STATUSTYPE_TABLE) {
		struct path_info *pi = path->pscontext;

		DMEMIT("%*pb ", cpumask_pr_args(pi->cpumask));
	} else if (type == STATUSTYPE_IMA) {
		*result = '\0';
	}

	return sz;
}
180298fb372SMike Snitzer
ioa_fail_path(struct path_selector * ps,struct dm_path * p)181298fb372SMike Snitzer static void ioa_fail_path(struct path_selector *ps, struct dm_path *p)
182298fb372SMike Snitzer {
183298fb372SMike Snitzer struct path_info *pi = p->pscontext;
184298fb372SMike Snitzer
185298fb372SMike Snitzer pi->failed = true;
186298fb372SMike Snitzer }
187298fb372SMike Snitzer
ioa_reinstate_path(struct path_selector * ps,struct dm_path * p)188298fb372SMike Snitzer static int ioa_reinstate_path(struct path_selector *ps, struct dm_path *p)
189298fb372SMike Snitzer {
190298fb372SMike Snitzer struct path_info *pi = p->pscontext;
191298fb372SMike Snitzer
192298fb372SMike Snitzer pi->failed = false;
193298fb372SMike Snitzer return 0;
194298fb372SMike Snitzer }
195298fb372SMike Snitzer
ioa_select_path(struct path_selector * ps,size_t nr_bytes)196298fb372SMike Snitzer static struct dm_path *ioa_select_path(struct path_selector *ps,
197298fb372SMike Snitzer size_t nr_bytes)
198298fb372SMike Snitzer {
199298fb372SMike Snitzer unsigned int cpu, node;
200298fb372SMike Snitzer struct selector *s = ps->context;
201298fb372SMike Snitzer const struct cpumask *cpumask;
202298fb372SMike Snitzer struct path_info *pi;
203298fb372SMike Snitzer int i;
204298fb372SMike Snitzer
205298fb372SMike Snitzer cpu = get_cpu();
206298fb372SMike Snitzer
207298fb372SMike Snitzer pi = s->path_map[cpu];
208298fb372SMike Snitzer if (pi && !pi->failed)
209298fb372SMike Snitzer goto done;
210298fb372SMike Snitzer
211298fb372SMike Snitzer /*
212298fb372SMike Snitzer * Perf is not optimal, but we at least try the local node then just
213298fb372SMike Snitzer * try not to fail.
214298fb372SMike Snitzer */
215298fb372SMike Snitzer if (!pi)
216298fb372SMike Snitzer atomic_inc(&s->map_misses);
217298fb372SMike Snitzer
218298fb372SMike Snitzer node = cpu_to_node(cpu);
219298fb372SMike Snitzer cpumask = cpumask_of_node(node);
220298fb372SMike Snitzer for_each_cpu(i, cpumask) {
221298fb372SMike Snitzer pi = s->path_map[i];
222298fb372SMike Snitzer if (pi && !pi->failed)
223298fb372SMike Snitzer goto done;
224298fb372SMike Snitzer }
225298fb372SMike Snitzer
226298fb372SMike Snitzer for_each_cpu(i, s->path_mask) {
227298fb372SMike Snitzer pi = s->path_map[i];
228298fb372SMike Snitzer if (pi && !pi->failed)
229298fb372SMike Snitzer goto done;
230298fb372SMike Snitzer }
231298fb372SMike Snitzer pi = NULL;
232298fb372SMike Snitzer
233298fb372SMike Snitzer done:
234298fb372SMike Snitzer put_cpu();
235298fb372SMike Snitzer return pi ? pi->path : NULL;
236298fb372SMike Snitzer }
237298fb372SMike Snitzer
238298fb372SMike Snitzer static struct path_selector_type ioa_ps = {
239298fb372SMike Snitzer .name = "io-affinity",
240298fb372SMike Snitzer .module = THIS_MODULE,
241298fb372SMike Snitzer .table_args = 1,
242298fb372SMike Snitzer .info_args = 1,
243298fb372SMike Snitzer .create = ioa_create,
244298fb372SMike Snitzer .destroy = ioa_destroy,
245298fb372SMike Snitzer .status = ioa_status,
246298fb372SMike Snitzer .add_path = ioa_add_path,
247298fb372SMike Snitzer .fail_path = ioa_fail_path,
248298fb372SMike Snitzer .reinstate_path = ioa_reinstate_path,
249298fb372SMike Snitzer .select_path = ioa_select_path,
250298fb372SMike Snitzer };
251298fb372SMike Snitzer
dm_ioa_init(void)252298fb372SMike Snitzer static int __init dm_ioa_init(void)
253298fb372SMike Snitzer {
254298fb372SMike Snitzer int ret = dm_register_path_selector(&ioa_ps);
255298fb372SMike Snitzer
256298fb372SMike Snitzer if (ret < 0)
257298fb372SMike Snitzer DMERR("register failed %d", ret);
258298fb372SMike Snitzer return ret;
259298fb372SMike Snitzer }
260298fb372SMike Snitzer
dm_ioa_exit(void)261298fb372SMike Snitzer static void __exit dm_ioa_exit(void)
262298fb372SMike Snitzer {
263298fb372SMike Snitzer int ret = dm_unregister_path_selector(&ioa_ps);
264298fb372SMike Snitzer
265298fb372SMike Snitzer if (ret < 0)
266298fb372SMike Snitzer DMERR("unregister failed %d", ret);
267298fb372SMike Snitzer }
268298fb372SMike Snitzer
269298fb372SMike Snitzer module_init(dm_ioa_init);
270298fb372SMike Snitzer module_exit(dm_ioa_exit);
271298fb372SMike Snitzer
272298fb372SMike Snitzer MODULE_DESCRIPTION(DM_NAME " multipath path selector that selects paths based on the CPU IO is being executed on");
273298fb372SMike Snitzer MODULE_AUTHOR("Mike Christie <michael.christie@oracle.com>");
274298fb372SMike Snitzer MODULE_LICENSE("GPL");
275