xref: /openbmc/linux/net/ceph/ceph_common.c (revision 565d76cb)
1 
2 #include <linux/ceph/ceph_debug.h>
3 #include <linux/backing-dev.h>
4 #include <linux/ctype.h>
5 #include <linux/fs.h>
6 #include <linux/inet.h>
7 #include <linux/in6.h>
8 #include <linux/module.h>
9 #include <linux/mount.h>
10 #include <linux/parser.h>
11 #include <linux/sched.h>
12 #include <linux/seq_file.h>
13 #include <linux/slab.h>
14 #include <linux/statfs.h>
15 #include <linux/string.h>
16 
17 
18 #include <linux/ceph/libceph.h>
19 #include <linux/ceph/debugfs.h>
20 #include <linux/ceph/decode.h>
21 #include <linux/ceph/mon_client.h>
22 #include <linux/ceph/auth.h>
23 
24 
25 
/*
 * Locate the final component of a path (/foo/bar/baz -> baz).
 *
 * Only the first @len bytes of @s are examined.  The result points just
 * past the last '/' in that range; if there is no '/', @s itself is
 * returned, and if the range ends in '/', the result is s + len.
 */
const char *ceph_file_part(const char *s, int len)
{
	int pos;

	/* walk back from the end until the preceding byte is a slash */
	for (pos = len; pos != 0; pos--) {
		if (s[pos - 1] == '/')
			break;
	}
	return s + pos;
}
37 EXPORT_SYMBOL(ceph_file_part);
38 
39 const char *ceph_msg_type_name(int type)
40 {
41 	switch (type) {
42 	case CEPH_MSG_SHUTDOWN: return "shutdown";
43 	case CEPH_MSG_PING: return "ping";
44 	case CEPH_MSG_AUTH: return "auth";
45 	case CEPH_MSG_AUTH_REPLY: return "auth_reply";
46 	case CEPH_MSG_MON_MAP: return "mon_map";
47 	case CEPH_MSG_MON_GET_MAP: return "mon_get_map";
48 	case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe";
49 	case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack";
50 	case CEPH_MSG_STATFS: return "statfs";
51 	case CEPH_MSG_STATFS_REPLY: return "statfs_reply";
52 	case CEPH_MSG_MDS_MAP: return "mds_map";
53 	case CEPH_MSG_CLIENT_SESSION: return "client_session";
54 	case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
55 	case CEPH_MSG_CLIENT_REQUEST: return "client_request";
56 	case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward";
57 	case CEPH_MSG_CLIENT_REPLY: return "client_reply";
58 	case CEPH_MSG_CLIENT_CAPS: return "client_caps";
59 	case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release";
60 	case CEPH_MSG_CLIENT_SNAP: return "client_snap";
61 	case CEPH_MSG_CLIENT_LEASE: return "client_lease";
62 	case CEPH_MSG_OSD_MAP: return "osd_map";
63 	case CEPH_MSG_OSD_OP: return "osd_op";
64 	case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
65 	case CEPH_MSG_WATCH_NOTIFY: return "watch_notify";
66 	default: return "unknown";
67 	}
68 }
69 EXPORT_SYMBOL(ceph_msg_type_name);
70 
71 /*
72  * Initially learn our fsid, or verify an fsid matches.
73  */
74 int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
75 {
76 	if (client->have_fsid) {
77 		if (ceph_fsid_compare(&client->fsid, fsid)) {
78 			pr_err("bad fsid, had %pU got %pU",
79 			       &client->fsid, fsid);
80 			return -1;
81 		}
82 	} else {
83 		pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid);
84 		memcpy(&client->fsid, fsid, sizeof(*fsid));
85 		ceph_debugfs_client_init(client);
86 		client->have_fsid = true;
87 	}
88 	return 0;
89 }
90 EXPORT_SYMBOL(ceph_check_fsid);
91 
/*
 * strcmp() that tolerates NULL arguments.
 *
 * Two NULLs compare equal.  A non-NULL @s1 against a NULL @s2 gives -1,
 * and a NULL @s1 against a non-NULL @s2 gives 1.  When both strings are
 * present the result is whatever strcmp() returns.
 */
static int strcmp_null(const char *s1, const char *s2)
{
	if (s1 && s2)
		return strcmp(s1, s2);
	if (s1)
		return -1;
	return s2 ? 1 : 0;
}
102 
103 int ceph_compare_options(struct ceph_options *new_opt,
104 			 struct ceph_client *client)
105 {
106 	struct ceph_options *opt1 = new_opt;
107 	struct ceph_options *opt2 = client->options;
108 	int ofs = offsetof(struct ceph_options, mon_addr);
109 	int i;
110 	int ret;
111 
112 	ret = memcmp(opt1, opt2, ofs);
113 	if (ret)
114 		return ret;
115 
116 	ret = strcmp_null(opt1->name, opt2->name);
117 	if (ret)
118 		return ret;
119 
120 	ret = strcmp_null(opt1->secret, opt2->secret);
121 	if (ret)
122 		return ret;
123 
124 	/* any matching mon ip implies a match */
125 	for (i = 0; i < opt1->num_mon; i++) {
126 		if (ceph_monmap_contains(client->monc.monmap,
127 				 &opt1->mon_addr[i]))
128 			return 0;
129 	}
130 	return -1;
131 }
132 EXPORT_SYMBOL(ceph_compare_options);
133 
134 
135 static int parse_fsid(const char *str, struct ceph_fsid *fsid)
136 {
137 	int i = 0;
138 	char tmp[3];
139 	int err = -EINVAL;
140 	int d;
141 
142 	dout("parse_fsid '%s'\n", str);
143 	tmp[2] = 0;
144 	while (*str && i < 16) {
145 		if (ispunct(*str)) {
146 			str++;
147 			continue;
148 		}
149 		if (!isxdigit(str[0]) || !isxdigit(str[1]))
150 			break;
151 		tmp[0] = str[0];
152 		tmp[1] = str[1];
153 		if (sscanf(tmp, "%x", &d) < 1)
154 			break;
155 		fsid->fsid[i] = d & 0xff;
156 		i++;
157 		str += 2;
158 	}
159 
160 	if (i == 16)
161 		err = 0;
162 	dout("parse_fsid ret %d got fsid %pU", err, fsid);
163 	return err;
164 }
165 
/*
 * ceph option tokens
 *
 * The list is grouped by argument kind.  ceph_parse_options() compares
 * tokens against Opt_last_int and Opt_last_string to tell the groups
 * apart, so a new entry has to be added inside the right group.
 */
enum {
	/* options that take an integer argument */
	Opt_osdtimeout = 0,
	Opt_osdkeepalivetimeout,
	Opt_mount_timeout,
	Opt_osd_idle_ttl,
	Opt_last_int,		/* marks the end of the int group */

	/* options that take a string argument */
	Opt_fsid,
	Opt_name,
	Opt_secret,
	Opt_ip,
	Opt_last_string,	/* marks the end of the string group */

	/* options without an argument */
	Opt_noshare,
	Opt_nocrc,
};
185 
186 static match_table_t opt_tokens = {
187 	{Opt_osdtimeout, "osdtimeout=%d"},
188 	{Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
189 	{Opt_mount_timeout, "mount_timeout=%d"},
190 	{Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
191 	/* int args above */
192 	{Opt_fsid, "fsid=%s"},
193 	{Opt_name, "name=%s"},
194 	{Opt_secret, "secret=%s"},
195 	{Opt_ip, "ip=%s"},
196 	/* string args above */
197 	{Opt_noshare, "noshare"},
198 	{Opt_nocrc, "nocrc"},
199 	{-1, NULL}
200 };
201 
202 void ceph_destroy_options(struct ceph_options *opt)
203 {
204 	dout("destroy_options %p\n", opt);
205 	kfree(opt->name);
206 	kfree(opt->secret);
207 	kfree(opt);
208 }
209 EXPORT_SYMBOL(ceph_destroy_options);
210 
211 int ceph_parse_options(struct ceph_options **popt, char *options,
212 		       const char *dev_name, const char *dev_name_end,
213 		       int (*parse_extra_token)(char *c, void *private),
214 		       void *private)
215 {
216 	struct ceph_options *opt;
217 	const char *c;
218 	int err = -ENOMEM;
219 	substring_t argstr[MAX_OPT_ARGS];
220 
221 	opt = kzalloc(sizeof(*opt), GFP_KERNEL);
222 	if (!opt)
223 		return err;
224 	opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr),
225 				GFP_KERNEL);
226 	if (!opt->mon_addr)
227 		goto out;
228 
229 	dout("parse_options %p options '%s' dev_name '%s'\n", opt, options,
230 	     dev_name);
231 
232 	/* start with defaults */
233 	opt->flags = CEPH_OPT_DEFAULT;
234 	opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT;
235 	opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
236 	opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
237 	opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;   /* seconds */
238 
239 	/* get mon ip(s) */
240 	/* ip1[:port1][,ip2[:port2]...] */
241 	err = ceph_parse_ips(dev_name, dev_name_end, opt->mon_addr,
242 			     CEPH_MAX_MON, &opt->num_mon);
243 	if (err < 0)
244 		goto out;
245 
246 	/* parse mount options */
247 	while ((c = strsep(&options, ",")) != NULL) {
248 		int token, intval, ret;
249 		if (!*c)
250 			continue;
251 		err = -EINVAL;
252 		token = match_token((char *)c, opt_tokens, argstr);
253 		if (token < 0 && parse_extra_token) {
254 			/* extra? */
255 			err = parse_extra_token((char *)c, private);
256 			if (err < 0) {
257 				pr_err("bad option at '%s'\n", c);
258 				goto out;
259 			}
260 			continue;
261 		}
262 		if (token < Opt_last_int) {
263 			ret = match_int(&argstr[0], &intval);
264 			if (ret < 0) {
265 				pr_err("bad mount option arg (not int) "
266 				       "at '%s'\n", c);
267 				continue;
268 			}
269 			dout("got int token %d val %d\n", token, intval);
270 		} else if (token > Opt_last_int && token < Opt_last_string) {
271 			dout("got string token %d val %s\n", token,
272 			     argstr[0].from);
273 		} else {
274 			dout("got token %d\n", token);
275 		}
276 		switch (token) {
277 		case Opt_ip:
278 			err = ceph_parse_ips(argstr[0].from,
279 					     argstr[0].to,
280 					     &opt->my_addr,
281 					     1, NULL);
282 			if (err < 0)
283 				goto out;
284 			opt->flags |= CEPH_OPT_MYIP;
285 			break;
286 
287 		case Opt_fsid:
288 			err = parse_fsid(argstr[0].from, &opt->fsid);
289 			if (err == 0)
290 				opt->flags |= CEPH_OPT_FSID;
291 			break;
292 		case Opt_name:
293 			opt->name = kstrndup(argstr[0].from,
294 					      argstr[0].to-argstr[0].from,
295 					      GFP_KERNEL);
296 			break;
297 		case Opt_secret:
298 			opt->secret = kstrndup(argstr[0].from,
299 						argstr[0].to-argstr[0].from,
300 						GFP_KERNEL);
301 			break;
302 
303 			/* misc */
304 		case Opt_osdtimeout:
305 			opt->osd_timeout = intval;
306 			break;
307 		case Opt_osdkeepalivetimeout:
308 			opt->osd_keepalive_timeout = intval;
309 			break;
310 		case Opt_osd_idle_ttl:
311 			opt->osd_idle_ttl = intval;
312 			break;
313 		case Opt_mount_timeout:
314 			opt->mount_timeout = intval;
315 			break;
316 
317 		case Opt_noshare:
318 			opt->flags |= CEPH_OPT_NOSHARE;
319 			break;
320 
321 		case Opt_nocrc:
322 			opt->flags |= CEPH_OPT_NOCRC;
323 			break;
324 
325 		default:
326 			BUG_ON(token);
327 		}
328 	}
329 
330 	/* success */
331 	*popt = opt;
332 	return 0;
333 
334 out:
335 	ceph_destroy_options(opt);
336 	return err;
337 }
338 EXPORT_SYMBOL(ceph_parse_options);
339 
340 u64 ceph_client_id(struct ceph_client *client)
341 {
342 	return client->monc.auth->global_id;
343 }
344 EXPORT_SYMBOL(ceph_client_id);
345 
346 /*
347  * create a fresh client instance
348  */
349 struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private)
350 {
351 	struct ceph_client *client;
352 	int err = -ENOMEM;
353 
354 	client = kzalloc(sizeof(*client), GFP_KERNEL);
355 	if (client == NULL)
356 		return ERR_PTR(-ENOMEM);
357 
358 	client->private = private;
359 	client->options = opt;
360 
361 	mutex_init(&client->mount_mutex);
362 	init_waitqueue_head(&client->auth_wq);
363 	client->auth_err = 0;
364 
365 	client->extra_mon_dispatch = NULL;
366 	client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT;
367 	client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT;
368 
369 	client->msgr = NULL;
370 
371 	/* subsystems */
372 	err = ceph_monc_init(&client->monc, client);
373 	if (err < 0)
374 		goto fail;
375 	err = ceph_osdc_init(&client->osdc, client);
376 	if (err < 0)
377 		goto fail_monc;
378 
379 	return client;
380 
381 fail_monc:
382 	ceph_monc_stop(&client->monc);
383 fail:
384 	kfree(client);
385 	return ERR_PTR(err);
386 }
387 EXPORT_SYMBOL(ceph_create_client);
388 
389 void ceph_destroy_client(struct ceph_client *client)
390 {
391 	dout("destroy_client %p\n", client);
392 
393 	/* unmount */
394 	ceph_osdc_stop(&client->osdc);
395 
396 	/*
397 	 * make sure mds and osd connections close out before destroying
398 	 * the auth module, which is needed to free those connections'
399 	 * ceph_authorizers.
400 	 */
401 	ceph_msgr_flush();
402 
403 	ceph_monc_stop(&client->monc);
404 
405 	ceph_debugfs_client_cleanup(client);
406 
407 	if (client->msgr)
408 		ceph_messenger_destroy(client->msgr);
409 
410 	ceph_destroy_options(client->options);
411 
412 	kfree(client);
413 	dout("destroy_client %p done\n", client);
414 }
415 EXPORT_SYMBOL(ceph_destroy_client);
416 
417 /*
418  * true if we have the mon map (and have thus joined the cluster)
419  */
420 static int have_mon_and_osd_map(struct ceph_client *client)
421 {
422 	return client->monc.monmap && client->monc.monmap->epoch &&
423 	       client->osdc.osdmap && client->osdc.osdmap->epoch;
424 }
425 
426 /*
427  * mount: join the ceph cluster, and open root directory.
428  */
429 int __ceph_open_session(struct ceph_client *client, unsigned long started)
430 {
431 	struct ceph_entity_addr *myaddr = NULL;
432 	int err;
433 	unsigned long timeout = client->options->mount_timeout * HZ;
434 
435 	/* initialize the messenger */
436 	if (client->msgr == NULL) {
437 		if (ceph_test_opt(client, MYIP))
438 			myaddr = &client->options->my_addr;
439 		client->msgr = ceph_messenger_create(myaddr,
440 					client->supported_features,
441 					client->required_features);
442 		if (IS_ERR(client->msgr)) {
443 			client->msgr = NULL;
444 			return PTR_ERR(client->msgr);
445 		}
446 		client->msgr->nocrc = ceph_test_opt(client, NOCRC);
447 	}
448 
449 	/* open session, and wait for mon and osd maps */
450 	err = ceph_monc_open_session(&client->monc);
451 	if (err < 0)
452 		return err;
453 
454 	while (!have_mon_and_osd_map(client)) {
455 		err = -EIO;
456 		if (timeout && time_after_eq(jiffies, started + timeout))
457 			return err;
458 
459 		/* wait */
460 		dout("mount waiting for mon_map\n");
461 		err = wait_event_interruptible_timeout(client->auth_wq,
462 			have_mon_and_osd_map(client) || (client->auth_err < 0),
463 			timeout);
464 		if (err == -EINTR || err == -ERESTARTSYS)
465 			return err;
466 		if (client->auth_err < 0)
467 			return client->auth_err;
468 	}
469 
470 	return 0;
471 }
472 EXPORT_SYMBOL(__ceph_open_session);
473 
474 
475 int ceph_open_session(struct ceph_client *client)
476 {
477 	int ret;
478 	unsigned long started = jiffies;  /* note the start time */
479 
480 	dout("open_session start\n");
481 	mutex_lock(&client->mount_mutex);
482 
483 	ret = __ceph_open_session(client, started);
484 
485 	mutex_unlock(&client->mount_mutex);
486 	return ret;
487 }
488 EXPORT_SYMBOL(ceph_open_session);
489 
490 
491 static int __init init_ceph_lib(void)
492 {
493 	int ret = 0;
494 
495 	ret = ceph_debugfs_init();
496 	if (ret < 0)
497 		goto out;
498 
499 	ret = ceph_msgr_init();
500 	if (ret < 0)
501 		goto out_debugfs;
502 
503 	pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n",
504 		CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL,
505 		CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT,
506 		CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT);
507 
508 	return 0;
509 
510 out_debugfs:
511 	ceph_debugfs_cleanup();
512 out:
513 	return ret;
514 }
515 
/*
 * Module exit: calls ceph_msgr_exit() and then ceph_debugfs_cleanup(),
 * the reverse of the order in which init_ceph_lib() sets them up.
 */
static void __exit exit_ceph_lib(void)
{
	dout("exit_ceph_lib\n");
	ceph_msgr_exit();
	ceph_debugfs_cleanup();
}
522 
523 module_init(init_ceph_lib);
524 module_exit(exit_ceph_lib);
525 
526 MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
527 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
528 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
529 MODULE_DESCRIPTION("Ceph filesystem for Linux");
530 MODULE_LICENSE("GPL");
531