/* xref: /openbmc/linux/drivers/block/drbd/drbd_nl.c (revision d2999e1b) */
/*
   drbd_nl.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/blkpg.h>
#include <linux/cpumask.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
#include <asm/unaligned.h>
#include <linux/drbd_limits.h>
#include <linux/kthread.h>

#include <net/genetlink.h>

/* .doit */
// int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
// int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
/* .dumpit */
int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);

#include <linux/drbd_genl_api.h>
#include "drbd_nla.h"
#include <linux/genl_magic_func.h>

/* used with blkdev_get_by_path, to claim our meta data device(s) */
static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";

static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
{
	genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
	if (genlmsg_reply(skb, info))
		printk(KERN_ERR "drbd: error sending genl reply\n");
}

/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: the only
 * reason it could fail would be lack of space in the skb, and 4k are available. */
int drbd_msg_put_info(struct sk_buff *skb, const char *info)
{
	struct nlattr *nla;
	int err = -EMSGSIZE;

	if (!info || !info[0])
		return 0;

	nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
	if (!nla)
		return err;

	err = nla_put_string(skb, T_info_text, info);
	if (err) {
		nla_nest_cancel(skb, nla);
		return err;
	} else
		nla_nest_end(skb, nla);
	return 0;
}

/* This would be a good candidate for a "pre_doit" hook,
 * and per-family private info->pointers.
 * But we need to stay compatible with older kernels.
 * If it returns successfully, adm_ctx members are valid.
 *
 * At this point, we still rely on the global genl_lock().
 * If we want to avoid that, and allow "genl_family.parallel_ops", we may need
 * to add additional synchronization against object destruction/modification.
 */
#define DRBD_ADM_NEED_MINOR	1
#define DRBD_ADM_NEED_RESOURCE	2
#define DRBD_ADM_NEED_CONNECTION 4
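
/*
 * Example (illustrative, mirroring the handlers later in this file):
 * a typical .doit handler starts with
 *
 *	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
 *	if (!adm_ctx.reply_skb)
 *		return retcode;
 *
 * The flag values are distinct bits, so they could be OR'ed for a command
 * that needs more than one kind of object.
 */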
static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
	struct sk_buff *skb, struct genl_info *info, unsigned flags)
{
	struct drbd_genlmsghdr *d_in = info->userhdr;
	const u8 cmd = info->genlhdr->cmd;
	int err;

	memset(adm_ctx, 0, sizeof(*adm_ctx));

	/* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
	if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
		return -EPERM;

	adm_ctx->reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!adm_ctx->reply_skb) {
		err = -ENOMEM;
		goto fail;
	}

	adm_ctx->reply_dh = genlmsg_put_reply(adm_ctx->reply_skb,
					info, &drbd_genl_family, 0, cmd);
	/* a put of a few bytes into a fresh skb of >= 4k will always succeed,
	 * but check anyway */
	if (!adm_ctx->reply_dh) {
		err = -ENOMEM;
		goto fail;
	}

	adm_ctx->reply_dh->minor = d_in->minor;
	adm_ctx->reply_dh->ret_code = NO_ERROR;

	adm_ctx->volume = VOLUME_UNSPECIFIED;
	if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
		struct nlattr *nla;
		/* parse and validate only */
		err = drbd_cfg_context_from_attrs(NULL, info);
		if (err)
			goto fail;

		/* It was present, and valid,
		 * copy it over to the reply skb. */
		err = nla_put_nohdr(adm_ctx->reply_skb,
				info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
				info->attrs[DRBD_NLA_CFG_CONTEXT]);
		if (err)
			goto fail;

		/* and assign stuff to the adm_ctx */
		nla = nested_attr_tb[__nla_type(T_ctx_volume)];
		if (nla)
			adm_ctx->volume = nla_get_u32(nla);
		nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
		if (nla)
			adm_ctx->resource_name = nla_data(nla);
		adm_ctx->my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
		adm_ctx->peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
		if ((adm_ctx->my_addr &&
		     nla_len(adm_ctx->my_addr) > sizeof(adm_ctx->connection->my_addr)) ||
		    (adm_ctx->peer_addr &&
		     nla_len(adm_ctx->peer_addr) > sizeof(adm_ctx->connection->peer_addr))) {
			err = -EINVAL;
			goto fail;
		}
	}

	adm_ctx->minor = d_in->minor;
	adm_ctx->device = minor_to_device(d_in->minor);

	/* We are protected by the global genl_lock().
	 * But we may explicitly drop it/retake it in drbd_adm_set_role(),
	 * so make sure this object stays around. */
	if (adm_ctx->device)
		kref_get(&adm_ctx->device->kref);

	if (adm_ctx->resource_name) {
		adm_ctx->resource = drbd_find_resource(adm_ctx->resource_name);
	}

	if (!adm_ctx->device && (flags & DRBD_ADM_NEED_MINOR)) {
		drbd_msg_put_info(adm_ctx->reply_skb, "unknown minor");
		return ERR_MINOR_INVALID;
	}
	if (!adm_ctx->resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
		drbd_msg_put_info(adm_ctx->reply_skb, "unknown resource");
		if (adm_ctx->resource_name)
			return ERR_RES_NOT_KNOWN;
		return ERR_INVALID_REQUEST;
	}

	if (flags & DRBD_ADM_NEED_CONNECTION) {
		if (adm_ctx->resource) {
			drbd_msg_put_info(adm_ctx->reply_skb, "no resource name expected");
			return ERR_INVALID_REQUEST;
		}
		if (adm_ctx->device) {
			drbd_msg_put_info(adm_ctx->reply_skb, "no minor number expected");
			return ERR_INVALID_REQUEST;
		}
		if (adm_ctx->my_addr && adm_ctx->peer_addr)
			adm_ctx->connection = conn_get_by_addrs(nla_data(adm_ctx->my_addr),
							  nla_len(adm_ctx->my_addr),
							  nla_data(adm_ctx->peer_addr),
							  nla_len(adm_ctx->peer_addr));
		if (!adm_ctx->connection) {
			drbd_msg_put_info(adm_ctx->reply_skb, "unknown connection");
			return ERR_INVALID_REQUEST;
		}
	}

	/* some more paranoia, if the request was over-determined */
	if (adm_ctx->device && adm_ctx->resource &&
	    adm_ctx->device->resource != adm_ctx->resource) {
		pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
				adm_ctx->minor, adm_ctx->resource->name,
				adm_ctx->device->resource->name);
		drbd_msg_put_info(adm_ctx->reply_skb, "minor exists in different resource");
		return ERR_INVALID_REQUEST;
	}
	if (adm_ctx->device &&
	    adm_ctx->volume != VOLUME_UNSPECIFIED &&
	    adm_ctx->volume != adm_ctx->device->vnr) {
		pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
				adm_ctx->minor, adm_ctx->volume,
				adm_ctx->device->vnr,
				adm_ctx->device->resource->name);
		drbd_msg_put_info(adm_ctx->reply_skb, "minor exists as different volume");
		return ERR_INVALID_REQUEST;
	}

	/* still, provide adm_ctx->resource always, if possible. */
	if (!adm_ctx->resource) {
		adm_ctx->resource = adm_ctx->device ? adm_ctx->device->resource
			: adm_ctx->connection ? adm_ctx->connection->resource : NULL;
		if (adm_ctx->resource)
			kref_get(&adm_ctx->resource->kref);
	}

	return NO_ERROR;

fail:
	nlmsg_free(adm_ctx->reply_skb);
	adm_ctx->reply_skb = NULL;
	return err;
}

static int drbd_adm_finish(struct drbd_config_context *adm_ctx,
	struct genl_info *info, int retcode)
{
	if (adm_ctx->device) {
		kref_put(&adm_ctx->device->kref, drbd_destroy_device);
		adm_ctx->device = NULL;
	}
	if (adm_ctx->connection) {
		kref_put(&adm_ctx->connection->kref, drbd_destroy_connection);
		adm_ctx->connection = NULL;
	}
	if (adm_ctx->resource) {
		kref_put(&adm_ctx->resource->kref, drbd_destroy_resource);
		adm_ctx->resource = NULL;
	}

	if (!adm_ctx->reply_skb)
		return -ENOMEM;

	adm_ctx->reply_dh->ret_code = retcode;
	drbd_adm_send_reply(adm_ctx->reply_skb, info);
	return 0;
}

static void setup_khelper_env(struct drbd_connection *connection, char **envp)
{
	char *afs;

	/* FIXME: A future version will not allow this case. */
	if (connection->my_addr_len == 0 || connection->peer_addr_len == 0)
		return;

	switch (((struct sockaddr *)&connection->peer_addr)->sa_family) {
	case AF_INET6:
		afs = "ipv6";
		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
			 &((struct sockaddr_in6 *)&connection->peer_addr)->sin6_addr);
		break;
	case AF_INET:
		afs = "ipv4";
		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
			 &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
		break;
	default:
		afs = "ssocks";
		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
			 &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
	}
	snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
}
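
/*
 * Example (illustrative): for an IPv4 peer, the two scratch slots filled in
 * above would end up containing, in addition to HOME/TERM/PATH:
 *
 *	DRBD_PEER_AF=ipv4
 *	DRBD_PEER_ADDRESS=192.168.1.2
 *
 * The address literal is an assumption for the example; the actual value is
 * formatted from connection->peer_addr.
 */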

int drbd_khelper(struct drbd_device *device, char *cmd)
{
	char *envp[] = { "HOME=/",
			"TERM=linux",
			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
			 (char[20]) { }, /* address family */
			 (char[60]) { }, /* address */
			NULL };
	char mb[12];
	char *argv[] = {usermode_helper, cmd, mb, NULL };
	struct drbd_connection *connection = first_peer_device(device)->connection;
	struct sib_info sib;
	int ret;

	if (current == connection->worker.task)
		set_bit(CALLBACK_PENDING, &connection->flags);

	snprintf(mb, 12, "minor-%d", device_to_minor(device));
	setup_khelper_env(connection, envp);

	/* The helper may take some time.
	 * write out any unsynced meta data changes now */
	drbd_md_sync(device);

	drbd_info(device, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
	sib.sib_reason = SIB_HELPER_PRE;
	sib.helper_name = cmd;
	drbd_bcast_event(device, &sib);
	ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
	if (ret)
		drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
				usermode_helper, cmd, mb,
				(ret >> 8) & 0xff, ret);
	else
		drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
				usermode_helper, cmd, mb,
				(ret >> 8) & 0xff, ret);
	sib.sib_reason = SIB_HELPER_POST;
	sib.helper_exit_code = ret;
	drbd_bcast_event(device, &sib);

	if (current == connection->worker.task)
		clear_bit(CALLBACK_PENDING, &connection->flags);

	if (ret < 0) /* Ignore any ERRNOs we got. */
		ret = 0;

	return ret;
}

static int conn_khelper(struct drbd_connection *connection, char *cmd)
{
	char *envp[] = { "HOME=/",
			"TERM=linux",
			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
			 (char[20]) { }, /* address family */
			 (char[60]) { }, /* address */
			NULL };
	char *resource_name = connection->resource->name;
	char *argv[] = {usermode_helper, cmd, resource_name, NULL };
	int ret;

	setup_khelper_env(connection, envp);
	conn_md_sync(connection);

	drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name);
	/* TODO: conn_bcast_event() ?? */

	ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
	if (ret)
		drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
			  usermode_helper, cmd, resource_name,
			  (ret >> 8) & 0xff, ret);
	else
		drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
			  usermode_helper, cmd, resource_name,
			  (ret >> 8) & 0xff, ret);
	/* TODO: conn_bcast_event() ?? */

	if (ret < 0) /* Ignore any ERRNOs we got. */
		ret = 0;

	return ret;
}

static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connection)
{
	enum drbd_fencing_p fp = FP_NOT_AVAIL;
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		if (get_ldev_if_state(device, D_CONSISTENT)) {
			struct disk_conf *disk_conf =
				rcu_dereference(peer_device->device->ldev->disk_conf);
			fp = max_t(enum drbd_fencing_p, fp, disk_conf->fencing);
			put_ldev(device);
		}
	}
	rcu_read_unlock();

	if (fp == FP_NOT_AVAIL) {
		/* IO Suspending works on the whole resource.
		   Do it only for one device. */
		vnr = 0;
		peer_device = idr_get_next(&connection->peer_devices, &vnr);
		drbd_change_state(peer_device->device, CS_VERBOSE | CS_HARD, NS(susp_fen, 0));
	}

	return fp;
}

bool conn_try_outdate_peer(struct drbd_connection *connection)
{
	unsigned int connect_cnt;
	union drbd_state mask = { };
	union drbd_state val = { };
	enum drbd_fencing_p fp;
	char *ex_to_string;
	int r;

	spin_lock_irq(&connection->resource->req_lock);
	if (connection->cstate >= C_WF_REPORT_PARAMS) {
		drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
		spin_unlock_irq(&connection->resource->req_lock);
		return false;
	}

	connect_cnt = connection->connect_cnt;
	spin_unlock_irq(&connection->resource->req_lock);

	fp = highest_fencing_policy(connection);
	switch (fp) {
	case FP_NOT_AVAIL:
		drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n");
		goto out;
	case FP_DONT_CARE:
		return true;
	default: ;
	}

	r = conn_khelper(connection, "fence-peer");

	switch ((r>>8) & 0xff) {
	case 3: /* peer is inconsistent */
		ex_to_string = "peer is inconsistent or worse";
		mask.pdsk = D_MASK;
		val.pdsk = D_INCONSISTENT;
		break;
	case 4: /* peer got outdated, or was already outdated */
		ex_to_string = "peer was fenced";
		mask.pdsk = D_MASK;
		val.pdsk = D_OUTDATED;
		break;
	case 5: /* peer was down */
		if (conn_highest_disk(connection) == D_UP_TO_DATE) {
			/* we will(have) create(d) a new UUID anyways... */
			ex_to_string = "peer is unreachable, assumed to be dead";
			mask.pdsk = D_MASK;
			val.pdsk = D_OUTDATED;
		} else {
			ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
		}
		break;
	case 6: /* Peer is primary, voluntarily outdate myself.
		 * This is useful when an unconnected R_SECONDARY is asked to
		 * become R_PRIMARY, but finds the other peer being active. */
		ex_to_string = "peer is active";
		drbd_warn(connection, "Peer is primary, outdating myself.\n");
		mask.disk = D_MASK;
		val.disk = D_OUTDATED;
		break;
	case 7:
		if (fp != FP_STONITH)
			drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n");
		ex_to_string = "peer was stonithed";
		mask.pdsk = D_MASK;
		val.pdsk = D_OUTDATED;
		break;
	default:
		/* The script is broken ... */
		drbd_err(connection, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
		return false; /* Eventually leave IO frozen */
	}

	drbd_info(connection, "fence-peer helper returned %d (%s)\n",
		  (r>>8) & 0xff, ex_to_string);

 out:

	/* Not using
	   conn_request_state(connection, mask, val, CS_VERBOSE);
	   here, because we might have been able to re-establish the
	   connection in the meantime. */
	spin_lock_irq(&connection->resource->req_lock);
	if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) {
		if (connection->connect_cnt != connect_cnt)
			/* In case the connection was established and dropped
			   while the fence-peer handler was running, ignore it */
			drbd_info(connection, "Ignoring fence-peer exit code\n");
		else
			_conn_request_state(connection, mask, val, CS_VERBOSE);
	}
	spin_unlock_irq(&connection->resource->req_lock);

	return conn_highest_pdsk(connection) <= D_OUTDATED;
}

static int _try_outdate_peer_async(void *data)
{
	struct drbd_connection *connection = (struct drbd_connection *)data;

	conn_try_outdate_peer(connection);

	kref_put(&connection->kref, drbd_destroy_connection);
	return 0;
}

void conn_try_outdate_peer_async(struct drbd_connection *connection)
{
	struct task_struct *opa;

	kref_get(&connection->kref);
	opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h");
	if (IS_ERR(opa)) {
		drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n");
		kref_put(&connection->kref, drbd_destroy_connection);
	}
}

enum drbd_state_rv
drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
{
	const int max_tries = 4;
	enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
	struct net_conf *nc;
	int try = 0;
	int forced = 0;
	union drbd_state mask, val;

	if (new_role == R_PRIMARY) {
		struct drbd_connection *connection;

		/* Detect dead peers as soon as possible. */

		rcu_read_lock();
		for_each_connection(connection, device->resource)
			request_ping(connection);
		rcu_read_unlock();
	}

	mutex_lock(device->state_mutex);

	mask.i = 0; mask.role = R_MASK;
	val.i  = 0; val.role  = new_role;

	while (try++ < max_tries) {
		rv = _drbd_request_state(device, mask, val, CS_WAIT_COMPLETE);

		/* in case we first succeeded to outdate,
		 * but now suddenly could establish a connection */
		if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
			val.pdsk = 0;
			mask.pdsk = 0;
			continue;
		}

		if (rv == SS_NO_UP_TO_DATE_DISK && force &&
		    (device->state.disk < D_UP_TO_DATE &&
		     device->state.disk >= D_INCONSISTENT)) {
			mask.disk = D_MASK;
			val.disk  = D_UP_TO_DATE;
			forced = 1;
			continue;
		}

		if (rv == SS_NO_UP_TO_DATE_DISK &&
		    device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
			D_ASSERT(device, device->state.pdsk == D_UNKNOWN);

			if (conn_try_outdate_peer(first_peer_device(device)->connection)) {
				val.disk = D_UP_TO_DATE;
				mask.disk = D_MASK;
			}
			continue;
		}

		if (rv == SS_NOTHING_TO_DO)
			goto out;
		if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
			if (!conn_try_outdate_peer(first_peer_device(device)->connection) && force) {
				drbd_warn(device, "Forced into split brain situation!\n");
				mask.pdsk = D_MASK;
				val.pdsk  = D_OUTDATED;

			}
			continue;
		}
		if (rv == SS_TWO_PRIMARIES) {
			/* Maybe the peer is detected as dead very soon...
			   retry at most once more in this case. */
			int timeo;
			rcu_read_lock();
			nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
			timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
			rcu_read_unlock();
			schedule_timeout_interruptible(timeo);
			if (try < max_tries)
				try = max_tries - 1;
			continue;
		}
		if (rv < SS_SUCCESS) {
			rv = _drbd_request_state(device, mask, val,
						CS_VERBOSE + CS_WAIT_COMPLETE);
			if (rv < SS_SUCCESS)
				goto out;
		}
		break;
	}

	if (rv < SS_SUCCESS)
		goto out;

	if (forced)
		drbd_warn(device, "Forced to consider local data as UpToDate!\n");

	/* Wait until nothing is on the fly :) */
	wait_event(device->misc_wait, atomic_read(&device->ap_pending_cnt) == 0);

	/* FIXME also wait for all pending P_BARRIER_ACK? */

	if (new_role == R_SECONDARY) {
		set_disk_ro(device->vdisk, true);
		if (get_ldev(device)) {
			device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
			put_ldev(device);
		}
	} else {
		/* Called from drbd_adm_set_role only.
		 * We are still holding the conf_update mutex. */
		nc = first_peer_device(device)->connection->net_conf;
		if (nc)
			nc->discard_my_data = 0; /* without copy; single bit op is atomic */

		set_disk_ro(device->vdisk, false);
		if (get_ldev(device)) {
			if (((device->state.conn < C_CONNECTED ||
			       device->state.pdsk <= D_FAILED)
			      && device->ldev->md.uuid[UI_BITMAP] == 0) || forced)
				drbd_uuid_new_current(device);

			device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
			put_ldev(device);
		}
	}

	/* writeout of activity log covered areas of the bitmap
	 * to stable storage done in after state change already */

	if (device->state.conn >= C_WF_REPORT_PARAMS) {
		/* if this was forced, we should consider sync */
		if (forced)
			drbd_send_uuids(first_peer_device(device));
		drbd_send_current_state(first_peer_device(device));
	}

	drbd_md_sync(device);

	kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
out:
	mutex_unlock(device->state_mutex);
	return rv;
}

static const char *from_attrs_err_to_txt(int err)
{
	return	err == -ENOMSG ? "required attribute missing" :
		err == -EOPNOTSUPP ? "unknown mandatory attribute" :
		err == -EEXIST ? "can not change invariant setting" :
		"invalid attribute value";
}

int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	struct set_role_parms parms;
	int err;
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	memset(&parms, 0, sizeof(parms));
	if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
		err = set_role_parms_from_attrs(&parms, info);
		if (err) {
			retcode = ERR_MANDATORY_TAG;
			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
			goto out;
		}
	}
	genl_unlock();
	mutex_lock(&adm_ctx.resource->adm_mutex);

	if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
		retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
	else
		retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);

	mutex_unlock(&adm_ctx.resource->adm_mutex);
	genl_lock();
out:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}

/* Initializes the md.*_offset members, so we are able to find
 * the on disk meta data.
 *
 * We currently have two possible layouts:
 * external:
 *   |----------- md_size_sect ------------------|
 *   [ 4k superblock ][ activity log ][  Bitmap  ]
 *   | al_offset == 8 |
 *   | bm_offset = al_offset + X      |
 *  ==> bitmap sectors = md_size_sect - bm_offset
 *
 * internal:
 *            |----------- md_size_sect ------------------|
 * [data.....][  Bitmap  ][ activity log ][ 4k superblock ]
 *                        | al_offset < 0 |
 *            | bm_offset = al_offset - Y |
 *  ==> bitmap sectors = Y = al_offset - bm_offset
 *
 *  Activity log size used to be fixed 32kB,
 *  but is about to become configurable.
 */
static void drbd_md_set_sector_offsets(struct drbd_device *device,
				       struct drbd_backing_dev *bdev)
{
	sector_t md_size_sect = 0;
	unsigned int al_size_sect = bdev->md.al_size_4k * 8;

	bdev->md.md_offset = drbd_md_ss(bdev);

	switch (bdev->md.meta_dev_idx) {
	default:
		/* v07 style fixed size indexed meta data */
		bdev->md.md_size_sect = MD_128MB_SECT;
		bdev->md.al_offset = MD_4kB_SECT;
		bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
		break;
	case DRBD_MD_INDEX_FLEX_EXT:
		/* just occupy the full device; unit: sectors */
		bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
		bdev->md.al_offset = MD_4kB_SECT;
		bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
		break;
	case DRBD_MD_INDEX_INTERNAL:
	case DRBD_MD_INDEX_FLEX_INT:
		/* al size is still fixed */
		bdev->md.al_offset = -al_size_sect;
		/* we need (slightly less than) about this many bitmap sectors: */
		md_size_sect = drbd_get_capacity(bdev->backing_bdev);
		md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
		md_size_sect = BM_SECT_TO_EXT(md_size_sect);
		md_size_sect = ALIGN(md_size_sect, 8);

		/* plus the "drbd meta data super block",
		 * and the activity log; */
		md_size_sect += MD_4kB_SECT + al_size_sect;

		bdev->md.md_size_sect = md_size_sect;
		/* bitmap offset is adjusted by 'super' block size */
		bdev->md.bm_offset   = -md_size_sect + MD_4kB_SECT;
		break;
	}
}
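
/*
 * Worked example (illustrative numbers): with internal meta data and the
 * historic fixed 32 kB activity log (al_size_4k = 8, so al_size_sect = 64),
 * a 1 TiB backing device needs roughly
 *
 *	1 TiB / 4 kiB per bitmap bit = 2^28 bits = 32 MiB of bitmap
 *	(65536 sectors, rounded up further by the ALIGN() steps above)
 *	+ 64 sectors of activity log + 8 sectors (4 kB) of superblock,
 *
 * all carved out of the end of the device, which is why bm_offset and
 * al_offset are negative in the internal layouts.
 */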

/* input size is expected to be in KB */
char *ppsize(char *buf, unsigned long long size)
{
	/* Needs 9 bytes at max including trailing NUL:
	 * -1ULL ==> "16384 EB" */
	static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
	int base = 0;
	while (size >= 10000 && base < sizeof(units)-1) {
		/* shift + round */
		size = (size >> 10) + !!(size & (1<<9));
		base++;
	}
	sprintf(buf, "%u %cB", (unsigned)size, units[base]);

	return buf;
}
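
/*
 * Example (illustrative): ppsize(buf, 1048576) yields "1024 MB": 1048576 KB
 * is shifted down by 10 once (rounding on bit 9 adds nothing here), giving
 * 1024 with one unit step from 'K' to 'M'. The 9-byte worst case is
 * -1ULL ==> "16384 EB", as the comment above notes.
 */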

/* there is still a theoretical deadlock when called from receiver
 * on a D_INCONSISTENT R_PRIMARY:
 *  remote READ does inc_ap_bio, receiver would need to receive answer
 *  packet from remote to dec_ap_bio again.
 *  receiver receive_sizes(), comes here,
 *  waits for ap_bio_cnt == 0. -> deadlock.
 * but this cannot happen, actually, because:
 *  R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
 *  (not connected, or bad/no disk on peer):
 *  see drbd_fail_request_early, ap_bio_cnt is zero.
 *  R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
 *  peer may not initiate a resize.
 */
/* Note these are not to be confused with
 * drbd_adm_suspend_io/drbd_adm_resume_io,
 * which are (sub) state changes triggered by admin (drbdsetup),
 * and can be long lived.
 * This changes a device->flag, is triggered by drbd internals,
 * and should be short-lived. */
void drbd_suspend_io(struct drbd_device *device)
{
	set_bit(SUSPEND_IO, &device->flags);
	if (drbd_suspended(device))
		return;
	wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
}

void drbd_resume_io(struct drbd_device *device)
{
	clear_bit(SUSPEND_IO, &device->flags);
	wake_up(&device->misc_wait);
}

/**
 * drbd_determine_dev_size() - Sets the right device size obeying all constraints
 * @device:	DRBD device.
 *
 * Returns an enum determine_dev_size; values of DS_ERROR and below
 * indicate errors.
 * You should call drbd_md_sync() after calling this function.
 */
enum determine_dev_size
drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
{
	sector_t prev_first_sect, prev_size; /* previous meta location */
	sector_t la_size_sect, u_size;
	struct drbd_md *md = &device->ldev->md;
	u32 prev_al_stripe_size_4k;
	u32 prev_al_stripes;
	sector_t size;
	char ppb[10];
	void *buffer;

	int md_moved, la_size_changed;
	enum determine_dev_size rv = DS_UNCHANGED;

	/* race:
	 * application request passes inc_ap_bio,
	 * but then cannot get an AL-reference.
	 * this function later may wait on ap_bio_cnt == 0. -> deadlock.
	 *
	 * to avoid that:
	 * Suspend IO right here.
	 * still lock the act_log to not trigger ASSERTs there.
	 */
	drbd_suspend_io(device);
	buffer = drbd_md_get_buffer(device); /* Lock meta-data IO */
	if (!buffer) {
		drbd_resume_io(device);
		return DS_ERROR;
	}

	/* no wait necessary anymore, actually we could assert that */
	wait_event(device->al_wait, lc_try_lock(device->act_log));

	prev_first_sect = drbd_md_first_sector(device->ldev);
	prev_size = device->ldev->md.md_size_sect;
	la_size_sect = device->ldev->md.la_size_sect;

	if (rs) {
		/* rs is non NULL if we should change the AL layout only */

		prev_al_stripes = md->al_stripes;
		prev_al_stripe_size_4k = md->al_stripe_size_4k;

		md->al_stripes = rs->al_stripes;
		md->al_stripe_size_4k = rs->al_stripe_size / 4;
		md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
	}

	drbd_md_set_sector_offsets(device, device->ldev);

	rcu_read_lock();
	u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
	rcu_read_unlock();
	size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);

	if (size < la_size_sect) {
		if (rs && u_size == 0) {
			/* Remove "rs &&" later. This check should always be active, but
			   right now the receiver expects the permissive behavior */
			drbd_warn(device, "Implicit shrink not allowed. "
				 "Use --size=%llus for explicit shrink.\n",
				 (unsigned long long)size);
			rv = DS_ERROR_SHRINK;
		}
		if (u_size > size)
			rv = DS_ERROR_SPACE_MD;
		if (rv != DS_UNCHANGED)
			goto err_out;
	}

	if (drbd_get_capacity(device->this_bdev) != size ||
	    drbd_bm_capacity(device) != size) {
		int err;
		err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
		if (unlikely(err)) {
			/* currently there is only one error: ENOMEM! */
			size = drbd_bm_capacity(device)>>1;
			if (size == 0) {
				drbd_err(device, "OUT OF MEMORY! "
				    "Could not allocate bitmap!\n");
			} else {
				drbd_err(device, "BM resizing failed. "
				    "Leaving size unchanged at size = %lu KB\n",
				    (unsigned long)size);
			}
			rv = DS_ERROR;
		}
		/* racy, see comments above. */
		drbd_set_my_capacity(device, size);
		device->ldev->md.la_size_sect = size;
		drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
		     (unsigned long long)size>>1);
	}
	if (rv <= DS_ERROR)
		goto err_out;

	la_size_changed = (la_size_sect != device->ldev->md.la_size_sect);

	md_moved = prev_first_sect != drbd_md_first_sector(device->ldev)
		|| prev_size	   != device->ldev->md.md_size_sect;

	if (la_size_changed || md_moved || rs) {
		u32 prev_flags;

		drbd_al_shrink(device); /* All extents inactive. */

		prev_flags = md->flags;
		md->flags &= ~MDF_PRIMARY_IND;
		drbd_md_write(device, buffer);

		drbd_info(device, "Writing the whole bitmap, %s\n",
			 la_size_changed && md_moved ? "size changed and md moved" :
			 la_size_changed ? "size changed" : "md moved");
		/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
		drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
			       "size changed", BM_LOCKED_MASK);
		drbd_initialize_al(device, buffer);

		md->flags = prev_flags;
		drbd_md_write(device, buffer);

		if (rs)
			drbd_info(device, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
				  md->al_stripes, md->al_stripe_size_4k * 4);
	}

	if (size > la_size_sect)
		rv = la_size_sect ? DS_GREW : DS_GREW_FROM_ZERO;
	if (size < la_size_sect)
		rv = DS_SHRUNK;

	if (0) {
	err_out:
		if (rs) {
			md->al_stripes = prev_al_stripes;
			md->al_stripe_size_4k = prev_al_stripe_size_4k;
			md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;

			drbd_md_set_sector_offsets(device, device->ldev);
		}
	}
	lc_unlock(device->act_log);
	wake_up(&device->al_wait);
	drbd_md_put_buffer(device);
	drbd_resume_io(device);

	return rv;
}

sector_t
drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev,
		  sector_t u_size, int assume_peer_has_space)
{
	sector_t p_size = device->p_size;   /* partner's disk size. */
	sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. */
	sector_t m_size; /* my size */
	sector_t size = 0;

	m_size = drbd_get_max_capacity(bdev);

	if (device->state.conn < C_CONNECTED && assume_peer_has_space) {
		drbd_warn(device, "Resize while not connected was forced by the user!\n");
		p_size = m_size;
	}

	if (p_size && m_size) {
		size = min_t(sector_t, p_size, m_size);
	} else {
		if (la_size_sect) {
			size = la_size_sect;
			if (m_size && m_size < size)
				size = m_size;
			if (p_size && p_size < size)
				size = p_size;
		} else {
			if (m_size)
				size = m_size;
			if (p_size)
				size = p_size;
		}
	}

	if (size == 0)
		drbd_err(device, "Both nodes diskless!\n");

	if (u_size) {
		if (u_size > size)
			drbd_err(device, "Requested disk size is too big (%lu > %lu)\n",
			    (unsigned long)u_size>>1, (unsigned long)size>>1);
		else
			size = u_size;
	}

	return size;
}
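
/*
 * Example (illustrative): local backing device 100 GiB, peer reports 80 GiB,
 * no user-requested size (u_size == 0): size = min(p_size, m_size) = 80 GiB.
 * If the peer size is unknown (not connected, p_size == 0), we fall back to
 * the last agreed size, further clipped to whichever of the two capacities
 * is known, so a node cannot silently outgrow what the pair once agreed on.
 */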

/**
 * drbd_check_al_size() - Ensures that the AL is of the right size
 * @device:	DRBD device.
 *
 * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
 * failed, and 0 on success. You should call drbd_md_sync() after you called
 * this function.
 */
static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
{
	struct lru_cache *n, *t;
	struct lc_element *e;
	unsigned int in_use;
	int i;

	if (device->act_log &&
	    device->act_log->nr_elements == dc->al_extents)
		return 0;

	in_use = 0;
	t = device->act_log;
	n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
		dc->al_extents, sizeof(struct lc_element), 0);

	if (n == NULL) {
		drbd_err(device, "Cannot allocate act_log lru!\n");
		return -ENOMEM;
	}
	spin_lock_irq(&device->al_lock);
	if (t) {
		for (i = 0; i < t->nr_elements; i++) {
			e = lc_element_by_index(t, i);
			if (e->refcnt)
				drbd_err(device, "refcnt(%d)==%d\n",
				    e->lc_number, e->refcnt);
			in_use += e->refcnt;
		}
	}
	if (!in_use)
		device->act_log = n;
	spin_unlock_irq(&device->al_lock);
	if (in_use) {
		drbd_err(device, "Activity log still in use!\n");
		lc_destroy(n);
		return -EBUSY;
	} else {
		if (t)
			lc_destroy(t);
	}
	drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elements */
	return 0;
}

static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_bio_size)
{
	struct request_queue * const q = device->rq_queue;
	unsigned int max_hw_sectors = max_bio_size >> 9;
	unsigned int max_segments = 0;
	struct request_queue *b = NULL;

	if (get_ldev_if_state(device, D_ATTACHING)) {
		b = device->ldev->backing_bdev->bd_disk->queue;

		max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
		rcu_read_lock();
		max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
		rcu_read_unlock();

		blk_set_stacking_limits(&q->limits);
		blk_queue_max_write_same_sectors(q, 0);
	}

	blk_queue_logical_block_size(q, 512);
	blk_queue_max_hw_sectors(q, max_hw_sectors);
	/* This is the workaround for "bio would need to, but cannot, be split" */
	blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
	blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);

	if (b) {
		struct drbd_connection *connection = first_peer_device(device)->connection;

		if (blk_queue_discard(b) &&
		    (connection->cstate < C_CONNECTED || connection->agreed_features & FF_TRIM)) {
			/* For now, don't allow more than one activity log extent worth of data
			 * to be discarded in one go. We may need to rework drbd_al_begin_io()
			 * to allow for even larger discard ranges */
			q->limits.max_discard_sectors = DRBD_MAX_DISCARD_SECTORS;

			queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
			/* REALLY? Is stacking secdiscard "legal"? */
			if (blk_queue_secdiscard(b))
				queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q);
		} else {
			q->limits.max_discard_sectors = 0;
			queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
			queue_flag_clear_unlocked(QUEUE_FLAG_SECDISCARD, q);
		}

		blk_queue_stack_limits(q, b);

		if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
			drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
				 q->backing_dev_info.ra_pages,
				 b->backing_dev_info.ra_pages);
			q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
		}
		put_ldev(device);
	}
}

void drbd_reconsider_max_bio_size(struct drbd_device *device)
{
	unsigned int now, new, local, peer;

	now = queue_max_hw_sectors(device->rq_queue) << 9;
	local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */
	peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */

	if (get_ldev_if_state(device, D_ATTACHING)) {
		local = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9;
		device->local_max_bio_size = local;
		put_ldev(device);
	}
	local = min(local, DRBD_MAX_BIO_SIZE);

	/* We may ignore peer limits if the peer is modern enough:
	   from 8.3.8 onwards the peer can use multiple
	   BIOs for a single peer_request */
	if (device->state.conn >= C_WF_REPORT_PARAMS) {
		if (first_peer_device(device)->connection->agreed_pro_version < 94)
			peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
			/* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
		else if (first_peer_device(device)->connection->agreed_pro_version == 94)
			peer = DRBD_MAX_SIZE_H80_PACKET;
		else if (first_peer_device(device)->connection->agreed_pro_version < 100)
			peer = DRBD_MAX_BIO_SIZE_P95;  /* drbd 8.3.8 onwards, before 8.4.0 */
		else
			peer = DRBD_MAX_BIO_SIZE;

		/* We may later detach and re-attach on a disconnected Primary.
		 * Avoid this setting to jump back in that case.
		 * We want to store what we know the peer DRBD can handle,
		 * not what the peer IO backend can handle. */
		if (peer > device->peer_max_bio_size)
			device->peer_max_bio_size = peer;
	}
	new = min(local, peer);

	if (device->state.role == R_PRIMARY && new < now)
		drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now);

	if (new != now)
		drbd_info(device, "max BIO size = %u\n", new);

	drbd_setup_queue_param(device, new);
}
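
/*
 * Summary of the peer-limit negotiation above (restating the code, for
 * reference):
 *
 *	agreed_pro_version <  94   min(peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET)
 *	agreed_pro_version == 94   DRBD_MAX_SIZE_H80_PACKET
 *	94 < version < 100         DRBD_MAX_BIO_SIZE_P95
 *	agreed_pro_version >= 100  DRBD_MAX_BIO_SIZE
 *
 * and the final queue limit is min(local, peer).
 */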

/* Starts the worker thread */
static void conn_reconfig_start(struct drbd_connection *connection)
{
	drbd_thread_start(&connection->worker);
	drbd_flush_workqueue(&connection->sender_work);
}

/* if still unconfigured, stops worker again. */
static void conn_reconfig_done(struct drbd_connection *connection)
{
	bool stop_threads;
	spin_lock_irq(&connection->resource->req_lock);
	stop_threads = conn_all_vols_unconf(connection) &&
		connection->cstate == C_STANDALONE;
	spin_unlock_irq(&connection->resource->req_lock);
	if (stop_threads) {
		/* asender is implicitly stopped by receiver
		 * in conn_disconnect() */
		drbd_thread_stop(&connection->receiver);
		drbd_thread_stop(&connection->worker);
	}
}

/* Make sure IO is suspended before calling this function. */
static void drbd_suspend_al(struct drbd_device *device)
{
	int s = 0;

	if (!lc_try_lock(device->act_log)) {
		drbd_warn(device, "Failed to lock al in drbd_suspend_al()\n");
		return;
	}

	drbd_al_shrink(device);
	spin_lock_irq(&device->resource->req_lock);
	if (device->state.conn < C_CONNECTED)
		s = !test_and_set_bit(AL_SUSPENDED, &device->flags);
	spin_unlock_irq(&device->resource->req_lock);
	lc_unlock(device->act_log);

	if (s)
		drbd_info(device, "Suspended AL updates\n");
}


static bool should_set_defaults(struct genl_info *info)
{
	unsigned flags = ((struct drbd_genlmsghdr*)info->userhdr)->flags;
	return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS);
}

static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
{
	/* This is limited by 16 bit "slot" numbers,
	 * and by available on-disk context storage.
	 *
	 * Also (u16)~0 is special (denotes a "free" extent).
	 *
	 * One transaction occupies one 4kB on-disk block,
	 * we have n such blocks in the on disk ring buffer,
	 * the "current" transaction may fail (n-1),
	 * and there are 919 context slot numbers per transaction.
	 *
	 * 72 transaction blocks amounts to more than 2**16 context slots,
	 * so cap there first.
	 */
	const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX;
	const unsigned int sufficient_on_disk =
		(max_al_nr + AL_CONTEXT_PER_TRANSACTION -1)
		/AL_CONTEXT_PER_TRANSACTION;

	unsigned int al_size_4k = bdev->md.al_size_4k;

	if (al_size_4k > sufficient_on_disk)
		return max_al_nr;

	return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
}
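
/*
 * Worked example (illustrative, using the 919-slots-per-transaction figure
 * from the comment above): with the historic 32 kB activity log,
 * al_size_4k = 8, so up to (8 - 1) * 919 = 6433 AL extents fit in the
 * on-disk context. At 72 or more transaction blocks, 72 * 919 = 66168
 * exceeds 2^16 slots, which is where the DRBD_AL_EXTENTS_MAX cap takes over.
 */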

int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context adm_ctx;
	enum drbd_ret_code retcode;
	struct drbd_device *device;
	struct disk_conf *new_disk_conf, *old_disk_conf;
	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
	int err, fifo_size;

	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto finish;

	device = adm_ctx.device;
	mutex_lock(&adm_ctx.resource->adm_mutex);

	/* we also need a disk
	 * to change the options on */
	if (!get_ldev(device)) {
		retcode = ERR_NO_DISK;
		goto out;
	}

	new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
	if (!new_disk_conf) {
		retcode = ERR_NOMEM;
		goto fail;
	}

	mutex_lock(&device->resource->conf_update);
	old_disk_conf = device->ldev->disk_conf;
	*new_disk_conf = *old_disk_conf;
	if (should_set_defaults(info))
		set_disk_conf_defaults(new_disk_conf);

	err = disk_conf_from_attrs_for_change(new_disk_conf, info);
	if (err && err != -ENOMSG) {
		retcode = ERR_MANDATORY_TAG;
		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
		goto fail_unlock;
	}

	if (!expect(new_disk_conf->resync_rate >= 1))
		new_disk_conf->resync_rate = 1;

	if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
		new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
	if (new_disk_conf->al_extents > drbd_al_extents_max(device->ldev))
		new_disk_conf->al_extents = drbd_al_extents_max(device->ldev);

	if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
		new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;

	fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
	if (fifo_size != device->rs_plan_s->size) {
		new_plan = fifo_alloc(fifo_size);
		if (!new_plan) {
			drbd_err(device, "kmalloc of fifo_buffer failed\n");
			retcode = ERR_NOMEM;
			goto fail_unlock;
		}
	}

	drbd_suspend_io(device);
	wait_event(device->al_wait, lc_try_lock(device->act_log));
	drbd_al_shrink(device);
	err = drbd_check_al_size(device, new_disk_conf);
	lc_unlock(device->act_log);
	wake_up(&device->al_wait);
	drbd_resume_io(device);

	if (err) {
		retcode = ERR_NOMEM;
		goto fail_unlock;
	}

	write_lock_irq(&global_state_lock);
	retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
	if (retcode == NO_ERROR) {
		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
		drbd_resync_after_changed(device);
	}
	write_unlock_irq(&global_state_lock);

	if (retcode != NO_ERROR)
		goto fail_unlock;

	if (new_plan) {
		old_plan = device->rs_plan_s;
		rcu_assign_pointer(device->rs_plan_s, new_plan);
	}

	mutex_unlock(&device->resource->conf_update);

	if (new_disk_conf->al_updates)
		device->ldev->md.flags &= ~MDF_AL_DISABLED;
	else
		device->ldev->md.flags |= MDF_AL_DISABLED;

	if (new_disk_conf->md_flushes)
		clear_bit(MD_NO_FUA, &device->flags);
	else
		set_bit(MD_NO_FUA, &device->flags);

	drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);

	drbd_md_sync(device);

	if (device->state.conn >= C_CONNECTED) {
		struct drbd_peer_device *peer_device;

		for_each_peer_device(peer_device, device)
			drbd_send_sync_param(peer_device);
	}

	synchronize_rcu();
	kfree(old_disk_conf);
	kfree(old_plan);
	mod_timer(&device->request_timer, jiffies + HZ);
	goto success;

fail_unlock:
	mutex_unlock(&device->resource->conf_update);
 fail:
	kfree(new_disk_conf);
	kfree(new_plan);
success:
	put_ldev(device);
 out:
	mutex_unlock(&adm_ctx.resource->adm_mutex);
 finish:
	drbd_adm_finish(&adm_ctx, info, retcode);
	return 0;
}
1432 
1433 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1434 {
1435 	struct drbd_config_context adm_ctx;
1436 	struct drbd_device *device;
1437 	int err;
1438 	enum drbd_ret_code retcode;
1439 	enum determine_dev_size dd;
1440 	sector_t max_possible_sectors;
1441 	sector_t min_md_device_sectors;
1442 	struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
1443 	struct disk_conf *new_disk_conf = NULL;
1444 	struct block_device *bdev;
1445 	struct lru_cache *resync_lru = NULL;
1446 	struct fifo_buffer *new_plan = NULL;
1447 	union drbd_state ns, os;
1448 	enum drbd_state_rv rv;
1449 	struct net_conf *nc;
1450 
1451 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1452 	if (!adm_ctx.reply_skb)
1453 		return retcode;
1454 	if (retcode != NO_ERROR)
1455 		goto finish;
1456 
1457 	device = adm_ctx.device;
1458 	mutex_lock(&adm_ctx.resource->adm_mutex);
1459 	conn_reconfig_start(first_peer_device(device)->connection);
1460 
1461 	/* if you want to reconfigure, please tear down first */
1462 	if (device->state.disk > D_DISKLESS) {
1463 		retcode = ERR_DISK_CONFIGURED;
1464 		goto fail;
1465 	}
1466 	/* It may just now have detached because of IO error.  Make sure
1467 	 * drbd_ldev_destroy is done already, we may end up here very fast,
1468 	 * e.g. if someone calls attach from the on-io-error handler,
1469 	 * to realize a "hot spare" feature (not that I'd recommend that) */
1470 	wait_event(device->misc_wait, !atomic_read(&device->local_cnt));
1471 
1472 	/* make sure there is no leftover from previous force-detach attempts */
1473 	clear_bit(FORCE_DETACH, &device->flags);
1474 	clear_bit(WAS_IO_ERROR, &device->flags);
1475 	clear_bit(WAS_READ_ERROR, &device->flags);
1476 
1477 	/* and no leftover from previously aborted resync or verify, either */
1478 	device->rs_total = 0;
1479 	device->rs_failed = 0;
1480 	atomic_set(&device->rs_pending_cnt, 0);
1481 
1482 	/* allocation not in the IO path, drbdsetup context */
1483 	nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
1484 	if (!nbc) {
1485 		retcode = ERR_NOMEM;
1486 		goto fail;
1487 	}
1488 	spin_lock_init(&nbc->md.uuid_lock);
1489 
1490 	new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
1491 	if (!new_disk_conf) {
1492 		retcode = ERR_NOMEM;
1493 		goto fail;
1494 	}
1495 	nbc->disk_conf = new_disk_conf;
1496 
1497 	set_disk_conf_defaults(new_disk_conf);
1498 	err = disk_conf_from_attrs(new_disk_conf, info);
1499 	if (err) {
1500 		retcode = ERR_MANDATORY_TAG;
1501 		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1502 		goto fail;
1503 	}
1504 
1505 	if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1506 		new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1507 
1508 	new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ);
1509 	if (!new_plan) {
1510 		retcode = ERR_NOMEM;
1511 		goto fail;
1512 	}
1513 
1514 	if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
1515 		retcode = ERR_MD_IDX_INVALID;
1516 		goto fail;
1517 	}
1518 
1519 	write_lock_irq(&global_state_lock);
1520 	retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1521 	write_unlock_irq(&global_state_lock);
1522 	if (retcode != NO_ERROR)
1523 		goto fail;
1524 
1525 	rcu_read_lock();
1526 	nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
1527 	if (nc) {
1528 		if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
1529 			rcu_read_unlock();
1530 			retcode = ERR_STONITH_AND_PROT_A;
1531 			goto fail;
1532 		}
1533 	}
1534 	rcu_read_unlock();
1535 
1536 	bdev = blkdev_get_by_path(new_disk_conf->backing_dev,
1537 				  FMODE_READ | FMODE_WRITE | FMODE_EXCL, device);
1538 	if (IS_ERR(bdev)) {
1539 		drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev,
1540 			PTR_ERR(bdev));
1541 		retcode = ERR_OPEN_DISK;
1542 		goto fail;
1543 	}
1544 	nbc->backing_bdev = bdev;
1545 
1546 	/*
1547 	 * meta_dev_idx >= 0: external fixed size, possibly multiple
1548 	 * drbd sharing one meta device.  TODO in that case, paranoia
1549 	 * check that [md_bdev, meta_dev_idx] is not yet used by some
1550 	 * other drbd minor!  (if you use drbd.conf + drbdadm, that
1551 	 * should check it for you already; but if you don't, or
1552 	 * someone fooled it, we need to double check here)
1553 	 */
1554 	bdev = blkdev_get_by_path(new_disk_conf->meta_dev,
1555 				  FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1556 				  (new_disk_conf->meta_dev_idx < 0) ?
1557 				  (void *)device : (void *)drbd_m_holder);
1558 	if (IS_ERR(bdev)) {
1559 		drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev,
1560 			PTR_ERR(bdev));
1561 		retcode = ERR_OPEN_MD_DISK;
1562 		goto fail;
1563 	}
1564 	nbc->md_bdev = bdev;
1565 
1566 	if ((nbc->backing_bdev == nbc->md_bdev) !=
1567 	    (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
1568 	     new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
1569 		retcode = ERR_MD_IDX_INVALID;
1570 		goto fail;
1571 	}
1572 
1573 	resync_lru = lc_create("resync", drbd_bm_ext_cache,
1574 			1, 61, sizeof(struct bm_extent),
1575 			offsetof(struct bm_extent, lce));
1576 	if (!resync_lru) {
1577 		retcode = ERR_NOMEM;
1578 		goto fail;
1579 	}
1580 
1581 	/* Read our meta data super block early.
1582 	 * This also sets other on-disk offsets. */
1583 	retcode = drbd_md_read(device, nbc);
1584 	if (retcode != NO_ERROR)
1585 		goto fail;
1586 
1587 	if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1588 		new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1589 	if (new_disk_conf->al_extents > drbd_al_extents_max(nbc))
1590 		new_disk_conf->al_extents = drbd_al_extents_max(nbc);
1591 
1592 	if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
1593 		drbd_err(device, "max capacity %llu smaller than disk size %llu\n",
1594 			(unsigned long long) drbd_get_max_capacity(nbc),
1595 			(unsigned long long) new_disk_conf->disk_size);
1596 		retcode = ERR_DISK_TOO_SMALL;
1597 		goto fail;
1598 	}
1599 
1600 	if (new_disk_conf->meta_dev_idx < 0) {
1601 		max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
1602 		/* at least one MB, otherwise it does not make sense */
1603 		min_md_device_sectors = (2<<10);
1604 	} else {
1605 		max_possible_sectors = DRBD_MAX_SECTORS;
1606 		min_md_device_sectors = MD_128MB_SECT * (new_disk_conf->meta_dev_idx + 1);
1607 	}
1608 
1609 	if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
1610 		retcode = ERR_MD_DISK_TOO_SMALL;
1611 		drbd_warn(device, "refusing attach: md-device too small, "
1612 		     "at least %llu sectors needed for this meta-disk type\n",
1613 		     (unsigned long long) min_md_device_sectors);
1614 		goto fail;
1615 	}
1616 
1617 	/* Make sure the new disk is big enough
1618 	 * (we may currently be R_PRIMARY with no local disk...) */
1619 	if (drbd_get_max_capacity(nbc) <
1620 	    drbd_get_capacity(device->this_bdev)) {
1621 		retcode = ERR_DISK_TOO_SMALL;
1622 		goto fail;
1623 	}
1624 
1625 	nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
1626 
1627 	if (nbc->known_size > max_possible_sectors) {
1628 		drbd_warn(device, "==> truncating very big lower level device "
1629 			"to the currently maximum possible %llu sectors <==\n",
1630 			(unsigned long long) max_possible_sectors);
1631 		if (new_disk_conf->meta_dev_idx >= 0)
1632 			drbd_warn(device, "==>> using internal or flexible "
1633 				      "meta data may help <<==\n");
1634 	}
1635 
1636 	drbd_suspend_io(device);
1637 	/* also wait for the last barrier ack. */
1638 	/* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
1639 	 * We need a way to either ignore barrier acks for barriers sent before a device
1640 	 * was attached, or to wait for all pending barrier acks to come in.
1641 	 * As barriers are counted per resource,
1642 	 * we'd need to suspend io on all devices of a resource.
1643 	 */
1644 	wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
1645 	/* and for any other previously queued work */
1646 	drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
1647 
1648 	rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
1649 	retcode = rv;  /* FIXME: Type mismatch. */
1650 	drbd_resume_io(device);
1651 	if (rv < SS_SUCCESS)
1652 		goto fail;
1653 
1654 	if (!get_ldev_if_state(device, D_ATTACHING))
1655 		goto force_diskless;
1656 
1657 	if (!device->bitmap) {
1658 		if (drbd_bm_init(device)) {
1659 			retcode = ERR_NOMEM;
1660 			goto force_diskless_dec;
1661 		}
1662 	}
1663 
1664 	if (device->state.conn < C_CONNECTED &&
1665 	    device->state.role == R_PRIMARY && device->ed_uuid &&
1666 	    (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
1667 		drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
1668 		    (unsigned long long)device->ed_uuid);
1669 		retcode = ERR_DATA_NOT_CURRENT;
1670 		goto force_diskless_dec;
1671 	}
1672 
1673 	/* Since we are diskless, fix the activity log first... */
1674 	if (drbd_check_al_size(device, new_disk_conf)) {
1675 		retcode = ERR_NOMEM;
1676 		goto force_diskless_dec;
1677 	}
1678 
1679 	/* Prevent shrinking of consistent devices! */
1680 	if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
1681 	    drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
1682 		drbd_warn(device, "refusing to truncate a consistent device\n");
1683 		retcode = ERR_DISK_TOO_SMALL;
1684 		goto force_diskless_dec;
1685 	}
1686 
1687 	/* Reset the "barriers don't work" bits here, then force meta data to
1688 	 * be written, to ensure we determine if barriers are supported. */
1689 	if (new_disk_conf->md_flushes)
1690 		clear_bit(MD_NO_FUA, &device->flags);
1691 	else
1692 		set_bit(MD_NO_FUA, &device->flags);
1693 
1694 	/* Point of no return reached.
1695 	 * Devices and memory are no longer released by error cleanup below.
1696 	 * Now the device takes over responsibility, and the state engine should
1697 	 * clean it up somewhere.  */
1698 	D_ASSERT(device, device->ldev == NULL);
1699 	device->ldev = nbc;
1700 	device->resync = resync_lru;
1701 	device->rs_plan_s = new_plan;
1702 	nbc = NULL;
1703 	resync_lru = NULL;
1704 	new_disk_conf = NULL;
1705 	new_plan = NULL;
1706 
1707 	drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);
1708 
1709 	if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
1710 		set_bit(CRASHED_PRIMARY, &device->flags);
1711 	else
1712 		clear_bit(CRASHED_PRIMARY, &device->flags);
1713 
1714 	if (drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1715 	    !(device->state.role == R_PRIMARY && device->resource->susp_nod))
1716 		set_bit(CRASHED_PRIMARY, &device->flags);
1717 
1718 	device->send_cnt = 0;
1719 	device->recv_cnt = 0;
1720 	device->read_cnt = 0;
1721 	device->writ_cnt = 0;
1722 
1723 	drbd_reconsider_max_bio_size(device);
1724 
1725 	/* If I am currently not R_PRIMARY,
1726 	 * but meta data primary indicator is set,
1727 	 * I just now recover from a hard crash,
1728 	 * and have been R_PRIMARY before that crash.
1729 	 *
1730 	 * Now, if I had no connection before that crash
1731 	 * (have been degraded R_PRIMARY), chances are that
1732 	 * I won't find my peer now either.
1733 	 *
1734 	 * In that case, and _only_ in that case,
1735 	 * we use the degr-wfc-timeout instead of the default,
1736 	 * so we can automatically recover from a crash of a
1737 	 * degraded but active "cluster" after a certain timeout.
1738 	 */
1739 	clear_bit(USE_DEGR_WFC_T, &device->flags);
1740 	if (device->state.role != R_PRIMARY &&
1741 	     drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1742 	    !drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND))
1743 		set_bit(USE_DEGR_WFC_T, &device->flags);
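	/*
	 * Condensed form of the rule above (sketch):
	 *
	 *	use_degr_wfc_timeout :=
	 *		   role != R_PRIMARY	(not primary right now)
	 *		&& MDF_PRIMARY_IND	(was primary before the crash)
	 *		&& !MDF_CONNECTED_IND	(had no connected peer back then)
	 */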
1744 
1745 	dd = drbd_determine_dev_size(device, 0, NULL);
1746 	if (dd <= DS_ERROR) {
1747 		retcode = ERR_NOMEM_BITMAP;
1748 		goto force_diskless_dec;
1749 	} else if (dd == DS_GREW)
1750 		set_bit(RESYNC_AFTER_NEG, &device->flags);
1751 
1752 	if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ||
1753 	    (test_bit(CRASHED_PRIMARY, &device->flags) &&
1754 	     drbd_md_test_flag(device->ldev, MDF_AL_DISABLED))) {
1755 		drbd_info(device, "Assuming that all blocks are out of sync "
1756 		     "(aka FullSync)\n");
1757 		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
1758 			"set_n_write from attaching", BM_LOCKED_MASK)) {
1759 			retcode = ERR_IO_MD_DISK;
1760 			goto force_diskless_dec;
1761 		}
1762 	} else {
1763 		if (drbd_bitmap_io(device, &drbd_bm_read,
1764 			"read from attaching", BM_LOCKED_MASK)) {
1765 			retcode = ERR_IO_MD_DISK;
1766 			goto force_diskless_dec;
1767 		}
1768 	}
1769 
1770 	if (_drbd_bm_total_weight(device) == drbd_bm_bits(device))
1771 		drbd_suspend_al(device); /* IO is still suspended here... */
1772 
1773 	spin_lock_irq(&device->resource->req_lock);
1774 	os = drbd_read_state(device);
1775 	ns = os;
1776 	/* If MDF_CONSISTENT is not set, go into D_INCONSISTENT state;
1777 	   otherwise investigate MDF_WAS_UP_TO_DATE:
1778 	   If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
1779 	   otherwise into D_CONSISTENT state.
1780 	*/
1781 	if (drbd_md_test_flag(device->ldev, MDF_CONSISTENT)) {
1782 		if (drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE))
1783 			ns.disk = D_CONSISTENT;
1784 		else
1785 			ns.disk = D_OUTDATED;
1786 	} else {
1787 		ns.disk = D_INCONSISTENT;
1788 	}
1789 
1790 	if (drbd_md_test_flag(device->ldev, MDF_PEER_OUT_DATED))
1791 		ns.pdsk = D_OUTDATED;
1792 
1793 	rcu_read_lock();
1794 	if (ns.disk == D_CONSISTENT &&
1795 	    (ns.pdsk == D_OUTDATED || rcu_dereference(device->ldev->disk_conf)->fencing == FP_DONT_CARE))
1796 		ns.disk = D_UP_TO_DATE;
1797 
1798 	/* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
1799 	   MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
1800 	   this point, because drbd_request_state() modifies these
1801 	   flags. */
1802 
1803 	if (rcu_dereference(device->ldev->disk_conf)->al_updates)
1804 		device->ldev->md.flags &= ~MDF_AL_DISABLED;
1805 	else
1806 		device->ldev->md.flags |= MDF_AL_DISABLED;
1807 
1808 	rcu_read_unlock();
1809 
1810 	/* In case we are C_CONNECTED postpone any decision on the new disk
1811 	   state until after the negotiation phase. */
1812 	if (device->state.conn == C_CONNECTED) {
1813 		device->new_state_tmp.i = ns.i;
1814 		ns.i = os.i;
1815 		ns.disk = D_NEGOTIATING;
1816 
1817 		/* We expect to receive up-to-date UUIDs soon.
1818 		   To avoid a race in receive_state, free p_uuid while
1819 		   holding req_lock. I.e. atomic with the state change */
1820 		kfree(device->p_uuid);
1821 		device->p_uuid = NULL;
1822 	}
1823 
1824 	rv = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
1825 	spin_unlock_irq(&device->resource->req_lock);
1826 
1827 	if (rv < SS_SUCCESS)
1828 		goto force_diskless_dec;
1829 
1830 	mod_timer(&device->request_timer, jiffies + HZ);
1831 
1832 	if (device->state.role == R_PRIMARY)
1833 		device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
1834 	else
1835 		device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
1836 
1837 	drbd_md_mark_dirty(device);
1838 	drbd_md_sync(device);
1839 
1840 	kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
1841 	put_ldev(device);
1842 	conn_reconfig_done(first_peer_device(device)->connection);
1843 	mutex_unlock(&adm_ctx.resource->adm_mutex);
1844 	drbd_adm_finish(&adm_ctx, info, retcode);
1845 	return 0;
1846 
1847  force_diskless_dec:
1848 	put_ldev(device);
1849  force_diskless:
1850 	drbd_force_state(device, NS(disk, D_DISKLESS));
1851 	drbd_md_sync(device);
1852  fail:
1853 	conn_reconfig_done(first_peer_device(device)->connection);
1854 	if (nbc) {
1855 		if (nbc->backing_bdev)
1856 			blkdev_put(nbc->backing_bdev,
1857 				   FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1858 		if (nbc->md_bdev)
1859 			blkdev_put(nbc->md_bdev,
1860 				   FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1861 		kfree(nbc);
1862 	}
1863 	kfree(new_disk_conf);
1864 	lc_destroy(resync_lru);
1865 	kfree(new_plan);
1866 	mutex_unlock(&adm_ctx.resource->adm_mutex);
1867  finish:
1868 	drbd_adm_finish(&adm_ctx, info, retcode);
1869 	return 0;
1870 }
1871 
1872 static int adm_detach(struct drbd_device *device, int force)
1873 {
1874 	enum drbd_state_rv retcode;
1875 	int ret;
1876 
1877 	if (force) {
1878 		set_bit(FORCE_DETACH, &device->flags);
1879 		drbd_force_state(device, NS(disk, D_FAILED));
1880 		retcode = SS_SUCCESS;
1881 		goto out;
1882 	}
1883 
1884 	drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
1885 	drbd_md_get_buffer(device); /* make sure there is no in-flight meta-data IO */
1886 	retcode = drbd_request_state(device, NS(disk, D_FAILED));
1887 	drbd_md_put_buffer(device);
1888 	/* D_FAILED will transition to DISKLESS. */
1889 	ret = wait_event_interruptible(device->misc_wait,
1890 			device->state.disk != D_FAILED);
1891 	drbd_resume_io(device);
1892 	if ((int)retcode == (int)SS_IS_DISKLESS)
1893 		retcode = SS_NOTHING_TO_DO;
1894 	if (ret)
1895 		retcode = ERR_INTR;
1896 out:
1897 	return retcode;
1898 }
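/*
 * A note on adm_detach()'s return value: like the "FIXME: Type mismatch"
 * spots elsewhere in this file, it deliberately mixes state-engine SS_*
 * results with enum drbd_ret_code values such as ERR_INTR.  Both fit in
 * an int and are kept in distinct value ranges by convention, so the
 * caller can hand either kind to drbd_adm_finish().
 */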
1899 
1900 /* Detaching the disk is a process in multiple stages.  First we need to lock
1901  * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
1902  * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
1903  * internal references as well.
1904  * Only then have we finally detached. */
1905 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
1906 {
1907 	struct drbd_config_context adm_ctx;
1908 	enum drbd_ret_code retcode;
1909 	struct detach_parms parms = { };
1910 	int err;
1911 
1912 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1913 	if (!adm_ctx.reply_skb)
1914 		return retcode;
1915 	if (retcode != NO_ERROR)
1916 		goto out;
1917 
1918 	if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
1919 		err = detach_parms_from_attrs(&parms, info);
1920 		if (err) {
1921 			retcode = ERR_MANDATORY_TAG;
1922 			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1923 			goto out;
1924 		}
1925 	}
1926 
1927 	mutex_lock(&adm_ctx.resource->adm_mutex);
1928 	retcode = adm_detach(adm_ctx.device, parms.force_detach);
1929 	mutex_unlock(&adm_ctx.resource->adm_mutex);
1930 out:
1931 	drbd_adm_finish(&adm_ctx, info, retcode);
1932 	return 0;
1933 }
1934 
1935 static bool conn_resync_running(struct drbd_connection *connection)
1936 {
1937 	struct drbd_peer_device *peer_device;
1938 	bool rv = false;
1939 	int vnr;
1940 
1941 	rcu_read_lock();
1942 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1943 		struct drbd_device *device = peer_device->device;
1944 		if (device->state.conn == C_SYNC_SOURCE ||
1945 		    device->state.conn == C_SYNC_TARGET ||
1946 		    device->state.conn == C_PAUSED_SYNC_S ||
1947 		    device->state.conn == C_PAUSED_SYNC_T) {
1948 			rv = true;
1949 			break;
1950 		}
1951 	}
1952 	rcu_read_unlock();
1953 
1954 	return rv;
1955 }
1956 
1957 static bool conn_ov_running(struct drbd_connection *connection)
1958 {
1959 	struct drbd_peer_device *peer_device;
1960 	bool rv = false;
1961 	int vnr;
1962 
1963 	rcu_read_lock();
1964 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1965 		struct drbd_device *device = peer_device->device;
1966 		if (device->state.conn == C_VERIFY_S ||
1967 		    device->state.conn == C_VERIFY_T) {
1968 			rv = true;
1969 			break;
1970 		}
1971 	}
1972 	rcu_read_unlock();
1973 
1974 	return rv;
1975 }
1976 
1977 static enum drbd_ret_code
1978 _check_net_options(struct drbd_connection *connection, struct net_conf *old_net_conf, struct net_conf *new_net_conf)
1979 {
1980 	struct drbd_peer_device *peer_device;
1981 	int i;
1982 
1983 	if (old_net_conf && connection->cstate == C_WF_REPORT_PARAMS && connection->agreed_pro_version < 100) {
1984 		if (new_net_conf->wire_protocol != old_net_conf->wire_protocol)
1985 			return ERR_NEED_APV_100;
1986 
1987 		if (new_net_conf->two_primaries != old_net_conf->two_primaries)
1988 			return ERR_NEED_APV_100;
1989 
1990 		if (strcmp(new_net_conf->integrity_alg, old_net_conf->integrity_alg))
1991 			return ERR_NEED_APV_100;
1992 	}
1993 
1994 	if (!new_net_conf->two_primaries &&
1995 	    conn_highest_role(connection) == R_PRIMARY &&
1996 	    conn_highest_peer(connection) == R_PRIMARY)
1997 		return ERR_NEED_ALLOW_TWO_PRI;
1998 
1999 	if (new_net_conf->two_primaries &&
2000 	    (new_net_conf->wire_protocol != DRBD_PROT_C))
2001 		return ERR_NOT_PROTO_C;
2002 
2003 	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2004 		struct drbd_device *device = peer_device->device;
2005 		if (get_ldev(device)) {
2006 			enum drbd_fencing_p fp = rcu_dereference(device->ldev->disk_conf)->fencing;
2007 			put_ldev(device);
2008 			if (new_net_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
2009 				return ERR_STONITH_AND_PROT_A;
2010 		}
2011 		if (device->state.role == R_PRIMARY && new_net_conf->discard_my_data)
2012 			return ERR_DISCARD_IMPOSSIBLE;
2013 	}
2014 
2015 	if (new_net_conf->on_congestion != OC_BLOCK && new_net_conf->wire_protocol != DRBD_PROT_A)
2016 		return ERR_CONG_NOT_PROTO_A;
2017 
2018 	return NO_ERROR;
2019 }
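/*
 * Rules enforced above (sketch): wire protocol, the two-primaries flag
 * and the integrity algorithm cannot change on an established connection
 * below agreed protocol version 100; two-primaries cannot be cleared
 * while both nodes are primary, and requires protocol C; protocol A
 * combined with fencing "stonith" is refused, as is discard-my-data on
 * a primary; on-congestion other than "block" needs protocol A.
 */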
2020 
2021 static enum drbd_ret_code
2022 check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
2023 {
2024 	enum drbd_ret_code rv;
2025 	struct drbd_peer_device *peer_device;
2026 	int i;
2027 
2028 	rcu_read_lock();
2029 	rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf);
2030 	rcu_read_unlock();
2031 
2032 	/* connection->peer_devices protected by genl_lock() here */
2033 	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2034 		struct drbd_device *device = peer_device->device;
2035 		if (!device->bitmap) {
2036 			if (drbd_bm_init(device))
2037 				return ERR_NOMEM;
2038 		}
2039 	}
2040 
2041 	return rv;
2042 }
2043 
2044 struct crypto {
2045 	struct crypto_hash *verify_tfm;
2046 	struct crypto_hash *csums_tfm;
2047 	struct crypto_hash *cram_hmac_tfm;
2048 	struct crypto_hash *integrity_tfm;
2049 };
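/*
 * These transforms correspond, roughly, to the net_conf algorithm
 * options: verify_tfm to verify-alg (online verify digests), csums_tfm
 * to csums-alg (checksum based resync), cram_hmac_tfm to cram-hmac-alg
 * (CRAM-HMAC peer authentication on connect), and integrity_tfm to
 * data-integrity-alg (per packet payload checksums).
 */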
2050 
2051 static int
2052 alloc_hash(struct crypto_hash **tfm, char *tfm_name, int err_alg)
2053 {
2054 	if (!tfm_name[0])	/* no algorithm requested: leave *tfm NULL */
2055 		return NO_ERROR;
2056 
2057 	*tfm = crypto_alloc_hash(tfm_name, 0, CRYPTO_ALG_ASYNC);
2058 	if (IS_ERR(*tfm)) {
2059 		*tfm = NULL;
2060 		return err_alg;
2061 	}
2062 
2063 	return NO_ERROR;
2064 }
2065 
2066 static enum drbd_ret_code
2067 alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf)
2068 {
2069 	char hmac_name[CRYPTO_MAX_ALG_NAME];
2070 	enum drbd_ret_code rv;
2071 
2072 	rv = alloc_hash(&crypto->csums_tfm, new_net_conf->csums_alg,
2073 		       ERR_CSUMS_ALG);
2074 	if (rv != NO_ERROR)
2075 		return rv;
2076 	rv = alloc_hash(&crypto->verify_tfm, new_net_conf->verify_alg,
2077 		       ERR_VERIFY_ALG);
2078 	if (rv != NO_ERROR)
2079 		return rv;
2080 	rv = alloc_hash(&crypto->integrity_tfm, new_net_conf->integrity_alg,
2081 		       ERR_INTEGRITY_ALG);
2082 	if (rv != NO_ERROR)
2083 		return rv;
2084 	if (new_net_conf->cram_hmac_alg[0] != 0) {
2085 		snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
2086 			 new_net_conf->cram_hmac_alg);
2087 
2088 		rv = alloc_hash(&crypto->cram_hmac_tfm, hmac_name,
2089 			       ERR_AUTH_ALG);
2090 	}
2091 
2092 	return rv;
2093 }
2094 
2095 static void free_crypto(struct crypto *crypto)
2096 {
2097 	crypto_free_hash(crypto->cram_hmac_tfm);
2098 	crypto_free_hash(crypto->integrity_tfm);
2099 	crypto_free_hash(crypto->csums_tfm);
2100 	crypto_free_hash(crypto->verify_tfm);
2101 }
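/*
 * free_crypto() may run on a struct crypto that is only partially
 * filled in (see the error paths of its callers, where crypto starts
 * out zero initialized); this relies on the crypto API free routines
 * tolerating a NULL tfm.
 */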
2102 
2103 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
2104 {
2105 	struct drbd_config_context adm_ctx;
2106 	enum drbd_ret_code retcode;
2107 	struct drbd_connection *connection;
2108 	struct net_conf *old_net_conf, *new_net_conf = NULL;
2109 	int err;
2110 	int ovr; /* online verify running */
2111 	int rsr; /* re-sync running */
2112 	struct crypto crypto = { };
2113 
2114 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2115 	if (!adm_ctx.reply_skb)
2116 		return retcode;
2117 	if (retcode != NO_ERROR)
2118 		goto finish;
2119 
2120 	connection = adm_ctx.connection;
2121 	mutex_lock(&adm_ctx.resource->adm_mutex);
2122 
2123 	new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
2124 	if (!new_net_conf) {
2125 		retcode = ERR_NOMEM;
2126 		goto out;
2127 	}
2128 
2129 	conn_reconfig_start(connection);
2130 
2131 	mutex_lock(&connection->data.mutex);
2132 	mutex_lock(&connection->resource->conf_update);
2133 	old_net_conf = connection->net_conf;
2134 
2135 	if (!old_net_conf) {
2136 		drbd_msg_put_info(adm_ctx.reply_skb, "net conf missing, try connect");
2137 		retcode = ERR_INVALID_REQUEST;
2138 		goto fail;
2139 	}
2140 
2141 	*new_net_conf = *old_net_conf;
2142 	if (should_set_defaults(info))
2143 		set_net_conf_defaults(new_net_conf);
2144 
2145 	err = net_conf_from_attrs_for_change(new_net_conf, info);
2146 	if (err && err != -ENOMSG) {
2147 		retcode = ERR_MANDATORY_TAG;
2148 		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2149 		goto fail;
2150 	}
2151 
2152 	retcode = check_net_options(connection, new_net_conf);
2153 	if (retcode != NO_ERROR)
2154 		goto fail;
2155 
2156 	/* re-sync running */
2157 	rsr = conn_resync_running(connection);
2158 	if (rsr && strcmp(new_net_conf->csums_alg, old_net_conf->csums_alg)) {
2159 		retcode = ERR_CSUMS_RESYNC_RUNNING;
2160 		goto fail;
2161 	}
2162 
2163 	/* online verify running */
2164 	ovr = conn_ov_running(connection);
2165 	if (ovr && strcmp(new_net_conf->verify_alg, old_net_conf->verify_alg)) {
2166 		retcode = ERR_VERIFY_RUNNING;
2167 		goto fail;
2168 	}
2169 
2170 	retcode = alloc_crypto(&crypto, new_net_conf);
2171 	if (retcode != NO_ERROR)
2172 		goto fail;
2173 
2174 	rcu_assign_pointer(connection->net_conf, new_net_conf);
2175 
2176 	if (!rsr) {
2177 		crypto_free_hash(connection->csums_tfm);
2178 		connection->csums_tfm = crypto.csums_tfm;
2179 		crypto.csums_tfm = NULL;
2180 	}
2181 	if (!ovr) {
2182 		crypto_free_hash(connection->verify_tfm);
2183 		connection->verify_tfm = crypto.verify_tfm;
2184 		crypto.verify_tfm = NULL;
2185 	}
2186 
2187 	crypto_free_hash(connection->integrity_tfm);
2188 	connection->integrity_tfm = crypto.integrity_tfm;
2189 	if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100)
2190 		/* Do this without trying to take connection->data.mutex again.  */
2191 		__drbd_send_protocol(connection, P_PROTOCOL_UPDATE);
2192 
2193 	crypto_free_hash(connection->cram_hmac_tfm);
2194 	connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2195 
2196 	mutex_unlock(&connection->resource->conf_update);
2197 	mutex_unlock(&connection->data.mutex);
2198 	synchronize_rcu();	/* wait out RCU readers of old_net_conf */
2199 	kfree(old_net_conf);
2200 
2201 	if (connection->cstate >= C_WF_REPORT_PARAMS) {
2202 		struct drbd_peer_device *peer_device;
2203 		int vnr;
2204 
2205 		idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
2206 			drbd_send_sync_param(peer_device);
2207 	}
2208 
2209 	goto done;
2210 
2211  fail:
2212 	mutex_unlock(&connection->resource->conf_update);
2213 	mutex_unlock(&connection->data.mutex);
2214 	free_crypto(&crypto);
2215 	kfree(new_net_conf);
2216  done:
2217 	conn_reconfig_done(connection);
2218  out:
2219 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2220  finish:
2221 	drbd_adm_finish(&adm_ctx, info, retcode);
2222 	return 0;
2223 }
2224 
2225 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
2226 {
2227 	struct drbd_config_context adm_ctx;
2228 	struct drbd_peer_device *peer_device;
2229 	struct net_conf *old_net_conf, *new_net_conf = NULL;
2230 	struct crypto crypto = { };
2231 	struct drbd_resource *resource;
2232 	struct drbd_connection *connection;
2233 	enum drbd_ret_code retcode;
2234 	int i;
2235 	int err;
2236 
2237 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2238 
2239 	if (!adm_ctx.reply_skb)
2240 		return retcode;
2241 	if (retcode != NO_ERROR)
2242 		goto out;
2243 	if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
2244 		drbd_msg_put_info(adm_ctx.reply_skb, "connection endpoint(s) missing");
2245 		retcode = ERR_INVALID_REQUEST;
2246 		goto out;
2247 	}
2248 
2249 	/* No need for _rcu here. All reconfiguration is
2250 	 * strictly serialized on genl_lock(). We are protected against
2251 	 * concurrent reconfiguration/addition/deletion */
2252 	for_each_resource(resource, &drbd_resources) {
2253 		for_each_connection(connection, resource) {
2254 			if (nla_len(adm_ctx.my_addr) == connection->my_addr_len &&
2255 			    !memcmp(nla_data(adm_ctx.my_addr), &connection->my_addr,
2256 				    connection->my_addr_len)) {
2257 				retcode = ERR_LOCAL_ADDR;
2258 				goto out;
2259 			}
2260 
2261 			if (nla_len(adm_ctx.peer_addr) == connection->peer_addr_len &&
2262 			    !memcmp(nla_data(adm_ctx.peer_addr), &connection->peer_addr,
2263 				    connection->peer_addr_len)) {
2264 				retcode = ERR_PEER_ADDR;
2265 				goto out;
2266 			}
2267 		}
2268 	}
2269 
2270 	mutex_lock(&adm_ctx.resource->adm_mutex);
2271 	connection = first_connection(adm_ctx.resource);
2272 	conn_reconfig_start(connection);
2273 
2274 	if (connection->cstate > C_STANDALONE) {
2275 		retcode = ERR_NET_CONFIGURED;
2276 		goto fail;
2277 	}
2278 
2279 	/* allocation not in the IO path, drbdsetup / netlink process context */
2280 	new_net_conf = kzalloc(sizeof(*new_net_conf), GFP_KERNEL);
2281 	if (!new_net_conf) {
2282 		retcode = ERR_NOMEM;
2283 		goto fail;
2284 	}
2285 
2286 	set_net_conf_defaults(new_net_conf);
2287 
2288 	err = net_conf_from_attrs(new_net_conf, info);
2289 	if (err && err != -ENOMSG) {
2290 		retcode = ERR_MANDATORY_TAG;
2291 		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2292 		goto fail;
2293 	}
2294 
2295 	retcode = check_net_options(connection, new_net_conf);
2296 	if (retcode != NO_ERROR)
2297 		goto fail;
2298 
2299 	retcode = alloc_crypto(&crypto, new_net_conf);
2300 	if (retcode != NO_ERROR)
2301 		goto fail;
2302 
2303 	((char *)new_net_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;	/* ensure NUL termination */
2304 
2305 	drbd_flush_workqueue(&connection->sender_work);
2306 
2307 	mutex_lock(&adm_ctx.resource->conf_update);
2308 	old_net_conf = connection->net_conf;
2309 	if (old_net_conf) {
2310 		retcode = ERR_NET_CONFIGURED;
2311 		mutex_unlock(&adm_ctx.resource->conf_update);
2312 		goto fail;
2313 	}
2314 	rcu_assign_pointer(connection->net_conf, new_net_conf);
2315 
2316 	conn_free_crypto(connection);
2317 	connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2318 	connection->integrity_tfm = crypto.integrity_tfm;
2319 	connection->csums_tfm = crypto.csums_tfm;
2320 	connection->verify_tfm = crypto.verify_tfm;
2321 
2322 	connection->my_addr_len = nla_len(adm_ctx.my_addr);
2323 	memcpy(&connection->my_addr, nla_data(adm_ctx.my_addr), connection->my_addr_len);
2324 	connection->peer_addr_len = nla_len(adm_ctx.peer_addr);
2325 	memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len);
2326 
2327 	mutex_unlock(&adm_ctx.resource->conf_update);
2328 
2329 	rcu_read_lock();
2330 	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2331 		struct drbd_device *device = peer_device->device;
2332 		device->send_cnt = 0;
2333 		device->recv_cnt = 0;
2334 	}
2335 	rcu_read_unlock();
2336 
2337 	retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
2338 
2339 	conn_reconfig_done(connection);
2340 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2341 	drbd_adm_finish(&adm_ctx, info, retcode);
2342 	return 0;
2343 
2344 fail:
2345 	free_crypto(&crypto);
2346 	kfree(new_net_conf);
2347 
2348 	conn_reconfig_done(connection);
2349 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2350 out:
2351 	drbd_adm_finish(&adm_ctx, info, retcode);
2352 	return 0;
2353 }
2354 
2355 static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force)
2356 {
2357 	enum drbd_state_rv rv;
2358 
2359 	rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2360 			force ? CS_HARD : 0);
2361 
2362 	switch (rv) {
2363 	case SS_NOTHING_TO_DO:
2364 		break;
2365 	case SS_ALREADY_STANDALONE:
2366 		return SS_SUCCESS;
2367 	case SS_PRIMARY_NOP:
2368 		/* Our state checking code wants to see the peer outdated. */
2369 		rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0);
2370 
2371 		if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */
2372 			rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_VERBOSE);
2373 
2374 		break;
2375 	case SS_CW_FAILED_BY_PEER:
2376 		/* The peer probably wants to see us outdated. */
2377 		rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
2378 							disk, D_OUTDATED), 0);
2379 		if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
2380 			rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2381 					CS_HARD);
2382 		}
2383 		break;
2384 	default:
2385 		break;	/* no special handling necessary */
2386 	}
2387 
2388 	if (rv >= SS_SUCCESS) {
2389 		enum drbd_state_rv rv2;
2390 		/* No one else can reconfigure the network while I am here.
2391 		 * The state handling only uses drbd_thread_stop_nowait();
2392 		 * we want to really wait here until the receiver is no more.
2393 		 */
2394 		drbd_thread_stop(&connection->receiver);
2395 
2396 		/* Race breaker.  This additional state change request may be
2397 		 * necessary if this was a forced disconnect during a receiver
2398 		 * restart.  We may have "killed" the receiver thread just
2399 		 * after drbd_receiver() returned.  Typically, we should be
2400 		 * C_STANDALONE already, now, and this becomes a no-op.
2401 		 */
2402 		rv2 = conn_request_state(connection, NS(conn, C_STANDALONE),
2403 				CS_VERBOSE | CS_HARD);
2404 		if (rv2 < SS_SUCCESS)
2405 			drbd_err(connection,
2406 				"unexpected rv2=%d in conn_try_disconnect()\n",
2407 				rv2);
2408 	}
2409 	return rv;
2410 }
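/*
 * Summary of the fallback ladder above (sketch): a plain transition to
 * C_DISCONNECTING is tried first; on SS_PRIMARY_NOP it is retried with
 * the peer's disk marked D_OUTDATED, on SS_CW_FAILED_BY_PEER with our
 * own disk marked D_OUTDATED, and only as a last resort with CS_HARD.
 */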
2411 
2412 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
2413 {
2414 	struct drbd_config_context adm_ctx;
2415 	struct disconnect_parms parms;
2416 	struct drbd_connection *connection;
2417 	enum drbd_state_rv rv;
2418 	enum drbd_ret_code retcode;
2419 	int err;
2420 
2421 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2422 	if (!adm_ctx.reply_skb)
2423 		return retcode;
2424 	if (retcode != NO_ERROR)
2425 		goto fail;
2426 
2427 	connection = adm_ctx.connection;
2428 	memset(&parms, 0, sizeof(parms));
2429 	if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
2430 		err = disconnect_parms_from_attrs(&parms, info);
2431 		if (err) {
2432 			retcode = ERR_MANDATORY_TAG;
2433 			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2434 			goto fail;
2435 		}
2436 	}
2437 
2438 	mutex_lock(&adm_ctx.resource->adm_mutex);
2439 	rv = conn_try_disconnect(connection, parms.force_disconnect);
2440 	if (rv < SS_SUCCESS)
2441 		retcode = rv;  /* FIXME: Type mismatch. */
2442 	else
2443 		retcode = NO_ERROR;
2444 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2445  fail:
2446 	drbd_adm_finish(&adm_ctx, info, retcode);
2447 	return 0;
2448 }
2449 
2450 void resync_after_online_grow(struct drbd_device *device)
2451 {
2452 	int iass; /* I am sync source */
2453 
2454 	drbd_info(device, "Resync of new storage after online grow\n");
2455 	if (device->state.role != device->state.peer)
2456 		iass = (device->state.role == R_PRIMARY);
2457 	else
2458 		iass = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
2459 
2460 	if (iass)
2461 		drbd_start_resync(device, C_SYNC_SOURCE);
2462 	else
2463 		_drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
2464 }
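/*
 * Sync source selection above (sketch): if the roles differ, the
 * primary becomes sync source; with equal roles, the RESOLVE_CONFLICTS
 * connection flag (a tie breaker the two nodes agreed on during the
 * handshake) decides.
 */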
2465 
2466 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
2467 {
2468 	struct drbd_config_context adm_ctx;
2469 	struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
2470 	struct resize_parms rs;
2471 	struct drbd_device *device;
2472 	enum drbd_ret_code retcode;
2473 	enum determine_dev_size dd;
2474 	bool change_al_layout = false;
2475 	enum dds_flags ddsf;
2476 	sector_t u_size;
2477 	int err;
2478 
2479 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2480 	if (!adm_ctx.reply_skb)
2481 		return retcode;
2482 	if (retcode != NO_ERROR)
2483 		goto finish;
2484 
2485 	mutex_lock(&adm_ctx.resource->adm_mutex);
2486 	device = adm_ctx.device;
2487 	if (!get_ldev(device)) {
2488 		retcode = ERR_NO_DISK;
2489 		goto fail;
2490 	}
2491 
2492 	memset(&rs, 0, sizeof(struct resize_parms));
2493 	rs.al_stripes = device->ldev->md.al_stripes;
2494 	rs.al_stripe_size = device->ldev->md.al_stripe_size_4k * 4;
2495 	if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
2496 		err = resize_parms_from_attrs(&rs, info);
2497 		if (err) {
2498 			retcode = ERR_MANDATORY_TAG;
2499 			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2500 			goto fail_ldev;
2501 		}
2502 	}
2503 
2504 	if (device->state.conn > C_CONNECTED) {
2505 		retcode = ERR_RESIZE_RESYNC;
2506 		goto fail_ldev;
2507 	}
2508 
2509 	if (device->state.role == R_SECONDARY &&
2510 	    device->state.peer == R_SECONDARY) {
2511 		retcode = ERR_NO_PRIMARY;
2512 		goto fail_ldev;
2513 	}
2514 
2515 	if (rs.no_resync && first_peer_device(device)->connection->agreed_pro_version < 93) {
2516 		retcode = ERR_NEED_APV_93;
2517 		goto fail_ldev;
2518 	}
2519 
2520 	rcu_read_lock();
2521 	u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
2522 	rcu_read_unlock();
2523 	if (u_size != (sector_t)rs.resize_size) {
2524 		new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
2525 		if (!new_disk_conf) {
2526 			retcode = ERR_NOMEM;
2527 			goto fail_ldev;
2528 		}
2529 	}
2530 
2531 	if (device->ldev->md.al_stripes != rs.al_stripes ||
2532 	    device->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
2533 		u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
2534 
2535 		if (al_size_k > (16 * 1024 * 1024)) {	/* 16 GB upper limit; al_size_k is in kB */
2536 			retcode = ERR_MD_LAYOUT_TOO_BIG;
2537 			goto fail_ldev;
2538 		}
2539 
2540 		if (al_size_k < MD_32kB_SECT/2) {	/* 32 kB lower limit: sectors/2 equals kB */
2541 			retcode = ERR_MD_LAYOUT_TOO_SMALL;
2542 			goto fail_ldev;
2543 		}
2544 
2545 		if (device->state.conn != C_CONNECTED && !rs.resize_force) {
2546 			retcode = ERR_MD_LAYOUT_CONNECTED;
2547 			goto fail_ldev;
2548 		}
2549 
2550 		change_al_layout = true;
2551 	}
2552 
2553 	if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev))
2554 		device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
2555 
2556 	if (new_disk_conf) {
2557 		mutex_lock(&device->resource->conf_update);
2558 		old_disk_conf = device->ldev->disk_conf;
2559 		*new_disk_conf = *old_disk_conf;
2560 		new_disk_conf->disk_size = (sector_t)rs.resize_size;
2561 		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
2562 		mutex_unlock(&device->resource->conf_update);
2563 		synchronize_rcu();	/* wait out RCU readers of old_disk_conf */
2564 		kfree(old_disk_conf);
2565 	}
2566 
2567 	ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
2568 	dd = drbd_determine_dev_size(device, ddsf, change_al_layout ? &rs : NULL);
2569 	drbd_md_sync(device);
2570 	put_ldev(device);
2571 	if (dd == DS_ERROR) {
2572 		retcode = ERR_NOMEM_BITMAP;
2573 		goto fail;
2574 	} else if (dd == DS_ERROR_SPACE_MD) {
2575 		retcode = ERR_MD_LAYOUT_NO_FIT;
2576 		goto fail;
2577 	} else if (dd == DS_ERROR_SHRINK) {
2578 		retcode = ERR_IMPLICIT_SHRINK;
2579 		goto fail;
2580 	}
2581 
2582 	if (device->state.conn == C_CONNECTED) {
2583 		if (dd == DS_GREW)
2584 			set_bit(RESIZE_PENDING, &device->flags);
2585 
2586 		drbd_send_uuids(first_peer_device(device));
2587 		drbd_send_sizes(first_peer_device(device), 1, ddsf);
2588 	}
2589 
2590  fail:
2591 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2592  finish:
2593 	drbd_adm_finish(&adm_ctx, info, retcode);
2594 	return 0;
2595 
2596  fail_ldev:
2597 	put_ldev(device);
2598 	goto fail;
2599 }
2600 
2601 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
2602 {
2603 	struct drbd_config_context adm_ctx;
2604 	enum drbd_ret_code retcode;
2605 	struct res_opts res_opts;
2606 	int err;
2607 
2608 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2609 	if (!adm_ctx.reply_skb)
2610 		return retcode;
2611 	if (retcode != NO_ERROR)
2612 		goto fail;
2613 
2614 	res_opts = adm_ctx.resource->res_opts;
2615 	if (should_set_defaults(info))
2616 		set_res_opts_defaults(&res_opts);
2617 
2618 	err = res_opts_from_attrs(&res_opts, info);
2619 	if (err && err != -ENOMSG) {
2620 		retcode = ERR_MANDATORY_TAG;
2621 		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2622 		goto fail;
2623 	}
2624 
2625 	mutex_lock(&adm_ctx.resource->adm_mutex);
2626 	err = set_resource_options(adm_ctx.resource, &res_opts);
2627 	if (err) {
2628 		retcode = ERR_INVALID_REQUEST;
2629 		if (err == -ENOMEM)
2630 			retcode = ERR_NOMEM;
2631 	}
2632 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2633 
2634 fail:
2635 	drbd_adm_finish(&adm_ctx, info, retcode);
2636 	return 0;
2637 }
2638 
2639 int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
2640 {
2641 	struct drbd_config_context adm_ctx;
2642 	struct drbd_device *device;
2643 	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2644 
2645 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2646 	if (!adm_ctx.reply_skb)
2647 		return retcode;
2648 	if (retcode != NO_ERROR)
2649 		goto out;
2650 
2651 	mutex_lock(&adm_ctx.resource->adm_mutex);
2652 	device = adm_ctx.device;
2653 
2654 	/* If there is still bitmap IO pending, probably because of a previous
2655 	 * resync just being finished, wait for it before requesting a new resync.
2656 	 * Also wait for its after_state_ch(). */
2657 	drbd_suspend_io(device);
2658 	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2659 	drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
2660 
2661 	/* If we happen to be C_STANDALONE R_SECONDARY, just change to
2662 	 * D_INCONSISTENT, and set all bits in the bitmap.  Otherwise,
2663 	 * try to start a resync handshake as sync target for full sync.
2664 	 */
2665 	if (device->state.conn == C_STANDALONE && device->state.role == R_SECONDARY) {
2666 		retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT));
2667 		if (retcode >= SS_SUCCESS) {
2668 			if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
2669 				"set_n_write from invalidate", BM_LOCKED_MASK))
2670 				retcode = ERR_IO_MD_DISK;
2671 		}
2672 	} else
2673 		retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
2674 	drbd_resume_io(device);
2675 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2676 out:
2677 	drbd_adm_finish(&adm_ctx, info, retcode);
2678 	return 0;
2679 }
2680 
2681 static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
2682 		union drbd_state mask, union drbd_state val)
2683 {
2684 	struct drbd_config_context adm_ctx;
2685 	enum drbd_ret_code retcode;
2686 
2687 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2688 	if (!adm_ctx.reply_skb)
2689 		return retcode;
2690 	if (retcode != NO_ERROR)
2691 		goto out;
2692 
2693 	mutex_lock(&adm_ctx.resource->adm_mutex);
2694 	retcode = drbd_request_state(adm_ctx.device, mask, val);
2695 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2696 out:
2697 	drbd_adm_finish(&adm_ctx, info, retcode);
2698 	return 0;
2699 }
2700 
2701 static int drbd_bmio_set_susp_al(struct drbd_device *device)
2702 {
2703 	int rv;
2704 
2705 	rv = drbd_bmio_set_n_write(device);
2706 	drbd_suspend_al(device);
2707 	return rv;
2708 }
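/*
 * Rationale (sketch): once every bit is set, the whole device is due
 * for resync anyway, so the activity log can no longer narrow down
 * what might be dirty after a crash; suspending AL updates merely
 * avoids pointless meta data writes in the meantime.
 */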
2709 
2710 int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
2711 {
2712 	struct drbd_config_context adm_ctx;
2713 	int retcode; /* drbd_ret_code, drbd_state_rv */
2714 	struct drbd_device *device;
2715 
2716 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2717 	if (!adm_ctx.reply_skb)
2718 		return retcode;
2719 	if (retcode != NO_ERROR)
2720 		goto out;
2721 
2722 	mutex_lock(&adm_ctx.resource->adm_mutex);
2723 	device = adm_ctx.device;
2724 
2725 	/* If there is still bitmap IO pending, probably because of a previous
2726 	 * resync just being finished, wait for it before requesting a new resync.
2727 	 * Also wait for its after_state_ch(). */
2728 	drbd_suspend_io(device);
2729 	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2730 	drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
2731 
2732 	/* If we happen to be C_STANDALONE R_PRIMARY, just set all bits
2733 	 * in the bitmap.  Otherwise, try to start a resync handshake
2734 	 * as sync source for full sync.
2735 	 */
2736 	if (device->state.conn == C_STANDALONE && device->state.role == R_PRIMARY) {
2737 		/* The peer will get a resync upon connect anyway. Just make that
2738 		   into a full resync. */
2739 		retcode = drbd_request_state(device, NS(pdsk, D_INCONSISTENT));
2740 		if (retcode >= SS_SUCCESS) {
2741 			if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al,
2742 				"set_n_write from invalidate_peer",
2743 				BM_LOCKED_SET_ALLOWED))
2744 				retcode = ERR_IO_MD_DISK;
2745 		}
2746 	} else
2747 		retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
2748 	drbd_resume_io(device);
2749 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2750 out:
2751 	drbd_adm_finish(&adm_ctx, info, retcode);
2752 	return 0;
2753 }
2754 
2755 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
2756 {
2757 	struct drbd_config_context adm_ctx;
2758 	enum drbd_ret_code retcode;
2759 
2760 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2761 	if (!adm_ctx.reply_skb)
2762 		return retcode;
2763 	if (retcode != NO_ERROR)
2764 		goto out;
2765 
2766 	mutex_lock(&adm_ctx.resource->adm_mutex);
2767 	if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
2768 		retcode = ERR_PAUSE_IS_SET;
2769 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2770 out:
2771 	drbd_adm_finish(&adm_ctx, info, retcode);
2772 	return 0;
2773 }
2774 
2775 int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
2776 {
2777 	struct drbd_config_context adm_ctx;
2778 	union drbd_dev_state s;
2779 	enum drbd_ret_code retcode;
2780 
2781 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2782 	if (!adm_ctx.reply_skb)
2783 		return retcode;
2784 	if (retcode != NO_ERROR)
2785 		goto out;
2786 
2787 	mutex_lock(&adm_ctx.resource->adm_mutex);
2788 	if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
2789 		s = adm_ctx.device->state;
2790 		if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
2791 			retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
2792 				  s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
2793 		} else {
2794 			retcode = ERR_PAUSE_IS_CLEAR;
2795 		}
2796 	}
2797 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2798 out:
2799 	drbd_adm_finish(&adm_ctx, info, retcode);
2800 	return 0;
2801 }
2802 
2803 int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
2804 {
2805 	return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
2806 }
2807 
2808 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
2809 {
2810 	struct drbd_config_context adm_ctx;
2811 	struct drbd_device *device;
2812 	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2813 
2814 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2815 	if (!adm_ctx.reply_skb)
2816 		return retcode;
2817 	if (retcode != NO_ERROR)
2818 		goto out;
2819 
2820 	mutex_lock(&adm_ctx.resource->adm_mutex);
2821 	device = adm_ctx.device;
2822 	if (test_bit(NEW_CUR_UUID, &device->flags)) {
2823 		drbd_uuid_new_current(device);
2824 		clear_bit(NEW_CUR_UUID, &device->flags);
2825 	}
2826 	drbd_suspend_io(device);
2827 	retcode = drbd_request_state(device, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
2828 	if (retcode == SS_SUCCESS) {
2829 		if (device->state.conn < C_CONNECTED)
2830 			tl_clear(first_peer_device(device)->connection);
2831 		if (device->state.disk == D_DISKLESS || device->state.disk == D_FAILED)
2832 			tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO);
2833 	}
2834 	drbd_resume_io(device);
2835 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2836 out:
2837 	drbd_adm_finish(&adm_ctx, info, retcode);
2838 	return 0;
2839 }
2840 
2841 int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
2842 {
2843 	return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
2844 }
2845 
2846 static int nla_put_drbd_cfg_context(struct sk_buff *skb,
2847 				    struct drbd_resource *resource,
2848 				    struct drbd_connection *connection,
2849 				    struct drbd_device *device)
2850 {
2851 	struct nlattr *nla;
2852 	nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
2853 	if (!nla)
2854 		goto nla_put_failure;
2855 	if (device &&
2856 	    nla_put_u32(skb, T_ctx_volume, device->vnr))
2857 		goto nla_put_failure;
2858 	if (nla_put_string(skb, T_ctx_resource_name, resource->name))
2859 		goto nla_put_failure;
2860 	if (connection) {
2861 		if (connection->my_addr_len &&
2862 		    nla_put(skb, T_ctx_my_addr, connection->my_addr_len, &connection->my_addr))
2863 			goto nla_put_failure;
2864 		if (connection->peer_addr_len &&
2865 		    nla_put(skb, T_ctx_peer_addr, connection->peer_addr_len, &connection->peer_addr))
2866 			goto nla_put_failure;
2867 	}
2868 	nla_nest_end(skb, nla);
2869 	return 0;
2870 
2871 nla_put_failure:
2872 	if (nla)
2873 		nla_nest_cancel(skb, nla);
2874 	return -EMSGSIZE;
2875 }
2876 
2877 /*
2878  * Return the connection of @resource if @resource has exactly one connection.
2879  */
2880 static struct drbd_connection *the_only_connection(struct drbd_resource *resource)
2881 {
2882 	struct list_head *connections = &resource->connections;
2883 
2884 	if (list_empty(connections) || connections->next->next != connections)
2885 		return NULL;
2886 	return list_first_entry(&resource->connections, struct drbd_connection, connections);
2887 }
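/*
 * The test above relies on list_head being circular: an empty list has
 * head->next == head, and a list with exactly one entry has
 * head->next->next == head.  Spelled out (sketch):
 *
 *	list_empty(h)           - zero entries  -> NULL
 *	h->next->next != h      - two or more   -> NULL
 *	otherwise               - exactly one   -> first entry
 */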
2888 
2889 int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
2890 		const struct sib_info *sib)
2891 {
2892 	struct drbd_resource *resource = device->resource;
2893 	struct state_info *si = NULL; /* for sizeof(si->member); */
2894 	struct nlattr *nla;
2895 	int got_ldev;
2896 	int err = 0;
2897 	int exclude_sensitive;
2898 
2899 	/* If sib != NULL, this is drbd_bcast_event, which anyone can listen
2900 	 * to.  So we had better exclude sensitive information.
2901 	 *
2902 	 * If sib == NULL, this is drbd_adm_get_status, executed synchronously
2903 	 * in the context of the requesting user process. Exclude sensitive
2904 	 * information, unless current has CAP_SYS_ADMIN.
2905 	 *
2906 	 * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
2907 	 * relies on the current implementation of netlink_dump(), which
2908 	 * executes the dump callback successively from netlink_recvmsg(),
2909 	 * always in the context of the receiving process */
2910 	exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);
2911 
2912 	got_ldev = get_ldev(device);
2913 
2914 	/* We still need to add connection name and volume number information.
2915 	 * Minor number is in drbd_genlmsghdr. */
2916 	if (nla_put_drbd_cfg_context(skb, resource, the_only_connection(resource), device))
2917 		goto nla_put_failure;
2918 
2919 	if (res_opts_to_skb(skb, &device->resource->res_opts, exclude_sensitive))
2920 		goto nla_put_failure;
2921 
2922 	rcu_read_lock();
2923 	if (got_ldev) {
2924 		struct disk_conf *disk_conf;
2925 
2926 		disk_conf = rcu_dereference(device->ldev->disk_conf);
2927 		err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
2928 	}
2929 	if (!err) {
2930 		struct net_conf *nc;
2931 
2932 		nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
2933 		if (nc)
2934 			err = net_conf_to_skb(skb, nc, exclude_sensitive);
2935 	}
2936 	rcu_read_unlock();
2937 	if (err)
2938 		goto nla_put_failure;
2939 
2940 	nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
2941 	if (!nla)
2942 		goto nla_put_failure;
2943 	if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
2944 	    nla_put_u32(skb, T_current_state, device->state.i) ||
2945 	    nla_put_u64(skb, T_ed_uuid, device->ed_uuid) ||
2946 	    nla_put_u64(skb, T_capacity, drbd_get_capacity(device->this_bdev)) ||
2947 	    nla_put_u64(skb, T_send_cnt, device->send_cnt) ||
2948 	    nla_put_u64(skb, T_recv_cnt, device->recv_cnt) ||
2949 	    nla_put_u64(skb, T_read_cnt, device->read_cnt) ||
2950 	    nla_put_u64(skb, T_writ_cnt, device->writ_cnt) ||
2951 	    nla_put_u64(skb, T_al_writ_cnt, device->al_writ_cnt) ||
2952 	    nla_put_u64(skb, T_bm_writ_cnt, device->bm_writ_cnt) ||
2953 	    nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&device->ap_bio_cnt)) ||
2954 	    nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&device->ap_pending_cnt)) ||
2955 	    nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&device->rs_pending_cnt)))
2956 		goto nla_put_failure;
2957 
2958 	if (got_ldev) {
2959 		int err;
2960 
2961 		spin_lock_irq(&device->ldev->md.uuid_lock);
2962 		err = nla_put(skb, T_uuids, sizeof(si->uuids), device->ldev->md.uuid);
2963 		spin_unlock_irq(&device->ldev->md.uuid_lock);
2964 
2965 		if (err)
2966 			goto nla_put_failure;
2967 
2968 		if (nla_put_u32(skb, T_disk_flags, device->ldev->md.flags) ||
2969 		    nla_put_u64(skb, T_bits_total, drbd_bm_bits(device)) ||
2970 		    nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(device)))
2971 			goto nla_put_failure;
2972 		if (C_SYNC_SOURCE <= device->state.conn &&
2973 		    C_PAUSED_SYNC_T >= device->state.conn) {
2974 			if (nla_put_u64(skb, T_bits_rs_total, device->rs_total) ||
2975 			    nla_put_u64(skb, T_bits_rs_failed, device->rs_failed))
2976 				goto nla_put_failure;
2977 		}
2978 	}
2979 
2980 	if (sib) {
2981 		switch(sib->sib_reason) {
2982 		case SIB_SYNC_PROGRESS:
2983 		case SIB_GET_STATUS_REPLY:
2984 			break;
2985 		case SIB_STATE_CHANGE:
2986 			if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
2987 			    nla_put_u32(skb, T_new_state, sib->ns.i))
2988 				goto nla_put_failure;
2989 			break;
2990 		case SIB_HELPER_POST:
2991 			if (nla_put_u32(skb, T_helper_exit_code,
2992 					sib->helper_exit_code))
2993 				goto nla_put_failure;
2994 			/* fall through */
2995 		case SIB_HELPER_PRE:
2996 			if (nla_put_string(skb, T_helper, sib->helper_name))
2997 				goto nla_put_failure;
2998 			break;
2999 		}
3000 	}
3001 	nla_nest_end(skb, nla);
3002 
3003 	if (0)
3004 nla_put_failure:
3005 		err = -EMSGSIZE;
3006 	if (got_ldev)
3007 		put_ldev(device);
3008 	return err;
3009 }
3010 
3011 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
3012 {
3013 	struct drbd_config_context adm_ctx;
3014 	enum drbd_ret_code retcode;
3015 	int err;
3016 
3017 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3018 	if (!adm_ctx.reply_skb)
3019 		return retcode;
3020 	if (retcode != NO_ERROR)
3021 		goto out;
3022 
3023 	err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.device, NULL);
3024 	if (err) {
3025 		nlmsg_free(adm_ctx.reply_skb);
3026 		return err;
3027 	}
3028 out:
3029 	drbd_adm_finish(&adm_ctx, info, retcode);
3030 	return 0;
3031 }
3032 
3033 static int get_one_status(struct sk_buff *skb, struct netlink_callback *cb)
3034 {
3035 	struct drbd_device *device;
3036 	struct drbd_genlmsghdr *dh;
3037 	struct drbd_resource *pos = (struct drbd_resource *)cb->args[0];
3038 	struct drbd_resource *resource = NULL;
3039 	struct drbd_resource *tmp;
3040 	unsigned volume = cb->args[1];
3041 
3042 	/* Open-coded, deferred iteration:
3043 	 * for_each_resource_safe(resource, tmp, &drbd_resources) {
3044 	 *      connection = "first connection of resource or undefined";
3045 	 *	idr_for_each_entry(&resource->devices, device, i) {
3046 	 *	  ...
3047 	 *	}
3048 	 * }
3049 	 * where resource is cb->args[0];
3050 	 * and i is cb->args[1];
3051 	 *
3052 	 * cb->args[2] indicates if we shall loop over all resources,
3053 	 * or just dump all volumes of a single resource.
3054 	 *
3055 	 * This may miss entries inserted after this dump started,
3056 	 * or entries deleted before they are reached.
3057 	 *
3058 	 * We need to make sure the device won't disappear while
3059 	 * we are looking at it, and revalidate our iterators
3060 	 * on each iteration.
3061 	 */
3062 
3063 	/* synchronize with conn_create()/drbd_destroy_connection() */
3064 	rcu_read_lock();
3065 	/* revalidate iterator position */
3066 	for_each_resource_rcu(tmp, &drbd_resources) {
3067 		if (pos == NULL) {
3068 			/* first iteration */
3069 			pos = tmp;
3070 			resource = pos;
3071 			break;
3072 		}
3073 		if (tmp == pos) {
3074 			resource = pos;
3075 			break;
3076 		}
3077 	}
3078 	if (resource) {
3079 next_resource:
3080 		device = idr_get_next(&resource->devices, &volume);
3081 		if (!device) {
3082 			/* No more volumes to dump on this resource.
3083 			 * Advance resource iterator. */
3084 			pos = list_entry_rcu(resource->resources.next,
3085 					     struct drbd_resource, resources);
3086 			/* Did we dump any volume of this resource yet? */
3087 			if (volume != 0) {
3088 				/* If we reached the end of the list,
3089 				 * or only a single resource dump was requested,
3090 				 * we are done. */
3091 				if (&pos->resources == &drbd_resources || cb->args[2])
3092 					goto out;
3093 				volume = 0;
3094 				resource = pos;
3095 				goto next_resource;
3096 			}
3097 		}
3098 
3099 		dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3100 				cb->nlh->nlmsg_seq, &drbd_genl_family,
3101 				NLM_F_MULTI, DRBD_ADM_GET_STATUS);
3102 		if (!dh)
3103 			goto out;
3104 
3105 		if (!device) {
3106 			/* This is a connection without a single volume.
3107 			 * Surprisingly enough, it may have a network
3108 			 * configuration. */
3109 			struct drbd_connection *connection;
3110 
3111 			dh->minor = -1U;
3112 			dh->ret_code = NO_ERROR;
3113 			connection = the_only_connection(resource);
3114 			if (nla_put_drbd_cfg_context(skb, resource, connection, NULL))
3115 				goto cancel;
3116 			if (connection) {
3117 				struct net_conf *nc;
3118 
3119 				nc = rcu_dereference(connection->net_conf);
3120 				if (nc && net_conf_to_skb(skb, nc, 1) != 0)
3121 					goto cancel;
3122 			}
3123 			goto done;
3124 		}
3125 
3126 		D_ASSERT(device, device->vnr == volume);
3127 		D_ASSERT(device, device->resource == resource);
3128 
3129 		dh->minor = device_to_minor(device);
3130 		dh->ret_code = NO_ERROR;
3131 
3132 		if (nla_put_status_info(skb, device, NULL)) {
3133 cancel:
3134 			genlmsg_cancel(skb, dh);
3135 			goto out;
3136 		}
3137 done:
3138 		genlmsg_end(skb, dh);
3139 	}
3140 
3141 out:
3142 	rcu_read_unlock();
3143 	/* where to start the next iteration */
3144 	cb->args[0] = (long)pos;
3145 	cb->args[1] = (pos == resource) ? volume + 1 : 0;
3146 
3147 	/* No more resources/volumes/minors found results in an empty skb,
3148 	 * which will terminate the dump. */
3149 	return skb->len;
3150 }
3151 
3152 /*
3153  * Request status of all resources, or of all volumes within a single resource.
3154  *
3155  * This is a dump, as the answer may not fit in a single reply skb otherwise.
3156  * Which means we cannot use the family->attrbuf or other such members, because
3157  * dump is NOT protected by the genl_lock().  During dump, we only have access
3158  * to the incoming skb, and need to opencode "parsing" of the nlattr payload.
3159  *
3160  * Once things are setup properly, we call into get_one_status().
3161  */
3162 int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
3163 {
3164 	const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
3165 	struct nlattr *nla;
3166 	const char *resource_name;
3167 	struct drbd_resource *resource;
3168 	int maxtype;
3169 
3170 	/* Is this a followup call? */
3171 	if (cb->args[0]) {
3172 		/* ... of a single resource dump,
3173 		 * and the resource iterator has been advanced already? */
3174 		if (cb->args[2] && cb->args[2] != cb->args[0])
3175 			return 0; /* DONE. */
3176 		goto dump;
3177 	}
3178 
3179 	/* First call (from netlink_dump_start).  We need to figure out
3180 	 * which resource(s) the user wants us to dump. */
3181 	nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen),
3182 			nlmsg_attrlen(cb->nlh, hdrlen),
3183 			DRBD_NLA_CFG_CONTEXT);
3184 
3185 	/* No explicit context given.  Dump all. */
3186 	if (!nla)
3187 		goto dump;
3188 	maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
3189 	nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
3190 	if (IS_ERR(nla))
3191 		return PTR_ERR(nla);
3192 	/* context given, but no name present? */
3193 	if (!nla)
3194 		return -EINVAL;
3195 	resource_name = nla_data(nla);
3196 	if (!*resource_name)
3197 		return -ENODEV;
3198 	resource = drbd_find_resource(resource_name);
3199 	if (!resource)
3200 		return -ENODEV;
3201 
3202 	kref_put(&resource->kref, drbd_destroy_resource); /* get_one_status() revalidates the resource */
3203 
3204 	/* prime iterators, and set "filter" mode mark:
3205 	 * only dump this resource. */
3206 	cb->args[0] = (long)resource;
3207 	/* cb->args[1] = 0; passed in this way. */
3208 	cb->args[2] = (long)resource;
3209 
3210 dump:
3211 	return get_one_status(skb, cb);
3212 }
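/*
 * The netlink_callback args[] contract between this function and
 * get_one_status(), as a sketch:
 *
 *	cb->args[0]  resource to resume the dump from (pointer cast to long)
 *	cb->args[1]  next volume number within that resource
 *	cb->args[2]  if nonzero, dump only this one resource ("filter" mode)
 */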
3213 
3214 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
3215 {
3216 	struct drbd_config_context adm_ctx;
3217 	enum drbd_ret_code retcode;
3218 	struct timeout_parms tp;
3219 	int err;
3220 
3221 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3222 	if (!adm_ctx.reply_skb)
3223 		return retcode;
3224 	if (retcode != NO_ERROR)
3225 		goto out;
3226 
3227 	tp.timeout_type =
3228 		adm_ctx.device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
3229 		test_bit(USE_DEGR_WFC_T, &adm_ctx.device->flags) ? UT_DEGRADED :
3230 		UT_DEFAULT;
3231 
3232 	err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
3233 	if (err) {
3234 		nlmsg_free(adm_ctx.reply_skb);
3235 		return err;
3236 	}
3237 out:
3238 	drbd_adm_finish(&adm_ctx, info, retcode);
3239 	return 0;
3240 }
3241 
3242 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
3243 {
3244 	struct drbd_config_context adm_ctx;
3245 	struct drbd_device *device;
3246 	enum drbd_ret_code retcode;
3247 	struct start_ov_parms parms;
3248 
3249 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3250 	if (!adm_ctx.reply_skb)
3251 		return retcode;
3252 	if (retcode != NO_ERROR)
3253 		goto out;
3254 
3255 	device = adm_ctx.device;
3256 
3257 	/* resume from last known position, if possible */
3258 	parms.ov_start_sector = device->ov_start_sector;
3259 	parms.ov_stop_sector = ULLONG_MAX;
3260 	if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
3261 		int err = start_ov_parms_from_attrs(&parms, info);
3262 		if (err) {
3263 			retcode = ERR_MANDATORY_TAG;
3264 			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3265 			goto out;
3266 		}
3267 	}
3268 	mutex_lock(&adm_ctx.resource->adm_mutex);
3269 
3270 	/* w_make_ov_request expects position to be aligned */
3271 	device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
3272 	device->ov_stop_sector = parms.ov_stop_sector;
3273 
3274 	/* If there is still bitmap IO pending, e.g. previous resync or verify
3275 	 * just being finished, wait for it before requesting a new resync. */
3276 	drbd_suspend_io(device);
3277 	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
3278 	retcode = drbd_request_state(device, NS(conn, C_VERIFY_S));
3279 	drbd_resume_io(device);
3280 
3281 	mutex_unlock(&adm_ctx.resource->adm_mutex);
3282 out:
3283 	drbd_adm_finish(&adm_ctx, info, retcode);
3284 	return 0;
3285 }
3286 
3287 
3288 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
3289 {
3290 	struct drbd_config_context adm_ctx;
3291 	struct drbd_device *device;
3292 	enum drbd_ret_code retcode;
3293 	int skip_initial_sync = 0;
3294 	int err;
3295 	struct new_c_uuid_parms args;
3296 
3297 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3298 	if (!adm_ctx.reply_skb)
3299 		return retcode;
3300 	if (retcode != NO_ERROR)
3301 		goto out_nolock;
3302 
3303 	device = adm_ctx.device;
3304 	memset(&args, 0, sizeof(args));
3305 	if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
3306 		err = new_c_uuid_parms_from_attrs(&args, info);
3307 		if (err) {
3308 			retcode = ERR_MANDATORY_TAG;
3309 			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3310 			goto out_nolock;
3311 		}
3312 	}
3313 
3314 	mutex_lock(&adm_ctx.resource->adm_mutex);
3315 	mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */
3316 
3317 	if (!get_ldev(device)) {
3318 		retcode = ERR_NO_DISK;
3319 		goto out;
3320 	}
3321 
3322 	/* this is "skip initial sync": the disks are assumed to be clean */
3323 	if (device->state.conn == C_CONNECTED &&
3324 	    first_peer_device(device)->connection->agreed_pro_version >= 90 &&
3325 	    device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
3326 		drbd_info(device, "Preparing to skip initial sync\n");
3327 		skip_initial_sync = 1;
3328 	} else if (device->state.conn != C_STANDALONE) {
3329 		retcode = ERR_CONNECTED;
3330 		goto out_dec;
3331 	}
3332 
3333 	drbd_uuid_set(device, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
3334 	drbd_uuid_new_current(device); /* New current, previous to UI_BITMAP */
3335 
3336 	if (args.clear_bm) {
3337 		err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
3338 			"clear_n_write from new_c_uuid", BM_LOCKED_MASK);
3339 		if (err) {
3340 			drbd_err(device, "Writing bitmap failed with %d\n", err);
3341 			retcode = ERR_IO_MD_DISK;
3342 		}
3343 		if (skip_initial_sync) {
3344 			drbd_send_uuids_skip_initial_sync(first_peer_device(device));
3345 			_drbd_uuid_set(device, UI_BITMAP, 0);
3346 			drbd_print_uuids(device, "cleared bitmap UUID");
3347 			spin_lock_irq(&device->resource->req_lock);
3348 			_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3349 					CS_VERBOSE, NULL);
3350 			spin_unlock_irq(&device->resource->req_lock);
3351 		}
3352 	}
3353 
3354 	drbd_md_sync(device);
3355 out_dec:
3356 	put_ldev(device);
3357 out:
3358 	mutex_unlock(device->state_mutex);
3359 	mutex_unlock(&adm_ctx.resource->adm_mutex);
3360 out_nolock:
3361 	drbd_adm_finish(&adm_ctx, info, retcode);
3362 	return 0;
3363 }
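/* Validate a resource name: it must be non-empty and must not contain
 * a '/'.  For example, "r0" is accepted; "" and "backup/r0" are not. */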
3364 
3365 static enum drbd_ret_code
3366 drbd_check_resource_name(struct drbd_config_context *adm_ctx)
3367 {
3368 	const char *name = adm_ctx->resource_name;
3369 	if (!name || !name[0]) {
3370 		drbd_msg_put_info(adm_ctx->reply_skb, "resource name missing");
3371 		return ERR_MANDATORY_TAG;
3372 	}
3373 	/* if we want to use these in sysfs/configfs/debugfs some day,
3374 	 * we must not allow slashes */
3375 	if (strchr(name, '/')) {
3376 		drbd_msg_put_info(adm_ctx->reply_skb, "invalid resource name");
3377 		return ERR_INVALID_REQUEST;
3378 	}
3379 	return NO_ERROR;
3380 }
3381 
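/* Create a new resource.  Much like open(2) with O_CREAT, the call is
 * idempotent: an already existing resource is silently accepted unless
 * the netlink request carries NLM_F_EXCL, in which case it fails. */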
3382 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
3383 {
3384 	struct drbd_config_context adm_ctx;
3385 	enum drbd_ret_code retcode;
3386 	struct res_opts res_opts;
3387 	int err;
3388 
3389 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, 0);
3390 	if (!adm_ctx.reply_skb)
3391 		return retcode;
3392 	if (retcode != NO_ERROR)
3393 		goto out;
3394 
3395 	set_res_opts_defaults(&res_opts);
3396 	err = res_opts_from_attrs(&res_opts, info);
3397 	if (err && err != -ENOMSG) {
3398 		retcode = ERR_MANDATORY_TAG;
3399 		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3400 		goto out;
3401 	}
3402 
3403 	retcode = drbd_check_resource_name(&adm_ctx);
3404 	if (retcode != NO_ERROR)
3405 		goto out;
3406 
3407 	if (adm_ctx.resource) {
3408 		if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
3409 			retcode = ERR_INVALID_REQUEST;
3410 			drbd_msg_put_info(adm_ctx.reply_skb, "resource exists");
3411 		}
3412 		/* else: still NO_ERROR */
3413 		goto out;
3414 	}
3415 
3416 	/* not yet safe for genl_family.parallel_ops */
3417 	if (!conn_create(adm_ctx.resource_name, &res_opts))
3418 		retcode = ERR_NOMEM;
3419 out:
3420 	drbd_adm_finish(&adm_ctx, info, retcode);
3421 	return 0;
3422 }
3423 
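/* Create a new device (minor) within an existing resource.  The minor
 * number and volume id are range checked here; an already existing
 * minor is accepted unless NLM_F_EXCL is set, mirroring
 * drbd_adm_new_resource(). */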
3424 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
3425 {
3426 	struct drbd_config_context adm_ctx;
3427 	struct drbd_genlmsghdr *dh = info->userhdr;
3428 	enum drbd_ret_code retcode;
3429 
3430 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3431 	if (!adm_ctx.reply_skb)
3432 		return retcode;
3433 	if (retcode != NO_ERROR)
3434 		goto out;
3435 
3436 	if (dh->minor > MINORMASK) {
3437 		drbd_msg_put_info(adm_ctx.reply_skb, "requested minor out of range");
3438 		retcode = ERR_INVALID_REQUEST;
3439 		goto out;
3440 	}
3441 	if (adm_ctx.volume > DRBD_VOLUME_MAX) {
3442 		drbd_msg_put_info(adm_ctx.reply_skb, "requested volume id out of range");
3443 		retcode = ERR_INVALID_REQUEST;
3444 		goto out;
3445 	}
3446 
3447 	/* drbd_adm_prepare has already made sure that
3448 	 * first_peer_device(device)->connection and device->vnr match the request. */
3449 	if (adm_ctx.device) {
3450 		if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
3451 			retcode = ERR_MINOR_EXISTS;
3452 		/* else: still NO_ERROR */
3453 		goto out;
3454 	}
3455 
3456 	mutex_lock(&adm_ctx.resource->adm_mutex);
3457 	retcode = drbd_create_device(&adm_ctx, dh->minor);
3458 	mutex_unlock(&adm_ctx.resource->adm_mutex);
3459 out:
3460 	drbd_adm_finish(&adm_ctx, info, retcode);
3461 	return 0;
3462 }
3463 
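/* A minor may only be deleted while it is Diskless and Secondary.  It
 * need not be StandAlone, so a volume can be removed from a live
 * replication group. */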
3464 static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
3465 {
3466 	if (device->state.disk == D_DISKLESS &&
3467 	    /* we do not require device->state.conn == C_STANDALONE here:
3468 	     * we may want to delete a minor from a live replication group.
3469 	     */
3470 	    device->state.role == R_SECONDARY) {
3471 		_drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS),
3472 				    CS_VERBOSE + CS_WAIT_COMPLETE);
3473 		drbd_delete_device(device);
3474 		return NO_ERROR;
3475 	} else
3476 		return ERR_MINOR_CONFIGURED;
3477 }
3478 
3479 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info)
3480 {
3481 	struct drbd_config_context adm_ctx;
3482 	enum drbd_ret_code retcode;
3483 
3484 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3485 	if (!adm_ctx.reply_skb)
3486 		return retcode;
3487 	if (retcode != NO_ERROR)
3488 		goto out;
3489 
3490 	mutex_lock(&adm_ctx.resource->adm_mutex);
3491 	retcode = adm_del_minor(adm_ctx.device);
3492 	mutex_unlock(&adm_ctx.resource->adm_mutex);
3493 out:
3494 	drbd_adm_finish(&adm_ctx, info, retcode);
3495 	return 0;
3496 }
3497 
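/* Tear down a whole resource, in strict order: demote every volume to
 * Secondary, disconnect every connection, detach every disk, make sure
 * the worker threads have stopped, delete the now unconfigured minors,
 * and finally free the resource itself.  This is presumably what
 * "drbdadm down <res>" ends up invoking. */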
3498 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
3499 {
3500 	struct drbd_config_context adm_ctx;
3501 	struct drbd_resource *resource;
3502 	struct drbd_connection *connection;
3503 	struct drbd_device *device;
3504 	int retcode; /* enum drbd_ret_code or enum drbd_state_rv, respectively */
3505 	unsigned i;
3506 
3507 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3508 	if (!adm_ctx.reply_skb)
3509 		return retcode;
3510 	if (retcode != NO_ERROR)
3511 		goto finish;
3512 
3513 	resource = adm_ctx.resource;
3514 	mutex_lock(&resource->adm_mutex);
3515 	/* demote */
3516 	for_each_connection(connection, resource) {
3517 		struct drbd_peer_device *peer_device;
3518 
3519 		idr_for_each_entry(&connection->peer_devices, peer_device, i) {
3520 			retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0);
3521 			if (retcode < SS_SUCCESS) {
3522 				drbd_msg_put_info(adm_ctx.reply_skb, "failed to demote");
3523 				goto out;
3524 			}
3525 		}
3526 
3527 		retcode = conn_try_disconnect(connection, 0);
3528 		if (retcode < SS_SUCCESS) {
3529 			drbd_msg_put_info(adm_ctx.reply_skb, "failed to disconnect");
3530 			goto out;
3531 		}
3532 	}
3533 
3534 	/* detach */
3535 	idr_for_each_entry(&resource->devices, device, i) {
3536 		retcode = adm_detach(device, 0);
3537 		if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
3538 			drbd_msg_put_info(adm_ctx.reply_skb, "failed to detach");
3539 			goto out;
3540 		}
3541 	}
3542 
3543 	/* If we reach this point, all volumes (of this resource) are Secondary,
3544 	 * Disconnected, Diskless, aka Unconfigured.  Make sure all threads have
3545 	 * actually stopped; state handling only does drbd_thread_stop_nowait(). */
3546 	for_each_connection(connection, resource)
3547 		drbd_thread_stop(&connection->worker);
3548 
3549 	/* Now, nothing can fail anymore */
3550 
3551 	/* delete volumes */
3552 	idr_for_each_entry(&resource->devices, device, i) {
3553 		retcode = adm_del_minor(device);
3554 		if (retcode != NO_ERROR) {
3555 			/* "cannot happen" */
3556 			drbd_msg_put_info(adm_ctx.reply_skb, "failed to delete volume");
3557 			goto out;
3558 		}
3559 	}
3560 
3561 	list_del_rcu(&resource->resources);
3562 	synchronize_rcu();
3563 	drbd_free_resource(resource);
3564 	retcode = NO_ERROR;
3565 out:
3566 	mutex_unlock(&resource->adm_mutex);
3567 finish:
3568 	drbd_adm_finish(&adm_ctx, info, retcode);
3569 	return 0;
3570 }
3571 
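/* Delete an (empty) resource: refused while any connection is still
 * above StandAlone or while any minor still exists. */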
3572 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
3573 {
3574 	struct drbd_config_context adm_ctx;
3575 	struct drbd_resource *resource;
3576 	struct drbd_connection *connection;
3577 	enum drbd_ret_code retcode;
3578 
3579 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3580 	if (!adm_ctx.reply_skb)
3581 		return retcode;
3582 	if (retcode != NO_ERROR)
3583 		goto finish;
3584 
3585 	resource = adm_ctx.resource;
3586 	mutex_lock(&resource->adm_mutex);
3587 	for_each_connection(connection, resource) {
3588 		if (connection->cstate > C_STANDALONE) {
3589 			retcode = ERR_NET_CONFIGURED;
3590 			goto out;
3591 		}
3592 	}
3593 	if (!idr_is_empty(&resource->devices)) {
3594 		retcode = ERR_RES_IN_USE;
3595 		goto out;
3596 	}
3597 
3598 	list_del_rcu(&resource->resources);
3599 	for_each_connection(connection, resource)
3600 		drbd_thread_stop(&connection->worker);
3601 	synchronize_rcu();
3602 	drbd_free_resource(resource);
3603 	retcode = NO_ERROR;
3604 out:
3605 	mutex_unlock(&resource->adm_mutex);
3606 finish:
3607 	drbd_adm_finish(&adm_ctx, info, retcode);
3608 	return 0;
3609 }
3610 
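/* Broadcast a state info broadcast (sib) event to the generic netlink
 * multicast group, where userspace monitors (e.g. a "drbdsetup events"
 * style listener) can pick it up.  SIB_SYNC_PROGRESS events are rate
 * limited to at most one per second (HZ) to avoid flooding listeners
 * during a resync; -ESRCH from the broadcast merely means that nobody
 * is currently subscribed. */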
3611 void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
3612 {
3613 	static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
3614 	struct sk_buff *msg;
3615 	struct drbd_genlmsghdr *d_out;
3616 	unsigned seq;
3617 	int err = -ENOMEM;
3618 
3619 	if (sib->sib_reason == SIB_SYNC_PROGRESS) {
3620 		if (time_after(jiffies, device->rs_last_bcast + HZ))
3621 			device->rs_last_bcast = jiffies;
3622 		else
3623 			return;
3624 	}
3625 
3626 	seq = atomic_inc_return(&drbd_genl_seq);
3627 	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
3628 	if (!msg)
3629 		goto failed;
3630 
3631 	err = -EMSGSIZE;
3632 	d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
3633 	if (!d_out) /* cannot happen, but anyway. */
3634 		goto nla_put_failure;
3635 	d_out->minor = device_to_minor(device);
3636 	d_out->ret_code = NO_ERROR;
3637 
3638 	if (nla_put_status_info(msg, device, sib))
3639 		goto nla_put_failure;
3640 	genlmsg_end(msg, d_out);
3641 	err = drbd_genl_multicast_events(msg, 0);
3642 	/* msg has been consumed or freed in netlink_broadcast() */
3643 	if (err && err != -ESRCH)
3644 		goto failed;
3645 
3646 	return;
3647 
3648 nla_put_failure:
3649 	nlmsg_free(msg);
3650 failed:
3651 	drbd_err(device, "Error %d while broadcasting event. "
3652 			"Event seq:%u sib_reason:%u\n",
3653 			err, seq, sib->sib_reason);
3654 }
3655