xref: /openbmc/linux/drivers/block/drbd/drbd_nl.c (revision afb46f79)
1 /*
2    drbd_nl.c
3 
4    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5 
6    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9 
10    drbd is free software; you can redistribute it and/or modify
11    it under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 2, or (at your option)
13    any later version.
14 
15    drbd is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with drbd; see the file COPYING.  If not, write to
22    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 
24  */
25 
26 #include <linux/module.h>
27 #include <linux/drbd.h>
28 #include <linux/in.h>
29 #include <linux/fs.h>
30 #include <linux/file.h>
31 #include <linux/slab.h>
32 #include <linux/blkpg.h>
33 #include <linux/cpumask.h>
34 #include "drbd_int.h"
35 #include "drbd_protocol.h"
36 #include "drbd_req.h"
37 #include "drbd_wrappers.h"
38 #include <asm/unaligned.h>
39 #include <linux/drbd_limits.h>
40 #include <linux/kthread.h>
41 
42 #include <net/genetlink.h>
43 
44 /* .doit */
45 // int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
46 // int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);
47 
48 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info);
49 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info);
50 
51 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
52 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
53 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);
54 
55 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
56 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
57 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
58 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
59 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
60 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
61 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
62 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
63 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
64 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
65 int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
66 int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
67 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
68 int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
69 int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
70 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
71 int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
72 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
73 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
74 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
75 /* .dumpit */
76 int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
77 
78 #include <linux/drbd_genl_api.h>
79 #include "drbd_nla.h"
80 #include <linux/genl_magic_func.h>
81 
82 /* used as holder in blkdev_get_by_path(), to claim our meta data device(s) */
83 static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
84 
85 /* Configuration is strictly serialized, because generic netlink message
86  * processing is strictly serialized by the genl_lock(),
87  * which means we can use a single static global drbd_config_context struct.
88  */
89 static struct drbd_config_context {
90 	/* assigned from drbd_genlmsghdr */
91 	unsigned int minor;
92 	/* assigned from request attributes, if present */
93 	unsigned int volume;
94 #define VOLUME_UNSPECIFIED		(-1U)
95 	/* pointer into the request skb,
96 	 * limited lifetime! */
97 	char *resource_name;
98 	struct nlattr *my_addr;
99 	struct nlattr *peer_addr;
100 
101 	/* reply buffer */
102 	struct sk_buff *reply_skb;
103 	/* pointer into reply buffer */
104 	struct drbd_genlmsghdr *reply_dh;
105 	/* resolved from attributes, if possible */
106 	struct drbd_device *device;
107 	struct drbd_resource *resource;
108 	struct drbd_connection *connection;
109 } adm_ctx;
110 
111 static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
112 {
113 	genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
114 	if (genlmsg_reply(skb, info))
115 		printk(KERN_ERR "drbd: error sending genl reply\n");
116 }
117 
118 /* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: the only
119  * way it could fail would be lack of space in the skb, and 4k are available. */
120 int drbd_msg_put_info(const char *info)
121 {
122 	struct sk_buff *skb = adm_ctx.reply_skb;
123 	struct nlattr *nla;
124 	int err = -EMSGSIZE;
125 
126 	if (!info || !info[0])
127 		return 0;
128 
129 	nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
130 	if (!nla)
131 		return err;
132 
133 	err = nla_put_string(skb, T_info_text, info);
134 	if (err) {
135 		nla_nest_cancel(skb, nla);
136 		return err;
137 	} else
138 		nla_nest_end(skb, nla);
139 	return 0;
140 }
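
/* Typical usage in the .doit handlers below: put a human readable
 * explanation into the reply via drbd_msg_put_info(), then return a
 * DRBD ret_code, e.g.
 *
 *	drbd_msg_put_info("unknown minor");
 *	return ERR_MINOR_INVALID;
 *
 * drbd_adm_finish() stores the ret_code in the reply header and sends
 * both back to user space in a single message. */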
141 
142 /* This would be a good candidate for a "pre_doit" hook,
143  * and per-family private info->pointers.
144  * But we need to stay compatible with older kernels.
145  * If it returns successfully, adm_ctx members are valid.
146  */
147 #define DRBD_ADM_NEED_MINOR	1
148 #define DRBD_ADM_NEED_RESOURCE	2
149 #define DRBD_ADM_NEED_CONNECTION 4
150 static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
151 		unsigned flags)
152 {
153 	struct drbd_genlmsghdr *d_in = info->userhdr;
154 	const u8 cmd = info->genlhdr->cmd;
155 	int err;
156 
157 	memset(&adm_ctx, 0, sizeof(adm_ctx));
158 
159 	/* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
160 	if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
161 	       return -EPERM;
162 
163 	adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
164 	if (!adm_ctx.reply_skb) {
165 		err = -ENOMEM;
166 		goto fail;
167 	}
168 
169 	adm_ctx.reply_dh = genlmsg_put_reply(adm_ctx.reply_skb,
170 					info, &drbd_genl_family, 0, cmd);
171 	/* putting a few bytes into a fresh skb of >= 4k will always succeed,
172 	 * but check anyway */
173 	if (!adm_ctx.reply_dh) {
174 		err = -ENOMEM;
175 		goto fail;
176 	}
177 
178 	adm_ctx.reply_dh->minor = d_in->minor;
179 	adm_ctx.reply_dh->ret_code = NO_ERROR;
180 
181 	adm_ctx.volume = VOLUME_UNSPECIFIED;
182 	if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
183 		struct nlattr *nla;
184 		/* parse and validate only */
185 		err = drbd_cfg_context_from_attrs(NULL, info);
186 		if (err)
187 			goto fail;
188 
189 		/* It was present and valid;
190 		 * copy it over to the reply skb. */
191 		err = nla_put_nohdr(adm_ctx.reply_skb,
192 				info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
193 				info->attrs[DRBD_NLA_CFG_CONTEXT]);
194 		if (err)
195 			goto fail;
196 
197 		/* and assign stuff to the global adm_ctx */
198 		nla = nested_attr_tb[__nla_type(T_ctx_volume)];
199 		if (nla)
200 			adm_ctx.volume = nla_get_u32(nla);
201 		nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
202 		if (nla)
203 			adm_ctx.resource_name = nla_data(nla);
204 		adm_ctx.my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
205 		adm_ctx.peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
206 		if ((adm_ctx.my_addr &&
207 		     nla_len(adm_ctx.my_addr) > sizeof(adm_ctx.connection->my_addr)) ||
208 		    (adm_ctx.peer_addr &&
209 		     nla_len(adm_ctx.peer_addr) > sizeof(adm_ctx.connection->peer_addr))) {
210 			err = -EINVAL;
211 			goto fail;
212 		}
213 	}
214 
215 	adm_ctx.minor = d_in->minor;
216 	adm_ctx.device = minor_to_device(d_in->minor);
217 	if (adm_ctx.resource_name) {
218 		adm_ctx.resource = drbd_find_resource(adm_ctx.resource_name);
219 	}
220 
221 	if (!adm_ctx.device && (flags & DRBD_ADM_NEED_MINOR)) {
222 		drbd_msg_put_info("unknown minor");
223 		return ERR_MINOR_INVALID;
224 	}
225 	if (!adm_ctx.resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
226 		drbd_msg_put_info("unknown resource");
227 		if (adm_ctx.resource_name)
228 			return ERR_RES_NOT_KNOWN;
229 		return ERR_INVALID_REQUEST;
230 	}
231 
232 	if (flags & DRBD_ADM_NEED_CONNECTION) {
233 		if (adm_ctx.resource) {
234 			drbd_msg_put_info("no resource name expected");
235 			return ERR_INVALID_REQUEST;
236 		}
237 		if (adm_ctx.device) {
238 			drbd_msg_put_info("no minor number expected");
239 			return ERR_INVALID_REQUEST;
240 		}
241 		if (adm_ctx.my_addr && adm_ctx.peer_addr)
242 			adm_ctx.connection = conn_get_by_addrs(nla_data(adm_ctx.my_addr),
243 							  nla_len(adm_ctx.my_addr),
244 							  nla_data(adm_ctx.peer_addr),
245 							  nla_len(adm_ctx.peer_addr));
246 		if (!adm_ctx.connection) {
247 			drbd_msg_put_info("unknown connection");
248 			return ERR_INVALID_REQUEST;
249 		}
250 	}
251 
252 	/* some more paranoia, if the request was over-determined */
253 	if (adm_ctx.device && adm_ctx.resource &&
254 	    adm_ctx.device->resource != adm_ctx.resource) {
255 		pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
256 				adm_ctx.minor, adm_ctx.resource->name,
257 				adm_ctx.device->resource->name);
258 		drbd_msg_put_info("minor exists in different resource");
259 		return ERR_INVALID_REQUEST;
260 	}
261 	if (adm_ctx.device &&
262 	    adm_ctx.volume != VOLUME_UNSPECIFIED &&
263 	    adm_ctx.volume != adm_ctx.device->vnr) {
264 		pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
265 				adm_ctx.minor, adm_ctx.volume,
266 				adm_ctx.device->vnr,
267 				adm_ctx.device->resource->name);
268 		drbd_msg_put_info("minor exists as different volume");
269 		return ERR_INVALID_REQUEST;
270 	}
271 
272 	return NO_ERROR;
273 
274 fail:
275 	nlmsg_free(adm_ctx.reply_skb);
276 	adm_ctx.reply_skb = NULL;
277 	return err;
278 }
279 
280 static int drbd_adm_finish(struct genl_info *info, int retcode)
281 {
282 	if (adm_ctx.connection) {
283 		kref_put(&adm_ctx.connection->kref, drbd_destroy_connection);
284 		adm_ctx.connection = NULL;
285 	}
286 	if (adm_ctx.resource) {
287 		kref_put(&adm_ctx.resource->kref, drbd_destroy_resource);
288 		adm_ctx.resource = NULL;
289 	}
290 
291 	if (!adm_ctx.reply_skb)
292 		return -ENOMEM;
293 
294 	adm_ctx.reply_dh->ret_code = retcode;
295 	drbd_adm_send_reply(adm_ctx.reply_skb, info);
296 	return 0;
297 }
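
/* drbd_adm_prepare() and drbd_adm_finish() are meant to be paired in
 * every .doit handler: prepare takes references on the resolved objects
 * (drbd_find_resource(), conn_get_by_addrs()), finish drops them again
 * and sends the prepared reply_skb with the final ret_code. */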
298 
299 static void setup_khelper_env(struct drbd_connection *connection, char **envp)
300 {
301 	char *afs;
302 
303 	/* FIXME: A future version will not allow this case. */
304 	if (connection->my_addr_len == 0 || connection->peer_addr_len == 0)
305 		return;
306 
307 	switch (((struct sockaddr *)&connection->peer_addr)->sa_family) {
308 	case AF_INET6:
309 		afs = "ipv6";
310 		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
311 			 &((struct sockaddr_in6 *)&connection->peer_addr)->sin6_addr);
312 		break;
313 	case AF_INET:
314 		afs = "ipv4";
315 		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
316 			 &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
317 		break;
318 	default:
319 		afs = "ssocks";
320 		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
321 			 &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
322 	}
323 	snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
324 }
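
/* Layout of the envp[] array that setup_khelper_env() fills in, as
 * allocated by drbd_khelper() and conn_khelper() below:
 *   envp[0..2]: HOME, TERM, PATH
 *   envp[3]:    "DRBD_PEER_AF=..."      (char[20] scratch buffer)
 *   envp[4]:    "DRBD_PEER_ADDRESS=..." (char[60] scratch buffer)
 *   envp[5]:    NULL terminator */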
325 
326 int drbd_khelper(struct drbd_device *device, char *cmd)
327 {
328 	char *envp[] = { "HOME=/",
329 			"TERM=linux",
330 			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
331 			 (char[20]) { }, /* address family */
332 			 (char[60]) { }, /* address */
333 			NULL };
334 	char mb[12];
335 	char *argv[] = {usermode_helper, cmd, mb, NULL };
336 	struct drbd_connection *connection = first_peer_device(device)->connection;
337 	struct sib_info sib;
338 	int ret;
339 
340 	if (current == connection->worker.task)
341 		set_bit(CALLBACK_PENDING, &connection->flags);
342 
343 	snprintf(mb, 12, "minor-%d", device_to_minor(device));
344 	setup_khelper_env(connection, envp);
345 
346 	/* The helper may take some time.
347 	 * write out any unsynced meta data changes now */
348 	drbd_md_sync(device);
349 
350 	drbd_info(device, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
351 	sib.sib_reason = SIB_HELPER_PRE;
352 	sib.helper_name = cmd;
353 	drbd_bcast_event(device, &sib);
354 	ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
355 	if (ret)
356 		drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
357 				usermode_helper, cmd, mb,
358 				(ret >> 8) & 0xff, ret);
359 	else
360 		drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
361 				usermode_helper, cmd, mb,
362 				(ret >> 8) & 0xff, ret);
363 	sib.sib_reason = SIB_HELPER_POST;
364 	sib.helper_exit_code = ret;
365 	drbd_bcast_event(device, &sib);
366 
367 	if (current == connection->worker.task)
368 		clear_bit(CALLBACK_PENDING, &connection->flags);
369 
370 	if (ret < 0) /* Ignore any ERRNOs we got. */
371 		ret = 0;
372 
373 	return ret;
374 }
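
/* call_usermodehelper() with UMH_WAIT_PROC returns the helper's exit
 * status in wait(2) encoding, or a negative errno if the helper could
 * not be run at all; that is why the logging above and
 * conn_try_outdate_peer() below extract the exit code as (ret >> 8) & 0xff. */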
375 
376 static int conn_khelper(struct drbd_connection *connection, char *cmd)
377 {
378 	char *envp[] = { "HOME=/",
379 			"TERM=linux",
380 			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
381 			 (char[20]) { }, /* address family */
382 			 (char[60]) { }, /* address */
383 			NULL };
384 	char *resource_name = connection->resource->name;
385 	char *argv[] = {usermode_helper, cmd, resource_name, NULL };
386 	int ret;
387 
388 	setup_khelper_env(connection, envp);
389 	conn_md_sync(connection);
390 
391 	drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name);
392 	/* TODO: conn_bcast_event() ?? */
393 
394 	ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
395 	if (ret)
396 		drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
397 			  usermode_helper, cmd, resource_name,
398 			  (ret >> 8) & 0xff, ret);
399 	else
400 		drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
401 			  usermode_helper, cmd, resource_name,
402 			  (ret >> 8) & 0xff, ret);
403 	/* TODO: conn_bcast_event() ?? */
404 
405 	if (ret < 0) /* Ignore any ERRNOs we got. */
406 		ret = 0;
407 
408 	return ret;
409 }
410 
411 static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connection)
412 {
413 	enum drbd_fencing_p fp = FP_NOT_AVAIL;
414 	struct drbd_peer_device *peer_device;
415 	int vnr;
416 
417 	rcu_read_lock();
418 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
419 		struct drbd_device *device = peer_device->device;
420 		if (get_ldev_if_state(device, D_CONSISTENT)) {
421 			struct disk_conf *disk_conf =
422 				rcu_dereference(peer_device->device->ldev->disk_conf);
423 			fp = max_t(enum drbd_fencing_p, fp, disk_conf->fencing);
424 			put_ldev(device);
425 		}
426 	}
427 	rcu_read_unlock();
428 
429 	return fp;
430 }
431 
432 bool conn_try_outdate_peer(struct drbd_connection *connection)
433 {
434 	unsigned int connect_cnt;
435 	union drbd_state mask = { };
436 	union drbd_state val = { };
437 	enum drbd_fencing_p fp;
438 	char *ex_to_string;
439 	int r;
440 
441 	if (connection->cstate >= C_WF_REPORT_PARAMS) {
442 		drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
443 		return false;
444 	}
445 
446 	spin_lock_irq(&connection->resource->req_lock);
447 	connect_cnt = connection->connect_cnt;
448 	spin_unlock_irq(&connection->resource->req_lock);
449 
450 	fp = highest_fencing_policy(connection);
451 	switch (fp) {
452 	case FP_NOT_AVAIL:
453 		drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n");
454 		goto out;
455 	case FP_DONT_CARE:
456 		return true;
457 	default: ;
458 	}
459 
460 	r = conn_khelper(connection, "fence-peer");
461 
462 	switch ((r>>8) & 0xff) {
463 	case 3: /* peer is inconsistent */
464 		ex_to_string = "peer is inconsistent or worse";
465 		mask.pdsk = D_MASK;
466 		val.pdsk = D_INCONSISTENT;
467 		break;
468 	case 4: /* peer got outdated, or was already outdated */
469 		ex_to_string = "peer was fenced";
470 		mask.pdsk = D_MASK;
471 		val.pdsk = D_OUTDATED;
472 		break;
473 	case 5: /* peer was down */
474 		if (conn_highest_disk(connection) == D_UP_TO_DATE) {
475 			/* we will (or already did) create a new UUID anyway... */
476 			ex_to_string = "peer is unreachable, assumed to be dead";
477 			mask.pdsk = D_MASK;
478 			val.pdsk = D_OUTDATED;
479 		} else {
480 			ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
481 		}
482 		break;
483 	case 6: /* Peer is primary, voluntarily outdate myself.
484 		 * This is useful when an unconnected R_SECONDARY is asked to
485 		 * become R_PRIMARY, but finds the other peer being active. */
486 		ex_to_string = "peer is active";
487 		drbd_warn(connection, "Peer is primary, outdating myself.\n");
488 		mask.disk = D_MASK;
489 		val.disk = D_OUTDATED;
490 		break;
491 	case 7:
492 		if (fp != FP_STONITH)
493 			drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n");
494 		ex_to_string = "peer was stonithed";
495 		mask.pdsk = D_MASK;
496 		val.pdsk = D_OUTDATED;
497 		break;
498 	default:
499 		/* The script is broken ... */
500 		drbd_err(connection, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
501 		return false; /* Eventually leave IO frozen */
502 	}
503 
504 	drbd_info(connection, "fence-peer helper returned %d (%s)\n",
505 		  (r>>8) & 0xff, ex_to_string);
506 
507  out:
508 
509 	/* Not using
510 	   conn_request_state(connection, mask, val, CS_VERBOSE);
511 	   here, because we might have been able to re-establish the
512 	   connection in the meantime. */
513 	spin_lock_irq(&connection->resource->req_lock);
514 	if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) {
515 		if (connection->connect_cnt != connect_cnt)
516 			/* In case the connection was established and dropped
517 			   while the fence-peer handler was running, ignore it */
518 			drbd_info(connection, "Ignoring fence-peer exit code\n");
519 		else
520 			_conn_request_state(connection, mask, val, CS_VERBOSE);
521 	}
522 	spin_unlock_irq(&connection->resource->req_lock);
523 
524 	return conn_highest_pdsk(connection) <= D_OUTDATED;
525 }
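
/* conn_try_outdate_peer() returns true iff the peer's disk is now known
 * to be at most D_OUTDATED (see the conn_highest_pdsk() check above);
 * drbd_set_role() uses this to decide whether a promotion to R_PRIMARY
 * may proceed. */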
526 
527 static int _try_outdate_peer_async(void *data)
528 {
529 	struct drbd_connection *connection = (struct drbd_connection *)data;
530 
531 	conn_try_outdate_peer(connection);
532 
533 	kref_put(&connection->kref, drbd_destroy_connection);
534 	return 0;
535 }
536 
537 void conn_try_outdate_peer_async(struct drbd_connection *connection)
538 {
539 	struct task_struct *opa;
540 
541 	kref_get(&connection->kref);
542 	opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h");
543 	if (IS_ERR(opa)) {
544 		drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n");
545 		kref_put(&connection->kref, drbd_destroy_connection);
546 	}
547 }
548 
549 enum drbd_state_rv
550 drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
551 {
552 	const int max_tries = 4;
553 	enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
554 	struct net_conf *nc;
555 	int try = 0;
556 	int forced = 0;
557 	union drbd_state mask, val;
558 
559 	if (new_role == R_PRIMARY) {
560 		struct drbd_connection *connection;
561 
562 		/* Detect dead peers as soon as possible.  */
563 
564 		rcu_read_lock();
565 		for_each_connection(connection, device->resource)
566 			request_ping(connection);
567 		rcu_read_unlock();
568 	}
569 
570 	mutex_lock(device->state_mutex);
571 
572 	mask.i = 0; mask.role = R_MASK;
573 	val.i  = 0; val.role  = new_role;
574 
575 	while (try++ < max_tries) {
576 		rv = _drbd_request_state(device, mask, val, CS_WAIT_COMPLETE);
577 
578 		/* in case we first succeeded to outdate,
579 		 * but now suddenly could establish a connection */
580 		if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
581 			val.pdsk = 0;
582 			mask.pdsk = 0;
583 			continue;
584 		}
585 
586 		if (rv == SS_NO_UP_TO_DATE_DISK && force &&
587 		    (device->state.disk < D_UP_TO_DATE &&
588 		     device->state.disk >= D_INCONSISTENT)) {
589 			mask.disk = D_MASK;
590 			val.disk  = D_UP_TO_DATE;
591 			forced = 1;
592 			continue;
593 		}
594 
595 		if (rv == SS_NO_UP_TO_DATE_DISK &&
596 		    device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
597 			D_ASSERT(device, device->state.pdsk == D_UNKNOWN);
598 
599 			if (conn_try_outdate_peer(first_peer_device(device)->connection)) {
600 				val.disk = D_UP_TO_DATE;
601 				mask.disk = D_MASK;
602 			}
603 			continue;
604 		}
605 
606 		if (rv == SS_NOTHING_TO_DO)
607 			goto out;
608 		if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
609 			if (!conn_try_outdate_peer(first_peer_device(device)->connection) && force) {
610 				drbd_warn(device, "Forced into split brain situation!\n");
611 				mask.pdsk = D_MASK;
612 				val.pdsk  = D_OUTDATED;
613 
614 			}
615 			continue;
616 		}
617 		if (rv == SS_TWO_PRIMARIES) {
618 			/* Maybe the peer is detected as dead very soon...
619 			   retry at most once more in this case. */
620 			int timeo;
621 			rcu_read_lock();
622 			nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
623 			timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
624 			rcu_read_unlock();
625 			schedule_timeout_interruptible(timeo);
626 			if (try < max_tries)
627 				try = max_tries - 1;
628 			continue;
629 		}
630 		if (rv < SS_SUCCESS) {
631 			rv = _drbd_request_state(device, mask, val,
632 						CS_VERBOSE + CS_WAIT_COMPLETE);
633 			if (rv < SS_SUCCESS)
634 				goto out;
635 		}
636 		break;
637 	}
638 
639 	if (rv < SS_SUCCESS)
640 		goto out;
641 
642 	if (forced)
643 		drbd_warn(device, "Forced to consider local data as UpToDate!\n");
644 
645 	/* Wait until nothing is in flight anymore :) */
646 	wait_event(device->misc_wait, atomic_read(&device->ap_pending_cnt) == 0);
647 
648 	/* FIXME also wait for all pending P_BARRIER_ACK? */
649 
650 	if (new_role == R_SECONDARY) {
651 		set_disk_ro(device->vdisk, true);
652 		if (get_ldev(device)) {
653 			device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
654 			put_ldev(device);
655 		}
656 	} else {
657 		mutex_lock(&device->resource->conf_update);
658 		nc = first_peer_device(device)->connection->net_conf;
659 		if (nc)
660 			nc->discard_my_data = 0; /* without copy; single bit op is atomic */
661 		mutex_unlock(&device->resource->conf_update);
662 
663 		set_disk_ro(device->vdisk, false);
664 		if (get_ldev(device)) {
665 			if (((device->state.conn < C_CONNECTED ||
666 			       device->state.pdsk <= D_FAILED)
667 			      && device->ldev->md.uuid[UI_BITMAP] == 0) || forced)
668 				drbd_uuid_new_current(device);
669 
670 			device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
671 			put_ldev(device);
672 		}
673 	}
674 
675 	/* writeout of activity-log-covered areas of the bitmap
676 	 * to stable storage is already done in the after-state-change work */
677 
678 	if (device->state.conn >= C_WF_REPORT_PARAMS) {
679 		/* if this was forced, we should consider sync */
680 		if (forced)
681 			drbd_send_uuids(first_peer_device(device));
682 		drbd_send_current_state(first_peer_device(device));
683 	}
684 
685 	drbd_md_sync(device);
686 
687 	kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
688 out:
689 	mutex_unlock(device->state_mutex);
690 	return rv;
691 }
692 
693 static const char *from_attrs_err_to_txt(int err)
694 {
695 	return	err == -ENOMSG ? "required attribute missing" :
696 		err == -EOPNOTSUPP ? "unknown mandatory attribute" :
697 		err == -EEXIST ? "can not change invariant setting" :
698 		"invalid attribute value";
699 }
700 
701 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
702 {
703 	struct set_role_parms parms;
704 	int err;
705 	enum drbd_ret_code retcode;
706 
707 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
708 	if (!adm_ctx.reply_skb)
709 		return retcode;
710 	if (retcode != NO_ERROR)
711 		goto out;
712 
713 	memset(&parms, 0, sizeof(parms));
714 	if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
715 		err = set_role_parms_from_attrs(&parms, info);
716 		if (err) {
717 			retcode = ERR_MANDATORY_TAG;
718 			drbd_msg_put_info(from_attrs_err_to_txt(err));
719 			goto out;
720 		}
721 	}
722 
723 	if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
724 		retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
725 	else
726 		retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
727 out:
728 	drbd_adm_finish(info, retcode);
729 	return 0;
730 }
731 
732 /* Initializes the md.*_offset members, so we are able to find
733  * the on disk meta data.
734  *
735  * We currently have two possible layouts:
736  * external:
737  *   |----------- md_size_sect ------------------|
738  *   [ 4k superblock ][ activity log ][  Bitmap  ]
739  *   | al_offset == 8 |
740  *   | bm_offset = al_offset + X      |
741  *  ==> bitmap sectors = md_size_sect - bm_offset
742  *
743  * internal:
744  *            |----------- md_size_sect ------------------|
745  * [data.....][  Bitmap  ][ activity log ][ 4k superblock ]
746  *                        | al_offset < 0 |
747  *            | bm_offset = al_offset - Y |
748  *  ==> bitmap sectors = Y = al_offset - bm_offset
749  *
750  *  Activity log size used to be fixed 32kB,
751  *  but is about to become configurable.
752  */
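/* Rough sizing example (assuming the usual 4 kB-per-bit bitmap
 * granularity, and ignoring the alignment done below): one bit per 4 kB
 * of data is ~32 kB of bitmap per GiB, so a 1 TiB backing device needs
 * about 32 MiB of bitmap, plus the 4k superblock and the activity log. */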
753 static void drbd_md_set_sector_offsets(struct drbd_device *device,
754 				       struct drbd_backing_dev *bdev)
755 {
756 	sector_t md_size_sect = 0;
757 	unsigned int al_size_sect = bdev->md.al_size_4k * 8;
758 
759 	bdev->md.md_offset = drbd_md_ss(bdev);
760 
761 	switch (bdev->md.meta_dev_idx) {
762 	default:
763 		/* v07 style fixed size indexed meta data */
764 		bdev->md.md_size_sect = MD_128MB_SECT;
765 		bdev->md.al_offset = MD_4kB_SECT;
766 		bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
767 		break;
768 	case DRBD_MD_INDEX_FLEX_EXT:
769 		/* just occupy the full device; unit: sectors */
770 		bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
771 		bdev->md.al_offset = MD_4kB_SECT;
772 		bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
773 		break;
774 	case DRBD_MD_INDEX_INTERNAL:
775 	case DRBD_MD_INDEX_FLEX_INT:
776 		/* al size is still fixed */
777 		bdev->md.al_offset = -al_size_sect;
778 		/* we need (slightly less than) ~ this many bitmap sectors: */
779 		md_size_sect = drbd_get_capacity(bdev->backing_bdev);
780 		md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
781 		md_size_sect = BM_SECT_TO_EXT(md_size_sect);
782 		md_size_sect = ALIGN(md_size_sect, 8);
783 
784 		/* plus the "drbd meta data super block",
785 		 * and the activity log; */
786 		md_size_sect += MD_4kB_SECT + al_size_sect;
787 
788 		bdev->md.md_size_sect = md_size_sect;
789 		/* bitmap offset is adjusted by 'super' block size */
790 		bdev->md.bm_offset   = -md_size_sect + MD_4kB_SECT;
791 		break;
792 	}
793 }
794 
795 /* input size is expected to be in KB */
796 char *ppsize(char *buf, unsigned long long size)
797 {
798 	/* Needs 9 bytes at max including trailing NUL:
799 	 * -1ULL ==> "16384 EB" */
800 	static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
801 	int base = 0;
802 	while (size >= 10000 && base < sizeof(units)-1) {
803 		/* shift + round */
804 		size = (size >> 10) + !!(size & (1<<9));
805 		base++;
806 	}
807 	sprintf(buf, "%u %cB", (unsigned)size, units[base]);
808 
809 	return buf;
810 }
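
/* Example: ppsize(buf, 1048576) yields "1024 MB": 1048576 (KB input) is
 * shifted down by 10 once (with rounding on bit 9), and the unit
 * advances from 'K' to 'M'. */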
811 
812 /* there is still a theoretical deadlock when called from receiver
813  * on a D_INCONSISTENT R_PRIMARY:
814  *  remote READ does inc_ap_bio, receiver would need to receive answer
815  *  packet from remote to dec_ap_bio again.
816  *  receiver receive_sizes(), comes here,
817  *  waits for ap_bio_cnt == 0. -> deadlock.
818  * but this cannot happen, actually, because:
819  *  R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
820  *  (not connected, or bad/no disk on peer):
821  *  see drbd_fail_request_early, ap_bio_cnt is zero.
822  *  R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
823  *  peer may not initiate a resize.
824  */
825 /* Note these are not to be confused with
826  * drbd_adm_suspend_io/drbd_adm_resume_io,
827  * which are (sub) state changes triggered by admin (drbdsetup),
828  * and can be long lived.
829  * This changes a bit in device->flags, is triggered by drbd internals,
830  * and should be short-lived. */
831 void drbd_suspend_io(struct drbd_device *device)
832 {
833 	set_bit(SUSPEND_IO, &device->flags);
834 	if (drbd_suspended(device))
835 		return;
836 	wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
837 }
838 
839 void drbd_resume_io(struct drbd_device *device)
840 {
841 	clear_bit(SUSPEND_IO, &device->flags);
842 	wake_up(&device->misc_wait);
843 }
844 
845 /**
846  * drbd_determine_dev_size() -  Sets the right device size obeying all constraints
847  * @device:	DRBD device.
848  *
849  * Returns an enum determine_dev_size; the negative DS_ERROR* values indicate errors.
850  * You should call drbd_md_sync() after calling this function.
851  */
852 enum determine_dev_size
853 drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
854 {
855 	sector_t prev_first_sect, prev_size; /* previous meta location */
856 	sector_t la_size_sect, u_size;
857 	struct drbd_md *md = &device->ldev->md;
858 	u32 prev_al_stripe_size_4k;
859 	u32 prev_al_stripes;
860 	sector_t size;
861 	char ppb[10];
862 	void *buffer;
863 
864 	int md_moved, la_size_changed;
865 	enum determine_dev_size rv = DS_UNCHANGED;
866 
867 	/* race:
868 	 * application request passes inc_ap_bio,
869 	 * but then cannot get an AL-reference.
870 	 * this function later may wait on ap_bio_cnt == 0. -> deadlock.
871 	 *
872 	 * to avoid that:
873 	 * Suspend IO right here.
874 	 * still lock the act_log to not trigger ASSERTs there.
875 	 */
876 	drbd_suspend_io(device);
877 	buffer = drbd_md_get_buffer(device); /* Lock meta-data IO */
878 	if (!buffer) {
879 		drbd_resume_io(device);
880 		return DS_ERROR;
881 	}
882 
883 	/* no wait necessary anymore, actually we could assert that */
884 	wait_event(device->al_wait, lc_try_lock(device->act_log));
885 
886 	prev_first_sect = drbd_md_first_sector(device->ldev);
887 	prev_size = device->ldev->md.md_size_sect;
888 	la_size_sect = device->ldev->md.la_size_sect;
889 
890 	if (rs) {
891 		/* rs is non-NULL if we should change the AL layout only */
892 
893 		prev_al_stripes = md->al_stripes;
894 		prev_al_stripe_size_4k = md->al_stripe_size_4k;
895 
896 		md->al_stripes = rs->al_stripes;
897 		md->al_stripe_size_4k = rs->al_stripe_size / 4;
898 		md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
899 	}
900 
901 	drbd_md_set_sector_offsets(device, device->ldev);
902 
903 	rcu_read_lock();
904 	u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
905 	rcu_read_unlock();
906 	size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);
907 
908 	if (size < la_size_sect) {
909 		if (rs && u_size == 0) {
910 			/* Remove "rs &&" later. This check should always be active, but
911 			   right now the receiver expects the permissive behavior */
912 			drbd_warn(device, "Implicit shrink not allowed. "
913 				 "Use --size=%llus for explicit shrink.\n",
914 				 (unsigned long long)size);
915 			rv = DS_ERROR_SHRINK;
916 		}
917 		if (u_size > size)
918 			rv = DS_ERROR_SPACE_MD;
919 		if (rv != DS_UNCHANGED)
920 			goto err_out;
921 	}
922 
923 	if (drbd_get_capacity(device->this_bdev) != size ||
924 	    drbd_bm_capacity(device) != size) {
925 		int err;
926 		err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
927 		if (unlikely(err)) {
928 			/* currently there is only one error: ENOMEM! */
929 			size = drbd_bm_capacity(device)>>1;
930 			if (size == 0) {
931 				drbd_err(device, "OUT OF MEMORY! "
932 				    "Could not allocate bitmap!\n");
933 			} else {
934 				drbd_err(device, "BM resizing failed. "
935 				    "Leaving size unchanged at size = %lu KB\n",
936 				    (unsigned long)size);
937 			}
938 			rv = DS_ERROR;
939 		}
940 		/* racy, see comments above. */
941 		drbd_set_my_capacity(device, size);
942 		device->ldev->md.la_size_sect = size;
943 		drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
944 		     (unsigned long long)size>>1);
945 	}
946 	if (rv <= DS_ERROR)
947 		goto err_out;
948 
949 	la_size_changed = (la_size_sect != device->ldev->md.la_size_sect);
950 
951 	md_moved = prev_first_sect != drbd_md_first_sector(device->ldev)
952 		|| prev_size	   != device->ldev->md.md_size_sect;
953 
954 	if (la_size_changed || md_moved || rs) {
955 		u32 prev_flags;
956 
957 		drbd_al_shrink(device); /* All extents inactive. */
958 
959 		prev_flags = md->flags;
960 		md->flags &= ~MDF_PRIMARY_IND;
961 		drbd_md_write(device, buffer);
962 
963 		drbd_info(device, "Writing the whole bitmap, %s\n",
964 			 la_size_changed && md_moved ? "size changed and md moved" :
965 			 la_size_changed ? "size changed" : "md moved");
966 		/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
967 		drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
968 			       "size changed", BM_LOCKED_MASK);
969 		drbd_initialize_al(device, buffer);
970 
971 		md->flags = prev_flags;
972 		drbd_md_write(device, buffer);
973 
974 		if (rs)
975 			drbd_info(device, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
976 				  md->al_stripes, md->al_stripe_size_4k * 4);
977 	}
978 
979 	if (size > la_size_sect)
980 		rv = la_size_sect ? DS_GREW : DS_GREW_FROM_ZERO;
981 	if (size < la_size_sect)
982 		rv = DS_SHRUNK;
983 
984 	if (0) {
985 	err_out:
986 		if (rs) {
987 			md->al_stripes = prev_al_stripes;
988 			md->al_stripe_size_4k = prev_al_stripe_size_4k;
989 			md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;
990 
991 			drbd_md_set_sector_offsets(device, device->ldev);
992 		}
993 	}
994 	lc_unlock(device->act_log);
995 	wake_up(&device->al_wait);
996 	drbd_md_put_buffer(device);
997 	drbd_resume_io(device);
998 
999 	return rv;
1000 }
1001 
1002 sector_t
1003 drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev,
1004 		  sector_t u_size, int assume_peer_has_space)
1005 {
1006 	sector_t p_size = device->p_size;   /* partner's disk size. */
1007 	sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. */
1008 	sector_t m_size; /* my size */
1009 	sector_t size = 0;
1010 
1011 	m_size = drbd_get_max_capacity(bdev);
1012 
1013 	if (device->state.conn < C_CONNECTED && assume_peer_has_space) {
1014 		drbd_warn(device, "Resize while not connected was forced by the user!\n");
1015 		p_size = m_size;
1016 	}
1017 
1018 	if (p_size && m_size) {
1019 		size = min_t(sector_t, p_size, m_size);
1020 	} else {
1021 		if (la_size_sect) {
1022 			size = la_size_sect;
1023 			if (m_size && m_size < size)
1024 				size = m_size;
1025 			if (p_size && p_size < size)
1026 				size = p_size;
1027 		} else {
1028 			if (m_size)
1029 				size = m_size;
1030 			if (p_size)
1031 				size = p_size;
1032 		}
1033 	}
1034 
1035 	if (size == 0)
1036 		drbd_err(device, "Both nodes diskless!\n");
1037 
1038 	if (u_size) {
1039 		if (u_size > size)
1040 			drbd_err(device, "Requested disk size is too big (%lu > %lu)\n",
1041 			    (unsigned long)u_size>>1, (unsigned long)size>>1);
1042 		else
1043 			size = u_size;
1044 	}
1045 
1046 	return size;
1047 }
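
/* Size negotiation summary for drbd_new_dev_size():
 *  - both disk sizes known: use min(p_size, m_size);
 *  - otherwise fall back to the last agreed size, clamped by whichever
 *    of the local/peer sizes is known;
 *  - a user configured size (u_size) wins, unless it exceeds what was
 *    negotiated above. */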
1048 
1049 /**
1050  * drbd_check_al_size() - Ensures that the AL is of the right size
1051  * @device:	DRBD device.
1052  *
1053  * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
1054  * failed, and 0 on success. You should call drbd_md_sync() after calling
1055  * this function.
1056  */
1057 static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
1058 {
1059 	struct lru_cache *n, *t;
1060 	struct lc_element *e;
1061 	unsigned int in_use;
1062 	int i;
1063 
1064 	if (device->act_log &&
1065 	    device->act_log->nr_elements == dc->al_extents)
1066 		return 0;
1067 
1068 	in_use = 0;
1069 	t = device->act_log;
1070 	n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
1071 		dc->al_extents, sizeof(struct lc_element), 0);
1072 
1073 	if (n == NULL) {
1074 		drbd_err(device, "Cannot allocate act_log lru!\n");
1075 		return -ENOMEM;
1076 	}
1077 	spin_lock_irq(&device->al_lock);
1078 	if (t) {
1079 		for (i = 0; i < t->nr_elements; i++) {
1080 			e = lc_element_by_index(t, i);
1081 			if (e->refcnt)
1082 				drbd_err(device, "refcnt(%d)==%d\n",
1083 				    e->lc_number, e->refcnt);
1084 			in_use += e->refcnt;
1085 		}
1086 	}
1087 	if (!in_use)
1088 		device->act_log = n;
1089 	spin_unlock_irq(&device->al_lock);
1090 	if (in_use) {
1091 		drbd_err(device, "Activity log still in use!\n");
1092 		lc_destroy(n);
1093 		return -EBUSY;
1094 	} else {
1095 		if (t)
1096 			lc_destroy(t);
1097 	}
1098 	drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elements */
1099 	return 0;
1100 }
1101 
1102 static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_bio_size)
1103 {
1104 	struct request_queue * const q = device->rq_queue;
1105 	unsigned int max_hw_sectors = max_bio_size >> 9;
1106 	unsigned int max_segments = 0;
1107 
1108 	if (get_ldev_if_state(device, D_ATTACHING)) {
1109 		struct request_queue * const b = device->ldev->backing_bdev->bd_disk->queue;
1110 
1111 		max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
1112 		rcu_read_lock();
1113 		max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
1114 		rcu_read_unlock();
1115 		put_ldev(device);
1116 	}
1117 
1118 	blk_queue_logical_block_size(q, 512);
1119 	blk_queue_max_hw_sectors(q, max_hw_sectors);
1120 	/* This is the workaround for "bio would need to, but cannot, be split" */
1121 	blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
1122 	blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
1123 
1124 	if (get_ldev_if_state(device, D_ATTACHING)) {
1125 		struct request_queue * const b = device->ldev->backing_bdev->bd_disk->queue;
1126 
1127 		blk_queue_stack_limits(q, b);
1128 
1129 		if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
1130 			drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
1131 				 q->backing_dev_info.ra_pages,
1132 				 b->backing_dev_info.ra_pages);
1133 			q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
1134 		}
1135 		put_ldev(device);
1136 	}
1137 }
1138 
1139 void drbd_reconsider_max_bio_size(struct drbd_device *device)
1140 {
1141 	unsigned int now, new, local, peer;
1142 
1143 	now = queue_max_hw_sectors(device->rq_queue) << 9;
1144 	local = device->local_max_bio_size; /* possibly the last known value, from volatile memory */
1145 	peer = device->peer_max_bio_size; /* possibly the last known value, from meta data */
1146 
1147 	if (get_ldev_if_state(device, D_ATTACHING)) {
1148 		local = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9;
1149 		device->local_max_bio_size = local;
1150 		put_ldev(device);
1151 	}
1152 	local = min(local, DRBD_MAX_BIO_SIZE);
1153 
1154 	/* We may ignore peer limits if the peer is modern enough.
1155 	   From 8.3.8 onwards the peer can use multiple
1156 	   BIOs for a single peer_request. */
1157 	if (device->state.conn >= C_WF_REPORT_PARAMS) {
1158 		if (first_peer_device(device)->connection->agreed_pro_version < 94)
1159 			peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
1160 			/* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
1161 		else if (first_peer_device(device)->connection->agreed_pro_version == 94)
1162 			peer = DRBD_MAX_SIZE_H80_PACKET;
1163 		else if (first_peer_device(device)->connection->agreed_pro_version < 100)
1164 			peer = DRBD_MAX_BIO_SIZE_P95;  /* drbd 8.3.8 onwards, before 8.4.0 */
1165 		else
1166 			peer = DRBD_MAX_BIO_SIZE;
1167 	}
1168 
1169 	new = min(local, peer);
1170 
1171 	if (device->state.role == R_PRIMARY && new < now)
1172 		drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now);
1173 
1174 	if (new != now)
1175 		drbd_info(device, "max BIO size = %u\n", new);
1176 
1177 	drbd_setup_queue_param(device, new);
1178 }
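
/* Peer limits applied above, by agreed protocol version:
 *    < 94: min(peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET)
 *   == 94: DRBD_MAX_SIZE_H80_PACKET
 *   95-99: DRBD_MAX_BIO_SIZE_P95 (drbd 8.3.8 up to before 8.4.0)
 *  >= 100: DRBD_MAX_BIO_SIZE */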
1179 
1180 /* Starts the worker thread */
1181 static void conn_reconfig_start(struct drbd_connection *connection)
1182 {
1183 	drbd_thread_start(&connection->worker);
1184 	drbd_flush_workqueue(&connection->sender_work);
1185 }
1186 
1187 /* if still unconfigured, stops the worker again. */
1188 static void conn_reconfig_done(struct drbd_connection *connection)
1189 {
1190 	bool stop_threads;
1191 	spin_lock_irq(&connection->resource->req_lock);
1192 	stop_threads = conn_all_vols_unconf(connection) &&
1193 		connection->cstate == C_STANDALONE;
1194 	spin_unlock_irq(&connection->resource->req_lock);
1195 	if (stop_threads) {
1196 		/* asender is implicitly stopped by receiver
1197 		 * in conn_disconnect() */
1198 		drbd_thread_stop(&connection->receiver);
1199 		drbd_thread_stop(&connection->worker);
1200 	}
1201 }
1202 
1203 /* Make sure IO is suspended before calling this function. */
1204 static void drbd_suspend_al(struct drbd_device *device)
1205 {
1206 	int s = 0;
1207 
1208 	if (!lc_try_lock(device->act_log)) {
1209 		drbd_warn(device, "Failed to lock al in drbd_suspend_al()\n");
1210 		return;
1211 	}
1212 
1213 	drbd_al_shrink(device);
1214 	spin_lock_irq(&device->resource->req_lock);
1215 	if (device->state.conn < C_CONNECTED)
1216 		s = !test_and_set_bit(AL_SUSPENDED, &device->flags);
1217 	spin_unlock_irq(&device->resource->req_lock);
1218 	lc_unlock(device->act_log);
1219 
1220 	if (s)
1221 		drbd_info(device, "Suspended AL updates\n");
1222 }
1223 
1224 
1225 static bool should_set_defaults(struct genl_info *info)
1226 {
1227 	unsigned flags = ((struct drbd_genlmsghdr*)info->userhdr)->flags;
1228 	return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS);
1229 }
1230 
1231 static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
1232 {
1233 	/* This is limited by 16 bit "slot" numbers,
1234 	 * and by available on-disk context storage.
1235 	 *
1236 	 * Also (u16)~0 is special (denotes a "free" extent).
1237 	 *
1238 	 * One transaction occupies one 4kB on-disk block,
1239 	 * we have n such blocks in the on disk ring buffer,
1240 	 * the "current" transaction may fail (n-1),
1241 	 * and there are 919 slots of context information per transaction.
1242 	 *
1243 	 * 72 transaction blocks amount to more than 2**16 context slots,
1244 	 * so cap there first.
1245 	 */
1246 	const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX;
1247 	const unsigned int sufficient_on_disk =
1248 		(max_al_nr + AL_CONTEXT_PER_TRANSACTION -1)
1249 		/AL_CONTEXT_PER_TRANSACTION;
1250 
1251 	unsigned int al_size_4k = bdev->md.al_size_4k;
1252 
1253 	if (al_size_4k > sufficient_on_disk)
1254 		return max_al_nr;
1255 
1256 	return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
1257 }
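
/* Worked example (assuming the historical fixed 32kB activity log,
 * i.e. al_size_4k == 8): one on-disk transaction block is reserved for
 * the "current" transaction, the remaining 7 hold 919 context slots
 * each, so drbd_al_extents_max() yields (8 - 1) * 919 = 6433 extents. */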
1258 
1259 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
1260 {
1261 	enum drbd_ret_code retcode;
1262 	struct drbd_device *device;
1263 	struct disk_conf *new_disk_conf, *old_disk_conf;
1264 	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
1265 	int err, fifo_size;
1266 
1267 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
1268 	if (!adm_ctx.reply_skb)
1269 		return retcode;
1270 	if (retcode != NO_ERROR)
1271 		goto out;
1272 
1273 	device = adm_ctx.device;
1274 
1275 	/* we also need a disk
1276 	 * to change the options on */
1277 	if (!get_ldev(device)) {
1278 		retcode = ERR_NO_DISK;
1279 		goto out;
1280 	}
1281 
1282 	new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
1283 	if (!new_disk_conf) {
1284 		retcode = ERR_NOMEM;
1285 		goto fail;
1286 	}
1287 
1288 	mutex_lock(&device->resource->conf_update);
1289 	old_disk_conf = device->ldev->disk_conf;
1290 	*new_disk_conf = *old_disk_conf;
1291 	if (should_set_defaults(info))
1292 		set_disk_conf_defaults(new_disk_conf);
1293 
1294 	err = disk_conf_from_attrs_for_change(new_disk_conf, info);
1295 	if (err && err != -ENOMSG) {
1296 		retcode = ERR_MANDATORY_TAG;
1297 		drbd_msg_put_info(from_attrs_err_to_txt(err));
1298 		goto fail_unlock;
1299 	}
1300 
1301 	if (!expect(new_disk_conf->resync_rate >= 1))
1302 		new_disk_conf->resync_rate = 1;
1303 
1304 	if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1305 		new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1306 	if (new_disk_conf->al_extents > drbd_al_extents_max(device->ldev))
1307 		new_disk_conf->al_extents = drbd_al_extents_max(device->ldev);
1308 
1309 	if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1310 		new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1311 
1312 	fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
1313 	if (fifo_size != device->rs_plan_s->size) {
1314 		new_plan = fifo_alloc(fifo_size);
1315 		if (!new_plan) {
1316 			drbd_err(device, "kmalloc of fifo_buffer failed\n");
1317 			retcode = ERR_NOMEM;
1318 			goto fail_unlock;
1319 		}
1320 	}
1321 
1322 	drbd_suspend_io(device);
1323 	wait_event(device->al_wait, lc_try_lock(device->act_log));
1324 	drbd_al_shrink(device);
1325 	err = drbd_check_al_size(device, new_disk_conf);
1326 	lc_unlock(device->act_log);
1327 	wake_up(&device->al_wait);
1328 	drbd_resume_io(device);
1329 
1330 	if (err) {
1331 		retcode = ERR_NOMEM;
1332 		goto fail_unlock;
1333 	}
1334 
1335 	write_lock_irq(&global_state_lock);
1336 	retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1337 	if (retcode == NO_ERROR) {
1338 		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
1339 		drbd_resync_after_changed(device);
1340 	}
1341 	write_unlock_irq(&global_state_lock);
1342 
1343 	if (retcode != NO_ERROR)
1344 		goto fail_unlock;
1345 
1346 	if (new_plan) {
1347 		old_plan = device->rs_plan_s;
1348 		rcu_assign_pointer(device->rs_plan_s, new_plan);
1349 	}
1350 
1351 	mutex_unlock(&device->resource->conf_update);
1352 
1353 	if (new_disk_conf->al_updates)
1354 		device->ldev->md.flags &= ~MDF_AL_DISABLED;
1355 	else
1356 		device->ldev->md.flags |= MDF_AL_DISABLED;
1357 
1358 	if (new_disk_conf->md_flushes)
1359 		clear_bit(MD_NO_FUA, &device->flags);
1360 	else
1361 		set_bit(MD_NO_FUA, &device->flags);
1362 
1363 	drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);
1364 
1365 	drbd_md_sync(device);
1366 
1367 	if (device->state.conn >= C_CONNECTED) {
1368 		struct drbd_peer_device *peer_device;
1369 
1370 		for_each_peer_device(peer_device, device)
1371 			drbd_send_sync_param(peer_device);
1372 	}
1373 
1374 	synchronize_rcu();
1375 	kfree(old_disk_conf);
1376 	kfree(old_plan);
1377 	mod_timer(&device->request_timer, jiffies + HZ);
1378 	goto success;
1379 
1380 fail_unlock:
1381 	mutex_unlock(&device->resource->conf_update);
1382  fail:
1383 	kfree(new_disk_conf);
1384 	kfree(new_plan);
1385 success:
1386 	put_ldev(device);
1387  out:
1388 	drbd_adm_finish(info, retcode);
1389 	return 0;
1390 }
1391 
1392 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1393 {
1394 	struct drbd_device *device;
1395 	int err;
1396 	enum drbd_ret_code retcode;
1397 	enum determine_dev_size dd;
1398 	sector_t max_possible_sectors;
1399 	sector_t min_md_device_sectors;
1400 	struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
1401 	struct disk_conf *new_disk_conf = NULL;
1402 	struct block_device *bdev;
1403 	struct lru_cache *resync_lru = NULL;
1404 	struct fifo_buffer *new_plan = NULL;
1405 	union drbd_state ns, os;
1406 	enum drbd_state_rv rv;
1407 	struct net_conf *nc;
1408 
1409 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
1410 	if (!adm_ctx.reply_skb)
1411 		return retcode;
1412 	if (retcode != NO_ERROR)
1413 		goto finish;
1414 
1415 	device = adm_ctx.device;
1416 	conn_reconfig_start(first_peer_device(device)->connection);
1417 
1418 	/* if you want to reconfigure, please tear down first */
1419 	if (device->state.disk > D_DISKLESS) {
1420 		retcode = ERR_DISK_CONFIGURED;
1421 		goto fail;
1422 	}
1423 	/* It may just now have detached because of IO error.  Make sure
1424 	 * drbd_ldev_destroy is done already, we may end up here very fast,
1425 	 * e.g. if someone calls attach from the on-io-error handler,
1426 	 * to realize a "hot spare" feature (not that I'd recommend that) */
1427 	wait_event(device->misc_wait, !atomic_read(&device->local_cnt));
1428 
1429 	/* make sure there is no leftover from previous force-detach attempts */
1430 	clear_bit(FORCE_DETACH, &device->flags);
1431 	clear_bit(WAS_IO_ERROR, &device->flags);
1432 	clear_bit(WAS_READ_ERROR, &device->flags);
1433 
1434 	/* and no leftover from previously aborted resync or verify, either */
1435 	device->rs_total = 0;
1436 	device->rs_failed = 0;
1437 	atomic_set(&device->rs_pending_cnt, 0);
1438 
1439 	/* allocation not in the IO path, drbdsetup context */
1440 	nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
1441 	if (!nbc) {
1442 		retcode = ERR_NOMEM;
1443 		goto fail;
1444 	}
1445 	spin_lock_init(&nbc->md.uuid_lock);
1446 
1447 	new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
1448 	if (!new_disk_conf) {
1449 		retcode = ERR_NOMEM;
1450 		goto fail;
1451 	}
1452 	nbc->disk_conf = new_disk_conf;
1453 
1454 	set_disk_conf_defaults(new_disk_conf);
1455 	err = disk_conf_from_attrs(new_disk_conf, info);
1456 	if (err) {
1457 		retcode = ERR_MANDATORY_TAG;
1458 		drbd_msg_put_info(from_attrs_err_to_txt(err));
1459 		goto fail;
1460 	}
1461 
1462 	if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1463 		new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1464 
1465 	new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ);
1466 	if (!new_plan) {
1467 		retcode = ERR_NOMEM;
1468 		goto fail;
1469 	}
1470 
1471 	if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
1472 		retcode = ERR_MD_IDX_INVALID;
1473 		goto fail;
1474 	}
1475 
1476 	write_lock_irq(&global_state_lock);
1477 	retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1478 	write_unlock_irq(&global_state_lock);
1479 	if (retcode != NO_ERROR)
1480 		goto fail;
1481 
1482 	rcu_read_lock();
1483 	nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
1484 	if (nc) {
1485 		if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
1486 			rcu_read_unlock();
1487 			retcode = ERR_STONITH_AND_PROT_A;
1488 			goto fail;
1489 		}
1490 	}
1491 	rcu_read_unlock();
1492 
1493 	bdev = blkdev_get_by_path(new_disk_conf->backing_dev,
1494 				  FMODE_READ | FMODE_WRITE | FMODE_EXCL, device);
1495 	if (IS_ERR(bdev)) {
1496 		drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev,
1497 			PTR_ERR(bdev));
1498 		retcode = ERR_OPEN_DISK;
1499 		goto fail;
1500 	}
1501 	nbc->backing_bdev = bdev;
1502 
1503 	/*
1504 	 * meta_dev_idx >= 0: external fixed size, possibly multiple
1505 	 * drbd sharing one meta device.  TODO in that case, paranoia
1506 	 * check that [md_bdev, meta_dev_idx] is not yet used by some
1507 	 * other drbd minor!  (if you use drbd.conf + drbdadm, that
1508 	 * should check it for you already; but if you don't, or
1509 	 * someone fooled it, we need to double check here)
1510 	 */
1511 	bdev = blkdev_get_by_path(new_disk_conf->meta_dev,
1512 				  FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1513 				  (new_disk_conf->meta_dev_idx < 0) ?
1514 				  (void *)device : (void *)drbd_m_holder);
1515 	if (IS_ERR(bdev)) {
1516 		drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev,
1517 			PTR_ERR(bdev));
1518 		retcode = ERR_OPEN_MD_DISK;
1519 		goto fail;
1520 	}
1521 	nbc->md_bdev = bdev;
1522 
1523 	if ((nbc->backing_bdev == nbc->md_bdev) !=
1524 	    (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
1525 	     new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
1526 		retcode = ERR_MD_IDX_INVALID;
1527 		goto fail;
1528 	}
1529 
1530 	resync_lru = lc_create("resync", drbd_bm_ext_cache,
1531 			1, 61, sizeof(struct bm_extent),
1532 			offsetof(struct bm_extent, lce));
1533 	if (!resync_lru) {
1534 		retcode = ERR_NOMEM;
1535 		goto fail;
1536 	}
1537 
1538 	/* Read our meta data super block early.
1539 	 * This also sets other on-disk offsets. */
1540 	retcode = drbd_md_read(device, nbc);
1541 	if (retcode != NO_ERROR)
1542 		goto fail;
1543 
1544 	if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1545 		new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1546 	if (new_disk_conf->al_extents > drbd_al_extents_max(nbc))
1547 		new_disk_conf->al_extents = drbd_al_extents_max(nbc);
1548 
1549 	if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
1550 		drbd_err(device, "max capacity %llu smaller than disk size %llu\n",
1551 			(unsigned long long) drbd_get_max_capacity(nbc),
1552 			(unsigned long long) new_disk_conf->disk_size);
1553 		retcode = ERR_DISK_TOO_SMALL;
1554 		goto fail;
1555 	}
1556 
1557 	if (new_disk_conf->meta_dev_idx < 0) {
1558 		max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
1559 		/* at least one MB, otherwise it does not make sense */
1560 		min_md_device_sectors = (2<<10);
1561 	} else {
1562 		max_possible_sectors = DRBD_MAX_SECTORS;
1563 		min_md_device_sectors = MD_128MB_SECT * (new_disk_conf->meta_dev_idx + 1);
1564 	}
1565 
1566 	if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
1567 		retcode = ERR_MD_DISK_TOO_SMALL;
1568 		drbd_warn(device, "refusing attach: md-device too small, "
1569 		     "at least %llu sectors needed for this meta-disk type\n",
1570 		     (unsigned long long) min_md_device_sectors);
1571 		goto fail;
1572 	}
1573 
1574 	/* Make sure the new disk is big enough
1575 	 * (we may currently be R_PRIMARY with no local disk...) */
1576 	if (drbd_get_max_capacity(nbc) <
1577 	    drbd_get_capacity(device->this_bdev)) {
1578 		retcode = ERR_DISK_TOO_SMALL;
1579 		goto fail;
1580 	}
1581 
1582 	nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
1583 
1584 	if (nbc->known_size > max_possible_sectors) {
1585 		drbd_warn(device, "==> truncating very big lower level device "
1586 			"to currently maximum possible %llu sectors <==\n",
1587 			(unsigned long long) max_possible_sectors);
1588 		if (new_disk_conf->meta_dev_idx >= 0)
1589 			drbd_warn(device, "==>> using internal or flexible "
1590 				      "meta data may help <<==\n");
1591 	}
1592 
1593 	drbd_suspend_io(device);
1594 	/* also wait for the last barrier ack. */
1595 	/* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
1596 	 * We need a way to either ignore barrier acks for barriers sent before a device
1597 	 * was attached, or a way to wait for all pending barrier acks to come in.
1598 	 * As barriers are counted per resource,
1599 	 * we'd need to suspend io on all devices of a resource.
1600 	 */
1601 	wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
1602 	/* and for any other previously queued work */
1603 	drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
1604 
1605 	rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
1606 	retcode = rv;  /* FIXME: Type mismatch. */
1607 	drbd_resume_io(device);
1608 	if (rv < SS_SUCCESS)
1609 		goto fail;
1610 
1611 	if (!get_ldev_if_state(device, D_ATTACHING))
1612 		goto force_diskless;
1613 
1614 	if (!device->bitmap) {
1615 		if (drbd_bm_init(device)) {
1616 			retcode = ERR_NOMEM;
1617 			goto force_diskless_dec;
1618 		}
1619 	}
1620 
1621 	if (device->state.conn < C_CONNECTED &&
1622 	    device->state.role == R_PRIMARY &&
1623 	    (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
1624 		drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
1625 		    (unsigned long long)device->ed_uuid);
1626 		retcode = ERR_DATA_NOT_CURRENT;
1627 		goto force_diskless_dec;
1628 	}
1629 
1630 	/* Since we are diskless, fix the activity log first... */
1631 	if (drbd_check_al_size(device, new_disk_conf)) {
1632 		retcode = ERR_NOMEM;
1633 		goto force_diskless_dec;
1634 	}
1635 
1636 	/* Prevent shrinking of consistent devices! */
1637 	if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
1638 	    drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
1639 		drbd_warn(device, "refusing to truncate a consistent device\n");
1640 		retcode = ERR_DISK_TOO_SMALL;
1641 		goto force_diskless_dec;
1642 	}
1643 
1644 	/* Reset the "barriers don't work" bits here, then force meta data to
1645 	 * be written, to ensure we determine if barriers are supported. */
1646 	if (new_disk_conf->md_flushes)
1647 		clear_bit(MD_NO_FUA, &device->flags);
1648 	else
1649 		set_bit(MD_NO_FUA, &device->flags);
1650 
1651 	/* Point of no return reached.
1652 	 * Devices and memory are no longer released by error cleanup below.
1653 	 * Now the device takes over responsibility, and the state engine should
1654 	 * clean it up somewhere.  */
1655 	D_ASSERT(device, device->ldev == NULL);
1656 	device->ldev = nbc;
1657 	device->resync = resync_lru;
1658 	device->rs_plan_s = new_plan;
1659 	nbc = NULL;
1660 	resync_lru = NULL;
1661 	new_disk_conf = NULL;
1662 	new_plan = NULL;
1663 
1664 	drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);
1665 
1666 	if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
1667 		set_bit(CRASHED_PRIMARY, &device->flags);
1668 	else
1669 		clear_bit(CRASHED_PRIMARY, &device->flags);
1670 
1671 	if (drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1672 	    !(device->state.role == R_PRIMARY && device->resource->susp_nod))
1673 		set_bit(CRASHED_PRIMARY, &device->flags);
1674 
1675 	device->send_cnt = 0;
1676 	device->recv_cnt = 0;
1677 	device->read_cnt = 0;
1678 	device->writ_cnt = 0;
1679 
1680 	drbd_reconsider_max_bio_size(device);
1681 
1682 	/* If I am currently not R_PRIMARY,
1683 	 * but meta data primary indicator is set,
1684 	 * I just now recover from a hard crash,
1685 	 * and have been R_PRIMARY before that crash.
1686 	 *
1687 	 * Now, if I had no connection before that crash
1688 	 * (have been degraded R_PRIMARY), chances are that
1689 	 * I won't find my peer now either.
1690 	 *
1691 	 * In that case, and _only_ in that case,
1692 	 * we use the degr-wfc-timeout instead of the default,
1693 	 * so we can automatically recover from a crash of a
1694 	 * degraded but active "cluster" after a certain timeout.
1695 	 */
1696 	clear_bit(USE_DEGR_WFC_T, &device->flags);
1697 	if (device->state.role != R_PRIMARY &&
1698 	     drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1699 	    !drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND))
1700 		set_bit(USE_DEGR_WFC_T, &device->flags);
1701 
1702 	dd = drbd_determine_dev_size(device, 0, NULL);
1703 	if (dd <= DS_ERROR) {
1704 		retcode = ERR_NOMEM_BITMAP;
1705 		goto force_diskless_dec;
1706 	} else if (dd == DS_GREW)
1707 		set_bit(RESYNC_AFTER_NEG, &device->flags);
1708 
1709 	if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ||
1710 	    (test_bit(CRASHED_PRIMARY, &device->flags) &&
1711 	     drbd_md_test_flag(device->ldev, MDF_AL_DISABLED))) {
1712 		drbd_info(device, "Assuming that all blocks are out of sync "
1713 		     "(aka FullSync)\n");
1714 		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
1715 			"set_n_write from attaching", BM_LOCKED_MASK)) {
1716 			retcode = ERR_IO_MD_DISK;
1717 			goto force_diskless_dec;
1718 		}
1719 	} else {
1720 		if (drbd_bitmap_io(device, &drbd_bm_read,
1721 			"read from attaching", BM_LOCKED_MASK)) {
1722 			retcode = ERR_IO_MD_DISK;
1723 			goto force_diskless_dec;
1724 		}
1725 	}
1726 
1727 	if (_drbd_bm_total_weight(device) == drbd_bm_bits(device))
1728 		drbd_suspend_al(device); /* IO is still suspended here... */
1729 
1730 	spin_lock_irq(&device->resource->req_lock);
1731 	os = drbd_read_state(device);
1732 	ns = os;
1733 	/* If MDF_CONSISTENT is not set go into inconsistent state,
1734 	   otherwise investigate MDF_WAS_UP_TO_DATE...
1735 	   If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
1736 	   otherwise into D_CONSISTENT state.
1737 	*/
1738 	if (drbd_md_test_flag(device->ldev, MDF_CONSISTENT)) {
1739 		if (drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE))
1740 			ns.disk = D_CONSISTENT;
1741 		else
1742 			ns.disk = D_OUTDATED;
1743 	} else {
1744 		ns.disk = D_INCONSISTENT;
1745 	}
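	/* Decision table for the disk state chosen above:
	 *
	 *   MDF_CONSISTENT  MDF_WAS_UP_TO_DATE  ns.disk
	 *         0                  -           D_INCONSISTENT
	 *         1                  0           D_OUTDATED
	 *         1                  1           D_CONSISTENT
	 *
	 * D_CONSISTENT may still be promoted to D_UP_TO_DATE below. */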
1746 
1747 	if (drbd_md_test_flag(device->ldev, MDF_PEER_OUT_DATED))
1748 		ns.pdsk = D_OUTDATED;
1749 
1750 	rcu_read_lock();
1751 	if (ns.disk == D_CONSISTENT &&
1752 	    (ns.pdsk == D_OUTDATED || rcu_dereference(device->ldev->disk_conf)->fencing == FP_DONT_CARE))
1753 		ns.disk = D_UP_TO_DATE;
1754 
1755 	/* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
1756 	   MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
1757 	   this point, because drbd_request_state() modifies these
1758 	   flags. */
1759 
1760 	if (rcu_dereference(device->ldev->disk_conf)->al_updates)
1761 		device->ldev->md.flags &= ~MDF_AL_DISABLED;
1762 	else
1763 		device->ldev->md.flags |= MDF_AL_DISABLED;
1764 
1765 	rcu_read_unlock();
1766 
1767 	/* In case we are C_CONNECTED postpone any decision on the new disk
1768 	   state after the negotiation phase. */
1769 	if (device->state.conn == C_CONNECTED) {
1770 		device->new_state_tmp.i = ns.i;
1771 		ns.i = os.i;
1772 		ns.disk = D_NEGOTIATING;
1773 
1774 		/* We expect to receive up-to-date UUIDs soon.
1775 		   To avoid a race in receive_state, free p_uuid while
1776 		   holding req_lock. I.e. atomic with the state change */
1777 		kfree(device->p_uuid);
1778 		device->p_uuid = NULL;
1779 	}
1780 
1781 	rv = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
1782 	spin_unlock_irq(&device->resource->req_lock);
1783 
1784 	if (rv < SS_SUCCESS)
1785 		goto force_diskless_dec;
1786 
1787 	mod_timer(&device->request_timer, jiffies + HZ);
1788 
1789 	if (device->state.role == R_PRIMARY)
1790 		device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
1791 	else
1792 		device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
1793 
1794 	drbd_md_mark_dirty(device);
1795 	drbd_md_sync(device);
1796 
1797 	kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
1798 	put_ldev(device);
1799 	conn_reconfig_done(first_peer_device(device)->connection);
1800 	drbd_adm_finish(info, retcode);
1801 	return 0;
1802 
1803  force_diskless_dec:
1804 	put_ldev(device);
1805  force_diskless:
1806 	drbd_force_state(device, NS(disk, D_DISKLESS));
1807 	drbd_md_sync(device);
1808  fail:
1809 	conn_reconfig_done(first_peer_device(device)->connection);
1810 	if (nbc) {
1811 		if (nbc->backing_bdev)
1812 			blkdev_put(nbc->backing_bdev,
1813 				   FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1814 		if (nbc->md_bdev)
1815 			blkdev_put(nbc->md_bdev,
1816 				   FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1817 		kfree(nbc);
1818 	}
1819 	kfree(new_disk_conf);
1820 	lc_destroy(resync_lru);
1821 	kfree(new_plan);
1822 
1823  finish:
1824 	drbd_adm_finish(info, retcode);
1825 	return 0;
1826 }
1827 
1828 static int adm_detach(struct drbd_device *device, int force)
1829 {
1830 	enum drbd_state_rv retcode;
1831 	int ret;
1832 
1833 	if (force) {
1834 		set_bit(FORCE_DETACH, &device->flags);
1835 		drbd_force_state(device, NS(disk, D_FAILED));
1836 		retcode = SS_SUCCESS;
1837 		goto out;
1838 	}
1839 
1840 	drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
1841 	drbd_md_get_buffer(device); /* make sure there is no in-flight meta-data IO */
1842 	retcode = drbd_request_state(device, NS(disk, D_FAILED));
1843 	drbd_md_put_buffer(device);
1844 	/* D_FAILED will transition to D_DISKLESS. */
1845 	ret = wait_event_interruptible(device->misc_wait,
1846 			device->state.disk != D_FAILED);
1847 	drbd_resume_io(device);
1848 	if ((int)retcode == (int)SS_IS_DISKLESS)
1849 		retcode = SS_NOTHING_TO_DO;
1850 	if (ret)
1851 		retcode = ERR_INTR;
1852 out:
1853 	return retcode;
1854 }
1855 
1856 /* Detaching the disk is a process in multiple stages.  First we need to lock
1857  * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
1858  * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
1859  * internal references as well.
1860  * Only then we have finally detached. */
1861 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
1862 {
1863 	enum drbd_ret_code retcode;
1864 	struct detach_parms parms = { };
1865 	int err;
1866 
1867 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
1868 	if (!adm_ctx.reply_skb)
1869 		return retcode;
1870 	if (retcode != NO_ERROR)
1871 		goto out;
1872 
1873 	if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
1874 		err = detach_parms_from_attrs(&parms, info);
1875 		if (err) {
1876 			retcode = ERR_MANDATORY_TAG;
1877 			drbd_msg_put_info(from_attrs_err_to_txt(err));
1878 			goto out;
1879 		}
1880 	}
1881 
1882 	retcode = adm_detach(adm_ctx.device, parms.force_detach);
1883 out:
1884 	drbd_adm_finish(info, retcode);
1885 	return 0;
1886 }
1887 
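/* Returns true if any volume of @connection is in a (possibly paused)
 * resync, as source or target.  The peer devices are walked under
 * rcu_read_lock(); the result is a snapshot only. */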
1888 static bool conn_resync_running(struct drbd_connection *connection)
1889 {
1890 	struct drbd_peer_device *peer_device;
1891 	bool rv = false;
1892 	int vnr;
1893 
1894 	rcu_read_lock();
1895 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1896 		struct drbd_device *device = peer_device->device;
1897 		if (device->state.conn == C_SYNC_SOURCE ||
1898 		    device->state.conn == C_SYNC_TARGET ||
1899 		    device->state.conn == C_PAUSED_SYNC_S ||
1900 		    device->state.conn == C_PAUSED_SYNC_T) {
1901 			rv = true;
1902 			break;
1903 		}
1904 	}
1905 	rcu_read_unlock();
1906 
1907 	return rv;
1908 }
1909 
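/* Like conn_resync_running(), but true while an online verify
 * (C_VERIFY_S or C_VERIFY_T) runs on any volume of @connection. */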
1910 static bool conn_ov_running(struct drbd_connection *connection)
1911 {
1912 	struct drbd_peer_device *peer_device;
1913 	bool rv = false;
1914 	int vnr;
1915 
1916 	rcu_read_lock();
1917 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1918 		struct drbd_device *device = peer_device->device;
1919 		if (device->state.conn == C_VERIFY_S ||
1920 		    device->state.conn == C_VERIFY_T) {
1921 			rv = true;
1922 			break;
1923 		}
1924 	}
1925 	rcu_read_unlock();
1926 
1927 	return rv;
1928 }
1929 
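/* Sanity checks on @new_net_conf, against the current connection state and,
 * if @old_net_conf is given, against what was already agreed with the peer:
 * changing wire protocol, two-primaries or integrity-alg on an established
 * connection needs agreed protocol version >= 100, dual-primary requires
 * protocol C, protocol A does not combine with stonith fencing, and
 * congestion policies other than "block" require protocol A. */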
1930 static enum drbd_ret_code
1931 _check_net_options(struct drbd_connection *connection, struct net_conf *old_net_conf, struct net_conf *new_net_conf)
1932 {
1933 	struct drbd_peer_device *peer_device;
1934 	int i;
1935 
1936 	if (old_net_conf && connection->cstate == C_WF_REPORT_PARAMS && connection->agreed_pro_version < 100) {
1937 		if (new_net_conf->wire_protocol != old_net_conf->wire_protocol)
1938 			return ERR_NEED_APV_100;
1939 
1940 		if (new_net_conf->two_primaries != old_net_conf->two_primaries)
1941 			return ERR_NEED_APV_100;
1942 
1943 		if (strcmp(new_net_conf->integrity_alg, old_net_conf->integrity_alg))
1944 			return ERR_NEED_APV_100;
1945 	}
1946 
1947 	if (!new_net_conf->two_primaries &&
1948 	    conn_highest_role(connection) == R_PRIMARY &&
1949 	    conn_highest_peer(connection) == R_PRIMARY)
1950 		return ERR_NEED_ALLOW_TWO_PRI;
1951 
1952 	if (new_net_conf->two_primaries &&
1953 	    (new_net_conf->wire_protocol != DRBD_PROT_C))
1954 		return ERR_NOT_PROTO_C;
1955 
1956 	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
1957 		struct drbd_device *device = peer_device->device;
1958 		if (get_ldev(device)) {
1959 			enum drbd_fencing_p fp = rcu_dereference(device->ldev->disk_conf)->fencing;
1960 			put_ldev(device);
1961 			if (new_net_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
1962 				return ERR_STONITH_AND_PROT_A;
1963 		}
1964 		if (device->state.role == R_PRIMARY && new_net_conf->discard_my_data)
1965 			return ERR_DISCARD_IMPOSSIBLE;
1966 	}
1967 
1968 	if (new_net_conf->on_congestion != OC_BLOCK && new_net_conf->wire_protocol != DRBD_PROT_A)
1969 		return ERR_CONG_NOT_PROTO_A;
1970 
1971 	return NO_ERROR;
1972 }
1973 
1974 static enum drbd_ret_code
1975 check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
1976 {
1977 	enum drbd_ret_code rv;
1978 	struct drbd_peer_device *peer_device;
1979 	int i;
1980 
1981 	rcu_read_lock();
1982 	rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf);
1983 	rcu_read_unlock();
1984 
1985 	/* connection->volumes protected by genl_lock() here */
1986 	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
1987 		struct drbd_device *device = peer_device->device;
1988 		if (!device->bitmap) {
1989 			if (drbd_bm_init(device))
1990 				return ERR_NOMEM;
1991 		}
1992 	}
1993 
1994 	return rv;
1995 }
1996 
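/* Bundle of crypto transforms that drbd_adm_connect() and
 * drbd_adm_net_opts() allocate up front and only commit to the connection
 * once all checks have passed; free_crypto() releases whatever was not
 * handed over. */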
1997 struct crypto {
1998 	struct crypto_hash *verify_tfm;
1999 	struct crypto_hash *csums_tfm;
2000 	struct crypto_hash *cram_hmac_tfm;
2001 	struct crypto_hash *integrity_tfm;
2002 };
2003 
2004 static int
2005 alloc_hash(struct crypto_hash **tfm, char *tfm_name, int err_alg)
2006 {
2007 	if (!tfm_name[0])
2008 		return NO_ERROR;
2009 
2010 	*tfm = crypto_alloc_hash(tfm_name, 0, CRYPTO_ALG_ASYNC);
2011 	if (IS_ERR(*tfm)) {
2012 		*tfm = NULL;
2013 		return err_alg;
2014 	}
2015 
2016 	return NO_ERROR;
2017 }
2018 
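/* Allocate every transform named in @new_net_conf.  The CRAM secret is
 * authenticated with an HMAC template built from cram_hmac_alg, so e.g.
 * "sha1" becomes "hmac(sha1)".  On error, the caller cleans up with
 * free_crypto(). */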
2019 static enum drbd_ret_code
2020 alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf)
2021 {
2022 	char hmac_name[CRYPTO_MAX_ALG_NAME];
2023 	enum drbd_ret_code rv;
2024 
2025 	rv = alloc_hash(&crypto->csums_tfm, new_net_conf->csums_alg,
2026 		       ERR_CSUMS_ALG);
2027 	if (rv != NO_ERROR)
2028 		return rv;
2029 	rv = alloc_hash(&crypto->verify_tfm, new_net_conf->verify_alg,
2030 		       ERR_VERIFY_ALG);
2031 	if (rv != NO_ERROR)
2032 		return rv;
2033 	rv = alloc_hash(&crypto->integrity_tfm, new_net_conf->integrity_alg,
2034 		       ERR_INTEGRITY_ALG);
2035 	if (rv != NO_ERROR)
2036 		return rv;
2037 	if (new_net_conf->cram_hmac_alg[0] != 0) {
2038 		snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
2039 			 new_net_conf->cram_hmac_alg);
2040 
2041 		rv = alloc_hash(&crypto->cram_hmac_tfm, hmac_name,
2042 			       ERR_AUTH_ALG);
2043 	}
2044 
2045 	return rv;
2046 }
2047 
2048 static void free_crypto(struct crypto *crypto)
2049 {
2050 	crypto_free_hash(crypto->cram_hmac_tfm);
2051 	crypto_free_hash(crypto->integrity_tfm);
2052 	crypto_free_hash(crypto->csums_tfm);
2053 	crypto_free_hash(crypto->verify_tfm);
2054 }
2055 
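/* Change the options of an established network configuration.  A rough
 * sketch of the ordering below: take data.mutex and conf_update, validate
 * and allocate, publish the new net_conf with rcu_assign_pointer(), swap
 * in the new transforms (leaving csums/verify alone while a resync or
 * online verify is still using them), drop the locks, and only after
 * synchronize_rcu() free the old net_conf. */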
2056 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
2057 {
2058 	enum drbd_ret_code retcode;
2059 	struct drbd_connection *connection;
2060 	struct net_conf *old_net_conf, *new_net_conf = NULL;
2061 	int err;
2062 	int ovr; /* online verify running */
2063 	int rsr; /* re-sync running */
2064 	struct crypto crypto = { };
2065 
2066 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION);
2067 	if (!adm_ctx.reply_skb)
2068 		return retcode;
2069 	if (retcode != NO_ERROR)
2070 		goto out;
2071 
2072 	connection = adm_ctx.connection;
2073 
2074 	new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
2075 	if (!new_net_conf) {
2076 		retcode = ERR_NOMEM;
2077 		goto out;
2078 	}
2079 
2080 	conn_reconfig_start(connection);
2081 
2082 	mutex_lock(&connection->data.mutex);
2083 	mutex_lock(&connection->resource->conf_update);
2084 	old_net_conf = connection->net_conf;
2085 
2086 	if (!old_net_conf) {
2087 		drbd_msg_put_info("net conf missing, try connect");
2088 		retcode = ERR_INVALID_REQUEST;
2089 		goto fail;
2090 	}
2091 
2092 	*new_net_conf = *old_net_conf;
2093 	if (should_set_defaults(info))
2094 		set_net_conf_defaults(new_net_conf);
2095 
2096 	err = net_conf_from_attrs_for_change(new_net_conf, info);
2097 	if (err && err != -ENOMSG) {
2098 		retcode = ERR_MANDATORY_TAG;
2099 		drbd_msg_put_info(from_attrs_err_to_txt(err));
2100 		goto fail;
2101 	}
2102 
2103 	retcode = check_net_options(connection, new_net_conf);
2104 	if (retcode != NO_ERROR)
2105 		goto fail;
2106 
2107 	/* re-sync running */
2108 	rsr = conn_resync_running(connection);
2109 	if (rsr && strcmp(new_net_conf->csums_alg, old_net_conf->csums_alg)) {
2110 		retcode = ERR_CSUMS_RESYNC_RUNNING;
2111 		goto fail;
2112 	}
2113 
2114 	/* online verify running */
2115 	ovr = conn_ov_running(connection);
2116 	if (ovr && strcmp(new_net_conf->verify_alg, old_net_conf->verify_alg)) {
2117 		retcode = ERR_VERIFY_RUNNING;
2118 		goto fail;
2119 	}
2120 
2121 	retcode = alloc_crypto(&crypto, new_net_conf);
2122 	if (retcode != NO_ERROR)
2123 		goto fail;
2124 
2125 	rcu_assign_pointer(connection->net_conf, new_net_conf);
2126 
2127 	if (!rsr) {
2128 		crypto_free_hash(connection->csums_tfm);
2129 		connection->csums_tfm = crypto.csums_tfm;
2130 		crypto.csums_tfm = NULL;
2131 	}
2132 	if (!ovr) {
2133 		crypto_free_hash(connection->verify_tfm);
2134 		connection->verify_tfm = crypto.verify_tfm;
2135 		crypto.verify_tfm = NULL;
2136 	}
2137 
2138 	crypto_free_hash(connection->integrity_tfm);
2139 	connection->integrity_tfm = crypto.integrity_tfm;
2140 	if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100)
2141 		/* Do this without trying to take connection->data.mutex again.  */
2142 		__drbd_send_protocol(connection, P_PROTOCOL_UPDATE);
2143 
2144 	crypto_free_hash(connection->cram_hmac_tfm);
2145 	connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2146 
2147 	mutex_unlock(&connection->resource->conf_update);
2148 	mutex_unlock(&connection->data.mutex);
2149 	synchronize_rcu();
2150 	kfree(old_net_conf);
2151 
2152 	if (connection->cstate >= C_WF_REPORT_PARAMS) {
2153 		struct drbd_peer_device *peer_device;
2154 		int vnr;
2155 
2156 		idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
2157 			drbd_send_sync_param(peer_device);
2158 	}
2159 
2160 	goto done;
2161 
2162  fail:
2163 	mutex_unlock(&connection->resource->conf_update);
2164 	mutex_unlock(&connection->data.mutex);
2165 	free_crypto(&crypto);
2166 	kfree(new_net_conf);
2167  done:
2168 	conn_reconfig_done(connection);
2169  out:
2170 	drbd_adm_finish(info, retcode);
2171 	return 0;
2172 }
2173 
2174 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
2175 {
2176 	struct drbd_peer_device *peer_device;
2177 	struct net_conf *old_net_conf, *new_net_conf = NULL;
2178 	struct crypto crypto = { };
2179 	struct drbd_resource *resource;
2180 	struct drbd_connection *connection;
2181 	enum drbd_ret_code retcode;
2182 	int i;
2183 	int err;
2184 
2185 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
2186 
2187 	if (!adm_ctx.reply_skb)
2188 		return retcode;
2189 	if (retcode != NO_ERROR)
2190 		goto out;
2191 	if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
2192 		drbd_msg_put_info("connection endpoint(s) missing");
2193 		retcode = ERR_INVALID_REQUEST;
2194 		goto out;
2195 	}
2196 
2197 	/* No need for _rcu here. All reconfiguration is
2198 	 * strictly serialized on genl_lock(). We are protected against
2199 	 * concurrent reconfiguration/addition/deletion */
2200 	for_each_resource(resource, &drbd_resources) {
2201 		for_each_connection(connection, resource) {
2202 			if (nla_len(adm_ctx.my_addr) == connection->my_addr_len &&
2203 			    !memcmp(nla_data(adm_ctx.my_addr), &connection->my_addr,
2204 				    connection->my_addr_len)) {
2205 				retcode = ERR_LOCAL_ADDR;
2206 				goto out;
2207 			}
2208 
2209 			if (nla_len(adm_ctx.peer_addr) == connection->peer_addr_len &&
2210 			    !memcmp(nla_data(adm_ctx.peer_addr), &connection->peer_addr,
2211 				    connection->peer_addr_len)) {
2212 				retcode = ERR_PEER_ADDR;
2213 				goto out;
2214 			}
2215 		}
2216 	}
2217 
2218 	connection = first_connection(adm_ctx.resource);
2219 	conn_reconfig_start(connection);
2220 
2221 	if (connection->cstate > C_STANDALONE) {
2222 		retcode = ERR_NET_CONFIGURED;
2223 		goto fail;
2224 	}
2225 
2226 	/* allocation not in the IO path, drbdsetup / netlink process context */
2227 	new_net_conf = kzalloc(sizeof(*new_net_conf), GFP_KERNEL);
2228 	if (!new_net_conf) {
2229 		retcode = ERR_NOMEM;
2230 		goto fail;
2231 	}
2232 
2233 	set_net_conf_defaults(new_net_conf);
2234 
2235 	err = net_conf_from_attrs(new_net_conf, info);
2236 	if (err && err != -ENOMSG) {
2237 		retcode = ERR_MANDATORY_TAG;
2238 		drbd_msg_put_info(from_attrs_err_to_txt(err));
2239 		goto fail;
2240 	}
2241 
2242 	retcode = check_net_options(connection, new_net_conf);
2243 	if (retcode != NO_ERROR)
2244 		goto fail;
2245 
2246 	retcode = alloc_crypto(&crypto, new_net_conf);
2247 	if (retcode != NO_ERROR)
2248 		goto fail;
2249 
2250 	((char *)new_net_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
2251 
2252 	drbd_flush_workqueue(&connection->sender_work);
2253 
2254 	mutex_lock(&adm_ctx.resource->conf_update);
2255 	old_net_conf = connection->net_conf;
2256 	if (old_net_conf) {
2257 		retcode = ERR_NET_CONFIGURED;
2258 		mutex_unlock(&adm_ctx.resource->conf_update);
2259 		goto fail;
2260 	}
2261 	rcu_assign_pointer(connection->net_conf, new_net_conf);
2262 
2263 	conn_free_crypto(connection);
2264 	connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2265 	connection->integrity_tfm = crypto.integrity_tfm;
2266 	connection->csums_tfm = crypto.csums_tfm;
2267 	connection->verify_tfm = crypto.verify_tfm;
2268 
2269 	connection->my_addr_len = nla_len(adm_ctx.my_addr);
2270 	memcpy(&connection->my_addr, nla_data(adm_ctx.my_addr), connection->my_addr_len);
2271 	connection->peer_addr_len = nla_len(adm_ctx.peer_addr);
2272 	memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len);
2273 
2274 	mutex_unlock(&adm_ctx.resource->conf_update);
2275 
2276 	rcu_read_lock();
2277 	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2278 		struct drbd_device *device = peer_device->device;
2279 		device->send_cnt = 0;
2280 		device->recv_cnt = 0;
2281 	}
2282 	rcu_read_unlock();
2283 
2284 	retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
2285 
2286 	conn_reconfig_done(connection);
2287 	drbd_adm_finish(info, retcode);
2288 	return 0;
2289 
2290 fail:
2291 	free_crypto(&crypto);
2292 	kfree(new_net_conf);
2293 
2294 	conn_reconfig_done(connection);
2295 out:
2296 	drbd_adm_finish(info, retcode);
2297 	return 0;
2298 }
2299 
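/* Try to take @connection down to C_DISCONNECTING, then wait until the
 * receiver thread has really stopped.  The retried state requests below
 * cover the cases where the state engine first wants to see ourselves or
 * the peer marked outdated before it lets go of the connection. */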
2300 static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force)
2301 {
2302 	enum drbd_state_rv rv;
2303 
2304 	rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2305 			force ? CS_HARD : 0);
2306 
2307 	switch (rv) {
2308 	case SS_NOTHING_TO_DO:
2309 		break;
2310 	case SS_ALREADY_STANDALONE:
2311 		return SS_SUCCESS;
2312 	case SS_PRIMARY_NOP:
2313 		/* Our state checking code wants to see the peer outdated. */
2314 		rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0);
2315 
2316 		if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */
2317 			rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_VERBOSE);
2318 
2319 		break;
2320 	case SS_CW_FAILED_BY_PEER:
2321 		/* The peer probably wants to see us outdated. */
2322 		rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
2323 							disk, D_OUTDATED), 0);
2324 		if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
2325 			rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2326 					CS_HARD);
2327 		}
2328 		break;
2329 	default:;
2330 		/* no special handling necessary */
2331 	}
2332 
2333 	if (rv >= SS_SUCCESS) {
2334 		enum drbd_state_rv rv2;
2335 		/* No one else can reconfigure the network while I am here.
2336 		 * The state handling only uses drbd_thread_stop_nowait(),
2337 		 * we want to really wait here until the receiver is no more.
2338 		 */
2339 		drbd_thread_stop(&connection->receiver);
2340 
2341 		/* Race breaker.  This additional state change request may be
2342 		 * necessary, if this was a forced disconnect during a receiver
2343 		 * restart.  We may have "killed" the receiver thread just
2344 		 * after drbd_receiver() returned.  Typically, we should already
2345 		 * be C_STANDALONE by now, and this becomes a no-op.
2346 		 */
2347 		rv2 = conn_request_state(connection, NS(conn, C_STANDALONE),
2348 				CS_VERBOSE | CS_HARD);
2349 		if (rv2 < SS_SUCCESS)
2350 			drbd_err(connection,
2351 				"unexpected rv2=%d in conn_try_disconnect()\n",
2352 				rv2);
2353 	}
2354 	return rv;
2355 }
2356 
2357 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
2358 {
2359 	struct disconnect_parms parms;
2360 	struct drbd_connection *connection;
2361 	enum drbd_state_rv rv;
2362 	enum drbd_ret_code retcode;
2363 	int err;
2364 
2365 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION);
2366 	if (!adm_ctx.reply_skb)
2367 		return retcode;
2368 	if (retcode != NO_ERROR)
2369 		goto fail;
2370 
2371 	connection = adm_ctx.connection;
2372 	memset(&parms, 0, sizeof(parms));
2373 	if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
2374 		err = disconnect_parms_from_attrs(&parms, info);
2375 		if (err) {
2376 			retcode = ERR_MANDATORY_TAG;
2377 			drbd_msg_put_info(from_attrs_err_to_txt(err));
2378 			goto fail;
2379 		}
2380 	}
2381 
2382 	rv = conn_try_disconnect(connection, parms.force_disconnect);
2383 	if (rv < SS_SUCCESS)
2384 		retcode = rv;  /* FIXME: Type mismatch. */
2385 	else
2386 		retcode = NO_ERROR;
2387  fail:
2388 	drbd_adm_finish(info, retcode);
2389 	return 0;
2390 }
2391 
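/* After an online grow, decide who resyncs the new area.  If the roles
 * differ, the Primary becomes sync source; with equal roles the tie is
 * broken by the RESOLVE_CONFLICTS flag, which is set on only one side of
 * the connection, so the peers come to complementary decisions. */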
2392 void resync_after_online_grow(struct drbd_device *device)
2393 {
2394 	int iass; /* I am sync source */
2395 
2396 	drbd_info(device, "Resync of new storage after online grow\n");
2397 	if (device->state.role != device->state.peer)
2398 		iass = (device->state.role == R_PRIMARY);
2399 	else
2400 		iass = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
2401 
2402 	if (iass)
2403 		drbd_start_resync(device, C_SYNC_SOURCE);
2404 	else
2405 		_drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
2406 }
2407 
2408 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
2409 {
2410 	struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
2411 	struct resize_parms rs;
2412 	struct drbd_device *device;
2413 	enum drbd_ret_code retcode;
2414 	enum determine_dev_size dd;
2415 	bool change_al_layout = false;
2416 	enum dds_flags ddsf;
2417 	sector_t u_size;
2418 	int err;
2419 
2420 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2421 	if (!adm_ctx.reply_skb)
2422 		return retcode;
2423 	if (retcode != NO_ERROR)
2424 		goto fail;
2425 
2426 	device = adm_ctx.device;
2427 	if (!get_ldev(device)) {
2428 		retcode = ERR_NO_DISK;
2429 		goto fail;
2430 	}
2431 
2432 	memset(&rs, 0, sizeof(struct resize_parms));
2433 	rs.al_stripes = device->ldev->md.al_stripes;
2434 	rs.al_stripe_size = device->ldev->md.al_stripe_size_4k * 4;
2435 	if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
2436 		err = resize_parms_from_attrs(&rs, info);
2437 		if (err) {
2438 			retcode = ERR_MANDATORY_TAG;
2439 			drbd_msg_put_info(from_attrs_err_to_txt(err));
2440 			goto fail_ldev;
2441 		}
2442 	}
2443 
2444 	if (device->state.conn > C_CONNECTED) {
2445 		retcode = ERR_RESIZE_RESYNC;
2446 		goto fail_ldev;
2447 	}
2448 
2449 	if (device->state.role == R_SECONDARY &&
2450 	    device->state.peer == R_SECONDARY) {
2451 		retcode = ERR_NO_PRIMARY;
2452 		goto fail_ldev;
2453 	}
2454 
2455 	if (rs.no_resync && first_peer_device(device)->connection->agreed_pro_version < 93) {
2456 		retcode = ERR_NEED_APV_93;
2457 		goto fail_ldev;
2458 	}
2459 
2460 	rcu_read_lock();
2461 	u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
2462 	rcu_read_unlock();
2463 	if (u_size != (sector_t)rs.resize_size) {
2464 		new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
2465 		if (!new_disk_conf) {
2466 			retcode = ERR_NOMEM;
2467 			goto fail_ldev;
2468 		}
2469 	}
2470 
2471 	if (device->ldev->md.al_stripes != rs.al_stripes ||
2472 	    device->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
2473 		u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
2474 
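		/* al_size_k is in KiB: rs.al_stripe_size was derived above as
		 * al_stripe_size_4k * 4.  The checks below appear to bound the
		 * activity log between 32 KiB (MD_32kB_SECT/2; halving sectors
		 * yields KiB) and 16 GiB. */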
2475 		if (al_size_k > (16 * 1024 * 1024)) {
2476 			retcode = ERR_MD_LAYOUT_TOO_BIG;
2477 			goto fail_ldev;
2478 		}
2479 
2480 		if (al_size_k < MD_32kB_SECT/2) {
2481 			retcode = ERR_MD_LAYOUT_TOO_SMALL;
2482 			goto fail_ldev;
2483 		}
2484 
2485 		if (device->state.conn != C_CONNECTED) {
2486 			retcode = ERR_MD_LAYOUT_CONNECTED;
2487 			goto fail_ldev;
2488 		}
2489 
2490 		change_al_layout = true;
2491 	}
2492 
2493 	if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev))
2494 		device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
2495 
2496 	if (new_disk_conf) {
2497 		mutex_lock(&device->resource->conf_update);
2498 		old_disk_conf = device->ldev->disk_conf;
2499 		*new_disk_conf = *old_disk_conf;
2500 		new_disk_conf->disk_size = (sector_t)rs.resize_size;
2501 		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
2502 		mutex_unlock(&device->resource->conf_update);
2503 		synchronize_rcu();
2504 		kfree(old_disk_conf);
2505 	}
2506 
2507 	ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
2508 	dd = drbd_determine_dev_size(device, ddsf, change_al_layout ? &rs : NULL);
2509 	drbd_md_sync(device);
2510 	put_ldev(device);
2511 	if (dd == DS_ERROR) {
2512 		retcode = ERR_NOMEM_BITMAP;
2513 		goto fail;
2514 	} else if (dd == DS_ERROR_SPACE_MD) {
2515 		retcode = ERR_MD_LAYOUT_NO_FIT;
2516 		goto fail;
2517 	} else if (dd == DS_ERROR_SHRINK) {
2518 		retcode = ERR_IMPLICIT_SHRINK;
2519 		goto fail;
2520 	}
2521 
2522 	if (device->state.conn == C_CONNECTED) {
2523 		if (dd == DS_GREW)
2524 			set_bit(RESIZE_PENDING, &device->flags);
2525 
2526 		drbd_send_uuids(first_peer_device(device));
2527 		drbd_send_sizes(first_peer_device(device), 1, ddsf);
2528 	}
2529 
2530  fail:
2531 	drbd_adm_finish(info, retcode);
2532 	return 0;
2533 
2534  fail_ldev:
2535 	put_ldev(device);
2536 	goto fail;
2537 }
2538 
2539 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
2540 {
2541 	enum drbd_ret_code retcode;
2542 	struct res_opts res_opts;
2543 	int err;
2544 
2545 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
2546 	if (!adm_ctx.reply_skb)
2547 		return retcode;
2548 	if (retcode != NO_ERROR)
2549 		goto fail;
2550 
2551 	res_opts = adm_ctx.resource->res_opts;
2552 	if (should_set_defaults(info))
2553 		set_res_opts_defaults(&res_opts);
2554 
2555 	err = res_opts_from_attrs(&res_opts, info);
2556 	if (err && err != -ENOMSG) {
2557 		retcode = ERR_MANDATORY_TAG;
2558 		drbd_msg_put_info(from_attrs_err_to_txt(err));
2559 		goto fail;
2560 	}
2561 
2562 	err = set_resource_options(adm_ctx.resource, &res_opts);
2563 	if (err) {
2564 		retcode = ERR_INVALID_REQUEST;
2565 		if (err == -ENOMEM)
2566 			retcode = ERR_NOMEM;
2567 	}
2568 
2569 fail:
2570 	drbd_adm_finish(info, retcode);
2571 	return 0;
2572 }
2573 
2574 int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
2575 {
2576 	struct drbd_device *device;
2577 	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2578 
2579 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2580 	if (!adm_ctx.reply_skb)
2581 		return retcode;
2582 	if (retcode != NO_ERROR)
2583 		goto out;
2584 
2585 	device = adm_ctx.device;
2586 
2587 	/* If there is still bitmap IO pending, probably because of a previous
2588 	 * resync just being finished, wait for it before requesting a new resync.
2589 	 * Also wait for its after_state_ch(). */
2590 	drbd_suspend_io(device);
2591 	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2592 	drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
2593 
2594 	/* If we happen to be C_STANDALONE R_SECONDARY, just change to
2595 	 * D_INCONSISTENT, and set all bits in the bitmap.  Otherwise,
2596 	 * try to start a resync handshake as sync target for full sync.
2597 	 */
2598 	if (device->state.conn == C_STANDALONE && device->state.role == R_SECONDARY) {
2599 		retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT));
2600 		if (retcode >= SS_SUCCESS) {
2601 			if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
2602 				"set_n_write from invalidate", BM_LOCKED_MASK))
2603 				retcode = ERR_IO_MD_DISK;
2604 		}
2605 	} else
2606 		retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
2607 	drbd_resume_io(device);
2608 
2609 out:
2610 	drbd_adm_finish(info, retcode);
2611 	return 0;
2612 }
2613 
2614 static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
2615 		union drbd_state mask, union drbd_state val)
2616 {
2617 	enum drbd_ret_code retcode;
2618 
2619 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2620 	if (!adm_ctx.reply_skb)
2621 		return retcode;
2622 	if (retcode != NO_ERROR)
2623 		goto out;
2624 
2625 	retcode = drbd_request_state(adm_ctx.device, mask, val);
2626 out:
2627 	drbd_adm_finish(info, retcode);
2628 	return 0;
2629 }
2630 
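/* Bitmap-IO hook used by invalidate-peer: set all bits for a full sync,
 * then suspend activity log updates while the whole bitmap is set anyway. */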
2631 static int drbd_bmio_set_susp_al(struct drbd_device *device)
2632 {
2633 	int rv;
2634 
2635 	rv = drbd_bmio_set_n_write(device);
2636 	drbd_suspend_al(device);
2637 	return rv;
2638 }
2639 
2640 int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
2641 {
2642 	int retcode; /* drbd_ret_code, drbd_state_rv */
2643 	struct drbd_device *device;
2644 
2645 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2646 	if (!adm_ctx.reply_skb)
2647 		return retcode;
2648 	if (retcode != NO_ERROR)
2649 		goto out;
2650 
2651 	device = adm_ctx.device;
2652 
2653 	/* If there is still bitmap IO pending, probably because of a previous
2654 	 * resync just being finished, wait for it before requesting a new resync.
2655 	 * Also wait for its after_state_ch(). */
2656 	drbd_suspend_io(device);
2657 	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2658 	drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
2659 
2660 	/* If we happen to be C_STANDALONE R_PRIMARY, just set all bits
2661 	 * in the bitmap.  Otherwise, try to start a resync handshake
2662 	 * as sync source for full sync.
2663 	 */
2664 	if (device->state.conn == C_STANDALONE && device->state.role == R_PRIMARY) {
2665 		/* The peer will get a resync upon connect anyway. Just make that
2666 		   into a full resync. */
2667 		retcode = drbd_request_state(device, NS(pdsk, D_INCONSISTENT));
2668 		if (retcode >= SS_SUCCESS) {
2669 			if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al,
2670 				"set_n_write from invalidate_peer",
2671 				BM_LOCKED_SET_ALLOWED))
2672 				retcode = ERR_IO_MD_DISK;
2673 		}
2674 	} else
2675 		retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
2676 	drbd_resume_io(device);
2677 
2678 out:
2679 	drbd_adm_finish(info, retcode);
2680 	return 0;
2681 }
2682 
2683 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
2684 {
2685 	enum drbd_ret_code retcode;
2686 
2687 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2688 	if (!adm_ctx.reply_skb)
2689 		return retcode;
2690 	if (retcode != NO_ERROR)
2691 		goto out;
2692 
2693 	if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
2694 		retcode = ERR_PAUSE_IS_SET;
2695 out:
2696 	drbd_adm_finish(info, retcode);
2697 	return 0;
2698 }
2699 
2700 int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
2701 {
2702 	union drbd_dev_state s;
2703 	enum drbd_ret_code retcode;
2704 
2705 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2706 	if (!adm_ctx.reply_skb)
2707 		return retcode;
2708 	if (retcode != NO_ERROR)
2709 		goto out;
2710 
2711 	if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
2712 		s = adm_ctx.device->state;
2713 		if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
2714 			retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
2715 				  s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
2716 		} else {
2717 			retcode = ERR_PAUSE_IS_CLEAR;
2718 		}
2719 	}
2720 
2721 out:
2722 	drbd_adm_finish(info, retcode);
2723 	return 0;
2724 }
2725 
2726 int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
2727 {
2728 	return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
2729 }
2730 
2731 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
2732 {
2733 	struct drbd_device *device;
2734 	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2735 
2736 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2737 	if (!adm_ctx.reply_skb)
2738 		return retcode;
2739 	if (retcode != NO_ERROR)
2740 		goto out;
2741 
2742 	device = adm_ctx.device;
2743 	if (test_bit(NEW_CUR_UUID, &device->flags)) {
2744 		drbd_uuid_new_current(device);
2745 		clear_bit(NEW_CUR_UUID, &device->flags);
2746 	}
2747 	drbd_suspend_io(device);
2748 	retcode = drbd_request_state(device, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
2749 	if (retcode == SS_SUCCESS) {
2750 		if (device->state.conn < C_CONNECTED)
2751 			tl_clear(first_peer_device(device)->connection);
2752 		if (device->state.disk == D_DISKLESS || device->state.disk == D_FAILED)
2753 			tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO);
2754 	}
2755 	drbd_resume_io(device);
2756 
2757 out:
2758 	drbd_adm_finish(info, retcode);
2759 	return 0;
2760 }
2761 
2762 int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
2763 {
2764 	return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
2765 }
2766 
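/* Nest a DRBD_NLA_CFG_CONTEXT attribute into @skb: the resource name, the
 * volume number (if @device is given) and both endpoint addresses (if
 * @connection is given).  Returns -EMSGSIZE and cancels the nest when the
 * skb runs out of room. */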
2767 static int nla_put_drbd_cfg_context(struct sk_buff *skb,
2768 				    struct drbd_resource *resource,
2769 				    struct drbd_connection *connection,
2770 				    struct drbd_device *device)
2771 {
2772 	struct nlattr *nla;
2773 	nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
2774 	if (!nla)
2775 		goto nla_put_failure;
2776 	if (device &&
2777 	    nla_put_u32(skb, T_ctx_volume, device->vnr))
2778 		goto nla_put_failure;
2779 	if (nla_put_string(skb, T_ctx_resource_name, resource->name))
2780 		goto nla_put_failure;
2781 	if (connection) {
2782 		if (connection->my_addr_len &&
2783 		    nla_put(skb, T_ctx_my_addr, connection->my_addr_len, &connection->my_addr))
2784 			goto nla_put_failure;
2785 		if (connection->peer_addr_len &&
2786 		    nla_put(skb, T_ctx_peer_addr, connection->peer_addr_len, &connection->peer_addr))
2787 			goto nla_put_failure;
2788 	}
2789 	nla_nest_end(skb, nla);
2790 	return 0;
2791 
2792 nla_put_failure:
2793 	if (nla)
2794 		nla_nest_cancel(skb, nla);
2795 	return -EMSGSIZE;
2796 }
2797 
2798 /*
2799  * Return the connection of @resource if @resource has exactly one connection.
2800  */
2801 static struct drbd_connection *the_only_connection(struct drbd_resource *resource)
2802 {
2803 	struct list_head *connections = &resource->connections;
2804 
2805 	if (list_empty(connections) || connections->next->next != connections)
2806 		return NULL;
2807 	return list_first_entry(&resource->connections, struct drbd_connection, connections);
2808 }
2809 
2810 int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
2811 		const struct sib_info *sib)
2812 {
2813 	struct drbd_resource *resource = device->resource;
2814 	struct state_info *si = NULL; /* for sizeof(si->member); */
2815 	struct nlattr *nla;
2816 	int got_ldev;
2817 	int err = 0;
2818 	int exclude_sensitive;
2819 
2820 	/* If sib != NULL, this is drbd_bcast_event, which anyone can listen
2821 	 * to.  So we better exclude_sensitive information.
2822 	 *
2823 	 * If sib == NULL, this is drbd_adm_get_status, executed synchronously
2824 	 * in the context of the requesting user process. Exclude sensitive
2825 	 * information, unless current has superuser.
2826 	 *
2827 	 * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
2828 	 * relies on the current implementation of netlink_dump(), which
2829 	 * executes the dump callback successively from netlink_recvmsg(),
2830 	 * always in the context of the receiving process */
2831 	exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);
2832 
2833 	got_ldev = get_ldev(device);
2834 
2835 	/* We still need to add connection name and volume number information.
2836 	 * Minor number is in drbd_genlmsghdr. */
2837 	if (nla_put_drbd_cfg_context(skb, resource, the_only_connection(resource), device))
2838 		goto nla_put_failure;
2839 
2840 	if (res_opts_to_skb(skb, &device->resource->res_opts, exclude_sensitive))
2841 		goto nla_put_failure;
2842 
2843 	rcu_read_lock();
2844 	if (got_ldev) {
2845 		struct disk_conf *disk_conf;
2846 
2847 		disk_conf = rcu_dereference(device->ldev->disk_conf);
2848 		err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
2849 	}
2850 	if (!err) {
2851 		struct net_conf *nc;
2852 
2853 		nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
2854 		if (nc)
2855 			err = net_conf_to_skb(skb, nc, exclude_sensitive);
2856 	}
2857 	rcu_read_unlock();
2858 	if (err)
2859 		goto nla_put_failure;
2860 
2861 	nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
2862 	if (!nla)
2863 		goto nla_put_failure;
2864 	if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
2865 	    nla_put_u32(skb, T_current_state, device->state.i) ||
2866 	    nla_put_u64(skb, T_ed_uuid, device->ed_uuid) ||
2867 	    nla_put_u64(skb, T_capacity, drbd_get_capacity(device->this_bdev)) ||
2868 	    nla_put_u64(skb, T_send_cnt, device->send_cnt) ||
2869 	    nla_put_u64(skb, T_recv_cnt, device->recv_cnt) ||
2870 	    nla_put_u64(skb, T_read_cnt, device->read_cnt) ||
2871 	    nla_put_u64(skb, T_writ_cnt, device->writ_cnt) ||
2872 	    nla_put_u64(skb, T_al_writ_cnt, device->al_writ_cnt) ||
2873 	    nla_put_u64(skb, T_bm_writ_cnt, device->bm_writ_cnt) ||
2874 	    nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&device->ap_bio_cnt)) ||
2875 	    nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&device->ap_pending_cnt)) ||
2876 	    nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&device->rs_pending_cnt)))
2877 		goto nla_put_failure;
2878 
2879 	if (got_ldev) {
2880 		int err;
2881 
2882 		spin_lock_irq(&device->ldev->md.uuid_lock);
2883 		err = nla_put(skb, T_uuids, sizeof(si->uuids), device->ldev->md.uuid);
2884 		spin_unlock_irq(&device->ldev->md.uuid_lock);
2885 
2886 		if (err)
2887 			goto nla_put_failure;
2888 
2889 		if (nla_put_u32(skb, T_disk_flags, device->ldev->md.flags) ||
2890 		    nla_put_u64(skb, T_bits_total, drbd_bm_bits(device)) ||
2891 		    nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(device)))
2892 			goto nla_put_failure;
2893 		if (C_SYNC_SOURCE <= device->state.conn &&
2894 		    C_PAUSED_SYNC_T >= device->state.conn) {
2895 			if (nla_put_u64(skb, T_bits_rs_total, device->rs_total) ||
2896 			    nla_put_u64(skb, T_bits_rs_failed, device->rs_failed))
2897 				goto nla_put_failure;
2898 		}
2899 	}
2900 
2901 	if (sib) {
2902 		switch(sib->sib_reason) {
2903 		case SIB_SYNC_PROGRESS:
2904 		case SIB_GET_STATUS_REPLY:
2905 			break;
2906 		case SIB_STATE_CHANGE:
2907 			if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
2908 			    nla_put_u32(skb, T_new_state, sib->ns.i))
2909 				goto nla_put_failure;
2910 			break;
2911 		case SIB_HELPER_POST:
2912 			if (nla_put_u32(skb, T_helper_exit_code,
2913 					sib->helper_exit_code))
2914 				goto nla_put_failure;
2915 			/* fall through */
2916 		case SIB_HELPER_PRE:
2917 			if (nla_put_string(skb, T_helper, sib->helper_name))
2918 				goto nla_put_failure;
2919 			break;
2920 		}
2921 	}
2922 	nla_nest_end(skb, nla);
2923 
2924 	if (0)
2925 nla_put_failure:
2926 		err = -EMSGSIZE;
2927 	if (got_ldev)
2928 		put_ldev(device);
2929 	return err;
2930 }
2931 
2932 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
2933 {
2934 	enum drbd_ret_code retcode;
2935 	int err;
2936 
2937 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2938 	if (!adm_ctx.reply_skb)
2939 		return retcode;
2940 	if (retcode != NO_ERROR)
2941 		goto out;
2942 
2943 	err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.device, NULL);
2944 	if (err) {
2945 		nlmsg_free(adm_ctx.reply_skb);
2946 		return err;
2947 	}
2948 out:
2949 	drbd_adm_finish(info, retcode);
2950 	return 0;
2951 }
2952 
2953 static int get_one_status(struct sk_buff *skb, struct netlink_callback *cb)
2954 {
2955 	struct drbd_device *device;
2956 	struct drbd_genlmsghdr *dh;
2957 	struct drbd_resource *pos = (struct drbd_resource *)cb->args[0];
2958 	struct drbd_resource *resource = NULL;
2959 	struct drbd_resource *tmp;
2960 	unsigned volume = cb->args[1];
2961 
2962 	/* Open-coded, deferred iteration:
2963 	 * for_each_resource_safe(resource, tmp, &drbd_resources) {
2964 	 *      connection = "first connection of resource or undefined";
2965 	 *	idr_for_each_entry(&resource->devices, device, i) {
2966 	 *	  ...
2967 	 *	}
2968 	 * }
2969 	 * where resource is cb->args[0];
2970 	 * and i is cb->args[1];
2971 	 *
2972 	 * cb->args[2] indicates if we shall loop over all resources,
2973 	 * or just dump all volumes of a single resource.
2974 	 *
2975 	 * This may miss entries inserted after this dump started,
2976 	 * or entries deleted before they are reached.
2977 	 *
2978 	 * We need to make sure the device won't disappear while
2979 	 * we are looking at it, and revalidate our iterators
2980 	 * on each iteration.
2981 	 */
2982 
2983 	/* synchronize with conn_create()/drbd_destroy_connection() */
2984 	rcu_read_lock();
2985 	/* revalidate iterator position */
2986 	for_each_resource_rcu(tmp, &drbd_resources) {
2987 		if (pos == NULL) {
2988 			/* first iteration */
2989 			pos = tmp;
2990 			resource = pos;
2991 			break;
2992 		}
2993 		if (tmp == pos) {
2994 			resource = pos;
2995 			break;
2996 		}
2997 	}
2998 	if (resource) {
2999 next_resource:
3000 		device = idr_get_next(&resource->devices, &volume);
3001 		if (!device) {
3002 			/* No more volumes to dump on this resource.
3003 			 * Advance resource iterator. */
3004 			pos = list_entry_rcu(resource->resources.next,
3005 					     struct drbd_resource, resources);
3006 			/* Did we dump any volume of this resource yet? */
3007 			if (volume != 0) {
3008 				/* If we reached the end of the list,
3009 				 * or only a single resource dump was requested,
3010 				 * we are done. */
3011 				if (&pos->resources == &drbd_resources || cb->args[2])
3012 					goto out;
3013 				volume = 0;
3014 				resource = pos;
3015 				goto next_resource;
3016 			}
3017 		}
3018 
3019 		dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3020 				cb->nlh->nlmsg_seq, &drbd_genl_family,
3021 				NLM_F_MULTI, DRBD_ADM_GET_STATUS);
3022 		if (!dh)
3023 			goto out;
3024 
3025 		if (!device) {
3026 			/* This is a connection without a single volume.
3027 			 * Surprisingly enough, it may have a network
3028 			 * configuration. */
3029 			struct drbd_connection *connection;
3030 
3031 			dh->minor = -1U;
3032 			dh->ret_code = NO_ERROR;
3033 			connection = the_only_connection(resource);
3034 			if (nla_put_drbd_cfg_context(skb, resource, connection, NULL))
3035 				goto cancel;
3036 			if (connection) {
3037 				struct net_conf *nc;
3038 
3039 				nc = rcu_dereference(connection->net_conf);
3040 				if (nc && net_conf_to_skb(skb, nc, 1) != 0)
3041 					goto cancel;
3042 			}
3043 			goto done;
3044 		}
3045 
3046 		D_ASSERT(device, device->vnr == volume);
3047 		D_ASSERT(device, device->resource == resource);
3048 
3049 		dh->minor = device_to_minor(device);
3050 		dh->ret_code = NO_ERROR;
3051 
3052 		if (nla_put_status_info(skb, device, NULL)) {
3053 cancel:
3054 			genlmsg_cancel(skb, dh);
3055 			goto out;
3056 		}
3057 done:
3058 		genlmsg_end(skb, dh);
3059 	}
3060 
3061 out:
3062 	rcu_read_unlock();
3063 	/* where to start the next iteration */
3064 	cb->args[0] = (long)pos;
3065 	cb->args[1] = (pos == resource) ? volume + 1 : 0;
3066 
3067 	/* No more resources/volumes/minors found results in an empty skb,
3068 	 * which will terminate the dump. */
3069 	return skb->len;
3070 }
3071 
3072 /*
3073  * Request status of all resources, or of all volumes within a single resource.
3074  *
3075  * This is a dump, as the answer may not fit in a single reply skb otherwise.
3076  * This means we cannot use the family->attrbuf or other such members, because
3077  * dump is NOT protected by the genl_lock().  During dump, we only have access
3078  * to the incoming skb, and need to opencode "parsing" of the nlattr payload.
3079  *
3080  * Once things are setup properly, we call into get_one_status().
3081  */
3082 int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
3083 {
3084 	const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
3085 	struct nlattr *nla;
3086 	const char *resource_name;
3087 	struct drbd_resource *resource;
3088 	int maxtype;
3089 
3090 	/* Is this a followup call? */
3091 	if (cb->args[0]) {
3092 		/* ... of a single resource dump,
3093 		 * and the resource iterator has been advanced already? */
3094 		if (cb->args[2] && cb->args[2] != cb->args[0])
3095 			return 0; /* DONE. */
3096 		goto dump;
3097 	}
3098 
3099 	/* First call (from netlink_dump_start).  We need to figure out
3100 	 * which resource(s) the user wants us to dump. */
3101 	nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen),
3102 			nlmsg_attrlen(cb->nlh, hdrlen),
3103 			DRBD_NLA_CFG_CONTEXT);
3104 
3105 	/* No explicit context given.  Dump all. */
3106 	if (!nla)
3107 		goto dump;
3108 	maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
3109 	nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
3110 	if (IS_ERR(nla))
3111 		return PTR_ERR(nla);
3112 	/* context given, but no name present? */
3113 	if (!nla)
3114 		return -EINVAL;
3115 	resource_name = nla_data(nla);
3116 	if (!*resource_name)
3117 		return -ENODEV;
3118 	resource = drbd_find_resource(resource_name);
3119 	if (!resource)
3120 		return -ENODEV;
3121 
3122 	kref_put(&resource->kref, drbd_destroy_resource); /* get_one_status() revalidates the resource */
3123 
3124 	/* prime iterators, and set "filter" mode mark:
3125 	 * only dump this resource. */
3126 	cb->args[0] = (long)resource;
3127 	/* cb->args[1] = 0; passed in this way. */
3128 	cb->args[2] = (long)resource;
3129 
3130 dump:
3131 	return get_one_status(skb, cb);
3132 }
3133 
3134 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
3135 {
3136 	enum drbd_ret_code retcode;
3137 	struct timeout_parms tp;
3138 	int err;
3139 
3140 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
3141 	if (!adm_ctx.reply_skb)
3142 		return retcode;
3143 	if (retcode != NO_ERROR)
3144 		goto out;
3145 
3146 	tp.timeout_type =
3147 		adm_ctx.device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
3148 		test_bit(USE_DEGR_WFC_T, &adm_ctx.device->flags) ? UT_DEGRADED :
3149 		UT_DEFAULT;
3150 
3151 	err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
3152 	if (err) {
3153 		nlmsg_free(adm_ctx.reply_skb);
3154 		return err;
3155 	}
3156 out:
3157 	drbd_adm_finish(info, retcode);
3158 	return 0;
3159 }
3160 
3161 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
3162 {
3163 	struct drbd_device *device;
3164 	enum drbd_ret_code retcode;
3165 	struct start_ov_parms parms;
3166 
3167 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
3168 	if (!adm_ctx.reply_skb)
3169 		return retcode;
3170 	if (retcode != NO_ERROR)
3171 		goto out;
3172 
3173 	device = adm_ctx.device;
3174 
3175 	/* resume from last known position, if possible */
3176 	parms.ov_start_sector = device->ov_start_sector;
3177 	parms.ov_stop_sector = ULLONG_MAX;
3178 	if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
3179 		int err = start_ov_parms_from_attrs(&parms, info);
3180 		if (err) {
3181 			retcode = ERR_MANDATORY_TAG;
3182 			drbd_msg_put_info(from_attrs_err_to_txt(err));
3183 			goto out;
3184 		}
3185 	}
3186 	/* w_make_ov_request expects position to be aligned */
3187 	device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
3188 	device->ov_stop_sector = parms.ov_stop_sector;
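	/* Example of the masking above, assuming the usual 4 KiB per bitmap
	 * bit (BM_SECT_PER_BIT == 8): a requested start sector of 21 is
	 * rounded down to 16, the first sector covered by that bitmap bit. */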
3189 
3190 	/* If there is still bitmap IO pending, e.g. previous resync or verify
3191 	 * just being finished, wait for it before requesting a new resync. */
3192 	drbd_suspend_io(device);
3193 	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
3194 	retcode = drbd_request_state(device, NS(conn, C_VERIFY_S));
3195 	drbd_resume_io(device);
3196 out:
3197 	drbd_adm_finish(info, retcode);
3198 	return 0;
3199 }
3200 
3201 
3202 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
3203 {
3204 	struct drbd_device *device;
3205 	enum drbd_ret_code retcode;
3206 	int skip_initial_sync = 0;
3207 	int err;
3208 	struct new_c_uuid_parms args;
3209 
3210 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
3211 	if (!adm_ctx.reply_skb)
3212 		return retcode;
3213 	if (retcode != NO_ERROR)
3214 		goto out_nolock;
3215 
3216 	device = adm_ctx.device;
3217 	memset(&args, 0, sizeof(args));
3218 	if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
3219 		err = new_c_uuid_parms_from_attrs(&args, info);
3220 		if (err) {
3221 			retcode = ERR_MANDATORY_TAG;
3222 			drbd_msg_put_info(from_attrs_err_to_txt(err));
3223 			goto out_nolock;
3224 		}
3225 	}
3226 
3227 	mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */
3228 
3229 	if (!get_ldev(device)) {
3230 		retcode = ERR_NO_DISK;
3231 		goto out;
3232 	}
3233 
3234 	/* this is "skip initial sync", assumed to be clean */
3235 	if (device->state.conn == C_CONNECTED &&
3236 	    first_peer_device(device)->connection->agreed_pro_version >= 90 &&
3237 	    device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
3238 		drbd_info(device, "Preparing to skip initial sync\n");
3239 		skip_initial_sync = 1;
3240 	} else if (device->state.conn != C_STANDALONE) {
3241 		retcode = ERR_CONNECTED;
3242 		goto out_dec;
3243 	}
3244 
3245 	drbd_uuid_set(device, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
3246 	drbd_uuid_new_current(device); /* New current, previous to UI_BITMAP */
3247 
3248 	if (args.clear_bm) {
3249 		err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
3250 			"clear_n_write from new_c_uuid", BM_LOCKED_MASK);
3251 		if (err) {
3252 			drbd_err(device, "Writing bitmap failed with %d\n", err);
3253 			retcode = ERR_IO_MD_DISK;
3254 		}
3255 		if (skip_initial_sync) {
3256 			drbd_send_uuids_skip_initial_sync(first_peer_device(device));
3257 			_drbd_uuid_set(device, UI_BITMAP, 0);
3258 			drbd_print_uuids(device, "cleared bitmap UUID");
3259 			spin_lock_irq(&device->resource->req_lock);
3260 			_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3261 					CS_VERBOSE, NULL);
3262 			spin_unlock_irq(&device->resource->req_lock);
3263 		}
3264 	}
3265 
3266 	drbd_md_sync(device);
3267 out_dec:
3268 	put_ldev(device);
3269 out:
3270 	mutex_unlock(device->state_mutex);
3271 out_nolock:
3272 	drbd_adm_finish(info, retcode);
3273 	return 0;
3274 }
3275 
3276 static enum drbd_ret_code
3277 drbd_check_resource_name(const char *name)
3278 {
3279 	if (!name || !name[0]) {
3280 		drbd_msg_put_info("resource name missing");
3281 		return ERR_MANDATORY_TAG;
3282 	}
3283 	/* if we want to use these in sysfs/configfs/debugfs some day,
3284 	 * we must not allow slashes */
3285 	if (strchr(name, '/')) {
3286 		drbd_msg_put_info("invalid resource name");
3287 		return ERR_INVALID_REQUEST;
3288 	}
3289 	return NO_ERROR;
3290 }
3291 
3292 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
3293 {
3294 	enum drbd_ret_code retcode;
3295 	struct res_opts res_opts;
3296 	int err;
3297 
3298 	retcode = drbd_adm_prepare(skb, info, 0);
3299 	if (!adm_ctx.reply_skb)
3300 		return retcode;
3301 	if (retcode != NO_ERROR)
3302 		goto out;
3303 
3304 	set_res_opts_defaults(&res_opts);
3305 	err = res_opts_from_attrs(&res_opts, info);
3306 	if (err && err != -ENOMSG) {
3307 		retcode = ERR_MANDATORY_TAG;
3308 		drbd_msg_put_info(from_attrs_err_to_txt(err));
3309 		goto out;
3310 	}
3311 
3312 	retcode = drbd_check_resource_name(adm_ctx.resource_name);
3313 	if (retcode != NO_ERROR)
3314 		goto out;
3315 
3316 	if (adm_ctx.resource) {
3317 		if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
3318 			retcode = ERR_INVALID_REQUEST;
3319 			drbd_msg_put_info("resource exists");
3320 		}
3321 		/* else: still NO_ERROR */
3322 		goto out;
3323 	}
3324 
3325 	if (!conn_create(adm_ctx.resource_name, &res_opts))
3326 		retcode = ERR_NOMEM;
3327 out:
3328 	drbd_adm_finish(info, retcode);
3329 	return 0;
3330 }
3331 
3332 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
3333 {
3334 	struct drbd_genlmsghdr *dh = info->userhdr;
3335 	enum drbd_ret_code retcode;
3336 
3337 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
3338 	if (!adm_ctx.reply_skb)
3339 		return retcode;
3340 	if (retcode != NO_ERROR)
3341 		goto out;
3342 
3343 	if (dh->minor > MINORMASK) {
3344 		drbd_msg_put_info("requested minor out of range");
3345 		retcode = ERR_INVALID_REQUEST;
3346 		goto out;
3347 	}
3348 	if (adm_ctx.volume > DRBD_VOLUME_MAX) {
3349 		drbd_msg_put_info("requested volume id out of range");
3350 		retcode = ERR_INVALID_REQUEST;
3351 		goto out;
3352 	}
3353 
3354 	/* drbd_adm_prepare already made sure
3355 	 * that first_peer_device(device)->connection and device->vnr match the request. */
3356 	if (adm_ctx.device) {
3357 		if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
3358 			retcode = ERR_MINOR_EXISTS;
3359 		/* else: still NO_ERROR */
3360 		goto out;
3361 	}
3362 
3363 	retcode = drbd_create_device(adm_ctx.resource, dh->minor, adm_ctx.volume);
3364 out:
3365 	drbd_adm_finish(info, retcode);
3366 	return 0;
3367 }
3368 
3369 static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
3370 {
3371 	if (device->state.disk == D_DISKLESS &&
3372 	    /* no need to require device->state.conn == C_STANDALONE here:
3373 	     * we may want to delete a minor from a live replication group.
3374 	     */
3375 	    device->state.role == R_SECONDARY) {
3376 		_drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS),
3377 				    CS_VERBOSE | CS_WAIT_COMPLETE);
3378 		drbd_delete_device(device);
3379 		return NO_ERROR;
3380 	}
3381 	return ERR_MINOR_CONFIGURED;
3382 }
3383 
3384 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info)
3385 {
3386 	enum drbd_ret_code retcode;
3387 
3388 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
3389 	if (!adm_ctx.reply_skb)
3390 		return retcode;
3391 	if (retcode != NO_ERROR)
3392 		goto out;
3393 
3394 	retcode = adm_del_minor(adm_ctx.device);
3395 out:
3396 	drbd_adm_finish(info, retcode);
3397 	return 0;
3398 }
3399 
3400 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
3401 {
3402 	struct drbd_resource *resource;
3403 	struct drbd_connection *connection;
3404 	struct drbd_device *device;
3405 	int retcode; /* enum drbd_ret_code or enum drbd_state_rv, respectively */
3406 	unsigned i;
3407 
3408 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
3409 	if (!adm_ctx.reply_skb)
3410 		return retcode;
3411 	if (retcode != NO_ERROR)
3412 		goto out;
3413 
3414 	resource = adm_ctx.resource;
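	/* Teardown happens strictly in order: demote all volumes, disconnect,
	 * detach, and only then delete the volumes and the resource itself. */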
3415 	/* demote */
3416 	for_each_connection(connection, resource) {
3417 		struct drbd_peer_device *peer_device;
3418 
3419 		idr_for_each_entry(&connection->peer_devices, peer_device, i) {
3420 			retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0);
3421 			if (retcode < SS_SUCCESS) {
3422 				drbd_msg_put_info("failed to demote");
3423 				goto out;
3424 			}
3425 		}
3426 
3427 		retcode = conn_try_disconnect(connection, 0);
3428 		if (retcode < SS_SUCCESS) {
3429 			drbd_msg_put_info("failed to disconnect");
3430 			goto out;
3431 		}
3432 	}
3433 
3434 	/* detach */
3435 	idr_for_each_entry(&resource->devices, device, i) {
3436 		retcode = adm_detach(device, 0);
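		/* adm_detach() may return an enum drbd_state_rv (failure is
		 * < SS_SUCCESS) or an enum drbd_ret_code (error is > NO_ERROR). */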
3437 		if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
3438 			drbd_msg_put_info("failed to detach");
3439 			goto out;
3440 		}
3441 	}
3442 
3443 	/* If we reach this, all volumes (of this connection) are Secondary,
3444 	 * Disconnected, Diskless, aka Unconfigured. Make sure all threads have
3445 	 * actually stopped; state handling only does drbd_thread_stop_nowait(). */
3446 	for_each_connection(connection, resource)
3447 		drbd_thread_stop(&connection->worker);
3448 
3449 	/* Now, nothing can fail anymore */
3450 
3451 	/* delete volumes */
3452 	idr_for_each_entry(&resource->devices, device, i) {
3453 		retcode = adm_del_minor(device);
3454 		if (retcode != NO_ERROR) {
3455 			/* "cannot happen" */
3456 			drbd_msg_put_info("failed to delete volume");
3457 			goto out;
3458 		}
3459 	}
3460 
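	/* Unlink the resource and wait out an RCU grace period, so that no
	 * reader can still see it by the time it is freed. */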
3461 	list_del_rcu(&resource->resources);
3462 	synchronize_rcu();
3463 	drbd_free_resource(resource);
3464 	retcode = NO_ERROR;
3465 
3466 out:
3467 	drbd_adm_finish(info, retcode);
3468 	return 0;
3469 }
3470 
3471 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
3472 {
3473 	struct drbd_resource *resource;
3474 	struct drbd_connection *connection;
3475 	enum drbd_ret_code retcode;
3476 
3477 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
3478 	if (!adm_ctx.reply_skb)
3479 		return retcode;
3480 	if (retcode != NO_ERROR)
3481 		goto out;
3482 
3483 	resource = adm_ctx.resource;
3484 	for_each_connection(connection, resource) {
3485 		if (connection->cstate > C_STANDALONE) {
3486 			retcode = ERR_NET_CONFIGURED;
3487 			goto out;
3488 		}
3489 	}
3490 	if (!idr_is_empty(&resource->devices)) {
3491 		retcode = ERR_RES_IN_USE;
3492 		goto out;
3493 	}
3494 
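	/* Same RCU pattern as in drbd_adm_down(): unlink the resource, stop
	 * the worker threads, then wait out the grace period before freeing. */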
3495 	list_del_rcu(&resource->resources);
3496 	for_each_connection(connection, resource)
3497 		drbd_thread_stop(&connection->worker);
3498 	synchronize_rcu();
3499 	drbd_free_resource(resource);
3500 	retcode = NO_ERROR;
3501 out:
3502 	drbd_adm_finish(info, retcode);
3503 	return 0;
3504 }
3505 
3506 void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
3507 {
3508 	static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
3509 	struct sk_buff *msg;
3510 	struct drbd_genlmsghdr *d_out;
3511 	unsigned seq;
3512 	int err = -ENOMEM;
3513 
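	/* Rate-limit sync progress events to at most one per second;
	 * all other events are always broadcast. */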
3514 	if (sib->sib_reason == SIB_SYNC_PROGRESS) {
3515 		if (time_after(jiffies, device->rs_last_bcast + HZ))
3516 			device->rs_last_bcast = jiffies;
3517 		else
3518 			return;
3519 	}
3520 
3521 	seq = atomic_inc_return(&drbd_genl_seq);
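	/* GFP_NOIO: events can fire from state changes on the I/O path, so
	 * the allocation must not recurse into the block layer via reclaim. */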
3522 	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
3523 	if (!msg)
3524 		goto failed;
3525 
3526 	err = -EMSGSIZE;
3527 	d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
3528 	if (!d_out) /* cannot happen, but anyway. */
3529 		goto nla_put_failure;
3530 	d_out->minor = device_to_minor(device);
3531 	d_out->ret_code = NO_ERROR;
3532 
3533 	if (nla_put_status_info(msg, device, sib))
3534 		goto nla_put_failure;
3535 	genlmsg_end(msg, d_out);
3536 	err = drbd_genl_multicast_events(msg, 0);
3537 	/* msg has been consumed or freed in netlink_broadcast() */
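	/* -ESRCH merely means nobody is subscribed to the multicast group. */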
3538 	if (err && err != -ESRCH)
3539 		goto failed;
3540 
3541 	return;
3542 
3543 nla_put_failure:
3544 	nlmsg_free(msg);
3545 failed:
3546 	drbd_err(device, "Error %d while broadcasting event. "
3547 			"Event seq:%u sib_reason:%u\n",
3548 			err, seq, sib->sib_reason);
3549 }
3550