xref: /openbmc/linux/drivers/block/drbd/drbd_nl.c (revision 62e7ca52)
1 /*
2    drbd_nl.c
3 
4    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5 
6    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9 
10    drbd is free software; you can redistribute it and/or modify
11    it under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 2, or (at your option)
13    any later version.
14 
15    drbd is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with drbd; see the file COPYING.  If not, write to
22    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 
24  */
25 
26 #include <linux/module.h>
27 #include <linux/drbd.h>
28 #include <linux/in.h>
29 #include <linux/fs.h>
30 #include <linux/file.h>
31 #include <linux/slab.h>
32 #include <linux/blkpg.h>
33 #include <linux/cpumask.h>
34 #include "drbd_int.h"
35 #include "drbd_protocol.h"
36 #include "drbd_req.h"
37 #include <asm/unaligned.h>
38 #include <linux/drbd_limits.h>
39 #include <linux/kthread.h>
40 
41 #include <net/genetlink.h>
42 
43 /* .doit */
44 // int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
45 // int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);
46 
47 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info);
48 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info);
49 
50 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
51 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
52 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);
53 
54 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
55 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
56 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
57 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
58 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
59 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
60 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
61 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
62 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
63 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
64 int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
65 int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
66 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
67 int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
68 int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
69 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
70 int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
71 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
72 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
73 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
74 /* .dumpit */
75 int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
76 
77 #include <linux/drbd_genl_api.h>
78 #include "drbd_nla.h"
79 #include <linux/genl_magic_func.h>
80 
81 /* used with blkdev_get_by_path, to claim our meta data device(s) */
82 static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
83 
84 static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
85 {
86 	genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
87 	if (genlmsg_reply(skb, info))
88 		printk(KERN_ERR "drbd: error sending genl reply\n");
89 }
90 
91 /* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
92  * reason it could fail would be lack of space in the skb, and there are 4k available. */
93 int drbd_msg_put_info(struct sk_buff *skb, const char *info)
94 {
95 	struct nlattr *nla;
96 	int err = -EMSGSIZE;
97 
98 	if (!info || !info[0])
99 		return 0;
100 
101 	nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
102 	if (!nla)
103 		return err;
104 
105 	err = nla_put_string(skb, T_info_text, info);
106 	if (err) {
107 		nla_nest_cancel(skb, nla);
108 		return err;
109 	} else
110 		nla_nest_end(skb, nla);
111 	return 0;
112 }
113 
114 /* This would be a good candidate for a "pre_doit" hook,
115  * and per-family private info->pointers.
116  * But we need to stay compatible with older kernels.
117  * If it returns successfully, adm_ctx members are valid.
118  *
119  * At this point, we still rely on the global genl_lock().
120  * If we want to avoid that, and allow "genl_family.parallel_ops", we may need
121  * to add additional synchronization against object destruction/modification.
122  */
123 #define DRBD_ADM_NEED_MINOR	1
124 #define DRBD_ADM_NEED_RESOURCE	2
125 #define DRBD_ADM_NEED_CONNECTION 4
126 static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
127 	struct sk_buff *skb, struct genl_info *info, unsigned flags)
128 {
129 	struct drbd_genlmsghdr *d_in = info->userhdr;
130 	const u8 cmd = info->genlhdr->cmd;
131 	int err;
132 
133 	memset(adm_ctx, 0, sizeof(*adm_ctx));
134 
135 	/* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
136 	if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
137 	       return -EPERM;
138 
139 	adm_ctx->reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
140 	if (!adm_ctx->reply_skb) {
141 		err = -ENOMEM;
142 		goto fail;
143 	}
144 
145 	adm_ctx->reply_dh = genlmsg_put_reply(adm_ctx->reply_skb,
146 					info, &drbd_genl_family, 0, cmd);
147 	/* putting a few bytes into a fresh skb of >= 4k will always succeed,
148 	 * but check anyway */
149 	if (!adm_ctx->reply_dh) {
150 		err = -ENOMEM;
151 		goto fail;
152 	}
153 
154 	adm_ctx->reply_dh->minor = d_in->minor;
155 	adm_ctx->reply_dh->ret_code = NO_ERROR;
156 
157 	adm_ctx->volume = VOLUME_UNSPECIFIED;
158 	if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
159 		struct nlattr *nla;
160 		/* parse and validate only */
161 		err = drbd_cfg_context_from_attrs(NULL, info);
162 		if (err)
163 			goto fail;
164 
165 		/* It was present, and valid,
166 		 * copy it over to the reply skb. */
167 		err = nla_put_nohdr(adm_ctx->reply_skb,
168 				info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
169 				info->attrs[DRBD_NLA_CFG_CONTEXT]);
170 		if (err)
171 			goto fail;
172 
173 		/* and assign stuff to the adm_ctx */
174 		nla = nested_attr_tb[__nla_type(T_ctx_volume)];
175 		if (nla)
176 			adm_ctx->volume = nla_get_u32(nla);
177 		nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
178 		if (nla)
179 			adm_ctx->resource_name = nla_data(nla);
180 		adm_ctx->my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
181 		adm_ctx->peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
182 		if ((adm_ctx->my_addr &&
183 		     nla_len(adm_ctx->my_addr) > sizeof(adm_ctx->connection->my_addr)) ||
184 		    (adm_ctx->peer_addr &&
185 		     nla_len(adm_ctx->peer_addr) > sizeof(adm_ctx->connection->peer_addr))) {
186 			err = -EINVAL;
187 			goto fail;
188 		}
189 	}
190 
191 	adm_ctx->minor = d_in->minor;
192 	adm_ctx->device = minor_to_device(d_in->minor);
193 
194 	/* We are protected by the global genl_lock().
195 	 * But we may explicitly drop it/retake it in drbd_adm_set_role(),
196 	 * so make sure this object stays around. */
197 	if (adm_ctx->device)
198 		kref_get(&adm_ctx->device->kref);
199 
200 	if (adm_ctx->resource_name) {
201 		adm_ctx->resource = drbd_find_resource(adm_ctx->resource_name);
202 	}
203 
204 	if (!adm_ctx->device && (flags & DRBD_ADM_NEED_MINOR)) {
205 		drbd_msg_put_info(adm_ctx->reply_skb, "unknown minor");
206 		return ERR_MINOR_INVALID;
207 	}
208 	if (!adm_ctx->resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
209 		drbd_msg_put_info(adm_ctx->reply_skb, "unknown resource");
210 		if (adm_ctx->resource_name)
211 			return ERR_RES_NOT_KNOWN;
212 		return ERR_INVALID_REQUEST;
213 	}
214 
215 	if (flags & DRBD_ADM_NEED_CONNECTION) {
216 		if (adm_ctx->resource) {
217 			drbd_msg_put_info(adm_ctx->reply_skb, "no resource name expected");
218 			return ERR_INVALID_REQUEST;
219 		}
220 		if (adm_ctx->device) {
221 			drbd_msg_put_info(adm_ctx->reply_skb, "no minor number expected");
222 			return ERR_INVALID_REQUEST;
223 		}
224 		if (adm_ctx->my_addr && adm_ctx->peer_addr)
225 			adm_ctx->connection = conn_get_by_addrs(nla_data(adm_ctx->my_addr),
226 							  nla_len(adm_ctx->my_addr),
227 							  nla_data(adm_ctx->peer_addr),
228 							  nla_len(adm_ctx->peer_addr));
229 		if (!adm_ctx->connection) {
230 			drbd_msg_put_info(adm_ctx->reply_skb, "unknown connection");
231 			return ERR_INVALID_REQUEST;
232 		}
233 	}
234 
235 	/* some more paranoia, if the request was over-determined */
236 	if (adm_ctx->device && adm_ctx->resource &&
237 	    adm_ctx->device->resource != adm_ctx->resource) {
238 		pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
239 				adm_ctx->minor, adm_ctx->resource->name,
240 				adm_ctx->device->resource->name);
241 		drbd_msg_put_info(adm_ctx->reply_skb, "minor exists in different resource");
242 		return ERR_INVALID_REQUEST;
243 	}
244 	if (adm_ctx->device &&
245 	    adm_ctx->volume != VOLUME_UNSPECIFIED &&
246 	    adm_ctx->volume != adm_ctx->device->vnr) {
247 		pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
248 				adm_ctx->minor, adm_ctx->volume,
249 				adm_ctx->device->vnr,
250 				adm_ctx->device->resource->name);
251 		drbd_msg_put_info(adm_ctx->reply_skb, "minor exists as different volume");
252 		return ERR_INVALID_REQUEST;
253 	}
254 
255 	/* still, provide adm_ctx->resource always, if possible. */
256 	if (!adm_ctx->resource) {
257 		adm_ctx->resource = adm_ctx->device ? adm_ctx->device->resource
258 			: adm_ctx->connection ? adm_ctx->connection->resource : NULL;
259 		if (adm_ctx->resource)
260 			kref_get(&adm_ctx->resource->kref);
261 	}
262 
263 	return NO_ERROR;
264 
265 fail:
266 	nlmsg_free(adm_ctx->reply_skb);
267 	adm_ctx->reply_skb = NULL;
268 	return err;
269 }
270 
271 static int drbd_adm_finish(struct drbd_config_context *adm_ctx,
272 	struct genl_info *info, int retcode)
273 {
274 	if (adm_ctx->device) {
275 		kref_put(&adm_ctx->device->kref, drbd_destroy_device);
276 		adm_ctx->device = NULL;
277 	}
278 	if (adm_ctx->connection) {
279 		kref_put(&adm_ctx->connection->kref, &drbd_destroy_connection);
280 		adm_ctx->connection = NULL;
281 	}
282 	if (adm_ctx->resource) {
283 		kref_put(&adm_ctx->resource->kref, drbd_destroy_resource);
284 		adm_ctx->resource = NULL;
285 	}
286 
287 	if (!adm_ctx->reply_skb)
288 		return -ENOMEM;
289 
290 	adm_ctx->reply_dh->ret_code = retcode;
291 	drbd_adm_send_reply(adm_ctx->reply_skb, info);
292 	return 0;
293 }
294 
295 static void setup_khelper_env(struct drbd_connection *connection, char **envp)
296 {
297 	char *afs;
298 
299 	/* FIXME: A future version will not allow this case. */
300 	if (connection->my_addr_len == 0 || connection->peer_addr_len == 0)
301 		return;
302 
303 	switch (((struct sockaddr *)&connection->peer_addr)->sa_family) {
304 	case AF_INET6:
305 		afs = "ipv6";
306 		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
307 			 &((struct sockaddr_in6 *)&connection->peer_addr)->sin6_addr);
308 		break;
309 	case AF_INET:
310 		afs = "ipv4";
311 		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
312 			 &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
313 		break;
314 	default:
315 		afs = "ssocks";
316 		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
317 			 &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
318 	}
319 	snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
320 }
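
/* Illustrative example: for an IPv4 peer at 192.0.2.1, setup_khelper_env()
 * above fills in
 *   envp[3] = "DRBD_PEER_AF=ipv4"
 *   envp[4] = "DRBD_PEER_ADDRESS=192.0.2.1"
 * while envp[0..2] stay the fixed HOME/TERM/PATH entries set by the callers. */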
321 
322 int drbd_khelper(struct drbd_device *device, char *cmd)
323 {
324 	char *envp[] = { "HOME=/",
325 			"TERM=linux",
326 			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
327 			 (char[20]) { }, /* address family */
328 			 (char[60]) { }, /* address */
329 			NULL };
330 	char mb[12];
331 	char *argv[] = {usermode_helper, cmd, mb, NULL };
332 	struct drbd_connection *connection = first_peer_device(device)->connection;
333 	struct sib_info sib;
334 	int ret;
335 
336 	if (current == connection->worker.task)
337 		set_bit(CALLBACK_PENDING, &connection->flags);
338 
339 	snprintf(mb, 12, "minor-%d", device_to_minor(device));
340 	setup_khelper_env(connection, envp);
341 
342 	/* The helper may take some time.
343 	 * write out any unsynced meta data changes now */
344 	drbd_md_sync(device);
345 
346 	drbd_info(device, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
347 	sib.sib_reason = SIB_HELPER_PRE;
348 	sib.helper_name = cmd;
349 	drbd_bcast_event(device, &sib);
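	/* With UMH_WAIT_PROC, call_usermodehelper() returns the wait()-style
	 * exit status of the helper, so (ret >> 8) & 0xff below extracts the
	 * helper's exit code. */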
350 	ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
351 	if (ret)
352 		drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
353 				usermode_helper, cmd, mb,
354 				(ret >> 8) & 0xff, ret);
355 	else
356 		drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
357 				usermode_helper, cmd, mb,
358 				(ret >> 8) & 0xff, ret);
359 	sib.sib_reason = SIB_HELPER_POST;
360 	sib.helper_exit_code = ret;
361 	drbd_bcast_event(device, &sib);
362 
363 	if (current == connection->worker.task)
364 		clear_bit(CALLBACK_PENDING, &connection->flags);
365 
366 	if (ret < 0) /* Ignore any ERRNOs we got. */
367 		ret = 0;
368 
369 	return ret;
370 }
371 
372 static int conn_khelper(struct drbd_connection *connection, char *cmd)
373 {
374 	char *envp[] = { "HOME=/",
375 			"TERM=linux",
376 			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
377 			 (char[20]) { }, /* address family */
378 			 (char[60]) { }, /* address */
379 			NULL };
380 	char *resource_name = connection->resource->name;
381 	char *argv[] = {usermode_helper, cmd, resource_name, NULL };
382 	int ret;
383 
384 	setup_khelper_env(connection, envp);
385 	conn_md_sync(connection);
386 
387 	drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name);
388 	/* TODO: conn_bcast_event() ?? */
389 
390 	ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
391 	if (ret)
392 		drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
393 			  usermode_helper, cmd, resource_name,
394 			  (ret >> 8) & 0xff, ret);
395 	else
396 		drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
397 			  usermode_helper, cmd, resource_name,
398 			  (ret >> 8) & 0xff, ret);
399 	/* TODO: conn_bcast_event() ?? */
400 
401 	if (ret < 0) /* Ignore any ERRNOs we got. */
402 		ret = 0;
403 
404 	return ret;
405 }
406 
407 static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connection)
408 {
409 	enum drbd_fencing_p fp = FP_NOT_AVAIL;
410 	struct drbd_peer_device *peer_device;
411 	int vnr;
412 
413 	rcu_read_lock();
414 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
415 		struct drbd_device *device = peer_device->device;
416 		if (get_ldev_if_state(device, D_CONSISTENT)) {
417 			struct disk_conf *disk_conf =
418 				rcu_dereference(peer_device->device->ldev->disk_conf);
419 			fp = max_t(enum drbd_fencing_p, fp, disk_conf->fencing);
420 			put_ldev(device);
421 		}
422 	}
423 	rcu_read_unlock();
424 
425 	if (fp == FP_NOT_AVAIL) {
426 		/* IO Suspending works on the whole resource.
427 		   Do it only for one device. */
428 		vnr = 0;
429 		peer_device = idr_get_next(&connection->peer_devices, &vnr);
430 		drbd_change_state(peer_device->device, CS_VERBOSE | CS_HARD, NS(susp_fen, 0));
431 	}
432 
433 	return fp;
434 }
435 
436 bool conn_try_outdate_peer(struct drbd_connection *connection)
437 {
438 	unsigned int connect_cnt;
439 	union drbd_state mask = { };
440 	union drbd_state val = { };
441 	enum drbd_fencing_p fp;
442 	char *ex_to_string;
443 	int r;
444 
445 	spin_lock_irq(&connection->resource->req_lock);
446 	if (connection->cstate >= C_WF_REPORT_PARAMS) {
447 		drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
448 		spin_unlock_irq(&connection->resource->req_lock);
449 		return false;
450 	}
451 
452 	connect_cnt = connection->connect_cnt;
453 	spin_unlock_irq(&connection->resource->req_lock);
454 
455 	fp = highest_fencing_policy(connection);
456 	switch (fp) {
457 	case FP_NOT_AVAIL:
458 		drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n");
459 		goto out;
460 	case FP_DONT_CARE:
461 		return true;
462 	default: ;
463 	}
464 
465 	r = conn_khelper(connection, "fence-peer");
466 
467 	switch ((r>>8) & 0xff) {
468 	case 3: /* peer is inconsistent */
469 		ex_to_string = "peer is inconsistent or worse";
470 		mask.pdsk = D_MASK;
471 		val.pdsk = D_INCONSISTENT;
472 		break;
473 	case 4: /* peer got outdated, or was already outdated */
474 		ex_to_string = "peer was fenced";
475 		mask.pdsk = D_MASK;
476 		val.pdsk = D_OUTDATED;
477 		break;
478 	case 5: /* peer was down */
479 		if (conn_highest_disk(connection) == D_UP_TO_DATE) {
480 			/* we will create (or have created) a new UUID anyway... */
481 			ex_to_string = "peer is unreachable, assumed to be dead";
482 			mask.pdsk = D_MASK;
483 			val.pdsk = D_OUTDATED;
484 		} else {
485 			ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
486 		}
487 		break;
488 	case 6: /* Peer is primary, voluntarily outdate myself.
489 		 * This is useful when an unconnected R_SECONDARY is asked to
490 		 * become R_PRIMARY, but finds the other peer being active. */
491 		ex_to_string = "peer is active";
492 		drbd_warn(connection, "Peer is primary, outdating myself.\n");
493 		mask.disk = D_MASK;
494 		val.disk = D_OUTDATED;
495 		break;
496 	case 7:
497 		if (fp != FP_STONITH)
498 			drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n");
499 		ex_to_string = "peer was stonithed";
500 		mask.pdsk = D_MASK;
501 		val.pdsk = D_OUTDATED;
502 		break;
503 	default:
504 		/* The script is broken ... */
505 		drbd_err(connection, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
506 		return false; /* Eventually leave IO frozen */
507 	}
508 
509 	drbd_info(connection, "fence-peer helper returned %d (%s)\n",
510 		  (r>>8) & 0xff, ex_to_string);
511 
512  out:
513 
514 	/* Not using
515 	   conn_request_state(connection, mask, val, CS_VERBOSE);
516 	   here, because we might have been able to re-establish the connection in the
517 	   meantime. */
518 	spin_lock_irq(&connection->resource->req_lock);
519 	if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) {
520 		if (connection->connect_cnt != connect_cnt)
521 			/* In case the connection was established and dropped
522 			   while the fence-peer handler was running, ignore it */
523 			drbd_info(connection, "Ignoring fence-peer exit code\n");
524 		else
525 			_conn_request_state(connection, mask, val, CS_VERBOSE);
526 	}
527 	spin_unlock_irq(&connection->resource->req_lock);
528 
529 	return conn_highest_pdsk(connection) <= D_OUTDATED;
530 }
531 
532 static int _try_outdate_peer_async(void *data)
533 {
534 	struct drbd_connection *connection = (struct drbd_connection *)data;
535 
536 	conn_try_outdate_peer(connection);
537 
538 	kref_put(&connection->kref, drbd_destroy_connection);
539 	return 0;
540 }
541 
542 void conn_try_outdate_peer_async(struct drbd_connection *connection)
543 {
544 	struct task_struct *opa;
545 
546 	kref_get(&connection->kref);
547 	/* We may just have force_sig()'ed this thread
548 	 * to get it out of some blocking network function.
549 	 * Clear signals; otherwise kthread_run(), which internally uses
550 	 * wait_for_completion_killable(), will mistake our pending signal
551 	 * for a new fatal signal and fail. */
552 	flush_signals(current);
553 	opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h");
554 	if (IS_ERR(opa)) {
555 		drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n");
556 		kref_put(&connection->kref, drbd_destroy_connection);
557 	}
558 }
559 
560 enum drbd_state_rv
561 drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
562 {
563 	const int max_tries = 4;
564 	enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
565 	struct net_conf *nc;
566 	int try = 0;
567 	int forced = 0;
568 	union drbd_state mask, val;
569 
570 	if (new_role == R_PRIMARY) {
571 		struct drbd_connection *connection;
572 
573 		/* Detect dead peers as soon as possible.  */
574 
575 		rcu_read_lock();
576 		for_each_connection(connection, device->resource)
577 			request_ping(connection);
578 		rcu_read_unlock();
579 	}
580 
581 	mutex_lock(device->state_mutex);
582 
583 	mask.i = 0; mask.role = R_MASK;
584 	val.i  = 0; val.role  = new_role;
585 
586 	while (try++ < max_tries) {
587 		rv = _drbd_request_state(device, mask, val, CS_WAIT_COMPLETE);
588 
589 		/* in case we first succeeded in outdating the peer,
590 		 * but now suddenly could establish a connection */
591 		if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
592 			val.pdsk = 0;
593 			mask.pdsk = 0;
594 			continue;
595 		}
596 
597 		if (rv == SS_NO_UP_TO_DATE_DISK && force &&
598 		    (device->state.disk < D_UP_TO_DATE &&
599 		     device->state.disk >= D_INCONSISTENT)) {
600 			mask.disk = D_MASK;
601 			val.disk  = D_UP_TO_DATE;
602 			forced = 1;
603 			continue;
604 		}
605 
606 		if (rv == SS_NO_UP_TO_DATE_DISK &&
607 		    device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
608 			D_ASSERT(device, device->state.pdsk == D_UNKNOWN);
609 
610 			if (conn_try_outdate_peer(first_peer_device(device)->connection)) {
611 				val.disk = D_UP_TO_DATE;
612 				mask.disk = D_MASK;
613 			}
614 			continue;
615 		}
616 
617 		if (rv == SS_NOTHING_TO_DO)
618 			goto out;
619 		if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
620 			if (!conn_try_outdate_peer(first_peer_device(device)->connection) && force) {
621 				drbd_warn(device, "Forced into split brain situation!\n");
622 				mask.pdsk = D_MASK;
623 				val.pdsk  = D_OUTDATED;
624 
625 			}
626 			continue;
627 		}
628 		if (rv == SS_TWO_PRIMARIES) {
629 			/* Maybe the peer is detected as dead very soon...
630 			   retry at most once more in this case. */
631 			int timeo;
632 			rcu_read_lock();
633 			nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
634 			timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
635 			rcu_read_unlock();
636 			schedule_timeout_interruptible(timeo);
637 			if (try < max_tries)
638 				try = max_tries - 1;
639 			continue;
640 		}
641 		if (rv < SS_SUCCESS) {
642 			rv = _drbd_request_state(device, mask, val,
643 						CS_VERBOSE + CS_WAIT_COMPLETE);
644 			if (rv < SS_SUCCESS)
645 				goto out;
646 		}
647 		break;
648 	}
649 
650 	if (rv < SS_SUCCESS)
651 		goto out;
652 
653 	if (forced)
654 		drbd_warn(device, "Forced to consider local data as UpToDate!\n");
655 
656 	/* Wait until nothing is on the fly :) */
657 	wait_event(device->misc_wait, atomic_read(&device->ap_pending_cnt) == 0);
658 
659 	/* FIXME also wait for all pending P_BARRIER_ACK? */
660 
661 	if (new_role == R_SECONDARY) {
662 		set_disk_ro(device->vdisk, true);
663 		if (get_ldev(device)) {
664 			device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
665 			put_ldev(device);
666 		}
667 	} else {
668 		/* Called from drbd_adm_set_role only.
669 		 * We are still holding the conf_update mutex. */
670 		nc = first_peer_device(device)->connection->net_conf;
671 		if (nc)
672 			nc->discard_my_data = 0; /* without copy; single bit op is atomic */
673 
674 		set_disk_ro(device->vdisk, false);
675 		if (get_ldev(device)) {
676 			if (((device->state.conn < C_CONNECTED ||
677 			       device->state.pdsk <= D_FAILED)
678 			      && device->ldev->md.uuid[UI_BITMAP] == 0) || forced)
679 				drbd_uuid_new_current(device);
680 
681 			device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
682 			put_ldev(device);
683 		}
684 	}
685 
686 	/* writeout of activity log covered areas of the bitmap
687 	 * to stable storage done in after state change already */
688 
689 	if (device->state.conn >= C_WF_REPORT_PARAMS) {
690 		/* if this was forced, we should consider sync */
691 		if (forced)
692 			drbd_send_uuids(first_peer_device(device));
693 		drbd_send_current_state(first_peer_device(device));
694 	}
695 
696 	drbd_md_sync(device);
697 
698 	kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
699 out:
700 	mutex_unlock(device->state_mutex);
701 	return rv;
702 }
703 
704 static const char *from_attrs_err_to_txt(int err)
705 {
706 	return	err == -ENOMSG ? "required attribute missing" :
707 		err == -EOPNOTSUPP ? "unknown mandatory attribute" :
708 		err == -EEXIST ? "can not change invariant setting" :
709 		"invalid attribute value";
710 }
711 
712 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
713 {
714 	struct drbd_config_context adm_ctx;
715 	struct set_role_parms parms;
716 	int err;
717 	enum drbd_ret_code retcode;
718 
719 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
720 	if (!adm_ctx.reply_skb)
721 		return retcode;
722 	if (retcode != NO_ERROR)
723 		goto out;
724 
725 	memset(&parms, 0, sizeof(parms));
726 	if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
727 		err = set_role_parms_from_attrs(&parms, info);
728 		if (err) {
729 			retcode = ERR_MANDATORY_TAG;
730 			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
731 			goto out;
732 		}
733 	}
734 	genl_unlock();
735 	mutex_lock(&adm_ctx.resource->adm_mutex);
736 
737 	if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
738 		retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
739 	else
740 		retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
741 
742 	mutex_unlock(&adm_ctx.resource->adm_mutex);
743 	genl_lock();
744 out:
745 	drbd_adm_finish(&adm_ctx, info, retcode);
746 	return 0;
747 }
748 
749 /* Initializes the md.*_offset members, so we are able to find
750  * the on disk meta data.
751  *
752  * We currently have two possible layouts:
753  * external:
754  *   |----------- md_size_sect ------------------|
755  *   [ 4k superblock ][ activity log ][  Bitmap  ]
756  *   | al_offset == 8 |
757  *   | bm_offset = al_offset + X      |
758  *  ==> bitmap sectors = md_size_sect - bm_offset
759  *
760  * internal:
761  *            |----------- md_size_sect ------------------|
762  * [data.....][  Bitmap  ][ activity log ][ 4k superblock ]
763  *                        | al_offset < 0 |
764  *            | bm_offset = al_offset - Y |
765  *  ==> bitmap sectors = Y = al_offset - bm_offset
766  *
767  *  Activity log size used to be fixed 32kB,
768  *  but is about to become configurable.
769  */
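/* Worked example (illustrative): internal meta data with the default 32kB
 * activity log (al_size_4k = 8, so al_size_sect = 64) on a 1 TiB backing
 * device: the data spans 65536 bitmap extents of 16 MiB each, i.e. needs
 * 65536 bitmap sectors, so md_size_sect = 65536 + 8 + 64 = 65608,
 * al_offset = -64 and bm_offset = -65608 + 8 = -65600, both relative to
 * md_offset. */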
770 static void drbd_md_set_sector_offsets(struct drbd_device *device,
771 				       struct drbd_backing_dev *bdev)
772 {
773 	sector_t md_size_sect = 0;
774 	unsigned int al_size_sect = bdev->md.al_size_4k * 8;
775 
776 	bdev->md.md_offset = drbd_md_ss(bdev);
777 
778 	switch (bdev->md.meta_dev_idx) {
779 	default:
780 		/* v07 style fixed size indexed meta data */
781 		bdev->md.md_size_sect = MD_128MB_SECT;
782 		bdev->md.al_offset = MD_4kB_SECT;
783 		bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
784 		break;
785 	case DRBD_MD_INDEX_FLEX_EXT:
786 		/* just occupy the full device; unit: sectors */
787 		bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
788 		bdev->md.al_offset = MD_4kB_SECT;
789 		bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
790 		break;
791 	case DRBD_MD_INDEX_INTERNAL:
792 	case DRBD_MD_INDEX_FLEX_INT:
793 		/* al size is still fixed */
794 		bdev->md.al_offset = -al_size_sect;
795 		/* we need (slightly less than) ~ this many bitmap sectors: */
796 		md_size_sect = drbd_get_capacity(bdev->backing_bdev);
797 		md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
798 		md_size_sect = BM_SECT_TO_EXT(md_size_sect);
799 		md_size_sect = ALIGN(md_size_sect, 8);
800 
801 		/* plus the "drbd meta data super block",
802 		 * and the activity log; */
803 		md_size_sect += MD_4kB_SECT + al_size_sect;
804 
805 		bdev->md.md_size_sect = md_size_sect;
806 		/* bitmap offset is adjusted by 'super' block size */
807 		bdev->md.bm_offset   = -md_size_sect + MD_4kB_SECT;
808 		break;
809 	}
810 }
811 
812 /* input size is expected to be in KB */
813 char *ppsize(char *buf, unsigned long long size)
814 {
815 	/* Needs 9 bytes at max including trailing NUL:
816 	 * -1ULL ==> "16384 EB" */
817 	static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
818 	int base = 0;
819 	while (size >= 10000 && base < sizeof(units)-1) {
820 		/* shift + round */
821 		size = (size >> 10) + !!(size & (1<<9));
822 		base++;
823 	}
824 	sprintf(buf, "%u %cB", (unsigned)size, units[base]);
825 
826 	return buf;
827 }
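
/* Usage sketch (illustrative):
 *   char buf[10];
 *   ppsize(buf, 4);        -> "4 KB"
 *   ppsize(buf, 1048576);  -> "1024 MB"
 * The shifts are binary (by 10 bits), even though the labels read KB/MB/GB. */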
828 
829 /* there is still a theoretical deadlock when called from receiver
830  * on a D_INCONSISTENT R_PRIMARY:
831  *  remote READ does inc_ap_bio, receiver would need to receive answer
832  *  packet from remote to dec_ap_bio again.
833  *  receiver receive_sizes(), comes here,
834  *  waits for ap_bio_cnt == 0. -> deadlock.
835  * but this cannot happen, actually, because:
836  *  R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
837  *  (not connected, or bad/no disk on peer):
838  *  see drbd_fail_request_early, ap_bio_cnt is zero.
839  *  R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
840  *  peer may not initiate a resize.
841  */
842 /* Note these are not to be confused with
843  * drbd_adm_suspend_io/drbd_adm_resume_io,
844  * which are (sub) state changes triggered by admin (drbdsetup),
845  * and can be long lived.
846  * This changes a device->flag, is triggered by drbd internals,
847  * and should be short-lived. */
848 void drbd_suspend_io(struct drbd_device *device)
849 {
850 	set_bit(SUSPEND_IO, &device->flags);
851 	if (drbd_suspended(device))
852 		return;
853 	wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
854 }
855 
856 void drbd_resume_io(struct drbd_device *device)
857 {
858 	clear_bit(SUSPEND_IO, &device->flags);
859 	wake_up(&device->misc_wait);
860 }
861 
862 /**
863  * drbd_determine_dev_size() -  Sets the right device size obeying all constraints
864  * @device:	DRBD device.
865  *
866  * Returns 0 on success, negative return values indicate errors.
867  * You should call drbd_md_sync() after calling this function.
868  */
869 enum determine_dev_size
870 drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
871 {
872 	sector_t prev_first_sect, prev_size; /* previous meta location */
873 	sector_t la_size_sect, u_size;
874 	struct drbd_md *md = &device->ldev->md;
875 	u32 prev_al_stripe_size_4k;
876 	u32 prev_al_stripes;
877 	sector_t size;
878 	char ppb[10];
879 	void *buffer;
880 
881 	int md_moved, la_size_changed;
882 	enum determine_dev_size rv = DS_UNCHANGED;
883 
884 	/* race:
885 	 * application request passes inc_ap_bio,
886 	 * but then cannot get an AL-reference.
887 	 * this function later may wait on ap_bio_cnt == 0. -> deadlock.
888 	 *
889 	 * to avoid that:
890 	 * Suspend IO right here.
891 	 * still lock the act_log to not trigger ASSERTs there.
892 	 */
893 	drbd_suspend_io(device);
894 	buffer = drbd_md_get_buffer(device); /* Lock meta-data IO */
895 	if (!buffer) {
896 		drbd_resume_io(device);
897 		return DS_ERROR;
898 	}
899 
900 	/* no wait necessary anymore, actually we could assert that */
901 	wait_event(device->al_wait, lc_try_lock(device->act_log));
902 
903 	prev_first_sect = drbd_md_first_sector(device->ldev);
904 	prev_size = device->ldev->md.md_size_sect;
905 	la_size_sect = device->ldev->md.la_size_sect;
906 
907 	if (rs) {
908 		/* rs is non NULL if we should change the AL layout only */
909 
910 		prev_al_stripes = md->al_stripes;
911 		prev_al_stripe_size_4k = md->al_stripe_size_4k;
912 
913 		md->al_stripes = rs->al_stripes;
914 		md->al_stripe_size_4k = rs->al_stripe_size / 4;
915 		md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
916 	}
917 
918 	drbd_md_set_sector_offsets(device, device->ldev);
919 
920 	rcu_read_lock();
921 	u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
922 	rcu_read_unlock();
923 	size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);
924 
925 	if (size < la_size_sect) {
926 		if (rs && u_size == 0) {
927 			/* Remove "rs &&" later. This check should always be active, but
928 			   right now the receiver expects the permissive behavior */
929 			drbd_warn(device, "Implicit shrink not allowed. "
930 				 "Use --size=%llus for explicit shrink.\n",
931 				 (unsigned long long)size);
932 			rv = DS_ERROR_SHRINK;
933 		}
934 		if (u_size > size)
935 			rv = DS_ERROR_SPACE_MD;
936 		if (rv != DS_UNCHANGED)
937 			goto err_out;
938 	}
939 
940 	if (drbd_get_capacity(device->this_bdev) != size ||
941 	    drbd_bm_capacity(device) != size) {
942 		int err;
943 		err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
944 		if (unlikely(err)) {
945 			/* currently there is only one error: ENOMEM! */
946 			size = drbd_bm_capacity(device)>>1;
947 			if (size == 0) {
948 				drbd_err(device, "OUT OF MEMORY! "
949 				    "Could not allocate bitmap!\n");
950 			} else {
951 				drbd_err(device, "BM resizing failed. "
952 				    "Leaving size unchanged at size = %lu KB\n",
953 				    (unsigned long)size);
954 			}
955 			rv = DS_ERROR;
956 		}
957 		/* racy, see comments above. */
958 		drbd_set_my_capacity(device, size);
959 		device->ldev->md.la_size_sect = size;
960 		drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
961 		     (unsigned long long)size>>1);
962 	}
963 	if (rv <= DS_ERROR)
964 		goto err_out;
965 
966 	la_size_changed = (la_size_sect != device->ldev->md.la_size_sect);
967 
968 	md_moved = prev_first_sect != drbd_md_first_sector(device->ldev)
969 		|| prev_size	   != device->ldev->md.md_size_sect;
970 
971 	if (la_size_changed || md_moved || rs) {
972 		u32 prev_flags;
973 
974 		drbd_al_shrink(device); /* All extents inactive. */
975 
976 		prev_flags = md->flags;
977 		md->flags &= ~MDF_PRIMARY_IND;
978 		drbd_md_write(device, buffer);
979 
980 		drbd_info(device, "Writing the whole bitmap, %s\n",
981 			 la_size_changed && md_moved ? "size changed and md moved" :
982 			 la_size_changed ? "size changed" : "md moved");
983 		/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
984 		drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
985 			       "size changed", BM_LOCKED_MASK);
986 		drbd_initialize_al(device, buffer);
987 
988 		md->flags = prev_flags;
989 		drbd_md_write(device, buffer);
990 
991 		if (rs)
992 			drbd_info(device, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
993 				  md->al_stripes, md->al_stripe_size_4k * 4);
994 	}
995 
996 	if (size > la_size_sect)
997 		rv = la_size_sect ? DS_GREW : DS_GREW_FROM_ZERO;
998 	if (size < la_size_sect)
999 		rv = DS_SHRUNK;
1000 
1001 	if (0) {
1002 	err_out:
1003 		if (rs) {
1004 			md->al_stripes = prev_al_stripes;
1005 			md->al_stripe_size_4k = prev_al_stripe_size_4k;
1006 			md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;
1007 
1008 			drbd_md_set_sector_offsets(device, device->ldev);
1009 		}
1010 	}
1011 	lc_unlock(device->act_log);
1012 	wake_up(&device->al_wait);
1013 	drbd_md_put_buffer(device);
1014 	drbd_resume_io(device);
1015 
1016 	return rv;
1017 }
1018 
1019 sector_t
1020 drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev,
1021 		  sector_t u_size, int assume_peer_has_space)
1022 {
1023 	sector_t p_size = device->p_size;   /* partner's disk size. */
1024 	sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. */
1025 	sector_t m_size; /* my size */
1026 	sector_t size = 0;
1027 
1028 	m_size = drbd_get_max_capacity(bdev);
1029 
1030 	if (device->state.conn < C_CONNECTED && assume_peer_has_space) {
1031 		drbd_warn(device, "Resize while not connected was forced by the user!\n");
1032 		p_size = m_size;
1033 	}
1034 
1035 	if (p_size && m_size) {
1036 		size = min_t(sector_t, p_size, m_size);
1037 	} else {
1038 		if (la_size_sect) {
1039 			size = la_size_sect;
1040 			if (m_size && m_size < size)
1041 				size = m_size;
1042 			if (p_size && p_size < size)
1043 				size = p_size;
1044 		} else {
1045 			if (m_size)
1046 				size = m_size;
1047 			if (p_size)
1048 				size = p_size;
1049 		}
1050 	}
1051 
1052 	if (size == 0)
1053 		drbd_err(device, "Both nodes diskless!\n");
1054 
1055 	if (u_size) {
1056 		if (u_size > size)
1057 			drbd_err(device, "Requested disk size is too big (%lu > %lu)\n",
1058 			    (unsigned long)u_size>>1, (unsigned long)size>>1);
1059 		else
1060 			size = u_size;
1061 	}
1062 
1063 	return size;
1064 }
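
/* Illustrative behaviour: with both disks present, p_size = 2000000 and
 * m_size = 1000000 sectors give size = 1000000 (the minimum); a user-requested
 * u_size larger than that only triggers the error message above (size stays
 * unchanged), while a smaller non-zero u_size is used instead. */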
1065 
1066 /**
1067  * drbd_check_al_size() - Ensures that the AL is of the right size
1068  * @device:	DRBD device.
1069  *
1070  * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
1071  * failed, and 0 on success. You should call drbd_md_sync() after calling
1072  * this function.
1073  */
1074 static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
1075 {
1076 	struct lru_cache *n, *t;
1077 	struct lc_element *e;
1078 	unsigned int in_use;
1079 	int i;
1080 
1081 	if (device->act_log &&
1082 	    device->act_log->nr_elements == dc->al_extents)
1083 		return 0;
1084 
1085 	in_use = 0;
1086 	t = device->act_log;
1087 	n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
1088 		dc->al_extents, sizeof(struct lc_element), 0);
1089 
1090 	if (n == NULL) {
1091 		drbd_err(device, "Cannot allocate act_log lru!\n");
1092 		return -ENOMEM;
1093 	}
1094 	spin_lock_irq(&device->al_lock);
1095 	if (t) {
1096 		for (i = 0; i < t->nr_elements; i++) {
1097 			e = lc_element_by_index(t, i);
1098 			if (e->refcnt)
1099 				drbd_err(device, "refcnt(%d)==%d\n",
1100 				    e->lc_number, e->refcnt);
1101 			in_use += e->refcnt;
1102 		}
1103 	}
1104 	if (!in_use)
1105 		device->act_log = n;
1106 	spin_unlock_irq(&device->al_lock);
1107 	if (in_use) {
1108 		drbd_err(device, "Activity log still in use!\n");
1109 		lc_destroy(n);
1110 		return -EBUSY;
1111 	} else {
1112 		if (t)
1113 			lc_destroy(t);
1114 	}
1115 	drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elements */
1116 	return 0;
1117 }
1118 
1119 static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_bio_size)
1120 {
1121 	struct request_queue * const q = device->rq_queue;
1122 	unsigned int max_hw_sectors = max_bio_size >> 9;
1123 	unsigned int max_segments = 0;
1124 	struct request_queue *b = NULL;
1125 
1126 	if (get_ldev_if_state(device, D_ATTACHING)) {
1127 		b = device->ldev->backing_bdev->bd_disk->queue;
1128 
1129 		max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
1130 		rcu_read_lock();
1131 		max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
1132 		rcu_read_unlock();
1133 
1134 		blk_set_stacking_limits(&q->limits);
1135 		blk_queue_max_write_same_sectors(q, 0);
1136 	}
1137 
1138 	blk_queue_logical_block_size(q, 512);
1139 	blk_queue_max_hw_sectors(q, max_hw_sectors);
1140 	/* This is the workaround for "bio would need to, but cannot, be split" */
1141 	blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
1142 	blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
1143 
1144 	if (b) {
1145 		struct drbd_connection *connection = first_peer_device(device)->connection;
1146 
1147 		if (blk_queue_discard(b) &&
1148 		    (connection->cstate < C_CONNECTED || connection->agreed_features & FF_TRIM)) {
1149 			/* For now, don't allow more than one activity log extent worth of data
1150 			 * to be discarded in one go. We may need to rework drbd_al_begin_io()
1151 			 * to allow for even larger discard ranges */
1152 			q->limits.max_discard_sectors = DRBD_MAX_DISCARD_SECTORS;
1153 
1154 			queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
1155 			/* REALLY? Is stacking secdiscard "legal"? */
1156 			if (blk_queue_secdiscard(b))
1157 				queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q);
1158 		} else {
1159 			q->limits.max_discard_sectors = 0;
1160 			queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
1161 			queue_flag_clear_unlocked(QUEUE_FLAG_SECDISCARD, q);
1162 		}
1163 
1164 		blk_queue_stack_limits(q, b);
1165 
1166 		if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
1167 			drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
1168 				 q->backing_dev_info.ra_pages,
1169 				 b->backing_dev_info.ra_pages);
1170 			q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
1171 		}
1172 		put_ldev(device);
1173 	}
1174 }
1175 
1176 void drbd_reconsider_max_bio_size(struct drbd_device *device)
1177 {
1178 	unsigned int now, new, local, peer;
1179 
1180 	now = queue_max_hw_sectors(device->rq_queue) << 9;
1181 	local = device->local_max_bio_size; /* Possibly last known value, from volatile memory */
1182 	peer = device->peer_max_bio_size; /* Possibly last known value, from meta data */
1183 
1184 	if (get_ldev_if_state(device, D_ATTACHING)) {
1185 		local = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9;
1186 		device->local_max_bio_size = local;
1187 		put_ldev(device);
1188 	}
1189 	local = min(local, DRBD_MAX_BIO_SIZE);
1190 
1191 	/* We may ignore peer limits if the peer is modern enough.
1192 	   From 8.3.8 onwards the peer can use multiple
1193 	   BIOs for a single peer_request */
1194 	if (device->state.conn >= C_WF_REPORT_PARAMS) {
1195 		if (first_peer_device(device)->connection->agreed_pro_version < 94)
1196 			peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
1197 			/* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
1198 		else if (first_peer_device(device)->connection->agreed_pro_version == 94)
1199 			peer = DRBD_MAX_SIZE_H80_PACKET;
1200 		else if (first_peer_device(device)->connection->agreed_pro_version < 100)
1201 			peer = DRBD_MAX_BIO_SIZE_P95;  /* drbd 8.3.8 onwards, before 8.4.0 */
1202 		else
1203 			peer = DRBD_MAX_BIO_SIZE;
1204 
1205 		/* We may later detach and re-attach on a disconnected Primary.
1206 		 * Avoid this setting to jump back in that case.
1207 		 * We want to store what we know the peer DRBD can handle,
1208 		 * not what the peer IO backend can handle. */
1209 		if (peer > device->peer_max_bio_size)
1210 			device->peer_max_bio_size = peer;
1211 	}
1212 	new = min(local, peer);
1213 
1214 	if (device->state.role == R_PRIMARY && new < now)
1215 		drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now);
1216 
1217 	if (new != now)
1218 		drbd_info(device, "max BIO size = %u\n", new);
1219 
1220 	drbd_setup_queue_param(device, new);
1221 }
1222 
1223 /* Starts the worker thread */
1224 static void conn_reconfig_start(struct drbd_connection *connection)
1225 {
1226 	drbd_thread_start(&connection->worker);
1227 	drbd_flush_workqueue(&connection->sender_work);
1228 }
1229 
1230 /* if still unconfigured, stops worker again. */
1231 static void conn_reconfig_done(struct drbd_connection *connection)
1232 {
1233 	bool stop_threads;
1234 	spin_lock_irq(&connection->resource->req_lock);
1235 	stop_threads = conn_all_vols_unconf(connection) &&
1236 		connection->cstate == C_STANDALONE;
1237 	spin_unlock_irq(&connection->resource->req_lock);
1238 	if (stop_threads) {
1239 		/* asender is implicitly stopped by receiver
1240 		 * in conn_disconnect() */
1241 		drbd_thread_stop(&connection->receiver);
1242 		drbd_thread_stop(&connection->worker);
1243 	}
1244 }
1245 
1246 /* Make sure IO is suspended before calling this function. */
1247 static void drbd_suspend_al(struct drbd_device *device)
1248 {
1249 	int s = 0;
1250 
1251 	if (!lc_try_lock(device->act_log)) {
1252 		drbd_warn(device, "Failed to lock al in drbd_suspend_al()\n");
1253 		return;
1254 	}
1255 
1256 	drbd_al_shrink(device);
1257 	spin_lock_irq(&device->resource->req_lock);
1258 	if (device->state.conn < C_CONNECTED)
1259 		s = !test_and_set_bit(AL_SUSPENDED, &device->flags);
1260 	spin_unlock_irq(&device->resource->req_lock);
1261 	lc_unlock(device->act_log);
1262 
1263 	if (s)
1264 		drbd_info(device, "Suspended AL updates\n");
1265 }
1266 
1267 
1268 static bool should_set_defaults(struct genl_info *info)
1269 {
1270 	unsigned flags = ((struct drbd_genlmsghdr*)info->userhdr)->flags;
1271 	return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS);
1272 }
1273 
1274 static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
1275 {
1276 	/* This is limited by 16 bit "slot" numbers,
1277 	 * and by available on-disk context storage.
1278 	 *
1279 	 * Also (u16)~0 is special (denotes a "free" extent).
1280 	 *
1281 	 * One transaction occupies one 4kB on-disk block,
1282 	 * we have n such blocks in the on disk ring buffer,
1283 	 * the "current" transaction may fail (n-1),
1284 	 * and there are 919 slot numbers of context information per transaction.
1285 	 *
1286 	 * 72 transaction blocks amounts to more than 2**16 context slots,
1287 	 * so cap there first.
1288 	 */
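	/* For reference: 72 * 919 = 66168 > 65536 = 2^16 context slots,
	 * while 71 * 919 = 65249 would not be enough. */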
1289 	const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX;
1290 	const unsigned int sufficient_on_disk =
1291 		(max_al_nr + AL_CONTEXT_PER_TRANSACTION -1)
1292 		/AL_CONTEXT_PER_TRANSACTION;
1293 
1294 	unsigned int al_size_4k = bdev->md.al_size_4k;
1295 
1296 	if (al_size_4k > sufficient_on_disk)
1297 		return max_al_nr;
1298 
1299 	return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
1300 }
1301 
1302 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
1303 {
1304 	struct drbd_config_context adm_ctx;
1305 	enum drbd_ret_code retcode;
1306 	struct drbd_device *device;
1307 	struct disk_conf *new_disk_conf, *old_disk_conf;
1308 	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
1309 	int err, fifo_size;
1310 
1311 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1312 	if (!adm_ctx.reply_skb)
1313 		return retcode;
1314 	if (retcode != NO_ERROR)
1315 		goto finish;
1316 
1317 	device = adm_ctx.device;
1318 	mutex_lock(&adm_ctx.resource->adm_mutex);
1319 
1320 	/* we also need a disk
1321 	 * to change the options on */
1322 	if (!get_ldev(device)) {
1323 		retcode = ERR_NO_DISK;
1324 		goto out;
1325 	}
1326 
1327 	new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
1328 	if (!new_disk_conf) {
1329 		retcode = ERR_NOMEM;
1330 		goto fail;
1331 	}
1332 
1333 	mutex_lock(&device->resource->conf_update);
1334 	old_disk_conf = device->ldev->disk_conf;
1335 	*new_disk_conf = *old_disk_conf;
1336 	if (should_set_defaults(info))
1337 		set_disk_conf_defaults(new_disk_conf);
1338 
1339 	err = disk_conf_from_attrs_for_change(new_disk_conf, info);
1340 	if (err && err != -ENOMSG) {
1341 		retcode = ERR_MANDATORY_TAG;
1342 		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1343 		goto fail_unlock;
1344 	}
1345 
1346 	if (!expect(new_disk_conf->resync_rate >= 1))
1347 		new_disk_conf->resync_rate = 1;
1348 
1349 	if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1350 		new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1351 	if (new_disk_conf->al_extents > drbd_al_extents_max(device->ldev))
1352 		new_disk_conf->al_extents = drbd_al_extents_max(device->ldev);
1353 
1354 	if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1355 		new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1356 
1357 	fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
1358 	if (fifo_size != device->rs_plan_s->size) {
1359 		new_plan = fifo_alloc(fifo_size);
1360 		if (!new_plan) {
1361 			drbd_err(device, "kmalloc of fifo_buffer failed");
1362 			retcode = ERR_NOMEM;
1363 			goto fail_unlock;
1364 		}
1365 	}
1366 
1367 	drbd_suspend_io(device);
1368 	wait_event(device->al_wait, lc_try_lock(device->act_log));
1369 	drbd_al_shrink(device);
1370 	err = drbd_check_al_size(device, new_disk_conf);
1371 	lc_unlock(device->act_log);
1372 	wake_up(&device->al_wait);
1373 	drbd_resume_io(device);
1374 
1375 	if (err) {
1376 		retcode = ERR_NOMEM;
1377 		goto fail_unlock;
1378 	}
1379 
1380 	write_lock_irq(&global_state_lock);
1381 	retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1382 	if (retcode == NO_ERROR) {
1383 		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
1384 		drbd_resync_after_changed(device);
1385 	}
1386 	write_unlock_irq(&global_state_lock);
1387 
1388 	if (retcode != NO_ERROR)
1389 		goto fail_unlock;
1390 
1391 	if (new_plan) {
1392 		old_plan = device->rs_plan_s;
1393 		rcu_assign_pointer(device->rs_plan_s, new_plan);
1394 	}
1395 
1396 	mutex_unlock(&device->resource->conf_update);
1397 
1398 	if (new_disk_conf->al_updates)
1399 		device->ldev->md.flags &= ~MDF_AL_DISABLED;
1400 	else
1401 		device->ldev->md.flags |= MDF_AL_DISABLED;
1402 
1403 	if (new_disk_conf->md_flushes)
1404 		clear_bit(MD_NO_FUA, &device->flags);
1405 	else
1406 		set_bit(MD_NO_FUA, &device->flags);
1407 
1408 	drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);
1409 
1410 	drbd_md_sync(device);
1411 
1412 	if (device->state.conn >= C_CONNECTED) {
1413 		struct drbd_peer_device *peer_device;
1414 
1415 		for_each_peer_device(peer_device, device)
1416 			drbd_send_sync_param(peer_device);
1417 	}
1418 
1419 	synchronize_rcu();
1420 	kfree(old_disk_conf);
1421 	kfree(old_plan);
1422 	mod_timer(&device->request_timer, jiffies + HZ);
1423 	goto success;
1424 
1425 fail_unlock:
1426 	mutex_unlock(&device->resource->conf_update);
1427  fail:
1428 	kfree(new_disk_conf);
1429 	kfree(new_plan);
1430 success:
1431 	put_ldev(device);
1432  out:
1433 	mutex_unlock(&adm_ctx.resource->adm_mutex);
1434  finish:
1435 	drbd_adm_finish(&adm_ctx, info, retcode);
1436 	return 0;
1437 }
1438 
1439 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1440 {
1441 	struct drbd_config_context adm_ctx;
1442 	struct drbd_device *device;
1443 	int err;
1444 	enum drbd_ret_code retcode;
1445 	enum determine_dev_size dd;
1446 	sector_t max_possible_sectors;
1447 	sector_t min_md_device_sectors;
1448 	struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
1449 	struct disk_conf *new_disk_conf = NULL;
1450 	struct block_device *bdev;
1451 	struct lru_cache *resync_lru = NULL;
1452 	struct fifo_buffer *new_plan = NULL;
1453 	union drbd_state ns, os;
1454 	enum drbd_state_rv rv;
1455 	struct net_conf *nc;
1456 
1457 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1458 	if (!adm_ctx.reply_skb)
1459 		return retcode;
1460 	if (retcode != NO_ERROR)
1461 		goto finish;
1462 
1463 	device = adm_ctx.device;
1464 	mutex_lock(&adm_ctx.resource->adm_mutex);
1465 	conn_reconfig_start(first_peer_device(device)->connection);
1466 
1467 	/* if you want to reconfigure, please tear down first */
1468 	if (device->state.disk > D_DISKLESS) {
1469 		retcode = ERR_DISK_CONFIGURED;
1470 		goto fail;
1471 	}
1472 	/* It may just now have detached because of IO error.  Make sure
1473 	 * drbd_ldev_destroy is done already, we may end up here very fast,
1474 	 * e.g. if someone calls attach from the on-io-error handler,
1475 	 * to realize a "hot spare" feature (not that I'd recommend that) */
1476 	wait_event(device->misc_wait, !atomic_read(&device->local_cnt));
1477 
1478 	/* make sure there is no leftover from previous force-detach attempts */
1479 	clear_bit(FORCE_DETACH, &device->flags);
1480 	clear_bit(WAS_IO_ERROR, &device->flags);
1481 	clear_bit(WAS_READ_ERROR, &device->flags);
1482 
1483 	/* and no leftover from previously aborted resync or verify, either */
1484 	device->rs_total = 0;
1485 	device->rs_failed = 0;
1486 	atomic_set(&device->rs_pending_cnt, 0);
1487 
1488 	/* allocation not in the IO path, drbdsetup context */
1489 	nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
1490 	if (!nbc) {
1491 		retcode = ERR_NOMEM;
1492 		goto fail;
1493 	}
1494 	spin_lock_init(&nbc->md.uuid_lock);
1495 
1496 	new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
1497 	if (!new_disk_conf) {
1498 		retcode = ERR_NOMEM;
1499 		goto fail;
1500 	}
1501 	nbc->disk_conf = new_disk_conf;
1502 
1503 	set_disk_conf_defaults(new_disk_conf);
1504 	err = disk_conf_from_attrs(new_disk_conf, info);
1505 	if (err) {
1506 		retcode = ERR_MANDATORY_TAG;
1507 		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1508 		goto fail;
1509 	}
1510 
1511 	if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1512 		new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1513 
1514 	new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ);
1515 	if (!new_plan) {
1516 		retcode = ERR_NOMEM;
1517 		goto fail;
1518 	}
1519 
1520 	if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
1521 		retcode = ERR_MD_IDX_INVALID;
1522 		goto fail;
1523 	}
1524 
1525 	write_lock_irq(&global_state_lock);
1526 	retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1527 	write_unlock_irq(&global_state_lock);
1528 	if (retcode != NO_ERROR)
1529 		goto fail;
1530 
1531 	rcu_read_lock();
1532 	nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
1533 	if (nc) {
1534 		if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
1535 			rcu_read_unlock();
1536 			retcode = ERR_STONITH_AND_PROT_A;
1537 			goto fail;
1538 		}
1539 	}
1540 	rcu_read_unlock();
1541 
1542 	bdev = blkdev_get_by_path(new_disk_conf->backing_dev,
1543 				  FMODE_READ | FMODE_WRITE | FMODE_EXCL, device);
1544 	if (IS_ERR(bdev)) {
1545 		drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev,
1546 			PTR_ERR(bdev));
1547 		retcode = ERR_OPEN_DISK;
1548 		goto fail;
1549 	}
1550 	nbc->backing_bdev = bdev;
1551 
1552 	/*
1553 	 * meta_dev_idx >= 0: external fixed size, possibly multiple
1554 	 * drbd sharing one meta device.  TODO in that case, paranoia
1555 	 * check that [md_bdev, meta_dev_idx] is not yet used by some
1556 	 * other drbd minor!  (if you use drbd.conf + drbdadm, that
1557 	 * should check it for you already; but if you don't, or
1558 	 * someone fooled it, we need to double check here)
1559 	 */
1560 	bdev = blkdev_get_by_path(new_disk_conf->meta_dev,
1561 				  FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1562 				  (new_disk_conf->meta_dev_idx < 0) ?
1563 				  (void *)device : (void *)drbd_m_holder);
1564 	if (IS_ERR(bdev)) {
1565 		drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev,
1566 			PTR_ERR(bdev));
1567 		retcode = ERR_OPEN_MD_DISK;
1568 		goto fail;
1569 	}
1570 	nbc->md_bdev = bdev;
1571 
1572 	if ((nbc->backing_bdev == nbc->md_bdev) !=
1573 	    (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
1574 	     new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
1575 		retcode = ERR_MD_IDX_INVALID;
1576 		goto fail;
1577 	}
1578 
1579 	resync_lru = lc_create("resync", drbd_bm_ext_cache,
1580 			1, 61, sizeof(struct bm_extent),
1581 			offsetof(struct bm_extent, lce));
1582 	if (!resync_lru) {
1583 		retcode = ERR_NOMEM;
1584 		goto fail;
1585 	}
1586 
1587 	/* Read our meta data super block early.
1588 	 * This also sets other on-disk offsets. */
1589 	retcode = drbd_md_read(device, nbc);
1590 	if (retcode != NO_ERROR)
1591 		goto fail;
1592 
1593 	if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1594 		new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1595 	if (new_disk_conf->al_extents > drbd_al_extents_max(nbc))
1596 		new_disk_conf->al_extents = drbd_al_extents_max(nbc);
1597 
1598 	if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
1599 		drbd_err(device, "max capacity %llu smaller than disk size %llu\n",
1600 			(unsigned long long) drbd_get_max_capacity(nbc),
1601 			(unsigned long long) new_disk_conf->disk_size);
1602 		retcode = ERR_DISK_TOO_SMALL;
1603 		goto fail;
1604 	}
1605 
1606 	if (new_disk_conf->meta_dev_idx < 0) {
1607 		max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
1608 		/* at least one MB, otherwise it does not make sense */
1609 		min_md_device_sectors = (2<<10);
1610 	} else {
1611 		max_possible_sectors = DRBD_MAX_SECTORS;
1612 		min_md_device_sectors = MD_128MB_SECT * (new_disk_conf->meta_dev_idx + 1);
1613 	}
1614 
1615 	if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
1616 		retcode = ERR_MD_DISK_TOO_SMALL;
1617 		drbd_warn(device, "refusing attach: md-device too small, "
1618 		     "at least %llu sectors needed for this meta-disk type\n",
1619 		     (unsigned long long) min_md_device_sectors);
1620 		goto fail;
1621 	}
1622 
1623 	/* Make sure the new disk is big enough
1624 	 * (we may currently be R_PRIMARY with no local disk...) */
1625 	if (drbd_get_max_capacity(nbc) <
1626 	    drbd_get_capacity(device->this_bdev)) {
1627 		retcode = ERR_DISK_TOO_SMALL;
1628 		goto fail;
1629 	}
1630 
1631 	nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
1632 
1633 	if (nbc->known_size > max_possible_sectors) {
1634 		drbd_warn(device, "==> truncating very big lower level device "
1635 			"to currently maximum possible %llu sectors <==\n",
1636 			(unsigned long long) max_possible_sectors);
1637 		if (new_disk_conf->meta_dev_idx >= 0)
1638 			drbd_warn(device, "==>> using internal or flexible "
1639 				      "meta data may help <<==\n");
1640 	}
1641 
1642 	drbd_suspend_io(device);
1643 	/* also wait for the last barrier ack. */
1644 	/* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
1645 	 * We need a way to either ignore barrier acks for barriers sent before a device
1646 	 * was attached, or a way to wait for all pending barrier acks to come in.
1647 	 * As barriers are counted per resource,
1648 	 * we'd need to suspend io on all devices of a resource.
1649 	 */
1650 	wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
1651 	/* and for any other previously queued work */
1652 	drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
1653 
1654 	rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
1655 	retcode = rv;  /* FIXME: Type mismatch. */
1656 	drbd_resume_io(device);
1657 	if (rv < SS_SUCCESS)
1658 		goto fail;
1659 
1660 	if (!get_ldev_if_state(device, D_ATTACHING))
1661 		goto force_diskless;
1662 
1663 	if (!device->bitmap) {
1664 		if (drbd_bm_init(device)) {
1665 			retcode = ERR_NOMEM;
1666 			goto force_diskless_dec;
1667 		}
1668 	}
1669 
1670 	if (device->state.conn < C_CONNECTED &&
1671 	    device->state.role == R_PRIMARY && device->ed_uuid &&
1672 	    (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
1673 		drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
1674 		    (unsigned long long)device->ed_uuid);
1675 		retcode = ERR_DATA_NOT_CURRENT;
1676 		goto force_diskless_dec;
1677 	}
1678 
1679 	/* Since we are diskless, fix the activity log first... */
1680 	if (drbd_check_al_size(device, new_disk_conf)) {
1681 		retcode = ERR_NOMEM;
1682 		goto force_diskless_dec;
1683 	}
1684 
1685 	/* Prevent shrinking of consistent devices! */
1686 	if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
1687 	    drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
1688 		drbd_warn(device, "refusing to truncate a consistent device\n");
1689 		retcode = ERR_DISK_TOO_SMALL;
1690 		goto force_diskless_dec;
1691 	}
1692 
1693 	/* Reset the "barriers don't work" bits here, then force meta data to
1694 	 * be written, to ensure we determine if barriers are supported. */
1695 	if (new_disk_conf->md_flushes)
1696 		clear_bit(MD_NO_FUA, &device->flags);
1697 	else
1698 		set_bit(MD_NO_FUA, &device->flags);
1699 
1700 	/* Point of no return reached.
1701 	 * Devices and memory are no longer released by error cleanup below.
1702 	 * Now the device takes over responsibility, and the state engine should
1703 	 * clean it up somewhere.  */
1704 	D_ASSERT(device, device->ldev == NULL);
1705 	device->ldev = nbc;
1706 	device->resync = resync_lru;
1707 	device->rs_plan_s = new_plan;
1708 	nbc = NULL;
1709 	resync_lru = NULL;
1710 	new_disk_conf = NULL;
1711 	new_plan = NULL;
1712 
1713 	drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);
1714 
1715 	if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
1716 		set_bit(CRASHED_PRIMARY, &device->flags);
1717 	else
1718 		clear_bit(CRASHED_PRIMARY, &device->flags);
1719 
1720 	if (drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1721 	    !(device->state.role == R_PRIMARY && device->resource->susp_nod))
1722 		set_bit(CRASHED_PRIMARY, &device->flags);
1723 
1724 	device->send_cnt = 0;
1725 	device->recv_cnt = 0;
1726 	device->read_cnt = 0;
1727 	device->writ_cnt = 0;
1728 
1729 	drbd_reconsider_max_bio_size(device);
1730 
1731 	/* If I am currently not R_PRIMARY,
1732 	 * but meta data primary indicator is set,
1733 	 * I just now recover from a hard crash,
1734 	 * and have been R_PRIMARY before that crash.
1735 	 *
1736 	 * Now, if I had no connection before that crash
1737 	 * (have been degraded R_PRIMARY), chances are that
1738 	 * I won't find my peer now either.
1739 	 *
1740 	 * In that case, and _only_ in that case,
1741 	 * we use the degr-wfc-timeout instead of the default,
1742 	 * so we can automatically recover from a crash of a
1743 	 * degraded but active "cluster" after a certain timeout.
1744 	 */
1745 	clear_bit(USE_DEGR_WFC_T, &device->flags);
1746 	if (device->state.role != R_PRIMARY &&
1747 	     drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1748 	    !drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND))
1749 		set_bit(USE_DEGR_WFC_T, &device->flags);
1750 
1751 	dd = drbd_determine_dev_size(device, 0, NULL);
1752 	if (dd <= DS_ERROR) {
1753 		retcode = ERR_NOMEM_BITMAP;
1754 		goto force_diskless_dec;
1755 	} else if (dd == DS_GREW)
1756 		set_bit(RESYNC_AFTER_NEG, &device->flags);
1757 
1758 	if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ||
1759 	    (test_bit(CRASHED_PRIMARY, &device->flags) &&
1760 	     drbd_md_test_flag(device->ldev, MDF_AL_DISABLED))) {
1761 		drbd_info(device, "Assuming that all blocks are out of sync "
1762 		     "(aka FullSync)\n");
1763 		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
1764 			"set_n_write from attaching", BM_LOCKED_MASK)) {
1765 			retcode = ERR_IO_MD_DISK;
1766 			goto force_diskless_dec;
1767 		}
1768 	} else {
1769 		if (drbd_bitmap_io(device, &drbd_bm_read,
1770 			"read from attaching", BM_LOCKED_MASK)) {
1771 			retcode = ERR_IO_MD_DISK;
1772 			goto force_diskless_dec;
1773 		}
1774 	}
1775 
1776 	if (_drbd_bm_total_weight(device) == drbd_bm_bits(device))
1777 		drbd_suspend_al(device); /* IO is still suspended here... */
1778 
1779 	spin_lock_irq(&device->resource->req_lock);
1780 	os = drbd_read_state(device);
1781 	ns = os;
1782 	/* If MDF_CONSISTENT is not set go into inconsistent state,
1783 	   otherwise investigate MDF_WasUpToDate...
1784 	   If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
1785 	   otherwise into D_CONSISTENT state.
1786 	*/
1787 	if (drbd_md_test_flag(device->ldev, MDF_CONSISTENT)) {
1788 		if (drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE))
1789 			ns.disk = D_CONSISTENT;
1790 		else
1791 			ns.disk = D_OUTDATED;
1792 	} else {
1793 		ns.disk = D_INCONSISTENT;
1794 	}
1795 
1796 	if (drbd_md_test_flag(device->ldev, MDF_PEER_OUT_DATED))
1797 		ns.pdsk = D_OUTDATED;
1798 
1799 	rcu_read_lock();
1800 	if (ns.disk == D_CONSISTENT &&
1801 	    (ns.pdsk == D_OUTDATED || rcu_dereference(device->ldev->disk_conf)->fencing == FP_DONT_CARE))
1802 		ns.disk = D_UP_TO_DATE;
1803 
1804 	/* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
1805 	   MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
1806 	   this point, because drbd_request_state() modifies these
1807 	   flags. */
1808 
1809 	if (rcu_dereference(device->ldev->disk_conf)->al_updates)
1810 		device->ldev->md.flags &= ~MDF_AL_DISABLED;
1811 	else
1812 		device->ldev->md.flags |= MDF_AL_DISABLED;
1813 
1814 	rcu_read_unlock();
1815 
1816 	/* In case we are C_CONNECTED postpone any decision on the new disk
1817 	   state after the negotiation phase. */
1818 	if (device->state.conn == C_CONNECTED) {
1819 		device->new_state_tmp.i = ns.i;
1820 		ns.i = os.i;
1821 		ns.disk = D_NEGOTIATING;
1822 
1823 		/* We expect to receive up-to-date UUIDs soon.
1824 		   To avoid a race in receive_state, free p_uuid while
1825 		   holding req_lock. I.e. atomic with the state change */
1826 		kfree(device->p_uuid);
1827 		device->p_uuid = NULL;
1828 	}
1829 
1830 	rv = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
1831 	spin_unlock_irq(&device->resource->req_lock);
1832 
1833 	if (rv < SS_SUCCESS)
1834 		goto force_diskless_dec;
1835 
1836 	mod_timer(&device->request_timer, jiffies + HZ);
1837 
1838 	if (device->state.role == R_PRIMARY)
1839 		device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
1840 	else
1841 		device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
1842 
1843 	drbd_md_mark_dirty(device);
1844 	drbd_md_sync(device);
1845 
1846 	kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
1847 	put_ldev(device);
1848 	conn_reconfig_done(first_peer_device(device)->connection);
1849 	mutex_unlock(&adm_ctx.resource->adm_mutex);
1850 	drbd_adm_finish(&adm_ctx, info, retcode);
1851 	return 0;
1852 
1853  force_diskless_dec:
1854 	put_ldev(device);
1855  force_diskless:
1856 	drbd_force_state(device, NS(disk, D_DISKLESS));
1857 	drbd_md_sync(device);
1858  fail:
1859 	conn_reconfig_done(first_peer_device(device)->connection);
1860 	if (nbc) {
1861 		if (nbc->backing_bdev)
1862 			blkdev_put(nbc->backing_bdev,
1863 				   FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1864 		if (nbc->md_bdev)
1865 			blkdev_put(nbc->md_bdev,
1866 				   FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1867 		kfree(nbc);
1868 	}
1869 	kfree(new_disk_conf);
1870 	lc_destroy(resync_lru);
1871 	kfree(new_plan);
1872 	mutex_unlock(&adm_ctx.resource->adm_mutex);
1873  finish:
1874 	drbd_adm_finish(&adm_ctx, info, retcode);
1875 	return 0;
1876 }
1877 
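/* Take the local disk down to D_FAILED (it will proceed to D_DISKLESS).
 * With @force, skip the graceful path and force the state change;
 * otherwise suspend IO, wait out in-flight meta-data IO, and wait until
 * the disk has left D_FAILED. */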
1878 static int adm_detach(struct drbd_device *device, int force)
1879 {
1880 	enum drbd_state_rv retcode;
1881 	int ret;
1882 
1883 	if (force) {
1884 		set_bit(FORCE_DETACH, &device->flags);
1885 		drbd_force_state(device, NS(disk, D_FAILED));
1886 		retcode = SS_SUCCESS;
1887 		goto out;
1888 	}
1889 
1890 	drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
1891 	drbd_md_get_buffer(device); /* make sure there is no in-flight meta-data IO */
1892 	retcode = drbd_request_state(device, NS(disk, D_FAILED));
1893 	drbd_md_put_buffer(device);
1894 	/* D_FAILED will transition to DISKLESS. */
1895 	ret = wait_event_interruptible(device->misc_wait,
1896 			device->state.disk != D_FAILED);
1897 	drbd_resume_io(device);
1898 	if ((int)retcode == (int)SS_IS_DISKLESS)
1899 		retcode = SS_NOTHING_TO_DO;
1900 	if (ret)
1901 		retcode = ERR_INTR;
1902 out:
1903 	return retcode;
1904 }
1905 
1906 /* Detaching the disk is a process in multiple stages.  First we need to lock
1907  * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
1908  * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
1909  * internal references as well.
1910  * Only then we have finally detached. */
1911 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
1912 {
1913 	struct drbd_config_context adm_ctx;
1914 	enum drbd_ret_code retcode;
1915 	struct detach_parms parms = { };
1916 	int err;
1917 
1918 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1919 	if (!adm_ctx.reply_skb)
1920 		return retcode;
1921 	if (retcode != NO_ERROR)
1922 		goto out;
1923 
1924 	if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
1925 		err = detach_parms_from_attrs(&parms, info);
1926 		if (err) {
1927 			retcode = ERR_MANDATORY_TAG;
1928 			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1929 			goto out;
1930 		}
1931 	}
1932 
1933 	mutex_lock(&adm_ctx.resource->adm_mutex);
1934 	retcode = adm_detach(adm_ctx.device, parms.force_detach);
1935 	mutex_unlock(&adm_ctx.resource->adm_mutex);
1936 out:
1937 	drbd_adm_finish(&adm_ctx, info, retcode);
1938 	return 0;
1939 }
1940 
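/* Returns true if any volume of @connection is in a (possibly paused) resync. */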
1941 static bool conn_resync_running(struct drbd_connection *connection)
1942 {
1943 	struct drbd_peer_device *peer_device;
1944 	bool rv = false;
1945 	int vnr;
1946 
1947 	rcu_read_lock();
1948 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1949 		struct drbd_device *device = peer_device->device;
1950 		if (device->state.conn == C_SYNC_SOURCE ||
1951 		    device->state.conn == C_SYNC_TARGET ||
1952 		    device->state.conn == C_PAUSED_SYNC_S ||
1953 		    device->state.conn == C_PAUSED_SYNC_T) {
1954 			rv = true;
1955 			break;
1956 		}
1957 	}
1958 	rcu_read_unlock();
1959 
1960 	return rv;
1961 }
1962 
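/* Returns true if any volume of @connection is running online verify. */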
1963 static bool conn_ov_running(struct drbd_connection *connection)
1964 {
1965 	struct drbd_peer_device *peer_device;
1966 	bool rv = false;
1967 	int vnr;
1968 
1969 	rcu_read_lock();
1970 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1971 		struct drbd_device *device = peer_device->device;
1972 		if (device->state.conn == C_VERIFY_S ||
1973 		    device->state.conn == C_VERIFY_T) {
1974 			rv = true;
1975 			break;
1976 		}
1977 	}
1978 	rcu_read_unlock();
1979 
1980 	return rv;
1981 }
1982 
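/* Validate @new_net_conf against the current connection state and, if given,
 * @old_net_conf.  Caller must hold rcu_read_lock(). */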
1983 static enum drbd_ret_code
1984 _check_net_options(struct drbd_connection *connection, struct net_conf *old_net_conf, struct net_conf *new_net_conf)
1985 {
1986 	struct drbd_peer_device *peer_device;
1987 	int i;
1988 
1989 	if (old_net_conf && connection->cstate == C_WF_REPORT_PARAMS && connection->agreed_pro_version < 100) {
1990 		if (new_net_conf->wire_protocol != old_net_conf->wire_protocol)
1991 			return ERR_NEED_APV_100;
1992 
1993 		if (new_net_conf->two_primaries != old_net_conf->two_primaries)
1994 			return ERR_NEED_APV_100;
1995 
1996 		if (strcmp(new_net_conf->integrity_alg, old_net_conf->integrity_alg))
1997 			return ERR_NEED_APV_100;
1998 	}
1999 
2000 	if (!new_net_conf->two_primaries &&
2001 	    conn_highest_role(connection) == R_PRIMARY &&
2002 	    conn_highest_peer(connection) == R_PRIMARY)
2003 		return ERR_NEED_ALLOW_TWO_PRI;
2004 
2005 	if (new_net_conf->two_primaries &&
2006 	    (new_net_conf->wire_protocol != DRBD_PROT_C))
2007 		return ERR_NOT_PROTO_C;
2008 
2009 	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2010 		struct drbd_device *device = peer_device->device;
2011 		if (get_ldev(device)) {
2012 			enum drbd_fencing_p fp = rcu_dereference(device->ldev->disk_conf)->fencing;
2013 			put_ldev(device);
2014 			if (new_net_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
2015 				return ERR_STONITH_AND_PROT_A;
2016 		}
2017 		if (device->state.role == R_PRIMARY && new_net_conf->discard_my_data)
2018 			return ERR_DISCARD_IMPOSSIBLE;
2019 	}
2020 
2021 	if (new_net_conf->on_congestion != OC_BLOCK && new_net_conf->wire_protocol != DRBD_PROT_A)
2022 		return ERR_CONG_NOT_PROTO_A;
2023 
2024 	return NO_ERROR;
2025 }
2026 
2027 static enum drbd_ret_code
2028 check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
2029 {
2030 	enum drbd_ret_code rv;
2031 	struct drbd_peer_device *peer_device;
2032 	int i;
2033 
2034 	rcu_read_lock();
2035 	rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf);
2036 	rcu_read_unlock();
2037 
2038 	/* connection->volumes protected by genl_lock() here */
2039 	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2040 		struct drbd_device *device = peer_device->device;
2041 		if (!device->bitmap) {
2042 			if (drbd_bm_init(device))
2043 				return ERR_NOMEM;
2044 		}
2045 	}
2046 
2047 	return rv;
2048 }
2049 
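/* Hash transforms allocated while (re)configuring net options; on success
 * their ownership is handed over to the connection. */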
2050 struct crypto {
2051 	struct crypto_hash *verify_tfm;
2052 	struct crypto_hash *csums_tfm;
2053 	struct crypto_hash *cram_hmac_tfm;
2054 	struct crypto_hash *integrity_tfm;
2055 };
2056 
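/* Allocate a hash transform for @tfm_name.  An empty name means "none" and
 * is not an error; on allocation failure return @err_alg. */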
2057 static int
2058 alloc_hash(struct crypto_hash **tfm, char *tfm_name, int err_alg)
2059 {
2060 	if (!tfm_name[0])
2061 		return NO_ERROR;
2062 
2063 	*tfm = crypto_alloc_hash(tfm_name, 0, CRYPTO_ALG_ASYNC);
2064 	if (IS_ERR(*tfm)) {
2065 		*tfm = NULL;
2066 		return err_alg;
2067 	}
2068 
2069 	return NO_ERROR;
2070 }
2071 
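/* Allocate the csums, verify, integrity and (if configured) cram-hmac
 * transforms named in @new_net_conf. */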
2072 static enum drbd_ret_code
2073 alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf)
2074 {
2075 	char hmac_name[CRYPTO_MAX_ALG_NAME];
2076 	enum drbd_ret_code rv;
2077 
2078 	rv = alloc_hash(&crypto->csums_tfm, new_net_conf->csums_alg,
2079 		       ERR_CSUMS_ALG);
2080 	if (rv != NO_ERROR)
2081 		return rv;
2082 	rv = alloc_hash(&crypto->verify_tfm, new_net_conf->verify_alg,
2083 		       ERR_VERIFY_ALG);
2084 	if (rv != NO_ERROR)
2085 		return rv;
2086 	rv = alloc_hash(&crypto->integrity_tfm, new_net_conf->integrity_alg,
2087 		       ERR_INTEGRITY_ALG);
2088 	if (rv != NO_ERROR)
2089 		return rv;
2090 	if (new_net_conf->cram_hmac_alg[0] != 0) {
2091 		snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
2092 			 new_net_conf->cram_hmac_alg);
2093 
2094 		rv = alloc_hash(&crypto->cram_hmac_tfm, hmac_name,
2095 			       ERR_AUTH_ALG);
2096 	}
2097 
2098 	return rv;
2099 }
2100 
2101 static void free_crypto(struct crypto *crypto)
2102 {
2103 	crypto_free_hash(crypto->cram_hmac_tfm);
2104 	crypto_free_hash(crypto->integrity_tfm);
2105 	crypto_free_hash(crypto->csums_tfm);
2106 	crypto_free_hash(crypto->verify_tfm);
2107 }
2108 
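/* Change the net options of an already configured connection: validate the
 * new options against the current state, swap in newly allocated crypto
 * transforms, and inform the peer where applicable. */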
2109 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
2110 {
2111 	struct drbd_config_context adm_ctx;
2112 	enum drbd_ret_code retcode;
2113 	struct drbd_connection *connection;
2114 	struct net_conf *old_net_conf, *new_net_conf = NULL;
2115 	int err;
2116 	int ovr; /* online verify running */
2117 	int rsr; /* re-sync running */
2118 	struct crypto crypto = { };
2119 
2120 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2121 	if (!adm_ctx.reply_skb)
2122 		return retcode;
2123 	if (retcode != NO_ERROR)
2124 		goto finish;
2125 
2126 	connection = adm_ctx.connection;
2127 	mutex_lock(&adm_ctx.resource->adm_mutex);
2128 
2129 	new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
2130 	if (!new_net_conf) {
2131 		retcode = ERR_NOMEM;
2132 		goto out;
2133 	}
2134 
2135 	conn_reconfig_start(connection);
2136 
2137 	mutex_lock(&connection->data.mutex);
2138 	mutex_lock(&connection->resource->conf_update);
2139 	old_net_conf = connection->net_conf;
2140 
2141 	if (!old_net_conf) {
2142 		drbd_msg_put_info(adm_ctx.reply_skb, "net conf missing, try connect");
2143 		retcode = ERR_INVALID_REQUEST;
2144 		goto fail;
2145 	}
2146 
2147 	*new_net_conf = *old_net_conf;
2148 	if (should_set_defaults(info))
2149 		set_net_conf_defaults(new_net_conf);
2150 
2151 	err = net_conf_from_attrs_for_change(new_net_conf, info);
2152 	if (err && err != -ENOMSG) {
2153 		retcode = ERR_MANDATORY_TAG;
2154 		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2155 		goto fail;
2156 	}
2157 
2158 	retcode = check_net_options(connection, new_net_conf);
2159 	if (retcode != NO_ERROR)
2160 		goto fail;
2161 
2162 	/* re-sync running */
2163 	rsr = conn_resync_running(connection);
2164 	if (rsr && strcmp(new_net_conf->csums_alg, old_net_conf->csums_alg)) {
2165 		retcode = ERR_CSUMS_RESYNC_RUNNING;
2166 		goto fail;
2167 	}
2168 
2169 	/* online verify running */
2170 	ovr = conn_ov_running(connection);
2171 	if (ovr && strcmp(new_net_conf->verify_alg, old_net_conf->verify_alg)) {
2172 		retcode = ERR_VERIFY_RUNNING;
2173 		goto fail;
2174 	}
2175 
2176 	retcode = alloc_crypto(&crypto, new_net_conf);
2177 	if (retcode != NO_ERROR)
2178 		goto fail;
2179 
2180 	rcu_assign_pointer(connection->net_conf, new_net_conf);
2181 
2182 	if (!rsr) {
2183 		crypto_free_hash(connection->csums_tfm);
2184 		connection->csums_tfm = crypto.csums_tfm;
2185 		crypto.csums_tfm = NULL;
2186 	}
2187 	if (!ovr) {
2188 		crypto_free_hash(connection->verify_tfm);
2189 		connection->verify_tfm = crypto.verify_tfm;
2190 		crypto.verify_tfm = NULL;
2191 	}
2192 
2193 	crypto_free_hash(connection->integrity_tfm);
2194 	connection->integrity_tfm = crypto.integrity_tfm;
2195 	if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100)
2196 		/* Do this without trying to take connection->data.mutex again.  */
2197 		__drbd_send_protocol(connection, P_PROTOCOL_UPDATE);
2198 
2199 	crypto_free_hash(connection->cram_hmac_tfm);
2200 	connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2201 
2202 	mutex_unlock(&connection->resource->conf_update);
2203 	mutex_unlock(&connection->data.mutex);
2204 	synchronize_rcu();
2205 	kfree(old_net_conf);
2206 
2207 	if (connection->cstate >= C_WF_REPORT_PARAMS) {
2208 		struct drbd_peer_device *peer_device;
2209 		int vnr;
2210 
2211 		idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
2212 			drbd_send_sync_param(peer_device);
2213 	}
2214 
2215 	goto done;
2216 
2217  fail:
2218 	mutex_unlock(&connection->resource->conf_update);
2219 	mutex_unlock(&connection->data.mutex);
2220 	free_crypto(&crypto);
2221 	kfree(new_net_conf);
2222  done:
2223 	conn_reconfig_done(connection);
2224  out:
2225 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2226  finish:
2227 	drbd_adm_finish(&adm_ctx, info, retcode);
2228 	return 0;
2229 }
2230 
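/* Configure the network for a resource: reject duplicate local/peer
 * addresses, install the new net_conf and crypto transforms, and request
 * C_UNCONNECTED so that the receiver starts connecting. */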
2231 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
2232 {
2233 	struct drbd_config_context adm_ctx;
2234 	struct drbd_peer_device *peer_device;
2235 	struct net_conf *old_net_conf, *new_net_conf = NULL;
2236 	struct crypto crypto = { };
2237 	struct drbd_resource *resource;
2238 	struct drbd_connection *connection;
2239 	enum drbd_ret_code retcode;
2240 	int i;
2241 	int err;
2242 
2243 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2244 
2245 	if (!adm_ctx.reply_skb)
2246 		return retcode;
2247 	if (retcode != NO_ERROR)
2248 		goto out;
2249 	if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
2250 		drbd_msg_put_info(adm_ctx.reply_skb, "connection endpoint(s) missing");
2251 		retcode = ERR_INVALID_REQUEST;
2252 		goto out;
2253 	}
2254 
2255 	/* No need for _rcu here. All reconfiguration is
2256 	 * strictly serialized on genl_lock(). We are protected against
2257 	 * concurrent reconfiguration/addition/deletion */
2258 	for_each_resource(resource, &drbd_resources) {
2259 		for_each_connection(connection, resource) {
2260 			if (nla_len(adm_ctx.my_addr) == connection->my_addr_len &&
2261 			    !memcmp(nla_data(adm_ctx.my_addr), &connection->my_addr,
2262 				    connection->my_addr_len)) {
2263 				retcode = ERR_LOCAL_ADDR;
2264 				goto out;
2265 			}
2266 
2267 			if (nla_len(adm_ctx.peer_addr) == connection->peer_addr_len &&
2268 			    !memcmp(nla_data(adm_ctx.peer_addr), &connection->peer_addr,
2269 				    connection->peer_addr_len)) {
2270 				retcode = ERR_PEER_ADDR;
2271 				goto out;
2272 			}
2273 		}
2274 	}
2275 
2276 	mutex_lock(&adm_ctx.resource->adm_mutex);
2277 	connection = first_connection(adm_ctx.resource);
2278 	conn_reconfig_start(connection);
2279 
2280 	if (connection->cstate > C_STANDALONE) {
2281 		retcode = ERR_NET_CONFIGURED;
2282 		goto fail;
2283 	}
2284 
2285 	/* allocation not in the IO path, drbdsetup / netlink process context */
2286 	new_net_conf = kzalloc(sizeof(*new_net_conf), GFP_KERNEL);
2287 	if (!new_net_conf) {
2288 		retcode = ERR_NOMEM;
2289 		goto fail;
2290 	}
2291 
2292 	set_net_conf_defaults(new_net_conf);
2293 
2294 	err = net_conf_from_attrs(new_net_conf, info);
2295 	if (err && err != -ENOMSG) {
2296 		retcode = ERR_MANDATORY_TAG;
2297 		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2298 		goto fail;
2299 	}
2300 
2301 	retcode = check_net_options(connection, new_net_conf);
2302 	if (retcode != NO_ERROR)
2303 		goto fail;
2304 
2305 	retcode = alloc_crypto(&crypto, new_net_conf);
2306 	if (retcode != NO_ERROR)
2307 		goto fail;
2308 
2309 	((char *)new_net_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
2310 
2311 	drbd_flush_workqueue(&connection->sender_work);
2312 
2313 	mutex_lock(&adm_ctx.resource->conf_update);
2314 	old_net_conf = connection->net_conf;
2315 	if (old_net_conf) {
2316 		retcode = ERR_NET_CONFIGURED;
2317 		mutex_unlock(&adm_ctx.resource->conf_update);
2318 		goto fail;
2319 	}
2320 	rcu_assign_pointer(connection->net_conf, new_net_conf);
2321 
2322 	conn_free_crypto(connection);
2323 	connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2324 	connection->integrity_tfm = crypto.integrity_tfm;
2325 	connection->csums_tfm = crypto.csums_tfm;
2326 	connection->verify_tfm = crypto.verify_tfm;
2327 
2328 	connection->my_addr_len = nla_len(adm_ctx.my_addr);
2329 	memcpy(&connection->my_addr, nla_data(adm_ctx.my_addr), connection->my_addr_len);
2330 	connection->peer_addr_len = nla_len(adm_ctx.peer_addr);
2331 	memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len);
2332 
2333 	mutex_unlock(&adm_ctx.resource->conf_update);
2334 
2335 	rcu_read_lock();
2336 	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2337 		struct drbd_device *device = peer_device->device;
2338 		device->send_cnt = 0;
2339 		device->recv_cnt = 0;
2340 	}
2341 	rcu_read_unlock();
2342 
2343 	retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
2344 
2345 	conn_reconfig_done(connection);
2346 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2347 	drbd_adm_finish(&adm_ctx, info, retcode);
2348 	return 0;
2349 
2350 fail:
2351 	free_crypto(&crypto);
2352 	kfree(new_net_conf);
2353 
2354 	conn_reconfig_done(connection);
2355 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2356 out:
2357 	drbd_adm_finish(&adm_ctx, info, retcode);
2358 	return 0;
2359 }
2360 
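/* Request C_DISCONNECTING, retrying with an outdated (peer) disk where the
 * state engine demands it; on success wait for the receiver thread to stop
 * and force C_STANDALONE. */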
2361 static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force)
2362 {
2363 	enum drbd_state_rv rv;
2364 
2365 	rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2366 			force ? CS_HARD : 0);
2367 
2368 	switch (rv) {
2369 	case SS_NOTHING_TO_DO:
2370 		break;
2371 	case SS_ALREADY_STANDALONE:
2372 		return SS_SUCCESS;
2373 	case SS_PRIMARY_NOP:
2374 		/* Our state checking code wants to see the peer outdated. */
2375 		rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0);
2376 
2377 		if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */
2378 			rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_VERBOSE);
2379 
2380 		break;
2381 	case SS_CW_FAILED_BY_PEER:
2382 		/* The peer probably wants to see us outdated. */
2383 		rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
2384 							disk, D_OUTDATED), 0);
2385 		if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
2386 			rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2387 					CS_HARD);
2388 		}
2389 		break;
2390 	default:
2391 		break; /* no special handling necessary */
2392 	}
2393 
2394 	if (rv >= SS_SUCCESS) {
2395 		enum drbd_state_rv rv2;
2396 		/* No one else can reconfigure the network while I am here.
2397 		 * The state handling only uses drbd_thread_stop_nowait();
2398 		 * we want to really wait here until the receiver has stopped.
2399 		 */
2400 		drbd_thread_stop(&connection->receiver);
2401 
2402 		/* Race breaker.  This additional state change request may be
2403 		 * necessary, if this was a forced disconnect during a receiver
2404 		 * restart.  We may have "killed" the receiver thread just
2405 		 * after drbd_receiver() returned.  Typically, we should be
2406 		 * C_STANDALONE already, now, and this becomes a no-op.
2407 		 */
2408 		rv2 = conn_request_state(connection, NS(conn, C_STANDALONE),
2409 				CS_VERBOSE | CS_HARD);
2410 		if (rv2 < SS_SUCCESS)
2411 			drbd_err(connection,
2412 				"unexpected rv2=%d in conn_try_disconnect()\n",
2413 				rv2);
2414 	}
2415 	return rv;
2416 }
2417 
2418 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
2419 {
2420 	struct drbd_config_context adm_ctx;
2421 	struct disconnect_parms parms;
2422 	struct drbd_connection *connection;
2423 	enum drbd_state_rv rv;
2424 	enum drbd_ret_code retcode;
2425 	int err;
2426 
2427 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2428 	if (!adm_ctx.reply_skb)
2429 		return retcode;
2430 	if (retcode != NO_ERROR)
2431 		goto fail;
2432 
2433 	connection = adm_ctx.connection;
2434 	memset(&parms, 0, sizeof(parms));
2435 	if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
2436 		err = disconnect_parms_from_attrs(&parms, info);
2437 		if (err) {
2438 			retcode = ERR_MANDATORY_TAG;
2439 			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2440 			goto fail;
2441 		}
2442 	}
2443 
2444 	mutex_lock(&adm_ctx.resource->adm_mutex);
2445 	rv = conn_try_disconnect(connection, parms.force_disconnect);
2446 	if (rv < SS_SUCCESS)
2447 		retcode = rv;  /* FIXME: Type mismatch. */
2448 	else
2449 		retcode = NO_ERROR;
2450 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2451  fail:
2452 	drbd_adm_finish(&adm_ctx, info, retcode);
2453 	return 0;
2454 }
2455 
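/* After an online grow, decide who resyncs the new area: the current primary
 * if the roles differ, otherwise the side holding RESOLVE_CONFLICTS becomes
 * sync source. */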
2456 void resync_after_online_grow(struct drbd_device *device)
2457 {
2458 	int iass; /* I am sync source */
2459 
2460 	drbd_info(device, "Resync of new storage after online grow\n");
2461 	if (device->state.role != device->state.peer)
2462 		iass = (device->state.role == R_PRIMARY);
2463 	else
2464 		iass = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
2465 
2466 	if (iass)
2467 		drbd_start_resync(device, C_SYNC_SOURCE);
2468 	else
2469 		_drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
2470 }
2471 
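/* Resize the device: optionally adopt a new user-requested size and/or
 * activity log layout, re-determine the device size, and tell a connected
 * peer about the result. */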
2472 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
2473 {
2474 	struct drbd_config_context adm_ctx;
2475 	struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
2476 	struct resize_parms rs;
2477 	struct drbd_device *device;
2478 	enum drbd_ret_code retcode;
2479 	enum determine_dev_size dd;
2480 	bool change_al_layout = false;
2481 	enum dds_flags ddsf;
2482 	sector_t u_size;
2483 	int err;
2484 
2485 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2486 	if (!adm_ctx.reply_skb)
2487 		return retcode;
2488 	if (retcode != NO_ERROR)
2489 		goto finish;
2490 
2491 	mutex_lock(&adm_ctx.resource->adm_mutex);
2492 	device = adm_ctx.device;
2493 	if (!get_ldev(device)) {
2494 		retcode = ERR_NO_DISK;
2495 		goto fail;
2496 	}
2497 
2498 	memset(&rs, 0, sizeof(struct resize_parms));
2499 	rs.al_stripes = device->ldev->md.al_stripes;
2500 	rs.al_stripe_size = device->ldev->md.al_stripe_size_4k * 4;
2501 	if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
2502 		err = resize_parms_from_attrs(&rs, info);
2503 		if (err) {
2504 			retcode = ERR_MANDATORY_TAG;
2505 			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2506 			goto fail_ldev;
2507 		}
2508 	}
2509 
2510 	if (device->state.conn > C_CONNECTED) {
2511 		retcode = ERR_RESIZE_RESYNC;
2512 		goto fail_ldev;
2513 	}
2514 
2515 	if (device->state.role == R_SECONDARY &&
2516 	    device->state.peer == R_SECONDARY) {
2517 		retcode = ERR_NO_PRIMARY;
2518 		goto fail_ldev;
2519 	}
2520 
2521 	if (rs.no_resync && first_peer_device(device)->connection->agreed_pro_version < 93) {
2522 		retcode = ERR_NEED_APV_93;
2523 		goto fail_ldev;
2524 	}
2525 
2526 	rcu_read_lock();
2527 	u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
2528 	rcu_read_unlock();
2529 	if (u_size != (sector_t)rs.resize_size) {
2530 		new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
2531 		if (!new_disk_conf) {
2532 			retcode = ERR_NOMEM;
2533 			goto fail_ldev;
2534 		}
2535 	}
2536 
2537 	if (device->ldev->md.al_stripes != rs.al_stripes ||
2538 	    device->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
2539 		u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
2540 
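		/* al_size_k is in KiB: refuse layouts above 16 GiB or below 32 KiB */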
2541 		if (al_size_k > (16 * 1024 * 1024)) {
2542 			retcode = ERR_MD_LAYOUT_TOO_BIG;
2543 			goto fail_ldev;
2544 		}
2545 
2546 		if (al_size_k < MD_32kB_SECT/2) {
2547 			retcode = ERR_MD_LAYOUT_TOO_SMALL;
2548 			goto fail_ldev;
2549 		}
2550 
2551 		if (device->state.conn != C_CONNECTED && !rs.resize_force) {
2552 			retcode = ERR_MD_LAYOUT_CONNECTED;
2553 			goto fail_ldev;
2554 		}
2555 
2556 		change_al_layout = true;
2557 	}
2558 
2559 	if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev))
2560 		device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
2561 
2562 	if (new_disk_conf) {
2563 		mutex_lock(&device->resource->conf_update);
2564 		old_disk_conf = device->ldev->disk_conf;
2565 		*new_disk_conf = *old_disk_conf;
2566 		new_disk_conf->disk_size = (sector_t)rs.resize_size;
2567 		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
2568 		mutex_unlock(&device->resource->conf_update);
2569 		synchronize_rcu();
2570 		kfree(old_disk_conf);
2571 	}
2572 
2573 	ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
2574 	dd = drbd_determine_dev_size(device, ddsf, change_al_layout ? &rs : NULL);
2575 	drbd_md_sync(device);
2576 	put_ldev(device);
2577 	if (dd == DS_ERROR) {
2578 		retcode = ERR_NOMEM_BITMAP;
2579 		goto fail;
2580 	} else if (dd == DS_ERROR_SPACE_MD) {
2581 		retcode = ERR_MD_LAYOUT_NO_FIT;
2582 		goto fail;
2583 	} else if (dd == DS_ERROR_SHRINK) {
2584 		retcode = ERR_IMPLICIT_SHRINK;
2585 		goto fail;
2586 	}
2587 
2588 	if (device->state.conn == C_CONNECTED) {
2589 		if (dd == DS_GREW)
2590 			set_bit(RESIZE_PENDING, &device->flags);
2591 
2592 		drbd_send_uuids(first_peer_device(device));
2593 		drbd_send_sizes(first_peer_device(device), 1, ddsf);
2594 	}
2595 
2596  fail:
2597 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2598  finish:
2599 	drbd_adm_finish(&adm_ctx, info, retcode);
2600 	return 0;
2601 
2602  fail_ldev:
2603 	put_ldev(device);
2604 	goto fail;
2605 }
2606 
2607 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
2608 {
2609 	struct drbd_config_context adm_ctx;
2610 	enum drbd_ret_code retcode;
2611 	struct res_opts res_opts;
2612 	int err;
2613 
2614 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2615 	if (!adm_ctx.reply_skb)
2616 		return retcode;
2617 	if (retcode != NO_ERROR)
2618 		goto fail;
2619 
2620 	res_opts = adm_ctx.resource->res_opts;
2621 	if (should_set_defaults(info))
2622 		set_res_opts_defaults(&res_opts);
2623 
2624 	err = res_opts_from_attrs(&res_opts, info);
2625 	if (err && err != -ENOMSG) {
2626 		retcode = ERR_MANDATORY_TAG;
2627 		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2628 		goto fail;
2629 	}
2630 
2631 	mutex_lock(&adm_ctx.resource->adm_mutex);
2632 	err = set_resource_options(adm_ctx.resource, &res_opts);
2633 	if (err) {
2634 		retcode = ERR_INVALID_REQUEST;
2635 		if (err == -ENOMEM)
2636 			retcode = ERR_NOMEM;
2637 	}
2638 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2639 
2640 fail:
2641 	drbd_adm_finish(&adm_ctx, info, retcode);
2642 	return 0;
2643 }
2644 
2645 int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
2646 {
2647 	struct drbd_config_context adm_ctx;
2648 	struct drbd_device *device;
2649 	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2650 
2651 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2652 	if (!adm_ctx.reply_skb)
2653 		return retcode;
2654 	if (retcode != NO_ERROR)
2655 		goto out;
2656 
2657 	mutex_lock(&adm_ctx.resource->adm_mutex);
2658 	device = adm_ctx.device;
2659 
2660 	/* If there is still bitmap IO pending, probably because of a previous
2661 	 * resync just being finished, wait for it before requesting a new resync.
2662 	 * Also wait for its after_state_ch(). */
2663 	drbd_suspend_io(device);
2664 	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2665 	drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
2666 
2667 	/* If we happen to be C_STANDALONE R_SECONDARY, just change to
2668 	 * D_INCONSISTENT, and set all bits in the bitmap.  Otherwise,
2669 	 * try to start a resync handshake as sync target for full sync.
2670 	 */
2671 	if (device->state.conn == C_STANDALONE && device->state.role == R_SECONDARY) {
2672 		retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT));
2673 		if (retcode >= SS_SUCCESS) {
2674 			if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
2675 				"set_n_write from invalidate", BM_LOCKED_MASK))
2676 				retcode = ERR_IO_MD_DISK;
2677 		}
2678 	} else
2679 		retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
2680 	drbd_resume_io(device);
2681 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2682 out:
2683 	drbd_adm_finish(&adm_ctx, info, retcode);
2684 	return 0;
2685 }
2686 
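/* Helper for admin commands that map directly onto a single
 * drbd_request_state() call. */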
2687 static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
2688 		union drbd_state mask, union drbd_state val)
2689 {
2690 	struct drbd_config_context adm_ctx;
2691 	enum drbd_ret_code retcode;
2692 
2693 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2694 	if (!adm_ctx.reply_skb)
2695 		return retcode;
2696 	if (retcode != NO_ERROR)
2697 		goto out;
2698 
2699 	mutex_lock(&adm_ctx.resource->adm_mutex);
2700 	retcode = drbd_request_state(adm_ctx.device, mask, val);
2701 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2702 out:
2703 	drbd_adm_finish(&adm_ctx, info, retcode);
2704 	return 0;
2705 }
2706 
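/* Bitmap IO worker for invalidate-peer: set all bits in the bitmap and
 * suspend activity log updates. */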
2707 static int drbd_bmio_set_susp_al(struct drbd_device *device)
2708 {
2709 	int rv;
2710 
2711 	rv = drbd_bmio_set_n_write(device);
2712 	drbd_suspend_al(device);
2713 	return rv;
2714 }
2715 
2716 int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
2717 {
2718 	struct drbd_config_context adm_ctx;
2719 	int retcode; /* drbd_ret_code, drbd_state_rv */
2720 	struct drbd_device *device;
2721 
2722 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2723 	if (!adm_ctx.reply_skb)
2724 		return retcode;
2725 	if (retcode != NO_ERROR)
2726 		goto out;
2727 
2728 	mutex_lock(&adm_ctx.resource->adm_mutex);
2729 	device = adm_ctx.device;
2730 
2731 	/* If there is still bitmap IO pending, probably because of a previous
2732 	 * resync just being finished, wait for it before requesting a new resync.
2733 	 * Also wait for its after_state_ch(). */
2734 	drbd_suspend_io(device);
2735 	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2736 	drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
2737 
2738 	/* If we happen to be C_STANDALONE R_PRIMARY, just set all bits
2739 	 * in the bitmap.  Otherwise, try to start a resync handshake
2740 	 * as sync source for full sync.
2741 	 */
2742 	if (device->state.conn == C_STANDALONE && device->state.role == R_PRIMARY) {
2743 		/* The peer will get a resync upon connect anyway. Just make that
2744 		   into a full resync. */
2745 		retcode = drbd_request_state(device, NS(pdsk, D_INCONSISTENT));
2746 		if (retcode >= SS_SUCCESS) {
2747 			if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al,
2748 				"set_n_write from invalidate_peer",
2749 				BM_LOCKED_SET_ALLOWED))
2750 				retcode = ERR_IO_MD_DISK;
2751 		}
2752 	} else
2753 		retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
2754 	drbd_resume_io(device);
2755 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2756 out:
2757 	drbd_adm_finish(&adm_ctx, info, retcode);
2758 	return 0;
2759 }
2760 
2761 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
2762 {
2763 	struct drbd_config_context adm_ctx;
2764 	enum drbd_ret_code retcode;
2765 
2766 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2767 	if (!adm_ctx.reply_skb)
2768 		return retcode;
2769 	if (retcode != NO_ERROR)
2770 		goto out;
2771 
2772 	mutex_lock(&adm_ctx.resource->adm_mutex);
2773 	if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
2774 		retcode = ERR_PAUSE_IS_SET;
2775 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2776 out:
2777 	drbd_adm_finish(&adm_ctx, info, retcode);
2778 	return 0;
2779 }
2780 
2781 int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
2782 {
2783 	struct drbd_config_context adm_ctx;
2784 	union drbd_dev_state s;
2785 	enum drbd_ret_code retcode;
2786 
2787 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2788 	if (!adm_ctx.reply_skb)
2789 		return retcode;
2790 	if (retcode != NO_ERROR)
2791 		goto out;
2792 
2793 	mutex_lock(&adm_ctx.resource->adm_mutex);
2794 	if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
2795 		s = adm_ctx.device->state;
2796 		if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
2797 			retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
2798 				  s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
2799 		} else {
2800 			retcode = ERR_PAUSE_IS_CLEAR;
2801 		}
2802 	}
2803 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2804 out:
2805 	drbd_adm_finish(&adm_ctx, info, retcode);
2806 	return 0;
2807 }
2808 
2809 int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
2810 {
2811 	return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
2812 }
2813 
2814 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
2815 {
2816 	struct drbd_config_context adm_ctx;
2817 	struct drbd_device *device;
2818 	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2819 
2820 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2821 	if (!adm_ctx.reply_skb)
2822 		return retcode;
2823 	if (retcode != NO_ERROR)
2824 		goto out;
2825 
2826 	mutex_lock(&adm_ctx.resource->adm_mutex);
2827 	device = adm_ctx.device;
2828 	if (test_bit(NEW_CUR_UUID, &device->flags)) {
2829 		drbd_uuid_new_current(device);
2830 		clear_bit(NEW_CUR_UUID, &device->flags);
2831 	}
2832 	drbd_suspend_io(device);
2833 	retcode = drbd_request_state(device, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
2834 	if (retcode == SS_SUCCESS) {
2835 		if (device->state.conn < C_CONNECTED)
2836 			tl_clear(first_peer_device(device)->connection);
2837 		if (device->state.disk == D_DISKLESS || device->state.disk == D_FAILED)
2838 			tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO);
2839 	}
2840 	drbd_resume_io(device);
2841 	mutex_unlock(&adm_ctx.resource->adm_mutex);
2842 out:
2843 	drbd_adm_finish(&adm_ctx, info, retcode);
2844 	return 0;
2845 }
2846 
2847 int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
2848 {
2849 	return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
2850 }
2851 
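/* Nest a DRBD_NLA_CFG_CONTEXT attribute carrying the volume number, the
 * resource name and, if known, the local and peer addresses. */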
2852 static int nla_put_drbd_cfg_context(struct sk_buff *skb,
2853 				    struct drbd_resource *resource,
2854 				    struct drbd_connection *connection,
2855 				    struct drbd_device *device)
2856 {
2857 	struct nlattr *nla;
2858 	nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
2859 	if (!nla)
2860 		goto nla_put_failure;
2861 	if (device &&
2862 	    nla_put_u32(skb, T_ctx_volume, device->vnr))
2863 		goto nla_put_failure;
2864 	if (nla_put_string(skb, T_ctx_resource_name, resource->name))
2865 		goto nla_put_failure;
2866 	if (connection) {
2867 		if (connection->my_addr_len &&
2868 		    nla_put(skb, T_ctx_my_addr, connection->my_addr_len, &connection->my_addr))
2869 			goto nla_put_failure;
2870 		if (connection->peer_addr_len &&
2871 		    nla_put(skb, T_ctx_peer_addr, connection->peer_addr_len, &connection->peer_addr))
2872 			goto nla_put_failure;
2873 	}
2874 	nla_nest_end(skb, nla);
2875 	return 0;
2876 
2877 nla_put_failure:
2878 	if (nla)
2879 		nla_nest_cancel(skb, nla);
2880 	return -EMSGSIZE;
2881 }
2882 
2883 /*
2884  * Return the connection of @resource if @resource has exactly one connection.
2885  */
2886 static struct drbd_connection *the_only_connection(struct drbd_resource *resource)
2887 {
2888 	struct list_head *connections = &resource->connections;
2889 
2890 	if (list_empty(connections) || connections->next->next != connections)
2891 		return NULL;
2892 	return list_first_entry(&resource->connections, struct drbd_connection, connections);
2893 }
2894 
2895 int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
2896 		const struct sib_info *sib)
2897 {
2898 	struct drbd_resource *resource = device->resource;
2899 	struct state_info *si = NULL; /* for sizeof(si->member); */
2900 	struct nlattr *nla;
2901 	int got_ldev;
2902 	int err = 0;
2903 	int exclude_sensitive;
2904 
2905 	/* If sib != NULL, this is drbd_bcast_event, which anyone can listen
2906 	 * to.  So we better exclude_sensitive information.
2907 	 *
2908 	 * If sib == NULL, this is drbd_adm_get_status, executed synchronously
2909 	 * in the context of the requesting user process. Exclude sensitive
2910 	 * information, unless current has superuser.
2911 	 *
2912 	 * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
2913 	 * relies on the current implementation of netlink_dump(), which
2914 	 * executes the dump callback successively from netlink_recvmsg(),
2915 	 * always in the context of the receiving process */
2916 	exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);
2917 
2918 	got_ldev = get_ldev(device);
2919 
2920 	/* We still need to add connection name and volume number information.
2921 	 * Minor number is in drbd_genlmsghdr. */
2922 	if (nla_put_drbd_cfg_context(skb, resource, the_only_connection(resource), device))
2923 		goto nla_put_failure;
2924 
2925 	if (res_opts_to_skb(skb, &device->resource->res_opts, exclude_sensitive))
2926 		goto nla_put_failure;
2927 
2928 	rcu_read_lock();
2929 	if (got_ldev) {
2930 		struct disk_conf *disk_conf;
2931 
2932 		disk_conf = rcu_dereference(device->ldev->disk_conf);
2933 		err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
2934 	}
2935 	if (!err) {
2936 		struct net_conf *nc;
2937 
2938 		nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
2939 		if (nc)
2940 			err = net_conf_to_skb(skb, nc, exclude_sensitive);
2941 	}
2942 	rcu_read_unlock();
2943 	if (err)
2944 		goto nla_put_failure;
2945 
2946 	nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
2947 	if (!nla)
2948 		goto nla_put_failure;
2949 	if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
2950 	    nla_put_u32(skb, T_current_state, device->state.i) ||
2951 	    nla_put_u64(skb, T_ed_uuid, device->ed_uuid) ||
2952 	    nla_put_u64(skb, T_capacity, drbd_get_capacity(device->this_bdev)) ||
2953 	    nla_put_u64(skb, T_send_cnt, device->send_cnt) ||
2954 	    nla_put_u64(skb, T_recv_cnt, device->recv_cnt) ||
2955 	    nla_put_u64(skb, T_read_cnt, device->read_cnt) ||
2956 	    nla_put_u64(skb, T_writ_cnt, device->writ_cnt) ||
2957 	    nla_put_u64(skb, T_al_writ_cnt, device->al_writ_cnt) ||
2958 	    nla_put_u64(skb, T_bm_writ_cnt, device->bm_writ_cnt) ||
2959 	    nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&device->ap_bio_cnt)) ||
2960 	    nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&device->ap_pending_cnt)) ||
2961 	    nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&device->rs_pending_cnt)))
2962 		goto nla_put_failure;
2963 
2964 	if (got_ldev) {
2965 		int err;
2966 
2967 		spin_lock_irq(&device->ldev->md.uuid_lock);
2968 		err = nla_put(skb, T_uuids, sizeof(si->uuids), device->ldev->md.uuid);
2969 		spin_unlock_irq(&device->ldev->md.uuid_lock);
2970 
2971 		if (err)
2972 			goto nla_put_failure;
2973 
2974 		if (nla_put_u32(skb, T_disk_flags, device->ldev->md.flags) ||
2975 		    nla_put_u64(skb, T_bits_total, drbd_bm_bits(device)) ||
2976 		    nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(device)))
2977 			goto nla_put_failure;
2978 		if (C_SYNC_SOURCE <= device->state.conn &&
2979 		    C_PAUSED_SYNC_T >= device->state.conn) {
2980 			if (nla_put_u64(skb, T_bits_rs_total, device->rs_total) ||
2981 			    nla_put_u64(skb, T_bits_rs_failed, device->rs_failed))
2982 				goto nla_put_failure;
2983 		}
2984 	}
2985 
2986 	if (sib) {
2987 		switch(sib->sib_reason) {
2988 		case SIB_SYNC_PROGRESS:
2989 		case SIB_GET_STATUS_REPLY:
2990 			break;
2991 		case SIB_STATE_CHANGE:
2992 			if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
2993 			    nla_put_u32(skb, T_new_state, sib->ns.i))
2994 				goto nla_put_failure;
2995 			break;
2996 		case SIB_HELPER_POST:
2997 			if (nla_put_u32(skb, T_helper_exit_code,
2998 					sib->helper_exit_code))
2999 				goto nla_put_failure;
3000 			/* fall through */
3001 		case SIB_HELPER_PRE:
3002 			if (nla_put_string(skb, T_helper, sib->helper_name))
3003 				goto nla_put_failure;
3004 			break;
3005 		}
3006 	}
3007 	nla_nest_end(skb, nla);
3008 
3009 	if (0)
3010 nla_put_failure:
3011 		err = -EMSGSIZE;
3012 	if (got_ldev)
3013 		put_ldev(device);
3014 	return err;
3015 }
3016 
3017 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
3018 {
3019 	struct drbd_config_context adm_ctx;
3020 	enum drbd_ret_code retcode;
3021 	int err;
3022 
3023 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3024 	if (!adm_ctx.reply_skb)
3025 		return retcode;
3026 	if (retcode != NO_ERROR)
3027 		goto out;
3028 
3029 	err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.device, NULL);
3030 	if (err) {
3031 		nlmsg_free(adm_ctx.reply_skb);
3032 		return err;
3033 	}
3034 out:
3035 	drbd_adm_finish(&adm_ctx, info, retcode);
3036 	return 0;
3037 }
3038 
3039 static int get_one_status(struct sk_buff *skb, struct netlink_callback *cb)
3040 {
3041 	struct drbd_device *device;
3042 	struct drbd_genlmsghdr *dh;
3043 	struct drbd_resource *pos = (struct drbd_resource *)cb->args[0];
3044 	struct drbd_resource *resource = NULL;
3045 	struct drbd_resource *tmp;
3046 	unsigned volume = cb->args[1];
3047 
3048 	/* Open-coded, deferred iteration:
3049 	 * for_each_resource_safe(resource, tmp, &drbd_resources) {
3050 	 *      connection = "first connection of resource or undefined";
3051 	 *	idr_for_each_entry(&resource->devices, device, i) {
3052 	 *	  ...
3053 	 *	}
3054 	 * }
3055 	 * where resource is cb->args[0];
3056 	 * and i is cb->args[1];
3057 	 *
3058 	 * cb->args[2] indicates if we shall loop over all resources,
3059 	 * or just dump all volumes of a single resource.
3060 	 *
3061 	 * This may miss entries inserted after this dump started,
3062 	 * or entries deleted before they are reached.
3063 	 *
3064 	 * We need to make sure the device won't disappear while
3065 	 * we are looking at it, and revalidate our iterators
3066 	 * on each iteration.
3067 	 */
3068 
3069 	/* synchronize with conn_create()/drbd_destroy_connection() */
3070 	rcu_read_lock();
3071 	/* revalidate iterator position */
3072 	for_each_resource_rcu(tmp, &drbd_resources) {
3073 		if (pos == NULL) {
3074 			/* first iteration */
3075 			pos = tmp;
3076 			resource = pos;
3077 			break;
3078 		}
3079 		if (tmp == pos) {
3080 			resource = pos;
3081 			break;
3082 		}
3083 	}
3084 	if (resource) {
3085 next_resource:
3086 		device = idr_get_next(&resource->devices, &volume);
3087 		if (!device) {
3088 			/* No more volumes to dump on this resource.
3089 			 * Advance resource iterator. */
3090 			pos = list_entry_rcu(resource->resources.next,
3091 					     struct drbd_resource, resources);
3092 			/* Did we dump any volume of this resource yet? */
3093 			if (volume != 0) {
3094 				/* If we reached the end of the list,
3095 				 * or only a single resource dump was requested,
3096 				 * we are done. */
3097 				if (&pos->resources == &drbd_resources || cb->args[2])
3098 					goto out;
3099 				volume = 0;
3100 				resource = pos;
3101 				goto next_resource;
3102 			}
3103 		}
3104 
3105 		dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3106 				cb->nlh->nlmsg_seq, &drbd_genl_family,
3107 				NLM_F_MULTI, DRBD_ADM_GET_STATUS);
3108 		if (!dh)
3109 			goto out;
3110 
3111 		if (!device) {
3112 			/* This is a connection without a single volume.
3113 			 * Surprisingly enough, it may have a network
3114 			 * configuration. */
3115 			struct drbd_connection *connection;
3116 
3117 			dh->minor = -1U;
3118 			dh->ret_code = NO_ERROR;
3119 			connection = the_only_connection(resource);
3120 			if (nla_put_drbd_cfg_context(skb, resource, connection, NULL))
3121 				goto cancel;
3122 			if (connection) {
3123 				struct net_conf *nc;
3124 
3125 				nc = rcu_dereference(connection->net_conf);
3126 				if (nc && net_conf_to_skb(skb, nc, 1) != 0)
3127 					goto cancel;
3128 			}
3129 			goto done;
3130 		}
3131 
3132 		D_ASSERT(device, device->vnr == volume);
3133 		D_ASSERT(device, device->resource == resource);
3134 
3135 		dh->minor = device_to_minor(device);
3136 		dh->ret_code = NO_ERROR;
3137 
3138 		if (nla_put_status_info(skb, device, NULL)) {
3139 cancel:
3140 			genlmsg_cancel(skb, dh);
3141 			goto out;
3142 		}
3143 done:
3144 		genlmsg_end(skb, dh);
3145 	}
3146 
3147 out:
3148 	rcu_read_unlock();
3149 	/* where to start the next iteration */
3150 	cb->args[0] = (long)pos;
3151 	cb->args[1] = (pos == resource) ? volume + 1 : 0;
3152 
3153 	/* No more resources/volumes/minors found results in an empty skb.
3154 	 * Which will terminate the dump. */
3155 	return skb->len;
3156 }
3157 
3158 /*
3159  * Request status of all resources, or of all volumes within a single resource.
3160  *
3161  * This is a dump, as the answer may not fit in a single reply skb otherwise.
3162  * Which means we cannot use the family->attrbuf or other such members, because
3163  * dump is NOT protected by the genl_lock().  During dump, we only have access
3164  * to the incoming skb, and need to opencode "parsing" of the nlattr payload.
3165  *
3166  * Once things are setup properly, we call into get_one_status().
3167  */
3168 int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
3169 {
3170 	const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
3171 	struct nlattr *nla;
3172 	const char *resource_name;
3173 	struct drbd_resource *resource;
3174 	int maxtype;
3175 
3176 	/* Is this a followup call? */
3177 	if (cb->args[0]) {
3178 		/* ... of a single resource dump,
3179 		 * and the resource iterator has been advanced already? */
3180 		if (cb->args[2] && cb->args[2] != cb->args[0])
3181 			return 0; /* DONE. */
3182 		goto dump;
3183 	}
3184 
3185 	/* First call (from netlink_dump_start).  We need to figure out
3186 	 * which resource(s) the user wants us to dump. */
3187 	nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen),
3188 			nlmsg_attrlen(cb->nlh, hdrlen),
3189 			DRBD_NLA_CFG_CONTEXT);
3190 
3191 	/* No explicit context given.  Dump all. */
3192 	if (!nla)
3193 		goto dump;
3194 	maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
3195 	nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
3196 	if (IS_ERR(nla))
3197 		return PTR_ERR(nla);
3198 	/* context given, but no name present? */
3199 	if (!nla)
3200 		return -EINVAL;
3201 	resource_name = nla_data(nla);
3202 	if (!*resource_name)
3203 		return -ENODEV;
3204 	resource = drbd_find_resource(resource_name);
3205 	if (!resource)
3206 		return -ENODEV;
3207 
3208 	kref_put(&resource->kref, drbd_destroy_resource); /* get_one_status() revalidates the resource */
3209 
3210 	/* prime iterators, and set "filter" mode mark:
3211 	 * only dump this connection. */
3212 	cb->args[0] = (long)resource;
3213 	/* cb->args[1] = 0; passed in this way. */
3214 	cb->args[2] = (long)resource;
3215 
3216 dump:
3217 	return get_one_status(skb, cb);
3218 }
3219 
3220 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
3221 {
3222 	struct drbd_config_context adm_ctx;
3223 	enum drbd_ret_code retcode;
3224 	struct timeout_parms tp;
3225 	int err;
3226 
3227 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3228 	if (!adm_ctx.reply_skb)
3229 		return retcode;
3230 	if (retcode != NO_ERROR)
3231 		goto out;
3232 
3233 	tp.timeout_type =
3234 		adm_ctx.device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
3235 		test_bit(USE_DEGR_WFC_T, &adm_ctx.device->flags) ? UT_DEGRADED :
3236 		UT_DEFAULT;
3237 
3238 	err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
3239 	if (err) {
3240 		nlmsg_free(adm_ctx.reply_skb);
3241 		return err;
3242 	}
3243 out:
3244 	drbd_adm_finish(&adm_ctx, info, retcode);
3245 	return 0;
3246 }
3247 
3248 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
3249 {
3250 	struct drbd_config_context adm_ctx;
3251 	struct drbd_device *device;
3252 	enum drbd_ret_code retcode;
3253 	struct start_ov_parms parms;
3254 
3255 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3256 	if (!adm_ctx.reply_skb)
3257 		return retcode;
3258 	if (retcode != NO_ERROR)
3259 		goto out;
3260 
3261 	device = adm_ctx.device;
3262 
3263 	/* resume from last known position, if possible */
3264 	parms.ov_start_sector = device->ov_start_sector;
3265 	parms.ov_stop_sector = ULLONG_MAX;
3266 	if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
3267 		int err = start_ov_parms_from_attrs(&parms, info);
3268 		if (err) {
3269 			retcode = ERR_MANDATORY_TAG;
3270 			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3271 			goto out;
3272 		}
3273 	}
3274 	mutex_lock(&adm_ctx.resource->adm_mutex);
3275 
3276 	/* w_make_ov_request expects position to be aligned */
3277 	device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
3278 	device->ov_stop_sector = parms.ov_stop_sector;
3279 
3280 	/* If there is still bitmap IO pending, e.g. previous resync or verify
3281 	 * just being finished, wait for it before requesting a new resync. */
3282 	drbd_suspend_io(device);
3283 	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
3284 	retcode = drbd_request_state(device, NS(conn, C_VERIFY_S));
3285 	drbd_resume_io(device);
3286 
3287 	mutex_unlock(&adm_ctx.resource->adm_mutex);
3288 out:
3289 	drbd_adm_finish(&adm_ctx, info, retcode);
3290 	return 0;
3291 }
3292 
3293 
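/*
 * Generate a new current UUID for this device.  If we are C_CONNECTED with
 * an agreed protocol version >= 90, the current UUID is still
 * UUID_JUST_CREATED and the caller asked for clear_bm, this implements
 * "skip initial sync": the bitmap is cleared on disk, the peer is told to
 * skip the initial sync as well, and both disks are set D_UP_TO_DATE.
 * If those conditions are not met, the device must be C_STANDALONE,
 * otherwise the request is rejected with ERR_CONNECTED.
 */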
3294 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
3295 {
3296 	struct drbd_config_context adm_ctx;
3297 	struct drbd_device *device;
3298 	enum drbd_ret_code retcode;
3299 	int skip_initial_sync = 0;
3300 	int err;
3301 	struct new_c_uuid_parms args;
3302 
3303 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3304 	if (!adm_ctx.reply_skb)
3305 		return retcode;
3306 	if (retcode != NO_ERROR)
3307 		goto out_nolock;
3308 
3309 	device = adm_ctx.device;
3310 	memset(&args, 0, sizeof(args));
3311 	if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
3312 		err = new_c_uuid_parms_from_attrs(&args, info);
3313 		if (err) {
3314 			retcode = ERR_MANDATORY_TAG;
3315 			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3316 			goto out_nolock;
3317 		}
3318 	}
3319 
3320 	mutex_lock(&adm_ctx.resource->adm_mutex);
3321 	mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */
3322 
3323 	if (!get_ldev(device)) {
3324 		retcode = ERR_NO_DISK;
3325 		goto out;
3326 	}
3327 
3328 	/* this is "skip initial sync": the data is assumed to be clean already */
3329 	if (device->state.conn == C_CONNECTED &&
3330 	    first_peer_device(device)->connection->agreed_pro_version >= 90 &&
3331 	    device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
3332 		drbd_info(device, "Preparing to skip initial sync\n");
3333 		skip_initial_sync = 1;
3334 	} else if (device->state.conn != C_STANDALONE) {
3335 		retcode = ERR_CONNECTED;
3336 		goto out_dec;
3337 	}
3338 
3339 	drbd_uuid_set(device, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
3340 	drbd_uuid_new_current(device); /* New current, previous to UI_BITMAP */
3341 
3342 	if (args.clear_bm) {
3343 		err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
3344 			"clear_n_write from new_c_uuid", BM_LOCKED_MASK);
3345 		if (err) {
3346 			drbd_err(device, "Writing bitmap failed with %d\n", err);
3347 			retcode = ERR_IO_MD_DISK;
3348 		}
3349 		if (skip_initial_sync) {
3350 			drbd_send_uuids_skip_initial_sync(first_peer_device(device));
3351 			_drbd_uuid_set(device, UI_BITMAP, 0);
3352 			drbd_print_uuids(device, "cleared bitmap UUID");
3353 			spin_lock_irq(&device->resource->req_lock);
3354 			_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3355 					CS_VERBOSE, NULL);
3356 			spin_unlock_irq(&device->resource->req_lock);
3357 		}
3358 	}
3359 
3360 	drbd_md_sync(device);
3361 out_dec:
3362 	put_ldev(device);
3363 out:
3364 	mutex_unlock(device->state_mutex);
3365 	mutex_unlock(&adm_ctx.resource->adm_mutex);
3366 out_nolock:
3367 	drbd_adm_finish(&adm_ctx, info, retcode);
3368 	return 0;
3369 }
3370 
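/*
 * Resource names must be present and non-empty, and must not contain '/',
 * so that they can later double as sysfs/configfs/debugfs directory names.
 */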
3371 static enum drbd_ret_code
3372 drbd_check_resource_name(struct drbd_config_context *adm_ctx)
3373 {
3374 	const char *name = adm_ctx->resource_name;
3375 	if (!name || !name[0]) {
3376 		drbd_msg_put_info(adm_ctx->reply_skb, "resource name missing");
3377 		return ERR_MANDATORY_TAG;
3378 	}
3379 	/* if we want to use these in sysfs/configfs/debugfs some day,
3380 	 * we must not allow slashes */
3381 	if (strchr(name, '/')) {
3382 		drbd_msg_put_info(adm_ctx->reply_skb, "invalid resource name");
3383 		return ERR_INVALID_REQUEST;
3384 	}
3385 	return NO_ERROR;
3386 }
3387 
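/*
 * Create a new resource with the given res_opts (the object itself is
 * allocated by conn_create()).  A resource that already exists is only an
 * error if the request carries NLM_F_EXCL; without that flag the call is
 * idempotent and returns NO_ERROR.  The userspace counterpart would look
 * roughly like this (illustrative only, exact drbdsetup syntax may differ):
 *
 *	drbdsetup new-resource <name>
 */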
3388 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
3389 {
3390 	struct drbd_config_context adm_ctx;
3391 	enum drbd_ret_code retcode;
3392 	struct res_opts res_opts;
3393 	int err;
3394 
3395 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, 0);
3396 	if (!adm_ctx.reply_skb)
3397 		return retcode;
3398 	if (retcode != NO_ERROR)
3399 		goto out;
3400 
3401 	set_res_opts_defaults(&res_opts);
3402 	err = res_opts_from_attrs(&res_opts, info);
3403 	if (err && err != -ENOMSG) {
3404 		retcode = ERR_MANDATORY_TAG;
3405 		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3406 		goto out;
3407 	}
3408 
3409 	retcode = drbd_check_resource_name(&adm_ctx);
3410 	if (retcode != NO_ERROR)
3411 		goto out;
3412 
3413 	if (adm_ctx.resource) {
3414 		if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
3415 			retcode = ERR_INVALID_REQUEST;
3416 			drbd_msg_put_info(adm_ctx.reply_skb, "resource exists");
3417 		}
3418 		/* else: still NO_ERROR */
3419 		goto out;
3420 	}
3421 
3422 	/* not yet safe for genl_family.parallel_ops */
3423 	if (!conn_create(adm_ctx.resource_name, &res_opts))
3424 		retcode = ERR_NOMEM;
3425 out:
3426 	drbd_adm_finish(&adm_ctx, info, retcode);
3427 	return 0;
3428 }
3429 
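/*
 * Create a new device (minor) within an existing resource.  The requested
 * minor must not exceed MINORMASK and the volume id must not exceed
 * DRBD_VOLUME_MAX; as with resource creation, an already existing minor is
 * only an error if NLM_F_EXCL was set.  Illustrative userspace counterpart
 * (exact drbdsetup syntax may differ):
 *
 *	drbdsetup new-minor <resource> <minor> <volume>
 */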
3430 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
3431 {
3432 	struct drbd_config_context adm_ctx;
3433 	struct drbd_genlmsghdr *dh = info->userhdr;
3434 	enum drbd_ret_code retcode;
3435 
3436 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3437 	if (!adm_ctx.reply_skb)
3438 		return retcode;
3439 	if (retcode != NO_ERROR)
3440 		goto out;
3441 
3442 	if (dh->minor > MINORMASK) {
3443 		drbd_msg_put_info(adm_ctx.reply_skb, "requested minor out of range");
3444 		retcode = ERR_INVALID_REQUEST;
3445 		goto out;
3446 	}
3447 	if (adm_ctx.volume > DRBD_VOLUME_MAX) {
3448 		drbd_msg_put_info(adm_ctx.reply_skb, "requested volume id out of range");
3449 		retcode = ERR_INVALID_REQUEST;
3450 		goto out;
3451 	}
3452 
3453 	/* drbd_adm_prepare made sure already
3454 	 * that first_peer_device(device)->connection and device->vnr match the request. */
3455 	if (adm_ctx.device) {
3456 		if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
3457 			retcode = ERR_MINOR_EXISTS;
3458 		/* else: still NO_ERROR */
3459 		goto out;
3460 	}
3461 
3462 	mutex_lock(&adm_ctx.resource->adm_mutex);
3463 	retcode = drbd_create_device(&adm_ctx, dh->minor);
3464 	mutex_unlock(&adm_ctx.resource->adm_mutex);
3465 out:
3466 	drbd_adm_finish(&adm_ctx, info, retcode);
3467 	return 0;
3468 }
3469 
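/*
 * A minor may only be deleted while it is Diskless and Secondary.  It does
 * not have to be StandAlone, so a volume can be removed from a live
 * replication group; anything still configured is rejected with
 * ERR_MINOR_CONFIGURED.
 */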
3470 static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
3471 {
3472 	if (device->state.disk == D_DISKLESS &&
3473 	    /* no need to also require device->state.conn == C_STANDALONE here:
3474 	     * we may want to delete a minor from a live replication group.
3475 	     */
3476 	    device->state.role == R_SECONDARY) {
3477 		_drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS),
3478 				    CS_VERBOSE + CS_WAIT_COMPLETE);
3479 		drbd_delete_device(device);
3480 		return NO_ERROR;
3481 	} else
3482 		return ERR_MINOR_CONFIGURED;
3483 }
3484 
3485 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info)
3486 {
3487 	struct drbd_config_context adm_ctx;
3488 	enum drbd_ret_code retcode;
3489 
3490 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3491 	if (!adm_ctx.reply_skb)
3492 		return retcode;
3493 	if (retcode != NO_ERROR)
3494 		goto out;
3495 
3496 	mutex_lock(&adm_ctx.resource->adm_mutex);
3497 	retcode = adm_del_minor(adm_ctx.device);
3498 	mutex_unlock(&adm_ctx.resource->adm_mutex);
3499 out:
3500 	drbd_adm_finish(&adm_ctx, info, retcode);
3501 	return 0;
3502 }
3503 
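/*
 * Tear down a whole resource in one call: demote every volume to Secondary,
 * disconnect every connection, detach every disk, wait for the worker
 * threads to actually stop, delete all minors, and finally unregister and
 * free the resource.  Each step must succeed before the next is attempted;
 * failures are reported back through drbd_msg_put_info().
 */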
3504 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
3505 {
3506 	struct drbd_config_context adm_ctx;
3507 	struct drbd_resource *resource;
3508 	struct drbd_connection *connection;
3509 	struct drbd_device *device;
3510 	int retcode; /* enum drbd_ret_code resp. enum drbd_state_rv */
3511 	unsigned i;
3512 
3513 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3514 	if (!adm_ctx.reply_skb)
3515 		return retcode;
3516 	if (retcode != NO_ERROR)
3517 		goto finish;
3518 
3519 	resource = adm_ctx.resource;
3520 	mutex_lock(&resource->adm_mutex);
3521 	/* demote */
3522 	for_each_connection(connection, resource) {
3523 		struct drbd_peer_device *peer_device;
3524 
3525 		idr_for_each_entry(&connection->peer_devices, peer_device, i) {
3526 			retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0);
3527 			if (retcode < SS_SUCCESS) {
3528 				drbd_msg_put_info(adm_ctx.reply_skb, "failed to demote");
3529 				goto out;
3530 			}
3531 		}
3532 
3533 		retcode = conn_try_disconnect(connection, 0);
3534 		if (retcode < SS_SUCCESS) {
3535 			drbd_msg_put_info(adm_ctx.reply_skb, "failed to disconnect");
3536 			goto out;
3537 		}
3538 	}
3539 
3540 	/* detach */
3541 	idr_for_each_entry(&resource->devices, device, i) {
3542 		retcode = adm_detach(device, 0);
3543 		if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
3544 			drbd_msg_put_info(adm_ctx.reply_skb, "failed to detach");
3545 			goto out;
3546 		}
3547 	}
3548 
3549 	/* If we reach this point, all volumes (of this resource) are Secondary,
3550 	 * Disconnected and Diskless, i.e. Unconfigured. Make sure all threads have
3551 	 * actually stopped; state handling only does drbd_thread_stop_nowait(). */
3552 	for_each_connection(connection, resource)
3553 		drbd_thread_stop(&connection->worker);
3554 
3555 	/* Now, nothing can fail anymore */
3556 
3557 	/* delete volumes */
3558 	idr_for_each_entry(&resource->devices, device, i) {
3559 		retcode = adm_del_minor(device);
3560 		if (retcode != NO_ERROR) {
3561 			/* "can not happen" */
3562 			drbd_msg_put_info(adm_ctx.reply_skb, "failed to delete volume");
3563 			goto out;
3564 		}
3565 	}
3566 
3567 	list_del_rcu(&resource->resources);
3568 	synchronize_rcu();
3569 	drbd_free_resource(resource);
3570 	retcode = NO_ERROR;
3571 out:
3572 	mutex_unlock(&resource->adm_mutex);
3573 finish:
3574 	drbd_adm_finish(&adm_ctx, info, retcode);
3575 	return 0;
3576 }
3577 
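/*
 * Delete an unconfigured resource.  This only succeeds if every connection
 * is (at most) C_STANDALONE and no minors remain; otherwise
 * ERR_NET_CONFIGURED resp. ERR_RES_IN_USE is returned.
 */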
3578 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
3579 {
3580 	struct drbd_config_context adm_ctx;
3581 	struct drbd_resource *resource;
3582 	struct drbd_connection *connection;
3583 	enum drbd_ret_code retcode;
3584 
3585 	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3586 	if (!adm_ctx.reply_skb)
3587 		return retcode;
3588 	if (retcode != NO_ERROR)
3589 		goto finish;
3590 
3591 	resource = adm_ctx.resource;
3592 	mutex_lock(&resource->adm_mutex);
3593 	for_each_connection(connection, resource) {
3594 		if (connection->cstate > C_STANDALONE) {
3595 			retcode = ERR_NET_CONFIGURED;
3596 			goto out;
3597 		}
3598 	}
3599 	if (!idr_is_empty(&resource->devices)) {
3600 		retcode = ERR_RES_IN_USE;
3601 		goto out;
3602 	}
3603 
3604 	list_del_rcu(&resource->resources);
3605 	for_each_connection(connection, resource)
3606 		drbd_thread_stop(&connection->worker);
3607 	synchronize_rcu();
3608 	drbd_free_resource(resource);
3609 	retcode = NO_ERROR;
3610 out:
3611 	mutex_unlock(&resource->adm_mutex);
3612 finish:
3613 	drbd_adm_finish(&adm_ctx, info, retcode);
3614 	return 0;
3615 }
3616 
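/*
 * Broadcast a state info block (sib) to the DRBD events multicast group.
 * Sync progress events are rate limited to at most one per second; -ESRCH
 * from the multicast send just means "no listeners" and is not treated as
 * an error.  The message is allocated with GFP_NOIO, presumably because
 * events may be generated from within IO/state handling paths.
 */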
3617 void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
3618 {
3619 	static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
3620 	struct sk_buff *msg;
3621 	struct drbd_genlmsghdr *d_out;
3622 	unsigned seq;
3623 	int err = -ENOMEM;
3624 
3625 	if (sib->sib_reason == SIB_SYNC_PROGRESS) {
3626 		if (time_after(jiffies, device->rs_last_bcast + HZ))
3627 			device->rs_last_bcast = jiffies;
3628 		else
3629 			return;
3630 	}
3631 
3632 	seq = atomic_inc_return(&drbd_genl_seq);
3633 	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
3634 	if (!msg)
3635 		goto failed;
3636 
3637 	err = -EMSGSIZE;
3638 	d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
3639 	if (!d_out) /* cannot happen, but anyway. */
3640 		goto nla_put_failure;
3641 	d_out->minor = device_to_minor(device);
3642 	d_out->ret_code = NO_ERROR;
3643 
3644 	if (nla_put_status_info(msg, device, sib))
3645 		goto nla_put_failure;
3646 	genlmsg_end(msg, d_out);
3647 	err = drbd_genl_multicast_events(msg, 0);
3648 	/* msg has been consumed or freed in netlink_broadcast() */
3649 	if (err && err != -ESRCH)
3650 		goto failed;
3651 
3652 	return;
3653 
3654 nla_put_failure:
3655 	nlmsg_free(msg);
3656 failed:
3657 	drbd_err(device, "Error %d while broadcasting event. "
3658 			"Event seq:%u sib_reason:%u\n",
3659 			err, seq, sib->sib_reason);
3660 }
3661