/*
   drbd_nl.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING. If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/blkpg.h>
#include <linux/cpumask.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
#include "drbd_wrappers.h"
#include <asm/unaligned.h>
#include <linux/drbd_limits.h>
#include <linux/kthread.h>

#include <net/genetlink.h>

/* .doit */
// int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
// int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
/* .dumpit */
int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);

#include <linux/drbd_genl_api.h>
#include "drbd_nla.h"
#include <linux/genl_magic_func.h>

/* used blkdev_get_by_path, to claim our meta data device(s) */
static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";

/* Configuration is strictly serialized, because generic netlink message
 * processing is strictly serialized by the genl_lock().
 * Which means we can use one static global drbd_config_context struct.
 */
static struct drbd_config_context {
	/* assigned from drbd_genlmsghdr */
	unsigned int minor;
	/* assigned from request attributes, if present */
	unsigned int volume;
#define VOLUME_UNSPECIFIED		(-1U)
	/* pointer into the request skb,
	 * limited lifetime! */
	char *resource_name;
	struct nlattr *my_addr;
	struct nlattr *peer_addr;

	/* reply buffer */
	struct sk_buff *reply_skb;
	/* pointer into reply buffer */
	struct drbd_genlmsghdr *reply_dh;
	/* resolved from attributes, if possible */
	struct drbd_device *device;
	struct drbd_resource *resource;
	struct drbd_connection *connection;
} adm_ctx;

static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
{
	genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
	if (genlmsg_reply(skb, info))
		printk(KERN_ERR "drbd: error sending genl reply\n");
}

/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
 * reason it could fail was no space in skb, and there are 4k available. */
int drbd_msg_put_info(const char *info)
{
	struct sk_buff *skb = adm_ctx.reply_skb;
	struct nlattr *nla;
	int err = -EMSGSIZE;

	if (!info || !info[0])
		return 0;

	nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
	if (!nla)
		return err;

	err = nla_put_string(skb, T_info_text, info);
	if (err) {
		nla_nest_cancel(skb, nla);
		return err;
	} else
		nla_nest_end(skb, nla);
	return 0;
}

/* This would be a good candidate for a "pre_doit" hook,
 * and per-family private info->pointers.
 * But we need to stay compatible with older kernels.
 * If it returns successfully, adm_ctx members are valid.
 */
#define DRBD_ADM_NEED_MINOR	1
#define DRBD_ADM_NEED_RESOURCE	2
#define DRBD_ADM_NEED_CONNECTION 4
static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
	unsigned flags)
{
	struct drbd_genlmsghdr *d_in = info->userhdr;
	const u8 cmd = info->genlhdr->cmd;
	int err;

	memset(&adm_ctx, 0, sizeof(adm_ctx));

	/* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
	if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
		return -EPERM;

	adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!adm_ctx.reply_skb) {
		err = -ENOMEM;
		goto fail;
	}

	adm_ctx.reply_dh = genlmsg_put_reply(adm_ctx.reply_skb,
					info, &drbd_genl_family, 0, cmd);
	/* put of a few bytes into a fresh skb of >= 4k will always succeed.
	 * but anyways */
	if (!adm_ctx.reply_dh) {
		err = -ENOMEM;
		goto fail;
	}

	adm_ctx.reply_dh->minor = d_in->minor;
	adm_ctx.reply_dh->ret_code = NO_ERROR;

	adm_ctx.volume = VOLUME_UNSPECIFIED;
	if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
		struct nlattr *nla;
		/* parse and validate only */
		err = drbd_cfg_context_from_attrs(NULL, info);
		if (err)
			goto fail;

		/* It was present, and valid,
		 * copy it over to the reply skb. */
		err = nla_put_nohdr(adm_ctx.reply_skb,
				info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
				info->attrs[DRBD_NLA_CFG_CONTEXT]);
		if (err)
			goto fail;

		/* and assign stuff to the global adm_ctx */
		nla = nested_attr_tb[__nla_type(T_ctx_volume)];
		if (nla)
			adm_ctx.volume = nla_get_u32(nla);
		nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
		if (nla)
			adm_ctx.resource_name = nla_data(nla);
		adm_ctx.my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
		adm_ctx.peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
		if ((adm_ctx.my_addr &&
		     nla_len(adm_ctx.my_addr) > sizeof(adm_ctx.connection->my_addr)) ||
		    (adm_ctx.peer_addr &&
		     nla_len(adm_ctx.peer_addr) > sizeof(adm_ctx.connection->peer_addr))) {
			err = -EINVAL;
			goto fail;
		}
	}

	adm_ctx.minor = d_in->minor;
	adm_ctx.device = minor_to_device(d_in->minor);
	if (adm_ctx.resource_name) {
		adm_ctx.resource = drbd_find_resource(adm_ctx.resource_name);
	}

	if (!adm_ctx.device && (flags & DRBD_ADM_NEED_MINOR)) {
		drbd_msg_put_info("unknown minor");
		return ERR_MINOR_INVALID;
	}
	if (!adm_ctx.resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
		drbd_msg_put_info("unknown resource");
		if (adm_ctx.resource_name)
			return ERR_RES_NOT_KNOWN;
		return ERR_INVALID_REQUEST;
	}

	if (flags & DRBD_ADM_NEED_CONNECTION) {
		if (adm_ctx.resource) {
			drbd_msg_put_info("no resource name expected");
			return ERR_INVALID_REQUEST;
		}
		if (adm_ctx.device) {
			drbd_msg_put_info("no minor number expected");
			return ERR_INVALID_REQUEST;
		}
		if (adm_ctx.my_addr && adm_ctx.peer_addr)
			adm_ctx.connection = conn_get_by_addrs(nla_data(adm_ctx.my_addr),
							       nla_len(adm_ctx.my_addr),
							       nla_data(adm_ctx.peer_addr),
							       nla_len(adm_ctx.peer_addr));
		if (!adm_ctx.connection) {
			drbd_msg_put_info("unknown connection");
			return ERR_INVALID_REQUEST;
		}
	}

	/* some more paranoia, if the request was over-determined */
	if (adm_ctx.device && adm_ctx.resource &&
	    adm_ctx.device->resource != adm_ctx.resource) {
		pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
				adm_ctx.minor, adm_ctx.resource->name,
				adm_ctx.device->resource->name);
		drbd_msg_put_info("minor exists in different resource");
		return ERR_INVALID_REQUEST;
	}
	if (adm_ctx.device &&
	    adm_ctx.volume != VOLUME_UNSPECIFIED &&
	    adm_ctx.volume != adm_ctx.device->vnr) {
		pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
				adm_ctx.minor, adm_ctx.volume,
				adm_ctx.device->vnr,
				adm_ctx.device->resource->name);
		drbd_msg_put_info("minor exists as different volume");
		return ERR_INVALID_REQUEST;
	}

	return NO_ERROR;

fail:
	nlmsg_free(adm_ctx.reply_skb);
	adm_ctx.reply_skb = NULL;
	return err;
}

static int drbd_adm_finish(struct genl_info *info, int retcode)
{
	if (adm_ctx.connection) {
		kref_put(&adm_ctx.connection->kref, drbd_destroy_connection);
		adm_ctx.connection = NULL;
	}
	if (adm_ctx.resource) {
		kref_put(&adm_ctx.resource->kref, drbd_destroy_resource);
		adm_ctx.resource = NULL;
	}

	if (!adm_ctx.reply_skb)
		return -ENOMEM;

	adm_ctx.reply_dh->ret_code = retcode;
	drbd_adm_send_reply(adm_ctx.reply_skb, info);
	return 0;
}

static void setup_khelper_env(struct drbd_connection *connection, char **envp)
{
	char *afs;

	/* FIXME: A future version will not allow this case. */
	if (connection->my_addr_len == 0 || connection->peer_addr_len == 0)
		return;

	switch (((struct sockaddr *)&connection->peer_addr)->sa_family) {
	case AF_INET6:
		afs = "ipv6";
		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
			 &((struct sockaddr_in6 *)&connection->peer_addr)->sin6_addr);
		break;
	case AF_INET:
		afs = "ipv4";
		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
			 &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
		break;
	default:
		afs = "ssocks";
		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
			 &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
	}
	snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
}

int drbd_khelper(struct drbd_device *device, char *cmd)
{
	char *envp[] = { "HOME=/",
			"TERM=linux",
			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
			 (char[20]) { }, /* address family */
			 (char[60]) { }, /* address */
			NULL };
	char mb[12];
	char *argv[] = {usermode_helper, cmd, mb, NULL };
	struct drbd_connection *connection = first_peer_device(device)->connection;
	struct sib_info sib;
	int ret;

	if (current == connection->worker.task)
		set_bit(CALLBACK_PENDING, &connection->flags);

	snprintf(mb, 12, "minor-%d", device_to_minor(device));
	setup_khelper_env(connection, envp);

	/* The helper may take some time.
	 * write out any unsynced meta data changes now */
	drbd_md_sync(device);

	drbd_info(device, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
	sib.sib_reason = SIB_HELPER_PRE;
	sib.helper_name = cmd;
	drbd_bcast_event(device, &sib);
	ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
	if (ret)
		drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
				usermode_helper, cmd, mb,
				(ret >> 8) & 0xff, ret);
	else
		drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
				usermode_helper, cmd, mb,
				(ret >> 8) & 0xff, ret);
	sib.sib_reason = SIB_HELPER_POST;
	sib.helper_exit_code = ret;
	drbd_bcast_event(device, &sib);

	if (current == connection->worker.task)
		clear_bit(CALLBACK_PENDING, &connection->flags);

	if (ret < 0) /* Ignore any ERRNOs we got. */
		ret = 0;

	return ret;
}

static int conn_khelper(struct drbd_connection *connection, char *cmd)
{
	char *envp[] = { "HOME=/",
			"TERM=linux",
			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
			 (char[20]) { }, /* address family */
			 (char[60]) { }, /* address */
			NULL };
	char *resource_name = connection->resource->name;
	char *argv[] = {usermode_helper, cmd, resource_name, NULL };
	int ret;

	setup_khelper_env(connection, envp);
	conn_md_sync(connection);

	drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name);
	/* TODO: conn_bcast_event() ?? */

	ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
	if (ret)
		drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
			  usermode_helper, cmd, resource_name,
			  (ret >> 8) & 0xff, ret);
	else
		drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
			  usermode_helper, cmd, resource_name,
			  (ret >> 8) & 0xff, ret);
	/* TODO: conn_bcast_event() ?? */

	if (ret < 0) /* Ignore any ERRNOs we got. */
		ret = 0;

	return ret;
}

static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connection)
{
	enum drbd_fencing_p fp = FP_NOT_AVAIL;
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		if (get_ldev_if_state(device, D_CONSISTENT)) {
			struct disk_conf *disk_conf =
				rcu_dereference(peer_device->device->ldev->disk_conf);
			fp = max_t(enum drbd_fencing_p, fp, disk_conf->fencing);
			put_ldev(device);
		}
	}
	rcu_read_unlock();

	return fp;
}

bool conn_try_outdate_peer(struct drbd_connection *connection)
{
	unsigned int connect_cnt;
	union drbd_state mask = { };
	union drbd_state val = { };
	enum drbd_fencing_p fp;
	char *ex_to_string;
	int r;

	if (connection->cstate >= C_WF_REPORT_PARAMS) {
		drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
		return false;
	}

	spin_lock_irq(&connection->resource->req_lock);
	connect_cnt = connection->connect_cnt;
	spin_unlock_irq(&connection->resource->req_lock);

	fp = highest_fencing_policy(connection);
	switch (fp) {
	case FP_NOT_AVAIL:
		drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n");
		goto out;
	case FP_DONT_CARE:
		return true;
	default: ;
	}

	r = conn_khelper(connection, "fence-peer");

	switch ((r>>8) & 0xff) {
	case 3: /* peer is inconsistent */
		ex_to_string = "peer is inconsistent or worse";
		mask.pdsk = D_MASK;
		val.pdsk = D_INCONSISTENT;
		break;
	case 4: /* peer got outdated, or was already outdated */
		ex_to_string = "peer was fenced";
		mask.pdsk = D_MASK;
		val.pdsk = D_OUTDATED;
		break;
	case 5: /* peer was down */
		if (conn_highest_disk(connection) == D_UP_TO_DATE) {
			/* we will(have) create(d) a new UUID anyways... */
			ex_to_string = "peer is unreachable, assumed to be dead";
			mask.pdsk = D_MASK;
			val.pdsk = D_OUTDATED;
		} else {
			ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
		}
		break;
	case 6: /* Peer is primary, voluntarily outdate myself.
		 * This is useful when an unconnected R_SECONDARY is asked to
		 * become R_PRIMARY, but finds the other peer being active. */
		ex_to_string = "peer is active";
		drbd_warn(connection, "Peer is primary, outdating myself.\n");
		mask.disk = D_MASK;
		val.disk = D_OUTDATED;
		break;
	case 7:
		if (fp != FP_STONITH)
			drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n");
		ex_to_string = "peer was stonithed";
		mask.pdsk = D_MASK;
		val.pdsk = D_OUTDATED;
		break;
	default:
		/* The script is broken ... */
		drbd_err(connection, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
		return false; /* Eventually leave IO frozen */
	}

	drbd_info(connection, "fence-peer helper returned %d (%s)\n",
		  (r>>8) & 0xff, ex_to_string);

 out:

	/* Not using
	   conn_request_state(connection, mask, val, CS_VERBOSE);
	   here, because we might have been able to re-establish the connection in the
	   meantime. */
	spin_lock_irq(&connection->resource->req_lock);
	if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) {
		if (connection->connect_cnt != connect_cnt)
			/* In case the connection was established and dropped
			   while the fence-peer handler was running, ignore it */
			drbd_info(connection, "Ignoring fence-peer exit code\n");
		else
			_conn_request_state(connection, mask, val, CS_VERBOSE);
	}
	spin_unlock_irq(&connection->resource->req_lock);

	return conn_highest_pdsk(connection) <= D_OUTDATED;
}

static int _try_outdate_peer_async(void *data)
{
	struct drbd_connection *connection = (struct drbd_connection *)data;

	conn_try_outdate_peer(connection);

	kref_put(&connection->kref, drbd_destroy_connection);
	return 0;
}

void conn_try_outdate_peer_async(struct drbd_connection *connection)
{
	struct task_struct *opa;

	kref_get(&connection->kref);
	opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h");
	if (IS_ERR(opa)) {
		drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n");
		kref_put(&connection->kref, drbd_destroy_connection);
	}
}

enum drbd_state_rv
drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
{
	const int max_tries = 4;
	enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
	struct net_conf *nc;
	int try = 0;
	int forced = 0;
	union drbd_state mask, val;

	if (new_role == R_PRIMARY) {
		struct drbd_connection *connection;

		/* Detect dead peers as soon as possible. */

		rcu_read_lock();
		for_each_connection(connection, device->resource)
			request_ping(connection);
		rcu_read_unlock();
	}

	mutex_lock(device->state_mutex);

	mask.i = 0; mask.role = R_MASK;
	val.i  = 0; val.role  = new_role;

	while (try++ < max_tries) {
		rv = _drbd_request_state(device, mask, val, CS_WAIT_COMPLETE);

		/* in case we first succeeded to outdate,
		 * but now suddenly could establish a connection */
		if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
			val.pdsk = 0;
			mask.pdsk = 0;
			continue;
		}

		if (rv == SS_NO_UP_TO_DATE_DISK && force &&
		    (device->state.disk < D_UP_TO_DATE &&
		     device->state.disk >= D_INCONSISTENT)) {
			mask.disk = D_MASK;
			val.disk = D_UP_TO_DATE;
			forced = 1;
			continue;
		}

		if (rv == SS_NO_UP_TO_DATE_DISK &&
		    device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
			D_ASSERT(device, device->state.pdsk == D_UNKNOWN);

			if (conn_try_outdate_peer(first_peer_device(device)->connection)) {
				val.disk = D_UP_TO_DATE;
				mask.disk = D_MASK;
			}
			continue;
		}

		if (rv == SS_NOTHING_TO_DO)
			goto out;
		if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
			if (!conn_try_outdate_peer(first_peer_device(device)->connection) && force) {
				drbd_warn(device, "Forced into split brain situation!\n");
				mask.pdsk = D_MASK;
				val.pdsk  = D_OUTDATED;
			}
			continue;
		}
		if (rv == SS_TWO_PRIMARIES) {
			/* Maybe the peer is detected as dead very soon...
			   retry at most once more in this case. */
			int timeo;
			rcu_read_lock();
			nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
			timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
			rcu_read_unlock();
			schedule_timeout_interruptible(timeo);
			if (try < max_tries)
				try = max_tries - 1;
			continue;
		}
		if (rv < SS_SUCCESS) {
			rv = _drbd_request_state(device, mask, val,
						CS_VERBOSE + CS_WAIT_COMPLETE);
			if (rv < SS_SUCCESS)
				goto out;
		}
		break;
	}

	if (rv < SS_SUCCESS)
		goto out;

	if (forced)
		drbd_warn(device, "Forced to consider local data as UpToDate!\n");

	/* Wait until nothing is on the fly :) */
	wait_event(device->misc_wait, atomic_read(&device->ap_pending_cnt) == 0);

	/* FIXME also wait for all pending P_BARRIER_ACK? */

	if (new_role == R_SECONDARY) {
		set_disk_ro(device->vdisk, true);
		if (get_ldev(device)) {
			device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
			put_ldev(device);
		}
	} else {
		mutex_lock(&device->resource->conf_update);
		nc = first_peer_device(device)->connection->net_conf;
		if (nc)
			nc->discard_my_data = 0; /* without copy; single bit op is atomic */
		mutex_unlock(&device->resource->conf_update);

		set_disk_ro(device->vdisk, false);
		if (get_ldev(device)) {
			if (((device->state.conn < C_CONNECTED ||
			       device->state.pdsk <= D_FAILED)
			      && device->ldev->md.uuid[UI_BITMAP] == 0) || forced)
				drbd_uuid_new_current(device);

			device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
			put_ldev(device);
		}
	}

	/* writeout of activity log covered areas of the bitmap
	 * to stable storage done in after state change already */

	if (device->state.conn >= C_WF_REPORT_PARAMS) {
		/* if this was forced, we should consider sync */
		if (forced)
			drbd_send_uuids(first_peer_device(device));
		drbd_send_current_state(first_peer_device(device));
	}

	drbd_md_sync(device);

	kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
out:
	mutex_unlock(device->state_mutex);
	return rv;
}

static const char *from_attrs_err_to_txt(int err)
{
	return	err == -ENOMSG ? "required attribute missing" :
		err == -EOPNOTSUPP ? "unknown mandatory attribute" :
		err == -EEXIST ? "can not change invariant setting" :
		"invalid attribute value";
}

int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
{
	struct set_role_parms parms;
	int err;
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	memset(&parms, 0, sizeof(parms));
	if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
		err = set_role_parms_from_attrs(&parms, info);
		if (err) {
			retcode = ERR_MANDATORY_TAG;
			drbd_msg_put_info(from_attrs_err_to_txt(err));
			goto out;
		}
	}

	if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
		retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
	else
		retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
out:
	drbd_adm_finish(info, retcode);
	return 0;
}

/* Initializes the md.*_offset members, so we are able to find
 * the on disk meta data.
 *
 * We currently have two possible layouts:
 * external:
 *   |----------- md_size_sect ------------------|
 *   [ 4k superblock ][ activity log ][  Bitmap  ]
 *   | al_offset == 8 |
 *   | bm_offset = al_offset + X      |
 *  ==> bitmap sectors = md_size_sect - bm_offset
 *
 * internal:
 *            |----------- md_size_sect ------------------|
 * [data.....][  Bitmap  ][ activity log ][ 4k superblock ]
 *                        | al_offset < 0 |
 *            | bm_offset = al_offset - Y |
 *  ==> bitmap sectors = Y = al_offset - bm_offset
 *
 * Activity log size used to be fixed 32kB,
 * but is about to become configurable.
 */
static void drbd_md_set_sector_offsets(struct drbd_device *device,
				       struct drbd_backing_dev *bdev)
{
	sector_t md_size_sect = 0;
	unsigned int al_size_sect = bdev->md.al_size_4k * 8;

	bdev->md.md_offset = drbd_md_ss(bdev);

	switch (bdev->md.meta_dev_idx) {
	default:
		/* v07 style fixed size indexed meta data */
		bdev->md.md_size_sect = MD_128MB_SECT;
		bdev->md.al_offset = MD_4kB_SECT;
		bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
		break;
	case DRBD_MD_INDEX_FLEX_EXT:
		/* just occupy the full device; unit: sectors */
		bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
		bdev->md.al_offset = MD_4kB_SECT;
		bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
		break;
	case DRBD_MD_INDEX_INTERNAL:
	case DRBD_MD_INDEX_FLEX_INT:
		/* al size is still fixed */
		bdev->md.al_offset = -al_size_sect;
		/* we need (slightly less than) ~ this much bitmap sectors: */
		md_size_sect = drbd_get_capacity(bdev->backing_bdev);
		md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
		md_size_sect = BM_SECT_TO_EXT(md_size_sect);
		md_size_sect = ALIGN(md_size_sect, 8);

		/* plus the "drbd meta data super block",
		 * and the activity log; */
		md_size_sect += MD_4kB_SECT + al_size_sect;

		bdev->md.md_size_sect = md_size_sect;
		/* bitmap offset is adjusted by 'super' block size */
		bdev->md.bm_offset   = -md_size_sect + MD_4kB_SECT;
		break;
	}
}

/* input size is expected to be in KB */
char *ppsize(char *buf, unsigned long long size)
{
	/* Needs 9 bytes at max including trailing NUL:
	 * -1ULL ==> "16384 EB" */
	static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
	int base = 0;
	while (size >= 10000 && base < sizeof(units)-1) {
		/* shift + round */
		size = (size >> 10) + !!(size & (1<<9));
		base++;
	}
	sprintf(buf, "%u %cB", (unsigned)size, units[base]);

	return buf;
}
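
/* Illustrative example: ppsize(buf, 1048576) yields "1024 MB"; each iteration
 * shifts the value right by 10 bits and rounds up when bit 9 of the discarded
 * remainder is set. */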

/* there is still a theoretical deadlock when called from receiver
 * on a D_INCONSISTENT R_PRIMARY:
 * remote READ does inc_ap_bio, receiver would need to receive answer
 * packet from remote to dec_ap_bio again.
 * receiver receive_sizes(), comes here,
 * waits for ap_bio_cnt == 0. -> deadlock.
 * but this cannot happen, actually, because:
 * R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
 * (not connected, or bad/no disk on peer):
 * see drbd_fail_request_early, ap_bio_cnt is zero.
 * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
 * peer may not initiate a resize.
 */
/* Note these are not to be confused with
 * drbd_adm_suspend_io/drbd_adm_resume_io,
 * which are (sub) state changes triggered by admin (drbdsetup),
 * and can be long lived.
 * This changes a device flag, is triggered by drbd internals,
 * and should be short-lived. */
void drbd_suspend_io(struct drbd_device *device)
{
	set_bit(SUSPEND_IO, &device->flags);
	if (drbd_suspended(device))
		return;
	wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
}

void drbd_resume_io(struct drbd_device *device)
{
	clear_bit(SUSPEND_IO, &device->flags);
	wake_up(&device->misc_wait);
}

/**
 * drbd_determine_dev_size() - Sets the right device size obeying all constraints
 * @device:	DRBD device.
 *
 * Returns 0 on success, negative return values indicate errors.
 * You should call drbd_md_sync() after calling this function.
 */
enum determine_dev_size
drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
{
	sector_t prev_first_sect, prev_size; /* previous meta location */
	sector_t la_size_sect, u_size;
	struct drbd_md *md = &device->ldev->md;
	u32 prev_al_stripe_size_4k;
	u32 prev_al_stripes;
	sector_t size;
	char ppb[10];
	void *buffer;

	int md_moved, la_size_changed;
	enum determine_dev_size rv = DS_UNCHANGED;

	/* race:
	 * application request passes inc_ap_bio,
	 * but then cannot get an AL-reference.
	 * this function later may wait on ap_bio_cnt == 0. -> deadlock.
	 *
	 * to avoid that:
	 * Suspend IO right here.
	 * still lock the act_log to not trigger ASSERTs there.
	 */
	drbd_suspend_io(device);
	buffer = drbd_md_get_buffer(device); /* Lock meta-data IO */
	if (!buffer) {
		drbd_resume_io(device);
		return DS_ERROR;
	}

	/* no wait necessary anymore, actually we could assert that */
	wait_event(device->al_wait, lc_try_lock(device->act_log));

	prev_first_sect = drbd_md_first_sector(device->ldev);
	prev_size = device->ldev->md.md_size_sect;
	la_size_sect = device->ldev->md.la_size_sect;

	if (rs) {
		/* rs is non NULL if we should change the AL layout only */

		prev_al_stripes = md->al_stripes;
		prev_al_stripe_size_4k = md->al_stripe_size_4k;

		md->al_stripes = rs->al_stripes;
		md->al_stripe_size_4k = rs->al_stripe_size / 4;
		md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
	}

	drbd_md_set_sector_offsets(device, device->ldev);

	rcu_read_lock();
	u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
	rcu_read_unlock();
	size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);

	if (size < la_size_sect) {
		if (rs && u_size == 0) {
			/* Remove "rs &&" later. This check should always be active, but
			   right now the receiver expects the permissive behavior */
			drbd_warn(device, "Implicit shrink not allowed. "
				 "Use --size=%llus for explicit shrink.\n",
				 (unsigned long long)size);
			rv = DS_ERROR_SHRINK;
		}
		if (u_size > size)
			rv = DS_ERROR_SPACE_MD;
		if (rv != DS_UNCHANGED)
			goto err_out;
	}

	if (drbd_get_capacity(device->this_bdev) != size ||
	    drbd_bm_capacity(device) != size) {
		int err;
		err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
		if (unlikely(err)) {
			/* currently there is only one error: ENOMEM! */
			size = drbd_bm_capacity(device)>>1;
			if (size == 0) {
				drbd_err(device, "OUT OF MEMORY! "
				    "Could not allocate bitmap!\n");
			} else {
				drbd_err(device, "BM resizing failed. "
				    "Leaving size unchanged at size = %lu KB\n",
				    (unsigned long)size);
			}
			rv = DS_ERROR;
		}
		/* racy, see comments above. */
		drbd_set_my_capacity(device, size);
		device->ldev->md.la_size_sect = size;
		drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
		     (unsigned long long)size>>1);
	}
	if (rv <= DS_ERROR)
		goto err_out;

	la_size_changed = (la_size_sect != device->ldev->md.la_size_sect);

	md_moved = prev_first_sect != drbd_md_first_sector(device->ldev)
		|| prev_size	   != device->ldev->md.md_size_sect;

	if (la_size_changed || md_moved || rs) {
		u32 prev_flags;

		drbd_al_shrink(device); /* All extents inactive. */

		prev_flags = md->flags;
		md->flags &= ~MDF_PRIMARY_IND;
		drbd_md_write(device, buffer);

		drbd_info(device, "Writing the whole bitmap, %s\n",
			 la_size_changed && md_moved ? "size changed and md moved" :
			 la_size_changed ? "size changed" : "md moved");
		/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
		drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
			       "size changed", BM_LOCKED_MASK);
		drbd_initialize_al(device, buffer);

		md->flags = prev_flags;
		drbd_md_write(device, buffer);

		if (rs)
			drbd_info(device, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
				  md->al_stripes, md->al_stripe_size_4k * 4);
	}

	if (size > la_size_sect)
		rv = la_size_sect ? DS_GREW : DS_GREW_FROM_ZERO;
	if (size < la_size_sect)
		rv = DS_SHRUNK;

	if (0) {
	err_out:
		if (rs) {
			md->al_stripes = prev_al_stripes;
			md->al_stripe_size_4k = prev_al_stripe_size_4k;
			md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;

			drbd_md_set_sector_offsets(device, device->ldev);
		}
	}
	lc_unlock(device->act_log);
	wake_up(&device->al_wait);
	drbd_md_put_buffer(device);
	drbd_resume_io(device);

	return rv;
}

sector_t
drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev,
		  sector_t u_size, int assume_peer_has_space)
{
	sector_t p_size = device->p_size;   /* partner's disk size. */
	sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. */
	sector_t m_size; /* my size */
	sector_t size = 0;

	m_size = drbd_get_max_capacity(bdev);

	if (device->state.conn < C_CONNECTED && assume_peer_has_space) {
		drbd_warn(device, "Resize while not connected was forced by the user!\n");
		p_size = m_size;
	}

	if (p_size && m_size) {
		size = min_t(sector_t, p_size, m_size);
	} else {
		if (la_size_sect) {
			size = la_size_sect;
			if (m_size && m_size < size)
				size = m_size;
			if (p_size && p_size < size)
				size = p_size;
		} else {
			if (m_size)
				size = m_size;
			if (p_size)
				size = p_size;
		}
	}

	if (size == 0)
		drbd_err(device, "Both nodes diskless!\n");

	if (u_size) {
		if (u_size > size)
			drbd_err(device, "Requested disk size is too big (%lu > %lu)\n",
			    (unsigned long)u_size>>1, (unsigned long)size>>1);
		else
			size = u_size;
	}

	return size;
}

/**
 * drbd_check_al_size() - Ensures that the AL is of the right size
 * @device:	DRBD device.
 *
 * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
 * failed, and 0 on success. You should call drbd_md_sync() after you called
 * this function.
 */
static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
{
	struct lru_cache *n, *t;
	struct lc_element *e;
	unsigned int in_use;
	int i;

	if (device->act_log &&
	    device->act_log->nr_elements == dc->al_extents)
		return 0;

	in_use = 0;
	t = device->act_log;
	n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
		dc->al_extents, sizeof(struct lc_element), 0);

	if (n == NULL) {
		drbd_err(device, "Cannot allocate act_log lru!\n");
		return -ENOMEM;
	}
	spin_lock_irq(&device->al_lock);
	if (t) {
		for (i = 0; i < t->nr_elements; i++) {
			e = lc_element_by_index(t, i);
			if (e->refcnt)
				drbd_err(device, "refcnt(%d)==%d\n",
				    e->lc_number, e->refcnt);
			in_use += e->refcnt;
		}
	}
	if (!in_use)
		device->act_log = n;
	spin_unlock_irq(&device->al_lock);
	if (in_use) {
		drbd_err(device, "Activity log still in use!\n");
		lc_destroy(n);
		return -EBUSY;
	} else {
		if (t)
			lc_destroy(t);
	}
	drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elements */
	return 0;
}

static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_bio_size)
{
	struct request_queue * const q = device->rq_queue;
	unsigned int max_hw_sectors = max_bio_size >> 9;
	unsigned int max_segments = 0;

	if (get_ldev_if_state(device, D_ATTACHING)) {
		struct request_queue * const b = device->ldev->backing_bdev->bd_disk->queue;

		max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
		rcu_read_lock();
		max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
		rcu_read_unlock();
		put_ldev(device);
	}

	blk_queue_logical_block_size(q, 512);
	blk_queue_max_hw_sectors(q, max_hw_sectors);
	/* This is the workaround for "bio would need to, but cannot, be split" */
	blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
	blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);

	if (get_ldev_if_state(device, D_ATTACHING)) {
		struct request_queue * const b = device->ldev->backing_bdev->bd_disk->queue;

		blk_queue_stack_limits(q, b);

		if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
			drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
				 q->backing_dev_info.ra_pages,
				 b->backing_dev_info.ra_pages);
			q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
		}
		put_ldev(device);
	}
}

void drbd_reconsider_max_bio_size(struct drbd_device *device)
{
	unsigned int now, new, local, peer;

	now = queue_max_hw_sectors(device->rq_queue) << 9;
	local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */
	peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */

	if (get_ldev_if_state(device, D_ATTACHING)) {
		local = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9;
		device->local_max_bio_size = local;
		put_ldev(device);
	}
	local = min(local, DRBD_MAX_BIO_SIZE);

	/* We may ignore peer limits if the peer is modern enough.
	   Because from 8.3.8 onwards the peer can use multiple
	   BIOs for a single peer_request */
	if (device->state.conn >= C_WF_REPORT_PARAMS) {
		if (first_peer_device(device)->connection->agreed_pro_version < 94)
			peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
			/* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
		else if (first_peer_device(device)->connection->agreed_pro_version == 94)
			peer = DRBD_MAX_SIZE_H80_PACKET;
		else if (first_peer_device(device)->connection->agreed_pro_version < 100)
			peer = DRBD_MAX_BIO_SIZE_P95;  /* drbd 8.3.8 onwards, before 8.4.0 */
		else
			peer = DRBD_MAX_BIO_SIZE;
	}

	new = min(local, peer);

	if (device->state.role == R_PRIMARY && new < now)
		drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now);

	if (new != now)
		drbd_info(device, "max BIO size = %u\n", new);

	drbd_setup_queue_param(device, new);
}

/* Starts the worker thread */
static void conn_reconfig_start(struct drbd_connection *connection)
{
	drbd_thread_start(&connection->worker);
	drbd_flush_workqueue(&connection->sender_work);
}

/* if still unconfigured, stops worker again. */
static void conn_reconfig_done(struct drbd_connection *connection)
{
	bool stop_threads;
	spin_lock_irq(&connection->resource->req_lock);
	stop_threads = conn_all_vols_unconf(connection) &&
		connection->cstate == C_STANDALONE;
	spin_unlock_irq(&connection->resource->req_lock);
	if (stop_threads) {
		/* asender is implicitly stopped by receiver
		 * in conn_disconnect() */
		drbd_thread_stop(&connection->receiver);
		drbd_thread_stop(&connection->worker);
	}
}

/* Make sure IO is suspended before calling this function(). */
static void drbd_suspend_al(struct drbd_device *device)
{
	int s = 0;

	if (!lc_try_lock(device->act_log)) {
		drbd_warn(device, "Failed to lock al in drbd_suspend_al()\n");
		return;
	}

	drbd_al_shrink(device);
	spin_lock_irq(&device->resource->req_lock);
	if (device->state.conn < C_CONNECTED)
		s = !test_and_set_bit(AL_SUSPENDED, &device->flags);
	spin_unlock_irq(&device->resource->req_lock);
	lc_unlock(device->act_log);

	if (s)
		drbd_info(device, "Suspended AL updates\n");
}


static bool should_set_defaults(struct genl_info *info)
{
	unsigned flags = ((struct drbd_genlmsghdr*)info->userhdr)->flags;
	return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS);
}
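
/* Illustrative example, assuming the historical fixed 32 kB activity log
 * (md.al_size_4k == 8) and the 919 context slots per transaction mentioned
 * below: drbd_al_extents_max() then yields (8 - 1) * 919 = 6433 AL extents. */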
static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
{
	/* This is limited by 16 bit "slot" numbers,
	 * and by available on-disk context storage.
	 *
	 * Also (u16)~0 is special (denotes a "free" extent).
	 *
	 * One transaction occupies one 4kB on-disk block,
	 * we have n such blocks in the on disk ring buffer,
	 * the "current" transaction may fail (n-1),
	 * and there are 919 slots of context information per transaction.
	 *
	 * 72 transaction blocks amounts to more than 2**16 context slots,
	 * so cap there first.
	 */
	const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX;
	const unsigned int sufficient_on_disk =
		(max_al_nr + AL_CONTEXT_PER_TRANSACTION -1)
		/AL_CONTEXT_PER_TRANSACTION;

	unsigned int al_size_4k = bdev->md.al_size_4k;

	if (al_size_4k > sufficient_on_disk)
		return max_al_nr;

	return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
}

int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
{
	enum drbd_ret_code retcode;
	struct drbd_device *device;
	struct disk_conf *new_disk_conf, *old_disk_conf;
	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
	int err, fifo_size;

	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	device = adm_ctx.device;

	/* we also need a disk
	 * to change the options on */
	if (!get_ldev(device)) {
		retcode = ERR_NO_DISK;
		goto out;
	}

	new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
	if (!new_disk_conf) {
		retcode = ERR_NOMEM;
		goto fail;
	}

	mutex_lock(&device->resource->conf_update);
	old_disk_conf = device->ldev->disk_conf;
	*new_disk_conf = *old_disk_conf;
	if (should_set_defaults(info))
		set_disk_conf_defaults(new_disk_conf);

	err = disk_conf_from_attrs_for_change(new_disk_conf, info);
	if (err && err != -ENOMSG) {
		retcode = ERR_MANDATORY_TAG;
		drbd_msg_put_info(from_attrs_err_to_txt(err));
		goto fail_unlock;
	}

	if (!expect(new_disk_conf->resync_rate >= 1))
		new_disk_conf->resync_rate = 1;

	if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
		new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
	if (new_disk_conf->al_extents > drbd_al_extents_max(device->ldev))
		new_disk_conf->al_extents = drbd_al_extents_max(device->ldev);

	if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
		new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;

	fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
	if (fifo_size != device->rs_plan_s->size) {
		new_plan = fifo_alloc(fifo_size);
		if (!new_plan) {
			drbd_err(device, "kmalloc of fifo_buffer failed");
			retcode = ERR_NOMEM;
			goto fail_unlock;
		}
	}

	drbd_suspend_io(device);
	wait_event(device->al_wait, lc_try_lock(device->act_log));
	drbd_al_shrink(device);
	err = drbd_check_al_size(device, new_disk_conf);
	lc_unlock(device->act_log);
	wake_up(&device->al_wait);
	drbd_resume_io(device);

	if (err) {
		retcode = ERR_NOMEM;
		goto fail_unlock;
	}

	write_lock_irq(&global_state_lock);
	retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
	if (retcode == NO_ERROR) {
		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
		drbd_resync_after_changed(device);
	}
	write_unlock_irq(&global_state_lock);

	if (retcode != NO_ERROR)
		goto fail_unlock;

	if (new_plan) {
		old_plan = device->rs_plan_s;
		rcu_assign_pointer(device->rs_plan_s, new_plan);
	}

	mutex_unlock(&device->resource->conf_update);

	if (new_disk_conf->al_updates)
		device->ldev->md.flags &= ~MDF_AL_DISABLED;
	else
		device->ldev->md.flags |= MDF_AL_DISABLED;

	if (new_disk_conf->md_flushes)
		clear_bit(MD_NO_FUA, &device->flags);
	else
		set_bit(MD_NO_FUA, &device->flags);

	drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);

	drbd_md_sync(device);

	if (device->state.conn >= C_CONNECTED) {
		struct drbd_peer_device *peer_device;

		for_each_peer_device(peer_device, device)
			drbd_send_sync_param(peer_device);
	}

	synchronize_rcu();
	kfree(old_disk_conf);
	kfree(old_plan);
	mod_timer(&device->request_timer, jiffies + HZ);
	goto success;

fail_unlock:
	mutex_unlock(&device->resource->conf_update);
 fail:
	kfree(new_disk_conf);
	kfree(new_plan);
success:
	put_ldev(device);
 out:
	drbd_adm_finish(info, retcode);
	return 0;
}

int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_device *device;
	int err;
	enum drbd_ret_code retcode;
	enum determine_dev_size dd;
	sector_t max_possible_sectors;
	sector_t min_md_device_sectors;
	struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
	struct disk_conf *new_disk_conf = NULL;
	struct block_device *bdev;
	struct lru_cache *resync_lru = NULL;
	struct fifo_buffer *new_plan = NULL;
	union drbd_state ns, os;
	enum drbd_state_rv rv;
	struct net_conf *nc;

	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto finish;

	device = adm_ctx.device;
	conn_reconfig_start(first_peer_device(device)->connection);

	/* if you want to reconfigure, please tear down first */
	if (device->state.disk > D_DISKLESS) {
		retcode = ERR_DISK_CONFIGURED;
		goto fail;
	}
	/* It may just now have detached because of IO error. Make sure
	 * drbd_ldev_destroy is done already, we may end up here very fast,
	 * e.g. if someone calls attach from the on-io-error handler,
	 * to realize a "hot spare" feature (not that I'd recommend that) */
	wait_event(device->misc_wait, !atomic_read(&device->local_cnt));

	/* make sure there is no leftover from previous force-detach attempts */
	clear_bit(FORCE_DETACH, &device->flags);
	clear_bit(WAS_IO_ERROR, &device->flags);
	clear_bit(WAS_READ_ERROR, &device->flags);

	/* and no leftover from previously aborted resync or verify, either */
	device->rs_total = 0;
	device->rs_failed = 0;
	atomic_set(&device->rs_pending_cnt, 0);

	/* allocation not in the IO path, drbdsetup context */
	nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
	if (!nbc) {
		retcode = ERR_NOMEM;
		goto fail;
	}
	spin_lock_init(&nbc->md.uuid_lock);

	new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
	if (!new_disk_conf) {
		retcode = ERR_NOMEM;
		goto fail;
	}
	nbc->disk_conf = new_disk_conf;

	set_disk_conf_defaults(new_disk_conf);
	err = disk_conf_from_attrs(new_disk_conf, info);
	if (err) {
		retcode = ERR_MANDATORY_TAG;
		drbd_msg_put_info(from_attrs_err_to_txt(err));
		goto fail;
	}

	if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
		new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;

	new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ);
	if (!new_plan) {
		retcode = ERR_NOMEM;
		goto fail;
	}

	if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
		retcode = ERR_MD_IDX_INVALID;
		goto fail;
	}

	write_lock_irq(&global_state_lock);
	retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
	write_unlock_irq(&global_state_lock);
	if (retcode != NO_ERROR)
		goto fail;

	rcu_read_lock();
	nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
	if (nc) {
		if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
			rcu_read_unlock();
			retcode = ERR_STONITH_AND_PROT_A;
			goto fail;
		}
	}
	rcu_read_unlock();

	bdev = blkdev_get_by_path(new_disk_conf->backing_dev,
				  FMODE_READ | FMODE_WRITE | FMODE_EXCL, device);
	if (IS_ERR(bdev)) {
		drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev,
			PTR_ERR(bdev));
		retcode = ERR_OPEN_DISK;
		goto fail;
	}
	nbc->backing_bdev = bdev;

	/*
	 * meta_dev_idx >= 0: external fixed size, possibly multiple
	 * drbd sharing one meta device. TODO in that case, paranoia
	 * check that [md_bdev, meta_dev_idx] is not yet used by some
	 * other drbd minor! (if you use drbd.conf + drbdadm, that
	 * should check it for you already; but if you don't, or
	 * someone fooled it, we need to double check here)
	 */
	bdev = blkdev_get_by_path(new_disk_conf->meta_dev,
				  FMODE_READ | FMODE_WRITE | FMODE_EXCL,
				  (new_disk_conf->meta_dev_idx < 0) ?
				  (void *)device : (void *)drbd_m_holder);
	if (IS_ERR(bdev)) {
		drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev,
			PTR_ERR(bdev));
		retcode = ERR_OPEN_MD_DISK;
		goto fail;
	}
	nbc->md_bdev = bdev;

	if ((nbc->backing_bdev == nbc->md_bdev) !=
	    (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
	     new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
		retcode = ERR_MD_IDX_INVALID;
		goto fail;
	}

	resync_lru = lc_create("resync", drbd_bm_ext_cache,
			1, 61, sizeof(struct bm_extent),
			offsetof(struct bm_extent, lce));
	if (!resync_lru) {
		retcode = ERR_NOMEM;
		goto fail;
	}

	/* Read our meta data super block early.
	 * This also sets other on-disk offsets. */
	retcode = drbd_md_read(device, nbc);
	if (retcode != NO_ERROR)
		goto fail;

	if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
		new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
	if (new_disk_conf->al_extents > drbd_al_extents_max(nbc))
		new_disk_conf->al_extents = drbd_al_extents_max(nbc);

	if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
		drbd_err(device, "max capacity %llu smaller than disk size %llu\n",
			(unsigned long long) drbd_get_max_capacity(nbc),
			(unsigned long long) new_disk_conf->disk_size);
		retcode = ERR_DISK_TOO_SMALL;
		goto fail;
	}

	if (new_disk_conf->meta_dev_idx < 0) {
		max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
		/* at least one MB, otherwise it does not make sense */
		min_md_device_sectors = (2<<10);
	} else {
		max_possible_sectors = DRBD_MAX_SECTORS;
		min_md_device_sectors = MD_128MB_SECT * (new_disk_conf->meta_dev_idx + 1);
	}

	if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
		retcode = ERR_MD_DISK_TOO_SMALL;
		drbd_warn(device, "refusing attach: md-device too small, "
			 "at least %llu sectors needed for this meta-disk type\n",
			 (unsigned long long) min_md_device_sectors);
		goto fail;
	}

	/* Make sure the new disk is big enough
	 * (we may currently be R_PRIMARY with no local disk...) */
	if (drbd_get_max_capacity(nbc) <
	    drbd_get_capacity(device->this_bdev)) {
		retcode = ERR_DISK_TOO_SMALL;
		goto fail;
	}

	nbc->known_size = drbd_get_capacity(nbc->backing_bdev);

	if (nbc->known_size > max_possible_sectors) {
		drbd_warn(device, "==> truncating very big lower level device "
			"to currently maximum possible %llu sectors <==\n",
			(unsigned long long) max_possible_sectors);
		if (new_disk_conf->meta_dev_idx >= 0)
			drbd_warn(device, "==>> using internal or flexible "
				      "meta data may help <<==\n");
	}

	drbd_suspend_io(device);
	/* also wait for the last barrier ack. */
	/* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
	 * We need a way to either ignore barrier acks for barriers sent before a device
	 * was attached, or a way to wait for all pending barrier acks to come in.
	 * As barriers are counted per resource,
	 * we'd need to suspend io on all devices of a resource.
	 */
	wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
	/* and for any other previously queued work */
	drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);

	rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
	retcode = rv;  /* FIXME: Type mismatch. */
	drbd_resume_io(device);
	if (rv < SS_SUCCESS)
		goto fail;

	if (!get_ldev_if_state(device, D_ATTACHING))
		goto force_diskless;

	if (!device->bitmap) {
		if (drbd_bm_init(device)) {
			retcode = ERR_NOMEM;
			goto force_diskless_dec;
		}
	}

	if (device->state.conn < C_CONNECTED &&
	    device->state.role == R_PRIMARY &&
	    (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
		drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
		    (unsigned long long)device->ed_uuid);
		retcode = ERR_DATA_NOT_CURRENT;
		goto force_diskless_dec;
	}

	/* Since we are diskless, fix the activity log first... */
	if (drbd_check_al_size(device, new_disk_conf)) {
		retcode = ERR_NOMEM;
		goto force_diskless_dec;
	}

	/* Prevent shrinking of consistent devices ! */
	if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
	    drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
		drbd_warn(device, "refusing to truncate a consistent device\n");
		retcode = ERR_DISK_TOO_SMALL;
		goto force_diskless_dec;
	}

	/* Reset the "barriers don't work" bits here, then force meta data to
	 * be written, to ensure we determine if barriers are supported. */
	if (new_disk_conf->md_flushes)
		clear_bit(MD_NO_FUA, &device->flags);
	else
		set_bit(MD_NO_FUA, &device->flags);

	/* Point of no return reached.
	 * Devices and memory are no longer released by error cleanup below.
	 * now device takes over responsibility, and the state engine should
	 * clean it up somewhere. */
	D_ASSERT(device, device->ldev == NULL);
	device->ldev = nbc;
	device->resync = resync_lru;
	device->rs_plan_s = new_plan;
	nbc = NULL;
	resync_lru = NULL;
	new_disk_conf = NULL;
	new_plan = NULL;

	drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);

	if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
		set_bit(CRASHED_PRIMARY, &device->flags);
	else
		clear_bit(CRASHED_PRIMARY, &device->flags);

	if (drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
	    !(device->state.role == R_PRIMARY && device->resource->susp_nod))
		set_bit(CRASHED_PRIMARY, &device->flags);

	device->send_cnt = 0;
	device->recv_cnt = 0;
	device->read_cnt = 0;
	device->writ_cnt = 0;

	drbd_reconsider_max_bio_size(device);

	/* If I am currently not R_PRIMARY,
	 * but meta data primary indicator is set,
	 * I just now recover from a hard crash,
	 * and have been R_PRIMARY before that crash.
	 *
	 * Now, if I had no connection before that crash
	 * (have been degraded R_PRIMARY), chances are that
	 * I won't find my peer now either.
	 *
	 * In that case, and _only_ in that case,
	 * we use the degr-wfc-timeout instead of the default,
	 * so we can automatically recover from a crash of a
	 * degraded but active "cluster" after a certain timeout.
	 */
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	if (device->state.role != R_PRIMARY &&
	     drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
	    !drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND))
		set_bit(USE_DEGR_WFC_T, &device->flags);

	dd = drbd_determine_dev_size(device, 0, NULL);
	if (dd <= DS_ERROR) {
		retcode = ERR_NOMEM_BITMAP;
		goto force_diskless_dec;
	} else if (dd == DS_GREW)
		set_bit(RESYNC_AFTER_NEG, &device->flags);

	if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ||
	    (test_bit(CRASHED_PRIMARY, &device->flags) &&
	     drbd_md_test_flag(device->ldev, MDF_AL_DISABLED))) {
		drbd_info(device, "Assuming that all blocks are out of sync "
		     "(aka FullSync)\n");
		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
			"set_n_write from attaching", BM_LOCKED_MASK)) {
			retcode = ERR_IO_MD_DISK;
			goto force_diskless_dec;
		}
	} else {
		if (drbd_bitmap_io(device, &drbd_bm_read,
			"read from attaching", BM_LOCKED_MASK)) {
			retcode = ERR_IO_MD_DISK;
			goto force_diskless_dec;
		}
	}

	if (_drbd_bm_total_weight(device) == drbd_bm_bits(device))
		drbd_suspend_al(device); /* IO is still suspended here... */

	spin_lock_irq(&device->resource->req_lock);
	os = drbd_read_state(device);
	ns = os;
	/* If MDF_CONSISTENT is not set go into inconsistent state,
	   otherwise investigate MDF_WasUpToDate...
	   If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
	   otherwise into D_CONSISTENT state.
	*/
	if (drbd_md_test_flag(device->ldev, MDF_CONSISTENT)) {
		if (drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE))
			ns.disk = D_CONSISTENT;
		else
			ns.disk = D_OUTDATED;
	} else {
		ns.disk = D_INCONSISTENT;
	}

	if (drbd_md_test_flag(device->ldev, MDF_PEER_OUT_DATED))
		ns.pdsk = D_OUTDATED;

	rcu_read_lock();
	if (ns.disk == D_CONSISTENT &&
	    (ns.pdsk == D_OUTDATED || rcu_dereference(device->ldev->disk_conf)->fencing == FP_DONT_CARE))
		ns.disk = D_UP_TO_DATE;

	/* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
	   MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
	   this point, because drbd_request_state() modifies these
	   flags. */

	if (rcu_dereference(device->ldev->disk_conf)->al_updates)
		device->ldev->md.flags &= ~MDF_AL_DISABLED;
	else
		device->ldev->md.flags |= MDF_AL_DISABLED;

	rcu_read_unlock();

	/* In case we are C_CONNECTED postpone any decision on the new disk
	   state until after the negotiation phase. */
	if (device->state.conn == C_CONNECTED) {
		device->new_state_tmp.i = ns.i;
		ns.i = os.i;
		ns.disk = D_NEGOTIATING;

		/* We expect to receive up-to-date UUIDs soon.
		   To avoid a race in receive_state, free p_uuid while
		   holding req_lock. I.e. atomic with the state change */
atomic with the state change */ 1777 kfree(device->p_uuid); 1778 device->p_uuid = NULL; 1779 } 1780 1781 rv = _drbd_set_state(device, ns, CS_VERBOSE, NULL); 1782 spin_unlock_irq(&device->resource->req_lock); 1783 1784 if (rv < SS_SUCCESS) 1785 goto force_diskless_dec; 1786 1787 mod_timer(&device->request_timer, jiffies + HZ); 1788 1789 if (device->state.role == R_PRIMARY) 1790 device->ldev->md.uuid[UI_CURRENT] |= (u64)1; 1791 else 1792 device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1; 1793 1794 drbd_md_mark_dirty(device); 1795 drbd_md_sync(device); 1796 1797 kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE); 1798 put_ldev(device); 1799 conn_reconfig_done(first_peer_device(device)->connection); 1800 drbd_adm_finish(info, retcode); 1801 return 0; 1802 1803 force_diskless_dec: 1804 put_ldev(device); 1805 force_diskless: 1806 drbd_force_state(device, NS(disk, D_DISKLESS)); 1807 drbd_md_sync(device); 1808 fail: 1809 conn_reconfig_done(first_peer_device(device)->connection); 1810 if (nbc) { 1811 if (nbc->backing_bdev) 1812 blkdev_put(nbc->backing_bdev, 1813 FMODE_READ | FMODE_WRITE | FMODE_EXCL); 1814 if (nbc->md_bdev) 1815 blkdev_put(nbc->md_bdev, 1816 FMODE_READ | FMODE_WRITE | FMODE_EXCL); 1817 kfree(nbc); 1818 } 1819 kfree(new_disk_conf); 1820 lc_destroy(resync_lru); 1821 kfree(new_plan); 1822 1823 finish: 1824 drbd_adm_finish(info, retcode); 1825 return 0; 1826 } 1827 1828 static int adm_detach(struct drbd_device *device, int force) 1829 { 1830 enum drbd_state_rv retcode; 1831 int ret; 1832 1833 if (force) { 1834 set_bit(FORCE_DETACH, &device->flags); 1835 drbd_force_state(device, NS(disk, D_FAILED)); 1836 retcode = SS_SUCCESS; 1837 goto out; 1838 } 1839 1840 drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */ 1841 drbd_md_get_buffer(device); /* make sure there is no in-flight meta-data IO */ 1842 retcode = drbd_request_state(device, NS(disk, D_FAILED)); 1843 drbd_md_put_buffer(device); 1844 /* D_FAILED will transition to DISKLESS. */ 1845 ret = wait_event_interruptible(device->misc_wait, 1846 device->state.disk != D_FAILED); 1847 drbd_resume_io(device); 1848 if ((int)retcode == (int)SS_IS_DISKLESS) 1849 retcode = SS_NOTHING_TO_DO; 1850 if (ret) 1851 retcode = ERR_INTR; 1852 out: 1853 return retcode; 1854 } 1855 1856 /* Detaching the disk is a process in multiple stages. First we need to lock 1857 * out application IO, in-flight IO, IO stuck in drbd_al_begin_io. 1858 * Then we transition to D_DISKLESS, and wait for put_ldev() to return all 1859 * internal references as well. 1860 * Only then we have finally detached. 
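 * A forced detach (FORCE_DETACH) takes a shortcut instead: it forces the
 * disk state to D_FAILED immediately, without suspending IO or waiting
 * for in-flight meta data IO first (see adm_detach() above).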
*/ 1861 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) 1862 { 1863 enum drbd_ret_code retcode; 1864 struct detach_parms parms = { }; 1865 int err; 1866 1867 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); 1868 if (!adm_ctx.reply_skb) 1869 return retcode; 1870 if (retcode != NO_ERROR) 1871 goto out; 1872 1873 if (info->attrs[DRBD_NLA_DETACH_PARMS]) { 1874 err = detach_parms_from_attrs(&parms, info); 1875 if (err) { 1876 retcode = ERR_MANDATORY_TAG; 1877 drbd_msg_put_info(from_attrs_err_to_txt(err)); 1878 goto out; 1879 } 1880 } 1881 1882 retcode = adm_detach(adm_ctx.device, parms.force_detach); 1883 out: 1884 drbd_adm_finish(info, retcode); 1885 return 0; 1886 } 1887 1888 static bool conn_resync_running(struct drbd_connection *connection) 1889 { 1890 struct drbd_peer_device *peer_device; 1891 bool rv = false; 1892 int vnr; 1893 1894 rcu_read_lock(); 1895 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 1896 struct drbd_device *device = peer_device->device; 1897 if (device->state.conn == C_SYNC_SOURCE || 1898 device->state.conn == C_SYNC_TARGET || 1899 device->state.conn == C_PAUSED_SYNC_S || 1900 device->state.conn == C_PAUSED_SYNC_T) { 1901 rv = true; 1902 break; 1903 } 1904 } 1905 rcu_read_unlock(); 1906 1907 return rv; 1908 } 1909 1910 static bool conn_ov_running(struct drbd_connection *connection) 1911 { 1912 struct drbd_peer_device *peer_device; 1913 bool rv = false; 1914 int vnr; 1915 1916 rcu_read_lock(); 1917 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 1918 struct drbd_device *device = peer_device->device; 1919 if (device->state.conn == C_VERIFY_S || 1920 device->state.conn == C_VERIFY_T) { 1921 rv = true; 1922 break; 1923 } 1924 } 1925 rcu_read_unlock(); 1926 1927 return rv; 1928 } 1929 1930 static enum drbd_ret_code 1931 _check_net_options(struct drbd_connection *connection, struct net_conf *old_net_conf, struct net_conf *new_net_conf) 1932 { 1933 struct drbd_peer_device *peer_device; 1934 int i; 1935 1936 if (old_net_conf && connection->cstate == C_WF_REPORT_PARAMS && connection->agreed_pro_version < 100) { 1937 if (new_net_conf->wire_protocol != old_net_conf->wire_protocol) 1938 return ERR_NEED_APV_100; 1939 1940 if (new_net_conf->two_primaries != old_net_conf->two_primaries) 1941 return ERR_NEED_APV_100; 1942 1943 if (strcmp(new_net_conf->integrity_alg, old_net_conf->integrity_alg)) 1944 return ERR_NEED_APV_100; 1945 } 1946 1947 if (!new_net_conf->two_primaries && 1948 conn_highest_role(connection) == R_PRIMARY && 1949 conn_highest_peer(connection) == R_PRIMARY) 1950 return ERR_NEED_ALLOW_TWO_PRI; 1951 1952 if (new_net_conf->two_primaries && 1953 (new_net_conf->wire_protocol != DRBD_PROT_C)) 1954 return ERR_NOT_PROTO_C; 1955 1956 idr_for_each_entry(&connection->peer_devices, peer_device, i) { 1957 struct drbd_device *device = peer_device->device; 1958 if (get_ldev(device)) { 1959 enum drbd_fencing_p fp = rcu_dereference(device->ldev->disk_conf)->fencing; 1960 put_ldev(device); 1961 if (new_net_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) 1962 return ERR_STONITH_AND_PROT_A; 1963 } 1964 if (device->state.role == R_PRIMARY && new_net_conf->discard_my_data) 1965 return ERR_DISCARD_IMPOSSIBLE; 1966 } 1967 1968 if (new_net_conf->on_congestion != OC_BLOCK && new_net_conf->wire_protocol != DRBD_PROT_A) 1969 return ERR_CONG_NOT_PROTO_A; 1970 1971 return NO_ERROR; 1972 } 1973 1974 static enum drbd_ret_code 1975 check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf) 1976 { 1977 
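	/* A static local is safe here: all configuration requests are
	 * strictly serialized on genl_lock(), so this handler never runs
	 * concurrently with itself. */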
static enum drbd_ret_code rv; 1978 struct drbd_peer_device *peer_device; 1979 int i; 1980 1981 rcu_read_lock(); 1982 rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf); 1983 rcu_read_unlock(); 1984 1985 /* connection->volumes protected by genl_lock() here */ 1986 idr_for_each_entry(&connection->peer_devices, peer_device, i) { 1987 struct drbd_device *device = peer_device->device; 1988 if (!device->bitmap) { 1989 if (drbd_bm_init(device)) 1990 return ERR_NOMEM; 1991 } 1992 } 1993 1994 return rv; 1995 } 1996 1997 struct crypto { 1998 struct crypto_hash *verify_tfm; 1999 struct crypto_hash *csums_tfm; 2000 struct crypto_hash *cram_hmac_tfm; 2001 struct crypto_hash *integrity_tfm; 2002 }; 2003 2004 static int 2005 alloc_hash(struct crypto_hash **tfm, char *tfm_name, int err_alg) 2006 { 2007 if (!tfm_name[0]) 2008 return NO_ERROR; 2009 2010 *tfm = crypto_alloc_hash(tfm_name, 0, CRYPTO_ALG_ASYNC); 2011 if (IS_ERR(*tfm)) { 2012 *tfm = NULL; 2013 return err_alg; 2014 } 2015 2016 return NO_ERROR; 2017 } 2018 2019 static enum drbd_ret_code 2020 alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf) 2021 { 2022 char hmac_name[CRYPTO_MAX_ALG_NAME]; 2023 enum drbd_ret_code rv; 2024 2025 rv = alloc_hash(&crypto->csums_tfm, new_net_conf->csums_alg, 2026 ERR_CSUMS_ALG); 2027 if (rv != NO_ERROR) 2028 return rv; 2029 rv = alloc_hash(&crypto->verify_tfm, new_net_conf->verify_alg, 2030 ERR_VERIFY_ALG); 2031 if (rv != NO_ERROR) 2032 return rv; 2033 rv = alloc_hash(&crypto->integrity_tfm, new_net_conf->integrity_alg, 2034 ERR_INTEGRITY_ALG); 2035 if (rv != NO_ERROR) 2036 return rv; 2037 if (new_net_conf->cram_hmac_alg[0] != 0) { 2038 snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", 2039 new_net_conf->cram_hmac_alg); 2040 2041 rv = alloc_hash(&crypto->cram_hmac_tfm, hmac_name, 2042 ERR_AUTH_ALG); 2043 } 2044 2045 return rv; 2046 } 2047 2048 static void free_crypto(struct crypto *crypto) 2049 { 2050 crypto_free_hash(crypto->cram_hmac_tfm); 2051 crypto_free_hash(crypto->integrity_tfm); 2052 crypto_free_hash(crypto->csums_tfm); 2053 crypto_free_hash(crypto->verify_tfm); 2054 } 2055 2056 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) 2057 { 2058 enum drbd_ret_code retcode; 2059 struct drbd_connection *connection; 2060 struct net_conf *old_net_conf, *new_net_conf = NULL; 2061 int err; 2062 int ovr; /* online verify running */ 2063 int rsr; /* re-sync running */ 2064 struct crypto crypto = { }; 2065 2066 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION); 2067 if (!adm_ctx.reply_skb) 2068 return retcode; 2069 if (retcode != NO_ERROR) 2070 goto out; 2071 2072 connection = adm_ctx.connection; 2073 2074 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); 2075 if (!new_net_conf) { 2076 retcode = ERR_NOMEM; 2077 goto out; 2078 } 2079 2080 conn_reconfig_start(connection); 2081 2082 mutex_lock(&connection->data.mutex); 2083 mutex_lock(&connection->resource->conf_update); 2084 old_net_conf = connection->net_conf; 2085 2086 if (!old_net_conf) { 2087 drbd_msg_put_info("net conf missing, try connect"); 2088 retcode = ERR_INVALID_REQUEST; 2089 goto fail; 2090 } 2091 2092 *new_net_conf = *old_net_conf; 2093 if (should_set_defaults(info)) 2094 set_net_conf_defaults(new_net_conf); 2095 2096 err = net_conf_from_attrs_for_change(new_net_conf, info); 2097 if (err && err != -ENOMSG) { 2098 retcode = ERR_MANDATORY_TAG; 2099 drbd_msg_put_info(from_attrs_err_to_txt(err)); 2100 goto fail; 2101 } 2102 2103 retcode = check_net_options(connection, 
new_net_conf); 2104 if (retcode != NO_ERROR) 2105 goto fail; 2106 2107 /* re-sync running */ 2108 rsr = conn_resync_running(connection); 2109 if (rsr && strcmp(new_net_conf->csums_alg, old_net_conf->csums_alg)) { 2110 retcode = ERR_CSUMS_RESYNC_RUNNING; 2111 goto fail; 2112 } 2113 2114 /* online verify running */ 2115 ovr = conn_ov_running(connection); 2116 if (ovr && strcmp(new_net_conf->verify_alg, old_net_conf->verify_alg)) { 2117 retcode = ERR_VERIFY_RUNNING; 2118 goto fail; 2119 } 2120 2121 retcode = alloc_crypto(&crypto, new_net_conf); 2122 if (retcode != NO_ERROR) 2123 goto fail; 2124 2125 rcu_assign_pointer(connection->net_conf, new_net_conf); 2126 2127 if (!rsr) { 2128 crypto_free_hash(connection->csums_tfm); 2129 connection->csums_tfm = crypto.csums_tfm; 2130 crypto.csums_tfm = NULL; 2131 } 2132 if (!ovr) { 2133 crypto_free_hash(connection->verify_tfm); 2134 connection->verify_tfm = crypto.verify_tfm; 2135 crypto.verify_tfm = NULL; 2136 } 2137 2138 crypto_free_hash(connection->integrity_tfm); 2139 connection->integrity_tfm = crypto.integrity_tfm; 2140 if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100) 2141 /* Do this without trying to take connection->data.mutex again. */ 2142 __drbd_send_protocol(connection, P_PROTOCOL_UPDATE); 2143 2144 crypto_free_hash(connection->cram_hmac_tfm); 2145 connection->cram_hmac_tfm = crypto.cram_hmac_tfm; 2146 2147 mutex_unlock(&connection->resource->conf_update); 2148 mutex_unlock(&connection->data.mutex); 2149 synchronize_rcu(); 2150 kfree(old_net_conf); 2151 2152 if (connection->cstate >= C_WF_REPORT_PARAMS) { 2153 struct drbd_peer_device *peer_device; 2154 int vnr; 2155 2156 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) 2157 drbd_send_sync_param(peer_device); 2158 } 2159 2160 goto done; 2161 2162 fail: 2163 mutex_unlock(&connection->resource->conf_update); 2164 mutex_unlock(&connection->data.mutex); 2165 free_crypto(&crypto); 2166 kfree(new_net_conf); 2167 done: 2168 conn_reconfig_done(connection); 2169 out: 2170 drbd_adm_finish(info, retcode); 2171 return 0; 2172 } 2173 2174 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) 2175 { 2176 struct drbd_peer_device *peer_device; 2177 struct net_conf *old_net_conf, *new_net_conf = NULL; 2178 struct crypto crypto = { }; 2179 struct drbd_resource *resource; 2180 struct drbd_connection *connection; 2181 enum drbd_ret_code retcode; 2182 int i; 2183 int err; 2184 2185 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); 2186 2187 if (!adm_ctx.reply_skb) 2188 return retcode; 2189 if (retcode != NO_ERROR) 2190 goto out; 2191 if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) { 2192 drbd_msg_put_info("connection endpoint(s) missing"); 2193 retcode = ERR_INVALID_REQUEST; 2194 goto out; 2195 } 2196 2197 /* No need for _rcu here. All reconfiguration is 2198 * strictly serialized on genl_lock(). 
We are protected against 2199 * concurrent reconfiguration/addition/deletion */ 2200 for_each_resource(resource, &drbd_resources) { 2201 for_each_connection(connection, resource) { 2202 if (nla_len(adm_ctx.my_addr) == connection->my_addr_len && 2203 !memcmp(nla_data(adm_ctx.my_addr), &connection->my_addr, 2204 connection->my_addr_len)) { 2205 retcode = ERR_LOCAL_ADDR; 2206 goto out; 2207 } 2208 2209 if (nla_len(adm_ctx.peer_addr) == connection->peer_addr_len && 2210 !memcmp(nla_data(adm_ctx.peer_addr), &connection->peer_addr, 2211 connection->peer_addr_len)) { 2212 retcode = ERR_PEER_ADDR; 2213 goto out; 2214 } 2215 } 2216 } 2217 2218 connection = first_connection(adm_ctx.resource); 2219 conn_reconfig_start(connection); 2220 2221 if (connection->cstate > C_STANDALONE) { 2222 retcode = ERR_NET_CONFIGURED; 2223 goto fail; 2224 } 2225 2226 /* allocation not in the IO path, drbdsetup / netlink process context */ 2227 new_net_conf = kzalloc(sizeof(*new_net_conf), GFP_KERNEL); 2228 if (!new_net_conf) { 2229 retcode = ERR_NOMEM; 2230 goto fail; 2231 } 2232 2233 set_net_conf_defaults(new_net_conf); 2234 2235 err = net_conf_from_attrs(new_net_conf, info); 2236 if (err && err != -ENOMSG) { 2237 retcode = ERR_MANDATORY_TAG; 2238 drbd_msg_put_info(from_attrs_err_to_txt(err)); 2239 goto fail; 2240 } 2241 2242 retcode = check_net_options(connection, new_net_conf); 2243 if (retcode != NO_ERROR) 2244 goto fail; 2245 2246 retcode = alloc_crypto(&crypto, new_net_conf); 2247 if (retcode != NO_ERROR) 2248 goto fail; 2249 2250 ((char *)new_net_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0; 2251 2252 drbd_flush_workqueue(&connection->sender_work); 2253 2254 mutex_lock(&adm_ctx.resource->conf_update); 2255 old_net_conf = connection->net_conf; 2256 if (old_net_conf) { 2257 retcode = ERR_NET_CONFIGURED; 2258 mutex_unlock(&adm_ctx.resource->conf_update); 2259 goto fail; 2260 } 2261 rcu_assign_pointer(connection->net_conf, new_net_conf); 2262 2263 conn_free_crypto(connection); 2264 connection->cram_hmac_tfm = crypto.cram_hmac_tfm; 2265 connection->integrity_tfm = crypto.integrity_tfm; 2266 connection->csums_tfm = crypto.csums_tfm; 2267 connection->verify_tfm = crypto.verify_tfm; 2268 2269 connection->my_addr_len = nla_len(adm_ctx.my_addr); 2270 memcpy(&connection->my_addr, nla_data(adm_ctx.my_addr), connection->my_addr_len); 2271 connection->peer_addr_len = nla_len(adm_ctx.peer_addr); 2272 memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len); 2273 2274 mutex_unlock(&adm_ctx.resource->conf_update); 2275 2276 rcu_read_lock(); 2277 idr_for_each_entry(&connection->peer_devices, peer_device, i) { 2278 struct drbd_device *device = peer_device->device; 2279 device->send_cnt = 0; 2280 device->recv_cnt = 0; 2281 } 2282 rcu_read_unlock(); 2283 2284 retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE); 2285 2286 conn_reconfig_done(connection); 2287 drbd_adm_finish(info, retcode); 2288 return 0; 2289 2290 fail: 2291 free_crypto(&crypto); 2292 kfree(new_net_conf); 2293 2294 conn_reconfig_done(connection); 2295 out: 2296 drbd_adm_finish(info, retcode); 2297 return 0; 2298 } 2299 2300 static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force) 2301 { 2302 enum drbd_state_rv rv; 2303 2304 rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), 2305 force ? 
CS_HARD : 0); 2306 2307 switch (rv) { 2308 case SS_NOTHING_TO_DO: 2309 break; 2310 case SS_ALREADY_STANDALONE: 2311 return SS_SUCCESS; 2312 case SS_PRIMARY_NOP: 2313 /* Our state checking code wants to see the peer outdated. */ 2314 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0); 2315 2316 if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */ 2317 rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_VERBOSE); 2318 2319 break; 2320 case SS_CW_FAILED_BY_PEER: 2321 /* The peer probably wants to see us outdated. */ 2322 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, 2323 disk, D_OUTDATED), 0); 2324 if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) { 2325 rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), 2326 CS_HARD); 2327 } 2328 break; 2329 default:; 2330 /* no special handling necessary */ 2331 } 2332 2333 if (rv >= SS_SUCCESS) { 2334 enum drbd_state_rv rv2; 2335 /* No one else can reconfigure the network while I am here. 2336 * The state handling only uses drbd_thread_stop_nowait(), 2337 * we want to really wait here until the receiver is no more. 2338 */ 2339 drbd_thread_stop(&connection->receiver); 2340 2341 /* Race breaker. This additional state change request may be 2342 * necessary, if this was a forced disconnect during a receiver 2343 * restart. We may have "killed" the receiver thread just 2344 * after drbd_receiver() returned. Typically, we should be 2345 * C_STANDALONE already, now, and this becomes a no-op. 2346 */ 2347 rv2 = conn_request_state(connection, NS(conn, C_STANDALONE), 2348 CS_VERBOSE | CS_HARD); 2349 if (rv2 < SS_SUCCESS) 2350 drbd_err(connection, 2351 "unexpected rv2=%d in conn_try_disconnect()\n", 2352 rv2); 2353 } 2354 return rv; 2355 } 2356 2357 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) 2358 { 2359 struct disconnect_parms parms; 2360 struct drbd_connection *connection; 2361 enum drbd_state_rv rv; 2362 enum drbd_ret_code retcode; 2363 int err; 2364 2365 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION); 2366 if (!adm_ctx.reply_skb) 2367 return retcode; 2368 if (retcode != NO_ERROR) 2369 goto fail; 2370 2371 connection = adm_ctx.connection; 2372 memset(&parms, 0, sizeof(parms)); 2373 if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) { 2374 err = disconnect_parms_from_attrs(&parms, info); 2375 if (err) { 2376 retcode = ERR_MANDATORY_TAG; 2377 drbd_msg_put_info(from_attrs_err_to_txt(err)); 2378 goto fail; 2379 } 2380 } 2381 2382 rv = conn_try_disconnect(connection, parms.force_disconnect); 2383 if (rv < SS_SUCCESS) 2384 retcode = rv; /* FIXME: Type mismatch. 
*/ 2385 else 2386 retcode = NO_ERROR; 2387 fail: 2388 drbd_adm_finish(info, retcode); 2389 return 0; 2390 } 2391 2392 void resync_after_online_grow(struct drbd_device *device) 2393 { 2394 int iass; /* I am sync source */ 2395 2396 drbd_info(device, "Resync of new storage after online grow\n"); 2397 if (device->state.role != device->state.peer) 2398 iass = (device->state.role == R_PRIMARY); 2399 else 2400 iass = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags); 2401 2402 if (iass) 2403 drbd_start_resync(device, C_SYNC_SOURCE); 2404 else 2405 _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE); 2406 } 2407 2408 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) 2409 { 2410 struct disk_conf *old_disk_conf, *new_disk_conf = NULL; 2411 struct resize_parms rs; 2412 struct drbd_device *device; 2413 enum drbd_ret_code retcode; 2414 enum determine_dev_size dd; 2415 bool change_al_layout = false; 2416 enum dds_flags ddsf; 2417 sector_t u_size; 2418 int err; 2419 2420 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); 2421 if (!adm_ctx.reply_skb) 2422 return retcode; 2423 if (retcode != NO_ERROR) 2424 goto fail; 2425 2426 device = adm_ctx.device; 2427 if (!get_ldev(device)) { 2428 retcode = ERR_NO_DISK; 2429 goto fail; 2430 } 2431 2432 memset(&rs, 0, sizeof(struct resize_parms)); 2433 rs.al_stripes = device->ldev->md.al_stripes; 2434 rs.al_stripe_size = device->ldev->md.al_stripe_size_4k * 4; 2435 if (info->attrs[DRBD_NLA_RESIZE_PARMS]) { 2436 err = resize_parms_from_attrs(&rs, info); 2437 if (err) { 2438 retcode = ERR_MANDATORY_TAG; 2439 drbd_msg_put_info(from_attrs_err_to_txt(err)); 2440 goto fail_ldev; 2441 } 2442 } 2443 2444 if (device->state.conn > C_CONNECTED) { 2445 retcode = ERR_RESIZE_RESYNC; 2446 goto fail_ldev; 2447 } 2448 2449 if (device->state.role == R_SECONDARY && 2450 device->state.peer == R_SECONDARY) { 2451 retcode = ERR_NO_PRIMARY; 2452 goto fail_ldev; 2453 } 2454 2455 if (rs.no_resync && first_peer_device(device)->connection->agreed_pro_version < 93) { 2456 retcode = ERR_NEED_APV_93; 2457 goto fail_ldev; 2458 } 2459 2460 rcu_read_lock(); 2461 u_size = rcu_dereference(device->ldev->disk_conf)->disk_size; 2462 rcu_read_unlock(); 2463 if (u_size != (sector_t)rs.resize_size) { 2464 new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL); 2465 if (!new_disk_conf) { 2466 retcode = ERR_NOMEM; 2467 goto fail_ldev; 2468 } 2469 } 2470 2471 if (device->ldev->md.al_stripes != rs.al_stripes || 2472 device->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) { 2473 u32 al_size_k = rs.al_stripes * rs.al_stripe_size; 2474 2475 if (al_size_k > (16 * 1024 * 1024)) { 2476 retcode = ERR_MD_LAYOUT_TOO_BIG; 2477 goto fail_ldev; 2478 } 2479 2480 if (al_size_k < MD_32kB_SECT/2) { 2481 retcode = ERR_MD_LAYOUT_TOO_SMALL; 2482 goto fail_ldev; 2483 } 2484 2485 if (device->state.conn != C_CONNECTED) { 2486 retcode = ERR_MD_LAYOUT_CONNECTED; 2487 goto fail_ldev; 2488 } 2489 2490 change_al_layout = true; 2491 } 2492 2493 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) 2494 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev); 2495 2496 if (new_disk_conf) { 2497 mutex_lock(&device->resource->conf_update); 2498 old_disk_conf = device->ldev->disk_conf; 2499 *new_disk_conf = *old_disk_conf; 2500 new_disk_conf->disk_size = (sector_t)rs.resize_size; 2501 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf); 2502 mutex_unlock(&device->resource->conf_update); 2503 
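		/* Readers may still be looking at the old disk_conf under
		 * rcu_read_lock(); wait for them before freeing it. */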
		synchronize_rcu();
		kfree(old_disk_conf);
	}

	ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
	dd = drbd_determine_dev_size(device, ddsf, change_al_layout ? &rs : NULL);
	drbd_md_sync(device);
	put_ldev(device);
	if (dd == DS_ERROR) {
		retcode = ERR_NOMEM_BITMAP;
		goto fail;
	} else if (dd == DS_ERROR_SPACE_MD) {
		retcode = ERR_MD_LAYOUT_NO_FIT;
		goto fail;
	} else if (dd == DS_ERROR_SHRINK) {
		retcode = ERR_IMPLICIT_SHRINK;
		goto fail;
	}

	if (device->state.conn == C_CONNECTED) {
		if (dd == DS_GREW)
			set_bit(RESIZE_PENDING, &device->flags);

		drbd_send_uuids(first_peer_device(device));
		drbd_send_sizes(first_peer_device(device), 1, ddsf);
	}

 fail:
	drbd_adm_finish(info, retcode);
	return 0;

 fail_ldev:
	put_ldev(device);
	goto fail;
}

int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
{
	enum drbd_ret_code retcode;
	struct res_opts res_opts;
	int err;

	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto fail;

	res_opts = adm_ctx.resource->res_opts;
	if (should_set_defaults(info))
		set_res_opts_defaults(&res_opts);

	err = res_opts_from_attrs(&res_opts, info);
	if (err && err != -ENOMSG) {
		retcode = ERR_MANDATORY_TAG;
		drbd_msg_put_info(from_attrs_err_to_txt(err));
		goto fail;
	}

	err = set_resource_options(adm_ctx.resource, &res_opts);
	if (err) {
		retcode = ERR_INVALID_REQUEST;
		if (err == -ENOMEM)
			retcode = ERR_NOMEM;
	}

 fail:
	drbd_adm_finish(info, retcode);
	return 0;
}

int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_device *device;
	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */

	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	device = adm_ctx.device;

	/* If there is still bitmap IO pending, probably because of a previous
	 * resync just being finished, wait for it before requesting a new resync.
	 * Also wait for its after_state_ch(). */
	drbd_suspend_io(device);
	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
	drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);

	/* If we happen to be C_STANDALONE R_SECONDARY, just change to
	 * D_INCONSISTENT, and set all bits in the bitmap. Otherwise,
	 * try to start a resync handshake as sync target for full sync.
	 */
	if (device->state.conn == C_STANDALONE && device->state.role == R_SECONDARY) {
		retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT));
		if (retcode >= SS_SUCCESS) {
			if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
				"set_n_write from invalidate", BM_LOCKED_MASK))
				retcode = ERR_IO_MD_DISK;
		}
	} else
		retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
	drbd_resume_io(device);

 out:
	drbd_adm_finish(info, retcode);
	return 0;
}

static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
		union drbd_state mask, union drbd_state val)
{
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	retcode = drbd_request_state(adm_ctx.device, mask, val);
 out:
	drbd_adm_finish(info, retcode);
	return 0;
}

static int drbd_bmio_set_susp_al(struct drbd_device *device)
{
	int rv;

	rv = drbd_bmio_set_n_write(device);
	drbd_suspend_al(device);
	return rv;
}

int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
{
	int retcode; /* drbd_ret_code, drbd_state_rv */
	struct drbd_device *device;

	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;
	if (retcode != NO_ERROR)
		goto out;

	device = adm_ctx.device;

	/* If there is still bitmap IO pending, probably because of a previous
	 * resync just being finished, wait for it before requesting a new resync.
	 * Also wait for its after_state_ch(). */
	drbd_suspend_io(device);
	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
	drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);

	/* If we happen to be C_STANDALONE R_PRIMARY, just set all bits
	 * in the bitmap. Otherwise, try to start a resync handshake
	 * as sync source for full sync.
	 */
	if (device->state.conn == C_STANDALONE && device->state.role == R_PRIMARY) {
		/* The peer will get a resync upon connect anyways. Just make that
		   into a full resync.
*/ 2667 retcode = drbd_request_state(device, NS(pdsk, D_INCONSISTENT)); 2668 if (retcode >= SS_SUCCESS) { 2669 if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al, 2670 "set_n_write from invalidate_peer", 2671 BM_LOCKED_SET_ALLOWED)) 2672 retcode = ERR_IO_MD_DISK; 2673 } 2674 } else 2675 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S)); 2676 drbd_resume_io(device); 2677 2678 out: 2679 drbd_adm_finish(info, retcode); 2680 return 0; 2681 } 2682 2683 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info) 2684 { 2685 enum drbd_ret_code retcode; 2686 2687 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); 2688 if (!adm_ctx.reply_skb) 2689 return retcode; 2690 if (retcode != NO_ERROR) 2691 goto out; 2692 2693 if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO) 2694 retcode = ERR_PAUSE_IS_SET; 2695 out: 2696 drbd_adm_finish(info, retcode); 2697 return 0; 2698 } 2699 2700 int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info) 2701 { 2702 union drbd_dev_state s; 2703 enum drbd_ret_code retcode; 2704 2705 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); 2706 if (!adm_ctx.reply_skb) 2707 return retcode; 2708 if (retcode != NO_ERROR) 2709 goto out; 2710 2711 if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) { 2712 s = adm_ctx.device->state; 2713 if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) { 2714 retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP : 2715 s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR; 2716 } else { 2717 retcode = ERR_PAUSE_IS_CLEAR; 2718 } 2719 } 2720 2721 out: 2722 drbd_adm_finish(info, retcode); 2723 return 0; 2724 } 2725 2726 int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info) 2727 { 2728 return drbd_adm_simple_request_state(skb, info, NS(susp, 1)); 2729 } 2730 2731 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info) 2732 { 2733 struct drbd_device *device; 2734 int retcode; /* enum drbd_ret_code rsp. 
enum drbd_state_rv */ 2735 2736 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); 2737 if (!adm_ctx.reply_skb) 2738 return retcode; 2739 if (retcode != NO_ERROR) 2740 goto out; 2741 2742 device = adm_ctx.device; 2743 if (test_bit(NEW_CUR_UUID, &device->flags)) { 2744 drbd_uuid_new_current(device); 2745 clear_bit(NEW_CUR_UUID, &device->flags); 2746 } 2747 drbd_suspend_io(device); 2748 retcode = drbd_request_state(device, NS3(susp, 0, susp_nod, 0, susp_fen, 0)); 2749 if (retcode == SS_SUCCESS) { 2750 if (device->state.conn < C_CONNECTED) 2751 tl_clear(first_peer_device(device)->connection); 2752 if (device->state.disk == D_DISKLESS || device->state.disk == D_FAILED) 2753 tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO); 2754 } 2755 drbd_resume_io(device); 2756 2757 out: 2758 drbd_adm_finish(info, retcode); 2759 return 0; 2760 } 2761 2762 int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info) 2763 { 2764 return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED)); 2765 } 2766 2767 static int nla_put_drbd_cfg_context(struct sk_buff *skb, 2768 struct drbd_resource *resource, 2769 struct drbd_connection *connection, 2770 struct drbd_device *device) 2771 { 2772 struct nlattr *nla; 2773 nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT); 2774 if (!nla) 2775 goto nla_put_failure; 2776 if (device && 2777 nla_put_u32(skb, T_ctx_volume, device->vnr)) 2778 goto nla_put_failure; 2779 if (nla_put_string(skb, T_ctx_resource_name, resource->name)) 2780 goto nla_put_failure; 2781 if (connection) { 2782 if (connection->my_addr_len && 2783 nla_put(skb, T_ctx_my_addr, connection->my_addr_len, &connection->my_addr)) 2784 goto nla_put_failure; 2785 if (connection->peer_addr_len && 2786 nla_put(skb, T_ctx_peer_addr, connection->peer_addr_len, &connection->peer_addr)) 2787 goto nla_put_failure; 2788 } 2789 nla_nest_end(skb, nla); 2790 return 0; 2791 2792 nla_put_failure: 2793 if (nla) 2794 nla_nest_cancel(skb, nla); 2795 return -EMSGSIZE; 2796 } 2797 2798 /* 2799 * Return the connection of @resource if @resource has exactly one connection. 2800 */ 2801 static struct drbd_connection *the_only_connection(struct drbd_resource *resource) 2802 { 2803 struct list_head *connections = &resource->connections; 2804 2805 if (list_empty(connections) || connections->next->next != connections) 2806 return NULL; 2807 return list_first_entry(&resource->connections, struct drbd_connection, connections); 2808 } 2809 2810 int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device, 2811 const struct sib_info *sib) 2812 { 2813 struct drbd_resource *resource = device->resource; 2814 struct state_info *si = NULL; /* for sizeof(si->member); */ 2815 struct nlattr *nla; 2816 int got_ldev; 2817 int err = 0; 2818 int exclude_sensitive; 2819 2820 /* If sib != NULL, this is drbd_bcast_event, which anyone can listen 2821 * to. So we better exclude_sensitive information. 2822 * 2823 * If sib == NULL, this is drbd_adm_get_status, executed synchronously 2824 * in the context of the requesting user process. Exclude sensitive 2825 * information, unless current has superuser. 
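 * (exclude_sensitive is passed down to the *_to_skb() helpers below,
 * which then leave sensitive settings, e.g. the shared secret, out of
 * the reply.)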
2826 * 2827 * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and 2828 * relies on the current implementation of netlink_dump(), which 2829 * executes the dump callback successively from netlink_recvmsg(), 2830 * always in the context of the receiving process */ 2831 exclude_sensitive = sib || !capable(CAP_SYS_ADMIN); 2832 2833 got_ldev = get_ldev(device); 2834 2835 /* We need to add connection name and volume number information still. 2836 * Minor number is in drbd_genlmsghdr. */ 2837 if (nla_put_drbd_cfg_context(skb, resource, the_only_connection(resource), device)) 2838 goto nla_put_failure; 2839 2840 if (res_opts_to_skb(skb, &device->resource->res_opts, exclude_sensitive)) 2841 goto nla_put_failure; 2842 2843 rcu_read_lock(); 2844 if (got_ldev) { 2845 struct disk_conf *disk_conf; 2846 2847 disk_conf = rcu_dereference(device->ldev->disk_conf); 2848 err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive); 2849 } 2850 if (!err) { 2851 struct net_conf *nc; 2852 2853 nc = rcu_dereference(first_peer_device(device)->connection->net_conf); 2854 if (nc) 2855 err = net_conf_to_skb(skb, nc, exclude_sensitive); 2856 } 2857 rcu_read_unlock(); 2858 if (err) 2859 goto nla_put_failure; 2860 2861 nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO); 2862 if (!nla) 2863 goto nla_put_failure; 2864 if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) || 2865 nla_put_u32(skb, T_current_state, device->state.i) || 2866 nla_put_u64(skb, T_ed_uuid, device->ed_uuid) || 2867 nla_put_u64(skb, T_capacity, drbd_get_capacity(device->this_bdev)) || 2868 nla_put_u64(skb, T_send_cnt, device->send_cnt) || 2869 nla_put_u64(skb, T_recv_cnt, device->recv_cnt) || 2870 nla_put_u64(skb, T_read_cnt, device->read_cnt) || 2871 nla_put_u64(skb, T_writ_cnt, device->writ_cnt) || 2872 nla_put_u64(skb, T_al_writ_cnt, device->al_writ_cnt) || 2873 nla_put_u64(skb, T_bm_writ_cnt, device->bm_writ_cnt) || 2874 nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&device->ap_bio_cnt)) || 2875 nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&device->ap_pending_cnt)) || 2876 nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&device->rs_pending_cnt))) 2877 goto nla_put_failure; 2878 2879 if (got_ldev) { 2880 int err; 2881 2882 spin_lock_irq(&device->ldev->md.uuid_lock); 2883 err = nla_put(skb, T_uuids, sizeof(si->uuids), device->ldev->md.uuid); 2884 spin_unlock_irq(&device->ldev->md.uuid_lock); 2885 2886 if (err) 2887 goto nla_put_failure; 2888 2889 if (nla_put_u32(skb, T_disk_flags, device->ldev->md.flags) || 2890 nla_put_u64(skb, T_bits_total, drbd_bm_bits(device)) || 2891 nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(device))) 2892 goto nla_put_failure; 2893 if (C_SYNC_SOURCE <= device->state.conn && 2894 C_PAUSED_SYNC_T >= device->state.conn) { 2895 if (nla_put_u64(skb, T_bits_rs_total, device->rs_total) || 2896 nla_put_u64(skb, T_bits_rs_failed, device->rs_failed)) 2897 goto nla_put_failure; 2898 } 2899 } 2900 2901 if (sib) { 2902 switch(sib->sib_reason) { 2903 case SIB_SYNC_PROGRESS: 2904 case SIB_GET_STATUS_REPLY: 2905 break; 2906 case SIB_STATE_CHANGE: 2907 if (nla_put_u32(skb, T_prev_state, sib->os.i) || 2908 nla_put_u32(skb, T_new_state, sib->ns.i)) 2909 goto nla_put_failure; 2910 break; 2911 case SIB_HELPER_POST: 2912 if (nla_put_u32(skb, T_helper_exit_code, 2913 sib->helper_exit_code)) 2914 goto nla_put_failure; 2915 /* fall through */ 2916 case SIB_HELPER_PRE: 2917 if (nla_put_string(skb, T_helper, sib->helper_name)) 2918 goto nla_put_failure; 2919 break; 2920 } 2921 } 2922 nla_nest_end(skb, nla); 2923 
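	/* The "if (0)" below makes sure the nla_put_failure label is only
	 * reached via goto; on the normal path the err = -EMSGSIZE assignment
	 * is skipped and we fall through to the common cleanup. */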
2924 if (0) 2925 nla_put_failure: 2926 err = -EMSGSIZE; 2927 if (got_ldev) 2928 put_ldev(device); 2929 return err; 2930 } 2931 2932 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info) 2933 { 2934 enum drbd_ret_code retcode; 2935 int err; 2936 2937 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); 2938 if (!adm_ctx.reply_skb) 2939 return retcode; 2940 if (retcode != NO_ERROR) 2941 goto out; 2942 2943 err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.device, NULL); 2944 if (err) { 2945 nlmsg_free(adm_ctx.reply_skb); 2946 return err; 2947 } 2948 out: 2949 drbd_adm_finish(info, retcode); 2950 return 0; 2951 } 2952 2953 static int get_one_status(struct sk_buff *skb, struct netlink_callback *cb) 2954 { 2955 struct drbd_device *device; 2956 struct drbd_genlmsghdr *dh; 2957 struct drbd_resource *pos = (struct drbd_resource *)cb->args[0]; 2958 struct drbd_resource *resource = NULL; 2959 struct drbd_resource *tmp; 2960 unsigned volume = cb->args[1]; 2961 2962 /* Open coded, deferred, iteration: 2963 * for_each_resource_safe(resource, tmp, &drbd_resources) { 2964 * connection = "first connection of resource or undefined"; 2965 * idr_for_each_entry(&resource->devices, device, i) { 2966 * ... 2967 * } 2968 * } 2969 * where resource is cb->args[0]; 2970 * and i is cb->args[1]; 2971 * 2972 * cb->args[2] indicates if we shall loop over all resources, 2973 * or just dump all volumes of a single resource. 2974 * 2975 * This may miss entries inserted after this dump started, 2976 * or entries deleted before they are reached. 2977 * 2978 * We need to make sure the device won't disappear while 2979 * we are looking at it, and revalidate our iterators 2980 * on each iteration. 2981 */ 2982 2983 /* synchronize with conn_create()/drbd_destroy_connection() */ 2984 rcu_read_lock(); 2985 /* revalidate iterator position */ 2986 for_each_resource_rcu(tmp, &drbd_resources) { 2987 if (pos == NULL) { 2988 /* first iteration */ 2989 pos = tmp; 2990 resource = pos; 2991 break; 2992 } 2993 if (tmp == pos) { 2994 resource = pos; 2995 break; 2996 } 2997 } 2998 if (resource) { 2999 next_resource: 3000 device = idr_get_next(&resource->devices, &volume); 3001 if (!device) { 3002 /* No more volumes to dump on this resource. 3003 * Advance resource iterator. */ 3004 pos = list_entry_rcu(resource->resources.next, 3005 struct drbd_resource, resources); 3006 /* Did we dump any volume of this resource yet? */ 3007 if (volume != 0) { 3008 /* If we reached the end of the list, 3009 * or only a single resource dump was requested, 3010 * we are done. */ 3011 if (&pos->resources == &drbd_resources || cb->args[2]) 3012 goto out; 3013 volume = 0; 3014 resource = pos; 3015 goto next_resource; 3016 } 3017 } 3018 3019 dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, 3020 cb->nlh->nlmsg_seq, &drbd_genl_family, 3021 NLM_F_MULTI, DRBD_ADM_GET_STATUS); 3022 if (!dh) 3023 goto out; 3024 3025 if (!device) { 3026 /* This is a connection without a single volume. 3027 * Suprisingly enough, it may have a network 3028 * configuration. 
*/ 3029 struct drbd_connection *connection; 3030 3031 dh->minor = -1U; 3032 dh->ret_code = NO_ERROR; 3033 connection = the_only_connection(resource); 3034 if (nla_put_drbd_cfg_context(skb, resource, connection, NULL)) 3035 goto cancel; 3036 if (connection) { 3037 struct net_conf *nc; 3038 3039 nc = rcu_dereference(connection->net_conf); 3040 if (nc && net_conf_to_skb(skb, nc, 1) != 0) 3041 goto cancel; 3042 } 3043 goto done; 3044 } 3045 3046 D_ASSERT(device, device->vnr == volume); 3047 D_ASSERT(device, device->resource == resource); 3048 3049 dh->minor = device_to_minor(device); 3050 dh->ret_code = NO_ERROR; 3051 3052 if (nla_put_status_info(skb, device, NULL)) { 3053 cancel: 3054 genlmsg_cancel(skb, dh); 3055 goto out; 3056 } 3057 done: 3058 genlmsg_end(skb, dh); 3059 } 3060 3061 out: 3062 rcu_read_unlock(); 3063 /* where to start the next iteration */ 3064 cb->args[0] = (long)pos; 3065 cb->args[1] = (pos == resource) ? volume + 1 : 0; 3066 3067 /* No more resources/volumes/minors found results in an empty skb. 3068 * Which will terminate the dump. */ 3069 return skb->len; 3070 } 3071 3072 /* 3073 * Request status of all resources, or of all volumes within a single resource. 3074 * 3075 * This is a dump, as the answer may not fit in a single reply skb otherwise. 3076 * Which means we cannot use the family->attrbuf or other such members, because 3077 * dump is NOT protected by the genl_lock(). During dump, we only have access 3078 * to the incoming skb, and need to opencode "parsing" of the nlattr payload. 3079 * 3080 * Once things are setup properly, we call into get_one_status(). 3081 */ 3082 int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb) 3083 { 3084 const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ; 3085 struct nlattr *nla; 3086 const char *resource_name; 3087 struct drbd_resource *resource; 3088 int maxtype; 3089 3090 /* Is this a followup call? */ 3091 if (cb->args[0]) { 3092 /* ... of a single resource dump, 3093 * and the resource iterator has been advanced already? */ 3094 if (cb->args[2] && cb->args[2] != cb->args[0]) 3095 return 0; /* DONE. */ 3096 goto dump; 3097 } 3098 3099 /* First call (from netlink_dump_start). We need to figure out 3100 * which resource(s) the user wants us to dump. */ 3101 nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen), 3102 nlmsg_attrlen(cb->nlh, hdrlen), 3103 DRBD_NLA_CFG_CONTEXT); 3104 3105 /* No explicit context given. Dump all. */ 3106 if (!nla) 3107 goto dump; 3108 maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1; 3109 nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name)); 3110 if (IS_ERR(nla)) 3111 return PTR_ERR(nla); 3112 /* context given, but no name present? */ 3113 if (!nla) 3114 return -EINVAL; 3115 resource_name = nla_data(nla); 3116 if (!*resource_name) 3117 return -ENODEV; 3118 resource = drbd_find_resource(resource_name); 3119 if (!resource) 3120 return -ENODEV; 3121 3122 kref_put(&resource->kref, drbd_destroy_resource); /* get_one_status() revalidates the resource */ 3123 3124 /* prime iterators, and set "filter" mode mark: 3125 * only dump this connection. */ 3126 cb->args[0] = (long)resource; 3127 /* cb->args[1] = 0; passed in this way. 
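	   A fresh dump starts with all cb->args zeroed, see the
	   "Is this a followup call?" check above.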
*/ 3128 cb->args[2] = (long)resource; 3129 3130 dump: 3131 return get_one_status(skb, cb); 3132 } 3133 3134 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info) 3135 { 3136 enum drbd_ret_code retcode; 3137 struct timeout_parms tp; 3138 int err; 3139 3140 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); 3141 if (!adm_ctx.reply_skb) 3142 return retcode; 3143 if (retcode != NO_ERROR) 3144 goto out; 3145 3146 tp.timeout_type = 3147 adm_ctx.device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED : 3148 test_bit(USE_DEGR_WFC_T, &adm_ctx.device->flags) ? UT_DEGRADED : 3149 UT_DEFAULT; 3150 3151 err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp); 3152 if (err) { 3153 nlmsg_free(adm_ctx.reply_skb); 3154 return err; 3155 } 3156 out: 3157 drbd_adm_finish(info, retcode); 3158 return 0; 3159 } 3160 3161 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) 3162 { 3163 struct drbd_device *device; 3164 enum drbd_ret_code retcode; 3165 struct start_ov_parms parms; 3166 3167 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); 3168 if (!adm_ctx.reply_skb) 3169 return retcode; 3170 if (retcode != NO_ERROR) 3171 goto out; 3172 3173 device = adm_ctx.device; 3174 3175 /* resume from last known position, if possible */ 3176 parms.ov_start_sector = device->ov_start_sector; 3177 parms.ov_stop_sector = ULLONG_MAX; 3178 if (info->attrs[DRBD_NLA_START_OV_PARMS]) { 3179 int err = start_ov_parms_from_attrs(&parms, info); 3180 if (err) { 3181 retcode = ERR_MANDATORY_TAG; 3182 drbd_msg_put_info(from_attrs_err_to_txt(err)); 3183 goto out; 3184 } 3185 } 3186 /* w_make_ov_request expects position to be aligned */ 3187 device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1); 3188 device->ov_stop_sector = parms.ov_stop_sector; 3189 3190 /* If there is still bitmap IO pending, e.g. previous resync or verify 3191 * just being finished, wait for it before requesting a new resync. */ 3192 drbd_suspend_io(device); 3193 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags)); 3194 retcode = drbd_request_state(device, NS(conn, C_VERIFY_S)); 3195 drbd_resume_io(device); 3196 out: 3197 drbd_adm_finish(info, retcode); 3198 return 0; 3199 } 3200 3201 3202 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) 3203 { 3204 struct drbd_device *device; 3205 enum drbd_ret_code retcode; 3206 int skip_initial_sync = 0; 3207 int err; 3208 struct new_c_uuid_parms args; 3209 3210 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); 3211 if (!adm_ctx.reply_skb) 3212 return retcode; 3213 if (retcode != NO_ERROR) 3214 goto out_nolock; 3215 3216 device = adm_ctx.device; 3217 memset(&args, 0, sizeof(args)); 3218 if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) { 3219 err = new_c_uuid_parms_from_attrs(&args, info); 3220 if (err) { 3221 retcode = ERR_MANDATORY_TAG; 3222 drbd_msg_put_info(from_attrs_err_to_txt(err)); 3223 goto out_nolock; 3224 } 3225 } 3226 3227 mutex_lock(device->state_mutex); /* Protects us against serialized state changes. 
*/ 3228 3229 if (!get_ldev(device)) { 3230 retcode = ERR_NO_DISK; 3231 goto out; 3232 } 3233 3234 /* this is "skip initial sync", assume to be clean */ 3235 if (device->state.conn == C_CONNECTED && 3236 first_peer_device(device)->connection->agreed_pro_version >= 90 && 3237 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) { 3238 drbd_info(device, "Preparing to skip initial sync\n"); 3239 skip_initial_sync = 1; 3240 } else if (device->state.conn != C_STANDALONE) { 3241 retcode = ERR_CONNECTED; 3242 goto out_dec; 3243 } 3244 3245 drbd_uuid_set(device, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */ 3246 drbd_uuid_new_current(device); /* New current, previous to UI_BITMAP */ 3247 3248 if (args.clear_bm) { 3249 err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write, 3250 "clear_n_write from new_c_uuid", BM_LOCKED_MASK); 3251 if (err) { 3252 drbd_err(device, "Writing bitmap failed with %d\n", err); 3253 retcode = ERR_IO_MD_DISK; 3254 } 3255 if (skip_initial_sync) { 3256 drbd_send_uuids_skip_initial_sync(first_peer_device(device)); 3257 _drbd_uuid_set(device, UI_BITMAP, 0); 3258 drbd_print_uuids(device, "cleared bitmap UUID"); 3259 spin_lock_irq(&device->resource->req_lock); 3260 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), 3261 CS_VERBOSE, NULL); 3262 spin_unlock_irq(&device->resource->req_lock); 3263 } 3264 } 3265 3266 drbd_md_sync(device); 3267 out_dec: 3268 put_ldev(device); 3269 out: 3270 mutex_unlock(device->state_mutex); 3271 out_nolock: 3272 drbd_adm_finish(info, retcode); 3273 return 0; 3274 } 3275 3276 static enum drbd_ret_code 3277 drbd_check_resource_name(const char *name) 3278 { 3279 if (!name || !name[0]) { 3280 drbd_msg_put_info("resource name missing"); 3281 return ERR_MANDATORY_TAG; 3282 } 3283 /* if we want to use these in sysfs/configfs/debugfs some day, 3284 * we must not allow slashes */ 3285 if (strchr(name, '/')) { 3286 drbd_msg_put_info("invalid resource name"); 3287 return ERR_INVALID_REQUEST; 3288 } 3289 return NO_ERROR; 3290 } 3291 3292 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info) 3293 { 3294 enum drbd_ret_code retcode; 3295 struct res_opts res_opts; 3296 int err; 3297 3298 retcode = drbd_adm_prepare(skb, info, 0); 3299 if (!adm_ctx.reply_skb) 3300 return retcode; 3301 if (retcode != NO_ERROR) 3302 goto out; 3303 3304 set_res_opts_defaults(&res_opts); 3305 err = res_opts_from_attrs(&res_opts, info); 3306 if (err && err != -ENOMSG) { 3307 retcode = ERR_MANDATORY_TAG; 3308 drbd_msg_put_info(from_attrs_err_to_txt(err)); 3309 goto out; 3310 } 3311 3312 retcode = drbd_check_resource_name(adm_ctx.resource_name); 3313 if (retcode != NO_ERROR) 3314 goto out; 3315 3316 if (adm_ctx.resource) { 3317 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) { 3318 retcode = ERR_INVALID_REQUEST; 3319 drbd_msg_put_info("resource exists"); 3320 } 3321 /* else: still NO_ERROR */ 3322 goto out; 3323 } 3324 3325 if (!conn_create(adm_ctx.resource_name, &res_opts)) 3326 retcode = ERR_NOMEM; 3327 out: 3328 drbd_adm_finish(info, retcode); 3329 return 0; 3330 } 3331 3332 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info) 3333 { 3334 struct drbd_genlmsghdr *dh = info->userhdr; 3335 enum drbd_ret_code retcode; 3336 3337 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); 3338 if (!adm_ctx.reply_skb) 3339 return retcode; 3340 if (retcode != NO_ERROR) 3341 goto out; 3342 3343 if (dh->minor > MINORMASK) { 3344 drbd_msg_put_info("requested minor out of range"); 3345 retcode = ERR_INVALID_REQUEST; 3346 
goto out; 3347 } 3348 if (adm_ctx.volume > DRBD_VOLUME_MAX) { 3349 drbd_msg_put_info("requested volume id out of range"); 3350 retcode = ERR_INVALID_REQUEST; 3351 goto out; 3352 } 3353 3354 /* drbd_adm_prepare made sure already 3355 * that first_peer_device(device)->connection and device->vnr match the request. */ 3356 if (adm_ctx.device) { 3357 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) 3358 retcode = ERR_MINOR_EXISTS; 3359 /* else: still NO_ERROR */ 3360 goto out; 3361 } 3362 3363 retcode = drbd_create_device(adm_ctx.resource, dh->minor, adm_ctx.volume); 3364 out: 3365 drbd_adm_finish(info, retcode); 3366 return 0; 3367 } 3368 3369 static enum drbd_ret_code adm_del_minor(struct drbd_device *device) 3370 { 3371 if (device->state.disk == D_DISKLESS && 3372 /* no need to be device->state.conn == C_STANDALONE && 3373 * we may want to delete a minor from a live replication group. 3374 */ 3375 device->state.role == R_SECONDARY) { 3376 _drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS), 3377 CS_VERBOSE + CS_WAIT_COMPLETE); 3378 drbd_delete_device(device); 3379 return NO_ERROR; 3380 } else 3381 return ERR_MINOR_CONFIGURED; 3382 } 3383 3384 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info) 3385 { 3386 enum drbd_ret_code retcode; 3387 3388 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); 3389 if (!adm_ctx.reply_skb) 3390 return retcode; 3391 if (retcode != NO_ERROR) 3392 goto out; 3393 3394 retcode = adm_del_minor(adm_ctx.device); 3395 out: 3396 drbd_adm_finish(info, retcode); 3397 return 0; 3398 } 3399 3400 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) 3401 { 3402 struct drbd_resource *resource; 3403 struct drbd_connection *connection; 3404 struct drbd_device *device; 3405 int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */ 3406 unsigned i; 3407 3408 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); 3409 if (!adm_ctx.reply_skb) 3410 return retcode; 3411 if (retcode != NO_ERROR) 3412 goto out; 3413 3414 resource = adm_ctx.resource; 3415 /* demote */ 3416 for_each_connection(connection, resource) { 3417 struct drbd_peer_device *peer_device; 3418 3419 idr_for_each_entry(&connection->peer_devices, peer_device, i) { 3420 retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0); 3421 if (retcode < SS_SUCCESS) { 3422 drbd_msg_put_info("failed to demote"); 3423 goto out; 3424 } 3425 } 3426 3427 retcode = conn_try_disconnect(connection, 0); 3428 if (retcode < SS_SUCCESS) { 3429 drbd_msg_put_info("failed to disconnect"); 3430 goto out; 3431 } 3432 } 3433 3434 /* detach */ 3435 idr_for_each_entry(&resource->devices, device, i) { 3436 retcode = adm_detach(device, 0); 3437 if (retcode < SS_SUCCESS || retcode > NO_ERROR) { 3438 drbd_msg_put_info("failed to detach"); 3439 goto out; 3440 } 3441 } 3442 3443 /* If we reach this, all volumes (of this connection) are Secondary, 3444 * Disconnected, Diskless, aka Unconfigured. Make sure all threads have 3445 * actually stopped, state handling only does drbd_thread_stop_nowait(). 
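 * (conn_try_disconnect() above already used the blocking drbd_thread_stop()
 * on each receiver; here we do the same for the per-connection workers.)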
*/ 3446 for_each_connection(connection, resource) 3447 drbd_thread_stop(&connection->worker); 3448 3449 /* Now, nothing can fail anymore */ 3450 3451 /* delete volumes */ 3452 idr_for_each_entry(&resource->devices, device, i) { 3453 retcode = adm_del_minor(device); 3454 if (retcode != NO_ERROR) { 3455 /* "can not happen" */ 3456 drbd_msg_put_info("failed to delete volume"); 3457 goto out; 3458 } 3459 } 3460 3461 list_del_rcu(&resource->resources); 3462 synchronize_rcu(); 3463 drbd_free_resource(resource); 3464 retcode = NO_ERROR; 3465 3466 out: 3467 drbd_adm_finish(info, retcode); 3468 return 0; 3469 } 3470 3471 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info) 3472 { 3473 struct drbd_resource *resource; 3474 struct drbd_connection *connection; 3475 enum drbd_ret_code retcode; 3476 3477 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); 3478 if (!adm_ctx.reply_skb) 3479 return retcode; 3480 if (retcode != NO_ERROR) 3481 goto out; 3482 3483 resource = adm_ctx.resource; 3484 for_each_connection(connection, resource) { 3485 if (connection->cstate > C_STANDALONE) { 3486 retcode = ERR_NET_CONFIGURED; 3487 goto out; 3488 } 3489 } 3490 if (!idr_is_empty(&resource->devices)) { 3491 retcode = ERR_RES_IN_USE; 3492 goto out; 3493 } 3494 3495 list_del_rcu(&resource->resources); 3496 for_each_connection(connection, resource) 3497 drbd_thread_stop(&connection->worker); 3498 synchronize_rcu(); 3499 drbd_free_resource(resource); 3500 retcode = NO_ERROR; 3501 out: 3502 drbd_adm_finish(info, retcode); 3503 return 0; 3504 } 3505 3506 void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib) 3507 { 3508 static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */ 3509 struct sk_buff *msg; 3510 struct drbd_genlmsghdr *d_out; 3511 unsigned seq; 3512 int err = -ENOMEM; 3513 3514 if (sib->sib_reason == SIB_SYNC_PROGRESS) { 3515 if (time_after(jiffies, device->rs_last_bcast + HZ)) 3516 device->rs_last_bcast = jiffies; 3517 else 3518 return; 3519 } 3520 3521 seq = atomic_inc_return(&drbd_genl_seq); 3522 msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); 3523 if (!msg) 3524 goto failed; 3525 3526 err = -EMSGSIZE; 3527 d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT); 3528 if (!d_out) /* cannot happen, but anyways. */ 3529 goto nla_put_failure; 3530 d_out->minor = device_to_minor(device); 3531 d_out->ret_code = NO_ERROR; 3532 3533 if (nla_put_status_info(msg, device, sib)) 3534 goto nla_put_failure; 3535 genlmsg_end(msg, d_out); 3536 err = drbd_genl_multicast_events(msg, 0); 3537 /* msg has been consumed or freed in netlink_broadcast() */ 3538 if (err && err != -ESRCH) 3539 goto failed; 3540 3541 return; 3542 3543 nla_put_failure: 3544 nlmsg_free(msg); 3545 failed: 3546 drbd_err(device, "Error %d while broadcasting event. " 3547 "Event seq:%u sib_reason:%u\n", 3548 err, seq, sib->sib_reason); 3549 } 3550