Lines Matching +full:count +full:- +full:threshold
4 * Copyright (C) 2012-2014 Nodalink, EURL.
13 * See the COPYING file in the top-level directory.
25 #include "qapi/qapi-events-block.h"
36 #define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
38 #define QUORUM_OPT_REWRITE "rewrite-corrupted"
39 #define QUORUM_OPT_READ_PATTERN "read-pattern"
43 uint8_t h[HASH_LENGTH]; /* SHA-256 hash */
75 int num_children; /* children count */
79 int threshold; /* if less than threshold children reads gave the member
90 bool rewrite_corrupted;/* true if the driver must rewrite-on-read corrupted
129 int count; /* number of completed AIOCB */ member
132 int rewrite_count; /* number of replica to rewrite: count down to
150 g_free(acb->qcrs); in quorum_aio_finalize()
156 return !memcmp(a->h, b->h, HASH_LENGTH); in quorum_sha256_compare()
161 return a->l == b->l; in quorum_64bits_compare()
169 BDRVQuorumState *s = bs->opaque; in quorum_aio_get()
184 acb->qcrs = g_new0(QuorumChildRequest, s->num_children); in quorum_aio_get()
185 for (i = 0; i < s->num_children; i++) { in quorum_aio_get()
186 acb->qcrs[i].buf = NULL; in quorum_aio_get()
187 acb->qcrs[i].ret = 0; in quorum_aio_get()
188 acb->qcrs[i].parent = acb; in quorum_aio_get()
202 msg = strerror(-ret); in quorum_report_bad()
206 end_sector - start_sector); in quorum_report_bad()
211 const char *reference = bdrv_get_device_or_node_name(acb->bs); in quorum_report_failure()
212 int64_t start_sector = acb->offset / BDRV_SECTOR_SIZE; in quorum_report_failure()
213 int64_t end_sector = DIV_ROUND_UP(acb->offset + acb->bytes, in quorum_report_failure()
217 end_sector - start_sector); in quorum_report_failure()
224 BDRVQuorumState *s = acb->bs->opaque; in quorum_has_too_much_io_failed()
226 if (acb->success_count < s->threshold) { in quorum_has_too_much_io_failed()
227 acb->vote_ret = quorum_vote_error(acb); in quorum_has_too_much_io_failed()
238 assert(dest->niov == source->niov); in quorum_copy_qiov()
239 assert(dest->size == source->size); in quorum_copy_qiov()
240 for (i = 0; i < source->niov; i++) { in quorum_copy_qiov()
241 assert(dest->iov[i].iov_len == source->iov[i].iov_len); in quorum_copy_qiov()
242 memcpy(dest->iov[i].iov_base, in quorum_copy_qiov()
243 source->iov[i].iov_base, in quorum_copy_qiov()
244 source->iov[i].iov_len); in quorum_copy_qiov()
250 QuorumAIOCB *acb = sacb->parent; in quorum_report_bad_acb()
251 QuorumOpType type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE; in quorum_report_bad_acb()
252 quorum_report_bad(type, acb->offset, acb->bytes, sacb->bs->node_name, ret); in quorum_report_bad_acb()
262 QLIST_FOREACH(version, &acb->votes.vote_list, next) { in quorum_report_bad_versions()
263 if (acb->votes.compare(&version->value, value)) { in quorum_report_bad_versions()
266 QLIST_FOREACH(item, &version->items, next) { in quorum_report_bad_versions()
267 quorum_report_bad(QUORUM_OP_TYPE_READ, acb->offset, acb->bytes, in quorum_report_bad_versions()
268 s->children[item->index]->bs->node_name, 0); in quorum_report_bad_versions()
274 * This function can count as GRAPH_RDLOCK because read_quorum_children() holds
280 QuorumAIOCB *acb = co->acb; in quorum_rewrite_entry()
281 BDRVQuorumState *s = acb->bs->opaque; in quorum_rewrite_entry()
287 bdrv_co_pwritev(s->children[co->idx], acb->offset, acb->bytes, in quorum_rewrite_entry()
288 acb->qiov, acb->flags & ~BDRV_REQ_WRITE_UNCHANGED); in quorum_rewrite_entry()
291 acb->rewrite_count--; in quorum_rewrite_entry()
292 if (!acb->rewrite_count) { in quorum_rewrite_entry()
293 qemu_coroutine_enter_if_inactive(acb->co); in quorum_rewrite_entry()
302 int count = 0; in quorum_rewrite_bad_versions() local
304 /* first count the number of bad versions: done first to avoid concurrency in quorum_rewrite_bad_versions()
307 QLIST_FOREACH(version, &acb->votes.vote_list, next) { in quorum_rewrite_bad_versions()
308 if (acb->votes.compare(&version->value, value)) { in quorum_rewrite_bad_versions()
311 QLIST_FOREACH(item, &version->items, next) { in quorum_rewrite_bad_versions()
312 count++; in quorum_rewrite_bad_versions()
316 /* quorum_rewrite_entry will count down this to zero */ in quorum_rewrite_bad_versions()
317 acb->rewrite_count = count; in quorum_rewrite_bad_versions()
320 QLIST_FOREACH(version, &acb->votes.vote_list, next) { in quorum_rewrite_bad_versions()
321 if (acb->votes.compare(&version->value, value)) { in quorum_rewrite_bad_versions()
324 QLIST_FOREACH(item, &version->items, next) { in quorum_rewrite_bad_versions()
328 .idx = item->index, in quorum_rewrite_bad_versions()
337 return count; in quorum_rewrite_bad_versions()
348 QLIST_FOREACH(v, &votes->vote_list, next) { in quorum_count_vote()
349 if (votes->compare(&v->value, value)) { in quorum_count_vote()
358 QLIST_INIT(&version->items); in quorum_count_vote()
359 memcpy(&version->value, value, sizeof(version->value)); in quorum_count_vote()
360 version->index = index; in quorum_count_vote()
361 version->vote_count = 0; in quorum_count_vote()
362 QLIST_INSERT_HEAD(&votes->vote_list, version, next); in quorum_count_vote()
365 version->vote_count++; in quorum_count_vote()
368 item->index = index; in quorum_count_vote()
369 QLIST_INSERT_HEAD(&version->items, item, next); in quorum_count_vote()
377 QLIST_FOREACH_SAFE(version, &votes->vote_list, next, next_version) { in quorum_free_vote_list()
379 QLIST_FOREACH_SAFE(item, &version->items, next, next_item) { in quorum_free_vote_list()
389 QEMUIOVector *qiov = &acb->qcrs[i].qiov; in quorum_compute_hash()
390 size_t len = sizeof(hash->h); in quorum_compute_hash()
391 uint8_t *data = hash->h; in quorum_compute_hash()
393 /* XXX - would be nice if we could pass in the Error ** in quorum_compute_hash()
397 qiov->iov, qiov->niov, in quorum_compute_hash()
400 return -EINVAL; in quorum_compute_hash()
411 QLIST_FOREACH(candidate, &votes->vote_list, next) { in quorum_get_vote_winner()
412 if (candidate->vote_count > max) { in quorum_get_vote_winner()
413 max = candidate->vote_count; in quorum_get_vote_winner()
432 assert(a->niov == b->niov); in quorum_iovec_compare()
433 for (i = 0; i < a->niov; i++) { in quorum_iovec_compare()
434 assert(a->iov[i].iov_len == b->iov[i].iov_len); in quorum_iovec_compare()
435 result = memcmp(a->iov[i].iov_base, in quorum_iovec_compare()
436 b->iov[i].iov_base, in quorum_iovec_compare()
437 a->iov[i].iov_len); in quorum_iovec_compare()
448 BDRVQuorumState *s = acb->bs->opaque; in quorum_compare()
452 if (s->is_blkverify) { in quorum_compare()
454 if (offset != -1) { in quorum_compare()
457 acb->offset, acb->bytes, acb->offset + offset); in quorum_compare()
469 BDRVQuorumState *s = acb->bs->opaque; in quorum_vote_error()
479 for (i = 0; i < s->num_children; i++) { in quorum_vote_error()
480 ret = acb->qcrs[i].ret; in quorum_vote_error()
490 ret = winner->value.l; in quorum_vote_error()
503 BDRVQuorumState *s = acb->bs->opaque; in quorum_vote()
511 for (i = 0; i < s->num_children; i++) { in quorum_vote()
512 if (!acb->qcrs[i].ret) { in quorum_vote()
517 assert(i < s->num_children); in quorum_vote()
522 for (j = i + 1; j < s->num_children; j++) { in quorum_vote()
523 if (acb->qcrs[j].ret) { in quorum_vote()
526 quorum = quorum_compare(acb, &acb->qcrs[i].qiov, &acb->qcrs[j].qiov); in quorum_vote()
534 quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov); in quorum_vote()
539 for (i = 0; i < s->num_children; i++) { in quorum_vote()
540 if (acb->qcrs[i].ret) { in quorum_vote()
546 acb->vote_ret = ret; in quorum_vote()
549 quorum_count_vote(&acb->votes, &hash, i); in quorum_vote()
553 winner = quorum_get_vote_winner(&acb->votes); in quorum_vote()
555 /* if the winner count is smaller than threshold the read fails */ in quorum_vote()
556 if (winner->vote_count < s->threshold) { in quorum_vote()
558 acb->vote_ret = -EIO; in quorum_vote()
563 quorum_copy_qiov(acb->qiov, &acb->qcrs[winner->index].qiov); in quorum_vote()
566 quorum_report_bad_versions(s, acb, &winner->value); in quorum_vote()
569 if (s->rewrite_corrupted) { in quorum_vote()
570 quorum_rewrite_bad_versions(acb, &winner->value); in quorum_vote()
575 quorum_free_vote_list(&acb->votes); in quorum_vote()
579 * This function can count as GRAPH_RDLOCK because read_quorum_children() holds
585 QuorumAIOCB *acb = co->acb; in read_quorum_children_entry()
586 BDRVQuorumState *s = acb->bs->opaque; in read_quorum_children_entry()
587 int i = co->idx; in read_quorum_children_entry()
588 QuorumChildRequest *sacb = &acb->qcrs[i]; in read_quorum_children_entry()
590 sacb->bs = s->children[i]->bs; in read_quorum_children_entry()
591 sacb->ret = bdrv_co_preadv(s->children[i], acb->offset, acb->bytes, in read_quorum_children_entry()
592 &acb->qcrs[i].qiov, 0); in read_quorum_children_entry()
594 if (sacb->ret == 0) { in read_quorum_children_entry()
595 acb->success_count++; in read_quorum_children_entry()
597 quorum_report_bad_acb(sacb, sacb->ret); in read_quorum_children_entry()
600 acb->count++; in read_quorum_children_entry()
601 assert(acb->count <= s->num_children); in read_quorum_children_entry()
602 assert(acb->success_count <= s->num_children); in read_quorum_children_entry()
605 if (acb->count == s->num_children) { in read_quorum_children_entry()
606 qemu_coroutine_enter_if_inactive(acb->co); in read_quorum_children_entry()
612 BDRVQuorumState *s = acb->bs->opaque; in read_quorum_children()
615 acb->children_read = s->num_children; in read_quorum_children()
616 for (i = 0; i < s->num_children; i++) { in read_quorum_children()
617 acb->qcrs[i].buf = qemu_blockalign(s->children[i]->bs, acb->qiov->size); in read_quorum_children()
618 qemu_iovec_init(&acb->qcrs[i].qiov, acb->qiov->niov); in read_quorum_children()
619 qemu_iovec_clone(&acb->qcrs[i].qiov, acb->qiov, acb->qcrs[i].buf); in read_quorum_children()
622 for (i = 0; i < s->num_children; i++) { in read_quorum_children()
633 while (acb->count < s->num_children) { in read_quorum_children()
639 for (i = 0; i < s->num_children; i++) { in read_quorum_children()
640 qemu_vfree(acb->qcrs[i].buf); in read_quorum_children()
641 qemu_iovec_destroy(&acb->qcrs[i].qiov); in read_quorum_children()
644 while (acb->rewrite_count) { in read_quorum_children()
648 return acb->vote_ret; in read_quorum_children()
653 BDRVQuorumState *s = acb->bs->opaque; in read_fifo_child()
658 n = acb->children_read++; in read_fifo_child()
659 acb->qcrs[n].bs = s->children[n]->bs; in read_fifo_child()
660 ret = bdrv_co_preadv(s->children[n], acb->offset, acb->bytes, in read_fifo_child()
661 acb->qiov, 0); in read_fifo_child()
663 quorum_report_bad_acb(&acb->qcrs[n], ret); in read_fifo_child()
665 } while (ret < 0 && acb->children_read < s->num_children); in read_fifo_child()
667 /* FIXME: rewrite failed children if acb->children_read > 1? */ in read_fifo_child()
676 BDRVQuorumState *s = bs->opaque; in quorum_co_preadv()
680 acb->is_read = true; in quorum_co_preadv()
681 acb->children_read = 0; in quorum_co_preadv()
683 if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) { in quorum_co_preadv()
694 * This function can count as GRAPH_RDLOCK because quorum_co_pwritev() holds the
700 QuorumAIOCB *acb = co->acb; in write_quorum_entry()
701 BDRVQuorumState *s = acb->bs->opaque; in write_quorum_entry()
702 int i = co->idx; in write_quorum_entry()
703 QuorumChildRequest *sacb = &acb->qcrs[i]; in write_quorum_entry()
705 sacb->bs = s->children[i]->bs; in write_quorum_entry()
706 if (acb->flags & BDRV_REQ_ZERO_WRITE) { in write_quorum_entry()
707 sacb->ret = bdrv_co_pwrite_zeroes(s->children[i], acb->offset, in write_quorum_entry()
708 acb->bytes, acb->flags); in write_quorum_entry()
710 sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes, in write_quorum_entry()
711 acb->qiov, acb->flags); in write_quorum_entry()
713 if (sacb->ret == 0) { in write_quorum_entry()
714 acb->success_count++; in write_quorum_entry()
716 quorum_report_bad_acb(sacb, sacb->ret); in write_quorum_entry()
718 acb->count++; in write_quorum_entry()
719 assert(acb->count <= s->num_children); in write_quorum_entry()
720 assert(acb->success_count <= s->num_children); in write_quorum_entry()
723 if (acb->count == s->num_children) { in write_quorum_entry()
724 qemu_coroutine_enter_if_inactive(acb->co); in write_quorum_entry()
732 BDRVQuorumState *s = bs->opaque; in quorum_co_pwritev()
736 for (i = 0; i < s->num_children; i++) { in quorum_co_pwritev()
747 while (acb->count < s->num_children) { in quorum_co_pwritev()
753 ret = acb->vote_ret; in quorum_co_pwritev()
770 BDRVQuorumState *s = bs->opaque; in quorum_co_getlength()
775 result = bdrv_co_getlength(s->children[0]->bs); in quorum_co_getlength()
779 for (i = 1; i < s->num_children; i++) { in quorum_co_getlength()
780 int64_t value = bdrv_co_getlength(s->children[i]->bs); in quorum_co_getlength()
785 return -EIO; in quorum_co_getlength()
794 BDRVQuorumState *s = bs->opaque; in quorum_co_flush()
805 for (i = 0; i < s->num_children; i++) { in quorum_co_flush()
806 result = bdrv_co_flush(s->children[i]->bs); in quorum_co_flush()
809 s->children[i]->bs->node_name, result); in quorum_co_flush()
817 if (success_count >= s->threshold) { in quorum_co_flush()
821 result = winner->value.l; in quorum_co_flush()
831 BDRVQuorumState *s = bs->opaque; in quorum_recurse_can_replace()
834 for (i = 0; i < s->num_children; i++) { in quorum_recurse_can_replace()
862 if (s->children[i]->bs == to_replace) { in quorum_recurse_can_replace()
873 return QLIST_FIRST(&to_replace->parents) == s->children[i] && in quorum_recurse_can_replace()
874 QLIST_NEXT(s->children[i], next_parent) == NULL; in quorum_recurse_can_replace()
881 static int quorum_valid_threshold(int threshold, int num_children, Error **errp) in quorum_valid_threshold() argument
884 if (threshold < 1) { in quorum_valid_threshold()
886 "vote-threshold", "a value >= 1"); in quorum_valid_threshold()
887 return -ERANGE; in quorum_valid_threshold()
890 if (threshold > num_children) { in quorum_valid_threshold()
891 error_setg(errp, "threshold may not exceed children count"); in quorum_valid_threshold()
892 return -ERANGE; in quorum_valid_threshold()
928 BDRVQuorumState *s = bs->opaque; in quorum_refresh_flags()
931 bs->supported_zero_flags = in quorum_refresh_flags()
934 for (i = 0; i < s->num_children; i++) { in quorum_refresh_flags()
935 bs->supported_zero_flags &= s->children[i]->bs->supported_zero_flags; in quorum_refresh_flags()
938 bs->supported_zero_flags |= BDRV_REQ_WRITE_UNCHANGED; in quorum_refresh_flags()
944 BDRVQuorumState *s = bs->opaque; in quorum_open()
953 /* count how many different children are present */ in quorum_open()
954 s->num_children = qdict_array_entries(options, "children."); in quorum_open()
955 if (s->num_children < 0) { in quorum_open()
957 ret = -EINVAL; in quorum_open()
960 if (s->num_children < 1) { in quorum_open()
962 ret = -EINVAL; in quorum_open()
968 ret = -EINVAL; in quorum_open()
972 s->threshold = qemu_opt_get_number(opts, QUORUM_OPT_VOTE_THRESHOLD, 0); in quorum_open()
973 /* and validate it against s->num_children */ in quorum_open()
974 ret = quorum_valid_threshold(s->threshold, s->num_children, errp); in quorum_open()
984 -EINVAL, NULL); in quorum_open()
987 error_setg(errp, "Please set read-pattern as fifo or quorum"); in quorum_open()
990 s->read_pattern = ret; in quorum_open()
992 if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) { in quorum_open()
993 s->is_blkverify = qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false); in quorum_open()
994 if (s->is_blkverify && (s->num_children != 2 || s->threshold != 2)) { in quorum_open()
996 "exactly two files and vote-threshold is 2"); in quorum_open()
997 ret = -EINVAL; in quorum_open()
1001 s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE, in quorum_open()
1003 if (s->rewrite_corrupted && s->is_blkverify) { in quorum_open()
1005 "rewrite-corrupted=on cannot be used with blkverify=on"); in quorum_open()
1006 ret = -EINVAL; in quorum_open()
1012 s->children = g_new0(BdrvChild *, s->num_children); in quorum_open()
1013 opened = g_new0(bool, s->num_children); in quorum_open()
1015 for (i = 0; i < s->num_children; i++) { in quorum_open()
1020 s->children[i] = bdrv_open_child(NULL, options, indexstr, bs, in quorum_open()
1023 if (!s->children[i]) { in quorum_open()
1024 ret = -EINVAL; in quorum_open()
1030 s->next_child_index = s->num_children; in quorum_open()
1032 bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED; in quorum_open()
1041 for (i = 0; i < s->num_children; i++) { in quorum_open()
1045 bdrv_unref_child(bs, s->children[i]); in quorum_open()
1048 g_free(s->children); in quorum_open()
1057 BDRVQuorumState *s = bs->opaque; in quorum_close()
1061 for (i = 0; i < s->num_children; i++) { in quorum_close()
1062 bdrv_unref_child(bs, s->children[i]); in quorum_close()
1066 g_free(s->children); in quorum_close()
1072 BDRVQuorumState *s = bs->opaque; in quorum_add_child()
1077 if (s->is_blkverify) { in quorum_add_child()
1082 assert(s->num_children <= INT_MAX / sizeof(BdrvChild *)); in quorum_add_child()
1083 if (s->num_children == INT_MAX / sizeof(BdrvChild *) || in quorum_add_child()
1084 s->next_child_index == UINT_MAX) { in quorum_add_child()
1089 ret = snprintf(indexstr, INDEXSTR_LEN, "children.%u", s->next_child_index); in quorum_add_child()
1094 s->next_child_index++; in quorum_add_child()
1102 s->next_child_index--; in quorum_add_child()
1105 s->children = g_renew(BdrvChild *, s->children, s->num_children + 1); in quorum_add_child()
1106 s->children[s->num_children++] = child; in quorum_add_child()
1113 BDRVQuorumState *s = bs->opaque; in quorum_del_child()
1117 for (i = 0; i < s->num_children; i++) { in quorum_del_child()
1118 if (s->children[i] == child) { in quorum_del_child()
1124 assert(i < s->num_children); in quorum_del_child()
1126 if (s->num_children <= s->threshold) { in quorum_del_child()
1128 "The number of children cannot be lower than the vote threshold %d", in quorum_del_child()
1129 s->threshold); in quorum_del_child()
1133 /* We know now that num_children > threshold, so blkverify must be false */ in quorum_del_child()
1134 assert(!s->is_blkverify); in quorum_del_child()
1136 snprintf(indexstr, INDEXSTR_LEN, "children.%u", s->next_child_index - 1); in quorum_del_child()
1137 if (!strncmp(child->name, indexstr, INDEXSTR_LEN)) { in quorum_del_child()
1138 s->next_child_index--; in quorum_del_child()
1142 memmove(&s->children[i], &s->children[i + 1], in quorum_del_child()
1143 (s->num_children - i - 1) * sizeof(BdrvChild *)); in quorum_del_child()
1144 s->children = g_renew(BdrvChild *, s->children, --s->num_children); in quorum_del_child()
1154 BDRVQuorumState *s = bs->opaque; in quorum_gather_child_options()
1163 * (s->next_child_index) that is incremented each time a new child in quorum_gather_child_options()
1170 * Therefore, we have to create a new gap-less enumeration here in quorum_gather_child_options()
1184 for (i = 0; i < s->num_children; i++) { in quorum_gather_child_options()
1186 qobject_ref(s->children[i]->bs->full_open_options)); in quorum_gather_child_options()
1206 BDRVQuorumState *s = bs->opaque; in quorum_child_perm()
1209 if (s->rewrite_corrupted) { in quorum_child_perm()
1230 int64_t offset, int64_t count, in quorum_co_block_status() argument
1233 BDRVQuorumState *s = bs->opaque; in quorum_co_block_status()
1235 int64_t pnum_zero = count; in quorum_co_block_status()
1238 for (i = 0; i < s->num_children; i++) { in quorum_co_block_status()
1240 ret = bdrv_co_common_block_status_above(s->children[i]->bs, NULL, false, in quorum_co_block_status()
1241 want_zero, offset, count, in quorum_co_block_status()
1244 quorum_report_bad(QUORUM_OP_TYPE_READ, offset, count, in quorum_co_block_status()
1245 s->children[i]->bs->node_name, ret); in quorum_co_block_status()
1246 pnum_data = count; in quorum_co_block_status()