/*
 * Copyright (C) 2016-2017 Red Hat, Inc. All rights reserved.
 * Copyright (C) 2016-2017 Milan Broz
 * Copyright (C) 2016-2017 Mikulas Patocka
 *
 * This file is released under the GPL.
 */

#include <linux/module.h>
#include <linux/device-mapper.h>
#include <linux/dm-io.h>
#include <linux/vmalloc.h>
#include <linux/sort.h>
#include <linux/rbtree.h>
#include <linux/delay.h>
#include <linux/random.h>
#include <crypto/hash.h>
#include <crypto/skcipher.h>
#include <linux/async_tx.h>
#include "dm-bufio.h"

#define DM_MSG_PREFIX "integrity"

#define DEFAULT_INTERLEAVE_SECTORS	32768
#define DEFAULT_JOURNAL_SIZE_FACTOR	7
#define DEFAULT_BUFFER_SECTORS		128
#define DEFAULT_JOURNAL_WATERMARK	50
#define DEFAULT_SYNC_MSEC		10000
#define DEFAULT_MAX_JOURNAL_SECTORS	131072
#define MIN_LOG2_INTERLEAVE_SECTORS	3
#define MAX_LOG2_INTERLEAVE_SECTORS	31
#define METADATA_WORKQUEUE_MAX_ACTIVE	16

/*
 * Warning - DEBUG_PRINT prints security-sensitive data to the log,
 * so it should not be enabled in the official kernel
 */
//#define DEBUG_PRINT
//#define INTERNAL_VERIFY

/*
 * On disk structures
 */

#define SB_MAGIC			"integrt"
#define SB_VERSION			1
#define SB_SECTORS			8
#define MAX_SECTORS_PER_BLOCK		8

struct superblock {
	__u8 magic[8];
	__u8 version;
	__u8 log2_interleave_sectors;
	__u16 integrity_tag_size;
	__u32 journal_sections;
	__u64 provided_data_sectors;	/* userspace uses this value */
	__u32 flags;
	__u8 log2_sectors_per_block;
};

#define SB_FLAG_HAVE_JOURNAL_MAC	0x1

#define	JOURNAL_ENTRY_ROUNDUP		8

typedef __u64 commit_id_t;
#define JOURNAL_MAC_PER_SECTOR		8

struct journal_entry {
	union {
		struct {
			__u32 sector_lo;
			__u32 sector_hi;
		} s;
		__u64 sector;
	} u;
	commit_id_t last_bytes[0];
	/* __u8 tag[0]; */
};

#define journal_entry_tag(ic, je)		((__u8 *)&(je)->last_bytes[(ic)->sectors_per_block])

#if BITS_PER_LONG == 64
#define journal_entry_set_sector(je, x)		do { smp_wmb(); ACCESS_ONCE((je)->u.sector) = cpu_to_le64(x); } while (0)
#define journal_entry_get_sector(je)		le64_to_cpu((je)->u.sector)
#elif defined(CONFIG_LBDAF)
#define journal_entry_set_sector(je, x)		do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); ACCESS_ONCE((je)->u.s.sector_hi) = cpu_to_le32((x) >> 32); } while (0)
#define journal_entry_get_sector(je)		le64_to_cpu((je)->u.sector)
#else
#define journal_entry_set_sector(je, x)		do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); ACCESS_ONCE((je)->u.s.sector_hi) = cpu_to_le32(0); } while (0)
#define journal_entry_get_sector(je)		le32_to_cpu((je)->u.s.sector_lo)
#endif
#define journal_entry_is_unused(je)		((je)->u.s.sector_hi == cpu_to_le32(-1))
#define journal_entry_set_unused(je)		do { ((je)->u.s.sector_hi = cpu_to_le32(-1)); } while (0)
#define journal_entry_is_inprogress(je)		((je)->u.s.sector_hi == cpu_to_le32(-2))
#define journal_entry_set_inprogress(je)	do { ((je)->u.s.sector_hi = cpu_to_le32(-2)); } while (0)

#define JOURNAL_BLOCK_SECTORS		8
#define JOURNAL_SECTOR_DATA		((1 << SECTOR_SHIFT) - sizeof(commit_id_t))
#define JOURNAL_MAC_SIZE		(JOURNAL_MAC_PER_SECTOR * JOURNAL_BLOCK_SECTORS)

struct journal_sector {
	__u8 entries[JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR];
	__u8 mac[JOURNAL_MAC_PER_SECTOR];
	commit_id_t commit_id;
};

#define MAX_TAG_SIZE			(JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR -
offsetof(struct journal_entry, last_bytes[MAX_SECTORS_PER_BLOCK])) 108 109 #define METADATA_PADDING_SECTORS 8 110 111 #define N_COMMIT_IDS 4 112 113 static unsigned char prev_commit_seq(unsigned char seq) 114 { 115 return (seq + N_COMMIT_IDS - 1) % N_COMMIT_IDS; 116 } 117 118 static unsigned char next_commit_seq(unsigned char seq) 119 { 120 return (seq + 1) % N_COMMIT_IDS; 121 } 122 123 /* 124 * In-memory structures 125 */ 126 127 struct journal_node { 128 struct rb_node node; 129 sector_t sector; 130 }; 131 132 struct alg_spec { 133 char *alg_string; 134 char *key_string; 135 __u8 *key; 136 unsigned key_size; 137 }; 138 139 struct dm_integrity_c { 140 struct dm_dev *dev; 141 unsigned tag_size; 142 __s8 log2_tag_size; 143 sector_t start; 144 mempool_t *journal_io_mempool; 145 struct dm_io_client *io; 146 struct dm_bufio_client *bufio; 147 struct workqueue_struct *metadata_wq; 148 struct superblock *sb; 149 unsigned journal_pages; 150 struct page_list *journal; 151 struct page_list *journal_io; 152 struct page_list *journal_xor; 153 154 struct crypto_skcipher *journal_crypt; 155 struct scatterlist **journal_scatterlist; 156 struct scatterlist **journal_io_scatterlist; 157 struct skcipher_request **sk_requests; 158 159 struct crypto_shash *journal_mac; 160 161 struct journal_node *journal_tree; 162 struct rb_root journal_tree_root; 163 164 sector_t provided_data_sectors; 165 166 unsigned short journal_entry_size; 167 unsigned char journal_entries_per_sector; 168 unsigned char journal_section_entries; 169 unsigned short journal_section_sectors; 170 unsigned journal_sections; 171 unsigned journal_entries; 172 sector_t device_sectors; 173 unsigned initial_sectors; 174 unsigned metadata_run; 175 __s8 log2_metadata_run; 176 __u8 log2_buffer_sectors; 177 __u8 sectors_per_block; 178 179 unsigned char mode; 180 bool suspending; 181 182 int failed; 183 184 struct crypto_shash *internal_hash; 185 186 /* these variables are locked with endio_wait.lock */ 187 struct rb_root in_progress; 188 wait_queue_head_t endio_wait; 189 struct workqueue_struct *wait_wq; 190 191 unsigned char commit_seq; 192 commit_id_t commit_ids[N_COMMIT_IDS]; 193 194 unsigned committed_section; 195 unsigned n_committed_sections; 196 197 unsigned uncommitted_section; 198 unsigned n_uncommitted_sections; 199 200 unsigned free_section; 201 unsigned char free_section_entry; 202 unsigned free_sectors; 203 204 unsigned free_sectors_threshold; 205 206 struct workqueue_struct *commit_wq; 207 struct work_struct commit_work; 208 209 struct workqueue_struct *writer_wq; 210 struct work_struct writer_work; 211 212 struct bio_list flush_bio_list; 213 214 unsigned long autocommit_jiffies; 215 struct timer_list autocommit_timer; 216 unsigned autocommit_msec; 217 218 wait_queue_head_t copy_to_journal_wait; 219 220 struct completion crypto_backoff; 221 222 bool journal_uptodate; 223 bool just_formatted; 224 225 struct alg_spec internal_hash_alg; 226 struct alg_spec journal_crypt_alg; 227 struct alg_spec journal_mac_alg; 228 229 atomic64_t number_of_mismatches; 230 }; 231 232 struct dm_integrity_range { 233 sector_t logical_sector; 234 unsigned n_sectors; 235 struct rb_node node; 236 }; 237 238 struct dm_integrity_io { 239 struct work_struct work; 240 241 struct dm_integrity_c *ic; 242 bool write; 243 bool fua; 244 245 struct dm_integrity_range range; 246 247 sector_t metadata_block; 248 unsigned metadata_offset; 249 250 atomic_t in_flight; 251 blk_status_t bi_status; 252 253 struct completion *completion; 254 255 struct gendisk *orig_bi_disk; 256 
u8 orig_bi_partno; 257 bio_end_io_t *orig_bi_end_io; 258 struct bio_integrity_payload *orig_bi_integrity; 259 struct bvec_iter orig_bi_iter; 260 }; 261 262 struct journal_completion { 263 struct dm_integrity_c *ic; 264 atomic_t in_flight; 265 struct completion comp; 266 }; 267 268 struct journal_io { 269 struct dm_integrity_range range; 270 struct journal_completion *comp; 271 }; 272 273 static struct kmem_cache *journal_io_cache; 274 275 #define JOURNAL_IO_MEMPOOL 32 276 277 #ifdef DEBUG_PRINT 278 #define DEBUG_print(x, ...) printk(KERN_DEBUG x, ##__VA_ARGS__) 279 static void __DEBUG_bytes(__u8 *bytes, size_t len, const char *msg, ...) 280 { 281 va_list args; 282 va_start(args, msg); 283 vprintk(msg, args); 284 va_end(args); 285 if (len) 286 pr_cont(":"); 287 while (len) { 288 pr_cont(" %02x", *bytes); 289 bytes++; 290 len--; 291 } 292 pr_cont("\n"); 293 } 294 #define DEBUG_bytes(bytes, len, msg, ...) __DEBUG_bytes(bytes, len, KERN_DEBUG msg, ##__VA_ARGS__) 295 #else 296 #define DEBUG_print(x, ...) do { } while (0) 297 #define DEBUG_bytes(bytes, len, msg, ...) do { } while (0) 298 #endif 299 300 /* 301 * DM Integrity profile, protection is performed layer above (dm-crypt) 302 */ 303 static const struct blk_integrity_profile dm_integrity_profile = { 304 .name = "DM-DIF-EXT-TAG", 305 .generate_fn = NULL, 306 .verify_fn = NULL, 307 }; 308 309 static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map); 310 static void integrity_bio_wait(struct work_struct *w); 311 static void dm_integrity_dtr(struct dm_target *ti); 312 313 static void dm_integrity_io_error(struct dm_integrity_c *ic, const char *msg, int err) 314 { 315 if (err == -EILSEQ) 316 atomic64_inc(&ic->number_of_mismatches); 317 if (!cmpxchg(&ic->failed, 0, err)) 318 DMERR("Error on %s: %d", msg, err); 319 } 320 321 static int dm_integrity_failed(struct dm_integrity_c *ic) 322 { 323 return ACCESS_ONCE(ic->failed); 324 } 325 326 static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned i, 327 unsigned j, unsigned char seq) 328 { 329 /* 330 * Xor the number with section and sector, so that if a piece of 331 * journal is written at wrong place, it is detected. 
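	 * The section index goes into the high 32 bits and the sector index
	 * within the section into the low bits.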
332 */ 333 return ic->commit_ids[seq] ^ cpu_to_le64(((__u64)i << 32) ^ j); 334 } 335 336 static void get_area_and_offset(struct dm_integrity_c *ic, sector_t data_sector, 337 sector_t *area, sector_t *offset) 338 { 339 __u8 log2_interleave_sectors = ic->sb->log2_interleave_sectors; 340 341 *area = data_sector >> log2_interleave_sectors; 342 *offset = (unsigned)data_sector & ((1U << log2_interleave_sectors) - 1); 343 } 344 345 #define sector_to_block(ic, n) \ 346 do { \ 347 BUG_ON((n) & (unsigned)((ic)->sectors_per_block - 1)); \ 348 (n) >>= (ic)->sb->log2_sectors_per_block; \ 349 } while (0) 350 351 static __u64 get_metadata_sector_and_offset(struct dm_integrity_c *ic, sector_t area, 352 sector_t offset, unsigned *metadata_offset) 353 { 354 __u64 ms; 355 unsigned mo; 356 357 ms = area << ic->sb->log2_interleave_sectors; 358 if (likely(ic->log2_metadata_run >= 0)) 359 ms += area << ic->log2_metadata_run; 360 else 361 ms += area * ic->metadata_run; 362 ms >>= ic->log2_buffer_sectors; 363 364 sector_to_block(ic, offset); 365 366 if (likely(ic->log2_tag_size >= 0)) { 367 ms += offset >> (SECTOR_SHIFT + ic->log2_buffer_sectors - ic->log2_tag_size); 368 mo = (offset << ic->log2_tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1); 369 } else { 370 ms += (__u64)offset * ic->tag_size >> (SECTOR_SHIFT + ic->log2_buffer_sectors); 371 mo = (offset * ic->tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1); 372 } 373 *metadata_offset = mo; 374 return ms; 375 } 376 377 static sector_t get_data_sector(struct dm_integrity_c *ic, sector_t area, sector_t offset) 378 { 379 sector_t result; 380 381 result = area << ic->sb->log2_interleave_sectors; 382 if (likely(ic->log2_metadata_run >= 0)) 383 result += (area + 1) << ic->log2_metadata_run; 384 else 385 result += (area + 1) * ic->metadata_run; 386 387 result += (sector_t)ic->initial_sectors + offset; 388 return result; 389 } 390 391 static void wraparound_section(struct dm_integrity_c *ic, unsigned *sec_ptr) 392 { 393 if (unlikely(*sec_ptr >= ic->journal_sections)) 394 *sec_ptr -= ic->journal_sections; 395 } 396 397 static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags) 398 { 399 struct dm_io_request io_req; 400 struct dm_io_region io_loc; 401 402 io_req.bi_op = op; 403 io_req.bi_op_flags = op_flags; 404 io_req.mem.type = DM_IO_KMEM; 405 io_req.mem.ptr.addr = ic->sb; 406 io_req.notify.fn = NULL; 407 io_req.client = ic->io; 408 io_loc.bdev = ic->dev->bdev; 409 io_loc.sector = ic->start; 410 io_loc.count = SB_SECTORS; 411 412 return dm_io(&io_req, 1, &io_loc, NULL); 413 } 414 415 static void access_journal_check(struct dm_integrity_c *ic, unsigned section, unsigned offset, 416 bool e, const char *function) 417 { 418 #if defined(CONFIG_DM_DEBUG) || defined(INTERNAL_VERIFY) 419 unsigned limit = e ? 
ic->journal_section_entries : ic->journal_section_sectors; 420 421 if (unlikely(section >= ic->journal_sections) || 422 unlikely(offset >= limit)) { 423 printk(KERN_CRIT "%s: invalid access at (%u,%u), limit (%u,%u)\n", 424 function, section, offset, ic->journal_sections, limit); 425 BUG(); 426 } 427 #endif 428 } 429 430 static void page_list_location(struct dm_integrity_c *ic, unsigned section, unsigned offset, 431 unsigned *pl_index, unsigned *pl_offset) 432 { 433 unsigned sector; 434 435 access_journal_check(ic, section, offset, false, "page_list_location"); 436 437 sector = section * ic->journal_section_sectors + offset; 438 439 *pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); 440 *pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1); 441 } 442 443 static struct journal_sector *access_page_list(struct dm_integrity_c *ic, struct page_list *pl, 444 unsigned section, unsigned offset, unsigned *n_sectors) 445 { 446 unsigned pl_index, pl_offset; 447 char *va; 448 449 page_list_location(ic, section, offset, &pl_index, &pl_offset); 450 451 if (n_sectors) 452 *n_sectors = (PAGE_SIZE - pl_offset) >> SECTOR_SHIFT; 453 454 va = lowmem_page_address(pl[pl_index].page); 455 456 return (struct journal_sector *)(va + pl_offset); 457 } 458 459 static struct journal_sector *access_journal(struct dm_integrity_c *ic, unsigned section, unsigned offset) 460 { 461 return access_page_list(ic, ic->journal, section, offset, NULL); 462 } 463 464 static struct journal_entry *access_journal_entry(struct dm_integrity_c *ic, unsigned section, unsigned n) 465 { 466 unsigned rel_sector, offset; 467 struct journal_sector *js; 468 469 access_journal_check(ic, section, n, true, "access_journal_entry"); 470 471 rel_sector = n % JOURNAL_BLOCK_SECTORS; 472 offset = n / JOURNAL_BLOCK_SECTORS; 473 474 js = access_journal(ic, section, rel_sector); 475 return (struct journal_entry *)((char *)js + offset * ic->journal_entry_size); 476 } 477 478 static struct journal_sector *access_journal_data(struct dm_integrity_c *ic, unsigned section, unsigned n) 479 { 480 n <<= ic->sb->log2_sectors_per_block; 481 482 n += JOURNAL_BLOCK_SECTORS; 483 484 access_journal_check(ic, section, n, false, "access_journal_data"); 485 486 return access_journal(ic, section, n); 487 } 488 489 static void section_mac(struct dm_integrity_c *ic, unsigned section, __u8 result[JOURNAL_MAC_SIZE]) 490 { 491 SHASH_DESC_ON_STACK(desc, ic->journal_mac); 492 int r; 493 unsigned j, size; 494 495 desc->tfm = ic->journal_mac; 496 desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; 497 498 r = crypto_shash_init(desc); 499 if (unlikely(r)) { 500 dm_integrity_io_error(ic, "crypto_shash_init", r); 501 goto err; 502 } 503 504 for (j = 0; j < ic->journal_section_entries; j++) { 505 struct journal_entry *je = access_journal_entry(ic, section, j); 506 r = crypto_shash_update(desc, (__u8 *)&je->u.sector, sizeof je->u.sector); 507 if (unlikely(r)) { 508 dm_integrity_io_error(ic, "crypto_shash_update", r); 509 goto err; 510 } 511 } 512 513 size = crypto_shash_digestsize(ic->journal_mac); 514 515 if (likely(size <= JOURNAL_MAC_SIZE)) { 516 r = crypto_shash_final(desc, result); 517 if (unlikely(r)) { 518 dm_integrity_io_error(ic, "crypto_shash_final", r); 519 goto err; 520 } 521 memset(result + size, 0, JOURNAL_MAC_SIZE - size); 522 } else { 523 __u8 digest[size]; 524 r = crypto_shash_final(desc, digest); 525 if (unlikely(r)) { 526 dm_integrity_io_error(ic, "crypto_shash_final", r); 527 goto err; 528 } 529 memcpy(result, digest, JOURNAL_MAC_SIZE); 530 } 531 532 return; 533 err: 534 
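	/* on failure, hand back an all-zero MAC; the error was already reported via dm_integrity_io_error() above */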
	memset(result, 0, JOURNAL_MAC_SIZE);
}

static void rw_section_mac(struct dm_integrity_c *ic, unsigned section, bool wr)
{
	__u8 result[JOURNAL_MAC_SIZE];
	unsigned j;

	if (!ic->journal_mac)
		return;

	section_mac(ic, section, result);

	for (j = 0; j < JOURNAL_BLOCK_SECTORS; j++) {
		struct journal_sector *js = access_journal(ic, section, j);

		if (likely(wr))
			memcpy(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR);
		else {
			if (memcmp(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR))
				dm_integrity_io_error(ic, "journal mac", -EILSEQ);
		}
	}
}

static void complete_journal_op(void *context)
{
	struct journal_completion *comp = context;
	BUG_ON(!atomic_read(&comp->in_flight));
	if (likely(atomic_dec_and_test(&comp->in_flight)))
		complete(&comp->comp);
}

static void xor_journal(struct dm_integrity_c *ic, bool encrypt, unsigned section,
			unsigned n_sections, struct journal_completion *comp)
{
	struct async_submit_ctl submit;
	size_t n_bytes = (size_t)(n_sections * ic->journal_section_sectors) << SECTOR_SHIFT;
	unsigned pl_index, pl_offset, section_index;
	struct page_list *source_pl, *target_pl;

	if (likely(encrypt)) {
		source_pl = ic->journal;
		target_pl = ic->journal_io;
	} else {
		source_pl = ic->journal_io;
		target_pl = ic->journal;
	}

	page_list_location(ic, section, 0, &pl_index, &pl_offset);

	atomic_add(roundup(pl_offset + n_bytes, PAGE_SIZE) >> PAGE_SHIFT, &comp->in_flight);

	init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, complete_journal_op, comp, NULL);

	section_index = pl_index;

	do {
		size_t this_step;
		struct page *src_pages[2];
		struct page *dst_page;

		while (unlikely(pl_index == section_index)) {
			unsigned dummy;
			if (likely(encrypt))
				rw_section_mac(ic, section, true);
			section++;
			n_sections--;
			if (!n_sections)
				break;
			page_list_location(ic, section, 0, &section_index, &dummy);
		}

		this_step = min(n_bytes, (size_t)PAGE_SIZE - pl_offset);
		dst_page = target_pl[pl_index].page;
		src_pages[0] = source_pl[pl_index].page;
		src_pages[1] = ic->journal_xor[pl_index].page;

		async_xor(dst_page, src_pages, pl_offset, 2, this_step, &submit);

		pl_index++;
		pl_offset = 0;
		n_bytes -= this_step;
	} while (n_bytes);

	BUG_ON(n_sections);

	async_tx_issue_pending_all();
}

static void complete_journal_encrypt(struct crypto_async_request *req, int err)
{
	struct journal_completion *comp = req->data;
	if (unlikely(err)) {
		if (likely(err == -EINPROGRESS)) {
			complete(&comp->ic->crypto_backoff);
			return;
		}
		dm_integrity_io_error(comp->ic, "asynchronous encrypt", err);
	}
	complete_journal_op(comp);
}

static bool do_crypt(bool encrypt, struct skcipher_request *req, struct journal_completion *comp)
{
	int r;
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
				      complete_journal_encrypt, comp);
	if (likely(encrypt))
		r = crypto_skcipher_encrypt(req);
	else
		r = crypto_skcipher_decrypt(req);
	if (likely(!r))
		return false;
	if (likely(r == -EINPROGRESS))
		return true;
	if (likely(r == -EBUSY)) {
		wait_for_completion(&comp->ic->crypto_backoff);
		reinit_completion(&comp->ic->crypto_backoff);
		return true;
	}
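	/* any other return code is a hard error: report it and treat the request as complete */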
dm_integrity_io_error(comp->ic, "encrypt", r); 656 return false; 657 } 658 659 static void crypt_journal(struct dm_integrity_c *ic, bool encrypt, unsigned section, 660 unsigned n_sections, struct journal_completion *comp) 661 { 662 struct scatterlist **source_sg; 663 struct scatterlist **target_sg; 664 665 atomic_add(2, &comp->in_flight); 666 667 if (likely(encrypt)) { 668 source_sg = ic->journal_scatterlist; 669 target_sg = ic->journal_io_scatterlist; 670 } else { 671 source_sg = ic->journal_io_scatterlist; 672 target_sg = ic->journal_scatterlist; 673 } 674 675 do { 676 struct skcipher_request *req; 677 unsigned ivsize; 678 char *iv; 679 680 if (likely(encrypt)) 681 rw_section_mac(ic, section, true); 682 683 req = ic->sk_requests[section]; 684 ivsize = crypto_skcipher_ivsize(ic->journal_crypt); 685 iv = req->iv; 686 687 memcpy(iv, iv + ivsize, ivsize); 688 689 req->src = source_sg[section]; 690 req->dst = target_sg[section]; 691 692 if (unlikely(do_crypt(encrypt, req, comp))) 693 atomic_inc(&comp->in_flight); 694 695 section++; 696 n_sections--; 697 } while (n_sections); 698 699 atomic_dec(&comp->in_flight); 700 complete_journal_op(comp); 701 } 702 703 static void encrypt_journal(struct dm_integrity_c *ic, bool encrypt, unsigned section, 704 unsigned n_sections, struct journal_completion *comp) 705 { 706 if (ic->journal_xor) 707 return xor_journal(ic, encrypt, section, n_sections, comp); 708 else 709 return crypt_journal(ic, encrypt, section, n_sections, comp); 710 } 711 712 static void complete_journal_io(unsigned long error, void *context) 713 { 714 struct journal_completion *comp = context; 715 if (unlikely(error != 0)) 716 dm_integrity_io_error(comp->ic, "writing journal", -EIO); 717 complete_journal_op(comp); 718 } 719 720 static void rw_journal(struct dm_integrity_c *ic, int op, int op_flags, unsigned section, 721 unsigned n_sections, struct journal_completion *comp) 722 { 723 struct dm_io_request io_req; 724 struct dm_io_region io_loc; 725 unsigned sector, n_sectors, pl_index, pl_offset; 726 int r; 727 728 if (unlikely(dm_integrity_failed(ic))) { 729 if (comp) 730 complete_journal_io(-1UL, comp); 731 return; 732 } 733 734 sector = section * ic->journal_section_sectors; 735 n_sectors = n_sections * ic->journal_section_sectors; 736 737 pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); 738 pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1); 739 740 io_req.bi_op = op; 741 io_req.bi_op_flags = op_flags; 742 io_req.mem.type = DM_IO_PAGE_LIST; 743 if (ic->journal_io) 744 io_req.mem.ptr.pl = &ic->journal_io[pl_index]; 745 else 746 io_req.mem.ptr.pl = &ic->journal[pl_index]; 747 io_req.mem.offset = pl_offset; 748 if (likely(comp != NULL)) { 749 io_req.notify.fn = complete_journal_io; 750 io_req.notify.context = comp; 751 } else { 752 io_req.notify.fn = NULL; 753 } 754 io_req.client = ic->io; 755 io_loc.bdev = ic->dev->bdev; 756 io_loc.sector = ic->start + SB_SECTORS + sector; 757 io_loc.count = n_sectors; 758 759 r = dm_io(&io_req, 1, &io_loc, NULL); 760 if (unlikely(r)) { 761 dm_integrity_io_error(ic, op == REQ_OP_READ ? 
"reading journal" : "writing journal", r); 762 if (comp) { 763 WARN_ONCE(1, "asynchronous dm_io failed: %d", r); 764 complete_journal_io(-1UL, comp); 765 } 766 } 767 } 768 769 static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsigned commit_sections) 770 { 771 struct journal_completion io_comp; 772 struct journal_completion crypt_comp_1; 773 struct journal_completion crypt_comp_2; 774 unsigned i; 775 776 io_comp.ic = ic; 777 init_completion(&io_comp.comp); 778 779 if (commit_start + commit_sections <= ic->journal_sections) { 780 io_comp.in_flight = (atomic_t)ATOMIC_INIT(1); 781 if (ic->journal_io) { 782 crypt_comp_1.ic = ic; 783 init_completion(&crypt_comp_1.comp); 784 crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); 785 encrypt_journal(ic, true, commit_start, commit_sections, &crypt_comp_1); 786 wait_for_completion_io(&crypt_comp_1.comp); 787 } else { 788 for (i = 0; i < commit_sections; i++) 789 rw_section_mac(ic, commit_start + i, true); 790 } 791 rw_journal(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, commit_start, 792 commit_sections, &io_comp); 793 } else { 794 unsigned to_end; 795 io_comp.in_flight = (atomic_t)ATOMIC_INIT(2); 796 to_end = ic->journal_sections - commit_start; 797 if (ic->journal_io) { 798 crypt_comp_1.ic = ic; 799 init_completion(&crypt_comp_1.comp); 800 crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); 801 encrypt_journal(ic, true, commit_start, to_end, &crypt_comp_1); 802 if (try_wait_for_completion(&crypt_comp_1.comp)) { 803 rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp); 804 reinit_completion(&crypt_comp_1.comp); 805 crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); 806 encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_1); 807 wait_for_completion_io(&crypt_comp_1.comp); 808 } else { 809 crypt_comp_2.ic = ic; 810 init_completion(&crypt_comp_2.comp); 811 crypt_comp_2.in_flight = (atomic_t)ATOMIC_INIT(0); 812 encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_2); 813 wait_for_completion_io(&crypt_comp_1.comp); 814 rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp); 815 wait_for_completion_io(&crypt_comp_2.comp); 816 } 817 } else { 818 for (i = 0; i < to_end; i++) 819 rw_section_mac(ic, commit_start + i, true); 820 rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp); 821 for (i = 0; i < commit_sections - to_end; i++) 822 rw_section_mac(ic, i, true); 823 } 824 rw_journal(ic, REQ_OP_WRITE, REQ_FUA, 0, commit_sections - to_end, &io_comp); 825 } 826 827 wait_for_completion_io(&io_comp.comp); 828 } 829 830 static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsigned offset, 831 unsigned n_sectors, sector_t target, io_notify_fn fn, void *data) 832 { 833 struct dm_io_request io_req; 834 struct dm_io_region io_loc; 835 int r; 836 unsigned sector, pl_index, pl_offset; 837 838 BUG_ON((target | n_sectors | offset) & (unsigned)(ic->sectors_per_block - 1)); 839 840 if (unlikely(dm_integrity_failed(ic))) { 841 fn(-1UL, data); 842 return; 843 } 844 845 sector = section * ic->journal_section_sectors + JOURNAL_BLOCK_SECTORS + offset; 846 847 pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); 848 pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1); 849 850 io_req.bi_op = REQ_OP_WRITE; 851 io_req.bi_op_flags = 0; 852 io_req.mem.type = DM_IO_PAGE_LIST; 853 io_req.mem.ptr.pl = &ic->journal[pl_index]; 854 io_req.mem.offset = pl_offset; 855 io_req.notify.fn = fn; 856 io_req.notify.context = data; 857 io_req.client = ic->io; 858 io_loc.bdev = 
ic->dev->bdev; 859 io_loc.sector = ic->start + target; 860 io_loc.count = n_sectors; 861 862 r = dm_io(&io_req, 1, &io_loc, NULL); 863 if (unlikely(r)) { 864 WARN_ONCE(1, "asynchronous dm_io failed: %d", r); 865 fn(-1UL, data); 866 } 867 } 868 869 static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range) 870 { 871 struct rb_node **n = &ic->in_progress.rb_node; 872 struct rb_node *parent; 873 874 BUG_ON((new_range->logical_sector | new_range->n_sectors) & (unsigned)(ic->sectors_per_block - 1)); 875 876 parent = NULL; 877 878 while (*n) { 879 struct dm_integrity_range *range = container_of(*n, struct dm_integrity_range, node); 880 881 parent = *n; 882 if (new_range->logical_sector + new_range->n_sectors <= range->logical_sector) { 883 n = &range->node.rb_left; 884 } else if (new_range->logical_sector >= range->logical_sector + range->n_sectors) { 885 n = &range->node.rb_right; 886 } else { 887 return false; 888 } 889 } 890 891 rb_link_node(&new_range->node, parent, n); 892 rb_insert_color(&new_range->node, &ic->in_progress); 893 894 return true; 895 } 896 897 static void remove_range_unlocked(struct dm_integrity_c *ic, struct dm_integrity_range *range) 898 { 899 rb_erase(&range->node, &ic->in_progress); 900 wake_up_locked(&ic->endio_wait); 901 } 902 903 static void remove_range(struct dm_integrity_c *ic, struct dm_integrity_range *range) 904 { 905 unsigned long flags; 906 907 spin_lock_irqsave(&ic->endio_wait.lock, flags); 908 remove_range_unlocked(ic, range); 909 spin_unlock_irqrestore(&ic->endio_wait.lock, flags); 910 } 911 912 static void init_journal_node(struct journal_node *node) 913 { 914 RB_CLEAR_NODE(&node->node); 915 node->sector = (sector_t)-1; 916 } 917 918 static void add_journal_node(struct dm_integrity_c *ic, struct journal_node *node, sector_t sector) 919 { 920 struct rb_node **link; 921 struct rb_node *parent; 922 923 node->sector = sector; 924 BUG_ON(!RB_EMPTY_NODE(&node->node)); 925 926 link = &ic->journal_tree_root.rb_node; 927 parent = NULL; 928 929 while (*link) { 930 struct journal_node *j; 931 parent = *link; 932 j = container_of(parent, struct journal_node, node); 933 if (sector < j->sector) 934 link = &j->node.rb_left; 935 else 936 link = &j->node.rb_right; 937 } 938 939 rb_link_node(&node->node, parent, link); 940 rb_insert_color(&node->node, &ic->journal_tree_root); 941 } 942 943 static void remove_journal_node(struct dm_integrity_c *ic, struct journal_node *node) 944 { 945 BUG_ON(RB_EMPTY_NODE(&node->node)); 946 rb_erase(&node->node, &ic->journal_tree_root); 947 init_journal_node(node); 948 } 949 950 #define NOT_FOUND (-1U) 951 952 static unsigned find_journal_node(struct dm_integrity_c *ic, sector_t sector, sector_t *next_sector) 953 { 954 struct rb_node *n = ic->journal_tree_root.rb_node; 955 unsigned found = NOT_FOUND; 956 *next_sector = (sector_t)-1; 957 while (n) { 958 struct journal_node *j = container_of(n, struct journal_node, node); 959 if (sector == j->sector) { 960 found = j - ic->journal_tree; 961 } 962 if (sector < j->sector) { 963 *next_sector = j->sector; 964 n = j->node.rb_left; 965 } else { 966 n = j->node.rb_right; 967 } 968 } 969 970 return found; 971 } 972 973 static bool test_journal_node(struct dm_integrity_c *ic, unsigned pos, sector_t sector) 974 { 975 struct journal_node *node, *next_node; 976 struct rb_node *next; 977 978 if (unlikely(pos >= ic->journal_entries)) 979 return false; 980 node = &ic->journal_tree[pos]; 981 if (unlikely(RB_EMPTY_NODE(&node->node))) 982 return false; 983 if 
(unlikely(node->sector != sector)) 984 return false; 985 986 next = rb_next(&node->node); 987 if (unlikely(!next)) 988 return true; 989 990 next_node = container_of(next, struct journal_node, node); 991 return next_node->sector != sector; 992 } 993 994 static bool find_newer_committed_node(struct dm_integrity_c *ic, struct journal_node *node) 995 { 996 struct rb_node *next; 997 struct journal_node *next_node; 998 unsigned next_section; 999 1000 BUG_ON(RB_EMPTY_NODE(&node->node)); 1001 1002 next = rb_next(&node->node); 1003 if (unlikely(!next)) 1004 return false; 1005 1006 next_node = container_of(next, struct journal_node, node); 1007 1008 if (next_node->sector != node->sector) 1009 return false; 1010 1011 next_section = (unsigned)(next_node - ic->journal_tree) / ic->journal_section_entries; 1012 if (next_section >= ic->committed_section && 1013 next_section < ic->committed_section + ic->n_committed_sections) 1014 return true; 1015 if (next_section + ic->journal_sections < ic->committed_section + ic->n_committed_sections) 1016 return true; 1017 1018 return false; 1019 } 1020 1021 #define TAG_READ 0 1022 #define TAG_WRITE 1 1023 #define TAG_CMP 2 1024 1025 static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, sector_t *metadata_block, 1026 unsigned *metadata_offset, unsigned total_size, int op) 1027 { 1028 do { 1029 unsigned char *data, *dp; 1030 struct dm_buffer *b; 1031 unsigned to_copy; 1032 int r; 1033 1034 r = dm_integrity_failed(ic); 1035 if (unlikely(r)) 1036 return r; 1037 1038 data = dm_bufio_read(ic->bufio, *metadata_block, &b); 1039 if (unlikely(IS_ERR(data))) 1040 return PTR_ERR(data); 1041 1042 to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size); 1043 dp = data + *metadata_offset; 1044 if (op == TAG_READ) { 1045 memcpy(tag, dp, to_copy); 1046 } else if (op == TAG_WRITE) { 1047 memcpy(dp, tag, to_copy); 1048 dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy); 1049 } else { 1050 /* e.g.: op == TAG_CMP */ 1051 if (unlikely(memcmp(dp, tag, to_copy))) { 1052 unsigned i; 1053 1054 for (i = 0; i < to_copy; i++) { 1055 if (dp[i] != tag[i]) 1056 break; 1057 total_size--; 1058 } 1059 dm_bufio_release(b); 1060 return total_size; 1061 } 1062 } 1063 dm_bufio_release(b); 1064 1065 tag += to_copy; 1066 *metadata_offset += to_copy; 1067 if (unlikely(*metadata_offset == 1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) { 1068 (*metadata_block)++; 1069 *metadata_offset = 0; 1070 } 1071 total_size -= to_copy; 1072 } while (unlikely(total_size)); 1073 1074 return 0; 1075 } 1076 1077 static void dm_integrity_flush_buffers(struct dm_integrity_c *ic) 1078 { 1079 int r; 1080 r = dm_bufio_write_dirty_buffers(ic->bufio); 1081 if (unlikely(r)) 1082 dm_integrity_io_error(ic, "writing tags", r); 1083 } 1084 1085 static void sleep_on_endio_wait(struct dm_integrity_c *ic) 1086 { 1087 DECLARE_WAITQUEUE(wait, current); 1088 __add_wait_queue(&ic->endio_wait, &wait); 1089 __set_current_state(TASK_UNINTERRUPTIBLE); 1090 spin_unlock_irq(&ic->endio_wait.lock); 1091 io_schedule(); 1092 spin_lock_irq(&ic->endio_wait.lock); 1093 __remove_wait_queue(&ic->endio_wait, &wait); 1094 } 1095 1096 static void autocommit_fn(unsigned long data) 1097 { 1098 struct dm_integrity_c *ic = (struct dm_integrity_c *)data; 1099 1100 if (likely(!dm_integrity_failed(ic))) 1101 queue_work(ic->commit_wq, &ic->commit_work); 1102 } 1103 1104 static void schedule_autocommit(struct dm_integrity_c *ic) 1105 { 1106 if 
(!timer_pending(&ic->autocommit_timer))
		mod_timer(&ic->autocommit_timer, jiffies + ic->autocommit_jiffies);
}

static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
{
	struct bio *bio;
	unsigned long flags;

	spin_lock_irqsave(&ic->endio_wait.lock, flags);
	bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
	bio_list_add(&ic->flush_bio_list, bio);
	spin_unlock_irqrestore(&ic->endio_wait.lock, flags);

	queue_work(ic->commit_wq, &ic->commit_work);
}

static void do_endio(struct dm_integrity_c *ic, struct bio *bio)
{
	int r = dm_integrity_failed(ic);
	if (unlikely(r) && !bio->bi_status)
		bio->bi_status = errno_to_blk_status(r);
	bio_endio(bio);
}

static void do_endio_flush(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
{
	struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));

	if (unlikely(dio->fua) && likely(!bio->bi_status) && likely(!dm_integrity_failed(ic)))
		submit_flush_bio(ic, dio);
	else
		do_endio(ic, bio);
}

static void dec_in_flight(struct dm_integrity_io *dio)
{
	if (atomic_dec_and_test(&dio->in_flight)) {
		struct dm_integrity_c *ic = dio->ic;
		struct bio *bio;

		remove_range(ic, &dio->range);

		if (unlikely(dio->write))
			schedule_autocommit(ic);

		bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));

		if (unlikely(dio->bi_status) && !bio->bi_status)
			bio->bi_status = dio->bi_status;
		if (likely(!bio->bi_status) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
			dio->range.logical_sector += dio->range.n_sectors;
			bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
			INIT_WORK(&dio->work, integrity_bio_wait);
			queue_work(ic->wait_wq, &dio->work);
			return;
		}
		do_endio_flush(ic, dio);
	}
}

static void integrity_end_io(struct bio *bio)
{
	struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));

	bio->bi_iter = dio->orig_bi_iter;
	bio->bi_disk = dio->orig_bi_disk;
	bio->bi_partno = dio->orig_bi_partno;
	if (dio->orig_bi_integrity) {
		bio->bi_integrity = dio->orig_bi_integrity;
		bio->bi_opf |= REQ_INTEGRITY;
	}
	bio->bi_end_io = dio->orig_bi_end_io;

	if (dio->completion)
		complete(dio->completion);

	dec_in_flight(dio);
}

static void integrity_sector_checksum(struct dm_integrity_c *ic, sector_t sector,
				      const char *data, char *result)
{
	__u64 sector_le = cpu_to_le64(sector);
	SHASH_DESC_ON_STACK(req, ic->internal_hash);
	int r;
	unsigned digest_size;

	req->tfm = ic->internal_hash;
	req->flags = 0;

	r = crypto_shash_init(req);
	if (unlikely(r < 0)) {
		dm_integrity_io_error(ic, "crypto_shash_init", r);
		goto failed;
	}

	r = crypto_shash_update(req, (const __u8 *)&sector_le, sizeof sector_le);
	if (unlikely(r < 0)) {
		dm_integrity_io_error(ic, "crypto_shash_update", r);
		goto failed;
	}

	r = crypto_shash_update(req, data, ic->sectors_per_block << SECTOR_SHIFT);
	if (unlikely(r < 0)) {
		dm_integrity_io_error(ic, "crypto_shash_update", r);
		goto failed;
	}

	r = crypto_shash_final(req, result);
	if (unlikely(r < 0)) {
		dm_integrity_io_error(ic, "crypto_shash_final", r);
		goto failed;
	}
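	/* if the digest is shorter than the tag, zero-pad the remainder of the result */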
1220 1221 digest_size = crypto_shash_digestsize(ic->internal_hash); 1222 if (unlikely(digest_size < ic->tag_size)) 1223 memset(result + digest_size, 0, ic->tag_size - digest_size); 1224 1225 return; 1226 1227 failed: 1228 /* this shouldn't happen anyway, the hash functions have no reason to fail */ 1229 get_random_bytes(result, ic->tag_size); 1230 } 1231 1232 static void integrity_metadata(struct work_struct *w) 1233 { 1234 struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work); 1235 struct dm_integrity_c *ic = dio->ic; 1236 1237 int r; 1238 1239 if (ic->internal_hash) { 1240 struct bvec_iter iter; 1241 struct bio_vec bv; 1242 unsigned digest_size = crypto_shash_digestsize(ic->internal_hash); 1243 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 1244 char *checksums; 1245 unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0; 1246 char checksums_onstack[ic->tag_size + extra_space]; 1247 unsigned sectors_to_process = dio->range.n_sectors; 1248 sector_t sector = dio->range.logical_sector; 1249 1250 if (unlikely(ic->mode == 'R')) 1251 goto skip_io; 1252 1253 checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space, 1254 GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN); 1255 if (!checksums) 1256 checksums = checksums_onstack; 1257 1258 __bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) { 1259 unsigned pos; 1260 char *mem, *checksums_ptr; 1261 1262 again: 1263 mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset; 1264 pos = 0; 1265 checksums_ptr = checksums; 1266 do { 1267 integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr); 1268 checksums_ptr += ic->tag_size; 1269 sectors_to_process -= ic->sectors_per_block; 1270 pos += ic->sectors_per_block << SECTOR_SHIFT; 1271 sector += ic->sectors_per_block; 1272 } while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack); 1273 kunmap_atomic(mem); 1274 1275 r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset, 1276 checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE); 1277 if (unlikely(r)) { 1278 if (r > 0) { 1279 DMERR("Checksum failed at sector 0x%llx", 1280 (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size))); 1281 r = -EILSEQ; 1282 atomic64_inc(&ic->number_of_mismatches); 1283 } 1284 if (likely(checksums != checksums_onstack)) 1285 kfree(checksums); 1286 goto error; 1287 } 1288 1289 if (!sectors_to_process) 1290 break; 1291 1292 if (unlikely(pos < bv.bv_len)) { 1293 bv.bv_offset += pos; 1294 bv.bv_len -= pos; 1295 goto again; 1296 } 1297 } 1298 1299 if (likely(checksums != checksums_onstack)) 1300 kfree(checksums); 1301 } else { 1302 struct bio_integrity_payload *bip = dio->orig_bi_integrity; 1303 1304 if (bip) { 1305 struct bio_vec biv; 1306 struct bvec_iter iter; 1307 unsigned data_to_process = dio->range.n_sectors; 1308 sector_to_block(ic, data_to_process); 1309 data_to_process *= ic->tag_size; 1310 1311 bip_for_each_vec(biv, bip, iter) { 1312 unsigned char *tag; 1313 unsigned this_len; 1314 1315 BUG_ON(PageHighMem(biv.bv_page)); 1316 tag = lowmem_page_address(biv.bv_page) + biv.bv_offset; 1317 this_len = min(biv.bv_len, data_to_process); 1318 r = dm_integrity_rw_tag(ic, tag, &dio->metadata_block, &dio->metadata_offset, 1319 this_len, !dio->write ? 
TAG_READ : TAG_WRITE); 1320 if (unlikely(r)) 1321 goto error; 1322 data_to_process -= this_len; 1323 if (!data_to_process) 1324 break; 1325 } 1326 } 1327 } 1328 skip_io: 1329 dec_in_flight(dio); 1330 return; 1331 error: 1332 dio->bi_status = errno_to_blk_status(r); 1333 dec_in_flight(dio); 1334 } 1335 1336 static int dm_integrity_map(struct dm_target *ti, struct bio *bio) 1337 { 1338 struct dm_integrity_c *ic = ti->private; 1339 struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); 1340 struct bio_integrity_payload *bip; 1341 1342 sector_t area, offset; 1343 1344 dio->ic = ic; 1345 dio->bi_status = 0; 1346 1347 if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { 1348 submit_flush_bio(ic, dio); 1349 return DM_MAPIO_SUBMITTED; 1350 } 1351 1352 dio->range.logical_sector = dm_target_offset(ti, bio->bi_iter.bi_sector); 1353 dio->write = bio_op(bio) == REQ_OP_WRITE; 1354 dio->fua = dio->write && bio->bi_opf & REQ_FUA; 1355 if (unlikely(dio->fua)) { 1356 /* 1357 * Don't pass down the FUA flag because we have to flush 1358 * disk cache anyway. 1359 */ 1360 bio->bi_opf &= ~REQ_FUA; 1361 } 1362 if (unlikely(dio->range.logical_sector + bio_sectors(bio) > ic->provided_data_sectors)) { 1363 DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx", 1364 (unsigned long long)dio->range.logical_sector, bio_sectors(bio), 1365 (unsigned long long)ic->provided_data_sectors); 1366 return DM_MAPIO_KILL; 1367 } 1368 if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned)(ic->sectors_per_block - 1))) { 1369 DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x", 1370 ic->sectors_per_block, 1371 (unsigned long long)dio->range.logical_sector, bio_sectors(bio)); 1372 return DM_MAPIO_KILL; 1373 } 1374 1375 if (ic->sectors_per_block > 1) { 1376 struct bvec_iter iter; 1377 struct bio_vec bv; 1378 bio_for_each_segment(bv, bio, iter) { 1379 if (unlikely((bv.bv_offset | bv.bv_len) & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) { 1380 DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary", 1381 bv.bv_offset, bv.bv_len, ic->sectors_per_block); 1382 return DM_MAPIO_KILL; 1383 } 1384 } 1385 } 1386 1387 bip = bio_integrity(bio); 1388 if (!ic->internal_hash) { 1389 if (bip) { 1390 unsigned wanted_tag_size = bio_sectors(bio) >> ic->sb->log2_sectors_per_block; 1391 if (ic->log2_tag_size >= 0) 1392 wanted_tag_size <<= ic->log2_tag_size; 1393 else 1394 wanted_tag_size *= ic->tag_size; 1395 if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) { 1396 DMERR("Invalid integrity data size %u, expected %u", bip->bip_iter.bi_size, wanted_tag_size); 1397 return DM_MAPIO_KILL; 1398 } 1399 } 1400 } else { 1401 if (unlikely(bip != NULL)) { 1402 DMERR("Unexpected integrity data when using internal hash"); 1403 return DM_MAPIO_KILL; 1404 } 1405 } 1406 1407 if (unlikely(ic->mode == 'R') && unlikely(dio->write)) 1408 return DM_MAPIO_KILL; 1409 1410 get_area_and_offset(ic, dio->range.logical_sector, &area, &offset); 1411 dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset); 1412 bio->bi_iter.bi_sector = get_data_sector(ic, area, offset); 1413 1414 dm_integrity_map_continue(dio, true); 1415 return DM_MAPIO_SUBMITTED; 1416 } 1417 1418 static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio, 1419 unsigned journal_section, unsigned journal_entry) 1420 { 1421 struct dm_integrity_c *ic = dio->ic; 1422 sector_t logical_sector; 1423 unsigned n_sectors; 1424 1425 logical_sector = dio->range.logical_sector; 1426 n_sectors = dio->range.n_sectors; 
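	/*
	 * Process the bio one block at a time: for reads, copy the data (and
	 * the displaced last bytes saved in je->last_bytes) out of the
	 * in-memory journal; for writes, copy the data into the journal and
	 * record the integrity tag in the journal entry.
	 */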
1427 do { 1428 struct bio_vec bv = bio_iovec(bio); 1429 char *mem; 1430 1431 if (unlikely(bv.bv_len >> SECTOR_SHIFT > n_sectors)) 1432 bv.bv_len = n_sectors << SECTOR_SHIFT; 1433 n_sectors -= bv.bv_len >> SECTOR_SHIFT; 1434 bio_advance_iter(bio, &bio->bi_iter, bv.bv_len); 1435 retry_kmap: 1436 mem = kmap_atomic(bv.bv_page); 1437 if (likely(dio->write)) 1438 flush_dcache_page(bv.bv_page); 1439 1440 do { 1441 struct journal_entry *je = access_journal_entry(ic, journal_section, journal_entry); 1442 1443 if (unlikely(!dio->write)) { 1444 struct journal_sector *js; 1445 char *mem_ptr; 1446 unsigned s; 1447 1448 if (unlikely(journal_entry_is_inprogress(je))) { 1449 flush_dcache_page(bv.bv_page); 1450 kunmap_atomic(mem); 1451 1452 __io_wait_event(ic->copy_to_journal_wait, !journal_entry_is_inprogress(je)); 1453 goto retry_kmap; 1454 } 1455 smp_rmb(); 1456 BUG_ON(journal_entry_get_sector(je) != logical_sector); 1457 js = access_journal_data(ic, journal_section, journal_entry); 1458 mem_ptr = mem + bv.bv_offset; 1459 s = 0; 1460 do { 1461 memcpy(mem_ptr, js, JOURNAL_SECTOR_DATA); 1462 *(commit_id_t *)(mem_ptr + JOURNAL_SECTOR_DATA) = je->last_bytes[s]; 1463 js++; 1464 mem_ptr += 1 << SECTOR_SHIFT; 1465 } while (++s < ic->sectors_per_block); 1466 #ifdef INTERNAL_VERIFY 1467 if (ic->internal_hash) { 1468 char checksums_onstack[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)]; 1469 1470 integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack); 1471 if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) { 1472 DMERR("Checksum failed when reading from journal, at sector 0x%llx", 1473 (unsigned long long)logical_sector); 1474 } 1475 } 1476 #endif 1477 } 1478 1479 if (!ic->internal_hash) { 1480 struct bio_integrity_payload *bip = bio_integrity(bio); 1481 unsigned tag_todo = ic->tag_size; 1482 char *tag_ptr = journal_entry_tag(ic, je); 1483 1484 if (bip) do { 1485 struct bio_vec biv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter); 1486 unsigned tag_now = min(biv.bv_len, tag_todo); 1487 char *tag_addr; 1488 BUG_ON(PageHighMem(biv.bv_page)); 1489 tag_addr = lowmem_page_address(biv.bv_page) + biv.bv_offset; 1490 if (likely(dio->write)) 1491 memcpy(tag_ptr, tag_addr, tag_now); 1492 else 1493 memcpy(tag_addr, tag_ptr, tag_now); 1494 bvec_iter_advance(bip->bip_vec, &bip->bip_iter, tag_now); 1495 tag_ptr += tag_now; 1496 tag_todo -= tag_now; 1497 } while (unlikely(tag_todo)); else { 1498 if (likely(dio->write)) 1499 memset(tag_ptr, 0, tag_todo); 1500 } 1501 } 1502 1503 if (likely(dio->write)) { 1504 struct journal_sector *js; 1505 unsigned s; 1506 1507 js = access_journal_data(ic, journal_section, journal_entry); 1508 memcpy(js, mem + bv.bv_offset, ic->sectors_per_block << SECTOR_SHIFT); 1509 1510 s = 0; 1511 do { 1512 je->last_bytes[s] = js[s].commit_id; 1513 } while (++s < ic->sectors_per_block); 1514 1515 if (ic->internal_hash) { 1516 unsigned digest_size = crypto_shash_digestsize(ic->internal_hash); 1517 if (unlikely(digest_size > ic->tag_size)) { 1518 char checksums_onstack[digest_size]; 1519 integrity_sector_checksum(ic, logical_sector, (char *)js, checksums_onstack); 1520 memcpy(journal_entry_tag(ic, je), checksums_onstack, ic->tag_size); 1521 } else 1522 integrity_sector_checksum(ic, logical_sector, (char *)js, journal_entry_tag(ic, je)); 1523 } 1524 1525 journal_entry_set_sector(je, logical_sector); 1526 } 1527 logical_sector += ic->sectors_per_block; 1528 1529 journal_entry++; 1530 if (unlikely(journal_entry == 
ic->journal_section_entries)) { 1531 journal_entry = 0; 1532 journal_section++; 1533 wraparound_section(ic, &journal_section); 1534 } 1535 1536 bv.bv_offset += ic->sectors_per_block << SECTOR_SHIFT; 1537 } while (bv.bv_len -= ic->sectors_per_block << SECTOR_SHIFT); 1538 1539 if (unlikely(!dio->write)) 1540 flush_dcache_page(bv.bv_page); 1541 kunmap_atomic(mem); 1542 } while (n_sectors); 1543 1544 if (likely(dio->write)) { 1545 smp_mb(); 1546 if (unlikely(waitqueue_active(&ic->copy_to_journal_wait))) 1547 wake_up(&ic->copy_to_journal_wait); 1548 if (ACCESS_ONCE(ic->free_sectors) <= ic->free_sectors_threshold) { 1549 queue_work(ic->commit_wq, &ic->commit_work); 1550 } else { 1551 schedule_autocommit(ic); 1552 } 1553 } else { 1554 remove_range(ic, &dio->range); 1555 } 1556 1557 if (unlikely(bio->bi_iter.bi_size)) { 1558 sector_t area, offset; 1559 1560 dio->range.logical_sector = logical_sector; 1561 get_area_and_offset(ic, dio->range.logical_sector, &area, &offset); 1562 dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset); 1563 return true; 1564 } 1565 1566 return false; 1567 } 1568 1569 static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map) 1570 { 1571 struct dm_integrity_c *ic = dio->ic; 1572 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 1573 unsigned journal_section, journal_entry; 1574 unsigned journal_read_pos; 1575 struct completion read_comp; 1576 bool need_sync_io = ic->internal_hash && !dio->write; 1577 1578 if (need_sync_io && from_map) { 1579 INIT_WORK(&dio->work, integrity_bio_wait); 1580 queue_work(ic->metadata_wq, &dio->work); 1581 return; 1582 } 1583 1584 lock_retry: 1585 spin_lock_irq(&ic->endio_wait.lock); 1586 retry: 1587 if (unlikely(dm_integrity_failed(ic))) { 1588 spin_unlock_irq(&ic->endio_wait.lock); 1589 do_endio(ic, bio); 1590 return; 1591 } 1592 dio->range.n_sectors = bio_sectors(bio); 1593 journal_read_pos = NOT_FOUND; 1594 if (likely(ic->mode == 'J')) { 1595 if (dio->write) { 1596 unsigned next_entry, i, pos; 1597 unsigned ws, we, range_sectors; 1598 1599 dio->range.n_sectors = min(dio->range.n_sectors, 1600 ic->free_sectors << ic->sb->log2_sectors_per_block); 1601 if (unlikely(!dio->range.n_sectors)) 1602 goto sleep; 1603 range_sectors = dio->range.n_sectors >> ic->sb->log2_sectors_per_block; 1604 ic->free_sectors -= range_sectors; 1605 journal_section = ic->free_section; 1606 journal_entry = ic->free_section_entry; 1607 1608 next_entry = ic->free_section_entry + range_sectors; 1609 ic->free_section_entry = next_entry % ic->journal_section_entries; 1610 ic->free_section += next_entry / ic->journal_section_entries; 1611 ic->n_uncommitted_sections += next_entry / ic->journal_section_entries; 1612 wraparound_section(ic, &ic->free_section); 1613 1614 pos = journal_section * ic->journal_section_entries + journal_entry; 1615 ws = journal_section; 1616 we = journal_entry; 1617 i = 0; 1618 do { 1619 struct journal_entry *je; 1620 1621 add_journal_node(ic, &ic->journal_tree[pos], dio->range.logical_sector + i); 1622 pos++; 1623 if (unlikely(pos >= ic->journal_entries)) 1624 pos = 0; 1625 1626 je = access_journal_entry(ic, ws, we); 1627 BUG_ON(!journal_entry_is_unused(je)); 1628 journal_entry_set_inprogress(je); 1629 we++; 1630 if (unlikely(we == ic->journal_section_entries)) { 1631 we = 0; 1632 ws++; 1633 wraparound_section(ic, &ws); 1634 } 1635 } while ((i += ic->sectors_per_block) < dio->range.n_sectors); 1636 1637 spin_unlock_irq(&ic->endio_wait.lock); 1638 goto 
journal_read_write; 1639 } else { 1640 sector_t next_sector; 1641 journal_read_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector); 1642 if (likely(journal_read_pos == NOT_FOUND)) { 1643 if (unlikely(dio->range.n_sectors > next_sector - dio->range.logical_sector)) 1644 dio->range.n_sectors = next_sector - dio->range.logical_sector; 1645 } else { 1646 unsigned i; 1647 unsigned jp = journal_read_pos + 1; 1648 for (i = ic->sectors_per_block; i < dio->range.n_sectors; i += ic->sectors_per_block, jp++) { 1649 if (!test_journal_node(ic, jp, dio->range.logical_sector + i)) 1650 break; 1651 } 1652 dio->range.n_sectors = i; 1653 } 1654 } 1655 } 1656 if (unlikely(!add_new_range(ic, &dio->range))) { 1657 /* 1658 * We must not sleep in the request routine because it could 1659 * stall bios on current->bio_list. 1660 * So, we offload the bio to a workqueue if we have to sleep. 1661 */ 1662 sleep: 1663 if (from_map) { 1664 spin_unlock_irq(&ic->endio_wait.lock); 1665 INIT_WORK(&dio->work, integrity_bio_wait); 1666 queue_work(ic->wait_wq, &dio->work); 1667 return; 1668 } else { 1669 sleep_on_endio_wait(ic); 1670 goto retry; 1671 } 1672 } 1673 spin_unlock_irq(&ic->endio_wait.lock); 1674 1675 if (unlikely(journal_read_pos != NOT_FOUND)) { 1676 journal_section = journal_read_pos / ic->journal_section_entries; 1677 journal_entry = journal_read_pos % ic->journal_section_entries; 1678 goto journal_read_write; 1679 } 1680 1681 dio->in_flight = (atomic_t)ATOMIC_INIT(2); 1682 1683 if (need_sync_io) { 1684 init_completion(&read_comp); 1685 dio->completion = &read_comp; 1686 } else 1687 dio->completion = NULL; 1688 1689 dio->orig_bi_iter = bio->bi_iter; 1690 1691 dio->orig_bi_disk = bio->bi_disk; 1692 dio->orig_bi_partno = bio->bi_partno; 1693 bio_set_dev(bio, ic->dev->bdev); 1694 1695 dio->orig_bi_integrity = bio_integrity(bio); 1696 bio->bi_integrity = NULL; 1697 bio->bi_opf &= ~REQ_INTEGRITY; 1698 1699 dio->orig_bi_end_io = bio->bi_end_io; 1700 bio->bi_end_io = integrity_end_io; 1701 1702 bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT; 1703 bio->bi_iter.bi_sector += ic->start; 1704 generic_make_request(bio); 1705 1706 if (need_sync_io) { 1707 wait_for_completion_io(&read_comp); 1708 if (likely(!bio->bi_status)) 1709 integrity_metadata(&dio->work); 1710 else 1711 dec_in_flight(dio); 1712 1713 } else { 1714 INIT_WORK(&dio->work, integrity_metadata); 1715 queue_work(ic->metadata_wq, &dio->work); 1716 } 1717 1718 return; 1719 1720 journal_read_write: 1721 if (unlikely(__journal_read_write(dio, bio, journal_section, journal_entry))) 1722 goto lock_retry; 1723 1724 do_endio_flush(ic, dio); 1725 } 1726 1727 1728 static void integrity_bio_wait(struct work_struct *w) 1729 { 1730 struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work); 1731 1732 dm_integrity_map_continue(dio, false); 1733 } 1734 1735 static void pad_uncommitted(struct dm_integrity_c *ic) 1736 { 1737 if (ic->free_section_entry) { 1738 ic->free_sectors -= ic->journal_section_entries - ic->free_section_entry; 1739 ic->free_section_entry = 0; 1740 ic->free_section++; 1741 wraparound_section(ic, &ic->free_section); 1742 ic->n_uncommitted_sections++; 1743 } 1744 WARN_ON(ic->journal_sections * ic->journal_section_entries != 1745 (ic->n_uncommitted_sections + ic->n_committed_sections) * ic->journal_section_entries + ic->free_sectors); 1746 } 1747 1748 static void integrity_commit(struct work_struct *w) 1749 { 1750 struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, commit_work); 1751 unsigned 
commit_start, commit_sections; 1752 unsigned i, j, n; 1753 struct bio *flushes; 1754 1755 del_timer(&ic->autocommit_timer); 1756 1757 spin_lock_irq(&ic->endio_wait.lock); 1758 flushes = bio_list_get(&ic->flush_bio_list); 1759 if (unlikely(ic->mode != 'J')) { 1760 spin_unlock_irq(&ic->endio_wait.lock); 1761 dm_integrity_flush_buffers(ic); 1762 goto release_flush_bios; 1763 } 1764 1765 pad_uncommitted(ic); 1766 commit_start = ic->uncommitted_section; 1767 commit_sections = ic->n_uncommitted_sections; 1768 spin_unlock_irq(&ic->endio_wait.lock); 1769 1770 if (!commit_sections) 1771 goto release_flush_bios; 1772 1773 i = commit_start; 1774 for (n = 0; n < commit_sections; n++) { 1775 for (j = 0; j < ic->journal_section_entries; j++) { 1776 struct journal_entry *je; 1777 je = access_journal_entry(ic, i, j); 1778 io_wait_event(ic->copy_to_journal_wait, !journal_entry_is_inprogress(je)); 1779 } 1780 for (j = 0; j < ic->journal_section_sectors; j++) { 1781 struct journal_sector *js; 1782 js = access_journal(ic, i, j); 1783 js->commit_id = dm_integrity_commit_id(ic, i, j, ic->commit_seq); 1784 } 1785 i++; 1786 if (unlikely(i >= ic->journal_sections)) 1787 ic->commit_seq = next_commit_seq(ic->commit_seq); 1788 wraparound_section(ic, &i); 1789 } 1790 smp_rmb(); 1791 1792 write_journal(ic, commit_start, commit_sections); 1793 1794 spin_lock_irq(&ic->endio_wait.lock); 1795 ic->uncommitted_section += commit_sections; 1796 wraparound_section(ic, &ic->uncommitted_section); 1797 ic->n_uncommitted_sections -= commit_sections; 1798 ic->n_committed_sections += commit_sections; 1799 spin_unlock_irq(&ic->endio_wait.lock); 1800 1801 if (ACCESS_ONCE(ic->free_sectors) <= ic->free_sectors_threshold) 1802 queue_work(ic->writer_wq, &ic->writer_work); 1803 1804 release_flush_bios: 1805 while (flushes) { 1806 struct bio *next = flushes->bi_next; 1807 flushes->bi_next = NULL; 1808 do_endio(ic, flushes); 1809 flushes = next; 1810 } 1811 } 1812 1813 static void complete_copy_from_journal(unsigned long error, void *context) 1814 { 1815 struct journal_io *io = context; 1816 struct journal_completion *comp = io->comp; 1817 struct dm_integrity_c *ic = comp->ic; 1818 remove_range(ic, &io->range); 1819 mempool_free(io, ic->journal_io_mempool); 1820 if (unlikely(error != 0)) 1821 dm_integrity_io_error(ic, "copying from journal", -EIO); 1822 complete_journal_op(comp); 1823 } 1824 1825 static void restore_last_bytes(struct dm_integrity_c *ic, struct journal_sector *js, 1826 struct journal_entry *je) 1827 { 1828 unsigned s = 0; 1829 do { 1830 js->commit_id = je->last_bytes[s]; 1831 js++; 1832 } while (++s < ic->sectors_per_block); 1833 } 1834 1835 static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, 1836 unsigned write_sections, bool from_replay) 1837 { 1838 unsigned i, j, n; 1839 struct journal_completion comp; 1840 struct blk_plug plug; 1841 1842 blk_start_plug(&plug); 1843 1844 comp.ic = ic; 1845 comp.in_flight = (atomic_t)ATOMIC_INIT(1); 1846 init_completion(&comp.comp); 1847 1848 i = write_start; 1849 for (n = 0; n < write_sections; n++, i++, wraparound_section(ic, &i)) { 1850 #ifndef INTERNAL_VERIFY 1851 if (unlikely(from_replay)) 1852 #endif 1853 rw_section_mac(ic, i, false); 1854 for (j = 0; j < ic->journal_section_entries; j++) { 1855 struct journal_entry *je = access_journal_entry(ic, i, j); 1856 sector_t sec, area, offset; 1857 unsigned k, l, next_loop; 1858 sector_t metadata_block; 1859 unsigned metadata_offset; 1860 struct journal_io *io; 1861 1862 if (journal_entry_is_unused(je)) 1863 continue; 
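			/*
			 * The entry is in use: find how many following entries
			 * map to physically contiguous blocks so they can be
			 * written back together below.
			 */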
1864 BUG_ON(unlikely(journal_entry_is_inprogress(je)) && !from_replay); 1865 sec = journal_entry_get_sector(je); 1866 if (unlikely(from_replay)) { 1867 if (unlikely(sec & (unsigned)(ic->sectors_per_block - 1))) { 1868 dm_integrity_io_error(ic, "invalid sector in journal", -EIO); 1869 sec &= ~(sector_t)(ic->sectors_per_block - 1); 1870 } 1871 } 1872 get_area_and_offset(ic, sec, &area, &offset); 1873 restore_last_bytes(ic, access_journal_data(ic, i, j), je); 1874 for (k = j + 1; k < ic->journal_section_entries; k++) { 1875 struct journal_entry *je2 = access_journal_entry(ic, i, k); 1876 sector_t sec2, area2, offset2; 1877 if (journal_entry_is_unused(je2)) 1878 break; 1879 BUG_ON(unlikely(journal_entry_is_inprogress(je2)) && !from_replay); 1880 sec2 = journal_entry_get_sector(je2); 1881 get_area_and_offset(ic, sec2, &area2, &offset2); 1882 if (area2 != area || offset2 != offset + ((k - j) << ic->sb->log2_sectors_per_block)) 1883 break; 1884 restore_last_bytes(ic, access_journal_data(ic, i, k), je2); 1885 } 1886 next_loop = k - 1; 1887 1888 io = mempool_alloc(ic->journal_io_mempool, GFP_NOIO); 1889 io->comp = &comp; 1890 io->range.logical_sector = sec; 1891 io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block; 1892 1893 spin_lock_irq(&ic->endio_wait.lock); 1894 while (unlikely(!add_new_range(ic, &io->range))) 1895 sleep_on_endio_wait(ic); 1896 1897 if (likely(!from_replay)) { 1898 struct journal_node *section_node = &ic->journal_tree[i * ic->journal_section_entries]; 1899 1900 /* don't write if there is newer committed sector */ 1901 while (j < k && find_newer_committed_node(ic, &section_node[j])) { 1902 struct journal_entry *je2 = access_journal_entry(ic, i, j); 1903 1904 journal_entry_set_unused(je2); 1905 remove_journal_node(ic, &section_node[j]); 1906 j++; 1907 sec += ic->sectors_per_block; 1908 offset += ic->sectors_per_block; 1909 } 1910 while (j < k && find_newer_committed_node(ic, &section_node[k - 1])) { 1911 struct journal_entry *je2 = access_journal_entry(ic, i, k - 1); 1912 1913 journal_entry_set_unused(je2); 1914 remove_journal_node(ic, &section_node[k - 1]); 1915 k--; 1916 } 1917 if (j == k) { 1918 remove_range_unlocked(ic, &io->range); 1919 spin_unlock_irq(&ic->endio_wait.lock); 1920 mempool_free(io, ic->journal_io_mempool); 1921 goto skip_io; 1922 } 1923 for (l = j; l < k; l++) { 1924 remove_journal_node(ic, &section_node[l]); 1925 } 1926 } 1927 spin_unlock_irq(&ic->endio_wait.lock); 1928 1929 metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset); 1930 for (l = j; l < k; l++) { 1931 int r; 1932 struct journal_entry *je2 = access_journal_entry(ic, i, l); 1933 1934 if ( 1935 #ifndef INTERNAL_VERIFY 1936 unlikely(from_replay) && 1937 #endif 1938 ic->internal_hash) { 1939 char test_tag[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)]; 1940 1941 integrity_sector_checksum(ic, sec + ((l - j) << ic->sb->log2_sectors_per_block), 1942 (char *)access_journal_data(ic, i, l), test_tag); 1943 if (unlikely(memcmp(test_tag, journal_entry_tag(ic, je2), ic->tag_size))) 1944 dm_integrity_io_error(ic, "tag mismatch when replaying journal", -EILSEQ); 1945 } 1946 1947 journal_entry_set_unused(je2); 1948 r = dm_integrity_rw_tag(ic, journal_entry_tag(ic, je2), &metadata_block, &metadata_offset, 1949 ic->tag_size, TAG_WRITE); 1950 if (unlikely(r)) { 1951 dm_integrity_io_error(ic, "reading tags", r); 1952 } 1953 } 1954 1955 atomic_inc(&comp.in_flight); 1956 copy_from_journal(ic, i, j << ic->sb->log2_sectors_per_block, 1957 (k - j) << ic->sb->log2_sectors_per_block, 1958
get_data_sector(ic, area, offset), 1959 complete_copy_from_journal, io); 1960 skip_io: 1961 j = next_loop; 1962 } 1963 } 1964 1965 dm_bufio_write_dirty_buffers_async(ic->bufio); 1966 1967 blk_finish_plug(&plug); 1968 1969 complete_journal_op(&comp); 1970 wait_for_completion_io(&comp.comp); 1971 1972 dm_integrity_flush_buffers(ic); 1973 } 1974 1975 static void integrity_writer(struct work_struct *w) 1976 { 1977 struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, writer_work); 1978 unsigned write_start, write_sections; 1979 1980 unsigned prev_free_sectors; 1981 1982 /* the following test is not needed, but it tests the replay code */ 1983 if (ACCESS_ONCE(ic->suspending)) 1984 return; 1985 1986 spin_lock_irq(&ic->endio_wait.lock); 1987 write_start = ic->committed_section; 1988 write_sections = ic->n_committed_sections; 1989 spin_unlock_irq(&ic->endio_wait.lock); 1990 1991 if (!write_sections) 1992 return; 1993 1994 do_journal_write(ic, write_start, write_sections, false); 1995 1996 spin_lock_irq(&ic->endio_wait.lock); 1997 1998 ic->committed_section += write_sections; 1999 wraparound_section(ic, &ic->committed_section); 2000 ic->n_committed_sections -= write_sections; 2001 2002 prev_free_sectors = ic->free_sectors; 2003 ic->free_sectors += write_sections * ic->journal_section_entries; 2004 if (unlikely(!prev_free_sectors)) 2005 wake_up_locked(&ic->endio_wait); 2006 2007 spin_unlock_irq(&ic->endio_wait.lock); 2008 } 2009 2010 static void init_journal(struct dm_integrity_c *ic, unsigned start_section, 2011 unsigned n_sections, unsigned char commit_seq) 2012 { 2013 unsigned i, j, n; 2014 2015 if (!n_sections) 2016 return; 2017 2018 for (n = 0; n < n_sections; n++) { 2019 i = start_section + n; 2020 wraparound_section(ic, &i); 2021 for (j = 0; j < ic->journal_section_sectors; j++) { 2022 struct journal_sector *js = access_journal(ic, i, j); 2023 memset(&js->entries, 0, JOURNAL_SECTOR_DATA); 2024 js->commit_id = dm_integrity_commit_id(ic, i, j, commit_seq); 2025 } 2026 for (j = 0; j < ic->journal_section_entries; j++) { 2027 struct journal_entry *je = access_journal_entry(ic, i, j); 2028 journal_entry_set_unused(je); 2029 } 2030 } 2031 2032 write_journal(ic, start_section, n_sections); 2033 } 2034 2035 static int find_commit_seq(struct dm_integrity_c *ic, unsigned i, unsigned j, commit_id_t id) 2036 { 2037 unsigned char k; 2038 for (k = 0; k < N_COMMIT_IDS; k++) { 2039 if (dm_integrity_commit_id(ic, i, j, k) == id) 2040 return k; 2041 } 2042 dm_integrity_io_error(ic, "journal commit id", -EIO); 2043 return -EIO; 2044 } 2045 2046 static void replay_journal(struct dm_integrity_c *ic) 2047 { 2048 unsigned i, j; 2049 bool used_commit_ids[N_COMMIT_IDS]; 2050 unsigned max_commit_id_sections[N_COMMIT_IDS]; 2051 unsigned write_start, write_sections; 2052 unsigned continue_section; 2053 bool journal_empty; 2054 unsigned char unused, last_used, want_commit_seq; 2055 2056 if (ic->mode == 'R') 2057 return; 2058 2059 if (ic->journal_uptodate) 2060 return; 2061 2062 last_used = 0; 2063 write_start = 0; 2064 2065 if (!ic->just_formatted) { 2066 DEBUG_print("reading journal\n"); 2067 rw_journal(ic, REQ_OP_READ, 0, 0, ic->journal_sections, NULL); 2068 if (ic->journal_io) 2069 DEBUG_bytes(lowmem_page_address(ic->journal_io[0].page), 64, "read journal"); 2070 if (ic->journal_io) { 2071 struct journal_completion crypt_comp; 2072 crypt_comp.ic = ic; 2073 init_completion(&crypt_comp.comp); 2074 crypt_comp.in_flight = (atomic_t)ATOMIC_INIT(0); 2075 encrypt_journal(ic, false, 0, ic->journal_sections, 
&crypt_comp); 2076 wait_for_completion(&crypt_comp.comp); 2077 } 2078 DEBUG_bytes(lowmem_page_address(ic->journal[0].page), 64, "decrypted journal"); 2079 } 2080 2081 if (dm_integrity_failed(ic)) 2082 goto clear_journal; 2083 2084 journal_empty = true; 2085 memset(used_commit_ids, 0, sizeof used_commit_ids); 2086 memset(max_commit_id_sections, 0, sizeof max_commit_id_sections); 2087 for (i = 0; i < ic->journal_sections; i++) { 2088 for (j = 0; j < ic->journal_section_sectors; j++) { 2089 int k; 2090 struct journal_sector *js = access_journal(ic, i, j); 2091 k = find_commit_seq(ic, i, j, js->commit_id); 2092 if (k < 0) 2093 goto clear_journal; 2094 used_commit_ids[k] = true; 2095 max_commit_id_sections[k] = i; 2096 } 2097 if (journal_empty) { 2098 for (j = 0; j < ic->journal_section_entries; j++) { 2099 struct journal_entry *je = access_journal_entry(ic, i, j); 2100 if (!journal_entry_is_unused(je)) { 2101 journal_empty = false; 2102 break; 2103 } 2104 } 2105 } 2106 } 2107 2108 if (!used_commit_ids[N_COMMIT_IDS - 1]) { 2109 unused = N_COMMIT_IDS - 1; 2110 while (unused && !used_commit_ids[unused - 1]) 2111 unused--; 2112 } else { 2113 for (unused = 0; unused < N_COMMIT_IDS; unused++) 2114 if (!used_commit_ids[unused]) 2115 break; 2116 if (unused == N_COMMIT_IDS) { 2117 dm_integrity_io_error(ic, "journal commit ids", -EIO); 2118 goto clear_journal; 2119 } 2120 } 2121 DEBUG_print("first unused commit seq %d [%d,%d,%d,%d]\n", 2122 unused, used_commit_ids[0], used_commit_ids[1], 2123 used_commit_ids[2], used_commit_ids[3]); 2124 2125 last_used = prev_commit_seq(unused); 2126 want_commit_seq = prev_commit_seq(last_used); 2127 2128 if (!used_commit_ids[want_commit_seq] && used_commit_ids[prev_commit_seq(want_commit_seq)]) 2129 journal_empty = true; 2130 2131 write_start = max_commit_id_sections[last_used] + 1; 2132 if (unlikely(write_start >= ic->journal_sections)) 2133 want_commit_seq = next_commit_seq(want_commit_seq); 2134 wraparound_section(ic, &write_start); 2135 2136 i = write_start; 2137 for (write_sections = 0; write_sections < ic->journal_sections; write_sections++) { 2138 for (j = 0; j < ic->journal_section_sectors; j++) { 2139 struct journal_sector *js = access_journal(ic, i, j); 2140 2141 if (js->commit_id != dm_integrity_commit_id(ic, i, j, want_commit_seq)) { 2142 /* 2143 * This could be caused by crash during writing. 2144 * We won't replay the inconsistent part of the 2145 * journal. 
2146 */ 2147 DEBUG_print("commit id mismatch at position (%u, %u): %d != %d\n", 2148 i, j, find_commit_seq(ic, i, j, js->commit_id), want_commit_seq); 2149 goto brk; 2150 } 2151 } 2152 i++; 2153 if (unlikely(i >= ic->journal_sections)) 2154 want_commit_seq = next_commit_seq(want_commit_seq); 2155 wraparound_section(ic, &i); 2156 } 2157 brk: 2158 2159 if (!journal_empty) { 2160 DEBUG_print("replaying %u sections, starting at %u, commit seq %d\n", 2161 write_sections, write_start, want_commit_seq); 2162 do_journal_write(ic, write_start, write_sections, true); 2163 } 2164 2165 if (write_sections == ic->journal_sections && (ic->mode == 'J' || journal_empty)) { 2166 continue_section = write_start; 2167 ic->commit_seq = want_commit_seq; 2168 DEBUG_print("continuing from section %u, commit seq %d\n", write_start, ic->commit_seq); 2169 } else { 2170 unsigned s; 2171 unsigned char erase_seq; 2172 clear_journal: 2173 DEBUG_print("clearing journal\n"); 2174 2175 erase_seq = prev_commit_seq(prev_commit_seq(last_used)); 2176 s = write_start; 2177 init_journal(ic, s, 1, erase_seq); 2178 s++; 2179 wraparound_section(ic, &s); 2180 if (ic->journal_sections >= 2) { 2181 init_journal(ic, s, ic->journal_sections - 2, erase_seq); 2182 s += ic->journal_sections - 2; 2183 wraparound_section(ic, &s); 2184 init_journal(ic, s, 1, erase_seq); 2185 } 2186 2187 continue_section = 0; 2188 ic->commit_seq = next_commit_seq(erase_seq); 2189 } 2190 2191 ic->committed_section = continue_section; 2192 ic->n_committed_sections = 0; 2193 2194 ic->uncommitted_section = continue_section; 2195 ic->n_uncommitted_sections = 0; 2196 2197 ic->free_section = continue_section; 2198 ic->free_section_entry = 0; 2199 ic->free_sectors = ic->journal_entries; 2200 2201 ic->journal_tree_root = RB_ROOT; 2202 for (i = 0; i < ic->journal_entries; i++) 2203 init_journal_node(&ic->journal_tree[i]); 2204 } 2205 2206 static void dm_integrity_postsuspend(struct dm_target *ti) 2207 { 2208 struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private; 2209 2210 del_timer_sync(&ic->autocommit_timer); 2211 2212 ic->suspending = true; 2213 2214 queue_work(ic->commit_wq, &ic->commit_work); 2215 drain_workqueue(ic->commit_wq); 2216 2217 if (ic->mode == 'J') { 2218 drain_workqueue(ic->writer_wq); 2219 dm_integrity_flush_buffers(ic); 2220 } 2221 2222 ic->suspending = false; 2223 2224 BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); 2225 2226 ic->journal_uptodate = true; 2227 } 2228 2229 static void dm_integrity_resume(struct dm_target *ti) 2230 { 2231 struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private; 2232 2233 replay_journal(ic); 2234 } 2235 2236 static void dm_integrity_status(struct dm_target *ti, status_type_t type, 2237 unsigned status_flags, char *result, unsigned maxlen) 2238 { 2239 struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private; 2240 unsigned arg_count; 2241 size_t sz = 0; 2242 2243 switch (type) { 2244 case STATUSTYPE_INFO: 2245 DMEMIT("%llu", (unsigned long long)atomic64_read(&ic->number_of_mismatches)); 2246 break; 2247 2248 case STATUSTYPE_TABLE: { 2249 __u64 watermark_percentage = (__u64)(ic->journal_entries - ic->free_sectors_threshold) * 100; 2250 watermark_percentage += ic->journal_entries / 2; 2251 do_div(watermark_percentage, ic->journal_entries); 2252 arg_count = 5; 2253 arg_count += ic->sectors_per_block != 1; 2254 arg_count += !!ic->internal_hash_alg.alg_string; 2255 arg_count += !!ic->journal_crypt_alg.alg_string; 2256 arg_count += !!ic->journal_mac_alg.alg_string; 2257 DMEMIT("%s %llu %u %c %u", 
ic->dev->name, (unsigned long long)ic->start, 2258 ic->tag_size, ic->mode, arg_count); 2259 DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS); 2260 DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors); 2261 DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors); 2262 DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage); 2263 DMEMIT(" commit_time:%u", ic->autocommit_msec); 2264 if (ic->sectors_per_block != 1) 2265 DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT); 2266 2267 #define EMIT_ALG(a, n) \ 2268 do { \ 2269 if (ic->a.alg_string) { \ 2270 DMEMIT(" %s:%s", n, ic->a.alg_string); \ 2271 if (ic->a.key_string) \ 2272 DMEMIT(":%s", ic->a.key_string);\ 2273 } \ 2274 } while (0) 2275 EMIT_ALG(internal_hash_alg, "internal_hash"); 2276 EMIT_ALG(journal_crypt_alg, "journal_crypt"); 2277 EMIT_ALG(journal_mac_alg, "journal_mac"); 2278 break; 2279 } 2280 } 2281 } 2282 2283 static int dm_integrity_iterate_devices(struct dm_target *ti, 2284 iterate_devices_callout_fn fn, void *data) 2285 { 2286 struct dm_integrity_c *ic = ti->private; 2287 2288 return fn(ti, ic->dev, ic->start + ic->initial_sectors + ic->metadata_run, ti->len, data); 2289 } 2290 2291 static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *limits) 2292 { 2293 struct dm_integrity_c *ic = ti->private; 2294 2295 if (ic->sectors_per_block > 1) { 2296 limits->logical_block_size = ic->sectors_per_block << SECTOR_SHIFT; 2297 limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT; 2298 blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT); 2299 } 2300 } 2301 2302 static void calculate_journal_section_size(struct dm_integrity_c *ic) 2303 { 2304 unsigned sector_space = JOURNAL_SECTOR_DATA; 2305 2306 ic->journal_sections = le32_to_cpu(ic->sb->journal_sections); 2307 ic->journal_entry_size = roundup(offsetof(struct journal_entry, last_bytes[ic->sectors_per_block]) + ic->tag_size, 2308 JOURNAL_ENTRY_ROUNDUP); 2309 2310 if (ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC)) 2311 sector_space -= JOURNAL_MAC_PER_SECTOR; 2312 ic->journal_entries_per_sector = sector_space / ic->journal_entry_size; 2313 ic->journal_section_entries = ic->journal_entries_per_sector * JOURNAL_BLOCK_SECTORS; 2314 ic->journal_section_sectors = (ic->journal_section_entries << ic->sb->log2_sectors_per_block) + JOURNAL_BLOCK_SECTORS; 2315 ic->journal_entries = ic->journal_section_entries * ic->journal_sections; 2316 } 2317 2318 static int calculate_device_limits(struct dm_integrity_c *ic) 2319 { 2320 __u64 initial_sectors; 2321 sector_t last_sector, last_area, last_offset; 2322 2323 calculate_journal_section_size(ic); 2324 initial_sectors = SB_SECTORS + (__u64)ic->journal_section_sectors * ic->journal_sections; 2325 if (initial_sectors + METADATA_PADDING_SECTORS >= ic->device_sectors || initial_sectors > UINT_MAX) 2326 return -EINVAL; 2327 ic->initial_sectors = initial_sectors; 2328 2329 ic->metadata_run = roundup((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block), 2330 (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS)) >> SECTOR_SHIFT; 2331 if (!(ic->metadata_run & (ic->metadata_run - 1))) 2332 ic->log2_metadata_run = __ffs(ic->metadata_run); 2333 else 2334 ic->log2_metadata_run = -1; 2335 2336 get_area_and_offset(ic, ic->provided_data_sectors - 1, &last_area, &last_offset); 2337 last_sector = get_data_sector(ic, last_area, last_offset); 2338 2339 if (ic->start + last_sector < last_sector || ic->start + last_sector >= 
ic->device_sectors) 2340 return -EINVAL; 2341 2342 return 0; 2343 } 2344 2345 static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sectors, unsigned interleave_sectors) 2346 { 2347 unsigned journal_sections; 2348 int test_bit; 2349 2350 memset(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT); 2351 memcpy(ic->sb->magic, SB_MAGIC, 8); 2352 ic->sb->version = SB_VERSION; 2353 ic->sb->integrity_tag_size = cpu_to_le16(ic->tag_size); 2354 ic->sb->log2_sectors_per_block = __ffs(ic->sectors_per_block); 2355 if (ic->journal_mac_alg.alg_string) 2356 ic->sb->flags |= cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC); 2357 2358 calculate_journal_section_size(ic); 2359 journal_sections = journal_sectors / ic->journal_section_sectors; 2360 if (!journal_sections) 2361 journal_sections = 1; 2362 ic->sb->journal_sections = cpu_to_le32(journal_sections); 2363 2364 if (!interleave_sectors) 2365 interleave_sectors = DEFAULT_INTERLEAVE_SECTORS; 2366 ic->sb->log2_interleave_sectors = __fls(interleave_sectors); 2367 ic->sb->log2_interleave_sectors = max((__u8)MIN_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); 2368 ic->sb->log2_interleave_sectors = min((__u8)MAX_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); 2369 2370 ic->provided_data_sectors = 0; 2371 for (test_bit = fls64(ic->device_sectors) - 1; test_bit >= 3; test_bit--) { 2372 __u64 prev_data_sectors = ic->provided_data_sectors; 2373 2374 ic->provided_data_sectors |= (sector_t)1 << test_bit; 2375 if (calculate_device_limits(ic)) 2376 ic->provided_data_sectors = prev_data_sectors; 2377 } 2378 2379 if (!ic->provided_data_sectors) 2380 return -EINVAL; 2381 2382 ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors); 2383 2384 return 0; 2385 } 2386 2387 static void dm_integrity_set(struct dm_target *ti, struct dm_integrity_c *ic) 2388 { 2389 struct gendisk *disk = dm_disk(dm_table_get_md(ti->table)); 2390 struct blk_integrity bi; 2391 2392 memset(&bi, 0, sizeof(bi)); 2393 bi.profile = &dm_integrity_profile; 2394 bi.tuple_size = ic->tag_size; 2395 bi.tag_size = bi.tuple_size; 2396 bi.interval_exp = ic->sb->log2_sectors_per_block + SECTOR_SHIFT; 2397 2398 blk_integrity_register(disk, &bi); 2399 blk_queue_max_integrity_segments(disk->queue, UINT_MAX); 2400 } 2401 2402 static void dm_integrity_free_page_list(struct dm_integrity_c *ic, struct page_list *pl) 2403 { 2404 unsigned i; 2405 2406 if (!pl) 2407 return; 2408 for (i = 0; i < ic->journal_pages; i++) 2409 if (pl[i].page) 2410 __free_page(pl[i].page); 2411 kvfree(pl); 2412 } 2413 2414 static struct page_list *dm_integrity_alloc_page_list(struct dm_integrity_c *ic) 2415 { 2416 size_t page_list_desc_size = ic->journal_pages * sizeof(struct page_list); 2417 struct page_list *pl; 2418 unsigned i; 2419 2420 pl = kvmalloc(page_list_desc_size, GFP_KERNEL | __GFP_ZERO); 2421 if (!pl) 2422 return NULL; 2423 2424 for (i = 0; i < ic->journal_pages; i++) { 2425 pl[i].page = alloc_page(GFP_KERNEL); 2426 if (!pl[i].page) { 2427 dm_integrity_free_page_list(ic, pl); 2428 return NULL; 2429 } 2430 if (i) 2431 pl[i - 1].next = &pl[i]; 2432 } 2433 2434 return pl; 2435 } 2436 2437 static void dm_integrity_free_journal_scatterlist(struct dm_integrity_c *ic, struct scatterlist **sl) 2438 { 2439 unsigned i; 2440 for (i = 0; i < ic->journal_sections; i++) 2441 kvfree(sl[i]); 2442 kvfree(sl); 2443 } 2444 2445 static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_integrity_c *ic, struct page_list *pl) 2446 { 2447 struct scatterlist **sl; 2448 unsigned i; 2449 2450 sl =
kvmalloc(ic->journal_sections * sizeof(struct scatterlist *), GFP_KERNEL | __GFP_ZERO); 2451 if (!sl) 2452 return NULL; 2453 2454 for (i = 0; i < ic->journal_sections; i++) { 2455 struct scatterlist *s; 2456 unsigned start_index, start_offset; 2457 unsigned end_index, end_offset; 2458 unsigned n_pages; 2459 unsigned idx; 2460 2461 page_list_location(ic, i, 0, &start_index, &start_offset); 2462 page_list_location(ic, i, ic->journal_section_sectors - 1, &end_index, &end_offset); 2463 2464 n_pages = (end_index - start_index + 1); 2465 2466 s = kvmalloc(n_pages * sizeof(struct scatterlist), GFP_KERNEL); 2467 if (!s) { 2468 dm_integrity_free_journal_scatterlist(ic, sl); 2469 return NULL; 2470 } 2471 2472 sg_init_table(s, n_pages); 2473 for (idx = start_index; idx <= end_index; idx++) { 2474 char *va = lowmem_page_address(pl[idx].page); 2475 unsigned start = 0, end = PAGE_SIZE; 2476 if (idx == start_index) 2477 start = start_offset; 2478 if (idx == end_index) 2479 end = end_offset + (1 << SECTOR_SHIFT); 2480 sg_set_buf(&s[idx - start_index], va + start, end - start); 2481 } 2482 2483 sl[i] = s; 2484 } 2485 2486 return sl; 2487 } 2488 2489 static void free_alg(struct alg_spec *a) 2490 { 2491 kzfree(a->alg_string); 2492 kzfree(a->key); 2493 memset(a, 0, sizeof *a); 2494 } 2495 2496 static int get_alg_and_key(const char *arg, struct alg_spec *a, char **error, char *error_inval) 2497 { 2498 char *k; 2499 2500 free_alg(a); 2501 2502 a->alg_string = kstrdup(strchr(arg, ':') + 1, GFP_KERNEL); 2503 if (!a->alg_string) 2504 goto nomem; 2505 2506 k = strchr(a->alg_string, ':'); 2507 if (k) { 2508 *k = 0; 2509 a->key_string = k + 1; 2510 if (strlen(a->key_string) & 1) 2511 goto inval; 2512 2513 a->key_size = strlen(a->key_string) / 2; 2514 a->key = kmalloc(a->key_size, GFP_KERNEL); 2515 if (!a->key) 2516 goto nomem; 2517 if (hex2bin(a->key, a->key_string, a->key_size)) 2518 goto inval; 2519 } 2520 2521 return 0; 2522 inval: 2523 *error = error_inval; 2524 return -EINVAL; 2525 nomem: 2526 *error = "Out of memory for an argument"; 2527 return -ENOMEM; 2528 } 2529 2530 static int get_mac(struct crypto_shash **hash, struct alg_spec *a, char **error, 2531 char *error_alg, char *error_key) 2532 { 2533 int r; 2534 2535 if (a->alg_string) { 2536 *hash = crypto_alloc_shash(a->alg_string, 0, CRYPTO_ALG_ASYNC); 2537 if (IS_ERR(*hash)) { 2538 *error = error_alg; 2539 r = PTR_ERR(*hash); 2540 *hash = NULL; 2541 return r; 2542 } 2543 2544 if (a->key) { 2545 r = crypto_shash_setkey(*hash, a->key, a->key_size); 2546 if (r) { 2547 *error = error_key; 2548 return r; 2549 } 2550 } 2551 } 2552 2553 return 0; 2554 } 2555 2556 static int create_journal(struct dm_integrity_c *ic, char **error) 2557 { 2558 int r = 0; 2559 unsigned i; 2560 __u64 journal_pages, journal_desc_size, journal_tree_size; 2561 unsigned char *crypt_data = NULL; 2562 2563 ic->commit_ids[0] = cpu_to_le64(0x1111111111111111ULL); 2564 ic->commit_ids[1] = cpu_to_le64(0x2222222222222222ULL); 2565 ic->commit_ids[2] = cpu_to_le64(0x3333333333333333ULL); 2566 ic->commit_ids[3] = cpu_to_le64(0x4444444444444444ULL); 2567 2568 journal_pages = roundup((__u64)ic->journal_sections * ic->journal_section_sectors, 2569 PAGE_SIZE >> SECTOR_SHIFT) >> (PAGE_SHIFT - SECTOR_SHIFT); 2570 journal_desc_size = journal_pages * sizeof(struct page_list); 2571 if (journal_pages >= totalram_pages - totalhigh_pages || journal_desc_size > ULONG_MAX) { 2572 *error = "Journal doesn't fit into memory"; 2573 r = -ENOMEM; 2574 goto bad; 2575 } 2576 ic->journal_pages = journal_pages; 2577 2578 
ic->journal = dm_integrity_alloc_page_list(ic); 2579 if (!ic->journal) { 2580 *error = "Could not allocate memory for journal"; 2581 r = -ENOMEM; 2582 goto bad; 2583 } 2584 if (ic->journal_crypt_alg.alg_string) { 2585 unsigned ivsize, blocksize; 2586 struct journal_completion comp; 2587 2588 comp.ic = ic; 2589 ic->journal_crypt = crypto_alloc_skcipher(ic->journal_crypt_alg.alg_string, 0, 0); 2590 if (IS_ERR(ic->journal_crypt)) { 2591 *error = "Invalid journal cipher"; 2592 r = PTR_ERR(ic->journal_crypt); 2593 ic->journal_crypt = NULL; 2594 goto bad; 2595 } 2596 ivsize = crypto_skcipher_ivsize(ic->journal_crypt); 2597 blocksize = crypto_skcipher_blocksize(ic->journal_crypt); 2598 2599 if (ic->journal_crypt_alg.key) { 2600 r = crypto_skcipher_setkey(ic->journal_crypt, ic->journal_crypt_alg.key, 2601 ic->journal_crypt_alg.key_size); 2602 if (r) { 2603 *error = "Error setting encryption key"; 2604 goto bad; 2605 } 2606 } 2607 DEBUG_print("cipher %s, block size %u iv size %u\n", 2608 ic->journal_crypt_alg.alg_string, blocksize, ivsize); 2609 2610 ic->journal_io = dm_integrity_alloc_page_list(ic); 2611 if (!ic->journal_io) { 2612 *error = "Could not allocate memory for journal io"; 2613 r = -ENOMEM; 2614 goto bad; 2615 } 2616 2617 if (blocksize == 1) { 2618 struct scatterlist *sg; 2619 SKCIPHER_REQUEST_ON_STACK(req, ic->journal_crypt); 2620 unsigned char iv[ivsize]; 2621 skcipher_request_set_tfm(req, ic->journal_crypt); 2622 2623 ic->journal_xor = dm_integrity_alloc_page_list(ic); 2624 if (!ic->journal_xor) { 2625 *error = "Could not allocate memory for journal xor"; 2626 r = -ENOMEM; 2627 goto bad; 2628 } 2629 2630 sg = kvmalloc((ic->journal_pages + 1) * sizeof(struct scatterlist), GFP_KERNEL); 2631 if (!sg) { 2632 *error = "Unable to allocate sg list"; 2633 r = -ENOMEM; 2634 goto bad; 2635 } 2636 sg_init_table(sg, ic->journal_pages + 1); 2637 for (i = 0; i < ic->journal_pages; i++) { 2638 char *va = lowmem_page_address(ic->journal_xor[i].page); 2639 clear_page(va); 2640 sg_set_buf(&sg[i], va, PAGE_SIZE); 2641 } 2642 sg_set_buf(&sg[i], &ic->commit_ids, sizeof ic->commit_ids); 2643 memset(iv, 0x00, ivsize); 2644 2645 skcipher_request_set_crypt(req, sg, sg, PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, iv); 2646 init_completion(&comp.comp); 2647 comp.in_flight = (atomic_t)ATOMIC_INIT(1); 2648 if (do_crypt(true, req, &comp)) 2649 wait_for_completion(&comp.comp); 2650 kvfree(sg); 2651 r = dm_integrity_failed(ic); 2652 if (r) { 2653 *error = "Unable to encrypt journal"; 2654 goto bad; 2655 } 2656 DEBUG_bytes(lowmem_page_address(ic->journal_xor[0].page), 64, "xor data"); 2657 2658 crypto_free_skcipher(ic->journal_crypt); 2659 ic->journal_crypt = NULL; 2660 } else { 2661 SKCIPHER_REQUEST_ON_STACK(req, ic->journal_crypt); 2662 unsigned char iv[ivsize]; 2663 unsigned crypt_len = roundup(ivsize, blocksize); 2664 2665 crypt_data = kmalloc(crypt_len, GFP_KERNEL); 2666 if (!crypt_data) { 2667 *error = "Unable to allocate crypt data"; 2668 r = -ENOMEM; 2669 goto bad; 2670 } 2671 2672 skcipher_request_set_tfm(req, ic->journal_crypt); 2673 2674 ic->journal_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal); 2675 if (!ic->journal_scatterlist) { 2676 *error = "Unable to allocate sg list"; 2677 r = -ENOMEM; 2678 goto bad; 2679 } 2680 ic->journal_io_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal_io); 2681 if (!ic->journal_io_scatterlist) { 2682 *error = "Unable to allocate sg list"; 2683 r = -ENOMEM; 2684 goto bad; 2685 } 2686 ic->sk_requests = 
kvmalloc(ic->journal_sections * sizeof(struct skcipher_request *), GFP_KERNEL | __GFP_ZERO); 2687 if (!ic->sk_requests) { 2688 *error = "Unable to allocate sk requests"; 2689 r = -ENOMEM; 2690 goto bad; 2691 } 2692 for (i = 0; i < ic->journal_sections; i++) { 2693 struct scatterlist sg; 2694 struct skcipher_request *section_req; 2695 __u32 section_le = cpu_to_le32(i); 2696 2697 memset(iv, 0x00, ivsize); 2698 memset(crypt_data, 0x00, crypt_len); 2699 memcpy(crypt_data, &section_le, min((size_t)crypt_len, sizeof(section_le))); 2700 2701 sg_init_one(&sg, crypt_data, crypt_len); 2702 skcipher_request_set_crypt(req, &sg, &sg, crypt_len, iv); 2703 init_completion(&comp.comp); 2704 comp.in_flight = (atomic_t)ATOMIC_INIT(1); 2705 if (do_crypt(true, req, &comp)) 2706 wait_for_completion(&comp.comp); 2707 2708 r = dm_integrity_failed(ic); 2709 if (r) { 2710 *error = "Unable to generate iv"; 2711 goto bad; 2712 } 2713 2714 section_req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL); 2715 if (!section_req) { 2716 *error = "Unable to allocate crypt request"; 2717 r = -ENOMEM; 2718 goto bad; 2719 } 2720 section_req->iv = kmalloc(ivsize * 2, GFP_KERNEL); 2721 if (!section_req->iv) { 2722 skcipher_request_free(section_req); 2723 *error = "Unable to allocate iv"; 2724 r = -ENOMEM; 2725 goto bad; 2726 } 2727 memcpy(section_req->iv + ivsize, crypt_data, ivsize); 2728 section_req->cryptlen = (size_t)ic->journal_section_sectors << SECTOR_SHIFT; 2729 ic->sk_requests[i] = section_req; 2730 DEBUG_bytes(crypt_data, ivsize, "iv(%u)", i); 2731 } 2732 } 2733 } 2734 2735 for (i = 0; i < N_COMMIT_IDS; i++) { 2736 unsigned j; 2737 retest_commit_id: 2738 for (j = 0; j < i; j++) { 2739 if (ic->commit_ids[j] == ic->commit_ids[i]) { 2740 ic->commit_ids[i] = cpu_to_le64(le64_to_cpu(ic->commit_ids[i]) + 1); 2741 goto retest_commit_id; 2742 } 2743 } 2744 DEBUG_print("commit id %u: %016llx\n", i, ic->commit_ids[i]); 2745 } 2746 2747 journal_tree_size = (__u64)ic->journal_entries * sizeof(struct journal_node); 2748 if (journal_tree_size > ULONG_MAX) { 2749 *error = "Journal doesn't fit into memory"; 2750 r = -ENOMEM; 2751 goto bad; 2752 } 2753 ic->journal_tree = kvmalloc(journal_tree_size, GFP_KERNEL); 2754 if (!ic->journal_tree) { 2755 *error = "Could not allocate memory for journal tree"; 2756 r = -ENOMEM; 2757 } 2758 bad: 2759 kfree(crypt_data); 2760 return r; 2761 } 2762 2763 /* 2764 * Construct an integrity mapping 2765 * 2766 * Arguments: 2767 * device 2768 * offset from the start of the device 2769 * tag size 2770 * D - direct writes, J - journal writes, R - recovery mode 2771 * number of optional arguments 2772 * optional arguments: 2773 * journal_sectors 2774 * interleave_sectors 2775 * buffer_sectors 2776 * journal_watermark 2777 * commit_time 2778 * internal_hash 2779 * journal_crypt 2780 * journal_mac 2781 * block_size 2782 */ 2783 static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) 2784 { 2785 struct dm_integrity_c *ic; 2786 char dummy; 2787 int r; 2788 unsigned extra_args; 2789 struct dm_arg_set as; 2790 static const struct dm_arg _args[] = { 2791 {0, 9, "Invalid number of feature args"}, 2792 }; 2793 unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec; 2794 bool should_write_sb; 2795 __u64 threshold; 2796 unsigned long long start; 2797 2798 #define DIRECT_ARGUMENTS 4 2799 2800 if (argc <= DIRECT_ARGUMENTS) { 2801 ti->error = "Invalid argument count"; 2802 return -EINVAL; 2803 } 2804 2805 ic = kzalloc(sizeof(struct dm_integrity_c), GFP_KERNEL);
2806 if (!ic) { 2807 ti->error = "Cannot allocate integrity context"; 2808 return -ENOMEM; 2809 } 2810 ti->private = ic; 2811 ti->per_io_data_size = sizeof(struct dm_integrity_io); 2812 2813 ic->in_progress = RB_ROOT; 2814 init_waitqueue_head(&ic->endio_wait); 2815 bio_list_init(&ic->flush_bio_list); 2816 init_waitqueue_head(&ic->copy_to_journal_wait); 2817 init_completion(&ic->crypto_backoff); 2818 atomic64_set(&ic->number_of_mismatches, 0); 2819 2820 r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ic->dev); 2821 if (r) { 2822 ti->error = "Device lookup failed"; 2823 goto bad; 2824 } 2825 2826 if (sscanf(argv[1], "%llu%c", &start, &dummy) != 1 || start != (sector_t)start) { 2827 ti->error = "Invalid starting offset"; 2828 r = -EINVAL; 2829 goto bad; 2830 } 2831 ic->start = start; 2832 2833 if (strcmp(argv[2], "-")) { 2834 if (sscanf(argv[2], "%u%c", &ic->tag_size, &dummy) != 1 || !ic->tag_size) { 2835 ti->error = "Invalid tag size"; 2836 r = -EINVAL; 2837 goto bad; 2838 } 2839 } 2840 2841 if (!strcmp(argv[3], "J") || !strcmp(argv[3], "D") || !strcmp(argv[3], "R")) 2842 ic->mode = argv[3][0]; 2843 else { 2844 ti->error = "Invalid mode (expecting J, D, R)"; 2845 r = -EINVAL; 2846 goto bad; 2847 } 2848 2849 ic->device_sectors = i_size_read(ic->dev->bdev->bd_inode) >> SECTOR_SHIFT; 2850 journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS, 2851 ic->device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR); 2852 interleave_sectors = DEFAULT_INTERLEAVE_SECTORS; 2853 buffer_sectors = DEFAULT_BUFFER_SECTORS; 2854 journal_watermark = DEFAULT_JOURNAL_WATERMARK; 2855 sync_msec = DEFAULT_SYNC_MSEC; 2856 ic->sectors_per_block = 1; 2857 2858 as.argc = argc - DIRECT_ARGUMENTS; 2859 as.argv = argv + DIRECT_ARGUMENTS; 2860 r = dm_read_arg_group(_args, &as, &extra_args, &ti->error); 2861 if (r) 2862 goto bad; 2863 2864 while (extra_args--) { 2865 const char *opt_string; 2866 unsigned val; 2867 opt_string = dm_shift_arg(&as); 2868 if (!opt_string) { 2869 r = -EINVAL; 2870 ti->error = "Not enough feature arguments"; 2871 goto bad; 2872 } 2873 if (sscanf(opt_string, "journal_sectors:%u%c", &val, &dummy) == 1) 2874 journal_sectors = val; 2875 else if (sscanf(opt_string, "interleave_sectors:%u%c", &val, &dummy) == 1) 2876 interleave_sectors = val; 2877 else if (sscanf(opt_string, "buffer_sectors:%u%c", &val, &dummy) == 1) 2878 buffer_sectors = val; 2879 else if (sscanf(opt_string, "journal_watermark:%u%c", &val, &dummy) == 1 && val <= 100) 2880 journal_watermark = val; 2881 else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1) 2882 sync_msec = val; 2883 else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) { 2884 if (val < 1 << SECTOR_SHIFT || 2885 val > MAX_SECTORS_PER_BLOCK << SECTOR_SHIFT || 2886 (val & (val -1))) { 2887 r = -EINVAL; 2888 ti->error = "Invalid block_size argument"; 2889 goto bad; 2890 } 2891 ic->sectors_per_block = val >> SECTOR_SHIFT; 2892 } else if (!memcmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { 2893 r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error, 2894 "Invalid internal_hash argument"); 2895 if (r) 2896 goto bad; 2897 } else if (!memcmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) { 2898 r = get_alg_and_key(opt_string, &ic->journal_crypt_alg, &ti->error, 2899 "Invalid journal_crypt argument"); 2900 if (r) 2901 goto bad; 2902 } else if (!memcmp(opt_string, "journal_mac:", strlen("journal_mac:"))) { 2903 r = get_alg_and_key(opt_string, &ic->journal_mac_alg, &ti->error, 2904 "Invalid journal_mac 
argument"); 2905 if (r) 2906 goto bad; 2907 } else { 2908 r = -EINVAL; 2909 ti->error = "Invalid argument"; 2910 goto bad; 2911 } 2912 } 2913 2914 r = get_mac(&ic->internal_hash, &ic->internal_hash_alg, &ti->error, 2915 "Invalid internal hash", "Error setting internal hash key"); 2916 if (r) 2917 goto bad; 2918 2919 r = get_mac(&ic->journal_mac, &ic->journal_mac_alg, &ti->error, 2920 "Invalid journal mac", "Error setting journal mac key"); 2921 if (r) 2922 goto bad; 2923 2924 if (!ic->tag_size) { 2925 if (!ic->internal_hash) { 2926 ti->error = "Unknown tag size"; 2927 r = -EINVAL; 2928 goto bad; 2929 } 2930 ic->tag_size = crypto_shash_digestsize(ic->internal_hash); 2931 } 2932 if (ic->tag_size > MAX_TAG_SIZE) { 2933 ti->error = "Too big tag size"; 2934 r = -EINVAL; 2935 goto bad; 2936 } 2937 if (!(ic->tag_size & (ic->tag_size - 1))) 2938 ic->log2_tag_size = __ffs(ic->tag_size); 2939 else 2940 ic->log2_tag_size = -1; 2941 2942 ic->autocommit_jiffies = msecs_to_jiffies(sync_msec); 2943 ic->autocommit_msec = sync_msec; 2944 setup_timer(&ic->autocommit_timer, autocommit_fn, (unsigned long)ic); 2945 2946 ic->io = dm_io_client_create(); 2947 if (IS_ERR(ic->io)) { 2948 r = PTR_ERR(ic->io); 2949 ic->io = NULL; 2950 ti->error = "Cannot allocate dm io"; 2951 goto bad; 2952 } 2953 2954 ic->journal_io_mempool = mempool_create_slab_pool(JOURNAL_IO_MEMPOOL, journal_io_cache); 2955 if (!ic->journal_io_mempool) { 2956 r = -ENOMEM; 2957 ti->error = "Cannot allocate mempool"; 2958 goto bad; 2959 } 2960 2961 ic->metadata_wq = alloc_workqueue("dm-integrity-metadata", 2962 WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE); 2963 if (!ic->metadata_wq) { 2964 ti->error = "Cannot allocate workqueue"; 2965 r = -ENOMEM; 2966 goto bad; 2967 } 2968 2969 /* 2970 * If this workqueue were percpu, it would cause bio reordering 2971 * and reduced performance. 
2972 */ 2973 ic->wait_wq = alloc_workqueue("dm-integrity-wait", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); 2974 if (!ic->wait_wq) { 2975 ti->error = "Cannot allocate workqueue"; 2976 r = -ENOMEM; 2977 goto bad; 2978 } 2979 2980 ic->commit_wq = alloc_workqueue("dm-integrity-commit", WQ_MEM_RECLAIM, 1); 2981 if (!ic->commit_wq) { 2982 ti->error = "Cannot allocate workqueue"; 2983 r = -ENOMEM; 2984 goto bad; 2985 } 2986 INIT_WORK(&ic->commit_work, integrity_commit); 2987 2988 if (ic->mode == 'J') { 2989 ic->writer_wq = alloc_workqueue("dm-integrity-writer", WQ_MEM_RECLAIM, 1); 2990 if (!ic->writer_wq) { 2991 ti->error = "Cannot allocate workqueue"; 2992 r = -ENOMEM; 2993 goto bad; 2994 } 2995 INIT_WORK(&ic->writer_work, integrity_writer); 2996 } 2997 2998 ic->sb = alloc_pages_exact(SB_SECTORS << SECTOR_SHIFT, GFP_KERNEL); 2999 if (!ic->sb) { 3000 r = -ENOMEM; 3001 ti->error = "Cannot allocate superblock area"; 3002 goto bad; 3003 } 3004 3005 r = sync_rw_sb(ic, REQ_OP_READ, 0); 3006 if (r) { 3007 ti->error = "Error reading superblock"; 3008 goto bad; 3009 } 3010 should_write_sb = false; 3011 if (memcmp(ic->sb->magic, SB_MAGIC, 8)) { 3012 if (ic->mode != 'R') { 3013 if (memchr_inv(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT)) { 3014 r = -EINVAL; 3015 ti->error = "The device is not initialized"; 3016 goto bad; 3017 } 3018 } 3019 3020 r = initialize_superblock(ic, journal_sectors, interleave_sectors); 3021 if (r) { 3022 ti->error = "Could not initialize superblock"; 3023 goto bad; 3024 } 3025 if (ic->mode != 'R') 3026 should_write_sb = true; 3027 } 3028 3029 if (ic->sb->version != SB_VERSION) { 3030 r = -EINVAL; 3031 ti->error = "Unknown version"; 3032 goto bad; 3033 } 3034 if (le16_to_cpu(ic->sb->integrity_tag_size) != ic->tag_size) { 3035 r = -EINVAL; 3036 ti->error = "Tag size doesn't match the information in superblock"; 3037 goto bad; 3038 } 3039 if (ic->sb->log2_sectors_per_block != __ffs(ic->sectors_per_block)) { 3040 r = -EINVAL; 3041 ti->error = "Block size doesn't match the information in superblock"; 3042 goto bad; 3043 } 3044 if (!le32_to_cpu(ic->sb->journal_sections)) { 3045 r = -EINVAL; 3046 ti->error = "Corrupted superblock, journal_sections is 0"; 3047 goto bad; 3048 } 3049 /* make sure that ti->max_io_len doesn't overflow */ 3050 if (ic->sb->log2_interleave_sectors < MIN_LOG2_INTERLEAVE_SECTORS || 3051 ic->sb->log2_interleave_sectors > MAX_LOG2_INTERLEAVE_SECTORS) { 3052 r = -EINVAL; 3053 ti->error = "Invalid interleave_sectors in the superblock"; 3054 goto bad; 3055 } 3056 ic->provided_data_sectors = le64_to_cpu(ic->sb->provided_data_sectors); 3057 if (ic->provided_data_sectors != le64_to_cpu(ic->sb->provided_data_sectors)) { 3058 /* test for overflow */ 3059 r = -EINVAL; 3060 ti->error = "The superblock has 64-bit device size, but the kernel was compiled with 32-bit sectors"; 3061 goto bad; 3062 } 3063 if (!!(ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC)) != !!ic->journal_mac_alg.alg_string) { 3064 r = -EINVAL; 3065 ti->error = "Journal mac mismatch"; 3066 goto bad; 3067 } 3068 r = calculate_device_limits(ic); 3069 if (r) { 3070 ti->error = "The device is too small"; 3071 goto bad; 3072 } 3073 if (ti->len > ic->provided_data_sectors) { 3074 r = -EINVAL; 3075 ti->error = "Not enough provided sectors for requested mapping size"; 3076 goto bad; 3077 } 3078 3079 if (!buffer_sectors) 3080 buffer_sectors = 1; 3081 ic->log2_buffer_sectors = min3((int)__fls(buffer_sectors), (int)__ffs(ic->metadata_run), 31 - SECTOR_SHIFT); 3082 3083 threshold = (__u64)ic->journal_entries * (100 - 
journal_watermark); 3084 threshold += 50; 3085 do_div(threshold, 100); 3086 ic->free_sectors_threshold = threshold; 3087 3088 DEBUG_print("initialized:\n"); 3089 DEBUG_print(" integrity_tag_size %u\n", le16_to_cpu(ic->sb->integrity_tag_size)); 3090 DEBUG_print(" journal_entry_size %u\n", ic->journal_entry_size); 3091 DEBUG_print(" journal_entries_per_sector %u\n", ic->journal_entries_per_sector); 3092 DEBUG_print(" journal_section_entries %u\n", ic->journal_section_entries); 3093 DEBUG_print(" journal_section_sectors %u\n", ic->journal_section_sectors); 3094 DEBUG_print(" journal_sections %u\n", (unsigned)le32_to_cpu(ic->sb->journal_sections)); 3095 DEBUG_print(" journal_entries %u\n", ic->journal_entries); 3096 DEBUG_print(" log2_interleave_sectors %d\n", ic->sb->log2_interleave_sectors); 3097 DEBUG_print(" device_sectors 0x%llx\n", (unsigned long long)ic->device_sectors); 3098 DEBUG_print(" initial_sectors 0x%x\n", ic->initial_sectors); 3099 DEBUG_print(" metadata_run 0x%x\n", ic->metadata_run); 3100 DEBUG_print(" log2_metadata_run %d\n", ic->log2_metadata_run); 3101 DEBUG_print(" provided_data_sectors 0x%llx (%llu)\n", (unsigned long long)ic->provided_data_sectors, 3102 (unsigned long long)ic->provided_data_sectors); 3103 DEBUG_print(" log2_buffer_sectors %u\n", ic->log2_buffer_sectors); 3104 3105 ic->bufio = dm_bufio_client_create(ic->dev->bdev, 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 3106 1, 0, NULL, NULL); 3107 if (IS_ERR(ic->bufio)) { 3108 r = PTR_ERR(ic->bufio); 3109 ti->error = "Cannot initialize dm-bufio"; 3110 ic->bufio = NULL; 3111 goto bad; 3112 } 3113 dm_bufio_set_sector_offset(ic->bufio, ic->start + ic->initial_sectors); 3114 3115 if (ic->mode != 'R') { 3116 r = create_journal(ic, &ti->error); 3117 if (r) 3118 goto bad; 3119 } 3120 3121 if (should_write_sb) { 3122 int r; 3123 3124 init_journal(ic, 0, ic->journal_sections, 0); 3125 r = dm_integrity_failed(ic); 3126 if (unlikely(r)) { 3127 ti->error = "Error initializing journal"; 3128 goto bad; 3129 } 3130 r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA); 3131 if (r) { 3132 ti->error = "Error initializing superblock"; 3133 goto bad; 3134 } 3135 ic->just_formatted = true; 3136 } 3137 3138 r = dm_set_target_max_io_len(ti, 1U << ic->sb->log2_interleave_sectors); 3139 if (r) 3140 goto bad; 3141 3142 if (!ic->internal_hash) 3143 dm_integrity_set(ti, ic); 3144 3145 ti->num_flush_bios = 1; 3146 ti->flush_supported = true; 3147 3148 return 0; 3149 bad: 3150 dm_integrity_dtr(ti); 3151 return r; 3152 } 3153 3154 static void dm_integrity_dtr(struct dm_target *ti) 3155 { 3156 struct dm_integrity_c *ic = ti->private; 3157 3158 BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); 3159 3160 if (ic->metadata_wq) 3161 destroy_workqueue(ic->metadata_wq); 3162 if (ic->wait_wq) 3163 destroy_workqueue(ic->wait_wq); 3164 if (ic->commit_wq) 3165 destroy_workqueue(ic->commit_wq); 3166 if (ic->writer_wq) 3167 destroy_workqueue(ic->writer_wq); 3168 if (ic->bufio) 3169 dm_bufio_client_destroy(ic->bufio); 3170 mempool_destroy(ic->journal_io_mempool); 3171 if (ic->io) 3172 dm_io_client_destroy(ic->io); 3173 if (ic->dev) 3174 dm_put_device(ti, ic->dev); 3175 dm_integrity_free_page_list(ic, ic->journal); 3176 dm_integrity_free_page_list(ic, ic->journal_io); 3177 dm_integrity_free_page_list(ic, ic->journal_xor); 3178 if (ic->journal_scatterlist) 3179 dm_integrity_free_journal_scatterlist(ic, ic->journal_scatterlist); 3180 if (ic->journal_io_scatterlist) 3181 dm_integrity_free_journal_scatterlist(ic, ic->journal_io_scatterlist); 3182 if (ic->sk_requests) { 3183 
unsigned i; 3184 3185 for (i = 0; i < ic->journal_sections; i++) { 3186 struct skcipher_request *req = ic->sk_requests[i]; 3187 if (req) { 3188 kzfree(req->iv); 3189 skcipher_request_free(req); 3190 } 3191 } 3192 kvfree(ic->sk_requests); 3193 } 3194 kvfree(ic->journal_tree); 3195 if (ic->sb) 3196 free_pages_exact(ic->sb, SB_SECTORS << SECTOR_SHIFT); 3197 3198 if (ic->internal_hash) 3199 crypto_free_shash(ic->internal_hash); 3200 free_alg(&ic->internal_hash_alg); 3201 3202 if (ic->journal_crypt) 3203 crypto_free_skcipher(ic->journal_crypt); 3204 free_alg(&ic->journal_crypt_alg); 3205 3206 if (ic->journal_mac) 3207 crypto_free_shash(ic->journal_mac); 3208 free_alg(&ic->journal_mac_alg); 3209 3210 kfree(ic); 3211 } 3212 3213 static struct target_type integrity_target = { 3214 .name = "integrity", 3215 .version = {1, 1, 0}, 3216 .module = THIS_MODULE, 3217 .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY, 3218 .ctr = dm_integrity_ctr, 3219 .dtr = dm_integrity_dtr, 3220 .map = dm_integrity_map, 3221 .postsuspend = dm_integrity_postsuspend, 3222 .resume = dm_integrity_resume, 3223 .status = dm_integrity_status, 3224 .iterate_devices = dm_integrity_iterate_devices, 3225 .io_hints = dm_integrity_io_hints, 3226 }; 3227 3228 int __init dm_integrity_init(void) 3229 { 3230 int r; 3231 3232 journal_io_cache = kmem_cache_create("integrity_journal_io", 3233 sizeof(struct journal_io), 0, 0, NULL); 3234 if (!journal_io_cache) { 3235 DMERR("can't allocate journal io cache"); 3236 return -ENOMEM; 3237 } 3238 3239 r = dm_register_target(&integrity_target); 3240 3241 if (r < 0) 3242 DMERR("register failed %d", r); 3243 3244 return r; 3245 } 3246 3247 void dm_integrity_exit(void) 3248 { 3249 dm_unregister_target(&integrity_target); 3250 kmem_cache_destroy(journal_io_cache); 3251 } 3252 3253 module_init(dm_integrity_init); 3254 module_exit(dm_integrity_exit); 3255 3256 MODULE_AUTHOR("Milan Broz"); 3257 MODULE_AUTHOR("Mikulas Patocka"); 3258 MODULE_DESCRIPTION(DM_NAME " target for integrity tags extension"); 3259 MODULE_LICENSE("GPL"); 3260
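/*
 * Illustrative usage sketch (device path and sector count are made-up
 * example values): a device-mapper table line for this target follows the
 * argument order parsed by dm_integrity_ctr() above -- device, offset,
 * tag size (or "-" to derive it from the internal hash digest size), mode,
 * number of optional arguments, then the optional arguments:
 *
 *   dmsetup create integ --table \
 *     "0 1953792 integrity /dev/sdb 0 - J 1 internal_hash:sha256"
 */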