/*
 * Copyright (C) 2008 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 *
 * Based on jffs2 zlib code:
 * Copyright © 2001-2007 Red Hat, Inc.
 * Created by David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/zlib.h>
#include <linux/zutil.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/bio.h>
#include "compression.h"

/* Plan: call deflate() with avail_in == *sourcelen,
 * avail_out = *dstlen - 12 and flush == Z_FINISH.
 * If it doesn't manage to finish, call it again with
 * avail_in == 0 and avail_out set to the remaining 12
 * bytes for it to clean up.
 * Q: Is 12 bytes sufficient?
 */
#define STREAM_END_SPACE 12

struct workspace {
	z_stream inf_strm;
	z_stream def_strm;
	char *buf;
	struct list_head list;
};

static LIST_HEAD(idle_workspace);
static DEFINE_SPINLOCK(workspace_lock);
static unsigned long num_workspace;
static atomic_t alloc_workspace = ATOMIC_INIT(0);
static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);
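
/*
 * Workspaces hold vmalloc'ed zlib state plus a scratch page, so they are
 * expensive to set up and tear down.  The idle list above keeps up to
 * num_online_cpus() of them cached, and once more than that many are
 * allocated at the same time, new callers sleep on workspace_wait until
 * one comes back via free_workspace().
 */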

/*
 * this finds an available zlib workspace or allocates a new one.
 * An ERR_PTR is returned if things go bad.
 */
static struct workspace *find_zlib_workspace(void)
{
	struct workspace *workspace;
	int ret;
	int cpus = num_online_cpus();

again:
	spin_lock(&workspace_lock);
	if (!list_empty(&idle_workspace)) {
		workspace = list_entry(idle_workspace.next, struct workspace,
				       list);
		list_del(&workspace->list);
		num_workspace--;
		spin_unlock(&workspace_lock);
		return workspace;
	}
	spin_unlock(&workspace_lock);

	if (atomic_read(&alloc_workspace) > cpus) {
		DEFINE_WAIT(wait);

		prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
		if (atomic_read(&alloc_workspace) > cpus)
			schedule();
		finish_wait(&workspace_wait, &wait);
		goto again;
	}
	atomic_inc(&alloc_workspace);
	workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
	if (!workspace) {
		ret = -ENOMEM;
		goto fail;
	}

	workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
	if (!workspace->def_strm.workspace) {
		ret = -ENOMEM;
		goto fail;
	}
	workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
	if (!workspace->inf_strm.workspace) {
		ret = -ENOMEM;
		goto fail_inflate;
	}
	workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
	if (!workspace->buf) {
		ret = -ENOMEM;
		goto fail_kmalloc;
	}
	return workspace;

fail_kmalloc:
	vfree(workspace->inf_strm.workspace);
fail_inflate:
	vfree(workspace->def_strm.workspace);
fail:
	kfree(workspace);
	atomic_dec(&alloc_workspace);
	wake_up(&workspace_wait);
	return ERR_PTR(ret);
}

/*
 * put a workspace struct back on the list or free it if we have enough
 * idle ones sitting around
 */
static int free_workspace(struct workspace *workspace)
{
	spin_lock(&workspace_lock);
	if (num_workspace < num_online_cpus()) {
		list_add_tail(&workspace->list, &idle_workspace);
		num_workspace++;
		spin_unlock(&workspace_lock);
		if (waitqueue_active(&workspace_wait))
			wake_up(&workspace_wait);
		return 0;
	}
	spin_unlock(&workspace_lock);
	vfree(workspace->def_strm.workspace);
	vfree(workspace->inf_strm.workspace);
	kfree(workspace->buf);
	kfree(workspace);

	atomic_dec(&alloc_workspace);
	if (waitqueue_active(&workspace_wait))
		wake_up(&workspace_wait);
	return 0;
}

/*
 * cleanup function for module exit
 */
static void free_workspaces(void)
{
	struct workspace *workspace;

	while (!list_empty(&idle_workspace)) {
		workspace = list_entry(idle_workspace.next, struct workspace,
				       list);
		list_del(&workspace->list);
		vfree(workspace->def_strm.workspace);
		vfree(workspace->inf_strm.workspace);
		kfree(workspace->buf);
		kfree(workspace);
		atomic_dec(&alloc_workspace);
	}
}
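
/*
 * Typical usage pattern for the pool above (a sketch; error codes vary
 * by caller):
 *
 *	workspace = find_zlib_workspace();
 *	if (IS_ERR(workspace))
 *		return PTR_ERR(workspace);
 *	...use workspace->def_strm or workspace->inf_strm...
 *	free_workspace(workspace);
 *
 * Both entry points below follow this shape.
 */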

/*
 * given an address space and start/len, compress the bytes.
 *
 * pages are allocated to hold the compressed result and stored
 * in 'pages'
 *
 * out_pages is used to return the number of pages allocated.  There
 * may be pages allocated even if we return an error.
 *
 * total_in is used to return the number of bytes actually read.  It
 * may be smaller than len if we had to exit early because we
 * ran out of room in the pages array or because we crossed the
 * max_out threshold.
 *
 * total_out is used to return the total number of compressed bytes
 *
 * max_out tells us the max number of bytes that we're allowed to
 * stuff into pages
 */
int btrfs_zlib_compress_pages(struct address_space *mapping,
			      u64 start, unsigned long len,
			      struct page **pages,
			      unsigned long nr_dest_pages,
			      unsigned long *out_pages,
			      unsigned long *total_in,
			      unsigned long *total_out,
			      unsigned long max_out)
{
	int ret;
	struct workspace *workspace;
	char *data_in;
	char *cpage_out;
	int nr_pages = 0;
	struct page *in_page = NULL;
	struct page *out_page = NULL;
	unsigned long bytes_left;

	*out_pages = 0;
	*total_out = 0;
	*total_in = 0;

	workspace = find_zlib_workspace();
	if (IS_ERR(workspace))
		return -1;

	/* compression level 3: cheaper than zlib's default of 6 */
	if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
		printk(KERN_WARNING "deflateInit failed\n");
		ret = -1;
		goto out;
	}

	workspace->def_strm.total_in = 0;
	workspace->def_strm.total_out = 0;

	in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
	data_in = kmap(in_page);

	out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
	if (out_page == NULL) {
		ret = -1;
		goto out;
	}
	cpage_out = kmap(out_page);
	pages[0] = out_page;
	nr_pages = 1;

	workspace->def_strm.next_in = data_in;
	workspace->def_strm.next_out = cpage_out;
	workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
	workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
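
	/*
	 * Main loop: each pass feeds deflate at most one page of input
	 * and drains at most one page of output.  Z_SYNC_FLUSH pushes
	 * pending output out on every call, so total_out tracks real
	 * progress and we can bail out early on incompressible data.
	 */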
	while (workspace->def_strm.total_in < len) {
		ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
		if (ret != Z_OK) {
			printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
			       ret);
			zlib_deflateEnd(&workspace->def_strm);
			ret = -1;
			goto out;
		}

		/* we're making it bigger, give up */
		if (workspace->def_strm.total_in > 8192 &&
		    workspace->def_strm.total_in <
		    workspace->def_strm.total_out) {
			ret = -1;
			goto out;
		}
		/* we need another page for writing out.  Test this
		 * before the total_in so we will pull in a new page for
		 * the stream end if required
		 */
		if (workspace->def_strm.avail_out == 0) {
			kunmap(out_page);
			if (nr_pages == nr_dest_pages) {
				out_page = NULL;
				ret = -1;
				goto out;
			}
			out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
			if (out_page == NULL) {
				ret = -1;
				goto out;
			}
			cpage_out = kmap(out_page);
			pages[nr_pages] = out_page;
			nr_pages++;
			workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
			workspace->def_strm.next_out = cpage_out;
		}
		/* we're all done */
		if (workspace->def_strm.total_in >= len)
			break;

		/* we've read in a full page, get a new one */
		if (workspace->def_strm.avail_in == 0) {
			if (workspace->def_strm.total_out > max_out)
				break;

			bytes_left = len - workspace->def_strm.total_in;
			kunmap(in_page);
			page_cache_release(in_page);

			start += PAGE_CACHE_SIZE;
			in_page = find_get_page(mapping,
						start >> PAGE_CACHE_SHIFT);
			data_in = kmap(in_page);
			workspace->def_strm.avail_in = min(bytes_left,
							   PAGE_CACHE_SIZE);
			workspace->def_strm.next_in = data_in;
		}
	}
	workspace->def_strm.avail_in = 0;
	ret = zlib_deflate(&workspace->def_strm, Z_FINISH);
	zlib_deflateEnd(&workspace->def_strm);

	if (ret != Z_STREAM_END) {
		ret = -1;
		goto out;
	}

	/* compression must actually save space to be worth it */
	if (workspace->def_strm.total_out >= workspace->def_strm.total_in) {
		ret = -1;
		goto out;
	}

	ret = 0;
	*total_out = workspace->def_strm.total_out;
	*total_in = workspace->def_strm.total_in;
out:
	*out_pages = nr_pages;
	if (out_page)
		kunmap(out_page);

	if (in_page) {
		kunmap(in_page);
		page_cache_release(in_page);
	}
	free_workspace(workspace);
	return ret;
}
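
/*
 * Usage sketch for the function above (hypothetical caller, for
 * illustration only; the variable names are made up):
 *
 *	struct page *cpages[16];
 *	unsigned long nr_pages, total_in, total_out;
 *
 *	ret = btrfs_zlib_compress_pages(inode->i_mapping, start, len,
 *					cpages, 16, &nr_pages,
 *					&total_in, &total_out, len - 1);
 *
 * On success the compressed bytes live in cpages[0..nr_pages - 1]; on
 * failure the caller must still release the nr_pages pages that were
 * handed back.
 */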

/*
 * pages_in is an array of pages with compressed data.
 *
 * disk_start is the starting logical offset of this array in the file
 *
 * bvec is a bio_vec of pages from the file that we want to decompress into
 *
 * vcnt is the count of pages in the biovec
 *
 * srclen is the number of bytes in pages_in
 *
 * The basic idea is that we have a bio that was created by readpages.
 * The pages in the bio are for the uncompressed data, and they may not
 * be contiguous.  They all correspond to the range of bytes covered by
 * the compressed extent.
 */
int btrfs_zlib_decompress_biovec(struct page **pages_in,
				 u64 disk_start,
				 struct bio_vec *bvec,
				 int vcnt,
				 size_t srclen)
{
	int ret = 0;
	int wbits = MAX_WBITS;
	struct workspace *workspace;
	char *data_in;
	size_t total_out = 0;
	unsigned long page_bytes_left;
	unsigned long page_in_index = 0;
	unsigned long page_out_index = 0;
	struct page *page_out;
	unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
				       PAGE_CACHE_SIZE;
	unsigned long buf_start;
	unsigned long buf_offset;
	unsigned long bytes;
	unsigned long working_bytes;
	unsigned long pg_offset;
	unsigned long start_byte;
	unsigned long current_buf_start;
	char *kaddr;

	workspace = find_zlib_workspace();
	if (IS_ERR(workspace))
		return -ENOMEM;

	data_in = kmap(pages_in[page_in_index]);
	workspace->inf_strm.next_in = data_in;
	workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE);
	workspace->inf_strm.total_in = 0;

	workspace->inf_strm.total_out = 0;
	workspace->inf_strm.next_out = workspace->buf;
	workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
	page_out = bvec[page_out_index].bv_page;
	page_bytes_left = PAGE_CACHE_SIZE;
	pg_offset = 0;

	/* If it's deflate, and it's got no preset dictionary, then
	   we can tell zlib to skip the adler32 check.  The two header
	   bytes are CMF and FLG: the low nibble of CMF must be 8
	   (Z_DEFLATED), FLG bit 5 (PRESET_DICT) must be clear, and
	   (CMF << 8 | FLG) must be a multiple of 31.  A negative
	   windowBits value tells zlib_inflateInit2 to expect raw
	   deflate data with no zlib header or checksum. */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0] << 8) + data_in[1]) % 31)) {

		wbits = -((data_in[0] >> 4) + 8);
		workspace->inf_strm.next_in += 2;
		workspace->inf_strm.avail_in -= 2;
	}

	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
		printk(KERN_WARNING "inflateInit failed\n");
		ret = -1;
		goto out;
	}
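
	/*
	 * Bookkeeping for the loop below.  All offsets are relative to
	 * the start of the uncompressed extent:
	 *
	 *   total_out      bytes inflated so far; the data currently in
	 *                  workspace->buf ends at this offset
	 *   buf_start      value of total_out before the latest inflate
	 *                  call, i.e. the offset of workspace->buf[0]
	 *   working_bytes  bytes in workspace->buf not yet copied out
	 *   start_byte     offset of the page we are copying into
	 */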
	while (workspace->inf_strm.total_in < srclen) {
		ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
		if (ret != Z_OK && ret != Z_STREAM_END)
			break;
		/*
		 * buf_start is the byte offset of the start of our
		 * workspace buffer, relative to the start of the
		 * uncompressed data
		 */
		buf_start = total_out;

		/* total_out is the end of the data inflated so far */
		total_out = workspace->inf_strm.total_out;

		working_bytes = total_out - buf_start;

		/*
		 * start_byte is the first byte of the page we're currently
		 * copying into, relative to the start of the uncompressed
		 * data
		 */
		start_byte = page_offset(page_out) - disk_start;

		if (working_bytes == 0) {
			/* we didn't make progress in this inflate
			 * call, we're done
			 */
			if (ret != Z_STREAM_END)
				ret = -1;
			break;
		}

		/* we haven't yet hit data corresponding to this page */
		if (total_out <= start_byte)
			goto next;

		/*
		 * the start of the data we care about is offset into
		 * the middle of our working buffer
		 */
		if (total_out > start_byte && buf_start < start_byte) {
			buf_offset = start_byte - buf_start;
			working_bytes -= buf_offset;
		} else {
			buf_offset = 0;
		}
		current_buf_start = buf_start;

		/* copy bytes from the working buffer into the pages */
		while (working_bytes > 0) {
			bytes = min(PAGE_CACHE_SIZE - pg_offset,
				    PAGE_CACHE_SIZE - buf_offset);
			bytes = min(bytes, working_bytes);
			kaddr = kmap_atomic(page_out, KM_USER0);
			memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
			       bytes);
			kunmap_atomic(kaddr, KM_USER0);
			flush_dcache_page(page_out);

			pg_offset += bytes;
			page_bytes_left -= bytes;
			buf_offset += bytes;
			working_bytes -= bytes;
			current_buf_start += bytes;

			/* check if we need to pick another page */
			if (page_bytes_left == 0) {
				page_out_index++;
				if (page_out_index >= vcnt) {
					ret = 0;
					goto done;
				}

				page_out = bvec[page_out_index].bv_page;
				pg_offset = 0;
				page_bytes_left = PAGE_CACHE_SIZE;
				start_byte = page_offset(page_out) -
					     disk_start;

				/*
				 * make sure our new page is covered by this
				 * working buffer
				 */
				if (total_out <= start_byte)
					goto next;

				/* the next page in the biovec might not
				 * be adjacent to the last page, but it
				 * might still be found inside this working
				 * buffer.  bump our offset pointer
				 */
				if (total_out > start_byte &&
				    current_buf_start < start_byte) {
					buf_offset = start_byte - buf_start;
					working_bytes = total_out - start_byte;
					current_buf_start = buf_start +
							    buf_offset;
				}
			}
		}
next:
		workspace->inf_strm.next_out = workspace->buf;
		workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;

		if (workspace->inf_strm.avail_in == 0) {
			unsigned long tmp;

			kunmap(pages_in[page_in_index]);
			page_in_index++;
			if (page_in_index >= total_pages_in) {
				data_in = NULL;
				break;
			}
			data_in = kmap(pages_in[page_in_index]);
			workspace->inf_strm.next_in = data_in;
			tmp = srclen - workspace->inf_strm.total_in;
			workspace->inf_strm.avail_in = min(tmp,
							   PAGE_CACHE_SIZE);
		}
	}
	if (ret != Z_STREAM_END)
		ret = -1;
	else
		ret = 0;
done:
	zlib_inflateEnd(&workspace->inf_strm);
	if (data_in)
		kunmap(pages_in[page_in_index]);
out:
	free_workspace(workspace);
	return ret;
}
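
/*
 * Usage sketch for the single-page routine below (hypothetical caller;
 * 'cbuf' is a buffer already holding srclen compressed bytes):
 *
 *	ret = btrfs_zlib_decompress(cbuf, dest_page, start_byte,
 *				    srclen, destlen);
 *
 * On success, destlen bytes of data starting start_byte bytes into the
 * uncompressed stream have been copied into dest_page.
 */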
/*
 * a less complex decompression routine.  Our compressed data fits in a
 * single page, and we want to read a single page out of it.
 * start_byte tells us the offset into the uncompressed data we're
 * interested in
 */
int btrfs_zlib_decompress(unsigned char *data_in,
			  struct page *dest_page,
			  unsigned long start_byte,
			  size_t srclen, size_t destlen)
{
	int ret = 0;
	int wbits = MAX_WBITS;
	struct workspace *workspace;
	unsigned long bytes_left = destlen;
	unsigned long total_out = 0;
	/*
	 * pg_offset must survive across iterations of the loop below:
	 * the bytes for dest_page can arrive over several inflate calls
	 */
	unsigned long pg_offset = 0;
	char *kaddr;

	if (destlen > PAGE_CACHE_SIZE)
		return -ENOMEM;

	workspace = find_zlib_workspace();
	if (IS_ERR(workspace))
		return -ENOMEM;

	workspace->inf_strm.next_in = data_in;
	workspace->inf_strm.avail_in = srclen;
	workspace->inf_strm.total_in = 0;

	workspace->inf_strm.next_out = workspace->buf;
	workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
	workspace->inf_strm.total_out = 0;
	/* If it's deflate, and it's got no preset dictionary, then
	   we can tell zlib to skip the adler32 check. */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0] << 8) + data_in[1]) % 31)) {

		wbits = -((data_in[0] >> 4) + 8);
		workspace->inf_strm.next_in += 2;
		workspace->inf_strm.avail_in -= 2;
	}

	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
		printk(KERN_WARNING "inflateInit failed\n");
		ret = -1;
		goto out;
	}

	while (bytes_left > 0) {
		unsigned long buf_start;
		unsigned long buf_offset;
		unsigned long bytes;

		ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
		if (ret != Z_OK && ret != Z_STREAM_END)
			break;

		buf_start = total_out;
		total_out = workspace->inf_strm.total_out;

		/* no progress in this inflate call, we're done */
		if (total_out == buf_start) {
			ret = -1;
			break;
		}

		if (total_out <= start_byte)
			goto next;

		if (total_out > start_byte && buf_start < start_byte)
			buf_offset = start_byte - buf_start;
		else
			buf_offset = 0;

		bytes = min(PAGE_CACHE_SIZE - pg_offset,
			    PAGE_CACHE_SIZE - buf_offset);
		bytes = min(bytes, bytes_left);

		kaddr = kmap_atomic(dest_page, KM_USER0);
		memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
		kunmap_atomic(kaddr, KM_USER0);

		pg_offset += bytes;
		bytes_left -= bytes;
next:
		workspace->inf_strm.next_out = workspace->buf;
		workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
	}

	if (ret != Z_STREAM_END && bytes_left != 0)
		ret = -1;
	else
		ret = 0;

	zlib_inflateEnd(&workspace->inf_strm);
out:
	free_workspace(workspace);
	return ret;
}

void btrfs_zlib_exit(void)
{
	free_workspaces();
}