xref: /openbmc/linux/fs/btrfs/compression.c (revision 26d0dfbb16fcb17d128a79dc70f3020ea6992af0)
1c1d7c514SDavid Sterba // SPDX-License-Identifier: GPL-2.0
2c8b97818SChris Mason /*
3c8b97818SChris Mason  * Copyright (C) 2008 Oracle.  All rights reserved.
4c8b97818SChris Mason  */
5c8b97818SChris Mason 
6c8b97818SChris Mason #include <linux/kernel.h>
7c8b97818SChris Mason #include <linux/bio.h>
8c8b97818SChris Mason #include <linux/file.h>
9c8b97818SChris Mason #include <linux/fs.h>
10c8b97818SChris Mason #include <linux/pagemap.h>
11a75b81c3SVishal Moola (Oracle) #include <linux/pagevec.h>
12c8b97818SChris Mason #include <linux/highmem.h>
13e41d12f5SChristoph Hellwig #include <linux/kthread.h>
14c8b97818SChris Mason #include <linux/time.h>
15c8b97818SChris Mason #include <linux/init.h>
16c8b97818SChris Mason #include <linux/string.h>
17c8b97818SChris Mason #include <linux/backing-dev.h>
18c8b97818SChris Mason #include <linux/writeback.h>
194088a47eSChristoph Hellwig #include <linux/psi.h>
205a0e3ad6STejun Heo #include <linux/slab.h>
21fe308533SDavid Sterba #include <linux/sched/mm.h>
2219562430STimofey Titovets #include <linux/log2.h>
23d5178578SJohannes Thumshirn #include <crypto/hash.h>
24602cbe91SDavid Sterba #include "misc.h"
25c8b97818SChris Mason #include "ctree.h"
26ec8eb376SJosef Bacik #include "fs.h"
27c8b97818SChris Mason #include "disk-io.h"
28c8b97818SChris Mason #include "transaction.h"
29c8b97818SChris Mason #include "btrfs_inode.h"
30103c1972SChristoph Hellwig #include "bio.h"
31c8b97818SChris Mason #include "ordered-data.h"
32c8b97818SChris Mason #include "compression.h"
33c8b97818SChris Mason #include "extent_io.h"
34c8b97818SChris Mason #include "extent_map.h"
356a404910SQu Wenruo #include "subpage.h"
36764c7c9aSJohannes Thumshirn #include "zoned.h"
377c8ede16SJosef Bacik #include "file-item.h"
387f0add25SJosef Bacik #include "super.h"
39c8b97818SChris Mason 
40e794203eSBen Dooks static struct bio_set btrfs_compressed_bioset;
41544fe4a9SChristoph Hellwig 
42e128f9c3SDavid Sterba static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
43e128f9c3SDavid Sterba 
44e128f9c3SDavid Sterba const char* btrfs_compress_type2str(enum btrfs_compression_type type)
45e128f9c3SDavid Sterba {
46e128f9c3SDavid Sterba 	switch (type) {
47e128f9c3SDavid Sterba 	case BTRFS_COMPRESS_ZLIB:
48e128f9c3SDavid Sterba 	case BTRFS_COMPRESS_LZO:
49e128f9c3SDavid Sterba 	case BTRFS_COMPRESS_ZSTD:
50e128f9c3SDavid Sterba 	case BTRFS_COMPRESS_NONE:
51e128f9c3SDavid Sterba 		return btrfs_compress_types[type];
52ce96b7ffSChengguang Xu 	default:
53ce96b7ffSChengguang Xu 		break;
54e128f9c3SDavid Sterba 	}
55e128f9c3SDavid Sterba 
56e128f9c3SDavid Sterba 	return NULL;
57e128f9c3SDavid Sterba }
58e128f9c3SDavid Sterba 
59544fe4a9SChristoph Hellwig static inline struct compressed_bio *to_compressed_bio(struct btrfs_bio *bbio)
60544fe4a9SChristoph Hellwig {
61544fe4a9SChristoph Hellwig 	return container_of(bbio, struct compressed_bio, bbio);
62544fe4a9SChristoph Hellwig }
63544fe4a9SChristoph Hellwig 
64544fe4a9SChristoph Hellwig static struct compressed_bio *alloc_compressed_bio(struct btrfs_inode *inode,
65544fe4a9SChristoph Hellwig 						   u64 start, blk_opf_t op,
66544fe4a9SChristoph Hellwig 						   btrfs_bio_end_io_t end_io)
67544fe4a9SChristoph Hellwig {
68544fe4a9SChristoph Hellwig 	struct btrfs_bio *bbio;
69544fe4a9SChristoph Hellwig 
70544fe4a9SChristoph Hellwig 	bbio = btrfs_bio(bio_alloc_bioset(NULL, BTRFS_MAX_COMPRESSED_PAGES, op,
71544fe4a9SChristoph Hellwig 					  GFP_NOFS, &btrfs_compressed_bioset));
724317ff00SQu Wenruo 	btrfs_bio_init(bbio, inode->root->fs_info, end_io, NULL);
734317ff00SQu Wenruo 	bbio->inode = inode;
74544fe4a9SChristoph Hellwig 	bbio->file_offset = start;
75544fe4a9SChristoph Hellwig 	return to_compressed_bio(bbio);
76544fe4a9SChristoph Hellwig }
77544fe4a9SChristoph Hellwig 
78aa53e3bfSJohannes Thumshirn bool btrfs_compress_is_valid_type(const char *str, size_t len)
79aa53e3bfSJohannes Thumshirn {
80aa53e3bfSJohannes Thumshirn 	int i;
81aa53e3bfSJohannes Thumshirn 
82aa53e3bfSJohannes Thumshirn 	for (i = 1; i < ARRAY_SIZE(btrfs_compress_types); i++) {
83aa53e3bfSJohannes Thumshirn 		size_t comp_len = strlen(btrfs_compress_types[i]);
84aa53e3bfSJohannes Thumshirn 
85aa53e3bfSJohannes Thumshirn 		if (len < comp_len)
86aa53e3bfSJohannes Thumshirn 			continue;
87aa53e3bfSJohannes Thumshirn 
88aa53e3bfSJohannes Thumshirn 		if (!strncmp(btrfs_compress_types[i], str, comp_len))
89aa53e3bfSJohannes Thumshirn 			return true;
90aa53e3bfSJohannes Thumshirn 	}
91aa53e3bfSJohannes Thumshirn 	return false;
92aa53e3bfSJohannes Thumshirn }
93aa53e3bfSJohannes Thumshirn 
941e4eb746SDavid Sterba static int compression_compress_pages(int type, struct list_head *ws,
951e4eb746SDavid Sterba                struct address_space *mapping, u64 start, struct page **pages,
961e4eb746SDavid Sterba                unsigned long *out_pages, unsigned long *total_in,
971e4eb746SDavid Sterba                unsigned long *total_out)
981e4eb746SDavid Sterba {
991e4eb746SDavid Sterba 	switch (type) {
1001e4eb746SDavid Sterba 	case BTRFS_COMPRESS_ZLIB:
1011e4eb746SDavid Sterba 		return zlib_compress_pages(ws, mapping, start, pages,
1021e4eb746SDavid Sterba 				out_pages, total_in, total_out);
1031e4eb746SDavid Sterba 	case BTRFS_COMPRESS_LZO:
1041e4eb746SDavid Sterba 		return lzo_compress_pages(ws, mapping, start, pages,
1051e4eb746SDavid Sterba 				out_pages, total_in, total_out);
1061e4eb746SDavid Sterba 	case BTRFS_COMPRESS_ZSTD:
1071e4eb746SDavid Sterba 		return zstd_compress_pages(ws, mapping, start, pages,
1081e4eb746SDavid Sterba 				out_pages, total_in, total_out);
1091e4eb746SDavid Sterba 	case BTRFS_COMPRESS_NONE:
1101e4eb746SDavid Sterba 	default:
1111e4eb746SDavid Sterba 		/*
1121d8ba9e7SQu Wenruo 		 * This can happen when compression races with remount setting
1131d8ba9e7SQu Wenruo 		 * it to 'no compress', while the caller doesn't call
1141d8ba9e7SQu Wenruo 		 * inode_need_compress() to check if we really need to
1151d8ba9e7SQu Wenruo 		 * compress.
1161d8ba9e7SQu Wenruo 		 *
1171d8ba9e7SQu Wenruo 		 * Not a big deal, we just need to inform the caller that we
1181d8ba9e7SQu Wenruo 		 * haven't allocated any pages yet.
1191e4eb746SDavid Sterba 		 */
1201d8ba9e7SQu Wenruo 		*out_pages = 0;
1211e4eb746SDavid Sterba 		return -E2BIG;
1221e4eb746SDavid Sterba 	}
1231e4eb746SDavid Sterba }
1241e4eb746SDavid Sterba 
1254a9e803eSSu Yue static int compression_decompress_bio(struct list_head *ws,
1261e4eb746SDavid Sterba 				      struct compressed_bio *cb)
1271e4eb746SDavid Sterba {
1284a9e803eSSu Yue 	switch (cb->compress_type) {
1291e4eb746SDavid Sterba 	case BTRFS_COMPRESS_ZLIB: return zlib_decompress_bio(ws, cb);
1301e4eb746SDavid Sterba 	case BTRFS_COMPRESS_LZO:  return lzo_decompress_bio(ws, cb);
1311e4eb746SDavid Sterba 	case BTRFS_COMPRESS_ZSTD: return zstd_decompress_bio(ws, cb);
1321e4eb746SDavid Sterba 	case BTRFS_COMPRESS_NONE:
1331e4eb746SDavid Sterba 	default:
1341e4eb746SDavid Sterba 		/*
1351e4eb746SDavid Sterba 		 * This can't happen, the type is validated several times
1361e4eb746SDavid Sterba 		 * before we get here.
1371e4eb746SDavid Sterba 		 */
1381e4eb746SDavid Sterba 		BUG();
1391e4eb746SDavid Sterba 	}
1401e4eb746SDavid Sterba }
1411e4eb746SDavid Sterba 
1421e4eb746SDavid Sterba static int compression_decompress(int type, struct list_head *ws,
1433e09b5b2SDavid Sterba 		const u8 *data_in, struct page *dest_page,
144*56a1bf2bSQu Wenruo 		unsigned long dest_pgoff, size_t srclen, size_t destlen)
1451e4eb746SDavid Sterba {
1461e4eb746SDavid Sterba 	switch (type) {
1471e4eb746SDavid Sterba 	case BTRFS_COMPRESS_ZLIB: return zlib_decompress(ws, data_in, dest_page,
148*56a1bf2bSQu Wenruo 						dest_pgoff, srclen, destlen);
1491e4eb746SDavid Sterba 	case BTRFS_COMPRESS_LZO:  return lzo_decompress(ws, data_in, dest_page,
150*56a1bf2bSQu Wenruo 						dest_pgoff, srclen, destlen);
1511e4eb746SDavid Sterba 	case BTRFS_COMPRESS_ZSTD: return zstd_decompress(ws, data_in, dest_page,
152*56a1bf2bSQu Wenruo 						dest_pgoff, srclen, destlen);
1531e4eb746SDavid Sterba 	case BTRFS_COMPRESS_NONE:
1541e4eb746SDavid Sterba 	default:
1551e4eb746SDavid Sterba 		/*
1561e4eb746SDavid Sterba 		 * This can't happen, the type is validated several times
1571e4eb746SDavid Sterba 		 * before we get here.
1581e4eb746SDavid Sterba 		 */
1591e4eb746SDavid Sterba 		BUG();
1601e4eb746SDavid Sterba 	}
1611e4eb746SDavid Sterba }
1621e4eb746SDavid Sterba 
16532586c5bSChristoph Hellwig static void btrfs_free_compressed_pages(struct compressed_bio *cb)
16432586c5bSChristoph Hellwig {
165a959a174SChristoph Hellwig 	for (unsigned int i = 0; i < cb->nr_pages; i++)
166a959a174SChristoph Hellwig 		put_page(cb->compressed_pages[i]);
16732586c5bSChristoph Hellwig 	kfree(cb->compressed_pages);
16832586c5bSChristoph Hellwig }
16932586c5bSChristoph Hellwig 
1708140dc30SAnand Jain static int btrfs_decompress_bio(struct compressed_bio *cb);
17148a3b636SEric Sandeen 
17230493ff4SQu Wenruo static void end_compressed_bio_read(struct btrfs_bio *bbio)
17386ccbb4dSQu Wenruo {
174544fe4a9SChristoph Hellwig 	struct compressed_bio *cb = to_compressed_bio(bbio);
175544fe4a9SChristoph Hellwig 	blk_status_t status = bbio->bio.bi_status;
17686ccbb4dSQu Wenruo 
177544fe4a9SChristoph Hellwig 	if (!status)
178544fe4a9SChristoph Hellwig 		status = errno_to_blk_status(btrfs_decompress_bio(cb));
17981bd9328SChristoph Hellwig 
18032586c5bSChristoph Hellwig 	btrfs_free_compressed_pages(cb);
181b7d463a1SChristoph Hellwig 	btrfs_bio_end_io(cb->orig_bbio, status);
182917f32a2SChristoph Hellwig 	bio_put(&bbio->bio);
183c8b97818SChris Mason }
184c8b97818SChris Mason 
185c8b97818SChris Mason /*
186c8b97818SChris Mason  * Clear the writeback bits on all of the file
187c8b97818SChris Mason  * pages for a compressed write
188c8b97818SChris Mason  */
189544fe4a9SChristoph Hellwig static noinline void end_compressed_writeback(const struct compressed_bio *cb)
190c8b97818SChris Mason {
191544fe4a9SChristoph Hellwig 	struct inode *inode = &cb->bbio.inode->vfs_inode;
192741ec653SQu Wenruo 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
19309cbfeafSKirill A. Shutemov 	unsigned long index = cb->start >> PAGE_SHIFT;
19409cbfeafSKirill A. Shutemov 	unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
195a75b81c3SVishal Moola (Oracle) 	struct folio_batch fbatch;
196544fe4a9SChristoph Hellwig 	const int errno = blk_status_to_errno(cb->bbio.bio.bi_status);
197c8b97818SChris Mason 	int i;
198c8b97818SChris Mason 	int ret;
199c8b97818SChris Mason 
200606f82e7SJosef Bacik 	if (errno)
201606f82e7SJosef Bacik 		mapping_set_error(inode->i_mapping, errno);
2027bdcefc1SFilipe Manana 
203a75b81c3SVishal Moola (Oracle) 	folio_batch_init(&fbatch);
204a75b81c3SVishal Moola (Oracle) 	while (index <= end_index) {
205a75b81c3SVishal Moola (Oracle) 		ret = filemap_get_folios(inode->i_mapping, &index, end_index,
206a75b81c3SVishal Moola (Oracle) 				&fbatch);
207a75b81c3SVishal Moola (Oracle) 
208a75b81c3SVishal Moola (Oracle) 		if (ret == 0)
209a75b81c3SVishal Moola (Oracle) 			return;
210a75b81c3SVishal Moola (Oracle) 
211c8b97818SChris Mason 		for (i = 0; i < ret; i++) {
212a75b81c3SVishal Moola (Oracle) 			struct folio *folio = fbatch.folios[i];
213a75b81c3SVishal Moola (Oracle) 
214a75b81c3SVishal Moola (Oracle) 			btrfs_page_clamp_clear_writeback(fs_info, &folio->page,
215741ec653SQu Wenruo 							 cb->start, cb->len);
216c8b97818SChris Mason 		}
217a75b81c3SVishal Moola (Oracle) 		folio_batch_release(&fbatch);
218c8b97818SChris Mason 	}
219c8b97818SChris Mason 	/* the inode may be gone now */
220c8b97818SChris Mason }
221c8b97818SChris Mason 
222f9327a70SChristoph Hellwig static void btrfs_finish_compressed_write_work(struct work_struct *work)
223c8b97818SChris Mason {
224f9327a70SChristoph Hellwig 	struct compressed_bio *cb =
225f9327a70SChristoph Hellwig 		container_of(work, struct compressed_bio, write_end_work);
226f9327a70SChristoph Hellwig 
2277dd43954SChristoph Hellwig 	btrfs_finish_ordered_extent(cb->bbio.ordered, NULL, cb->start, cb->len,
228544fe4a9SChristoph Hellwig 				    cb->bbio.bio.bi_status == BLK_STS_OK);
229c8b97818SChris Mason 
2307c0c7269SOmar Sandoval 	if (cb->writeback)
231544fe4a9SChristoph Hellwig 		end_compressed_writeback(cb);
2326853c64aSQu Wenruo 	/* Note, our inode could be gone now */
233c8b97818SChris Mason 
23432586c5bSChristoph Hellwig 	btrfs_free_compressed_pages(cb);
235544fe4a9SChristoph Hellwig 	bio_put(&cb->bbio.bio);
2366853c64aSQu Wenruo }
2376853c64aSQu Wenruo 
2386853c64aSQu Wenruo /*
2396853c64aSQu Wenruo  * Do the cleanup once all the compressed pages hit the disk.  This will clear
2406853c64aSQu Wenruo  * writeback on the file pages and free the compressed pages.
2416853c64aSQu Wenruo  *
2426853c64aSQu Wenruo  * This also calls the writeback end hooks for the file pages so that metadata
2436853c64aSQu Wenruo  * and checksums can be updated in the file.
2446853c64aSQu Wenruo  */
245917f32a2SChristoph Hellwig static void end_compressed_bio_write(struct btrfs_bio *bbio)
2466853c64aSQu Wenruo {
247544fe4a9SChristoph Hellwig 	struct compressed_bio *cb = to_compressed_bio(bbio);
248544fe4a9SChristoph Hellwig 	struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
2496853c64aSQu Wenruo 
250fed8a72dSChristoph Hellwig 	queue_work(fs_info->compressed_write_workers, &cb->write_end_work);
2512d4e0b84SQu Wenruo }
2522d4e0b84SQu Wenruo 
2534513cb0cSChristoph Hellwig static void btrfs_add_compressed_bio_pages(struct compressed_bio *cb)
25410e924bcSChristoph Hellwig {
25510e924bcSChristoph Hellwig 	struct bio *bio = &cb->bbio.bio;
25643fa4219SChristoph Hellwig 	u32 offset = 0;
25710e924bcSChristoph Hellwig 
25843fa4219SChristoph Hellwig 	while (offset < cb->compressed_len) {
25943fa4219SChristoph Hellwig 		u32 len = min_t(u32, cb->compressed_len - offset, PAGE_SIZE);
26010e924bcSChristoph Hellwig 
26143fa4219SChristoph Hellwig 		/* Maximum compressed extent is smaller than bio size limit. */
26243fa4219SChristoph Hellwig 		__bio_add_page(bio, cb->compressed_pages[offset >> PAGE_SHIFT],
26343fa4219SChristoph Hellwig 			       len, 0);
26443fa4219SChristoph Hellwig 		offset += len;
26510e924bcSChristoph Hellwig 	}
26610e924bcSChristoph Hellwig }
26710e924bcSChristoph Hellwig 
268c8b97818SChris Mason /*
269c8b97818SChris Mason  * worker function to build and submit bios for previously compressed pages.
270c8b97818SChris Mason  * The corresponding pages in the inode should be marked for writeback
271c8b97818SChris Mason  * and the compressed pages should have a reference on them for dropping
272c8b97818SChris Mason  * when the IO is complete.
273c8b97818SChris Mason  *
274c8b97818SChris Mason  * This also checksums the file bytes and gets things ready for
275c8b97818SChris Mason  * the end io hooks.
276c8b97818SChris Mason  */
277d611935bSChristoph Hellwig void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
278c8b97818SChris Mason 				   struct page **compressed_pages,
27965b5355fSAnand Jain 				   unsigned int nr_pages,
280bf9486d6SBart Van Assche 				   blk_opf_t write_flags,
2817c0c7269SOmar Sandoval 				   bool writeback)
282c8b97818SChris Mason {
283d611935bSChristoph Hellwig 	struct btrfs_inode *inode = BTRFS_I(ordered->inode);
284c7ee1819SNikolay Borisov 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
285c8b97818SChris Mason 	struct compressed_bio *cb;
286c8b97818SChris Mason 
287d611935bSChristoph Hellwig 	ASSERT(IS_ALIGNED(ordered->file_offset, fs_info->sectorsize));
288d611935bSChristoph Hellwig 	ASSERT(IS_ALIGNED(ordered->num_bytes, fs_info->sectorsize));
289544fe4a9SChristoph Hellwig 
290d611935bSChristoph Hellwig 	cb = alloc_compressed_bio(inode, ordered->file_offset,
291d611935bSChristoph Hellwig 				  REQ_OP_WRITE | write_flags,
292544fe4a9SChristoph Hellwig 				  end_compressed_bio_write);
293d611935bSChristoph Hellwig 	cb->start = ordered->file_offset;
294d611935bSChristoph Hellwig 	cb->len = ordered->num_bytes;
295c8b97818SChris Mason 	cb->compressed_pages = compressed_pages;
296d611935bSChristoph Hellwig 	cb->compressed_len = ordered->disk_num_bytes;
2977c0c7269SOmar Sandoval 	cb->writeback = writeback;
298fed8a72dSChristoph Hellwig 	INIT_WORK(&cb->write_end_work, btrfs_finish_compressed_write_work);
299c8b97818SChris Mason 	cb->nr_pages = nr_pages;
300d611935bSChristoph Hellwig 	cb->bbio.bio.bi_iter.bi_sector = ordered->disk_bytenr >> SECTOR_SHIFT;
301ec63b84dSChristoph Hellwig 	cb->bbio.ordered = ordered;
3024513cb0cSChristoph Hellwig 	btrfs_add_compressed_bio_pages(cb);
303c8b97818SChris Mason 
304ae42a154SChristoph Hellwig 	btrfs_submit_bio(&cb->bbio, 0);
305c8b97818SChris Mason }
306c8b97818SChris Mason 
3076a404910SQu Wenruo /*
3086a404910SQu Wenruo  * Add extra pages in the same compressed file extent so that we don't need to
3096a404910SQu Wenruo  * re-read the same extent again and again.
3106a404910SQu Wenruo  *
3116a404910SQu Wenruo  * NOTE: this won't work well for subpage, as for subpage read, we lock the
3126a404910SQu Wenruo  * full page and then submit a bio for each compressed/regular extent.
3136a404910SQu Wenruo  *
3146a404910SQu Wenruo  * This means that if several sectors in the same page point to the same
3156a404910SQu Wenruo  * on-disk compressed data, we will re-read the same extent many times and
3166a404910SQu Wenruo  * this function can only help for the next page.
3176a404910SQu Wenruo  */
318771ed689SChris Mason static noinline int add_ra_bio_pages(struct inode *inode,
319771ed689SChris Mason 				     u64 compressed_end,
3204088a47eSChristoph Hellwig 				     struct compressed_bio *cb,
32182e60d00SJohannes Weiner 				     int *memstall, unsigned long *pflags)
322771ed689SChris Mason {
3236a404910SQu Wenruo 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
324771ed689SChris Mason 	unsigned long end_index;
325b7d463a1SChristoph Hellwig 	struct bio *orig_bio = &cb->orig_bbio->bio;
326b7d463a1SChristoph Hellwig 	u64 cur = cb->orig_bbio->file_offset + orig_bio->bi_iter.bi_size;
327771ed689SChris Mason 	u64 isize = i_size_read(inode);
328771ed689SChris Mason 	int ret;
329771ed689SChris Mason 	struct page *page;
330771ed689SChris Mason 	struct extent_map *em;
331771ed689SChris Mason 	struct address_space *mapping = inode->i_mapping;
332771ed689SChris Mason 	struct extent_map_tree *em_tree;
333771ed689SChris Mason 	struct extent_io_tree *tree;
3346a404910SQu Wenruo 	int sectors_missed = 0;
335771ed689SChris Mason 
336771ed689SChris Mason 	em_tree = &BTRFS_I(inode)->extent_tree;
337771ed689SChris Mason 	tree = &BTRFS_I(inode)->io_tree;
338771ed689SChris Mason 
339771ed689SChris Mason 	if (isize == 0)
340771ed689SChris Mason 		return 0;
341771ed689SChris Mason 
342ca62e85dSQu Wenruo 	/*
343ca62e85dSQu Wenruo 	 * For current subpage support, we only support 64K page size,
344ca62e85dSQu Wenruo 	 * which means maximum compressed extent size (128K) is just 2x page
345ca62e85dSQu Wenruo 	 * size.
346ca62e85dSQu Wenruo 	 * This makes readahead less effective, so disable readahead for
347ca62e85dSQu Wenruo 	 * subpage for now, until full compressed write is supported.
348ca62e85dSQu Wenruo 	 */
349ca62e85dSQu Wenruo 	if (btrfs_sb(inode->i_sb)->sectorsize < PAGE_SIZE)
350ca62e85dSQu Wenruo 		return 0;
351ca62e85dSQu Wenruo 
35209cbfeafSKirill A. Shutemov 	end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
353771ed689SChris Mason 
3546a404910SQu Wenruo 	while (cur < compressed_end) {
3556a404910SQu Wenruo 		u64 page_end;
3566a404910SQu Wenruo 		u64 pg_index = cur >> PAGE_SHIFT;
3576a404910SQu Wenruo 		u32 add_size;
358771ed689SChris Mason 
359306e16ceSDavid Sterba 		if (pg_index > end_index)
360771ed689SChris Mason 			break;
361771ed689SChris Mason 
3620a943c65SMatthew Wilcox 		page = xa_load(&mapping->i_pages, pg_index);
3633159f943SMatthew Wilcox 		if (page && !xa_is_value(page)) {
3646a404910SQu Wenruo 			sectors_missed += (PAGE_SIZE - offset_in_page(cur)) >>
3656a404910SQu Wenruo 					  fs_info->sectorsize_bits;
3666a404910SQu Wenruo 
3676a404910SQu Wenruo 			/* Beyond threshold, no need to continue */
3686a404910SQu Wenruo 			if (sectors_missed > 4)
369771ed689SChris Mason 				break;
3706a404910SQu Wenruo 
3716a404910SQu Wenruo 			/*
3726a404910SQu Wenruo 			 * Jump to next page start as we already have page for
3736a404910SQu Wenruo 			 * Jump to next page start as we already have a page for
3746a404910SQu Wenruo 			 */
3756a404910SQu Wenruo 			cur = (pg_index << PAGE_SHIFT) + PAGE_SIZE;
3766a404910SQu Wenruo 			continue;
377771ed689SChris Mason 		}
378771ed689SChris Mason 
379c62d2555SMichal Hocko 		page = __page_cache_alloc(mapping_gfp_constraint(mapping,
380c62d2555SMichal Hocko 								 ~__GFP_FS));
381771ed689SChris Mason 		if (!page)
382771ed689SChris Mason 			break;
383771ed689SChris Mason 
384c62d2555SMichal Hocko 		if (add_to_page_cache_lru(page, mapping, pg_index, GFP_NOFS)) {
38509cbfeafSKirill A. Shutemov 			put_page(page);
3866a404910SQu Wenruo 			/* There is already a page, skip to page end */
3876a404910SQu Wenruo 			cur = (pg_index << PAGE_SHIFT) + PAGE_SIZE;
3886a404910SQu Wenruo 			continue;
389771ed689SChris Mason 		}
390771ed689SChris Mason 
39182e60d00SJohannes Weiner 		if (!*memstall && PageWorkingset(page)) {
3924088a47eSChristoph Hellwig 			psi_memstall_enter(pflags);
39382e60d00SJohannes Weiner 			*memstall = 1;
39482e60d00SJohannes Weiner 		}
3954088a47eSChristoph Hellwig 
39632443de3SQu Wenruo 		ret = set_page_extent_mapped(page);
39732443de3SQu Wenruo 		if (ret < 0) {
39832443de3SQu Wenruo 			unlock_page(page);
39932443de3SQu Wenruo 			put_page(page);
40032443de3SQu Wenruo 			break;
40132443de3SQu Wenruo 		}
40232443de3SQu Wenruo 
4036a404910SQu Wenruo 		page_end = (pg_index << PAGE_SHIFT) + PAGE_SIZE - 1;
404570eb97bSJosef Bacik 		lock_extent(tree, cur, page_end, NULL);
405890871beSChris Mason 		read_lock(&em_tree->lock);
4066a404910SQu Wenruo 		em = lookup_extent_mapping(em_tree, cur, page_end + 1 - cur);
407890871beSChris Mason 		read_unlock(&em_tree->lock);
408771ed689SChris Mason 
4096a404910SQu Wenruo 		/*
4106a404910SQu Wenruo 		 * At this point, we have a locked page in the page cache for
4116a404910SQu Wenruo 		 * these bytes in the file.  But, we have to make sure they map
4126a404910SQu Wenruo 		 * to this compressed extent on disk.
4136a404910SQu Wenruo 		 */
4146a404910SQu Wenruo 		if (!em || cur < em->start ||
4156a404910SQu Wenruo 		    (cur + fs_info->sectorsize > extent_map_end(em)) ||
41629e70be2SAnand Jain 		    (em->block_start >> SECTOR_SHIFT) != orig_bio->bi_iter.bi_sector) {
417771ed689SChris Mason 			free_extent_map(em);
418570eb97bSJosef Bacik 			unlock_extent(tree, cur, page_end, NULL);
419771ed689SChris Mason 			unlock_page(page);
42009cbfeafSKirill A. Shutemov 			put_page(page);
421771ed689SChris Mason 			break;
422771ed689SChris Mason 		}
423c205565eSFilipe Manana 		add_size = min(em->start + em->len, page_end + 1) - cur;
424771ed689SChris Mason 		free_extent_map(em);
425771ed689SChris Mason 
426771ed689SChris Mason 		if (page->index == end_index) {
4277073017aSJohannes Thumshirn 			size_t zero_offset = offset_in_page(isize);
428771ed689SChris Mason 
429771ed689SChris Mason 			if (zero_offset) {
430771ed689SChris Mason 				int zeros;
43109cbfeafSKirill A. Shutemov 				zeros = PAGE_SIZE - zero_offset;
432d048b9c2SIra Weiny 				memzero_page(page, zero_offset, zeros);
433771ed689SChris Mason 			}
434771ed689SChris Mason 		}
435771ed689SChris Mason 
436b7d463a1SChristoph Hellwig 		ret = bio_add_page(orig_bio, page, add_size, offset_in_page(cur));
4376a404910SQu Wenruo 		if (ret != add_size) {
438570eb97bSJosef Bacik 			unlock_extent(tree, cur, page_end, NULL);
439771ed689SChris Mason 			unlock_page(page);
44009cbfeafSKirill A. Shutemov 			put_page(page);
441771ed689SChris Mason 			break;
442771ed689SChris Mason 		}
4436a404910SQu Wenruo 		/*
4446a404910SQu Wenruo 		 * If it's subpage, we also need to increase its
4476a404910SQu Wenruo 		 * subpage::readers and unlock the page.
4466a404910SQu Wenruo 		 * subpage::readers and to unlock the page.
4476a404910SQu Wenruo 		 */
4486a404910SQu Wenruo 		if (fs_info->sectorsize < PAGE_SIZE)
4496a404910SQu Wenruo 			btrfs_subpage_start_reader(fs_info, page, cur, add_size);
4506a404910SQu Wenruo 		put_page(page);
4516a404910SQu Wenruo 		cur += add_size;
452771ed689SChris Mason 	}
453771ed689SChris Mason 	return 0;
454771ed689SChris Mason }
455771ed689SChris Mason 
456c8b97818SChris Mason /*
457c8b97818SChris Mason  * for a compressed read, the bio we get passed has all the inode pages
458c8b97818SChris Mason  * in it.  We don't actually do IO on those pages but allocate new ones
459c8b97818SChris Mason  * to hold the compressed pages on disk.
460c8b97818SChris Mason  *
4614f024f37SKent Overstreet  * bio->bi_iter.bi_sector points to the compressed extent on disk
462c8b97818SChris Mason  * bio->bi_io_vec points to all of the inode pages
463c8b97818SChris Mason  *
464c8b97818SChris Mason  * After the compressed pages are read, we copy the bytes into the
465c8b97818SChris Mason  * bio we were passed and then call the bio's end_io handler.
466c8b97818SChris Mason  */
467e1949310SChristoph Hellwig void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
468c8b97818SChris Mason {
469690834e4SChristoph Hellwig 	struct btrfs_inode *inode = bbio->inode;
470544fe4a9SChristoph Hellwig 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
471544fe4a9SChristoph Hellwig 	struct extent_map_tree *em_tree = &inode->extent_tree;
472c8b97818SChris Mason 	struct compressed_bio *cb;
473356b4a2dSAnand Jain 	unsigned int compressed_len;
474690834e4SChristoph Hellwig 	u64 file_offset = bbio->file_offset;
475e04ca626SChris Mason 	u64 em_len;
476e04ca626SChris Mason 	u64 em_start;
477c8b97818SChris Mason 	struct extent_map *em;
47882e60d00SJohannes Weiner 	unsigned long pflags;
47982e60d00SJohannes Weiner 	int memstall = 0;
480f9f15de8SJosef Bacik 	blk_status_t ret;
481dd137dd1SSweet Tea Dorminy 	int ret2;
482c8b97818SChris Mason 
483c8b97818SChris Mason 	/* we need the actual starting offset of this extent in the file */
484890871beSChris Mason 	read_lock(&em_tree->lock);
485557023eaSQu Wenruo 	em = lookup_extent_mapping(em_tree, file_offset, fs_info->sectorsize);
486890871beSChris Mason 	read_unlock(&em_tree->lock);
487f9f15de8SJosef Bacik 	if (!em) {
488f9f15de8SJosef Bacik 		ret = BLK_STS_IOERR;
489f9f15de8SJosef Bacik 		goto out;
490f9f15de8SJosef Bacik 	}
491c8b97818SChris Mason 
492557023eaSQu Wenruo 	ASSERT(em->compress_type != BTRFS_COMPRESS_NONE);
493d20f7043SChris Mason 	compressed_len = em->block_len;
4946b82ce8dSliubo 
495544fe4a9SChristoph Hellwig 	cb = alloc_compressed_bio(inode, file_offset, REQ_OP_READ,
496544fe4a9SChristoph Hellwig 				  end_compressed_bio_read);
497c8b97818SChris Mason 
498ff5b7ee3SYan Zheng 	cb->start = em->orig_start;
499e04ca626SChris Mason 	em_len = em->len;
500e04ca626SChris Mason 	em_start = em->start;
501d20f7043SChris Mason 
502690834e4SChristoph Hellwig 	cb->len = bbio->bio.bi_iter.bi_size;
503c8b97818SChris Mason 	cb->compressed_len = compressed_len;
5041d8fa2e2SGoldwyn Rodrigues 	cb->compress_type = em->compress_type;
505b7d463a1SChristoph Hellwig 	cb->orig_bbio = bbio;
506c8b97818SChris Mason 
5071d8fa2e2SGoldwyn Rodrigues 	free_extent_map(em);
5081d8fa2e2SGoldwyn Rodrigues 
509dd137dd1SSweet Tea Dorminy 	cb->nr_pages = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
510dd137dd1SSweet Tea Dorminy 	cb->compressed_pages = kcalloc(cb->nr_pages, sizeof(struct page *), GFP_NOFS);
511f9f15de8SJosef Bacik 	if (!cb->compressed_pages) {
512f9f15de8SJosef Bacik 		ret = BLK_STS_RESOURCE;
513544fe4a9SChristoph Hellwig 		goto out_free_bio;
514f9f15de8SJosef Bacik 	}
5156b82ce8dSliubo 
516dd137dd1SSweet Tea Dorminy 	ret2 = btrfs_alloc_page_array(cb->nr_pages, cb->compressed_pages);
517dd137dd1SSweet Tea Dorminy 	if (ret2) {
5180e9350deSDan Carpenter 		ret = BLK_STS_RESOURCE;
519544fe4a9SChristoph Hellwig 		goto out_free_compressed_pages;
520c8b97818SChris Mason 	}
521c8b97818SChris Mason 
522544fe4a9SChristoph Hellwig 	add_ra_bio_pages(&inode->vfs_inode, em_start + em_len, cb, &memstall,
523544fe4a9SChristoph Hellwig 			 &pflags);
524771ed689SChris Mason 
525771ed689SChris Mason 	/* include any pages we added in add_ra_bio_pages */
526690834e4SChristoph Hellwig 	cb->len = bbio->bio.bi_iter.bi_size;
5274513cb0cSChristoph Hellwig 	cb->bbio.bio.bi_iter.bi_sector = bbio->bio.bi_iter.bi_sector;
5284513cb0cSChristoph Hellwig 	btrfs_add_compressed_bio_pages(cb);
529524bcd1eSChristoph Hellwig 
53082e60d00SJohannes Weiner 	if (memstall)
5314088a47eSChristoph Hellwig 		psi_memstall_leave(&pflags);
5324088a47eSChristoph Hellwig 
533e1949310SChristoph Hellwig 	btrfs_submit_bio(&cb->bbio, 0);
534cb4411ddSChristoph Hellwig 	return;
5356b82ce8dSliubo 
536544fe4a9SChristoph Hellwig out_free_compressed_pages:
5376b82ce8dSliubo 	kfree(cb->compressed_pages);
538544fe4a9SChristoph Hellwig out_free_bio:
53910e924bcSChristoph Hellwig 	bio_put(&cb->bbio.bio);
540544fe4a9SChristoph Hellwig out:
541690834e4SChristoph Hellwig 	btrfs_bio_end_io(bbio, ret);
542c8b97818SChris Mason }
543261507a0SLi Zefan 
54417b5a6c1STimofey Titovets /*
54517b5a6c1STimofey Titovets  * The heuristic uses systematic sampling to collect data from the input data
54617b5a6c1STimofey Titovets  * range; the logic can be tuned by the following constants:
54717b5a6c1STimofey Titovets  *
54817b5a6c1STimofey Titovets  * @SAMPLING_READ_SIZE - how many bytes will be copied for each sample
54917b5a6c1STimofey Titovets  * @SAMPLING_INTERVAL  - distance between the starts of consecutive samples
55017b5a6c1STimofey Titovets  */
55117b5a6c1STimofey Titovets #define SAMPLING_READ_SIZE	(16)
55217b5a6c1STimofey Titovets #define SAMPLING_INTERVAL	(256)
55317b5a6c1STimofey Titovets 
55417b5a6c1STimofey Titovets /*
55517b5a6c1STimofey Titovets  * For statistical analysis of the input data we consider bytes that form a
55617b5a6c1STimofey Titovets  * Galois Field of 256 objects. Each object has an attribute count, i.e. how
55717b5a6c1STimofey Titovets  * many times the object appeared in the sample.
55817b5a6c1STimofey Titovets  */
55917b5a6c1STimofey Titovets #define BUCKET_SIZE		(256)
56017b5a6c1STimofey Titovets 
56117b5a6c1STimofey Titovets /*
56217b5a6c1STimofey Titovets  * The size of the sample is based on a statistical sampling rule of thumb.
56317b5a6c1STimofey Titovets  * The common way is to perform sampling tests as long as the number of
56417b5a6c1STimofey Titovets  * elements in each cell is at least 5.
56517b5a6c1STimofey Titovets  *
56617b5a6c1STimofey Titovets  * Instead of 5, we choose 32 to obtain more accurate results.
56717b5a6c1STimofey Titovets  * If the data contain the maximum number of symbols, which is 256, we obtain a
56817b5a6c1STimofey Titovets  * sample size bounded by 8192.
56917b5a6c1STimofey Titovets  *
57017b5a6c1STimofey Titovets  * For a sample of at most 8KB of data per data range: 16 consecutive bytes
57117b5a6c1STimofey Titovets  * from up to 512 locations.
57217b5a6c1STimofey Titovets  */
57317b5a6c1STimofey Titovets #define MAX_SAMPLE_SIZE		(BTRFS_MAX_UNCOMPRESSED *		\
57417b5a6c1STimofey Titovets 				 SAMPLING_READ_SIZE / SAMPLING_INTERVAL)
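
/*
 * Editor's note (worked example, assuming BTRFS_MAX_UNCOMPRESSED is 128K as
 * defined in compression.h): MAX_SAMPLE_SIZE then evaluates to
 *
 *	128K * 16 / 256 = 8192 bytes
 *
 * i.e. at most 512 samples of 16 bytes each, which matches the "8KB of data
 * per data range" figure quoted above.
 */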
57517b5a6c1STimofey Titovets 
57617b5a6c1STimofey Titovets struct bucket_item {
57717b5a6c1STimofey Titovets 	u32 count;
57817b5a6c1STimofey Titovets };
5794e439a0bSTimofey Titovets 
5804e439a0bSTimofey Titovets struct heuristic_ws {
58117b5a6c1STimofey Titovets 	/* Partial copy of input data */
58217b5a6c1STimofey Titovets 	u8 *sample;
583a440d48cSTimofey Titovets 	u32 sample_size;
58417b5a6c1STimofey Titovets 	/* Buckets store counters for each byte value */
58517b5a6c1STimofey Titovets 	struct bucket_item *bucket;
586440c840cSTimofey Titovets 	/* Sorting buffer */
587440c840cSTimofey Titovets 	struct bucket_item *bucket_b;
5884e439a0bSTimofey Titovets 	struct list_head list;
5894e439a0bSTimofey Titovets };
5904e439a0bSTimofey Titovets 
59192ee5530SDennis Zhou static struct workspace_manager heuristic_wsm;
59292ee5530SDennis Zhou 
5934e439a0bSTimofey Titovets static void free_heuristic_ws(struct list_head *ws)
5944e439a0bSTimofey Titovets {
5954e439a0bSTimofey Titovets 	struct heuristic_ws *workspace;
5964e439a0bSTimofey Titovets 
5974e439a0bSTimofey Titovets 	workspace = list_entry(ws, struct heuristic_ws, list);
5984e439a0bSTimofey Titovets 
59917b5a6c1STimofey Titovets 	kvfree(workspace->sample);
60017b5a6c1STimofey Titovets 	kfree(workspace->bucket);
601440c840cSTimofey Titovets 	kfree(workspace->bucket_b);
6024e439a0bSTimofey Titovets 	kfree(workspace);
6034e439a0bSTimofey Titovets }
6044e439a0bSTimofey Titovets 
6057bf49943SDennis Zhou static struct list_head *alloc_heuristic_ws(unsigned int level)
6064e439a0bSTimofey Titovets {
6074e439a0bSTimofey Titovets 	struct heuristic_ws *ws;
6084e439a0bSTimofey Titovets 
6094e439a0bSTimofey Titovets 	ws = kzalloc(sizeof(*ws), GFP_KERNEL);
6104e439a0bSTimofey Titovets 	if (!ws)
6114e439a0bSTimofey Titovets 		return ERR_PTR(-ENOMEM);
6124e439a0bSTimofey Titovets 
61317b5a6c1STimofey Titovets 	ws->sample = kvmalloc(MAX_SAMPLE_SIZE, GFP_KERNEL);
61417b5a6c1STimofey Titovets 	if (!ws->sample)
61517b5a6c1STimofey Titovets 		goto fail;
6164e439a0bSTimofey Titovets 
61717b5a6c1STimofey Titovets 	ws->bucket = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket), GFP_KERNEL);
61817b5a6c1STimofey Titovets 	if (!ws->bucket)
61917b5a6c1STimofey Titovets 		goto fail;
62017b5a6c1STimofey Titovets 
621440c840cSTimofey Titovets 	ws->bucket_b = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket_b), GFP_KERNEL);
622440c840cSTimofey Titovets 	if (!ws->bucket_b)
623440c840cSTimofey Titovets 		goto fail;
624440c840cSTimofey Titovets 
62517b5a6c1STimofey Titovets 	INIT_LIST_HEAD(&ws->list);
6264e439a0bSTimofey Titovets 	return &ws->list;
62717b5a6c1STimofey Titovets fail:
62817b5a6c1STimofey Titovets 	free_heuristic_ws(&ws->list);
62917b5a6c1STimofey Titovets 	return ERR_PTR(-ENOMEM);
6304e439a0bSTimofey Titovets }
6314e439a0bSTimofey Titovets 
632ca4ac360SDennis Zhou const struct btrfs_compress_op btrfs_heuristic_compress = {
633be951045SDavid Sterba 	.workspace_manager = &heuristic_wsm,
634ca4ac360SDennis Zhou };
635ca4ac360SDennis Zhou 
636e8c9f186SDavid Sterba static const struct btrfs_compress_op * const btrfs_compress_op[] = {
637ca4ac360SDennis Zhou 	/* The heuristic is represented as compression type 0 */
638ca4ac360SDennis Zhou 	&btrfs_heuristic_compress,
639261507a0SLi Zefan 	&btrfs_zlib_compress,
640a6fa6faeSLi Zefan 	&btrfs_lzo_compress,
6415c1aab1dSNick Terrell 	&btrfs_zstd_compress,
642261507a0SLi Zefan };
643261507a0SLi Zefan 
644c778df14SDavid Sterba static struct list_head *alloc_workspace(int type, unsigned int level)
645c778df14SDavid Sterba {
646c778df14SDavid Sterba 	switch (type) {
647c778df14SDavid Sterba 	case BTRFS_COMPRESS_NONE: return alloc_heuristic_ws(level);
648c778df14SDavid Sterba 	case BTRFS_COMPRESS_ZLIB: return zlib_alloc_workspace(level);
649c778df14SDavid Sterba 	case BTRFS_COMPRESS_LZO:  return lzo_alloc_workspace(level);
650c778df14SDavid Sterba 	case BTRFS_COMPRESS_ZSTD: return zstd_alloc_workspace(level);
651c778df14SDavid Sterba 	default:
652c778df14SDavid Sterba 		/*
653c778df14SDavid Sterba 		 * This can't happen, the type is validated several times
654c778df14SDavid Sterba 		 * before we get here.
655c778df14SDavid Sterba 		 */
656c778df14SDavid Sterba 		BUG();
657c778df14SDavid Sterba 	}
658c778df14SDavid Sterba }
659c778df14SDavid Sterba 
6601e002351SDavid Sterba static void free_workspace(int type, struct list_head *ws)
6611e002351SDavid Sterba {
6621e002351SDavid Sterba 	switch (type) {
6631e002351SDavid Sterba 	case BTRFS_COMPRESS_NONE: return free_heuristic_ws(ws);
6641e002351SDavid Sterba 	case BTRFS_COMPRESS_ZLIB: return zlib_free_workspace(ws);
6651e002351SDavid Sterba 	case BTRFS_COMPRESS_LZO:  return lzo_free_workspace(ws);
6661e002351SDavid Sterba 	case BTRFS_COMPRESS_ZSTD: return zstd_free_workspace(ws);
6671e002351SDavid Sterba 	default:
6681e002351SDavid Sterba 		/*
6691e002351SDavid Sterba 		 * This can't happen, the type is validated several times
6701e002351SDavid Sterba 		 * before we get here.
6711e002351SDavid Sterba 		 */
6721e002351SDavid Sterba 		BUG();
6731e002351SDavid Sterba 	}
6741e002351SDavid Sterba }
6751e002351SDavid Sterba 
676d5517033SDavid Sterba static void btrfs_init_workspace_manager(int type)
677261507a0SLi Zefan {
6780cf25213SDavid Sterba 	struct workspace_manager *wsm;
6794e439a0bSTimofey Titovets 	struct list_head *workspace;
680261507a0SLi Zefan 
6810cf25213SDavid Sterba 	wsm = btrfs_compress_op[type]->workspace_manager;
68292ee5530SDennis Zhou 	INIT_LIST_HEAD(&wsm->idle_ws);
68392ee5530SDennis Zhou 	spin_lock_init(&wsm->ws_lock);
68492ee5530SDennis Zhou 	atomic_set(&wsm->total_ws, 0);
68592ee5530SDennis Zhou 	init_waitqueue_head(&wsm->ws_wait);
686f77dd0d6SDavid Sterba 
687f77dd0d6SDavid Sterba 	/*
6881666edabSDennis Zhou 	 * Preallocate one workspace for each compression type so we can
6891666edabSDennis Zhou 	 * guarantee forward progress in the worst case
690f77dd0d6SDavid Sterba 	 */
691c778df14SDavid Sterba 	workspace = alloc_workspace(type, 0);
692f77dd0d6SDavid Sterba 	if (IS_ERR(workspace)) {
6931666edabSDennis Zhou 		pr_warn(
6941666edabSDennis Zhou 	"BTRFS: cannot preallocate compression workspace, will try later\n");
695f77dd0d6SDavid Sterba 	} else {
69692ee5530SDennis Zhou 		atomic_set(&wsm->total_ws, 1);
69792ee5530SDennis Zhou 		wsm->free_ws = 1;
69892ee5530SDennis Zhou 		list_add(workspace, &wsm->idle_ws);
699f77dd0d6SDavid Sterba 	}
700261507a0SLi Zefan }
7011666edabSDennis Zhou 
7022510307eSDavid Sterba static void btrfs_cleanup_workspace_manager(int type)
7031666edabSDennis Zhou {
7042dba7143SDavid Sterba 	struct workspace_manager *wsman;
7051666edabSDennis Zhou 	struct list_head *ws;
7061666edabSDennis Zhou 
7072dba7143SDavid Sterba 	wsman = btrfs_compress_op[type]->workspace_manager;
7081666edabSDennis Zhou 	while (!list_empty(&wsman->idle_ws)) {
7091666edabSDennis Zhou 		ws = wsman->idle_ws.next;
7101666edabSDennis Zhou 		list_del(ws);
7111e002351SDavid Sterba 		free_workspace(type, ws);
7121666edabSDennis Zhou 		atomic_dec(&wsman->total_ws);
7131666edabSDennis Zhou 	}
714261507a0SLi Zefan }
715261507a0SLi Zefan 
716261507a0SLi Zefan /*
717e721e49dSDavid Sterba  * This finds an available workspace or allocates a new one.
718e721e49dSDavid Sterba  * If it's not possible to allocate a new one, it waits until one becomes free.
719e721e49dSDavid Sterba  * Preallocation provides a forward progress guarantee and we do not return
720e721e49dSDavid Sterba  * errors.
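 *
 * A minimal usage sketch (editor's illustration, mirroring how the helpers in
 * this file, e.g. btrfs_compress_pages() below, drive the workspace API):
 *
 *	struct list_head *workspace;
 *
 *	workspace = get_workspace(type, level);
 *	ret = compression_compress_pages(type, workspace, mapping, start, pages,
 *					 out_pages, total_in, total_out);
 *	put_workspace(type, workspace);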
721261507a0SLi Zefan  */
7225907a9bbSDavid Sterba struct list_head *btrfs_get_workspace(int type, unsigned int level)
723261507a0SLi Zefan {
7245907a9bbSDavid Sterba 	struct workspace_manager *wsm;
725261507a0SLi Zefan 	struct list_head *workspace;
726261507a0SLi Zefan 	int cpus = num_online_cpus();
727fe308533SDavid Sterba 	unsigned nofs_flag;
7284e439a0bSTimofey Titovets 	struct list_head *idle_ws;
7294e439a0bSTimofey Titovets 	spinlock_t *ws_lock;
7304e439a0bSTimofey Titovets 	atomic_t *total_ws;
7314e439a0bSTimofey Titovets 	wait_queue_head_t *ws_wait;
7324e439a0bSTimofey Titovets 	int *free_ws;
733261507a0SLi Zefan 
7345907a9bbSDavid Sterba 	wsm = btrfs_compress_op[type]->workspace_manager;
73592ee5530SDennis Zhou 	idle_ws	 = &wsm->idle_ws;
73692ee5530SDennis Zhou 	ws_lock	 = &wsm->ws_lock;
73792ee5530SDennis Zhou 	total_ws = &wsm->total_ws;
73892ee5530SDennis Zhou 	ws_wait	 = &wsm->ws_wait;
73992ee5530SDennis Zhou 	free_ws	 = &wsm->free_ws;
7404e439a0bSTimofey Titovets 
741261507a0SLi Zefan again:
742d9187649SByongho Lee 	spin_lock(ws_lock);
743d9187649SByongho Lee 	if (!list_empty(idle_ws)) {
744d9187649SByongho Lee 		workspace = idle_ws->next;
745261507a0SLi Zefan 		list_del(workspace);
7466ac10a6aSDavid Sterba 		(*free_ws)--;
747d9187649SByongho Lee 		spin_unlock(ws_lock);
748261507a0SLi Zefan 		return workspace;
749261507a0SLi Zefan 
750261507a0SLi Zefan 	}
7516ac10a6aSDavid Sterba 	if (atomic_read(total_ws) > cpus) {
752261507a0SLi Zefan 		DEFINE_WAIT(wait);
753261507a0SLi Zefan 
754d9187649SByongho Lee 		spin_unlock(ws_lock);
755d9187649SByongho Lee 		prepare_to_wait(ws_wait, &wait, TASK_UNINTERRUPTIBLE);
7566ac10a6aSDavid Sterba 		if (atomic_read(total_ws) > cpus && !*free_ws)
757261507a0SLi Zefan 			schedule();
758d9187649SByongho Lee 		finish_wait(ws_wait, &wait);
759261507a0SLi Zefan 		goto again;
760261507a0SLi Zefan 	}
7616ac10a6aSDavid Sterba 	atomic_inc(total_ws);
762d9187649SByongho Lee 	spin_unlock(ws_lock);
763261507a0SLi Zefan 
764fe308533SDavid Sterba 	/*
765fe308533SDavid Sterba 	 * Allocation helpers call vmalloc, which can't use GFP_NOFS, so we have
766fe308533SDavid Sterba 	 * to disable FS allocations here (memalloc_nofs_save) because we might get
767fe308533SDavid Sterba 	 * called from the restricted context of btrfs_compress_bio/btrfs_compress_pages
768fe308533SDavid Sterba 	 */
769fe308533SDavid Sterba 	nofs_flag = memalloc_nofs_save();
770c778df14SDavid Sterba 	workspace = alloc_workspace(type, level);
771fe308533SDavid Sterba 	memalloc_nofs_restore(nofs_flag);
772fe308533SDavid Sterba 
773261507a0SLi Zefan 	if (IS_ERR(workspace)) {
7746ac10a6aSDavid Sterba 		atomic_dec(total_ws);
775d9187649SByongho Lee 		wake_up(ws_wait);
776e721e49dSDavid Sterba 
777e721e49dSDavid Sterba 		/*
778e721e49dSDavid Sterba 		 * Do not return the error but go back to waiting. There's a
779e721e49dSDavid Sterba 		 * workspace preallocated for each type and the compression
780e721e49dSDavid Sterba 		 * time is bounded so we get to a workspace eventually. This
781e721e49dSDavid Sterba 		 * makes our caller's life easier.
78252356716SDavid Sterba 		 *
78352356716SDavid Sterba 		 * To prevent silent and low-probability deadlocks (when the
78452356716SDavid Sterba 		 * initial preallocation fails), check if there are any
78552356716SDavid Sterba 		 * workspaces at all.
786e721e49dSDavid Sterba 		 */
78752356716SDavid Sterba 		if (atomic_read(total_ws) == 0) {
78852356716SDavid Sterba 			static DEFINE_RATELIMIT_STATE(_rs,
78952356716SDavid Sterba 					/* once per minute */ 60 * HZ,
79052356716SDavid Sterba 					/* no burst */ 1);
79152356716SDavid Sterba 
79252356716SDavid Sterba 			if (__ratelimit(&_rs)) {
793ab8d0fc4SJeff Mahoney 				pr_warn("BTRFS: no compression workspaces, low memory, retrying\n");
79452356716SDavid Sterba 			}
79552356716SDavid Sterba 		}
796e721e49dSDavid Sterba 		goto again;
797261507a0SLi Zefan 	}
798261507a0SLi Zefan 	return workspace;
799261507a0SLi Zefan }
800261507a0SLi Zefan 
8017bf49943SDennis Zhou static struct list_head *get_workspace(int type, int level)
802929f4bafSDennis Zhou {
8036a0d1272SDavid Sterba 	switch (type) {
8045907a9bbSDavid Sterba 	case BTRFS_COMPRESS_NONE: return btrfs_get_workspace(type, level);
8056a0d1272SDavid Sterba 	case BTRFS_COMPRESS_ZLIB: return zlib_get_workspace(level);
8065907a9bbSDavid Sterba 	case BTRFS_COMPRESS_LZO:  return btrfs_get_workspace(type, level);
8076a0d1272SDavid Sterba 	case BTRFS_COMPRESS_ZSTD: return zstd_get_workspace(level);
8086a0d1272SDavid Sterba 	default:
8096a0d1272SDavid Sterba 		/*
8106a0d1272SDavid Sterba 		 * This can't happen, the type is validated several times
8116a0d1272SDavid Sterba 		 * before we get here.
8126a0d1272SDavid Sterba 		 */
8136a0d1272SDavid Sterba 		BUG();
8146a0d1272SDavid Sterba 	}
815929f4bafSDennis Zhou }
816929f4bafSDennis Zhou 
817261507a0SLi Zefan /*
818261507a0SLi Zefan  * put a workspace struct back on the list or free it if we have enough
819261507a0SLi Zefan  * idle ones sitting around
820261507a0SLi Zefan  */
821a3bbd2a9SDavid Sterba void btrfs_put_workspace(int type, struct list_head *ws)
822261507a0SLi Zefan {
823a3bbd2a9SDavid Sterba 	struct workspace_manager *wsm;
8244e439a0bSTimofey Titovets 	struct list_head *idle_ws;
8254e439a0bSTimofey Titovets 	spinlock_t *ws_lock;
8264e439a0bSTimofey Titovets 	atomic_t *total_ws;
8274e439a0bSTimofey Titovets 	wait_queue_head_t *ws_wait;
8284e439a0bSTimofey Titovets 	int *free_ws;
8294e439a0bSTimofey Titovets 
830a3bbd2a9SDavid Sterba 	wsm = btrfs_compress_op[type]->workspace_manager;
83192ee5530SDennis Zhou 	idle_ws	 = &wsm->idle_ws;
83292ee5530SDennis Zhou 	ws_lock	 = &wsm->ws_lock;
83392ee5530SDennis Zhou 	total_ws = &wsm->total_ws;
83492ee5530SDennis Zhou 	ws_wait	 = &wsm->ws_wait;
83592ee5530SDennis Zhou 	free_ws	 = &wsm->free_ws;
836261507a0SLi Zefan 
837d9187649SByongho Lee 	spin_lock(ws_lock);
83826b28dceSNick Terrell 	if (*free_ws <= num_online_cpus()) {
839929f4bafSDennis Zhou 		list_add(ws, idle_ws);
8406ac10a6aSDavid Sterba 		(*free_ws)++;
841d9187649SByongho Lee 		spin_unlock(ws_lock);
842261507a0SLi Zefan 		goto wake;
843261507a0SLi Zefan 	}
844d9187649SByongho Lee 	spin_unlock(ws_lock);
845261507a0SLi Zefan 
8461e002351SDavid Sterba 	free_workspace(type, ws);
8476ac10a6aSDavid Sterba 	atomic_dec(total_ws);
848261507a0SLi Zefan wake:
849093258e6SDavid Sterba 	cond_wake_up(ws_wait);
850261507a0SLi Zefan }
851261507a0SLi Zefan 
852929f4bafSDennis Zhou static void put_workspace(int type, struct list_head *ws)
853929f4bafSDennis Zhou {
854bd3a5287SDavid Sterba 	switch (type) {
855a3bbd2a9SDavid Sterba 	case BTRFS_COMPRESS_NONE: return btrfs_put_workspace(type, ws);
856a3bbd2a9SDavid Sterba 	case BTRFS_COMPRESS_ZLIB: return btrfs_put_workspace(type, ws);
857a3bbd2a9SDavid Sterba 	case BTRFS_COMPRESS_LZO:  return btrfs_put_workspace(type, ws);
858bd3a5287SDavid Sterba 	case BTRFS_COMPRESS_ZSTD: return zstd_put_workspace(ws);
859bd3a5287SDavid Sterba 	default:
860bd3a5287SDavid Sterba 		/*
861bd3a5287SDavid Sterba 		 * This can't happen, the type is validated several times
862bd3a5287SDavid Sterba 		 * before we get here.
863bd3a5287SDavid Sterba 		 */
864bd3a5287SDavid Sterba 		BUG();
865bd3a5287SDavid Sterba 	}
866929f4bafSDennis Zhou }
867929f4bafSDennis Zhou 
868261507a0SLi Zefan /*
869adbab642SAnand Jain  * Adjust @level according to the limits of the compression algorithm or
870adbab642SAnand Jain  * fall back to the default.
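 *
 * (Editor's note, illustrative only: with the per-type btrfs_compress_op
 *  tables at this revision, zlib accepts levels 1-9 and zstd levels 1-15,
 *  both defaulting to 3 when level 0 is passed; the tables in zlib.c and
 *  zstd.c are authoritative.)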
871adbab642SAnand Jain  */
872adbab642SAnand Jain static unsigned int btrfs_compress_set_level(int type, unsigned level)
873adbab642SAnand Jain {
874adbab642SAnand Jain 	const struct btrfs_compress_op *ops = btrfs_compress_op[type];
875adbab642SAnand Jain 
876adbab642SAnand Jain 	if (level == 0)
877adbab642SAnand Jain 		level = ops->default_level;
878adbab642SAnand Jain 	else
879adbab642SAnand Jain 		level = min(level, ops->max_level);
880adbab642SAnand Jain 
881adbab642SAnand Jain 	return level;
882adbab642SAnand Jain }
883adbab642SAnand Jain 
884adbab642SAnand Jain /*
88538c31464SDavid Sterba  * Given an address space and start and length, compress the bytes into @pages
88638c31464SDavid Sterba  * that are allocated on demand.
887261507a0SLi Zefan  *
888f51d2b59SDavid Sterba  * @type_level is encoded algorithm and level, where level 0 means whatever
889f51d2b59SDavid Sterba  * default the algorithm chooses and is opaque here;
890f51d2b59SDavid Sterba  * - the compression algorithm is stored in bits 0-3
891f51d2b59SDavid Sterba  * - the level is stored in bits 4-7
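 *   (editor's note: assuming the btrfs_compress_type()/btrfs_compress_level()
 *    helpers in compression.h, the type sits in the low nibble and the level
 *    in the high nibble, so e.g. zstd (type 3) at level 7 is encoded as 0x73)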
892f51d2b59SDavid Sterba  *
8934d3a800eSDavid Sterba  * @out_pages is an in/out parameter, holds maximum number of pages to allocate
8944d3a800eSDavid Sterba  * and returns number of actually allocated pages
895261507a0SLi Zefan  *
89638c31464SDavid Sterba  * @total_in is used to return the number of bytes actually read.  It
89738c31464SDavid Sterba  * may be smaller than the input length if we had to exit early because we
898261507a0SLi Zefan  * ran out of room in the pages array or because we cross the
899261507a0SLi Zefan  * ran out of room in the pages array or because we crossed the
900261507a0SLi Zefan  *
90138c31464SDavid Sterba  * @total_out is an in/out parameter, must be set to the input length and will
90238c31464SDavid Sterba  * be also used to return the total number of compressed bytes
903261507a0SLi Zefan  */
904f51d2b59SDavid Sterba int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
90538c31464SDavid Sterba 			 u64 start, struct page **pages,
906261507a0SLi Zefan 			 unsigned long *out_pages,
907261507a0SLi Zefan 			 unsigned long *total_in,
908e5d74902SDavid Sterba 			 unsigned long *total_out)
909261507a0SLi Zefan {
9101972708aSDennis Zhou 	int type = btrfs_compress_type(type_level);
9117bf49943SDennis Zhou 	int level = btrfs_compress_level(type_level);
912261507a0SLi Zefan 	struct list_head *workspace;
913261507a0SLi Zefan 	int ret;
914261507a0SLi Zefan 
915b0c1fe1eSDavid Sterba 	level = btrfs_compress_set_level(type, level);
9167bf49943SDennis Zhou 	workspace = get_workspace(type, level);
9171e4eb746SDavid Sterba 	ret = compression_compress_pages(type, workspace, mapping, start, pages,
9181e4eb746SDavid Sterba 					 out_pages, total_in, total_out);
919929f4bafSDennis Zhou 	put_workspace(type, workspace);
920261507a0SLi Zefan 	return ret;
921261507a0SLi Zefan }
922261507a0SLi Zefan 
9238140dc30SAnand Jain static int btrfs_decompress_bio(struct compressed_bio *cb)
924261507a0SLi Zefan {
925261507a0SLi Zefan 	struct list_head *workspace;
926261507a0SLi Zefan 	int ret;
9278140dc30SAnand Jain 	int type = cb->compress_type;
928261507a0SLi Zefan 
9297bf49943SDennis Zhou 	workspace = get_workspace(type, 0);
9304a9e803eSSu Yue 	ret = compression_decompress_bio(workspace, cb);
931929f4bafSDennis Zhou 	put_workspace(type, workspace);
932e1ddce71SAnand Jain 
9337edb9a3eSChristoph Hellwig 	if (!ret)
934b7d463a1SChristoph Hellwig 		zero_fill_bio(&cb->orig_bbio->bio);
935261507a0SLi Zefan 	return ret;
936261507a0SLi Zefan }
937261507a0SLi Zefan 
938261507a0SLi Zefan /*
939261507a0SLi Zefan  * a less complex decompression routine.  Our compressed data fits in a
940261507a0SLi Zefan  * single page, and we want to read a single page out of it.
941261507a0SLi Zefan  * @dest_pgoff tells us the offset into the destination page we're interested in
942261507a0SLi Zefan  */
9433e09b5b2SDavid Sterba int btrfs_decompress(int type, const u8 *data_in, struct page *dest_page,
944*56a1bf2bSQu Wenruo 		     unsigned long dest_pgoff, size_t srclen, size_t destlen)
945261507a0SLi Zefan {
946*56a1bf2bSQu Wenruo 	struct btrfs_fs_info *fs_info = btrfs_sb(dest_page->mapping->host->i_sb);
947261507a0SLi Zefan 	struct list_head *workspace;
948*56a1bf2bSQu Wenruo 	const u32 sectorsize = fs_info->sectorsize;
949261507a0SLi Zefan 	int ret;
950261507a0SLi Zefan 
951*56a1bf2bSQu Wenruo 	/*
952*56a1bf2bSQu Wenruo 	 * The full destination page range should not exceed the page size.
953*56a1bf2bSQu Wenruo 	 * And the @destlen should not exceed sectorsize, as this is only called for
954*56a1bf2bSQu Wenruo 	 * inline file extents, which should not exceed sectorsize.
955*56a1bf2bSQu Wenruo 	 */
956*56a1bf2bSQu Wenruo 	ASSERT(dest_pgoff + destlen <= PAGE_SIZE && destlen <= sectorsize);
957*56a1bf2bSQu Wenruo 
9587bf49943SDennis Zhou 	workspace = get_workspace(type, 0);
9591e4eb746SDavid Sterba 	ret = compression_decompress(type, workspace, data_in, dest_page,
960*56a1bf2bSQu Wenruo 				     dest_pgoff, srclen, destlen);
961929f4bafSDennis Zhou 	put_workspace(type, workspace);
9627bf49943SDennis Zhou 
963261507a0SLi Zefan 	return ret;
964261507a0SLi Zefan }
965261507a0SLi Zefan 
9665565b8e0SQu Wenruo int __init btrfs_init_compress(void)
9671666edabSDennis Zhou {
968544fe4a9SChristoph Hellwig 	if (bioset_init(&btrfs_compressed_bioset, BIO_POOL_SIZE,
969544fe4a9SChristoph Hellwig 			offsetof(struct compressed_bio, bbio.bio),
970544fe4a9SChristoph Hellwig 			BIOSET_NEED_BVECS))
971544fe4a9SChristoph Hellwig 		return -ENOMEM;
972d5517033SDavid Sterba 	btrfs_init_workspace_manager(BTRFS_COMPRESS_NONE);
973d5517033SDavid Sterba 	btrfs_init_workspace_manager(BTRFS_COMPRESS_ZLIB);
974d5517033SDavid Sterba 	btrfs_init_workspace_manager(BTRFS_COMPRESS_LZO);
975d5517033SDavid Sterba 	zstd_init_workspace_manager();
9765565b8e0SQu Wenruo 	return 0;
9771666edabSDennis Zhou }
9781666edabSDennis Zhou 
979e67c718bSDavid Sterba void __cold btrfs_exit_compress(void)
980261507a0SLi Zefan {
9812510307eSDavid Sterba 	btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_NONE);
9822510307eSDavid Sterba 	btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_ZLIB);
9832510307eSDavid Sterba 	btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_LZO);
9842510307eSDavid Sterba 	zstd_cleanup_workspace_manager();
985544fe4a9SChristoph Hellwig 	bioset_exit(&btrfs_compressed_bioset);
986261507a0SLi Zefan }
9873a39c18dSLi Zefan 
9883a39c18dSLi Zefan /*
9891c3dc173SQu Wenruo  * Copy decompressed data from working buffer to pages.
9903a39c18dSLi Zefan  *
9911c3dc173SQu Wenruo  * @buf:		The decompressed data buffer
9921c3dc173SQu Wenruo  * @buf_len:		The decompressed data length
9931c3dc173SQu Wenruo  * @decompressed:	Number of bytes that are already decompressed inside the
9941c3dc173SQu Wenruo  * 			compressed extent
9951c3dc173SQu Wenruo  * @cb:			The compressed extent descriptor
9961c3dc173SQu Wenruo  * @orig_bio:		The original bio that the caller wants the data copied into
9973a39c18dSLi Zefan  *
9981c3dc173SQu Wenruo  * An easier to understand graph is like below:
9991c3dc173SQu Wenruo  *
10001c3dc173SQu Wenruo  * 		|<- orig_bio ->|     |<- orig_bio->|
10011c3dc173SQu Wenruo  * 	|<-------      full decompressed extent      ----->|
10021c3dc173SQu Wenruo  * 	|<-----------    @cb range   ---->|
10031c3dc173SQu Wenruo  * 	|			|<-- @buf_len -->|
10041c3dc173SQu Wenruo  * 	|<--- @decompressed --->|
10051c3dc173SQu Wenruo  *
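 * An illustrative example (editor's sketch, numbers picked for clarity):
 * with @decompressed = 64K and @buf_len = 16K, @buf holds bytes [64K, 80K)
 * of the full decompressed extent.  If the next bvec of @orig_bio maps to
 * offset 68K of that extent and is 4K long, this call copies buf[4K, 8K)
 * into that bvec and advances @orig_bio by 4K.
 *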
10061c3dc173SQu Wenruo  * Note that, @cb can be a subpage of the full decompressed extent, but
10071c3dc173SQu Wenruo  * Note that @cb can cover only a part of the full decompressed extent, but
10081c3dc173SQu Wenruo  * @cb->start always equals the file offset at which the full decompressed
10091c3dc173SQu Wenruo  * extent starts.
10101c3dc173SQu Wenruo  * When reading compressed extent, we have to read the full compressed extent,
10111c3dc173SQu Wenruo  * while @orig_bio may only want part of the range.
10121c3dc173SQu Wenruo  * Thus this function ensures that only the data covered by @orig_bio gets
10131c3dc173SQu Wenruo  * copied.
10141c3dc173SQu Wenruo  *
10151c3dc173SQu Wenruo  * Return 0 if we have copied all needed contents for @orig_bio.
10161c3dc173SQu Wenruo  * Return >0 if we need to continue decompressing.
10173a39c18dSLi Zefan  */
10181c3dc173SQu Wenruo int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
10191c3dc173SQu Wenruo 			      struct compressed_bio *cb, u32 decompressed)
10203a39c18dSLi Zefan {
1021b7d463a1SChristoph Hellwig 	struct bio *orig_bio = &cb->orig_bbio->bio;
10221c3dc173SQu Wenruo 	/* Offset inside the full decompressed extent */
10231c3dc173SQu Wenruo 	u32 cur_offset;
10243a39c18dSLi Zefan 
10251c3dc173SQu Wenruo 	cur_offset = decompressed;
10261c3dc173SQu Wenruo 	/* The main loop to do the copy */
10271c3dc173SQu Wenruo 	while (cur_offset < decompressed + buf_len) {
10281c3dc173SQu Wenruo 		struct bio_vec bvec;
10291c3dc173SQu Wenruo 		size_t copy_len;
10301c3dc173SQu Wenruo 		u32 copy_start;
10311c3dc173SQu Wenruo 		/* Offset inside the full decompressed extent */
10321c3dc173SQu Wenruo 		u32 bvec_offset;
10331c3dc173SQu Wenruo 
10341c3dc173SQu Wenruo 		bvec = bio_iter_iovec(orig_bio, orig_bio->bi_iter);
10353a39c18dSLi Zefan 		/*
10361c3dc173SQu Wenruo 		 * cb->start may underflow, but subtracting that value can still
10371c3dc173SQu Wenruo 		 * give us the correct offset inside the full decompressed extent.
10383a39c18dSLi Zefan 		 */
10391c3dc173SQu Wenruo 		bvec_offset = page_offset(bvec.bv_page) + bvec.bv_offset - cb->start;
10403a39c18dSLi Zefan 
10411c3dc173SQu Wenruo 		/* Haven't reached the bvec range, exit */
10421c3dc173SQu Wenruo 		if (decompressed + buf_len <= bvec_offset)
10433a39c18dSLi Zefan 			return 1;
10443a39c18dSLi Zefan 
10451c3dc173SQu Wenruo 		copy_start = max(cur_offset, bvec_offset);
10461c3dc173SQu Wenruo 		copy_len = min(bvec_offset + bvec.bv_len,
10471c3dc173SQu Wenruo 			       decompressed + buf_len) - copy_start;
10481c3dc173SQu Wenruo 		ASSERT(copy_len);
10491c3dc173SQu Wenruo 
10503a39c18dSLi Zefan 		/*
10511c3dc173SQu Wenruo 		 * Extra range check to ensure we didn't go beyond
10521c3dc173SQu Wenruo 		 * @buf + @buf_len.
10533a39c18dSLi Zefan 		 */
10541c3dc173SQu Wenruo 		ASSERT(copy_start - decompressed < buf_len);
10551c3dc173SQu Wenruo 		memcpy_to_page(bvec.bv_page, bvec.bv_offset,
10561c3dc173SQu Wenruo 			       buf + copy_start - decompressed, copy_len);
10571c3dc173SQu Wenruo 		cur_offset += copy_len;
1058974b1adcSChristoph Hellwig 
10591c3dc173SQu Wenruo 		bio_advance(orig_bio, copy_len);
10601c3dc173SQu Wenruo 		/* Finished the bio */
10611c3dc173SQu Wenruo 		if (!orig_bio->bi_iter.bi_size)
10623a39c18dSLi Zefan 			return 0;
10633a39c18dSLi Zefan 	}
10643a39c18dSLi Zefan 	return 1;
10653a39c18dSLi Zefan }
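
/*
 * Worked example for the copy loop above (illustrative numbers, not taken
 * from a real workload): assume @cb->start maps to offset 0 of the
 * decompressed extent, the call passes @decompressed = 8K and @buf_len = 4K
 * (i.e. @buf holds extent bytes [8K, 12K)), and the current bvec of the
 * original bio covers extent bytes [10K, 14K).  Then:
 *
 *	bvec_offset = 10K
 *	copy_start  = max(8K, 10K)        = 10K
 *	copy_len    = min(14K, 12K) - 10K = 2K
 *
 * Only 2K is copied (starting at @buf + 2K), the bio is advanced by 2K, and
 * the function returns 1 so that the caller decompresses more data for the
 * remaining 2K of that bvec.
 */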
1066c2fcdcdfSTimofey Titovets 
106719562430STimofey Titovets /*
106819562430STimofey Titovets  * Shannon Entropy calculation
106919562430STimofey Titovets  *
107052042d8eSAndrea Gelmini  * Pure byte distribution analysis fails to determine compressibility of data.
107119562430STimofey Titovets  * Try calculating entropy to estimate the average minimum number of bits
107219562430STimofey Titovets  * needed to encode the sampled data.
107319562430STimofey Titovets  *
107419562430STimofey Titovets  * For convenience, return the percentage of needed bits instead of the raw
107519562430STimofey Titovets  * number of bits.
107619562430STimofey Titovets  *
107719562430STimofey Titovets  * @ENTROPY_LVL_ACEPTABLE - below this threshold the sample has low byte entropy
107819562430STimofey Titovets  *			    and is very likely to be compressible
107919562430STimofey Titovets  *
108019562430STimofey Titovets  * @ENTROPY_LVL_HIGH - the data is very likely not compressible
108119562430STimofey Titovets  *
108219562430STimofey Titovets  * Use of ilog2() decreases precision, we lower the LVL to 5 to compensate.
108319562430STimofey Titovets  */
108419562430STimofey Titovets #define ENTROPY_LVL_ACEPTABLE		(65)
108519562430STimofey Titovets #define ENTROPY_LVL_HIGH		(80)
108619562430STimofey Titovets 
108719562430STimofey Titovets /*
108819562430STimofey Titovets  * For increased precision in the shannon_entropy() calculation,
108919562430STimofey Titovets  * let's do pow(n, M) to keep more digits after the decimal point:
109019562430STimofey Titovets  *
109119562430STimofey Titovets  * - maximum int bit length is 64
109219562430STimofey Titovets  * - ilog2(MAX_SAMPLE_SIZE)	-> 13
109319562430STimofey Titovets  * - 13 * 4 = 52 < 64		-> M = 4
109419562430STimofey Titovets  *
109519562430STimofey Titovets  * So use pow(n, 4).
109619562430STimofey Titovets  */
109719562430STimofey Titovets static inline u32 ilog2_w(u64 n)
109819562430STimofey Titovets {
109919562430STimofey Titovets 	return ilog2(n * n * n * n);
110019562430STimofey Titovets }
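
/*
 * Example of the extra precision (illustrative values): for n = 10, plain
 * 4 * ilog2(10) = 12, while ilog2_w(10) = ilog2(10000) = 13, so roughly two
 * fractional bits of log2(n) survive the integer truncation.
 */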
110119562430STimofey Titovets 
110219562430STimofey Titovets static u32 shannon_entropy(struct heuristic_ws *ws)
110319562430STimofey Titovets {
110419562430STimofey Titovets 	const u32 entropy_max = 8 * ilog2_w(2);
110519562430STimofey Titovets 	u32 entropy_sum = 0;
110619562430STimofey Titovets 	u32 p, p_base, sz_base;
110719562430STimofey Titovets 	u32 i;
110819562430STimofey Titovets 
110919562430STimofey Titovets 	sz_base = ilog2_w(ws->sample_size);
111019562430STimofey Titovets 	for (i = 0; i < BUCKET_SIZE && ws->bucket[i].count > 0; i++) {
111119562430STimofey Titovets 		p = ws->bucket[i].count;
111219562430STimofey Titovets 		p_base = ilog2_w(p);
111319562430STimofey Titovets 		entropy_sum += p * (sz_base - p_base);
111419562430STimofey Titovets 	}
111519562430STimofey Titovets 
111619562430STimofey Titovets 	entropy_sum /= ws->sample_size;
111719562430STimofey Titovets 	return entropy_sum * 100 / entropy_max;
111819562430STimofey Titovets }
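
/*
 * Two boundary cases of the estimate above (illustrative, assuming an 8K
 * sample as implied by ilog2(MAX_SAMPLE_SIZE) == 13):
 *
 * - All sampled bytes identical: a single bucket holds the whole sample, so
 *   p_base == sz_base, entropy_sum == 0 and the result is 0%.
 *
 * - Perfectly uniform bytes: each of the 256 buckets holds 32 samples,
 *   sz_base = ilog2_w(8192) = 52, p_base = ilog2_w(32) = 20, so
 *   entropy_sum = 256 * 32 * (52 - 20) / 8192 = 32 == entropy_max, i.e. 100%.
 */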
111919562430STimofey Titovets 
1120440c840cSTimofey Titovets #define RADIX_BASE		4U
1121440c840cSTimofey Titovets #define COUNTERS_SIZE		(1U << RADIX_BASE)
1122858177d3STimofey Titovets 
1123440c840cSTimofey Titovets static u8 get4bits(u64 num, int shift) {
1124440c840cSTimofey Titovets 	u8 low4bits;
1125440c840cSTimofey Titovets 
1126440c840cSTimofey Titovets 	num >>= shift;
1127440c840cSTimofey Titovets 	/* Reverse order */
1128440c840cSTimofey Titovets 	low4bits = (COUNTERS_SIZE - 1) - (num % COUNTERS_SIZE);
1129440c840cSTimofey Titovets 	return low4bits;
1130440c840cSTimofey Titovets }
1131440c840cSTimofey Titovets 
1132440c840cSTimofey Titovets /*
1133440c840cSTimofey Titovets  * Use 4 bits as radix base
113452042d8eSAndrea Gelmini  * Use 16 u32 counters for calculating the new position in the buf array
1135440c840cSTimofey Titovets  *
1136440c840cSTimofey Titovets  * @array     - array that will be sorted
1137440c840cSTimofey Titovets  * @array_buf - buffer array to store sorting results
1138440c840cSTimofey Titovets  *              must be equal in size to @array
1139440c840cSTimofey Titovets  * @num       - array size
1140440c840cSTimofey Titovets  */
114323ae8c63SDavid Sterba static void radix_sort(struct bucket_item *array, struct bucket_item *array_buf,
114236243c91SDavid Sterba 		       int num)
1143440c840cSTimofey Titovets {
1144440c840cSTimofey Titovets 	u64 max_num;
1145440c840cSTimofey Titovets 	u64 buf_num;
1146440c840cSTimofey Titovets 	u32 counters[COUNTERS_SIZE];
1147440c840cSTimofey Titovets 	u32 new_addr;
1148440c840cSTimofey Titovets 	u32 addr;
1149440c840cSTimofey Titovets 	int bitlen;
1150440c840cSTimofey Titovets 	int shift;
1151440c840cSTimofey Titovets 	int i;
1152440c840cSTimofey Titovets 
1153440c840cSTimofey Titovets 	/*
1154440c840cSTimofey Titovets 	 * Try to avoid useless loop iterations for small numbers stored in big
1155440c840cSTimofey Titovets 	 * counters.  Example: 48 33 4 ... in a 64bit array
1156440c840cSTimofey Titovets 	 */
115723ae8c63SDavid Sterba 	max_num = array[0].count;
1158440c840cSTimofey Titovets 	for (i = 1; i < num; i++) {
115923ae8c63SDavid Sterba 		buf_num = array[i].count;
1160440c840cSTimofey Titovets 		if (buf_num > max_num)
1161440c840cSTimofey Titovets 			max_num = buf_num;
1162440c840cSTimofey Titovets 	}
1163440c840cSTimofey Titovets 
1164440c840cSTimofey Titovets 	buf_num = ilog2(max_num);
1165440c840cSTimofey Titovets 	bitlen = ALIGN(buf_num, RADIX_BASE * 2);
1166440c840cSTimofey Titovets 
1167440c840cSTimofey Titovets 	shift = 0;
1168440c840cSTimofey Titovets 	while (shift < bitlen) {
1169440c840cSTimofey Titovets 		memset(counters, 0, sizeof(counters));
1170440c840cSTimofey Titovets 
1171440c840cSTimofey Titovets 		for (i = 0; i < num; i++) {
117223ae8c63SDavid Sterba 			buf_num = array[i].count;
1173440c840cSTimofey Titovets 			addr = get4bits(buf_num, shift);
1174440c840cSTimofey Titovets 			counters[addr]++;
1175440c840cSTimofey Titovets 		}
1176440c840cSTimofey Titovets 
1177440c840cSTimofey Titovets 		for (i = 1; i < COUNTERS_SIZE; i++)
1178440c840cSTimofey Titovets 			counters[i] += counters[i - 1];
1179440c840cSTimofey Titovets 
1180440c840cSTimofey Titovets 		for (i = num - 1; i >= 0; i--) {
118123ae8c63SDavid Sterba 			buf_num = array[i].count;
1182440c840cSTimofey Titovets 			addr = get4bits(buf_num, shift);
1183440c840cSTimofey Titovets 			counters[addr]--;
1184440c840cSTimofey Titovets 			new_addr = counters[addr];
11857add17beSDavid Sterba 			array_buf[new_addr] = array[i];
1186440c840cSTimofey Titovets 		}
1187440c840cSTimofey Titovets 
1188440c840cSTimofey Titovets 		shift += RADIX_BASE;
1189440c840cSTimofey Titovets 
1190440c840cSTimofey Titovets 		/*
1191440c840cSTimofey Titovets 		 * A normal radix sort expects to move data from a temporary
1192440c840cSTimofey Titovets 		 * array back to the main one.  But that requires some CPU time.
1193440c840cSTimofey Titovets 		 * Avoid that by doing another sort iteration into the original
1194440c840cSTimofey Titovets 		 * array instead of a memcpy().
1195440c840cSTimofey Titovets 		 */
1196440c840cSTimofey Titovets 		memset(counters, 0, sizeof(counters));
1197440c840cSTimofey Titovets 
1198440c840cSTimofey Titovets 		for (i = 0; i < num; i++) {
119923ae8c63SDavid Sterba 			buf_num = array_buf[i].count;
1200440c840cSTimofey Titovets 			addr = get4bits(buf_num, shift);
1201440c840cSTimofey Titovets 			counters[addr]++;
1202440c840cSTimofey Titovets 		}
1203440c840cSTimofey Titovets 
1204440c840cSTimofey Titovets 		for (i = 1; i < COUNTERS_SIZE; i++)
1205440c840cSTimofey Titovets 			counters[i] += counters[i - 1];
1206440c840cSTimofey Titovets 
1207440c840cSTimofey Titovets 		for (i = num - 1; i >= 0; i--) {
120823ae8c63SDavid Sterba 			buf_num = array_buf[i].count;
1209440c840cSTimofey Titovets 			addr = get4bits(buf_num, shift);
1210440c840cSTimofey Titovets 			counters[addr]--;
1211440c840cSTimofey Titovets 			new_addr = counters[addr];
12127add17beSDavid Sterba 			array[new_addr] = array_buf[i];
1213440c840cSTimofey Titovets 		}
1214440c840cSTimofey Titovets 
1215440c840cSTimofey Titovets 		shift += RADIX_BASE;
1216440c840cSTimofey Titovets 	}
1217858177d3STimofey Titovets }
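
/*
 * Illustration of the descending order (example counts, not from the source):
 * get4bits() returns the bit-inverted digit, so an ascending counting sort on
 * those digits produces a descending sort of the counts.  For counts
 * {5, 48, 33} the inverted low digits are {10, 15, 14} and the inverted high
 * digits are {15, 12, 13}; after the two passes the array ends up as
 * {48, 33, 5}, which is the order byte_core_set_size() below relies on.
 */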
1218858177d3STimofey Titovets 
1219858177d3STimofey Titovets /*
1220858177d3STimofey Titovets  * Size of the core byte set - how many bytes cover 90% of the sample
1221858177d3STimofey Titovets  *
1222858177d3STimofey Titovets  * There are several types of structured binary data that use nearly all byte
1223858177d3STimofey Titovets  * values. The distribution can be uniform and counts in all buckets will be
1224858177d3STimofey Titovets  * nearly the same (eg. encrypted data). Unlikely to be compressible.
1225858177d3STimofey Titovets  *
1226858177d3STimofey Titovets  * Another possibility is a normal (Gaussian) distribution, where the data
1227858177d3STimofey Titovets  * could potentially be compressible, but we have to take a few more steps to
1228858177d3STimofey Titovets  * decide how much.
1229858177d3STimofey Titovets  *
1230858177d3STimofey Titovets  * @BYTE_CORE_SET_LOW  - the bulk of byte values repeats frequently,
1231858177d3STimofey Titovets  *                       a compression algorithm can handle that easily
1232858177d3STimofey Titovets  * @BYTE_CORE_SET_HIGH - the data has a uniform distribution and with high
1233858177d3STimofey Titovets  *                       probability is not compressible
1234858177d3STimofey Titovets  */
1235858177d3STimofey Titovets #define BYTE_CORE_SET_LOW		(64)
1236858177d3STimofey Titovets #define BYTE_CORE_SET_HIGH		(200)
1237858177d3STimofey Titovets 
1238858177d3STimofey Titovets static int byte_core_set_size(struct heuristic_ws *ws)
1239858177d3STimofey Titovets {
1240858177d3STimofey Titovets 	u32 i;
1241858177d3STimofey Titovets 	u32 coreset_sum = 0;
1242858177d3STimofey Titovets 	const u32 core_set_threshold = ws->sample_size * 90 / 100;
1243858177d3STimofey Titovets 	struct bucket_item *bucket = ws->bucket;
1244858177d3STimofey Titovets 
1245858177d3STimofey Titovets 	/* Sort in reverse order */
124636243c91SDavid Sterba 	radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE);
1247858177d3STimofey Titovets 
1248858177d3STimofey Titovets 	for (i = 0; i < BYTE_CORE_SET_LOW; i++)
1249858177d3STimofey Titovets 		coreset_sum += bucket[i].count;
1250858177d3STimofey Titovets 
1251858177d3STimofey Titovets 	if (coreset_sum > core_set_threshold)
1252858177d3STimofey Titovets 		return i;
1253858177d3STimofey Titovets 
1254858177d3STimofey Titovets 	for (; i < BYTE_CORE_SET_HIGH && bucket[i].count > 0; i++) {
1255858177d3STimofey Titovets 		coreset_sum += bucket[i].count;
1256858177d3STimofey Titovets 		if (coreset_sum > core_set_threshold)
1257858177d3STimofey Titovets 			break;
1258858177d3STimofey Titovets 	}
1259858177d3STimofey Titovets 
1260858177d3STimofey Titovets 	return i;
1261858177d3STimofey Titovets }
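
/*
 * Example of how the result above is used (assuming an 8K sample, so
 * core_set_threshold = 7372): if the 64 most frequent byte values already
 * cover more than 90% of the sample, the function returns 64 and the caller
 * treats the data as compressible (i <= BYTE_CORE_SET_LOW); if even the 200
 * most frequent values do not reach 90%, it returns 200 and the data is
 * considered incompressible (i >= BYTE_CORE_SET_HIGH); anything in between
 * falls through to the entropy check.
 */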
1262858177d3STimofey Titovets 
1263a288e92cSTimofey Titovets /*
1264a288e92cSTimofey Titovets  * Count byte values in buckets.
1265a288e92cSTimofey Titovets  * This heuristic can detect textual data (configs, xml, json, html, etc),
1266a288e92cSTimofey Titovets  * because in most text-like data the byte set is restricted to a limited
1267a288e92cSTimofey Titovets  * number of possible characters, and that restriction in most cases makes the
1268a288e92cSTimofey Titovets  * data easy to compress.
1269a288e92cSTimofey Titovets  *
1270a288e92cSTimofey Titovets  * @BYTE_SET_THRESHOLD - consider all data within this byte set size:
1271a288e92cSTimofey Titovets  *	less - compressible
1272a288e92cSTimofey Titovets  *	more - need additional analysis
1273a288e92cSTimofey Titovets  */
1274a288e92cSTimofey Titovets #define BYTE_SET_THRESHOLD		(64)
1275a288e92cSTimofey Titovets 
1276a288e92cSTimofey Titovets static u32 byte_set_size(const struct heuristic_ws *ws)
1277a288e92cSTimofey Titovets {
1278a288e92cSTimofey Titovets 	u32 i;
1279a288e92cSTimofey Titovets 	u32 byte_set_size = 0;
1280a288e92cSTimofey Titovets 
1281a288e92cSTimofey Titovets 	for (i = 0; i < BYTE_SET_THRESHOLD; i++) {
1282a288e92cSTimofey Titovets 		if (ws->bucket[i].count > 0)
1283a288e92cSTimofey Titovets 			byte_set_size++;
1284a288e92cSTimofey Titovets 	}
1285a288e92cSTimofey Titovets 
1286a288e92cSTimofey Titovets 	/*
1287a288e92cSTimofey Titovets 	 * Continue collecting count of byte values in buckets.  If the byte
1288a288e92cSTimofey Titovets 	 * set size is bigger than the threshold, it's pointless to continue,
1289a288e92cSTimofey Titovets 	 * the detection technique would fail for this type of data.
1290a288e92cSTimofey Titovets 	 */
1291a288e92cSTimofey Titovets 	for (; i < BUCKET_SIZE; i++) {
1292a288e92cSTimofey Titovets 		if (ws->bucket[i].count > 0) {
1293a288e92cSTimofey Titovets 			byte_set_size++;
1294a288e92cSTimofey Titovets 			if (byte_set_size > BYTE_SET_THRESHOLD)
1295a288e92cSTimofey Titovets 				return byte_set_size;
1296a288e92cSTimofey Titovets 		}
1297a288e92cSTimofey Titovets 	}
1298a288e92cSTimofey Titovets 
1299a288e92cSTimofey Titovets 	return byte_set_size;
1300a288e92cSTimofey Titovets }
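
/*
 * For instance, plain ASCII text or simple config files typically use well
 * under 64 distinct byte values, so the count stays below BYTE_SET_THRESHOLD
 * and btrfs_compress_heuristic() accepts the data early; random or encrypted
 * data usually touches far more than 64 distinct values, so the early return
 * in the second loop avoids scanning the remaining buckets.
 */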
1301a288e92cSTimofey Titovets 
13021fe4f6faSTimofey Titovets static bool sample_repeated_patterns(struct heuristic_ws *ws)
13031fe4f6faSTimofey Titovets {
13041fe4f6faSTimofey Titovets 	const u32 half_of_sample = ws->sample_size / 2;
13051fe4f6faSTimofey Titovets 	const u8 *data = ws->sample;
13061fe4f6faSTimofey Titovets 
13071fe4f6faSTimofey Titovets 	return memcmp(&data[0], &data[half_of_sample], half_of_sample) == 0;
13081fe4f6faSTimofey Titovets }
13091fe4f6faSTimofey Titovets 
1310a440d48cSTimofey Titovets static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
1311a440d48cSTimofey Titovets 				     struct heuristic_ws *ws)
1312a440d48cSTimofey Titovets {
1313a440d48cSTimofey Titovets 	struct page *page;
1314a440d48cSTimofey Titovets 	u64 index, index_end;
1315a440d48cSTimofey Titovets 	u32 i, curr_sample_pos;
1316a440d48cSTimofey Titovets 	u8 *in_data;
1317a440d48cSTimofey Titovets 
1318a440d48cSTimofey Titovets 	/*
1319a440d48cSTimofey Titovets 	 * Compression handles the input data by chunks of 128KiB
1320a440d48cSTimofey Titovets 	 * (defined by BTRFS_MAX_UNCOMPRESSED)
1321a440d48cSTimofey Titovets 	 *
1322a440d48cSTimofey Titovets 	 * We do the same for the heuristic and loop over the whole range.
1323a440d48cSTimofey Titovets 	 *
1324a440d48cSTimofey Titovets 	 * MAX_SAMPLE_SIZE - calculated under the assumption that the heuristic
1325a440d48cSTimofey Titovets 	 * will process no more than BTRFS_MAX_UNCOMPRESSED at a time.
1326a440d48cSTimofey Titovets 	 */
1327a440d48cSTimofey Titovets 	if (end - start > BTRFS_MAX_UNCOMPRESSED)
1328a440d48cSTimofey Titovets 		end = start + BTRFS_MAX_UNCOMPRESSED;
1329a440d48cSTimofey Titovets 
1330a440d48cSTimofey Titovets 	index = start >> PAGE_SHIFT;
1331a440d48cSTimofey Titovets 	index_end = end >> PAGE_SHIFT;
1332a440d48cSTimofey Titovets 
1333a440d48cSTimofey Titovets 	/* Don't miss unaligned end */
1334ce394a7fSYushan Zhou 	if (!PAGE_ALIGNED(end))
1335a440d48cSTimofey Titovets 		index_end++;
1336a440d48cSTimofey Titovets 
1337a440d48cSTimofey Titovets 	curr_sample_pos = 0;
1338a440d48cSTimofey Titovets 	while (index < index_end) {
1339a440d48cSTimofey Titovets 		page = find_get_page(inode->i_mapping, index);
134058c1a35cSIra Weiny 		in_data = kmap_local_page(page);
1341a440d48cSTimofey Titovets 		/* Handle case where the start is not aligned to PAGE_SIZE */
1342a440d48cSTimofey Titovets 		i = start % PAGE_SIZE;
1343a440d48cSTimofey Titovets 		while (i < PAGE_SIZE - SAMPLING_READ_SIZE) {
1344a440d48cSTimofey Titovets 			/* Don't sample any garbage from the last page */
1345a440d48cSTimofey Titovets 			if (start > end - SAMPLING_READ_SIZE)
1346a440d48cSTimofey Titovets 				break;
1347a440d48cSTimofey Titovets 			memcpy(&ws->sample[curr_sample_pos], &in_data[i],
1348a440d48cSTimofey Titovets 					SAMPLING_READ_SIZE);
1349a440d48cSTimofey Titovets 			i += SAMPLING_INTERVAL;
1350a440d48cSTimofey Titovets 			start += SAMPLING_INTERVAL;
1351a440d48cSTimofey Titovets 			curr_sample_pos += SAMPLING_READ_SIZE;
1352a440d48cSTimofey Titovets 		}
135358c1a35cSIra Weiny 		kunmap_local(in_data);
1354a440d48cSTimofey Titovets 		put_page(page);
1355a440d48cSTimofey Titovets 
1356a440d48cSTimofey Titovets 		index++;
1357a440d48cSTimofey Titovets 	}
1358a440d48cSTimofey Titovets 
1359a440d48cSTimofey Titovets 	ws->sample_size = curr_sample_pos;
1360a440d48cSTimofey Titovets }
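
/*
 * Sampling sketch (the 8K cap is inferred from ilog2(MAX_SAMPLE_SIZE) == 13):
 * for a full 128K chunk the loop copies SAMPLING_READ_SIZE bytes every
 * SAMPLING_INTERVAL bytes from each page, so roughly 1/16 of the range ends
 * up in ws->sample.  For a short or unaligned range the sample is
 * proportionally smaller, and ws->sample_size records how much was actually
 * collected so that the later per-byte statistics are scaled correctly.
 */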
1361a440d48cSTimofey Titovets 
1362c2fcdcdfSTimofey Titovets /*
1363c2fcdcdfSTimofey Titovets  * Compression heuristic.
1364c2fcdcdfSTimofey Titovets  *
1365c2fcdcdfSTimofey Titovets  * For now it's a naive and optimistic 'return true', we'll extend the logic to
1366c2fcdcdfSTimofey Titovets  * quickly (compared to direct compression) detect data characteristics
136767da05b3SColin Ian King  * (compressible/incompressible) to avoid wasting CPU time on incompressible
1368c2fcdcdfSTimofey Titovets  * data.
1369c2fcdcdfSTimofey Titovets  *
1370c2fcdcdfSTimofey Titovets  * The following types of analysis can be performed:
1371c2fcdcdfSTimofey Titovets  * - detect mostly zero data
1372c2fcdcdfSTimofey Titovets  * - detect data with low "byte set" size (text, etc)
1373c2fcdcdfSTimofey Titovets  * - detect data with low/high "core byte" set
1374c2fcdcdfSTimofey Titovets  *
1375c2fcdcdfSTimofey Titovets  * Return non-zero if the compression should be done, 0 otherwise.
1376c2fcdcdfSTimofey Titovets  */
1377c2fcdcdfSTimofey Titovets int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end)
1378c2fcdcdfSTimofey Titovets {
13797bf49943SDennis Zhou 	struct list_head *ws_list = get_workspace(0, 0);
13804e439a0bSTimofey Titovets 	struct heuristic_ws *ws;
1381a440d48cSTimofey Titovets 	u32 i;
1382a440d48cSTimofey Titovets 	u8 byte;
138319562430STimofey Titovets 	int ret = 0;
1384c2fcdcdfSTimofey Titovets 
13854e439a0bSTimofey Titovets 	ws = list_entry(ws_list, struct heuristic_ws, list);
13864e439a0bSTimofey Titovets 
1387a440d48cSTimofey Titovets 	heuristic_collect_sample(inode, start, end, ws);
1388a440d48cSTimofey Titovets 
13891fe4f6faSTimofey Titovets 	if (sample_repeated_patterns(ws)) {
13901fe4f6faSTimofey Titovets 		ret = 1;
13911fe4f6faSTimofey Titovets 		goto out;
13921fe4f6faSTimofey Titovets 	}
13931fe4f6faSTimofey Titovets 
1394a440d48cSTimofey Titovets 	memset(ws->bucket, 0, sizeof(*ws->bucket)*BUCKET_SIZE);
1395a440d48cSTimofey Titovets 
1396a440d48cSTimofey Titovets 	for (i = 0; i < ws->sample_size; i++) {
1397a440d48cSTimofey Titovets 		byte = ws->sample[i];
1398a440d48cSTimofey Titovets 		ws->bucket[byte].count++;
1399c2fcdcdfSTimofey Titovets 	}
1400c2fcdcdfSTimofey Titovets 
1401a288e92cSTimofey Titovets 	i = byte_set_size(ws);
1402a288e92cSTimofey Titovets 	if (i < BYTE_SET_THRESHOLD) {
1403a288e92cSTimofey Titovets 		ret = 2;
1404a288e92cSTimofey Titovets 		goto out;
1405a288e92cSTimofey Titovets 	}
1406a288e92cSTimofey Titovets 
1407858177d3STimofey Titovets 	i = byte_core_set_size(ws);
1408858177d3STimofey Titovets 	if (i <= BYTE_CORE_SET_LOW) {
1409858177d3STimofey Titovets 		ret = 3;
1410858177d3STimofey Titovets 		goto out;
1411858177d3STimofey Titovets 	}
1412858177d3STimofey Titovets 
1413858177d3STimofey Titovets 	if (i >= BYTE_CORE_SET_HIGH) {
1414858177d3STimofey Titovets 		ret = 0;
1415858177d3STimofey Titovets 		goto out;
1416858177d3STimofey Titovets 	}
1417858177d3STimofey Titovets 
141819562430STimofey Titovets 	i = shannon_entropy(ws);
141919562430STimofey Titovets 	if (i <= ENTROPY_LVL_ACEPTABLE) {
142019562430STimofey Titovets 		ret = 4;
142119562430STimofey Titovets 		goto out;
142219562430STimofey Titovets 	}
142319562430STimofey Titovets 
142419562430STimofey Titovets 	/*
142519562430STimofey Titovets 	 * For the levels below ENTROPY_LVL_HIGH, additional analysis would be
142619562430STimofey Titovets 	 * needed to give green light to compression.
142719562430STimofey Titovets 	 *
142819562430STimofey Titovets 	 * For now just assume that compression at that level is not worth the
142919562430STimofey Titovets 	 * resources because:
143019562430STimofey Titovets 	 *
143119562430STimofey Titovets 	 * 1. it is possible to defrag the data later
143219562430STimofey Titovets 	 *
143319562430STimofey Titovets 	 * 2. the data would turn out to be hardly compressible, eg. 150 byte
143419562430STimofey Titovets 	 * values, every bucket has counter at level ~54. The heuristic would
143519562430STimofey Titovets 	 * be confused. This can happen when data have some internal repeated
143619562430STimofey Titovets 	 * patterns like "abbacbbc...". This can be detected by analyzing
143719562430STimofey Titovets 	 * pairs of bytes, which is too costly.
143819562430STimofey Titovets 	 */
143919562430STimofey Titovets 	if (i < ENTROPY_LVL_HIGH) {
144019562430STimofey Titovets 		ret = 5;
144119562430STimofey Titovets 		goto out;
144219562430STimofey Titovets 	} else {
144319562430STimofey Titovets 		ret = 0;
144419562430STimofey Titovets 		goto out;
144519562430STimofey Titovets 	}
144619562430STimofey Titovets 
14471fe4f6faSTimofey Titovets out:
1448929f4bafSDennis Zhou 	put_workspace(0, ws_list);
1449c2fcdcdfSTimofey Titovets 	return ret;
1450c2fcdcdfSTimofey Titovets }
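
/*
 * Summary of the non-zero return values above (all mean "worth compressing";
 * the specific value only records which check fired): 1 - repeated pattern in
 * the sample, 2 - small byte set, 3 - small core byte set, 4 - low Shannon
 * entropy, 5 - medium entropy given the benefit of the doubt.  0 means the
 * data looked incompressible.
 */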
1451f51d2b59SDavid Sterba 
1452d0ab62ceSDennis Zhou /*
1453d0ab62ceSDennis Zhou  * Convert the compression suffix (eg. after "zlib" starting with ":") to
1454d0ab62ceSDennis Zhou  * level, unrecognized string will set the default level
1455d0ab62ceSDennis Zhou  * a level, an unrecognized string will set the default level
1456d0ab62ceSDennis Zhou unsigned int btrfs_compress_str2level(unsigned int type, const char *str)
1457f51d2b59SDavid Sterba {
1458d0ab62ceSDennis Zhou 	unsigned int level = 0;
1459d0ab62ceSDennis Zhou 	int ret;
1460d0ab62ceSDennis Zhou 
1461d0ab62ceSDennis Zhou 	if (!type)
1462f51d2b59SDavid Sterba 		return 0;
1463f51d2b59SDavid Sterba 
1464d0ab62ceSDennis Zhou 	if (str[0] == ':') {
1465d0ab62ceSDennis Zhou 		ret = kstrtouint(str + 1, 10, &level);
1466d0ab62ceSDennis Zhou 		if (ret)
1467d0ab62ceSDennis Zhou 			level = 0;
1468d0ab62ceSDennis Zhou 	}
1469f51d2b59SDavid Sterba 
1470b0c1fe1eSDavid Sterba 	level = btrfs_compress_set_level(type, level);
1471b0c1fe1eSDavid Sterba 
1472b0c1fe1eSDavid Sterba 	return level;
1473b0c1fe1eSDavid Sterba }
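
/*
 * For example, a suffix of ":3" (as in the mount option "compress=zlib:3")
 * yields level 3, while an empty or malformed suffix leaves the level at 0
 * and btrfs_compress_set_level() substitutes the default for that algorithm.
 */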
1474