xref: /openbmc/linux/drivers/md/md-bitmap.c (revision d37cf9b63113f13d742713881ce691fc615d8b3b)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
4   *
5   * bitmap_create  - sets up the bitmap structure
6   * bitmap_destroy - destroys the bitmap structure
7   *
8   * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
9   * - added disk storage for bitmap
10   * - changes to allow various bitmap chunk sizes
11   */
12  
13  /*
14   * Still to do:
15   *
16   * flush after percent set rather than just time based. (maybe both).
17   */
18  
19  #include <linux/blkdev.h>
20  #include <linux/module.h>
21  #include <linux/errno.h>
22  #include <linux/slab.h>
23  #include <linux/init.h>
24  #include <linux/timer.h>
25  #include <linux/sched.h>
26  #include <linux/list.h>
27  #include <linux/file.h>
28  #include <linux/mount.h>
29  #include <linux/buffer_head.h>
30  #include <linux/seq_file.h>
31  #include <trace/events/block.h>
32  #include "md.h"
33  #include "md-bitmap.h"
34  
bmname(struct bitmap * bitmap)35  static inline char *bmname(struct bitmap *bitmap)
36  {
37  	return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
38  }
39  
40  /*
41   * check a page and, if necessary, allocate it (or hijack it if the alloc fails)
42   *
43   * 1) check to see if this page is allocated, if it's not then try to alloc
44   * 2) if the alloc fails, set the page's hijacked flag so we'll use the
45   *    page pointer directly as a counter
46   *
47   * if we find our page, we increment the page's refcount so that it stays
48   * allocated while we're using it
49   */
md_bitmap_checkpage(struct bitmap_counts * bitmap,unsigned long page,int create,int no_hijack)50  static int md_bitmap_checkpage(struct bitmap_counts *bitmap,
51  			       unsigned long page, int create, int no_hijack)
52  __releases(bitmap->lock)
53  __acquires(bitmap->lock)
54  {
55  	unsigned char *mappage;
56  
57  	WARN_ON_ONCE(page >= bitmap->pages);
58  	if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
59  		return 0;
60  
61  	if (bitmap->bp[page].map) /* page is already allocated, just return */
62  		return 0;
63  
64  	if (!create)
65  		return -ENOENT;
66  
67  	/* this page has not been allocated yet */
68  
69  	spin_unlock_irq(&bitmap->lock);
70  	/* It is possible that this is being called inside a
71  	 * prepare_to_wait/finish_wait loop from raid5c:make_request().
72  	 * In general it is not permitted to sleep in that context as it
73  	 * can cause the loop to spin freely.
74  	 * That doesn't apply here as we can only reach this point
75  	 * once with any loop.
76  	 * When this function completes, either bp[page].map or
77  	 * bp[page].hijacked.  In either case, this function will
78  	 * abort before getting to this point again.  So there is
79  	 * no risk of a free-spin, and so it is safe to assert
80  	 * that sleeping here is allowed.
81  	 */
82  	sched_annotate_sleep();
83  	mappage = kzalloc(PAGE_SIZE, GFP_NOIO);
84  	spin_lock_irq(&bitmap->lock);
85  
86  	if (mappage == NULL) {
87  		pr_debug("md/bitmap: map page allocation failed, hijacking\n");
88  		/* We don't support hijack for cluster raid */
89  		if (no_hijack)
90  			return -ENOMEM;
91  		/* failed - set the hijacked flag so that we can use the
92  		 * pointer as a counter */
93  		if (!bitmap->bp[page].map)
94  			bitmap->bp[page].hijacked = 1;
95  	} else if (bitmap->bp[page].map ||
96  		   bitmap->bp[page].hijacked) {
97  		/* somebody beat us to getting the page */
98  		kfree(mappage);
99  	} else {
100  
101  		/* no page was in place and we have one, so install it */
102  
103  		bitmap->bp[page].map = mappage;
104  		bitmap->missing_pages--;
105  	}
106  	return 0;
107  }
108  
109  /* if page is completely empty, put it back on the free list, or dealloc it */
110  /* if page was hijacked, unmark the flag so it might get alloced next time */
111  /* Note: lock should be held when calling this */
md_bitmap_checkfree(struct bitmap_counts * bitmap,unsigned long page)112  static void md_bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page)
113  {
114  	char *ptr;
115  
116  	if (bitmap->bp[page].count) /* page is still busy */
117  		return;
118  
119  	/* page is no longer in use, it can be released */
120  
121  	if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */
122  		bitmap->bp[page].hijacked = 0;
123  		bitmap->bp[page].map = NULL;
124  	} else {
125  		/* normal case, free the page */
126  		ptr = bitmap->bp[page].map;
127  		bitmap->bp[page].map = NULL;
128  		bitmap->missing_pages++;
129  		kfree(ptr);
130  	}
131  }
132  
133  /*
134   * bitmap file handling - read and write the bitmap file and its superblock
135   */
136  
137  /*
138   * basic page I/O operations
139   */
140  
141  /* IO operations when bitmap is stored near all superblocks */
142  
143  /* choose a good rdev and read the page from there */
read_sb_page(struct mddev * mddev,loff_t offset,struct page * page,unsigned long index,int size)144  static int read_sb_page(struct mddev *mddev, loff_t offset,
145  		struct page *page, unsigned long index, int size)
146  {
147  
148  	sector_t sector = mddev->bitmap_info.offset + offset +
149  		index * (PAGE_SIZE / SECTOR_SIZE);
150  	struct md_rdev *rdev;
151  
152  	rdev_for_each(rdev, mddev) {
153  		u32 iosize = roundup(size, bdev_logical_block_size(rdev->bdev));
154  
155  		if (!test_bit(In_sync, &rdev->flags) ||
156  		    test_bit(Faulty, &rdev->flags) ||
157  		    test_bit(Bitmap_sync, &rdev->flags))
158  			continue;
159  
160  		if (sync_page_io(rdev, sector, iosize, page, REQ_OP_READ, true))
161  			return 0;
162  	}
163  	return -EIO;
164  }
165  
next_active_rdev(struct md_rdev * rdev,struct mddev * mddev)166  static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev)
167  {
168  	/* Iterate the disks of an mddev, using rcu to protect access to the
169  	 * linked list, and raising the refcount of devices we return to ensure
170  	 * they don't disappear while in use.
171  	 * As devices are only added or removed when raid_disk is < 0 and
172  	 * nr_pending is 0 and In_sync is clear, the entries we return will
173  	 * still be in the same position on the list when we re-enter
174  	 * list_for_each_entry_continue_rcu.
175  	 *
176  	 * Note that if entered with 'rdev == NULL' to start at the
177  	 * beginning, we temporarily assign 'rdev' to an address which
178  	 * isn't really an rdev, but which can be used by
179  	 * list_for_each_entry_continue_rcu() to find the first entry.
180  	 */
181  	rcu_read_lock();
182  	if (rdev == NULL)
183  		/* start at the beginning */
184  		rdev = list_entry(&mddev->disks, struct md_rdev, same_set);
185  	else {
186  		/* release the previous rdev and start from there. */
187  		rdev_dec_pending(rdev, mddev);
188  	}
189  	list_for_each_entry_continue_rcu(rdev, &mddev->disks, same_set) {
190  		if (rdev->raid_disk >= 0 &&
191  		    !test_bit(Faulty, &rdev->flags)) {
192  			/* this is a usable devices */
193  			atomic_inc(&rdev->nr_pending);
194  			rcu_read_unlock();
195  			return rdev;
196  		}
197  	}
198  	rcu_read_unlock();
199  	return NULL;
200  }
201  
/*
 * Pick the preferred write size for the last bitmap page on @bdev.
 *
 * When the device reports an optimal I/O size larger than its logical
 * block size, @last_page_size is rounded up to that optimal size;
 * otherwise the caller's @io_size is returned unchanged.
 */
static unsigned int optimal_io_size(struct block_device *bdev,
				    unsigned int last_page_size,
				    unsigned int io_size)
{
	if (bdev_io_opt(bdev) <= bdev_logical_block_size(bdev))
		return io_size;

	return roundup(last_page_size, bdev_io_opt(bdev));
}
210  
/*
 * Choose how many bytes may be written starting at sector @start without
 * crossing sector @boundary.
 *
 * @opt_size is preferred when it differs from @io_size and still fits;
 * otherwise @io_size is used if it fits.  Returns 0 when neither size fits.
 */
static unsigned int bitmap_io_size(unsigned int io_size, unsigned int opt_size,
				   loff_t start, loff_t boundary)
{
	bool opt_fits = io_size != opt_size &&
			start + opt_size / SECTOR_SIZE <= boundary;

	if (opt_fits)
		return opt_size;

	/* 0 means the write would overflow the boundary */
	return (start + io_size / SECTOR_SIZE <= boundary) ? io_size : 0;
}
223  
__write_sb_page(struct md_rdev * rdev,struct bitmap * bitmap,unsigned long pg_index,struct page * page)224  static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
225  			   unsigned long pg_index, struct page *page)
226  {
227  	struct block_device *bdev;
228  	struct mddev *mddev = bitmap->mddev;
229  	struct bitmap_storage *store = &bitmap->storage;
230  	unsigned int bitmap_limit = (bitmap->storage.file_pages - pg_index) <<
231  		PAGE_SHIFT;
232  	loff_t sboff, offset = mddev->bitmap_info.offset;
233  	sector_t ps = pg_index * PAGE_SIZE / SECTOR_SIZE;
234  	unsigned int size = PAGE_SIZE;
235  	unsigned int opt_size = PAGE_SIZE;
236  	sector_t doff;
237  
238  	bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
239  	/* we compare length (page numbers), not page offset. */
240  	if ((pg_index - store->sb_index) == store->file_pages - 1) {
241  		unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1);
242  
243  		if (last_page_size == 0)
244  			last_page_size = PAGE_SIZE;
245  		size = roundup(last_page_size, bdev_logical_block_size(bdev));
246  		opt_size = optimal_io_size(bdev, last_page_size, size);
247  	}
248  
249  	sboff = rdev->sb_start + offset;
250  	doff = rdev->data_offset;
251  
252  	/* Just make sure we aren't corrupting data or metadata */
253  	if (mddev->external) {
254  		/* Bitmap could be anywhere. */
255  		if (sboff + ps > doff &&
256  		    sboff < (doff + mddev->dev_sectors + PAGE_SIZE / SECTOR_SIZE))
257  			return -EINVAL;
258  	} else if (offset < 0) {
259  		/* DATA  BITMAP METADATA  */
260  		size = bitmap_io_size(size, opt_size, offset + ps, 0);
261  		if (size == 0)
262  			/* bitmap runs in to metadata */
263  			return -EINVAL;
264  
265  		if (doff + mddev->dev_sectors > sboff)
266  			/* data runs in to bitmap */
267  			return -EINVAL;
268  	} else if (rdev->sb_start < rdev->data_offset) {
269  		/* METADATA BITMAP DATA */
270  		size = bitmap_io_size(size, opt_size, sboff + ps, doff);
271  		if (size == 0)
272  			/* bitmap runs in to data */
273  			return -EINVAL;
274  	}
275  
276  	md_super_write(mddev, rdev, sboff + ps, (int)min(size, bitmap_limit), page);
277  	return 0;
278  }
279  
/*
 * Write one bitmap page to every active member device.
 *
 * @bitmap:   bitmap the page belongs to
 * @pg_index: on-disk page index, passed through to __write_sb_page()
 * @page:     page holding the data
 * @wait:     when true, wait for the queued writes with md_super_wait()
 *            and redo the whole round while it reports failure
 *
 * If a device rejects the write, BITMAP_WRITE_ERROR is set in
 * bitmap->flags and the function returns without touching the remaining
 * devices.
 */
static void write_sb_page(struct bitmap *bitmap, unsigned long pg_index,
			  struct page *page, bool wait)
{
	struct mddev *mddev = bitmap->mddev;

	do {
		struct md_rdev *rdev = NULL;

		while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
			if (__write_sb_page(rdev, bitmap, pg_index, page) < 0) {
				set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
				/*
				 * next_active_rdev() raised nr_pending on
				 * this device and only drops it when asked
				 * for the following one; since we stop
				 * iterating here, drop it ourselves.
				 */
				rdev_dec_pending(rdev, mddev);
				return;
			}
		}
	} while (wait && md_super_wait(mddev) < 0);
}
296  
297  static void md_bitmap_file_kick(struct bitmap *bitmap);
298  
299  #ifdef CONFIG_MD_BITMAP_FILE
write_file_page(struct bitmap * bitmap,struct page * page,int wait)300  static void write_file_page(struct bitmap *bitmap, struct page *page, int wait)
301  {
302  	struct buffer_head *bh = page_buffers(page);
303  
304  	while (bh && bh->b_blocknr) {
305  		atomic_inc(&bitmap->pending_writes);
306  		set_buffer_locked(bh);
307  		set_buffer_mapped(bh);
308  		submit_bh(REQ_OP_WRITE | REQ_SYNC, bh);
309  		bh = bh->b_this_page;
310  	}
311  
312  	if (wait)
313  		wait_event(bitmap->write_wait,
314  			   atomic_read(&bitmap->pending_writes) == 0);
315  }
316  
end_bitmap_write(struct buffer_head * bh,int uptodate)317  static void end_bitmap_write(struct buffer_head *bh, int uptodate)
318  {
319  	struct bitmap *bitmap = bh->b_private;
320  
321  	if (!uptodate)
322  		set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
323  	if (atomic_dec_and_test(&bitmap->pending_writes))
324  		wake_up(&bitmap->write_wait);
325  }
326  
free_buffers(struct page * page)327  static void free_buffers(struct page *page)
328  {
329  	struct buffer_head *bh;
330  
331  	if (!PagePrivate(page))
332  		return;
333  
334  	bh = page_buffers(page);
335  	while (bh) {
336  		struct buffer_head *next = bh->b_this_page;
337  		free_buffer_head(bh);
338  		bh = next;
339  	}
340  	detach_page_private(page);
341  	put_page(page);
342  }
343  
344  /* read a page from a file.
345   * We both read the page, and attach buffers to the page to record the
346   * address of each block (using bmap).  These addresses will be used
347   * to write the block later, completely bypassing the filesystem.
348   * This usage is similar to how swap files are handled, and allows us
349   * to write to a file with no concerns of memory allocation failing.
350   */
read_file_page(struct file * file,unsigned long index,struct bitmap * bitmap,unsigned long count,struct page * page)351  static int read_file_page(struct file *file, unsigned long index,
352  		struct bitmap *bitmap, unsigned long count, struct page *page)
353  {
354  	int ret = 0;
355  	struct inode *inode = file_inode(file);
356  	struct buffer_head *bh;
357  	sector_t block, blk_cur;
358  	unsigned long blocksize = i_blocksize(inode);
359  
360  	pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
361  		 (unsigned long long)index << PAGE_SHIFT);
362  
363  	bh = alloc_page_buffers(page, blocksize, false);
364  	if (!bh) {
365  		ret = -ENOMEM;
366  		goto out;
367  	}
368  	attach_page_private(page, bh);
369  	blk_cur = index << (PAGE_SHIFT - inode->i_blkbits);
370  	while (bh) {
371  		block = blk_cur;
372  
373  		if (count == 0)
374  			bh->b_blocknr = 0;
375  		else {
376  			ret = bmap(inode, &block);
377  			if (ret || !block) {
378  				ret = -EINVAL;
379  				bh->b_blocknr = 0;
380  				goto out;
381  			}
382  
383  			bh->b_blocknr = block;
384  			bh->b_bdev = inode->i_sb->s_bdev;
385  			if (count < blocksize)
386  				count = 0;
387  			else
388  				count -= blocksize;
389  
390  			bh->b_end_io = end_bitmap_write;
391  			bh->b_private = bitmap;
392  			atomic_inc(&bitmap->pending_writes);
393  			set_buffer_locked(bh);
394  			set_buffer_mapped(bh);
395  			submit_bh(REQ_OP_READ, bh);
396  		}
397  		blk_cur++;
398  		bh = bh->b_this_page;
399  	}
400  
401  	wait_event(bitmap->write_wait,
402  		   atomic_read(&bitmap->pending_writes)==0);
403  	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
404  		ret = -EIO;
405  out:
406  	if (ret)
407  		pr_err("md: bitmap read error: (%dB @ %llu): %d\n",
408  		       (int)PAGE_SIZE,
409  		       (unsigned long long)index << PAGE_SHIFT,
410  		       ret);
411  	return ret;
412  }
413  #else /* CONFIG_MD_BITMAP_FILE */
/* Stub for builds without CONFIG_MD_BITMAP_FILE: file writes do nothing. */
static void write_file_page(struct bitmap *bitmap, struct page *page, int wait)
{
	/* intentionally empty */
}
read_file_page(struct file * file,unsigned long index,struct bitmap * bitmap,unsigned long count,struct page * page)417  static int read_file_page(struct file *file, unsigned long index,
418  		struct bitmap *bitmap, unsigned long count, struct page *page)
419  {
420  	return -EIO;
421  }
/*
 * Stub for builds without CONFIG_MD_BITMAP_FILE: no buffer heads are
 * handled here, so only the page reference is dropped.
 */
static void free_buffers(struct page *page)
{
	put_page(page);
}
426  #endif /* CONFIG_MD_BITMAP_FILE */
427  
428  /*
429   * bitmap file superblock operations
430   */
431  
432  /*
433   * write out a page to a file
434   */
filemap_write_page(struct bitmap * bitmap,unsigned long pg_index,bool wait)435  static void filemap_write_page(struct bitmap *bitmap, unsigned long pg_index,
436  			       bool wait)
437  {
438  	struct bitmap_storage *store = &bitmap->storage;
439  	struct page *page = store->filemap[pg_index];
440  
441  	if (mddev_is_clustered(bitmap->mddev)) {
442  		/* go to node bitmap area starting point */
443  		pg_index += store->sb_index;
444  	}
445  
446  	if (store->file)
447  		write_file_page(bitmap, page, wait);
448  	else
449  		write_sb_page(bitmap, pg_index, page, wait);
450  }
451  
452  /*
453   * md_bitmap_wait_writes() should be called before writing any bitmap
454   * blocks, to ensure previous writes, particularly from
455   * md_bitmap_daemon_work(), have completed.
456   */
md_bitmap_wait_writes(struct bitmap * bitmap)457  static void md_bitmap_wait_writes(struct bitmap *bitmap)
458  {
459  	if (bitmap->storage.file)
460  		wait_event(bitmap->write_wait,
461  			   atomic_read(&bitmap->pending_writes)==0);
462  	else
463  		/* Note that we ignore the return value.  The writes
464  		 * might have failed, but that would just mean that
465  		 * some bits which should be cleared haven't been,
466  		 * which is safe.  The relevant bitmap blocks will
467  		 * probably get written again, but there is no great
468  		 * loss if they aren't.
469  		 */
470  		md_super_wait(bitmap->mddev);
471  }
472  
473  
474  /* update the event counter and sync the superblock to disk */
md_bitmap_update_sb(struct bitmap * bitmap)475  void md_bitmap_update_sb(struct bitmap *bitmap)
476  {
477  	bitmap_super_t *sb;
478  
479  	if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
480  		return;
481  	if (bitmap->mddev->bitmap_info.external)
482  		return;
483  	if (!bitmap->storage.sb_page) /* no superblock */
484  		return;
485  	sb = kmap_atomic(bitmap->storage.sb_page);
486  	sb->events = cpu_to_le64(bitmap->mddev->events);
487  	if (bitmap->mddev->events < bitmap->events_cleared)
488  		/* rocking back to read-only */
489  		bitmap->events_cleared = bitmap->mddev->events;
490  	sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
491  	/*
492  	 * clear BITMAP_WRITE_ERROR bit to protect against the case that
493  	 * a bitmap write error occurred but the later writes succeeded.
494  	 */
495  	sb->state = cpu_to_le32(bitmap->flags & ~BIT(BITMAP_WRITE_ERROR));
496  	/* Just in case these have been changed via sysfs: */
497  	sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
498  	sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
499  	/* This might have been changed by a reshape */
500  	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
501  	sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize);
502  	sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes);
503  	sb->sectors_reserved = cpu_to_le32(bitmap->mddev->
504  					   bitmap_info.space);
505  	kunmap_atomic(sb);
506  
507  	if (bitmap->storage.file)
508  		write_file_page(bitmap, bitmap->storage.sb_page, 1);
509  	else
510  		write_sb_page(bitmap, bitmap->storage.sb_index,
511  			      bitmap->storage.sb_page, 1);
512  }
513  EXPORT_SYMBOL(md_bitmap_update_sb);
514  
515  /* print out the bitmap file superblock */
md_bitmap_print_sb(struct bitmap * bitmap)516  void md_bitmap_print_sb(struct bitmap *bitmap)
517  {
518  	bitmap_super_t *sb;
519  
520  	if (!bitmap || !bitmap->storage.sb_page)
521  		return;
522  	sb = kmap_atomic(bitmap->storage.sb_page);
523  	pr_debug("%s: bitmap file superblock:\n", bmname(bitmap));
524  	pr_debug("         magic: %08x\n", le32_to_cpu(sb->magic));
525  	pr_debug("       version: %u\n", le32_to_cpu(sb->version));
526  	pr_debug("          uuid: %08x.%08x.%08x.%08x\n",
527  		 le32_to_cpu(*(__le32 *)(sb->uuid+0)),
528  		 le32_to_cpu(*(__le32 *)(sb->uuid+4)),
529  		 le32_to_cpu(*(__le32 *)(sb->uuid+8)),
530  		 le32_to_cpu(*(__le32 *)(sb->uuid+12)));
531  	pr_debug("        events: %llu\n",
532  		 (unsigned long long) le64_to_cpu(sb->events));
533  	pr_debug("events cleared: %llu\n",
534  		 (unsigned long long) le64_to_cpu(sb->events_cleared));
535  	pr_debug("         state: %08x\n", le32_to_cpu(sb->state));
536  	pr_debug("     chunksize: %u B\n", le32_to_cpu(sb->chunksize));
537  	pr_debug("  daemon sleep: %us\n", le32_to_cpu(sb->daemon_sleep));
538  	pr_debug("     sync size: %llu KB\n",
539  		 (unsigned long long)le64_to_cpu(sb->sync_size)/2);
540  	pr_debug("max write behind: %u\n", le32_to_cpu(sb->write_behind));
541  	kunmap_atomic(sb);
542  }
543  
544  /*
545   * bitmap_new_disk_sb
546   * @bitmap
547   *
548   * This function is somewhat the reverse of bitmap_read_sb.  bitmap_read_sb
549   * reads and verifies the on-disk bitmap superblock and populates bitmap_info.
550   * This function verifies 'bitmap_info' and populates the on-disk bitmap
551   * structure, which is to be written to disk.
552   *
553   * Returns: 0 on success, -Exxx on error
554   */
md_bitmap_new_disk_sb(struct bitmap * bitmap)555  static int md_bitmap_new_disk_sb(struct bitmap *bitmap)
556  {
557  	bitmap_super_t *sb;
558  	unsigned long chunksize, daemon_sleep, write_behind;
559  
560  	bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
561  	if (bitmap->storage.sb_page == NULL)
562  		return -ENOMEM;
563  	bitmap->storage.sb_index = 0;
564  
565  	sb = kmap_atomic(bitmap->storage.sb_page);
566  
567  	sb->magic = cpu_to_le32(BITMAP_MAGIC);
568  	sb->version = cpu_to_le32(BITMAP_MAJOR_HI);
569  
570  	chunksize = bitmap->mddev->bitmap_info.chunksize;
571  	BUG_ON(!chunksize);
572  	if (!is_power_of_2(chunksize)) {
573  		kunmap_atomic(sb);
574  		pr_warn("bitmap chunksize not a power of 2\n");
575  		return -EINVAL;
576  	}
577  	sb->chunksize = cpu_to_le32(chunksize);
578  
579  	daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
580  	if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
581  		pr_debug("Choosing daemon_sleep default (5 sec)\n");
582  		daemon_sleep = 5 * HZ;
583  	}
584  	sb->daemon_sleep = cpu_to_le32(daemon_sleep);
585  	bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
586  
587  	/*
588  	 * FIXME: write_behind for RAID1.  If not specified, what
589  	 * is a good choice?  We choose COUNTER_MAX / 2 arbitrarily.
590  	 */
591  	write_behind = bitmap->mddev->bitmap_info.max_write_behind;
592  	if (write_behind > COUNTER_MAX)
593  		write_behind = COUNTER_MAX / 2;
594  	sb->write_behind = cpu_to_le32(write_behind);
595  	bitmap->mddev->bitmap_info.max_write_behind = write_behind;
596  
597  	/* keep the array size field of the bitmap superblock up to date */
598  	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
599  
600  	memcpy(sb->uuid, bitmap->mddev->uuid, 16);
601  
602  	set_bit(BITMAP_STALE, &bitmap->flags);
603  	sb->state = cpu_to_le32(bitmap->flags);
604  	bitmap->events_cleared = bitmap->mddev->events;
605  	sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
606  	bitmap->mddev->bitmap_info.nodes = 0;
607  
608  	kunmap_atomic(sb);
609  
610  	return 0;
611  }
612  
613  /* read the superblock from the bitmap file and initialize some bitmap fields */
md_bitmap_read_sb(struct bitmap * bitmap)614  static int md_bitmap_read_sb(struct bitmap *bitmap)
615  {
616  	char *reason = NULL;
617  	bitmap_super_t *sb;
618  	unsigned long chunksize, daemon_sleep, write_behind;
619  	unsigned long long events;
620  	int nodes = 0;
621  	unsigned long sectors_reserved = 0;
622  	int err = -EINVAL;
623  	struct page *sb_page;
624  	loff_t offset = 0;
625  
626  	if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) {
627  		chunksize = 128 * 1024 * 1024;
628  		daemon_sleep = 5 * HZ;
629  		write_behind = 0;
630  		set_bit(BITMAP_STALE, &bitmap->flags);
631  		err = 0;
632  		goto out_no_sb;
633  	}
634  	/* page 0 is the superblock, read it... */
635  	sb_page = alloc_page(GFP_KERNEL);
636  	if (!sb_page)
637  		return -ENOMEM;
638  	bitmap->storage.sb_page = sb_page;
639  
640  re_read:
641  	/* If cluster_slot is set, the cluster is setup */
642  	if (bitmap->cluster_slot >= 0) {
643  		sector_t bm_blocks = bitmap->mddev->resync_max_sectors;
644  
645  		bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks,
646  			   (bitmap->mddev->bitmap_info.chunksize >> 9));
647  		/* bits to bytes */
648  		bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t);
649  		/* to 4k blocks */
650  		bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096);
651  		offset = bitmap->cluster_slot * (bm_blocks << 3);
652  		pr_debug("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
653  			bitmap->cluster_slot, offset);
654  	}
655  
656  	if (bitmap->storage.file) {
657  		loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host);
658  		int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;
659  
660  		err = read_file_page(bitmap->storage.file, 0,
661  				bitmap, bytes, sb_page);
662  	} else {
663  		err = read_sb_page(bitmap->mddev, offset, sb_page, 0,
664  				   sizeof(bitmap_super_t));
665  	}
666  	if (err)
667  		return err;
668  
669  	err = -EINVAL;
670  	sb = kmap_atomic(sb_page);
671  
672  	chunksize = le32_to_cpu(sb->chunksize);
673  	daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
674  	write_behind = le32_to_cpu(sb->write_behind);
675  	sectors_reserved = le32_to_cpu(sb->sectors_reserved);
676  
677  	/* verify that the bitmap-specific fields are valid */
678  	if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
679  		reason = "bad magic";
680  	else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
681  		 le32_to_cpu(sb->version) > BITMAP_MAJOR_CLUSTERED)
682  		reason = "unrecognized superblock version";
683  	else if (chunksize < 512)
684  		reason = "bitmap chunksize too small";
685  	else if (!is_power_of_2(chunksize))
686  		reason = "bitmap chunksize not a power of 2";
687  	else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT)
688  		reason = "daemon sleep period out of range";
689  	else if (write_behind > COUNTER_MAX)
690  		reason = "write-behind limit out of range (0 - 16383)";
691  	if (reason) {
692  		pr_warn("%s: invalid bitmap file superblock: %s\n",
693  			bmname(bitmap), reason);
694  		goto out;
695  	}
696  
697  	/*
698  	 * Setup nodes/clustername only if bitmap version is
699  	 * cluster-compatible
700  	 */
701  	if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
702  		nodes = le32_to_cpu(sb->nodes);
703  		strscpy(bitmap->mddev->bitmap_info.cluster_name,
704  				sb->cluster_name, 64);
705  	}
706  
707  	/* keep the array size field of the bitmap superblock up to date */
708  	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
709  
710  	if (bitmap->mddev->persistent) {
711  		/*
712  		 * We have a persistent array superblock, so compare the
713  		 * bitmap's UUID and event counter to the mddev's
714  		 */
715  		if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
716  			pr_warn("%s: bitmap superblock UUID mismatch\n",
717  				bmname(bitmap));
718  			goto out;
719  		}
720  		events = le64_to_cpu(sb->events);
721  		if (!nodes && (events < bitmap->mddev->events)) {
722  			pr_warn("%s: bitmap file is out of date (%llu < %llu) -- forcing full recovery\n",
723  				bmname(bitmap), events,
724  				(unsigned long long) bitmap->mddev->events);
725  			set_bit(BITMAP_STALE, &bitmap->flags);
726  		}
727  	}
728  
729  	/* assign fields using values from superblock */
730  	bitmap->flags |= le32_to_cpu(sb->state);
731  	if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
732  		set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
733  	bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
734  	err = 0;
735  
736  out:
737  	kunmap_atomic(sb);
738  	if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
739  		/* Assigning chunksize is required for "re_read" */
740  		bitmap->mddev->bitmap_info.chunksize = chunksize;
741  		err = md_setup_cluster(bitmap->mddev, nodes);
742  		if (err) {
743  			pr_warn("%s: Could not setup cluster service (%d)\n",
744  				bmname(bitmap), err);
745  			goto out_no_sb;
746  		}
747  		bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev);
748  		goto re_read;
749  	}
750  
751  out_no_sb:
752  	if (err == 0) {
753  		if (test_bit(BITMAP_STALE, &bitmap->flags))
754  			bitmap->events_cleared = bitmap->mddev->events;
755  		bitmap->mddev->bitmap_info.chunksize = chunksize;
756  		bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
757  		bitmap->mddev->bitmap_info.max_write_behind = write_behind;
758  		bitmap->mddev->bitmap_info.nodes = nodes;
759  		if (bitmap->mddev->bitmap_info.space == 0 ||
760  			bitmap->mddev->bitmap_info.space > sectors_reserved)
761  			bitmap->mddev->bitmap_info.space = sectors_reserved;
762  	} else {
763  		md_bitmap_print_sb(bitmap);
764  		if (bitmap->cluster_slot < 0)
765  			md_cluster_stop(bitmap->mddev);
766  	}
767  	return err;
768  }
769  
770  /*
771   * general bitmap file operations
772   */
773  
774  /*
775   * on-disk bitmap:
776   *
777   * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
778   * file a page at a time. There's a superblock at the start of the file.
779   */
780  /* calculate the index of the page that contains this bit */
file_page_index(struct bitmap_storage * store,unsigned long chunk)781  static inline unsigned long file_page_index(struct bitmap_storage *store,
782  					    unsigned long chunk)
783  {
784  	if (store->sb_page)
785  		chunk += sizeof(bitmap_super_t) << 3;
786  	return chunk >> PAGE_BIT_SHIFT;
787  }
788  
789  /* calculate the (bit) offset of this bit within a page */
file_page_offset(struct bitmap_storage * store,unsigned long chunk)790  static inline unsigned long file_page_offset(struct bitmap_storage *store,
791  					     unsigned long chunk)
792  {
793  	if (store->sb_page)
794  		chunk += sizeof(bitmap_super_t) << 3;
795  	return chunk & (PAGE_BITS - 1);
796  }
797  
798  /*
799   * return a pointer to the page in the filemap that contains the given bit
800   *
801   */
filemap_get_page(struct bitmap_storage * store,unsigned long chunk)802  static inline struct page *filemap_get_page(struct bitmap_storage *store,
803  					    unsigned long chunk)
804  {
805  	if (file_page_index(store, chunk) >= store->file_pages)
806  		return NULL;
807  	return store->filemap[file_page_index(store, chunk)];
808  }
809  
md_bitmap_storage_alloc(struct bitmap_storage * store,unsigned long chunks,int with_super,int slot_number)810  static int md_bitmap_storage_alloc(struct bitmap_storage *store,
811  				   unsigned long chunks, int with_super,
812  				   int slot_number)
813  {
814  	int pnum, offset = 0;
815  	unsigned long num_pages;
816  	unsigned long bytes;
817  
818  	bytes = DIV_ROUND_UP(chunks, 8);
819  	if (with_super)
820  		bytes += sizeof(bitmap_super_t);
821  
822  	num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
823  	offset = slot_number * num_pages;
824  
825  	store->filemap = kmalloc_array(num_pages, sizeof(struct page *),
826  				       GFP_KERNEL);
827  	if (!store->filemap)
828  		return -ENOMEM;
829  
830  	if (with_super && !store->sb_page) {
831  		store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
832  		if (store->sb_page == NULL)
833  			return -ENOMEM;
834  	}
835  
836  	pnum = 0;
837  	if (store->sb_page) {
838  		store->filemap[0] = store->sb_page;
839  		pnum = 1;
840  		store->sb_index = offset;
841  	}
842  
843  	for ( ; pnum < num_pages; pnum++) {
844  		store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO);
845  		if (!store->filemap[pnum]) {
846  			store->file_pages = pnum;
847  			return -ENOMEM;
848  		}
849  	}
850  	store->file_pages = pnum;
851  
852  	/* We need 4 bits per page, rounded up to a multiple
853  	 * of sizeof(unsigned long) */
854  	store->filemap_attr = kzalloc(
855  		roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)),
856  		GFP_KERNEL);
857  	if (!store->filemap_attr)
858  		return -ENOMEM;
859  
860  	store->bytes = bytes;
861  
862  	return 0;
863  }
864  
md_bitmap_file_unmap(struct bitmap_storage * store)865  static void md_bitmap_file_unmap(struct bitmap_storage *store)
866  {
867  	struct file *file = store->file;
868  	struct page *sb_page = store->sb_page;
869  	struct page **map = store->filemap;
870  	int pages = store->file_pages;
871  
872  	while (pages--)
873  		if (map[pages] != sb_page) /* 0 is sb_page, release it below */
874  			free_buffers(map[pages]);
875  	kfree(map);
876  	kfree(store->filemap_attr);
877  
878  	if (sb_page)
879  		free_buffers(sb_page);
880  
881  	if (file) {
882  		struct inode *inode = file_inode(file);
883  		invalidate_mapping_pages(inode->i_mapping, 0, -1);
884  		fput(file);
885  	}
886  }
887  
888  /*
889   * bitmap_file_kick - if an error occurs while manipulating the bitmap file
890   * then it is no longer reliable, so we stop using it and we mark the file
891   * as failed in the superblock
892   */
md_bitmap_file_kick(struct bitmap * bitmap)893  static void md_bitmap_file_kick(struct bitmap *bitmap)
894  {
895  	if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) {
896  		md_bitmap_update_sb(bitmap);
897  
898  		if (bitmap->storage.file) {
899  			pr_warn("%s: kicking failed bitmap file %pD4 from array!\n",
900  				bmname(bitmap), bitmap->storage.file);
901  
902  		} else
903  			pr_warn("%s: disabling internal bitmap due to errors\n",
904  				bmname(bitmap));
905  	}
906  }
907  
/*
 * Per-page attribute bits.  Each file page owns a group of four bits in
 * storage.filemap_attr; the values below index into that group.
 */
enum bitmap_page_attr {
	/* there are set bits that need to be synced */
	BITMAP_PAGE_DIRTY = 0,
	/* there are bits that are being cleaned, i.e. counter is 1 or 2 */
	BITMAP_PAGE_PENDING = 1,
	/* there are cleared bits that need to be synced */
	BITMAP_PAGE_NEEDWRITE = 2,
};
914  
set_page_attr(struct bitmap * bitmap,int pnum,enum bitmap_page_attr attr)915  static inline void set_page_attr(struct bitmap *bitmap, int pnum,
916  				 enum bitmap_page_attr attr)
917  {
918  	set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
919  }
920  
clear_page_attr(struct bitmap * bitmap,int pnum,enum bitmap_page_attr attr)921  static inline void clear_page_attr(struct bitmap *bitmap, int pnum,
922  				   enum bitmap_page_attr attr)
923  {
924  	clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
925  }
926  
test_page_attr(struct bitmap * bitmap,int pnum,enum bitmap_page_attr attr)927  static inline int test_page_attr(struct bitmap *bitmap, int pnum,
928  				 enum bitmap_page_attr attr)
929  {
930  	return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
931  }
932  
test_and_clear_page_attr(struct bitmap * bitmap,int pnum,enum bitmap_page_attr attr)933  static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum,
934  					   enum bitmap_page_attr attr)
935  {
936  	return test_and_clear_bit((pnum<<2) + attr,
937  				  bitmap->storage.filemap_attr);
938  }
939  /*
940   * bitmap_file_set_bit -- called before performing a write to the md device
941   * to set (and eventually sync) a particular bit in the bitmap file
942   *
943   * we set the bit immediately, then we record the page number so that
944   * when an unplug occurs, we can flush the dirty pages out to disk
945   */
md_bitmap_file_set_bit(struct bitmap * bitmap,sector_t block)946  static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
947  {
948  	unsigned long bit;
949  	struct page *page;
950  	void *kaddr;
951  	unsigned long chunk = block >> bitmap->counts.chunkshift;
952  	struct bitmap_storage *store = &bitmap->storage;
953  	unsigned long index = file_page_index(store, chunk);
954  	unsigned long node_offset = 0;
955  
956  	index += store->sb_index;
957  	if (mddev_is_clustered(bitmap->mddev))
958  		node_offset = bitmap->cluster_slot * store->file_pages;
959  
960  	page = filemap_get_page(&bitmap->storage, chunk);
961  	if (!page)
962  		return;
963  	bit = file_page_offset(&bitmap->storage, chunk);
964  
965  	/* set the bit */
966  	kaddr = kmap_atomic(page);
967  	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
968  		set_bit(bit, kaddr);
969  	else
970  		set_bit_le(bit, kaddr);
971  	kunmap_atomic(kaddr);
972  	pr_debug("set file bit %lu page %lu\n", bit, index);
973  	/* record page number so it gets flushed to disk when unplug occurs */
974  	set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_DIRTY);
975  }
976  
/*
 * Clear the bit for the chunk containing @block in the cached bitmap page.
 * Unless the page is already flagged BITMAP_PAGE_NEEDWRITE it is flagged
 * BITMAP_PAGE_PENDING and the bitmap is marked not all-clean.  Nothing is
 * done if no page backs the chunk.
 */
static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
{
	struct bitmap_storage *store = &bitmap->storage;
	unsigned long chunk = block >> bitmap->counts.chunkshift;
	unsigned long page_idx = file_page_index(store, chunk);
	unsigned long node_offset = 0;
	unsigned long bit_nr;
	struct page *page;
	void *mapped;

	page_idx += store->sb_index;
	if (mddev_is_clustered(bitmap->mddev))
		node_offset = bitmap->cluster_slot * store->file_pages;

	page = filemap_get_page(store, chunk);
	if (!page)
		return;
	bit_nr = file_page_offset(store, chunk);

	mapped = kmap_atomic(page);
	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
		clear_bit(bit_nr, mapped);
	else
		clear_bit_le(bit_nr, mapped);
	kunmap_atomic(mapped);

	if (test_page_attr(bitmap, page_idx - node_offset,
			   BITMAP_PAGE_NEEDWRITE))
		return;

	set_page_attr(bitmap, page_idx - node_offset, BITMAP_PAGE_PENDING);
	bitmap->allclean = 0;
}
1006  
/*
 * Report whether the bit for the chunk containing @block is set in the
 * cached bitmap page.  Returns the bit value, or -EINVAL when no page
 * backs that chunk.
 */
static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
{
	struct bitmap_storage *store = &bitmap->storage;
	unsigned long chunk = block >> bitmap->counts.chunkshift;
	struct page *page = filemap_get_page(store, chunk);
	unsigned long bit_nr;
	void *mapped;
	int set;

	if (!page)
		return -EINVAL;

	bit_nr = file_page_offset(store, chunk);
	mapped = kmap_atomic(page);
	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
		set = test_bit(bit_nr, mapped);
	else
		set = test_bit_le(bit_nr, mapped);
	kunmap_atomic(mapped);

	return set;
}
1027  
1028  /* this gets called when the md device is ready to unplug its underlying
1029   * (slave) device queues -- before we let any writes go down, we need to
1030   * sync the dirty pages of the bitmap file to disk */
md_bitmap_unplug(struct bitmap * bitmap)1031  void md_bitmap_unplug(struct bitmap *bitmap)
1032  {
1033  	unsigned long i;
1034  	int dirty, need_write;
1035  	int writing = 0;
1036  
1037  	if (!md_bitmap_enabled(bitmap))
1038  		return;
1039  
1040  	/* look at each page to see if there are any set bits that need to be
1041  	 * flushed out to disk */
1042  	for (i = 0; i < bitmap->storage.file_pages; i++) {
1043  		dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
1044  		need_write = test_and_clear_page_attr(bitmap, i,
1045  						      BITMAP_PAGE_NEEDWRITE);
1046  		if (dirty || need_write) {
1047  			if (!writing) {
1048  				md_bitmap_wait_writes(bitmap);
1049  				if (bitmap->mddev->queue)
1050  					blk_add_trace_msg(bitmap->mddev->queue,
1051  							  "md bitmap_unplug");
1052  			}
1053  			clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
1054  			filemap_write_page(bitmap, i, false);
1055  			writing = 1;
1056  		}
1057  	}
1058  	if (writing)
1059  		md_bitmap_wait_writes(bitmap);
1060  
1061  	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
1062  		md_bitmap_file_kick(bitmap);
1063  }
1064  EXPORT_SYMBOL(md_bitmap_unplug);
1065  
1066  struct bitmap_unplug_work {
1067  	struct work_struct work;
1068  	struct bitmap *bitmap;
1069  	struct completion *done;
1070  };
1071  
md_bitmap_unplug_fn(struct work_struct * work)1072  static void md_bitmap_unplug_fn(struct work_struct *work)
1073  {
1074  	struct bitmap_unplug_work *unplug_work =
1075  		container_of(work, struct bitmap_unplug_work, work);
1076  
1077  	md_bitmap_unplug(unplug_work->bitmap);
1078  	complete(unplug_work->done);
1079  }
1080  
md_bitmap_unplug_async(struct bitmap * bitmap)1081  void md_bitmap_unplug_async(struct bitmap *bitmap)
1082  {
1083  	DECLARE_COMPLETION_ONSTACK(done);
1084  	struct bitmap_unplug_work unplug_work;
1085  
1086  	INIT_WORK_ONSTACK(&unplug_work.work, md_bitmap_unplug_fn);
1087  	unplug_work.bitmap = bitmap;
1088  	unplug_work.done = &done;
1089  
1090  	queue_work(md_bitmap_wq, &unplug_work.work);
1091  	wait_for_completion(&done);
1092  	destroy_work_on_stack(&unplug_work.work);
1093  }
1094  EXPORT_SYMBOL(md_bitmap_unplug_async);
1095  
1096  static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
1097  
1098  /*
1099   * Initialize the in-memory bitmap from the on-disk bitmap and set up the memory
1100   * mapping of the bitmap file.
1101   *
1102   * Special case: If there's no bitmap file, or if the bitmap file had been
1103   * previously kicked from the array, we mark all the bits as 1's in order to
1104   * cause a full resync.
1105   *
1106   * We ignore all bits for sectors that end earlier than 'start'.
1107   * This is used when reading an out-of-date bitmap.
1108   */
md_bitmap_init_from_disk(struct bitmap * bitmap,sector_t start)1109  static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
1110  {
1111  	bool outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
1112  	struct mddev *mddev = bitmap->mddev;
1113  	unsigned long chunks = bitmap->counts.chunks;
1114  	struct bitmap_storage *store = &bitmap->storage;
1115  	struct file *file = store->file;
1116  	unsigned long node_offset = 0;
1117  	unsigned long bit_cnt = 0;
1118  	unsigned long i;
1119  	int ret;
1120  
1121  	if (!file && !mddev->bitmap_info.offset) {
1122  		/* No permanent bitmap - fill with '1s'. */
1123  		store->filemap = NULL;
1124  		store->file_pages = 0;
1125  		for (i = 0; i < chunks ; i++) {
1126  			/* if the disk bit is set, set the memory bit */
1127  			int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift)
1128  				      >= start);
1129  			md_bitmap_set_memory_bits(bitmap,
1130  						  (sector_t)i << bitmap->counts.chunkshift,
1131  						  needed);
1132  		}
1133  		return 0;
1134  	}
1135  
1136  	if (file && i_size_read(file->f_mapping->host) < store->bytes) {
1137  		pr_warn("%s: bitmap file too short %lu < %lu\n",
1138  			bmname(bitmap),
1139  			(unsigned long) i_size_read(file->f_mapping->host),
1140  			store->bytes);
1141  		ret = -ENOSPC;
1142  		goto err;
1143  	}
1144  
1145  	if (mddev_is_clustered(mddev))
1146  		node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE));
1147  
1148  	for (i = 0; i < store->file_pages; i++) {
1149  		struct page *page = store->filemap[i];
1150  		int count;
1151  
1152  		/* unmap the old page, we're done with it */
1153  		if (i == store->file_pages - 1)
1154  			count = store->bytes - i * PAGE_SIZE;
1155  		else
1156  			count = PAGE_SIZE;
1157  
1158  		if (file)
1159  			ret = read_file_page(file, i, bitmap, count, page);
1160  		else
1161  			ret = read_sb_page(mddev, 0, page, i + node_offset,
1162  					   count);
1163  		if (ret)
1164  			goto err;
1165  	}
1166  
1167  	if (outofdate) {
1168  		pr_warn("%s: bitmap file is out of date, doing full recovery\n",
1169  			bmname(bitmap));
1170  
1171  		for (i = 0; i < store->file_pages; i++) {
1172  			struct page *page = store->filemap[i];
1173  			unsigned long offset = 0;
1174  			void *paddr;
1175  
1176  			if (i == 0 && !mddev->bitmap_info.external)
1177  				offset = sizeof(bitmap_super_t);
1178  
1179  			/*
1180  			 * If the bitmap is out of date, dirty the whole page
1181  			 * and write it out
1182  			 */
1183  			paddr = kmap_atomic(page);
1184  			memset(paddr + offset, 0xff, PAGE_SIZE - offset);
1185  			kunmap_atomic(paddr);
1186  
1187  			filemap_write_page(bitmap, i, true);
1188  			if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) {
1189  				ret = -EIO;
1190  				goto err;
1191  			}
1192  		}
1193  	}
1194  
1195  	for (i = 0; i < chunks; i++) {
1196  		struct page *page = filemap_get_page(&bitmap->storage, i);
1197  		unsigned long bit = file_page_offset(&bitmap->storage, i);
1198  		void *paddr;
1199  		bool was_set;
1200  
1201  		paddr = kmap_atomic(page);
1202  		if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
1203  			was_set = test_bit(bit, paddr);
1204  		else
1205  			was_set = test_bit_le(bit, paddr);
1206  		kunmap_atomic(paddr);
1207  
1208  		if (was_set) {
1209  			/* if the disk bit is set, set the memory bit */
1210  			int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift
1211  				      >= start);
1212  			md_bitmap_set_memory_bits(bitmap,
1213  						  (sector_t)i << bitmap->counts.chunkshift,
1214  						  needed);
1215  			bit_cnt++;
1216  		}
1217  	}
1218  
1219  	pr_debug("%s: bitmap initialized from disk: read %lu pages, set %lu of %lu bits\n",
1220  		 bmname(bitmap), store->file_pages,
1221  		 bit_cnt, chunks);
1222  
1223  	return 0;
1224  
1225   err:
1226  	pr_warn("%s: bitmap initialisation failed: %d\n",
1227  		bmname(bitmap), ret);
1228  	return ret;
1229  }
1230  
md_bitmap_write_all(struct bitmap * bitmap)1231  void md_bitmap_write_all(struct bitmap *bitmap)
1232  {
1233  	/* We don't actually write all bitmap blocks here,
1234  	 * just flag them as needing to be written
1235  	 */
1236  	int i;
1237  
1238  	if (!bitmap || !bitmap->storage.filemap)
1239  		return;
1240  	if (bitmap->storage.file)
1241  		/* Only one copy, so nothing needed */
1242  		return;
1243  
1244  	for (i = 0; i < bitmap->storage.file_pages; i++)
1245  		set_page_attr(bitmap, i,
1246  			      BITMAP_PAGE_NEEDWRITE);
1247  	bitmap->allclean = 0;
1248  }
1249  
/*
 * Adjust by @inc the count of the counter page covering sector @offset,
 * then let md_bitmap_checkfree() look at that page.
 */
static void md_bitmap_count_page(struct bitmap_counts *bitmap,
				 sector_t offset, int inc)
{
	sector_t chunk = offset >> bitmap->chunkshift;
	unsigned long pg = chunk >> PAGE_COUNTER_SHIFT;

	bitmap->bp[pg].count += inc;
	md_bitmap_checkfree(bitmap, pg);
}
1258  
/* Flag the counter page covering sector @offset as having pending work. */
static void md_bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset)
{
	sector_t chunk = offset >> bitmap->chunkshift;
	struct bitmap_page *bp = &bitmap->bp[chunk >> PAGE_COUNTER_SHIFT];

	/* only store when the flag actually changes */
	if (bp->pending)
		return;
	bp->pending = 1;
}
1268  
1269  static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap,
1270  					       sector_t offset, sector_t *blocks,
1271  					       int create);
1272  
/*
 * Update the timeout of the array's md thread, if it has one.
 * With @force the timeout is always replaced; otherwise it is only
 * replaced when the current value is below MAX_SCHEDULE_TIMEOUT.
 * The thread pointer is read under rcu_read_lock().
 */
static void mddev_set_timeout(struct mddev *mddev, unsigned long timeout,
			      bool force)
{
	struct md_thread *thread;

	rcu_read_lock();
	thread = rcu_dereference(mddev->thread);
	if (thread && (force || thread->timeout < MAX_SCHEDULE_TIMEOUT))
		thread->timeout = timeout;
	rcu_read_unlock();
}
1290  
1291  /*
1292   * bitmap daemon -- periodically wakes up to clean bits and flush pages
1293   *			out to disk
1294   */
md_bitmap_daemon_work(struct mddev * mddev)1295  void md_bitmap_daemon_work(struct mddev *mddev)
1296  {
1297  	struct bitmap *bitmap;
1298  	unsigned long j;
1299  	unsigned long nextpage;
1300  	sector_t blocks;
1301  	struct bitmap_counts *counts;
1302  
1303  	/* Use a mutex to guard daemon_work against
1304  	 * bitmap_destroy.
1305  	 */
1306  	mutex_lock(&mddev->bitmap_info.mutex);
1307  	bitmap = mddev->bitmap;
1308  	if (bitmap == NULL) {
1309  		mutex_unlock(&mddev->bitmap_info.mutex);
1310  		return;
1311  	}
1312  	if (time_before(jiffies, bitmap->daemon_lastrun
1313  			+ mddev->bitmap_info.daemon_sleep))
1314  		goto done;
1315  
1316  	bitmap->daemon_lastrun = jiffies;
1317  	if (bitmap->allclean) {
1318  		mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true);
1319  		goto done;
1320  	}
1321  	bitmap->allclean = 1;
1322  
1323  	if (bitmap->mddev->queue)
1324  		blk_add_trace_msg(bitmap->mddev->queue,
1325  				  "md bitmap_daemon_work");
1326  
1327  	/* Any file-page which is PENDING now needs to be written.
1328  	 * So set NEEDWRITE now, then after we make any last-minute changes
1329  	 * we will write it.
1330  	 */
1331  	for (j = 0; j < bitmap->storage.file_pages; j++)
1332  		if (test_and_clear_page_attr(bitmap, j,
1333  					     BITMAP_PAGE_PENDING))
1334  			set_page_attr(bitmap, j,
1335  				      BITMAP_PAGE_NEEDWRITE);
1336  
1337  	if (bitmap->need_sync &&
1338  	    mddev->bitmap_info.external == 0) {
1339  		/* Arrange for superblock update as well as
1340  		 * other changes */
1341  		bitmap_super_t *sb;
1342  		bitmap->need_sync = 0;
1343  		if (bitmap->storage.filemap) {
1344  			sb = kmap_atomic(bitmap->storage.sb_page);
1345  			sb->events_cleared =
1346  				cpu_to_le64(bitmap->events_cleared);
1347  			kunmap_atomic(sb);
1348  			set_page_attr(bitmap, 0,
1349  				      BITMAP_PAGE_NEEDWRITE);
1350  		}
1351  	}
1352  	/* Now look at the bitmap counters and if any are '2' or '1',
1353  	 * decrement and handle accordingly.
1354  	 */
1355  	counts = &bitmap->counts;
1356  	spin_lock_irq(&counts->lock);
1357  	nextpage = 0;
1358  	for (j = 0; j < counts->chunks; j++) {
1359  		bitmap_counter_t *bmc;
1360  		sector_t  block = (sector_t)j << counts->chunkshift;
1361  
1362  		if (j == nextpage) {
1363  			nextpage += PAGE_COUNTER_RATIO;
1364  			if (!counts->bp[j >> PAGE_COUNTER_SHIFT].pending) {
1365  				j |= PAGE_COUNTER_MASK;
1366  				continue;
1367  			}
1368  			counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0;
1369  		}
1370  
1371  		bmc = md_bitmap_get_counter(counts, block, &blocks, 0);
1372  		if (!bmc) {
1373  			j |= PAGE_COUNTER_MASK;
1374  			continue;
1375  		}
1376  		if (*bmc == 1 && !bitmap->need_sync) {
1377  			/* We can clear the bit */
1378  			*bmc = 0;
1379  			md_bitmap_count_page(counts, block, -1);
1380  			md_bitmap_file_clear_bit(bitmap, block);
1381  		} else if (*bmc && *bmc <= 2) {
1382  			*bmc = 1;
1383  			md_bitmap_set_pending(counts, block);
1384  			bitmap->allclean = 0;
1385  		}
1386  	}
1387  	spin_unlock_irq(&counts->lock);
1388  
1389  	md_bitmap_wait_writes(bitmap);
1390  	/* Now start writeout on any page in NEEDWRITE that isn't DIRTY.
1391  	 * DIRTY pages need to be written by bitmap_unplug so it can wait
1392  	 * for them.
1393  	 * If we find any DIRTY page we stop there and let bitmap_unplug
1394  	 * handle all the rest.  This is important in the case where
1395  	 * the first blocking holds the superblock and it has been updated.
1396  	 * We mustn't write any other blocks before the superblock.
1397  	 */
1398  	for (j = 0;
1399  	     j < bitmap->storage.file_pages
1400  		     && !test_bit(BITMAP_STALE, &bitmap->flags);
1401  	     j++) {
1402  		if (test_page_attr(bitmap, j,
1403  				   BITMAP_PAGE_DIRTY))
1404  			/* bitmap_unplug will handle the rest */
1405  			break;
1406  		if (bitmap->storage.filemap &&
1407  		    test_and_clear_page_attr(bitmap, j,
1408  					     BITMAP_PAGE_NEEDWRITE))
1409  			filemap_write_page(bitmap, j, false);
1410  	}
1411  
1412   done:
1413  	if (bitmap->allclean == 0)
1414  		mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true);
1415  	mutex_unlock(&mddev->bitmap_info.mutex);
1416  }
1417  
md_bitmap_get_counter(struct bitmap_counts * bitmap,sector_t offset,sector_t * blocks,int create)1418  static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap,
1419  					       sector_t offset, sector_t *blocks,
1420  					       int create)
1421  __releases(bitmap->lock)
1422  __acquires(bitmap->lock)
1423  {
1424  	/* If 'create', we might release the lock and reclaim it.
1425  	 * The lock must have been taken with interrupts enabled.
1426  	 * If !create, we don't release the lock.
1427  	 */
1428  	sector_t chunk = offset >> bitmap->chunkshift;
1429  	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1430  	unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
1431  	sector_t csize = ((sector_t)1) << bitmap->chunkshift;
1432  	int err;
1433  
1434  	if (page >= bitmap->pages) {
1435  		/*
1436  		 * This can happen if bitmap_start_sync goes beyond
1437  		 * End-of-device while looking for a whole page or
1438  		 * user set a huge number to sysfs bitmap_set_bits.
1439  		 */
1440  		*blocks = csize - (offset & (csize - 1));
1441  		return NULL;
1442  	}
1443  	err = md_bitmap_checkpage(bitmap, page, create, 0);
1444  
1445  	if (bitmap->bp[page].hijacked ||
1446  	    bitmap->bp[page].map == NULL)
1447  		csize = ((sector_t)1) << (bitmap->chunkshift +
1448  					  PAGE_COUNTER_SHIFT);
1449  
1450  	*blocks = csize - (offset & (csize - 1));
1451  
1452  	if (err < 0)
1453  		return NULL;
1454  
1455  	/* now locked ... */
1456  
1457  	if (bitmap->bp[page].hijacked) { /* hijacked pointer */
1458  		/* should we use the first or second counter field
1459  		 * of the hijacked pointer? */
1460  		int hi = (pageoff > PAGE_COUNTER_MASK);
1461  		return  &((bitmap_counter_t *)
1462  			  &bitmap->bp[page].map)[hi];
1463  	} else /* page is allocated */
1464  		return (bitmap_counter_t *)
1465  			&(bitmap->bp[page].map[pageoff]);
1466  }
1467  
/*
 * Account for a write of @sectors sectors starting at @offset: for each
 * counter region touched, set the on-disk bit if the counter was zero
 * and bump the counter.  Sleeps on overflow_wait while a counter is at
 * COUNTER_MAX.  Always returns 0; it stops early if no counter can be
 * obtained for a region.
 */
int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset,
			 unsigned long sectors)
{
	if (!bitmap)
		return 0;

	while (sectors) {
		bitmap_counter_t *bmc;
		sector_t blocks;

		spin_lock_irq(&bitmap->counts.lock);
		bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 1);
		if (!bmc) {
			spin_unlock_irq(&bitmap->counts.lock);
			return 0;
		}

		if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) {
			DEFINE_WAIT(__wait);
			/*
			 * It is safe to do the prepare_to_wait after the
			 * test as long as we do it before dropping the
			 * spinlock.
			 */
			prepare_to_wait(&bitmap->overflow_wait, &__wait,
					TASK_UNINTERRUPTIBLE);
			spin_unlock_irq(&bitmap->counts.lock);
			schedule();
			finish_wait(&bitmap->overflow_wait, &__wait);
			/* retry the same region */
			continue;
		}

		if (*bmc == 0) {
			/* first write to this chunk: dirty the file bit too */
			md_bitmap_file_set_bit(bitmap, offset);
			md_bitmap_count_page(&bitmap->counts, offset, 1);
			*bmc = 2;
		} else if (*bmc == 1) {
			*bmc = 2;
		}

		(*bmc)++;

		spin_unlock_irq(&bitmap->counts.lock);

		offset += blocks;
		if (sectors <= blocks)
			break;
		sectors -= blocks;
	}
	return 0;
}
1520  
/*
 * Account for the completion of a write of @sectors sectors starting at
 * @offset: drop the counter of every region touched, waking waiters that
 * were blocked on a saturated counter, and flag the region as pending
 * once its counter has fallen to 2 or below.
 */
void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
			unsigned long sectors)
{
	if (!bitmap)
		return;

	while (sectors) {
		bitmap_counter_t *bmc;
		unsigned long flags;
		sector_t blocks;

		spin_lock_irqsave(&bitmap->counts.lock, flags);
		bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 0);
		if (!bmc) {
			spin_unlock_irqrestore(&bitmap->counts.lock, flags);
			return;
		}

		if (bitmap->mddev->degraded) {
			if (!NEEDED(*bmc))
				*bmc |= NEEDED_MASK;
		} else if (bitmap->events_cleared < bitmap->mddev->events) {
			bitmap->events_cleared = bitmap->mddev->events;
			bitmap->need_sync = 1;
			sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
		}

		/* about to leave saturation: let blocked writers retry */
		if (COUNTER(*bmc) == COUNTER_MAX)
			wake_up(&bitmap->overflow_wait);

		(*bmc)--;
		if (*bmc <= 2) {
			md_bitmap_set_pending(&bitmap->counts, offset);
			bitmap->allclean = 0;
		}
		spin_unlock_irqrestore(&bitmap->counts.lock, flags);

		offset += blocks;
		if (sectors <= blocks)
			break;
		sectors -= blocks;
	}
}
1566  
/*
 * Decide whether the region at @offset needs to be synced.
 *
 * Returns 1 if the counter has RESYNC or NEEDED set (or if there is no
 * bitmap at all), 0 otherwise.  Unless @degraded, a NEEDED region is
 * switched to RESYNC.  @blocks receives the size of the region examined
 * (1024 when there is no bitmap).
 */
static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
			       int degraded)
{
	bitmap_counter_t *bmc;
	int rv = 0;

	if (!bitmap) {/* FIXME or bitmap set as 'failed' */
		*blocks = 1024;
		return 1; /* always resync if no bitmap */
	}

	spin_lock_irq(&bitmap->counts.lock);
	bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
	if (bmc && RESYNC(*bmc)) {
		rv = 1;
	} else if (bmc && NEEDED(*bmc)) {
		rv = 1;
		/* don't set/clear bits if degraded */
		if (!degraded) {
			*bmc |= RESYNC_MASK;
			*bmc &= ~NEEDED_MASK;
		}
	}
	spin_unlock_irq(&bitmap->counts.lock);

	return rv;
}
1594  
/*
 * bitmap_start_sync must always report on multiples of whole pages,
 * otherwise resync (which is very PAGE_SIZE based) will get confused.
 * So __bitmap_start_sync is called repeatedly until at least
 * PAGE_SIZE>>9 blocks are covered, and the 'or' of the individual
 * results is returned.  @blocks receives the total covered.
 */
int md_bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
			 int degraded)
{
	sector_t step;
	int rv = 0;

	*blocks = 0;
	/* PAGE_SIZE>>9 is never zero, so at least one pass always runs */
	do {
		rv |= __bitmap_start_sync(bitmap, offset, &step, degraded);
		offset += step;
		*blocks += step;
	} while (*blocks < (PAGE_SIZE>>9));

	return rv;
}
EXPORT_SYMBOL(md_bitmap_start_sync);
1618  
/*
 * Finish syncing the region at @offset: clear RESYNC on its counter.
 * If the sync was @aborted and NEEDED is not already set, NEEDED is set
 * again; otherwise a counter of 2 or less is flagged as pending.
 * @blocks receives the size of the region (1024 when there is no bitmap).
 */
void md_bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted)
{
	bitmap_counter_t *bmc;
	unsigned long flags;

	if (!bitmap) {
		*blocks = 1024;
		return;
	}

	spin_lock_irqsave(&bitmap->counts.lock, flags);
	bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
	if (!bmc || !RESYNC(*bmc))
		goto unlock;

	/* locked */
	*bmc &= ~RESYNC_MASK;
	if (aborted && !NEEDED(*bmc)) {
		*bmc |= NEEDED_MASK;
	} else if (*bmc <= 2) {
		md_bitmap_set_pending(&bitmap->counts, offset);
		bitmap->allclean = 0;
	}
 unlock:
	spin_unlock_irqrestore(&bitmap->counts.lock, flags);
}
EXPORT_SYMBOL(md_bitmap_end_sync);
1649  
md_bitmap_close_sync(struct bitmap * bitmap)1650  void md_bitmap_close_sync(struct bitmap *bitmap)
1651  {
1652  	/* Sync has finished, and any bitmap chunks that weren't synced
1653  	 * properly have been aborted.  It remains to us to clear the
1654  	 * RESYNC bit wherever it is still on
1655  	 */
1656  	sector_t sector = 0;
1657  	sector_t blocks;
1658  	if (!bitmap)
1659  		return;
1660  	while (sector < bitmap->mddev->resync_max_sectors) {
1661  		md_bitmap_end_sync(bitmap, sector, &blocks, 0);
1662  		sector += blocks;
1663  	}
1664  }
1665  EXPORT_SYMBOL(md_bitmap_close_sync);
1666  
/*
 * Periodically end the sync of everything below @sector.
 *
 * A @sector of 0 only records the current time.  Otherwise, unless
 * @force is set, nothing happens until daemon_sleep jiffies have passed
 * since the last run.  When it does run it waits for active recovery
 * I/O to drain, records @sector as the completed resync position and
 * ends sync on every region below the chunk containing @sector.
 */
void md_bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force)
{
	struct mddev *mddev;
	sector_t blocks;
	sector_t s;

	if (!bitmap)
		return;
	if (sector == 0) {
		bitmap->last_end_sync = jiffies;
		return;
	}

	mddev = bitmap->mddev;
	if (!force && time_before(jiffies, (bitmap->last_end_sync
				  + mddev->bitmap_info.daemon_sleep)))
		return;
	wait_event(mddev->recovery_wait,
		   atomic_read(&mddev->recovery_active) == 0);

	mddev->curr_resync_completed = sector;
	set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);

	/* round down to a chunk boundary */
	sector &= ~((1ULL << bitmap->counts.chunkshift) - 1);
	for (s = 0; s < sector && s < mddev->resync_max_sectors; s += blocks)
		md_bitmap_end_sync(bitmap, s, &blocks, 0);

	bitmap->last_end_sync = jiffies;
	sysfs_notify_dirent_safe(mddev->sysfs_completed);
}
EXPORT_SYMBOL(md_bitmap_cond_end_sync);
1696  
/*
 * Move this array's resync window from [old_lo, old_hi) to
 * [new_lo, new_hi): sync is ended on the regions from old_lo up to
 * new_lo and started on the regions from old_hi up to new_hi.  A
 * warning is emitted when the last region size looks misaligned.
 */
void md_bitmap_sync_with_cluster(struct mddev *mddev,
			      sector_t old_lo, sector_t old_hi,
			      sector_t new_lo, sector_t new_hi)
{
	struct bitmap *bitmap = mddev->bitmap;
	sector_t blocks = 0;
	sector_t sector;

	sector = old_lo;
	while (sector < new_lo) {
		md_bitmap_end_sync(bitmap, sector, &blocks, 0);
		sector += blocks;
	}
	WARN((blocks > new_lo) && old_lo, "alignment is not correct for lo\n");

	sector = old_hi;
	while (sector < new_hi) {
		md_bitmap_start_sync(bitmap, sector, &blocks, 0);
		sector += blocks;
	}
	WARN((blocks > new_hi) && old_hi, "alignment is not correct for hi\n");
}
EXPORT_SYMBOL(md_bitmap_sync_with_cluster);
1717  
/*
 * Mark the chunk containing sector @offset as dirty in memory: a zero
 * counter is raised to 2 (and the page flagged pending), and when
 * @needed is set the counter is additionally flagged NEEDED.  Does
 * nothing if no counter can be obtained.
 */
static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
{
	struct bitmap_counts *counts = &bitmap->counts;
	bitmap_counter_t *bmc;
	sector_t secs;

	spin_lock_irq(&counts->lock);
	bmc = md_bitmap_get_counter(counts, offset, &secs, 1);
	if (bmc) {
		if (!*bmc) {
			*bmc = 2;
			md_bitmap_count_page(counts, offset, 1);
			md_bitmap_set_pending(counts, offset);
			bitmap->allclean = 0;
		}
		if (needed)
			*bmc |= NEEDED_MASK;
	}
	spin_unlock_irq(&counts->lock);
}
1743  
1744  /* dirty the memory and file bits for bitmap chunks "s" to "e" */
md_bitmap_dirty_bits(struct bitmap * bitmap,unsigned long s,unsigned long e)1745  void md_bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
1746  {
1747  	unsigned long chunk;
1748  
1749  	for (chunk = s; chunk <= e; chunk++) {
1750  		sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift;
1751  		md_bitmap_set_memory_bits(bitmap, sec, 1);
1752  		md_bitmap_file_set_bit(bitmap, sec);
1753  		if (sec < bitmap->mddev->recovery_cp)
1754  			/* We are asserting that the array is dirty,
1755  			 * so move the recovery_cp address back so
1756  			 * that it is obvious that it is dirty
1757  			 */
1758  			bitmap->mddev->recovery_cp = sec;
1759  	}
1760  }
1761  
1762  /*
1763   * flush out any pending updates
1764   */
md_bitmap_flush(struct mddev * mddev)1765  void md_bitmap_flush(struct mddev *mddev)
1766  {
1767  	struct bitmap *bitmap = mddev->bitmap;
1768  	long sleep;
1769  
1770  	if (!bitmap) /* there was no bitmap */
1771  		return;
1772  
1773  	/* run the daemon_work three time to ensure everything is flushed
1774  	 * that can be
1775  	 */
1776  	sleep = mddev->bitmap_info.daemon_sleep * 2;
1777  	bitmap->daemon_lastrun -= sleep;
1778  	md_bitmap_daemon_work(mddev);
1779  	bitmap->daemon_lastrun -= sleep;
1780  	md_bitmap_daemon_work(mddev);
1781  	bitmap->daemon_lastrun -= sleep;
1782  	md_bitmap_daemon_work(mddev);
1783  	if (mddev->bitmap_info.external)
1784  		md_super_wait(mddev);
1785  	md_bitmap_update_sb(bitmap);
1786  }
1787  
1788  /*
1789   * free memory that was allocated
1790   */
md_bitmap_free(struct bitmap * bitmap)1791  void md_bitmap_free(struct bitmap *bitmap)
1792  {
1793  	unsigned long k, pages;
1794  	struct bitmap_page *bp;
1795  
1796  	if (!bitmap) /* there was no bitmap */
1797  		return;
1798  
1799  	if (bitmap->sysfs_can_clear)
1800  		sysfs_put(bitmap->sysfs_can_clear);
1801  
1802  	if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info &&
1803  		bitmap->cluster_slot == md_cluster_ops->slot_number(bitmap->mddev))
1804  		md_cluster_stop(bitmap->mddev);
1805  
1806  	/* Shouldn't be needed - but just in case.... */
1807  	wait_event(bitmap->write_wait,
1808  		   atomic_read(&bitmap->pending_writes) == 0);
1809  
1810  	/* release the bitmap file  */
1811  	md_bitmap_file_unmap(&bitmap->storage);
1812  
1813  	bp = bitmap->counts.bp;
1814  	pages = bitmap->counts.pages;
1815  
1816  	/* free all allocated memory */
1817  
1818  	if (bp) /* deallocate the page memory */
1819  		for (k = 0; k < pages; k++)
1820  			if (bp[k].map && !bp[k].hijacked)
1821  				kfree(bp[k].map);
1822  	kfree(bp);
1823  	kfree(bitmap);
1824  }
1825  EXPORT_SYMBOL(md_bitmap_free);
1826  
md_bitmap_start_behind_write(struct mddev * mddev)1827  void md_bitmap_start_behind_write(struct mddev *mddev)
1828  {
1829  	struct bitmap *bitmap = mddev->bitmap;
1830  	int bw;
1831  
1832  	if (!bitmap)
1833  		return;
1834  
1835  	atomic_inc(&bitmap->behind_writes);
1836  	bw = atomic_read(&bitmap->behind_writes);
1837  	if (bw > bitmap->behind_writes_used)
1838  		bitmap->behind_writes_used = bw;
1839  
1840  	pr_debug("inc write-behind count %d/%lu\n",
1841  		 bw, bitmap->mddev->bitmap_info.max_write_behind);
1842  }
1843  EXPORT_SYMBOL_GPL(md_bitmap_start_behind_write);
1844  
md_bitmap_end_behind_write(struct mddev * mddev)1845  void md_bitmap_end_behind_write(struct mddev *mddev)
1846  {
1847  	struct bitmap *bitmap = mddev->bitmap;
1848  
1849  	if (!bitmap)
1850  		return;
1851  
1852  	if (atomic_dec_and_test(&bitmap->behind_writes))
1853  		wake_up(&bitmap->behind_wait);
1854  	pr_debug("dec write-behind count %d/%lu\n",
1855  		 atomic_read(&bitmap->behind_writes),
1856  		 bitmap->mddev->bitmap_info.max_write_behind);
1857  }
1858  EXPORT_SYMBOL_GPL(md_bitmap_end_behind_write);
1859  
md_bitmap_wait_behind_writes(struct mddev * mddev)1860  void md_bitmap_wait_behind_writes(struct mddev *mddev)
1861  {
1862  	struct bitmap *bitmap = mddev->bitmap;
1863  
1864  	/* wait for behind writes to complete */
1865  	if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
1866  		pr_debug("md:%s: behind writes in progress - waiting to stop.\n",
1867  			 mdname(mddev));
1868  		/* need to kick something here to make sure I/O goes? */
1869  		wait_event(bitmap->behind_wait,
1870  			   atomic_read(&bitmap->behind_writes) == 0);
1871  	}
1872  }
1873  
md_bitmap_destroy(struct mddev * mddev)1874  void md_bitmap_destroy(struct mddev *mddev)
1875  {
1876  	struct bitmap *bitmap = mddev->bitmap;
1877  
1878  	if (!bitmap) /* there was no bitmap */
1879  		return;
1880  
1881  	md_bitmap_wait_behind_writes(mddev);
1882  	if (!mddev->serialize_policy)
1883  		mddev_destroy_serial_pool(mddev, NULL, true);
1884  
1885  	mutex_lock(&mddev->bitmap_info.mutex);
1886  	spin_lock(&mddev->lock);
1887  	mddev->bitmap = NULL; /* disconnect from the md device */
1888  	spin_unlock(&mddev->lock);
1889  	mutex_unlock(&mddev->bitmap_info.mutex);
1890  	mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true);
1891  
1892  	md_bitmap_free(bitmap);
1893  }
1894  
1895  /*
1896   * initialize the bitmap structure
1897   * if this returns an error, bitmap_destroy must be called to do clean up
1898   * once mddev->bitmap is set
1899   */
md_bitmap_create(struct mddev * mddev,int slot)1900  struct bitmap *md_bitmap_create(struct mddev *mddev, int slot)
1901  {
1902  	struct bitmap *bitmap;
1903  	sector_t blocks = mddev->resync_max_sectors;
1904  	struct file *file = mddev->bitmap_info.file;
1905  	int err;
1906  	struct kernfs_node *bm = NULL;
1907  
1908  	BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
1909  
1910  	BUG_ON(file && mddev->bitmap_info.offset);
1911  
1912  	if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
1913  		pr_notice("md/raid:%s: array with journal cannot have bitmap\n",
1914  			  mdname(mddev));
1915  		return ERR_PTR(-EBUSY);
1916  	}
1917  
1918  	bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
1919  	if (!bitmap)
1920  		return ERR_PTR(-ENOMEM);
1921  
1922  	spin_lock_init(&bitmap->counts.lock);
1923  	atomic_set(&bitmap->pending_writes, 0);
1924  	init_waitqueue_head(&bitmap->write_wait);
1925  	init_waitqueue_head(&bitmap->overflow_wait);
1926  	init_waitqueue_head(&bitmap->behind_wait);
1927  
1928  	bitmap->mddev = mddev;
1929  	bitmap->cluster_slot = slot;
1930  
1931  	if (mddev->kobj.sd)
1932  		bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
1933  	if (bm) {
1934  		bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
1935  		sysfs_put(bm);
1936  	} else
1937  		bitmap->sysfs_can_clear = NULL;
1938  
1939  	bitmap->storage.file = file;
1940  	if (file) {
1941  		get_file(file);
1942  		/* As future accesses to this file will use bmap,
1943  		 * and bypass the page cache, we must sync the file
1944  		 * first.
1945  		 */
1946  		vfs_fsync(file, 1);
1947  	}
1948  	/* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
1949  	if (!mddev->bitmap_info.external) {
1950  		/*
1951  		 * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is
1952  		 * instructing us to create a new on-disk bitmap instance.
1953  		 */
1954  		if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags))
1955  			err = md_bitmap_new_disk_sb(bitmap);
1956  		else
1957  			err = md_bitmap_read_sb(bitmap);
1958  	} else {
1959  		err = 0;
1960  		if (mddev->bitmap_info.chunksize == 0 ||
1961  		    mddev->bitmap_info.daemon_sleep == 0)
1962  			/* chunksize and time_base need to be
1963  			 * set first. */
1964  			err = -EINVAL;
1965  	}
1966  	if (err)
1967  		goto error;
1968  
1969  	bitmap->daemon_lastrun = jiffies;
1970  	err = md_bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 1);
1971  	if (err)
1972  		goto error;
1973  
1974  	pr_debug("created bitmap (%lu pages) for device %s\n",
1975  		 bitmap->counts.pages, bmname(bitmap));
1976  
1977  	err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;
1978  	if (err)
1979  		goto error;
1980  
1981  	return bitmap;
1982   error:
1983  	md_bitmap_free(bitmap);
1984  	return ERR_PTR(err);
1985  }
1986  
md_bitmap_load(struct mddev * mddev)1987  int md_bitmap_load(struct mddev *mddev)
1988  {
1989  	int err = 0;
1990  	sector_t start = 0;
1991  	sector_t sector = 0;
1992  	struct bitmap *bitmap = mddev->bitmap;
1993  	struct md_rdev *rdev;
1994  
1995  	if (!bitmap)
1996  		goto out;
1997  
1998  	rdev_for_each(rdev, mddev)
1999  		mddev_create_serial_pool(mddev, rdev, true);
2000  
2001  	if (mddev_is_clustered(mddev))
2002  		md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);
2003  
2004  	/* Clear out old bitmap info first:  Either there is none, or we
2005  	 * are resuming after someone else has possibly changed things,
2006  	 * so we should forget old cached info.
2007  	 * All chunks should be clean, but some might need_sync.
2008  	 */
2009  	while (sector < mddev->resync_max_sectors) {
2010  		sector_t blocks;
2011  		md_bitmap_start_sync(bitmap, sector, &blocks, 0);
2012  		sector += blocks;
2013  	}
2014  	md_bitmap_close_sync(bitmap);
2015  
2016  	if (mddev->degraded == 0
2017  	    || bitmap->events_cleared == mddev->events)
2018  		/* no need to keep dirty bits to optimise a
2019  		 * re-add of a missing device */
2020  		start = mddev->recovery_cp;
2021  
2022  	mutex_lock(&mddev->bitmap_info.mutex);
2023  	err = md_bitmap_init_from_disk(bitmap, start);
2024  	mutex_unlock(&mddev->bitmap_info.mutex);
2025  
2026  	if (err)
2027  		goto out;
2028  	clear_bit(BITMAP_STALE, &bitmap->flags);
2029  
2030  	/* Kick recovery in case any bits were set */
2031  	set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
2032  
2033  	mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true);
2034  	md_wakeup_thread(mddev->thread);
2035  
2036  	md_bitmap_update_sb(bitmap);
2037  
2038  	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
2039  		err = -EIO;
2040  out:
2041  	return err;
2042  }
2043  EXPORT_SYMBOL_GPL(md_bitmap_load);
2044  
/*
 * Create a bitmap for cluster slot @slot of @mddev and initialize it from
 * disk.
 *
 * Returns the bitmap or an ERR_PTR().  The caller needs to free the
 * returned bitmap with md_bitmap_free().
 */
struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot)
{
	struct bitmap *bitmap = md_bitmap_create(mddev, slot);
	int rv;

	/* already an ERR_PTR: hand the same error straight back */
	if (IS_ERR(bitmap))
		return bitmap;

	rv = md_bitmap_init_from_disk(bitmap, 0);
	if (rv == 0)
		return bitmap;

	md_bitmap_free(bitmap);
	return ERR_PTR(rv);
}
EXPORT_SYMBOL(get_bitmap_from_slot);
2066  
2067  /* Loads the bitmap associated with slot and copies the resync information
2068   * to our bitmap
2069   */
md_bitmap_copy_from_slot(struct mddev * mddev,int slot,sector_t * low,sector_t * high,bool clear_bits)2070  int md_bitmap_copy_from_slot(struct mddev *mddev, int slot,
2071  		sector_t *low, sector_t *high, bool clear_bits)
2072  {
2073  	int rv = 0, i, j;
2074  	sector_t block, lo = 0, hi = 0;
2075  	struct bitmap_counts *counts;
2076  	struct bitmap *bitmap;
2077  
2078  	bitmap = get_bitmap_from_slot(mddev, slot);
2079  	if (IS_ERR(bitmap)) {
2080  		pr_err("%s can't get bitmap from slot %d\n", __func__, slot);
2081  		return -1;
2082  	}
2083  
2084  	counts = &bitmap->counts;
2085  	for (j = 0; j < counts->chunks; j++) {
2086  		block = (sector_t)j << counts->chunkshift;
2087  		if (md_bitmap_file_test_bit(bitmap, block)) {
2088  			if (!lo)
2089  				lo = block;
2090  			hi = block;
2091  			md_bitmap_file_clear_bit(bitmap, block);
2092  			md_bitmap_set_memory_bits(mddev->bitmap, block, 1);
2093  			md_bitmap_file_set_bit(mddev->bitmap, block);
2094  		}
2095  	}
2096  
2097  	if (clear_bits) {
2098  		md_bitmap_update_sb(bitmap);
2099  		/* BITMAP_PAGE_PENDING is set, but bitmap_unplug needs
2100  		 * BITMAP_PAGE_DIRTY or _NEEDWRITE to write ... */
2101  		for (i = 0; i < bitmap->storage.file_pages; i++)
2102  			if (test_page_attr(bitmap, i, BITMAP_PAGE_PENDING))
2103  				set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE);
2104  		md_bitmap_unplug(bitmap);
2105  	}
2106  	md_bitmap_unplug(mddev->bitmap);
2107  	*low = lo;
2108  	*high = hi;
2109  	md_bitmap_free(bitmap);
2110  
2111  	return rv;
2112  }
2113  EXPORT_SYMBOL_GPL(md_bitmap_copy_from_slot);
2114  
md_bitmap_get_stats(struct bitmap * bitmap,struct md_bitmap_stats * stats)2115  int md_bitmap_get_stats(struct bitmap *bitmap, struct md_bitmap_stats *stats)
2116  {
2117  	struct bitmap_counts *counts;
2118  	bitmap_super_t *sb;
2119  
2120  	if (!bitmap)
2121  		return -ENOENT;
2122  	if (bitmap->mddev->bitmap_info.external)
2123  		return -ENOENT;
2124  	if (!bitmap->storage.sb_page) /* no superblock */
2125  		return -EINVAL;
2126  	sb = kmap_local_page(bitmap->storage.sb_page);
2127  	stats->sync_size = le64_to_cpu(sb->sync_size);
2128  	kunmap_local(sb);
2129  
2130  	counts = &bitmap->counts;
2131  	stats->missing_pages = counts->missing_pages;
2132  	stats->pages = counts->pages;
2133  	stats->file = bitmap->storage.file;
2134  
2135  	return 0;
2136  }
2137  EXPORT_SYMBOL_GPL(md_bitmap_get_stats);
2138  
/*
 * Set up or resize the counters and storage of @bitmap so that it covers
 * @blocks sectors.
 *
 * @bitmap:    bitmap to (re)size
 * @blocks:    number of sectors the bitmap has to cover
 * @chunksize: chunk size in bytes, or 0 to pick a suitable one here
 * @init:      non-zero when setting up the initial bitmap; zero when
 *             resizing while the array is active (the personality is then
 *             quiesced around the switch and the result is unplugged)
 *
 * If chunk_size is 0, choose an appropriate chunk size.
 * Then possibly allocate new storage space.
 * Then quiesce, copy bits, replace bitmap, and re-start
 *
 * This function is called both to set up the initial bitmap
 * and to resize the bitmap while the array is active.
 * If this happens as a result of the array being resized,
 * chunksize will be zero, and we need to choose a suitable
 * chunksize, otherwise we use what we are given.
 *
 * Returns 0 on success, -EINVAL for a file-based bitmap when !@init,
 * -ENOMEM when the new page array cannot be allocated, or the error from
 * md_bitmap_storage_alloc().
 * NOTE(review): a failed cluster pre-allocation falls back to the old
 * counters and the function still returns 0 - confirm this is intended.
 */
int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
		  int chunksize, int init)
{
	struct bitmap_storage store;
	struct bitmap_counts old_counts;
	unsigned long chunks;
	sector_t block;
	sector_t old_blocks, new_blocks;
	int chunkshift;
	int ret = 0;
	long pages;
	struct bitmap_page *new_bp;

	if (bitmap->storage.file && !init) {
		pr_info("md: cannot resize file-based bitmap\n");
		return -EINVAL;
	}

	if (chunksize == 0) {
		/* If there is enough space, leave the chunk size unchanged,
		 * else increase by factor of two until there is enough space.
		 */
		long bytes;
		long space = bitmap->mddev->bitmap_info.space;

		if (space == 0) {
			/* We don't know how much space there is, so limit
			 * to current size - in sectors.
			 */
			bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8);
			if (!bitmap->mddev->bitmap_info.external)
				bytes += sizeof(bitmap_super_t);
			space = DIV_ROUND_UP(bytes, 512);
			bitmap->mddev->bitmap_info.space = space;
		}
		/* pre-decrement so the first loop pass tests the current shift */
		chunkshift = bitmap->counts.chunkshift;
		chunkshift--;
		do {
			/* 'chunkshift' is shift from block size to chunk size */
			chunkshift++;
			chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
			bytes = DIV_ROUND_UP(chunks, 8);
			if (!bitmap->mddev->bitmap_info.external)
				bytes += sizeof(bitmap_super_t);
		} while (bytes > (space << 9) && (chunkshift + BITMAP_BLOCK_SHIFT) <
			(BITS_PER_BYTE * sizeof(((bitmap_super_t *)0)->chunksize) - 1));
	} else
		chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT;

	chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
	memset(&store, 0, sizeof(store));
	/* new backing storage is only needed when a location is configured */
	if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file)
		ret = md_bitmap_storage_alloc(&store, chunks,
					      !bitmap->mddev->bitmap_info.external,
					      mddev_is_clustered(bitmap->mddev)
					      ? bitmap->cluster_slot : 0);
	if (ret) {
		md_bitmap_file_unmap(&store);
		goto err;
	}

	pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO);

	new_bp = kcalloc(pages, sizeof(*new_bp), GFP_KERNEL);
	ret = -ENOMEM;
	if (!new_bp) {
		md_bitmap_file_unmap(&store);
		goto err;
	}

	if (!init)
		bitmap->mddev->pers->quiesce(bitmap->mddev, 1);

	/* the file reference moves over to the new storage descriptor */
	store.file = bitmap->storage.file;
	bitmap->storage.file = NULL;

	/* carry the current superblock contents over to the new sb page */
	if (store.sb_page && bitmap->storage.sb_page)
		memcpy(page_address(store.sb_page),
		       page_address(bitmap->storage.sb_page),
		       sizeof(bitmap_super_t));
	spin_lock_irq(&bitmap->counts.lock);
	md_bitmap_file_unmap(&bitmap->storage);
	bitmap->storage = store;

	/* keep the old counters around so their bits can be copied below */
	old_counts = bitmap->counts;
	bitmap->counts.bp = new_bp;
	bitmap->counts.pages = pages;
	bitmap->counts.missing_pages = pages;
	bitmap->counts.chunkshift = chunkshift;
	bitmap->counts.chunks = chunks;
	bitmap->mddev->bitmap_info.chunksize = 1UL << (chunkshift +
						     BITMAP_BLOCK_SHIFT);

	/* only the range covered by both old and new bitmap is copied */
	blocks = min(old_counts.chunks << old_counts.chunkshift,
		     chunks << chunkshift);

	/* For cluster raid, need to pre-allocate bitmap */
	if (mddev_is_clustered(bitmap->mddev)) {
		unsigned long page;
		for (page = 0; page < pages; page++) {
			ret = md_bitmap_checkpage(&bitmap->counts, page, 1, 1);
			if (ret) {
				unsigned long k;

				/* deallocate the page memory */
				for (k = 0; k < page; k++) {
					kfree(new_bp[k].map);
				}
				kfree(new_bp);

				/* restore some fields from old_counts */
				bitmap->counts.bp = old_counts.bp;
				bitmap->counts.pages = old_counts.pages;
				bitmap->counts.missing_pages = old_counts.pages;
				bitmap->counts.chunkshift = old_counts.chunkshift;
				bitmap->counts.chunks = old_counts.chunks;
				bitmap->mddev->bitmap_info.chunksize =
					1UL << (old_counts.chunkshift + BITMAP_BLOCK_SHIFT);
				blocks = old_counts.chunks << old_counts.chunkshift;
				pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n");
				break;
			} else
				bitmap->counts.bp[page].count += 1;
		}
	}

	/* copy the NEEDED state of every old counter into the new counters */
	for (block = 0; block < blocks; ) {
		bitmap_counter_t *bmc_old, *bmc_new;
		int set;

		bmc_old = md_bitmap_get_counter(&old_counts, block, &old_blocks, 0);
		set = bmc_old && NEEDED(*bmc_old);

		if (set) {
			bmc_new = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1);
			if (bmc_new) {
				if (*bmc_new == 0) {
					/* need to set on-disk bits too. */
					sector_t end = block + new_blocks;
					sector_t start = block >> chunkshift;

					start <<= chunkshift;
					/*
					 * Walk every chunk in [start, end) and
					 * set its own file bit; the span can
					 * hold more than one chunk, so the bit
					 * must follow 'start', not 'block'.
					 */
					while (start < end) {
						md_bitmap_file_set_bit(bitmap, start);
						start += 1 << chunkshift;
					}
					*bmc_new = 2;
					md_bitmap_count_page(&bitmap->counts, block, 1);
					md_bitmap_set_pending(&bitmap->counts, block);
				}
				*bmc_new |= NEEDED_MASK;
			}
			/* advance by the smaller of the two counter spans */
			if (new_blocks < old_blocks)
				old_blocks = new_blocks;
		}
		block += old_blocks;
	}

	/* the old counter pages are no longer referenced once replaced */
	if (bitmap->counts.bp != old_counts.bp) {
		unsigned long k;
		for (k = 0; k < old_counts.pages; k++)
			if (!old_counts.bp[k].hijacked)
				kfree(old_counts.bp[k].map);
		kfree(old_counts.bp);
	}

	if (!init) {
		int i;
		while (block < (chunks << chunkshift)) {
			bitmap_counter_t *bmc;
			bmc = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1);
			if (bmc) {
				/* new space.  It needs to be resynced, so
				 * we set NEEDED_MASK.
				 */
				if (*bmc == 0) {
					*bmc = NEEDED_MASK | 2;
					md_bitmap_count_page(&bitmap->counts, block, 1);
					md_bitmap_set_pending(&bitmap->counts, block);
				}
			}
			block += new_blocks;
		}
		/* flag every file page dirty ahead of the unplug below */
		for (i = 0; i < bitmap->storage.file_pages; i++)
			set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
	}
	spin_unlock_irq(&bitmap->counts.lock);

	if (!init) {
		md_bitmap_unplug(bitmap);
		bitmap->mddev->pers->quiesce(bitmap->mddev, 0);
	}
	ret = 0;
err:
	return ret;
}
EXPORT_SYMBOL_GPL(md_bitmap_resize);
2346  
2347  static ssize_t
location_show(struct mddev * mddev,char * page)2348  location_show(struct mddev *mddev, char *page)
2349  {
2350  	ssize_t len;
2351  	if (mddev->bitmap_info.file)
2352  		len = sprintf(page, "file");
2353  	else if (mddev->bitmap_info.offset)
2354  		len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset);
2355  	else
2356  		len = sprintf(page, "none");
2357  	len += sprintf(page+len, "\n");
2358  	return len;
2359  }
2360  
2361  static ssize_t
location_store(struct mddev * mddev,const char * buf,size_t len)2362  location_store(struct mddev *mddev, const char *buf, size_t len)
2363  {
2364  	int rv;
2365  
2366  	rv = mddev_lock(mddev);
2367  	if (rv)
2368  		return rv;
2369  	if (mddev->pers) {
2370  		if (!mddev->pers->quiesce) {
2371  			rv = -EBUSY;
2372  			goto out;
2373  		}
2374  		if (mddev->recovery || mddev->sync_thread) {
2375  			rv = -EBUSY;
2376  			goto out;
2377  		}
2378  	}
2379  
2380  	if (mddev->bitmap || mddev->bitmap_info.file ||
2381  	    mddev->bitmap_info.offset) {
2382  		/* bitmap already configured.  Only option is to clear it */
2383  		if (strncmp(buf, "none", 4) != 0) {
2384  			rv = -EBUSY;
2385  			goto out;
2386  		}
2387  		if (mddev->pers) {
2388  			mddev_suspend(mddev);
2389  			md_bitmap_destroy(mddev);
2390  			mddev_resume(mddev);
2391  		}
2392  		mddev->bitmap_info.offset = 0;
2393  		if (mddev->bitmap_info.file) {
2394  			struct file *f = mddev->bitmap_info.file;
2395  			mddev->bitmap_info.file = NULL;
2396  			fput(f);
2397  		}
2398  	} else {
2399  		/* No bitmap, OK to set a location */
2400  		long long offset;
2401  		if (strncmp(buf, "none", 4) == 0)
2402  			/* nothing to be done */;
2403  		else if (strncmp(buf, "file:", 5) == 0) {
2404  			/* Not supported yet */
2405  			rv = -EINVAL;
2406  			goto out;
2407  		} else {
2408  			if (buf[0] == '+')
2409  				rv = kstrtoll(buf+1, 10, &offset);
2410  			else
2411  				rv = kstrtoll(buf, 10, &offset);
2412  			if (rv)
2413  				goto out;
2414  			if (offset == 0) {
2415  				rv = -EINVAL;
2416  				goto out;
2417  			}
2418  			if (mddev->bitmap_info.external == 0 &&
2419  			    mddev->major_version == 0 &&
2420  			    offset != mddev->bitmap_info.default_offset) {
2421  				rv = -EINVAL;
2422  				goto out;
2423  			}
2424  			mddev->bitmap_info.offset = offset;
2425  			if (mddev->pers) {
2426  				struct bitmap *bitmap;
2427  				bitmap = md_bitmap_create(mddev, -1);
2428  				mddev_suspend(mddev);
2429  				if (IS_ERR(bitmap))
2430  					rv = PTR_ERR(bitmap);
2431  				else {
2432  					mddev->bitmap = bitmap;
2433  					rv = md_bitmap_load(mddev);
2434  					if (rv)
2435  						mddev->bitmap_info.offset = 0;
2436  				}
2437  				if (rv) {
2438  					md_bitmap_destroy(mddev);
2439  					mddev_resume(mddev);
2440  					goto out;
2441  				}
2442  				mddev_resume(mddev);
2443  			}
2444  		}
2445  	}
2446  	if (!mddev->external) {
2447  		/* Ensure new bitmap info is stored in
2448  		 * metadata promptly.
2449  		 */
2450  		set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2451  		md_wakeup_thread(mddev->thread);
2452  	}
2453  	rv = 0;
2454  out:
2455  	mddev_unlock(mddev);
2456  	if (rv)
2457  		return rv;
2458  	return len;
2459  }
2460  
2461  static struct md_sysfs_entry bitmap_location =
2462  __ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store);
2463  
2464  /* 'bitmap/space' is the space available at 'location' for the
2465   * bitmap.  This allows the kernel to know when it is safe to
2466   * resize the bitmap to match a resized array.
2467   */
2468  static ssize_t
space_show(struct mddev * mddev,char * page)2469  space_show(struct mddev *mddev, char *page)
2470  {
2471  	return sprintf(page, "%lu\n", mddev->bitmap_info.space);
2472  }
2473  
2474  static ssize_t
space_store(struct mddev * mddev,const char * buf,size_t len)2475  space_store(struct mddev *mddev, const char *buf, size_t len)
2476  {
2477  	unsigned long sectors;
2478  	int rv;
2479  
2480  	rv = kstrtoul(buf, 10, &sectors);
2481  	if (rv)
2482  		return rv;
2483  
2484  	if (sectors == 0)
2485  		return -EINVAL;
2486  
2487  	if (mddev->bitmap &&
2488  	    sectors < (mddev->bitmap->storage.bytes + 511) >> 9)
2489  		return -EFBIG; /* Bitmap is too big for this small space */
2490  
2491  	/* could make sure it isn't too big, but that isn't really
2492  	 * needed - user-space should be careful.
2493  	 */
2494  	mddev->bitmap_info.space = sectors;
2495  	return len;
2496  }
2497  
2498  static struct md_sysfs_entry bitmap_space =
2499  __ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store);
2500  
2501  static ssize_t
timeout_show(struct mddev * mddev,char * page)2502  timeout_show(struct mddev *mddev, char *page)
2503  {
2504  	ssize_t len;
2505  	unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ;
2506  	unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ;
2507  
2508  	len = sprintf(page, "%lu", secs);
2509  	if (jifs)
2510  		len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs));
2511  	len += sprintf(page+len, "\n");
2512  	return len;
2513  }
2514  
2515  static ssize_t
timeout_store(struct mddev * mddev,const char * buf,size_t len)2516  timeout_store(struct mddev *mddev, const char *buf, size_t len)
2517  {
2518  	/* timeout can be set at any time */
2519  	unsigned long timeout;
2520  	int rv = strict_strtoul_scaled(buf, &timeout, 4);
2521  	if (rv)
2522  		return rv;
2523  
2524  	/* just to make sure we don't overflow... */
2525  	if (timeout >= LONG_MAX / HZ)
2526  		return -EINVAL;
2527  
2528  	timeout = timeout * HZ / 10000;
2529  
2530  	if (timeout >= MAX_SCHEDULE_TIMEOUT)
2531  		timeout = MAX_SCHEDULE_TIMEOUT-1;
2532  	if (timeout < 1)
2533  		timeout = 1;
2534  
2535  	mddev->bitmap_info.daemon_sleep = timeout;
2536  	mddev_set_timeout(mddev, timeout, false);
2537  	md_wakeup_thread(mddev->thread);
2538  
2539  	return len;
2540  }
2541  
2542  static struct md_sysfs_entry bitmap_timeout =
2543  __ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store);
2544  
2545  static ssize_t
backlog_show(struct mddev * mddev,char * page)2546  backlog_show(struct mddev *mddev, char *page)
2547  {
2548  	return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind);
2549  }
2550  
2551  static ssize_t
backlog_store(struct mddev * mddev,const char * buf,size_t len)2552  backlog_store(struct mddev *mddev, const char *buf, size_t len)
2553  {
2554  	unsigned long backlog;
2555  	unsigned long old_mwb = mddev->bitmap_info.max_write_behind;
2556  	struct md_rdev *rdev;
2557  	bool has_write_mostly = false;
2558  	int rv = kstrtoul(buf, 10, &backlog);
2559  	if (rv)
2560  		return rv;
2561  	if (backlog > COUNTER_MAX)
2562  		return -EINVAL;
2563  
2564  	rv = mddev_lock(mddev);
2565  	if (rv)
2566  		return rv;
2567  
2568  	/*
2569  	 * Without write mostly device, it doesn't make sense to set
2570  	 * backlog for max_write_behind.
2571  	 */
2572  	rdev_for_each(rdev, mddev) {
2573  		if (test_bit(WriteMostly, &rdev->flags)) {
2574  			has_write_mostly = true;
2575  			break;
2576  		}
2577  	}
2578  	if (!has_write_mostly) {
2579  		pr_warn_ratelimited("%s: can't set backlog, no write mostly device available\n",
2580  				    mdname(mddev));
2581  		mddev_unlock(mddev);
2582  		return -EINVAL;
2583  	}
2584  
2585  	mddev->bitmap_info.max_write_behind = backlog;
2586  	if (!backlog && mddev->serial_info_pool) {
2587  		/* serial_info_pool is not needed if backlog is zero */
2588  		if (!mddev->serialize_policy)
2589  			mddev_destroy_serial_pool(mddev, NULL, false);
2590  	} else if (backlog && !mddev->serial_info_pool) {
2591  		/* serial_info_pool is needed since backlog is not zero */
2592  		rdev_for_each(rdev, mddev)
2593  			mddev_create_serial_pool(mddev, rdev, false);
2594  	}
2595  	if (old_mwb != backlog)
2596  		md_bitmap_update_sb(mddev->bitmap);
2597  
2598  	mddev_unlock(mddev);
2599  	return len;
2600  }
2601  
2602  static struct md_sysfs_entry bitmap_backlog =
2603  __ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store);
2604  
2605  static ssize_t
chunksize_show(struct mddev * mddev,char * page)2606  chunksize_show(struct mddev *mddev, char *page)
2607  {
2608  	return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize);
2609  }
2610  
2611  static ssize_t
chunksize_store(struct mddev * mddev,const char * buf,size_t len)2612  chunksize_store(struct mddev *mddev, const char *buf, size_t len)
2613  {
2614  	/* Can only be changed when no bitmap is active */
2615  	int rv;
2616  	unsigned long csize;
2617  	if (mddev->bitmap)
2618  		return -EBUSY;
2619  	rv = kstrtoul(buf, 10, &csize);
2620  	if (rv)
2621  		return rv;
2622  	if (csize < 512 ||
2623  	    !is_power_of_2(csize))
2624  		return -EINVAL;
2625  	if (BITS_PER_LONG > 32 && csize >= (1ULL << (BITS_PER_BYTE *
2626  		sizeof(((bitmap_super_t *)0)->chunksize))))
2627  		return -EOVERFLOW;
2628  	mddev->bitmap_info.chunksize = csize;
2629  	return len;
2630  }
2631  
2632  static struct md_sysfs_entry bitmap_chunksize =
2633  __ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
2634  
metadata_show(struct mddev * mddev,char * page)2635  static ssize_t metadata_show(struct mddev *mddev, char *page)
2636  {
2637  	if (mddev_is_clustered(mddev))
2638  		return sprintf(page, "clustered\n");
2639  	return sprintf(page, "%s\n", (mddev->bitmap_info.external
2640  				      ? "external" : "internal"));
2641  }
2642  
metadata_store(struct mddev * mddev,const char * buf,size_t len)2643  static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len)
2644  {
2645  	if (mddev->bitmap ||
2646  	    mddev->bitmap_info.file ||
2647  	    mddev->bitmap_info.offset)
2648  		return -EBUSY;
2649  	if (strncmp(buf, "external", 8) == 0)
2650  		mddev->bitmap_info.external = 1;
2651  	else if ((strncmp(buf, "internal", 8) == 0) ||
2652  			(strncmp(buf, "clustered", 9) == 0))
2653  		mddev->bitmap_info.external = 0;
2654  	else
2655  		return -EINVAL;
2656  	return len;
2657  }
2658  
2659  static struct md_sysfs_entry bitmap_metadata =
2660  __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
2661  
can_clear_show(struct mddev * mddev,char * page)2662  static ssize_t can_clear_show(struct mddev *mddev, char *page)
2663  {
2664  	int len;
2665  	spin_lock(&mddev->lock);
2666  	if (mddev->bitmap)
2667  		len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ?
2668  					     "false" : "true"));
2669  	else
2670  		len = sprintf(page, "\n");
2671  	spin_unlock(&mddev->lock);
2672  	return len;
2673  }
2674  
can_clear_store(struct mddev * mddev,const char * buf,size_t len)2675  static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len)
2676  {
2677  	if (mddev->bitmap == NULL)
2678  		return -ENOENT;
2679  	if (strncmp(buf, "false", 5) == 0)
2680  		mddev->bitmap->need_sync = 1;
2681  	else if (strncmp(buf, "true", 4) == 0) {
2682  		if (mddev->degraded)
2683  			return -EBUSY;
2684  		mddev->bitmap->need_sync = 0;
2685  	} else
2686  		return -EINVAL;
2687  	return len;
2688  }
2689  
2690  static struct md_sysfs_entry bitmap_can_clear =
2691  __ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
2692  
2693  static ssize_t
behind_writes_used_show(struct mddev * mddev,char * page)2694  behind_writes_used_show(struct mddev *mddev, char *page)
2695  {
2696  	ssize_t ret;
2697  	spin_lock(&mddev->lock);
2698  	if (mddev->bitmap == NULL)
2699  		ret = sprintf(page, "0\n");
2700  	else
2701  		ret = sprintf(page, "%lu\n",
2702  			      mddev->bitmap->behind_writes_used);
2703  	spin_unlock(&mddev->lock);
2704  	return ret;
2705  }
2706  
2707  static ssize_t
behind_writes_used_reset(struct mddev * mddev,const char * buf,size_t len)2708  behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len)
2709  {
2710  	if (mddev->bitmap)
2711  		mddev->bitmap->behind_writes_used = 0;
2712  	return len;
2713  }
2714  
2715  static struct md_sysfs_entry max_backlog_used =
2716  __ATTR(max_backlog_used, S_IRUGO | S_IWUSR,
2717         behind_writes_used_show, behind_writes_used_reset);
2718  
2719  static struct attribute *md_bitmap_attrs[] = {
2720  	&bitmap_location.attr,
2721  	&bitmap_space.attr,
2722  	&bitmap_timeout.attr,
2723  	&bitmap_backlog.attr,
2724  	&bitmap_chunksize.attr,
2725  	&bitmap_metadata.attr,
2726  	&bitmap_can_clear.attr,
2727  	&max_backlog_used.attr,
2728  	NULL
2729  };
2730  const struct attribute_group md_bitmap_group = {
2731  	.name = "bitmap",
2732  	.attrs = md_bitmap_attrs,
2733  };
2734