xref: /openbmc/linux/drivers/mtd/ubi/eba.c (revision 64c70b1c)
1 /*
2  * Copyright (c) International Business Machines Corp., 2006
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12  * the GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17  *
18  * Author: Artem Bityutskiy (Битюцкий Артём)
19  */
20 
21 /*
22  * The UBI Eraseblock Association (EBA) unit.
23  *
24  * This unit is responsible for I/O to/from logical eraseblock.
25  *
26  * Although in this implementation the EBA table is fully kept and managed in
27  * RAM, which assumes poor scalability, it might be (partially) maintained on
28  * flash in future implementations.
29  *
30  * The EBA unit implements per-logical eraseblock locking. Before accessing a
31  * logical eraseblock it is locked for reading or writing. The per-logical
32  * eraseblock locking is implemented by means of the lock tree. The lock tree
33  * is an RB-tree which refers all the currently locked logical eraseblocks. The
34  * lock tree elements are &struct ltree_entry objects. They are indexed by
35  * (@vol_id, @lnum) pairs.
36  *
37  * EBA also maintains the global sequence counter which is incremented each
38  * time a logical eraseblock is mapped to a physical eraseblock and it is
39  * stored in the volume identifier header. This means that each VID header has
40  * a unique sequence number. The sequence number is only increased and we assume
41  * 64 bits is enough to never overflow.
42  */
43 
44 #include <linux/slab.h>
45 #include <linux/crc32.h>
46 #include <linux/err.h>
47 #include "ubi.h"
48 
/**
 * struct ltree_entry - an entry in the lock tree.
 * @rb: links RB-tree nodes
 * @vol_id: volume ID of the locked logical eraseblock
 * @lnum: locked logical eraseblock number
 * @users: how many tasks are using this logical eraseblock or wait for it;
 *         it is incremented in ltree_add_entry() before the caller sleeps on
 *         @mutex, and the entry is removed from the tree and freed when it
 *         drops back to zero
 * @mutex: read/write mutex to implement read/write access serialization to
 *         the (@vol_id, @lnum) logical eraseblock
 *
 * When a logical eraseblock is being locked, the corresponding
 * &struct ltree_entry object is inserted into the lock tree (@ubi->ltree).
 * The tree is ordered by @vol_id first and by @lnum second.
 */
struct ltree_entry {
	struct rb_node rb;
	int vol_id;
	int lnum;
	int users;
	struct rw_semaphore mutex;
};
68 
/*
 * Slab cache for lock-tree entries: ltree_add_entry() allocates from it and
 * the LEB unlock helpers return entries to it.
 */
static struct kmem_cache *ltree_slab;
71 
72 /**
73  * next_sqnum - get next sequence number.
74  * @ubi: UBI device description object
75  *
76  * This function returns next sequence number to use, which is just the current
77  * global sequence counter value. It also increases the global sequence
78  * counter.
79  */
80 static unsigned long long next_sqnum(struct ubi_device *ubi)
81 {
82 	unsigned long long sqnum;
83 
84 	spin_lock(&ubi->ltree_lock);
85 	sqnum = ubi->global_sqnum++;
86 	spin_unlock(&ubi->ltree_lock);
87 
88 	return sqnum;
89 }
90 
91 /**
92  * ubi_get_compat - get compatibility flags of a volume.
93  * @ubi: UBI device description object
94  * @vol_id: volume ID
95  *
96  * This function returns compatibility flags for an internal volume. User
97  * volumes have no compatibility flags, so %0 is returned.
98  */
99 static int ubi_get_compat(const struct ubi_device *ubi, int vol_id)
100 {
101 	if (vol_id == UBI_LAYOUT_VOL_ID)
102 		return UBI_LAYOUT_VOLUME_COMPAT;
103 	return 0;
104 }
105 
106 /**
107  * ltree_lookup - look up the lock tree.
108  * @ubi: UBI device description object
109  * @vol_id: volume ID
110  * @lnum: logical eraseblock number
111  *
112  * This function returns a pointer to the corresponding &struct ltree_entry
113  * object if the logical eraseblock is locked and %NULL if it is not.
114  * @ubi->ltree_lock has to be locked.
115  */
116 static struct ltree_entry *ltree_lookup(struct ubi_device *ubi, int vol_id,
117 					int lnum)
118 {
119 	struct rb_node *p;
120 
121 	p = ubi->ltree.rb_node;
122 	while (p) {
123 		struct ltree_entry *le;
124 
125 		le = rb_entry(p, struct ltree_entry, rb);
126 
127 		if (vol_id < le->vol_id)
128 			p = p->rb_left;
129 		else if (vol_id > le->vol_id)
130 			p = p->rb_right;
131 		else {
132 			if (lnum < le->lnum)
133 				p = p->rb_left;
134 			else if (lnum > le->lnum)
135 				p = p->rb_right;
136 			else
137 				return le;
138 		}
139 	}
140 
141 	return NULL;
142 }
143 
144 /**
145  * ltree_add_entry - add new entry to the lock tree.
146  * @ubi: UBI device description object
147  * @vol_id: volume ID
148  * @lnum: logical eraseblock number
149  *
150  * This function adds new entry for logical eraseblock (@vol_id, @lnum) to the
151  * lock tree. If such entry is already there, its usage counter is increased.
152  * Returns pointer to the lock tree entry or %-ENOMEM if memory allocation
153  * failed.
154  */
155 static struct ltree_entry *ltree_add_entry(struct ubi_device *ubi, int vol_id,
156 					   int lnum)
157 {
158 	struct ltree_entry *le, *le1, *le_free;
159 
160 	le = kmem_cache_alloc(ltree_slab, GFP_KERNEL);
161 	if (!le)
162 		return ERR_PTR(-ENOMEM);
163 
164 	le->vol_id = vol_id;
165 	le->lnum = lnum;
166 
167 	spin_lock(&ubi->ltree_lock);
168 	le1 = ltree_lookup(ubi, vol_id, lnum);
169 
170 	if (le1) {
171 		/*
172 		 * This logical eraseblock is already locked. The newly
173 		 * allocated lock entry is not needed.
174 		 */
175 		le_free = le;
176 		le = le1;
177 	} else {
178 		struct rb_node **p, *parent = NULL;
179 
180 		/*
181 		 * No lock entry, add the newly allocated one to the
182 		 * @ubi->ltree RB-tree.
183 		 */
184 		le_free = NULL;
185 
186 		p = &ubi->ltree.rb_node;
187 		while (*p) {
188 			parent = *p;
189 			le1 = rb_entry(parent, struct ltree_entry, rb);
190 
191 			if (vol_id < le1->vol_id)
192 				p = &(*p)->rb_left;
193 			else if (vol_id > le1->vol_id)
194 				p = &(*p)->rb_right;
195 			else {
196 				ubi_assert(lnum != le1->lnum);
197 				if (lnum < le1->lnum)
198 					p = &(*p)->rb_left;
199 				else
200 					p = &(*p)->rb_right;
201 			}
202 		}
203 
204 		rb_link_node(&le->rb, parent, p);
205 		rb_insert_color(&le->rb, &ubi->ltree);
206 	}
207 	le->users += 1;
208 	spin_unlock(&ubi->ltree_lock);
209 
210 	if (le_free)
211 		kmem_cache_free(ltree_slab, le_free);
212 
213 	return le;
214 }
215 
216 /**
217  * leb_read_lock - lock logical eraseblock for reading.
218  * @ubi: UBI device description object
219  * @vol_id: volume ID
220  * @lnum: logical eraseblock number
221  *
222  * This function locks a logical eraseblock for reading. Returns zero in case
223  * of success and a negative error code in case of failure.
224  */
225 static int leb_read_lock(struct ubi_device *ubi, int vol_id, int lnum)
226 {
227 	struct ltree_entry *le;
228 
229 	le = ltree_add_entry(ubi, vol_id, lnum);
230 	if (IS_ERR(le))
231 		return PTR_ERR(le);
232 	down_read(&le->mutex);
233 	return 0;
234 }
235 
236 /**
237  * leb_read_unlock - unlock logical eraseblock.
238  * @ubi: UBI device description object
239  * @vol_id: volume ID
240  * @lnum: logical eraseblock number
241  */
242 static void leb_read_unlock(struct ubi_device *ubi, int vol_id, int lnum)
243 {
244 	int free = 0;
245 	struct ltree_entry *le;
246 
247 	spin_lock(&ubi->ltree_lock);
248 	le = ltree_lookup(ubi, vol_id, lnum);
249 	le->users -= 1;
250 	ubi_assert(le->users >= 0);
251 	if (le->users == 0) {
252 		rb_erase(&le->rb, &ubi->ltree);
253 		free = 1;
254 	}
255 	spin_unlock(&ubi->ltree_lock);
256 
257 	up_read(&le->mutex);
258 	if (free)
259 		kmem_cache_free(ltree_slab, le);
260 }
261 
262 /**
263  * leb_write_lock - lock logical eraseblock for writing.
264  * @ubi: UBI device description object
265  * @vol_id: volume ID
266  * @lnum: logical eraseblock number
267  *
268  * This function locks a logical eraseblock for writing. Returns zero in case
269  * of success and a negative error code in case of failure.
270  */
271 static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum)
272 {
273 	struct ltree_entry *le;
274 
275 	le = ltree_add_entry(ubi, vol_id, lnum);
276 	if (IS_ERR(le))
277 		return PTR_ERR(le);
278 	down_write(&le->mutex);
279 	return 0;
280 }
281 
282 /**
283  * leb_write_unlock - unlock logical eraseblock.
284  * @ubi: UBI device description object
285  * @vol_id: volume ID
286  * @lnum: logical eraseblock number
287  */
288 static void leb_write_unlock(struct ubi_device *ubi, int vol_id, int lnum)
289 {
290 	int free;
291 	struct ltree_entry *le;
292 
293 	spin_lock(&ubi->ltree_lock);
294 	le = ltree_lookup(ubi, vol_id, lnum);
295 	le->users -= 1;
296 	ubi_assert(le->users >= 0);
297 	if (le->users == 0) {
298 		rb_erase(&le->rb, &ubi->ltree);
299 		free = 1;
300 	} else
301 		free = 0;
302 	spin_unlock(&ubi->ltree_lock);
303 
304 	up_write(&le->mutex);
305 	if (free)
306 		kmem_cache_free(ltree_slab, le);
307 }
308 
309 /**
310  * ubi_eba_unmap_leb - un-map logical eraseblock.
311  * @ubi: UBI device description object
312  * @vol_id: volume ID
313  * @lnum: logical eraseblock number
314  *
315  * This function un-maps logical eraseblock @lnum and schedules corresponding
316  * physical eraseblock for erasure. Returns zero in case of success and a
317  * negative error code in case of failure.
318  */
319 int ubi_eba_unmap_leb(struct ubi_device *ubi, int vol_id, int lnum)
320 {
321 	int idx = vol_id2idx(ubi, vol_id), err, pnum;
322 	struct ubi_volume *vol = ubi->volumes[idx];
323 
324 	if (ubi->ro_mode)
325 		return -EROFS;
326 
327 	err = leb_write_lock(ubi, vol_id, lnum);
328 	if (err)
329 		return err;
330 
331 	pnum = vol->eba_tbl[lnum];
332 	if (pnum < 0)
333 		/* This logical eraseblock is already unmapped */
334 		goto out_unlock;
335 
336 	dbg_eba("erase LEB %d:%d, PEB %d", vol_id, lnum, pnum);
337 
338 	vol->eba_tbl[lnum] = UBI_LEB_UNMAPPED;
339 	err = ubi_wl_put_peb(ubi, pnum, 0);
340 
341 out_unlock:
342 	leb_write_unlock(ubi, vol_id, lnum);
343 	return err;
344 }
345 
346 /**
347  * ubi_eba_read_leb - read data.
348  * @ubi: UBI device description object
349  * @vol_id: volume ID
350  * @lnum: logical eraseblock number
351  * @buf: buffer to store the read data
352  * @offset: offset from where to read
353  * @len: how many bytes to read
354  * @check: data CRC check flag
355  *
356  * If the logical eraseblock @lnum is unmapped, @buf is filled with 0xFF
357  * bytes. The @check flag only makes sense for static volumes and forces
358  * eraseblock data CRC checking.
359  *
360  * In case of success this function returns zero. In case of a static volume,
361  * if data CRC mismatches - %-EBADMSG is returned. %-EBADMSG may also be
362  * returned for any volume type if an ECC error was detected by the MTD device
363  * driver. Other negative error cored may be returned in case of other errors.
364  */
365 int ubi_eba_read_leb(struct ubi_device *ubi, int vol_id, int lnum, void *buf,
366 		     int offset, int len, int check)
367 {
368 	int err, pnum, scrub = 0, idx = vol_id2idx(ubi, vol_id);
369 	struct ubi_vid_hdr *vid_hdr;
370 	struct ubi_volume *vol = ubi->volumes[idx];
371 	uint32_t crc, crc1;
372 
373 	err = leb_read_lock(ubi, vol_id, lnum);
374 	if (err)
375 		return err;
376 
377 	pnum = vol->eba_tbl[lnum];
378 	if (pnum < 0) {
379 		/*
380 		 * The logical eraseblock is not mapped, fill the whole buffer
381 		 * with 0xFF bytes. The exception is static volumes for which
382 		 * it is an error to read unmapped logical eraseblocks.
383 		 */
384 		dbg_eba("read %d bytes from offset %d of LEB %d:%d (unmapped)",
385 			len, offset, vol_id, lnum);
386 		leb_read_unlock(ubi, vol_id, lnum);
387 		ubi_assert(vol->vol_type != UBI_STATIC_VOLUME);
388 		memset(buf, 0xFF, len);
389 		return 0;
390 	}
391 
392 	dbg_eba("read %d bytes from offset %d of LEB %d:%d, PEB %d",
393 		len, offset, vol_id, lnum, pnum);
394 
395 	if (vol->vol_type == UBI_DYNAMIC_VOLUME)
396 		check = 0;
397 
398 retry:
399 	if (check) {
400 		vid_hdr = ubi_zalloc_vid_hdr(ubi);
401 		if (!vid_hdr) {
402 			err = -ENOMEM;
403 			goto out_unlock;
404 		}
405 
406 		err = ubi_io_read_vid_hdr(ubi, pnum, vid_hdr, 1);
407 		if (err && err != UBI_IO_BITFLIPS) {
408 			if (err > 0) {
409 				/*
410 				 * The header is either absent or corrupted.
411 				 * The former case means there is a bug -
412 				 * switch to read-only mode just in case.
413 				 * The latter case means a real corruption - we
414 				 * may try to recover data. FIXME: but this is
415 				 * not implemented.
416 				 */
417 				if (err == UBI_IO_BAD_VID_HDR) {
418 					ubi_warn("bad VID header at PEB %d, LEB"
419 						 "%d:%d", pnum, vol_id, lnum);
420 					err = -EBADMSG;
421 				} else
422 					ubi_ro_mode(ubi);
423 			}
424 			goto out_free;
425 		} else if (err == UBI_IO_BITFLIPS)
426 			scrub = 1;
427 
428 		ubi_assert(lnum < ubi32_to_cpu(vid_hdr->used_ebs));
429 		ubi_assert(len == ubi32_to_cpu(vid_hdr->data_size));
430 
431 		crc = ubi32_to_cpu(vid_hdr->data_crc);
432 		ubi_free_vid_hdr(ubi, vid_hdr);
433 	}
434 
435 	err = ubi_io_read_data(ubi, buf, pnum, offset, len);
436 	if (err) {
437 		if (err == UBI_IO_BITFLIPS) {
438 			scrub = 1;
439 			err = 0;
440 		} else if (err == -EBADMSG) {
441 			if (vol->vol_type == UBI_DYNAMIC_VOLUME)
442 				goto out_unlock;
443 			scrub = 1;
444 			if (!check) {
445 				ubi_msg("force data checking");
446 				check = 1;
447 				goto retry;
448 			}
449 		} else
450 			goto out_unlock;
451 	}
452 
453 	if (check) {
454 		crc1 = crc32(UBI_CRC32_INIT, buf, len);
455 		if (crc1 != crc) {
456 			ubi_warn("CRC error: calculated %#08x, must be %#08x",
457 				 crc1, crc);
458 			err = -EBADMSG;
459 			goto out_unlock;
460 		}
461 	}
462 
463 	if (scrub)
464 		err = ubi_wl_scrub_peb(ubi, pnum);
465 
466 	leb_read_unlock(ubi, vol_id, lnum);
467 	return err;
468 
469 out_free:
470 	ubi_free_vid_hdr(ubi, vid_hdr);
471 out_unlock:
472 	leb_read_unlock(ubi, vol_id, lnum);
473 	return err;
474 }
475 
476 /**
477  * recover_peb - recover from write failure.
478  * @ubi: UBI device description object
479  * @pnum: the physical eraseblock to recover
480  * @vol_id: volume ID
481  * @lnum: logical eraseblock number
482  * @buf: data which was not written because of the write failure
483  * @offset: offset of the failed write
484  * @len: how many bytes should have been written
485  *
486  * This function is called in case of a write failure and moves all good data
487  * from the potentially bad physical eraseblock to a good physical eraseblock.
488  * This function also writes the data which was not written due to the failure.
489  * Returns new physical eraseblock number in case of success, and a negative
490  * error code in case of failure.
491  */
492 static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum,
493 		       const void *buf, int offset, int len)
494 {
495 	int err, idx = vol_id2idx(ubi, vol_id), new_pnum, data_size, tries = 0;
496 	struct ubi_volume *vol = ubi->volumes[idx];
497 	struct ubi_vid_hdr *vid_hdr;
498 	unsigned char *new_buf;
499 
500 	vid_hdr = ubi_zalloc_vid_hdr(ubi);
501 	if (!vid_hdr) {
502 		return -ENOMEM;
503 	}
504 
505 retry:
506 	new_pnum = ubi_wl_get_peb(ubi, UBI_UNKNOWN);
507 	if (new_pnum < 0) {
508 		ubi_free_vid_hdr(ubi, vid_hdr);
509 		return new_pnum;
510 	}
511 
512 	ubi_msg("recover PEB %d, move data to PEB %d", pnum, new_pnum);
513 
514 	err = ubi_io_read_vid_hdr(ubi, pnum, vid_hdr, 1);
515 	if (err && err != UBI_IO_BITFLIPS) {
516 		if (err > 0)
517 			err = -EIO;
518 		goto out_put;
519 	}
520 
521 	vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
522 	err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr);
523 	if (err)
524 		goto write_error;
525 
526 	data_size = offset + len;
527 	new_buf = kmalloc(data_size, GFP_KERNEL);
528 	if (!new_buf) {
529 		err = -ENOMEM;
530 		goto out_put;
531 	}
532 	memset(new_buf + offset, 0xFF, len);
533 
534 	/* Read everything before the area where the write failure happened */
535 	if (offset > 0) {
536 		err = ubi_io_read_data(ubi, new_buf, pnum, 0, offset);
537 		if (err && err != UBI_IO_BITFLIPS) {
538 			kfree(new_buf);
539 			goto out_put;
540 		}
541 	}
542 
543 	memcpy(new_buf + offset, buf, len);
544 
545 	err = ubi_io_write_data(ubi, new_buf, new_pnum, 0, data_size);
546 	if (err) {
547 		kfree(new_buf);
548 		goto write_error;
549 	}
550 
551 	kfree(new_buf);
552 	ubi_free_vid_hdr(ubi, vid_hdr);
553 
554 	vol->eba_tbl[lnum] = new_pnum;
555 	ubi_wl_put_peb(ubi, pnum, 1);
556 
557 	ubi_msg("data was successfully recovered");
558 	return 0;
559 
560 out_put:
561 	ubi_wl_put_peb(ubi, new_pnum, 1);
562 	ubi_free_vid_hdr(ubi, vid_hdr);
563 	return err;
564 
565 write_error:
566 	/*
567 	 * Bad luck? This physical eraseblock is bad too? Crud. Let's try to
568 	 * get another one.
569 	 */
570 	ubi_warn("failed to write to PEB %d", new_pnum);
571 	ubi_wl_put_peb(ubi, new_pnum, 1);
572 	if (++tries > UBI_IO_RETRIES) {
573 		ubi_free_vid_hdr(ubi, vid_hdr);
574 		return err;
575 	}
576 	ubi_msg("try again");
577 	goto retry;
578 }
579 
580 /**
581  * ubi_eba_write_leb - write data to dynamic volume.
582  * @ubi: UBI device description object
583  * @vol_id: volume ID
584  * @lnum: logical eraseblock number
585  * @buf: the data to write
586  * @offset: offset within the logical eraseblock where to write
587  * @len: how many bytes to write
588  * @dtype: data type
589  *
590  * This function writes data to logical eraseblock @lnum of a dynamic volume
591  * @vol_id. Returns zero in case of success and a negative error code in case
592  * of failure. In case of error, it is possible that something was still
593  * written to the flash media, but may be some garbage.
594  */
595 int ubi_eba_write_leb(struct ubi_device *ubi, int vol_id, int lnum,
596 		      const void *buf, int offset, int len, int dtype)
597 {
598 	int idx = vol_id2idx(ubi, vol_id), err, pnum, tries = 0;
599 	struct ubi_volume *vol = ubi->volumes[idx];
600 	struct ubi_vid_hdr *vid_hdr;
601 
602 	if (ubi->ro_mode)
603 		return -EROFS;
604 
605 	err = leb_write_lock(ubi, vol_id, lnum);
606 	if (err)
607 		return err;
608 
609 	pnum = vol->eba_tbl[lnum];
610 	if (pnum >= 0) {
611 		dbg_eba("write %d bytes at offset %d of LEB %d:%d, PEB %d",
612 			len, offset, vol_id, lnum, pnum);
613 
614 		err = ubi_io_write_data(ubi, buf, pnum, offset, len);
615 		if (err) {
616 			ubi_warn("failed to write data to PEB %d", pnum);
617 			if (err == -EIO && ubi->bad_allowed)
618 				err = recover_peb(ubi, pnum, vol_id, lnum, buf, offset, len);
619 			if (err)
620 				ubi_ro_mode(ubi);
621 		}
622 		leb_write_unlock(ubi, vol_id, lnum);
623 		return err;
624 	}
625 
626 	/*
627 	 * The logical eraseblock is not mapped. We have to get a free physical
628 	 * eraseblock and write the volume identifier header there first.
629 	 */
630 	vid_hdr = ubi_zalloc_vid_hdr(ubi);
631 	if (!vid_hdr) {
632 		leb_write_unlock(ubi, vol_id, lnum);
633 		return -ENOMEM;
634 	}
635 
636 	vid_hdr->vol_type = UBI_VID_DYNAMIC;
637 	vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
638 	vid_hdr->vol_id = cpu_to_ubi32(vol_id);
639 	vid_hdr->lnum = cpu_to_ubi32(lnum);
640 	vid_hdr->compat = ubi_get_compat(ubi, vol_id);
641 	vid_hdr->data_pad = cpu_to_ubi32(vol->data_pad);
642 
643 retry:
644 	pnum = ubi_wl_get_peb(ubi, dtype);
645 	if (pnum < 0) {
646 		ubi_free_vid_hdr(ubi, vid_hdr);
647 		leb_write_unlock(ubi, vol_id, lnum);
648 		return pnum;
649 	}
650 
651 	dbg_eba("write VID hdr and %d bytes at offset %d of LEB %d:%d, PEB %d",
652 		len, offset, vol_id, lnum, pnum);
653 
654 	err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr);
655 	if (err) {
656 		ubi_warn("failed to write VID header to LEB %d:%d, PEB %d",
657 			 vol_id, lnum, pnum);
658 		goto write_error;
659 	}
660 
661 	err = ubi_io_write_data(ubi, buf, pnum, offset, len);
662 	if (err) {
663 		ubi_warn("failed to write %d bytes at offset %d of LEB %d:%d, "
664 			 "PEB %d", len, offset, vol_id, lnum, pnum);
665 		goto write_error;
666 	}
667 
668 	vol->eba_tbl[lnum] = pnum;
669 
670 	leb_write_unlock(ubi, vol_id, lnum);
671 	ubi_free_vid_hdr(ubi, vid_hdr);
672 	return 0;
673 
674 write_error:
675 	if (err != -EIO || !ubi->bad_allowed) {
676 		ubi_ro_mode(ubi);
677 		leb_write_unlock(ubi, vol_id, lnum);
678 		ubi_free_vid_hdr(ubi, vid_hdr);
679 		return err;
680 	}
681 
682 	/*
683 	 * Fortunately, this is the first write operation to this physical
684 	 * eraseblock, so just put it and request a new one. We assume that if
685 	 * this physical eraseblock went bad, the erase code will handle that.
686 	 */
687 	err = ubi_wl_put_peb(ubi, pnum, 1);
688 	if (err || ++tries > UBI_IO_RETRIES) {
689 		ubi_ro_mode(ubi);
690 		leb_write_unlock(ubi, vol_id, lnum);
691 		ubi_free_vid_hdr(ubi, vid_hdr);
692 		return err;
693 	}
694 
695 	vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
696 	ubi_msg("try another PEB");
697 	goto retry;
698 }
699 
700 /**
701  * ubi_eba_write_leb_st - write data to static volume.
702  * @ubi: UBI device description object
703  * @vol_id: volume ID
704  * @lnum: logical eraseblock number
705  * @buf: data to write
706  * @len: how many bytes to write
707  * @dtype: data type
708  * @used_ebs: how many logical eraseblocks will this volume contain
709  *
710  * This function writes data to logical eraseblock @lnum of static volume
711  * @vol_id. The @used_ebs argument should contain total number of logical
712  * eraseblock in this static volume.
713  *
714  * When writing to the last logical eraseblock, the @len argument doesn't have
715  * to be aligned to the minimal I/O unit size. Instead, it has to be equivalent
716  * to the real data size, although the @buf buffer has to contain the
717  * alignment. In all other cases, @len has to be aligned.
718  *
719  * It is prohibited to write more then once to logical eraseblocks of static
720  * volumes. This function returns zero in case of success and a negative error
721  * code in case of failure.
722  */
723 int ubi_eba_write_leb_st(struct ubi_device *ubi, int vol_id, int lnum,
724 			 const void *buf, int len, int dtype, int used_ebs)
725 {
726 	int err, pnum, tries = 0, data_size = len;
727 	int idx = vol_id2idx(ubi, vol_id);
728 	struct ubi_volume *vol = ubi->volumes[idx];
729 	struct ubi_vid_hdr *vid_hdr;
730 	uint32_t crc;
731 
732 	if (ubi->ro_mode)
733 		return -EROFS;
734 
735 	if (lnum == used_ebs - 1)
736 		/* If this is the last LEB @len may be unaligned */
737 		len = ALIGN(data_size, ubi->min_io_size);
738 	else
739 		ubi_assert(len % ubi->min_io_size == 0);
740 
741 	vid_hdr = ubi_zalloc_vid_hdr(ubi);
742 	if (!vid_hdr)
743 		return -ENOMEM;
744 
745 	err = leb_write_lock(ubi, vol_id, lnum);
746 	if (err) {
747 		ubi_free_vid_hdr(ubi, vid_hdr);
748 		return err;
749 	}
750 
751 	vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
752 	vid_hdr->vol_id = cpu_to_ubi32(vol_id);
753 	vid_hdr->lnum = cpu_to_ubi32(lnum);
754 	vid_hdr->compat = ubi_get_compat(ubi, vol_id);
755 	vid_hdr->data_pad = cpu_to_ubi32(vol->data_pad);
756 
757 	crc = crc32(UBI_CRC32_INIT, buf, data_size);
758 	vid_hdr->vol_type = UBI_VID_STATIC;
759 	vid_hdr->data_size = cpu_to_ubi32(data_size);
760 	vid_hdr->used_ebs = cpu_to_ubi32(used_ebs);
761 	vid_hdr->data_crc = cpu_to_ubi32(crc);
762 
763 retry:
764 	pnum = ubi_wl_get_peb(ubi, dtype);
765 	if (pnum < 0) {
766 		ubi_free_vid_hdr(ubi, vid_hdr);
767 		leb_write_unlock(ubi, vol_id, lnum);
768 		return pnum;
769 	}
770 
771 	dbg_eba("write VID hdr and %d bytes at LEB %d:%d, PEB %d, used_ebs %d",
772 		len, vol_id, lnum, pnum, used_ebs);
773 
774 	err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr);
775 	if (err) {
776 		ubi_warn("failed to write VID header to LEB %d:%d, PEB %d",
777 			 vol_id, lnum, pnum);
778 		goto write_error;
779 	}
780 
781 	err = ubi_io_write_data(ubi, buf, pnum, 0, len);
782 	if (err) {
783 		ubi_warn("failed to write %d bytes of data to PEB %d",
784 			 len, pnum);
785 		goto write_error;
786 	}
787 
788 	ubi_assert(vol->eba_tbl[lnum] < 0);
789 	vol->eba_tbl[lnum] = pnum;
790 
791 	leb_write_unlock(ubi, vol_id, lnum);
792 	ubi_free_vid_hdr(ubi, vid_hdr);
793 	return 0;
794 
795 write_error:
796 	if (err != -EIO || !ubi->bad_allowed) {
797 		/*
798 		 * This flash device does not admit of bad eraseblocks or
799 		 * something nasty and unexpected happened. Switch to read-only
800 		 * mode just in case.
801 		 */
802 		ubi_ro_mode(ubi);
803 		leb_write_unlock(ubi, vol_id, lnum);
804 		ubi_free_vid_hdr(ubi, vid_hdr);
805 		return err;
806 	}
807 
808 	err = ubi_wl_put_peb(ubi, pnum, 1);
809 	if (err || ++tries > UBI_IO_RETRIES) {
810 		ubi_ro_mode(ubi);
811 		leb_write_unlock(ubi, vol_id, lnum);
812 		ubi_free_vid_hdr(ubi, vid_hdr);
813 		return err;
814 	}
815 
816 	vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
817 	ubi_msg("try another PEB");
818 	goto retry;
819 }
820 
821 /*
822  * ubi_eba_atomic_leb_change - change logical eraseblock atomically.
823  * @ubi: UBI device description object
824  * @vol_id: volume ID
825  * @lnum: logical eraseblock number
826  * @buf: data to write
827  * @len: how many bytes to write
828  * @dtype: data type
829  *
830  * This function changes the contents of a logical eraseblock atomically. @buf
831  * has to contain new logical eraseblock data, and @len - the length of the
832  * data, which has to be aligned. This function guarantees that in case of an
833  * unclean reboot the old contents is preserved. Returns zero in case of
834  * success and a negative error code in case of failure.
835  */
836 int ubi_eba_atomic_leb_change(struct ubi_device *ubi, int vol_id, int lnum,
837 			      const void *buf, int len, int dtype)
838 {
839 	int err, pnum, tries = 0, idx = vol_id2idx(ubi, vol_id);
840 	struct ubi_volume *vol = ubi->volumes[idx];
841 	struct ubi_vid_hdr *vid_hdr;
842 	uint32_t crc;
843 
844 	if (ubi->ro_mode)
845 		return -EROFS;
846 
847 	vid_hdr = ubi_zalloc_vid_hdr(ubi);
848 	if (!vid_hdr)
849 		return -ENOMEM;
850 
851 	err = leb_write_lock(ubi, vol_id, lnum);
852 	if (err) {
853 		ubi_free_vid_hdr(ubi, vid_hdr);
854 		return err;
855 	}
856 
857 	vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
858 	vid_hdr->vol_id = cpu_to_ubi32(vol_id);
859 	vid_hdr->lnum = cpu_to_ubi32(lnum);
860 	vid_hdr->compat = ubi_get_compat(ubi, vol_id);
861 	vid_hdr->data_pad = cpu_to_ubi32(vol->data_pad);
862 
863 	crc = crc32(UBI_CRC32_INIT, buf, len);
864 	vid_hdr->vol_type = UBI_VID_STATIC;
865 	vid_hdr->data_size = cpu_to_ubi32(len);
866 	vid_hdr->copy_flag = 1;
867 	vid_hdr->data_crc = cpu_to_ubi32(crc);
868 
869 retry:
870 	pnum = ubi_wl_get_peb(ubi, dtype);
871 	if (pnum < 0) {
872 		ubi_free_vid_hdr(ubi, vid_hdr);
873 		leb_write_unlock(ubi, vol_id, lnum);
874 		return pnum;
875 	}
876 
877 	dbg_eba("change LEB %d:%d, PEB %d, write VID hdr to PEB %d",
878 		vol_id, lnum, vol->eba_tbl[lnum], pnum);
879 
880 	err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr);
881 	if (err) {
882 		ubi_warn("failed to write VID header to LEB %d:%d, PEB %d",
883 			 vol_id, lnum, pnum);
884 		goto write_error;
885 	}
886 
887 	err = ubi_io_write_data(ubi, buf, pnum, 0, len);
888 	if (err) {
889 		ubi_warn("failed to write %d bytes of data to PEB %d",
890 			 len, pnum);
891 		goto write_error;
892 	}
893 
894 	err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 1);
895 	if (err) {
896 		ubi_free_vid_hdr(ubi, vid_hdr);
897 		leb_write_unlock(ubi, vol_id, lnum);
898 		return err;
899 	}
900 
901 	vol->eba_tbl[lnum] = pnum;
902 	leb_write_unlock(ubi, vol_id, lnum);
903 	ubi_free_vid_hdr(ubi, vid_hdr);
904 	return 0;
905 
906 write_error:
907 	if (err != -EIO || !ubi->bad_allowed) {
908 		/*
909 		 * This flash device does not admit of bad eraseblocks or
910 		 * something nasty and unexpected happened. Switch to read-only
911 		 * mode just in case.
912 		 */
913 		ubi_ro_mode(ubi);
914 		leb_write_unlock(ubi, vol_id, lnum);
915 		ubi_free_vid_hdr(ubi, vid_hdr);
916 		return err;
917 	}
918 
919 	err = ubi_wl_put_peb(ubi, pnum, 1);
920 	if (err || ++tries > UBI_IO_RETRIES) {
921 		ubi_ro_mode(ubi);
922 		leb_write_unlock(ubi, vol_id, lnum);
923 		ubi_free_vid_hdr(ubi, vid_hdr);
924 		return err;
925 	}
926 
927 	vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
928 	ubi_msg("try another PEB");
929 	goto retry;
930 }
931 
932 /**
933  * ltree_entry_ctor - lock tree entries slab cache constructor.
934  * @obj: the lock-tree entry to construct
935  * @cache: the lock tree entry slab cache
936  * @flags: constructor flags
937  */
938 static void ltree_entry_ctor(void *obj, struct kmem_cache *cache,
939 			     unsigned long flags)
940 {
941 	struct ltree_entry *le = obj;
942 
943 	le->users = 0;
944 	init_rwsem(&le->mutex);
945 }
946 
947 /**
948  * ubi_eba_copy_leb - copy logical eraseblock.
949  * @ubi: UBI device description object
950  * @from: physical eraseblock number from where to copy
951  * @to: physical eraseblock number where to copy
952  * @vid_hdr: VID header of the @from physical eraseblock
953  *
954  * This function copies logical eraseblock from physical eraseblock @from to
955  * physical eraseblock @to. The @vid_hdr buffer may be changed by this
956  * function. Returns zero in case of success, %UBI_IO_BITFLIPS if the operation
957  * was canceled because bit-flips were detected at the target PEB, and a
958  * negative error code in case of failure.
959  */
960 int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
961 		     struct ubi_vid_hdr *vid_hdr)
962 {
963 	int err, vol_id, lnum, data_size, aldata_size, pnum, idx;
964 	struct ubi_volume *vol;
965 	uint32_t crc;
966 	void *buf, *buf1 = NULL;
967 
968 	vol_id = ubi32_to_cpu(vid_hdr->vol_id);
969 	lnum = ubi32_to_cpu(vid_hdr->lnum);
970 
971 	dbg_eba("copy LEB %d:%d, PEB %d to PEB %d", vol_id, lnum, from, to);
972 
973 	if (vid_hdr->vol_type == UBI_VID_STATIC) {
974 		data_size = ubi32_to_cpu(vid_hdr->data_size);
975 		aldata_size = ALIGN(data_size, ubi->min_io_size);
976 	} else
977 		data_size = aldata_size =
978 			    ubi->leb_size - ubi32_to_cpu(vid_hdr->data_pad);
979 
980 	buf = kmalloc(aldata_size, GFP_KERNEL);
981 	if (!buf)
982 		return -ENOMEM;
983 
984 	/*
985 	 * We do not want anybody to write to this logical eraseblock while we
986 	 * are moving it, so we lock it.
987 	 */
988 	err = leb_write_lock(ubi, vol_id, lnum);
989 	if (err) {
990 		kfree(buf);
991 		return err;
992 	}
993 
994 	/*
995 	 * But the logical eraseblock might have been put by this time.
996 	 * Cancel if it is true.
997 	 */
998 	idx = vol_id2idx(ubi, vol_id);
999 
1000 	/*
1001 	 * We may race with volume deletion/re-size, so we have to hold
1002 	 * @ubi->volumes_lock.
1003 	 */
1004 	spin_lock(&ubi->volumes_lock);
1005 	vol = ubi->volumes[idx];
1006 	if (!vol) {
1007 		dbg_eba("volume %d was removed meanwhile", vol_id);
1008 		spin_unlock(&ubi->volumes_lock);
1009 		goto out_unlock;
1010 	}
1011 
1012 	pnum = vol->eba_tbl[lnum];
1013 	if (pnum != from) {
1014 		dbg_eba("LEB %d:%d is no longer mapped to PEB %d, mapped to "
1015 			"PEB %d, cancel", vol_id, lnum, from, pnum);
1016 		spin_unlock(&ubi->volumes_lock);
1017 		goto out_unlock;
1018 	}
1019 	spin_unlock(&ubi->volumes_lock);
1020 
1021 	/* OK, now the LEB is locked and we can safely start moving it */
1022 
1023 	dbg_eba("read %d bytes of data", aldata_size);
1024 	err = ubi_io_read_data(ubi, buf, from, 0, aldata_size);
1025 	if (err && err != UBI_IO_BITFLIPS) {
1026 		ubi_warn("error %d while reading data from PEB %d",
1027 			 err, from);
1028 		goto out_unlock;
1029 	}
1030 
1031 	/*
1032 	 * Now we have got to calculate how much data we have to to copy. In
1033 	 * case of a static volume it is fairly easy - the VID header contains
1034 	 * the data size. In case of a dynamic volume it is more difficult - we
1035 	 * have to read the contents, cut 0xFF bytes from the end and copy only
1036 	 * the first part. We must do this to avoid writing 0xFF bytes as it
1037 	 * may have some side-effects. And not only this. It is important not
1038 	 * to include those 0xFFs to CRC because later they may be filled
1039 	 * by data.
1040 	 */
1041 	if (vid_hdr->vol_type == UBI_VID_DYNAMIC)
1042 		aldata_size = data_size =
1043 				ubi_calc_data_len(ubi, buf, data_size);
1044 
1045 	cond_resched();
1046 	crc = crc32(UBI_CRC32_INIT, buf, data_size);
1047 	cond_resched();
1048 
1049 	/*
1050 	 * It may turn out that the whole @from physical eraseblock
1051 	 * contains only 0xFF bytes. Then we have to only write the VID header
1052 	 * and do not write any data. This also means we should not set
1053 	 * @vid_hdr->copy_flag, @vid_hdr->data_size, and @vid_hdr->data_crc.
1054 	 */
1055 	if (data_size > 0) {
1056 		vid_hdr->copy_flag = 1;
1057 		vid_hdr->data_size = cpu_to_ubi32(data_size);
1058 		vid_hdr->data_crc = cpu_to_ubi32(crc);
1059 	}
1060 	vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
1061 
1062 	err = ubi_io_write_vid_hdr(ubi, to, vid_hdr);
1063 	if (err)
1064 		goto out_unlock;
1065 
1066 	cond_resched();
1067 
1068 	/* Read the VID header back and check if it was written correctly */
1069 	err = ubi_io_read_vid_hdr(ubi, to, vid_hdr, 1);
1070 	if (err) {
1071 		if (err != UBI_IO_BITFLIPS)
1072 			ubi_warn("cannot read VID header back from PEB %d", to);
1073 		goto out_unlock;
1074 	}
1075 
1076 	if (data_size > 0) {
1077 		err = ubi_io_write_data(ubi, buf, to, 0, aldata_size);
1078 		if (err)
1079 			goto out_unlock;
1080 
1081 		/*
1082 		 * We've written the data and are going to read it back to make
1083 		 * sure it was written correctly.
1084 		 */
1085 		buf1 = kmalloc(aldata_size, GFP_KERNEL);
1086 		if (!buf1) {
1087 			err = -ENOMEM;
1088 			goto out_unlock;
1089 		}
1090 
1091 		cond_resched();
1092 
1093 		err = ubi_io_read_data(ubi, buf1, to, 0, aldata_size);
1094 		if (err) {
1095 			if (err != UBI_IO_BITFLIPS)
1096 				ubi_warn("cannot read data back from PEB %d",
1097 					 to);
1098 			goto out_unlock;
1099 		}
1100 
1101 		cond_resched();
1102 
1103 		if (memcmp(buf, buf1, aldata_size)) {
1104 			ubi_warn("read data back from PEB %d - it is different",
1105 				 to);
1106 			goto out_unlock;
1107 		}
1108 	}
1109 
1110 	ubi_assert(vol->eba_tbl[lnum] == from);
1111 	vol->eba_tbl[lnum] = to;
1112 
1113 	leb_write_unlock(ubi, vol_id, lnum);
1114 	kfree(buf);
1115 	kfree(buf1);
1116 
1117 	return 0;
1118 
1119 out_unlock:
1120 	leb_write_unlock(ubi, vol_id, lnum);
1121 	kfree(buf);
1122 	kfree(buf1);
1123 	return err;
1124 }
1125 
1126 /**
1127  * ubi_eba_init_scan - initialize the EBA unit using scanning information.
1128  * @ubi: UBI device description object
1129  * @si: scanning information
1130  *
1131  * This function returns zero in case of success and a negative error code in
1132  * case of failure.
1133  */
1134 int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
1135 {
1136 	int i, j, err, num_volumes;
1137 	struct ubi_scan_volume *sv;
1138 	struct ubi_volume *vol;
1139 	struct ubi_scan_leb *seb;
1140 	struct rb_node *rb;
1141 
1142 	dbg_eba("initialize EBA unit");
1143 
1144 	spin_lock_init(&ubi->ltree_lock);
1145 	ubi->ltree = RB_ROOT;
1146 
1147 	if (ubi_devices_cnt == 0) {
1148 		ltree_slab = kmem_cache_create("ubi_ltree_slab",
1149 					       sizeof(struct ltree_entry), 0,
1150 					       0, &ltree_entry_ctor, NULL);
1151 		if (!ltree_slab)
1152 			return -ENOMEM;
1153 	}
1154 
1155 	ubi->global_sqnum = si->max_sqnum + 1;
1156 	num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT;
1157 
1158 	for (i = 0; i < num_volumes; i++) {
1159 		vol = ubi->volumes[i];
1160 		if (!vol)
1161 			continue;
1162 
1163 		cond_resched();
1164 
1165 		vol->eba_tbl = kmalloc(vol->reserved_pebs * sizeof(int),
1166 				       GFP_KERNEL);
1167 		if (!vol->eba_tbl) {
1168 			err = -ENOMEM;
1169 			goto out_free;
1170 		}
1171 
1172 		for (j = 0; j < vol->reserved_pebs; j++)
1173 			vol->eba_tbl[j] = UBI_LEB_UNMAPPED;
1174 
1175 		sv = ubi_scan_find_sv(si, idx2vol_id(ubi, i));
1176 		if (!sv)
1177 			continue;
1178 
1179 		ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) {
1180 			if (seb->lnum >= vol->reserved_pebs)
1181 				/*
1182 				 * This may happen in case of an unclean reboot
1183 				 * during re-size.
1184 				 */
1185 				ubi_scan_move_to_list(sv, seb, &si->erase);
1186 			vol->eba_tbl[seb->lnum] = seb->pnum;
1187 		}
1188 	}
1189 
1190 	if (ubi->bad_allowed) {
1191 		ubi_calculate_reserved(ubi);
1192 
1193 		if (ubi->avail_pebs < ubi->beb_rsvd_level) {
1194 			/* No enough free physical eraseblocks */
1195 			ubi->beb_rsvd_pebs = ubi->avail_pebs;
1196 			ubi_warn("cannot reserve enough PEBs for bad PEB "
1197 				 "handling, reserved %d, need %d",
1198 				 ubi->beb_rsvd_pebs, ubi->beb_rsvd_level);
1199 		} else
1200 			ubi->beb_rsvd_pebs = ubi->beb_rsvd_level;
1201 
1202 		ubi->avail_pebs -= ubi->beb_rsvd_pebs;
1203 		ubi->rsvd_pebs  += ubi->beb_rsvd_pebs;
1204 	}
1205 
1206 	dbg_eba("EBA unit is initialized");
1207 	return 0;
1208 
1209 out_free:
1210 	for (i = 0; i < num_volumes; i++) {
1211 		if (!ubi->volumes[i])
1212 			continue;
1213 		kfree(ubi->volumes[i]->eba_tbl);
1214 	}
1215 	if (ubi_devices_cnt == 0)
1216 		kmem_cache_destroy(ltree_slab);
1217 	return err;
1218 }
1219 
1220 /**
1221  * ubi_eba_close - close EBA unit.
1222  * @ubi: UBI device description object
1223  */
1224 void ubi_eba_close(const struct ubi_device *ubi)
1225 {
1226 	int i, num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT;
1227 
1228 	dbg_eba("close EBA unit");
1229 
1230 	for (i = 0; i < num_volumes; i++) {
1231 		if (!ubi->volumes[i])
1232 			continue;
1233 		kfree(ubi->volumes[i]->eba_tbl);
1234 	}
1235 	if (ubi_devices_cnt == 1)
1236 		kmem_cache_destroy(ltree_slab);
1237 }
1238