/*
 * This file is part of UBIFS.
 *
 * Copyright (C) 2006-2008 Nokia Corporation.
 * Copyright (C) 2006, 2007 University of Szeged, Hungary
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Authors: Artem Bityutskiy (Битюцкий Артём)
 *          Adrian Hunter
 *          Zoltan Sogor
 */

/*
 * This file implements the UBIFS I/O subsystem, which provides various
 * I/O-related helper functions (reading/writing/checking/validating nodes)
 * and implements write-buffering support. Write-buffers help to save space
 * which otherwise would have been wasted for padding to the nearest minimal
 * I/O unit boundary. Instead, data first goes to the write-buffer and is
 * flushed when the buffer is full or when it is not used for some time (by
 * timer). This is similar to the mechanism used by JFFS2.
 *
 * UBIFS distinguishes between minimum write size (@c->min_io_size) and
 * maximum write size (@c->max_write_size). The latter is the maximum amount
 * of bytes the underlying flash is able to program at a time, and writing in
 * @c->max_write_size units should presumably be faster. Obviously,
 * @c->min_io_size <= @c->max_write_size. Write-buffers are of
 * @c->max_write_size bytes in size for maximum performance. However, when a
 * write-buffer is flushed, only the portion of it (aligned to a
 * @c->min_io_size boundary) which contains data is written, not the whole
 * write-buffer, because this is more space-efficient.
 *
 * This optimization adds a few complications to the code. Indeed, on the one
 * hand, we want to write in optimal @c->max_write_size byte chunks, which
 * also means aligning writes at @c->max_write_size byte offsets. On the
 * other hand, we do not want to waste space when synchronizing the write
 * buffer, so during synchronization we write in smaller chunks. And this
 * leaves the next write offset unaligned to @c->max_write_size bytes. So we
 * have to make sure that the write-buffer offset (@wbuf->offs) becomes
 * aligned to @c->max_write_size bytes again. We do this by temporarily
 * shrinking the write-buffer size (@wbuf->size).
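 *
 * For example (illustrative numbers, not taken from any particular flash):
 * assume @c->min_io_size is 512 and @c->max_write_size is 2048. If a
 * write-buffer holding 700 bytes of data is synchronized, only
 * ALIGN(700, 512) = 1024 bytes are written out, so @wbuf->offs advances to
 * an offset 1024 bytes past a 2048-byte boundary. The write-buffer size is
 * then temporarily shrunk to 2048 - 1024 = 1024 bytes, so that the next
 * flush ends exactly on a @c->max_write_size boundary and subsequent writes
 * are optimally aligned again.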
 *
 * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by
 * mutexes defined inside these objects. Since sometimes upper-level code
 * has to lock the write-buffer (e.g. journal space reservation code), many
 * functions related to write-buffers have a "nolock" suffix, which means
 * that the caller has to lock the write-buffer before calling the function.
 *
 * UBIFS stores nodes at 64-bit-aligned addresses. If the node length is not
 * aligned, UBIFS starts the next node from the aligned address, and the padded
 * bytes may contain any rubbish. In other words, UBIFS does not put padding
 * bytes in those small gaps. Common headers of nodes store real node lengths,
 * not aligned lengths. Indexing nodes also store real lengths in branches.
 *
 * When UBIFS pads to the next min. I/O unit boundary, it uses padding nodes,
 * or padding bytes if a padding node does not fit.
 *
 * All UBIFS nodes are protected by CRC checksums, and UBIFS checks the CRC
 * when the nodes are read from the flash media.
 */

#include <linux/crc32.h>
#include <linux/slab.h>
#include "ubifs.h"

/**
 * ubifs_ro_mode - switch UBIFS to read-only mode.
 * @c: UBIFS file-system description object
 * @err: error code which is the reason of switching to R/O mode
 */
void ubifs_ro_mode(struct ubifs_info *c, int err)
{
	if (!c->ro_error) {
		c->ro_error = 1;
		c->no_chk_data_crc = 0;
		c->vfs_sb->s_flags |= MS_RDONLY;
		ubifs_warn("switched to read-only mode, error %d", err);
		dbg_dump_stack();
	}
}

/**
 * ubifs_check_node - check node.
 * @c: UBIFS file-system description object
 * @buf: node to check
 * @lnum: logical eraseblock number
 * @offs: offset within the logical eraseblock
 * @quiet: print no messages
 * @must_chk_crc: indicates whether to always check the CRC
 *
 * This function checks the node magic number and CRC checksum. It also
 * validates the node length to prevent UBIFS from becoming crazy when an
 * attacker feeds it a file-system image with incorrect nodes. For example,
 * too large a node length in the common header could cause UBIFS to read
 * memory outside of the allocated buffer when checking the CRC checksum.
 *
 * This function may skip checking the CRC of data nodes if
 * @c->no_chk_data_crc is true, which is controlled by the corresponding
 * UBIFS mount option. However, if @must_chk_crc is true, then
 * @c->no_chk_data_crc is ignored and the CRC is checked. Similarly, if
 * @c->mounting or @c->remounting_rw is true (we are mounting or re-mounting
 * to R/W mode), @c->no_chk_data_crc is ignored and the CRC is checked. This
 * is because during mounting or re-mounting from R/O mode to R/W mode we may
 * read journal nodes (when replaying the journal or doing the recovery) and
 * the journal nodes may potentially be corrupted, so checking is required.
 *
 * This function returns zero in case of success and %-EUCLEAN in case of bad
 * CRC or magic.
 */
int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
		     int offs, int quiet, int must_chk_crc)
{
	int err = -EINVAL, type, node_len;
	uint32_t crc, node_crc, magic;
	const struct ubifs_ch *ch = buf;

	ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
	ubifs_assert(!(offs & 7) && offs < c->leb_size);

	magic = le32_to_cpu(ch->magic);
	if (magic != UBIFS_NODE_MAGIC) {
		if (!quiet)
			ubifs_err("bad magic %#08x, expected %#08x",
				  magic, UBIFS_NODE_MAGIC);
		err = -EUCLEAN;
		goto out;
	}

	type = ch->node_type;
	if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) {
		if (!quiet)
			ubifs_err("bad node type %d", type);
		goto out;
	}

	node_len = le32_to_cpu(ch->len);
	if (node_len + offs > c->leb_size)
		goto out_len;

	if (c->ranges[type].max_len == 0) {
		if (node_len != c->ranges[type].len)
			goto out_len;
	} else if (node_len < c->ranges[type].min_len ||
		   node_len > c->ranges[type].max_len)
		goto out_len;

	if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->mounting &&
	    !c->remounting_rw && c->no_chk_data_crc)
		return 0;

	crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
	node_crc = le32_to_cpu(ch->crc);
	if (crc != node_crc) {
		if (!quiet)
			ubifs_err("bad CRC: calculated %#08x, read %#08x",
				  crc, node_crc);
		err = -EUCLEAN;
		goto out;
	}

	return 0;

out_len:
	if (!quiet)
		ubifs_err("bad node length %d", node_len);
out:
	if (!quiet) {
		ubifs_err("bad node at LEB %d:%d", lnum, offs);
		dbg_dump_node(c, buf);
		dbg_dump_stack();
	}
	return err;
}
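
/*
 * Note on the checksum above: the first 8 bytes of the common header hold
 * the node magic and the CRC field itself, which is why ubifs_check_node()
 * (and every other place in this file which computes a node CRC) checksums
 * the node starting at offset 8, i.e. 'buf + 8' over 'node_len - 8' bytes.
 */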

/**
 * ubifs_pad - pad flash space.
 * @c: UBIFS file-system description object
 * @buf: buffer to put padding to
 * @pad: how many bytes to pad
 *
 * The flash media obliges us to write only in chunks of %c->min_io_size, and
 * when we have to write less data we add a padding node to the write-buffer
 * and pad it to the next minimal I/O unit's boundary. Padding nodes help when
 * the media is being scanned. If the amount of wasted space is not enough to
 * fit a padding node, which takes %UBIFS_PAD_NODE_SZ bytes, we write a
 * pattern of padding bytes (%UBIFS_PADDING_BYTE) instead.
 *
 * Padding nodes are also used to fill gaps when the "commit-in-gaps" method
 * is used.
 */
void ubifs_pad(const struct ubifs_info *c, void *buf, int pad)
{
	uint32_t crc;

	ubifs_assert(pad >= 0 && !(pad & 7));

	if (pad >= UBIFS_PAD_NODE_SZ) {
		struct ubifs_ch *ch = buf;
		struct ubifs_pad_node *pad_node = buf;

		ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
		ch->node_type = UBIFS_PAD_NODE;
		ch->group_type = UBIFS_NO_NODE_GROUP;
		ch->padding[0] = ch->padding[1] = 0;
		ch->sqnum = 0;
		ch->len = cpu_to_le32(UBIFS_PAD_NODE_SZ);
		pad -= UBIFS_PAD_NODE_SZ;
		pad_node->pad_len = cpu_to_le32(pad);
		crc = crc32(UBIFS_CRC32_INIT, buf + 8, UBIFS_PAD_NODE_SZ - 8);
		ch->crc = cpu_to_le32(crc);
		memset(buf + UBIFS_PAD_NODE_SZ, 0, pad);
	} else if (pad > 0)
		/* Too little space, padding node won't fit */
		memset(buf, UBIFS_PADDING_BYTE, pad);
}
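
/*
 * For example (illustrative sizes): a call like 'ubifs_pad(c, buf, 40)'
 * writes a padding node at the start of @buf - a common header with
 * node_type %UBIFS_PAD_NODE and a pad_len field of 40 - %UBIFS_PAD_NODE_SZ
 * bytes - and zero-fills the remainder, so a scan sees one valid node
 * covering all 40 bytes. A call like 'ubifs_pad(c, buf, 8)', where a padding
 * node cannot fit, just fills the 8 bytes with %UBIFS_PADDING_BYTE.
 */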

/**
 * next_sqnum - get next sequence number.
 * @c: UBIFS file-system description object
 */
static unsigned long long next_sqnum(struct ubifs_info *c)
{
	unsigned long long sqnum;

	spin_lock(&c->cnt_lock);
	sqnum = ++c->max_sqnum;
	spin_unlock(&c->cnt_lock);

	if (unlikely(sqnum >= SQNUM_WARN_WATERMARK)) {
		if (sqnum >= SQNUM_WATERMARK) {
			ubifs_err("sequence number overflow %llu, end of life",
				  sqnum);
			ubifs_ro_mode(c, -EINVAL);
		}
		ubifs_warn("running out of sequence numbers, end of life soon");
	}

	return sqnum;
}

/**
 * ubifs_prepare_node - prepare node to be written to flash.
 * @c: UBIFS file-system description object
 * @node: the node to prepare
 * @len: node length
 * @pad: if the buffer has to be padded
 *
 * This function prepares the node at @node to be written to the media - it
 * calculates the node CRC, fills the common header, and adds proper padding
 * up to the next minimum I/O unit if @pad is not zero.
 */
void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad)
{
	uint32_t crc;
	struct ubifs_ch *ch = node;
	unsigned long long sqnum = next_sqnum(c);

	ubifs_assert(len >= UBIFS_CH_SZ);

	ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
	ch->len = cpu_to_le32(len);
	ch->group_type = UBIFS_NO_NODE_GROUP;
	ch->sqnum = cpu_to_le64(sqnum);
	ch->padding[0] = ch->padding[1] = 0;
	crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8);
	ch->crc = cpu_to_le32(crc);

	if (pad) {
		len = ALIGN(len, 8);
		pad = ALIGN(len, c->min_io_size) - len;
		ubifs_pad(c, node + len, pad);
	}
}
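
/*
 * To illustrate the padding arithmetic above with made-up numbers: for a
 * 100-byte node and a 512-byte min. I/O unit, the node length is first
 * rounded up to an 8-byte boundary, len = ALIGN(100, 8) = 104, and then
 * pad = ALIGN(104, 512) - 104 = 408 bytes of padding are appended, so the
 * prepared buffer is exactly one min. I/O unit long.
 */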

/**
 * ubifs_prep_grp_node - prepare node of a group to be written to flash.
 * @c: UBIFS file-system description object
 * @node: the node to prepare
 * @len: node length
 * @last: indicates the last node of the group
 *
 * This function prepares the node at @node to be written to the media - it
 * calculates the node CRC and fills the common header.
 */
void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last)
{
	uint32_t crc;
	struct ubifs_ch *ch = node;
	unsigned long long sqnum = next_sqnum(c);

	ubifs_assert(len >= UBIFS_CH_SZ);

	ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
	ch->len = cpu_to_le32(len);
	if (last)
		ch->group_type = UBIFS_LAST_OF_NODE_GROUP;
	else
		ch->group_type = UBIFS_IN_NODE_GROUP;
	ch->sqnum = cpu_to_le64(sqnum);
	ch->padding[0] = ch->padding[1] = 0;
	crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8);
	ch->crc = cpu_to_le32(crc);
}

/**
 * wbuf_timer_callback_nolock - write-buffer timer callback function.
 * @timer: timer data (write-buffer descriptor)
 *
 * This function is called when the write-buffer timer expires.
 */
static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer)
{
	struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer);

	dbg_io("jhead %s", dbg_jhead(wbuf->jhead));
	wbuf->need_sync = 1;
	wbuf->c->need_wbuf_sync = 1;
	ubifs_wake_up_bgt(wbuf->c);
	return HRTIMER_NORESTART;
}

/**
 * new_wbuf_timer_nolock - start new write-buffer timer.
 * @wbuf: write-buffer descriptor
 */
static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
{
	ubifs_assert(!hrtimer_active(&wbuf->timer));

	if (wbuf->no_timer)
		return;
	dbg_io("set timer for jhead %s, %llu-%llu millisecs",
	       dbg_jhead(wbuf->jhead),
	       div_u64(ktime_to_ns(wbuf->softlimit), USEC_PER_SEC),
	       div_u64(ktime_to_ns(wbuf->softlimit) + wbuf->delta,
		       USEC_PER_SEC));
	hrtimer_start_range_ns(&wbuf->timer, wbuf->softlimit, wbuf->delta,
			       HRTIMER_MODE_REL);
}

/**
 * cancel_wbuf_timer_nolock - cancel write-buffer timer.
 * @wbuf: write-buffer descriptor
 */
static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
{
	if (wbuf->no_timer)
		return;
	wbuf->need_sync = 0;
	hrtimer_cancel(&wbuf->timer);
}

/**
 * ubifs_wbuf_sync_nolock - synchronize write-buffer.
 * @wbuf: write-buffer to synchronize
 *
 * This function synchronizes write-buffer @wbuf and returns zero in case of
 * success or a negative error code in case of failure.
 *
 * Note, although write-buffers are of @c->max_write_size, this function does
 * not necessarily write all @c->max_write_size bytes to the flash. Instead,
 * if the write-buffer is only partially filled with data, only the used part
 * of the write-buffer (aligned on a @c->min_io_size boundary) is
 * synchronized. This way we waste less space.
 */
int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
{
	struct ubifs_info *c = wbuf->c;
	int err, dirt, sync_len;

	cancel_wbuf_timer_nolock(wbuf);
	if (!wbuf->used || wbuf->lnum == -1)
		/* Write-buffer is empty or not seeked */
		return 0;

	dbg_io("LEB %d:%d, %d bytes, jhead %s",
	       wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead));
	ubifs_assert(!(wbuf->avail & 7));
	ubifs_assert(wbuf->offs + wbuf->size <= c->leb_size);
	ubifs_assert(wbuf->size >= c->min_io_size);
	ubifs_assert(wbuf->size <= c->max_write_size);
	ubifs_assert(wbuf->size % c->min_io_size == 0);
	ubifs_assert(!c->ro_media && !c->ro_mount);
	if (c->leb_size - wbuf->offs >= c->max_write_size)
		ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));

	if (c->ro_error)
		return -EROFS;

	/*
	 * Do not write the whole write-buffer, but only the minimum necessary
	 * amount of min. I/O units.
	 */
	sync_len = ALIGN(wbuf->used, c->min_io_size);
	dirt = sync_len - wbuf->used;
	if (dirt)
		ubifs_pad(c, wbuf->buf + wbuf->used, dirt);
	err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
			    sync_len, wbuf->dtype);
	if (err) {
		ubifs_err("cannot write %d bytes to LEB %d:%d",
			  sync_len, wbuf->lnum, wbuf->offs);
		dbg_dump_stack();
		return err;
	}

	spin_lock(&wbuf->lock);
	wbuf->offs += sync_len;
	/*
	 * Now @wbuf->offs is not necessarily aligned to @c->max_write_size.
	 * But our goal is to optimize writes and make sure we write in
	 * @c->max_write_size chunks and to @c->max_write_size-aligned offsets.
	 * Thus, if @wbuf->offs is not aligned to @c->max_write_size now, make
	 * sure that @wbuf->offs + @wbuf->size is aligned to
	 * @c->max_write_size. This way we make sure that after the next
	 * write-buffer flush we are again at the optimal offset (aligned to
	 * @c->max_write_size).
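	 *
	 * E.g. (illustrative numbers): with a @c->max_write_size of 2048, if
	 * @wbuf->offs is now 1536, then @wbuf->size is set to
	 * ALIGN(1536, 2048) - 1536 = 512, so the next flush brings
	 * @wbuf->offs back to a 2048-byte boundary.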
	 */
	if (c->leb_size - wbuf->offs < c->max_write_size)
		wbuf->size = c->leb_size - wbuf->offs;
	else if (wbuf->offs & (c->max_write_size - 1))
		wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
	else
		wbuf->size = c->max_write_size;
	wbuf->avail = wbuf->size;
	wbuf->used = 0;
	wbuf->next_ino = 0;
	spin_unlock(&wbuf->lock);

	if (wbuf->sync_callback)
		err = wbuf->sync_callback(c, wbuf->lnum,
					  c->leb_size - wbuf->offs, dirt);
	return err;
}

/**
 * ubifs_wbuf_seek_nolock - seek write-buffer.
 * @wbuf: write-buffer
 * @lnum: logical eraseblock number to seek to
 * @offs: logical eraseblock offset to seek to
 * @dtype: data type
 *
 * This function targets the write-buffer to logical eraseblock @lnum:@offs.
 * The write-buffer is synchronized if it is not empty. Returns zero in case
 * of success and a negative error code in case of failure.
 */
int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
			   int dtype)
{
	const struct ubifs_info *c = wbuf->c;

	dbg_io("LEB %d:%d, jhead %s", lnum, offs, dbg_jhead(wbuf->jhead));
	ubifs_assert(lnum >= 0 && lnum < c->leb_cnt);
	ubifs_assert(offs >= 0 && offs <= c->leb_size);
	ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
	ubifs_assert(lnum != wbuf->lnum);

	if (wbuf->used > 0) {
		int err = ubifs_wbuf_sync_nolock(wbuf);

		if (err)
			return err;
	}

	spin_lock(&wbuf->lock);
	wbuf->lnum = lnum;
	wbuf->offs = offs;
	if (c->leb_size - wbuf->offs < c->max_write_size)
		wbuf->size = c->leb_size - wbuf->offs;
	else if (wbuf->offs & (c->max_write_size - 1))
		wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
	else
		wbuf->size = c->max_write_size;
	wbuf->avail = wbuf->size;
	wbuf->used = 0;
	spin_unlock(&wbuf->lock);
	wbuf->dtype = dtype;

	return 0;
}
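
/*
 * A typical caller of ubifs_wbuf_seek_nolock() is the journal code, which,
 * after allocating a fresh LEB for a journal head, points the head's
 * write-buffer at the beginning of that LEB, e.g.:
 *
 *	err = ubifs_wbuf_seek_nolock(wbuf, lnum, 0, wbuf->dtype);
 *
 * (The call sites live outside this file; this is just an illustration of
 * how the seek and write primitives fit together.)
 */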

/**
 * ubifs_bg_wbufs_sync - synchronize write-buffers.
 * @c: UBIFS file-system description object
 *
 * This function is called by the background thread to synchronize
 * write-buffers. Returns zero in case of success and a negative error code
 * in case of failure.
 */
int ubifs_bg_wbufs_sync(struct ubifs_info *c)
{
	int err, i;

	ubifs_assert(!c->ro_media && !c->ro_mount);
	if (!c->need_wbuf_sync)
		return 0;
	c->need_wbuf_sync = 0;

	if (c->ro_error) {
		err = -EROFS;
		goto out_timers;
	}

	dbg_io("synchronize");
	for (i = 0; i < c->jhead_cnt; i++) {
		struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;

		cond_resched();

		/*
		 * If the mutex is locked then wbuf is being changed, so
		 * synchronization is not necessary.
		 */
		if (mutex_is_locked(&wbuf->io_mutex))
			continue;

		mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
		if (!wbuf->need_sync) {
			mutex_unlock(&wbuf->io_mutex);
			continue;
		}

		err = ubifs_wbuf_sync_nolock(wbuf);
		mutex_unlock(&wbuf->io_mutex);
		if (err) {
			ubifs_err("cannot sync write-buffer, error %d", err);
			ubifs_ro_mode(c, err);
			goto out_timers;
		}
	}

	return 0;

out_timers:
	/* Cancel all timers to prevent repeated errors */
	for (i = 0; i < c->jhead_cnt; i++) {
		struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;

		mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
		cancel_wbuf_timer_nolock(wbuf);
		mutex_unlock(&wbuf->io_mutex);
	}
	return err;
}

/**
 * ubifs_wbuf_write_nolock - write data to flash via write-buffer.
 * @wbuf: write-buffer
 * @buf: node to write
 * @len: node length
 *
 * This function writes data to flash via write-buffer @wbuf. This means that
 * the last piece of the node won't reach the flash media immediately if it
 * does not take a whole max. write unit (@c->max_write_size). Instead, the
 * node will sit in RAM until the write-buffer is synchronized (e.g., by
 * timer, or because more data are appended to the write-buffer).
 *
 * This function returns zero in case of success and a negative error code in
 * case of failure. If the node cannot be written because there is no more
 * space in this logical eraseblock, %-ENOSPC is returned.
 */
int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
{
	struct ubifs_info *c = wbuf->c;
	int err, written, n, aligned_len = ALIGN(len, 8), offs;

	dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len,
	       dbg_ntype(((struct ubifs_ch *)buf)->node_type),
	       dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs + wbuf->used);
	ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
	ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
	ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
	ubifs_assert(wbuf->avail > 0 && wbuf->avail <= wbuf->size);
	ubifs_assert(wbuf->size >= c->min_io_size);
	ubifs_assert(wbuf->size <= c->max_write_size);
	ubifs_assert(wbuf->size % c->min_io_size == 0);
	ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
	ubifs_assert(!c->ro_media && !c->ro_mount);
	if (c->leb_size - wbuf->offs >= c->max_write_size)
		ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));

	if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
		err = -ENOSPC;
		goto out;
	}

	cancel_wbuf_timer_nolock(wbuf);

	if (c->ro_error)
		return -EROFS;

	if (aligned_len <= wbuf->avail) {
		/*
		 * The node is not very large and fits entirely within the
		 * write-buffer.
		 */
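		/*
		 * (E.g., with an illustrative 2048-byte write-buffer that
		 * already holds 512 bytes, a 200-byte node is simply copied
		 * in at offset 512; nothing reaches the flash until the
		 * buffer fills up or is synchronized.)
		 */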
		memcpy(wbuf->buf + wbuf->used, buf, len);

		if (aligned_len == wbuf->avail) {
			dbg_io("flush jhead %s wbuf to LEB %d:%d",
			       dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
			err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf,
					    wbuf->offs, wbuf->size,
					    wbuf->dtype);
			if (err)
				goto out;

			spin_lock(&wbuf->lock);
			wbuf->offs += wbuf->size;
			if (c->leb_size - wbuf->offs >= c->max_write_size)
				wbuf->size = c->max_write_size;
			else
				wbuf->size = c->leb_size - wbuf->offs;
			wbuf->avail = wbuf->size;
			wbuf->used = 0;
			wbuf->next_ino = 0;
			spin_unlock(&wbuf->lock);
		} else {
			spin_lock(&wbuf->lock);
			wbuf->avail -= aligned_len;
			wbuf->used += aligned_len;
			spin_unlock(&wbuf->lock);
		}

		goto exit;
	}

	offs = wbuf->offs;
	written = 0;

	if (wbuf->used) {
		/*
		 * The node is large enough and does not fit entirely within
		 * the currently available space. We have to fill and flush
		 * the write-buffer and switch to the next max. write unit.
		 */
		dbg_io("flush jhead %s wbuf to LEB %d:%d",
		       dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
		memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
		err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
				    wbuf->size, wbuf->dtype);
		if (err)
			goto out;

		offs += wbuf->size;
		len -= wbuf->avail;
		aligned_len -= wbuf->avail;
		written += wbuf->avail;
	} else if (wbuf->offs & (c->max_write_size - 1)) {
		/*
		 * The write-buffer offset is not aligned to
		 * @c->max_write_size and @wbuf->size is less than
		 * @c->max_write_size. Write @wbuf->size bytes to make sure
		 * the following writes are done in optimal
		 * @c->max_write_size chunks.
		 */
		dbg_io("write %d bytes to LEB %d:%d",
		       wbuf->size, wbuf->lnum, wbuf->offs);
		err = ubi_leb_write(c->ubi, wbuf->lnum, buf, wbuf->offs,
				    wbuf->size, wbuf->dtype);
		if (err)
			goto out;

		offs += wbuf->size;
		len -= wbuf->size;
		aligned_len -= wbuf->size;
		written += wbuf->size;
	}

	/*
	 * The remaining data may span one or more whole max. write units, so
	 * write that multiple of the max. write unit size directly to the
	 * flash media. We align the node length to an 8-byte boundary because
	 * we flush the write-buffer anyway if the remaining space is less
	 * than 8 bytes.
	 */
	n = aligned_len >> c->max_write_shift;
	if (n) {
		n <<= c->max_write_shift;
		dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs);
		err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n,
				    wbuf->dtype);
		if (err)
			goto out;
		offs += n;
		aligned_len -= n;
		len -= n;
		written += n;
	}

	spin_lock(&wbuf->lock);
	if (aligned_len)
		/*
		 * And now we have what's left and what does not take a whole
		 * max. write unit, so write it to the write-buffer and we
		 * are done.
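		 *
		 * (Worked example with illustrative numbers: for a 4500-byte
		 * node, a 2048-byte max. write unit and an empty, aligned
		 * write-buffer, aligned_len = 4504, so n = 2 units = 4096
		 * bytes are written directly above, and the remaining 408
		 * aligned bytes stay here in the write-buffer.)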
		 */
		memcpy(wbuf->buf, buf + written, len);

	wbuf->offs = offs;
	if (c->leb_size - wbuf->offs >= c->max_write_size)
		wbuf->size = c->max_write_size;
	else
		wbuf->size = c->leb_size - wbuf->offs;
	wbuf->avail = wbuf->size - aligned_len;
	wbuf->used = aligned_len;
	wbuf->next_ino = 0;
	spin_unlock(&wbuf->lock);

exit:
	if (wbuf->sync_callback) {
		int free = c->leb_size - wbuf->offs - wbuf->used;

		err = wbuf->sync_callback(c, wbuf->lnum, free, 0);
		if (err)
			goto out;
	}

	if (wbuf->used)
		new_wbuf_timer_nolock(wbuf);

	return 0;

out:
	ubifs_err("cannot write %d bytes to LEB %d:%d, error %d",
		  len, wbuf->lnum, wbuf->offs, err);
	dbg_dump_node(c, buf);
	dbg_dump_stack();
	dbg_dump_leb(c, wbuf->lnum);
	return err;
}

/**
 * ubifs_write_node - write node to the media.
 * @c: UBIFS file-system description object
 * @buf: the node to write
 * @len: node length
 * @lnum: logical eraseblock number
 * @offs: offset within the logical eraseblock
 * @dtype: node life-time hint (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN)
 *
 * This function automatically fills the node magic number, assigns a
 * sequence number, and calculates the node CRC checksum. The length of the
 * @buf buffer has to be aligned to the minimal I/O unit size. This function
 * automatically appends a padding node and padding bytes if needed. Returns
 * zero in case of success and a negative error code in case of failure.
 */
int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
		     int offs, int dtype)
{
	int err, buf_len = ALIGN(len, c->min_io_size);

	dbg_io("LEB %d:%d, %s, length %d (aligned %d)",
	       lnum, offs, dbg_ntype(((struct ubifs_ch *)buf)->node_type), len,
	       buf_len);
	ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
	ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size);
	ubifs_assert(!c->ro_media && !c->ro_mount);

	if (c->ro_error)
		return -EROFS;

	ubifs_prepare_node(c, buf, len, 1);
	err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype);
	if (err) {
		ubifs_err("cannot write %d bytes to LEB %d:%d, error %d",
			  buf_len, lnum, offs, err);
		dbg_dump_node(c, buf);
		dbg_dump_stack();
	}

	return err;
}

/**
 * ubifs_read_node_wbuf - read node from the media or write-buffer.
 * @wbuf: wbuf to check for un-written data
 * @buf: buffer to read to
 * @type: node type
 * @len: node length
 * @lnum: logical eraseblock number
 * @offs: offset within the logical eraseblock
 *
 * This function reads a node of known type and length, checks it and stores
 * it in @buf. If the node partially or fully sits in the write-buffer, this
 * function takes data from the buffer, otherwise it reads the flash media.
 * Returns zero in case of success, %-EUCLEAN if the CRC mismatched and a
 * negative error code in case of failure.
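 *
 * For example (illustrative numbers): if the write-buffer holds the bytes of
 * LEB 7 starting at offset 4096 and a 512-byte node at 7:4000 is requested,
 * the first 96 bytes are read from the flash media and the remaining 416
 * bytes are copied out of the write-buffer.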
 */
int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
			 int lnum, int offs)
{
	const struct ubifs_info *c = wbuf->c;
	int err, rlen, overlap;
	struct ubifs_ch *ch = buf;

	dbg_io("LEB %d:%d, %s, length %d, jhead %s", lnum, offs,
	       dbg_ntype(type), len, dbg_jhead(wbuf->jhead));
	ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
	ubifs_assert(!(offs & 7) && offs < c->leb_size);
	ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);

	spin_lock(&wbuf->lock);
	overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs);
	if (!overlap) {
		/* We may safely unlock the write-buffer and read the data */
		spin_unlock(&wbuf->lock);
		return ubifs_read_node(c, buf, type, len, lnum, offs);
	}

	/* Don't read under wbuf */
	rlen = wbuf->offs - offs;
	if (rlen < 0)
		rlen = 0;

	/* Copy the rest from the write-buffer */
	memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen);
	spin_unlock(&wbuf->lock);

	if (rlen > 0) {
		/* Read everything that goes before the write-buffer */
		err = ubi_read(c->ubi, lnum, buf, offs, rlen);
		if (err && err != -EBADMSG) {
			ubifs_err("failed to read node %d from LEB %d:%d, "
				  "error %d", type, lnum, offs, err);
			dbg_dump_stack();
			return err;
		}
	}

	if (type != ch->node_type) {
		ubifs_err("bad node type (%d but expected %d)",
			  ch->node_type, type);
		goto out;
	}

	err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
	if (err) {
		ubifs_err("expected node type %d", type);
		return err;
	}

	rlen = le32_to_cpu(ch->len);
	if (rlen != len) {
		ubifs_err("bad node length %d, expected %d", rlen, len);
		goto out;
	}

	return 0;

out:
	ubifs_err("bad node at LEB %d:%d", lnum, offs);
	dbg_dump_node(c, buf);
	dbg_dump_stack();
	return -EINVAL;
}

/**
 * ubifs_read_node - read node.
 * @c: UBIFS file-system description object
 * @buf: buffer to read to
 * @type: node type
 * @len: node length (not aligned)
 * @lnum: logical eraseblock number
 * @offs: offset within the logical eraseblock
 *
 * This function reads a node of known type and length, checks it and stores
 * it in @buf. Returns zero in case of success, %-EUCLEAN if the CRC
 * mismatched and a negative error code in case of failure.
 */
int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
		    int lnum, int offs)
{
	int err, l;
	struct ubifs_ch *ch = buf;

	dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
	ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
	ubifs_assert(len >= UBIFS_CH_SZ && offs + len <= c->leb_size);
	ubifs_assert(!(offs & 7) && offs < c->leb_size);
	ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);

	err = ubi_read(c->ubi, lnum, buf, offs, len);
	if (err && err != -EBADMSG) {
		ubifs_err("cannot read node %d from LEB %d:%d, error %d",
			  type, lnum, offs, err);
		return err;
	}

	if (type != ch->node_type) {
		ubifs_err("bad node type (%d but expected %d)",
			  ch->node_type, type);
		goto out;
	}

	err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
	if (err) {
		ubifs_err("expected node type %d", type);
		return err;
	}

	l = le32_to_cpu(ch->len);
	if (l != len) {
		ubifs_err("bad node length %d, expected %d", l, len);
		goto out;
	}

	return 0;

out:
	ubifs_err("bad node at LEB %d:%d, LEB mapping status %d", lnum, offs,
		  ubi_is_mapped(c->ubi, lnum));
	dbg_dump_node(c, buf);
	dbg_dump_stack();
	return -EINVAL;
}

/**
 * ubifs_wbuf_init - initialize write-buffer.
 * @c: UBIFS file-system description object
 * @wbuf: write-buffer to initialize
 *
 * This function initializes the write-buffer. Returns zero in case of
 * success and %-ENOMEM in case of failure.
 */
int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
{
	size_t size;

	wbuf->buf = kmalloc(c->max_write_size, GFP_KERNEL);
	if (!wbuf->buf)
		return -ENOMEM;

	size = (c->max_write_size / UBIFS_CH_SZ + 1) * sizeof(ino_t);
	wbuf->inodes = kmalloc(size, GFP_KERNEL);
	if (!wbuf->inodes) {
		kfree(wbuf->buf);
		wbuf->buf = NULL;
		return -ENOMEM;
	}

	wbuf->used = 0;
	wbuf->lnum = wbuf->offs = -1;
	/*
	 * If the LEB starts at a max. write size aligned address, then the
	 * write-buffer size has to be set to @c->max_write_size. Otherwise,
	 * set it to something smaller so that it ends at the closest max.
	 * write size boundary.
	 */
	size = c->max_write_size - (c->leb_start % c->max_write_size);
	wbuf->avail = wbuf->size = size;
	wbuf->dtype = UBI_UNKNOWN;
	wbuf->sync_callback = NULL;
	mutex_init(&wbuf->io_mutex);
	spin_lock_init(&wbuf->lock);
	wbuf->c = c;
	wbuf->next_ino = 0;

	hrtimer_init(&wbuf->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	wbuf->timer.function = wbuf_timer_callback_nolock;
	wbuf->softlimit = ktime_set(WBUF_TIMEOUT_SOFTLIMIT, 0);
	wbuf->delta = WBUF_TIMEOUT_HARDLIMIT - WBUF_TIMEOUT_SOFTLIMIT;
	wbuf->delta *= 1000000000ULL;
	ubifs_assert(wbuf->delta <= ULONG_MAX);
	return 0;
}
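
/*
 * Note on the @wbuf->inodes sizing in ubifs_wbuf_init() above: every node is
 * at least %UBIFS_CH_SZ bytes long, so at most
 * c->max_write_size / UBIFS_CH_SZ nodes (plus one slot for rounding) can
 * ever sit in the write-buffer at once, which bounds the number of distinct
 * inode numbers the array has to track.
 */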

/**
 * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array.
 * @wbuf: the write-buffer where to add
 * @inum: the inode number
 *
 * This function adds an inode number to the inode array of the write-buffer.
 */
void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum)
{
	if (!wbuf->buf)
		/* NOR flash or something similar */
		return;

	spin_lock(&wbuf->lock);
	if (wbuf->used)
		wbuf->inodes[wbuf->next_ino++] = inum;
	spin_unlock(&wbuf->lock);
}

/**
 * wbuf_has_ino - returns if the wbuf contains data from the inode.
 * @wbuf: the write-buffer
 * @inum: the inode number
 *
 * This function returns %1 if the write-buffer contains some data from the
 * given inode and %0 otherwise.
 */
static int wbuf_has_ino(struct ubifs_wbuf *wbuf, ino_t inum)
{
	int i, ret = 0;

	spin_lock(&wbuf->lock);
	for (i = 0; i < wbuf->next_ino; i++)
		if (inum == wbuf->inodes[i]) {
			ret = 1;
			break;
		}
	spin_unlock(&wbuf->lock);

	return ret;
}

/**
 * ubifs_sync_wbufs_by_inode - synchronize write-buffers for an inode.
 * @c: UBIFS file-system description object
 * @inode: inode to synchronize
 *
 * This function synchronizes write-buffers which contain nodes belonging to
 * @inode. Returns zero in case of success and a negative error code in case
 * of failure.
 */
int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode)
{
	int i, err = 0;

	for (i = 0; i < c->jhead_cnt; i++) {
		struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;

		if (i == GCHD)
			/*
			 * GC head is special, do not look at it. Even if the
			 * head contains something related to this inode, it
			 * is a _copy_ of the corresponding on-flash node
			 * which sits somewhere else.
			 */
			continue;

		if (!wbuf_has_ino(wbuf, inode->i_ino))
			continue;

		mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
		if (wbuf_has_ino(wbuf, inode->i_ino))
			err = ubifs_wbuf_sync_nolock(wbuf);
		mutex_unlock(&wbuf->io_mutex);

		if (err) {
			ubifs_ro_mode(c, err);
			return err;
		}
	}
	return 0;
}