xref: /openbmc/linux/fs/xfs/libxfs/xfs_attr_remote.c (revision 33ac9dba)
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * Copyright (c) 2013 Red Hat, Inc.
4  * All Rights Reserved.
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it would be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write the Free Software Foundation,
17  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18  */
19 #include "xfs.h"
20 #include "xfs_fs.h"
21 #include "xfs_shared.h"
22 #include "xfs_format.h"
23 #include "xfs_log_format.h"
24 #include "xfs_trans_resv.h"
25 #include "xfs_bit.h"
26 #include "xfs_sb.h"
27 #include "xfs_ag.h"
28 #include "xfs_mount.h"
29 #include "xfs_da_format.h"
30 #include "xfs_da_btree.h"
31 #include "xfs_inode.h"
32 #include "xfs_alloc.h"
33 #include "xfs_trans.h"
34 #include "xfs_inode_item.h"
35 #include "xfs_bmap.h"
36 #include "xfs_bmap_util.h"
37 #include "xfs_attr.h"
38 #include "xfs_attr_leaf.h"
39 #include "xfs_attr_remote.h"
40 #include "xfs_trans_space.h"
41 #include "xfs_trace.h"
42 #include "xfs_cksum.h"
43 #include "xfs_buf_item.h"
44 #include "xfs_error.h"
45 
46 #define ATTR_RMTVALUE_MAPSIZE	1	/* # of map entries at once */
47 
48 /*
49  * Each contiguous block has a header, so it is not just a simple attribute
50  * length to FSB conversion.
51  */
52 int
53 xfs_attr3_rmt_blocks(
54 	struct xfs_mount *mp,
55 	int		attrlen)
56 {
57 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
58 		int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
59 		return (attrlen + buflen - 1) / buflen;
60 	}
61 	return XFS_B_TO_FSB(mp, attrlen);
62 }
63 
64 /*
65  * Checking of the remote attribute header is split into two parts. The verifier
66  * does CRC, location and bounds checking, the unpacking function checks the
67  * attribute parameters and owner.
68  */
69 static bool
70 xfs_attr3_rmt_hdr_ok(
71 	void			*ptr,
72 	xfs_ino_t		ino,
73 	uint32_t		offset,
74 	uint32_t		size,
75 	xfs_daddr_t		bno)
76 {
77 	struct xfs_attr3_rmt_hdr *rmt = ptr;
78 
79 	if (bno != be64_to_cpu(rmt->rm_blkno))
80 		return false;
81 	if (offset != be32_to_cpu(rmt->rm_offset))
82 		return false;
83 	if (size != be32_to_cpu(rmt->rm_bytes))
84 		return false;
85 	if (ino != be64_to_cpu(rmt->rm_owner))
86 		return false;
87 
88 	/* ok */
89 	return true;
90 }
91 
92 static bool
93 xfs_attr3_rmt_verify(
94 	struct xfs_mount	*mp,
95 	void			*ptr,
96 	int			fsbsize,
97 	xfs_daddr_t		bno)
98 {
99 	struct xfs_attr3_rmt_hdr *rmt = ptr;
100 
101 	if (!xfs_sb_version_hascrc(&mp->m_sb))
102 		return false;
103 	if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC))
104 		return false;
105 	if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid))
106 		return false;
107 	if (be64_to_cpu(rmt->rm_blkno) != bno)
108 		return false;
109 	if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
110 		return false;
111 	if (be32_to_cpu(rmt->rm_offset) +
112 				be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX)
113 		return false;
114 	if (rmt->rm_owner == 0)
115 		return false;
116 
117 	return true;
118 }
119 
120 static void
121 xfs_attr3_rmt_read_verify(
122 	struct xfs_buf	*bp)
123 {
124 	struct xfs_mount *mp = bp->b_target->bt_mount;
125 	char		*ptr;
126 	int		len;
127 	xfs_daddr_t	bno;
128 	int		blksize = mp->m_attr_geo->blksize;
129 
130 	/* no verification of non-crc buffers */
131 	if (!xfs_sb_version_hascrc(&mp->m_sb))
132 		return;
133 
134 	ptr = bp->b_addr;
135 	bno = bp->b_bn;
136 	len = BBTOB(bp->b_length);
137 	ASSERT(len >= blksize);
138 
139 	while (len > 0) {
140 		if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) {
141 			xfs_buf_ioerror(bp, -EFSBADCRC);
142 			break;
143 		}
144 		if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
145 			xfs_buf_ioerror(bp, -EFSCORRUPTED);
146 			break;
147 		}
148 		len -= blksize;
149 		ptr += blksize;
150 		bno += BTOBB(blksize);
151 	}
152 
153 	if (bp->b_error)
154 		xfs_verifier_error(bp);
155 	else
156 		ASSERT(len == 0);
157 }
158 
159 static void
160 xfs_attr3_rmt_write_verify(
161 	struct xfs_buf	*bp)
162 {
163 	struct xfs_mount *mp = bp->b_target->bt_mount;
164 	struct xfs_buf_log_item	*bip = bp->b_fspriv;
165 	char		*ptr;
166 	int		len;
167 	xfs_daddr_t	bno;
168 	int		blksize = mp->m_attr_geo->blksize;
169 
170 	/* no verification of non-crc buffers */
171 	if (!xfs_sb_version_hascrc(&mp->m_sb))
172 		return;
173 
174 	ptr = bp->b_addr;
175 	bno = bp->b_bn;
176 	len = BBTOB(bp->b_length);
177 	ASSERT(len >= blksize);
178 
179 	while (len > 0) {
180 		if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
181 			xfs_buf_ioerror(bp, -EFSCORRUPTED);
182 			xfs_verifier_error(bp);
183 			return;
184 		}
185 		if (bip) {
186 			struct xfs_attr3_rmt_hdr *rmt;
187 
188 			rmt = (struct xfs_attr3_rmt_hdr *)ptr;
189 			rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
190 		}
191 		xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF);
192 
193 		len -= blksize;
194 		ptr += blksize;
195 		bno += BTOBB(blksize);
196 	}
197 	ASSERT(len == 0);
198 }
199 
200 const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
201 	.verify_read = xfs_attr3_rmt_read_verify,
202 	.verify_write = xfs_attr3_rmt_write_verify,
203 };
204 
205 STATIC int
206 xfs_attr3_rmt_hdr_set(
207 	struct xfs_mount	*mp,
208 	void			*ptr,
209 	xfs_ino_t		ino,
210 	uint32_t		offset,
211 	uint32_t		size,
212 	xfs_daddr_t		bno)
213 {
214 	struct xfs_attr3_rmt_hdr *rmt = ptr;
215 
216 	if (!xfs_sb_version_hascrc(&mp->m_sb))
217 		return 0;
218 
219 	rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC);
220 	rmt->rm_offset = cpu_to_be32(offset);
221 	rmt->rm_bytes = cpu_to_be32(size);
222 	uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_uuid);
223 	rmt->rm_owner = cpu_to_be64(ino);
224 	rmt->rm_blkno = cpu_to_be64(bno);
225 
226 	return sizeof(struct xfs_attr3_rmt_hdr);
227 }
228 
229 /*
230  * Helper functions to copy attribute data in and out of the one disk extents
231  */
232 STATIC int
233 xfs_attr_rmtval_copyout(
234 	struct xfs_mount *mp,
235 	struct xfs_buf	*bp,
236 	xfs_ino_t	ino,
237 	int		*offset,
238 	int		*valuelen,
239 	__uint8_t	**dst)
240 {
241 	char		*src = bp->b_addr;
242 	xfs_daddr_t	bno = bp->b_bn;
243 	int		len = BBTOB(bp->b_length);
244 	int		blksize = mp->m_attr_geo->blksize;
245 
246 	ASSERT(len >= blksize);
247 
248 	while (len > 0 && *valuelen > 0) {
249 		int hdr_size = 0;
250 		int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
251 
252 		byte_cnt = min(*valuelen, byte_cnt);
253 
254 		if (xfs_sb_version_hascrc(&mp->m_sb)) {
255 			if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset,
256 						  byte_cnt, bno)) {
257 				xfs_alert(mp,
258 "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
259 					bno, *offset, byte_cnt, ino);
260 				return -EFSCORRUPTED;
261 			}
262 			hdr_size = sizeof(struct xfs_attr3_rmt_hdr);
263 		}
264 
265 		memcpy(*dst, src + hdr_size, byte_cnt);
266 
267 		/* roll buffer forwards */
268 		len -= blksize;
269 		src += blksize;
270 		bno += BTOBB(blksize);
271 
272 		/* roll attribute data forwards */
273 		*valuelen -= byte_cnt;
274 		*dst += byte_cnt;
275 		*offset += byte_cnt;
276 	}
277 	return 0;
278 }
279 
280 STATIC void
281 xfs_attr_rmtval_copyin(
282 	struct xfs_mount *mp,
283 	struct xfs_buf	*bp,
284 	xfs_ino_t	ino,
285 	int		*offset,
286 	int		*valuelen,
287 	__uint8_t	**src)
288 {
289 	char		*dst = bp->b_addr;
290 	xfs_daddr_t	bno = bp->b_bn;
291 	int		len = BBTOB(bp->b_length);
292 	int		blksize = mp->m_attr_geo->blksize;
293 
294 	ASSERT(len >= blksize);
295 
296 	while (len > 0 && *valuelen > 0) {
297 		int hdr_size;
298 		int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
299 
300 		byte_cnt = min(*valuelen, byte_cnt);
301 		hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
302 						 byte_cnt, bno);
303 
304 		memcpy(dst + hdr_size, *src, byte_cnt);
305 
306 		/*
307 		 * If this is the last block, zero the remainder of it.
308 		 * Check that we are actually the last block, too.
309 		 */
310 		if (byte_cnt + hdr_size < blksize) {
311 			ASSERT(*valuelen - byte_cnt == 0);
312 			ASSERT(len == blksize);
313 			memset(dst + hdr_size + byte_cnt, 0,
314 					blksize - hdr_size - byte_cnt);
315 		}
316 
317 		/* roll buffer forwards */
318 		len -= blksize;
319 		dst += blksize;
320 		bno += BTOBB(blksize);
321 
322 		/* roll attribute data forwards */
323 		*valuelen -= byte_cnt;
324 		*src += byte_cnt;
325 		*offset += byte_cnt;
326 	}
327 }
328 
329 /*
330  * Read the value associated with an attribute from the out-of-line buffer
331  * that we stored it in.
332  */
333 int
334 xfs_attr_rmtval_get(
335 	struct xfs_da_args	*args)
336 {
337 	struct xfs_bmbt_irec	map[ATTR_RMTVALUE_MAPSIZE];
338 	struct xfs_mount	*mp = args->dp->i_mount;
339 	struct xfs_buf		*bp;
340 	xfs_dablk_t		lblkno = args->rmtblkno;
341 	__uint8_t		*dst = args->value;
342 	int			valuelen;
343 	int			nmap;
344 	int			error;
345 	int			blkcnt = args->rmtblkcnt;
346 	int			i;
347 	int			offset = 0;
348 
349 	trace_xfs_attr_rmtval_get(args);
350 
351 	ASSERT(!(args->flags & ATTR_KERNOVAL));
352 	ASSERT(args->rmtvaluelen == args->valuelen);
353 
354 	valuelen = args->rmtvaluelen;
355 	while (valuelen > 0) {
356 		nmap = ATTR_RMTVALUE_MAPSIZE;
357 		error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
358 				       blkcnt, map, &nmap,
359 				       XFS_BMAPI_ATTRFORK);
360 		if (error)
361 			return error;
362 		ASSERT(nmap >= 1);
363 
364 		for (i = 0; (i < nmap) && (valuelen > 0); i++) {
365 			xfs_daddr_t	dblkno;
366 			int		dblkcnt;
367 
368 			ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
369 			       (map[i].br_startblock != HOLESTARTBLOCK));
370 			dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
371 			dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
372 			error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
373 						   dblkno, dblkcnt, 0, &bp,
374 						   &xfs_attr3_rmt_buf_ops);
375 			if (error)
376 				return error;
377 
378 			error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
379 							&offset, &valuelen,
380 							&dst);
381 			xfs_buf_relse(bp);
382 			if (error)
383 				return error;
384 
385 			/* roll attribute extent map forwards */
386 			lblkno += map[i].br_blockcount;
387 			blkcnt -= map[i].br_blockcount;
388 		}
389 	}
390 	ASSERT(valuelen == 0);
391 	return 0;
392 }
393 
394 /*
395  * Write the value associated with an attribute into the out-of-line buffer
396  * that we have defined for it.
397  */
398 int
399 xfs_attr_rmtval_set(
400 	struct xfs_da_args	*args)
401 {
402 	struct xfs_inode	*dp = args->dp;
403 	struct xfs_mount	*mp = dp->i_mount;
404 	struct xfs_bmbt_irec	map;
405 	xfs_dablk_t		lblkno;
406 	xfs_fileoff_t		lfileoff = 0;
407 	__uint8_t		*src = args->value;
408 	int			blkcnt;
409 	int			valuelen;
410 	int			nmap;
411 	int			error;
412 	int			offset = 0;
413 
414 	trace_xfs_attr_rmtval_set(args);
415 
416 	/*
417 	 * Find a "hole" in the attribute address space large enough for
418 	 * us to drop the new attribute's value into. Because CRC enable
419 	 * attributes have headers, we can't just do a straight byte to FSB
420 	 * conversion and have to take the header space into account.
421 	 */
422 	blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen);
423 	error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
424 						   XFS_ATTR_FORK);
425 	if (error)
426 		return error;
427 
428 	args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
429 	args->rmtblkcnt = blkcnt;
430 
431 	/*
432 	 * Roll through the "value", allocating blocks on disk as required.
433 	 */
434 	while (blkcnt > 0) {
435 		int	committed;
436 
437 		/*
438 		 * Allocate a single extent, up to the size of the value.
439 		 */
440 		xfs_bmap_init(args->flist, args->firstblock);
441 		nmap = 1;
442 		error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
443 				  blkcnt,
444 				  XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
445 				  args->firstblock, args->total, &map, &nmap,
446 				  args->flist);
447 		if (!error) {
448 			error = xfs_bmap_finish(&args->trans, args->flist,
449 						&committed);
450 		}
451 		if (error) {
452 			ASSERT(committed);
453 			args->trans = NULL;
454 			xfs_bmap_cancel(args->flist);
455 			return error;
456 		}
457 
458 		/*
459 		 * bmap_finish() may have committed the last trans and started
460 		 * a new one.  We need the inode to be in all transactions.
461 		 */
462 		if (committed)
463 			xfs_trans_ijoin(args->trans, dp, 0);
464 
465 		ASSERT(nmap == 1);
466 		ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
467 		       (map.br_startblock != HOLESTARTBLOCK));
468 		lblkno += map.br_blockcount;
469 		blkcnt -= map.br_blockcount;
470 
471 		/*
472 		 * Start the next trans in the chain.
473 		 */
474 		error = xfs_trans_roll(&args->trans, dp);
475 		if (error)
476 			return error;
477 	}
478 
479 	/*
480 	 * Roll through the "value", copying the attribute value to the
481 	 * already-allocated blocks.  Blocks are written synchronously
482 	 * so that we can know they are all on disk before we turn off
483 	 * the INCOMPLETE flag.
484 	 */
485 	lblkno = args->rmtblkno;
486 	blkcnt = args->rmtblkcnt;
487 	valuelen = args->rmtvaluelen;
488 	while (valuelen > 0) {
489 		struct xfs_buf	*bp;
490 		xfs_daddr_t	dblkno;
491 		int		dblkcnt;
492 
493 		ASSERT(blkcnt > 0);
494 
495 		xfs_bmap_init(args->flist, args->firstblock);
496 		nmap = 1;
497 		error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
498 				       blkcnt, &map, &nmap,
499 				       XFS_BMAPI_ATTRFORK);
500 		if (error)
501 			return error;
502 		ASSERT(nmap == 1);
503 		ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
504 		       (map.br_startblock != HOLESTARTBLOCK));
505 
506 		dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
507 		dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
508 
509 		bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0);
510 		if (!bp)
511 			return -ENOMEM;
512 		bp->b_ops = &xfs_attr3_rmt_buf_ops;
513 
514 		xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset,
515 				       &valuelen, &src);
516 
517 		error = xfs_bwrite(bp);	/* GROT: NOTE: synchronous write */
518 		xfs_buf_relse(bp);
519 		if (error)
520 			return error;
521 
522 
523 		/* roll attribute extent map forwards */
524 		lblkno += map.br_blockcount;
525 		blkcnt -= map.br_blockcount;
526 	}
527 	ASSERT(valuelen == 0);
528 	return 0;
529 }
530 
531 /*
532  * Remove the value associated with an attribute by deleting the
533  * out-of-line buffer that it is stored on.
534  */
535 int
536 xfs_attr_rmtval_remove(
537 	struct xfs_da_args	*args)
538 {
539 	struct xfs_mount	*mp = args->dp->i_mount;
540 	xfs_dablk_t		lblkno;
541 	int			blkcnt;
542 	int			error;
543 	int			done;
544 
545 	trace_xfs_attr_rmtval_remove(args);
546 
547 	/*
548 	 * Roll through the "value", invalidating the attribute value's blocks.
549 	 */
550 	lblkno = args->rmtblkno;
551 	blkcnt = args->rmtblkcnt;
552 	while (blkcnt > 0) {
553 		struct xfs_bmbt_irec	map;
554 		struct xfs_buf		*bp;
555 		xfs_daddr_t		dblkno;
556 		int			dblkcnt;
557 		int			nmap;
558 
559 		/*
560 		 * Try to remember where we decided to put the value.
561 		 */
562 		nmap = 1;
563 		error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
564 				       blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK);
565 		if (error)
566 			return error;
567 		ASSERT(nmap == 1);
568 		ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
569 		       (map.br_startblock != HOLESTARTBLOCK));
570 
571 		dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
572 		dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
573 
574 		/*
575 		 * If the "remote" value is in the cache, remove it.
576 		 */
577 		bp = xfs_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK);
578 		if (bp) {
579 			xfs_buf_stale(bp);
580 			xfs_buf_relse(bp);
581 			bp = NULL;
582 		}
583 
584 		lblkno += map.br_blockcount;
585 		blkcnt -= map.br_blockcount;
586 	}
587 
588 	/*
589 	 * Keep de-allocating extents until the remote-value region is gone.
590 	 */
591 	lblkno = args->rmtblkno;
592 	blkcnt = args->rmtblkcnt;
593 	done = 0;
594 	while (!done) {
595 		int committed;
596 
597 		xfs_bmap_init(args->flist, args->firstblock);
598 		error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
599 				    XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
600 				    1, args->firstblock, args->flist,
601 				    &done);
602 		if (!error) {
603 			error = xfs_bmap_finish(&args->trans, args->flist,
604 						&committed);
605 		}
606 		if (error) {
607 			ASSERT(committed);
608 			args->trans = NULL;
609 			xfs_bmap_cancel(args->flist);
610 			return error;
611 		}
612 
613 		/*
614 		 * bmap_finish() may have committed the last trans and started
615 		 * a new one.  We need the inode to be in all transactions.
616 		 */
617 		if (committed)
618 			xfs_trans_ijoin(args->trans, args->dp, 0);
619 
620 		/*
621 		 * Close out trans and start the next one in the chain.
622 		 */
623 		error = xfs_trans_roll(&args->trans, args->dp);
624 		if (error)
625 			return error;
626 	}
627 	return 0;
628 }
629