/*
 * Copyright (c) 2014 Christoph Hellwig.
 */
#include "xfs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_log.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_error.h"
#include "xfs_iomap.h"
#include "xfs_shared.h"
#include "xfs_bit.h"
#include "xfs_pnfs.h"

/*
 * Ensure that we do not have any outstanding pNFS layouts that can be used by
 * clients to directly read from or write to this inode. This must be called
 * before every operation that can remove blocks from the extent map.
 * Additionally we call it during the write operation, where we aren't
 * concerned about exposing unallocated blocks but just want to provide basic
 * synchronization between a local writer and pNFS clients. mmap writes would
 * also benefit from this sort of synchronization, but due to the tricky
 * locking rules in the page fault path we don't bother.
 */
int
xfs_break_layouts(
	struct inode		*inode,
	uint			*iolock,
	bool			with_imutex)
{
	struct xfs_inode	*ip = XFS_I(inode);
	int			error;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL));

	while ((error = break_layout(inode, false)) == -EWOULDBLOCK) {
		xfs_iunlock(ip, *iolock);
		if (with_imutex && (*iolock & XFS_IOLOCK_EXCL))
			mutex_unlock(&inode->i_mutex);
		error = break_layout(inode, true);
		*iolock = XFS_IOLOCK_EXCL;
		if (with_imutex)
			mutex_lock(&inode->i_mutex);
		xfs_ilock(ip, *iolock);
	}

	return error;
}
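/*
 * Illustrative only: a minimal sketch of how a local I/O path is expected to
 * call xfs_break_layouts() before removing blocks from the extent map. The
 * helper below is hypothetical and not part of this file; the real callers
 * live in the write and space-manipulation paths. Note that the iolock is
 * still held on return even when xfs_break_layouts() fails, and that it may
 * have been cycled and upgraded to exclusive in the meantime.
 */
#if 0
static int
example_prepare_remove_blocks(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);
	uint			iolock = XFS_IOLOCK_EXCL;
	int			error;

	xfs_ilock(ip, iolock);
	/* May drop and re-take the iolock while waiting for layout recalls. */
	error = xfs_break_layouts(inode, &iolock, false);
	if (error) {
		xfs_iunlock(ip, iolock);
		return error;
	}
	/* ... safe to remove blocks from the extent map here ... */
	xfs_iunlock(ip, iolock);
	return 0;
}
#endif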
/*
 * Get a unique ID including its location so that the client can identify
 * the exported device.
 */
int
xfs_fs_get_uuid(
	struct super_block	*sb,
	u8			*buf,
	u32			*len,
	u64			*offset)
{
	struct xfs_mount	*mp = XFS_M(sb);

	printk_once(KERN_NOTICE
"XFS (%s): using experimental pNFS feature, use at your own risk!\n",
		mp->m_fsname);

	if (*len < sizeof(uuid_t))
		return -EINVAL;

	memcpy(buf, &mp->m_sb.sb_uuid, sizeof(uuid_t));
	*len = sizeof(uuid_t);
	*offset = offsetof(struct xfs_dsb, sb_uuid);
	return 0;
}

/*
 * Translate an xfs_bmbt_irec mapping into the generic iomap representation
 * handed out to the pNFS layout code.
 */
static void
xfs_bmbt_to_iomap(
	struct xfs_inode	*ip,
	struct iomap		*iomap,
	struct xfs_bmbt_irec	*imap)
{
	struct xfs_mount	*mp = ip->i_mount;

	if (imap->br_startblock == HOLESTARTBLOCK) {
		iomap->blkno = IOMAP_NULL_BLOCK;
		iomap->type = IOMAP_HOLE;
	} else if (imap->br_startblock == DELAYSTARTBLOCK) {
		iomap->blkno = IOMAP_NULL_BLOCK;
		iomap->type = IOMAP_DELALLOC;
	} else {
		iomap->blkno =
			XFS_FSB_TO_DADDR(ip->i_mount, imap->br_startblock);
		if (imap->br_state == XFS_EXT_UNWRITTEN)
			iomap->type = IOMAP_UNWRITTEN;
		else
			iomap->type = IOMAP_MAPPED;
	}
	iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
	iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
}

/*
 * Get a layout for the pNFS client.
 */
int
xfs_fs_map_blocks(
	struct inode		*inode,
	loff_t			offset,
	u64			length,
	struct iomap		*iomap,
	bool			write,
	u32			*device_generation)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_bmbt_irec	imap;
	xfs_fileoff_t		offset_fsb, end_fsb;
	loff_t			limit;
	int			bmapi_flags = XFS_BMAPI_ENTIRE;
	int			nimaps = 1;
	uint			lock_flags;
	int			error = 0;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/*
	 * We can't export inodes residing on the realtime device. The realtime
	 * device doesn't have a UUID to identify it, so the client has no way
	 * to find it.
	 */
	if (XFS_IS_REALTIME_INODE(ip))
		return -ENXIO;

	/*
	 * Lock out any other I/O before we flush and invalidate the pagecache,
	 * and then hand out a layout to the remote system. This is very
	 * similar to direct I/O, except that the synchronization is much more
	 * complicated. See the comment near xfs_break_layouts for a detailed
	 * explanation.
	 */
	xfs_ilock(ip, XFS_IOLOCK_EXCL);

	error = -EINVAL;
	limit = mp->m_super->s_maxbytes;
	if (!write)
		limit = max(limit, round_up(i_size_read(inode),
				     inode->i_sb->s_blocksize));
	if (offset > limit)
		goto out_unlock;
	if (offset > limit - length)
		length = limit - offset;

	error = filemap_write_and_wait(inode->i_mapping);
	if (error)
		goto out_unlock;
	error = invalidate_inode_pages2(inode->i_mapping);
	if (WARN_ON_ONCE(error))
		goto out_unlock;

	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + length);
	offset_fsb = XFS_B_TO_FSBT(mp, offset);

	lock_flags = xfs_ilock_data_map_shared(ip);
	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
				&imap, &nimaps, bmapi_flags);
	xfs_iunlock(ip, lock_flags);

	if (error)
		goto out_unlock;

	if (write) {
		enum xfs_prealloc_flags	flags = 0;

		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);

		if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) {
			error = xfs_iomap_write_direct(ip, offset, length,
						       &imap, nimaps);
			if (error)
				goto out_unlock;

			/*
			 * Ensure the next transaction is committed
			 * synchronously so that the blocks allocated and
			 * handed out to the client are guaranteed to be
			 * present even after a server crash.
			 */
			flags |= XFS_PREALLOC_SET | XFS_PREALLOC_SYNC;
		}

		error = xfs_update_prealloc_flags(ip, flags);
		if (error)
			goto out_unlock;
	}
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);

	xfs_bmbt_to_iomap(ip, iomap, &imap);
	*device_generation = mp->m_generation;
	return error;
out_unlock:
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	return error;
}
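/*
 * Illustrative only: a minimal sketch of how a layout-granting path might
 * consume xfs_fs_map_blocks() for a client write. The calling function and
 * the way the result is packed into a layout reply are hypothetical
 * simplifications; the real consumer is the nfsd block layout code.
 */
#if 0
static int
example_layoutget(
	struct inode		*inode,
	loff_t			offset,
	u64			length)
{
	struct iomap		iomap;
	u32			device_generation;
	int			error;

	error = xfs_fs_map_blocks(inode, offset, length, &iomap,
				  true, &device_generation);
	if (error)
		return error;

	/*
	 * For a write layout the extent is now allocated: iomap.blkno is a
	 * daddr on the exported device, iomap.offset/iomap.length cover the
	 * mapped byte range, and iomap.type tells the client whether the
	 * blocks must be treated as unwritten.
	 */
	/* ... encode iomap + device_generation into the layout reply ... */
	return 0;
}
#endif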
/*
 * Ensure the size update falls into a valid allocated block.
 */
static int
xfs_pnfs_validate_isize(
	struct xfs_inode	*ip,
	xfs_off_t		isize)
{
	struct xfs_bmbt_irec	imap;
	int			nimaps = 1;
	int			error = 0;

	xfs_ilock(ip, XFS_ILOCK_SHARED);
	error = xfs_bmapi_read(ip, XFS_B_TO_FSBT(ip->i_mount, isize - 1), 1,
				&imap, &nimaps, 0);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	if (error)
		return error;

	if (imap.br_startblock == HOLESTARTBLOCK ||
	    imap.br_startblock == DELAYSTARTBLOCK ||
	    imap.br_state == XFS_EXT_UNWRITTEN)
		return -EIO;
	return 0;
}

/*
 * Make sure the blocks described by maps are stable on disk. This includes
 * converting any unwritten extents, flushing the disk cache and updating the
 * time stamps.
 *
 * Note that we rely on the caller to always send us a timestamp update so
 * that we always commit a transaction here. If that stops being true we will
 * have to manually flush the cache here similar to what the fsync code path
 * does for datasyncs on files that have no dirty metadata.
 */
int
xfs_fs_commit_blocks(
	struct inode		*inode,
	struct iomap		*maps,
	int			nr_maps,
	struct iattr		*iattr)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	bool			update_isize = false;
	int			error, i;
	loff_t			size;

	ASSERT(iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME));

	xfs_ilock(ip, XFS_IOLOCK_EXCL);

	size = i_size_read(inode);
	if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size > size) {
		update_isize = true;
		size = iattr->ia_size;
	}

	for (i = 0; i < nr_maps; i++) {
		u64 start, length, end;

		start = maps[i].offset;
		if (start > size)
			continue;

		end = start + maps[i].length;
		if (end > size)
			end = size;

		length = end - start;
		if (!length)
			continue;

		/*
		 * Make sure reads through the pagecache see the new data.
		 */
		error = invalidate_inode_pages2_range(inode->i_mapping,
					start >> PAGE_CACHE_SHIFT,
					(end - 1) >> PAGE_CACHE_SHIFT);
		WARN_ON_ONCE(error);

		error = xfs_iomap_write_unwritten(ip, start, length);
		if (error)
			goto out_drop_iolock;
	}

	if (update_isize) {
		error = xfs_pnfs_validate_isize(ip, size);
		if (error)
			goto out_drop_iolock;
	}

	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);
		goto out_drop_iolock;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	xfs_setattr_time(ip, iattr);
	if (update_isize) {
		i_size_write(inode, iattr->ia_size);
		ip->i_d.di_size = iattr->ia_size;
	}

	xfs_trans_set_sync(tp);
	error = xfs_trans_commit(tp, 0);

out_drop_iolock:
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	return error;
}
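/*
 * Illustrative only: these functions are consumed through the
 * export_operations methods, roughly as sketched below. This mirrors the
 * wiring in fs/xfs/xfs_export.c but is reproduced here purely as an example,
 * not as a definitive copy of that file.
 */
#if 0
static const struct export_operations example_export_operations = {
	/* ... the regular NFS export methods ... */
#ifdef CONFIG_NFSD_PNFS
	.get_uuid		= xfs_fs_get_uuid,
	.map_blocks		= xfs_fs_map_blocks,
	.commit_blocks		= xfs_fs_commit_blocks,
#endif
};
#endif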