15db11c21SMike Marshall /* 25db11c21SMike Marshall * (C) 2001 Clemson University and The University of Chicago 35db11c21SMike Marshall * 45db11c21SMike Marshall * See COPYING in top-level directory. 55db11c21SMike Marshall */ 65db11c21SMike Marshall 75db11c21SMike Marshall /* 85db11c21SMike Marshall * Linux VFS file operations. 95db11c21SMike Marshall */ 105db11c21SMike Marshall 115db11c21SMike Marshall #include "protocol.h" 12*575e9461SMike Marshall #include "orangefs-kernel.h" 13*575e9461SMike Marshall #include "orangefs-bufmap.h" 145db11c21SMike Marshall #include <linux/fs.h> 155db11c21SMike Marshall #include <linux/pagemap.h> 165db11c21SMike Marshall 175db11c21SMike Marshall #define wake_up_daemon_for_return(op) \ 185db11c21SMike Marshall do { \ 195db11c21SMike Marshall spin_lock(&op->lock); \ 205db11c21SMike Marshall op->io_completed = 1; \ 215db11c21SMike Marshall spin_unlock(&op->lock); \ 225db11c21SMike Marshall wake_up_interruptible(&op->io_completion_waitq);\ 235db11c21SMike Marshall } while (0) 245db11c21SMike Marshall 255db11c21SMike Marshall /* 265db11c21SMike Marshall * Copy to client-core's address space from the buffers specified 275db11c21SMike Marshall * by the iovec upto total_size bytes. 285db11c21SMike Marshall * NOTE: the iovector can either contain addresses which 295db11c21SMike Marshall * can futher be kernel-space or user-space addresses. 305db11c21SMike Marshall * or it can pointers to struct page's 315db11c21SMike Marshall */ 328bb8aefdSYi Liu static int precopy_buffers(struct orangefs_bufmap *bufmap, 335db11c21SMike Marshall int buffer_index, 34a5c126a5SAl Viro struct iov_iter *iter, 354d1c4404SMike Marshall size_t total_size) 365db11c21SMike Marshall { 375db11c21SMike Marshall int ret = 0; 385db11c21SMike Marshall /* 395db11c21SMike Marshall * copy data from application/kernel by pulling it out 405db11c21SMike Marshall * of the iovec. 415db11c21SMike Marshall */ 424d1c4404SMike Marshall 434d1c4404SMike Marshall 444d1c4404SMike Marshall if (total_size) { 458bb8aefdSYi Liu ret = orangefs_bufmap_copy_from_iovec(bufmap, 46a5c126a5SAl Viro iter, 475db11c21SMike Marshall buffer_index, 485db11c21SMike Marshall total_size); 494d1c4404SMike Marshall if (ret < 0) 504d1c4404SMike Marshall gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n", 514d1c4404SMike Marshall __func__, 524d1c4404SMike Marshall (long)ret); 534d1c4404SMike Marshall } 544d1c4404SMike Marshall 555db11c21SMike Marshall if (ret < 0) 565db11c21SMike Marshall gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n", 575db11c21SMike Marshall __func__, 585db11c21SMike Marshall (long)ret); 595db11c21SMike Marshall return ret; 605db11c21SMike Marshall } 615db11c21SMike Marshall 625db11c21SMike Marshall /* 635db11c21SMike Marshall * Copy from client-core's address space to the buffers specified 645db11c21SMike Marshall * by the iovec upto total_size bytes. 655db11c21SMike Marshall * NOTE: the iovector can either contain addresses which 665db11c21SMike Marshall * can futher be kernel-space or user-space addresses. 675db11c21SMike Marshall * or it can pointers to struct page's 685db11c21SMike Marshall */ 698bb8aefdSYi Liu static int postcopy_buffers(struct orangefs_bufmap *bufmap, 705db11c21SMike Marshall int buffer_index, 715f0e3c95SAl Viro struct iov_iter *iter, 724d1c4404SMike Marshall size_t total_size) 735db11c21SMike Marshall { 745db11c21SMike Marshall int ret = 0; 755db11c21SMike Marshall /* 765db11c21SMike Marshall * copy data to application/kernel by pushing it out to 775db11c21SMike Marshall * the iovec. NOTE; target buffers can be addresses or 785db11c21SMike Marshall * struct page pointers. 795db11c21SMike Marshall */ 805db11c21SMike Marshall if (total_size) { 818bb8aefdSYi Liu ret = orangefs_bufmap_copy_to_iovec(bufmap, 825f0e3c95SAl Viro iter, 835c278228SAl Viro buffer_index, 845c278228SAl Viro total_size); 855db11c21SMike Marshall if (ret < 0) 865db11c21SMike Marshall gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n", 875db11c21SMike Marshall __func__, 885db11c21SMike Marshall (long)ret); 895db11c21SMike Marshall } 905db11c21SMike Marshall return ret; 915db11c21SMike Marshall } 925db11c21SMike Marshall 935db11c21SMike Marshall /* 945db11c21SMike Marshall * Post and wait for the I/O upcall to finish 955db11c21SMike Marshall */ 968bb8aefdSYi Liu static ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode, 973c2fcfcbSAl Viro loff_t *offset, struct iov_iter *iter, 984d1c4404SMike Marshall size_t total_size, loff_t readahead_size) 995db11c21SMike Marshall { 1008bb8aefdSYi Liu struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 1018bb8aefdSYi Liu struct orangefs_khandle *handle = &orangefs_inode->refn.khandle; 1028bb8aefdSYi Liu struct orangefs_bufmap *bufmap = NULL; 1038bb8aefdSYi Liu struct orangefs_kernel_op_s *new_op = NULL; 1045db11c21SMike Marshall int buffer_index = -1; 1055db11c21SMike Marshall ssize_t ret; 1065db11c21SMike Marshall 1078bb8aefdSYi Liu new_op = op_alloc(ORANGEFS_VFS_OP_FILE_IO); 1085db11c21SMike Marshall if (!new_op) { 1095db11c21SMike Marshall ret = -ENOMEM; 1105db11c21SMike Marshall goto out; 1115db11c21SMike Marshall } 1125db11c21SMike Marshall /* synchronous I/O */ 1138bb8aefdSYi Liu new_op->upcall.req.io.async_vfs_io = ORANGEFS_VFS_SYNC_IO; 1145db11c21SMike Marshall new_op->upcall.req.io.readahead_size = readahead_size; 1155db11c21SMike Marshall new_op->upcall.req.io.io_type = type; 1168bb8aefdSYi Liu new_op->upcall.req.io.refn = orangefs_inode->refn; 1175db11c21SMike Marshall 1185db11c21SMike Marshall populate_shared_memory: 1195db11c21SMike Marshall /* get a shared buffer index */ 1208bb8aefdSYi Liu ret = orangefs_bufmap_get(&bufmap, &buffer_index); 1215db11c21SMike Marshall if (ret < 0) { 1225db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 1238bb8aefdSYi Liu "%s: orangefs_bufmap_get failure (%ld)\n", 1245db11c21SMike Marshall __func__, (long)ret); 1255db11c21SMike Marshall goto out; 1265db11c21SMike Marshall } 1275db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 1285db11c21SMike Marshall "%s(%pU): GET op %p -> buffer_index %d\n", 1295db11c21SMike Marshall __func__, 1305db11c21SMike Marshall handle, 1315db11c21SMike Marshall new_op, 1325db11c21SMike Marshall buffer_index); 1335db11c21SMike Marshall 1345db11c21SMike Marshall new_op->uses_shared_memory = 1; 1355db11c21SMike Marshall new_op->upcall.req.io.buf_index = buffer_index; 1365db11c21SMike Marshall new_op->upcall.req.io.count = total_size; 1375db11c21SMike Marshall new_op->upcall.req.io.offset = *offset; 1385db11c21SMike Marshall 1395db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 1403c2fcfcbSAl Viro "%s(%pU): offset: %llu total_size: %zd\n", 1415db11c21SMike Marshall __func__, 1425db11c21SMike Marshall handle, 1435db11c21SMike Marshall llu(*offset), 1445db11c21SMike Marshall total_size); 1455db11c21SMike Marshall /* 1465db11c21SMike Marshall * Stage 1: copy the buffers into client-core's address space 1475db11c21SMike Marshall * precopy_buffers only pertains to writes. 1485db11c21SMike Marshall */ 1498bb8aefdSYi Liu if (type == ORANGEFS_IO_WRITE) { 1505db11c21SMike Marshall ret = precopy_buffers(bufmap, 1515db11c21SMike Marshall buffer_index, 1523c2fcfcbSAl Viro iter, 1534d1c4404SMike Marshall total_size); 1545db11c21SMike Marshall if (ret < 0) 1555db11c21SMike Marshall goto out; 1565db11c21SMike Marshall } 1575db11c21SMike Marshall 1585db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 1595db11c21SMike Marshall "%s(%pU): Calling post_io_request with tag (%llu)\n", 1605db11c21SMike Marshall __func__, 1615db11c21SMike Marshall handle, 1625db11c21SMike Marshall llu(new_op->tag)); 1635db11c21SMike Marshall 1645db11c21SMike Marshall /* Stage 2: Service the I/O operation */ 1655db11c21SMike Marshall ret = service_operation(new_op, 1668bb8aefdSYi Liu type == ORANGEFS_IO_WRITE ? 1675db11c21SMike Marshall "file_write" : 1685db11c21SMike Marshall "file_read", 1695db11c21SMike Marshall get_interruptible_flag(inode)); 1705db11c21SMike Marshall 1715db11c21SMike Marshall /* 1725db11c21SMike Marshall * If service_operation() returns -EAGAIN #and# the operation was 1738bb8aefdSYi Liu * purged from orangefs_request_list or htable_ops_in_progress, then 1745db11c21SMike Marshall * we know that the client was restarted, causing the shared memory 1755db11c21SMike Marshall * area to be wiped clean. To restart a write operation in this 1765db11c21SMike Marshall * case, we must re-copy the data from the user's iovec to a NEW 1775db11c21SMike Marshall * shared memory location. To restart a read operation, we must get 1785db11c21SMike Marshall * a new shared memory location. 1795db11c21SMike Marshall */ 1805db11c21SMike Marshall if (ret == -EAGAIN && op_state_purged(new_op)) { 1818bb8aefdSYi Liu orangefs_bufmap_put(bufmap, buffer_index); 1825db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 1835db11c21SMike Marshall "%s:going to repopulate_shared_memory.\n", 1845db11c21SMike Marshall __func__); 1855db11c21SMike Marshall goto populate_shared_memory; 1865db11c21SMike Marshall } 1875db11c21SMike Marshall 1885db11c21SMike Marshall if (ret < 0) { 189*575e9461SMike Marshall handle_io_error(); 1905db11c21SMike Marshall /* 19154804949SMike Marshall * don't write an error to syslog on signaled operation 19254804949SMike Marshall * termination unless we've got debugging turned on, as 19354804949SMike Marshall * this can happen regularly (i.e. ctrl-c) 1945db11c21SMike Marshall */ 1955db11c21SMike Marshall if (ret == -EINTR) 1965db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 1975db11c21SMike Marshall "%s: returning error %ld\n", __func__, 1985db11c21SMike Marshall (long)ret); 1995db11c21SMike Marshall else 2005db11c21SMike Marshall gossip_err("%s: error in %s handle %pU, returning %zd\n", 2015db11c21SMike Marshall __func__, 2028bb8aefdSYi Liu type == ORANGEFS_IO_READ ? 2035db11c21SMike Marshall "read from" : "write to", 2045db11c21SMike Marshall handle, ret); 2055db11c21SMike Marshall goto out; 2065db11c21SMike Marshall } 2075db11c21SMike Marshall 2085db11c21SMike Marshall /* 2095db11c21SMike Marshall * Stage 3: Post copy buffers from client-core's address space 2105db11c21SMike Marshall * postcopy_buffers only pertains to reads. 2115db11c21SMike Marshall */ 2128bb8aefdSYi Liu if (type == ORANGEFS_IO_READ) { 2135db11c21SMike Marshall ret = postcopy_buffers(bufmap, 2145db11c21SMike Marshall buffer_index, 2153c2fcfcbSAl Viro iter, 2164d1c4404SMike Marshall new_op->downcall.resp.io.amt_complete); 2175db11c21SMike Marshall if (ret < 0) { 2185db11c21SMike Marshall /* 2195db11c21SMike Marshall * put error codes in downcall so that handle_io_error() 2205db11c21SMike Marshall * preserves it properly 2215db11c21SMike Marshall */ 2225db11c21SMike Marshall new_op->downcall.status = ret; 2235db11c21SMike Marshall handle_io_error(); 2245db11c21SMike Marshall goto out; 2255db11c21SMike Marshall } 2265db11c21SMike Marshall } 2275db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 2285db11c21SMike Marshall "%s(%pU): Amount written as returned by the sys-io call:%d\n", 2295db11c21SMike Marshall __func__, 2305db11c21SMike Marshall handle, 2315db11c21SMike Marshall (int)new_op->downcall.resp.io.amt_complete); 2325db11c21SMike Marshall 2335db11c21SMike Marshall ret = new_op->downcall.resp.io.amt_complete; 2345db11c21SMike Marshall 2355db11c21SMike Marshall /* 23654804949SMike Marshall * tell the device file owner waiting on I/O that this read has 23754804949SMike Marshall * completed and it can return now. in this exact case, on 23854804949SMike Marshall * wakeup the daemon will free the op, so we *cannot* touch it 23954804949SMike Marshall * after this. 2405db11c21SMike Marshall */ 2415db11c21SMike Marshall wake_up_daemon_for_return(new_op); 2425db11c21SMike Marshall new_op = NULL; 2435db11c21SMike Marshall 2445db11c21SMike Marshall out: 2455db11c21SMike Marshall if (buffer_index >= 0) { 2468bb8aefdSYi Liu orangefs_bufmap_put(bufmap, buffer_index); 2475db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 2485db11c21SMike Marshall "%s(%pU): PUT buffer_index %d\n", 2495db11c21SMike Marshall __func__, handle, buffer_index); 2505db11c21SMike Marshall buffer_index = -1; 2515db11c21SMike Marshall } 2525db11c21SMike Marshall if (new_op) { 2535db11c21SMike Marshall op_release(new_op); 2545db11c21SMike Marshall new_op = NULL; 2555db11c21SMike Marshall } 2565db11c21SMike Marshall return ret; 2575db11c21SMike Marshall } 2585db11c21SMike Marshall 2595db11c21SMike Marshall /* 2605db11c21SMike Marshall * Common entry point for read/write/readv/writev 2615db11c21SMike Marshall * This function will dispatch it to either the direct I/O 2625db11c21SMike Marshall * or buffered I/O path depending on the mount options and/or 2635db11c21SMike Marshall * augmented/extended metadata attached to the file. 2645db11c21SMike Marshall * Note: File extended attributes override any mount options. 2655db11c21SMike Marshall */ 2668bb8aefdSYi Liu static ssize_t do_readv_writev(enum ORANGEFS_io_type type, struct file *file, 2670071ed1eSAl Viro loff_t *offset, struct iov_iter *iter) 2685db11c21SMike Marshall { 2695db11c21SMike Marshall struct inode *inode = file->f_mapping->host; 2708bb8aefdSYi Liu struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 2718bb8aefdSYi Liu struct orangefs_khandle *handle = &orangefs_inode->refn.khandle; 2720071ed1eSAl Viro size_t count = iov_iter_count(iter); 273dc4067f6SAl Viro ssize_t total_count = 0; 274dc4067f6SAl Viro ssize_t ret = -EINVAL; 2755db11c21SMike Marshall 2765db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 2775db11c21SMike Marshall "%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n", 2785db11c21SMike Marshall __func__, 2795db11c21SMike Marshall handle, 2805db11c21SMike Marshall (int)count); 2815db11c21SMike Marshall 2828bb8aefdSYi Liu if (type == ORANGEFS_IO_WRITE) { 2835db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 2845db11c21SMike Marshall "%s(%pU): proceeding with offset : %llu, " 2855db11c21SMike Marshall "size %d\n", 2865db11c21SMike Marshall __func__, 2875db11c21SMike Marshall handle, 2885db11c21SMike Marshall llu(*offset), 2895db11c21SMike Marshall (int)count); 2905db11c21SMike Marshall } 2915db11c21SMike Marshall 2925db11c21SMike Marshall if (count == 0) { 2935db11c21SMike Marshall ret = 0; 2945db11c21SMike Marshall goto out; 2955db11c21SMike Marshall } 2965db11c21SMike Marshall 2970071ed1eSAl Viro while (iov_iter_count(iter)) { 2980071ed1eSAl Viro size_t each_count = iov_iter_count(iter); 2995db11c21SMike Marshall size_t amt_complete; 3005db11c21SMike Marshall 3015db11c21SMike Marshall /* how much to transfer in this loop iteration */ 3028bb8aefdSYi Liu if (each_count > orangefs_bufmap_size_query()) 3038bb8aefdSYi Liu each_count = orangefs_bufmap_size_query(); 3045db11c21SMike Marshall 3055db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 3065db11c21SMike Marshall "%s(%pU): size of each_count(%d)\n", 3075db11c21SMike Marshall __func__, 3085db11c21SMike Marshall handle, 3095db11c21SMike Marshall (int)each_count); 3105db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 3115db11c21SMike Marshall "%s(%pU): BEFORE wait_for_io: offset is %d\n", 3125db11c21SMike Marshall __func__, 3135db11c21SMike Marshall handle, 3145db11c21SMike Marshall (int)*offset); 3155db11c21SMike Marshall 3160071ed1eSAl Viro ret = wait_for_direct_io(type, inode, offset, iter, 3173c2fcfcbSAl Viro each_count, 0); 3185db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 3195db11c21SMike Marshall "%s(%pU): return from wait_for_io:%d\n", 3205db11c21SMike Marshall __func__, 3215db11c21SMike Marshall handle, 3225db11c21SMike Marshall (int)ret); 3235db11c21SMike Marshall 3245db11c21SMike Marshall if (ret < 0) 3255db11c21SMike Marshall goto out; 3265db11c21SMike Marshall 3275db11c21SMike Marshall *offset += ret; 3285db11c21SMike Marshall total_count += ret; 3295db11c21SMike Marshall amt_complete = ret; 3305db11c21SMike Marshall 3315db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 3325db11c21SMike Marshall "%s(%pU): AFTER wait_for_io: offset is %d\n", 3335db11c21SMike Marshall __func__, 3345db11c21SMike Marshall handle, 3355db11c21SMike Marshall (int)*offset); 3365db11c21SMike Marshall 3375db11c21SMike Marshall /* 3385db11c21SMike Marshall * if we got a short I/O operations, 3395db11c21SMike Marshall * fall out and return what we got so far 3405db11c21SMike Marshall */ 3415db11c21SMike Marshall if (amt_complete < each_count) 3425db11c21SMike Marshall break; 3435db11c21SMike Marshall } /*end while */ 3445db11c21SMike Marshall 3455db11c21SMike Marshall if (total_count > 0) 3465db11c21SMike Marshall ret = total_count; 3475db11c21SMike Marshall out: 3485db11c21SMike Marshall if (ret > 0) { 3498bb8aefdSYi Liu if (type == ORANGEFS_IO_READ) { 3505db11c21SMike Marshall file_accessed(file); 3515db11c21SMike Marshall } else { 3528bb8aefdSYi Liu SetMtimeFlag(orangefs_inode); 3535db11c21SMike Marshall inode->i_mtime = CURRENT_TIME; 3545db11c21SMike Marshall mark_inode_dirty_sync(inode); 3555db11c21SMike Marshall } 3565db11c21SMike Marshall } 3575db11c21SMike Marshall 3585db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 3595db11c21SMike Marshall "%s(%pU): Value(%d) returned.\n", 3605db11c21SMike Marshall __func__, 3615db11c21SMike Marshall handle, 3625db11c21SMike Marshall (int)ret); 3635db11c21SMike Marshall 3645db11c21SMike Marshall return ret; 3655db11c21SMike Marshall } 3665db11c21SMike Marshall 3675db11c21SMike Marshall /* 3685db11c21SMike Marshall * Read data from a specified offset in a file (referenced by inode). 3695db11c21SMike Marshall * Data may be placed either in a user or kernel buffer. 3705db11c21SMike Marshall */ 3718bb8aefdSYi Liu ssize_t orangefs_inode_read(struct inode *inode, 37274f68fceSAl Viro struct iov_iter *iter, 3735db11c21SMike Marshall loff_t *offset, 3745db11c21SMike Marshall loff_t readahead_size) 3755db11c21SMike Marshall { 3768bb8aefdSYi Liu struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 37774f68fceSAl Viro size_t count = iov_iter_count(iter); 3785db11c21SMike Marshall size_t bufmap_size; 3795db11c21SMike Marshall ssize_t ret = -EINVAL; 3805db11c21SMike Marshall 3818bb8aefdSYi Liu g_orangefs_stats.reads++; 3825db11c21SMike Marshall 3838bb8aefdSYi Liu bufmap_size = orangefs_bufmap_size_query(); 3845db11c21SMike Marshall if (count > bufmap_size) { 3855db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 3865db11c21SMike Marshall "%s: count is too large (%zd/%zd)!\n", 3875db11c21SMike Marshall __func__, count, bufmap_size); 3885db11c21SMike Marshall return -EINVAL; 3895db11c21SMike Marshall } 3905db11c21SMike Marshall 3915db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 3925db11c21SMike Marshall "%s(%pU) %zd@%llu\n", 3935db11c21SMike Marshall __func__, 3948bb8aefdSYi Liu &orangefs_inode->refn.khandle, 3955db11c21SMike Marshall count, 3965db11c21SMike Marshall llu(*offset)); 3975db11c21SMike Marshall 3988bb8aefdSYi Liu ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, offset, iter, 3994d1c4404SMike Marshall count, readahead_size); 4005db11c21SMike Marshall if (ret > 0) 4015db11c21SMike Marshall *offset += ret; 4025db11c21SMike Marshall 4035db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 4045db11c21SMike Marshall "%s(%pU): Value(%zd) returned.\n", 4055db11c21SMike Marshall __func__, 4068bb8aefdSYi Liu &orangefs_inode->refn.khandle, 4075db11c21SMike Marshall ret); 4085db11c21SMike Marshall 4095db11c21SMike Marshall return ret; 4105db11c21SMike Marshall } 4115db11c21SMike Marshall 4128bb8aefdSYi Liu static ssize_t orangefs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) 4135db11c21SMike Marshall { 4145db11c21SMike Marshall struct file *file = iocb->ki_filp; 4155db11c21SMike Marshall loff_t pos = *(&iocb->ki_pos); 4165db11c21SMike Marshall ssize_t rc = 0; 4175db11c21SMike Marshall 4185db11c21SMike Marshall BUG_ON(iocb->private); 4195db11c21SMike Marshall 4208bb8aefdSYi Liu gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_file_read_iter\n"); 4215db11c21SMike Marshall 4228bb8aefdSYi Liu g_orangefs_stats.reads++; 4235db11c21SMike Marshall 4248bb8aefdSYi Liu rc = do_readv_writev(ORANGEFS_IO_READ, file, &pos, iter); 4255db11c21SMike Marshall iocb->ki_pos = pos; 4265db11c21SMike Marshall 4275db11c21SMike Marshall return rc; 4285db11c21SMike Marshall } 4295db11c21SMike Marshall 4308bb8aefdSYi Liu static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) 4315db11c21SMike Marshall { 4325db11c21SMike Marshall struct file *file = iocb->ki_filp; 4333f1b6947SMike Marshall loff_t pos; 4345db11c21SMike Marshall ssize_t rc; 4355db11c21SMike Marshall 4365db11c21SMike Marshall BUG_ON(iocb->private); 4375db11c21SMike Marshall 4388bb8aefdSYi Liu gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_file_write_iter\n"); 4395db11c21SMike Marshall 4405db11c21SMike Marshall mutex_lock(&file->f_mapping->host->i_mutex); 4415db11c21SMike Marshall 4425db11c21SMike Marshall /* Make sure generic_write_checks sees an up to date inode size. */ 4435db11c21SMike Marshall if (file->f_flags & O_APPEND) { 4448bb8aefdSYi Liu rc = orangefs_inode_getattr(file->f_mapping->host, 4458bb8aefdSYi Liu ORANGEFS_ATTR_SYS_SIZE); 4465db11c21SMike Marshall if (rc) { 4478bb8aefdSYi Liu gossip_err("%s: orangefs_inode_getattr failed, rc:%zd:.\n", 4485db11c21SMike Marshall __func__, rc); 4495db11c21SMike Marshall goto out; 4505db11c21SMike Marshall } 4515db11c21SMike Marshall } 4525db11c21SMike Marshall 4535db11c21SMike Marshall if (file->f_pos > i_size_read(file->f_mapping->host)) 4548bb8aefdSYi Liu orangefs_i_size_write(file->f_mapping->host, file->f_pos); 4555db11c21SMike Marshall 4565db11c21SMike Marshall rc = generic_write_checks(iocb, iter); 4575db11c21SMike Marshall 4585db11c21SMike Marshall if (rc <= 0) { 4595db11c21SMike Marshall gossip_err("%s: generic_write_checks failed, rc:%zd:.\n", 4605db11c21SMike Marshall __func__, rc); 4615db11c21SMike Marshall goto out; 4625db11c21SMike Marshall } 4635db11c21SMike Marshall 4643f1b6947SMike Marshall /* 4653f1b6947SMike Marshall * if we are appending, generic_write_checks would have updated 4663f1b6947SMike Marshall * pos to the end of the file, so we will wait till now to set 4673f1b6947SMike Marshall * pos... 4683f1b6947SMike Marshall */ 4693f1b6947SMike Marshall pos = *(&iocb->ki_pos); 4703f1b6947SMike Marshall 4718bb8aefdSYi Liu rc = do_readv_writev(ORANGEFS_IO_WRITE, 4725db11c21SMike Marshall file, 4735db11c21SMike Marshall &pos, 4740071ed1eSAl Viro iter); 4755db11c21SMike Marshall if (rc < 0) { 4765db11c21SMike Marshall gossip_err("%s: do_readv_writev failed, rc:%zd:.\n", 4775db11c21SMike Marshall __func__, rc); 4785db11c21SMike Marshall goto out; 4795db11c21SMike Marshall } 4805db11c21SMike Marshall 4815db11c21SMike Marshall iocb->ki_pos = pos; 4828bb8aefdSYi Liu g_orangefs_stats.writes++; 4835db11c21SMike Marshall 4845db11c21SMike Marshall out: 4855db11c21SMike Marshall 4865db11c21SMike Marshall mutex_unlock(&file->f_mapping->host->i_mutex); 4875db11c21SMike Marshall return rc; 4885db11c21SMike Marshall } 4895db11c21SMike Marshall 4905db11c21SMike Marshall /* 4915db11c21SMike Marshall * Perform a miscellaneous operation on a file. 4925db11c21SMike Marshall */ 4938bb8aefdSYi Liu static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 4945db11c21SMike Marshall { 4955db11c21SMike Marshall int ret = -ENOTTY; 4965db11c21SMike Marshall __u64 val = 0; 4975db11c21SMike Marshall unsigned long uval; 4985db11c21SMike Marshall 4995db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 5008bb8aefdSYi Liu "orangefs_ioctl: called with cmd %d\n", 5015db11c21SMike Marshall cmd); 5025db11c21SMike Marshall 5035db11c21SMike Marshall /* 5045db11c21SMike Marshall * we understand some general ioctls on files, such as the immutable 5055db11c21SMike Marshall * and append flags 5065db11c21SMike Marshall */ 5075db11c21SMike Marshall if (cmd == FS_IOC_GETFLAGS) { 5085db11c21SMike Marshall val = 0; 5098bb8aefdSYi Liu ret = orangefs_inode_getxattr(file_inode(file), 5108bb8aefdSYi Liu ORANGEFS_XATTR_NAME_DEFAULT_PREFIX, 5115db11c21SMike Marshall "user.pvfs2.meta_hint", 512555fa0faSAl Viro &val, sizeof(val)); 5135db11c21SMike Marshall if (ret < 0 && ret != -ENODATA) 5145db11c21SMike Marshall return ret; 5155db11c21SMike Marshall else if (ret == -ENODATA) 5165db11c21SMike Marshall val = 0; 5175db11c21SMike Marshall uval = val; 5185db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 5198bb8aefdSYi Liu "orangefs_ioctl: FS_IOC_GETFLAGS: %llu\n", 5205db11c21SMike Marshall (unsigned long long)uval); 5215db11c21SMike Marshall return put_user(uval, (int __user *)arg); 5225db11c21SMike Marshall } else if (cmd == FS_IOC_SETFLAGS) { 5235db11c21SMike Marshall ret = 0; 5245db11c21SMike Marshall if (get_user(uval, (int __user *)arg)) 5255db11c21SMike Marshall return -EFAULT; 5265db11c21SMike Marshall /* 5278bb8aefdSYi Liu * ORANGEFS_MIRROR_FL is set internally when the mirroring mode 5285db11c21SMike Marshall * is turned on for a file. The user is not allowed to turn 5295db11c21SMike Marshall * on this bit, but the bit is present if the user first gets 5305db11c21SMike Marshall * the flags and then updates the flags with some new 5315db11c21SMike Marshall * settings. So, we ignore it in the following edit. bligon. 5325db11c21SMike Marshall */ 5338bb8aefdSYi Liu if ((uval & ~ORANGEFS_MIRROR_FL) & 5345db11c21SMike Marshall (~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NOATIME_FL))) { 5358bb8aefdSYi Liu gossip_err("orangefs_ioctl: the FS_IOC_SETFLAGS only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n"); 5365db11c21SMike Marshall return -EINVAL; 5375db11c21SMike Marshall } 5385db11c21SMike Marshall val = uval; 5395db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 5408bb8aefdSYi Liu "orangefs_ioctl: FS_IOC_SETFLAGS: %llu\n", 5415db11c21SMike Marshall (unsigned long long)val); 5428bb8aefdSYi Liu ret = orangefs_inode_setxattr(file_inode(file), 5438bb8aefdSYi Liu ORANGEFS_XATTR_NAME_DEFAULT_PREFIX, 5445db11c21SMike Marshall "user.pvfs2.meta_hint", 545555fa0faSAl Viro &val, sizeof(val), 0); 5465db11c21SMike Marshall } 5475db11c21SMike Marshall 5485db11c21SMike Marshall return ret; 5495db11c21SMike Marshall } 5505db11c21SMike Marshall 5515db11c21SMike Marshall /* 5525db11c21SMike Marshall * Memory map a region of a file. 5535db11c21SMike Marshall */ 5548bb8aefdSYi Liu static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma) 5555db11c21SMike Marshall { 5565db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 5578bb8aefdSYi Liu "orangefs_file_mmap: called on %s\n", 5585db11c21SMike Marshall (file ? 5595db11c21SMike Marshall (char *)file->f_path.dentry->d_name.name : 5605db11c21SMike Marshall (char *)"Unknown")); 5615db11c21SMike Marshall 5625db11c21SMike Marshall /* set the sequential readahead hint */ 5635db11c21SMike Marshall vma->vm_flags |= VM_SEQ_READ; 5645db11c21SMike Marshall vma->vm_flags &= ~VM_RAND_READ; 56535390803SMartin Brandenburg 56635390803SMartin Brandenburg /* Use readonly mmap since we cannot support writable maps. */ 56735390803SMartin Brandenburg return generic_file_readonly_mmap(file, vma); 5685db11c21SMike Marshall } 5695db11c21SMike Marshall 5705db11c21SMike Marshall #define mapping_nrpages(idata) ((idata)->nrpages) 5715db11c21SMike Marshall 5725db11c21SMike Marshall /* 5735db11c21SMike Marshall * Called to notify the module that there are no more references to 5745db11c21SMike Marshall * this file (i.e. no processes have it open). 5755db11c21SMike Marshall * 5765db11c21SMike Marshall * \note Not called when each file is closed. 5775db11c21SMike Marshall */ 5788bb8aefdSYi Liu static int orangefs_file_release(struct inode *inode, struct file *file) 5795db11c21SMike Marshall { 5805db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 5818bb8aefdSYi Liu "orangefs_file_release: called on %s\n", 5825db11c21SMike Marshall file->f_path.dentry->d_name.name); 5835db11c21SMike Marshall 5848bb8aefdSYi Liu orangefs_flush_inode(inode); 5855db11c21SMike Marshall 5865db11c21SMike Marshall /* 58754804949SMike Marshall * remove all associated inode pages from the page cache and mmap 58854804949SMike Marshall * readahead cache (if any); this forces an expensive refresh of 58954804949SMike Marshall * data for the next caller of mmap (or 'get_block' accesses) 5905db11c21SMike Marshall */ 5915db11c21SMike Marshall if (file->f_path.dentry->d_inode && 5925db11c21SMike Marshall file->f_path.dentry->d_inode->i_mapping && 5935db11c21SMike Marshall mapping_nrpages(&file->f_path.dentry->d_inode->i_data)) 5945db11c21SMike Marshall truncate_inode_pages(file->f_path.dentry->d_inode->i_mapping, 5955db11c21SMike Marshall 0); 5965db11c21SMike Marshall return 0; 5975db11c21SMike Marshall } 5985db11c21SMike Marshall 5995db11c21SMike Marshall /* 6005db11c21SMike Marshall * Push all data for a specific file onto permanent storage. 6015db11c21SMike Marshall */ 6028bb8aefdSYi Liu static int orangefs_fsync(struct file *file, 60384d02150SMike Marshall loff_t start, 60484d02150SMike Marshall loff_t end, 60584d02150SMike Marshall int datasync) 6065db11c21SMike Marshall { 6075db11c21SMike Marshall int ret = -EINVAL; 6088bb8aefdSYi Liu struct orangefs_inode_s *orangefs_inode = 6098bb8aefdSYi Liu ORANGEFS_I(file->f_path.dentry->d_inode); 6108bb8aefdSYi Liu struct orangefs_kernel_op_s *new_op = NULL; 6115db11c21SMike Marshall 6125db11c21SMike Marshall /* required call */ 6135db11c21SMike Marshall filemap_write_and_wait_range(file->f_mapping, start, end); 6145db11c21SMike Marshall 6158bb8aefdSYi Liu new_op = op_alloc(ORANGEFS_VFS_OP_FSYNC); 6165db11c21SMike Marshall if (!new_op) 6175db11c21SMike Marshall return -ENOMEM; 6188bb8aefdSYi Liu new_op->upcall.req.fsync.refn = orangefs_inode->refn; 6195db11c21SMike Marshall 6205db11c21SMike Marshall ret = service_operation(new_op, 6218bb8aefdSYi Liu "orangefs_fsync", 6225db11c21SMike Marshall get_interruptible_flag(file->f_path.dentry->d_inode)); 6235db11c21SMike Marshall 6245db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 6258bb8aefdSYi Liu "orangefs_fsync got return value of %d\n", 6265db11c21SMike Marshall ret); 6275db11c21SMike Marshall 6285db11c21SMike Marshall op_release(new_op); 6295db11c21SMike Marshall 6308bb8aefdSYi Liu orangefs_flush_inode(file->f_path.dentry->d_inode); 6315db11c21SMike Marshall return ret; 6325db11c21SMike Marshall } 6335db11c21SMike Marshall 6345db11c21SMike Marshall /* 6355db11c21SMike Marshall * Change the file pointer position for an instance of an open file. 6365db11c21SMike Marshall * 6375db11c21SMike Marshall * \note If .llseek is overriden, we must acquire lock as described in 6385db11c21SMike Marshall * Documentation/filesystems/Locking. 6395db11c21SMike Marshall * 6405db11c21SMike Marshall * Future upgrade could support SEEK_DATA and SEEK_HOLE but would 6415db11c21SMike Marshall * require much changes to the FS 6425db11c21SMike Marshall */ 6438bb8aefdSYi Liu static loff_t orangefs_file_llseek(struct file *file, loff_t offset, int origin) 6445db11c21SMike Marshall { 6455db11c21SMike Marshall int ret = -EINVAL; 6465db11c21SMike Marshall struct inode *inode = file->f_path.dentry->d_inode; 6475db11c21SMike Marshall 6485db11c21SMike Marshall if (!inode) { 6498bb8aefdSYi Liu gossip_err("orangefs_file_llseek: invalid inode (NULL)\n"); 6505db11c21SMike Marshall return ret; 6515db11c21SMike Marshall } 6525db11c21SMike Marshall 6538bb8aefdSYi Liu if (origin == ORANGEFS_SEEK_END) { 6545db11c21SMike Marshall /* 6555db11c21SMike Marshall * revalidate the inode's file size. 6565db11c21SMike Marshall * NOTE: We are only interested in file size here, 6575db11c21SMike Marshall * so we set mask accordingly. 6585db11c21SMike Marshall */ 6598bb8aefdSYi Liu ret = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_SIZE); 6605db11c21SMike Marshall if (ret) { 6615db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 6625db11c21SMike Marshall "%s:%s:%d calling make bad inode\n", 6635db11c21SMike Marshall __FILE__, 6645db11c21SMike Marshall __func__, 6655db11c21SMike Marshall __LINE__); 6668bb8aefdSYi Liu orangefs_make_bad_inode(inode); 6675db11c21SMike Marshall return ret; 6685db11c21SMike Marshall } 6695db11c21SMike Marshall } 6705db11c21SMike Marshall 6715db11c21SMike Marshall gossip_debug(GOSSIP_FILE_DEBUG, 6728bb8aefdSYi Liu "orangefs_file_llseek: offset is %ld | origin is %d" 67354804949SMike Marshall " | inode size is %lu\n", 6745db11c21SMike Marshall (long)offset, 6755db11c21SMike Marshall origin, 6765db11c21SMike Marshall (unsigned long)file->f_path.dentry->d_inode->i_size); 6775db11c21SMike Marshall 6785db11c21SMike Marshall return generic_file_llseek(file, offset, origin); 6795db11c21SMike Marshall } 6805db11c21SMike Marshall 6815db11c21SMike Marshall /* 6825db11c21SMike Marshall * Support local locks (locks that only this kernel knows about) 6835db11c21SMike Marshall * if Orangefs was mounted -o local_lock. 6845db11c21SMike Marshall */ 6858bb8aefdSYi Liu static int orangefs_lock(struct file *filp, int cmd, struct file_lock *fl) 6865db11c21SMike Marshall { 687f957ae2dSMike Marshall int rc = -EINVAL; 6885db11c21SMike Marshall 6898bb8aefdSYi Liu if (ORANGEFS_SB(filp->f_inode->i_sb)->flags & ORANGEFS_OPT_LOCAL_LOCK) { 6905db11c21SMike Marshall if (cmd == F_GETLK) { 6915db11c21SMike Marshall rc = 0; 6925db11c21SMike Marshall posix_test_lock(filp, fl); 6935db11c21SMike Marshall } else { 6945db11c21SMike Marshall rc = posix_lock_file(filp, fl, NULL); 6955db11c21SMike Marshall } 6965db11c21SMike Marshall } 6975db11c21SMike Marshall 6985db11c21SMike Marshall return rc; 6995db11c21SMike Marshall } 7005db11c21SMike Marshall 7018bb8aefdSYi Liu /** ORANGEFS implementation of VFS file operations */ 7028bb8aefdSYi Liu const struct file_operations orangefs_file_operations = { 7038bb8aefdSYi Liu .llseek = orangefs_file_llseek, 7048bb8aefdSYi Liu .read_iter = orangefs_file_read_iter, 7058bb8aefdSYi Liu .write_iter = orangefs_file_write_iter, 7068bb8aefdSYi Liu .lock = orangefs_lock, 7078bb8aefdSYi Liu .unlocked_ioctl = orangefs_ioctl, 7088bb8aefdSYi Liu .mmap = orangefs_file_mmap, 7095db11c21SMike Marshall .open = generic_file_open, 7108bb8aefdSYi Liu .release = orangefs_file_release, 7118bb8aefdSYi Liu .fsync = orangefs_fsync, 7125db11c21SMike Marshall }; 713