1 #include <linux/ceph/ceph_debug.h> 2 3 #include <linux/file.h> 4 #include <linux/namei.h> 5 #include <linux/random.h> 6 7 #include "super.h" 8 #include "mds_client.h" 9 #include <linux/ceph/pagelist.h> 10 11 static u64 lock_secret; 12 static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, 13 struct ceph_mds_request *req); 14 15 static inline u64 secure_addr(void *addr) 16 { 17 u64 v = lock_secret ^ (u64)(unsigned long)addr; 18 /* 19 * Set the most significant bit, so that MDS knows the 'owner' 20 * is sufficient to identify the owner of lock. (old code uses 21 * both 'owner' and 'pid') 22 */ 23 v |= (1ULL << 63); 24 return v; 25 } 26 27 void __init ceph_flock_init(void) 28 { 29 get_random_bytes(&lock_secret, sizeof(lock_secret)); 30 } 31 32 /** 33 * Implement fcntl and flock locking functions. 34 */ 35 static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, 36 int cmd, u8 wait, struct file_lock *fl) 37 { 38 struct inode *inode = file_inode(file); 39 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; 40 struct ceph_mds_request *req; 41 int err; 42 u64 length = 0; 43 u64 owner; 44 45 if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK) 46 wait = 0; 47 48 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); 49 if (IS_ERR(req)) 50 return PTR_ERR(req); 51 req->r_inode = inode; 52 ihold(inode); 53 req->r_num_caps = 1; 54 55 /* mds requires start and length rather than start and end */ 56 if (LLONG_MAX == fl->fl_end) 57 length = 0; 58 else 59 length = fl->fl_end - fl->fl_start + 1; 60 61 owner = secure_addr(fl->fl_owner); 62 63 dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, " 64 "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type, 65 (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length, 66 wait, fl->fl_type); 67 68 req->r_args.filelock_change.rule = lock_type; 69 req->r_args.filelock_change.type = cmd; 70 req->r_args.filelock_change.owner = cpu_to_le64(owner); 71 req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid); 72 req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start); 73 req->r_args.filelock_change.length = cpu_to_le64(length); 74 req->r_args.filelock_change.wait = wait; 75 76 if (wait) 77 req->r_wait_for_completion = ceph_lock_wait_for_completion; 78 79 err = ceph_mdsc_do_request(mdsc, inode, req); 80 81 if (operation == CEPH_MDS_OP_GETFILELOCK) { 82 fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid); 83 if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type) 84 fl->fl_type = F_RDLCK; 85 else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type) 86 fl->fl_type = F_WRLCK; 87 else 88 fl->fl_type = F_UNLCK; 89 90 fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start); 91 length = le64_to_cpu(req->r_reply_info.filelock_reply->start) + 92 le64_to_cpu(req->r_reply_info.filelock_reply->length); 93 if (length >= 1) 94 fl->fl_end = length -1; 95 else 96 fl->fl_end = 0; 97 98 } 99 ceph_mdsc_put_request(req); 100 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 101 "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type, 102 (int)operation, (u64)fl->fl_pid, fl->fl_start, 103 length, wait, fl->fl_type, err); 104 return err; 105 } 106 107 static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, 108 struct ceph_mds_request *req) 109 { 110 struct ceph_mds_request *intr_req; 111 struct inode *inode = req->r_inode; 112 int err, lock_type; 113 114 BUG_ON(req->r_op != CEPH_MDS_OP_SETFILELOCK); 115 if (req->r_args.filelock_change.rule == CEPH_LOCK_FCNTL) 116 lock_type = CEPH_LOCK_FCNTL_INTR; 117 else if (req->r_args.filelock_change.rule == CEPH_LOCK_FLOCK) 118 lock_type = CEPH_LOCK_FLOCK_INTR; 119 else 120 BUG_ON(1); 121 BUG_ON(req->r_args.filelock_change.type == CEPH_LOCK_UNLOCK); 122 123 err = wait_for_completion_interruptible(&req->r_completion); 124 if (!err) 125 return 0; 126 127 dout("ceph_lock_wait_for_completion: request %llu was interrupted\n", 128 req->r_tid); 129 130 intr_req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETFILELOCK, 131 USE_AUTH_MDS); 132 if (IS_ERR(intr_req)) 133 return PTR_ERR(intr_req); 134 135 intr_req->r_inode = inode; 136 ihold(inode); 137 intr_req->r_num_caps = 1; 138 139 intr_req->r_args.filelock_change = req->r_args.filelock_change; 140 intr_req->r_args.filelock_change.rule = lock_type; 141 intr_req->r_args.filelock_change.type = CEPH_LOCK_UNLOCK; 142 143 err = ceph_mdsc_do_request(mdsc, inode, intr_req); 144 ceph_mdsc_put_request(intr_req); 145 146 if (err && err != -ERESTARTSYS) 147 return err; 148 149 wait_for_completion(&req->r_completion); 150 return 0; 151 } 152 153 /** 154 * Attempt to set an fcntl lock. 155 * For now, this just goes away to the server. Later it may be more awesome. 156 */ 157 int ceph_lock(struct file *file, int cmd, struct file_lock *fl) 158 { 159 u8 lock_cmd; 160 int err; 161 u8 wait = 0; 162 u16 op = CEPH_MDS_OP_SETFILELOCK; 163 164 if (!(fl->fl_flags & FL_POSIX)) 165 return -ENOLCK; 166 /* No mandatory locks */ 167 if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK) 168 return -ENOLCK; 169 170 dout("ceph_lock, fl_owner: %p", fl->fl_owner); 171 172 /* set wait bit as appropriate, then make command as Ceph expects it*/ 173 if (IS_GETLK(cmd)) 174 op = CEPH_MDS_OP_GETFILELOCK; 175 else if (IS_SETLKW(cmd)) 176 wait = 1; 177 178 if (F_RDLCK == fl->fl_type) 179 lock_cmd = CEPH_LOCK_SHARED; 180 else if (F_WRLCK == fl->fl_type) 181 lock_cmd = CEPH_LOCK_EXCL; 182 else 183 lock_cmd = CEPH_LOCK_UNLOCK; 184 185 err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl); 186 if (!err) { 187 if (op != CEPH_MDS_OP_GETFILELOCK) { 188 dout("mds locked, locking locally"); 189 err = posix_lock_file(file, fl, NULL); 190 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { 191 /* undo! This should only happen if 192 * the kernel detects local 193 * deadlock. */ 194 ceph_lock_message(CEPH_LOCK_FCNTL, op, file, 195 CEPH_LOCK_UNLOCK, 0, fl); 196 dout("got %d on posix_lock_file, undid lock", 197 err); 198 } 199 } 200 } 201 return err; 202 } 203 204 int ceph_flock(struct file *file, int cmd, struct file_lock *fl) 205 { 206 u8 lock_cmd; 207 int err; 208 u8 wait = 0; 209 210 if (!(fl->fl_flags & FL_FLOCK)) 211 return -ENOLCK; 212 /* No mandatory locks */ 213 if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK) 214 return -ENOLCK; 215 216 dout("ceph_flock, fl_file: %p", fl->fl_file); 217 218 if (IS_SETLKW(cmd)) 219 wait = 1; 220 221 if (F_RDLCK == fl->fl_type) 222 lock_cmd = CEPH_LOCK_SHARED; 223 else if (F_WRLCK == fl->fl_type) 224 lock_cmd = CEPH_LOCK_EXCL; 225 else 226 lock_cmd = CEPH_LOCK_UNLOCK; 227 228 err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, 229 file, lock_cmd, wait, fl); 230 if (!err) { 231 err = flock_lock_file_wait(file, fl); 232 if (err) { 233 ceph_lock_message(CEPH_LOCK_FLOCK, 234 CEPH_MDS_OP_SETFILELOCK, 235 file, CEPH_LOCK_UNLOCK, 0, fl); 236 dout("got %d on flock_lock_file_wait, undid lock", err); 237 } 238 } 239 return err; 240 } 241 242 /** 243 * Must be called with lock_flocks() already held. Fills in the passed 244 * counter variables, so you can prepare pagelist metadata before calling 245 * ceph_encode_locks. 246 */ 247 void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) 248 { 249 struct file_lock *lock; 250 251 *fcntl_count = 0; 252 *flock_count = 0; 253 254 for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { 255 if (lock->fl_flags & FL_POSIX) 256 ++(*fcntl_count); 257 else if (lock->fl_flags & FL_FLOCK) 258 ++(*flock_count); 259 } 260 dout("counted %d flock locks and %d fcntl locks", 261 *flock_count, *fcntl_count); 262 } 263 264 /** 265 * Encode the flock and fcntl locks for the given inode into the ceph_filelock 266 * array. Must be called with inode->i_lock already held. 267 * If we encounter more of a specific lock type than expected, return -ENOSPC. 268 */ 269 int ceph_encode_locks_to_buffer(struct inode *inode, 270 struct ceph_filelock *flocks, 271 int num_fcntl_locks, int num_flock_locks) 272 { 273 struct file_lock *lock; 274 int err = 0; 275 int seen_fcntl = 0; 276 int seen_flock = 0; 277 int l = 0; 278 279 dout("encoding %d flock and %d fcntl locks", num_flock_locks, 280 num_fcntl_locks); 281 282 for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { 283 if (lock->fl_flags & FL_POSIX) { 284 ++seen_fcntl; 285 if (seen_fcntl > num_fcntl_locks) { 286 err = -ENOSPC; 287 goto fail; 288 } 289 err = lock_to_ceph_filelock(lock, &flocks[l]); 290 if (err) 291 goto fail; 292 ++l; 293 } 294 } 295 for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { 296 if (lock->fl_flags & FL_FLOCK) { 297 ++seen_flock; 298 if (seen_flock > num_flock_locks) { 299 err = -ENOSPC; 300 goto fail; 301 } 302 err = lock_to_ceph_filelock(lock, &flocks[l]); 303 if (err) 304 goto fail; 305 ++l; 306 } 307 } 308 fail: 309 return err; 310 } 311 312 /** 313 * Copy the encoded flock and fcntl locks into the pagelist. 314 * Format is: #fcntl locks, sequential fcntl locks, #flock locks, 315 * sequential flock locks. 316 * Returns zero on success. 317 */ 318 int ceph_locks_to_pagelist(struct ceph_filelock *flocks, 319 struct ceph_pagelist *pagelist, 320 int num_fcntl_locks, int num_flock_locks) 321 { 322 int err = 0; 323 __le32 nlocks; 324 325 nlocks = cpu_to_le32(num_fcntl_locks); 326 err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks)); 327 if (err) 328 goto out_fail; 329 330 err = ceph_pagelist_append(pagelist, flocks, 331 num_fcntl_locks * sizeof(*flocks)); 332 if (err) 333 goto out_fail; 334 335 nlocks = cpu_to_le32(num_flock_locks); 336 err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks)); 337 if (err) 338 goto out_fail; 339 340 err = ceph_pagelist_append(pagelist, 341 &flocks[num_fcntl_locks], 342 num_flock_locks * sizeof(*flocks)); 343 out_fail: 344 return err; 345 } 346 347 /* 348 * Given a pointer to a lock, convert it to a ceph filelock 349 */ 350 int lock_to_ceph_filelock(struct file_lock *lock, 351 struct ceph_filelock *cephlock) 352 { 353 int err = 0; 354 cephlock->start = cpu_to_le64(lock->fl_start); 355 cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); 356 cephlock->client = cpu_to_le64(0); 357 cephlock->pid = cpu_to_le64((u64)lock->fl_pid); 358 cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner)); 359 360 switch (lock->fl_type) { 361 case F_RDLCK: 362 cephlock->type = CEPH_LOCK_SHARED; 363 break; 364 case F_WRLCK: 365 cephlock->type = CEPH_LOCK_EXCL; 366 break; 367 case F_UNLCK: 368 cephlock->type = CEPH_LOCK_UNLOCK; 369 break; 370 default: 371 dout("Have unknown lock type %d", lock->fl_type); 372 err = -EINVAL; 373 } 374 375 return err; 376 } 377