1 #include <linux/ceph/ceph_debug.h> 2 3 #include <linux/file.h> 4 #include <linux/namei.h> 5 #include <linux/random.h> 6 7 #include "super.h" 8 #include "mds_client.h" 9 #include <linux/ceph/pagelist.h> 10 11 static u64 lock_secret; 12 static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, 13 struct ceph_mds_request *req); 14 15 static inline u64 secure_addr(void *addr) 16 { 17 u64 v = lock_secret ^ (u64)(unsigned long)addr; 18 /* 19 * Set the most significant bit, so that MDS knows the 'owner' 20 * is sufficient to identify the owner of lock. (old code uses 21 * both 'owner' and 'pid') 22 */ 23 v |= (1ULL << 63); 24 return v; 25 } 26 27 void __init ceph_flock_init(void) 28 { 29 get_random_bytes(&lock_secret, sizeof(lock_secret)); 30 } 31 32 /** 33 * Implement fcntl and flock locking functions. 34 */ 35 static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, 36 int cmd, u8 wait, struct file_lock *fl) 37 { 38 struct inode *inode = file_inode(file); 39 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; 40 struct ceph_mds_request *req; 41 int err; 42 u64 length = 0; 43 u64 owner; 44 45 if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK) 46 wait = 0; 47 48 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); 49 if (IS_ERR(req)) 50 return PTR_ERR(req); 51 req->r_inode = inode; 52 ihold(inode); 53 req->r_num_caps = 1; 54 55 /* mds requires start and length rather than start and end */ 56 if (LLONG_MAX == fl->fl_end) 57 length = 0; 58 else 59 length = fl->fl_end - fl->fl_start + 1; 60 61 owner = secure_addr(fl->fl_owner); 62 63 dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, " 64 "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type, 65 (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length, 66 wait, fl->fl_type); 67 68 req->r_args.filelock_change.rule = lock_type; 69 req->r_args.filelock_change.type = cmd; 70 req->r_args.filelock_change.owner = cpu_to_le64(owner); 71 req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid); 72 req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start); 73 req->r_args.filelock_change.length = cpu_to_le64(length); 74 req->r_args.filelock_change.wait = wait; 75 76 if (wait) 77 req->r_wait_for_completion = ceph_lock_wait_for_completion; 78 79 err = ceph_mdsc_do_request(mdsc, inode, req); 80 81 if (operation == CEPH_MDS_OP_GETFILELOCK) { 82 fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid); 83 if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type) 84 fl->fl_type = F_RDLCK; 85 else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type) 86 fl->fl_type = F_WRLCK; 87 else 88 fl->fl_type = F_UNLCK; 89 90 fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start); 91 length = le64_to_cpu(req->r_reply_info.filelock_reply->start) + 92 le64_to_cpu(req->r_reply_info.filelock_reply->length); 93 if (length >= 1) 94 fl->fl_end = length -1; 95 else 96 fl->fl_end = 0; 97 98 } 99 ceph_mdsc_put_request(req); 100 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 101 "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type, 102 (int)operation, (u64)fl->fl_pid, fl->fl_start, 103 length, wait, fl->fl_type, err); 104 return err; 105 } 106 107 static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, 108 struct ceph_mds_request *req) 109 { 110 struct ceph_mds_request *intr_req; 111 struct inode *inode = req->r_inode; 112 int err, lock_type; 113 114 BUG_ON(req->r_op != CEPH_MDS_OP_SETFILELOCK); 115 if (req->r_args.filelock_change.rule == CEPH_LOCK_FCNTL) 116 lock_type = CEPH_LOCK_FCNTL_INTR; 117 else if (req->r_args.filelock_change.rule == CEPH_LOCK_FLOCK) 118 lock_type = CEPH_LOCK_FLOCK_INTR; 119 else 120 BUG_ON(1); 121 BUG_ON(req->r_args.filelock_change.type == CEPH_LOCK_UNLOCK); 122 123 err = wait_for_completion_interruptible(&req->r_completion); 124 if (!err) 125 return 0; 126 127 dout("ceph_lock_wait_for_completion: request %llu was interrupted\n", 128 req->r_tid); 129 130 mutex_lock(&mdsc->mutex); 131 if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) { 132 err = 0; 133 } else { 134 /* 135 * ensure we aren't running concurrently with 136 * ceph_fill_trace or ceph_readdir_prepopulate, which 137 * rely on locks (dir mutex) held by our caller. 138 */ 139 mutex_lock(&req->r_fill_mutex); 140 req->r_err = err; 141 set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags); 142 mutex_unlock(&req->r_fill_mutex); 143 144 if (!req->r_session) { 145 // haven't sent the request 146 err = 0; 147 } 148 } 149 mutex_unlock(&mdsc->mutex); 150 if (!err) 151 return 0; 152 153 intr_req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETFILELOCK, 154 USE_AUTH_MDS); 155 if (IS_ERR(intr_req)) 156 return PTR_ERR(intr_req); 157 158 intr_req->r_inode = inode; 159 ihold(inode); 160 intr_req->r_num_caps = 1; 161 162 intr_req->r_args.filelock_change = req->r_args.filelock_change; 163 intr_req->r_args.filelock_change.rule = lock_type; 164 intr_req->r_args.filelock_change.type = CEPH_LOCK_UNLOCK; 165 166 err = ceph_mdsc_do_request(mdsc, inode, intr_req); 167 ceph_mdsc_put_request(intr_req); 168 169 if (err && err != -ERESTARTSYS) 170 return err; 171 172 wait_for_completion_killable(&req->r_safe_completion); 173 return 0; 174 } 175 176 /** 177 * Attempt to set an fcntl lock. 178 * For now, this just goes away to the server. Later it may be more awesome. 179 */ 180 int ceph_lock(struct file *file, int cmd, struct file_lock *fl) 181 { 182 u8 lock_cmd; 183 int err; 184 u8 wait = 0; 185 u16 op = CEPH_MDS_OP_SETFILELOCK; 186 187 if (!(fl->fl_flags & FL_POSIX)) 188 return -ENOLCK; 189 /* No mandatory locks */ 190 if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK) 191 return -ENOLCK; 192 193 dout("ceph_lock, fl_owner: %p", fl->fl_owner); 194 195 /* set wait bit as appropriate, then make command as Ceph expects it*/ 196 if (IS_GETLK(cmd)) 197 op = CEPH_MDS_OP_GETFILELOCK; 198 else if (IS_SETLKW(cmd)) 199 wait = 1; 200 201 if (F_RDLCK == fl->fl_type) 202 lock_cmd = CEPH_LOCK_SHARED; 203 else if (F_WRLCK == fl->fl_type) 204 lock_cmd = CEPH_LOCK_EXCL; 205 else 206 lock_cmd = CEPH_LOCK_UNLOCK; 207 208 err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl); 209 if (!err) { 210 if (op != CEPH_MDS_OP_GETFILELOCK) { 211 dout("mds locked, locking locally"); 212 err = posix_lock_file(file, fl, NULL); 213 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { 214 /* undo! This should only happen if 215 * the kernel detects local 216 * deadlock. */ 217 ceph_lock_message(CEPH_LOCK_FCNTL, op, file, 218 CEPH_LOCK_UNLOCK, 0, fl); 219 dout("got %d on posix_lock_file, undid lock", 220 err); 221 } 222 } 223 } 224 return err; 225 } 226 227 int ceph_flock(struct file *file, int cmd, struct file_lock *fl) 228 { 229 u8 lock_cmd; 230 int err; 231 u8 wait = 0; 232 233 if (!(fl->fl_flags & FL_FLOCK)) 234 return -ENOLCK; 235 /* No mandatory locks */ 236 if (fl->fl_type & LOCK_MAND) 237 return -EOPNOTSUPP; 238 239 dout("ceph_flock, fl_file: %p", fl->fl_file); 240 241 if (IS_SETLKW(cmd)) 242 wait = 1; 243 244 if (F_RDLCK == fl->fl_type) 245 lock_cmd = CEPH_LOCK_SHARED; 246 else if (F_WRLCK == fl->fl_type) 247 lock_cmd = CEPH_LOCK_EXCL; 248 else 249 lock_cmd = CEPH_LOCK_UNLOCK; 250 251 err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, 252 file, lock_cmd, wait, fl); 253 if (!err) { 254 err = locks_lock_file_wait(file, fl); 255 if (err) { 256 ceph_lock_message(CEPH_LOCK_FLOCK, 257 CEPH_MDS_OP_SETFILELOCK, 258 file, CEPH_LOCK_UNLOCK, 0, fl); 259 dout("got %d on locks_lock_file_wait, undid lock", err); 260 } 261 } 262 return err; 263 } 264 265 /* 266 * Fills in the passed counter variables, so you can prepare pagelist metadata 267 * before calling ceph_encode_locks. 268 */ 269 void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) 270 { 271 struct file_lock *lock; 272 struct file_lock_context *ctx; 273 274 *fcntl_count = 0; 275 *flock_count = 0; 276 277 ctx = inode->i_flctx; 278 if (ctx) { 279 spin_lock(&ctx->flc_lock); 280 list_for_each_entry(lock, &ctx->flc_posix, fl_list) 281 ++(*fcntl_count); 282 list_for_each_entry(lock, &ctx->flc_flock, fl_list) 283 ++(*flock_count); 284 spin_unlock(&ctx->flc_lock); 285 } 286 dout("counted %d flock locks and %d fcntl locks", 287 *flock_count, *fcntl_count); 288 } 289 290 /** 291 * Encode the flock and fcntl locks for the given inode into the ceph_filelock 292 * array. Must be called with inode->i_lock already held. 293 * If we encounter more of a specific lock type than expected, return -ENOSPC. 294 */ 295 int ceph_encode_locks_to_buffer(struct inode *inode, 296 struct ceph_filelock *flocks, 297 int num_fcntl_locks, int num_flock_locks) 298 { 299 struct file_lock *lock; 300 struct file_lock_context *ctx = inode->i_flctx; 301 int err = 0; 302 int seen_fcntl = 0; 303 int seen_flock = 0; 304 int l = 0; 305 306 dout("encoding %d flock and %d fcntl locks", num_flock_locks, 307 num_fcntl_locks); 308 309 if (!ctx) 310 return 0; 311 312 spin_lock(&ctx->flc_lock); 313 list_for_each_entry(lock, &ctx->flc_posix, fl_list) { 314 ++seen_fcntl; 315 if (seen_fcntl > num_fcntl_locks) { 316 err = -ENOSPC; 317 goto fail; 318 } 319 err = lock_to_ceph_filelock(lock, &flocks[l]); 320 if (err) 321 goto fail; 322 ++l; 323 } 324 list_for_each_entry(lock, &ctx->flc_flock, fl_list) { 325 ++seen_flock; 326 if (seen_flock > num_flock_locks) { 327 err = -ENOSPC; 328 goto fail; 329 } 330 err = lock_to_ceph_filelock(lock, &flocks[l]); 331 if (err) 332 goto fail; 333 ++l; 334 } 335 fail: 336 spin_unlock(&ctx->flc_lock); 337 return err; 338 } 339 340 /** 341 * Copy the encoded flock and fcntl locks into the pagelist. 342 * Format is: #fcntl locks, sequential fcntl locks, #flock locks, 343 * sequential flock locks. 344 * Returns zero on success. 345 */ 346 int ceph_locks_to_pagelist(struct ceph_filelock *flocks, 347 struct ceph_pagelist *pagelist, 348 int num_fcntl_locks, int num_flock_locks) 349 { 350 int err = 0; 351 __le32 nlocks; 352 353 nlocks = cpu_to_le32(num_fcntl_locks); 354 err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks)); 355 if (err) 356 goto out_fail; 357 358 err = ceph_pagelist_append(pagelist, flocks, 359 num_fcntl_locks * sizeof(*flocks)); 360 if (err) 361 goto out_fail; 362 363 nlocks = cpu_to_le32(num_flock_locks); 364 err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks)); 365 if (err) 366 goto out_fail; 367 368 err = ceph_pagelist_append(pagelist, 369 &flocks[num_fcntl_locks], 370 num_flock_locks * sizeof(*flocks)); 371 out_fail: 372 return err; 373 } 374 375 /* 376 * Given a pointer to a lock, convert it to a ceph filelock 377 */ 378 int lock_to_ceph_filelock(struct file_lock *lock, 379 struct ceph_filelock *cephlock) 380 { 381 int err = 0; 382 cephlock->start = cpu_to_le64(lock->fl_start); 383 cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); 384 cephlock->client = cpu_to_le64(0); 385 cephlock->pid = cpu_to_le64((u64)lock->fl_pid); 386 cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner)); 387 388 switch (lock->fl_type) { 389 case F_RDLCK: 390 cephlock->type = CEPH_LOCK_SHARED; 391 break; 392 case F_WRLCK: 393 cephlock->type = CEPH_LOCK_EXCL; 394 break; 395 case F_UNLCK: 396 cephlock->type = CEPH_LOCK_UNLOCK; 397 break; 398 default: 399 dout("Have unknown lock type %d", lock->fl_type); 400 err = -EINVAL; 401 } 402 403 return err; 404 } 405