xref: /openbmc/linux/fs/ceph/locks.c (revision 94c7b6fc)
1 #include <linux/ceph/ceph_debug.h>
2 
3 #include <linux/file.h>
4 #include <linux/namei.h>
5 #include <linux/random.h>
6 
7 #include "super.h"
8 #include "mds_client.h"
9 #include <linux/ceph/pagelist.h>
10 
/*
 * Random value XORed into lock-owner pointers by secure_addr() before they
 * are sent to the MDS. Filled in once by ceph_flock_init().
 */
static u64 lock_secret;
12 
13 static inline u64 secure_addr(void *addr)
14 {
15 	u64 v = lock_secret ^ (u64)(unsigned long)addr;
16 	/*
17 	 * Set the most significant bit, so that MDS knows the 'owner'
18 	 * is sufficient to identify the owner of lock. (old code uses
19 	 * both 'owner' and 'pid')
20 	 */
21 	v |= (1ULL << 63);
22 	return v;
23 }
24 
25 void __init ceph_flock_init(void)
26 {
27 	get_random_bytes(&lock_secret, sizeof(lock_secret));
28 }
29 
30 /**
31  * Implement fcntl and flock locking functions.
32  */
33 static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
34 			     int cmd, u8 wait, struct file_lock *fl)
35 {
36 	struct inode *inode = file_inode(file);
37 	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
38 	struct ceph_mds_request *req;
39 	int err;
40 	u64 length = 0;
41 	u64 owner;
42 
43 	req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
44 	if (IS_ERR(req))
45 		return PTR_ERR(req);
46 	req->r_inode = inode;
47 	ihold(inode);
48 	req->r_num_caps = 1;
49 
50 	/* mds requires start and length rather than start and end */
51 	if (LLONG_MAX == fl->fl_end)
52 		length = 0;
53 	else
54 		length = fl->fl_end - fl->fl_start + 1;
55 
56 	owner = secure_addr(fl->fl_owner);
57 
58 	dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, "
59 	     "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type,
60 	     (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length,
61 	     wait, fl->fl_type);
62 
63 	req->r_args.filelock_change.rule = lock_type;
64 	req->r_args.filelock_change.type = cmd;
65 	req->r_args.filelock_change.owner = cpu_to_le64(owner);
66 	req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
67 	req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
68 	req->r_args.filelock_change.length = cpu_to_le64(length);
69 	req->r_args.filelock_change.wait = wait;
70 
71 	err = ceph_mdsc_do_request(mdsc, inode, req);
72 
73 	if (operation == CEPH_MDS_OP_GETFILELOCK) {
74 		fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid);
75 		if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
76 			fl->fl_type = F_RDLCK;
77 		else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
78 			fl->fl_type = F_WRLCK;
79 		else
80 			fl->fl_type = F_UNLCK;
81 
82 		fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
83 		length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
84 						 le64_to_cpu(req->r_reply_info.filelock_reply->length);
85 		if (length >= 1)
86 			fl->fl_end = length -1;
87 		else
88 			fl->fl_end = 0;
89 
90 	}
91 	ceph_mdsc_put_request(req);
92 	dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
93 	     "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type,
94 	     (int)operation, (u64)fl->fl_pid, fl->fl_start,
95 	     length, wait, fl->fl_type, err);
96 	return err;
97 }
98 
99 /**
100  * Attempt to set an fcntl lock.
101  * For now, this just goes away to the server. Later it may be more awesome.
102  */
103 int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
104 {
105 	u8 lock_cmd;
106 	int err;
107 	u8 wait = 0;
108 	u16 op = CEPH_MDS_OP_SETFILELOCK;
109 
110 	if (!(fl->fl_flags & FL_POSIX))
111 		return -ENOLCK;
112 	/* No mandatory locks */
113 	if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
114 		return -ENOLCK;
115 
116 	dout("ceph_lock, fl_owner: %p", fl->fl_owner);
117 
118 	/* set wait bit as appropriate, then make command as Ceph expects it*/
119 	if (IS_GETLK(cmd))
120 		op = CEPH_MDS_OP_GETFILELOCK;
121 	else if (IS_SETLKW(cmd))
122 		wait = 1;
123 
124 	if (F_RDLCK == fl->fl_type)
125 		lock_cmd = CEPH_LOCK_SHARED;
126 	else if (F_WRLCK == fl->fl_type)
127 		lock_cmd = CEPH_LOCK_EXCL;
128 	else
129 		lock_cmd = CEPH_LOCK_UNLOCK;
130 
131 	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
132 	if (!err) {
133 		if (op != CEPH_MDS_OP_GETFILELOCK) {
134 			dout("mds locked, locking locally");
135 			err = posix_lock_file(file, fl, NULL);
136 			if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
137 				/* undo! This should only happen if
138 				 * the kernel detects local
139 				 * deadlock. */
140 				ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
141 						  CEPH_LOCK_UNLOCK, 0, fl);
142 				dout("got %d on posix_lock_file, undid lock",
143 				     err);
144 			}
145 		}
146 
147 	} else if (err == -ERESTARTSYS) {
148 		dout("undoing lock\n");
149 		ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
150 				  CEPH_LOCK_UNLOCK, 0, fl);
151 	}
152 	return err;
153 }
154 
155 int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
156 {
157 	u8 lock_cmd;
158 	int err;
159 	u8 wait = 0;
160 
161 	if (!(fl->fl_flags & FL_FLOCK))
162 		return -ENOLCK;
163 	/* No mandatory locks */
164 	if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
165 		return -ENOLCK;
166 
167 	dout("ceph_flock, fl_file: %p", fl->fl_file);
168 
169 	if (IS_SETLKW(cmd))
170 		wait = 1;
171 
172 	if (F_RDLCK == fl->fl_type)
173 		lock_cmd = CEPH_LOCK_SHARED;
174 	else if (F_WRLCK == fl->fl_type)
175 		lock_cmd = CEPH_LOCK_EXCL;
176 	else
177 		lock_cmd = CEPH_LOCK_UNLOCK;
178 
179 	err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
180 				file, lock_cmd, wait, fl);
181 	if (!err) {
182 		err = flock_lock_file_wait(file, fl);
183 		if (err) {
184 			ceph_lock_message(CEPH_LOCK_FLOCK,
185 					  CEPH_MDS_OP_SETFILELOCK,
186 					  file, CEPH_LOCK_UNLOCK, 0, fl);
187 			dout("got %d on flock_lock_file_wait, undid lock", err);
188 		}
189 	} else if (err == -ERESTARTSYS) {
190 		dout("undoing lock\n");
191 		ceph_lock_message(CEPH_LOCK_FLOCK,
192 				  CEPH_MDS_OP_SETFILELOCK,
193 				  file, CEPH_LOCK_UNLOCK, 0, fl);
194 	}
195 	return err;
196 }
197 
198 /**
199  * Must be called with lock_flocks() already held. Fills in the passed
200  * counter variables, so you can prepare pagelist metadata before calling
201  * ceph_encode_locks.
202  */
203 void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
204 {
205 	struct file_lock *lock;
206 
207 	*fcntl_count = 0;
208 	*flock_count = 0;
209 
210 	for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
211 		if (lock->fl_flags & FL_POSIX)
212 			++(*fcntl_count);
213 		else if (lock->fl_flags & FL_FLOCK)
214 			++(*flock_count);
215 	}
216 	dout("counted %d flock locks and %d fcntl locks",
217 	     *flock_count, *fcntl_count);
218 }
219 
220 /**
221  * Encode the flock and fcntl locks for the given inode into the ceph_filelock
222  * array. Must be called with inode->i_lock already held.
223  * If we encounter more of a specific lock type than expected, return -ENOSPC.
224  */
225 int ceph_encode_locks_to_buffer(struct inode *inode,
226 				struct ceph_filelock *flocks,
227 				int num_fcntl_locks, int num_flock_locks)
228 {
229 	struct file_lock *lock;
230 	int err = 0;
231 	int seen_fcntl = 0;
232 	int seen_flock = 0;
233 	int l = 0;
234 
235 	dout("encoding %d flock and %d fcntl locks", num_flock_locks,
236 	     num_fcntl_locks);
237 
238 	for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
239 		if (lock->fl_flags & FL_POSIX) {
240 			++seen_fcntl;
241 			if (seen_fcntl > num_fcntl_locks) {
242 				err = -ENOSPC;
243 				goto fail;
244 			}
245 			err = lock_to_ceph_filelock(lock, &flocks[l]);
246 			if (err)
247 				goto fail;
248 			++l;
249 		}
250 	}
251 	for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
252 		if (lock->fl_flags & FL_FLOCK) {
253 			++seen_flock;
254 			if (seen_flock > num_flock_locks) {
255 				err = -ENOSPC;
256 				goto fail;
257 			}
258 			err = lock_to_ceph_filelock(lock, &flocks[l]);
259 			if (err)
260 				goto fail;
261 			++l;
262 		}
263 	}
264 fail:
265 	return err;
266 }
267 
268 /**
269  * Copy the encoded flock and fcntl locks into the pagelist.
270  * Format is: #fcntl locks, sequential fcntl locks, #flock locks,
271  * sequential flock locks.
272  * Returns zero on success.
273  */
274 int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
275 			   struct ceph_pagelist *pagelist,
276 			   int num_fcntl_locks, int num_flock_locks)
277 {
278 	int err = 0;
279 	__le32 nlocks;
280 
281 	nlocks = cpu_to_le32(num_fcntl_locks);
282 	err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
283 	if (err)
284 		goto out_fail;
285 
286 	err = ceph_pagelist_append(pagelist, flocks,
287 				   num_fcntl_locks * sizeof(*flocks));
288 	if (err)
289 		goto out_fail;
290 
291 	nlocks = cpu_to_le32(num_flock_locks);
292 	err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
293 	if (err)
294 		goto out_fail;
295 
296 	err = ceph_pagelist_append(pagelist,
297 				   &flocks[num_fcntl_locks],
298 				   num_flock_locks * sizeof(*flocks));
299 out_fail:
300 	return err;
301 }
302 
303 /*
304  * Given a pointer to a lock, convert it to a ceph filelock
305  */
306 int lock_to_ceph_filelock(struct file_lock *lock,
307 			  struct ceph_filelock *cephlock)
308 {
309 	int err = 0;
310 	cephlock->start = cpu_to_le64(lock->fl_start);
311 	cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
312 	cephlock->client = cpu_to_le64(0);
313 	cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
314 	cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));
315 
316 	switch (lock->fl_type) {
317 	case F_RDLCK:
318 		cephlock->type = CEPH_LOCK_SHARED;
319 		break;
320 	case F_WRLCK:
321 		cephlock->type = CEPH_LOCK_EXCL;
322 		break;
323 	case F_UNLCK:
324 		cephlock->type = CEPH_LOCK_UNLOCK;
325 		break;
326 	default:
327 		dout("Have unknown lock type %d", lock->fl_type);
328 		err = -EINVAL;
329 	}
330 
331 	return err;
332 }
333