155716d26SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2b8441ed2STejun Heo /*
3b8441ed2STejun Heo * fs/kernfs/file.c - kernfs file implementation
4b8441ed2STejun Heo *
5b8441ed2STejun Heo * Copyright (c) 2001-3 Patrick Mochel
6b8441ed2STejun Heo * Copyright (c) 2007 SUSE Linux Products GmbH
7b8441ed2STejun Heo * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
8b8441ed2STejun Heo */
9414985aeSTejun Heo
10414985aeSTejun Heo #include <linux/fs.h>
11414985aeSTejun Heo #include <linux/seq_file.h>
12414985aeSTejun Heo #include <linux/slab.h>
13414985aeSTejun Heo #include <linux/poll.h>
14414985aeSTejun Heo #include <linux/pagemap.h>
15589ee628SIngo Molnar #include <linux/sched/mm.h>
16d911d987STejun Heo #include <linux/fsnotify.h>
174eaad21aSChristoph Hellwig #include <linux/uio.h>
18414985aeSTejun Heo
19414985aeSTejun Heo #include "kernfs-internal.h"
20414985aeSTejun Heo
21c525aaddSTejun Heo struct kernfs_open_node {
22086c00c7SImran Khan struct rcu_head rcu_head;
23414985aeSTejun Heo atomic_t event;
24414985aeSTejun Heo wait_queue_head_t poll;
25c525aaddSTejun Heo struct list_head files; /* goes through kernfs_open_file.list */
26bdb2fd7fSTejun Heo unsigned int nr_mmapped;
27bdb2fd7fSTejun Heo unsigned int nr_to_release;
28414985aeSTejun Heo };
29414985aeSTejun Heo
302fd26970SImran Khan /*
312fd26970SImran Khan * kernfs_notify() may be called from any context and bounces notifications
322fd26970SImran Khan * through a work item. To minimize space overhead in kernfs_node, the
332fd26970SImran Khan * pending queue is implemented as a singly linked list of kernfs_nodes.
342fd26970SImran Khan * The list is terminated with the self pointer so that whether a
352fd26970SImran Khan * kernfs_node is on the list or not can be determined by testing the next
3624b3e3ddSRandy Dunlap * pointer for %NULL.
37ecca47ceSTejun Heo */
382fd26970SImran Khan #define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list)
39ecca47ceSTejun Heo
402fd26970SImran Khan static DEFINE_SPINLOCK(kernfs_notify_lock);
412fd26970SImran Khan static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;
42ecca47ceSTejun Heo
kernfs_open_file_mutex_ptr(struct kernfs_node * kn)4341448c61SImran Khan static inline struct mutex *kernfs_open_file_mutex_ptr(struct kernfs_node *kn)
4441448c61SImran Khan {
451d25b84eSImran Khan int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS);
461d25b84eSImran Khan
471d25b84eSImran Khan return &kernfs_locks->open_file_mutex[idx];
4841448c61SImran Khan }
4941448c61SImran Khan
/* Acquire @kn's hashed open-file mutex and hand the held lock back to the caller. */
static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn)
{
	struct mutex *mutex = kernfs_open_file_mutex_ptr(kn);

	mutex_lock(mutex);
	return mutex;
}
6041448c61SImran Khan
61086c00c7SImran Khan /**
6224b3e3ddSRandy Dunlap * of_on - Get the kernfs_open_node of the specified kernfs_open_file
6324b3e3ddSRandy Dunlap * @of: target kernfs_open_file
6424b3e3ddSRandy Dunlap *
6524b3e3ddSRandy Dunlap * Return: the kernfs_open_node of the kernfs_open_file
66086c00c7SImran Khan */
of_on(struct kernfs_open_file * of)673db48acaSTejun Heo static struct kernfs_open_node *of_on(struct kernfs_open_file *of)
68086c00c7SImran Khan {
693db48acaSTejun Heo return rcu_dereference_protected(of->kn->attr.open,
703db48acaSTejun Heo !list_empty(&of->list));
71086c00c7SImran Khan }
72086c00c7SImran Khan
73086c00c7SImran Khan /**
743db48acaSTejun Heo * kernfs_deref_open_node_locked - Get kernfs_open_node corresponding to @kn
75086c00c7SImran Khan *
76086c00c7SImran Khan * @kn: target kernfs_node.
77086c00c7SImran Khan *
78086c00c7SImran Khan * Fetch and return ->attr.open of @kn when caller holds the
7941448c61SImran Khan * kernfs_open_file_mutex_ptr(kn).
80086c00c7SImran Khan *
8141448c61SImran Khan * Update of ->attr.open happens under kernfs_open_file_mutex_ptr(kn). So when
82086c00c7SImran Khan * the caller guarantees that this mutex is being held, other updaters can't
83086c00c7SImran Khan * change ->attr.open and this means that we can safely deref ->attr.open
84086c00c7SImran Khan * outside RCU read-side critical section.
85086c00c7SImran Khan *
86086c00c7SImran Khan * The caller needs to make sure that kernfs_open_file_mutex is held.
8724b3e3ddSRandy Dunlap *
8824b3e3ddSRandy Dunlap * Return: @kn->attr.open when kernfs_open_file_mutex is held.
89086c00c7SImran Khan */
90086c00c7SImran Khan static struct kernfs_open_node *
kernfs_deref_open_node_locked(struct kernfs_node * kn)913db48acaSTejun Heo kernfs_deref_open_node_locked(struct kernfs_node *kn)
92086c00c7SImran Khan {
93086c00c7SImran Khan return rcu_dereference_protected(kn->attr.open,
9441448c61SImran Khan lockdep_is_held(kernfs_open_file_mutex_ptr(kn)));
95086c00c7SImran Khan }
96086c00c7SImran Khan
kernfs_of(struct file * file)97c525aaddSTejun Heo static struct kernfs_open_file *kernfs_of(struct file *file)
98414985aeSTejun Heo {
99414985aeSTejun Heo return ((struct seq_file *)file->private_data)->private;
100414985aeSTejun Heo }
101414985aeSTejun Heo
102414985aeSTejun Heo /*
103324a56e1STejun Heo * Determine the kernfs_ops for the given kernfs_node. This function must
104414985aeSTejun Heo * be called while holding an active reference.
105414985aeSTejun Heo */
static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn)
{
	/*
	 * Caller must hold an active reference on @kn; when lockdep
	 * tracking is enabled for this node, assert that instead of
	 * silently trusting the caller.
	 */
	if (kn->flags & KERNFS_LOCKDEP)
		lockdep_assert_held(kn);
	return kn->attr.ops;
}
112414985aeSTejun Heo
113bb305947STejun Heo /*
114bb305947STejun Heo * As kernfs_seq_stop() is also called after kernfs_seq_start() or
115bb305947STejun Heo * kernfs_seq_next() failure, it needs to distinguish whether it's stopping
116bb305947STejun Heo * a seq_file iteration which is fully initialized with an active reference
117bb305947STejun Heo * or an aborted kernfs_seq_start() due to get_active failure. The
118bb305947STejun Heo * position pointer is the only context for each seq_file iteration and
119bb305947STejun Heo * thus the stop condition should be encoded in it. As the return value is
120bb305947STejun Heo * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable
121bb305947STejun Heo * choice to indicate get_active failure.
122bb305947STejun Heo *
123bb305947STejun Heo * Unfortunately, this is complicated due to the optional custom seq_file
124bb305947STejun Heo * operations which may return ERR_PTR(-ENODEV) too. kernfs_seq_stop()
125bb305947STejun Heo * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or
126bb305947STejun Heo * custom seq_file operations and thus can't decide whether put_active
127bb305947STejun Heo * should be performed or not only on ERR_PTR(-ENODEV).
128bb305947STejun Heo *
129bb305947STejun Heo * This is worked around by factoring out the custom seq_stop() and
130bb305947STejun Heo * put_active part into kernfs_seq_stop_active(), skipping it from
131bb305947STejun Heo * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after
132bb305947STejun Heo * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures
133bb305947STejun Heo * that kernfs_seq_stop_active() is skipped only after get_active failure.
134bb305947STejun Heo */
static void kernfs_seq_stop_active(struct seq_file *sf, void *v)
{
	struct kernfs_open_file *of = sf->private;
	const struct kernfs_ops *ops = kernfs_ops(of->kn);

	/* run the custom seq_stop first, then drop the active ref taken in start */
	if (ops->seq_stop)
		ops->seq_stop(sf, v);
	kernfs_put_active(of->kn);
}
144bb305947STejun Heo
static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
{
	struct kernfs_open_file *of = sf->private;
	const struct kernfs_ops *ops;

	/*
	 * @of->mutex nests outside active ref and is primarily to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!kernfs_get_active(of->kn))
		/* mutex intentionally stays held; kernfs_seq_stop() unlocks it */
		return ERR_PTR(-ENODEV);

	ops = kernfs_ops(of->kn);
	if (ops->seq_start) {
		void *next = ops->seq_start(sf, ppos);
		/* see the comment above kernfs_seq_stop_active() */
		if (next == ERR_PTR(-ENODEV))
			kernfs_seq_stop_active(sf, next);
		return next;
	}
	/* no custom iterator: single-record iteration, as in single_open() */
	return single_start(sf, ppos);
}
168414985aeSTejun Heo
/* seq_file ->next: advance via the node's custom seq_next when provided */
static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos)
{
	struct kernfs_open_file *of = sf->private;
	const struct kernfs_ops *ops = kernfs_ops(of->kn);

	if (ops->seq_next) {
		void *next = ops->seq_next(sf, v, ppos);
		/* see the comment above kernfs_seq_stop_active() */
		if (next == ERR_PTR(-ENODEV))
			kernfs_seq_stop_active(sf, next);
		return next;
	} else {
		/*
		 * The same behavior and code as single_open(), always
		 * terminate after the initial read.
		 */
		++*ppos;
		return NULL;
	}
}
189414985aeSTejun Heo
static void kernfs_seq_stop(struct seq_file *sf, void *v)
{
	struct kernfs_open_file *of = sf->private;

	/*
	 * ERR_PTR(-ENODEV) means kernfs_seq_start() failed to get an active
	 * ref, in which case stop_active must be skipped; see the comment
	 * above kernfs_seq_stop_active().
	 */
	if (v != ERR_PTR(-ENODEV))
		kernfs_seq_stop_active(sf, v);
	mutex_unlock(&of->mutex);
}
198414985aeSTejun Heo
kernfs_seq_show(struct seq_file * sf,void * v)199414985aeSTejun Heo static int kernfs_seq_show(struct seq_file *sf, void *v)
200414985aeSTejun Heo {
201c525aaddSTejun Heo struct kernfs_open_file *of = sf->private;
202414985aeSTejun Heo
2033db48acaSTejun Heo of->event = atomic_read(&of_on(of)->event);
204414985aeSTejun Heo
205adc5e8b5STejun Heo return of->kn->attr.ops->seq_show(sf, v);
206414985aeSTejun Heo }
207414985aeSTejun Heo
/* seq_file operations backing nodes that implement ->seq_show; see kernfs_fop_read_iter() */
static const struct seq_operations kernfs_seq_ops = {
	.start = kernfs_seq_start,
	.next = kernfs_seq_next,
	.stop = kernfs_seq_stop,
	.show = kernfs_seq_show,
};
214414985aeSTejun Heo
215414985aeSTejun Heo /*
216414985aeSTejun Heo * As reading a bin file can have side-effects, the exact offset and bytes
217414985aeSTejun Heo * specified in read(2) call should be passed to the read callback making
218414985aeSTejun Heo * it difficult to use seq_file. Implement simplistic custom buffering for
219414985aeSTejun Heo * bin files.
220414985aeSTejun Heo */
static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
	ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE);
	const struct kernfs_ops *ops;
	char *buf;

	/* use the preallocated buffer when available, else a transient one */
	buf = of->prealloc_buf;
	if (buf)
		mutex_lock(&of->prealloc_mutex);
	else
		buf = kmalloc(len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/*
	 * @of->mutex nests outside active ref and is used to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!kernfs_get_active(of->kn)) {
		len = -ENODEV;
		mutex_unlock(&of->mutex);
		goto out_free;
	}

	/* record the event count current at read time */
	of->event = atomic_read(&of_on(of)->event);

	ops = kernfs_ops(of->kn);
	if (ops->read)
		len = ops->read(of, buf, len, iocb->ki_pos);
	else
		len = -EINVAL;

	/* drop the active ref and mutex before copying out to userspace */
	kernfs_put_active(of->kn);
	mutex_unlock(&of->mutex);

	if (len < 0)
		goto out_free;

	if (copy_to_iter(buf, len, iter) != len) {
		len = -EFAULT;
		goto out_free;
	}

	iocb->ki_pos += len;

 out_free:
	if (buf == of->prealloc_buf)
		mutex_unlock(&of->prealloc_mutex);
	else
		kfree(buf);
	return len;
}
275414985aeSTejun Heo
kernfs_fop_read_iter(struct kiocb * iocb,struct iov_iter * iter)2764eaad21aSChristoph Hellwig static ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter)
277414985aeSTejun Heo {
2784eaad21aSChristoph Hellwig if (kernfs_of(iocb->ki_filp)->kn->flags & KERNFS_HAS_SEQ_SHOW)
2794eaad21aSChristoph Hellwig return seq_read_iter(iocb, iter);
2804eaad21aSChristoph Hellwig return kernfs_file_read_iter(iocb, iter);
281414985aeSTejun Heo }
282414985aeSTejun Heo
283cc099e0bSChristoph Hellwig /*
284414985aeSTejun Heo * Copy data in from userland and pass it to the matching kernfs write
285414985aeSTejun Heo * operation.
286414985aeSTejun Heo *
287414985aeSTejun Heo * There is no easy way for us to know if userspace is only doing a partial
288414985aeSTejun Heo * write, so we don't support them. We expect the entire buffer to come on
289414985aeSTejun Heo * the first write. Hint: if you're writing a value, first read the file,
2903fe40764SSlark Xiao * modify only the value you're changing, then write entire buffer
291414985aeSTejun Heo * back.
292414985aeSTejun Heo */
static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
	ssize_t len = iov_iter_count(iter);
	const struct kernfs_ops *ops;
	char *buf;

	/*
	 * With atomic_write_len set, oversized writes are rejected outright
	 * rather than truncated; otherwise clamp to one page.
	 */
	if (of->atomic_write_len) {
		if (len > of->atomic_write_len)
			return -E2BIG;
	} else {
		len = min_t(size_t, len, PAGE_SIZE);
	}

	/* use the preallocated buffer when available, else a transient one */
	buf = of->prealloc_buf;
	if (buf)
		mutex_lock(&of->prealloc_mutex);
	else
		buf = kmalloc(len + 1, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* copy in the user data before taking @of->mutex */
	if (copy_from_iter(buf, len, iter) != len) {
		len = -EFAULT;
		goto out_free;
	}
	buf[len] = '\0';	/* guarantee string termination */

	/*
	 * @of->mutex nests outside active ref and is used to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!kernfs_get_active(of->kn)) {
		mutex_unlock(&of->mutex);
		len = -ENODEV;
		goto out_free;
	}

	ops = kernfs_ops(of->kn);
	if (ops->write)
		len = ops->write(of, buf, len, iocb->ki_pos);
	else
		len = -EINVAL;

	kernfs_put_active(of->kn);
	mutex_unlock(&of->mutex);

	if (len > 0)
		iocb->ki_pos += len;

out_free:
	if (buf == of->prealloc_buf)
		mutex_unlock(&of->prealloc_mutex);
	else
		kfree(buf);
	return len;
}
351414985aeSTejun Heo
kernfs_vma_open(struct vm_area_struct * vma)352414985aeSTejun Heo static void kernfs_vma_open(struct vm_area_struct *vma)
353414985aeSTejun Heo {
354414985aeSTejun Heo struct file *file = vma->vm_file;
355c525aaddSTejun Heo struct kernfs_open_file *of = kernfs_of(file);
356414985aeSTejun Heo
357414985aeSTejun Heo if (!of->vm_ops)
358414985aeSTejun Heo return;
359414985aeSTejun Heo
360c637b8acSTejun Heo if (!kernfs_get_active(of->kn))
361414985aeSTejun Heo return;
362414985aeSTejun Heo
363414985aeSTejun Heo if (of->vm_ops->open)
364414985aeSTejun Heo of->vm_ops->open(vma);
365414985aeSTejun Heo
366c637b8acSTejun Heo kernfs_put_active(of->kn);
367414985aeSTejun Heo }
368414985aeSTejun Heo
kernfs_vma_fault(struct vm_fault * vmf)3699ee84466SSouptick Joarder static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
370414985aeSTejun Heo {
37111bac800SDave Jiang struct file *file = vmf->vma->vm_file;
372c525aaddSTejun Heo struct kernfs_open_file *of = kernfs_of(file);
3739ee84466SSouptick Joarder vm_fault_t ret;
374414985aeSTejun Heo
375414985aeSTejun Heo if (!of->vm_ops)
376414985aeSTejun Heo return VM_FAULT_SIGBUS;
377414985aeSTejun Heo
378c637b8acSTejun Heo if (!kernfs_get_active(of->kn))
379414985aeSTejun Heo return VM_FAULT_SIGBUS;
380414985aeSTejun Heo
381414985aeSTejun Heo ret = VM_FAULT_SIGBUS;
382414985aeSTejun Heo if (of->vm_ops->fault)
38311bac800SDave Jiang ret = of->vm_ops->fault(vmf);
384414985aeSTejun Heo
385c637b8acSTejun Heo kernfs_put_active(of->kn);
386414985aeSTejun Heo return ret;
387414985aeSTejun Heo }
388414985aeSTejun Heo
kernfs_vma_page_mkwrite(struct vm_fault * vmf)3899ee84466SSouptick Joarder static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
390414985aeSTejun Heo {
39111bac800SDave Jiang struct file *file = vmf->vma->vm_file;
392c525aaddSTejun Heo struct kernfs_open_file *of = kernfs_of(file);
3939ee84466SSouptick Joarder vm_fault_t ret;
394414985aeSTejun Heo
395414985aeSTejun Heo if (!of->vm_ops)
396414985aeSTejun Heo return VM_FAULT_SIGBUS;
397414985aeSTejun Heo
398c637b8acSTejun Heo if (!kernfs_get_active(of->kn))
399414985aeSTejun Heo return VM_FAULT_SIGBUS;
400414985aeSTejun Heo
401414985aeSTejun Heo ret = 0;
402414985aeSTejun Heo if (of->vm_ops->page_mkwrite)
40311bac800SDave Jiang ret = of->vm_ops->page_mkwrite(vmf);
404414985aeSTejun Heo else
405414985aeSTejun Heo file_update_time(file);
406414985aeSTejun Heo
407c637b8acSTejun Heo kernfs_put_active(of->kn);
408414985aeSTejun Heo return ret;
409414985aeSTejun Heo }
410414985aeSTejun Heo
kernfs_vma_access(struct vm_area_struct * vma,unsigned long addr,void * buf,int len,int write)411414985aeSTejun Heo static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
412414985aeSTejun Heo void *buf, int len, int write)
413414985aeSTejun Heo {
414414985aeSTejun Heo struct file *file = vma->vm_file;
415c525aaddSTejun Heo struct kernfs_open_file *of = kernfs_of(file);
416414985aeSTejun Heo int ret;
417414985aeSTejun Heo
418414985aeSTejun Heo if (!of->vm_ops)
419414985aeSTejun Heo return -EINVAL;
420414985aeSTejun Heo
421c637b8acSTejun Heo if (!kernfs_get_active(of->kn))
422414985aeSTejun Heo return -EINVAL;
423414985aeSTejun Heo
424414985aeSTejun Heo ret = -EINVAL;
425414985aeSTejun Heo if (of->vm_ops->access)
426414985aeSTejun Heo ret = of->vm_ops->access(vma, addr, buf, len, write);
427414985aeSTejun Heo
428c637b8acSTejun Heo kernfs_put_active(of->kn);
429414985aeSTejun Heo return ret;
430414985aeSTejun Heo }
431414985aeSTejun Heo
432414985aeSTejun Heo #ifdef CONFIG_NUMA
kernfs_vma_set_policy(struct vm_area_struct * vma,struct mempolicy * new)433414985aeSTejun Heo static int kernfs_vma_set_policy(struct vm_area_struct *vma,
434414985aeSTejun Heo struct mempolicy *new)
435414985aeSTejun Heo {
436414985aeSTejun Heo struct file *file = vma->vm_file;
437c525aaddSTejun Heo struct kernfs_open_file *of = kernfs_of(file);
438414985aeSTejun Heo int ret;
439414985aeSTejun Heo
440414985aeSTejun Heo if (!of->vm_ops)
441414985aeSTejun Heo return 0;
442414985aeSTejun Heo
443c637b8acSTejun Heo if (!kernfs_get_active(of->kn))
444414985aeSTejun Heo return -EINVAL;
445414985aeSTejun Heo
446414985aeSTejun Heo ret = 0;
447414985aeSTejun Heo if (of->vm_ops->set_policy)
448414985aeSTejun Heo ret = of->vm_ops->set_policy(vma, new);
449414985aeSTejun Heo
450c637b8acSTejun Heo kernfs_put_active(of->kn);
451414985aeSTejun Heo return ret;
452414985aeSTejun Heo }
453414985aeSTejun Heo
kernfs_vma_get_policy(struct vm_area_struct * vma,unsigned long addr)454414985aeSTejun Heo static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
455414985aeSTejun Heo unsigned long addr)
456414985aeSTejun Heo {
457414985aeSTejun Heo struct file *file = vma->vm_file;
458c525aaddSTejun Heo struct kernfs_open_file *of = kernfs_of(file);
459414985aeSTejun Heo struct mempolicy *pol;
460414985aeSTejun Heo
461414985aeSTejun Heo if (!of->vm_ops)
462414985aeSTejun Heo return vma->vm_policy;
463414985aeSTejun Heo
464c637b8acSTejun Heo if (!kernfs_get_active(of->kn))
465414985aeSTejun Heo return vma->vm_policy;
466414985aeSTejun Heo
467414985aeSTejun Heo pol = vma->vm_policy;
468414985aeSTejun Heo if (of->vm_ops->get_policy)
469414985aeSTejun Heo pol = of->vm_ops->get_policy(vma, addr);
470414985aeSTejun Heo
471c637b8acSTejun Heo kernfs_put_active(of->kn);
472414985aeSTejun Heo return pol;
473414985aeSTejun Heo }
474414985aeSTejun Heo
475414985aeSTejun Heo #endif
476414985aeSTejun Heo
/*
 * vm_operations installed by kernfs_fop_mmap().  Each handler revalidates
 * the node with kernfs_get_active() before forwarding to the original
 * of->vm_ops, so mappings fail cleanly once the node is deactivated.
 */
static const struct vm_operations_struct kernfs_vm_ops = {
	.open = kernfs_vma_open,
	.fault = kernfs_vma_fault,
	.page_mkwrite = kernfs_vma_page_mkwrite,
	.access = kernfs_vma_access,
#ifdef CONFIG_NUMA
	.set_policy = kernfs_vma_set_policy,
	.get_policy = kernfs_vma_get_policy,
#endif
};
487414985aeSTejun Heo
static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct kernfs_open_file *of = kernfs_of(file);
	const struct kernfs_ops *ops;
	int rc;

	/*
	 * mmap path and of->mutex are prone to triggering spurious lockdep
	 * warnings and we don't want to add spurious locking dependency
	 * between the two. Check whether mmap is actually implemented
	 * without grabbing @of->mutex by testing HAS_MMAP flag. See the
	 * comment in kernfs_file_open() for more details.
	 */
	if (!(of->kn->flags & KERNFS_HAS_MMAP))
		return -ENODEV;

	mutex_lock(&of->mutex);

	rc = -ENODEV;
	if (!kernfs_get_active(of->kn))
		goto out_unlock;

	ops = kernfs_ops(of->kn);
	rc = ops->mmap(of, vma);
	if (rc)
		goto out_put;

	/*
	 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
	 * to satisfy versions of X which crash if the mmap fails: that
	 * substitutes a new vm_file, and we don't then want bin_vm_ops.
	 */
	if (vma->vm_file != file)
		goto out_put;

	/* repeat mmaps must keep using the same wrapped vm_ops */
	rc = -EINVAL;
	if (of->mmapped && of->vm_ops != vma->vm_ops)
		goto out_put;

	/*
	 * It is not possible to successfully wrap close.
	 * So error if someone is trying to use close.
	 */
	if (vma->vm_ops && vma->vm_ops->close)
		goto out_put;

	rc = 0;
	/* first mmap of this open file: stash the vm_ops and count the mapping */
	if (!of->mmapped) {
		of->mmapped = true;
		of_on(of)->nr_mmapped++;
		of->vm_ops = vma->vm_ops;
	}
	vma->vm_ops = &kernfs_vm_ops;
out_put:
	kernfs_put_active(of->kn);
out_unlock:
	mutex_unlock(&of->mutex);

	return rc;
}
548414985aeSTejun Heo
549414985aeSTejun Heo /**
550c637b8acSTejun Heo * kernfs_get_open_node - get or create kernfs_open_node
551324a56e1STejun Heo * @kn: target kernfs_node
552c525aaddSTejun Heo * @of: kernfs_open_file for this instance of open
553414985aeSTejun Heo *
554adc5e8b5STejun Heo * If @kn->attr.open exists, increment its reference count; otherwise,
555adc5e8b5STejun Heo * create one. @of is chained to the files list.
556414985aeSTejun Heo *
55724b3e3ddSRandy Dunlap * Locking:
558414985aeSTejun Heo * Kernel thread context (may sleep).
559414985aeSTejun Heo *
56024b3e3ddSRandy Dunlap * Return:
56124b3e3ddSRandy Dunlap * %0 on success, -errno on failure.
562414985aeSTejun Heo */
static int kernfs_get_open_node(struct kernfs_node *kn,
				struct kernfs_open_file *of)
{
	struct kernfs_open_node *on;
	struct mutex *mutex;

	mutex = kernfs_open_file_mutex_lock(kn);
	on = kernfs_deref_open_node_locked(kn);

	if (!on) {
		/* not there, initialize a new one */
		on = kzalloc(sizeof(*on), GFP_KERNEL);
		if (!on) {
			mutex_unlock(mutex);
			return -ENOMEM;
		}
		atomic_set(&on->event, 1);
		init_waitqueue_head(&on->poll);
		INIT_LIST_HEAD(&on->files);
		/* publish only after the node is fully initialized */
		rcu_assign_pointer(kn->attr.open, on);
	}

	list_add_tail(&of->list, &on->files);
	/* a release callback is owed for this opener; see kernfs_unlink_open_file() */
	if (kn->flags & KERNFS_HAS_RELEASE)
		on->nr_to_release++;

	mutex_unlock(mutex);
	return 0;
}
592414985aeSTejun Heo
593414985aeSTejun Heo /**
594c1b1352fSImran Khan * kernfs_unlink_open_file - Unlink @of from @kn.
595c1b1352fSImran Khan *
596bd900901SImran Khan * @kn: target kernfs_node
597c525aaddSTejun Heo * @of: associated kernfs_open_file
598bdb2fd7fSTejun Heo * @open_failed: ->open() failed, cancel ->release()
599414985aeSTejun Heo *
600c1b1352fSImran Khan * Unlink @of from list of @kn's associated open files. If list of
601c1b1352fSImran Khan * associated open files becomes empty, disassociate and free
602c1b1352fSImran Khan * kernfs_open_node.
603414985aeSTejun Heo *
604414985aeSTejun Heo * LOCKING:
605414985aeSTejun Heo * None.
606414985aeSTejun Heo */
static void kernfs_unlink_open_file(struct kernfs_node *kn,
				    struct kernfs_open_file *of,
				    bool open_failed)
{
	struct kernfs_open_node *on;
	struct mutex *mutex;

	mutex = kernfs_open_file_mutex_lock(kn);

	on = kernfs_deref_open_node_locked(kn);
	if (!on) {
		mutex_unlock(mutex);
		return;
	}

	if (of) {
		if (kn->flags & KERNFS_HAS_RELEASE) {
			/* an aborted open must not have been released, and vice versa */
			WARN_ON_ONCE(of->released == open_failed);
			if (open_failed)
				on->nr_to_release--;
		}
		if (of->mmapped)
			on->nr_mmapped--;
		list_del(&of->list);
	}

	/* last opener gone: unpublish and free after an RCU grace period */
	if (list_empty(&on->files)) {
		rcu_assign_pointer(kn->attr.open, NULL);
		kfree_rcu(on, rcu_head);
	}

	mutex_unlock(mutex);
}
640414985aeSTejun Heo
/*
 * Open file operation for kernfs regular files.
 *
 * Validates read/write access against the node's kernfs_ops, allocates a
 * kernfs_open_file, sets up the seq_file plumbing (always, even for
 * non-seq readers, to unify private-data access), links the file into the
 * node's kernfs_open_node and finally invokes ops->open() if provided.
 * Error paths unwind in exact reverse order of setup via the goto ladder.
 */
kernfs_fop_open(struct inode * inode,struct file * file)641c637b8acSTejun Heo static int kernfs_fop_open(struct inode *inode, struct file *file)
642414985aeSTejun Heo {
643319ba91dSShaohua Li struct kernfs_node *kn = inode->i_private;
644555724a8STejun Heo struct kernfs_root *root = kernfs_root(kn);
645414985aeSTejun Heo const struct kernfs_ops *ops;
646c525aaddSTejun Heo struct kernfs_open_file *of;
647414985aeSTejun Heo bool has_read, has_write, has_mmap;
648414985aeSTejun Heo int error = -EACCES;
649414985aeSTejun Heo
/* pin the node active so @kn and its ops stay valid throughout open */
650c637b8acSTejun Heo if (!kernfs_get_active(kn))
651414985aeSTejun Heo return -ENODEV;
652414985aeSTejun Heo
653324a56e1STejun Heo ops = kernfs_ops(kn);
654414985aeSTejun Heo
/* derive capabilities from the ops table; mmap implies both r and w */
655414985aeSTejun Heo has_read = ops->seq_show || ops->read || ops->mmap;
656414985aeSTejun Heo has_write = ops->write || ops->mmap;
657414985aeSTejun Heo has_mmap = ops->mmap;
658414985aeSTejun Heo
659555724a8STejun Heo /* see the flag definition for details */
660555724a8STejun Heo if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) {
661414985aeSTejun Heo if ((file->f_mode & FMODE_WRITE) &&
662414985aeSTejun Heo (!(inode->i_mode & S_IWUGO) || !has_write))
663414985aeSTejun Heo goto err_out;
664414985aeSTejun Heo
665414985aeSTejun Heo if ((file->f_mode & FMODE_READ) &&
666414985aeSTejun Heo (!(inode->i_mode & S_IRUGO) || !has_read))
667414985aeSTejun Heo goto err_out;
668555724a8STejun Heo }
669414985aeSTejun Heo
670c525aaddSTejun Heo /* allocate a kernfs_open_file for the file */
671414985aeSTejun Heo error = -ENOMEM;
672c525aaddSTejun Heo of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL);
673414985aeSTejun Heo if (!of)
674414985aeSTejun Heo goto err_out;
675414985aeSTejun Heo
676414985aeSTejun Heo /*
677414985aeSTejun Heo * The following is done to give a different lockdep key to
678414985aeSTejun Heo * @of->mutex for files which implement mmap. This is a rather
679414985aeSTejun Heo * crude way to avoid false positive lockdep warning around
680c1e8d7c6SMichel Lespinasse * mm->mmap_lock - mmap nests @of->mutex under mm->mmap_lock and
681414985aeSTejun Heo * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
682c1e8d7c6SMichel Lespinasse * which mm->mmap_lock nests, while holding @of->mutex. As each
683414985aeSTejun Heo * open file has a separate mutex, it's okay as long as those don't
684414985aeSTejun Heo * happen on the same file. At this point, we can't easily give
685414985aeSTejun Heo * each file a separate locking class. Let's differentiate on
686414985aeSTejun Heo * whether the file has mmap or not for now.
6879b2db6e1STejun Heo *
6889b2db6e1STejun Heo * Both paths of the branch look the same. They're supposed to
6899b2db6e1STejun Heo * look that way and give @of->mutex different static lockdep keys.
690414985aeSTejun Heo */
691414985aeSTejun Heo if (has_mmap)
692414985aeSTejun Heo mutex_init(&of->mutex);
693414985aeSTejun Heo else
694414985aeSTejun Heo mutex_init(&of->mutex);
695414985aeSTejun Heo
696324a56e1STejun Heo of->kn = kn;
697414985aeSTejun Heo of->file = file;
698414985aeSTejun Heo
699414985aeSTejun Heo /*
700b7ce40cfSTejun Heo * Write path needs to atomic_write_len outside active reference.
701cc099e0bSChristoph Hellwig * Cache it in open_file. See kernfs_fop_write_iter() for details.
702b7ce40cfSTejun Heo */
703b7ce40cfSTejun Heo of->atomic_write_len = ops->atomic_write_len;
704b7ce40cfSTejun Heo
7054ef67a8cSNeilBrown error = -EINVAL;
7064ef67a8cSNeilBrown /*
7074ef67a8cSNeilBrown * ->seq_show is incompatible with ->prealloc,
7084ef67a8cSNeilBrown * as seq_read does its own allocation.
7094ef67a8cSNeilBrown * ->read must be used instead.
7104ef67a8cSNeilBrown */
7114ef67a8cSNeilBrown if (ops->prealloc && ops->seq_show)
7124ef67a8cSNeilBrown goto err_free;
7132b75869bSNeilBrown if (ops->prealloc) {
/* +1 leaves room for the NUL terminator appended on write */
7142b75869bSNeilBrown int len = of->atomic_write_len ?: PAGE_SIZE;
7152b75869bSNeilBrown of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL);
7162b75869bSNeilBrown error = -ENOMEM;
7172b75869bSNeilBrown if (!of->prealloc_buf)
7182b75869bSNeilBrown goto err_free;
719e4234a1fSChris Wilson mutex_init(&of->prealloc_mutex);
7202b75869bSNeilBrown }
7212b75869bSNeilBrown
722b7ce40cfSTejun Heo /*
723414985aeSTejun Heo * Always instantiate seq_file even if read access doesn't use
724414985aeSTejun Heo * seq_file or is not requested. This unifies private data access
725414985aeSTejun Heo * and readable regular files are the vast majority anyway.
726414985aeSTejun Heo */
727414985aeSTejun Heo if (ops->seq_show)
728414985aeSTejun Heo error = seq_open(file, &kernfs_seq_ops);
729414985aeSTejun Heo else
730414985aeSTejun Heo error = seq_open(file, NULL);
731414985aeSTejun Heo if (error)
732414985aeSTejun Heo goto err_free;
733414985aeSTejun Heo
/* cross-link @of and its seq_file so either can reach the other */
7340e67db2fSTejun Heo of->seq_file = file->private_data;
7350e67db2fSTejun Heo of->seq_file->private = of;
736414985aeSTejun Heo
737414985aeSTejun Heo /* seq_file clears PWRITE unconditionally, restore it if WRITE */
738414985aeSTejun Heo if (file->f_mode & FMODE_WRITE)
739414985aeSTejun Heo file->f_mode |= FMODE_PWRITE;
740414985aeSTejun Heo
741c637b8acSTejun Heo /* make sure we have open node struct */
742c637b8acSTejun Heo error = kernfs_get_open_node(kn, of);
743414985aeSTejun Heo if (error)
7440e67db2fSTejun Heo goto err_seq_release;
7450e67db2fSTejun Heo
7460e67db2fSTejun Heo if (ops->open) {
7470e67db2fSTejun Heo /* nobody has access to @of yet, skip @of->mutex */
7480e67db2fSTejun Heo error = ops->open(of);
7490e67db2fSTejun Heo if (error)
7500e67db2fSTejun Heo goto err_put_node;
7510e67db2fSTejun Heo }
752414985aeSTejun Heo
753414985aeSTejun Heo /* open succeeded, put active references */
754c637b8acSTejun Heo kernfs_put_active(kn);
755414985aeSTejun Heo return 0;
756414985aeSTejun Heo
/* unwind in reverse order of setup; 'true' marks a failed open */
7570e67db2fSTejun Heo err_put_node:
758bdb2fd7fSTejun Heo kernfs_unlink_open_file(kn, of, true);
7590e67db2fSTejun Heo err_seq_release:
760414985aeSTejun Heo seq_release(inode, file);
761414985aeSTejun Heo err_free:
7622b75869bSNeilBrown kfree(of->prealloc_buf);
763414985aeSTejun Heo kfree(of);
764414985aeSTejun Heo err_out:
765c637b8acSTejun Heo kernfs_put_active(kn);
766414985aeSTejun Heo return error;
767414985aeSTejun Heo }
768414985aeSTejun Heo
7690e67db2fSTejun Heo /* used from release/drain to ensure that ->release() is called exactly once */
/*
 * Invoke ops->release() on @of exactly once. Shared by the normal file
 * release path and the drain path; of->released records that the callback
 * has already run so the second caller skips it.
 */
kernfs_release_file(struct kernfs_node * kn,struct kernfs_open_file * of)7700e67db2fSTejun Heo static void kernfs_release_file(struct kernfs_node *kn,
7710e67db2fSTejun Heo struct kernfs_open_file *of)
7720e67db2fSTejun Heo {
773f83f3c51STejun Heo /*
774f83f3c51STejun Heo * @of is guaranteed to have no other file operations in flight and
775f83f3c51STejun Heo * we just want to synchronize release and drain paths.
77641448c61SImran Khan * @kernfs_open_file_mutex_ptr(kn) is enough. @of->mutex can't be used
777f83f3c51STejun Heo * here because drain path may be called from places which can
778f83f3c51STejun Heo * cause circular dependency.
779f83f3c51STejun Heo */
78041448c61SImran Khan lockdep_assert_held(kernfs_open_file_mutex_ptr(kn));
7810e67db2fSTejun Heo
7820e67db2fSTejun Heo if (!of->released) {
7830e67db2fSTejun Heo /*
7840e67db2fSTejun Heo * A file is never detached without being released and we
7850e67db2fSTejun Heo * need to be able to release files which are deactivated
7860e67db2fSTejun Heo * and being drained. Don't use kernfs_ops().
7870e67db2fSTejun Heo */
7880e67db2fSTejun Heo kn->attr.ops->release(of);
7890e67db2fSTejun Heo of->released = true;
/* one fewer pending release tracked on the open node */
790bdb2fd7fSTejun Heo of_on(of)->nr_to_release--;
7910e67db2fSTejun Heo }
7920e67db2fSTejun Heo }
7930e67db2fSTejun Heo
/*
 * Release file operation: run ->release() (if the node has one) under the
 * per-node open-file mutex to synchronize against drain, unlink @of from
 * the node's open-node list, then tear down the seq_file and free buffers.
 */
kernfs_fop_release(struct inode * inode,struct file * filp)794c637b8acSTejun Heo static int kernfs_fop_release(struct inode *inode, struct file *filp)
795414985aeSTejun Heo {
796319ba91dSShaohua Li struct kernfs_node *kn = inode->i_private;
797c525aaddSTejun Heo struct kernfs_open_file *of = kernfs_of(filp);
798414985aeSTejun Heo
799f83f3c51STejun Heo if (kn->flags & KERNFS_HAS_RELEASE) {
800b52c2379STejun Heo struct mutex *mutex;
801b52c2379STejun Heo
80241448c61SImran Khan mutex = kernfs_open_file_mutex_lock(kn);
8030e67db2fSTejun Heo kernfs_release_file(kn, of);
80441448c61SImran Khan mutex_unlock(mutex);
805f83f3c51STejun Heo }
806f83f3c51STejun Heo
/* 'false' - this is a normal release, not a failed open */
807bdb2fd7fSTejun Heo kernfs_unlink_open_file(kn, of, false);
808414985aeSTejun Heo seq_release(inode, filp);
8092b75869bSNeilBrown kfree(of->prealloc_buf);
810414985aeSTejun Heo kfree(of);
811414985aeSTejun Heo
812414985aeSTejun Heo return 0;
813414985aeSTejun Heo }
814414985aeSTejun Heo
/*
 * Return %true if @kn has open files that still need draining - i.e. any
 * live mmaps or pending ->release() callbacks. Caller must have already
 * deactivated @kn, which freezes kn->attr.open and makes the lockless
 * RCU read below safe.
 */
kernfs_should_drain_open_files(struct kernfs_node * kn)815bdb2fd7fSTejun Heo bool kernfs_should_drain_open_files(struct kernfs_node *kn)
816bdb2fd7fSTejun Heo {
817bdb2fd7fSTejun Heo struct kernfs_open_node *on;
818bdb2fd7fSTejun Heo bool ret;
819bdb2fd7fSTejun Heo
820bdb2fd7fSTejun Heo /*
821bdb2fd7fSTejun Heo * @kn being deactivated guarantees that @kn->attr.open can't change
822bdb2fd7fSTejun Heo * beneath us making the lockless test below safe.
823bdb2fd7fSTejun Heo */
824bdb2fd7fSTejun Heo WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
825bdb2fd7fSTejun Heo
826bdb2fd7fSTejun Heo rcu_read_lock();
827bdb2fd7fSTejun Heo on = rcu_dereference(kn->attr.open);
828bdb2fd7fSTejun Heo ret = on && (on->nr_mmapped || on->nr_to_release);
829bdb2fd7fSTejun Heo rcu_read_unlock();
830bdb2fd7fSTejun Heo
831bdb2fd7fSTejun Heo return ret;
832bdb2fd7fSTejun Heo }
833bdb2fd7fSTejun Heo
/*
 * Drain all open files of a node being removed: tear down every live mmap
 * and invoke any pending ->release() callbacks, all under the per-node
 * open-file mutex. After this, no open file holds mmap or release state
 * against @kn (asserted by the WARN at the end).
 */
kernfs_drain_open_files(struct kernfs_node * kn)8340e67db2fSTejun Heo void kernfs_drain_open_files(struct kernfs_node *kn)
835414985aeSTejun Heo {
836c525aaddSTejun Heo struct kernfs_open_node *on;
837c525aaddSTejun Heo struct kernfs_open_file *of;
838b52c2379STejun Heo struct mutex *mutex;
839414985aeSTejun Heo
84041448c61SImran Khan mutex = kernfs_open_file_mutex_lock(kn);
8413db48acaSTejun Heo on = kernfs_deref_open_node_locked(kn);
842086c00c7SImran Khan if (!on) {
84341448c61SImran Khan mutex_unlock(mutex);
844bd900901SImran Khan return;
845bd900901SImran Khan }
8460e67db2fSTejun Heo
847c525aaddSTejun Heo list_for_each_entry(of, &on->files, list) {
848414985aeSTejun Heo struct inode *inode = file_inode(of->file);
8490e67db2fSTejun Heo
/* zap all user mappings of this file and drop the mmap accounting */
850bdb2fd7fSTejun Heo if (of->mmapped) {
851414985aeSTejun Heo unmap_mapping_range(inode->i_mapping, 0, 0, 1);
852bdb2fd7fSTejun Heo of->mmapped = false;
853bdb2fd7fSTejun Heo on->nr_mmapped--;
854bdb2fd7fSTejun Heo }
8550e67db2fSTejun Heo
856966fa72aSVaibhav Jain if (kn->flags & KERNFS_HAS_RELEASE)
8570e67db2fSTejun Heo kernfs_release_file(kn, of);
858414985aeSTejun Heo }
8590e67db2fSTejun Heo
860bdb2fd7fSTejun Heo WARN_ON_ONCE(on->nr_mmapped || on->nr_to_release);
86141448c61SImran Khan mutex_unlock(mutex);
862414985aeSTejun Heo }
863414985aeSTejun Heo
864c637b8acSTejun Heo /*
865c637b8acSTejun Heo * Kernfs attribute files are pollable. The idea is that you read
866414985aeSTejun Heo * the content and then you use 'poll' or 'select' to wait for
867414985aeSTejun Heo * the content to change. When the content changes (assuming the
868414985aeSTejun Heo * manager for the kobject supports notification), poll will
869a9a08845SLinus Torvalds * return EPOLLERR|EPOLLPRI, and select will return the fd whether
870414985aeSTejun Heo * it is waiting for read, write, or exceptions.
871414985aeSTejun Heo * Once poll/select indicates that the value has changed, you
872414985aeSTejun Heo * need to close and re-open the file, or seek to 0 and read again.
873414985aeSTejun Heo * Reminder: this only works for attributes which actively support
874414985aeSTejun Heo * it, and it is not possible to test an attribute from userspace
875414985aeSTejun Heo * to see if it supports poll (Neither 'poll' nor 'select' return
876414985aeSTejun Heo * an appropriate error code). When in doubt, set a suitable timeout value.
877414985aeSTejun Heo */
/*
 * Default poll implementation for kernfs files. Registers the waiter on
 * the open node's waitqueue; if the node's event counter has advanced
 * since this file last read it (of->event), report an exceptional
 * condition so userspace knows the content changed.
 */
kernfs_generic_poll(struct kernfs_open_file * of,poll_table * wait)878147e1a97SJohannes Weiner __poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait)
879147e1a97SJohannes Weiner {
8803db48acaSTejun Heo struct kernfs_open_node *on = of_on(of);
881147e1a97SJohannes Weiner
882147e1a97SJohannes Weiner poll_wait(of->file, &on->poll, wait);
883147e1a97SJohannes Weiner
884147e1a97SJohannes Weiner if (of->event != atomic_read(&on->event))
885147e1a97SJohannes Weiner return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
886147e1a97SJohannes Weiner
887147e1a97SJohannes Weiner return DEFAULT_POLLMASK;
888147e1a97SJohannes Weiner }
889147e1a97SJohannes Weiner
/*
 * Poll file operation: dispatch to the node's ->poll() if provided,
 * otherwise to kernfs_generic_poll(), bracketed by an active reference.
 * A dead node reports EPOLLERR|EPOLLPRI so pollers wake up and notice.
 */
kernfs_fop_poll(struct file * filp,poll_table * wait)890076ccb76SAl Viro static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
891414985aeSTejun Heo {
892c525aaddSTejun Heo struct kernfs_open_file *of = kernfs_of(filp);
893319ba91dSShaohua Li struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
894147e1a97SJohannes Weiner __poll_t ret;
895414985aeSTejun Heo
896c637b8acSTejun Heo if (!kernfs_get_active(kn))
897147e1a97SJohannes Weiner return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
898414985aeSTejun Heo
899147e1a97SJohannes Weiner if (kn->attr.ops->poll)
900147e1a97SJohannes Weiner ret = kn->attr.ops->poll(of, wait);
901147e1a97SJohannes Weiner else
902147e1a97SJohannes Weiner ret = kernfs_generic_poll(of, wait);
903414985aeSTejun Heo
904c637b8acSTejun Heo kernfs_put_active(kn);
905147e1a97SJohannes Weiner return ret;
906414985aeSTejun Heo }
907414985aeSTejun Heo
/*
 * Deferred half of kernfs_notify(): pops one node at a time off the
 * singly-linked kernfs_notify_list (under kernfs_notify_lock) and
 * generates fsnotify FS_MODIFY events for it on every superblock of its
 * root. Loops via 'goto repeat' until the list is empty; drops the node
 * reference taken by kernfs_notify() after each node is processed.
 */
kernfs_notify_workfn(struct work_struct * work)908ecca47ceSTejun Heo static void kernfs_notify_workfn(struct work_struct *work)
909414985aeSTejun Heo {
910ecca47ceSTejun Heo struct kernfs_node *kn;
911d911d987STejun Heo struct kernfs_super_info *info;
912393c3714SMinchan Kim struct kernfs_root *root;
913ecca47ceSTejun Heo repeat:
914ecca47ceSTejun Heo /* pop one off the notify_list */
9152fd26970SImran Khan spin_lock_irq(&kernfs_notify_lock);
9162fd26970SImran Khan kn = kernfs_notify_list;
9172fd26970SImran Khan if (kn == KERNFS_NOTIFY_EOL) {
9182fd26970SImran Khan spin_unlock_irq(&kernfs_notify_lock);
919d911d987STejun Heo return;
9202fd26970SImran Khan }
9212fd26970SImran Khan kernfs_notify_list = kn->attr.notify_next;
9222fd26970SImran Khan kn->attr.notify_next = NULL;
9232fd26970SImran Khan spin_unlock_irq(&kernfs_notify_lock);
924d911d987STejun Heo
925393c3714SMinchan Kim root = kernfs_root(kn);
926d911d987STejun Heo /* kick fsnotify */
927d911d987STejun Heo
/* supers list is stable while we hold kernfs_supers_rwsem for read */
928c9f2dfb7SImran Khan down_read(&root->kernfs_supers_rwsem);
929ecca47ceSTejun Heo list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
930df6a58c5STejun Heo struct kernfs_node *parent;
931497b0c5aSAmir Goldstein struct inode *p_inode = NULL;
932d911d987STejun Heo struct inode *inode;
93325b229dfSAl Viro struct qstr name;
934d911d987STejun Heo
935df6a58c5STejun Heo /*
936df6a58c5STejun Heo * We want fsnotify_modify() on @kn but as the
937df6a58c5STejun Heo * modifications aren't originating from userland don't
938df6a58c5STejun Heo * have the matching @file available. Look up the inodes
939df6a58c5STejun Heo * and generate the events manually.
940df6a58c5STejun Heo */
/* ilookup only - skip superblocks that never instantiated the inode */
94167c0496eSTejun Heo inode = ilookup(info->sb, kernfs_ino(kn));
942d911d987STejun Heo if (!inode)
943d911d987STejun Heo continue;
944d911d987STejun Heo
94525b229dfSAl Viro name = (struct qstr)QSTR_INIT(kn->name, strlen(kn->name));
946df6a58c5STejun Heo parent = kernfs_get_parent(kn);
947df6a58c5STejun Heo if (parent) {
94867c0496eSTejun Heo p_inode = ilookup(info->sb, kernfs_ino(parent));
949df6a58c5STejun Heo if (p_inode) {
95040a100d3SAmir Goldstein fsnotify(FS_MODIFY | FS_EVENT_ON_CHILD,
95140a100d3SAmir Goldstein inode, FSNOTIFY_EVENT_INODE,
95240a100d3SAmir Goldstein p_inode, &name, inode, 0);
953df6a58c5STejun Heo iput(p_inode);
954d911d987STejun Heo }
955d911d987STejun Heo
956df6a58c5STejun Heo kernfs_put(parent);
957df6a58c5STejun Heo }
958df6a58c5STejun Heo
/* no parent inode available - deliver a plain inode event instead */
95982ace1efSAmir Goldstein if (!p_inode)
96082ace1efSAmir Goldstein fsnotify_inode(inode, FS_MODIFY);
961497b0c5aSAmir Goldstein
962d911d987STejun Heo iput(inode);
963d911d987STejun Heo }
964d911d987STejun Heo
965c9f2dfb7SImran Khan up_read(&root->kernfs_supers_rwsem);
966ecca47ceSTejun Heo kernfs_put(kn);
967ecca47ceSTejun Heo goto repeat;
968ecca47ceSTejun Heo }
969ecca47ceSTejun Heo
970ecca47ceSTejun Heo /**
971ecca47ceSTejun Heo * kernfs_notify - notify a kernfs file
972ecca47ceSTejun Heo * @kn: file to notify
973ecca47ceSTejun Heo *
974ecca47ceSTejun Heo * Notify @kn such that poll(2) on @kn wakes up. Maybe be called from any
975ecca47ceSTejun Heo * context.
976ecca47ceSTejun Heo */
kernfs_notify(struct kernfs_node * kn)977ecca47ceSTejun Heo void kernfs_notify(struct kernfs_node *kn)
978ecca47ceSTejun Heo {
979ecca47ceSTejun Heo static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn);
9802fd26970SImran Khan unsigned long flags;
98103c0a920SRadu Rendec struct kernfs_open_node *on;
982ecca47ceSTejun Heo
983ecca47ceSTejun Heo if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
984ecca47ceSTejun Heo return;
985ecca47ceSTejun Heo
98603c0a920SRadu Rendec /* kick poll immediately */
/* bump the event counter and wake pollers; RCU guards attr.open */
987086c00c7SImran Khan rcu_read_lock();
988086c00c7SImran Khan on = rcu_dereference(kn->attr.open);
98903c0a920SRadu Rendec if (on) {
99003c0a920SRadu Rendec atomic_inc(&on->event);
99103c0a920SRadu Rendec wake_up_interruptible(&on->poll);
99203c0a920SRadu Rendec }
993086c00c7SImran Khan rcu_read_unlock();
99403c0a920SRadu Rendec
99503c0a920SRadu Rendec /* schedule work to kick fsnotify */
/*
 * notify_next == NULL means @kn is not yet queued (queued nodes point
 * to the next entry or to KERNFS_NOTIFY_EOL). Queue it with a ref;
 * the workfn drops the ref after processing. irqsave: callable from
 * any context.
 */
9962fd26970SImran Khan spin_lock_irqsave(&kernfs_notify_lock, flags);
9972fd26970SImran Khan if (!kn->attr.notify_next) {
998ecca47ceSTejun Heo kernfs_get(kn);
9992fd26970SImran Khan kn->attr.notify_next = kernfs_notify_list;
10002fd26970SImran Khan kernfs_notify_list = kn;
1001ecca47ceSTejun Heo schedule_work(&kernfs_notify_work);
1002ecca47ceSTejun Heo }
10032fd26970SImran Khan spin_unlock_irqrestore(&kernfs_notify_lock, flags);
10042fd26970SImran Khan }
1005414985aeSTejun Heo EXPORT_SYMBOL_GPL(kernfs_notify);
1006414985aeSTejun Heo
/* file_operations shared by all kernfs regular files */
1007a797bfc3STejun Heo const struct file_operations kernfs_file_fops = {
10084eaad21aSChristoph Hellwig .read_iter = kernfs_fop_read_iter,
1009cc099e0bSChristoph Hellwig .write_iter = kernfs_fop_write_iter,
1010414985aeSTejun Heo .llseek = generic_file_llseek,
1011c637b8acSTejun Heo .mmap = kernfs_fop_mmap,
1012c637b8acSTejun Heo .open = kernfs_fop_open,
1013c637b8acSTejun Heo .release = kernfs_fop_release,
1014c637b8acSTejun Heo .poll = kernfs_fop_poll,
10152a9becddSTony Luck .fsync = noop_fsync,
1016b0072734SDavid Howells .splice_read = copy_splice_read,
1017f2d6c270SChristoph Hellwig .splice_write = iter_file_splice_write,
1018414985aeSTejun Heo };
1019414985aeSTejun Heo
1020414985aeSTejun Heo /**
10212063d608STejun Heo * __kernfs_create_file - kernfs internal function to create a file
1022414985aeSTejun Heo * @parent: directory to create the file in
1023414985aeSTejun Heo * @name: name of the file
1024414985aeSTejun Heo * @mode: mode of the file
1025488dee96SDmitry Torokhov * @uid: uid of the file
1026488dee96SDmitry Torokhov * @gid: gid of the file
1027414985aeSTejun Heo * @size: size of the file
1028414985aeSTejun Heo * @ops: kernfs operations for the file
1029414985aeSTejun Heo * @priv: private data for the file
1030414985aeSTejun Heo * @ns: optional namespace tag of the file
1031414985aeSTejun Heo * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
1032414985aeSTejun Heo *
103324b3e3ddSRandy Dunlap * Return: the created node on success, ERR_PTR() value on error.
1034414985aeSTejun Heo */
__kernfs_create_file(struct kernfs_node * parent,const char * name,umode_t mode,kuid_t uid,kgid_t gid,loff_t size,const struct kernfs_ops * ops,void * priv,const void * ns,struct lock_class_key * key)10352063d608STejun Heo struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
1036414985aeSTejun Heo const char *name,
1037488dee96SDmitry Torokhov umode_t mode, kuid_t uid, kgid_t gid,
1038488dee96SDmitry Torokhov loff_t size,
1039414985aeSTejun Heo const struct kernfs_ops *ops,
1040414985aeSTejun Heo void *priv, const void *ns,
1041414985aeSTejun Heo struct lock_class_key *key)
1042414985aeSTejun Heo {
1043324a56e1STejun Heo struct kernfs_node *kn;
10442063d608STejun Heo unsigned flags;
1045414985aeSTejun Heo int rc;
1046414985aeSTejun Heo
10472063d608STejun Heo flags = KERNFS_FILE;
10482063d608STejun Heo
/* only permission bits of @mode are honoured; type is forced to S_IFREG */
1049488dee96SDmitry Torokhov kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG,
1050488dee96SDmitry Torokhov uid, gid, flags);
1051324a56e1STejun Heo if (!kn)
1052414985aeSTejun Heo return ERR_PTR(-ENOMEM);
1053414985aeSTejun Heo
1054adc5e8b5STejun Heo kn->attr.ops = ops;
1055adc5e8b5STejun Heo kn->attr.size = size;
1056adc5e8b5STejun Heo kn->ns = ns;
1057324a56e1STejun Heo kn->priv = priv;
1058414985aeSTejun Heo
1059414985aeSTejun Heo #ifdef CONFIG_DEBUG_LOCK_ALLOC
1060414985aeSTejun Heo if (key) {
10610f605db5SWaiman Long lockdep_init_map(&kn->dep_map, "kn->active", key, 0);
1062df23fc39STejun Heo kn->flags |= KERNFS_LOCKDEP;
1063414985aeSTejun Heo }
1064414985aeSTejun Heo #endif
1065414985aeSTejun Heo
1066414985aeSTejun Heo /*
10671970a062SJulia Lawall * kn->attr.ops is accessible only while holding active ref. We
1068414985aeSTejun Heo * need to know whether some ops are implemented outside active
1069414985aeSTejun Heo * ref. Cache their existence in flags.
1070414985aeSTejun Heo */
1071414985aeSTejun Heo if (ops->seq_show)
1072df23fc39STejun Heo kn->flags |= KERNFS_HAS_SEQ_SHOW;
1073414985aeSTejun Heo if (ops->mmap)
1074df23fc39STejun Heo kn->flags |= KERNFS_HAS_MMAP;
10750e67db2fSTejun Heo if (ops->release)
10760e67db2fSTejun Heo kn->flags |= KERNFS_HAS_RELEASE;
1077414985aeSTejun Heo
/* kernfs_add_one() consumes nothing on failure; drop our ref ourselves */
1078988cd7afSTejun Heo rc = kernfs_add_one(kn);
1079414985aeSTejun Heo if (rc) {
1080324a56e1STejun Heo kernfs_put(kn);
1081414985aeSTejun Heo return ERR_PTR(rc);
1082414985aeSTejun Heo }
1083324a56e1STejun Heo return kn;
1084414985aeSTejun Heo }
1085