11507f512SMike Rapoport // SPDX-License-Identifier: GPL-2.0
21507f512SMike Rapoport /*
31507f512SMike Rapoport * Copyright IBM Corporation, 2021
41507f512SMike Rapoport *
51507f512SMike Rapoport * Author: Mike Rapoport <rppt@linux.ibm.com>
61507f512SMike Rapoport */
71507f512SMike Rapoport
81507f512SMike Rapoport #include <linux/mm.h>
91507f512SMike Rapoport #include <linux/fs.h>
101507f512SMike Rapoport #include <linux/swap.h>
111507f512SMike Rapoport #include <linux/mount.h>
121507f512SMike Rapoport #include <linux/memfd.h>
131507f512SMike Rapoport #include <linux/bitops.h>
141507f512SMike Rapoport #include <linux/printk.h>
151507f512SMike Rapoport #include <linux/pagemap.h>
161507f512SMike Rapoport #include <linux/syscalls.h>
171507f512SMike Rapoport #include <linux/pseudo_fs.h>
181507f512SMike Rapoport #include <linux/secretmem.h>
191507f512SMike Rapoport #include <linux/set_memory.h>
201507f512SMike Rapoport #include <linux/sched/signal.h>
211507f512SMike Rapoport
221507f512SMike Rapoport #include <uapi/linux/magic.h>
231507f512SMike Rapoport
241507f512SMike Rapoport #include <asm/tlbflush.h>
251507f512SMike Rapoport
261507f512SMike Rapoport #include "internal.h"
271507f512SMike Rapoport
281507f512SMike Rapoport #undef pr_fmt
291507f512SMike Rapoport #define pr_fmt(fmt) "secretmem: " fmt
301507f512SMike Rapoport
/*
 * Masks used by memfd_secret() to validate its flags argument.  No mode
 * bits are defined, so the mode mask -- and with it the flags mask -- is
 * empty.
 */
#define SECRETMEM_MODE_MASK	(0x0)
#define SECRETMEM_FLAGS_MASK	SECRETMEM_MODE_MASK
371507f512SMike Rapoport
38b758fe6dSMike Rapoport (IBM) static bool secretmem_enable __ro_after_init = 1;
391507f512SMike Rapoport module_param_named(enable, secretmem_enable, bool, 0400);
401507f512SMike Rapoport MODULE_PARM_DESC(secretmem_enable,
411507f512SMike Rapoport "Enable secretmem and memfd_secret(2) system call");
421507f512SMike Rapoport
4387066fddSLinus Torvalds static atomic_t secretmem_users;
449a436f8fSMike Rapoport
secretmem_active(void)459a436f8fSMike Rapoport bool secretmem_active(void)
469a436f8fSMike Rapoport {
4787066fddSLinus Torvalds return !!atomic_read(&secretmem_users);
489a436f8fSMike Rapoport }
499a436f8fSMike Rapoport
secretmem_fault(struct vm_fault * vmf)501507f512SMike Rapoport static vm_fault_t secretmem_fault(struct vm_fault *vmf)
511507f512SMike Rapoport {
521507f512SMike Rapoport struct address_space *mapping = vmf->vma->vm_file->f_mapping;
531507f512SMike Rapoport struct inode *inode = file_inode(vmf->vma->vm_file);
541507f512SMike Rapoport pgoff_t offset = vmf->pgoff;
551507f512SMike Rapoport gfp_t gfp = vmf->gfp_mask;
561507f512SMike Rapoport unsigned long addr;
571507f512SMike Rapoport struct page *page;
587e2fca52SZhangPeng struct folio *folio;
5984ac0130SMike Rapoport vm_fault_t ret;
601507f512SMike Rapoport int err;
611507f512SMike Rapoport
621507f512SMike Rapoport if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
631507f512SMike Rapoport return vmf_error(-EINVAL);
641507f512SMike Rapoport
6584ac0130SMike Rapoport filemap_invalidate_lock_shared(mapping);
6684ac0130SMike Rapoport
671507f512SMike Rapoport retry:
681507f512SMike Rapoport page = find_lock_page(mapping, offset);
691507f512SMike Rapoport if (!page) {
707e2fca52SZhangPeng folio = folio_alloc(gfp | __GFP_ZERO, 0);
717e2fca52SZhangPeng if (!folio) {
7284ac0130SMike Rapoport ret = VM_FAULT_OOM;
7384ac0130SMike Rapoport goto out;
7484ac0130SMike Rapoport }
751507f512SMike Rapoport
767e2fca52SZhangPeng page = &folio->page;
771507f512SMike Rapoport err = set_direct_map_invalid_noflush(page);
781507f512SMike Rapoport if (err) {
797e2fca52SZhangPeng folio_put(folio);
8084ac0130SMike Rapoport ret = vmf_error(err);
8184ac0130SMike Rapoport goto out;
821507f512SMike Rapoport }
831507f512SMike Rapoport
847e2fca52SZhangPeng __folio_mark_uptodate(folio);
857e2fca52SZhangPeng err = filemap_add_folio(mapping, folio, offset, gfp);
861507f512SMike Rapoport if (unlikely(err)) {
877e2fca52SZhangPeng folio_put(folio);
881507f512SMike Rapoport /*
891507f512SMike Rapoport * If a split of large page was required, it
901507f512SMike Rapoport * already happened when we marked the page invalid
911507f512SMike Rapoport * which guarantees that this call won't fail
921507f512SMike Rapoport */
931507f512SMike Rapoport set_direct_map_default_noflush(page);
941507f512SMike Rapoport if (err == -EEXIST)
951507f512SMike Rapoport goto retry;
961507f512SMike Rapoport
9784ac0130SMike Rapoport ret = vmf_error(err);
9884ac0130SMike Rapoport goto out;
991507f512SMike Rapoport }
1001507f512SMike Rapoport
1011507f512SMike Rapoport addr = (unsigned long)page_address(page);
1021507f512SMike Rapoport flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
1031507f512SMike Rapoport }
1041507f512SMike Rapoport
1051507f512SMike Rapoport vmf->page = page;
10684ac0130SMike Rapoport ret = VM_FAULT_LOCKED;
10784ac0130SMike Rapoport
10884ac0130SMike Rapoport out:
10984ac0130SMike Rapoport filemap_invalidate_unlock_shared(mapping);
11084ac0130SMike Rapoport return ret;
1111507f512SMike Rapoport }
1121507f512SMike Rapoport
1131507f512SMike Rapoport static const struct vm_operations_struct secretmem_vm_ops = {
1141507f512SMike Rapoport .fault = secretmem_fault,
1151507f512SMike Rapoport };
1161507f512SMike Rapoport
secretmem_release(struct inode * inode,struct file * file)1179a436f8fSMike Rapoport static int secretmem_release(struct inode *inode, struct file *file)
1189a436f8fSMike Rapoport {
11987066fddSLinus Torvalds atomic_dec(&secretmem_users);
1209a436f8fSMike Rapoport return 0;
1219a436f8fSMike Rapoport }
1229a436f8fSMike Rapoport
secretmem_mmap(struct file * file,struct vm_area_struct * vma)1231507f512SMike Rapoport static int secretmem_mmap(struct file *file, struct vm_area_struct *vma)
1241507f512SMike Rapoport {
1251507f512SMike Rapoport unsigned long len = vma->vm_end - vma->vm_start;
1261507f512SMike Rapoport
1271507f512SMike Rapoport if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
1281507f512SMike Rapoport return -EINVAL;
1291507f512SMike Rapoport
130b0cc5e89SAndrew Morton if (!mlock_future_ok(vma->vm_mm, vma->vm_flags | VM_LOCKED, len))
1311507f512SMike Rapoport return -EAGAIN;
1321507f512SMike Rapoport
1331c71222eSSuren Baghdasaryan vm_flags_set(vma, VM_LOCKED | VM_DONTDUMP);
1341507f512SMike Rapoport vma->vm_ops = &secretmem_vm_ops;
1351507f512SMike Rapoport
1361507f512SMike Rapoport return 0;
1371507f512SMike Rapoport }
1381507f512SMike Rapoport
vma_is_secretmem(struct vm_area_struct * vma)1391507f512SMike Rapoport bool vma_is_secretmem(struct vm_area_struct *vma)
1401507f512SMike Rapoport {
1411507f512SMike Rapoport return vma->vm_ops == &secretmem_vm_ops;
1421507f512SMike Rapoport }
1431507f512SMike Rapoport
1441507f512SMike Rapoport static const struct file_operations secretmem_fops = {
1459a436f8fSMike Rapoport .release = secretmem_release,
1461507f512SMike Rapoport .mmap = secretmem_mmap,
1471507f512SMike Rapoport };
1481507f512SMike Rapoport
secretmem_migrate_folio(struct address_space * mapping,struct folio * dst,struct folio * src,enum migrate_mode mode)1495409548dSMatthew Wilcox (Oracle) static int secretmem_migrate_folio(struct address_space *mapping,
1505409548dSMatthew Wilcox (Oracle) struct folio *dst, struct folio *src, enum migrate_mode mode)
1511507f512SMike Rapoport {
1521507f512SMike Rapoport return -EBUSY;
1531507f512SMike Rapoport }
1541507f512SMike Rapoport
secretmem_free_folio(struct folio * folio)1556612ed24SMatthew Wilcox (Oracle) static void secretmem_free_folio(struct folio *folio)
1561507f512SMike Rapoport {
1576612ed24SMatthew Wilcox (Oracle) set_direct_map_default_noflush(&folio->page);
1586612ed24SMatthew Wilcox (Oracle) folio_zero_segment(folio, 0, folio_size(folio));
1591507f512SMike Rapoport }
1601507f512SMike Rapoport
1611507f512SMike Rapoport const struct address_space_operations secretmem_aops = {
16246de8b97SMatthew Wilcox (Oracle) .dirty_folio = noop_dirty_folio,
1636612ed24SMatthew Wilcox (Oracle) .free_folio = secretmem_free_folio,
1645409548dSMatthew Wilcox (Oracle) .migrate_folio = secretmem_migrate_folio,
1651507f512SMike Rapoport };
1661507f512SMike Rapoport
secretmem_setattr(struct mnt_idmap * idmap,struct dentry * dentry,struct iattr * iattr)167c1632a0fSChristian Brauner static int secretmem_setattr(struct mnt_idmap *idmap,
168f9b141f9SAxel Rasmussen struct dentry *dentry, struct iattr *iattr)
169f9b141f9SAxel Rasmussen {
170f9b141f9SAxel Rasmussen struct inode *inode = d_inode(dentry);
17184ac0130SMike Rapoport struct address_space *mapping = inode->i_mapping;
172f9b141f9SAxel Rasmussen unsigned int ia_valid = iattr->ia_valid;
17384ac0130SMike Rapoport int ret;
17484ac0130SMike Rapoport
17584ac0130SMike Rapoport filemap_invalidate_lock(mapping);
176f9b141f9SAxel Rasmussen
177f9b141f9SAxel Rasmussen if ((ia_valid & ATTR_SIZE) && inode->i_size)
17884ac0130SMike Rapoport ret = -EINVAL;
17984ac0130SMike Rapoport else
180c1632a0fSChristian Brauner ret = simple_setattr(idmap, dentry, iattr);
181f9b141f9SAxel Rasmussen
18284ac0130SMike Rapoport filemap_invalidate_unlock(mapping);
18384ac0130SMike Rapoport
18484ac0130SMike Rapoport return ret;
185f9b141f9SAxel Rasmussen }
186f9b141f9SAxel Rasmussen
187f9b141f9SAxel Rasmussen static const struct inode_operations secretmem_iops = {
188f9b141f9SAxel Rasmussen .setattr = secretmem_setattr,
189f9b141f9SAxel Rasmussen };
190f9b141f9SAxel Rasmussen
/*
 * Kernel-internal mount of the secretmem pseudo filesystem, assigned by
 * secretmem_init() and used by secretmem_file_create().
 */
static struct vfsmount *secretmem_mnt;
1921507f512SMike Rapoport
secretmem_file_create(unsigned long flags)1931507f512SMike Rapoport static struct file *secretmem_file_create(unsigned long flags)
1941507f512SMike Rapoport {
19598001fd6SColin Ian King struct file *file;
1961507f512SMike Rapoport struct inode *inode;
1972bfe15c5SChristian Göttsche const char *anon_name = "[secretmem]";
1982bfe15c5SChristian Göttsche const struct qstr qname = QSTR_INIT(anon_name, strlen(anon_name));
1992bfe15c5SChristian Göttsche int err;
2001507f512SMike Rapoport
2011507f512SMike Rapoport inode = alloc_anon_inode(secretmem_mnt->mnt_sb);
2021507f512SMike Rapoport if (IS_ERR(inode))
2031507f512SMike Rapoport return ERR_CAST(inode);
2041507f512SMike Rapoport
2052bfe15c5SChristian Göttsche err = security_inode_init_security_anon(inode, &qname, NULL);
2062bfe15c5SChristian Göttsche if (err) {
2072bfe15c5SChristian Göttsche file = ERR_PTR(err);
2082bfe15c5SChristian Göttsche goto err_free_inode;
2092bfe15c5SChristian Göttsche }
2102bfe15c5SChristian Göttsche
2111507f512SMike Rapoport file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem",
2121507f512SMike Rapoport O_RDWR, &secretmem_fops);
2131507f512SMike Rapoport if (IS_ERR(file))
2141507f512SMike Rapoport goto err_free_inode;
2151507f512SMike Rapoport
2161507f512SMike Rapoport mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
2171507f512SMike Rapoport mapping_set_unevictable(inode->i_mapping);
2181507f512SMike Rapoport
219f9b141f9SAxel Rasmussen inode->i_op = &secretmem_iops;
2201507f512SMike Rapoport inode->i_mapping->a_ops = &secretmem_aops;
2211507f512SMike Rapoport
2221507f512SMike Rapoport /* pretend we are a normal file with zero size */
2231507f512SMike Rapoport inode->i_mode |= S_IFREG;
2241507f512SMike Rapoport inode->i_size = 0;
2251507f512SMike Rapoport
2261507f512SMike Rapoport return file;
2271507f512SMike Rapoport
2281507f512SMike Rapoport err_free_inode:
2291507f512SMike Rapoport iput(inode);
2301507f512SMike Rapoport return file;
2311507f512SMike Rapoport }
2321507f512SMike Rapoport
/*
 * memfd_secret - create a file descriptor referring to a secretmem file
 * @flags: only O_CLOEXEC (plus the currently empty SECRETMEM_FLAGS_MASK) is
 *         accepted
 *
 * Return: the new file descriptor on success; -ENOSYS when secretmem is
 * disabled or the direct map cannot be modified, -EINVAL for unknown flags,
 * -ENFILE when the user counter has gone negative, or the error returned by
 * descriptor allocation or file creation.
 */
SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
{
	struct file *file;
	int fd;

	/* make sure local flags do not conflict with global fcntl.h */
	BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC);

	if (!secretmem_enable || !can_set_direct_map())
		return -ENOSYS;

	if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC))
		return -EINVAL;

	/* A negative count means the user counter has wrapped around. */
	if (atomic_read(&secretmem_users) < 0)
		return -ENFILE;

	fd = get_unused_fd_flags(flags & O_CLOEXEC);
	if (fd < 0)
		return fd;

	file = secretmem_file_create(flags);
	if (IS_ERR(file)) {
		/* Give the reserved descriptor slot back. */
		put_unused_fd(fd);
		return PTR_ERR(file);
	}

	file->f_flags |= O_LARGEFILE;

	/* Account the file before it becomes visible in the fd table. */
	atomic_inc(&secretmem_users);
	fd_install(fd, file);

	return fd;
}
2691507f512SMike Rapoport
secretmem_init_fs_context(struct fs_context * fc)2701507f512SMike Rapoport static int secretmem_init_fs_context(struct fs_context *fc)
2711507f512SMike Rapoport {
2721507f512SMike Rapoport return init_pseudo(fc, SECRETMEM_MAGIC) ? 0 : -ENOMEM;
2731507f512SMike Rapoport }
2741507f512SMike Rapoport
2751507f512SMike Rapoport static struct file_system_type secretmem_fs = {
2761507f512SMike Rapoport .name = "secretmem",
2771507f512SMike Rapoport .init_fs_context = secretmem_init_fs_context,
2781507f512SMike Rapoport .kill_sb = kill_anon_super,
2791507f512SMike Rapoport };
2801507f512SMike Rapoport
secretmem_init(void)2811ea41595SXiu Jianfeng static int __init secretmem_init(void)
2821507f512SMike Rapoport {
283*7caf9663SPatrick Roy if (!secretmem_enable || !can_set_direct_map())
284f7c5b1aaSXiu Jianfeng return 0;
2851507f512SMike Rapoport
2861507f512SMike Rapoport secretmem_mnt = kern_mount(&secretmem_fs);
2871507f512SMike Rapoport if (IS_ERR(secretmem_mnt))
2884eb5bbdeSBinyi Han return PTR_ERR(secretmem_mnt);
2891507f512SMike Rapoport
2901507f512SMike Rapoport /* prevent secretmem mappings from ever getting PROT_EXEC */
2911507f512SMike Rapoport secretmem_mnt->mnt_flags |= MNT_NOEXEC;
2921507f512SMike Rapoport
293f7c5b1aaSXiu Jianfeng return 0;
2941507f512SMike Rapoport }
2951507f512SMike Rapoport fs_initcall(secretmem_init);
296