Backported memfd_create() system call

This commit is contained in:
Nathan
2025-04-07 00:23:38 -05:00
parent 3d1c55ae1e
commit eee847d17d
12 changed files with 258 additions and 1 deletions

View File

@@ -66,6 +66,9 @@ static struct vfsmount *shm_mnt;
#include <linux/highmem.h>
#include <linux/seq_file.h>
#include <linux/magic.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>
#include <uapi/linux/memfd.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -603,6 +606,7 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = dentry->d_inode;
struct shmem_inode_info *info = SHMEM_I(inode);
int error;
error = inode_change_ok(inode, attr);
@@ -613,6 +617,11 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
loff_t oldsize = inode->i_size;
loff_t newsize = attr->ia_size;
/* protected by i_mutex */
if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
(newsize > oldsize && (info->seals & F_SEAL_GROW)))
return -EPERM;
if (newsize != oldsize) {
i_size_write(inode, newsize);
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
@@ -1448,6 +1457,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
info = SHMEM_I(inode);
memset(info, 0, (char *)inode - (char *)info);
spin_lock_init(&info->lock);
info->seals = F_SEAL_SEAL;
info->flags = flags & VM_NORESERVE;
INIT_LIST_HEAD(&info->swaplist);
simple_xattrs_init(&info->xattrs);
@@ -1501,7 +1511,17 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
struct page **pagep, void **fsdata)
{
struct inode *inode = mapping->host;
struct shmem_inode_info *info = SHMEM_I(inode);
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
/* i_mutex is held by caller */
if (unlikely(info->seals)) {
if (info->seals & F_SEAL_WRITE)
return -EPERM;
if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
return -EPERM;
}
return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
}
@@ -1872,11 +1892,113 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
return offset;
}
static int shmem_wait_for_pins(struct address_space *mapping)
{
return 0;
}
#define F_ALL_SEALS (F_SEAL_SEAL | \
F_SEAL_SHRINK | \
F_SEAL_GROW | \
F_SEAL_WRITE)
int shmem_add_seals(struct file *file, unsigned int seals)
{
struct inode *inode = file_inode(file);
struct shmem_inode_info *info = SHMEM_I(inode);
int error;
/*
* SEALING
* Sealing allows multiple parties to share a shmem-file but restrict
* access to a specific subset of file operations. Seals can only be
* added, but never removed. This way, mutually untrusted parties can
* share common memory regions with a well-defined policy. A malicious
* peer can thus never perform unwanted operations on a shared object.
*
* Seals are only supported on special shmem-files and always affect
* the whole underlying inode. Once a seal is set, it may prevent some
* kinds of access to the file. Currently, the following seals are
* defined:
* SEAL_SEAL: Prevent further seals from being set on this file
* SEAL_SHRINK: Prevent the file from shrinking
* SEAL_GROW: Prevent the file from growing
* SEAL_WRITE: Prevent write access to the file
*
* As we don't require any trust relationship between two parties, we
* must prevent seals from being removed. Therefore, sealing a file
* only adds a given set of seals to the file, it never touches
* existing seals. Furthermore, the "setting seals"-operation can be
* sealed itself, which basically prevents any further seal from being
* added.
*
* Semantics of sealing are only defined on volatile files. Only
* anonymous shmem files support sealing. More importantly, seals are
* never written to disk. Therefore, there's no plan to support it on
* other file types.
*/
if (file->f_op != &shmem_file_operations)
return -EINVAL;
if (!(file->f_mode & FMODE_WRITE))
return -EPERM;
if (seals & ~(unsigned int)F_ALL_SEALS)
return -EINVAL;
mutex_lock(&inode->i_mutex);
if (info->seals & F_SEAL_SEAL) {
error = -EPERM;
goto unlock;
}
info->seals |= seals;
error = 0;
unlock:
mutex_unlock(&inode->i_mutex);
return error;
}
EXPORT_SYMBOL_GPL(shmem_add_seals);
int shmem_get_seals(struct file *file)
{
if (file->f_op != &shmem_file_operations)
return -EINVAL;
return SHMEM_I(file_inode(file))->seals;
}
EXPORT_SYMBOL_GPL(shmem_get_seals);
long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
{
long error;
switch (cmd) {
case F_ADD_SEALS:
/* disallow upper 32bit */
if (arg > UINT_MAX)
return -EINVAL;
error = shmem_add_seals(file, arg);
break;
case F_GET_SEALS:
error = shmem_get_seals(file);
break;
default:
error = -EINVAL;
break;
}
return error;
}
static long shmem_fallocate(struct file *file, int mode, loff_t offset,
loff_t len)
{
struct inode *inode = file_inode(file);
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_falloc shmem_falloc;
pgoff_t start, index, end;
int error;
@@ -1889,6 +2011,12 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
/* protected by i_mutex */
if (info->seals & F_SEAL_WRITE) {
error = -EPERM;
goto out;
}
shmem_falloc.waitq = &shmem_falloc_waitq;
shmem_falloc.start = unmap_start >> PAGE_SHIFT;
shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
@@ -1915,6 +2043,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
if (error)
goto out;
if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
error = -EPERM;
goto out;
}
start = offset >> PAGE_CACHE_SHIFT;
end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
/* Try to avoid a swapstorm if len is impossible to satisfy */
@@ -2631,6 +2764,77 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
shmem_show_mpol(seq, sbinfo->mpol);
return 0;
}
#define MFD_NAME_PREFIX "memfd:"
#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)
#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING)
SYSCALL_DEFINE2(memfd_create,
const char __user *, uname,
unsigned int, flags)
{
struct shmem_inode_info *info;
struct file *file;
int fd, error;
char *name;
long len;
if (flags & ~(unsigned int)MFD_ALL_FLAGS)
return -EINVAL;
/* length includes terminating zero */
len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
if (len <= 0)
return -EFAULT;
if (len > MFD_NAME_MAX_LEN + 1)
return -EINVAL;
name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_TEMPORARY);
if (!name)
return -ENOMEM;
strcpy(name, MFD_NAME_PREFIX);
if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) {
error = -EFAULT;
goto err_name;
}
/* terminating-zero may have changed after strnlen_user() returned */
if (name[len + MFD_NAME_PREFIX_LEN - 1]) {
error = -EFAULT;
goto err_name;
}
fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0);
if (fd < 0) {
error = fd;
goto err_name;
}
file = shmem_file_setup(name, 0, VM_NORESERVE);
if (IS_ERR(file)) {
error = PTR_ERR(file);
goto err_fd;
}
info = SHMEM_I(file_inode(file));
file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
file->f_flags |= O_RDWR | O_LARGEFILE;
if (flags & MFD_ALLOW_SEALING)
info->seals &= ~F_SEAL_SEAL;
fd_install(fd, file);
kfree(name);
return fd;
err_fd:
put_unused_fd(fd);
err_name:
kfree(name);
return error;
}
#endif /* CONFIG_TMPFS */
static void shmem_put_super(struct super_block *sb)