Initial commit; kernel source import

This commit is contained in:
Nathan
2025-04-06 23:50:55 -05:00
commit 25c6d769f4
45093 changed files with 18199410 additions and 0 deletions

6
fs/notify/Kconfig Normal file
View File

@@ -0,0 +1,6 @@
config FSNOTIFY
def_bool n
source "fs/notify/dnotify/Kconfig"
source "fs/notify/inotify/Kconfig"
source "fs/notify/fanotify/Kconfig"

6
fs/notify/Makefile Normal file
View File

@@ -0,0 +1,6 @@
obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o inode_mark.o \
mark.o vfsmount_mark.o fdinfo.o
obj-y += dnotify/
obj-y += inotify/
obj-y += fanotify/

11
fs/notify/dnotify/Kconfig Normal file
View File

@@ -0,0 +1,11 @@
config DNOTIFY
bool "Dnotify support"
select FSNOTIFY
default y
help
Dnotify is a directory-based per-fd file change notification system
that uses signals to communicate events to user-space. There exist
superior alternatives, but some applications may still rely on
dnotify.
If unsure, say Y.

View File

@@ -0,0 +1 @@
obj-$(CONFIG_DNOTIFY) += dnotify.o

411
fs/notify/dnotify/dnotify.c Normal file
View File

@@ -0,0 +1,411 @@
/*
* Directory notifications for Linux.
*
* Copyright (C) 2000,2001,2002 Stephen Rothwell
*
* Copyright (C) 2009 Eric Paris <Red Hat Inc>
* dnotify was largly rewritten to use the new fsnotify infrastructure
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2, or (at your option) any
* later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/dnotify.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/fdtable.h>
#include <linux/fsnotify_backend.h>
int dir_notify_enable __read_mostly = 1;
static struct kmem_cache *dnotify_struct_cache __read_mostly;
static struct kmem_cache *dnotify_mark_cache __read_mostly;
static struct fsnotify_group *dnotify_group __read_mostly;
static DEFINE_MUTEX(dnotify_mark_mutex);
/*
* dnotify will attach one of these to each inode (i_fsnotify_marks) which
* is being watched by dnotify. If multiple userspace applications are watching
* the same directory with dnotify their information is chained in dn
*/
struct dnotify_mark {
struct fsnotify_mark fsn_mark;
struct dnotify_struct *dn;
};
/*
* When a process starts or stops watching an inode the set of events which
* dnotify cares about for that inode may change. This function runs the
* list of everything receiving dnotify events about this directory and calculates
* the set of all those events. After it updates what dnotify is interested in
* it calls the fsnotify function so it can update the set of all events relevant
* to this inode.
*/
static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
{
__u32 new_mask, old_mask;
struct dnotify_struct *dn;
struct dnotify_mark *dn_mark = container_of(fsn_mark,
struct dnotify_mark,
fsn_mark);
assert_spin_locked(&fsn_mark->lock);
old_mask = fsn_mark->mask;
new_mask = 0;
for (dn = dn_mark->dn; dn != NULL; dn = dn->dn_next)
new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT);
fsnotify_set_mark_mask_locked(fsn_mark, new_mask);
if (old_mask == new_mask)
return;
if (fsn_mark->i.inode)
fsnotify_recalc_inode_mask(fsn_mark->i.inode);
}
/*
* Mains fsnotify call where events are delivered to dnotify.
* Find the dnotify mark on the relevant inode, run the list of dnotify structs
* on that mark and determine which of them has expressed interest in receiving
* events of this type. When found send the correct process and signal and
* destroy the dnotify struct if it was not registered to receive multiple
* events.
*/
static int dnotify_handle_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
struct fsnotify_event *event)
{
struct dnotify_mark *dn_mark;
struct inode *to_tell;
struct dnotify_struct *dn;
struct dnotify_struct **prev;
struct fown_struct *fown;
__u32 test_mask = event->mask & ~FS_EVENT_ON_CHILD;
BUG_ON(vfsmount_mark);
to_tell = event->to_tell;
dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark);
spin_lock(&inode_mark->lock);
prev = &dn_mark->dn;
while ((dn = *prev) != NULL) {
if ((dn->dn_mask & test_mask) == 0) {
prev = &dn->dn_next;
continue;
}
fown = &dn->dn_filp->f_owner;
send_sigio(fown, dn->dn_fd, POLL_MSG);
if (dn->dn_mask & FS_DN_MULTISHOT)
prev = &dn->dn_next;
else {
*prev = dn->dn_next;
kmem_cache_free(dnotify_struct_cache, dn);
dnotify_recalc_inode_mask(inode_mark);
}
}
spin_unlock(&inode_mark->lock);
return 0;
}
/*
* Given an inode and mask determine if dnotify would be interested in sending
* userspace notification for that pair.
*/
static bool dnotify_should_send_event(struct fsnotify_group *group,
struct inode *inode,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
__u32 mask, void *data, int data_type)
{
/* not a dir, dnotify doesn't care */
if (!S_ISDIR(inode->i_mode))
return false;
return true;
}
static void dnotify_free_mark(struct fsnotify_mark *fsn_mark)
{
struct dnotify_mark *dn_mark = container_of(fsn_mark,
struct dnotify_mark,
fsn_mark);
BUG_ON(dn_mark->dn);
kmem_cache_free(dnotify_mark_cache, dn_mark);
}
static struct fsnotify_ops dnotify_fsnotify_ops = {
.handle_event = dnotify_handle_event,
.should_send_event = dnotify_should_send_event,
.free_group_priv = NULL,
.freeing_mark = NULL,
.free_event_priv = NULL,
};
/*
* Called every time a file is closed. Looks first for a dnotify mark on the
* inode. If one is found run all of the ->dn structures attached to that
* mark for one relevant to this process closing the file and remove that
* dnotify_struct. If that was the last dnotify_struct also remove the
* fsnotify_mark.
*/
void dnotify_flush(struct file *filp, fl_owner_t id)
{
struct fsnotify_mark *fsn_mark;
struct dnotify_mark *dn_mark;
struct dnotify_struct *dn;
struct dnotify_struct **prev;
struct inode *inode;
inode = file_inode(filp);
if (!S_ISDIR(inode->i_mode))
return;
fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode);
if (!fsn_mark)
return;
dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
mutex_lock(&dnotify_mark_mutex);
spin_lock(&fsn_mark->lock);
prev = &dn_mark->dn;
while ((dn = *prev) != NULL) {
if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
*prev = dn->dn_next;
kmem_cache_free(dnotify_struct_cache, dn);
dnotify_recalc_inode_mask(fsn_mark);
break;
}
prev = &dn->dn_next;
}
spin_unlock(&fsn_mark->lock);
/* nothing else could have found us thanks to the dnotify_mark_mutex */
if (dn_mark->dn == NULL)
fsnotify_destroy_mark(fsn_mark, dnotify_group);
mutex_unlock(&dnotify_mark_mutex);
fsnotify_put_mark(fsn_mark);
}
/* this conversion is done only at watch creation */
static __u32 convert_arg(unsigned long arg)
{
__u32 new_mask = FS_EVENT_ON_CHILD;
if (arg & DN_MULTISHOT)
new_mask |= FS_DN_MULTISHOT;
if (arg & DN_DELETE)
new_mask |= (FS_DELETE | FS_MOVED_FROM);
if (arg & DN_MODIFY)
new_mask |= FS_MODIFY;
if (arg & DN_ACCESS)
new_mask |= FS_ACCESS;
if (arg & DN_ATTRIB)
new_mask |= FS_ATTRIB;
if (arg & DN_RENAME)
new_mask |= FS_DN_RENAME;
if (arg & DN_CREATE)
new_mask |= (FS_CREATE | FS_MOVED_TO);
return new_mask;
}
/*
* If multiple processes watch the same inode with dnotify there is only one
* dnotify mark in inode->i_fsnotify_marks but we chain a dnotify_struct
* onto that mark. This function either attaches the new dnotify_struct onto
* that list, or it |= the mask onto an existing dnofiy_struct.
*/
static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark *dn_mark,
fl_owner_t id, int fd, struct file *filp, __u32 mask)
{
struct dnotify_struct *odn;
odn = dn_mark->dn;
while (odn != NULL) {
/* adding more events to existing dnofiy_struct? */
if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
odn->dn_fd = fd;
odn->dn_mask |= mask;
return -EEXIST;
}
odn = odn->dn_next;
}
dn->dn_mask = mask;
dn->dn_fd = fd;
dn->dn_filp = filp;
dn->dn_owner = id;
dn->dn_next = dn_mark->dn;
dn_mark->dn = dn;
return 0;
}
/*
* When a process calls fcntl to attach a dnotify watch to a directory it ends
* up here. Allocate both a mark for fsnotify to add and a dnotify_struct to be
* attached to the fsnotify_mark.
*/
int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
{
struct dnotify_mark *new_dn_mark, *dn_mark;
struct fsnotify_mark *new_fsn_mark, *fsn_mark;
struct dnotify_struct *dn;
struct inode *inode;
fl_owner_t id = current->files;
struct file *f;
int destroy = 0, error = 0;
__u32 mask;
/* we use these to tell if we need to kfree */
new_fsn_mark = NULL;
dn = NULL;
if (!dir_notify_enable) {
error = -EINVAL;
goto out_err;
}
/* a 0 mask means we are explicitly removing the watch */
if ((arg & ~DN_MULTISHOT) == 0) {
dnotify_flush(filp, id);
error = 0;
goto out_err;
}
/* dnotify only works on directories */
inode = file_inode(filp);
if (!S_ISDIR(inode->i_mode)) {
error = -ENOTDIR;
goto out_err;
}
/* expect most fcntl to add new rather than augment old */
dn = kmem_cache_alloc(dnotify_struct_cache, GFP_KERNEL);
if (!dn) {
error = -ENOMEM;
goto out_err;
}
/* new fsnotify mark, we expect most fcntl calls to add a new mark */
new_dn_mark = kmem_cache_alloc(dnotify_mark_cache, GFP_KERNEL);
if (!new_dn_mark) {
error = -ENOMEM;
goto out_err;
}
/* convert the userspace DN_* "arg" to the internal FS_* defines in fsnotify */
mask = convert_arg(arg);
/* set up the new_fsn_mark and new_dn_mark */
new_fsn_mark = &new_dn_mark->fsn_mark;
fsnotify_init_mark(new_fsn_mark, dnotify_free_mark);
new_fsn_mark->mask = mask;
new_dn_mark->dn = NULL;
/* this is needed to prevent the fcntl/close race described below */
mutex_lock(&dnotify_mark_mutex);
/* add the new_fsn_mark or find an old one. */
fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode);
if (fsn_mark) {
dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
spin_lock(&fsn_mark->lock);
} else {
fsnotify_add_mark(new_fsn_mark, dnotify_group, inode, NULL, 0);
spin_lock(&new_fsn_mark->lock);
fsn_mark = new_fsn_mark;
dn_mark = new_dn_mark;
/* we used new_fsn_mark, so don't free it */
new_fsn_mark = NULL;
}
rcu_read_lock();
f = fcheck(fd);
rcu_read_unlock();
/* if (f != filp) means that we lost a race and another task/thread
* actually closed the fd we are still playing with before we grabbed
* the dnotify_mark_mutex and fsn_mark->lock. Since closing the fd is the
* only time we clean up the marks we need to get our mark off
* the list. */
if (f != filp) {
/* if we added ourselves, shoot ourselves, it's possible that
* the flush actually did shoot this fsn_mark. That's fine too
* since multiple calls to destroy_mark is perfectly safe, if
* we found a dn_mark already attached to the inode, just sod
* off silently as the flush at close time dealt with it.
*/
if (dn_mark == new_dn_mark)
destroy = 1;
goto out;
}
error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
if (error) {
/* if we added, we must shoot */
if (dn_mark == new_dn_mark)
destroy = 1;
goto out;
}
error = attach_dn(dn, dn_mark, id, fd, filp, mask);
/* !error means that we attached the dn to the dn_mark, so don't free it */
if (!error)
dn = NULL;
/* -EEXIST means that we didn't add this new dn and used an old one.
* that isn't an error (and the unused dn should be freed) */
else if (error == -EEXIST)
error = 0;
dnotify_recalc_inode_mask(fsn_mark);
out:
spin_unlock(&fsn_mark->lock);
if (destroy)
fsnotify_destroy_mark(fsn_mark, dnotify_group);
mutex_unlock(&dnotify_mark_mutex);
fsnotify_put_mark(fsn_mark);
out_err:
if (new_fsn_mark)
fsnotify_put_mark(new_fsn_mark);
if (dn)
kmem_cache_free(dnotify_struct_cache, dn);
return error;
}
static int __init dnotify_init(void)
{
dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC);
dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops);
if (IS_ERR(dnotify_group))
panic("unable to allocate fsnotify group for dnotify\n");
return 0;
}
module_init(dnotify_init)

View File

@@ -0,0 +1,26 @@
config FANOTIFY
bool "Filesystem wide access notification"
select FSNOTIFY
select ANON_INODES
default n
---help---
Say Y here to enable fanotify support. fanotify is a file access
notification system which differs from inotify in that it sends
an open file descriptor to the userspace listener along with
the event.
If unsure, say Y.
config FANOTIFY_ACCESS_PERMISSIONS
bool "fanotify permissions checking"
depends on FANOTIFY
depends on SECURITY
default n
---help---
Say Y here is you want fanotify listeners to be able to make permissions
decisions concerning filesystem events. This is used by some fanotify
listeners which need to scan files before allowing the system access to
use those files. This is used by some anti-malware vendors and by some
hierarchical storage managent systems.
If unsure, say N.

View File

@@ -0,0 +1 @@
obj-$(CONFIG_FANOTIFY) += fanotify.o fanotify_user.o

View File

@@ -0,0 +1,235 @@
#include <linux/fanotify.h>
#include <linux/fdtable.h>
#include <linux/fsnotify_backend.h>
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/kernel.h> /* UINT_MAX */
#include <linux/mount.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/wait.h>
static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
{
pr_debug("%s: old=%p new=%p\n", __func__, old, new);
if (old->to_tell == new->to_tell &&
old->data_type == new->data_type &&
old->tgid == new->tgid) {
switch (old->data_type) {
case (FSNOTIFY_EVENT_PATH):
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
/* dont merge two permission events */
if ((old->mask & FAN_ALL_PERM_EVENTS) &&
(new->mask & FAN_ALL_PERM_EVENTS))
return false;
#endif
if ((old->path.mnt == new->path.mnt) &&
(old->path.dentry == new->path.dentry))
return true;
break;
case (FSNOTIFY_EVENT_NONE):
return true;
default:
BUG();
};
}
return false;
}
/* and the list better be locked by something too! */
static struct fsnotify_event *fanotify_merge(struct list_head *list,
struct fsnotify_event *event)
{
struct fsnotify_event_holder *test_holder;
struct fsnotify_event *test_event = NULL;
struct fsnotify_event *new_event;
pr_debug("%s: list=%p event=%p\n", __func__, list, event);
list_for_each_entry_reverse(test_holder, list, event_list) {
if (should_merge(test_holder->event, event)) {
test_event = test_holder->event;
break;
}
}
if (!test_event)
return NULL;
fsnotify_get_event(test_event);
/* if they are exactly the same we are done */
if (test_event->mask == event->mask)
return test_event;
/*
* if the refcnt == 2 this is the only queue
* for this event and so we can update the mask
* in place.
*/
if (atomic_read(&test_event->refcnt) == 2) {
test_event->mask |= event->mask;
return test_event;
}
new_event = fsnotify_clone_event(test_event);
/* done with test_event */
fsnotify_put_event(test_event);
/* couldn't allocate memory, merge was not possible */
if (unlikely(!new_event))
return ERR_PTR(-ENOMEM);
/* build new event and replace it on the list */
new_event->mask = (test_event->mask | event->mask);
fsnotify_replace_event(test_holder, new_event);
/* we hold a reference on new_event from clone_event */
return new_event;
}
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
static int fanotify_get_response_from_access(struct fsnotify_group *group,
struct fsnotify_event *event)
{
int ret;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
wait_event(group->fanotify_data.access_waitq, event->response ||
atomic_read(&group->fanotify_data.bypass_perm));
if (!event->response) /* bypass_perm set */
return 0;
/* userspace responded, convert to something usable */
spin_lock(&event->lock);
switch (event->response) {
case FAN_ALLOW:
ret = 0;
break;
case FAN_DENY:
default:
ret = -EPERM;
}
event->response = 0;
spin_unlock(&event->lock);
pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
group, event, ret);
return ret;
}
#endif
static int fanotify_handle_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *fanotify_mark,
struct fsnotify_event *event)
{
int ret = 0;
struct fsnotify_event *notify_event = NULL;
BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
notify_event = fsnotify_add_notify_event(group, event, NULL, fanotify_merge);
if (IS_ERR(notify_event))
return PTR_ERR(notify_event);
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
if (event->mask & FAN_ALL_PERM_EVENTS) {
/* if we merged we need to wait on the new event */
if (notify_event)
event = notify_event;
ret = fanotify_get_response_from_access(group, event);
}
#endif
if (notify_event)
fsnotify_put_event(notify_event);
return ret;
}
static bool fanotify_should_send_event(struct fsnotify_group *group,
struct inode *to_tell,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmnt_mark,
__u32 event_mask, void *data, int data_type)
{
__u32 marks_mask, marks_ignored_mask;
struct path *path = data;
pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p "
"mask=%x data=%p data_type=%d\n", __func__, group, to_tell,
inode_mark, vfsmnt_mark, event_mask, data, data_type);
/* if we don't have enough info to send an event to userspace say no */
if (data_type != FSNOTIFY_EVENT_PATH)
return false;
/* sorry, fanotify only gives a damn about files and dirs */
if (!S_ISREG(path->dentry->d_inode->i_mode) &&
!S_ISDIR(path->dentry->d_inode->i_mode))
return false;
if (inode_mark && vfsmnt_mark) {
marks_mask = (vfsmnt_mark->mask | inode_mark->mask);
marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask);
} else if (inode_mark) {
/*
* if the event is for a child and this inode doesn't care about
* events on the child, don't send it!
*/
if ((event_mask & FS_EVENT_ON_CHILD) &&
!(inode_mark->mask & FS_EVENT_ON_CHILD))
return false;
marks_mask = inode_mark->mask;
marks_ignored_mask = inode_mark->ignored_mask;
} else if (vfsmnt_mark) {
marks_mask = vfsmnt_mark->mask;
marks_ignored_mask = vfsmnt_mark->ignored_mask;
} else {
BUG();
}
if (S_ISDIR(path->dentry->d_inode->i_mode) &&
(marks_ignored_mask & FS_ISDIR))
return false;
if (event_mask & marks_mask & ~marks_ignored_mask)
return true;
return false;
}
static void fanotify_free_group_priv(struct fsnotify_group *group)
{
struct user_struct *user;
user = group->fanotify_data.user;
atomic_dec(&user->fanotify_listeners);
free_uid(user);
}
const struct fsnotify_ops fanotify_fsnotify_ops = {
.handle_event = fanotify_handle_event,
.should_send_event = fanotify_should_send_event,
.free_group_priv = fanotify_free_group_priv,
.free_event_priv = NULL,
.freeing_mark = NULL,
};

View File

@@ -0,0 +1,891 @@
#include <linux/fanotify.h>
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/fsnotify_backend.h>
#include <linux/init.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/poll.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <asm/ioctls.h>
#include "../../mount.h"
#include "../fdinfo.h"
#define FANOTIFY_DEFAULT_MAX_EVENTS 16384
#define FANOTIFY_DEFAULT_MAX_MARKS 8192
#define FANOTIFY_DEFAULT_MAX_LISTENERS 128
extern const struct fsnotify_ops fanotify_fsnotify_ops;
static struct kmem_cache *fanotify_mark_cache __read_mostly;
static struct kmem_cache *fanotify_response_event_cache __read_mostly;
struct fanotify_response_event {
struct list_head list;
__s32 fd;
struct fsnotify_event *event;
};
/*
* Get an fsnotify notification event if one exists and is small
* enough to fit in "count". Return an error pointer if the count
* is not large enough.
*
* Called with the group->notification_mutex held.
*/
static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
size_t count)
{
BUG_ON(!mutex_is_locked(&group->notification_mutex));
pr_debug("%s: group=%p count=%zd\n", __func__, group, count);
if (fsnotify_notify_queue_is_empty(group))
return NULL;
if (FAN_EVENT_METADATA_LEN > count)
return ERR_PTR(-EINVAL);
/* held the notification_mutex the whole time, so this is the
* same event we peeked above */
return fsnotify_remove_notify_event(group);
}
static int create_fd(struct fsnotify_group *group,
struct fsnotify_event *event,
struct file **file)
{
int client_fd;
struct file *new_file;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
client_fd = get_unused_fd();
if (client_fd < 0)
return client_fd;
if (event->data_type != FSNOTIFY_EVENT_PATH) {
WARN_ON(1);
put_unused_fd(client_fd);
return -EINVAL;
}
/*
* we need a new file handle for the userspace program so it can read even if it was
* originally opened O_WRONLY.
*/
/* it's possible this event was an overflow event. in that case dentry and mnt
* are NULL; That's fine, just don't call dentry open */
if (event->path.dentry && event->path.mnt)
new_file = dentry_open(&event->path,
group->fanotify_data.f_flags | FMODE_NONOTIFY,
current_cred());
else
new_file = ERR_PTR(-EOVERFLOW);
if (IS_ERR(new_file)) {
/*
* we still send an event even if we can't open the file. this
* can happen when say tasks are gone and we try to open their
* /proc files or we try to open a WRONLY file like in sysfs
* we just send the errno to userspace since there isn't much
* else we can do.
*/
put_unused_fd(client_fd);
client_fd = PTR_ERR(new_file);
} else {
*file = new_file;
}
return client_fd;
}
static int fill_event_metadata(struct fsnotify_group *group,
struct fanotify_event_metadata *metadata,
struct fsnotify_event *event,
struct file **file)
{
int ret = 0;
pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
group, metadata, event);
*file = NULL;
metadata->event_len = FAN_EVENT_METADATA_LEN;
metadata->metadata_len = FAN_EVENT_METADATA_LEN;
metadata->vers = FANOTIFY_METADATA_VERSION;
metadata->reserved = 0;
metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS;
metadata->pid = pid_vnr(event->tgid);
if (unlikely(event->mask & FAN_Q_OVERFLOW))
metadata->fd = FAN_NOFD;
else {
metadata->fd = create_fd(group, event, file);
if (metadata->fd < 0)
ret = metadata->fd;
}
return ret;
}
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
static struct fanotify_response_event *dequeue_re(struct fsnotify_group *group,
__s32 fd)
{
struct fanotify_response_event *re, *return_re = NULL;
mutex_lock(&group->fanotify_data.access_mutex);
list_for_each_entry(re, &group->fanotify_data.access_list, list) {
if (re->fd != fd)
continue;
list_del_init(&re->list);
return_re = re;
break;
}
mutex_unlock(&group->fanotify_data.access_mutex);
pr_debug("%s: found return_re=%p\n", __func__, return_re);
return return_re;
}
static int process_access_response(struct fsnotify_group *group,
struct fanotify_response *response_struct)
{
struct fanotify_response_event *re;
__s32 fd = response_struct->fd;
__u32 response = response_struct->response;
pr_debug("%s: group=%p fd=%d response=%d\n", __func__, group,
fd, response);
/*
* make sure the response is valid, if invalid we do nothing and either
* userspace can send a valid response or we will clean it up after the
* timeout
*/
switch (response) {
case FAN_ALLOW:
case FAN_DENY:
break;
default:
return -EINVAL;
}
if (fd < 0)
return -EINVAL;
re = dequeue_re(group, fd);
if (!re)
return -ENOENT;
re->event->response = response;
wake_up(&group->fanotify_data.access_waitq);
kmem_cache_free(fanotify_response_event_cache, re);
return 0;
}
static int prepare_for_access_response(struct fsnotify_group *group,
struct fsnotify_event *event,
__s32 fd)
{
struct fanotify_response_event *re;
if (!(event->mask & FAN_ALL_PERM_EVENTS))
return 0;
re = kmem_cache_alloc(fanotify_response_event_cache, GFP_KERNEL);
if (!re)
return -ENOMEM;
re->event = event;
re->fd = fd;
mutex_lock(&group->fanotify_data.access_mutex);
if (atomic_read(&group->fanotify_data.bypass_perm)) {
mutex_unlock(&group->fanotify_data.access_mutex);
kmem_cache_free(fanotify_response_event_cache, re);
event->response = FAN_ALLOW;
return 0;
}
list_add_tail(&re->list, &group->fanotify_data.access_list);
mutex_unlock(&group->fanotify_data.access_mutex);
return 0;
}
#else
static int prepare_for_access_response(struct fsnotify_group *group,
struct fsnotify_event *event,
__s32 fd)
{
return 0;
}
#endif
static ssize_t copy_event_to_user(struct fsnotify_group *group,
struct fsnotify_event *event,
char __user *buf)
{
struct fanotify_event_metadata fanotify_event_metadata;
struct file *f;
int fd, ret;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
ret = fill_event_metadata(group, &fanotify_event_metadata, event, &f);
if (ret < 0)
goto out;
fd = fanotify_event_metadata.fd;
ret = -EFAULT;
if (copy_to_user(buf, &fanotify_event_metadata,
fanotify_event_metadata.event_len))
goto out_close_fd;
ret = prepare_for_access_response(group, event, fd);
if (ret)
goto out_close_fd;
if (fd != FAN_NOFD)
fd_install(fd, f);
return fanotify_event_metadata.event_len;
out_close_fd:
if (fd != FAN_NOFD) {
put_unused_fd(fd);
fput(f);
}
out:
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
if (event->mask & FAN_ALL_PERM_EVENTS) {
event->response = FAN_DENY;
wake_up(&group->fanotify_data.access_waitq);
}
#endif
return ret;
}
/* intofiy userspace file descriptor functions */
static unsigned int fanotify_poll(struct file *file, poll_table *wait)
{
struct fsnotify_group *group = file->private_data;
int ret = 0;
poll_wait(file, &group->notification_waitq, wait);
mutex_lock(&group->notification_mutex);
if (!fsnotify_notify_queue_is_empty(group))
ret = POLLIN | POLLRDNORM;
mutex_unlock(&group->notification_mutex);
return ret;
}
static ssize_t fanotify_read(struct file *file, char __user *buf,
size_t count, loff_t *pos)
{
struct fsnotify_group *group;
struct fsnotify_event *kevent;
char __user *start;
int ret;
DEFINE_WAIT(wait);
start = buf;
group = file->private_data;
pr_debug("%s: group=%p\n", __func__, group);
while (1) {
prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE);
mutex_lock(&group->notification_mutex);
kevent = get_one_event(group, count);
mutex_unlock(&group->notification_mutex);
if (kevent) {
ret = PTR_ERR(kevent);
if (IS_ERR(kevent))
break;
ret = copy_event_to_user(group, kevent, buf);
fsnotify_put_event(kevent);
if (ret < 0)
break;
buf += ret;
count -= ret;
continue;
}
ret = -EAGAIN;
if (file->f_flags & O_NONBLOCK)
break;
ret = -ERESTARTSYS;
if (signal_pending(current))
break;
if (start != buf)
break;
schedule();
}
finish_wait(&group->notification_waitq, &wait);
if (start != buf && ret != -EFAULT)
ret = buf - start;
return ret;
}
static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
struct fanotify_response response = { .fd = -1, .response = -1 };
struct fsnotify_group *group;
int ret;
group = file->private_data;
if (count > sizeof(response))
count = sizeof(response);
pr_debug("%s: group=%p count=%zu\n", __func__, group, count);
if (copy_from_user(&response, buf, count))
return -EFAULT;
ret = process_access_response(group, &response);
if (ret < 0)
count = ret;
return count;
#else
return -EINVAL;
#endif
}
static int fanotify_release(struct inode *ignored, struct file *file)
{
struct fsnotify_group *group = file->private_data;
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
struct fanotify_response_event *re, *lre;
mutex_lock(&group->fanotify_data.access_mutex);
atomic_inc(&group->fanotify_data.bypass_perm);
list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) {
pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group,
re, re->event);
list_del_init(&re->list);
re->event->response = FAN_ALLOW;
kmem_cache_free(fanotify_response_event_cache, re);
}
mutex_unlock(&group->fanotify_data.access_mutex);
wake_up(&group->fanotify_data.access_waitq);
#endif
if (file->f_flags & FASYNC)
fsnotify_fasync(-1, file, 0);
/* matches the fanotify_init->fsnotify_alloc_group */
fsnotify_destroy_group(group);
return 0;
}
static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct fsnotify_group *group;
struct fsnotify_event_holder *holder;
void __user *p;
int ret = -ENOTTY;
size_t send_len = 0;
group = file->private_data;
p = (void __user *) arg;
switch (cmd) {
case FIONREAD:
mutex_lock(&group->notification_mutex);
list_for_each_entry(holder, &group->notification_list, event_list)
send_len += FAN_EVENT_METADATA_LEN;
mutex_unlock(&group->notification_mutex);
ret = put_user(send_len, (int __user *) p);
break;
}
return ret;
}
static const struct file_operations fanotify_fops = {
.show_fdinfo = fanotify_show_fdinfo,
.poll = fanotify_poll,
.read = fanotify_read,
.write = fanotify_write,
.fasync = NULL,
.release = fanotify_release,
.unlocked_ioctl = fanotify_ioctl,
.compat_ioctl = fanotify_ioctl,
.llseek = noop_llseek,
};
static void fanotify_free_mark(struct fsnotify_mark *fsn_mark)
{
kmem_cache_free(fanotify_mark_cache, fsn_mark);
}
static int fanotify_find_path(int dfd, const char __user *filename,
struct path *path, unsigned int flags)
{
int ret;
pr_debug("%s: dfd=%d filename=%p flags=%x\n", __func__,
dfd, filename, flags);
if (filename == NULL) {
struct fd f = fdget(dfd);
ret = -EBADF;
if (!f.file)
goto out;
ret = -ENOTDIR;
if ((flags & FAN_MARK_ONLYDIR) &&
!(S_ISDIR(file_inode(f.file)->i_mode))) {
fdput(f);
goto out;
}
*path = f.file->f_path;
path_get(path);
fdput(f);
} else {
unsigned int lookup_flags = 0;
if (!(flags & FAN_MARK_DONT_FOLLOW))
lookup_flags |= LOOKUP_FOLLOW;
if (flags & FAN_MARK_ONLYDIR)
lookup_flags |= LOOKUP_DIRECTORY;
ret = user_path_at(dfd, filename, lookup_flags, path);
if (ret)
goto out;
}
/* you can only watch an inode if you have read permissions on it */
ret = inode_permission(path->dentry->d_inode, MAY_READ);
if (ret)
path_put(path);
out:
return ret;
}
static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
__u32 mask,
unsigned int flags,
int *destroy)
{
__u32 oldmask;
spin_lock(&fsn_mark->lock);
if (!(flags & FAN_MARK_IGNORED_MASK)) {
oldmask = fsn_mark->mask;
fsnotify_set_mark_mask_locked(fsn_mark, (oldmask & ~mask));
} else {
oldmask = fsn_mark->ignored_mask;
fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask & ~mask));
}
spin_unlock(&fsn_mark->lock);
*destroy = !(oldmask & ~mask);
return mask & oldmask;
}
static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
struct vfsmount *mnt, __u32 mask,
unsigned int flags)
{
struct fsnotify_mark *fsn_mark = NULL;
__u32 removed;
int destroy_mark;
fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
if (!fsn_mark)
return -ENOENT;
removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
&destroy_mark);
if (destroy_mark)
fsnotify_destroy_mark(fsn_mark, group);
fsnotify_put_mark(fsn_mark);
if (removed & real_mount(mnt)->mnt_fsnotify_mask)
fsnotify_recalc_vfsmount_mask(mnt);
return 0;
}
static int fanotify_remove_inode_mark(struct fsnotify_group *group,
struct inode *inode, __u32 mask,
unsigned int flags)
{
struct fsnotify_mark *fsn_mark = NULL;
__u32 removed;
int destroy_mark;
fsn_mark = fsnotify_find_inode_mark(group, inode);
if (!fsn_mark)
return -ENOENT;
removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
&destroy_mark);
if (destroy_mark)
fsnotify_destroy_mark(fsn_mark, group);
/* matches the fsnotify_find_inode_mark() */
fsnotify_put_mark(fsn_mark);
if (removed & inode->i_fsnotify_mask)
fsnotify_recalc_inode_mask(inode);
return 0;
}
static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
__u32 mask,
unsigned int flags)
{
__u32 oldmask = -1;
spin_lock(&fsn_mark->lock);
if (!(flags & FAN_MARK_IGNORED_MASK)) {
oldmask = fsn_mark->mask;
fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask));
} else {
__u32 tmask = fsn_mark->ignored_mask | mask;
fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
}
if (!(flags & FAN_MARK_ONDIR)) {
__u32 tmask = fsn_mark->ignored_mask | FAN_ONDIR;
fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
}
spin_unlock(&fsn_mark->lock);
return mask & ~oldmask;
}
static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
struct vfsmount *mnt, __u32 mask,
unsigned int flags)
{
struct fsnotify_mark *fsn_mark;
__u32 added;
int ret = 0;
fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
if (!fsn_mark) {
if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
return -ENOSPC;
fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
if (!fsn_mark)
return -ENOMEM;
fsnotify_init_mark(fsn_mark, fanotify_free_mark);
ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0);
if (ret)
goto err;
}
added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
if (added & ~real_mount(mnt)->mnt_fsnotify_mask)
fsnotify_recalc_vfsmount_mask(mnt);
err:
fsnotify_put_mark(fsn_mark);
return ret;
}
static int fanotify_add_inode_mark(struct fsnotify_group *group,
struct inode *inode, __u32 mask,
unsigned int flags)
{
struct fsnotify_mark *fsn_mark;
__u32 added;
int ret = 0;
pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
/*
* If some other task has this inode open for write we should not add
* an ignored mark, unless that ignored mark is supposed to survive
* modification changes anyway.
*/
if ((flags & FAN_MARK_IGNORED_MASK) &&
!(flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
(atomic_read(&inode->i_writecount) > 0))
return 0;
fsn_mark = fsnotify_find_inode_mark(group, inode);
if (!fsn_mark) {
if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
return -ENOSPC;
fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
if (!fsn_mark)
return -ENOMEM;
fsnotify_init_mark(fsn_mark, fanotify_free_mark);
ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0);
if (ret)
goto err;
}
added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
if (added & ~inode->i_fsnotify_mask)
fsnotify_recalc_inode_mask(inode);
err:
fsnotify_put_mark(fsn_mark);
return ret;
}
/* fanotify syscalls */
SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
{
struct fsnotify_group *group;
int f_flags, fd;
struct user_struct *user;
pr_debug("%s: flags=%d event_f_flags=%d\n",
__func__, flags, event_f_flags);
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (flags & ~FAN_ALL_INIT_FLAGS)
return -EINVAL;
user = get_current_user();
if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) {
free_uid(user);
return -EMFILE;
}
f_flags = O_RDWR | FMODE_NONOTIFY;
if (flags & FAN_CLOEXEC)
f_flags |= O_CLOEXEC;
if (flags & FAN_NONBLOCK)
f_flags |= O_NONBLOCK;
/* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */
group = fsnotify_alloc_group(&fanotify_fsnotify_ops);
if (IS_ERR(group)) {
free_uid(user);
return PTR_ERR(group);
}
group->fanotify_data.user = user;
atomic_inc(&user->fanotify_listeners);
group->fanotify_data.f_flags = event_f_flags;
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
mutex_init(&group->fanotify_data.access_mutex);
init_waitqueue_head(&group->fanotify_data.access_waitq);
INIT_LIST_HEAD(&group->fanotify_data.access_list);
atomic_set(&group->fanotify_data.bypass_perm, 0);
#endif
switch (flags & FAN_ALL_CLASS_BITS) {
case FAN_CLASS_NOTIF:
group->priority = FS_PRIO_0;
break;
case FAN_CLASS_CONTENT:
group->priority = FS_PRIO_1;
break;
case FAN_CLASS_PRE_CONTENT:
group->priority = FS_PRIO_2;
break;
default:
fd = -EINVAL;
goto out_destroy_group;
}
if (flags & FAN_UNLIMITED_QUEUE) {
fd = -EPERM;
if (!capable(CAP_SYS_ADMIN))
goto out_destroy_group;
group->max_events = UINT_MAX;
} else {
group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS;
}
if (flags & FAN_UNLIMITED_MARKS) {
fd = -EPERM;
if (!capable(CAP_SYS_ADMIN))
goto out_destroy_group;
group->fanotify_data.max_marks = UINT_MAX;
} else {
group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS;
}
fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
if (fd < 0)
goto out_destroy_group;
return fd;
out_destroy_group:
fsnotify_destroy_group(group);
return fd;
}
SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
__u64, mask, int, dfd,
const char __user *, pathname)
{
struct inode *inode = NULL;
struct vfsmount *mnt = NULL;
struct fsnotify_group *group;
struct fd f;
struct path path;
int ret;
pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
__func__, fanotify_fd, flags, dfd, pathname, mask);
/* we only use the lower 32 bits as of right now. */
if (mask & ((__u64)0xffffffff << 32))
return -EINVAL;
if (flags & ~FAN_ALL_MARK_FLAGS)
return -EINVAL;
switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
case FAN_MARK_ADD: /* fallthrough */
case FAN_MARK_REMOVE:
if (!mask)
return -EINVAL;
case FAN_MARK_FLUSH:
break;
default:
return -EINVAL;
}
if (mask & FAN_ONDIR) {
flags |= FAN_MARK_ONDIR;
mask &= ~FAN_ONDIR;
}
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
if (mask & ~(FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD))
#else
if (mask & ~(FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD))
#endif
return -EINVAL;
f = fdget(fanotify_fd);
if (unlikely(!f.file))
return -EBADF;
/* verify that this is indeed an fanotify instance */
ret = -EINVAL;
if (unlikely(f.file->f_op != &fanotify_fops))
goto fput_and_out;
group = f.file->private_data;
/*
* group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not
* allowed to set permissions events.
*/
ret = -EINVAL;
if (mask & FAN_ALL_PERM_EVENTS &&
group->priority == FS_PRIO_0)
goto fput_and_out;
ret = fanotify_find_path(dfd, pathname, &path, flags);
if (ret)
goto fput_and_out;
/* inode held in place by reference to path; group by fget on fd */
if (!(flags & FAN_MARK_MOUNT))
inode = path.dentry->d_inode;
else
mnt = path.mnt;
/* create/update an inode mark */
switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
case FAN_MARK_ADD:
if (flags & FAN_MARK_MOUNT)
ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags);
else
ret = fanotify_add_inode_mark(group, inode, mask, flags);
break;
case FAN_MARK_REMOVE:
if (flags & FAN_MARK_MOUNT)
ret = fanotify_remove_vfsmount_mark(group, mnt, mask, flags);
else
ret = fanotify_remove_inode_mark(group, inode, mask, flags);
break;
case FAN_MARK_FLUSH:
if (flags & FAN_MARK_MOUNT)
fsnotify_clear_vfsmount_marks_by_group(group);
else
fsnotify_clear_inode_marks_by_group(group);
break;
default:
ret = -EINVAL;
}
path_put(&path);
fput_and_out:
fdput(f);
return ret;
}
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE6(fanotify_mark,
int, fanotify_fd, unsigned int, flags,
__u32, mask0, __u32, mask1, int, dfd,
const char __user *, pathname)
{
return sys_fanotify_mark(fanotify_fd, flags,
#ifdef __BIG_ENDIAN
((__u64)mask1 << 32) | mask0,
#else
((__u64)mask0 << 32) | mask1,
#endif
dfd, pathname);
}
#endif
/*
* fanotify_user_setup - Our initialization function. Note that we cannot return
* error because we have compiled-in VFS hooks. So an (unlikely) failure here
* must result in panic().
*/
static int __init fanotify_user_setup(void)
{
fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC);
fanotify_response_event_cache = KMEM_CACHE(fanotify_response_event,
SLAB_PANIC);
return 0;
}
device_initcall(fanotify_user_setup);

179
fs/notify/fdinfo.c Normal file
View File

@@ -0,0 +1,179 @@
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/fsnotify_backend.h>
#include <linux/idr.h>
#include <linux/init.h>
#include <linux/inotify.h>
#include <linux/fanotify.h>
#include <linux/kernel.h>
#include <linux/namei.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/exportfs.h>
#include "inotify/inotify.h"
#include "../fs/mount.h"
#if defined(CONFIG_PROC_FS)
#if defined(CONFIG_INOTIFY_USER) || defined(CONFIG_FANOTIFY)
static int show_fdinfo(struct seq_file *m, struct file *f,
int (*show)(struct seq_file *m, struct fsnotify_mark *mark))
{
struct fsnotify_group *group = f->private_data;
struct fsnotify_mark *mark;
int ret = 0;
mutex_lock(&group->mark_mutex);
list_for_each_entry(mark, &group->marks_list, g_list) {
ret = show(m, mark);
if (ret)
break;
}
mutex_unlock(&group->mark_mutex);
return ret;
}
#if defined(CONFIG_EXPORTFS)
static int show_mark_fhandle(struct seq_file *m, struct inode *inode)
{
struct {
struct file_handle handle;
u8 pad[64];
} f;
int size, ret, i;
f.handle.handle_bytes = sizeof(f.pad);
size = f.handle.handle_bytes >> 2;
ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, 0);
if ((ret == 255) || (ret == -ENOSPC)) {
WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret);
return 0;
}
f.handle.handle_type = ret;
f.handle.handle_bytes = size * sizeof(u32);
ret = seq_printf(m, "fhandle-bytes:%x fhandle-type:%x f_handle:",
f.handle.handle_bytes, f.handle.handle_type);
for (i = 0; i < f.handle.handle_bytes; i++)
ret |= seq_printf(m, "%02x", (int)f.handle.f_handle[i]);
return ret;
}
#else
static int show_mark_fhandle(struct seq_file *m, struct inode *inode)
{
return 0;
}
#endif
#ifdef CONFIG_INOTIFY_USER
static int inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
{
struct inotify_inode_mark *inode_mark;
struct inode *inode;
int ret = 0;
if (!(mark->flags & (FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_INODE)))
return 0;
inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark);
inode = igrab(mark->i.inode);
if (inode) {
ret = seq_printf(m, "inotify wd:%x ino:%lx sdev:%x "
"mask:%x ignored_mask:%x ",
inode_mark->wd, inode->i_ino,
inode->i_sb->s_dev,
mark->mask, mark->ignored_mask);
ret |= show_mark_fhandle(m, inode);
ret |= seq_putc(m, '\n');
iput(inode);
}
return ret;
}
int inotify_show_fdinfo(struct seq_file *m, struct file *f)
{
return show_fdinfo(m, f, inotify_fdinfo);
}
#endif /* CONFIG_INOTIFY_USER */
#ifdef CONFIG_FANOTIFY
static int fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
{
unsigned int mflags = 0;
struct inode *inode;
int ret = 0;
if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE))
return 0;
if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
mflags |= FAN_MARK_IGNORED_SURV_MODIFY;
if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
inode = igrab(mark->i.inode);
if (!inode)
goto out;
ret = seq_printf(m, "fanotify ino:%lx sdev:%x "
"mflags:%x mask:%x ignored_mask:%x ",
inode->i_ino, inode->i_sb->s_dev,
mflags, mark->mask, mark->ignored_mask);
ret |= show_mark_fhandle(m, inode);
ret |= seq_putc(m, '\n');
iput(inode);
} else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT) {
struct mount *mnt = real_mount(mark->m.mnt);
ret = seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x "
"ignored_mask:%x\n", mnt->mnt_id, mflags,
mark->mask, mark->ignored_mask);
}
out:
return ret;
}
int fanotify_show_fdinfo(struct seq_file *m, struct file *f)
{
struct fsnotify_group *group = f->private_data;
unsigned int flags = 0;
switch (group->priority) {
case FS_PRIO_0:
flags |= FAN_CLASS_NOTIF;
break;
case FS_PRIO_1:
flags |= FAN_CLASS_CONTENT;
break;
case FS_PRIO_2:
flags |= FAN_CLASS_PRE_CONTENT;
break;
}
if (group->max_events == UINT_MAX)
flags |= FAN_UNLIMITED_QUEUE;
if (group->fanotify_data.max_marks == UINT_MAX)
flags |= FAN_UNLIMITED_MARKS;
seq_printf(m, "fanotify flags:%x event-flags:%x\n",
flags, group->fanotify_data.f_flags);
return show_fdinfo(m, f, fanotify_fdinfo);
}
#endif /* CONFIG_FANOTIFY */
#endif /* CONFIG_INOTIFY_USER || CONFIG_FANOTIFY */
#endif /* CONFIG_PROC_FS */

27
fs/notify/fdinfo.h Normal file
View File

@@ -0,0 +1,27 @@
#ifndef __FSNOTIFY_FDINFO_H__
#define __FSNOTIFY_FDINFO_H__
#include <linux/errno.h>
#include <linux/proc_fs.h>
struct seq_file;
struct file;
#ifdef CONFIG_PROC_FS
#ifdef CONFIG_INOTIFY_USER
extern int inotify_show_fdinfo(struct seq_file *m, struct file *f);
#endif
#ifdef CONFIG_FANOTIFY
extern int fanotify_show_fdinfo(struct seq_file *m, struct file *f);
#endif
#else /* CONFIG_PROC_FS */
#define inotify_show_fdinfo NULL
#define fanotify_show_fdinfo NULL
#endif /* CONFIG_PROC_FS */
#endif /* __FSNOTIFY_FDINFO_H__ */

311
fs/notify/fsnotify.c Normal file
View File

@@ -0,0 +1,311 @@
/*
* Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/dcache.h>
#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/srcu.h>
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
#include "../mount.h"
/*
* Clear all of the marks on an inode when it is being evicted from core
*/
void __fsnotify_inode_delete(struct inode *inode)
{
fsnotify_clear_marks_by_inode(inode);
}
EXPORT_SYMBOL_GPL(__fsnotify_inode_delete);
void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
{
fsnotify_clear_marks_by_mount(mnt);
}
/*
* Given an inode, first check if we care what happens to our children. Inotify
* and dnotify both tell their parents about events. If we care about any event
* on a child we run all of our children and set a dentry flag saying that the
* parent cares. Thus when an event happens on a child it can quickly tell if
* if there is a need to find a parent and send the event to the parent.
*/
void __fsnotify_update_child_dentry_flags(struct inode *inode)
{
struct dentry *alias;
int watched;
if (!S_ISDIR(inode->i_mode))
return;
/* determine if the children should tell inode about their events */
watched = fsnotify_inode_watches_children(inode);
spin_lock(&inode->i_lock);
/* run all of the dentries associated with this inode. Since this is a
* directory, there damn well better only be one item on this list */
hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
struct dentry *child;
/* run all of the children of the original inode and fix their
* d_flags to indicate parental interest (their parent is the
* original inode) */
spin_lock(&alias->d_lock);
list_for_each_entry(child, &alias->d_subdirs, d_child) {
if (!child->d_inode)
continue;
spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
if (watched)
child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
else
child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
spin_unlock(&child->d_lock);
}
spin_unlock(&alias->d_lock);
}
spin_unlock(&inode->i_lock);
}
/* Notify this dentry's parent about a child's events. */
int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
{
struct dentry *parent;
struct inode *p_inode;
int ret = 0;
if (!dentry)
dentry = path->dentry;
if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
return 0;
parent = dget_parent(dentry);
p_inode = parent->d_inode;
if (unlikely(!fsnotify_inode_watches_children(p_inode)))
__fsnotify_update_child_dentry_flags(p_inode);
else if (p_inode->i_fsnotify_mask & mask) {
/* we are notifying a parent so come up with the new mask which
* specifies these are events which came from a child. */
mask |= FS_EVENT_ON_CHILD;
if (path)
ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH,
dentry->d_name.name, 0);
else
ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
dentry->d_name.name, 0);
}
dput(parent);
return ret;
}
EXPORT_SYMBOL_GPL(__fsnotify_parent);
static int send_to_group(struct inode *to_tell,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
__u32 mask, void *data,
int data_is, u32 cookie,
const unsigned char *file_name,
struct fsnotify_event **event)
{
struct fsnotify_group *group = NULL;
__u32 inode_test_mask = 0;
__u32 vfsmount_test_mask = 0;
if (unlikely(!inode_mark && !vfsmount_mark)) {
BUG();
return 0;
}
/* clear ignored on inode modification */
if (mask & FS_MODIFY) {
if (inode_mark &&
!(inode_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
inode_mark->ignored_mask = 0;
if (vfsmount_mark &&
!(vfsmount_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
vfsmount_mark->ignored_mask = 0;
}
/* does the inode mark tell us to do something? */
if (inode_mark) {
group = inode_mark->group;
inode_test_mask = (mask & ~FS_EVENT_ON_CHILD);
inode_test_mask &= inode_mark->mask;
inode_test_mask &= ~inode_mark->ignored_mask;
}
/* does the vfsmount_mark tell us to do something? */
if (vfsmount_mark) {
vfsmount_test_mask = (mask & ~FS_EVENT_ON_CHILD);
group = vfsmount_mark->group;
vfsmount_test_mask &= vfsmount_mark->mask;
vfsmount_test_mask &= ~vfsmount_mark->ignored_mask;
if (inode_mark)
vfsmount_test_mask &= ~inode_mark->ignored_mask;
}
pr_debug("%s: group=%p to_tell=%p mask=%x inode_mark=%p"
" inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x"
" data=%p data_is=%d cookie=%d event=%p\n",
__func__, group, to_tell, mask, inode_mark,
inode_test_mask, vfsmount_mark, vfsmount_test_mask, data,
data_is, cookie, *event);
if (!inode_test_mask && !vfsmount_test_mask)
return 0;
if (group->ops->should_send_event(group, to_tell, inode_mark,
vfsmount_mark, mask, data,
data_is) == false)
return 0;
if (!*event) {
*event = fsnotify_create_event(to_tell, mask, data,
data_is, file_name,
cookie, GFP_KERNEL);
if (!*event)
return -ENOMEM;
}
return group->ops->handle_event(group, inode_mark, vfsmount_mark, *event);
}
/*
* This is the main call to fsnotify. The VFS calls into hook specific functions
* in linux/fsnotify.h. Those functions then in turn call here. Here will call
* out to all of the registered fsnotify_group. Those groups can then use the
* notification event in whatever means they feel necessary.
*/
int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
const unsigned char *file_name, u32 cookie)
{
struct hlist_node *inode_node = NULL, *vfsmount_node = NULL;
struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL;
struct fsnotify_group *inode_group, *vfsmount_group;
struct fsnotify_event *event = NULL;
struct mount *mnt;
int idx, ret = 0;
/* global tests shouldn't care about events on child only the specific event */
__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
if (data_is == FSNOTIFY_EVENT_PATH)
mnt = real_mount(((struct path *)data)->mnt);
else
mnt = NULL;
/*
* if this is a modify event we may need to clear the ignored masks
* otherwise return if neither the inode nor the vfsmount care about
* this type of event.
*/
if (!(mask & FS_MODIFY) &&
!(test_mask & to_tell->i_fsnotify_mask) &&
!(mnt && test_mask & mnt->mnt_fsnotify_mask))
return 0;
idx = srcu_read_lock(&fsnotify_mark_srcu);
if ((mask & FS_MODIFY) ||
(test_mask & to_tell->i_fsnotify_mask))
inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first,
&fsnotify_mark_srcu);
if (mnt && ((mask & FS_MODIFY) ||
(test_mask & mnt->mnt_fsnotify_mask))) {
vfsmount_node = srcu_dereference(mnt->mnt_fsnotify_marks.first,
&fsnotify_mark_srcu);
inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first,
&fsnotify_mark_srcu);
}
while (inode_node || vfsmount_node) {
inode_group = vfsmount_group = NULL;
if (inode_node) {
inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu),
struct fsnotify_mark, i.i_list);
inode_group = inode_mark->group;
}
if (vfsmount_node) {
vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu),
struct fsnotify_mark, m.m_list);
vfsmount_group = vfsmount_mark->group;
}
if (inode_group > vfsmount_group) {
/* handle inode */
ret = send_to_group(to_tell, inode_mark, NULL, mask, data,
data_is, cookie, file_name, &event);
/* we didn't use the vfsmount_mark */
vfsmount_group = NULL;
} else if (vfsmount_group > inode_group) {
ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, data,
data_is, cookie, file_name, &event);
inode_group = NULL;
} else {
ret = send_to_group(to_tell, inode_mark, vfsmount_mark,
mask, data, data_is, cookie, file_name,
&event);
}
if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
goto out;
if (inode_group)
inode_node = srcu_dereference(inode_node->next,
&fsnotify_mark_srcu);
if (vfsmount_group)
vfsmount_node = srcu_dereference(vfsmount_node->next,
&fsnotify_mark_srcu);
}
ret = 0;
out:
srcu_read_unlock(&fsnotify_mark_srcu, idx);
/*
* fsnotify_create_event() took a reference so the event can't be cleaned
* up while we are still trying to add it to lists, drop that one.
*/
if (event)
fsnotify_put_event(event);
return ret;
}
EXPORT_SYMBOL_GPL(fsnotify);
static __init int fsnotify_init(void)
{
int ret;
BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23);
ret = init_srcu_struct(&fsnotify_mark_srcu);
if (ret)
panic("initializing fsnotify_mark_srcu");
return 0;
}
core_initcall(fsnotify_init);

47
fs/notify/fsnotify.h Normal file
View File

@@ -0,0 +1,47 @@
#ifndef __FS_NOTIFY_FSNOTIFY_H_
#define __FS_NOTIFY_FSNOTIFY_H_
#include <linux/list.h>
#include <linux/fsnotify.h>
#include <linux/srcu.h>
#include <linux/types.h>
/* destroy all events sitting in this groups notification queue */
extern void fsnotify_flush_notify(struct fsnotify_group *group);
/* protects reads of inode and vfsmount marks list */
extern struct srcu_struct fsnotify_mark_srcu;
extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark,
__u32 mask);
/* add a mark to an inode */
extern int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
struct fsnotify_group *group, struct inode *inode,
int allow_dups);
/* add a mark to a vfsmount */
extern int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
struct fsnotify_group *group, struct vfsmount *mnt,
int allow_dups);
/* final kfree of a group */
extern void fsnotify_final_destroy_group(struct fsnotify_group *group);
/* vfsmount specific destruction of a mark */
extern void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark);
/* inode specific destruction of a mark */
extern void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark);
/* run the list of all marks associated with inode and flag them to be freed */
extern void fsnotify_clear_marks_by_inode(struct inode *inode);
/* run the list of all marks associated with vfsmount and flag them to be freed */
extern void fsnotify_clear_marks_by_mount(struct vfsmount *mnt);
/*
* update the dentry->d_flags of all of inode's children to indicate if inode cares
* about events that happen to its children.
*/
extern void __fsnotify_update_child_dentry_flags(struct inode *inode);
/* allocate and destroy and event holder to attach events to notification/access queues */
extern struct fsnotify_event_holder *fsnotify_alloc_event_holder(void);
extern void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder);
#endif /* __FS_NOTIFY_FSNOTIFY_H_ */

111
fs/notify/group.c Normal file
View File

@@ -0,0 +1,111 @@
/*
* Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/srcu.h>
#include <linux/rculist.h>
#include <linux/wait.h>
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
#include <linux/atomic.h>
/*
* Final freeing of a group
*/
void fsnotify_final_destroy_group(struct fsnotify_group *group)
{
if (group->ops->free_group_priv)
group->ops->free_group_priv(group);
kfree(group);
}
/*
* Trying to get rid of a group. Remove all marks, flush all events and release
* the group reference.
* Note that another thread calling fsnotify_clear_marks_by_group() may still
* hold a ref to the group.
*/
void fsnotify_destroy_group(struct fsnotify_group *group)
{
/* clear all inode marks for this group */
fsnotify_clear_marks_by_group(group);
synchronize_srcu(&fsnotify_mark_srcu);
/* clear the notification queue of all events */
fsnotify_flush_notify(group);
fsnotify_put_group(group);
}
/*
* Get reference to a group.
*/
void fsnotify_get_group(struct fsnotify_group *group)
{
atomic_inc(&group->refcnt);
}
/*
* Drop a reference to a group. Free it if it's through.
*/
void fsnotify_put_group(struct fsnotify_group *group)
{
if (atomic_dec_and_test(&group->refcnt))
fsnotify_final_destroy_group(group);
}
/*
* Create a new fsnotify_group and hold a reference for the group returned.
*/
struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
{
struct fsnotify_group *group;
group = kzalloc(sizeof(struct fsnotify_group), GFP_KERNEL);
if (!group)
return ERR_PTR(-ENOMEM);
/* set to 0 when there a no external references to this group */
atomic_set(&group->refcnt, 1);
atomic_set(&group->num_marks, 0);
mutex_init(&group->notification_mutex);
INIT_LIST_HEAD(&group->notification_list);
init_waitqueue_head(&group->notification_waitq);
group->max_events = UINT_MAX;
mutex_init(&group->mark_mutex);
INIT_LIST_HEAD(&group->marks_list);
group->ops = ops;
return group;
}
int fsnotify_fasync(int fd, struct file *file, int on)
{
struct fsnotify_group *group = file->private_data;
return fasync_helper(fd, file, on, &group->fsn_fa) >= 0 ? 0 : -EIO;
}

321
fs/notify/inode_mark.c Normal file
View File

@@ -0,0 +1,321 @@
/*
* Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
#include "../internal.h"
/*
* Recalculate the mask of events relevant to a given inode locked.
*/
static void fsnotify_recalc_inode_mask_locked(struct inode *inode)
{
struct fsnotify_mark *mark;
__u32 new_mask = 0;
assert_spin_locked(&inode->i_lock);
hlist_for_each_entry(mark, &inode->i_fsnotify_marks, i.i_list)
new_mask |= mark->mask;
inode->i_fsnotify_mask = new_mask;
}
/*
* Recalculate the inode->i_fsnotify_mask, or the mask of all FS_* event types
* any notifier is interested in hearing for this inode.
*/
void fsnotify_recalc_inode_mask(struct inode *inode)
{
spin_lock(&inode->i_lock);
fsnotify_recalc_inode_mask_locked(inode);
spin_unlock(&inode->i_lock);
__fsnotify_update_child_dentry_flags(inode);
}
void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark)
{
struct inode *inode = mark->i.inode;
BUG_ON(!mutex_is_locked(&mark->group->mark_mutex));
assert_spin_locked(&mark->lock);
spin_lock(&inode->i_lock);
hlist_del_init_rcu(&mark->i.i_list);
mark->i.inode = NULL;
/*
* this mark is now off the inode->i_fsnotify_marks list and we
* hold the inode->i_lock, so this is the perfect time to update the
* inode->i_fsnotify_mask
*/
fsnotify_recalc_inode_mask_locked(inode);
spin_unlock(&inode->i_lock);
}
/*
* Given an inode, destroy all of the marks associated with that inode.
*/
void fsnotify_clear_marks_by_inode(struct inode *inode)
{
struct fsnotify_mark *mark, *lmark;
struct hlist_node *n;
LIST_HEAD(free_list);
spin_lock(&inode->i_lock);
hlist_for_each_entry_safe(mark, n, &inode->i_fsnotify_marks, i.i_list) {
list_add(&mark->i.free_i_list, &free_list);
hlist_del_init_rcu(&mark->i.i_list);
fsnotify_get_mark(mark);
}
spin_unlock(&inode->i_lock);
list_for_each_entry_safe(mark, lmark, &free_list, i.free_i_list) {
struct fsnotify_group *group;
spin_lock(&mark->lock);
fsnotify_get_group(mark->group);
group = mark->group;
spin_unlock(&mark->lock);
fsnotify_destroy_mark(mark, group);
fsnotify_put_mark(mark);
fsnotify_put_group(group);
}
}
/*
* Given a group clear all of the inode marks associated with that group.
*/
void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group)
{
fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_INODE);
}
/*
* given a group and inode, find the mark associated with that combination.
* if found take a reference to that mark and return it, else return NULL
*/
static struct fsnotify_mark *fsnotify_find_inode_mark_locked(
struct fsnotify_group *group,
struct inode *inode)
{
struct fsnotify_mark *mark;
assert_spin_locked(&inode->i_lock);
hlist_for_each_entry(mark, &inode->i_fsnotify_marks, i.i_list) {
if (mark->group == group) {
fsnotify_get_mark(mark);
return mark;
}
}
return NULL;
}
/*
* given a group and inode, find the mark associated with that combination.
* if found take a reference to that mark and return it, else return NULL
*/
struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group,
struct inode *inode)
{
struct fsnotify_mark *mark;
spin_lock(&inode->i_lock);
mark = fsnotify_find_inode_mark_locked(group, inode);
spin_unlock(&inode->i_lock);
return mark;
}
/*
* If we are setting a mark mask on an inode mark we should pin the inode
* in memory.
*/
void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *mark,
__u32 mask)
{
struct inode *inode;
assert_spin_locked(&mark->lock);
if (mask &&
mark->i.inode &&
!(mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) {
mark->flags |= FSNOTIFY_MARK_FLAG_OBJECT_PINNED;
inode = igrab(mark->i.inode);
/*
* we shouldn't be able to get here if the inode wasn't
* already safely held in memory. But bug in case it
* ever is wrong.
*/
BUG_ON(!inode);
}
}
/*
* Attach an initialized mark to a given inode.
* These marks may be used for the fsnotify backend to determine which
* event types should be delivered to which group and for which inodes. These
* marks are ordered according to priority, highest number first, and then by
* the group's location in memory.
*/
int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
struct fsnotify_group *group, struct inode *inode,
int allow_dups)
{
struct fsnotify_mark *lmark, *last = NULL;
int ret = 0;
mark->flags |= FSNOTIFY_MARK_FLAG_INODE;
BUG_ON(!mutex_is_locked(&group->mark_mutex));
assert_spin_locked(&mark->lock);
spin_lock(&inode->i_lock);
mark->i.inode = inode;
/* is mark the first mark? */
if (hlist_empty(&inode->i_fsnotify_marks)) {
hlist_add_head_rcu(&mark->i.i_list, &inode->i_fsnotify_marks);
goto out;
}
/* should mark be in the middle of the current list? */
hlist_for_each_entry(lmark, &inode->i_fsnotify_marks, i.i_list) {
last = lmark;
if ((lmark->group == group) && !allow_dups) {
ret = -EEXIST;
goto out;
}
if (mark->group->priority < lmark->group->priority)
continue;
if ((mark->group->priority == lmark->group->priority) &&
(mark->group < lmark->group))
continue;
hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list);
goto out;
}
BUG_ON(last == NULL);
/* mark should be the last entry. last is the current last entry */
hlist_add_after_rcu(&last->i.i_list, &mark->i.i_list);
out:
fsnotify_recalc_inode_mask_locked(inode);
spin_unlock(&inode->i_lock);
return ret;
}
/**
* fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
* @list: list of inodes being unmounted (sb->s_inodes)
*
* Called during unmount with no locks held, so needs to be safe against
* concurrent modifiers. We temporarily drop inode_sb_list_lock and CAN block.
*/
void fsnotify_unmount_inodes(struct list_head *list)
{
struct inode *inode, *next_i, *need_iput = NULL;
spin_lock(&inode_sb_list_lock);
list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
struct inode *need_iput_tmp;
/*
* We cannot __iget() an inode in state I_FREEING,
* I_WILL_FREE, or I_NEW which is fine because by that point
* the inode cannot have any associated watches.
*/
spin_lock(&inode->i_lock);
if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
spin_unlock(&inode->i_lock);
continue;
}
/*
* If i_count is zero, the inode cannot have any watches and
* doing an __iget/iput with MS_ACTIVE clear would actually
* evict all inodes with zero i_count from icache which is
* unnecessarily violent and may in fact be illegal to do.
*/
if (!atomic_read(&inode->i_count)) {
spin_unlock(&inode->i_lock);
continue;
}
need_iput_tmp = need_iput;
need_iput = NULL;
/* In case fsnotify_inode_delete() drops a reference. */
if (inode != need_iput_tmp)
__iget(inode);
else
need_iput_tmp = NULL;
spin_unlock(&inode->i_lock);
/* In case the dropping of a reference would nuke next_i. */
if ((&next_i->i_sb_list != list) &&
atomic_read(&next_i->i_count)) {
spin_lock(&next_i->i_lock);
if (!(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
__iget(next_i);
need_iput = next_i;
}
spin_unlock(&next_i->i_lock);
}
/*
* We can safely drop inode_sb_list_lock here because we hold
* references on both inode and next_i. Also no new inodes
* will be added since the umount has begun.
*/
spin_unlock(&inode_sb_list_lock);
if (need_iput_tmp)
iput(need_iput_tmp);
/* for each watch, send FS_UNMOUNT and then remove it */
fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
fsnotify_inode_delete(inode);
iput(inode);
spin_lock(&inode_sb_list_lock);
}
spin_unlock(&inode_sb_list_lock);
}

17
fs/notify/inotify/Kconfig Normal file
View File

@@ -0,0 +1,17 @@
config INOTIFY_USER
bool "Inotify support for userspace"
select ANON_INODES
select FSNOTIFY
default y
---help---
Say Y here to enable inotify support for userspace, including the
associated system calls. Inotify allows monitoring of both files and
directories via a single open fd. Events are read from the file
descriptor, which is also select()- and poll()-able.
Inotify fixes numerous shortcomings in dnotify and introduces several
new features including multiple file events, one-shot support, and
unmount notification.
For more information, see <file:Documentation/filesystems/inotify.txt>
If unsure, say Y.

View File

@@ -0,0 +1 @@
obj-$(CONFIG_INOTIFY_USER) += inotify_fsnotify.o inotify_user.o

View File

@@ -0,0 +1,21 @@
#include <linux/fsnotify_backend.h>
#include <linux/inotify.h>
#include <linux/slab.h> /* struct kmem_cache */
extern struct kmem_cache *event_priv_cachep;
struct inotify_event_private_data {
struct fsnotify_event_private_data fsnotify_event_priv_data;
int wd;
};
struct inotify_inode_mark {
struct fsnotify_mark fsn_mark;
int wd;
};
extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
struct fsnotify_group *group);
extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv);
extern const struct fsnotify_ops inotify_fsnotify_ops;

View File

@@ -0,0 +1,223 @@
/*
* fs/inotify_user.c - inotify support for userspace
*
* Authors:
* John McCutchan <ttb@tentacle.dhs.org>
* Robert Love <rml@novell.com>
*
* Copyright (C) 2005 John McCutchan
* Copyright 2006 Hewlett-Packard Development Company, L.P.
*
* Copyright (C) 2009 Eric Paris <Red Hat Inc>
* inotify was largely rewriten to make use of the fsnotify infrastructure
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2, or (at your option) any
* later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/dcache.h> /* d_unlinked */
#include <linux/fs.h> /* struct inode */
#include <linux/fsnotify_backend.h>
#include <linux/inotify.h>
#include <linux/path.h> /* struct path */
#include <linux/slab.h> /* kmem_* */
#include <linux/types.h>
#include <linux/sched.h>
#include "inotify.h"
/*
* Check if 2 events contain the same information. We do not compare private data
* but at this moment that isn't a problem for any know fsnotify listeners.
*/
static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new)
{
if ((old->mask == new->mask) &&
(old->to_tell == new->to_tell) &&
(old->data_type == new->data_type) &&
(old->name_len == new->name_len)) {
switch (old->data_type) {
case (FSNOTIFY_EVENT_INODE):
/* remember, after old was put on the wait_q we aren't
* allowed to look at the inode any more, only thing
* left to check was if the file_name is the same */
if (!old->name_len ||
!strcmp(old->file_name, new->file_name))
return true;
break;
case (FSNOTIFY_EVENT_PATH):
if ((old->path.mnt == new->path.mnt) &&
(old->path.dentry == new->path.dentry))
return true;
break;
case (FSNOTIFY_EVENT_NONE):
if (old->mask & FS_Q_OVERFLOW)
return true;
else if (old->mask & FS_IN_IGNORED)
return false;
return true;
};
}
return false;
}
static struct fsnotify_event *inotify_merge(struct list_head *list,
struct fsnotify_event *event)
{
struct fsnotify_event_holder *last_holder;
struct fsnotify_event *last_event;
/* and the list better be locked by something too */
spin_lock(&event->lock);
last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list);
last_event = last_holder->event;
if (event_compare(last_event, event))
fsnotify_get_event(last_event);
else
last_event = NULL;
spin_unlock(&event->lock);
return last_event;
}
static int inotify_handle_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
struct fsnotify_event *event)
{
struct inotify_inode_mark *i_mark;
struct inode *to_tell;
struct inotify_event_private_data *event_priv;
struct fsnotify_event_private_data *fsn_event_priv;
struct fsnotify_event *added_event;
int wd, ret = 0;
BUG_ON(vfsmount_mark);
pr_debug("%s: group=%p event=%p to_tell=%p mask=%x\n", __func__, group,
event, event->to_tell, event->mask);
to_tell = event->to_tell;
i_mark = container_of(inode_mark, struct inotify_inode_mark,
fsn_mark);
wd = i_mark->wd;
event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
if (unlikely(!event_priv))
return -ENOMEM;
fsn_event_priv = &event_priv->fsnotify_event_priv_data;
fsnotify_get_group(group);
fsn_event_priv->group = group;
event_priv->wd = wd;
added_event = fsnotify_add_notify_event(group, event, fsn_event_priv, inotify_merge);
if (added_event) {
inotify_free_event_priv(fsn_event_priv);
if (!IS_ERR(added_event))
fsnotify_put_event(added_event);
else
ret = PTR_ERR(added_event);
}
if (inode_mark->mask & IN_ONESHOT)
fsnotify_destroy_mark(inode_mark, group);
return ret;
}
static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group)
{
inotify_ignored_and_remove_idr(fsn_mark, group);
}
static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
__u32 mask, void *data, int data_type)
{
if ((inode_mark->mask & FS_EXCL_UNLINK) &&
(data_type == FSNOTIFY_EVENT_PATH)) {
struct path *path = data;
if (d_unlinked(path->dentry))
return false;
}
return true;
}
/*
* This is NEVER supposed to be called. Inotify marks should either have been
* removed from the idr when the watch was removed or in the
* fsnotify_destroy_mark_by_group() call when the inotify instance was being
* torn down. This is only called if the idr is about to be freed but there
* are still marks in it.
*/
static int idr_callback(int id, void *p, void *data)
{
struct fsnotify_mark *fsn_mark;
struct inotify_inode_mark *i_mark;
static bool warned = false;
if (warned)
return 0;
warned = true;
fsn_mark = p;
i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
WARN(1, "inotify closing but id=%d for fsn_mark=%p in group=%p still in "
"idr. Probably leaking memory\n", id, p, data);
/*
* I'm taking the liberty of assuming that the mark in question is a
* valid address and I'm dereferencing it. This might help to figure
* out why we got here and the panic is no worse than the original
* BUG() that was here.
*/
if (fsn_mark)
printk(KERN_WARNING "fsn_mark->group=%p inode=%p wd=%d\n",
fsn_mark->group, fsn_mark->i.inode, i_mark->wd);
return 0;
}
static void inotify_free_group_priv(struct fsnotify_group *group)
{
/* ideally the idr is empty and we won't hit the BUG in the callback */
idr_for_each(&group->inotify_data.idr, idr_callback, group);
idr_destroy(&group->inotify_data.idr);
atomic_dec(&group->inotify_data.user->inotify_devs);
free_uid(group->inotify_data.user);
}
void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv)
{
struct inotify_event_private_data *event_priv;
event_priv = container_of(fsn_event_priv, struct inotify_event_private_data,
fsnotify_event_priv_data);
fsnotify_put_group(fsn_event_priv->group);
kmem_cache_free(event_priv_cachep, event_priv);
}
const struct fsnotify_ops inotify_fsnotify_ops = {
.handle_event = inotify_handle_event,
.should_send_event = inotify_should_send_event,
.free_group_priv = inotify_free_group_priv,
.free_event_priv = inotify_free_event_priv,
.freeing_mark = inotify_freeing_mark,
};

View File

@@ -0,0 +1,852 @@
/*
* fs/inotify_user.c - inotify support for userspace
*
* Authors:
* John McCutchan <ttb@tentacle.dhs.org>
* Robert Love <rml@novell.com>
*
* Copyright (C) 2005 John McCutchan
* Copyright 2006 Hewlett-Packard Development Company, L.P.
*
* Copyright (C) 2009 Eric Paris <Red Hat Inc>
* inotify was largely rewriten to make use of the fsnotify infrastructure
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2, or (at your option) any
* later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/file.h>
#include <linux/fs.h> /* struct inode */
#include <linux/fsnotify_backend.h>
#include <linux/idr.h>
#include <linux/init.h> /* module_init */
#include <linux/inotify.h>
#include <linux/kernel.h> /* roundup() */
#include <linux/namei.h> /* LOOKUP_FOLLOW */
#include <linux/sched.h> /* struct user */
#include <linux/slab.h> /* struct kmem_cache */
#include <linux/syscalls.h>
#include <linux/types.h>
#include <linux/anon_inodes.h>
#include <linux/uaccess.h>
#include <linux/poll.h>
#include <linux/wait.h>
#include "inotify.h"
#include "../fdinfo.h"
#include <asm/ioctls.h>
/* these are configurable via /proc/sys/fs/inotify/ */
static int inotify_max_user_instances __read_mostly;
static int inotify_max_queued_events __read_mostly;
static int inotify_max_user_watches __read_mostly;
static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
struct kmem_cache *event_priv_cachep __read_mostly;
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
static int zero;
ctl_table inotify_table[] = {
{
.procname = "max_user_instances",
.data = &inotify_max_user_instances,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
},
{
.procname = "max_user_watches",
.data = &inotify_max_user_watches,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
},
{
.procname = "max_queued_events",
.data = &inotify_max_queued_events,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero
},
{ }
};
#endif /* CONFIG_SYSCTL */
static inline __u32 inotify_arg_to_mask(u32 arg)
{
__u32 mask;
/*
* everything should accept their own ignored, cares about children,
* and should receive events when the inode is unmounted
*/
mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD | FS_UNMOUNT);
/* mask off the flags used to open the fd */
mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK));
return mask;
}
static inline u32 inotify_mask_to_arg(__u32 mask)
{
return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED |
IN_Q_OVERFLOW);
}
/* intofiy userspace file descriptor functions */
static unsigned int inotify_poll(struct file *file, poll_table *wait)
{
struct fsnotify_group *group = file->private_data;
int ret = 0;
poll_wait(file, &group->notification_waitq, wait);
mutex_lock(&group->notification_mutex);
if (!fsnotify_notify_queue_is_empty(group))
ret = POLLIN | POLLRDNORM;
mutex_unlock(&group->notification_mutex);
return ret;
}
/*
* Get an inotify_kernel_event if one exists and is small
* enough to fit in "count". Return an error pointer if
* not large enough.
*
* Called with the group->notification_mutex held.
*/
static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
size_t count)
{
size_t event_size = sizeof(struct inotify_event);
struct fsnotify_event *event;
if (fsnotify_notify_queue_is_empty(group))
return NULL;
event = fsnotify_peek_notify_event(group);
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
if (event->name_len)
event_size += roundup(event->name_len + 1, event_size);
if (event_size > count)
return ERR_PTR(-EINVAL);
/* held the notification_mutex the whole time, so this is the
* same event we peeked above */
fsnotify_remove_notify_event(group);
return event;
}
/*
* Copy an event to user space, returning how much we copied.
*
* We already checked that the event size is smaller than the
* buffer we had in "get_one_event()" above.
*/
static ssize_t copy_event_to_user(struct fsnotify_group *group,
struct fsnotify_event *event,
char __user *buf)
{
struct inotify_event inotify_event;
struct fsnotify_event_private_data *fsn_priv;
struct inotify_event_private_data *priv;
size_t event_size = sizeof(struct inotify_event);
size_t name_len = 0;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
/* we get the inotify watch descriptor from the event private data */
spin_lock(&event->lock);
fsn_priv = fsnotify_remove_priv_from_event(group, event);
spin_unlock(&event->lock);
if (!fsn_priv)
inotify_event.wd = -1;
else {
priv = container_of(fsn_priv, struct inotify_event_private_data,
fsnotify_event_priv_data);
inotify_event.wd = priv->wd;
inotify_free_event_priv(fsn_priv);
}
/*
* round up event->name_len so it is a multiple of event_size
* plus an extra byte for the terminating '\0'.
*/
if (event->name_len)
name_len = roundup(event->name_len + 1, event_size);
inotify_event.len = name_len;
inotify_event.mask = inotify_mask_to_arg(event->mask);
inotify_event.cookie = event->sync_cookie;
/* send the main event */
if (copy_to_user(buf, &inotify_event, event_size))
return -EFAULT;
buf += event_size;
/*
* fsnotify only stores the pathname, so here we have to send the pathname
* and then pad that pathname out to a multiple of sizeof(inotify_event)
* with zeros. I get my zeros from the nul_inotify_event.
*/
if (name_len) {
unsigned int len_to_zero = name_len - event->name_len;
/* copy the path name */
if (copy_to_user(buf, event->file_name, event->name_len))
return -EFAULT;
buf += event->name_len;
/* fill userspace with 0's */
if (clear_user(buf, len_to_zero))
return -EFAULT;
buf += len_to_zero;
event_size += name_len;
}
return event_size;
}
static ssize_t inotify_read(struct file *file, char __user *buf,
size_t count, loff_t *pos)
{
struct fsnotify_group *group;
struct fsnotify_event *kevent;
char __user *start;
int ret;
DEFINE_WAIT(wait);
start = buf;
group = file->private_data;
while (1) {
prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE);
mutex_lock(&group->notification_mutex);
kevent = get_one_event(group, count);
mutex_unlock(&group->notification_mutex);
pr_debug("%s: group=%p kevent=%p\n", __func__, group, kevent);
if (kevent) {
ret = PTR_ERR(kevent);
if (IS_ERR(kevent))
break;
ret = copy_event_to_user(group, kevent, buf);
fsnotify_put_event(kevent);
if (ret < 0)
break;
buf += ret;
count -= ret;
continue;
}
ret = -EAGAIN;
if (file->f_flags & O_NONBLOCK)
break;
ret = -ERESTARTSYS;
if (signal_pending(current))
break;
if (start != buf)
break;
schedule();
}
finish_wait(&group->notification_waitq, &wait);
if (start != buf && ret != -EFAULT)
ret = buf - start;
return ret;
}
static int inotify_release(struct inode *ignored, struct file *file)
{
struct fsnotify_group *group = file->private_data;
pr_debug("%s: group=%p\n", __func__, group);
/* free this group, matching get was inotify_init->fsnotify_obtain_group */
fsnotify_destroy_group(group);
return 0;
}
static long inotify_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct fsnotify_group *group;
struct fsnotify_event_holder *holder;
struct fsnotify_event *event;
void __user *p;
int ret = -ENOTTY;
size_t send_len = 0;
group = file->private_data;
p = (void __user *) arg;
pr_debug("%s: group=%p cmd=%u\n", __func__, group, cmd);
switch (cmd) {
case FIONREAD:
mutex_lock(&group->notification_mutex);
list_for_each_entry(holder, &group->notification_list, event_list) {
event = holder->event;
send_len += sizeof(struct inotify_event);
if (event->name_len)
send_len += roundup(event->name_len + 1,
sizeof(struct inotify_event));
}
mutex_unlock(&group->notification_mutex);
ret = put_user(send_len, (int __user *) p);
break;
}
return ret;
}
static const struct file_operations inotify_fops = {
.show_fdinfo = inotify_show_fdinfo,
.poll = inotify_poll,
.read = inotify_read,
.fasync = fsnotify_fasync,
.release = inotify_release,
.unlocked_ioctl = inotify_ioctl,
.compat_ioctl = inotify_ioctl,
.llseek = noop_llseek,
};
/*
* find_inode - resolve a user-given path to a specific inode
*/
static int inotify_find_inode(const char __user *dirname, struct path *path, unsigned flags)
{
int error;
error = user_path_at(AT_FDCWD, dirname, flags, path);
if (error)
return error;
/* you can only watch an inode if you have read permissions on it */
error = inode_permission(path->dentry->d_inode, MAY_READ);
if (error)
path_put(path);
return error;
}
static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock,
struct inotify_inode_mark *i_mark)
{
int ret;
idr_preload(GFP_KERNEL);
spin_lock(idr_lock);
ret = idr_alloc_cyclic(idr, i_mark, 1, 0, GFP_NOWAIT);
if (ret >= 0) {
/* we added the mark to the idr, take a reference */
i_mark->wd = ret;
fsnotify_get_mark(&i_mark->fsn_mark);
}
spin_unlock(idr_lock);
idr_preload_end();
return ret < 0 ? ret : 0;
}
static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group *group,
int wd)
{
struct idr *idr = &group->inotify_data.idr;
spinlock_t *idr_lock = &group->inotify_data.idr_lock;
struct inotify_inode_mark *i_mark;
assert_spin_locked(idr_lock);
i_mark = idr_find(idr, wd);
if (i_mark) {
struct fsnotify_mark *fsn_mark = &i_mark->fsn_mark;
fsnotify_get_mark(fsn_mark);
/* One ref for being in the idr, one ref we just took */
BUG_ON(atomic_read(&fsn_mark->refcnt) < 2);
}
return i_mark;
}
static struct inotify_inode_mark *inotify_idr_find(struct fsnotify_group *group,
int wd)
{
struct inotify_inode_mark *i_mark;
spinlock_t *idr_lock = &group->inotify_data.idr_lock;
spin_lock(idr_lock);
i_mark = inotify_idr_find_locked(group, wd);
spin_unlock(idr_lock);
return i_mark;
}
static void do_inotify_remove_from_idr(struct fsnotify_group *group,
struct inotify_inode_mark *i_mark)
{
struct idr *idr = &group->inotify_data.idr;
spinlock_t *idr_lock = &group->inotify_data.idr_lock;
int wd = i_mark->wd;
assert_spin_locked(idr_lock);
idr_remove(idr, wd);
/* removed from the idr, drop that ref */
fsnotify_put_mark(&i_mark->fsn_mark);
}
/*
* Remove the mark from the idr (if present) and drop the reference
* on the mark because it was in the idr.
*/
static void inotify_remove_from_idr(struct fsnotify_group *group,
struct inotify_inode_mark *i_mark)
{
spinlock_t *idr_lock = &group->inotify_data.idr_lock;
struct inotify_inode_mark *found_i_mark = NULL;
int wd;
spin_lock(idr_lock);
wd = i_mark->wd;
/*
* does this i_mark think it is in the idr? we shouldn't get called
* if it wasn't....
*/
if (wd == -1) {
WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
" i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode);
goto out;
}
/* Lets look in the idr to see if we find it */
found_i_mark = inotify_idr_find_locked(group, wd);
if (unlikely(!found_i_mark)) {
WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
" i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode);
goto out;
}
/*
* We found an mark in the idr at the right wd, but it's
* not the mark we were told to remove. eparis seriously
* fucked up somewhere.
*/
if (unlikely(found_i_mark != i_mark)) {
WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p "
"mark->inode=%p found_i_mark=%p found_i_mark->wd=%d "
"found_i_mark->group=%p found_i_mark->inode=%p\n",
__func__, i_mark, i_mark->wd, i_mark->fsn_mark.group,
i_mark->fsn_mark.i.inode, found_i_mark, found_i_mark->wd,
found_i_mark->fsn_mark.group,
found_i_mark->fsn_mark.i.inode);
goto out;
}
/*
* One ref for being in the idr
* one ref held by the caller trying to kill us
* one ref grabbed by inotify_idr_find
*/
if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 3)) {
printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
" i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode);
/* we can't really recover with bad ref cnting.. */
BUG();
}
do_inotify_remove_from_idr(group, i_mark);
out:
/* match the ref taken by inotify_idr_find_locked() */
if (found_i_mark)
fsnotify_put_mark(&found_i_mark->fsn_mark);
i_mark->wd = -1;
spin_unlock(idr_lock);
}
/*
* Send IN_IGNORED for this wd, remove this wd from the idr.
*/
void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
struct fsnotify_group *group)
{
struct inotify_inode_mark *i_mark;
struct fsnotify_event *ignored_event, *notify_event;
struct inotify_event_private_data *event_priv;
struct fsnotify_event_private_data *fsn_event_priv;
int ret;
i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL,
FSNOTIFY_EVENT_NONE, NULL, 0,
GFP_NOFS);
if (!ignored_event)
goto skip_send_ignore;
event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS);
if (unlikely(!event_priv))
goto skip_send_ignore;
fsn_event_priv = &event_priv->fsnotify_event_priv_data;
fsnotify_get_group(group);
fsn_event_priv->group = group;
event_priv->wd = i_mark->wd;
notify_event = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv, NULL);
if (notify_event) {
if (IS_ERR(notify_event))
ret = PTR_ERR(notify_event);
else
fsnotify_put_event(notify_event);
inotify_free_event_priv(fsn_event_priv);
}
skip_send_ignore:
/* matches the reference taken when the event was created */
if (ignored_event)
fsnotify_put_event(ignored_event);
/* remove this mark from the idr */
inotify_remove_from_idr(group, i_mark);
atomic_dec(&group->inotify_data.user->inotify_watches);
}
/* ding dong the mark is dead */
static void inotify_free_mark(struct fsnotify_mark *fsn_mark)
{
struct inotify_inode_mark *i_mark;
i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
kmem_cache_free(inotify_inode_mark_cachep, i_mark);
}
static int inotify_update_existing_watch(struct fsnotify_group *group,
struct inode *inode,
u32 arg)
{
struct fsnotify_mark *fsn_mark;
struct inotify_inode_mark *i_mark;
__u32 old_mask, new_mask;
__u32 mask;
int add = (arg & IN_MASK_ADD);
int ret;
mask = inotify_arg_to_mask(arg);
fsn_mark = fsnotify_find_inode_mark(group, inode);
if (!fsn_mark)
return -ENOENT;
i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
spin_lock(&fsn_mark->lock);
old_mask = fsn_mark->mask;
if (add)
fsnotify_set_mark_mask_locked(fsn_mark, (fsn_mark->mask | mask));
else
fsnotify_set_mark_mask_locked(fsn_mark, mask);
new_mask = fsn_mark->mask;
spin_unlock(&fsn_mark->lock);
if (old_mask != new_mask) {
/* more bits in old than in new? */
int dropped = (old_mask & ~new_mask);
/* more bits in this fsn_mark than the inode's mask? */
int do_inode = (new_mask & ~inode->i_fsnotify_mask);
/* update the inode with this new fsn_mark */
if (dropped || do_inode)
fsnotify_recalc_inode_mask(inode);
}
/* return the wd */
ret = i_mark->wd;
/* match the get from fsnotify_find_mark() */
fsnotify_put_mark(fsn_mark);
return ret;
}
static int inotify_new_watch(struct fsnotify_group *group,
struct inode *inode,
u32 arg)
{
struct inotify_inode_mark *tmp_i_mark;
__u32 mask;
int ret;
struct idr *idr = &group->inotify_data.idr;
spinlock_t *idr_lock = &group->inotify_data.idr_lock;
mask = inotify_arg_to_mask(arg);
tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
if (unlikely(!tmp_i_mark))
return -ENOMEM;
fsnotify_init_mark(&tmp_i_mark->fsn_mark, inotify_free_mark);
tmp_i_mark->fsn_mark.mask = mask;
tmp_i_mark->wd = -1;
ret = -ENOSPC;
if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
goto out_err;
ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark);
if (ret)
goto out_err;
/* we are on the idr, now get on the inode */
ret = fsnotify_add_mark(&tmp_i_mark->fsn_mark, group, inode, NULL, 0);
if (ret) {
/* we failed to get on the inode, get off the idr */
inotify_remove_from_idr(group, tmp_i_mark);
goto out_err;
}
/* increment the number of watches the user has */
atomic_inc(&group->inotify_data.user->inotify_watches);
/* return the watch descriptor for this new mark */
ret = tmp_i_mark->wd;
out_err:
/* match the ref from fsnotify_init_mark() */
fsnotify_put_mark(&tmp_i_mark->fsn_mark);
return ret;
}
static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg)
{
int ret = 0;
retry:
/* try to update and existing watch with the new arg */
ret = inotify_update_existing_watch(group, inode, arg);
/* no mark present, try to add a new one */
if (ret == -ENOENT)
ret = inotify_new_watch(group, inode, arg);
/*
* inotify_new_watch could race with another thread which did an
* inotify_new_watch between the update_existing and the add watch
* here, go back and try to update an existing mark again.
*/
if (ret == -EEXIST)
goto retry;
return ret;
}
static struct fsnotify_group *inotify_new_group(unsigned int max_events)
{
struct fsnotify_group *group;
group = fsnotify_alloc_group(&inotify_fsnotify_ops);
if (IS_ERR(group))
return group;
group->max_events = max_events;
spin_lock_init(&group->inotify_data.idr_lock);
idr_init(&group->inotify_data.idr);
group->inotify_data.user = get_current_user();
if (atomic_inc_return(&group->inotify_data.user->inotify_devs) >
inotify_max_user_instances) {
fsnotify_destroy_group(group);
return ERR_PTR(-EMFILE);
}
return group;
}
/* inotify syscalls */
SYSCALL_DEFINE1(inotify_init1, int, flags)
{
struct fsnotify_group *group;
int ret;
/* Check the IN_* constants for consistency. */
BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC);
BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK);
if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
return -EINVAL;
/* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */
group = inotify_new_group(inotify_max_queued_events);
if (IS_ERR(group))
return PTR_ERR(group);
ret = anon_inode_getfd("inotify", &inotify_fops, group,
O_RDONLY | flags);
if (ret < 0)
fsnotify_destroy_group(group);
return ret;
}
SYSCALL_DEFINE0(inotify_init)
{
return sys_inotify_init1(0);
}
SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
u32, mask)
{
struct fsnotify_group *group;
struct inode *inode;
struct path path;
struct fd f;
int ret;
unsigned flags = 0;
/* don't allow invalid bits: we don't want flags set */
if (unlikely(!(mask & ALL_INOTIFY_BITS)))
return -EINVAL;
f = fdget(fd);
if (unlikely(!f.file))
return -EBADF;
/* verify that this is indeed an inotify instance */
if (unlikely(f.file->f_op != &inotify_fops)) {
ret = -EINVAL;
goto fput_and_out;
}
if (!(mask & IN_DONT_FOLLOW))
flags |= LOOKUP_FOLLOW;
if (mask & IN_ONLYDIR)
flags |= LOOKUP_DIRECTORY;
ret = inotify_find_inode(pathname, &path, flags);
if (ret)
goto fput_and_out;
/* inode held in place by reference to path; group by fget on fd */
inode = path.dentry->d_inode;
group = f.file->private_data;
/* create/update an inode mark */
ret = inotify_update_watch(group, inode, mask);
path_put(&path);
fput_and_out:
fdput(f);
return ret;
}
SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
{
struct fsnotify_group *group;
struct inotify_inode_mark *i_mark;
struct fd f;
int ret = 0;
f = fdget(fd);
if (unlikely(!f.file))
return -EBADF;
/* verify that this is indeed an inotify instance */
ret = -EINVAL;
if (unlikely(f.file->f_op != &inotify_fops))
goto out;
group = f.file->private_data;
ret = -EINVAL;
i_mark = inotify_idr_find(group, wd);
if (unlikely(!i_mark))
goto out;
ret = 0;
fsnotify_destroy_mark(&i_mark->fsn_mark, group);
/* match ref taken by inotify_idr_find */
fsnotify_put_mark(&i_mark->fsn_mark);
out:
fdput(f);
return ret;
}
/*
* inotify_user_setup - Our initialization function. Note that we cannot return
* error because we have compiled-in VFS hooks. So an (unlikely) failure here
* must result in panic().
*/
static int __init inotify_user_setup(void)
{
BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE);
BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
BUILD_BUG_ON(IN_OPEN != FS_OPEN);
BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM);
BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO);
BUILD_BUG_ON(IN_CREATE != FS_CREATE);
BUILD_BUG_ON(IN_DELETE != FS_DELETE);
BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF);
BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF);
BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK);
BUILD_BUG_ON(IN_ISDIR != FS_ISDIR);
BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);
inotify_max_queued_events = 16384;
inotify_max_user_instances = 128;
inotify_max_user_watches = 8192;
return 0;
}
module_init(inotify_user_setup);

381
fs/notify/mark.c Normal file
View File

@@ -0,0 +1,381 @@
/*
* Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* fsnotify inode mark locking/lifetime/and refcnting
*
* REFCNT:
* The mark->refcnt tells how many "things" in the kernel currently are
* referencing this object. The object typically will live inside the kernel
* with a refcnt of 2, one for each list it is on (i_list, g_list). Any task
* which can find this object holding the appropriete locks, can take a reference
* and the object itself is guaranteed to survive until the reference is dropped.
*
* LOCKING:
* There are 3 spinlocks involved with fsnotify inode marks and they MUST
* be taken in order as follows:
*
* mark->lock
* group->mark_lock
* inode->i_lock
*
* mark->lock protects 2 things, mark->group and mark->inode. You must hold
* that lock to dereference either of these things (they could be NULL even with
* the lock)
*
* group->mark_lock protects the marks_list anchored inside a given group
* and each mark is hooked via the g_list. It also sorta protects the
* free_g_list, which when used is anchored by a private list on the stack of the
* task which held the group->mark_lock.
*
* inode->i_lock protects the i_fsnotify_marks list anchored inside a
* given inode and each mark is hooked via the i_list. (and sorta the
* free_i_list)
*
*
* LIFETIME:
* Inode marks survive between when they are added to an inode and when their
* refcnt==0.
*
* The inode mark can be cleared for a number of different reasons including:
* - The inode is unlinked for the last time. (fsnotify_inode_remove)
* - The inode is being evicted from cache. (fsnotify_inode_delete)
* - The fs the inode is on is unmounted. (fsnotify_inode_delete/fsnotify_unmount_inodes)
* - Something explicitly requests that it be removed. (fsnotify_destroy_mark)
* - The fsnotify_group associated with the mark is going away and all such marks
* need to be cleaned up. (fsnotify_clear_marks_by_group)
*
* Worst case we are given an inode and need to clean up all the marks on that
* inode. We take i_lock and walk the i_fsnotify_marks safely. For each
* mark on the list we take a reference (so the mark can't disappear under us).
* We remove that mark form the inode's list of marks and we add this mark to a
* private list anchored on the stack using i_free_list; At this point we no
* longer fear anything finding the mark using the inode's list of marks.
*
* We can safely and locklessly run the private list on the stack of everything
* we just unattached from the original inode. For each mark on the private list
* we grab the mark-> and can thus dereference mark->group and mark->inode. If
* we see the group and inode are not NULL we take those locks. Now holding all
* 3 locks we can completely remove the mark from other tasks finding it in the
* future. Remember, 10 things might already be referencing this mark, but they
* better be holding a ref. We drop our reference we took before we unhooked it
* from the inode. When the ref hits 0 we can free the mark.
*
* Very similarly for freeing by group, except we use free_g_list.
*
* This has the very interesting property of being able to run concurrently with
* any (or all) other directions.
*/
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>
#include <linux/atomic.h>
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
struct srcu_struct fsnotify_mark_srcu;
static DEFINE_SPINLOCK(destroy_lock);
static LIST_HEAD(destroy_list);
static DECLARE_WAIT_QUEUE_HEAD(destroy_waitq);
void fsnotify_get_mark(struct fsnotify_mark *mark)
{
atomic_inc(&mark->refcnt);
}
void fsnotify_put_mark(struct fsnotify_mark *mark)
{
if (atomic_dec_and_test(&mark->refcnt)) {
if (mark->group)
fsnotify_put_group(mark->group);
mark->free_mark(mark);
}
}
/*
* Any time a mark is getting freed we end up here.
* The caller had better be holding a reference to this mark so we don't actually
* do the final put under the mark->lock
*/
void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
struct fsnotify_group *group)
{
struct inode *inode = NULL;
BUG_ON(!mutex_is_locked(&group->mark_mutex));
spin_lock(&mark->lock);
/* something else already called this function on this mark */
if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
spin_unlock(&mark->lock);
return;
}
mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
inode = mark->i.inode;
fsnotify_destroy_inode_mark(mark);
} else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT)
fsnotify_destroy_vfsmount_mark(mark);
else
BUG();
list_del_init(&mark->g_list);
spin_unlock(&mark->lock);
if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED))
iput(inode);
/* release lock temporarily */
mutex_unlock(&group->mark_mutex);
spin_lock(&destroy_lock);
list_add(&mark->destroy_list, &destroy_list);
spin_unlock(&destroy_lock);
wake_up(&destroy_waitq);
/*
* We don't necessarily have a ref on mark from caller so the above destroy
* may have actually freed it, unless this group provides a 'freeing_mark'
* function which must be holding a reference.
*/
/*
* Some groups like to know that marks are being freed. This is a
* callback to the group function to let it know that this mark
* is being freed.
*/
if (group->ops->freeing_mark)
group->ops->freeing_mark(mark, group);
/*
* __fsnotify_update_child_dentry_flags(inode);
*
* I really want to call that, but we can't, we have no idea if the inode
* still exists the second we drop the mark->lock.
*
* The next time an event arrive to this inode from one of it's children
* __fsnotify_parent will see that the inode doesn't care about it's
* children and will update all of these flags then. So really this
* is just a lazy update (and could be a perf win...)
*/
atomic_dec(&group->num_marks);
mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
}
void fsnotify_destroy_mark(struct fsnotify_mark *mark,
struct fsnotify_group *group)
{
mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
fsnotify_destroy_mark_locked(mark, group);
mutex_unlock(&group->mark_mutex);
}
void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
{
assert_spin_locked(&mark->lock);
mark->mask = mask;
if (mark->flags & FSNOTIFY_MARK_FLAG_INODE)
fsnotify_set_inode_mark_mask_locked(mark, mask);
}
void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask)
{
assert_spin_locked(&mark->lock);
mark->ignored_mask = mask;
}
/*
* Attach an initialized mark to a given group and fs object.
* These marks may be used for the fsnotify backend to determine which
* event types should be delivered to which group.
*/
int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
struct fsnotify_group *group, struct inode *inode,
struct vfsmount *mnt, int allow_dups)
{
int ret = 0;
BUG_ON(inode && mnt);
BUG_ON(!inode && !mnt);
BUG_ON(!mutex_is_locked(&group->mark_mutex));
/*
* LOCKING ORDER!!!!
* group->mark_mutex
* mark->lock
* inode->i_lock
*/
spin_lock(&mark->lock);
mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE;
fsnotify_get_group(group);
mark->group = group;
list_add(&mark->g_list, &group->marks_list);
atomic_inc(&group->num_marks);
fsnotify_get_mark(mark); /* for i_list and g_list */
if (inode) {
ret = fsnotify_add_inode_mark(mark, group, inode, allow_dups);
if (ret)
goto err;
} else if (mnt) {
ret = fsnotify_add_vfsmount_mark(mark, group, mnt, allow_dups);
if (ret)
goto err;
} else {
BUG();
}
/* this will pin the object if appropriate */
fsnotify_set_mark_mask_locked(mark, mark->mask);
spin_unlock(&mark->lock);
if (inode)
__fsnotify_update_child_dentry_flags(inode);
return ret;
err:
mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
list_del_init(&mark->g_list);
fsnotify_put_group(group);
mark->group = NULL;
atomic_dec(&group->num_marks);
spin_unlock(&mark->lock);
spin_lock(&destroy_lock);
list_add(&mark->destroy_list, &destroy_list);
spin_unlock(&destroy_lock);
wake_up(&destroy_waitq);
return ret;
}
int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
struct inode *inode, struct vfsmount *mnt, int allow_dups)
{
int ret;
mutex_lock(&group->mark_mutex);
ret = fsnotify_add_mark_locked(mark, group, inode, mnt, allow_dups);
mutex_unlock(&group->mark_mutex);
return ret;
}
/*
* clear any marks in a group in which mark->flags & flags is true
*/
void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
unsigned int flags)
{
struct fsnotify_mark *lmark, *mark;
mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
if (mark->flags & flags) {
fsnotify_get_mark(mark);
fsnotify_destroy_mark_locked(mark, group);
fsnotify_put_mark(mark);
}
}
mutex_unlock(&group->mark_mutex);
}
/*
* Given a group, destroy all of the marks associated with that group.
*/
void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
{
fsnotify_clear_marks_by_group_flags(group, (unsigned int)-1);
}
void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old)
{
assert_spin_locked(&old->lock);
new->i.inode = old->i.inode;
new->m.mnt = old->m.mnt;
if (old->group)
fsnotify_get_group(old->group);
new->group = old->group;
new->mask = old->mask;
new->free_mark = old->free_mark;
}
/*
* Nothing fancy, just initialize lists and locks and counters.
*/
void fsnotify_init_mark(struct fsnotify_mark *mark,
void (*free_mark)(struct fsnotify_mark *mark))
{
memset(mark, 0, sizeof(*mark));
spin_lock_init(&mark->lock);
atomic_set(&mark->refcnt, 1);
mark->free_mark = free_mark;
}
static int fsnotify_mark_destroy(void *ignored)
{
struct fsnotify_mark *mark, *next;
LIST_HEAD(private_destroy_list);
for (;;) {
spin_lock(&destroy_lock);
/* exchange the list head */
list_replace_init(&destroy_list, &private_destroy_list);
spin_unlock(&destroy_lock);
synchronize_srcu(&fsnotify_mark_srcu);
list_for_each_entry_safe(mark, next, &private_destroy_list, destroy_list) {
list_del_init(&mark->destroy_list);
fsnotify_put_mark(mark);
}
wait_event_interruptible(destroy_waitq, !list_empty(&destroy_list));
}
return 0;
}
static int __init fsnotify_mark_init(void)
{
struct task_struct *thread;
thread = kthread_run(fsnotify_mark_destroy, NULL,
"fsnotify_mark");
if (IS_ERR(thread))
panic("unable to start fsnotify mark destruction thread.");
return 0;
}
device_initcall(fsnotify_mark_init);

464
fs/notify/notification.c Normal file
View File

@@ -0,0 +1,464 @@
/*
* Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* Basic idea behind the notification queue: An fsnotify group (like inotify)
* sends the userspace notification about events asynchronously some time after
* the event happened. When inotify gets an event it will need to add that
* event to the group notify queue. Since a single event might need to be on
* multiple group's notification queues we can't add the event directly to each
* queue and instead add a small "event_holder" to each queue. This event_holder
* has a pointer back to the original event. Since the majority of events are
* going to end up on one, and only one, notification queue we embed one
* event_holder into each event. This means we have a single allocation instead
* of always needing two. If the embedded event_holder is already in use by
* another group a new event_holder (from fsnotify_event_holder_cachep) will be
* allocated and used.
*/
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/mutex.h>
#include <linux/namei.h>
#include <linux/path.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
static struct kmem_cache *fsnotify_event_cachep;
static struct kmem_cache *fsnotify_event_holder_cachep;
/*
* This is a magic event we send when the q is too full. Since it doesn't
* hold real event information we just keep one system wide and use it any time
* it is needed. It's refcnt is set 1 at kernel init time and will never
* get set to 0 so it will never get 'freed'
*/
static struct fsnotify_event *q_overflow_event;
static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0);
/**
* fsnotify_get_cookie - return a unique cookie for use in synchronizing events.
* Called from fsnotify_move, which is inlined into filesystem modules.
*/
u32 fsnotify_get_cookie(void)
{
return atomic_inc_return(&fsnotify_sync_cookie);
}
EXPORT_SYMBOL_GPL(fsnotify_get_cookie);
/* return true if the notify queue is empty, false otherwise */
bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)
{
BUG_ON(!mutex_is_locked(&group->notification_mutex));
return list_empty(&group->notification_list) ? true : false;
}
void fsnotify_get_event(struct fsnotify_event *event)
{
atomic_inc(&event->refcnt);
}
void fsnotify_put_event(struct fsnotify_event *event)
{
if (!event)
return;
if (atomic_dec_and_test(&event->refcnt)) {
pr_debug("%s: event=%p\n", __func__, event);
if (event->data_type == FSNOTIFY_EVENT_PATH)
path_put(&event->path);
BUG_ON(!list_empty(&event->private_data_list));
kfree(event->file_name);
put_pid(event->tgid);
kmem_cache_free(fsnotify_event_cachep, event);
}
}
struct fsnotify_event_holder *fsnotify_alloc_event_holder(void)
{
return kmem_cache_alloc(fsnotify_event_holder_cachep, GFP_KERNEL);
}
void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder)
{
if (holder)
kmem_cache_free(fsnotify_event_holder_cachep, holder);
}
/*
* Find the private data that the group previously attached to this event when
* the group added the event to the notification queue (fsnotify_add_notify_event)
*/
struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group, struct fsnotify_event *event)
{
struct fsnotify_event_private_data *lpriv;
struct fsnotify_event_private_data *priv = NULL;
assert_spin_locked(&event->lock);
list_for_each_entry(lpriv, &event->private_data_list, event_list) {
if (lpriv->group == group) {
priv = lpriv;
list_del(&priv->event_list);
break;
}
}
return priv;
}
/*
* Add an event to the group notification queue. The group can later pull this
* event off the queue to deal with. If the event is successfully added to the
* group's notification queue, a reference is taken on event.
*/
struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
struct fsnotify_event_private_data *priv,
struct fsnotify_event *(*merge)(struct list_head *,
struct fsnotify_event *))
{
struct fsnotify_event *return_event = NULL;
struct fsnotify_event_holder *holder = NULL;
struct list_head *list = &group->notification_list;
pr_debug("%s: group=%p event=%p priv=%p\n", __func__, group, event, priv);
/*
* There is one fsnotify_event_holder embedded inside each fsnotify_event.
* Check if we expect to be able to use that holder. If not alloc a new
* holder.
* For the overflow event it's possible that something will use the in
* event holder before we get the lock so we may need to jump back and
* alloc a new holder, this can't happen for most events...
*/
if (!list_empty(&event->holder.event_list)) {
alloc_holder:
holder = fsnotify_alloc_event_holder();
if (!holder)
return ERR_PTR(-ENOMEM);
}
mutex_lock(&group->notification_mutex);
if (group->q_len >= group->max_events) {
event = q_overflow_event;
/*
* we need to return the overflow event
* which means we need a ref
*/
fsnotify_get_event(event);
return_event = event;
/* sorry, no private data on the overflow event */
priv = NULL;
}
if (!list_empty(list) && merge) {
struct fsnotify_event *tmp;
tmp = merge(list, event);
if (tmp) {
mutex_unlock(&group->notification_mutex);
if (return_event)
fsnotify_put_event(return_event);
if (holder != &event->holder)
fsnotify_destroy_event_holder(holder);
return tmp;
}
}
spin_lock(&event->lock);
if (list_empty(&event->holder.event_list)) {
if (unlikely(holder))
fsnotify_destroy_event_holder(holder);
holder = &event->holder;
} else if (unlikely(!holder)) {
/* between the time we checked above and got the lock the in
* event holder was used, go back and get a new one */
spin_unlock(&event->lock);
mutex_unlock(&group->notification_mutex);
if (return_event) {
fsnotify_put_event(return_event);
return_event = NULL;
}
goto alloc_holder;
}
group->q_len++;
holder->event = event;
fsnotify_get_event(event);
list_add_tail(&holder->event_list, list);
if (priv)
list_add_tail(&priv->event_list, &event->private_data_list);
spin_unlock(&event->lock);
mutex_unlock(&group->notification_mutex);
wake_up(&group->notification_waitq);
kill_fasync(&group->fsn_fa, SIGIO, POLL_IN);
return return_event;
}
/*
* Remove and return the first event from the notification list. There is a
* reference held on this event since it was on the list. It is the responsibility
* of the caller to drop this reference.
*/
struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group)
{
struct fsnotify_event *event;
struct fsnotify_event_holder *holder;
BUG_ON(!mutex_is_locked(&group->notification_mutex));
pr_debug("%s: group=%p\n", __func__, group);
holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
event = holder->event;
spin_lock(&event->lock);
holder->event = NULL;
list_del_init(&holder->event_list);
spin_unlock(&event->lock);
/* event == holder means we are referenced through the in event holder */
if (holder != &event->holder)
fsnotify_destroy_event_holder(holder);
group->q_len--;
return event;
}
/*
* This will not remove the event, that must be done with fsnotify_remove_notify_event()
*/
struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group)
{
struct fsnotify_event *event;
struct fsnotify_event_holder *holder;
BUG_ON(!mutex_is_locked(&group->notification_mutex));
holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
event = holder->event;
return event;
}
/*
* Called when a group is being torn down to clean up any outstanding
* event notifications.
*/
void fsnotify_flush_notify(struct fsnotify_group *group)
{
struct fsnotify_event *event;
struct fsnotify_event_private_data *priv;
mutex_lock(&group->notification_mutex);
while (!fsnotify_notify_queue_is_empty(group)) {
event = fsnotify_remove_notify_event(group);
/* if they don't implement free_event_priv they better not have attached any */
if (group->ops->free_event_priv) {
spin_lock(&event->lock);
priv = fsnotify_remove_priv_from_event(group, event);
spin_unlock(&event->lock);
if (priv)
group->ops->free_event_priv(priv);
}
fsnotify_put_event(event); /* matches fsnotify_add_notify_event */
}
mutex_unlock(&group->notification_mutex);
}
static void initialize_event(struct fsnotify_event *event)
{
INIT_LIST_HEAD(&event->holder.event_list);
atomic_set(&event->refcnt, 1);
spin_lock_init(&event->lock);
INIT_LIST_HEAD(&event->private_data_list);
}
/*
* Caller damn well better be holding whatever mutex is protecting the
* old_holder->event_list and the new_event must be a clean event which
* cannot be found anywhere else in the kernel.
*/
int fsnotify_replace_event(struct fsnotify_event_holder *old_holder,
struct fsnotify_event *new_event)
{
struct fsnotify_event *old_event = old_holder->event;
struct fsnotify_event_holder *new_holder = &new_event->holder;
enum event_spinlock_class {
SPINLOCK_OLD,
SPINLOCK_NEW,
};
pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, new_event);
/*
* if the new_event's embedded holder is in use someone
* screwed up and didn't give us a clean new event.
*/
BUG_ON(!list_empty(&new_holder->event_list));
spin_lock_nested(&old_event->lock, SPINLOCK_OLD);
spin_lock_nested(&new_event->lock, SPINLOCK_NEW);
new_holder->event = new_event;
list_replace_init(&old_holder->event_list, &new_holder->event_list);
spin_unlock(&new_event->lock);
spin_unlock(&old_event->lock);
/* event == holder means we are referenced through the in event holder */
if (old_holder != &old_event->holder)
fsnotify_destroy_event_holder(old_holder);
fsnotify_get_event(new_event); /* on the list take reference */
fsnotify_put_event(old_event); /* off the list, drop reference */
return 0;
}
struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event)
{
struct fsnotify_event *event;
event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL);
if (!event)
return NULL;
pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, event);
memcpy(event, old_event, sizeof(*event));
initialize_event(event);
if (event->name_len) {
event->file_name = kstrdup(old_event->file_name, GFP_KERNEL);
if (!event->file_name) {
kmem_cache_free(fsnotify_event_cachep, event);
return NULL;
}
}
event->tgid = get_pid(old_event->tgid);
if (event->data_type == FSNOTIFY_EVENT_PATH)
path_get(&event->path);
return event;
}
/*
* fsnotify_create_event - Allocate a new event which will be sent to each
* group's handle_event function if the group was interested in this
* particular event.
*
* @to_tell the inode which is supposed to receive the event (sometimes a
* parent of the inode to which the event happened.
* @mask what actually happened.
* @data pointer to the object which was actually affected
* @data_type flag indication if the data is a file, path, inode, nothing...
* @name the filename, if available
*/
struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data,
int data_type, const unsigned char *name,
u32 cookie, gfp_t gfp)
{
struct fsnotify_event *event;
event = kmem_cache_zalloc(fsnotify_event_cachep, gfp);
if (!event)
return NULL;
pr_debug("%s: event=%p to_tell=%p mask=%x data=%p data_type=%d\n",
__func__, event, to_tell, mask, data, data_type);
initialize_event(event);
if (name) {
event->file_name = kstrdup(name, gfp);
if (!event->file_name) {
kmem_cache_free(fsnotify_event_cachep, event);
return NULL;
}
event->name_len = strlen(event->file_name);
}
event->tgid = get_pid(task_tgid(current));
event->sync_cookie = cookie;
event->to_tell = to_tell;
event->data_type = data_type;
switch (data_type) {
case FSNOTIFY_EVENT_PATH: {
struct path *path = data;
event->path.dentry = path->dentry;
event->path.mnt = path->mnt;
path_get(&event->path);
break;
}
case FSNOTIFY_EVENT_INODE:
event->inode = data;
break;
case FSNOTIFY_EVENT_NONE:
event->inode = NULL;
event->path.dentry = NULL;
event->path.mnt = NULL;
break;
default:
BUG();
}
event->mask = mask;
return event;
}
static __init int fsnotify_notification_init(void)
{
fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC);
fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC);
q_overflow_event = fsnotify_create_event(NULL, FS_Q_OVERFLOW, NULL,
FSNOTIFY_EVENT_NONE, NULL, 0,
GFP_KERNEL);
if (!q_overflow_event)
panic("unable to allocate fsnotify q_overflow_event\n");
return 0;
}
subsys_initcall(fsnotify_notification_init);

200
fs/notify/vfsmount_mark.c Normal file
View File

@@ -0,0 +1,200 @@
/*
* Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
#include "../mount.h"
void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
{
struct fsnotify_mark *mark, *lmark;
struct hlist_node *n;
struct mount *m = real_mount(mnt);
LIST_HEAD(free_list);
spin_lock(&mnt->mnt_root->d_lock);
hlist_for_each_entry_safe(mark, n, &m->mnt_fsnotify_marks, m.m_list) {
list_add(&mark->m.free_m_list, &free_list);
hlist_del_init_rcu(&mark->m.m_list);
fsnotify_get_mark(mark);
}
spin_unlock(&mnt->mnt_root->d_lock);
list_for_each_entry_safe(mark, lmark, &free_list, m.free_m_list) {
struct fsnotify_group *group;
spin_lock(&mark->lock);
fsnotify_get_group(mark->group);
group = mark->group;
spin_unlock(&mark->lock);
fsnotify_destroy_mark(mark, group);
fsnotify_put_mark(mark);
fsnotify_put_group(group);
}
}
void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group)
{
fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_VFSMOUNT);
}
/*
* Recalculate the mask of events relevant to a given vfsmount locked.
*/
static void fsnotify_recalc_vfsmount_mask_locked(struct vfsmount *mnt)
{
struct mount *m = real_mount(mnt);
struct fsnotify_mark *mark;
__u32 new_mask = 0;
assert_spin_locked(&mnt->mnt_root->d_lock);
hlist_for_each_entry(mark, &m->mnt_fsnotify_marks, m.m_list)
new_mask |= mark->mask;
m->mnt_fsnotify_mask = new_mask;
}
/*
* Recalculate the mnt->mnt_fsnotify_mask, or the mask of all FS_* event types
* any notifier is interested in hearing for this mount point
*/
void fsnotify_recalc_vfsmount_mask(struct vfsmount *mnt)
{
spin_lock(&mnt->mnt_root->d_lock);
fsnotify_recalc_vfsmount_mask_locked(mnt);
spin_unlock(&mnt->mnt_root->d_lock);
}
void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark)
{
struct vfsmount *mnt = mark->m.mnt;
BUG_ON(!mutex_is_locked(&mark->group->mark_mutex));
assert_spin_locked(&mark->lock);
spin_lock(&mnt->mnt_root->d_lock);
hlist_del_init_rcu(&mark->m.m_list);
mark->m.mnt = NULL;
fsnotify_recalc_vfsmount_mask_locked(mnt);
spin_unlock(&mnt->mnt_root->d_lock);
}
static struct fsnotify_mark *fsnotify_find_vfsmount_mark_locked(struct fsnotify_group *group,
struct vfsmount *mnt)
{
struct mount *m = real_mount(mnt);
struct fsnotify_mark *mark;
assert_spin_locked(&mnt->mnt_root->d_lock);
hlist_for_each_entry(mark, &m->mnt_fsnotify_marks, m.m_list) {
if (mark->group == group) {
fsnotify_get_mark(mark);
return mark;
}
}
return NULL;
}
/*
* given a group and vfsmount, find the mark associated with that combination.
* if found take a reference to that mark and return it, else return NULL
*/
struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group,
struct vfsmount *mnt)
{
struct fsnotify_mark *mark;
spin_lock(&mnt->mnt_root->d_lock);
mark = fsnotify_find_vfsmount_mark_locked(group, mnt);
spin_unlock(&mnt->mnt_root->d_lock);
return mark;
}
/*
* Attach an initialized mark to a given group and vfsmount.
* These marks may be used for the fsnotify backend to determine which
* event types should be delivered to which groups.
*/
int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
struct fsnotify_group *group, struct vfsmount *mnt,
int allow_dups)
{
struct mount *m = real_mount(mnt);
struct fsnotify_mark *lmark, *last = NULL;
int ret = 0;
mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT;
BUG_ON(!mutex_is_locked(&group->mark_mutex));
assert_spin_locked(&mark->lock);
spin_lock(&mnt->mnt_root->d_lock);
mark->m.mnt = mnt;
/* is mark the first mark? */
if (hlist_empty(&m->mnt_fsnotify_marks)) {
hlist_add_head_rcu(&mark->m.m_list, &m->mnt_fsnotify_marks);
goto out;
}
/* should mark be in the middle of the current list? */
hlist_for_each_entry(lmark, &m->mnt_fsnotify_marks, m.m_list) {
last = lmark;
if ((lmark->group == group) && !allow_dups) {
ret = -EEXIST;
goto out;
}
if (mark->group->priority < lmark->group->priority)
continue;
if ((mark->group->priority == lmark->group->priority) &&
(mark->group < lmark->group))
continue;
hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list);
goto out;
}
BUG_ON(last == NULL);
/* mark should be the last entry. last is the current last entry */
hlist_add_after_rcu(&last->m.m_list, &mark->m.m_list);
out:
fsnotify_recalc_vfsmount_mask_locked(mnt);
spin_unlock(&mnt->mnt_root->d_lock);
return ret;
}