diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 810c28fb8c3c..584249454822 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -41,6 +41,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/compat.h>
+#include <linux/rculist.h>
 
 /*
  * LOCKING:
@@ -133,8 +134,12 @@ struct nested_calls {
  * of these on a server and we do not want this to take another cache line.
  */
 struct epitem {
-	/* RB tree node used to link this structure to the eventpoll RB tree */
-	struct rb_node rbn;
+	union {
+		/* RB tree node links this structure to the eventpoll RB tree */
+		struct rb_node rbn;
+		/* Used to free the struct epitem */
+		struct rcu_head rcu;
+	};
 
 	/* List header used to link this structure to the eventpoll ready list */
 	struct list_head rdllink;
@@ -671,6 +676,12 @@ static int ep_scan_ready_list(struct eventpoll *ep,
 	return error;
 }
 
+static void epi_rcu_free(struct rcu_head *head)
+{
+	struct epitem *epi = container_of(head, struct epitem, rcu);
+	kmem_cache_free(epi_cache, epi);
+}
+
 /*
  * Removes a "struct epitem" from the eventpoll RB tree and deallocates
  * all the associated resources. Must be called with "mtx" held.
@@ -692,8 +703,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
 
 	/* Remove the current item from the list of epoll hooks */
 	spin_lock(&file->f_lock);
-	if (ep_is_linked(&epi->fllink))
-		list_del_init(&epi->fllink);
+	list_del_rcu(&epi->fllink);
 	spin_unlock(&file->f_lock);
 
 	rb_erase(&epi->rbn, &ep->rbr);
@@ -704,9 +714,14 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
 	spin_unlock_irqrestore(&ep->lock, flags);
 
 	wakeup_source_unregister(ep_wakeup_source(epi));
-
-	/* At this point it is safe to free the eventpoll item */
-	kmem_cache_free(epi_cache, epi);
+	/*
+	 * At this point it is safe to free the eventpoll item. Use the union
+	 * field epi->rcu, since we are trying to minimize the size of
+	 * 'struct epitem'. The 'rbn' field is no longer in use. Protected by
+	 * ep->mtx. The rcu read side, reverse_path_check_proc(), does not make
+	 * use of the rbn field.
+	 */
+	call_rcu(&epi->rcu, epi_rcu_free);
 
 	atomic_long_dec(&ep->user->epoll_watches);
 
@@ -872,7 +887,6 @@ static const struct file_operations eventpoll_fops = {
  */
 void eventpoll_release_file(struct file *file)
 {
-	struct list_head *lsthead = &file->f_ep_links;
 	struct eventpoll *ep;
 	struct epitem *epi;
 
@@ -890,17 +904,12 @@ void eventpoll_release_file(struct file *file)
 	 * Besides, ep_remove() acquires the lock, so we can't hold it here.
 	 */
 	mutex_lock(&epmutex);
-
-	while (!list_empty(lsthead)) {
-		epi = list_first_entry(lsthead, struct epitem, fllink);
-
+	list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) {
 		ep = epi->ep;
-		list_del_init(&epi->fllink);
 		mutex_lock_nested(&ep->mtx, 0);
 		ep_remove(ep, epi);
 		mutex_unlock(&ep->mtx);
 	}
-
 	mutex_unlock(&epmutex);
 }
 
@@ -1138,7 +1147,9 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
 	struct file *child_file;
 	struct epitem *epi;
 
-	list_for_each_entry(epi, &file->f_ep_links, fllink) {
+	/* CTL_DEL can remove links here, but that can't increase our count */
+	rcu_read_lock();
+	list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) {
 		child_file = epi->ep->file;
 		if (is_file_epoll(child_file)) {
 			if (list_empty(&child_file->f_ep_links)) {
@@ -1160,6 +1171,7 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
 				"file is not an ep!\n");
 		}
 	}
+	rcu_read_unlock();
 	return error;
 }
 
@@ -1286,7 +1298,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 
 	/* Add the current item to the list of active epoll hook for this file */
 	spin_lock(&tfile->f_lock);
-	list_add_tail(&epi->fllink, &tfile->f_ep_links);
+	list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
 	spin_unlock(&tfile->f_lock);
 
 	/*
@@ -1327,8 +1339,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 
 error_remove_epi:
 	spin_lock(&tfile->f_lock);
-	if (ep_is_linked(&epi->fllink))
-		list_del_init(&epi->fllink);
+	list_del_rcu(&epi->fllink);
 	spin_unlock(&tfile->f_lock);
 
 	rb_erase(&epi->rbn, &ep->rbr);
@@ -1844,15 +1855,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	 * and hang them on the tfile_check_list, so we can check that we
 	 * haven't created too many possible wakeup paths.
 	 *
-	 * We need to hold the epmutex across both ep_insert and ep_remove
-	 * b/c we want to make sure we are looking at a coherent view of
-	 * epoll network.
+	 * We need to hold the epmutex across ep_insert to prevent
+	 * multiple adds from creating loops in parallel.
 	 */
-	if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) {
+	if (op == EPOLL_CTL_ADD) {
 		mutex_lock(&epmutex);
 		did_lock_epmutex = 1;
-	}
-	if (op == EPOLL_CTL_ADD) {
 		if (is_file_epoll(tf.file)) {
 			error = -ELOOP;
 			if (ep_loop_check(ep, tf.file) != 0) {
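
Below is a minimal, self-contained sketch of the pattern the patch applies to 'struct epitem'. It is not part of the patch: 'struct item', 'item_list', 'item_tree', 'item_lock' and the 'key' field are illustrative names only. The object sits on an RCU-protected list and in an rbtree; removal unlinks it with list_del_rcu() under the writer lock, and the final kfree() is deferred through call_rcu() via an rcu_head that shares storage with the now-unused rb_node, so lockless list_for_each_entry_rcu() walkers never touch freed memory.

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/rbtree.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>

struct item {
	union {
		struct rb_node node;	/* writer-side linkage, used under item_lock */
		struct rcu_head rcu;	/* reused for deferred free once node is erased */
	};
	struct list_head link;		/* membership in the RCU-protected list */
	int key;
};

static LIST_HEAD(item_list);
static struct rb_root item_tree = RB_ROOT;
static DEFINE_SPINLOCK(item_lock);

/* Runs after a grace period: no RCU reader can still hold a reference */
static void item_rcu_free(struct rcu_head *head)
{
	kfree(container_of(head, struct item, rcu));
}

static void item_insert(struct item *new)
{
	struct rb_node **p, *parent = NULL;

	spin_lock(&item_lock);
	p = &item_tree.rb_node;
	while (*p) {
		parent = *p;
		if (new->key < rb_entry(parent, struct item, node)->key)
			p = &parent->rb_left;
		else
			p = &parent->rb_right;
	}
	rb_link_node(&new->node, parent, p);
	rb_insert_color(&new->node, &item_tree);
	/* Publish to lockless readers only once the item is fully set up */
	list_add_tail_rcu(&new->link, &item_list);
	spin_unlock(&item_lock);
}

static void item_remove(struct item *it)
{
	spin_lock(&item_lock);
	list_del_rcu(&it->link);		/* readers may still be walking 'it' */
	rb_erase(&it->node, &item_tree);	/* 'node' is dead from this point on */
	spin_unlock(&item_lock);

	/* Overlaying 'rcu' on 'node' is safe now; free after readers drain */
	call_rcu(&it->rcu, item_rcu_free);
}

static bool item_present(int key)
{
	struct item *it;
	bool found = false;

	rcu_read_lock();	/* lockless read side, as in reverse_path_check_proc() */
	list_for_each_entry_rcu(it, &item_list, link) {
		if (it->key == key) {
			found = true;
			break;
		}
	}
	rcu_read_unlock();
	return found;
}

The union mirrors the epitem change: the rb_node is only touched by writers holding the lock (ep->mtx in the patch), so once rb_erase() has run its storage can double as the rcu_head without growing the structure. Unlinking with list_del_rcu() plus call_rcu() is what lets eventpoll_release_file() and reverse_path_check_proc() walk f_ep_links without holding epmutex for EPOLL_CTL_DEL.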