diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/inode.c | 10 | ||||
-rw-r--r-- | fs/cifs/cifsglob.h | 5 | ||||
-rw-r--r-- | fs/cifs/dir.c | 2 | ||||
-rw-r--r-- | fs/cifs/file.c | 35 | ||||
-rw-r--r-- | fs/cifs/inode.c | 19 | ||||
-rw-r--r-- | fs/cifs/misc.c | 50 | ||||
-rw-r--r-- | fs/cifs/smb2pdu.c | 2 | ||||
-rw-r--r-- | fs/configfs/file.c | 18 | ||||
-rw-r--r-- | fs/dax.c | 2 | ||||
-rw-r--r-- | fs/fuse/dax.c | 6 | ||||
-rw-r--r-- | fs/io-wq.c | 26 | ||||
-rw-r--r-- | fs/io_uring.c | 58 | ||||
-rw-r--r-- | fs/pipe.c | 15 |
13 files changed, 154 insertions, 94 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0117d867ecf8..06f9f167222b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9226,8 +9226,14 @@ static int btrfs_rename_exchange(struct inode *old_dir, bool dest_log_pinned = false; bool need_abort = false; - /* we only allow rename subvolume link between subvolumes */ - if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) + /* + * For non-subvolumes allow exchange only within one subvolume, in the + * same inode namespace. Two subvolumes (represented as directory) can + * be exchanged as they're a logical link and have a fixed inode number. + */ + if (root != dest && + (old_ino != BTRFS_FIRST_FREE_OBJECTID || + new_ino != BTRFS_FIRST_FREE_OBJECTID)) return -EXDEV; /* close the race window with snapshot create/destroy ioctl */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index c0bfc2f01030..c6a9542ca281 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1611,6 +1611,11 @@ struct dfs_info3_param { int ttl; }; +struct file_list { + struct list_head list; + struct cifsFileInfo *cfile; +}; + /* * common struct for holding inode info when searching for or updating an * inode with new info diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 79402ca0ddfa..5f8a302ffcb2 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -100,7 +100,7 @@ build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page, if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) pplen = cifs_sb->prepath ? strlen(cifs_sb->prepath) + 1 : 0; - s = dentry_path_raw(direntry, page, PAGE_SIZE); + s = dentry_path_raw(direntry, page, PATH_MAX); if (IS_ERR(s)) return s; if (!s[1]) // for root we want "", not "/" diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 0a72840a88f1..bb98fbdd22a9 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -4848,34 +4848,33 @@ void cifs_oplock_break(struct work_struct *work) oplock_break_ack: /* - * releasing stale oplock after recent reconnect of smb session using - * a now incorrect file handle is not a data integrity issue but do - * not bother sending an oplock release if session to server still is - * disconnected since oplock already released by the server - */ - if (!cfile->oplock_break_cancelled) { - rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid, - cinode); - cifs_dbg(FYI, "Oplock release rc = %d\n", rc); - } - /* * When oplock break is received and there are no active * file handles but cached, then schedule deferred close immediately. * So, new open will not use cached handle. */ spin_lock(&CIFS_I(inode)->deferred_lock); is_deferred = cifs_is_deferred_close(cfile, &dclose); + spin_unlock(&CIFS_I(inode)->deferred_lock); if (is_deferred && cfile->deferred_close_scheduled && delayed_work_pending(&cfile->deferred)) { - /* - * If there is no pending work, mod_delayed_work queues new work. - * So, Increase the ref count to avoid use-after-free. - */ - if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0)) - cifsFileInfo_get(cfile); + if (cancel_delayed_work(&cfile->deferred)) { + _cifsFileInfo_put(cfile, false, false); + goto oplock_break_done; + } } - spin_unlock(&CIFS_I(inode)->deferred_lock); + /* + * releasing stale oplock after recent reconnect of smb session using + * a now incorrect file handle is not a data integrity issue but do + * not bother sending an oplock release if session to server still is + * disconnected since oplock already released by the server + */ + if (!cfile->oplock_break_cancelled) { + rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid, + cinode); + cifs_dbg(FYI, "Oplock release rc = %d\n", rc); + } +oplock_break_done: _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false); cifs_done_oplock_break(cinode); } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index b96b253e7635..65f8a70cece3 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1625,7 +1625,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) goto unlink_out; } - cifs_close_all_deferred_files(tcon); + cifs_close_deferred_file(CIFS_I(inode)); if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))) { rc = CIFSPOSIXDelFile(xid, tcon, full_path, @@ -2084,6 +2084,7 @@ cifs_rename2(struct user_namespace *mnt_userns, struct inode *source_dir, FILE_UNIX_BASIC_INFO *info_buf_target; unsigned int xid; int rc, tmprc; + int retry_count = 0; if (flags & ~RENAME_NOREPLACE) return -EINVAL; @@ -2113,10 +2114,24 @@ cifs_rename2(struct user_namespace *mnt_userns, struct inode *source_dir, goto cifs_rename_exit; } - cifs_close_all_deferred_files(tcon); + cifs_close_deferred_file(CIFS_I(d_inode(source_dentry))); + if (d_inode(target_dentry) != NULL) + cifs_close_deferred_file(CIFS_I(d_inode(target_dentry))); + rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry, to_name); + if (rc == -EACCES) { + while (retry_count < 3) { + cifs_close_all_deferred_files(tcon); + rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry, + to_name); + if (rc != -EACCES) + break; + retry_count++; + } + } + /* * No-replace is the natural behavior for CIFS, so skip unlink hacks. */ diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 844abeb2b48f..9469f1cf0b46 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -723,13 +723,31 @@ void cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode) { struct cifsFileInfo *cfile = NULL; - struct cifs_deferred_close *dclose; + struct file_list *tmp_list, *tmp_next_list; + struct list_head file_head; + + if (cifs_inode == NULL) + return; + INIT_LIST_HEAD(&file_head); + spin_lock(&cifs_inode->open_file_lock); list_for_each_entry(cfile, &cifs_inode->openFileList, flist) { - spin_lock(&cifs_inode->deferred_lock); - if (cifs_is_deferred_close(cfile, &dclose)) - mod_delayed_work(deferredclose_wq, &cfile->deferred, 0); - spin_unlock(&cifs_inode->deferred_lock); + if (delayed_work_pending(&cfile->deferred)) { + if (cancel_delayed_work(&cfile->deferred)) { + tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC); + if (tmp_list == NULL) + continue; + tmp_list->cfile = cfile; + list_add_tail(&tmp_list->list, &file_head); + } + } + } + spin_unlock(&cifs_inode->open_file_lock); + + list_for_each_entry_safe(tmp_list, tmp_next_list, &file_head, list) { + _cifsFileInfo_put(tmp_list->cfile, true, false); + list_del(&tmp_list->list); + kfree(tmp_list); } } @@ -738,20 +756,30 @@ cifs_close_all_deferred_files(struct cifs_tcon *tcon) { struct cifsFileInfo *cfile; struct list_head *tmp; + struct file_list *tmp_list, *tmp_next_list; + struct list_head file_head; + INIT_LIST_HEAD(&file_head); spin_lock(&tcon->open_file_lock); list_for_each(tmp, &tcon->openFileList) { cfile = list_entry(tmp, struct cifsFileInfo, tlist); if (delayed_work_pending(&cfile->deferred)) { - /* - * If there is no pending work, mod_delayed_work queues new work. - * So, Increase the ref count to avoid use-after-free. - */ - if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0)) - cifsFileInfo_get(cfile); + if (cancel_delayed_work(&cfile->deferred)) { + tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC); + if (tmp_list == NULL) + continue; + tmp_list->cfile = cfile; + list_add_tail(&tmp_list->list, &file_head); + } } } spin_unlock(&tcon->open_file_lock); + + list_for_each_entry_safe(tmp_list, tmp_next_list, &file_head, list) { + _cifsFileInfo_put(tmp_list->cfile, true, false); + list_del(&tmp_list->list); + kfree(tmp_list); + } } /* parses DFS refferal V3 structure diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 781d14e5f2af..b6d2e3591927 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2426,7 +2426,7 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len) memcpy(aclptr, &acl, sizeof(struct cifs_acl)); buf->ccontext.DataLength = cpu_to_le32(ptr - (__u8 *)&buf->sd); - *len = ptr - (__u8 *)buf; + *len = roundup(ptr - (__u8 *)buf, 8); return buf; } diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 5a0be9985bae..0ad32150611e 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -177,28 +177,22 @@ out: return retval; } -/* Fill [buffer, buffer + pos) with data coming from @from. */ -static int fill_write_buffer(struct configfs_buffer *buffer, loff_t pos, +/* Fill @buffer with data coming from @from. */ +static int fill_write_buffer(struct configfs_buffer *buffer, struct iov_iter *from) { - loff_t to_copy; int copied; - u8 *to; if (!buffer->page) buffer->page = (char *)__get_free_pages(GFP_KERNEL, 0); if (!buffer->page) return -ENOMEM; - to_copy = SIMPLE_ATTR_SIZE - 1 - pos; - if (to_copy <= 0) - return 0; - to = buffer->page + pos; - copied = copy_from_iter(to, to_copy, from); + copied = copy_from_iter(buffer->page, SIMPLE_ATTR_SIZE - 1, from); buffer->needs_read_fill = 1; /* if buf is assumed to contain a string, terminate it by \0, * so e.g. sscanf() can scan the string easily */ - to[copied] = 0; + buffer->page[copied] = 0; return copied ? : -EFAULT; } @@ -227,10 +221,10 @@ static ssize_t configfs_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct configfs_buffer *buffer = file->private_data; - ssize_t len; + int len; mutex_lock(&buffer->mutex); - len = fill_write_buffer(buffer, iocb->ki_pos, from); + len = fill_write_buffer(buffer, from); if (len > 0) len = flush_write_buffer(file, buffer, len); if (len > 0) @@ -722,7 +722,7 @@ static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d return rc; id = dax_read_lock(); - rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(PAGE_SIZE), &kaddr, NULL); + rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL); if (rc < 0) { dax_read_unlock(id); return rc; diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index e55723744f58..9d58371d22c2 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -1235,8 +1235,6 @@ void fuse_dax_conn_free(struct fuse_conn *fc) static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd) { long nr_pages, nr_ranges; - void *kaddr; - pfn_t pfn; struct fuse_dax_mapping *range; int ret, id; size_t dax_size = -1; @@ -1248,8 +1246,8 @@ static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd) INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker); id = dax_read_lock(); - nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), &kaddr, - &pfn); + nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), NULL, + NULL); dax_read_unlock(id); if (nr_pages < 0) { pr_debug("dax_direct_access() returned %ld\n", nr_pages); diff --git a/fs/io-wq.c b/fs/io-wq.c index 12fc19353bb0..7d2ed8c7dd31 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -129,7 +129,7 @@ struct io_cb_cancel_data { bool cancel_all; }; -static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index); +static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first); static void io_wqe_dec_running(struct io_worker *worker); static bool io_worker_get(struct io_worker *worker) @@ -248,18 +248,20 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) rcu_read_unlock(); if (!ret) { - bool do_create = false; + bool do_create = false, first = false; raw_spin_lock_irq(&wqe->lock); if (acct->nr_workers < acct->max_workers) { atomic_inc(&acct->nr_running); atomic_inc(&wqe->wq->worker_refs); + if (!acct->nr_workers) + first = true; acct->nr_workers++; do_create = true; } raw_spin_unlock_irq(&wqe->lock); if (do_create) - create_io_worker(wqe->wq, wqe, acct->index); + create_io_worker(wqe->wq, wqe, acct->index, first); } } @@ -282,16 +284,26 @@ static void create_worker_cb(struct callback_head *cb) struct io_wq *wq; struct io_wqe *wqe; struct io_wqe_acct *acct; + bool do_create = false, first = false; cwd = container_of(cb, struct create_worker_data, work); wqe = cwd->wqe; wq = wqe->wq; acct = &wqe->acct[cwd->index]; raw_spin_lock_irq(&wqe->lock); - if (acct->nr_workers < acct->max_workers) + if (acct->nr_workers < acct->max_workers) { + if (!acct->nr_workers) + first = true; acct->nr_workers++; + do_create = true; + } raw_spin_unlock_irq(&wqe->lock); - create_io_worker(wq, cwd->wqe, cwd->index); + if (do_create) { + create_io_worker(wq, wqe, cwd->index, first); + } else { + atomic_dec(&acct->nr_running); + io_worker_ref_put(wq); + } kfree(cwd); } @@ -629,7 +641,7 @@ void io_wq_worker_sleeping(struct task_struct *tsk) raw_spin_unlock_irq(&worker->wqe->lock); } -static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) +static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first) { struct io_wqe_acct *acct = &wqe->acct[index]; struct io_worker *worker; @@ -670,7 +682,7 @@ fail: worker->flags |= IO_WORKER_F_FREE; if (index == IO_WQ_ACCT_BOUND) worker->flags |= IO_WORKER_F_BOUND; - if ((acct->nr_workers == 1) && (worker->flags & IO_WORKER_F_BOUND)) + if (first && (worker->flags & IO_WORKER_F_BOUND)) worker->flags |= IO_WORKER_F_FIXED; raw_spin_unlock_irq(&wqe->lock); wake_up_new_task(tsk); diff --git a/fs/io_uring.c b/fs/io_uring.c index bf548af0426c..04c6d059ea94 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -78,6 +78,7 @@ #include <linux/task_work.h> #include <linux/pagemap.h> #include <linux/io_uring.h> +#include <linux/tracehook.h> #define CREATE_TRACE_POINTS #include <trace/events/io_uring.h> @@ -1499,7 +1500,8 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) all_flushed = list_empty(&ctx->cq_overflow_list); if (all_flushed) { clear_bit(0, &ctx->check_cq_overflow); - ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; + WRITE_ONCE(ctx->rings->sq_flags, + ctx->rings->sq_flags & ~IORING_SQ_CQ_OVERFLOW); } if (posted) @@ -1578,7 +1580,9 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, } if (list_empty(&ctx->cq_overflow_list)) { set_bit(0, &ctx->check_cq_overflow); - ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW; + WRITE_ONCE(ctx->rings->sq_flags, + ctx->rings->sq_flags | IORING_SQ_CQ_OVERFLOW); + } ocqe->cqe.user_data = user_data; ocqe->cqe.res = res; @@ -2222,9 +2226,9 @@ static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req) static inline bool io_run_task_work(void) { - if (current->task_works) { + if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) { __set_current_state(TASK_RUNNING); - task_work_run(); + tracehook_notify_signal(); return true; } @@ -6803,14 +6807,16 @@ static inline void io_ring_set_wakeup_flag(struct io_ring_ctx *ctx) { /* Tell userspace we may need a wakeup call */ spin_lock_irq(&ctx->completion_lock); - ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP; + WRITE_ONCE(ctx->rings->sq_flags, + ctx->rings->sq_flags | IORING_SQ_NEED_WAKEUP); spin_unlock_irq(&ctx->completion_lock); } static inline void io_ring_clear_wakeup_flag(struct io_ring_ctx *ctx) { spin_lock_irq(&ctx->completion_lock); - ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP; + WRITE_ONCE(ctx->rings->sq_flags, + ctx->rings->sq_flags & ~IORING_SQ_NEED_WAKEUP); spin_unlock_irq(&ctx->completion_lock); } @@ -7132,16 +7138,6 @@ static void **io_alloc_page_table(size_t size) return table; } -static inline void io_rsrc_ref_lock(struct io_ring_ctx *ctx) -{ - spin_lock_bh(&ctx->rsrc_ref_lock); -} - -static inline void io_rsrc_ref_unlock(struct io_ring_ctx *ctx) -{ - spin_unlock_bh(&ctx->rsrc_ref_lock); -} - static void io_rsrc_node_destroy(struct io_rsrc_node *ref_node) { percpu_ref_exit(&ref_node->refs); @@ -7158,9 +7154,9 @@ static void io_rsrc_node_switch(struct io_ring_ctx *ctx, struct io_rsrc_node *rsrc_node = ctx->rsrc_node; rsrc_node->rsrc_data = data_to_kill; - io_rsrc_ref_lock(ctx); + spin_lock_irq(&ctx->rsrc_ref_lock); list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list); - io_rsrc_ref_unlock(ctx); + spin_unlock_irq(&ctx->rsrc_ref_lock); atomic_inc(&data_to_kill->refs); percpu_ref_kill(&rsrc_node->refs); @@ -7199,17 +7195,19 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ct /* kill initial ref, already quiesced if zero */ if (atomic_dec_and_test(&data->refs)) break; + mutex_unlock(&ctx->uring_lock); flush_delayed_work(&ctx->rsrc_put_work); ret = wait_for_completion_interruptible(&data->done); - if (!ret) + if (!ret) { + mutex_lock(&ctx->uring_lock); break; + } atomic_inc(&data->refs); /* wait for all works potentially completing data->done */ flush_delayed_work(&ctx->rsrc_put_work); reinit_completion(&data->done); - mutex_unlock(&ctx->uring_lock); ret = io_run_task_work_sig(); mutex_lock(&ctx->uring_lock); } while (ret >= 0); @@ -7668,9 +7666,10 @@ static void io_rsrc_node_ref_zero(struct percpu_ref *ref) { struct io_rsrc_node *node = container_of(ref, struct io_rsrc_node, refs); struct io_ring_ctx *ctx = node->rsrc_data->ctx; + unsigned long flags; bool first_add = false; - io_rsrc_ref_lock(ctx); + spin_lock_irqsave(&ctx->rsrc_ref_lock, flags); node->done = true; while (!list_empty(&ctx->rsrc_ref_list)) { @@ -7682,7 +7681,7 @@ static void io_rsrc_node_ref_zero(struct percpu_ref *ref) list_del(&node->node); first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist); } - io_rsrc_ref_unlock(ctx); + spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags); if (first_add) mod_delayed_work(system_wq, &ctx->rsrc_put_work, HZ); @@ -8653,13 +8652,10 @@ static void io_req_caches_free(struct io_ring_ctx *ctx) mutex_unlock(&ctx->uring_lock); } -static bool io_wait_rsrc_data(struct io_rsrc_data *data) +static void io_wait_rsrc_data(struct io_rsrc_data *data) { - if (!data) - return false; - if (!atomic_dec_and_test(&data->refs)) + if (data && !atomic_dec_and_test(&data->refs)) wait_for_completion(&data->done); - return true; } static void io_ring_ctx_free(struct io_ring_ctx *ctx) @@ -8671,10 +8667,14 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) ctx->mm_account = NULL; } + /* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */ + io_wait_rsrc_data(ctx->buf_data); + io_wait_rsrc_data(ctx->file_data); + mutex_lock(&ctx->uring_lock); - if (io_wait_rsrc_data(ctx->buf_data)) + if (ctx->buf_data) __io_sqe_buffers_unregister(ctx); - if (io_wait_rsrc_data(ctx->file_data)) + if (ctx->file_data) __io_sqe_files_unregister(ctx); if (ctx->rings) __io_cqring_overflow_flush(ctx, true); diff --git a/fs/pipe.c b/fs/pipe.c index 8e6ef62aeb1c..678dee2a8228 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -444,9 +444,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) #endif /* - * Epoll nonsensically wants a wakeup whether the pipe - * was already empty or not. - * * If it wasn't empty we try to merge new data into * the last buffer. * @@ -455,9 +452,9 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) * spanning multiple pages. */ head = pipe->head; - was_empty = true; + was_empty = pipe_empty(head, pipe->tail); chars = total_len & (PAGE_SIZE-1); - if (chars && !pipe_empty(head, pipe->tail)) { + if (chars && !was_empty) { unsigned int mask = pipe->ring_size - 1; struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask]; int offset = buf->offset + buf->len; @@ -590,8 +587,11 @@ out: * This is particularly important for small writes, because of * how (for example) the GNU make jobserver uses small writes to * wake up pending jobs + * + * Epoll nonsensically wants a wakeup whether the pipe + * was already empty or not. */ - if (was_empty) { + if (was_empty || pipe->poll_usage) { wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } @@ -654,6 +654,9 @@ pipe_poll(struct file *filp, poll_table *wait) struct pipe_inode_info *pipe = filp->private_data; unsigned int head, tail; + /* Epoll has some historical nasty semantics, this enables them */ + pipe->poll_usage = 1; + /* * Reading pipe state only -- no need for acquiring the semaphore. * |