diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/9p/Makefile | 1 | ||||
-rw-r--r-- | net/9p/client.c | 166 | ||||
-rw-r--r-- | net/9p/protocol.c | 44 | ||||
-rw-r--r-- | net/9p/trans_common.c | 97 | ||||
-rw-r--r-- | net/9p/trans_common.h | 32 | ||||
-rw-r--r-- | net/9p/trans_fd.c | 52 | ||||
-rw-r--r-- | net/9p/trans_virtio.c | 129 | ||||
-rw-r--r-- | net/core/scm.c | 2 | ||||
-rw-r--r-- | net/rds/ib.c | 9 | ||||
-rw-r--r-- | net/rds/ib.h | 2 | ||||
-rw-r--r-- | net/rds/ib_rdma.c | 27 | ||||
-rw-r--r-- | net/sunrpc/sched.c | 2 | ||||
-rw-r--r-- | net/unix/af_unix.c | 2 | ||||
-rw-r--r-- | net/unix/garbage.c | 2 |
14 files changed, 440 insertions, 127 deletions
diff --git a/net/9p/Makefile b/net/9p/Makefile index 198a640d53a6..a0874cc1f718 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o util.o \ protocol.o \ trans_fd.o \ + trans_common.o \ 9pnet_virtio-objs := \ trans_virtio.o \ diff --git a/net/9p/client.c b/net/9p/client.c index a848bca9fbff..347ec0cd2718 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -229,10 +229,23 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) return ERR_PTR(-ENOMEM); } init_waitqueue_head(req->wq); - req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, - GFP_KERNEL); - req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, - GFP_KERNEL); + if ((c->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == + P9_TRANS_PREF_PAYLOAD_SEP) { + int alloc_msize = min(c->msize, 4096); + req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, + GFP_KERNEL); + req->tc->capacity = alloc_msize; + req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, + GFP_KERNEL); + req->rc->capacity = alloc_msize; + } else { + req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, + GFP_KERNEL); + req->tc->capacity = c->msize; + req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, + GFP_KERNEL); + req->rc->capacity = c->msize; + } if ((!req->tc) || (!req->rc)) { printk(KERN_ERR "Couldn't grow tag array\n"); kfree(req->tc); @@ -243,9 +256,7 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) return ERR_PTR(-ENOMEM); } req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); - req->tc->capacity = c->msize; req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall); - req->rc->capacity = c->msize; } p9pdu_reset(req->tc); @@ -443,6 +454,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) { int8_t type; int err; + int ecode; err = p9_parse_header(req->rc, NULL, &type, NULL, 0); if (err) { @@ -450,36 +462,53 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) return err; } - if (type == P9_RERROR || type == P9_RLERROR) { - int ecode; - - if (!p9_is_proto_dotl(c)) { - char *ename; + if (type != P9_RERROR && type != P9_RLERROR) + return 0; - err = p9pdu_readf(req->rc, c->proto_version, "s?d", - &ename, &ecode); - if (err) - goto out_err; + if (!p9_is_proto_dotl(c)) { + char *ename; + + if (req->tc->pbuf_size) { + /* Handle user buffers */ + size_t len = req->rc->size - req->rc->offset; + if (req->tc->pubuf) { + /* User Buffer */ + err = copy_from_user( + &req->rc->sdata[req->rc->offset], + req->tc->pubuf, len); + if (err) { + err = -EFAULT; + goto out_err; + } + } else { + /* Kernel Buffer */ + memmove(&req->rc->sdata[req->rc->offset], + req->tc->pkbuf, len); + } + } + err = p9pdu_readf(req->rc, c->proto_version, "s?d", + &ename, &ecode); + if (err) + goto out_err; - if (p9_is_proto_dotu(c)) - err = -ecode; + if (p9_is_proto_dotu(c)) + err = -ecode; - if (!err || !IS_ERR_VALUE(err)) { - err = p9_errstr2errno(ename, strlen(ename)); + if (!err || !IS_ERR_VALUE(err)) { + err = p9_errstr2errno(ename, strlen(ename)); - P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename); + P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, + ename); - kfree(ename); - } - } else { - err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); - err = -ecode; - - P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); + kfree(ename); } + } else { + err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); + err = -ecode; + + P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); + } - } else - err = 0; return err; @@ -1191,6 +1220,27 @@ error: } EXPORT_SYMBOL(p9_client_fsync); +int p9_client_sync_fs(struct p9_fid *fid) +{ + int err = 0; + struct p9_req_t *req; + struct p9_client *clnt; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TSYNC_FS fid %d\n", fid->fid); + + clnt = fid->clnt; + req = p9_client_rpc(clnt, P9_TSYNCFS, "d", fid->fid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RSYNCFS fid %d\n", fid->fid); + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_sync_fs); + int p9_client_clunk(struct p9_fid *fid) { int err; @@ -1270,7 +1320,15 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, if (count < rsize) rsize = count; - req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize); + /* Don't bother zerocopy form small IO (< 1024) */ + if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == + P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { + req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset, + rsize, data, udata); + } else { + req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, + rsize); + } if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1284,13 +1342,15 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count); - if (data) { - memmove(data, dataptr, count); - } else { - err = copy_to_user(udata, dataptr, count); - if (err) { - err = -EFAULT; - goto free_and_error; + if (!req->tc->pbuf_size) { + if (data) { + memmove(data, dataptr, count); + } else { + err = copy_to_user(udata, dataptr, count); + if (err) { + err = -EFAULT; + goto free_and_error; + } } } p9_free_req(clnt, req); @@ -1323,12 +1383,21 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, if (count < rsize) rsize = count; - if (data) - req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, offset, - rsize, data); - else - req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, offset, - rsize, udata); + + /* Don't bother zerocopy form small IO (< 1024) */ + if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == + P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { + req = p9_client_rpc(clnt, P9_TWRITE, "dqE", fid->fid, offset, + rsize, data, udata); + } else { + + if (data) + req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, + offset, rsize, data); + else + req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, + offset, rsize, udata); + } if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1716,7 +1785,14 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) if (count < rsize) rsize = count; - req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, offset, rsize); + if ((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == + P9_TRANS_PREF_PAYLOAD_SEP) { + req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid, + offset, rsize, data); + } else { + req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, + offset, rsize); + } if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1730,7 +1806,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); - if (data) + if (!req->tc->pbuf_size && data) memmove(data, dataptr, count); p9_free_req(clnt, req); diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 1e308f210928..2ce515b859b3 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -114,6 +114,26 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) return size - len; } +static size_t +pdu_write_urw(struct p9_fcall *pdu, const char *kdata, const char __user *udata, + size_t size) +{ + BUG_ON(pdu->size > P9_IOHDRSZ); + pdu->pubuf = (char __user *)udata; + pdu->pkbuf = (char *)kdata; + pdu->pbuf_size = size; + return 0; +} + +static size_t +pdu_write_readdir(struct p9_fcall *pdu, const char *kdata, size_t size) +{ + BUG_ON(pdu->size > P9_READDIRHDRSZ); + pdu->pkbuf = (char *)kdata; + pdu->pbuf_size = size; + return 0; +} + /* b - int8_t w - int16_t @@ -445,6 +465,25 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, errcode = -EFAULT; } break; + case 'E':{ + int32_t cnt = va_arg(ap, int32_t); + const char *k = va_arg(ap, const void *); + const char *u = va_arg(ap, const void *); + errcode = p9pdu_writef(pdu, proto_version, "d", + cnt); + if (!errcode && pdu_write_urw(pdu, k, u, cnt)) + errcode = -EFAULT; + } + break; + case 'F':{ + int32_t cnt = va_arg(ap, int32_t); + const char *k = va_arg(ap, const void *); + errcode = p9pdu_writef(pdu, proto_version, "d", + cnt); + if (!errcode && pdu_write_readdir(pdu, k, cnt)) + errcode = -EFAULT; + } + break; case 'U':{ int32_t count = va_arg(ap, int32_t); const char __user *udata = @@ -579,6 +618,7 @@ EXPORT_SYMBOL(p9stat_read); int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type) { + pdu->id = type; return p9pdu_writef(pdu, 0, "dbw", 0, type, tag); } @@ -606,6 +646,10 @@ void p9pdu_reset(struct p9_fcall *pdu) { pdu->offset = 0; pdu->size = 0; + pdu->private = NULL; + pdu->pubuf = NULL; + pdu->pkbuf = NULL; + pdu->pbuf_size = 0; } int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c new file mode 100644 index 000000000000..d62b9aa58df8 --- /dev/null +++ b/net/9p/trans_common.c @@ -0,0 +1,97 @@ +/* + * Copyright IBM Corporation, 2010 + * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +#include <linux/slab.h> +#include <linux/module.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> +#include <linux/scatterlist.h> +#include "trans_common.h" + +/** + * p9_release_req_pages - Release pages after the transaction. + * @*private: PDU's private page of struct trans_rpage_info + */ +void +p9_release_req_pages(struct trans_rpage_info *rpinfo) +{ + int i = 0; + + while (rpinfo->rp_data[i] && rpinfo->rp_nr_pages--) { + put_page(rpinfo->rp_data[i]); + i++; + } +} +EXPORT_SYMBOL(p9_release_req_pages); + +/** + * p9_nr_pages - Return number of pages needed to accomodate the payload. + */ +int +p9_nr_pages(struct p9_req_t *req) +{ + int start_page, end_page; + start_page = (unsigned long long)req->tc->pubuf >> PAGE_SHIFT; + end_page = ((unsigned long long)req->tc->pubuf + req->tc->pbuf_size + + PAGE_SIZE - 1) >> PAGE_SHIFT; + return end_page - start_page; +} +EXPORT_SYMBOL(p9_nr_pages); + +/** + * payload_gup - Translates user buffer into kernel pages and + * pins them either for read/write through get_user_pages_fast(). + * @req: Request to be sent to server. + * @pdata_off: data offset into the first page after translation (gup). + * @pdata_len: Total length of the IO. gup may not return requested # of pages. + * @nr_pages: number of pages to accomodate the payload + * @rw: Indicates if the pages are for read or write. + */ +int +p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len, + int nr_pages, u8 rw) +{ + uint32_t first_page_bytes = 0; + uint32_t pdata_mapped_pages; + struct trans_rpage_info *rpinfo; + + *pdata_off = (size_t)req->tc->pubuf & (PAGE_SIZE-1); + + if (*pdata_off) + first_page_bytes = min((PAGE_SIZE - *pdata_off), + req->tc->pbuf_size); + + rpinfo = req->tc->private; + pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf, + nr_pages, rw, &rpinfo->rp_data[0]); + + if (pdata_mapped_pages < 0) { + printk(KERN_ERR "get_user_pages_fast failed:%d udata:%p" + "nr_pages:%d\n", pdata_mapped_pages, + req->tc->pubuf, nr_pages); + pdata_mapped_pages = 0; + return -EIO; + } + rpinfo->rp_nr_pages = pdata_mapped_pages; + if (*pdata_off) { + *pdata_len = first_page_bytes; + *pdata_len += min((req->tc->pbuf_size - *pdata_len), + ((size_t)pdata_mapped_pages - 1) << PAGE_SHIFT); + } else { + *pdata_len = min(req->tc->pbuf_size, + (size_t)pdata_mapped_pages << PAGE_SHIFT); + } + return 0; +} +EXPORT_SYMBOL(p9_payload_gup); diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h new file mode 100644 index 000000000000..76309223bb02 --- /dev/null +++ b/net/9p/trans_common.h @@ -0,0 +1,32 @@ +/* + * Copyright IBM Corporation, 2010 + * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +/* TRUE if it is user context */ +#define P9_IS_USER_CONTEXT (!segment_eq(get_fs(), KERNEL_DS)) + +/** + * struct trans_rpage_info - To store mapped page information in PDU. + * @rp_alloc:Set if this structure is allocd, not a reuse unused space in pdu. + * @rp_nr_pages: Number of mapped pages + * @rp_data: Array of page pointers + */ +struct trans_rpage_info { + u8 rp_alloc; + int rp_nr_pages; + struct page *rp_data[0]; +}; + +void p9_release_req_pages(struct trans_rpage_info *); +int p9_payload_gup(struct p9_req_t *, size_t *, int *, int, u8); +int p9_nr_pages(struct p9_req_t *); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 078eb162d9bf..a30471e51740 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -153,10 +153,11 @@ struct p9_conn { unsigned long wsched; }; +static void p9_poll_workfn(struct work_struct *work); + static DEFINE_SPINLOCK(p9_poll_lock); static LIST_HEAD(p9_poll_pending_list); -static struct workqueue_struct *p9_mux_wq; -static struct task_struct *p9_poll_task; +static DECLARE_WORK(p9_poll_work, p9_poll_workfn); static void p9_mux_poll_stop(struct p9_conn *m) { @@ -384,7 +385,7 @@ static void p9_read_work(struct work_struct *work) if (n & POLLIN) { P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); - queue_work(p9_mux_wq, &m->rq); + schedule_work(&m->rq); } else clear_bit(Rworksched, &m->wsched); } else @@ -497,7 +498,7 @@ static void p9_write_work(struct work_struct *work) if (n & POLLOUT) { P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); - queue_work(p9_mux_wq, &m->wq); + schedule_work(&m->wq); } else clear_bit(Wworksched, &m->wsched); } else @@ -516,15 +517,14 @@ static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) container_of(wait, struct p9_poll_wait, wait); struct p9_conn *m = pwait->conn; unsigned long flags; - DECLARE_WAITQUEUE(dummy_wait, p9_poll_task); spin_lock_irqsave(&p9_poll_lock, flags); if (list_empty(&m->poll_pending_link)) list_add_tail(&m->poll_pending_link, &p9_poll_pending_list); spin_unlock_irqrestore(&p9_poll_lock, flags); - /* perform the default wake up operation */ - return default_wake_function(&dummy_wait, mode, sync, key); + schedule_work(&p9_poll_work); + return 1; } /** @@ -629,7 +629,7 @@ static void p9_poll_mux(struct p9_conn *m) P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m); if (!test_and_set_bit(Rworksched, &m->wsched)) { P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); - queue_work(p9_mux_wq, &m->rq); + schedule_work(&m->rq); } } @@ -639,7 +639,7 @@ static void p9_poll_mux(struct p9_conn *m) if ((m->wsize || !list_empty(&m->unsent_req_list)) && !test_and_set_bit(Wworksched, &m->wsched)) { P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); - queue_work(p9_mux_wq, &m->wq); + schedule_work(&m->wq); } } } @@ -677,7 +677,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) n = p9_fd_poll(m->client, NULL); if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) - queue_work(p9_mux_wq, &m->wq); + schedule_work(&m->wq); return 0; } @@ -1047,12 +1047,12 @@ static struct p9_trans_module p9_fd_trans = { * */ -static int p9_poll_proc(void *a) +static void p9_poll_workfn(struct work_struct *work) { unsigned long flags; P9_DPRINTK(P9_DEBUG_TRANS, "start %p\n", current); - repeat: + spin_lock_irqsave(&p9_poll_lock, flags); while (!list_empty(&p9_poll_pending_list)) { struct p9_conn *conn = list_first_entry(&p9_poll_pending_list, @@ -1067,35 +1067,11 @@ static int p9_poll_proc(void *a) } spin_unlock_irqrestore(&p9_poll_lock, flags); - set_current_state(TASK_INTERRUPTIBLE); - if (list_empty(&p9_poll_pending_list)) { - P9_DPRINTK(P9_DEBUG_TRANS, "sleeping...\n"); - schedule(); - } - __set_current_state(TASK_RUNNING); - - if (!kthread_should_stop()) - goto repeat; - P9_DPRINTK(P9_DEBUG_TRANS, "finish\n"); - return 0; } int p9_trans_fd_init(void) { - p9_mux_wq = create_workqueue("v9fs"); - if (!p9_mux_wq) { - printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); - return -ENOMEM; - } - - p9_poll_task = kthread_run(p9_poll_proc, NULL, "v9fs-poll"); - if (IS_ERR(p9_poll_task)) { - destroy_workqueue(p9_mux_wq); - printk(KERN_WARNING "v9fs: mux: creating poll task failed\n"); - return PTR_ERR(p9_poll_task); - } - v9fs_register_trans(&p9_tcp_trans); v9fs_register_trans(&p9_unix_trans); v9fs_register_trans(&p9_fd_trans); @@ -1105,10 +1081,8 @@ int p9_trans_fd_init(void) void p9_trans_fd_exit(void) { - kthread_stop(p9_poll_task); + flush_work_sync(&p9_poll_work); v9fs_unregister_trans(&p9_tcp_trans); v9fs_unregister_trans(&p9_unix_trans); v9fs_unregister_trans(&p9_fd_trans); - - destroy_workqueue(p9_mux_wq); } diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index c8f3f72ab20e..9b550ed9c711 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -45,6 +45,7 @@ #include <linux/scatterlist.h> #include <linux/virtio.h> #include <linux/virtio_9p.h> +#include "trans_common.h" #define VIRTQUEUE_NUM 128 @@ -155,6 +156,14 @@ static void req_done(struct virtqueue *vq) rc->tag); req = p9_tag_lookup(chan->client, rc->tag); req->status = REQ_STATUS_RCVD; + if (req->tc->private) { + struct trans_rpage_info *rp = req->tc->private; + /*Release pages */ + p9_release_req_pages(rp); + if (rp->rp_alloc) + kfree(rp); + req->tc->private = NULL; + } p9_client_cb(chan->client, req); } else { spin_unlock_irqrestore(&chan->lock, flags); @@ -203,6 +212,38 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) } /** + * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer, + * this takes a list of pages. + * @sg: scatter/gather list to pack into + * @start: which segment of the sg_list to start at + * @pdata_off: Offset into the first page + * @**pdata: a list of pages to add into sg. + * @count: amount of data to pack into the scatter/gather list + */ +static int +pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off, + struct page **pdata, int count) +{ + int s; + int i = 0; + int index = start; + + if (pdata_off) { + s = min((int)(PAGE_SIZE - pdata_off), count); + sg_set_page(&sg[index++], pdata[i++], s, pdata_off); + count -= s; + } + + while (count) { + BUG_ON(index > limit); + s = min((int)PAGE_SIZE, count); + sg_set_page(&sg[index++], pdata[i++], s, 0); + count -= s; + } + return index-start; +} + +/** * p9_virtio_request - issue a request * @client: client instance issuing the request * @req: request to be issued @@ -212,22 +253,97 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) static int p9_virtio_request(struct p9_client *client, struct p9_req_t *req) { - int in, out; + int in, out, inp, outp; struct virtio_chan *chan = client->trans; char *rdata = (char *)req->rc+sizeof(struct p9_fcall); unsigned long flags; - int err; + size_t pdata_off = 0; + struct trans_rpage_info *rpinfo = NULL; + int err, pdata_len = 0; P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); req_retry: req->status = REQ_STATUS_SENT; + if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) { + int nr_pages = p9_nr_pages(req); + int rpinfo_size = sizeof(struct trans_rpage_info) + + sizeof(struct page *) * nr_pages; + + if (rpinfo_size <= (req->tc->capacity - req->tc->size)) { + /* We can use sdata */ + req->tc->private = req->tc->sdata + req->tc->size; + rpinfo = (struct trans_rpage_info *)req->tc->private; + rpinfo->rp_alloc = 0; + } else { + req->tc->private = kmalloc(rpinfo_size, GFP_NOFS); + if (!req->tc->private) { + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: " + "private kmalloc returned NULL"); + return -ENOMEM; + } + rpinfo = (struct trans_rpage_info *)req->tc->private; + rpinfo->rp_alloc = 1; + } + + err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages, + req->tc->id == P9_TREAD ? 1 : 0); + if (err < 0) { + if (rpinfo->rp_alloc) + kfree(rpinfo); + return err; + } + } + spin_lock_irqsave(&chan->lock, flags); + + /* Handle out VirtIO ring buffers */ out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, - req->tc->size); - in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata, - client->msize); + req->tc->size); + + if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) { + /* We have additional write payload buffer to take care */ + if (req->tc->pubuf && P9_IS_USER_CONTEXT) { + outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, + pdata_off, rpinfo->rp_data, pdata_len); + } else { + char *pbuf = req->tc->pubuf ? req->tc->pubuf : + req->tc->pkbuf; + outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf, + req->tc->pbuf_size); + } + out += outp; + } + + /* Handle in VirtIO ring buffers */ + if (req->tc->pbuf_size && + ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) { + /* + * Take care of additional Read payload. + * 11 is the read/write header = PDU Header(7) + IO Size (4). + * Arrange in such a way that server places header in the + * alloced memory and payload onto the user buffer. + */ + inp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11); + /* + * Running executables in the filesystem may result in + * a read request with kernel buffer as opposed to user buffer. + */ + if (req->tc->pubuf && P9_IS_USER_CONTEXT) { + in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM, + pdata_off, rpinfo->rp_data, pdata_len); + } else { + char *pbuf = req->tc->pubuf ? req->tc->pubuf : + req->tc->pkbuf; + in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM, + pbuf, req->tc->pbuf_size); + } + in += inp; + } else { + in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, + client->msize); + } err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); if (err < 0) { @@ -246,6 +362,8 @@ req_retry: P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: " "virtio rpc add_buf returned failure"); + if (rpinfo && rpinfo->rp_alloc) + kfree(rpinfo); return -EIO; } } @@ -448,6 +566,7 @@ static struct p9_trans_module p9_virtio_trans = { .request = p9_virtio_request, .cancel = p9_virtio_cancel, .maxsize = PAGE_SIZE*16, + .pref = P9_TRANS_PREF_PAYLOAD_SEP, .def = 0, .owner = THIS_MODULE, }; diff --git a/net/core/scm.c b/net/core/scm.c index bbe454450801..4c1ef026d695 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -95,7 +95,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) int fd = fdp[i]; struct file *file; - if (fd < 0 || !(file = fget(fd))) + if (fd < 0 || !(file = fget_raw(fd))) return -EBADF; *fpp++ = file; fpl->count++; diff --git a/net/rds/ib.c b/net/rds/ib.c index 4123967d4d65..cce19f95c624 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -364,7 +364,6 @@ void rds_ib_exit(void) rds_ib_sysctl_exit(); rds_ib_recv_exit(); rds_trans_unregister(&rds_ib_transport); - rds_ib_fmr_exit(); } struct rds_transport rds_ib_transport = { @@ -400,13 +399,9 @@ int rds_ib_init(void) INIT_LIST_HEAD(&rds_ib_devices); - ret = rds_ib_fmr_init(); - if (ret) - goto out; - ret = ib_register_client(&rds_ib_client); if (ret) - goto out_fmr_exit; + goto out; ret = rds_ib_sysctl_init(); if (ret) @@ -430,8 +425,6 @@ out_sysctl: rds_ib_sysctl_exit(); out_ibreg: rds_ib_unregister_client(); -out_fmr_exit: - rds_ib_fmr_exit(); out: return ret; } diff --git a/net/rds/ib.h b/net/rds/ib.h index e34ad032b66d..4297d92788dc 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -307,8 +307,6 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, void rds_ib_sync_mr(void *trans_private, int dir); void rds_ib_free_mr(void *trans_private, int invalidate); void rds_ib_flush_mrs(void); -int rds_ib_fmr_init(void); -void rds_ib_fmr_exit(void); /* ib_recv.c */ int rds_ib_recv_init(void); diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 18a833c450c8..819c35a0d9cb 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -38,8 +38,6 @@ #include "ib.h" #include "xlist.h" -static struct workqueue_struct *rds_ib_fmr_wq; - static DEFINE_PER_CPU(unsigned long, clean_list_grace); #define CLEAN_LIST_BUSY_BIT 0 @@ -307,7 +305,7 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev) int err = 0, iter = 0; if (atomic_read(&pool->dirty_count) >= pool->max_items / 10) - queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10); + schedule_delayed_work(&pool->flush_worker, 10); while (1) { ibmr = rds_ib_reuse_fmr(pool); @@ -696,24 +694,6 @@ out_nolock: return ret; } -int rds_ib_fmr_init(void) -{ - rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd"); - if (!rds_ib_fmr_wq) - return -ENOMEM; - return 0; -} - -/* - * By the time this is called all the IB devices should have been torn down and - * had their pools freed. As each pool is freed its work struct is waited on, - * so the pool flushing work queue should be idle by the time we get here. - */ -void rds_ib_fmr_exit(void) -{ - destroy_workqueue(rds_ib_fmr_wq); -} - static void rds_ib_mr_pool_flush_worker(struct work_struct *work) { struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work); @@ -741,7 +721,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate) /* If we've pinned too many pages, request a flush */ if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || atomic_read(&pool->dirty_count) >= pool->max_items / 10) - queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10); + schedule_delayed_work(&pool->flush_worker, 10); if (invalidate) { if (likely(!in_interrupt())) { @@ -749,8 +729,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate) } else { /* We get here if the user created a MR marked * as use_once and invalidate at the same time. */ - queue_delayed_work(rds_ib_fmr_wq, - &pool->flush_worker, 10); + schedule_delayed_work(&pool->flush_worker, 10); } } diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 59e599498e37..3fc8624fcd17 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -955,7 +955,7 @@ static int rpciod_start(void) * Create the rpciod thread and wait for it to start. */ dprintk("RPC: creating workqueue rpciod\n"); - wq = alloc_workqueue("rpciod", WQ_RESCUER, 0); + wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0); rpciod_workqueue = wq; return rpciod_workqueue != NULL; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 437a99e560e1..ba5b8c208498 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -850,7 +850,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) * Get the parent directory, calculate the hash for last * component. */ - err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd); + err = kern_path_parent(sunaddr->sun_path, &nd); if (err) goto out_mknod_parent; diff --git a/net/unix/garbage.c b/net/unix/garbage.c index f89f83bf828e..b6f4b994eb35 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -104,7 +104,7 @@ struct sock *unix_get_socket(struct file *filp) /* * Socket ? */ - if (S_ISSOCK(inode->i_mode)) { + if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) { struct socket *sock = SOCKET_I(inode); struct sock *s = sock->sk; |