diff options
Diffstat (limited to 'net')
36 files changed, 1143 insertions, 183 deletions
diff --git a/net/9p/Kconfig b/net/9p/Kconfig index ff34c5acc130..c42c0c400bf9 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -20,6 +20,12 @@ config NET_9P_VIRTIO This builds support for a transports between guest partitions and a host partition. +config NET_9P_RDMA + depends on NET_9P && INFINIBAND && EXPERIMENTAL + tristate "9P RDMA Transport (Experimental)" + help + This builds support for a RDMA transport. + config NET_9P_DEBUG bool "Debug information" depends on NET_9P diff --git a/net/9p/Makefile b/net/9p/Makefile index 1041b7bd12e2..198a640d53a6 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_NET_9P) := 9pnet.o obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o +obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o 9pnet-objs := \ mod.o \ @@ -11,3 +12,6 @@ obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o 9pnet_virtio-objs := \ trans_virtio.o \ + +9pnet_rdma-objs := \ + trans_rdma.o \ diff --git a/net/9p/client.c b/net/9p/client.c index bbac2f72b4d2..67717f69412e 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -159,6 +159,7 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) if (!c->reqs[row]) { printk(KERN_ERR "Couldn't grow tag array\n"); + spin_unlock_irqrestore(&c->lock, flags); return ERR_PTR(-ENOMEM); } for (col = 0; col < P9_ROW_MAXTAG; col++) { diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 29be52439086..dcd7666824ba 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -53,6 +53,7 @@ static int p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...); +#ifdef CONFIG_NET_9P_DEBUG void p9pdu_dump(int way, struct p9_fcall *pdu) { @@ -81,6 +82,12 @@ p9pdu_dump(int way, struct p9_fcall *pdu) else P9_DPRINTK(P9_DEBUG_PKT, "]]](%d) %s\n", datalen, buf); } +#else +void +p9pdu_dump(int way, struct p9_fcall *pdu) +{ +} +#endif EXPORT_SYMBOL(p9pdu_dump); void p9stat_free(struct p9_wstat *stbuf) @@ -179,7 +186,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) } break; case 's':{ - char **ptr = va_arg(ap, char **); + char **sptr = va_arg(ap, char **); int16_t len; int size; @@ -189,17 +196,17 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) size = MAX(len, 0); - *ptr = kmalloc(size + 1, GFP_KERNEL); - if (*ptr == NULL) { + *sptr = kmalloc(size + 1, GFP_KERNEL); + if (*sptr == NULL) { errcode = -EFAULT; break; } - if (pdu_read(pdu, *ptr, size)) { + if (pdu_read(pdu, *sptr, size)) { errcode = -EFAULT; - kfree(*ptr); - *ptr = NULL; + kfree(*sptr); + *sptr = NULL; } else - (*ptr)[size] = 0; + (*sptr)[size] = 0; } break; case 'Q':{ @@ -373,13 +380,13 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) } break; case 's':{ - const char *ptr = va_arg(ap, const char *); + const char *sptr = va_arg(ap, const char *); int16_t len = 0; - if (ptr) - len = MIN(strlen(ptr), USHORT_MAX); + if (sptr) + len = MIN(strlen(sptr), USHORT_MAX); errcode = p9pdu_writef(pdu, optional, "w", len); - if (!errcode && pdu_write(pdu, ptr, len)) + if (!errcode && pdu_write(pdu, sptr, len)) errcode = -EFAULT; } break; @@ -419,7 +426,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) case 'U':{ int32_t count = va_arg(ap, int32_t); const char __user *udata = - va_arg(ap, const void *); + va_arg(ap, const void __user *); errcode = p9pdu_writef(pdu, optional, "d", count); if (!errcode && pdu_write_u(pdu, udata, count)) @@ -542,8 +549,10 @@ int p9pdu_finalize(struct p9_fcall *pdu) err = p9pdu_writef(pdu, 0, "d", size); pdu->size = size; +#ifdef CONFIG_NET_9P_DEBUG if ((p9_debug_level & P9_DEBUG_PKT) == P9_DEBUG_PKT) p9pdu_dump(0, pdu); +#endif P9_DPRINTK(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", pdu->size, pdu->id, pdu->tag); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index be65d8242fd2..1df0356f242b 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -678,11 +678,9 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) { - struct p9_trans_fd *ts = client->trans; - struct p9_conn *m = ts->conn; int ret = 1; - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p req %p\n", m, req); + P9_DPRINTK(P9_DEBUG_TRANS, "client %p req %p\n", client, req); spin_lock(&client->lock); list_del(&req->req_list); diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c new file mode 100644 index 000000000000..8d6cc4777aae --- /dev/null +++ b/net/9p/trans_rdma.c @@ -0,0 +1,712 @@ +/* + * linux/fs/9p/trans_rdma.c + * + * RDMA transport layer based on the trans_fd.c implementation. + * + * Copyright (C) 2008 by Tom Tucker <tom@opengridcomputing.com> + * Copyright (C) 2006 by Russ Cox <rsc@swtch.com> + * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> + * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/in.h> +#include <linux/module.h> +#include <linux/net.h> +#include <linux/ipv6.h> +#include <linux/kthread.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/un.h> +#include <linux/uaccess.h> +#include <linux/inet.h> +#include <linux/idr.h> +#include <linux/file.h> +#include <linux/parser.h> +#include <linux/semaphore.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> +#include <net/9p/transport.h> +#include <rdma/ib_verbs.h> +#include <rdma/rdma_cm.h> +#include <rdma/ib_verbs.h> + +#define P9_PORT 5640 +#define P9_RDMA_SQ_DEPTH 32 +#define P9_RDMA_RQ_DEPTH 32 +#define P9_RDMA_SEND_SGE 4 +#define P9_RDMA_RECV_SGE 4 +#define P9_RDMA_IRD 0 +#define P9_RDMA_ORD 0 +#define P9_RDMA_TIMEOUT 30000 /* 30 seconds */ +#define P9_RDMA_MAXSIZE (4*4096) /* Min SGE is 4, so we can + * safely advertise a maxsize + * of 64k */ + +#define P9_RDMA_MAX_SGE (P9_RDMA_MAXSIZE >> PAGE_SHIFT) +/** + * struct p9_trans_rdma - RDMA transport instance + * + * @state: tracks the transport state machine for connection setup and tear down + * @cm_id: The RDMA CM ID + * @pd: Protection Domain pointer + * @qp: Queue Pair pointer + * @cq: Completion Queue pointer + * @lkey: The local access only memory region key + * @timeout: Number of uSecs to wait for connection management events + * @sq_depth: The depth of the Send Queue + * @sq_sem: Semaphore for the SQ + * @rq_depth: The depth of the Receive Queue. + * @addr: The remote peer's address + * @req_lock: Protects the active request list + * @send_wait: Wait list when the SQ fills up + * @cm_done: Completion event for connection management tracking + */ +struct p9_trans_rdma { + enum { + P9_RDMA_INIT, + P9_RDMA_ADDR_RESOLVED, + P9_RDMA_ROUTE_RESOLVED, + P9_RDMA_CONNECTED, + P9_RDMA_FLUSHING, + P9_RDMA_CLOSING, + P9_RDMA_CLOSED, + } state; + struct rdma_cm_id *cm_id; + struct ib_pd *pd; + struct ib_qp *qp; + struct ib_cq *cq; + struct ib_mr *dma_mr; + u32 lkey; + long timeout; + int sq_depth; + struct semaphore sq_sem; + int rq_depth; + atomic_t rq_count; + struct sockaddr_in addr; + spinlock_t req_lock; + + struct completion cm_done; +}; + +/** + * p9_rdma_context - Keeps track of in-process WR + * + * @wc_op: The original WR op for when the CQE completes in error. + * @busa: Bus address to unmap when the WR completes + * @req: Keeps track of requests (send) + * @rc: Keepts track of replies (receive) + */ +struct p9_rdma_req; +struct p9_rdma_context { + enum ib_wc_opcode wc_op; + dma_addr_t busa; + union { + struct p9_req_t *req; + struct p9_fcall *rc; + }; +}; + +/** + * p9_rdma_opts - Collection of mount options + * @port: port of connection + * @sq_depth: The requested depth of the SQ. This really doesn't need + * to be any deeper than the number of threads used in the client + * @rq_depth: The depth of the RQ. Should be greater than or equal to SQ depth + * @timeout: Time to wait in msecs for CM events + */ +struct p9_rdma_opts { + short port; + int sq_depth; + int rq_depth; + long timeout; +}; + +/* + * Option Parsing (code inspired by NFS code) + */ +enum { + /* Options that take integer arguments */ + Opt_port, Opt_rq_depth, Opt_sq_depth, Opt_timeout, Opt_err, +}; + +static match_table_t tokens = { + {Opt_port, "port=%u"}, + {Opt_sq_depth, "sq=%u"}, + {Opt_rq_depth, "rq=%u"}, + {Opt_timeout, "timeout=%u"}, + {Opt_err, NULL}, +}; + +/** + * parse_options - parse mount options into session structure + * @options: options string passed from mount + * @opts: transport-specific structure to parse options into + * + * Returns 0 upon success, -ERRNO upon failure + */ +static int parse_opts(char *params, struct p9_rdma_opts *opts) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + int option; + char *options; + int ret; + + opts->port = P9_PORT; + opts->sq_depth = P9_RDMA_SQ_DEPTH; + opts->rq_depth = P9_RDMA_RQ_DEPTH; + opts->timeout = P9_RDMA_TIMEOUT; + + if (!params) + return 0; + + options = kstrdup(params, GFP_KERNEL); + if (!options) { + P9_DPRINTK(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); + return -ENOMEM; + } + + while ((p = strsep(&options, ",")) != NULL) { + int token; + int r; + if (!*p) + continue; + token = match_token(p, tokens, args); + r = match_int(&args[0], &option); + if (r < 0) { + P9_DPRINTK(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); + ret = r; + continue; + } + switch (token) { + case Opt_port: + opts->port = option; + break; + case Opt_sq_depth: + opts->sq_depth = option; + break; + case Opt_rq_depth: + opts->rq_depth = option; + break; + case Opt_timeout: + opts->timeout = option; + break; + default: + continue; + } + } + /* RQ must be at least as large as the SQ */ + opts->rq_depth = max(opts->rq_depth, opts->sq_depth); + kfree(options); + return 0; +} + +static int +p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) +{ + struct p9_client *c = id->context; + struct p9_trans_rdma *rdma = c->trans; + switch (event->event) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + BUG_ON(rdma->state != P9_RDMA_INIT); + rdma->state = P9_RDMA_ADDR_RESOLVED; + break; + + case RDMA_CM_EVENT_ROUTE_RESOLVED: + BUG_ON(rdma->state != P9_RDMA_ADDR_RESOLVED); + rdma->state = P9_RDMA_ROUTE_RESOLVED; + break; + + case RDMA_CM_EVENT_ESTABLISHED: + BUG_ON(rdma->state != P9_RDMA_ROUTE_RESOLVED); + rdma->state = P9_RDMA_CONNECTED; + break; + + case RDMA_CM_EVENT_DISCONNECTED: + if (rdma) + rdma->state = P9_RDMA_CLOSED; + if (c) + c->status = Disconnected; + break; + + case RDMA_CM_EVENT_TIMEWAIT_EXIT: + break; + + case RDMA_CM_EVENT_ADDR_CHANGE: + case RDMA_CM_EVENT_ROUTE_ERROR: + case RDMA_CM_EVENT_DEVICE_REMOVAL: + case RDMA_CM_EVENT_MULTICAST_JOIN: + case RDMA_CM_EVENT_MULTICAST_ERROR: + case RDMA_CM_EVENT_REJECTED: + case RDMA_CM_EVENT_CONNECT_REQUEST: + case RDMA_CM_EVENT_CONNECT_RESPONSE: + case RDMA_CM_EVENT_CONNECT_ERROR: + case RDMA_CM_EVENT_ADDR_ERROR: + case RDMA_CM_EVENT_UNREACHABLE: + c->status = Disconnected; + rdma_disconnect(rdma->cm_id); + break; + default: + BUG(); + } + complete(&rdma->cm_done); + return 0; +} + +static void +handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, + struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len) +{ + struct p9_req_t *req; + int err = 0; + int16_t tag; + + req = NULL; + ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize, + DMA_FROM_DEVICE); + + if (status != IB_WC_SUCCESS) + goto err_out; + + err = p9_parse_header(c->rc, NULL, NULL, &tag, 1); + if (err) + goto err_out; + + req = p9_tag_lookup(client, tag); + if (!req) + goto err_out; + + req->rc = c->rc; + p9_client_cb(client, req); + + return; + + err_out: + P9_DPRINTK(P9_DEBUG_ERROR, "req %p err %d status %d\n", + req, err, status); + rdma->state = P9_RDMA_FLUSHING; + client->status = Disconnected; + return; +} + +static void +handle_send(struct p9_client *client, struct p9_trans_rdma *rdma, + struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len) +{ + ib_dma_unmap_single(rdma->cm_id->device, + c->busa, c->req->tc->size, + DMA_TO_DEVICE); +} + +static void qp_event_handler(struct ib_event *event, void *context) +{ + P9_DPRINTK(P9_DEBUG_ERROR, "QP event %d context %p\n", event->event, + context); +} + +static void cq_comp_handler(struct ib_cq *cq, void *cq_context) +{ + struct p9_client *client = cq_context; + struct p9_trans_rdma *rdma = client->trans; + int ret; + struct ib_wc wc; + + ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); + while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { + struct p9_rdma_context *c = (void *) (unsigned long) wc.wr_id; + + switch (c->wc_op) { + case IB_WC_RECV: + atomic_dec(&rdma->rq_count); + handle_recv(client, rdma, c, wc.status, wc.byte_len); + break; + + case IB_WC_SEND: + handle_send(client, rdma, c, wc.status, wc.byte_len); + up(&rdma->sq_sem); + break; + + default: + printk(KERN_ERR "9prdma: unexpected completion type, " + "c->wc_op=%d, wc.opcode=%d, status=%d\n", + c->wc_op, wc.opcode, wc.status); + break; + } + kfree(c); + } +} + +static void cq_event_handler(struct ib_event *e, void *v) +{ + P9_DPRINTK(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v); +} + +static void rdma_destroy_trans(struct p9_trans_rdma *rdma) +{ + if (!rdma) + return; + + if (rdma->dma_mr && !IS_ERR(rdma->dma_mr)) + ib_dereg_mr(rdma->dma_mr); + + if (rdma->qp && !IS_ERR(rdma->qp)) + ib_destroy_qp(rdma->qp); + + if (rdma->pd && !IS_ERR(rdma->pd)) + ib_dealloc_pd(rdma->pd); + + if (rdma->cq && !IS_ERR(rdma->cq)) + ib_destroy_cq(rdma->cq); + + if (rdma->cm_id && !IS_ERR(rdma->cm_id)) + rdma_destroy_id(rdma->cm_id); + + kfree(rdma); +} + +static int +post_recv(struct p9_client *client, struct p9_rdma_context *c) +{ + struct p9_trans_rdma *rdma = client->trans; + struct ib_recv_wr wr, *bad_wr; + struct ib_sge sge; + + c->busa = ib_dma_map_single(rdma->cm_id->device, + c->rc->sdata, client->msize, + DMA_FROM_DEVICE); + if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) + goto error; + + sge.addr = c->busa; + sge.length = client->msize; + sge.lkey = rdma->lkey; + + wr.next = NULL; + c->wc_op = IB_WC_RECV; + wr.wr_id = (unsigned long) c; + wr.sg_list = &sge; + wr.num_sge = 1; + return ib_post_recv(rdma->qp, &wr, &bad_wr); + + error: + P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n"); + return -EIO; +} + +static int rdma_request(struct p9_client *client, struct p9_req_t *req) +{ + struct p9_trans_rdma *rdma = client->trans; + struct ib_send_wr wr, *bad_wr; + struct ib_sge sge; + int err = 0; + unsigned long flags; + struct p9_rdma_context *c = NULL; + struct p9_rdma_context *rpl_context = NULL; + + /* Allocate an fcall for the reply */ + rpl_context = kmalloc(sizeof *rpl_context, GFP_KERNEL); + if (!rpl_context) + goto err_close; + + /* + * If the request has a buffer, steal it, otherwise + * allocate a new one. Typically, requests should already + * have receive buffers allocated and just swap them around + */ + if (!req->rc) { + req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize, + GFP_KERNEL); + if (req->rc) { + req->rc->sdata = (char *) req->rc + + sizeof(struct p9_fcall); + req->rc->capacity = client->msize; + } + } + rpl_context->rc = req->rc; + if (!rpl_context->rc) { + kfree(rpl_context); + goto err_close; + } + + /* + * Post a receive buffer for this request. We need to ensure + * there is a reply buffer available for every outstanding + * request. A flushed request can result in no reply for an + * outstanding request, so we must keep a count to avoid + * overflowing the RQ. + */ + if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) { + err = post_recv(client, rpl_context); + if (err) { + kfree(rpl_context->rc); + kfree(rpl_context); + goto err_close; + } + } else + atomic_dec(&rdma->rq_count); + + /* remove posted receive buffer from request structure */ + req->rc = NULL; + + /* Post the request */ + c = kmalloc(sizeof *c, GFP_KERNEL); + if (!c) + goto err_close; + c->req = req; + + c->busa = ib_dma_map_single(rdma->cm_id->device, + c->req->tc->sdata, c->req->tc->size, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) + goto error; + + sge.addr = c->busa; + sge.length = c->req->tc->size; + sge.lkey = rdma->lkey; + + wr.next = NULL; + c->wc_op = IB_WC_SEND; + wr.wr_id = (unsigned long) c; + wr.opcode = IB_WR_SEND; + wr.send_flags = IB_SEND_SIGNALED; + wr.sg_list = &sge; + wr.num_sge = 1; + + if (down_interruptible(&rdma->sq_sem)) + goto error; + + return ib_post_send(rdma->qp, &wr, &bad_wr); + + error: + P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n"); + return -EIO; + + err_close: + spin_lock_irqsave(&rdma->req_lock, flags); + if (rdma->state < P9_RDMA_CLOSING) { + rdma->state = P9_RDMA_CLOSING; + spin_unlock_irqrestore(&rdma->req_lock, flags); + rdma_disconnect(rdma->cm_id); + } else + spin_unlock_irqrestore(&rdma->req_lock, flags); + return err; +} + +static void rdma_close(struct p9_client *client) +{ + struct p9_trans_rdma *rdma; + + if (!client) + return; + + rdma = client->trans; + if (!rdma) + return; + + client->status = Disconnected; + rdma_disconnect(rdma->cm_id); + rdma_destroy_trans(rdma); +} + +/** + * alloc_rdma - Allocate and initialize the rdma transport structure + * @msize: MTU + * @dotu: Extension attribute + * @opts: Mount options structure + */ +static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts) +{ + struct p9_trans_rdma *rdma; + + rdma = kzalloc(sizeof(struct p9_trans_rdma), GFP_KERNEL); + if (!rdma) + return NULL; + + rdma->sq_depth = opts->sq_depth; + rdma->rq_depth = opts->rq_depth; + rdma->timeout = opts->timeout; + spin_lock_init(&rdma->req_lock); + init_completion(&rdma->cm_done); + sema_init(&rdma->sq_sem, rdma->sq_depth); + atomic_set(&rdma->rq_count, 0); + + return rdma; +} + +/* its not clear to me we can do anything after send has been posted */ +static int rdma_cancel(struct p9_client *client, struct p9_req_t *req) +{ + return 1; +} + +/** + * trans_create_rdma - Transport method for creating atransport instance + * @client: client instance + * @addr: IP address string + * @args: Mount options string + */ +static int +rdma_create_trans(struct p9_client *client, const char *addr, char *args) +{ + int err; + struct p9_rdma_opts opts; + struct p9_trans_rdma *rdma; + struct rdma_conn_param conn_param; + struct ib_qp_init_attr qp_attr; + struct ib_device_attr devattr; + + /* Parse the transport specific mount options */ + err = parse_opts(args, &opts); + if (err < 0) + return err; + + /* Create and initialize the RDMA transport structure */ + rdma = alloc_rdma(&opts); + if (!rdma) + return -ENOMEM; + + /* Create the RDMA CM ID */ + rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP); + if (IS_ERR(rdma->cm_id)) + goto error; + + /* Resolve the server's address */ + rdma->addr.sin_family = AF_INET; + rdma->addr.sin_addr.s_addr = in_aton(addr); + rdma->addr.sin_port = htons(opts.port); + err = rdma_resolve_addr(rdma->cm_id, NULL, + (struct sockaddr *)&rdma->addr, + rdma->timeout); + if (err) + goto error; + err = wait_for_completion_interruptible(&rdma->cm_done); + if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED)) + goto error; + + /* Resolve the route to the server */ + err = rdma_resolve_route(rdma->cm_id, rdma->timeout); + if (err) + goto error; + err = wait_for_completion_interruptible(&rdma->cm_done); + if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED)) + goto error; + + /* Query the device attributes */ + err = ib_query_device(rdma->cm_id->device, &devattr); + if (err) + goto error; + + /* Create the Completion Queue */ + rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler, + cq_event_handler, client, + opts.sq_depth + opts.rq_depth + 1, 0); + if (IS_ERR(rdma->cq)) + goto error; + ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); + + /* Create the Protection Domain */ + rdma->pd = ib_alloc_pd(rdma->cm_id->device); + if (IS_ERR(rdma->pd)) + goto error; + + /* Cache the DMA lkey in the transport */ + rdma->dma_mr = NULL; + if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) + rdma->lkey = rdma->cm_id->device->local_dma_lkey; + else { + rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(rdma->dma_mr)) + goto error; + rdma->lkey = rdma->dma_mr->lkey; + } + + /* Create the Queue Pair */ + memset(&qp_attr, 0, sizeof qp_attr); + qp_attr.event_handler = qp_event_handler; + qp_attr.qp_context = client; + qp_attr.cap.max_send_wr = opts.sq_depth; + qp_attr.cap.max_recv_wr = opts.rq_depth; + qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE; + qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE; + qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; + qp_attr.qp_type = IB_QPT_RC; + qp_attr.send_cq = rdma->cq; + qp_attr.recv_cq = rdma->cq; + err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr); + if (err) + goto error; + rdma->qp = rdma->cm_id->qp; + + /* Request a connection */ + memset(&conn_param, 0, sizeof(conn_param)); + conn_param.private_data = NULL; + conn_param.private_data_len = 0; + conn_param.responder_resources = P9_RDMA_IRD; + conn_param.initiator_depth = P9_RDMA_ORD; + err = rdma_connect(rdma->cm_id, &conn_param); + if (err) + goto error; + err = wait_for_completion_interruptible(&rdma->cm_done); + if (err || (rdma->state != P9_RDMA_CONNECTED)) + goto error; + + client->trans = rdma; + client->status = Connected; + + return 0; + +error: + rdma_destroy_trans(rdma); + return -ENOTCONN; +} + +static struct p9_trans_module p9_rdma_trans = { + .name = "rdma", + .maxsize = P9_RDMA_MAXSIZE, + .def = 0, + .owner = THIS_MODULE, + .create = rdma_create_trans, + .close = rdma_close, + .request = rdma_request, + .cancel = rdma_cancel, +}; + +/** + * p9_trans_rdma_init - Register the 9P RDMA transport driver + */ +static int __init p9_trans_rdma_init(void) +{ + v9fs_register_trans(&p9_rdma_trans); + return 0; +} + +static void __exit p9_trans_rdma_exit(void) +{ + v9fs_unregister_trans(&p9_rdma_trans); +} + +module_init(p9_trans_rdma_init); +module_exit(p9_trans_rdma_exit); + +MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); +MODULE_DESCRIPTION("RDMA Transport for 9P"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 22ba8632196f..6c023f0f8252 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -179,5 +179,5 @@ void br_dev_setup(struct net_device *dev) dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX | - NETIF_F_NETNS_LOCAL; + NETIF_F_NETNS_LOCAL | NETIF_F_GSO; } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 573e20f7dba4..0a09ccf68c1c 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -347,15 +347,21 @@ int br_min_mtu(const struct net_bridge *br) void br_features_recompute(struct net_bridge *br) { struct net_bridge_port *p; - unsigned long features; + unsigned long features, mask; - features = br->feature_mask; + features = mask = br->feature_mask; + if (list_empty(&br->port_list)) + goto done; + + features &= ~NETIF_F_ONE_FOR_ALL; list_for_each_entry(p, &br->port_list, list) { - features = netdev_compute_features(features, p->dev->features); + features = netdev_increment_features(features, + p->dev->features, mask); } - br->dev->features = features; +done: + br->dev->features = netdev_fix_features(features, NULL); } /* called with RTNL */ diff --git a/net/core/dev.c b/net/core/dev.c index b8a4fd0806af..d9038e328cc1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3947,6 +3947,46 @@ static void netdev_init_queue_locks(struct net_device *dev) __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL); } +unsigned long netdev_fix_features(unsigned long features, const char *name) +{ + /* Fix illegal SG+CSUM combinations. */ + if ((features & NETIF_F_SG) && + !(features & NETIF_F_ALL_CSUM)) { + if (name) + printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no " + "checksum feature.\n", name); + features &= ~NETIF_F_SG; + } + + /* TSO requires that SG is present as well. */ + if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) { + if (name) + printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no " + "SG feature.\n", name); + features &= ~NETIF_F_TSO; + } + + if (features & NETIF_F_UFO) { + if (!(features & NETIF_F_GEN_CSUM)) { + if (name) + printk(KERN_ERR "%s: Dropping NETIF_F_UFO " + "since no NETIF_F_HW_CSUM feature.\n", + name); + features &= ~NETIF_F_UFO; + } + + if (!(features & NETIF_F_SG)) { + if (name) + printk(KERN_ERR "%s: Dropping NETIF_F_UFO " + "since no NETIF_F_SG feature.\n", name); + features &= ~NETIF_F_UFO; + } + } + + return features; +} +EXPORT_SYMBOL(netdev_fix_features); + /** * register_netdevice - register a network device * @dev: device to register @@ -4032,36 +4072,7 @@ int register_netdevice(struct net_device *dev) dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); } - - /* Fix illegal SG+CSUM combinations. */ - if ((dev->features & NETIF_F_SG) && - !(dev->features & NETIF_F_ALL_CSUM)) { - printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n", - dev->name); - dev->features &= ~NETIF_F_SG; - } - - /* TSO requires that SG is present as well. */ - if ((dev->features & NETIF_F_TSO) && - !(dev->features & NETIF_F_SG)) { - printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n", - dev->name); - dev->features &= ~NETIF_F_TSO; - } - if (dev->features & NETIF_F_UFO) { - if (!(dev->features & NETIF_F_HW_CSUM)) { - printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " - "NETIF_F_HW_CSUM feature.\n", - dev->name); - dev->features &= ~NETIF_F_UFO; - } - if (!(dev->features & NETIF_F_SG)) { - printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " - "NETIF_F_SG feature.\n", - dev->name); - dev->features &= ~NETIF_F_UFO; - } - } + dev->features = netdev_fix_features(dev->features, dev->name); /* Enable software GSO if SG is supported. */ if (dev->features & NETIF_F_SG) @@ -4700,49 +4711,45 @@ static int __init netdev_dma_register(void) { return -ENODEV; } #endif /* CONFIG_NET_DMA */ /** - * netdev_compute_feature - compute conjunction of two feature sets - * @all: first feature set - * @one: second feature set + * netdev_increment_features - increment feature set by one + * @all: current feature set + * @one: new feature set + * @mask: mask feature set * * Computes a new feature set after adding a device with feature set - * @one to the master device with current feature set @all. Returns - * the new feature set. + * @one to the master device with current feature set @all. Will not + * enable anything that is off in @mask. Returns the new feature set. */ -int netdev_compute_features(unsigned long all, unsigned long one) -{ - /* if device needs checksumming, downgrade to hw checksumming */ - if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) - all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM; - - /* if device can't do all checksum, downgrade to ipv4/ipv6 */ - if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM)) - all ^= NETIF_F_HW_CSUM - | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; - - if (one & NETIF_F_GSO) - one |= NETIF_F_GSO_SOFTWARE; - one |= NETIF_F_GSO; - - /* - * If even one device supports a GSO protocol with software fallback, - * enable it for all. - */ - all |= one & NETIF_F_GSO_SOFTWARE; +unsigned long netdev_increment_features(unsigned long all, unsigned long one, + unsigned long mask) +{ + /* If device needs checksumming, downgrade to it. */ + if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) + all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM); + else if (mask & NETIF_F_ALL_CSUM) { + /* If one device supports v4/v6 checksumming, set for all. */ + if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) && + !(all & NETIF_F_GEN_CSUM)) { + all &= ~NETIF_F_ALL_CSUM; + all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); + } - /* If even one device supports robust GSO, enable it for all. */ - if (one & NETIF_F_GSO_ROBUST) - all |= NETIF_F_GSO_ROBUST; + /* If one device supports hw checksumming, set for all. */ + if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) { + all &= ~NETIF_F_ALL_CSUM; + all |= NETIF_F_HW_CSUM; + } + } - all &= one | NETIF_F_LLTX; + one |= NETIF_F_ALL_CSUM; - if (!(all & NETIF_F_ALL_CSUM)) - all &= ~NETIF_F_SG; - if (!(all & NETIF_F_SG)) - all &= ~NETIF_F_GSO_MASK; + one |= all & NETIF_F_ONE_FOR_ALL; + all &= one | NETIF_F_LLTX | NETIF_F_GSO; + all |= one & mask & NETIF_F_ONE_FOR_ALL; return all; } -EXPORT_SYMBOL(netdev_compute_features); +EXPORT_SYMBOL(netdev_increment_features); static struct hlist_head *netdev_create_hash(void) { diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index f1d07b5c1e17..1895a4ca9c4f 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -325,6 +325,38 @@ void unregister_pernet_subsys(struct pernet_operations *module) } EXPORT_SYMBOL_GPL(unregister_pernet_subsys); +int register_pernet_gen_subsys(int *id, struct pernet_operations *ops) +{ + int rv; + + mutex_lock(&net_mutex); +again: + rv = ida_get_new_above(&net_generic_ids, 1, id); + if (rv < 0) { + if (rv == -EAGAIN) { + ida_pre_get(&net_generic_ids, GFP_KERNEL); + goto again; + } + goto out; + } + rv = register_pernet_operations(first_device, ops); + if (rv < 0) + ida_remove(&net_generic_ids, *id); + mutex_unlock(&net_mutex); +out: + return rv; +} +EXPORT_SYMBOL_GPL(register_pernet_gen_subsys); + +void unregister_pernet_gen_subsys(int id, struct pernet_operations *ops) +{ + mutex_lock(&net_mutex); + unregister_pernet_operations(ops); + ida_remove(&net_generic_ids, id); + mutex_unlock(&net_mutex); +} +EXPORT_SYMBOL_GPL(unregister_pernet_gen_subsys); + /** * register_pernet_device - register a network namespace device * @ops: pernet operations structure for the subsystem diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 99f656d35b4f..a47f5bad110d 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1973,28 +1973,27 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) /* make sure that we don't pick a non-existing transmit queue */ ntxq = pkt_dev->odev->real_num_tx_queues; - if (ntxq <= num_online_cpus() && (pkt_dev->flags & F_QUEUE_MAP_CPU)) { + if (ntxq > num_online_cpus() && (pkt_dev->flags & F_QUEUE_MAP_CPU)) { printk(KERN_WARNING "pktgen: WARNING: QUEUE_MAP_CPU " - "disabled because CPU count (%d) exceeds number ", - num_online_cpus()); - printk(KERN_WARNING "pktgen: WARNING: of tx queues " - "(%d) on %s \n", ntxq, pkt_dev->odev->name); + "disabled because CPU count (%d) exceeds number " + "of tx queues (%d) on %s\n", num_online_cpus(), ntxq, + pkt_dev->odev->name); pkt_dev->flags &= ~F_QUEUE_MAP_CPU; } if (ntxq <= pkt_dev->queue_map_min) { printk(KERN_WARNING "pktgen: WARNING: Requested " - "queue_map_min (%d) exceeds number of tx\n", - pkt_dev->queue_map_min); - printk(KERN_WARNING "pktgen: WARNING: queues (%d) on " - "%s, resetting\n", ntxq, pkt_dev->odev->name); + "queue_map_min (zero-based) (%d) exceeds valid range " + "[0 - %d] for (%d) queues on %s, resetting\n", + pkt_dev->queue_map_min, (ntxq ?: 1)- 1, ntxq, + pkt_dev->odev->name); pkt_dev->queue_map_min = ntxq - 1; } - if (ntxq <= pkt_dev->queue_map_max) { + if (pkt_dev->queue_map_max >= ntxq) { printk(KERN_WARNING "pktgen: WARNING: Requested " - "queue_map_max (%d) exceeds number of tx\n", - pkt_dev->queue_map_max); - printk(KERN_WARNING "pktgen: WARNING: queues (%d) on " - "%s, resetting\n", ntxq, pkt_dev->odev->name); + "queue_map_max (zero-based) (%d) exceeds valid range " + "[0 - %d] for (%d) queues on %s, resetting\n", + pkt_dev->queue_map_max, (ntxq ?: 1)- 1, ntxq, + pkt_dev->odev->name); pkt_dev->queue_map_max = ntxq - 1; } diff --git a/net/core/scm.c b/net/core/scm.c index 10f5c65f6a47..ab242cc1acca 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -75,6 +75,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) if (!fpl) return -ENOMEM; *fplp = fpl; + INIT_LIST_HEAD(&fpl->list); fpl->count = 0; } fpp = &fpl->fp[fpl->count]; @@ -106,9 +107,25 @@ void __scm_destroy(struct scm_cookie *scm) if (fpl) { scm->fp = NULL; - for (i=fpl->count-1; i>=0; i--) - fput(fpl->fp[i]); - kfree(fpl); + if (current->scm_work_list) { + list_add_tail(&fpl->list, current->scm_work_list); + } else { + LIST_HEAD(work_list); + + current->scm_work_list = &work_list; + + list_add(&fpl->list, &work_list); + while (!list_empty(&work_list)) { + fpl = list_first_entry(&work_list, struct scm_fp_list, list); + + list_del(&fpl->list); + for (i=fpl->count-1; i>=0; i--) + fput(fpl->fp[i]); + kfree(fpl); + } + + current->scm_work_list = NULL; + } } } @@ -284,6 +301,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); if (new_fpl) { + INIT_LIST_HEAD(&new_fpl->list); for (i=fpl->count-1; i>=0; i--) get_file(fpl->fp[i]); memcpy(new_fpl, fpl, sizeof(*fpl)); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4e22e3a35359..ebb6b94f8af2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -449,6 +449,18 @@ void kfree_skb(struct sk_buff *skb) __kfree_skb(skb); } +/** + * skb_recycle_check - check if skb can be reused for receive + * @skb: buffer + * @skb_size: minimum receive buffer size + * + * Checks that the skb passed in is not shared or cloned, and + * that it is linear and its head portion at least as large as + * skb_size so that it can be recycled as a receive buffer. + * If these conditions are met, this function does any necessary + * reference count dropping and cleans up the skbuff as if it + * just came from __alloc_skb(). + */ int skb_recycle_check(struct sk_buff *skb, int skb_size) { struct skb_shared_info *shinfo; diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 490e035c6d90..2e78f6bd9775 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -2063,9 +2063,10 @@ int cipso_v4_skbuff_setattr(struct sk_buff *skb, u32 opt_len; int len_delta; - buf_len = cipso_v4_genopt(buf, buf_len, doi_def, secattr); - if (buf_len < 0) - return buf_len; + ret_val = cipso_v4_genopt(buf, buf_len, doi_def, secattr); + if (ret_val < 0) + return ret_val; + buf_len = ret_val; opt_len = (buf_len + 3) & ~3; /* we overwrite any existing options to ensure that we have enough diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 990a58493235..ba85d8831893 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -362,6 +362,17 @@ struct tcp_out_options { __u32 tsval, tsecr; /* need to include OPTION_TS */ }; +/* Beware: Something in the Internet is very sensitive to the ordering of + * TCP options, we learned this through the hard way, so be careful here. + * Luckily we can at least blame others for their non-compliance but from + * inter-operatibility perspective it seems that we're somewhat stuck with + * the ordering which we have been using if we want to keep working with + * those broken things (not that it currently hurts anybody as there isn't + * particular reason why the ordering would need to be changed). + * + * At least SACK_PERM as the first option is known to lead to a disaster + * (but it may well be that other scenarios fail similarly). + */ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, const struct tcp_out_options *opts, __u8 **md5_hash) { @@ -376,6 +387,12 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, *md5_hash = NULL; } + if (unlikely(opts->mss)) { + *ptr++ = htonl((TCPOPT_MSS << 24) | + (TCPOLEN_MSS << 16) | + opts->mss); + } + if (likely(OPTION_TS & opts->options)) { if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) { *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | @@ -392,12 +409,6 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, *ptr++ = htonl(opts->tsecr); } - if (unlikely(opts->mss)) { - *ptr++ = htonl((TCPOPT_MSS << 24) | - (TCPOLEN_MSS << 16) | - opts->mss); - } - if (unlikely(OPTION_SACK_ADVERTISE & opts->options && !(OPTION_TS & opts->options))) { *ptr++ = htonl((TCPOPT_NOP << 24) | @@ -432,7 +443,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, if (tp->rx_opt.dsack) { tp->rx_opt.dsack = 0; - tp->rx_opt.eff_sacks--; + tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks; } } } @@ -2268,6 +2279,11 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, } memset(&opts, 0, sizeof(opts)); +#ifdef CONFIG_SYN_COOKIES + if (unlikely(req->cookie_ts)) + TCP_SKB_CB(skb)->when = cookie_init_timestamp(req); + else +#endif TCP_SKB_CB(skb)->when = tcp_time_stamp; tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5) + @@ -2293,11 +2309,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ th->window = htons(min(req->rcv_wnd, 65535U)); -#ifdef CONFIG_SYN_COOKIES - if (unlikely(req->cookie_ts)) - TCP_SKB_CB(skb)->when = cookie_init_timestamp(req); - else -#endif tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); th->doff = (tcp_header_size >> 2); TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2095abc3caba..cf02701ced48 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -284,7 +284,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, } EXPORT_SYMBOL_GPL(udp4_lib_lookup); -static inline struct sock *udp_v4_mcast_next(struct sock *sk, +static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, int dif) @@ -296,7 +296,8 @@ static inline struct sock *udp_v4_mcast_next(struct sock *sk, sk_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); - if (s->sk_hash != hnum || + if (!net_eq(sock_net(s), net) || + s->sk_hash != hnum || (inet->daddr && inet->daddr != rmt_addr) || (inet->dport != rmt_port && inet->dport) || (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || @@ -1079,15 +1080,16 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, read_lock(&udp_hash_lock); sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]); dif = skb->dev->ifindex; - sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); + sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); if (sk) { struct sock *sknext = NULL; do { struct sk_buff *skb1 = skb; - sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, - uh->source, saddr, dif); + sknext = udp_v4_mcast_next(net, sk_next(sk), uh->dest, + daddr, uh->source, saddr, + dif); if (sknext) skb1 = skb_clone(skb, GFP_ATOMIC); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e51da8c092fa..8b48512ebf6a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -138,6 +138,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, int peeked; int err; int is_udplite = IS_UDPLITE(sk); + int is_udp4; if (addr_len) *addr_len=sizeof(struct sockaddr_in6); @@ -158,6 +159,8 @@ try_again: else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; + is_udp4 = (skb->protocol == htons(ETH_P_IP)); + /* * If checksum is needed at all, try to do it while copying the * data. If the data is truncated, or if we only want a partial @@ -180,9 +183,14 @@ try_again: if (err) goto out_free; - if (!peeked) - UDP6_INC_STATS_USER(sock_net(sk), - UDP_MIB_INDATAGRAMS, is_udplite); + if (!peeked) { + if (is_udp4) + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); + else + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); + } sock_recv_timestamp(msg, sk, skb); @@ -196,7 +204,7 @@ try_again: sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = 0; - if (skb->protocol == htons(ETH_P_IP)) + if (is_udp4) ipv6_addr_set(&sin6->sin6_addr, 0, 0, htonl(0xffff), ip_hdr(skb)->saddr); else { @@ -207,7 +215,7 @@ try_again: } } - if (skb->protocol == htons(ETH_P_IP)) { + if (is_udp4) { if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); } else { @@ -228,8 +236,14 @@ out: csum_copy_err: lock_sock(sk); - if (!skb_kill_datagram(sk, skb, flags)) - UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + if (!skb_kill_datagram(sk, skb, flags)) { + if (is_udp4) + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); + else + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); + } release_sock(sk); if (flags & MSG_DONTWAIT) @@ -328,7 +342,7 @@ drop: return -1; } -static struct sock *udp_v6_mcast_next(struct sock *sk, +static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, __be16 loc_port, struct in6_addr *loc_addr, __be16 rmt_port, struct in6_addr *rmt_addr, int dif) @@ -340,7 +354,7 @@ static struct sock *udp_v6_mcast_next(struct sock *sk, sk_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); - if (sock_net(s) != sock_net(sk)) + if (!net_eq(sock_net(s), net)) continue; if (s->sk_hash == num && s->sk_family == PF_INET6) { @@ -383,14 +397,14 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, read_lock(&udp_hash_lock); sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]); dif = inet6_iif(skb); - sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); + sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); if (!sk) { kfree_skb(skb); goto out; } sk2 = sk; - while ((sk2 = udp_v6_mcast_next(sk_next(sk2), uh->dest, daddr, + while ((sk2 = udp_v6_mcast_next(net, sk_next(sk2), uh->dest, daddr, uh->source, saddr, dif))) { struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); if (buff) { diff --git a/net/key/af_key.c b/net/key/af_key.c index e55e0441e4d9..3440a4637f01 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2075,7 +2075,6 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in req_size += socklen * 2; } else { size -= 2*socklen; - socklen = 0; } rq = (void*)skb_put(skb, req_size); pol->sadb_x_policy_len += req_size/8; diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c index 0b024cd6b809..98f480708050 100644 --- a/net/mac80211/rc80211_minstrel_debugfs.c +++ b/net/mac80211/rc80211_minstrel_debugfs.c @@ -94,8 +94,8 @@ minstrel_stats_open(struct inode *inode, struct file *file) prob / 10, prob % 10, mr->last_success, mr->last_attempts, - mr->succ_hist, - mr->att_hist); + (unsigned long long)mr->succ_hist, + (unsigned long long)mr->att_hist); } p += sprintf(p, "\nTotal packet count:: ideal %d " "lookaround %d\n\n", @@ -106,7 +106,7 @@ minstrel_stats_open(struct inode *inode, struct file *file) return 0; } -static int +static ssize_t minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *o) { struct minstrel_stats_info *ms; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index a2cdbcbf64c4..4ab62ad85dd4 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -335,7 +335,7 @@ static int __init nf_ct_proto_gre_init(void) rv = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_gre4); if (rv < 0) return rv; - rv = register_pernet_gen_device(&proto_gre_net_id, &proto_gre_net_ops); + rv = register_pernet_gen_subsys(&proto_gre_net_id, &proto_gre_net_ops); if (rv < 0) nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4); return rv; @@ -344,7 +344,7 @@ static int __init nf_ct_proto_gre_init(void) static void nf_ct_proto_gre_fini(void) { nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4); - unregister_pernet_gen_device(proto_gre_net_id, &proto_gre_net_ops); + unregister_pernet_gen_subsys(proto_gre_net_id, &proto_gre_net_ops); } module_init(nf_ct_proto_gre_init); diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c index b0925a303353..249f6b92f153 100644 --- a/net/netlabel/netlabel_addrlist.c +++ b/net/netlabel/netlabel_addrlist.c @@ -315,6 +315,7 @@ struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr, * Audit Helper Functions */ +#ifdef CONFIG_AUDIT /** * netlbl_af4list_audit_addr - Audit an IPv4 address * @audit_buf: audit buffer @@ -386,3 +387,4 @@ void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf, } } #endif /* IPv6 */ +#endif /* CONFIG_AUDIT */ diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h index 0242bead405f..07ae7fd82be1 100644 --- a/net/netlabel/netlabel_addrlist.h +++ b/net/netlabel/netlabel_addrlist.h @@ -120,9 +120,19 @@ struct netlbl_af4list *netlbl_af4list_search(__be32 addr, struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr, __be32 mask, struct list_head *head); + +#ifdef CONFIG_AUDIT void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf, int src, const char *dev, __be32 addr, __be32 mask); +#else +static inline void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf, + int src, const char *dev, + __be32 addr, __be32 mask) +{ + return; +} +#endif #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -179,11 +189,23 @@ struct netlbl_af6list *netlbl_af6list_search(const struct in6_addr *addr, struct netlbl_af6list *netlbl_af6list_search_exact(const struct in6_addr *addr, const struct in6_addr *mask, struct list_head *head); + +#ifdef CONFIG_AUDIT void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf, int src, const char *dev, const struct in6_addr *addr, const struct in6_addr *mask); +#else +static inline void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf, + int src, + const char *dev, + const struct in6_addr *addr, + const struct in6_addr *mask) +{ + return; +} +#endif #endif /* IPV6 */ #endif diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index ee769ecaa13c..0a0ef17b2a40 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -265,7 +265,7 @@ add_failure: static int netlbl_mgmt_listentry(struct sk_buff *skb, struct netlbl_dom_map *entry) { - int ret_val; + int ret_val = 0; struct nlattr *nla_a; struct nlattr *nla_b; struct netlbl_af4list *iter4; diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index b9d97effebe3..defeb7a0d502 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -261,6 +261,8 @@ static inline int can_respond(struct sk_buff *skb) return 0; /* we are not the destination */ if (ph->pn_res == PN_PREFIX && !pskb_may_pull(skb, 5)) return 0; + if (ph->pn_res == PN_COMMGR) /* indications */ + return 0; ph = pn_hdr(skb); /* re-acquires the pointer */ pm = pn_msg(skb); @@ -309,7 +311,8 @@ static int send_reset_indications(struct sk_buff *rskb) return pn_raw_send(data, sizeof(data), rskb->dev, pn_object(oph->pn_sdev, 0x00), - pn_object(oph->pn_rdev, oph->pn_robj), 0x10); + pn_object(oph->pn_rdev, oph->pn_robj), + PN_COMMGR); } diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c index 21124ec0a73d..bfdade72e066 100644 --- a/net/rfkill/rfkill-input.c +++ b/net/rfkill/rfkill-input.c @@ -256,6 +256,11 @@ static struct input_handler rfkill_handler = { static int __init rfkill_handler_init(void) { + unsigned long last_run = jiffies - msecs_to_jiffies(500); + rfkill_wlan.last = last_run; + rfkill_bt.last = last_run; + rfkill_uwb.last = last_run; + rfkill_wimax.last = last_run; return input_register_handler(&rfkill_handler); } diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 8b06fa900482..03e389e8d945 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -545,9 +545,10 @@ static void cbq_ovl_delay(struct cbq_class *cl) expires = ktime_set(0, 0); expires = ktime_add_ns(expires, PSCHED_US2NS(sched)); if (hrtimer_try_to_cancel(&q->delay_timer) && - ktime_to_ns(ktime_sub(q->delay_timer.expires, - expires)) > 0) - q->delay_timer.expires = expires; + ktime_to_ns(ktime_sub( + hrtimer_get_expires(&q->delay_timer), + expires)) > 0) + hrtimer_set_expires(&q->delay_timer, expires); hrtimer_restart(&q->delay_timer); cl->delayed = 1; cl->xstats.overactions++; diff --git a/net/sctp/input.c b/net/sctp/input.c index a49fa80b57b9..bf612d954d41 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -369,7 +369,7 @@ static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb) void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, struct sctp_transport *t, __u32 pmtu) { - if (!t || (t->pathmtu == pmtu)) + if (!t || (t->pathmtu <= pmtu)) return; if (sock_owned_by_user(sk)) { diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index d4c3fbc4671e..a6a0ea71ae93 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2544,6 +2544,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, sctp_shutdownhdr_t *sdh; sctp_disposition_t disposition; struct sctp_ulpevent *ev; + __u32 ctsn; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); @@ -2558,6 +2559,14 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, sdh = (sctp_shutdownhdr_t *)chunk->skb->data; skb_pull(chunk->skb, sizeof(sctp_shutdownhdr_t)); chunk->subh.shutdown_hdr = sdh; + ctsn = ntohl(sdh->cum_tsn_ack); + + /* If Cumulative TSN Ack beyond the max tsn currently + * send, terminating the association and respond to the + * sender with an ABORT. + */ + if (!TSN_lt(ctsn, asoc->next_tsn)) + return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands); /* API 5.3.1.5 SCTP_SHUTDOWN_EVENT * When a peer sends a SHUTDOWN, SCTP delivers this notification to @@ -2599,6 +2608,51 @@ out: return disposition; } +/* + * sctp_sf_do_9_2_shut_ctsn + * + * Once an endpoint has reached the SHUTDOWN-RECEIVED state, + * it MUST NOT send a SHUTDOWN in response to a ULP request. + * The Cumulative TSN Ack of the received SHUTDOWN chunk + * MUST be processed. + */ +sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, + sctp_cmd_seq_t *commands) +{ + struct sctp_chunk *chunk = arg; + sctp_shutdownhdr_t *sdh; + + if (!sctp_vtag_verify(chunk, asoc)) + return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + + /* Make sure that the SHUTDOWN chunk has a valid length. */ + if (!sctp_chunk_length_valid(chunk, + sizeof(struct sctp_shutdown_chunk_t))) + return sctp_sf_violation_chunklen(ep, asoc, type, arg, + commands); + + sdh = (sctp_shutdownhdr_t *)chunk->skb->data; + + /* If Cumulative TSN Ack beyond the max tsn currently + * send, terminating the association and respond to the + * sender with an ABORT. + */ + if (!TSN_lt(ntohl(sdh->cum_tsn_ack), asoc->next_tsn)) + return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands); + + /* verify, by checking the Cumulative TSN Ack field of the + * chunk, that all its outstanding DATA chunks have been + * received by the SHUTDOWN sender. + */ + sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_CTSN, + SCTP_BE32(sdh->cum_tsn_ack)); + + return SCTP_DISPOSITION_CONSUME; +} + /* RFC 2960 9.2 * If an endpoint is in SHUTDOWN-ACK-SENT state and receives an INIT chunk * (e.g., if the SHUTDOWN COMPLETE was lost) with source and destination diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index dd4ddc40c0ad..5c8186d88c61 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -266,11 +266,11 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, /* SCTP_STATE_ESTABLISHED */ \ TYPE_SCTP_FUNC(sctp_sf_do_9_2_shutdown), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ + TYPE_SCTP_FUNC(sctp_sf_do_9_2_shutdown), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ TYPE_SCTP_FUNC(sctp_sf_do_9_2_shutdown_ack), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ + TYPE_SCTP_FUNC(sctp_sf_do_9_2_shut_ctsn), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ } /* TYPE_SCTP_SHUTDOWN */ diff --git a/net/socket.c b/net/socket.c index 2b7a4b5c9b72..57550c3bcabe 100644 --- a/net/socket.c +++ b/net/socket.c @@ -990,7 +990,6 @@ static int sock_close(struct inode *inode, struct file *filp) printk(KERN_DEBUG "sock_close: NULL inode\n"); return 0; } - sock_fasync(-1, filp, 0); sock_release(SOCKET_I(inode)); return 0; } diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 436bf1b4b76c..cb216b2df666 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -228,19 +228,21 @@ static int rpcauth_prune_expired(struct list_head *free, int nr_to_scan) { spinlock_t *cache_lock; - struct rpc_cred *cred; + struct rpc_cred *cred, *next; unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM; - while (!list_empty(&cred_unused)) { - cred = list_entry(cred_unused.next, struct rpc_cred, cr_lru); + list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) { + + /* Enforce a 60 second garbage collection moratorium */ + if (time_in_range(cred->cr_expire, expired, jiffies) && + test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) + continue; + list_del_init(&cred->cr_lru); number_cred_unused--; if (atomic_read(&cred->cr_count) != 0) continue; - /* Enforce a 5 second garbage collection moratorium */ - if (time_in_range(cred->cr_expire, expired, jiffies) && - test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) - continue; + cache_lock = &cred->cr_auth->au_credcache->lock; spin_lock(cache_lock); if (atomic_read(&cred->cr_count) == 0) { @@ -453,7 +455,7 @@ need_lock: } if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) rpcauth_unhash_cred(cred); - else if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) { + if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) { cred->cr_expire = jiffies; list_add_tail(&cred->cr_lru, &cred_unused); number_cred_unused++; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9a288d5eea64..0a50361e3d83 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -249,6 +249,7 @@ struct sock_xprt { void (*old_data_ready)(struct sock *, int); void (*old_state_change)(struct sock *); void (*old_write_space)(struct sock *); + void (*old_error_report)(struct sock *); }; /* @@ -698,8 +699,9 @@ static int xs_tcp_send_request(struct rpc_task *task) case -EAGAIN: xs_nospace(task); break; - case -ECONNREFUSED: case -ECONNRESET: + xs_tcp_shutdown(xprt); + case -ECONNREFUSED: case -ENOTCONN: case -EPIPE: status = -ENOTCONN; @@ -742,6 +744,22 @@ out_release: xprt_release_xprt(xprt, task); } +static void xs_save_old_callbacks(struct sock_xprt *transport, struct sock *sk) +{ + transport->old_data_ready = sk->sk_data_ready; + transport->old_state_change = sk->sk_state_change; + transport->old_write_space = sk->sk_write_space; + transport->old_error_report = sk->sk_error_report; +} + +static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *sk) +{ + sk->sk_data_ready = transport->old_data_ready; + sk->sk_state_change = transport->old_state_change; + sk->sk_write_space = transport->old_write_space; + sk->sk_error_report = transport->old_error_report; +} + /** * xs_close - close a socket * @xprt: transport @@ -765,9 +783,8 @@ static void xs_close(struct rpc_xprt *xprt) transport->sock = NULL; sk->sk_user_data = NULL; - sk->sk_data_ready = transport->old_data_ready; - sk->sk_state_change = transport->old_state_change; - sk->sk_write_space = transport->old_write_space; + + xs_restore_old_callbacks(transport, sk); write_unlock_bh(&sk->sk_callback_lock); sk->sk_no_check = 0; @@ -1180,6 +1197,28 @@ static void xs_tcp_state_change(struct sock *sk) } /** + * xs_tcp_error_report - callback mainly for catching RST events + * @sk: socket + */ +static void xs_tcp_error_report(struct sock *sk) +{ + struct rpc_xprt *xprt; + + read_lock(&sk->sk_callback_lock); + if (sk->sk_err != ECONNRESET || sk->sk_state != TCP_ESTABLISHED) + goto out; + if (!(xprt = xprt_from_sock(sk))) + goto out; + dprintk("RPC: %s client %p...\n" + "RPC: error %d\n", + __func__, xprt, sk->sk_err); + + xprt_force_disconnect(xprt); +out: + read_unlock(&sk->sk_callback_lock); +} + +/** * xs_udp_write_space - callback invoked when socket buffer space * becomes available * @sk: socket whose state has changed @@ -1454,10 +1493,9 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) write_lock_bh(&sk->sk_callback_lock); + xs_save_old_callbacks(transport, sk); + sk->sk_user_data = xprt; - transport->old_data_ready = sk->sk_data_ready; - transport->old_state_change = sk->sk_state_change; - transport->old_write_space = sk->sk_write_space; sk->sk_data_ready = xs_udp_data_ready; sk->sk_write_space = xs_udp_write_space; sk->sk_no_check = UDP_CSUM_NORCV; @@ -1589,13 +1627,13 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) write_lock_bh(&sk->sk_callback_lock); + xs_save_old_callbacks(transport, sk); + sk->sk_user_data = xprt; - transport->old_data_ready = sk->sk_data_ready; - transport->old_state_change = sk->sk_state_change; - transport->old_write_space = sk->sk_write_space; sk->sk_data_ready = xs_tcp_data_ready; sk->sk_state_change = xs_tcp_state_change; sk->sk_write_space = xs_tcp_write_space; + sk->sk_error_report = xs_tcp_error_report; sk->sk_allocation = GFP_ATOMIC; /* socket options */ diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c647aab8d418..4d3c6071b9a4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -711,28 +711,30 @@ static struct sock *unix_find_other(struct net *net, int type, unsigned hash, int *error) { struct sock *u; - struct nameidata nd; + struct path path; int err = 0; if (sunname->sun_path[0]) { - err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd); + struct inode *inode; + err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path); if (err) goto fail; - err = vfs_permission(&nd, MAY_WRITE); + inode = path.dentry->d_inode; + err = inode_permission(inode, MAY_WRITE); if (err) goto put_fail; err = -ECONNREFUSED; - if (!S_ISSOCK(nd.path.dentry->d_inode->i_mode)) + if (!S_ISSOCK(inode->i_mode)) goto put_fail; - u = unix_find_socket_byinode(net, nd.path.dentry->d_inode); + u = unix_find_socket_byinode(net, inode); if (!u) goto put_fail; if (u->sk_type == type) - touch_atime(nd.path.mnt, nd.path.dentry); + touch_atime(path.mnt, path.dentry); - path_put(&nd.path); + path_put(&path); err=-EPROTOTYPE; if (u->sk_type != type) { @@ -753,7 +755,7 @@ static struct sock *unix_find_other(struct net *net, return u; put_fail: - path_put(&nd.path); + path_put(&path); fail: *error=err; return NULL; @@ -2211,7 +2213,7 @@ static int unix_net_init(struct net *net) #endif error = 0; out: - return 0; + return error; } static void unix_net_exit(struct net *net) diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 7d82be07fa1d..646c7121dbc0 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -16,7 +16,7 @@ config NL80211 config WIRELESS_OLD_REGULATORY bool "Old wireless static regulatory definitions" - default n + default y ---help--- This option enables the old static regulatory information and uses it within the new framework. This is available @@ -40,11 +40,10 @@ config WIRELESS_OLD_REGULATORY ieee80211_regdom module parameter. This is being phased out and you should stop using them ASAP. - Say N unless you cannot install a new userspace application - or have one currently depending on the ieee80211_regdom module - parameter and cannot port it to use the new userspace interfaces. - - This is scheduled for removal for 2.6.29. + Say Y unless you have installed a new userspace application. + Also say Y if have one currently depending on the ieee80211_regdom + module parameter and cannot port it to use the new userspace + interfaces. config WIRELESS_EXT bool "Wireless extensions" diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 832b47c1de80..058f04f54b90 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -315,9 +315,9 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) return; } - spin_lock(&xfrm_policy_gc_lock); + spin_lock_bh(&xfrm_policy_gc_lock); hlist_add_head(&policy->bydst, &xfrm_policy_gc_list); - spin_unlock(&xfrm_policy_gc_lock); + spin_unlock_bh(&xfrm_policy_gc_lock); schedule_work(&xfrm_policy_gc_work); } @@ -1251,6 +1251,8 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, -EINVAL : -EAGAIN); xfrm_state_put(x); } + else if (error == -ESRCH) + error = -EAGAIN; if (!tmpl->optional) goto fail; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 4a8a1abb59ee..a278a6f3b991 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1816,7 +1816,7 @@ static int copy_to_user_kmaddress(struct xfrm_kmaddress *k, struct sk_buff *skb) uk.family = k->family; uk.reserved = k->reserved; memcpy(&uk.local, &k->local, sizeof(uk.local)); - memcpy(&uk.remote, &k->local, sizeof(uk.remote)); + memcpy(&uk.remote, &k->remote, sizeof(uk.remote)); return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk); } |