diff options
Diffstat (limited to 'net')
103 files changed, 381 insertions, 383 deletions
diff --git a/net/802/fc.c b/net/802/fc.c index bd345f3d29f8..b324e31401a9 100644 --- a/net/802/fc.c +++ b/net/802/fc.c @@ -11,7 +11,6 @@ */ #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> diff --git a/net/802/fddi.c b/net/802/fddi.c index 94b3ad08f39a..5ab25cd4314b 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -27,7 +27,6 @@ */ #include <linux/module.h> -#include <asm/system.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> diff --git a/net/802/hippi.c b/net/802/hippi.c index 91aca8780fd0..056794e66375 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -35,7 +35,6 @@ #include <net/arp.h> #include <net/sock.h> #include <asm/uaccess.h> -#include <asm/system.h> /* * Create the HIPPI MAC header for an arbitrary protocol layer diff --git a/net/802/tr.c b/net/802/tr.c index 5e20cf8a074b..b9a3a145e348 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -16,7 +16,6 @@ */ #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/9p/client.c b/net/9p/client.c index 776618cd2be5..b23a17c431c8 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -740,10 +740,18 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) c->status = Disconnected; goto reterr; } +again: /* Wait for the response */ err = wait_event_interruptible(*req->wq, req->status >= REQ_STATUS_RCVD); + if ((err == -ERESTARTSYS) && (c->status == Connected) + && (type == P9_TFLUSH)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + goto again; + } + if (req->status == REQ_STATUS_ERROR) { p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; @@ -1420,6 +1428,7 @@ int p9_client_clunk(struct p9_fid *fid) int err; struct p9_client *clnt; struct p9_req_t *req; + int retries = 0; if (!fid) { pr_warn("%s (%d): Trying to clunk with NULL fid\n", @@ -1428,7 +1437,9 @@ int p9_client_clunk(struct p9_fid *fid) return 0; } - p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d\n", fid->fid); +again: + p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n", fid->fid, + retries); err = 0; clnt = fid->clnt; @@ -1444,8 +1455,14 @@ int p9_client_clunk(struct p9_fid *fid) error: /* * Fid is not valid even after a failed clunk + * If interrupted, retry once then give up and + * leak fid until umount. */ - p9_fid_destroy(fid); + if (err == -ERESTARTSYS) { + if (retries++ == 0) + goto again; + } else + p9_fid_destroy(fid); return err; } EXPORT_SYMBOL(p9_client_clunk); @@ -1470,7 +1487,10 @@ int p9_client_remove(struct p9_fid *fid) p9_free_req(clnt, req); error: - p9_fid_destroy(fid); + if (err == -ERESTARTSYS) + p9_client_clunk(fid); + else + p9_fid_destroy(fid); return err; } EXPORT_SYMBOL(p9_client_remove); diff --git a/net/atm/clip.c b/net/atm/clip.c index 5de42ea309bc..8ae3a7879335 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -37,7 +37,6 @@ #include <linux/param.h> /* for HZ */ #include <linux/uaccess.h> #include <asm/byteorder.h> /* for htons etc. */ -#include <asm/system.h> /* save/restore_flags */ #include <linux/atomic.h> #include "common.h" diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 3cd0a0dc91cb..0906c194a413 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -33,7 +33,6 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/termios.h> /* For TIOCINQ/OUTQ */ #include <linux/mm.h> diff --git a/net/ax25/ax25_addr.c b/net/ax25/ax25_addr.c index 7e7964dd987b..9162409559cf 100644 --- a/net/ax25/ax25_addr.c +++ b/net/ax25/ax25_addr.c @@ -22,7 +22,6 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index c1cb982f6e86..d0de30e89591 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -24,7 +24,6 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/ax25/ax25_ds_in.c b/net/ax25/ax25_ds_in.c index 8273b1200eee..9bd31e88aeca 100644 --- a/net/ax25/ax25_ds_in.c +++ b/net/ax25/ax25_ds_in.c @@ -23,7 +23,6 @@ #include <net/sock.h> #include <net/tcp_states.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c index 85816e612dc0..5ea7fd3e2af9 100644 --- a/net/ax25/ax25_ds_subr.c +++ b/net/ax25/ax25_ds_subr.c @@ -24,7 +24,6 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index c7d81436213d..993c439b4f71 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -25,7 +25,6 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c index 60b545e2822a..7d5f24b82cc8 100644 --- a/net/ax25/ax25_iface.c +++ b/net/ax25/ax25_iface.c @@ -24,7 +24,6 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 9bb776541203..96f4cab3a2f9 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -27,7 +27,6 @@ #include <net/sock.h> #include <net/tcp_states.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c index cf0c47a26530..846ae4e2b115 100644 --- a/net/ax25/ax25_ip.c +++ b/net/ax25/ax25_ip.c @@ -24,7 +24,6 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/termios.h> /* For TIOCINQ/OUTQ */ #include <linux/mm.h> diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c index 37507d806f65..be8a25e0db65 100644 --- a/net/ax25/ax25_out.c +++ b/net/ax25/ax25_out.c @@ -27,7 +27,6 @@ #include <linux/netfilter.h> #include <net/sock.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index 87fddab22e0f..a65588040b9e 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -32,7 +32,6 @@ #include <linux/spinlock.h> #include <net/sock.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/ax25/ax25_std_in.c b/net/ax25/ax25_std_in.c index a8eef88d8652..3fbf8f7b2cf4 100644 --- a/net/ax25/ax25_std_in.c +++ b/net/ax25/ax25_std_in.c @@ -30,7 +30,6 @@ #include <net/sock.h> #include <net/tcp_states.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/ax25/ax25_std_subr.c b/net/ax25/ax25_std_subr.c index 277f81bb979a..8b66a41e538f 100644 --- a/net/ax25/ax25_std_subr.c +++ b/net/ax25/ax25_std_subr.c @@ -21,7 +21,6 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c index 96e4b9273250..004467c9e6e1 100644 --- a/net/ax25/ax25_std_timer.c +++ b/net/ax25/ax25_std_timer.c @@ -25,7 +25,6 @@ #include <net/sock.h> #include <net/tcp_states.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index c6715ee4ab8f..1997538a5d23 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -26,7 +26,6 @@ #include <net/sock.h> #include <net/tcp_states.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c index db29ea71e80a..c3cffa79bafb 100644 --- a/net/ax25/ax25_timer.c +++ b/net/ax25/ax25_timer.c @@ -29,7 +29,6 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index 4c83137b5954..e3c579ba6325 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -26,7 +26,6 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 9f9c8dcd8af0..180bfc45810d 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -42,7 +42,6 @@ #include <linux/uaccess.h> #include <net/sock.h> -#include <asm/system.h> #include "bnep.h" diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index 1230faaac29b..311668d14571 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -39,7 +39,6 @@ #include <linux/isdn/capilli.h> -#include <asm/system.h> #include "cmtp.h" diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 947172bf1621..5238b6b3ea6a 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -37,7 +37,6 @@ #include <linux/interrupt.h> #include <net/sock.h> -#include <asm/system.h> #include <linux/uaccess.h> #include <asm/unaligned.h> diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 59ec99eb739b..e33af63a884a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -45,7 +45,6 @@ #include <linux/crypto.h> #include <net/sock.h> -#include <asm/system.h> #include <linux/uaccess.h> #include <asm/unaligned.h> diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index badb7851d116..b37531094c49 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -37,7 +37,6 @@ #include <linux/interrupt.h> #include <net/sock.h> -#include <asm/system.h> #include <linux/uaccess.h> #include <asm/unaligned.h> diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 63afd234283e..49142612916e 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -42,7 +42,6 @@ #include <linux/ioctl.h> #include <net/sock.h> -#include <asm/system.h> #include <linux/uaccess.h> #include <asm/unaligned.h> diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 3e450f4a3125..b8e17e4dac8b 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -49,7 +49,6 @@ #include <linux/crc16.h> #include <net/sock.h> -#include <asm/system.h> #include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 22169c3f1482..a55a43e9f70e 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -45,7 +45,6 @@ #include <linux/security.h> #include <net/sock.h> -#include <asm/system.h> #include <linux/uaccess.h> #include <net/bluetooth/bluetooth.h> diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 8bf26d1bc5c1..f6ab12907963 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -44,7 +44,6 @@ #include <linux/security.h> #include <net/sock.h> -#include <asm/system.h> #include <linux/uaccess.h> #include <net/bluetooth/bluetooth.h> diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 761ad9d6cc3b..cc913193d992 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -201,7 +201,9 @@ enum { Opt_ip, Opt_last_string, /* string args above */ + Opt_share, Opt_noshare, + Opt_crc, Opt_nocrc, }; @@ -217,7 +219,9 @@ static match_table_t opt_tokens = { {Opt_key, "key=%s"}, {Opt_ip, "ip=%s"}, /* string args above */ + {Opt_share, "share"}, {Opt_noshare, "noshare"}, + {Opt_crc, "crc"}, {Opt_nocrc, "nocrc"}, {-1, NULL} }; @@ -277,10 +281,11 @@ out: return err; } -int ceph_parse_options(struct ceph_options **popt, char *options, - const char *dev_name, const char *dev_name_end, - int (*parse_extra_token)(char *c, void *private), - void *private) +struct ceph_options * +ceph_parse_options(char *options, const char *dev_name, + const char *dev_name_end, + int (*parse_extra_token)(char *c, void *private), + void *private) { struct ceph_options *opt; const char *c; @@ -289,7 +294,7 @@ int ceph_parse_options(struct ceph_options **popt, char *options, opt = kzalloc(sizeof(*opt), GFP_KERNEL); if (!opt) - return err; + return ERR_PTR(-ENOMEM); opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr), GFP_KERNEL); if (!opt->mon_addr) @@ -398,10 +403,16 @@ int ceph_parse_options(struct ceph_options **popt, char *options, opt->mount_timeout = intval; break; + case Opt_share: + opt->flags &= ~CEPH_OPT_NOSHARE; + break; case Opt_noshare: opt->flags |= CEPH_OPT_NOSHARE; break; + case Opt_crc: + opt->flags &= ~CEPH_OPT_NOCRC; + break; case Opt_nocrc: opt->flags |= CEPH_OPT_NOCRC; break; @@ -412,12 +423,11 @@ int ceph_parse_options(struct ceph_options **popt, char *options, } /* success */ - *popt = opt; - return 0; + return opt; out: ceph_destroy_options(opt); - return err; + return ERR_PTR(err); } EXPORT_SYMBOL(ceph_parse_options); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index ad5b70801f37..f0993af2ae4d 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -38,48 +38,54 @@ static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; static struct lock_class_key socket_class; #endif +/* + * When skipping (ignoring) a block of input we read it into a "skip + * buffer," which is this many bytes in size. + */ +#define SKIP_BUF_SIZE 1024 static void queue_con(struct ceph_connection *con); static void con_work(struct work_struct *); static void ceph_fault(struct ceph_connection *con); /* - * nicely render a sockaddr as a string. + * Nicely render a sockaddr as a string. An array of formatted + * strings is used, to approximate reentrancy. */ -#define MAX_ADDR_STR 20 -#define MAX_ADDR_STR_LEN 60 -static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN]; -static DEFINE_SPINLOCK(addr_str_lock); -static int last_addr_str; +#define ADDR_STR_COUNT_LOG 5 /* log2(# address strings in array) */ +#define ADDR_STR_COUNT (1 << ADDR_STR_COUNT_LOG) +#define ADDR_STR_COUNT_MASK (ADDR_STR_COUNT - 1) +#define MAX_ADDR_STR_LEN 64 /* 54 is enough */ + +static char addr_str[ADDR_STR_COUNT][MAX_ADDR_STR_LEN]; +static atomic_t addr_str_seq = ATOMIC_INIT(0); + +static struct page *zero_page; /* used in certain error cases */ const char *ceph_pr_addr(const struct sockaddr_storage *ss) { int i; char *s; - struct sockaddr_in *in4 = (void *)ss; - struct sockaddr_in6 *in6 = (void *)ss; - - spin_lock(&addr_str_lock); - i = last_addr_str++; - if (last_addr_str == MAX_ADDR_STR) - last_addr_str = 0; - spin_unlock(&addr_str_lock); + struct sockaddr_in *in4 = (struct sockaddr_in *) ss; + struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss; + + i = atomic_inc_return(&addr_str_seq) & ADDR_STR_COUNT_MASK; s = addr_str[i]; switch (ss->ss_family) { case AF_INET: - snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%u", &in4->sin_addr, - (unsigned int)ntohs(in4->sin_port)); + snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%hu", &in4->sin_addr, + ntohs(in4->sin_port)); break; case AF_INET6: - snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%u", &in6->sin6_addr, - (unsigned int)ntohs(in6->sin6_port)); + snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%hu", &in6->sin6_addr, + ntohs(in6->sin6_port)); break; default: - snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %d)", - (int)ss->ss_family); + snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %hu)", + ss->ss_family); } return s; @@ -95,22 +101,43 @@ static void encode_my_addr(struct ceph_messenger *msgr) /* * work queue for all reading and writing to/from the socket. */ -struct workqueue_struct *ceph_msgr_wq; +static struct workqueue_struct *ceph_msgr_wq; + +void _ceph_msgr_exit(void) +{ + if (ceph_msgr_wq) { + destroy_workqueue(ceph_msgr_wq); + ceph_msgr_wq = NULL; + } + + BUG_ON(zero_page == NULL); + kunmap(zero_page); + page_cache_release(zero_page); + zero_page = NULL; +} int ceph_msgr_init(void) { + BUG_ON(zero_page != NULL); + zero_page = ZERO_PAGE(0); + page_cache_get(zero_page); + ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0); - if (!ceph_msgr_wq) { - pr_err("msgr_init failed to create workqueue\n"); - return -ENOMEM; - } - return 0; + if (ceph_msgr_wq) + return 0; + + pr_err("msgr_init failed to create workqueue\n"); + _ceph_msgr_exit(); + + return -ENOMEM; } EXPORT_SYMBOL(ceph_msgr_init); void ceph_msgr_exit(void) { - destroy_workqueue(ceph_msgr_wq); + BUG_ON(ceph_msgr_wq == NULL); + + _ceph_msgr_exit(); } EXPORT_SYMBOL(ceph_msgr_exit); @@ -128,8 +155,8 @@ EXPORT_SYMBOL(ceph_msgr_flush); /* data available on socket, or listen socket received a connect */ static void ceph_data_ready(struct sock *sk, int count_unused) { - struct ceph_connection *con = - (struct ceph_connection *)sk->sk_user_data; + struct ceph_connection *con = sk->sk_user_data; + if (sk->sk_state != TCP_CLOSE_WAIT) { dout("ceph_data_ready on %p state = %lu, queueing work\n", con, con->state); @@ -140,26 +167,30 @@ static void ceph_data_ready(struct sock *sk, int count_unused) /* socket has buffer space for writing */ static void ceph_write_space(struct sock *sk) { - struct ceph_connection *con = - (struct ceph_connection *)sk->sk_user_data; + struct ceph_connection *con = sk->sk_user_data; - /* only queue to workqueue if there is data we want to write. */ + /* only queue to workqueue if there is data we want to write, + * and there is sufficient space in the socket buffer to accept + * more data. clear SOCK_NOSPACE so that ceph_write_space() + * doesn't get called again until try_write() fills the socket + * buffer. See net/ipv4/tcp_input.c:tcp_check_space() + * and net/core/stream.c:sk_stream_write_space(). + */ if (test_bit(WRITE_PENDING, &con->state)) { - dout("ceph_write_space %p queueing write work\n", con); - queue_con(con); + if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { + dout("ceph_write_space %p queueing write work\n", con); + clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + queue_con(con); + } } else { dout("ceph_write_space %p nothing to write\n", con); } - - /* since we have our own write_space, clear the SOCK_NOSPACE flag */ - clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); } /* socket's state has changed */ static void ceph_state_change(struct sock *sk) { - struct ceph_connection *con = - (struct ceph_connection *)sk->sk_user_data; + struct ceph_connection *con = sk->sk_user_data; dout("ceph_state_change %p state = %lu sk_state = %u\n", con, con->state, sk->sk_state); @@ -184,6 +215,8 @@ static void ceph_state_change(struct sock *sk) dout("ceph_state_change TCP_ESTABLISHED\n"); queue_con(con); break; + default: /* Everything else is uninteresting */ + break; } } @@ -194,7 +227,7 @@ static void set_sock_callbacks(struct socket *sock, struct ceph_connection *con) { struct sock *sk = sock->sk; - sk->sk_user_data = (void *)con; + sk->sk_user_data = con; sk->sk_data_ready = ceph_data_ready; sk->sk_write_space = ceph_write_space; sk->sk_state_change = ceph_state_change; @@ -208,7 +241,7 @@ static void set_sock_callbacks(struct socket *sock, /* * initiate connection to a remote socket. */ -static struct socket *ceph_tcp_connect(struct ceph_connection *con) +static int ceph_tcp_connect(struct ceph_connection *con) { struct sockaddr_storage *paddr = &con->peer_addr.in_addr; struct socket *sock; @@ -218,8 +251,7 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con) ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM, IPPROTO_TCP, &sock); if (ret) - return ERR_PTR(ret); - con->sock = sock; + return ret; sock->sk->sk_allocation = GFP_NOFS; #ifdef CONFIG_LOCKDEP @@ -236,19 +268,17 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con) dout("connect %s EINPROGRESS sk_state = %u\n", ceph_pr_addr(&con->peer_addr.in_addr), sock->sk->sk_state); - ret = 0; - } - if (ret < 0) { + } else if (ret < 0) { pr_err("connect %s error %d\n", ceph_pr_addr(&con->peer_addr.in_addr), ret); sock_release(sock); - con->sock = NULL; con->error_msg = "connect error"; + + return ret; } + con->sock = sock; - if (ret < 0) - return ERR_PTR(ret); - return sock; + return 0; } static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) @@ -284,6 +314,19 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, return r; } +static int ceph_tcp_sendpage(struct socket *sock, struct page *page, + int offset, size_t size, int more) +{ + int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR); + int ret; + + ret = kernel_sendpage(sock, page, offset, size, flags); + if (ret == -EAGAIN) + ret = 0; + + return ret; +} + /* * Shutdown/close the socket for the given connection. @@ -391,22 +434,23 @@ bool ceph_con_opened(struct ceph_connection *con) */ struct ceph_connection *ceph_con_get(struct ceph_connection *con) { - dout("con_get %p nref = %d -> %d\n", con, - atomic_read(&con->nref), atomic_read(&con->nref) + 1); - if (atomic_inc_not_zero(&con->nref)) - return con; - return NULL; + int nref = __atomic_add_unless(&con->nref, 1, 0); + + dout("con_get %p nref = %d -> %d\n", con, nref, nref + 1); + + return nref ? con : NULL; } void ceph_con_put(struct ceph_connection *con) { - dout("con_put %p nref = %d -> %d\n", con, - atomic_read(&con->nref), atomic_read(&con->nref) - 1); - BUG_ON(atomic_read(&con->nref) == 0); - if (atomic_dec_and_test(&con->nref)) { + int nref = atomic_dec_return(&con->nref); + + BUG_ON(nref < 0); + if (nref == 0) { BUG_ON(con->sock); kfree(con); } + dout("con_put %p nref = %d -> %d\n", con, nref + 1, nref); } /* @@ -442,14 +486,35 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt) return ret; } +static void ceph_con_out_kvec_reset(struct ceph_connection *con) +{ + con->out_kvec_left = 0; + con->out_kvec_bytes = 0; + con->out_kvec_cur = &con->out_kvec[0]; +} + +static void ceph_con_out_kvec_add(struct ceph_connection *con, + size_t size, void *data) +{ + int index; + + index = con->out_kvec_left; + BUG_ON(index >= ARRAY_SIZE(con->out_kvec)); + + con->out_kvec[index].iov_len = size; + con->out_kvec[index].iov_base = data; + con->out_kvec_left++; + con->out_kvec_bytes += size; +} /* * Prepare footer for currently outgoing message, and finish things * off. Assumes out_kvec* are already valid.. we just add on to the end. */ -static void prepare_write_message_footer(struct ceph_connection *con, int v) +static void prepare_write_message_footer(struct ceph_connection *con) { struct ceph_msg *m = con->out_msg; + int v = con->out_kvec_left; dout("prepare_write_message_footer %p\n", con); con->out_kvec_is_msg = true; @@ -467,9 +532,9 @@ static void prepare_write_message_footer(struct ceph_connection *con, int v) static void prepare_write_message(struct ceph_connection *con) { struct ceph_msg *m; - int v = 0; + u32 crc; - con->out_kvec_bytes = 0; + ceph_con_out_kvec_reset(con); con->out_kvec_is_msg = true; con->out_msg_done = false; @@ -477,16 +542,13 @@ static void prepare_write_message(struct ceph_connection *con) * TCP packet that's a good thing. */ if (con->in_seq > con->in_seq_acked) { con->in_seq_acked = con->in_seq; - con->out_kvec[v].iov_base = &tag_ack; - con->out_kvec[v++].iov_len = 1; + ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); con->out_temp_ack = cpu_to_le64(con->in_seq_acked); - con->out_kvec[v].iov_base = &con->out_temp_ack; - con->out_kvec[v++].iov_len = sizeof(con->out_temp_ack); - con->out_kvec_bytes = 1 + sizeof(con->out_temp_ack); + ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack), + &con->out_temp_ack); } - m = list_first_entry(&con->out_queue, - struct ceph_msg, list_head); + m = list_first_entry(&con->out_queue, struct ceph_msg, list_head); con->out_msg = m; /* put message on sent list */ @@ -510,30 +572,26 @@ static void prepare_write_message(struct ceph_connection *con) BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len); /* tag + hdr + front + middle */ - con->out_kvec[v].iov_base = &tag_msg; - con->out_kvec[v++].iov_len = 1; - con->out_kvec[v].iov_base = &m->hdr; - con->out_kvec[v++].iov_len = sizeof(m->hdr); - con->out_kvec[v++] = m->front; + ceph_con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); + ceph_con_out_kvec_add(con, sizeof (m->hdr), &m->hdr); + ceph_con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); + if (m->middle) - con->out_kvec[v++] = m->middle->vec; - con->out_kvec_left = v; - con->out_kvec_bytes += 1 + sizeof(m->hdr) + m->front.iov_len + - (m->middle ? m->middle->vec.iov_len : 0); - con->out_kvec_cur = con->out_kvec; + ceph_con_out_kvec_add(con, m->middle->vec.iov_len, + m->middle->vec.iov_base); /* fill in crc (except data pages), footer */ - con->out_msg->hdr.crc = - cpu_to_le32(crc32c(0, (void *)&m->hdr, - sizeof(m->hdr) - sizeof(m->hdr.crc))); + crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); + con->out_msg->hdr.crc = cpu_to_le32(crc); con->out_msg->footer.flags = CEPH_MSG_FOOTER_COMPLETE; - con->out_msg->footer.front_crc = - cpu_to_le32(crc32c(0, m->front.iov_base, m->front.iov_len)); - if (m->middle) - con->out_msg->footer.middle_crc = - cpu_to_le32(crc32c(0, m->middle->vec.iov_base, - m->middle->vec.iov_len)); - else + + crc = crc32c(0, m->front.iov_base, m->front.iov_len); + con->out_msg->footer.front_crc = cpu_to_le32(crc); + if (m->middle) { + crc = crc32c(0, m->middle->vec.iov_base, + m->middle->vec.iov_len); + con->out_msg->footer.middle_crc = cpu_to_le32(crc); + } else con->out_msg->footer.middle_crc = 0; con->out_msg->footer.data_crc = 0; dout("prepare_write_message front_crc %u data_crc %u\n", @@ -549,11 +607,11 @@ static void prepare_write_message(struct ceph_connection *con) else con->out_msg_pos.page_pos = 0; con->out_msg_pos.data_pos = 0; - con->out_msg_pos.did_page_crc = 0; + con->out_msg_pos.did_page_crc = false; con->out_more = 1; /* data + footer will follow */ } else { /* no, queue up footer too and be done */ - prepare_write_message_footer(con, v); + prepare_write_message_footer(con); } set_bit(WRITE_PENDING, &con->state); @@ -568,14 +626,14 @@ static void prepare_write_ack(struct ceph_connection *con) con->in_seq_acked, con->in_seq); con->in_seq_acked = con->in_seq; - con->out_kvec[0].iov_base = &tag_ack; - con->out_kvec[0].iov_len = 1; + ceph_con_out_kvec_reset(con); + + ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); + con->out_temp_ack = cpu_to_le64(con->in_seq_acked); - con->out_kvec[1].iov_base = &con->out_temp_ack; - con->out_kvec[1].iov_len = sizeof(con->out_temp_ack); - con->out_kvec_left = 2; - con->out_kvec_bytes = 1 + sizeof(con->out_temp_ack); - con->out_kvec_cur = con->out_kvec; + ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack), + &con->out_temp_ack); + con->out_more = 1; /* more will follow.. eventually.. */ set_bit(WRITE_PENDING, &con->state); } @@ -586,11 +644,8 @@ static void prepare_write_ack(struct ceph_connection *con) static void prepare_write_keepalive(struct ceph_connection *con) { dout("prepare_write_keepalive %p\n", con); - con->out_kvec[0].iov_base = &tag_keepalive; - con->out_kvec[0].iov_len = 1; - con->out_kvec_left = 1; - con->out_kvec_bytes = 1; - con->out_kvec_cur = con->out_kvec; + ceph_con_out_kvec_reset(con); + ceph_con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); set_bit(WRITE_PENDING, &con->state); } @@ -619,12 +674,9 @@ static int prepare_connect_authorizer(struct ceph_connection *con) con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol); con->out_connect.authorizer_len = cpu_to_le32(auth_len); - if (auth_len) { - con->out_kvec[con->out_kvec_left].iov_base = auth_buf; - con->out_kvec[con->out_kvec_left].iov_len = auth_len; - con->out_kvec_left++; - con->out_kvec_bytes += auth_len; - } + if (auth_len) + ceph_con_out_kvec_add(con, auth_len, auth_buf); + return 0; } @@ -634,22 +686,18 @@ static int prepare_connect_authorizer(struct ceph_connection *con) static void prepare_write_banner(struct ceph_messenger *msgr, struct ceph_connection *con) { - int len = strlen(CEPH_BANNER); + ceph_con_out_kvec_reset(con); + ceph_con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER); + ceph_con_out_kvec_add(con, sizeof (msgr->my_enc_addr), + &msgr->my_enc_addr); - con->out_kvec[0].iov_base = CEPH_BANNER; - con->out_kvec[0].iov_len = len; - con->out_kvec[1].iov_base = &msgr->my_enc_addr; - con->out_kvec[1].iov_len = sizeof(msgr->my_enc_addr); - con->out_kvec_left = 2; - con->out_kvec_bytes = len + sizeof(msgr->my_enc_addr); - con->out_kvec_cur = con->out_kvec; con->out_more = 0; set_bit(WRITE_PENDING, &con->state); } static int prepare_write_connect(struct ceph_messenger *msgr, struct ceph_connection *con, - int after_banner) + int include_banner) { unsigned global_seq = get_global_seq(con->msgr, 0); int proto; @@ -678,22 +726,18 @@ static int prepare_write_connect(struct ceph_messenger *msgr, con->out_connect.protocol_version = cpu_to_le32(proto); con->out_connect.flags = 0; - if (!after_banner) { - con->out_kvec_left = 0; - con->out_kvec_bytes = 0; - } - con->out_kvec[con->out_kvec_left].iov_base = &con->out_connect; - con->out_kvec[con->out_kvec_left].iov_len = sizeof(con->out_connect); - con->out_kvec_left++; - con->out_kvec_bytes += sizeof(con->out_connect); - con->out_kvec_cur = con->out_kvec; + if (include_banner) + prepare_write_banner(msgr, con); + else + ceph_con_out_kvec_reset(con); + ceph_con_out_kvec_add(con, sizeof (con->out_connect), &con->out_connect); + con->out_more = 0; set_bit(WRITE_PENDING, &con->state); return prepare_connect_authorizer(con); } - /* * write as much of pending kvecs to the socket as we can. * 1 -> done @@ -714,17 +758,18 @@ static int write_partial_kvec(struct ceph_connection *con) con->out_kvec_bytes -= ret; if (con->out_kvec_bytes == 0) break; /* done */ - while (ret > 0) { - if (ret >= con->out_kvec_cur->iov_len) { - ret -= con->out_kvec_cur->iov_len; - con->out_kvec_cur++; - con->out_kvec_left--; - } else { - con->out_kvec_cur->iov_len -= ret; - con->out_kvec_cur->iov_base += ret; - ret = 0; - break; - } + + /* account for full iov entries consumed */ + while (ret >= con->out_kvec_cur->iov_len) { + BUG_ON(!con->out_kvec_left); + ret -= con->out_kvec_cur->iov_len; + con->out_kvec_cur++; + con->out_kvec_left--; + } + /* and for a partially-consumed entry */ + if (ret) { + con->out_kvec_cur->iov_len -= ret; + con->out_kvec_cur->iov_base += ret; } } con->out_kvec_left = 0; @@ -773,7 +818,7 @@ static int write_partial_msg_pages(struct ceph_connection *con) struct ceph_msg *msg = con->out_msg; unsigned data_len = le32_to_cpu(msg->hdr.data_len); size_t len; - int crc = con->msgr->nocrc; + bool do_datacrc = !con->msgr->nocrc; int ret; int total_max_write; int in_trail = 0; @@ -790,9 +835,8 @@ static int write_partial_msg_pages(struct ceph_connection *con) while (data_len > con->out_msg_pos.data_pos) { struct page *page = NULL; - void *kaddr = NULL; int max_write = PAGE_SIZE; - int page_shift = 0; + int bio_offset = 0; total_max_write = data_len - trail_len - con->out_msg_pos.data_pos; @@ -811,58 +855,47 @@ static int write_partial_msg_pages(struct ceph_connection *con) page = list_first_entry(&msg->trail->head, struct page, lru); - if (crc) - kaddr = kmap(page); max_write = PAGE_SIZE; } else if (msg->pages) { page = msg->pages[con->out_msg_pos.page]; - if (crc) - kaddr = kmap(page); } else if (msg->pagelist) { page = list_first_entry(&msg->pagelist->head, struct page, lru); - if (crc) - kaddr = kmap(page); #ifdef CONFIG_BLOCK } else if (msg->bio) { struct bio_vec *bv; bv = bio_iovec_idx(msg->bio_iter, msg->bio_seg); page = bv->bv_page; - page_shift = bv->bv_offset; - if (crc) - kaddr = kmap(page) + page_shift; + bio_offset = bv->bv_offset; max_write = bv->bv_len; #endif } else { - page = con->msgr->zero_page; - if (crc) - kaddr = page_address(con->msgr->zero_page); + page = zero_page; } len = min_t(int, max_write - con->out_msg_pos.page_pos, total_max_write); - if (crc && !con->out_msg_pos.did_page_crc) { - void *base = kaddr + con->out_msg_pos.page_pos; + if (do_datacrc && !con->out_msg_pos.did_page_crc) { + void *base; + u32 crc; u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc); + char *kaddr; + kaddr = kmap(page); BUG_ON(kaddr == NULL); - con->out_msg->footer.data_crc = - cpu_to_le32(crc32c(tmpcrc, base, len)); - con->out_msg_pos.did_page_crc = 1; + base = kaddr + con->out_msg_pos.page_pos + bio_offset; + crc = crc32c(tmpcrc, base, len); + con->out_msg->footer.data_crc = cpu_to_le32(crc); + con->out_msg_pos.did_page_crc = true; } - ret = kernel_sendpage(con->sock, page, - con->out_msg_pos.page_pos + page_shift, - len, - MSG_DONTWAIT | MSG_NOSIGNAL | - MSG_MORE); - - if (crc && - (msg->pages || msg->pagelist || msg->bio || in_trail)) + ret = ceph_tcp_sendpage(con->sock, page, + con->out_msg_pos.page_pos + bio_offset, + len, 1); + + if (do_datacrc) kunmap(page); - if (ret == -EAGAIN) - ret = 0; if (ret <= 0) goto out; @@ -871,7 +904,7 @@ static int write_partial_msg_pages(struct ceph_connection *con) if (ret == len) { con->out_msg_pos.page_pos = 0; con->out_msg_pos.page++; - con->out_msg_pos.did_page_crc = 0; + con->out_msg_pos.did_page_crc = false; if (in_trail) list_move_tail(&page->lru, &msg->trail->head); @@ -888,12 +921,10 @@ static int write_partial_msg_pages(struct ceph_connection *con) dout("write_partial_msg_pages %p msg %p done\n", con, msg); /* prepare and queue up footer, too */ - if (!crc) + if (!do_datacrc) con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; - con->out_kvec_bytes = 0; - con->out_kvec_left = 0; - con->out_kvec_cur = con->out_kvec; - prepare_write_message_footer(con, 0); + ceph_con_out_kvec_reset(con); + prepare_write_message_footer(con); ret = 1; out: return ret; @@ -907,12 +938,9 @@ static int write_partial_skip(struct ceph_connection *con) int ret; while (con->out_skip > 0) { - struct kvec iov = { - .iov_base = page_address(con->msgr->zero_page), - .iov_len = min(con->out_skip, (int)PAGE_CACHE_SIZE) - }; + size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE); - ret = ceph_tcp_sendmsg(con->sock, &iov, 1, iov.iov_len, 1); + ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, 1); if (ret <= 0) goto out; con->out_skip -= ret; @@ -1085,8 +1113,8 @@ static void addr_set_port(struct sockaddr_storage *ss, int p) static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss, char delim, const char **ipend) { - struct sockaddr_in *in4 = (void *)ss; - struct sockaddr_in6 *in6 = (void *)ss; + struct sockaddr_in *in4 = (struct sockaddr_in *) ss; + struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss; memset(ss, 0, sizeof(*ss)); @@ -1512,10 +1540,9 @@ static int read_partial_message_section(struct ceph_connection *con, if (ret <= 0) return ret; section->iov_len += ret; - if (section->iov_len == sec_len) - *crc = crc32c(0, section->iov_base, - section->iov_len); } + if (section->iov_len == sec_len) + *crc = crc32c(0, section->iov_base, section->iov_len); return 1; } @@ -1527,7 +1554,7 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, static int read_partial_message_pages(struct ceph_connection *con, struct page **pages, - unsigned data_len, int datacrc) + unsigned data_len, bool do_datacrc) { void *p; int ret; @@ -1540,7 +1567,7 @@ static int read_partial_message_pages(struct ceph_connection *con, p = kmap(pages[con->in_msg_pos.page]); ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, left); - if (ret > 0 && datacrc) + if (ret > 0 && do_datacrc) con->in_data_crc = crc32c(con->in_data_crc, p + con->in_msg_pos.page_pos, ret); @@ -1560,7 +1587,7 @@ static int read_partial_message_pages(struct ceph_connection *con, #ifdef CONFIG_BLOCK static int read_partial_message_bio(struct ceph_connection *con, struct bio **bio_iter, int *bio_seg, - unsigned data_len, int datacrc) + unsigned data_len, bool do_datacrc) { struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg); void *p; @@ -1576,7 +1603,7 @@ static int read_partial_message_bio(struct ceph_connection *con, ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, left); - if (ret > 0 && datacrc) + if (ret > 0 && do_datacrc) con->in_data_crc = crc32c(con->in_data_crc, p + con->in_msg_pos.page_pos, ret); @@ -1603,9 +1630,10 @@ static int read_partial_message(struct ceph_connection *con) int ret; int to, left; unsigned front_len, middle_len, data_len; - int datacrc = con->msgr->nocrc; + bool do_datacrc = !con->msgr->nocrc; int skip; u64 seq; + u32 crc; dout("read_partial_message con %p msg %p\n", con, m); @@ -1618,17 +1646,16 @@ static int read_partial_message(struct ceph_connection *con) if (ret <= 0) return ret; con->in_base_pos += ret; - if (con->in_base_pos == sizeof(con->in_hdr)) { - u32 crc = crc32c(0, (void *)&con->in_hdr, - sizeof(con->in_hdr) - sizeof(con->in_hdr.crc)); - if (crc != le32_to_cpu(con->in_hdr.crc)) { - pr_err("read_partial_message bad hdr " - " crc %u != expected %u\n", - crc, con->in_hdr.crc); - return -EBADMSG; - } - } } + + crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc)); + if (cpu_to_le32(crc) != con->in_hdr.crc) { + pr_err("read_partial_message bad hdr " + " crc %u != expected %u\n", + crc, con->in_hdr.crc); + return -EBADMSG; + } + front_len = le32_to_cpu(con->in_hdr.front_len); if (front_len > CEPH_MSG_MAX_FRONT_LEN) return -EIO; @@ -1714,7 +1741,7 @@ static int read_partial_message(struct ceph_connection *con) while (con->in_msg_pos.data_pos < data_len) { if (m->pages) { ret = read_partial_message_pages(con, m->pages, - data_len, datacrc); + data_len, do_datacrc); if (ret <= 0) return ret; #ifdef CONFIG_BLOCK @@ -1722,7 +1749,7 @@ static int read_partial_message(struct ceph_connection *con) ret = read_partial_message_bio(con, &m->bio_iter, &m->bio_seg, - data_len, datacrc); + data_len, do_datacrc); if (ret <= 0) return ret; #endif @@ -1757,7 +1784,7 @@ static int read_partial_message(struct ceph_connection *con) m, con->in_middle_crc, m->footer.middle_crc); return -EBADMSG; } - if (datacrc && + if (do_datacrc && (m->footer.flags & CEPH_MSG_FOOTER_NOCRC) == 0 && con->in_data_crc != le32_to_cpu(m->footer.data_crc)) { pr_err("read_partial_message %p data crc %u != exp. %u\n", m, @@ -1819,7 +1846,6 @@ more: /* open the socket first? */ if (con->sock == NULL) { - prepare_write_banner(msgr, con); prepare_write_connect(msgr, con, 1); prepare_read_banner(con); set_bit(CONNECTING, &con->state); @@ -1829,11 +1855,9 @@ more: con->in_tag = CEPH_MSGR_TAG_READY; dout("try_write initiating connect on %p new state %lu\n", con, con->state); - con->sock = ceph_tcp_connect(con); - if (IS_ERR(con->sock)) { - con->sock = NULL; + ret = ceph_tcp_connect(con); + if (ret < 0) { con->error_msg = "connect error"; - ret = -1; goto out; } } @@ -1953,8 +1977,9 @@ more: * * FIXME: there must be a better way to do this! */ - static char buf[1024]; - int skip = min(1024, -con->in_base_pos); + static char buf[SKIP_BUF_SIZE]; + int skip = min((int) sizeof (buf), -con->in_base_pos); + dout("skipping %d / %d bytes\n", skip, -con->in_base_pos); ret = ceph_tcp_recvmsg(con->sock, buf, skip); if (ret <= 0) @@ -2216,15 +2241,6 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr, spin_lock_init(&msgr->global_seq_lock); - /* the zero page is needed if a request is "canceled" while the message - * is being written over the socket */ - msgr->zero_page = __page_cache_alloc(GFP_KERNEL | __GFP_ZERO); - if (!msgr->zero_page) { - kfree(msgr); - return ERR_PTR(-ENOMEM); - } - kmap(msgr->zero_page); - if (myaddr) msgr->inst.addr = *myaddr; @@ -2241,8 +2257,6 @@ EXPORT_SYMBOL(ceph_messenger_create); void ceph_messenger_destroy(struct ceph_messenger *msgr) { dout("destroy %p\n", msgr); - kunmap(msgr->zero_page); - __free_page(msgr->zero_page); kfree(msgr); dout("destroyed messenger %p\n", msgr); } diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index fd863fe76934..29ad46ec9dcf 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -283,7 +283,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end) ceph_decode_32_safe(p, end, yes, bad); #if BITS_PER_LONG == 32 err = -EINVAL; - if (yes > ULONG_MAX / sizeof(struct crush_rule_step)) + if (yes > (ULONG_MAX - sizeof(*r)) + / sizeof(struct crush_rule_step)) goto bad; #endif r = c->rules[i] = kmalloc(sizeof(*r) + diff --git a/net/core/datagram.c b/net/core/datagram.c index d3cf12f62c8f..e4fbfd6e2bd4 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -37,7 +37,6 @@ #include <linux/types.h> #include <linux/kernel.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/errno.h> diff --git a/net/core/dev.c b/net/core/dev.c index 452db7090d18..5d59155adf2a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -73,7 +73,6 @@ */ #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/bitops.h> #include <linux/capability.h> #include <linux/cpu.h> diff --git a/net/core/filter.c b/net/core/filter.c index 5dea45279215..cf4989ac503b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -33,7 +33,6 @@ #include <net/sock.h> #include <linux/errno.h> #include <linux/timer.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <asm/unaligned.h> #include <linux/filter.h> diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 43b03dd71e85..d9d198aa9fed 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -14,7 +14,6 @@ */ #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/bitops.h> #include <linux/module.h> #include <linux/types.h> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1a63c6efd2ea..90430b776ece 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -38,7 +38,6 @@ #include <linux/pci.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/inet.h> #include <linux/netdevice.h> diff --git a/net/core/scm.c b/net/core/scm.c index ff52ad0a5150..611c5efd4cb0 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -28,7 +28,6 @@ #include <linux/nsproxy.h> #include <linux/slab.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <net/protocol.h> diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6eb656acdfe5..f223cdc75da6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -66,7 +66,6 @@ #include <net/xfrm.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <trace/events/skb.h> #include "kmap_skb.h" @@ -321,12 +320,12 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, EXPORT_SYMBOL(__netdev_alloc_skb); void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, - int size) + int size, unsigned int truesize) { skb_fill_page_desc(skb, i, page, off, size); skb->len += size; skb->data_len += size; - skb->truesize += size; + skb->truesize += truesize; } EXPORT_SYMBOL(skb_add_rx_frag); diff --git a/net/core/sock.c b/net/core/sock.c index 9be6d0d6c533..b2e14c07d920 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -115,7 +115,6 @@ #include <linux/memcontrol.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/netdevice.h> #include <net/protocol.h> diff --git a/net/core/utils.c b/net/core/utils.c index 386e263f6066..dc3c3faff2f4 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -30,7 +30,6 @@ #include <net/net_ratelimit.h> #include <asm/byteorder.h> -#include <asm/system.h> #include <asm/uaccess.h> int net_msg_warn __read_mostly = 1; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 19acd00a6382..4136987d94da 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -119,7 +119,6 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat #include <net/sock.h> #include <net/tcp_states.h> #include <net/flow.h> -#include <asm/system.h> #include <asm/ioctls.h> #include <linux/capability.h> #include <linux/mm.h> diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 74d321a60e7b..c00e3077988c 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -42,7 +42,6 @@ #include <linux/notifier.h> #include <linux/slab.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <net/net_namespace.h> #include <net/neighbour.h> #include <net/dst.h> diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 73fa268fe2e8..f6544b2c91b0 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -60,7 +60,6 @@ #include <linux/slab.h> #include <net/sock.h> #include <net/tcp_states.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/termios.h> diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index bd78836a81eb..e446e85e64a6 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -52,7 +52,6 @@ #include <linux/route.h> #include <linux/slab.h> #include <net/sock.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/termios.h> diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 7e717cb35ad1..71b5edcee401 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -47,7 +47,6 @@ #include <linux/mutex.h> #include <linux/uaccess.h> -#include <asm/system.h> static const struct proto_ops econet_ops; static struct hlist_head econet_sklist; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index a93af86b8474..bf10a311cf1c 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -59,7 +59,6 @@ #include <net/ip.h> #include <net/dsa.h> #include <asm/uaccess.h> -#include <asm/system.h> __setup("ether=", netdev_boot_setup); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index fdf49fd44bb4..10e3751466b5 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -91,7 +91,6 @@ #include <linux/slab.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/inet.h> #include <linux/igmp.h> diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 73f46d691abc..18d9b81ecb1a 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -113,7 +113,6 @@ #include <net/ax25.h> #include <net/netrom.h> -#include <asm/system.h> #include <linux/uaccess.h> #include <linux/netfilter_arp.h> diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index d4fad5c77447..6e447ff94dfa 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -27,7 +27,6 @@ #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/bitops.h> #include <linux/capability.h> #include <linux/module.h> diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 76e72bacc217..cbe3a68507cf 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -15,7 +15,6 @@ #include <linux/module.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/bitops.h> #include <linux/capability.h> #include <linux/types.h> diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a8c5c1d6715b..5063fa38ac7b 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -14,7 +14,6 @@ */ #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index da9b9cb2282d..bce36f1a37b4 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -51,7 +51,6 @@ #define VERSION "0.409" #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 9664d353ccd8..2cb2bf845641 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -91,7 +91,6 @@ #include <linux/errno.h> #include <linux/timer.h> #include <linux/init.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <net/checksum.h> #include <net/xfrm.h> diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 450e5d21ed2a..5dfecfd7d5e9 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -73,7 +73,6 @@ #include <linux/module.h> #include <linux/slab.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/jiffies.h> diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index f3f1108940f5..26eccc5bab1c 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -115,7 +115,6 @@ #define pr_fmt(fmt) "IPv4: " fmt -#include <asm/system.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ff302bde8890..4910176d24ed 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -43,7 +43,6 @@ */ #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 0518a4fb177b..960fbfc3e976 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -26,7 +26,6 @@ * */ -#include <asm/system.h> #include <asm/uaccess.h> #include <linux/types.h> #include <linux/capability.h> diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index ab6b36e6da15..50009c787bcd 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -20,7 +20,6 @@ * */ -#include <asm/system.h> #include <linux/uaccess.h> #include <linux/types.h> #include <linux/fcntl.h> diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 12ccf880eb88..4dc1c104c942 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -66,7 +66,6 @@ #include <linux/module.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d6f5feeb3eaf..fe141052a1be 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -79,7 +79,6 @@ #define pr_fmt(fmt) "UDP: " fmt -#include <asm/system.h> #include <asm/uaccess.h> #include <asm/ioctls.h> #include <linux/bootmem.h> diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 5605f9dca87e..8ed1b930e75f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -60,7 +60,6 @@ #endif #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/mroute6.h> MODULE_AUTHOR("Cast of dozens"); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index af88934e4d79..27ac95a63429 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -66,7 +66,6 @@ #include <net/inet_common.h> #include <asm/uaccess.h> -#include <asm/system.h> /* * The ICMP socket(s). This is the most convenient way to flow control diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 5aa3981a3922..8110362e0af5 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -16,7 +16,6 @@ * */ -#include <asm/system.h> #include <asm/uaccess.h> #include <linux/types.h> #include <linux/sched.h> diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 24c456e8aa1d..496b62712fe8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2474,8 +2474,12 @@ static int rt6_fill_node(struct net *net, rcu_read_lock(); n = dst_get_neighbour_noref(&rt->dst); - if (n) - NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key); + if (n) { + if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) { + rcu_read_unlock(); + goto nla_put_failure; + } + } rcu_read_unlock(); if (rt->dst.dev) diff --git a/net/irda/irlan/irlan_client.c b/net/irda/irlan/irlan_client.c index ba1a3fc39b5c..42cf1390ce9c 100644 --- a/net/irda/irlan/irlan_client.c +++ b/net/irda/irlan/irlan_client.c @@ -37,7 +37,6 @@ #include <linux/bitops.h> #include <net/arp.h> -#include <asm/system.h> #include <asm/byteorder.h> #include <net/irda/irda.h> diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index 579617cca125..7ac4d1becbfc 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -40,7 +40,6 @@ #include <linux/moduleparam.h> #include <linux/bitops.h> -#include <asm/system.h> #include <asm/byteorder.h> #include <net/irda/irda.h> diff --git a/net/irda/irlan/irlan_provider.c b/net/irda/irlan/irlan_provider.c index 8b61cf0d8a69..32dcaac70b0c 100644 --- a/net/irda/irlan/irlan_provider.c +++ b/net/irda/irlan/irlan_provider.c @@ -36,7 +36,6 @@ #include <linux/bitops.h> #include <linux/slab.h> -#include <asm/system.h> #include <asm/byteorder.h> #include <net/irda/irda.h> diff --git a/net/irda/timer.c b/net/irda/timer.c index f418cb2ad49c..1d552b3946fc 100644 --- a/net/irda/timer.c +++ b/net/irda/timer.c @@ -24,7 +24,6 @@ * ********************************************************************/ -#include <asm/system.h> #include <linux/delay.h> #include <net/irda/timer.h> diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c index 8d0324bac01c..ab3d35f23257 100644 --- a/net/lapb/lapb_iface.c +++ b/net/lapb/lapb_iface.c @@ -32,7 +32,6 @@ #include <linux/slab.h> #include <net/sock.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/lapb/lapb_in.c b/net/lapb/lapb_in.c index 2ec1af5c36cc..f4e3c1accab7 100644 --- a/net/lapb/lapb_in.c +++ b/net/lapb/lapb_in.c @@ -30,7 +30,6 @@ #include <linux/slab.h> #include <net/sock.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/lapb/lapb_out.c b/net/lapb/lapb_out.c index c75a79540f9f..baab2760f651 100644 --- a/net/lapb/lapb_out.c +++ b/net/lapb/lapb_out.c @@ -28,7 +28,6 @@ #include <linux/slab.h> #include <net/sock.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/lapb/lapb_subr.c b/net/lapb/lapb_subr.c index 43a2a7fb327b..066225b4e824 100644 --- a/net/lapb/lapb_subr.c +++ b/net/lapb/lapb_subr.c @@ -27,7 +27,6 @@ #include <linux/slab.h> #include <net/sock.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c index af6d14b44e2e..f8cd641dfc82 100644 --- a/net/lapb/lapb_timer.c +++ b/net/lapb/lapb_timer.c @@ -28,7 +28,6 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index fe6cb4304d72..52856178c9d7 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -31,7 +31,6 @@ #include <net/net_namespace.h> #include <net/protocol.h> #include <net/tcp.h> -#include <asm/system.h> #include <linux/stat.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 85312939695f..f843a8833250 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -25,7 +25,6 @@ #include <net/protocol.h> #include <net/tcp.h> #include <net/udp.h> -#include <asm/system.h> #include <linux/stat.h> #include <linux/proc_fs.h> diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7b48035826ee..cbdb754dbb10 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -768,8 +768,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, struct nf_conntrack_l3proto *l3proto, struct nf_conntrack_l4proto *l4proto, struct sk_buff *skb, - unsigned int dataoff, u32 hash, - unsigned int *timeouts) + unsigned int dataoff, u32 hash) { struct nf_conn *ct; struct nf_conn_help *help; @@ -777,6 +776,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, struct nf_conntrack_ecache *ecache; struct nf_conntrack_expect *exp; u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; + struct nf_conn_timeout *timeout_ext; + unsigned int *timeouts; if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { pr_debug("Can't invert tuple.\n"); @@ -788,12 +789,21 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (IS_ERR(ct)) return (struct nf_conntrack_tuple_hash *)ct; + timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL; + if (timeout_ext) + timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext); + else + timeouts = l4proto->get_timeouts(net); + if (!l4proto->new(ct, skb, dataoff, timeouts)) { nf_conntrack_free(ct); pr_debug("init conntrack: can't track with proto module\n"); return NULL; } + if (timeout_ext) + nf_ct_timeout_ext_add(ct, timeout_ext->timeout, GFP_ATOMIC); + nf_ct_acct_ext_add(ct, GFP_ATOMIC); nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); @@ -854,8 +864,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, struct nf_conntrack_l3proto *l3proto, struct nf_conntrack_l4proto *l4proto, int *set_reply, - enum ip_conntrack_info *ctinfo, - unsigned int *timeouts) + enum ip_conntrack_info *ctinfo) { struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; @@ -875,7 +884,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, h = __nf_conntrack_find_get(net, zone, &tuple, hash); if (!h) { h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, - skb, dataoff, hash, timeouts); + skb, dataoff, hash); if (!h) return NULL; if (IS_ERR(h)) @@ -964,19 +973,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, goto out; } - /* Decide what timeout policy we want to apply to this flow. */ - if (tmpl) { - timeout_ext = nf_ct_timeout_find(tmpl); - if (timeout_ext) - timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext); - else - timeouts = l4proto->get_timeouts(net); - } else - timeouts = l4proto->get_timeouts(net); - ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, - l3proto, l4proto, &set_reply, &ctinfo, - timeouts); + l3proto, l4proto, &set_reply, &ctinfo); if (!ct) { /* Not valid part of a connection */ NF_CT_STAT_INC_ATOMIC(net, invalid); @@ -993,6 +991,13 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, NF_CT_ASSERT(skb->nfct); + /* Decide what timeout policy we want to apply to this flow. */ + timeout_ext = nf_ct_timeout_find(ct); + if (timeout_ext) + timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext); + else + timeouts = l4proto->get_timeouts(net); + ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts); if (ret <= 0) { /* Invalid: inverse of the return code tells diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 5701c8dd783c..be3da2c8cdc5 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -127,6 +127,27 @@ void nf_ct_l3proto_module_put(unsigned short l3proto) } EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put); +struct nf_conntrack_l4proto * +nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num) +{ + struct nf_conntrack_l4proto *p; + + rcu_read_lock(); + p = __nf_ct_l4proto_find(l3num, l4num); + if (!try_module_get(p->me)) + p = &nf_conntrack_l4proto_generic; + rcu_read_unlock(); + + return p; +} +EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get); + +void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p) +{ + module_put(p->me); +} +EXPORT_SYMBOL_GPL(nf_ct_l4proto_put); + static int kill_l3proto(struct nf_conn *i, void *data) { return nf_ct_l3num(i) == ((struct nf_conntrack_l3proto *)data)->l3proto; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 4d70785b953d..e6ddde165612 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -23,7 +23,6 @@ #include <linux/net.h> #include <linux/skbuff.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <net/sock.h> #include <net/netlink.h> #include <linux/init.h> diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index fec29a43de4d..2b9e79f5ef05 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -98,11 +98,13 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, break; } - l4proto = __nf_ct_l4proto_find(l3num, l4num); + l4proto = nf_ct_l4proto_find_get(l3num, l4num); /* This protocol is not supportted, skip. */ - if (l4proto->l4proto != l4num) - return -EOPNOTSUPP; + if (l4proto->l4proto != l4num) { + ret = -EOPNOTSUPP; + goto err_proto_put; + } if (matching) { if (nlh->nlmsg_flags & NLM_F_REPLACE) { @@ -110,20 +112,25 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, * different kind, sorry. */ if (matching->l3num != l3num || - matching->l4num != l4num) - return -EINVAL; + matching->l4proto->l4proto != l4num) { + ret = -EINVAL; + goto err_proto_put; + } ret = ctnl_timeout_parse_policy(matching, l4proto, cda[CTA_TIMEOUT_DATA]); return ret; } - return -EBUSY; + ret = -EBUSY; + goto err_proto_put; } timeout = kzalloc(sizeof(struct ctnl_timeout) + l4proto->ctnl_timeout.obj_size, GFP_KERNEL); - if (timeout == NULL) - return -ENOMEM; + if (timeout == NULL) { + ret = -ENOMEM; + goto err_proto_put; + } ret = ctnl_timeout_parse_policy(timeout, l4proto, cda[CTA_TIMEOUT_DATA]); @@ -132,13 +139,15 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME])); timeout->l3num = l3num; - timeout->l4num = l4num; + timeout->l4proto = l4proto; atomic_set(&timeout->refcnt, 1); list_add_tail_rcu(&timeout->head, &cttimeout_list); return 0; err: kfree(timeout); +err_proto_put: + nf_ct_l4proto_put(l4proto); return ret; } @@ -149,7 +158,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; unsigned int flags = pid ? NLM_F_MULTI : 0; - struct nf_conntrack_l4proto *l4proto; + struct nf_conntrack_l4proto *l4proto = timeout->l4proto; event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8; nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); @@ -163,20 +172,10 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, NLA_PUT_STRING(skb, CTA_TIMEOUT_NAME, timeout->name); NLA_PUT_BE16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->l3num)); - NLA_PUT_U8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4num); + NLA_PUT_U8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4proto->l4proto); NLA_PUT_BE32(skb, CTA_TIMEOUT_USE, htonl(atomic_read(&timeout->refcnt))); - l4proto = __nf_ct_l4proto_find(timeout->l3num, timeout->l4num); - - /* If the timeout object does not match the layer 4 protocol tracker, - * then skip dumping the data part since we don't know how to - * interpret it. This may happen for UPDlite, SCTP and DCCP since - * you can unload the module. - */ - if (timeout->l4num != l4proto->l4proto) - goto out; - if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) { struct nlattr *nest_parms; int ret; @@ -192,7 +191,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, nla_nest_end(skb, nest_parms); } -out: + nlmsg_end(skb, nlh); return skb->len; @@ -293,6 +292,7 @@ static int ctnl_timeout_try_del(struct ctnl_timeout *timeout) if (atomic_dec_and_test(&timeout->refcnt)) { /* We are protected by nfnl mutex. */ list_del_rcu(&timeout->head); + nf_ct_l4proto_put(timeout->l4proto); kfree_rcu(timeout, rcu_head); } else { /* still in use, restore reference counter. */ @@ -417,6 +417,7 @@ static void __exit cttimeout_exit(void) /* We are sure that our objects have no clients at this point, * it's safe to release them all without checking refcnt. */ + nf_ct_l4proto_put(cur->l4proto); kfree_rcu(cur, rcu_head); } #ifdef CONFIG_NF_CONNTRACK_TIMEOUT diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index b873445df444..0c8e43810ce3 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -14,8 +14,10 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_CT.h> #include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_ecache.h> +#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_timeout.h> #include <net/netfilter/nf_conntrack_zones.h> @@ -217,50 +219,59 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) struct ctnl_timeout *timeout; struct nf_conn_timeout *timeout_ext; + rcu_read_lock(); timeout_find_get = rcu_dereference(nf_ct_timeout_find_get_hook); if (timeout_find_get) { const struct ipt_entry *e = par->entryinfo; + struct nf_conntrack_l4proto *l4proto; if (e->ip.invflags & IPT_INV_PROTO) { ret = -EINVAL; pr_info("You cannot use inversion on " "L4 protocol\n"); - goto err3; + goto err4; } timeout = timeout_find_get(info->timeout); if (timeout == NULL) { ret = -ENOENT; pr_info("No such timeout policy \"%s\"\n", info->timeout); - goto err3; + goto err4; } if (timeout->l3num != par->family) { ret = -EINVAL; pr_info("Timeout policy `%s' can only be " "used by L3 protocol number %d\n", info->timeout, timeout->l3num); - goto err3; + goto err4; } - if (timeout->l4num != e->ip.proto) { + /* Make sure the timeout policy matches any existing + * protocol tracker, otherwise default to generic. + */ + l4proto = __nf_ct_l4proto_find(par->family, + e->ip.proto); + if (timeout->l4proto->l4proto != l4proto->l4proto) { ret = -EINVAL; pr_info("Timeout policy `%s' can only be " "used by L4 protocol number %d\n", - info->timeout, timeout->l4num); - goto err3; + info->timeout, + timeout->l4proto->l4proto); + goto err4; } timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_KERNEL); if (timeout_ext == NULL) { ret = -ENOMEM; - goto err3; + goto err4; } } else { ret = -ENOENT; pr_info("Timeout policy base is empty\n"); - goto err3; + goto err4; } + rcu_read_unlock(); } #endif @@ -270,6 +281,8 @@ out: info->ct = ct; return 0; +err4: + rcu_read_unlock(); err3: nf_conntrack_free(ct); err2: @@ -311,6 +324,7 @@ static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par) nf_ct_l3proto_module_put(par->family); #ifdef CONFIG_NF_CONNTRACK_TIMEOUT + rcu_read_lock(); timeout_put = rcu_dereference(nf_ct_timeout_put_hook); if (timeout_put) { @@ -318,6 +332,7 @@ static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par) if (timeout_ext) timeout_put(timeout_ext->timeout); } + rcu_read_unlock(); #endif } nf_ct_put(info->ct); diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index f99f8dee238b..ff5f75fddb15 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -480,7 +480,7 @@ ipt_log_packet(u_int8_t pf, sb_close(m); } -#if IS_ENABLED(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) /* One level of recursion won't kill us */ static void dump_ipv6_packet(struct sbuff *m, const struct nf_loginfo *info, @@ -824,7 +824,7 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par) if (par->family == NFPROTO_IPV4) ipt_log_packet(NFPROTO_IPV4, par->hooknum, skb, par->in, par->out, &li, loginfo->prefix); -#if IS_ENABLED(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) else if (par->family == NFPROTO_IPV6) ip6t_log_packet(NFPROTO_IPV6, par->hooknum, skb, par->in, par->out, &li, loginfo->prefix); @@ -864,7 +864,7 @@ static struct xt_target log_tg_regs[] __read_mostly = { .checkentry = log_tg_check, .me = THIS_MODULE, }, -#if IS_ENABLED(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "LOG", .family = NFPROTO_IPV6, @@ -882,7 +882,7 @@ static struct nf_logger ipt_log_logger __read_mostly = { .me = THIS_MODULE, }; -#if IS_ENABLED(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static struct nf_logger ip6t_log_logger __read_mostly = { .name = "ip6t_LOG", .logfn = &ip6t_log_packet, @@ -899,7 +899,7 @@ static int __init log_tg_init(void) return ret; nf_log_register(NFPROTO_IPV4, &ipt_log_logger); -#if IS_ENABLED(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) nf_log_register(NFPROTO_IPV6, &ip6t_log_logger); #endif return 0; @@ -908,7 +908,7 @@ static int __init log_tg_init(void) static void __exit log_tg_exit(void) { nf_log_unregister(&ipt_log_logger); -#if IS_ENABLED(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) nf_log_unregister(&ip6t_log_logger); #endif xt_unregister_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs)); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 7dab229bfbcc..06592d8b4a2b 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -31,7 +31,6 @@ #include <net/net_namespace.h> #include <net/sock.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/termios.h> /* For TIOCINQ/OUTQ */ #include <linux/mm.h> diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 64e6dde9749d..1c51d7a58f0b 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -21,7 +21,6 @@ #include <linux/if_ether.h> /* For the statistics structure. */ #include <linux/slab.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <asm/io.h> diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c index 6d4ef6d65b3d..c3073a2ef634 100644 --- a/net/netrom/nr_in.c +++ b/net/netrom/nr_in.c @@ -24,7 +24,6 @@ #include <net/sock.h> #include <net/tcp_states.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c index 607fddb4fdbb..0b4bcb2bf38f 100644 --- a/net/netrom/nr_out.c +++ b/net/netrom/nr_out.c @@ -23,7 +23,6 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 2cf330162d7e..70ffff76a967 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -26,7 +26,6 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/termios.h> /* For TIOCINQ/OUTQ */ #include <linux/mm.h> diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c index 6a947ae50dbd..ca40e2298f5a 100644 --- a/net/netrom/nr_subr.c +++ b/net/netrom/nr_subr.c @@ -23,7 +23,6 @@ #include <net/sock.h> #include <net/tcp_states.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index 1cb98e88f5e1..ff2c1b142f57 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -24,7 +24,6 @@ #include <net/sock.h> #include <net/tcp_states.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 2c030505b335..e44e631ea952 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -38,7 +38,6 @@ #include <linux/udp.h> #include <linux/ethtool.h> #include <linux/wait.h> -#include <asm/system.h> #include <asm/div64.h> #include <linux/highmem.h> #include <linux/netfilter_bridge.h> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ae2d484416dd..4f2c0df79563 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -73,7 +73,6 @@ #include <net/sock.h> #include <linux/errno.h> #include <linux/timer.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <asm/ioctls.h> #include <asm/page.h> diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index f9ea925ad9cb..c4719ce604c2 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -34,7 +34,6 @@ #include <linux/if_arp.h> #include <linux/skbuff.h> #include <net/sock.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <linux/fcntl.h> #include <linux/termios.h> diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index 178ff4f73c85..1ab8689726ec 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -21,7 +21,6 @@ #include <linux/if_ether.h> #include <linux/slab.h> -#include <asm/system.h> #include <asm/io.h> #include <linux/inet.h> diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c index 7f7fcb46b4fa..79c4abcfa6b4 100644 --- a/net/rose/rose_in.c +++ b/net/rose/rose_in.c @@ -26,7 +26,6 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <net/tcp_states.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index 7a02bd1cc5a0..bc5514211b0c 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -22,7 +22,6 @@ #include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/sock.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/rose/rose_out.c b/net/rose/rose_out.c index 4ebf33afbe47..9ad98b524646 100644 --- a/net/rose/rose_out.c +++ b/net/rose/rose_out.c @@ -21,7 +21,6 @@ #include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/sock.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index cd9b7ee60f3e..40148932c8a4 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -25,7 +25,6 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <net/tcp_states.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <linux/fcntl.h> #include <linux/termios.h> /* For TIOCINQ/OUTQ */ diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index f6c71caa94b9..47f1fdb346b0 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -22,7 +22,6 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <net/tcp_states.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index b6c8f38cc26c..bc5469d6d9cb 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c @@ -23,7 +23,6 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <net/tcp_states.h> -#include <asm/system.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 7a4cb5fdc212..67972462a543 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -17,7 +17,6 @@ * Copyright (C) 1995,1996 Olaf Kirch <okir@monad.swb.de> */ -#include <asm/system.h> #include <linux/module.h> #include <linux/types.h> |