From 64b0812b6d26bb30cac74c65f51f4ebfb4ec5429 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 15 Oct 2007 11:50:38 +0900 Subject: SCTP : Fix bad formatted comment in outqueue.c Just fix the bad format of the comment in outqueue.c. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- net/sctp/outqueue.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 28f4fe77ceee..e315c6c756ca 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -641,7 +641,8 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, /* If we are here due to a retransmit timeout or a fast * retransmit and if there are any chunks left in the retransmit - * queue that could not fit in the PMTU sized packet, they need * to be marked as ineligible for a subsequent fast retransmit. + * queue that could not fit in the PMTU sized packet, they need + * to be marked as ineligible for a subsequent fast retransmit. */ if (rtx_timeout && !lchunk) { list_for_each(lchunk1, lqueue) { -- cgit v1.2.3 From f3830ccc2ea503ab37d605f6c313d61423ddd94e Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 15 Oct 2007 11:51:03 +0900 Subject: SCTP : Fix to process bundled ASCONF chunk correctly If ASCONF chunk is bundled with other chunks as the first chunk, when process the ASCONF parameters, full packet data will be process as the parameters of the ASCONF chunk, not only the real parameters. So if you send a ASCONF chunk bundled with other chunks, you will get an unexpect result. This problem also exists when ASCONF-ACK chunk is bundled with other chunks. This patch fix this problem. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- net/sctp/sm_make_chunk.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index c377e4e8f653..5de4729fe993 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2848,10 +2848,11 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, __be16 err_code; int length = 0; - int chunk_len = asconf->skb->len; + int chunk_len; __u32 serial; int all_param_pass = 1; + chunk_len = ntohs(asconf->chunk_hdr->length) - sizeof(sctp_chunkhdr_t); hdr = (sctp_addiphdr_t *)asconf->skb->data; serial = ntohl(hdr->serial); @@ -2990,7 +2991,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, sctp_addip_param_t *asconf_ack_param; sctp_errhdr_t *err_param; int length; - int asconf_ack_len = asconf_ack->skb->len; + int asconf_ack_len; __be16 err_code; if (no_err) @@ -2998,6 +2999,9 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, else err_code = SCTP_ERROR_REQ_REFUSED; + asconf_ack_len = ntohs(asconf_ack->chunk_hdr->length) - + sizeof(sctp_chunkhdr_t); + /* Skip the addiphdr from the asconf_ack chunk and store a pointer to * the first asconf_ack parameter. */ -- cgit v1.2.3 From b6157d8e03e1e780660a328f7183bcbfa4a93a19 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 24 Oct 2007 15:59:16 -0400 Subject: SCTP: Fix difference cases of retransmit. Commit d0ce92910bc04e107b2f3f2048f07e94f570035d broke several retransmit cases including fast retransmit. The reason is that we should only delay by rto while doing retranmists as a result of a timeout. Retransmit as a result of path mtu discover, fast retransmit, or other evernts that should trigger immidiate retransmissions got broken. Also, since rto is doubled prior to marking of packets elegable for retransmission, we never marked correct chunks anyway. The fix is provide a reason for a given retransmission so that we can mark chunks appropriately and to save the old rto value to do comparisons against. All regressions tests passed with this code. Spotted by Wei Yongjun Signed-off-by: Vlad Yasevich --- include/net/sctp/command.h | 1 + include/net/sctp/constants.h | 1 + include/net/sctp/sctp.h | 1 + include/net/sctp/structs.h | 5 +++-- net/sctp/outqueue.c | 33 +++++++++++++++++---------------- net/sctp/sm_sideeffect.c | 10 +++++++++- net/sctp/sm_statefuns.c | 2 +- net/sctp/transport.c | 5 +++-- 8 files changed, 36 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index b8733364557f..c1f797673571 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -103,6 +103,7 @@ typedef enum { SCTP_CMD_ASSOC_CHANGE, /* generate and send assoc_change event */ SCTP_CMD_ADAPTATION_IND, /* generate and send adaptation event */ SCTP_CMD_ASSOC_SHKEY, /* generate the association shared keys */ + SCTP_CMD_T1_RETRAN, /* Mark for retransmission after T1 timeout */ SCTP_CMD_LAST } sctp_verb_t; diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index da8354e8e33c..73fbdf6a24f8 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -407,6 +407,7 @@ typedef enum { SCTP_RTXR_T3_RTX, SCTP_RTXR_FAST_RTX, SCTP_RTXR_PMTUD, + SCTP_RTXR_T1_RTX, } sctp_retransmit_reason_t; /* Reasons to lower cwnd. */ diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 93eb708609e7..70827305f501 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -267,6 +267,7 @@ enum SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS, SCTP_MIB_DELAY_SACK_EXPIREDS, SCTP_MIB_AUTOCLOSE_EXPIREDS, + SCTP_MIB_T1_RETRANSMITS, SCTP_MIB_T3_RETRANSMITS, SCTP_MIB_PMTUD_RETRANSMITS, SCTP_MIB_FAST_RETRANSMITS, diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ef892e00c833..482c2aab3d67 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -873,10 +873,11 @@ struct sctp_transport { * address list derived from the INIT or INIT ACK chunk, a * number of data elements needs to be maintained including: */ - __u32 rtt; /* This is the most recent RTT. */ - /* RTO : The current retransmission timeout value. */ unsigned long rto; + unsigned long last_rto; + + __u32 rtt; /* This is the most recent RTT. */ /* RTTVAR : The current RTT variation. */ __u32 rttvar; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index e315c6c756ca..99a3db5d5fae 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -382,7 +382,7 @@ static void sctp_insert_list(struct list_head *head, struct list_head *new) /* Mark all the eligible packets on a transport for retransmission. */ void sctp_retransmit_mark(struct sctp_outq *q, struct sctp_transport *transport, - __u8 fast_retransmit) + __u8 reason) { struct list_head *lchunk, *ltemp; struct sctp_chunk *chunk; @@ -412,20 +412,20 @@ void sctp_retransmit_mark(struct sctp_outq *q, continue; } - /* If we are doing retransmission due to a fast retransmit, - * only the chunk's that are marked for fast retransmit - * should be added to the retransmit queue. If we are doing - * retransmission due to a timeout or pmtu discovery, only the - * chunks that are not yet acked should be added to the - * retransmit queue. + /* If we are doing retransmission due to a timeout or pmtu + * discovery, only the chunks that are not yet acked should + * be added to the retransmit queue. */ - if ((fast_retransmit && (chunk->fast_retransmit > 0)) || - (!fast_retransmit && !chunk->tsn_gap_acked)) { + if ((reason == SCTP_RTXR_FAST_RTX && + (chunk->fast_retransmit > 0)) || + (reason != SCTP_RTXR_FAST_RTX && !chunk->tsn_gap_acked)) { /* If this chunk was sent less then 1 rto ago, do not * retransmit this chunk, but give the peer time - * to acknowlege it. + * to acknowlege it. Do this only when + * retransmitting due to T3 timeout. */ - if ((jiffies - chunk->sent_at) < transport->rto) + if (reason == SCTP_RTXR_T3_RTX && + (jiffies - chunk->sent_at) < transport->last_rto) continue; /* RFC 2960 6.2.1 Processing a Received SACK @@ -467,10 +467,10 @@ void sctp_retransmit_mark(struct sctp_outq *q, } } - SCTP_DEBUG_PRINTK("%s: transport: %p, fast_retransmit: %d, " + SCTP_DEBUG_PRINTK("%s: transport: %p, reason: %d, " "cwnd: %d, ssthresh: %d, flight_size: %d, " "pba: %d\n", __FUNCTION__, - transport, fast_retransmit, + transport, reason, transport->cwnd, transport->ssthresh, transport->flight_size, transport->partial_bytes_acked); @@ -484,7 +484,6 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, sctp_retransmit_reason_t reason) { int error = 0; - __u8 fast_retransmit = 0; switch(reason) { case SCTP_RTXR_T3_RTX: @@ -499,16 +498,18 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, case SCTP_RTXR_FAST_RTX: SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX); - fast_retransmit = 1; break; case SCTP_RTXR_PMTUD: SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS); break; + case SCTP_RTXR_T1_RTX: + SCTP_INC_STATS(SCTP_MIB_T1_RETRANSMITS); + break; default: BUG(); } - sctp_retransmit_mark(q, transport, fast_retransmit); + sctp_retransmit_mark(q, transport, reason); /* PR-SCTP A5) Any time the T3-rtx timer expires, on any destination, * the sender SHOULD try to advance the "Advanced.Peer.Ack.Point" by diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index bbdc938da86f..78d1a8a49bd0 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -453,6 +453,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, * maximum value discussed in rule C7 above (RTO.max) may be * used to provide an upper bound to this doubling operation. */ + transport->last_rto = transport->rto; transport->rto = min((transport->rto * 2), transport->asoc->rto_max); } @@ -1267,6 +1268,12 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_ootb_pkt_free(packet); break; + case SCTP_CMD_T1_RETRAN: + /* Mark a transport for retransmission. */ + sctp_retransmit(&asoc->outqueue, cmd->obj.transport, + SCTP_RTXR_T1_RTX); + break; + case SCTP_CMD_RETRAN: /* Mark a transport for retransmission. */ sctp_retransmit(&asoc->outqueue, cmd->obj.transport, @@ -1393,7 +1400,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, list_for_each(pos, &asoc->peer.transport_addr_list) { t = list_entry(pos, struct sctp_transport, transports); - sctp_retransmit_mark(&asoc->outqueue, t, 0); + sctp_retransmit_mark(&asoc->outqueue, t, + SCTP_RTXR_T1_RTX); } sctp_add_cmd_sf(commands, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index f01b408508ff..a66075a70f29 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2305,7 +2305,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep, /* If we've sent any data bundled with COOKIE-ECHO we will need to * resend */ - sctp_add_cmd_sf(commands, SCTP_CMD_RETRAN, + sctp_add_cmd_sf(commands, SCTP_CMD_T1_RETRAN, SCTP_TRANSPORT(asoc->peer.primary_path)); /* Cast away the const modifier, as we want to just diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 5f467c914f80..d55ce83a020b 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -74,8 +74,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, * given destination transport address, set RTO to the protocol * parameter 'RTO.Initial'. */ + peer->last_rto = peer->rto = msecs_to_jiffies(sctp_rto_initial); peer->rtt = 0; - peer->rto = msecs_to_jiffies(sctp_rto_initial); peer->rttvar = 0; peer->srtt = 0; peer->rto_pending = 0; @@ -385,6 +385,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) tp->rto = tp->asoc->rto_max; tp->rtt = rtt; + tp->last_rto = tp->rto; /* Reset rto_pending so that a new RTT measurement is started when a * new data chunk is sent. @@ -578,7 +579,7 @@ void sctp_transport_reset(struct sctp_transport *t) */ t->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); t->ssthresh = asoc->peer.i.a_rwnd; - t->rto = asoc->rto_initial; + t->last_rto = t->rto = asoc->rto_initial; t->rtt = 0; t->srtt = 0; t->rttvar = 0; -- cgit v1.2.3 From 0ed90fb0f668fd07f14ae2007a809e8b26cd27a6 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 24 Oct 2007 16:10:00 -0400 Subject: SCTP: Update RCU handling during the ADD-IP case After learning more about rcu, it looks like the ADD-IP hadling doesn't need to call call_rcu_bh. All the rcu critical sections use rcu_read_lock, so using call_rcu_bh is wrong here. Now, restore the local_bh_disable() code blocks and use normal call_rcu() calls. Also restore the missing return statement. Signed-off-by: Vlad Yasevich --- include/net/sctp/structs.h | 4 +--- net/sctp/bind_addr.c | 13 +++---------- net/sctp/sm_make_chunk.c | 6 +++++- net/sctp/socket.c | 2 +- 4 files changed, 10 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 482c2aab3d67..a17701740624 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1185,9 +1185,7 @@ int sctp_bind_addr_copy(struct sctp_bind_addr *dest, int flags); int sctp_add_bind_addr(struct sctp_bind_addr *, union sctp_addr *, __u8 use_as_src, gfp_t gfp); -int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *, - void fastcall (*rcu_call)(struct rcu_head *, - void (*func)(struct rcu_head *))); +int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *); int sctp_bind_addr_match(struct sctp_bind_addr *, const union sctp_addr *, struct sctp_sock *); union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index dfffa94fb9f6..cae95af9a8cc 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -180,9 +180,7 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, /* Delete an address from the bind address list in the SCTP_bind_addr * structure. */ -int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr, - void fastcall (*rcu_call)(struct rcu_head *head, - void (*func)(struct rcu_head *head))) +int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr) { struct sctp_sockaddr_entry *addr, *temp; @@ -198,15 +196,10 @@ int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr, } } - /* Call the rcu callback provided in the args. This function is - * called by both BH packet processing and user side socket option - * processing, but it works on different lists in those 2 contexts. - * Each context provides it's own callback, whether call_rcu_bh() - * or call_rcu(), to make sure that we wait for an appropriate time. - */ if (addr && !addr->valid) { - rcu_call(&addr->rcu, sctp_local_addr_free); + call_rcu(&addr->rcu, sctp_local_addr_free); SCTP_DBG_OBJCNT_DEC(addr); + return 0; } return -EINVAL; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 5de4729fe993..c60564dd169d 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2953,13 +2953,17 @@ static int sctp_asconf_param_success(struct sctp_association *asoc, /* This is always done in BH context with a socket lock * held, so the list can not change. */ + local_bh_disable(); list_for_each_entry(saddr, &bp->address_list, list) { if (sctp_cmp_addr_exact(&saddr->a, &addr)) saddr->use_as_src = 1; } + local_bh_enable(); break; case SCTP_PARAM_DEL_IP: - retval = sctp_del_bind_addr(bp, &addr, call_rcu_bh); + local_bh_disable(); + retval = sctp_del_bind_addr(bp, &addr); + local_bh_enable(); list_for_each(pos, &asoc->peer.transport_addr_list) { transport = list_entry(pos, struct sctp_transport, transports); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a7ecf3159e53..6ce9b490fad5 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -660,7 +660,7 @@ static int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt) * socket routing and failover schemes. Refer to comments in * sctp_do_bind(). -daisy */ - retval = sctp_del_bind_addr(bp, sa_addr, call_rcu); + retval = sctp_del_bind_addr(bp, sa_addr); addr_buf += af->sockaddr_len; err_bindx_rem: -- cgit v1.2.3 From 88799fe5ec65fad1d5cb1d4dc5d8f78edb949f1c Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 24 Oct 2007 17:24:23 -0400 Subject: SCTP: Correctly disable ADD-IP when AUTH is not supported. Signed-off-by: Vlad Yasevich --- include/net/sctp/structs.h | 1 - net/sctp/associola.c | 2 +- net/sctp/sm_make_chunk.c | 5 +++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index a17701740624..41f1039186dd 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1540,7 +1540,6 @@ struct sctp_association { __u8 asconf_capable; /* Does peer support ADDIP? */ __u8 prsctp_capable; /* Can peer do PR-SCTP? */ __u8 auth_capable; /* Is peer doing SCTP-AUTH? */ - __u8 addip_capable; /* Can peer do ADD-IP */ __u32 adaptation_ind; /* Adaptation Code point. */ diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 03158e3665da..eaad5c5535a8 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -265,7 +265,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Assume that the peer recongizes ASCONF until reported otherwise * via an ERROR chunk. */ - asoc->peer.asconf_capable = 1; + asoc->peer.asconf_capable = 0; /* Create an input queue. */ sctp_inq_init(&asoc->base.inqueue); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index c60564dd169d..2ff3a3df049d 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1847,7 +1847,7 @@ static void sctp_process_ext_param(struct sctp_association *asoc, break; case SCTP_CID_ASCONF: case SCTP_CID_ASCONF_ACK: - asoc->peer.addip_capable = 1; + asoc->peer.asconf_capable = 1; break; default: break; @@ -2138,10 +2138,11 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, /* If the peer claims support for ADD-IP without support * for AUTH, disable support for ADD-IP. */ - if (asoc->peer.addip_capable && !asoc->peer.auth_capable) { + if (asoc->peer.asconf_capable && !asoc->peer.auth_capable) { asoc->peer.addip_disabled_mask |= (SCTP_PARAM_ADD_IP | SCTP_PARAM_DEL_IP | SCTP_PARAM_SET_PRIMARY); + asoc->peer.asconf_capable = 0; } /* Walk list of transports, removing transports in the UNKNOWN state. */ -- cgit v1.2.3 From 73d9c4fd1a6ec4950b2eac8135d35506bf400d6c Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 24 Oct 2007 17:24:26 -0400 Subject: SCTP: Allow ADD_IP to work with AUTH for backward compatibility. This patch adds a tunable that will allow ADD_IP to work without AUTH for backward compatibility. The default value is off since the default value for ADD_IP is off as well. People who need to use ADD-IP with older implementations take risks of connection hijacking and should consider upgrading or turning this tunable on. Signed-off-by: Vlad Yasevich --- include/net/sctp/structs.h | 2 ++ net/sctp/associola.c | 8 ++++++-- net/sctp/protocol.c | 1 + net/sctp/sm_make_chunk.c | 4 +++- net/sctp/sysctl.c | 9 +++++++++ 5 files changed, 21 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 41f1039186dd..44f2672859e2 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -212,6 +212,7 @@ extern struct sctp_globals { /* Flag to indicate if addip is enabled. */ int addip_enable; + int addip_noauth_enable; /* Flag to indicate if PR-SCTP is enabled. */ int prsctp_enable; @@ -249,6 +250,7 @@ extern struct sctp_globals { #define sctp_local_addr_list (sctp_globals.local_addr_list) #define sctp_local_addr_lock (sctp_globals.addr_list_lock) #define sctp_addip_enable (sctp_globals.addip_enable) +#define sctp_addip_noauth (sctp_globals.addip_noauth_enable) #define sctp_prsctp_enable (sctp_globals.prsctp_enable) #define sctp_auth_enable (sctp_globals.auth_enable) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index eaad5c5535a8..013e3d3ab0f1 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -262,10 +262,14 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a */ asoc->peer.sack_needed = 1; - /* Assume that the peer recongizes ASCONF until reported otherwise - * via an ERROR chunk. + /* Assume that the peer will tell us if he recognizes ASCONF + * as part of INIT exchange. + * The sctp_addip_noauth option is there for backward compatibilty + * and will revert old behavior. */ asoc->peer.asconf_capable = 0; + if (sctp_addip_noauth) + asoc->peer.asconf_capable = 1; /* Create an input queue. */ sctp_inq_init(&asoc->base.inqueue); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 40c1a47d1b8d..ecfab0344e73 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1179,6 +1179,7 @@ SCTP_STATIC __init int sctp_init(void) /* Disable ADDIP by default. */ sctp_addip_enable = 0; + sctp_addip_noauth = 0; /* Enable PR-SCTP by default. */ sctp_prsctp_enable = 1; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 2ff3a3df049d..43e8de1228f9 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2137,8 +2137,10 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, /* If the peer claims support for ADD-IP without support * for AUTH, disable support for ADD-IP. + * Do this only if backward compatible mode is turned off. */ - if (asoc->peer.asconf_capable && !asoc->peer.auth_capable) { + if (!sctp_addip_noauth && + (asoc->peer.asconf_capable && !asoc->peer.auth_capable)) { asoc->peer.addip_disabled_mask |= (SCTP_PARAM_ADD_IP | SCTP_PARAM_DEL_IP | SCTP_PARAM_SET_PRIMARY); diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 0669778e4335..da4f15734fb1 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -263,6 +263,15 @@ static ctl_table sctp_table[] = { .proc_handler = &proc_dointvec, .strategy = &sysctl_intvec }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "addip_noauth_enable", + .data = &sctp_addip_noauth, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec + }, { .ctl_name = 0 } }; -- cgit v1.2.3 From 027f6e1ad32de32f9fe1c61d0f744e329e8acfd9 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 7 Nov 2007 11:39:27 -0500 Subject: SCTP: Fix a potential race between timers and receive path. There is a possible race condition where the timer code will free the association and the next packet in the queue will also attempt to free the same association. The example is, when we receive an ABORT at about the same time as the retransmission timer fires. If the timer wins the race, it will free the association. Once it releases the lock, the queue processing will recieve the ABORT and will try to free the association again. Signed-off-by: Vlad Yasevich --- net/sctp/inqueue.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index f10fe7fbf24c..cf4b7eb023b3 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -90,6 +90,10 @@ void sctp_inq_free(struct sctp_inq *queue) void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk) { /* Directly call the packet handling routine. */ + if (chunk->rcvr->dead) { + sctp_chunk_free(chunk); + return; + } /* We are now calling this either from the soft interrupt * or from the backlog processing. -- cgit v1.2.3 From 123ed979eaa8de0dd2422862d247469eda0bd645 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 9 Nov 2007 11:41:36 -0500 Subject: SCTP: Use hashed lookup when looking for an association. A SCTP endpoint may have a lot of associations on them and walking the list is fairly inefficient. Instead, use a hashed lookup, and filter out the hash list based on the endopoing we already have. Signed-off-by: Vlad Yasevich --- net/sctp/endpointola.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 2d2d81ef4a69..68f0556efcc6 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -328,24 +328,34 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc( const union sctp_addr *paddr, struct sctp_transport **transport) { + struct sctp_association *asoc = NULL; + struct sctp_transport *t = NULL; + struct sctp_hashbucket *head; + struct sctp_ep_common *epb; + int hash; int rport; - struct sctp_association *asoc; - struct list_head *pos; + *transport = NULL; rport = ntohs(paddr->v4.sin_port); - list_for_each(pos, &ep->asocs) { - asoc = list_entry(pos, struct sctp_association, asocs); - if (rport == asoc->peer.port) { - *transport = sctp_assoc_lookup_paddr(asoc, paddr); - - if (*transport) - return asoc; + hash = sctp_assoc_hashfn(ep->base.bind_addr.port, rport); + head = &sctp_assoc_hashtable[hash]; + read_lock(&head->lock); + for (epb = head->chain; epb; epb = epb->next) { + asoc = sctp_assoc(epb); + if (asoc->ep != ep || rport != asoc->peer.port) + goto next; + + t = sctp_assoc_lookup_paddr(asoc, paddr); + if (t) { + *transport = t; + break; } +next: + asoc = NULL; } - - *transport = NULL; - return NULL; + read_unlock(&head->lock); + return asoc; } /* Lookup association on an endpoint based on a peer address. BH-safe. */ -- cgit v1.2.3 From d970dbf8455eb1b8cebd3cde6e18f73dd1b3ce38 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 9 Nov 2007 11:43:40 -0500 Subject: SCTP: Convert custom hash lists to use hlist. Convert the custom hash list traversals to use hlist functions. Signed-off-by: Vlad Yasevich --- include/net/sctp/sctp.h | 3 +++ include/net/sctp/structs.h | 10 ++++------ net/sctp/endpointola.c | 3 ++- net/sctp/input.c | 43 +++++++++++-------------------------------- net/sctp/proc.c | 6 ++++-- net/sctp/protocol.c | 6 +++--- net/sctp/socket.c | 14 +++++--------- 7 files changed, 32 insertions(+), 53 deletions(-) (limited to 'net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 70827305f501..67c997cecf58 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -665,6 +665,9 @@ static inline int sctp_vtag_hashfn(__u16 lport, __u16 rport, __u32 vtag) return (h & (sctp_assoc_hashsize-1)); } +#define sctp_for_each_hentry(epb, node, head) \ + hlist_for_each_entry(epb, node, head, node) + /* Is a socket of this style? */ #define sctp_style(sk, style) __sctp_style((sk), (SCTP_SOCKET_##style)) static inline int __sctp_style(const struct sock *sk, sctp_socket_type_t style) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 44f2672859e2..eb3113c38a94 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -100,20 +100,19 @@ struct crypto_hash; struct sctp_bind_bucket { unsigned short port; unsigned short fastreuse; - struct sctp_bind_bucket *next; - struct sctp_bind_bucket **pprev; + struct hlist_node node; struct hlist_head owner; }; struct sctp_bind_hashbucket { spinlock_t lock; - struct sctp_bind_bucket *chain; + struct hlist_head chain; }; /* Used for hashing all associations. */ struct sctp_hashbucket { rwlock_t lock; - struct sctp_ep_common *chain; + struct hlist_head chain; } __attribute__((__aligned__(8))); @@ -1230,8 +1229,7 @@ typedef enum { struct sctp_ep_common { /* Fields to help us manage our entries in the hash tables. */ - struct sctp_ep_common *next; - struct sctp_ep_common **pprev; + struct hlist_node node; int hashent; /* Runtime type information. What kind of endpoint is this? */ diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 68f0556efcc6..de6f505d6ff8 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -332,6 +332,7 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc( struct sctp_transport *t = NULL; struct sctp_hashbucket *head; struct sctp_ep_common *epb; + struct hlist_node *node; int hash; int rport; @@ -341,7 +342,7 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc( hash = sctp_assoc_hashfn(ep->base.bind_addr.port, rport); head = &sctp_assoc_hashtable[hash]; read_lock(&head->lock); - for (epb = head->chain; epb; epb = epb->next) { + sctp_for_each_hentry(epb, node, &head->chain) { asoc = sctp_assoc(epb); if (asoc->ep != ep || rport != asoc->peer.port) goto next; diff --git a/net/sctp/input.c b/net/sctp/input.c index 86503e7fa21e..91ae463b079b 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -656,7 +656,6 @@ discard: /* Insert endpoint into the hash table. */ static void __sctp_hash_endpoint(struct sctp_endpoint *ep) { - struct sctp_ep_common **epp; struct sctp_ep_common *epb; struct sctp_hashbucket *head; @@ -666,12 +665,7 @@ static void __sctp_hash_endpoint(struct sctp_endpoint *ep) head = &sctp_ep_hashtable[epb->hashent]; sctp_write_lock(&head->lock); - epp = &head->chain; - epb->next = *epp; - if (epb->next) - (*epp)->pprev = &epb->next; - *epp = epb; - epb->pprev = epp; + hlist_add_head(&epb->node, &head->chain); sctp_write_unlock(&head->lock); } @@ -691,19 +685,15 @@ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep) epb = &ep->base; + if (hlist_unhashed(&epb->node)) + return; + epb->hashent = sctp_ep_hashfn(epb->bind_addr.port); head = &sctp_ep_hashtable[epb->hashent]; sctp_write_lock(&head->lock); - - if (epb->pprev) { - if (epb->next) - epb->next->pprev = epb->pprev; - *epb->pprev = epb->next; - epb->pprev = NULL; - } - + __hlist_del(&epb->node); sctp_write_unlock(&head->lock); } @@ -721,12 +711,13 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *l struct sctp_hashbucket *head; struct sctp_ep_common *epb; struct sctp_endpoint *ep; + struct hlist_node *node; int hash; hash = sctp_ep_hashfn(ntohs(laddr->v4.sin_port)); head = &sctp_ep_hashtable[hash]; read_lock(&head->lock); - for (epb = head->chain; epb; epb = epb->next) { + sctp_for_each_hentry(epb, node, &head->chain) { ep = sctp_ep(epb); if (sctp_endpoint_is_match(ep, laddr)) goto hit; @@ -744,7 +735,6 @@ hit: /* Insert association into the hash table. */ static void __sctp_hash_established(struct sctp_association *asoc) { - struct sctp_ep_common **epp; struct sctp_ep_common *epb; struct sctp_hashbucket *head; @@ -756,12 +746,7 @@ static void __sctp_hash_established(struct sctp_association *asoc) head = &sctp_assoc_hashtable[epb->hashent]; sctp_write_lock(&head->lock); - epp = &head->chain; - epb->next = *epp; - if (epb->next) - (*epp)->pprev = &epb->next; - *epp = epb; - epb->pprev = epp; + hlist_add_head(&epb->node, &head->chain); sctp_write_unlock(&head->lock); } @@ -790,14 +775,7 @@ static void __sctp_unhash_established(struct sctp_association *asoc) head = &sctp_assoc_hashtable[epb->hashent]; sctp_write_lock(&head->lock); - - if (epb->pprev) { - if (epb->next) - epb->next->pprev = epb->pprev; - *epb->pprev = epb->next; - epb->pprev = NULL; - } - + __hlist_del(&epb->node); sctp_write_unlock(&head->lock); } @@ -822,6 +800,7 @@ static struct sctp_association *__sctp_lookup_association( struct sctp_ep_common *epb; struct sctp_association *asoc; struct sctp_transport *transport; + struct hlist_node *node; int hash; /* Optimize here for direct hit, only listening connections can @@ -830,7 +809,7 @@ static struct sctp_association *__sctp_lookup_association( hash = sctp_assoc_hashfn(ntohs(local->v4.sin_port), ntohs(peer->v4.sin_port)); head = &sctp_assoc_hashtable[hash]; read_lock(&head->lock); - for (epb = head->chain; epb; epb = epb->next) { + sctp_for_each_hentry(epb, node, &head->chain) { asoc = sctp_assoc(epb); transport = sctp_assoc_is_match(asoc, local, peer); if (transport) diff --git a/net/sctp/proc.c b/net/sctp/proc.c index e4cd841a22e4..249973204070 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -225,6 +225,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) struct sctp_ep_common *epb; struct sctp_endpoint *ep; struct sock *sk; + struct hlist_node *node; int hash = *(loff_t *)v; if (hash >= sctp_ep_hashsize) @@ -233,7 +234,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) head = &sctp_ep_hashtable[hash]; sctp_local_bh_disable(); read_lock(&head->lock); - for (epb = head->chain; epb; epb = epb->next) { + sctp_for_each_hentry(epb, node, &head->chain) { ep = sctp_ep(epb); sk = epb->sk; seq_printf(seq, "%8p %8p %-3d %-3d %-4d %-5d %5d %5lu ", ep, sk, @@ -328,6 +329,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) struct sctp_ep_common *epb; struct sctp_association *assoc; struct sock *sk; + struct hlist_node *node; int hash = *(loff_t *)v; if (hash >= sctp_assoc_hashsize) @@ -336,7 +338,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) head = &sctp_assoc_hashtable[hash]; sctp_local_bh_disable(); read_lock(&head->lock); - for (epb = head->chain; epb; epb = epb->next) { + sctp_for_each_hentry(epb, node, &head->chain) { assoc = sctp_assoc(epb); sk = epb->sk; seq_printf(seq, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index ecfab0344e73..d50f610d1b02 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1137,7 +1137,7 @@ SCTP_STATIC __init int sctp_init(void) } for (i = 0; i < sctp_assoc_hashsize; i++) { rwlock_init(&sctp_assoc_hashtable[i].lock); - sctp_assoc_hashtable[i].chain = NULL; + INIT_HLIST_HEAD(&sctp_assoc_hashtable[i].chain); } /* Allocate and initialize the endpoint hash table. */ @@ -1151,7 +1151,7 @@ SCTP_STATIC __init int sctp_init(void) } for (i = 0; i < sctp_ep_hashsize; i++) { rwlock_init(&sctp_ep_hashtable[i].lock); - sctp_ep_hashtable[i].chain = NULL; + INIT_HLIST_HEAD(&sctp_ep_hashtable[i].chain); } /* Allocate and initialize the SCTP port hash table. */ @@ -1170,7 +1170,7 @@ SCTP_STATIC __init int sctp_init(void) } for (i = 0; i < sctp_port_hashsize; i++) { spin_lock_init(&sctp_port_hashtable[i].lock); - sctp_port_hashtable[i].chain = NULL; + INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain); } printk(KERN_INFO "SCTP: Hash tables configured " diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6ce9b490fad5..ff8bc95670ed 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5307,6 +5307,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) { struct sctp_bind_hashbucket *head; /* hash list */ struct sctp_bind_bucket *pp; /* hash list port iterator */ + struct hlist_node *node; unsigned short snum; int ret; @@ -5331,7 +5332,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) index = sctp_phashfn(rover); head = &sctp_port_hashtable[index]; sctp_spin_lock(&head->lock); - for (pp = head->chain; pp; pp = pp->next) + sctp_for_each_hentry(pp, node, &head->chain) if (pp->port == rover) goto next; break; @@ -5358,7 +5359,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) */ head = &sctp_port_hashtable[sctp_phashfn(snum)]; sctp_spin_lock(&head->lock); - for (pp = head->chain; pp; pp = pp->next) { + sctp_for_each_hentry(pp, node, &head->chain) { if (pp->port == snum) goto pp_found; } @@ -5702,10 +5703,7 @@ static struct sctp_bind_bucket *sctp_bucket_create( pp->port = snum; pp->fastreuse = 0; INIT_HLIST_HEAD(&pp->owner); - if ((pp->next = head->chain) != NULL) - pp->next->pprev = &pp->next; - head->chain = pp; - pp->pprev = &head->chain; + hlist_add_head(&pp->node, &head->chain); } return pp; } @@ -5714,9 +5712,7 @@ static struct sctp_bind_bucket *sctp_bucket_create( static void sctp_bucket_destroy(struct sctp_bind_bucket *pp) { if (pp && hlist_empty(&pp->owner)) { - if (pp->next) - pp->next->pprev = pp->pprev; - *(pp->pprev) = pp->next; + __hlist_del(&pp->node); kmem_cache_free(sctp_bucket_cachep, pp); SCTP_DBG_OBJCNT_DEC(bind_bucket); } -- cgit v1.2.3 From 7ab9080467040054e27ae54d67cc185f24d881ae Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 9 Nov 2007 11:43:41 -0500 Subject: SCTP: Make sctp_verify_param return multiple indications. SCTP-AUTH and future ADD-IP updates have a requirement to do additional verification of parameters and an ability to ABORT the association if verification fails. So, introduce additional return code so that we can clear signal a required action. Signed-off-by: Vlad Yasevich --- include/net/sctp/constants.h | 2 + net/sctp/sm_make_chunk.c | 149 ++++++++++++++++++++++--------------------- 2 files changed, 77 insertions(+), 74 deletions(-) (limited to 'net') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 73fbdf6a24f8..f30b537d6952 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -186,6 +186,8 @@ typedef enum { SCTP_IERROR_AUTH_BAD_HMAC, SCTP_IERROR_AUTH_BAD_KEYID, SCTP_IERROR_PROTO_VIOLATION, + SCTP_IERROR_ERROR, + SCTP_IERROR_ABORT, } sctp_ierror_t; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 43e8de1228f9..5a9783c38de1 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1788,9 +1788,14 @@ static int sctp_process_inv_paramlength(const struct sctp_association *asoc, sizeof(sctp_paramhdr_t); + /* This is a fatal error. Any accumulated non-fatal errors are + * not reported. + */ + if (*errp) + sctp_chunk_free(*errp); + /* Create an error chunk and fill it in with our payload. */ - if (!*errp) - *errp = sctp_make_op_error_space(asoc, chunk, payload_len); + *errp = sctp_make_op_error_space(asoc, chunk, payload_len); if (*errp) { sctp_init_cause(*errp, SCTP_ERROR_PROTO_VIOLATION, @@ -1813,9 +1818,15 @@ static int sctp_process_hn_param(const struct sctp_association *asoc, { __u16 len = ntohs(param.p->length); - /* Make an ERROR chunk. */ - if (!*errp) - *errp = sctp_make_op_error_space(asoc, chunk, len); + /* Processing of the HOST_NAME parameter will generate an + * ABORT. If we've accumulated any non-fatal errors, they + * would be unrecognized parameters and we should not include + * them in the ABORT. + */ + if (*errp) + sctp_chunk_free(*errp); + + *errp = sctp_make_op_error_space(asoc, chunk, len); if (*errp) { sctp_init_cause(*errp, SCTP_ERROR_DNS_FAILED, len); @@ -1862,56 +1873,40 @@ static void sctp_process_ext_param(struct sctp_association *asoc, * taken if the processing endpoint does not recognize the * Parameter Type. * - * 00 - Stop processing this SCTP chunk and discard it, - * do not process any further chunks within it. + * 00 - Stop processing this parameter; do not process any further + * parameters within this chunk * - * 01 - Stop processing this SCTP chunk and discard it, - * do not process any further chunks within it, and report - * the unrecognized parameter in an 'Unrecognized - * Parameter Type' (in either an ERROR or in the INIT ACK). + * 01 - Stop processing this parameter, do not process any further + * parameters within this chunk, and report the unrecognized + * parameter in an 'Unrecognized Parameter' ERROR chunk. * * 10 - Skip this parameter and continue processing. * * 11 - Skip this parameter and continue processing but * report the unrecognized parameter in an - * 'Unrecognized Parameter Type' (in either an ERROR or in - * the INIT ACK). + * 'Unrecognized Parameter' ERROR chunk. * * Return value: - * 0 - discard the chunk - * 1 - continue with the chunk + * SCTP_IERROR_NO_ERROR - continue with the chunk + * SCTP_IERROR_ERROR - stop and report an error. + * SCTP_IERROR_NOMEME - out of memory. */ -static int sctp_process_unk_param(const struct sctp_association *asoc, - union sctp_params param, - struct sctp_chunk *chunk, - struct sctp_chunk **errp) +static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, + union sctp_params param, + struct sctp_chunk *chunk, + struct sctp_chunk **errp) { - int retval = 1; + int retval = SCTP_IERROR_NO_ERROR; switch (param.p->type & SCTP_PARAM_ACTION_MASK) { case SCTP_PARAM_ACTION_DISCARD: - retval = 0; - break; - case SCTP_PARAM_ACTION_DISCARD_ERR: - retval = 0; - /* Make an ERROR chunk, preparing enough room for - * returning multiple unknown parameters. - */ - if (NULL == *errp) - *errp = sctp_make_op_error_space(asoc, chunk, - ntohs(chunk->chunk_hdr->length)); - - if (*errp) { - sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM, - WORD_ROUND(ntohs(param.p->length))); - sctp_addto_chunk(*errp, - WORD_ROUND(ntohs(param.p->length)), - param.v); - } - + retval = SCTP_IERROR_ERROR; break; case SCTP_PARAM_ACTION_SKIP: break; + case SCTP_PARAM_ACTION_DISCARD_ERR: + retval = SCTP_IERROR_ERROR; + /* Fall through */ case SCTP_PARAM_ACTION_SKIP_ERR: /* Make an ERROR chunk, preparing enough room for * returning multiple unknown parameters. @@ -1932,9 +1927,8 @@ static int sctp_process_unk_param(const struct sctp_association *asoc, * to the peer and the association won't be * established. */ - retval = 0; + retval = SCTP_IERROR_NOMEM; } - break; default: break; @@ -1943,18 +1937,20 @@ static int sctp_process_unk_param(const struct sctp_association *asoc, return retval; } -/* Find unrecognized parameters in the chunk. +/* Verify variable length parameters * Return values: - * 0 - discard the chunk - * 1 - continue with the chunk + * SCTP_IERROR_ABORT - trigger an ABORT + * SCTP_IERROR_NOMEM - out of memory (abort) + * SCTP_IERROR_ERROR - stop processing, trigger an ERROR + * SCTP_IERROR_NO_ERROR - continue with the chunk */ -static int sctp_verify_param(const struct sctp_association *asoc, - union sctp_params param, - sctp_cid_t cid, - struct sctp_chunk *chunk, - struct sctp_chunk **err_chunk) +static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc, + union sctp_params param, + sctp_cid_t cid, + struct sctp_chunk *chunk, + struct sctp_chunk **err_chunk) { - int retval = 1; + int retval = SCTP_IERROR_NO_ERROR; /* FIXME - This routine is not looking at each parameter per the * chunk type, i.e., unrecognized parameters should be further @@ -1976,7 +1972,9 @@ static int sctp_verify_param(const struct sctp_association *asoc, case SCTP_PARAM_HOST_NAME_ADDRESS: /* Tell the peer, we won't support this param. */ - return sctp_process_hn_param(asoc, param, chunk, err_chunk); + sctp_process_hn_param(asoc, param, chunk, err_chunk); + retval = SCTP_IERROR_ABORT; + break; case SCTP_PARAM_FWD_TSN_SUPPORT: if (sctp_prsctp_enable) @@ -1993,9 +1991,11 @@ static int sctp_verify_param(const struct sctp_association *asoc, * cause 'Protocol Violation'. */ if (SCTP_AUTH_RANDOM_LENGTH != - ntohs(param.p->length) - sizeof(sctp_paramhdr_t)) - return sctp_process_inv_paramlength(asoc, param.p, + ntohs(param.p->length) - sizeof(sctp_paramhdr_t)) { + sctp_process_inv_paramlength(asoc, param.p, chunk, err_chunk); + retval = SCTP_IERROR_ABORT; + } break; case SCTP_PARAM_CHUNKS: @@ -2007,9 +2007,11 @@ static int sctp_verify_param(const struct sctp_association *asoc, * INIT-ACK chunk if the sender wants to receive authenticated * chunks. Its maximum length is 260 bytes. */ - if (260 < ntohs(param.p->length)) - return sctp_process_inv_paramlength(asoc, param.p, - chunk, err_chunk); + if (260 < ntohs(param.p->length)) { + sctp_process_inv_paramlength(asoc, param.p, + chunk, err_chunk); + retval = SCTP_IERROR_ABORT; + } break; case SCTP_PARAM_HMAC_ALGO: @@ -2020,8 +2022,7 @@ fallthrough: default: SCTP_DEBUG_PRINTK("Unrecognized param: %d for chunk %d.\n", ntohs(param.p->type), cid); - return sctp_process_unk_param(asoc, param, chunk, err_chunk); - + retval = sctp_process_unk_param(asoc, param, chunk, err_chunk); break; } return retval; @@ -2036,6 +2037,7 @@ int sctp_verify_init(const struct sctp_association *asoc, { union sctp_params param; int has_cookie = 0; + int result; /* Verify stream values are non-zero. */ if ((0 == peer_init->init_hdr.num_outbound_streams) || @@ -2043,8 +2045,7 @@ int sctp_verify_init(const struct sctp_association *asoc, (0 == peer_init->init_hdr.init_tag) || (SCTP_DEFAULT_MINWINDOW > ntohl(peer_init->init_hdr.a_rwnd))) { - sctp_process_inv_mandatory(asoc, chunk, errp); - return 0; + return sctp_process_inv_mandatory(asoc, chunk, errp); } /* Check for missing mandatory parameters. */ @@ -2062,29 +2063,29 @@ int sctp_verify_init(const struct sctp_association *asoc, * VIOLATION error. We build the ERROR chunk here and let the normal * error handling code build and send the packet. */ - if (param.v != (void*)chunk->chunk_end) { - sctp_process_inv_paramlength(asoc, param.p, chunk, errp); - return 0; - } + if (param.v != (void*)chunk->chunk_end) + return sctp_process_inv_paramlength(asoc, param.p, chunk, errp); /* The only missing mandatory param possible today is * the state cookie for an INIT-ACK chunk. */ - if ((SCTP_CID_INIT_ACK == cid) && !has_cookie) { - sctp_process_missing_param(asoc, SCTP_PARAM_STATE_COOKIE, - chunk, errp); - return 0; - } - - /* Find unrecognized parameters. */ + if ((SCTP_CID_INIT_ACK == cid) && !has_cookie) + return sctp_process_missing_param(asoc, SCTP_PARAM_STATE_COOKIE, + chunk, errp); + /* Verify all the variable length parameters */ sctp_walk_params(param, peer_init, init_hdr.params) { - if (!sctp_verify_param(asoc, param, cid, chunk, errp)) { - if (SCTP_PARAM_HOST_NAME_ADDRESS == param.p->type) + result = sctp_verify_param(asoc, param, cid, chunk, errp); + switch (result) { + case SCTP_IERROR_ABORT: + case SCTP_IERROR_NOMEM: return 0; - else + case SCTP_IERROR_ERROR: return 1; + case SCTP_IERROR_NO_ERROR: + default: + break; } } /* for (loop through all parameters) */ -- cgit v1.2.3 From cd3ae8e61570b55e32864c5f9e50085aa67126e9 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 9 Nov 2007 11:43:41 -0500 Subject: SCTP: Fix PR-SCTP to deliver all the accumulated ordered chunks There is a small bug when we process a FWD-TSN. We'll deliver anything upto the current next expected SSN. However, if the next expected is already in the queue, it will take another chunk to trigger its delivery. The fix is to simply check the current queued SSN is the next expected one. Signed-off-by: Vlad Yasevich --- net/sctp/ulpqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 4be92d0a2cab..4908041ffb31 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -862,7 +862,7 @@ static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq, __u16 sid) continue; /* see if this ssn has been marked by skipping */ - if (!SSN_lt(cssn, sctp_ssn_peek(in, csid))) + if (!SSN_lte(cssn, sctp_ssn_peek(in, csid))) break; __skb_unlink(pos, &ulpq->lobby); -- cgit v1.2.3 From 7d54dc6876b83d6bb75b8f7e865b7b9051056d22 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 9 Nov 2007 11:43:41 -0500 Subject: SCTP: Always flush the queue when uncorcking. When the code calls uncork, trigger a queue flush, even if the queue was not corked. Most callers that explicitely cork the queue will have additinal checks to see if they corked it. Callers who do not cork the queue expect packets to flow when they call uncork. The scneario that showcased this bug happend when we were not able to bundle DATA with outgoing COOKIE-ECHO. As a result the data just sat in the outqueue and did not get transmitted. The application expected a response, but nothing happened. Signed-off-by: Vlad Yasevich --- net/sctp/outqueue.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 99a3db5d5fae..fa76f235169b 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -662,10 +662,9 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, int sctp_outq_uncork(struct sctp_outq *q) { int error = 0; - if (q->cork) { + if (q->cork) q->cork = 0; - error = sctp_outq_flush(q, 0); - } + error = sctp_outq_flush(q, 0); return error; } -- cgit v1.2.3 From 9abed245a6dc94c32b2f45a1ecc51a0829d11470 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sun, 11 Nov 2007 23:57:49 +0100 Subject: Fix memory leak in discard case of sctp_sf_abort_violation() In net/sctp/sm_statefuns.c::sctp_sf_abort_violation() we may leak the storage allocated for 'abort' by returning from the function without using or freeing it. This happens in case "sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)" is true and we jump to the 'discard' label. Spotted by the Coverity checker. The simple fix is to simply move the creation of the "abort chunk" to after the possible jump to the 'discard' label. This way we don't even have to allocate the memory at all in the problem case. Signed-off-by: Jesper Juhl Signed-off-by: Vlad Yasevich --- net/sctp/sm_statefuns.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index a66075a70f29..5ebbe808d801 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4064,11 +4064,6 @@ static sctp_disposition_t sctp_sf_abort_violation( struct sctp_chunk *chunk = arg; struct sctp_chunk *abort = NULL; - /* Make the abort chunk. */ - abort = sctp_make_abort_violation(asoc, chunk, payload, paylen); - if (!abort) - goto nomem; - /* SCTP-AUTH, Section 6.3: * It should be noted that if the receiver wants to tear * down an association in an authenticated way only, the @@ -4083,6 +4078,11 @@ static sctp_disposition_t sctp_sf_abort_violation( if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) goto discard; + /* Make the abort chunk. */ + abort = sctp_make_abort_violation(asoc, chunk, payload, paylen); + if (!abort) + goto nomem; + if (asoc) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); -- cgit v1.2.3 From dbb2ed24851a290616d66212dc75373fd863d636 Mon Sep 17 00:00:00 2001 From: Pierre Ynard Date: Mon, 12 Nov 2007 17:58:35 -0800 Subject: [IPV6]: Add ifindex field to ND user option messages. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userland neighbor discovery options are typically heavily involved with the interface on which thay are received: add a missing ifindex field to the original struct. Thanks to Rémi Denis-Courmont. Signed-off-by: Pierre Ynard Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 2 ++ net/ipv6/ndisc.c | 1 + 2 files changed, 3 insertions(+) (limited to 'net') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 5bf618241ab9..4e81836191df 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -491,9 +491,11 @@ struct nduseroptmsg unsigned char nduseropt_family; unsigned char nduseropt_pad1; unsigned short nduseropt_opts_len; /* Total length of options */ + int nduseropt_ifindex; __u8 nduseropt_icmp_type; __u8 nduseropt_icmp_code; unsigned short nduseropt_pad2; + unsigned int nduseropt_pad3; /* Followed by one or more ND options */ }; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 36f7dbfb6dbb..67997a74ddce 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1037,6 +1037,7 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) ndmsg = nlmsg_data(nlh); ndmsg->nduseropt_family = AF_INET6; + ndmsg->nduseropt_ifindex = ra->dev->ifindex; ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type; ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code; ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3; -- cgit v1.2.3 From 91cf45f02af5c871251165d000c3f42a2a0b0552 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 12 Nov 2007 18:10:39 -0800 Subject: [NET]: Add the helper kernel_sock_shutdown() ...and fix a couple of bugs in the NBD, CIFS and OCFS2 socket handlers. Looking at the sock->op->shutdown() handlers, it looks as if all of them take a SHUT_RD/SHUT_WR/SHUT_RDWR argument instead of the RCV_SHUTDOWN/SEND_SHUTDOWN arguments. Add a helper, and then define the SHUT_* enum to ensure that kernel users of shutdown() don't get confused. Signed-off-by: Trond Myklebust Acked-by: Mark Fasheh Acked-by: David Howells Signed-off-by: David S. Miller --- drivers/block/nbd.c | 3 ++- fs/cifs/connect.c | 2 +- fs/ocfs2/cluster/tcp.c | 4 ++-- include/linux/net.h | 8 ++++++++ net/rxrpc/ar-local.c | 4 ++-- net/socket.c | 6 ++++++ 6 files changed, 21 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 6332acad078c..b4c0888aedc3 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -126,7 +127,7 @@ static void sock_shutdown(struct nbd_device *lo, int lock) if (lo->sock) { printk(KERN_WARNING "%s: shutting down socket\n", lo->disk->disk_name); - lo->sock->ops->shutdown(lo->sock, SEND_SHUTDOWN|RCV_SHUTDOWN); + kernel_sock_shutdown(lo->sock, SHUT_RDWR); lo->sock = NULL; } if (lock) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 1102160f6661..c52a76ff4bb9 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -160,7 +160,7 @@ cifs_reconnect(struct TCP_Server_Info *server) if (server->ssocket) { cFYI(1, ("State: 0x%x Flags: 0x%lx", server->ssocket->state, server->ssocket->flags)); - server->ssocket->ops->shutdown(server->ssocket, SEND_SHUTDOWN); + kernel_sock_shutdown(server->ssocket, SHUT_WR); cFYI(1, ("Post shutdown state: 0x%x Flags: 0x%lx", server->ssocket->state, server->ssocket->flags)); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 685c18065c82..d84bd155997b 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include @@ -616,8 +617,7 @@ static void o2net_shutdown_sc(struct work_struct *work) del_timer_sync(&sc->sc_idle_timeout); o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); sc_put(sc); - sc->sc_sock->ops->shutdown(sc->sc_sock, - RCV_SHUTDOWN|SEND_SHUTDOWN); + kernel_sock_shutdown(sc->sc_sock, SHUT_RDWR); } /* not fatal so failed connects before the other guy has our diff --git a/include/linux/net.h b/include/linux/net.h index dd79cdb8c4cf..596131ea46f4 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -95,6 +95,12 @@ enum sock_type { #endif /* ARCH_HAS_SOCKET_TYPES */ +enum sock_shutdown_cmd { + SHUT_RD = 0, + SHUT_WR = 1, + SHUT_RDWR = 2, +}; + /** * struct socket - general BSD socket * @state: socket state (%SS_CONNECTED, etc) @@ -223,6 +229,8 @@ extern int kernel_setsockopt(struct socket *sock, int level, int optname, extern int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg); +extern int kernel_sock_shutdown(struct socket *sock, + enum sock_shutdown_cmd how); #ifndef CONFIG_SMP #define SOCKOPS_WRAPPED(name) name diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c index fe03f71f17da..f3a2bd747a8f 100644 --- a/net/rxrpc/ar-local.c +++ b/net/rxrpc/ar-local.c @@ -114,7 +114,7 @@ static int rxrpc_create_local(struct rxrpc_local *local) return 0; error: - local->socket->ops->shutdown(local->socket, 2); + kernel_sock_shutdown(local->socket, SHUT_RDWR); local->socket->sk->sk_user_data = NULL; sock_release(local->socket); local->socket = NULL; @@ -267,7 +267,7 @@ static void rxrpc_destroy_local(struct work_struct *work) /* finish cleaning up the local descriptor */ rxrpc_purge_queue(&local->accept_queue); rxrpc_purge_queue(&local->reject_queue); - local->socket->ops->shutdown(local->socket, 2); + kernel_sock_shutdown(local->socket, SHUT_RDWR); sock_release(local->socket); up_read(&rxrpc_local_sem); diff --git a/net/socket.c b/net/socket.c index 5d879fd3d01d..74784dfe8e5b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2319,6 +2319,11 @@ int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) return err; } +int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) +{ + return sock->ops->shutdown(sock, how); +} + /* ABI emulation layers need these two */ EXPORT_SYMBOL(move_addr_to_kernel); EXPORT_SYMBOL(move_addr_to_user); @@ -2345,3 +2350,4 @@ EXPORT_SYMBOL(kernel_getsockopt); EXPORT_SYMBOL(kernel_setsockopt); EXPORT_SYMBOL(kernel_sendpage); EXPORT_SYMBOL(kernel_sock_ioctl); +EXPORT_SYMBOL(kernel_sock_shutdown); -- cgit v1.2.3 From be85d4ad8ab69520e5ca4717c491a311c9eeae59 Mon Sep 17 00:00:00 2001 From: Urs Thuermann Date: Mon, 12 Nov 2007 21:05:20 -0800 Subject: [AF_PACKET]: Fix minor code duplication Simplify some code by eliminating duplicate if-else clauses in packet_do_bind(). Signed-off-by: Urs Thuermann Signed-off-by: David S. Miller --- net/packet/af_packet.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index eb6be5030c70..8a7807dbba01 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -881,20 +881,14 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protoc if (protocol == 0) goto out_unlock; - if (dev) { - if (dev->flags&IFF_UP) { - dev_add_pack(&po->prot_hook); - sock_hold(sk); - po->running = 1; - } else { - sk->sk_err = ENETDOWN; - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_error_report(sk); - } - } else { + if (!dev || (dev->flags & IFF_UP)) { dev_add_pack(&po->prot_hook); sock_hold(sk); po->running = 1; + } else { + sk->sk_err = ENETDOWN; + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_error_report(sk); } out_unlock: -- cgit v1.2.3 From 6aed42159db1f99e83ccf17b1aa1a83bc75ac3e8 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 12 Nov 2007 21:24:14 -0800 Subject: [NET]: Unexport sysctl_{r,w}mem_max. sysctl_{r,w}mem_max can now be unexported. Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- net/core/sock.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 8fc2f84209e4..c519b439b8b1 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2097,7 +2097,3 @@ EXPORT_SYMBOL(sock_wmalloc); EXPORT_SYMBOL(sock_i_uid); EXPORT_SYMBOL(sock_i_ino); EXPORT_SYMBOL(sysctl_optmem_max); -#ifdef CONFIG_SYSCTL -EXPORT_SYMBOL(sysctl_rmem_max); -EXPORT_SYMBOL(sysctl_wmem_max); -#endif -- cgit v1.2.3 From 22649d1afbe6988688a07fd70abb06f1e2213567 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 12 Nov 2007 21:25:24 -0800 Subject: [IPVS]: Remove unused exports. This patch removes the following unused EXPORT_SYMBOL's: - ip_vs_try_bind_dest - ip_vs_find_dest Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- net/ipv4/ipvs/ip_vs_conn.c | 1 - net/ipv4/ipvs/ip_vs_ctl.c | 1 - 2 files changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index b7eeae622d9b..0a9f3c37e18d 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -441,7 +441,6 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) } else return NULL; } -EXPORT_SYMBOL(ip_vs_try_bind_dest); /* diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 3c4d22a468ec..b64cf45a9ead 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -604,7 +604,6 @@ struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport, ip_vs_service_put(svc); return dest; } -EXPORT_SYMBOL(ip_vs_find_dest); /* * Lookup dest by {svc,addr,port} in the destination trash. -- cgit v1.2.3 From d71209ded2ba6010070d02005384897c59859d00 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 12 Nov 2007 21:27:28 -0800 Subject: [INET]: Use list_head-s in inetpeer.c The inetpeer.c tracks the LRU list of inet_perr-s, but makes it by hands. Use the list_head-s for this. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/inetpeer.h | 2 +- net/ipv4/inetpeer.c | 42 +++++++++++++++--------------------------- 2 files changed, 16 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index aa10a8178e70..ad8404b56113 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -22,7 +22,7 @@ struct inet_peer __be32 v4daddr; /* peer's address */ __u16 avl_height; __u16 ip_id_count; /* IP ID for the next packet */ - struct inet_peer *unused_next, **unused_prevp; + struct list_head unused; __u32 dtime; /* the time of last use of not * referenced entries */ atomic_t refcnt; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 771031dfbd0f..af995198f643 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -61,7 +61,7 @@ * 4. Global variable peer_total is modified under the pool lock. * 5. struct inet_peer fields modification: * avl_left, avl_right, avl_parent, avl_height: pool lock - * unused_next, unused_prevp: unused node list lock + * unused: unused node list lock * refcnt: atomically against modifications on other CPU; * usually under some other lock to prevent node disappearing * dtime: unused node list lock @@ -94,8 +94,7 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min int inet_peer_gc_mintime __read_mostly = 10 * HZ; int inet_peer_gc_maxtime __read_mostly = 120 * HZ; -static struct inet_peer *inet_peer_unused_head; -static struct inet_peer **inet_peer_unused_tailp = &inet_peer_unused_head; +static LIST_HEAD(unused_peers); static DEFINE_SPINLOCK(inet_peer_unused_lock); static void peer_check_expire(unsigned long dummy); @@ -138,15 +137,7 @@ void __init inet_initpeers(void) static void unlink_from_unused(struct inet_peer *p) { spin_lock_bh(&inet_peer_unused_lock); - if (p->unused_prevp != NULL) { - /* On unused list. */ - *p->unused_prevp = p->unused_next; - if (p->unused_next != NULL) - p->unused_next->unused_prevp = p->unused_prevp; - else - inet_peer_unused_tailp = p->unused_prevp; - p->unused_prevp = NULL; /* mark it as removed */ - } + list_del_init(&p->unused); spin_unlock_bh(&inet_peer_unused_lock); } @@ -337,24 +328,24 @@ static void unlink_from_pool(struct inet_peer *p) /* May be called with local BH enabled. */ static int cleanup_once(unsigned long ttl) { - struct inet_peer *p; + struct inet_peer *p = NULL; /* Remove the first entry from the list of unused nodes. */ spin_lock_bh(&inet_peer_unused_lock); - p = inet_peer_unused_head; - if (p != NULL) { - __u32 delta = (__u32)jiffies - p->dtime; + if (!list_empty(&unused_peers)) { + __u32 delta; + + p = list_first_entry(&unused_peers, struct inet_peer, unused); + delta = (__u32)jiffies - p->dtime; + if (delta < ttl) { /* Do not prune fresh entries. */ spin_unlock_bh(&inet_peer_unused_lock); return -1; } - inet_peer_unused_head = p->unused_next; - if (p->unused_next != NULL) - p->unused_next->unused_prevp = p->unused_prevp; - else - inet_peer_unused_tailp = p->unused_prevp; - p->unused_prevp = NULL; /* mark as not on the list */ + + list_del_init(&p->unused); + /* Grab an extra reference to prevent node disappearing * before unlink_from_pool() call. */ atomic_inc(&p->refcnt); @@ -412,7 +403,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create) /* Link the node. */ link_to_pool(n); - n->unused_prevp = NULL; /* not on the list */ + INIT_LIST_HEAD(&n->unused); peer_total++; write_unlock_bh(&peer_pool_lock); @@ -467,10 +458,7 @@ void inet_putpeer(struct inet_peer *p) { spin_lock_bh(&inet_peer_unused_lock); if (atomic_dec_and_test(&p->refcnt)) { - p->unused_prevp = inet_peer_unused_tailp; - p->unused_next = NULL; - *inet_peer_unused_tailp = p; - inet_peer_unused_tailp = &p->unused_next; + list_add_tail(&p->unused, &unused_peers); p->dtime = (__u32)jiffies; } spin_unlock_bh(&inet_peer_unused_lock); -- cgit v1.2.3 From e0bf9cf15fc30d300b7fbd821c6bc975531fab44 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 13 Nov 2007 02:57:16 -0800 Subject: [NETFILTER]: nf_nat: fix memset error The size passing to memset is the size of a pointer. Signed-off-by: Li Zefan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/nf_nat_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 56e93f692e82..70e7997ea284 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -681,7 +681,7 @@ static int clean_nat(struct nf_conn *i, void *data) if (!nat) return 0; - memset(nat, 0, sizeof(nat)); + memset(nat, 0, sizeof(*nat)); i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); return 0; } -- cgit v1.2.3 From 4ce5ba6aecb218981fc76585b6d9a7d0019e61b5 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 13 Nov 2007 02:58:09 -0800 Subject: [NETFILTER]: Consolidate nf_sockopt and compat_nf_sockopt Both lookup the nf_sockopt_ops object to call the get/set callbacks from, but they perform it in a completely similar way. Introduce the helper for finding the ops. Signed-off-by: Pavel Emelyanov Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_sockopt.c | 106 +++++++++++++++++++-------------------------- 1 file changed, 44 insertions(+), 62 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index 2dfac3253569..87bc1443c520 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -60,46 +60,57 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg) } EXPORT_SYMBOL(nf_unregister_sockopt); -/* Call get/setsockopt() */ -static int nf_sockopt(struct sock *sk, int pf, int val, - char __user *opt, int *len, int get) +static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, int pf, + int val, int get) { struct nf_sockopt_ops *ops; - int ret; if (sk->sk_net != &init_net) - return -ENOPROTOOPT; + return ERR_PTR(-ENOPROTOOPT); if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) - return -EINTR; + return ERR_PTR(-EINTR); list_for_each_entry(ops, &nf_sockopts, list) { if (ops->pf == pf) { if (!try_module_get(ops->owner)) goto out_nosup; + if (get) { - if (val >= ops->get_optmin - && val < ops->get_optmax) { - mutex_unlock(&nf_sockopt_mutex); - ret = ops->get(sk, val, opt, len); + if (val >= ops->get_optmin && + val < ops->get_optmax) goto out; - } } else { - if (val >= ops->set_optmin - && val < ops->set_optmax) { - mutex_unlock(&nf_sockopt_mutex); - ret = ops->set(sk, val, opt, *len); + if (val >= ops->set_optmin && + val < ops->set_optmax) goto out; - } } module_put(ops->owner); } } - out_nosup: +out_nosup: + ops = ERR_PTR(-ENOPROTOOPT); +out: mutex_unlock(&nf_sockopt_mutex); - return -ENOPROTOOPT; + return ops; +} + +/* Call get/setsockopt() */ +static int nf_sockopt(struct sock *sk, int pf, int val, + char __user *opt, int *len, int get) +{ + struct nf_sockopt_ops *ops; + int ret; + + ops = nf_sockopt_find(sk, pf, val, get); + if (IS_ERR(ops)) + return PTR_ERR(ops); + + if (get) + ret = ops->get(sk, val, opt, len); + else + ret = ops->set(sk, val, opt, *len); - out: module_put(ops->owner); return ret; } @@ -124,51 +135,22 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val, struct nf_sockopt_ops *ops; int ret; - if (sk->sk_net != &init_net) - return -ENOPROTOOPT; - - - if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) - return -EINTR; - - list_for_each_entry(ops, &nf_sockopts, list) { - if (ops->pf == pf) { - if (!try_module_get(ops->owner)) - goto out_nosup; - - if (get) { - if (val >= ops->get_optmin - && val < ops->get_optmax) { - mutex_unlock(&nf_sockopt_mutex); - if (ops->compat_get) - ret = ops->compat_get(sk, - val, opt, len); - else - ret = ops->get(sk, - val, opt, len); - goto out; - } - } else { - if (val >= ops->set_optmin - && val < ops->set_optmax) { - mutex_unlock(&nf_sockopt_mutex); - if (ops->compat_set) - ret = ops->compat_set(sk, - val, opt, *len); - else - ret = ops->set(sk, - val, opt, *len); - goto out; - } - } - module_put(ops->owner); - } + ops = nf_sockopt_find(sk, pf, val, get); + if (IS_ERR(ops)) + return PTR_ERR(ops); + + if (get) { + if (ops->compat_get) + ret = ops->compat_get(sk, val, opt, len); + else + ret = ops->get(sk, val, ops, len); + } else { + if (ops->compat_set) + ret = ops->compat_set(sk, val, ops, *len); + else + ret = ops->set(sk, val, ops, *len); } - out_nosup: - mutex_unlock(&nf_sockopt_mutex); - return -ENOPROTOOPT; - out: module_put(ops->owner); return ret; } -- cgit v1.2.3 From 81d9ddae856678c45297550e9353c8a5a7fd6438 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 13 Nov 2007 02:58:44 -0800 Subject: [NETFILTER]: bridge: fix double POSTROUTING hook invocation Packets routed between bridges have the POST_ROUTING hook invoked twice since bridging mistakes them for bridged packets because they have skb->nf_bridge set. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/bridge/br_netfilter.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index da22f900e89d..c1757c79dfbb 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -766,6 +766,9 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, if (!nf_bridge) return NF_ACCEPT; + if (!(nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT))) + return NF_ACCEPT; + if (!realoutdev) return NF_DROP; -- cgit v1.2.3 From ed160e839d2e1118529e58b04d52dba703ca629c Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 13 Nov 2007 03:23:21 -0800 Subject: [NET]: Cleanup pernet operation without CONFIG_NET_NS If CONFIG_NET_NS is not set, the only namespace is possible. This patch removes list of pernet_operations and cleanups code a bit. This list is not needed if there are no namespaces. We should just call ->init method. Additionally, the ->exit will be called on module unloading only. This case is safe - the code is not discarded. For the in/kernel code, ->exit should never be called. Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- net/core/net_namespace.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'net') diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 3f6d37deac45..383252b50411 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -188,6 +188,7 @@ static int __init net_ns_init(void) pure_initcall(net_ns_init); +#ifdef CONFIG_NET_NS static int register_pernet_operations(struct list_head *list, struct pernet_operations *ops) { @@ -228,6 +229,23 @@ static void unregister_pernet_operations(struct pernet_operations *ops) ops->exit(net); } +#else + +static int register_pernet_operations(struct list_head *list, + struct pernet_operations *ops) +{ + if (ops->init == NULL) + return 0; + return ops->init(&init_net); +} + +static void unregister_pernet_operations(struct pernet_operations *ops) +{ + if (ops->exit) + ops->exit(&init_net); +} +#endif + /** * register_pernet_subsys - register a network namespace subsystem * @ops: pernet operations structure for the subsystem -- cgit v1.2.3 From 022cbae611a37eda80d498f8f379794c8ac3be47 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 13 Nov 2007 03:23:50 -0800 Subject: [NET]: Move unneeded data to initdata section. This patch reverts Eric's commit 2b008b0a8e96b726c603c5e1a5a7a509b5f61e35 It diets .text & .data section of the kernel if CONFIG_NET_NS is not set. This is safe after list operations cleanup. Signed-of-by: Denis V. Lunev Signed-off-by: David S. Miller --- drivers/net/loopback.c | 2 +- fs/proc/proc_net.c | 2 +- include/net/net_namespace.h | 2 ++ net/core/dev.c | 6 +++--- net/core/dev_mcast.c | 2 +- net/netlink/af_netlink.c | 2 +- 6 files changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 45f30a2974b8..662b8d16803c 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -284,7 +284,7 @@ static __net_exit void loopback_net_exit(struct net *net) unregister_netdev(dev); } -static struct pernet_operations loopback_net_ops = { +static struct pernet_operations __net_initdata loopback_net_ops = { .init = loopback_net_init, .exit = loopback_net_exit, }; diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 153554cf5575..131f9c68be5f 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -178,7 +178,7 @@ static __net_exit void proc_net_ns_exit(struct net *net) kfree(net->proc_net_root); } -static struct pernet_operations proc_net_ns_ops = { +static struct pernet_operations __net_initdata proc_net_ns_ops = { .init = proc_net_ns_init, .exit = proc_net_ns_exit, }; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 1fd449a6530b..5dd6d90b37eb 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -119,9 +119,11 @@ static inline struct net *maybe_get_net(struct net *net) #ifdef CONFIG_NET_NS #define __net_init #define __net_exit +#define __net_initdata #else #define __net_init __init #define __net_exit __exit_refok +#define __net_initdata __initdata #endif struct pernet_operations { diff --git a/net/core/dev.c b/net/core/dev.c index dd7e30754cbc..dd40b35bb006 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2688,7 +2688,7 @@ static void __net_exit dev_proc_net_exit(struct net *net) proc_net_remove(net, "dev"); } -static struct pernet_operations dev_proc_ops = { +static struct pernet_operations __net_initdata dev_proc_ops = { .init = dev_proc_net_init, .exit = dev_proc_net_exit, }; @@ -4353,7 +4353,7 @@ static void __net_exit netdev_exit(struct net *net) kfree(net->dev_index_head); } -static struct pernet_operations netdev_net_ops = { +static struct pernet_operations __net_initdata netdev_net_ops = { .init = netdev_init, .exit = netdev_exit, }; @@ -4384,7 +4384,7 @@ static void __net_exit default_device_exit(struct net *net) rtnl_unlock(); } -static struct pernet_operations default_device_ops = { +static struct pernet_operations __net_initdata default_device_ops = { .exit = default_device_exit, }; diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index 647973daca2b..69fff16ece10 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -285,7 +285,7 @@ static void __net_exit dev_mc_net_exit(struct net *net) proc_net_remove(net, "dev_mcast"); } -static struct pernet_operations dev_mc_net_ops = { +static struct pernet_operations __net_initdata dev_mc_net_ops = { .init = dev_mc_net_init, .exit = dev_mc_net_exit, }; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 415c97236f63..de3988ba1f46 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1888,7 +1888,7 @@ static void __net_exit netlink_net_exit(struct net *net) #endif } -static struct pernet_operations netlink_net_ops = { +static struct pernet_operations __net_initdata netlink_net_ops = { .init = netlink_net_init, .exit = netlink_net_exit, }; -- cgit v1.2.3 From 53756524e42a71011f5ae6410d6ac386bf3a9e7b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Nov 2007 03:49:53 -0800 Subject: [NETFILTER]: xt_time should not assume CONFIG_KTIME_SCALAR It is not correct to assume one can get nsec from a ktime directly by using .tv64 field. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/netfilter/xt_time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index ff44f86c24ce..f9c55dcd894b 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -170,7 +170,7 @@ static bool xt_time_match(const struct sk_buff *skb, if (skb->tstamp.tv64 == 0) __net_timestamp((struct sk_buff *)skb); - stamp = skb->tstamp.tv64; + stamp = ktime_to_ns(skb->tstamp); do_div(stamp, NSEC_PER_SEC); if (info->flags & XT_TIME_LOCAL_TZ) -- cgit v1.2.3