From a17c859849402315613a0015ac8fbf101acf0cc1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 27 May 2009 17:50:35 +0200 Subject: netfilter: conntrack: add support for DCCP handshake sequence to ctnetlink This patch adds CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ that exposes the u64 handshake sequence number to user-space. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/linux/netfilter/nfnetlink_conntrack.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 1a865e48b8eb..ed4ef8d0b11b 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -101,6 +101,7 @@ enum ctattr_protoinfo_dccp { CTA_PROTOINFO_DCCP_UNSPEC, CTA_PROTOINFO_DCCP_STATE, CTA_PROTOINFO_DCCP_ROLE, + CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ, __CTA_PROTOINFO_DCCP_MAX, }; #define CTA_PROTOINFO_DCCP_MAX (__CTA_PROTOINFO_DCCP_MAX - 1) -- cgit v1.2.3 From 874ab9233eeddb85fd2dd85131c145bde75da39a Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 2 Jun 2009 13:58:56 +0200 Subject: netfilter: nf_ct_tcp: TCP simultaneous open support The patch below adds supporting TCP simultaneous open to conntrack. The unused LISTEN state is replaced by a new state (SYN_SENT2) denoting the second SYN sent from the reply direction in the new case. The state table is updated and the function tcp_in_window is modified to handle simultaneous open. The functionality can fairly easily be tested by socat. A sample tcpdump recording 23:21:34.244733 IP (tos 0x0, ttl 64, id 49224, offset 0, flags [DF], proto TCP (6), length 60) 192.168.0.254.2020 > 192.168.0.1.2020: S, cksum 0xe75f (correct), 3383710133:3383710133(0) win 5840 23:21:34.244783 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 40) 192.168.0.1.2020 > 192.168.0.254.2020: R, cksum 0x0253 (correct), 0:0(0) ack 3383710134 win 0 23:21:36.038680 IP (tos 0x0, ttl 64, id 28092, offset 0, flags [DF], proto TCP (6), length 60) 192.168.0.1.2020 > 192.168.0.254.2020: S, cksum 0x704b (correct), 2634546729:2634546729(0) win 5840 23:21:36.038777 IP (tos 0x0, ttl 64, id 49225, offset 0, flags [DF], proto TCP (6), length 60) 192.168.0.254.2020 > 192.168.0.1.2020: S, cksum 0xb179 (correct), 3383710133:3383710133(0) ack 2634546730 win 5840 23:21:36.038847 IP (tos 0x0, ttl 64, id 28093, offset 0, flags [DF], proto TCP (6), length 52) 192.168.0.1.2020 > 192.168.0.254.2020: ., cksum 0xebad (correct), ack 3383710134 win 2920 and the corresponding netlink events: [NEW] tcp 6 120 SYN_SENT src=192.168.0.254 dst=192.168.0.1 sport=2020 dport=2020 [UNREPLIED] src=192.168.0.1 dst=192.168.0.254 sport=2020 dport=2020 [UPDATE] tcp 6 120 LISTEN src=192.168.0.254 dst=192.168.0.1 sport=2020 dport=2020 src=192.168.0.1 dst=192.168.0.254 sport=2020 dport=2020 [UPDATE] tcp 6 60 SYN_RECV src=192.168.0.254 dst=192.168.0.1 sport=2020 dport=2020 src=192.168.0.1 dst=192.168.0.254 sport=2020 dport=2020 [UPDATE] tcp 6 432000 ESTABLISHED src=192.168.0.254 dst=192.168.0.1 sport=2020 dport=2020 src=192.168.0.1 dst=192.168.0.254 sport=2020 dport=2020 [ASSURED] The RST packet was dropped in the raw table, thus it did not reach conntrack. nfnetlink_conntrack is unpatched so it shows the new SYN_SENT2 state as the old unused LISTEN. With TCP simultaneous open support we satisfy REQ-2 in RFC 5382 ;-) . Additional minor correction in this patch is that in order to catch uninitialized reply directions, "td_maxwin == 0" is used instead of "td_end == 0" because the former can't be true except in uninitialized state while td_end may accidentally be equal to zero in the mid of a connection. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_tcp.h | 3 +- net/netfilter/nf_conntrack_proto_tcp.c | 98 +++++++++++++++++++----------- 2 files changed, 63 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h index 3066789b972a..74c27ca770e5 100644 --- a/include/linux/netfilter/nf_conntrack_tcp.h +++ b/include/linux/netfilter/nf_conntrack_tcp.h @@ -15,7 +15,8 @@ enum tcp_conntrack { TCP_CONNTRACK_LAST_ACK, TCP_CONNTRACK_TIME_WAIT, TCP_CONNTRACK_CLOSE, - TCP_CONNTRACK_LISTEN, + TCP_CONNTRACK_LISTEN, /* obsolete */ +#define TCP_CONNTRACK_SYN_SENT2 TCP_CONNTRACK_LISTEN TCP_CONNTRACK_MAX, TCP_CONNTRACK_IGNORE }; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b5ccf2b4b2e7..4c7f6f0dae90 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -59,7 +59,7 @@ static const char *const tcp_conntrack_names[] = { "LAST_ACK", "TIME_WAIT", "CLOSE", - "LISTEN" + "SYN_SENT2", }; #define SECS * HZ @@ -82,6 +82,7 @@ static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = { [TCP_CONNTRACK_LAST_ACK] = 30 SECS, [TCP_CONNTRACK_TIME_WAIT] = 2 MINS, [TCP_CONNTRACK_CLOSE] = 10 SECS, + [TCP_CONNTRACK_SYN_SENT2] = 2 MINS, }; #define sNO TCP_CONNTRACK_NONE @@ -93,7 +94,7 @@ static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = { #define sLA TCP_CONNTRACK_LAST_ACK #define sTW TCP_CONNTRACK_TIME_WAIT #define sCL TCP_CONNTRACK_CLOSE -#define sLI TCP_CONNTRACK_LISTEN +#define sS2 TCP_CONNTRACK_SYN_SENT2 #define sIV TCP_CONNTRACK_MAX #define sIG TCP_CONNTRACK_IGNORE @@ -123,6 +124,7 @@ enum tcp_bit_set { * * NONE: initial state * SYN_SENT: SYN-only packet seen + * SYN_SENT2: SYN-only packet seen from reply dir, simultaneous open * SYN_RECV: SYN-ACK packet seen * ESTABLISHED: ACK packet seen * FIN_WAIT: FIN packet seen @@ -131,26 +133,24 @@ enum tcp_bit_set { * TIME_WAIT: last ACK seen * CLOSE: closed connection (RST) * - * LISTEN state is not used. - * * Packets marked as IGNORED (sIG): * if they may be either invalid or valid * and the receiver may send back a connection * closing RST or a SYN/ACK. * * Packets marked as INVALID (sIV): - * if they are invalid - * or we do not support the request (simultaneous open) + * if we regard them as truly invalid packets */ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { { /* ORIGINAL */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 }, /* * sNO -> sSS Initialize a new connection * sSS -> sSS Retransmitted SYN - * sSR -> sIG Late retransmitted SYN? + * sS2 -> sS2 Late retransmitted SYN + * sSR -> sIG * sES -> sIG Error: SYNs in window outside the SYN_SENT state * are errors. Receiver will reply with RST * and close the connection. @@ -161,22 +161,30 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sSS Reopened connection (RFC 1122). * sCL -> sSS */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*synack*/ { sIV, sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR }, /* - * A SYN/ACK from the client is always invalid: - * - either it tries to set up a simultaneous open, which is - * not supported; - * - or the firewall has just been inserted between the two hosts - * during the session set-up. The SYN will be retransmitted - * by the true client (or it'll time out). + * sNO -> sIV Too late and no reason to do anything + * sSS -> sIV Client can't send SYN and then SYN/ACK + * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open + * sSR -> sIG + * sES -> sIG Error: SYNs in window outside the SYN_SENT state + * are errors. Receiver will reply with RST + * and close the connection. + * Or we are not in sync and hold a dead connection. + * sFW -> sIG + * sCW -> sIG + * sLA -> sIG + * sTW -> sIG + * sCL -> sIG */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, /* * sNO -> sIV Too late and no reason to do anything... * sSS -> sIV Client migth not send FIN in this state: * we enforce waiting for a SYN/ACK reply first. + * sS2 -> sIV * sSR -> sFW Close started. * sES -> sFW * sFW -> sLA FIN seen in both directions, waiting for @@ -187,11 +195,12 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sTW * sCL -> sCL */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV }, /* * sNO -> sES Assumed. * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet. + * sS2 -> sIV * sSR -> sES Established state is reached. * sES -> sES :-) * sFW -> sCW Normal close request answered by ACK. @@ -200,29 +209,31 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sTW Retransmitted last ACK. Remain in the same state. * sCL -> sCL */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL }, /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } }, { /* REPLY */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 }, /* * sNO -> sIV Never reached. - * sSS -> sIV Simultaneous open, not supported - * sSR -> sIV Simultaneous open, not supported. - * sES -> sIV Server may not initiate a connection. + * sSS -> sS2 Simultaneous open + * sS2 -> sS2 Retransmitted simultaneous SYN + * sSR -> sIV Invalid SYN packets sent by the server + * sES -> sIV * sFW -> sIV * sCW -> sIV * sLA -> sIV * sTW -> sIV Reopened connection, but server may not do it. * sCL -> sIV */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sSR }, /* * sSS -> sSR Standard open. + * sS2 -> sSR Simultaneous open * sSR -> sSR Retransmitted SYN/ACK. * sES -> sIG Late retransmitted SYN/ACK? * sFW -> sIG Might be SYN/ACK answering ignored SYN @@ -231,10 +242,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sIG * sCL -> sIG */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, /* * sSS -> sIV Server might not send FIN in this state. + * sS2 -> sIV * sSR -> sFW Close started. * sES -> sFW * sFW -> sLA FIN seen in both directions. @@ -243,10 +255,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sTW * sCL -> sCL */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG }, /* * sSS -> sIG Might be a half-open connection. + * sS2 -> sIG * sSR -> sSR Might answer late resent SYN. * sES -> sES :-) * sFW -> sCW Normal close request answered by ACK. @@ -255,8 +268,8 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sTW Retransmitted last ACK. * sCL -> sCL */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL }, /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } } }; @@ -521,13 +534,14 @@ static bool tcp_in_window(const struct nf_conn *ct, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); - if (sender->td_end == 0) { + if (sender->td_maxwin == 0) { /* * Initialize sender data. */ - if (tcph->syn && tcph->ack) { + if (tcph->syn) { /* - * Outgoing SYN-ACK in reply to a SYN. + * SYN-ACK in reply to a SYN + * or SYN from reply direction in simultaneous open. */ sender->td_end = sender->td_maxend = end; @@ -543,6 +557,9 @@ static bool tcp_in_window(const struct nf_conn *ct, && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) sender->td_scale = receiver->td_scale = 0; + if (!tcph->ack) + /* Simultaneous open */ + return true; } else { /* * We are in the middle of a connection, @@ -1068,7 +1085,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.tcp.seen[1].td_end = 0; ct->proto.tcp.seen[1].td_maxend = 0; - ct->proto.tcp.seen[1].td_maxwin = 1; + ct->proto.tcp.seen[1].td_maxwin = 0; ct->proto.tcp.seen[1].td_scale = 0; /* tcp_packet will set them */ @@ -1309,6 +1326,13 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "ip_conntrack_tcp_timeout_syn_sent2", + .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT2], + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, { .procname = "ip_conntrack_tcp_timeout_syn_recv", .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV], -- cgit v1.2.3 From 6bfea1984aea86089907caf8974513c2402a3b3d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 2 Jun 2009 20:08:44 +0200 Subject: netfilter: conntrack: remove events flags from userspace exposed file This patch moves the event flags from linux/netfilter/nf_conntrack_common.h to net/netfilter/nf_conntrack_ecache.h. This flags are not of any use from userspace. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_common.h | 69 --------------------------- include/net/netfilter/nf_conntrack_ecache.h | 69 +++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 69 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 885cbe282260..a8248ee422b7 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -75,75 +75,6 @@ enum ip_conntrack_status { IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT), }; -/* Connection tracking event bits */ -enum ip_conntrack_events -{ - /* New conntrack */ - IPCT_NEW_BIT = 0, - IPCT_NEW = (1 << IPCT_NEW_BIT), - - /* Expected connection */ - IPCT_RELATED_BIT = 1, - IPCT_RELATED = (1 << IPCT_RELATED_BIT), - - /* Destroyed conntrack */ - IPCT_DESTROY_BIT = 2, - IPCT_DESTROY = (1 << IPCT_DESTROY_BIT), - - /* Timer has been refreshed */ - IPCT_REFRESH_BIT = 3, - IPCT_REFRESH = (1 << IPCT_REFRESH_BIT), - - /* Status has changed */ - IPCT_STATUS_BIT = 4, - IPCT_STATUS = (1 << IPCT_STATUS_BIT), - - /* Update of protocol info */ - IPCT_PROTOINFO_BIT = 5, - IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT), - - /* Volatile protocol info */ - IPCT_PROTOINFO_VOLATILE_BIT = 6, - IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT), - - /* New helper for conntrack */ - IPCT_HELPER_BIT = 7, - IPCT_HELPER = (1 << IPCT_HELPER_BIT), - - /* Update of helper info */ - IPCT_HELPINFO_BIT = 8, - IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT), - - /* Volatile helper info */ - IPCT_HELPINFO_VOLATILE_BIT = 9, - IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT), - - /* NAT info */ - IPCT_NATINFO_BIT = 10, - IPCT_NATINFO = (1 << IPCT_NATINFO_BIT), - - /* Counter highest bit has been set, unused */ - IPCT_COUNTER_FILLING_BIT = 11, - IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT), - - /* Mark is set */ - IPCT_MARK_BIT = 12, - IPCT_MARK = (1 << IPCT_MARK_BIT), - - /* NAT sequence adjustment */ - IPCT_NATSEQADJ_BIT = 13, - IPCT_NATSEQADJ = (1 << IPCT_NATSEQADJ_BIT), - - /* Secmark is set */ - IPCT_SECMARK_BIT = 14, - IPCT_SECMARK = (1 << IPCT_SECMARK_BIT), -}; - -enum ip_conntrack_expect_events { - IPEXP_NEW_BIT = 0, - IPEXP_NEW = (1 << IPEXP_NEW_BIT), -}; - #ifdef __KERNEL__ struct ip_conntrack_stat { diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 0ff0dc69ca4a..892b8cdf7f62 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -11,6 +11,75 @@ #include #include +/* Connection tracking event bits */ +enum ip_conntrack_events +{ + /* New conntrack */ + IPCT_NEW_BIT = 0, + IPCT_NEW = (1 << IPCT_NEW_BIT), + + /* Expected connection */ + IPCT_RELATED_BIT = 1, + IPCT_RELATED = (1 << IPCT_RELATED_BIT), + + /* Destroyed conntrack */ + IPCT_DESTROY_BIT = 2, + IPCT_DESTROY = (1 << IPCT_DESTROY_BIT), + + /* Timer has been refreshed */ + IPCT_REFRESH_BIT = 3, + IPCT_REFRESH = (1 << IPCT_REFRESH_BIT), + + /* Status has changed */ + IPCT_STATUS_BIT = 4, + IPCT_STATUS = (1 << IPCT_STATUS_BIT), + + /* Update of protocol info */ + IPCT_PROTOINFO_BIT = 5, + IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT), + + /* Volatile protocol info */ + IPCT_PROTOINFO_VOLATILE_BIT = 6, + IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT), + + /* New helper for conntrack */ + IPCT_HELPER_BIT = 7, + IPCT_HELPER = (1 << IPCT_HELPER_BIT), + + /* Update of helper info */ + IPCT_HELPINFO_BIT = 8, + IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT), + + /* Volatile helper info */ + IPCT_HELPINFO_VOLATILE_BIT = 9, + IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT), + + /* NAT info */ + IPCT_NATINFO_BIT = 10, + IPCT_NATINFO = (1 << IPCT_NATINFO_BIT), + + /* Counter highest bit has been set, unused */ + IPCT_COUNTER_FILLING_BIT = 11, + IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT), + + /* Mark is set */ + IPCT_MARK_BIT = 12, + IPCT_MARK = (1 << IPCT_MARK_BIT), + + /* NAT sequence adjustment */ + IPCT_NATSEQADJ_BIT = 13, + IPCT_NATSEQADJ = (1 << IPCT_NATSEQADJ_BIT), + + /* Secmark is set */ + IPCT_SECMARK_BIT = 14, + IPCT_SECMARK = (1 << IPCT_SECMARK_BIT), +}; + +enum ip_conntrack_expect_events { + IPEXP_NEW_BIT = 0, + IPEXP_NEW = (1 << IPEXP_NEW_BIT), +}; + #ifdef CONFIG_NF_CONNTRACK_EVENTS struct nf_conntrack_ecache { struct nf_conn *ct; -- cgit v1.2.3 From e34d5c1a4f9919a81b4ea4591d7383245f35cb8e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 3 Jun 2009 10:32:06 +0200 Subject: netfilter: conntrack: replace notify chain by function pointer This patch removes the notify chain infrastructure and replace it by a simple function pointer. This issue has been mentioned in the mailing list several times: the use of the notify chain adds too much overhead for something that is only used by ctnetlink. This patch also changes nfnetlink_send(). It seems that gfp_any() returns GFP_KERNEL for user-context request, like those via ctnetlink, inside the RCU read-side section which is not valid. Using GFP_KERNEL is also evil since netlink may schedule(), this leads to "scheduling while atomic" bug reports. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 2 +- include/net/netfilter/nf_conntrack_ecache.h | 68 ++++++++++++++++------- net/netfilter/nf_conntrack_ecache.c | 83 +++++++++++++++++++++++------ net/netfilter/nf_conntrack_netlink.c | 42 +++++++-------- net/netfilter/nfnetlink.c | 5 +- 5 files changed, 139 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index c600083cbdf5..2214e5161461 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -75,7 +75,7 @@ extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n); extern int nfnetlink_has_listeners(unsigned int group); extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, - int echo); + int echo, gfp_t flags); extern void nfnetlink_set_err(u32 pid, u32 group, int error); extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags); diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 2e17a2d0eb3b..1afb907e015a 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -6,7 +6,6 @@ #define _NF_CONNTRACK_ECACHE_H #include -#include #include #include #include @@ -69,9 +68,13 @@ struct nf_ct_event { int report; }; -extern struct atomic_notifier_head nf_conntrack_chain; -extern int nf_conntrack_register_notifier(struct notifier_block *nb); -extern int nf_conntrack_unregister_notifier(struct notifier_block *nb); +struct nf_ct_event_notifier { + int (*fcn)(unsigned int events, struct nf_ct_event *item); +}; + +extern struct nf_ct_event_notifier *nf_conntrack_event_cb; +extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb); +extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb); extern void nf_ct_deliver_cached_events(const struct nf_conn *ct); extern void __nf_ct_event_cache_init(struct nf_conn *ct); @@ -97,13 +100,23 @@ nf_conntrack_event_report(enum ip_conntrack_events event, u32 pid, int report) { - struct nf_ct_event item = { - .ct = ct, - .pid = pid, - .report = report - }; - if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) - atomic_notifier_call_chain(&nf_conntrack_chain, event, &item); + struct nf_ct_event_notifier *notify; + + rcu_read_lock(); + notify = rcu_dereference(nf_conntrack_event_cb); + if (notify == NULL) + goto out_unlock; + + if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) { + struct nf_ct_event item = { + .ct = ct, + .pid = pid, + .report = report + }; + notify->fcn(event, &item); + } +out_unlock: + rcu_read_unlock(); } static inline void @@ -118,9 +131,13 @@ struct nf_exp_event { int report; }; -extern struct atomic_notifier_head nf_ct_expect_chain; -extern int nf_ct_expect_register_notifier(struct notifier_block *nb); -extern int nf_ct_expect_unregister_notifier(struct notifier_block *nb); +struct nf_exp_event_notifier { + int (*fcn)(unsigned int events, struct nf_exp_event *item); +}; + +extern struct nf_exp_event_notifier *nf_expect_event_cb; +extern int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *nb); +extern void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *nb); static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, @@ -128,12 +145,23 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event, u32 pid, int report) { - struct nf_exp_event item = { - .exp = exp, - .pid = pid, - .report = report - }; - atomic_notifier_call_chain(&nf_ct_expect_chain, event, &item); + struct nf_exp_event_notifier *notify; + + rcu_read_lock(); + notify = rcu_dereference(nf_expect_event_cb); + if (notify == NULL) + goto out_unlock; + + { + struct nf_exp_event item = { + .exp = exp, + .pid = pid, + .report = report + }; + notify->fcn(event, &item); + } +out_unlock: + rcu_read_unlock(); } static inline void diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index dee4190209cc..5516b3e64b43 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -16,24 +16,32 @@ #include #include #include -#include #include #include #include #include -ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain); -EXPORT_SYMBOL_GPL(nf_conntrack_chain); +static DEFINE_MUTEX(nf_ct_ecache_mutex); -ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain); -EXPORT_SYMBOL_GPL(nf_ct_expect_chain); +struct nf_ct_event_notifier *nf_conntrack_event_cb __read_mostly; +EXPORT_SYMBOL_GPL(nf_conntrack_event_cb); + +struct nf_exp_event_notifier *nf_expect_event_cb __read_mostly; +EXPORT_SYMBOL_GPL(nf_expect_event_cb); /* deliver cached events and clear cache entry - must be called with locally * disabled softirqs */ static inline void __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache) { + struct nf_ct_event_notifier *notify; + + rcu_read_lock(); + notify = rcu_dereference(nf_conntrack_event_cb); + if (notify == NULL) + goto out_unlock; + if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct) && ecache->events) { struct nf_ct_event item = { @@ -42,14 +50,15 @@ __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache) .report = 0 }; - atomic_notifier_call_chain(&nf_conntrack_chain, - ecache->events, - &item); + notify->fcn(ecache->events, &item); } ecache->events = 0; nf_ct_put(ecache->ct); ecache->ct = NULL; + +out_unlock: + rcu_read_unlock(); } /* Deliver all cached events for a particular conntrack. This is called @@ -111,26 +120,68 @@ void nf_conntrack_ecache_fini(struct net *net) free_percpu(net->ct.ecache); } -int nf_conntrack_register_notifier(struct notifier_block *nb) +int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new) { - return atomic_notifier_chain_register(&nf_conntrack_chain, nb); + int ret = 0; + struct nf_ct_event_notifier *notify; + + mutex_lock(&nf_ct_ecache_mutex); + notify = rcu_dereference(nf_conntrack_event_cb); + if (notify != NULL) { + ret = -EBUSY; + goto out_unlock; + } + rcu_assign_pointer(nf_conntrack_event_cb, new); + mutex_unlock(&nf_ct_ecache_mutex); + return ret; + +out_unlock: + mutex_unlock(&nf_ct_ecache_mutex); + return ret; } EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); -int nf_conntrack_unregister_notifier(struct notifier_block *nb) +void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new) { - return atomic_notifier_chain_unregister(&nf_conntrack_chain, nb); + struct nf_ct_event_notifier *notify; + + mutex_lock(&nf_ct_ecache_mutex); + notify = rcu_dereference(nf_conntrack_event_cb); + BUG_ON(notify != new); + rcu_assign_pointer(nf_conntrack_event_cb, NULL); + mutex_unlock(&nf_ct_ecache_mutex); } EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); -int nf_ct_expect_register_notifier(struct notifier_block *nb) +int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new) { - return atomic_notifier_chain_register(&nf_ct_expect_chain, nb); + int ret = 0; + struct nf_exp_event_notifier *notify; + + mutex_lock(&nf_ct_ecache_mutex); + notify = rcu_dereference(nf_expect_event_cb); + if (notify != NULL) { + ret = -EBUSY; + goto out_unlock; + } + rcu_assign_pointer(nf_expect_event_cb, new); + mutex_unlock(&nf_ct_ecache_mutex); + return ret; + +out_unlock: + mutex_unlock(&nf_ct_ecache_mutex); + return ret; } EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); -int nf_ct_expect_unregister_notifier(struct notifier_block *nb) +void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new) { - return atomic_notifier_chain_unregister(&nf_ct_expect_chain, nb); + struct nf_exp_event_notifier *notify; + + mutex_lock(&nf_ct_ecache_mutex); + notify = rcu_dereference(nf_expect_event_cb); + BUG_ON(notify != new); + rcu_assign_pointer(nf_expect_event_cb, NULL); + mutex_unlock(&nf_ct_ecache_mutex); } EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index b1b9e4fb7ded..4448b062de0c 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include @@ -454,13 +453,12 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) ; } -static int ctnetlink_conntrack_event(struct notifier_block *this, - unsigned long events, void *ptr) +static int +ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nlattr *nest_parms; - struct nf_ct_event *item = (struct nf_ct_event *)ptr; struct nf_conn *ct = item->ct; struct sk_buff *skb; unsigned int type; @@ -468,7 +466,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, /* ignore our fake conntrack entry */ if (ct == &nf_conntrack_untracked) - return NOTIFY_DONE; + return 0; if (events & IPCT_DESTROY) { type = IPCTNL_MSG_CT_DELETE; @@ -481,10 +479,10 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, type = IPCTNL_MSG_CT_NEW; group = NFNLGRP_CONNTRACK_UPDATE; } else - return NOTIFY_DONE; + return 0; if (!item->report && !nfnetlink_has_listeners(group)) - return NOTIFY_DONE; + return 0; skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC); if (skb == NULL) @@ -560,8 +558,8 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, rcu_read_unlock(); nlmsg_end(skb, nlh); - nfnetlink_send(skb, item->pid, group, item->report); - return NOTIFY_DONE; + nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC); + return 0; nla_put_failure: rcu_read_unlock(); @@ -570,7 +568,7 @@ nlmsg_failure: kfree_skb(skb); errout: nfnetlink_set_err(0, group, -ENOBUFS); - return NOTIFY_DONE; + return 0; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ @@ -1507,12 +1505,11 @@ nla_put_failure: } #ifdef CONFIG_NF_CONNTRACK_EVENTS -static int ctnetlink_expect_event(struct notifier_block *this, - unsigned long events, void *ptr) +static int +ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - struct nf_exp_event *item = (struct nf_exp_event *)ptr; struct nf_conntrack_expect *exp = item->exp; struct sk_buff *skb; unsigned int type; @@ -1522,11 +1519,11 @@ static int ctnetlink_expect_event(struct notifier_block *this, type = IPCTNL_MSG_EXP_NEW; flags = NLM_F_CREATE|NLM_F_EXCL; } else - return NOTIFY_DONE; + return 0; if (!item->report && !nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW)) - return NOTIFY_DONE; + return 0; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (skb == NULL) @@ -1548,8 +1545,9 @@ static int ctnetlink_expect_event(struct notifier_block *this, rcu_read_unlock(); nlmsg_end(skb, nlh); - nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report); - return NOTIFY_DONE; + nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, + item->report, GFP_ATOMIC); + return 0; nla_put_failure: rcu_read_unlock(); @@ -1558,7 +1556,7 @@ nlmsg_failure: kfree_skb(skb); errout: nfnetlink_set_err(0, 0, -ENOBUFS); - return NOTIFY_DONE; + return 0; } #endif static int ctnetlink_exp_done(struct netlink_callback *cb) @@ -1864,12 +1862,12 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, } #ifdef CONFIG_NF_CONNTRACK_EVENTS -static struct notifier_block ctnl_notifier = { - .notifier_call = ctnetlink_conntrack_event, +static struct nf_ct_event_notifier ctnl_notifier = { + .fcn = ctnetlink_conntrack_event, }; -static struct notifier_block ctnl_notifier_exp = { - .notifier_call = ctnetlink_expect_event, +static struct nf_exp_event_notifier ctnl_notifier_exp = { + .fcn = ctnetlink_expect_event, }; #endif diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 9dbd5709aad7..92761a988375 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -107,9 +107,10 @@ int nfnetlink_has_listeners(unsigned int group) } EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); -int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) +int nfnetlink_send(struct sk_buff *skb, u32 pid, + unsigned group, int echo, gfp_t flags) { - return nlmsg_notify(nfnl, skb, pid, group, echo, gfp_any()); + return nlmsg_notify(nfnl, skb, pid, group, echo, flags); } EXPORT_SYMBOL_GPL(nfnetlink_send); -- cgit v1.2.3 From a5e78820966e17c2316866e00047e4e7e5480f04 Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Thu, 4 Jun 2009 16:54:42 +0200 Subject: netfilter: x_tables: added hook number into match extension parameter structure. Signed-off-by: Evgeniy Polyakov Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 6 ++++-- net/bridge/netfilter/ebtables.c | 2 +- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index c9efe039dc57..1030b7593898 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -184,9 +184,10 @@ struct xt_counters_info * @matchinfo: per-match data * @fragoff: packet is a fragment, this is the data offset * @thoff: position of transport header relative to skb->data - * @hotdrop: drop packet if we had inspection problems + * @hook: hook number given packet came from * @family: Actual NFPROTO_* through which the function is invoked * (helpful when match->family == NFPROTO_UNSPEC) + * @hotdrop: drop packet if we had inspection problems */ struct xt_match_param { const struct net_device *in, *out; @@ -194,8 +195,9 @@ struct xt_match_param { const void *matchinfo; int fragoff; unsigned int thoff; - bool *hotdrop; + unsigned int hooknum; u_int8_t family; + bool *hotdrop; }; /** diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 24555834d431..37928d5f2840 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -170,7 +170,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, mtpar.in = tgpar.in = in; mtpar.out = tgpar.out = out; mtpar.hotdrop = &hotdrop; - tgpar.hooknum = hook; + mtpar.hooknum = tgpar.hooknum = hook; read_lock_bh(&table->lock); private = table->private; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 7b35c0b3841b..5bf7c3f185da 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -343,7 +343,7 @@ ipt_do_table(struct sk_buff *skb, mtpar.in = tgpar.in = in; mtpar.out = tgpar.out = out; mtpar.family = tgpar.family = NFPROTO_IPV4; - tgpar.hooknum = hook; + mtpar.hooknum = tgpar.hooknum = hook; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); xt_info_rdlock_bh(); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 5164e0bf3bcc..ced1f2c0cb65 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -369,7 +369,7 @@ ip6t_do_table(struct sk_buff *skb, mtpar.in = tgpar.in = in; mtpar.out = tgpar.out = out; mtpar.family = tgpar.family = NFPROTO_IPV6; - tgpar.hooknum = hook; + mtpar.hooknum = tgpar.hooknum = hook; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); -- cgit v1.2.3 From 10662aa3083f869c645cc2abf5d66849001e2f5d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 5 Jun 2009 13:24:24 +0200 Subject: netfilter: xt_NFQUEUE: queue balancing support Adds support for specifying a range of queues instead of a single queue id. Flows will be distributed across the given range. This is useful for multicore systems: Instead of having a single application read packets from a queue, start multiple instances on queues x, x+1, .. x+n. Each instance can process flows independently. Packets for the same connection are put into the same queue. Signed-off-by: Holger Eitzenberger Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_NFQUEUE.h | 5 ++ net/netfilter/xt_NFQUEUE.c | 93 ++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_NFQUEUE.h b/include/linux/netfilter/xt_NFQUEUE.h index 982a89f78272..2584f4a777de 100644 --- a/include/linux/netfilter/xt_NFQUEUE.h +++ b/include/linux/netfilter/xt_NFQUEUE.h @@ -15,4 +15,9 @@ struct xt_NFQ_info { __u16 queuenum; }; +struct xt_NFQ_info_v1 { + __u16 queuenum; + __u16 queues_total; +}; + #endif /* _XT_NFQ_TARGET_H */ diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 6e0f84d4d058..498b45101df7 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -11,6 +11,10 @@ #include #include +#include +#include +#include + #include #include #include @@ -23,6 +27,8 @@ MODULE_ALIAS("ipt_NFQUEUE"); MODULE_ALIAS("ip6t_NFQUEUE"); MODULE_ALIAS("arpt_NFQUEUE"); +static u32 jhash_initval __read_mostly; + static unsigned int nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par) { @@ -31,6 +37,72 @@ nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par) return NF_QUEUE_NR(tinfo->queuenum); } +static u32 hash_v4(const struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + u32 ipaddr; + + /* packets in either direction go into same queue */ + ipaddr = iph->saddr ^ iph->daddr; + + return jhash_2words(ipaddr, iph->protocol, jhash_initval); +} + +static unsigned int +nfqueue_tg4_v1(struct sk_buff *skb, const struct xt_target_param *par) +{ + const struct xt_NFQ_info_v1 *info = par->targinfo; + u32 queue = info->queuenum; + + if (info->queues_total > 1) + queue = hash_v4(skb) % info->queues_total + queue; + return NF_QUEUE_NR(queue); +} + +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +static u32 hash_v6(const struct sk_buff *skb) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + u32 addr[4]; + + addr[0] = ip6h->saddr.s6_addr32[0] ^ ip6h->daddr.s6_addr32[0]; + addr[1] = ip6h->saddr.s6_addr32[1] ^ ip6h->daddr.s6_addr32[1]; + addr[2] = ip6h->saddr.s6_addr32[2] ^ ip6h->daddr.s6_addr32[2]; + addr[3] = ip6h->saddr.s6_addr32[3] ^ ip6h->daddr.s6_addr32[3]; + + return jhash2(addr, ARRAY_SIZE(addr), jhash_initval); +} + +static unsigned int +nfqueue_tg6_v1(struct sk_buff *skb, const struct xt_target_param *par) +{ + const struct xt_NFQ_info_v1 *info = par->targinfo; + u32 queue = info->queuenum; + + if (info->queues_total > 1) + queue = hash_v6(skb) % info->queues_total + queue; + return NF_QUEUE_NR(queue); +} +#endif + +static bool nfqueue_tg_v1_check(const struct xt_tgchk_param *par) +{ + const struct xt_NFQ_info_v1 *info = par->targinfo; + u32 maxid; + + if (info->queues_total == 0) { + pr_err("NFQUEUE: number of total queues is 0\n"); + return false; + } + maxid = info->queues_total - 1 + info->queuenum; + if (maxid > 0xffff) { + pr_err("NFQUEUE: number of queues (%u) out of range (got %u)\n", + info->queues_total, maxid); + return false; + } + return true; +} + static struct xt_target nfqueue_tg_reg[] __read_mostly = { { .name = "NFQUEUE", @@ -39,10 +111,31 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = { .targetsize = sizeof(struct xt_NFQ_info), .me = THIS_MODULE, }, + { + .name = "NFQUEUE", + .revision = 1, + .family = NFPROTO_IPV4, + .checkentry = nfqueue_tg_v1_check, + .target = nfqueue_tg4_v1, + .targetsize = sizeof(struct xt_NFQ_info_v1), + .me = THIS_MODULE, + }, +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) + { + .name = "NFQUEUE", + .revision = 1, + .family = NFPROTO_IPV6, + .checkentry = nfqueue_tg_v1_check, + .target = nfqueue_tg6_v1, + .targetsize = sizeof(struct xt_NFQ_info_v1), + .me = THIS_MODULE, + }, +#endif }; static int __init nfqueue_tg_init(void) { + get_random_bytes(&jhash_initval, sizeof(jhash_initval)); return xt_register_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg)); } -- cgit v1.2.3 From 11eeef41d5f63c7d2f7fdfcc733eb7fb137cc384 Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Mon, 8 Jun 2009 17:01:51 +0200 Subject: netfilter: passive OS fingerprint xtables match Passive OS fingerprinting netfilter module allows to passively detect remote OS and perform various netfilter actions based on that knowledge. This module compares some data (WS, MSS, options and it's order, ttl, df and others) from packets with SYN bit set with dynamically loaded OS fingerprints. Fingerprint matching rules can be downloaded from OpenBSD source tree or found in archive and loaded via netfilter netlink subsystem into the kernel via special util found in archive. Archive contains library file (also attached), which was shipped with iptables extensions some time ago (at least when ipt_osf existed in patch-o-matic). Following changes were made in this release: * added NLM_F_CREATE/NLM_F_EXCL checks * dropped _rcu list traversing helpers in the protected add/remove calls * dropped unneded structures, debug prints, obscure comment and check Fingerprints can be downloaded from http://www.openbsd.org/cgi-bin/cvsweb/src/etc/pf.os or can be found in archive Example usage: -d switch removes fingerprints Please consider for inclusion. Thank you. Passive OS fingerprint homepage (archives, examples): http://www.ioremap.net/projects/osf Signed-off-by: Evgeniy Polyakov Signed-off-by: Patrick McHardy --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/nfnetlink.h | 3 +- include/linux/netfilter/xt_osf.h | 133 +++++++++++ net/netfilter/Kconfig | 13 ++ net/netfilter/Makefile | 1 + net/netfilter/xt_osf.c | 428 ++++++++++++++++++++++++++++++++++++ 6 files changed, 578 insertions(+), 1 deletion(-) create mode 100644 include/linux/netfilter/xt_osf.h create mode 100644 net/netfilter/xt_osf.c (limited to 'include/linux') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index af9d2fb97212..2aea50399c0b 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -33,6 +33,7 @@ header-y += xt_limit.h header-y += xt_mac.h header-y += xt_mark.h header-y += xt_multiport.h +header-y += xt_osf.h header-y += xt_owner.h header-y += xt_pkttype.h header-y += xt_quota.h diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 2214e5161461..bff4d5741d98 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -46,7 +46,8 @@ struct nfgenmsg { #define NFNL_SUBSYS_CTNETLINK_EXP 2 #define NFNL_SUBSYS_QUEUE 3 #define NFNL_SUBSYS_ULOG 4 -#define NFNL_SUBSYS_COUNT 5 +#define NFNL_SUBSYS_OSF 5 +#define NFNL_SUBSYS_COUNT 6 #ifdef __KERNEL__ diff --git a/include/linux/netfilter/xt_osf.h b/include/linux/netfilter/xt_osf.h new file mode 100644 index 000000000000..fd2272e0959a --- /dev/null +++ b/include/linux/netfilter/xt_osf.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2003+ Evgeniy Polyakov + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _XT_OSF_H +#define _XT_OSF_H + +#define MAXGENRELEN 32 + +#define XT_OSF_GENRE (1<<0) +#define XT_OSF_TTL (1<<1) +#define XT_OSF_LOG (1<<2) +#define XT_OSF_INVERT (1<<3) + +#define XT_OSF_LOGLEVEL_ALL 0 /* log all matched fingerprints */ +#define XT_OSF_LOGLEVEL_FIRST 1 /* log only the first matced fingerprint */ +#define XT_OSF_LOGLEVEL_ALL_KNOWN 2 /* do not log unknown packets */ + +#define XT_OSF_TTL_TRUE 0 /* True ip and fingerprint TTL comparison */ +#define XT_OSF_TTL_LESS 1 /* Check if ip TTL is less than fingerprint one */ +#define XT_OSF_TTL_NOCHECK 2 /* Do not compare ip and fingerprint TTL at all */ + +struct xt_osf_info { + char genre[MAXGENRELEN]; + __u32 len; + __u32 flags; + __u32 loglevel; + __u32 ttl; +}; + +/* + * Wildcard MSS (kind of). + * It is used to implement a state machine for the different wildcard values + * of the MSS and window sizes. + */ +struct xt_osf_wc { + __u32 wc; + __u32 val; +}; + +/* + * This struct represents IANA options + * http://www.iana.org/assignments/tcp-parameters + */ +struct xt_osf_opt { + __u16 kind, length; + struct xt_osf_wc wc; +}; + +struct xt_osf_user_finger { + struct xt_osf_wc wss; + + __u8 ttl, df; + __u16 ss, mss; + __u16 opt_num; + + char genre[MAXGENRELEN]; + char version[MAXGENRELEN]; + char subtype[MAXGENRELEN]; + + /* MAX_IPOPTLEN is maximum if all options are NOPs or EOLs */ + struct xt_osf_opt opt[MAX_IPOPTLEN]; +}; + +struct xt_osf_nlmsg { + struct xt_osf_user_finger f; + struct iphdr ip; + struct tcphdr tcp; +}; + +/* Defines for IANA option kinds */ + +enum iana_options { + OSFOPT_EOL = 0, /* End of options */ + OSFOPT_NOP, /* NOP */ + OSFOPT_MSS, /* Maximum segment size */ + OSFOPT_WSO, /* Window scale option */ + OSFOPT_SACKP, /* SACK permitted */ + OSFOPT_SACK, /* SACK */ + OSFOPT_ECHO, + OSFOPT_ECHOREPLY, + OSFOPT_TS, /* Timestamp option */ + OSFOPT_POCP, /* Partial Order Connection Permitted */ + OSFOPT_POSP, /* Partial Order Service Profile */ + + /* Others are not used in the current OSF */ + OSFOPT_EMPTY = 255, +}; + +/* + * Initial window size option state machine: multiple of mss, mtu or + * plain numeric value. Can also be made as plain numeric value which + * is not a multiple of specified value. + */ +enum xt_osf_window_size_options { + OSF_WSS_PLAIN = 0, + OSF_WSS_MSS, + OSF_WSS_MTU, + OSF_WSS_MODULO, + OSF_WSS_MAX, +}; + +/* + * Add/remove fingerprint from the kernel. + */ +enum xt_osf_msg_types { + OSF_MSG_ADD, + OSF_MSG_REMOVE, + OSF_MSG_MAX, +}; + +enum xt_osf_attr_type { + OSF_ATTR_UNSPEC, + OSF_ATTR_FINGER, + OSF_ATTR_MAX, +}; + +#endif /* _XT_OSF_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index cb3ad741ebf8..79ba47f042c0 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -917,6 +917,19 @@ config NETFILTER_XT_MATCH_U32 Details and examples are in the kernel module source. +config NETFILTER_XT_MATCH_OSF + tristate '"osf" Passive OS fingerprint match' + depends on NETFILTER_ADVANCED && NETFILTER_NETLINK + help + This option selects the Passive OS Fingerprinting match module + that allows to passively match the remote operating system by + analyzing incoming TCP SYN packets. + + Rules and loading software can be downloaded from + http://www.ioremap.net/projects/osf + + To compile it as a module, choose M here. If unsure, say N. + endif # NETFILTER_XTABLES endmenu diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 6282060fbda9..49f62ee4e9ff 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -77,6 +77,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o +obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c new file mode 100644 index 000000000000..863e40977a4d --- /dev/null +++ b/net/netfilter/xt_osf.c @@ -0,0 +1,428 @@ +/* + * Copyright (c) 2003+ Evgeniy Polyakov + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +struct xt_osf_finger { + struct rcu_head rcu_head; + struct list_head finger_entry; + struct xt_osf_user_finger finger; +}; + +enum osf_fmatch_states { + /* Packet does not match the fingerprint */ + FMATCH_WRONG = 0, + /* Packet matches the fingerprint */ + FMATCH_OK, + /* Options do not match the fingerprint, but header does */ + FMATCH_OPT_WRONG, +}; + +/* + * Indexed by dont-fragment bit. + * It is the only constant value in the fingerprint. + */ +static struct list_head xt_osf_fingers[2]; + +static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = { + [OSF_ATTR_FINGER] = { .len = sizeof(struct xt_osf_user_finger) }, +}; + +static void xt_osf_finger_free_rcu(struct rcu_head *rcu_head) +{ + struct xt_osf_finger *f = container_of(rcu_head, struct xt_osf_finger, rcu_head); + + kfree(f); +} + +static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nlattr *osf_attrs[]) +{ + struct xt_osf_user_finger *f; + struct xt_osf_finger *kf = NULL, *sf; + int err = 0; + + if (!osf_attrs[OSF_ATTR_FINGER]) + return -EINVAL; + + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) + return -EINVAL; + + f = nla_data(osf_attrs[OSF_ATTR_FINGER]); + + kf = kmalloc(sizeof(struct xt_osf_finger), GFP_KERNEL); + if (!kf) + return -ENOMEM; + + memcpy(&kf->finger, f, sizeof(struct xt_osf_user_finger)); + + list_for_each_entry(sf, &xt_osf_fingers[!!f->df], finger_entry) { + if (memcmp(&sf->finger, f, sizeof(struct xt_osf_user_finger))) + continue; + + kfree(kf); + kf = NULL; + + if (nlh->nlmsg_flags & NLM_F_EXCL) + err = -EEXIST; + break; + } + + /* + * We are protected by nfnl mutex. + */ + if (kf) + list_add_tail_rcu(&kf->finger_entry, &xt_osf_fingers[!!f->df]); + + return err; +} + +static int xt_osf_remove_callback(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nlattr *osf_attrs[]) +{ + struct xt_osf_user_finger *f; + struct xt_osf_finger *sf; + int err = ENOENT; + + if (!osf_attrs[OSF_ATTR_FINGER]) + return -EINVAL; + + f = nla_data(osf_attrs[OSF_ATTR_FINGER]); + + list_for_each_entry(sf, &xt_osf_fingers[!!f->df], finger_entry) { + if (memcmp(&sf->finger, f, sizeof(struct xt_osf_user_finger))) + continue; + + /* + * We are protected by nfnl mutex. + */ + list_del_rcu(&sf->finger_entry); + call_rcu(&sf->rcu_head, xt_osf_finger_free_rcu); + + err = 0; + break; + } + + return err; +} + +static const struct nfnl_callback xt_osf_nfnetlink_callbacks[OSF_MSG_MAX] = { + [OSF_MSG_ADD] = { + .call = xt_osf_add_callback, + .attr_count = OSF_ATTR_MAX, + .policy = xt_osf_policy, + }, + [OSF_MSG_REMOVE] = { + .call = xt_osf_remove_callback, + .attr_count = OSF_ATTR_MAX, + .policy = xt_osf_policy, + }, +}; + +static const struct nfnetlink_subsystem xt_osf_nfnetlink = { + .name = "osf", + .subsys_id = NFNL_SUBSYS_OSF, + .cb_count = OSF_MSG_MAX, + .cb = xt_osf_nfnetlink_callbacks, +}; + +static inline int xt_osf_ttl(const struct sk_buff *skb, const struct xt_osf_info *info, + unsigned char f_ttl) +{ + const struct iphdr *ip = ip_hdr(skb); + + if (info->flags & XT_OSF_TTL) { + if (info->ttl == XT_OSF_TTL_TRUE) + return ip->ttl == f_ttl; + if (info->ttl == XT_OSF_TTL_NOCHECK) + return 1; + else if (ip->ttl <= f_ttl) + return 1; + else { + struct in_device *in_dev = __in_dev_get_rcu(skb->dev); + int ret = 0; + + for_ifa(in_dev) { + if (inet_ifa_match(ip->saddr, ifa)) { + ret = (ip->ttl == f_ttl); + break; + } + } + endfor_ifa(in_dev); + + return ret; + } + } + + return ip->ttl == f_ttl; +} + +static bool xt_osf_match_packet(const struct sk_buff *skb, + const struct xt_match_param *p) +{ + const struct xt_osf_info *info = p->matchinfo; + const struct iphdr *ip = ip_hdr(skb); + const struct tcphdr *tcp; + struct tcphdr _tcph; + int fmatch = FMATCH_WRONG, fcount = 0; + unsigned int optsize = 0, check_WSS = 0; + u16 window, totlen, mss = 0; + bool df; + const unsigned char *optp = NULL, *_optp = NULL; + unsigned char opts[MAX_IPOPTLEN]; + const struct xt_osf_finger *kf; + const struct xt_osf_user_finger *f; + + if (!info) + return false; + + tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph); + if (!tcp) + return false; + + if (!tcp->syn) + return false; + + totlen = ntohs(ip->tot_len); + df = ntohs(ip->frag_off) & IP_DF; + window = ntohs(tcp->window); + + if (tcp->doff * 4 > sizeof(struct tcphdr)) { + optsize = tcp->doff * 4 - sizeof(struct tcphdr); + + _optp = optp = skb_header_pointer(skb, ip_hdrlen(skb) + + sizeof(struct tcphdr), optsize, opts); + } + + rcu_read_lock(); + list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) { + f = &kf->finger; + + if (!(info->flags & XT_OSF_LOG) && strcmp(info->genre, f->genre)) + continue; + + optp = _optp; + fmatch = FMATCH_WRONG; + + if (totlen == f->ss && xt_osf_ttl(skb, info, f->ttl)) { + int foptsize, optnum; + + /* + * Should not happen if userspace parser was written correctly. + */ + if (f->wss.wc >= OSF_WSS_MAX) + continue; + + /* Check options */ + + foptsize = 0; + for (optnum = 0; optnum < f->opt_num; ++optnum) + foptsize += f->opt[optnum].length; + + if (foptsize > MAX_IPOPTLEN || + optsize > MAX_IPOPTLEN || + optsize != foptsize) + continue; + + check_WSS = f->wss.wc; + + for (optnum = 0; optnum < f->opt_num; ++optnum) { + if (f->opt[optnum].kind == (*optp)) { + __u32 len = f->opt[optnum].length; + const __u8 *optend = optp + len; + int loop_cont = 0; + + fmatch = FMATCH_OK; + + switch (*optp) { + case OSFOPT_MSS: + mss = optp[3]; + mss <<= 8; + mss |= optp[2]; + + mss = ntohs(mss); + break; + case OSFOPT_TS: + loop_cont = 1; + break; + } + + optp = optend; + } else + fmatch = FMATCH_OPT_WRONG; + + if (fmatch != FMATCH_OK) + break; + } + + if (fmatch != FMATCH_OPT_WRONG) { + fmatch = FMATCH_WRONG; + + switch (check_WSS) { + case OSF_WSS_PLAIN: + if (f->wss.val == 0 || window == f->wss.val) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MSS: + /* + * Some smart modems decrease mangle MSS to + * SMART_MSS_2, so we check standard, decreased + * and the one provided in the fingerprint MSS + * values. + */ +#define SMART_MSS_1 1460 +#define SMART_MSS_2 1448 + if (window == f->wss.val * mss || + window == f->wss.val * SMART_MSS_1 || + window == f->wss.val * SMART_MSS_2) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MTU: + if (window == f->wss.val * (mss + 40) || + window == f->wss.val * (SMART_MSS_1 + 40) || + window == f->wss.val * (SMART_MSS_2 + 40)) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MODULO: + if ((window % f->wss.val) == 0) + fmatch = FMATCH_OK; + break; + } + } + + if (fmatch != FMATCH_OK) + continue; + + fcount++; + + if (info->flags & XT_OSF_LOG) + nf_log_packet(p->hooknum, 0, skb, p->in, p->out, NULL, + "%s [%s:%s] : %pi4:%d -> %pi4:%d hops=%d\n", + f->genre, f->version, f->subtype, + &ip->saddr, ntohs(tcp->source), + &ip->daddr, ntohs(tcp->dest), + f->ttl - ip->ttl); + + if ((info->flags & XT_OSF_LOG) && + info->loglevel == XT_OSF_LOGLEVEL_FIRST) + break; + } + } + rcu_read_unlock(); + + if (!fcount && (info->flags & XT_OSF_LOG)) + nf_log_packet(p->hooknum, 0, skb, p->in, p->out, NULL, + "Remote OS is not known: %pi4:%u -> %pi4:%u\n", + &ip->saddr, ntohs(tcp->source), + &ip->daddr, ntohs(tcp->dest)); + + if (fcount) + fmatch = FMATCH_OK; + + return fmatch == FMATCH_OK; +} + +static struct xt_match xt_osf_match = { + .name = "osf", + .revision = 0, + .family = NFPROTO_IPV4, + .proto = IPPROTO_TCP, + .hooks = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_FORWARD), + .match = xt_osf_match_packet, + .matchsize = sizeof(struct xt_osf_info), + .me = THIS_MODULE, +}; + +static int __init xt_osf_init(void) +{ + int err = -EINVAL; + int i; + + for (i=0; ifinger_entry); + call_rcu(&f->rcu_head, xt_osf_finger_free_rcu); + } + } + rcu_read_unlock(); + + rcu_barrier(); +} + +module_init(xt_osf_init); +module_exit(xt_osf_fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Evgeniy Polyakov "); +MODULE_DESCRIPTION("Passive OS fingerprint matching."); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF); -- cgit v1.2.3 From a31e1ffd2231b8fdf7eda9ed750a4a0df9bcad4e Mon Sep 17 00:00:00 2001 From: Laszlo Attila Toth Date: Tue, 9 Jun 2009 15:16:34 +0200 Subject: netfilter: xt_socket: added new revision of the 'socket' match supporting flags If the XT_SOCKET_TRANSPARENT flag is set, enabled 'transparent' socket option is required for the socket to be matched. Signed-off-by: Laszlo Attila Toth Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_socket.h | 12 +++++++ net/netfilter/xt_socket.c | 63 ++++++++++++++++++++++++++++++------- 2 files changed, 64 insertions(+), 11 deletions(-) create mode 100644 include/linux/netfilter/xt_socket.h (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_socket.h b/include/linux/netfilter/xt_socket.h new file mode 100644 index 000000000000..6f475b8ff34b --- /dev/null +++ b/include/linux/netfilter/xt_socket.h @@ -0,0 +1,12 @@ +#ifndef _XT_SOCKET_H +#define _XT_SOCKET_H + +enum { + XT_SOCKET_TRANSPARENT = 1 << 0, +}; + +struct xt_socket_mtinfo1 { + __u8 flags; +}; + +#endif /* _XT_SOCKET_H */ diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 1acc089be7e9..ebf00ad5b194 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -22,6 +22,8 @@ #include #include +#include + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #define XT_SOCKET_HAVE_CONNTRACK 1 #include @@ -86,7 +88,8 @@ extract_icmp_fields(const struct sk_buff *skb, static bool -socket_mt(const struct sk_buff *skb, const struct xt_match_param *par) +socket_match(const struct sk_buff *skb, const struct xt_match_param *par, + const struct xt_socket_mtinfo1 *info) { const struct iphdr *iph = ip_hdr(skb); struct udphdr _hdr, *hp = NULL; @@ -141,10 +144,24 @@ socket_mt(const struct sk_buff *skb, const struct xt_match_param *par) sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr, sport, dport, par->in, false); if (sk != NULL) { - bool wildcard = (sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->rcv_saddr == 0); + bool wildcard; + bool transparent = true; + + /* Ignore sockets listening on INADDR_ANY */ + wildcard = (sk->sk_state != TCP_TIME_WAIT && + inet_sk(sk)->rcv_saddr == 0); + + /* Ignore non-transparent sockets, + if XT_SOCKET_TRANSPARENT is used */ + if (info && info->flags & XT_SOCKET_TRANSPARENT) + transparent = ((sk->sk_state != TCP_TIME_WAIT && + inet_sk(sk)->transparent) || + (sk->sk_state == TCP_TIME_WAIT && + inet_twsk(sk)->tw_transparent)); nf_tproxy_put_sock(sk); - if (wildcard) + + if (wildcard || !transparent) sk = NULL; } @@ -157,23 +174,47 @@ socket_mt(const struct sk_buff *skb, const struct xt_match_param *par) return (sk != NULL); } -static struct xt_match socket_mt_reg __read_mostly = { - .name = "socket", - .family = AF_INET, - .match = socket_mt, - .hooks = 1 << NF_INET_PRE_ROUTING, - .me = THIS_MODULE, +static bool +socket_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) +{ + return socket_match(skb, par, NULL); +} + +static bool +socket_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) +{ + return socket_match(skb, par, par->matchinfo); +} + +static struct xt_match socket_mt_reg[] __read_mostly = { + { + .name = "socket", + .revision = 0, + .family = NFPROTO_IPV4, + .match = socket_mt_v0, + .hooks = 1 << NF_INET_PRE_ROUTING, + .me = THIS_MODULE, + }, + { + .name = "socket", + .revision = 1, + .family = NFPROTO_IPV4, + .match = socket_mt_v1, + .matchsize = sizeof(struct xt_socket_mtinfo1), + .hooks = 1 << NF_INET_PRE_ROUTING, + .me = THIS_MODULE, + }, }; static int __init socket_mt_init(void) { nf_defrag_ipv4_enable(); - return xt_register_match(&socket_mt_reg); + return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg)); } static void __exit socket_mt_exit(void) { - xt_unregister_match(&socket_mt_reg); + xt_unregister_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg)); } module_init(socket_mt_init); -- cgit v1.2.3