From ff4cc3ac93e1d0369928fd60ec1fe82417afc576 Mon Sep 17 00:00:00 2001
From: Mike Kershaw
Date: Thu, 1 Sep 2005 17:40:05 -0700
Subject: [TUNTAP]: Allow setting the linktype of the tap device from userspace

Currently tun/tap only supports the EN10MB ARP type.  For use with
wireless and other networking types it should be possible to set the
ARP type via an ioctl.

Patch v2: Included check that the tap interface is down before changing
the link type out from underneath it

Signed-off-by: Mike Kershaw
Signed-off-by: David S. Miller
---
 include/linux/if_tun.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 096a85a58ae5..88aef7b86ef4 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -77,6 +77,7 @@ struct tun_struct {
 #define TUNSETIFF     _IOW('T', 202, int)
 #define TUNSETPERSIST _IOW('T', 203, int)
 #define TUNSETOWNER   _IOW('T', 204, int)
+#define TUNSETLINK    _IOW('T', 205, int)
 
 /* TUNSETIFF ifr flags */
 #define IFF_TUN		0x0001
-- 
cgit v1.2.3
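
Usage sketch (not part of the patch): from userspace the new ioctl slots in
right after TUNSETIFF and before the interface is brought up, since the v2
check rejects link-type changes on a running device. A minimal sketch in C;
the helper name is made up for illustration, ARPHRD_IEEE80211 is just one
example link type, and error handling is reduced to early returns:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/if.h>
#include <linux/if_arp.h>	/* ARPHRD_* link types */
#include <linux/if_tun.h>

/* Hypothetical helper: create a tap device, then switch its ARP type
 * while the interface is still down. */
static int tap_open_with_linktype(const char *name, int arp_type)
{
	struct ifreq ifr;
	int fd = open("/dev/net/tun", O_RDWR);

	if (fd < 0)
		return -1;

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
	strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
	if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
		close(fd);
		return -1;
	}

	/* New in this patch: TUNSETLINK takes the ARP hardware type as
	 * its integer argument, e.g. ARPHRD_IEEE80211 for wireless. */
	if (ioctl(fd, TUNSETLINK, arp_type) < 0) {
		close(fd);
		return -1;
	}

	return fd;
}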
From 732db659b83579b922c18dee9123e1529b5fb5d2 Mon Sep 17 00:00:00 2001
From: Adrian Bunk
Date: Thu, 1 Sep 2005 17:40:26 -0700
Subject: [IPVS]: "extern inline" -> "static inline"

"extern inline" doesn't make much sense.

Signed-off-by: Adrian Bunk
Signed-off-by: David S. Miller
---
 include/net/ip_vs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 7a3c43711a17..e426641c519f 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -958,7 +958,7 @@ static __inline__ int ip_vs_todrop(void)
  */
 #define IP_VS_FWD_METHOD(cp)  (cp->flags & IP_VS_CONN_F_FWD_MASK)
 
-extern __inline__ char ip_vs_fwd_tag(struct ip_vs_conn *cp)
+static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp)
 {
 	char fwd;
 
-- 
cgit v1.2.3
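
For readers wondering why it "doesn't make much sense": under the GNU C89
rules gcc used at the time, `extern inline` provides an inline body but never
emits an out-of-line definition, so any call the compiler chooses not to
inline becomes a reference to a symbol nothing defines. A hypothetical
two-file illustration, not from the kernel:

/* header.h -- the pattern being removed */
extern __inline__ int twice(int x)
{
	return 2 * x;	/* inlined where possible; no object code emitted */
}

/* user.c */
#include "header.h"

int f(int x)
{
	/* gcc -O0 does not inline, so this becomes a call to an external
	 * symbol `twice` that no translation unit defines -> link error.
	 * With `static inline`, each translation unit keeps a private
	 * out-of-line copy to fall back on, so the link always succeeds. */
	return twice(x);
}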
From 64baf3cfea974d2b9e671ccfdbc03e030ea5ebc6 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Thu, 1 Sep 2005 17:43:05 -0700
Subject: [CRYPTO]: Added CRYPTO_TFM_REQ_MAY_SLEEP flag

The crypto layer currently uses in_atomic() to determine whether it is
allowed to sleep.  This is incorrect since spin locks don't always cause
in_atomic() to return true.

Instead of that, this patch returns to an earlier idea of a per-tfm flag
which determines whether sleeping is allowed.  Unlike the earlier version,
the default is to not allow sleeping.  This ensures that no existing code
can break.

As usual, this flag may either be set through crypto_alloc_tfm(), or
just before a specific crypto operation.

Signed-off-by: Herbert Xu
Signed-off-by: David S. Miller
---
 crypto/api.c           | 3 ++-
 crypto/cipher.c        | 4 ----
 crypto/internal.h      | 3 ++-
 include/linux/crypto.h | 1 +
 4 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/crypto/api.c b/crypto/api.c
index b4728811ce3b..959c4e5f264f 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -66,7 +66,8 @@ static inline struct crypto_alg *crypto_alg_mod_lookup(const char *name)
 
 static int crypto_init_flags(struct crypto_tfm *tfm, u32 flags)
 {
-	tfm->crt_flags = 0;
+	tfm->crt_flags = flags & CRYPTO_TFM_REQ_MASK;
+	flags &= ~CRYPTO_TFM_REQ_MASK;
 
 	switch (crypto_tfm_alg_type(tfm)) {
 	case CRYPTO_ALG_TYPE_CIPHER:
diff --git a/crypto/cipher.c b/crypto/cipher.c
index 8da644364cb4..3df47f93c9db 100644
--- a/crypto/cipher.c
+++ b/crypto/cipher.c
@@ -377,11 +377,7 @@ static int nocrypt_iv(struct crypto_tfm *tfm,
 
 int crypto_init_cipher_flags(struct crypto_tfm *tfm, u32 flags)
 {
 	u32 mode = flags & CRYPTO_TFM_MODE_MASK;
-
 	tfm->crt_cipher.cit_mode = mode ? mode : CRYPTO_TFM_MODE_ECB;
-	if (flags & CRYPTO_TFM_REQ_WEAK_KEY)
-		tfm->crt_flags = CRYPTO_TFM_REQ_WEAK_KEY;
-
 	return 0;
 }
diff --git a/crypto/internal.h b/crypto/internal.h
index 37515beafc8c..37aa652ce5ce 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -17,6 +17,7 @@
 #include
 #include
 #include
+#include
 #include
 
 extern enum km_type crypto_km_types[];
@@ -38,7 +39,7 @@ static inline void crypto_kunmap(void *vaddr, int out)
 
 static inline void crypto_yield(struct crypto_tfm *tfm)
 {
-	if (!in_atomic())
+	if (tfm->crt_flags & CRYPTO_TFM_REQ_MAY_SLEEP)
 		cond_resched();
 }
 
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 5e2bcc636a02..3c89df6e7768 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -45,6 +45,7 @@
 #define CRYPTO_TFM_MODE_CTR		0x00000008
 
 #define CRYPTO_TFM_REQ_WEAK_KEY		0x00000100
+#define CRYPTO_TFM_REQ_MAY_SLEEP	0x00000200
 #define CRYPTO_TFM_RES_WEAK_KEY		0x00100000
 #define CRYPTO_TFM_RES_BAD_KEY_LEN	0x00200000
 #define CRYPTO_TFM_RES_BAD_KEY_SCHED	0x00400000
-- 
cgit v1.2.3
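
Usage sketch (mine, not from the patch), against the 2.6.13-era API the
changelog refers to: the flag can be requested up front in crypto_alloc_tfm(),
or toggled on crt_flags before an individual operation when the caller knows
that particular path may sleep. The "aes" algorithm name is just an example:

#include <linux/crypto.h>

/* Sketch: allocate a transform whose operations are allowed to sleep.
 * crypto_alloc_tfm() of this era returns NULL on failure. */
static struct crypto_tfm *alloc_sleeping_aes(void)
{
	struct crypto_tfm *tfm;

	/* Option 1: request the behaviour at allocation time. */
	tfm = crypto_alloc_tfm("aes", CRYPTO_TFM_REQ_MAY_SLEEP);
	if (tfm == NULL)
		return NULL;

	/* Option 2 (equivalent): set the flag just before an operation,
	 * and clear it again on paths that cannot sleep. */
	tfm->crt_flags |= CRYPTO_TFM_REQ_MAY_SLEEP;

	return tfm;
}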
From d80d99d643090c3cf2b1f9fb3fadd1256f7e384f Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Thu, 1 Sep 2005 17:48:23 -0700
Subject: [NET]: Add sk_stream_wmem_schedule

This patch introduces sk_stream_wmem_schedule as a short-hand for
the sk_forward_alloc checking on egress.

Signed-off-by: Herbert Xu
Signed-off-by: David S. Miller
---
 include/net/sock.h | 12 ++++++++----
 net/ipv4/tcp.c     |  3 +--
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 312cb25cbd18..e51e626e9af1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -709,6 +709,12 @@ static inline int sk_stream_rmem_schedule(struct sock *sk, struct sk_buff *skb)
 	       sk_stream_mem_schedule(sk, skb->truesize, 1);
 }
 
+static inline int sk_stream_wmem_schedule(struct sock *sk, int size)
+{
+	return size <= sk->sk_forward_alloc ||
+	       sk_stream_mem_schedule(sk, size, 0);
+}
+
 /* Used by processes to "lock" a socket state, so that
  * interrupts and bottom half handlers won't change it
  * from under us. It essentially blocks any incoming
@@ -1203,8 +1209,7 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk,
 	skb = alloc_skb_fclone(size + hdr_len, gfp);
 	if (skb) {
 		skb->truesize += mem;
-		if (sk->sk_forward_alloc >= (int)skb->truesize ||
-		    sk_stream_mem_schedule(sk, skb->truesize, 0)) {
+		if (sk_stream_wmem_schedule(sk, skb->truesize)) {
 			skb_reserve(skb, hdr_len);
 			return skb;
 		}
@@ -1227,8 +1232,7 @@ static inline struct page *sk_stream_alloc_page(struct sock *sk)
 {
 	struct page *page = NULL;
 
-	if (sk->sk_forward_alloc >= (int)PAGE_SIZE ||
-	    sk_stream_mem_schedule(sk, PAGE_SIZE, 0))
+	if (sk_stream_wmem_schedule(sk, PAGE_SIZE))
 		page = alloc_pages(sk->sk_allocation, 0);
 	else {
 		sk->sk_prot->enter_memory_pressure();
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 02fdda68718d..854f6d0c4bb3 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -552,8 +552,7 @@ new_segment:
 				tcp_mark_push(tp, skb);
 				goto new_segment;
 			}
-			if (sk->sk_forward_alloc < copy &&
-			    !sk_stream_mem_schedule(sk, copy, 0))
+			if (!sk_stream_wmem_schedule(sk, copy))
 				goto wait_for_memory;
 
 			if (can_coalesce) {
-- 
cgit v1.2.3
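
The semantics being named here: sk_forward_alloc is quota the socket has
already reserved from the protocol's global memory pool, and wmem_schedule
asks "may this socket be charged `size` more bytes?", hitting the pool only
when the banked quota is short. A standalone toy model of that idea
(userspace C, illustrative numbers, nothing from the kernel beyond the shape
of the logic):

#include <stdio.h>

#define PAGE 4096
static long pool_remaining = 8 * PAGE;	/* stand-in for the protocol-wide pool */

struct toy_sock { long forward_alloc; };

/* Simplified stand-in for sk_stream_mem_schedule(sk, size, 0):
 * reserve page-rounded quota from the global pool. */
static int toy_mem_schedule(struct toy_sock *sk, int size)
{
	long want = ((size + PAGE - 1) / PAGE) * PAGE;

	if (pool_remaining < want)
		return 0;			/* pool exhausted: refuse */
	pool_remaining -= want;
	sk->forward_alloc += want;		/* bank the reservation */
	return 1;
}

/* Shape of sk_stream_wmem_schedule(): fast path on banked quota,
 * slow path reserves more. */
static int toy_wmem_schedule(struct toy_sock *sk, int size)
{
	return size <= sk->forward_alloc || toy_mem_schedule(sk, size);
}

int main(void)
{
	struct toy_sock sk = { 0 };

	if (toy_wmem_schedule(&sk, 1000))
		sk.forward_alloc -= 1000;	/* charge the bytes actually used */
	printf("banked quota now %ld bytes\n", sk.forward_alloc);	/* 3096 */
	return 0;
}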
From ef015786152adaff5a6a8bf0c8ea2f70cee8059d Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Thu, 1 Sep 2005 17:48:59 -0700
Subject: [TCP]: Fix sk_forward_alloc underflow in tcp_sendmsg

I've finally found a potential cause of the sk_forward_alloc underflows
that people have been reporting sporadically.

When tcp_sendmsg tacks on extra bits to an existing TCP_PAGE we don't
check sk_forward_alloc even though a large amount of time may have
elapsed since we allocated the page.  In the meantime someone could've
come along and liberated packets and reclaimed sk_forward_alloc memory.

This patch makes tcp_sendmsg check sk_forward_alloc every time as we
do in do_tcp_sendpages.

Signed-off-by: Herbert Xu
Signed-off-by: David S. Miller
---
 include/net/sock.h |  5 ++---
 net/ipv4/tcp.c     | 14 +++++++++-----
 2 files changed, 11 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index e51e626e9af1..cf628261da52 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1232,9 +1232,8 @@ static inline struct page *sk_stream_alloc_page(struct sock *sk)
 {
 	struct page *page = NULL;
 
-	if (sk_stream_wmem_schedule(sk, PAGE_SIZE))
-		page = alloc_pages(sk->sk_allocation, 0);
-	else {
+	page = alloc_pages(sk->sk_allocation, 0);
+	if (!page) {
 		sk->sk_prot->enter_memory_pressure();
 		sk_stream_moderate_sndbuf(sk);
 	}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 854f6d0c4bb3..cbcc9fc47783 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -769,19 +769,23 @@ new_segment:
 			if (off == PAGE_SIZE) {
 				put_page(page);
 				TCP_PAGE(sk) = page = NULL;
+				TCP_OFF(sk) = off = 0;
 			}
-		}
+		} else
+			BUG_ON(off);
+
+		if (copy > PAGE_SIZE - off)
+			copy = PAGE_SIZE - off;
+
+		if (!sk_stream_wmem_schedule(sk, copy))
+			goto wait_for_memory;
 
 		if (!page) {
 			/* Allocate new cache page. */
 			if (!(page = sk_stream_alloc_page(sk)))
 				goto wait_for_memory;
-			off = 0;
 		}
 
-		if (copy > PAGE_SIZE - off)
-			copy = PAGE_SIZE - off;
-
 		/* Time to copy data. We are close to
 		 * the end! */
 		err = skb_copy_to_page(sk, from, skb, page,
-- 
cgit v1.2.3

From 6475be16fd9b3c6746ca4d18959246b13c669ea8 Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Thu, 1 Sep 2005 22:47:01 -0700
Subject: [TCP]: Keep TSO enabled even during loss events.

All we need to do is resegment the queue so that we record SACK
information accurately.  The edges of the SACK blocks guide our
resegmenting decisions.

With help from Herbert Xu.

Signed-off-by: David S. Miller
---
 include/net/tcp.h     |  1 +
 net/ipv4/tcp_input.c  | 36 ++++++++++++++++++++++-----------
 net/ipv4/tcp_output.c | 55 +++++++++++++++++++--------------------------------
 3 files changed, 45 insertions(+), 47 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index d6bcf1317a6a..97af77c4d096 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -454,6 +454,7 @@ extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
 extern void tcp_xmit_retransmit_queue(struct sock *);
 extern void tcp_simple_retransmit(struct sock *);
 extern int tcp_trim_head(struct sock *, struct sk_buff *, u32);
+extern int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int);
 
 extern void tcp_send_probe0(struct sock *);
 extern void tcp_send_partial(struct sock *);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1afb080bdf0c..29222b964951 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -923,14 +923,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	int flag = 0;
 	int i;
 
-	/* So, SACKs for already sent large segments will be lost.
-	 * Not good, but alternative is to resegment the queue.
-	 */
-	if (sk->sk_route_caps & NETIF_F_TSO) {
-		sk->sk_route_caps &= ~NETIF_F_TSO;
-		sock_set_flag(sk, SOCK_NO_LARGESEND);
-		tp->mss_cache = tp->mss_cache;
-	}
-
 	if (!tp->sacked_out)
 		tp->fackets_out = 0;
 	prior_fackets = tp->fackets_out;
@@ -978,20 +970,40 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			flag |= FLAG_DATA_LOST;
 
 		sk_stream_for_retrans_queue(skb, sk) {
-			u8 sacked = TCP_SKB_CB(skb)->sacked;
-			int in_sack;
+			int in_sack, pcount;
+			u8 sacked;
 
 			/* The retransmission queue is always in order, so
 			 * we can short-circuit the walk early.
 			 */
-			if(!before(TCP_SKB_CB(skb)->seq, end_seq))
+			if (!before(TCP_SKB_CB(skb)->seq, end_seq))
 				break;
 
-			fack_count += tcp_skb_pcount(skb);
+			pcount = tcp_skb_pcount(skb);
+
+			if (pcount > 1 &&
+			    (after(start_seq, TCP_SKB_CB(skb)->seq) ||
+			     before(end_seq, TCP_SKB_CB(skb)->end_seq))) {
+				unsigned int pkt_len;
+
+				if (after(start_seq, TCP_SKB_CB(skb)->seq))
+					pkt_len = (start_seq -
+						   TCP_SKB_CB(skb)->seq);
+				else
+					pkt_len = (end_seq -
						   TCP_SKB_CB(skb)->seq);
+				if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->tso_size))
+					break;
+				pcount = tcp_skb_pcount(skb);
+			}
+
+			fack_count += pcount;
 
 			in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
 				  !before(end_seq, TCP_SKB_CB(skb)->end_seq);
 
+			sacked = TCP_SKB_CB(skb)->sacked;
+
 			/* Account D-SACK for retransmitted packet. */
 			if ((dup_sack && in_sack) &&
 			    (sacked & TCPCB_RETRANS) &&
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 75b68116682a..6094db5e11be 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -428,11 +428,11 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned
  * packet to the list.  This won't be called frequently, I hope.
  * Remember, these are still headerless SKBs at this point.
  */
-static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now)
+int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *buff;
-	int nsize;
+	int nsize, old_factor;
 	u16 flags;
 
 	nsize = skb_headlen(skb) - len;
@@ -490,18 +490,29 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned
 		tp->left_out -= tcp_skb_pcount(skb);
 	}
 
+	old_factor = tcp_skb_pcount(skb);
+
 	/* Fix up tso_factor for both original and new SKB.  */
 	tcp_set_skb_tso_segs(sk, skb, mss_now);
 	tcp_set_skb_tso_segs(sk, buff, mss_now);
 
-	if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
-		tp->lost_out += tcp_skb_pcount(skb);
-		tp->left_out += tcp_skb_pcount(skb);
-	}
+	/* If this packet has been sent out already, we must
+	 * adjust the various packet counters.
+	 */
+	if (after(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
+		int diff = old_factor - tcp_skb_pcount(skb) -
+			tcp_skb_pcount(buff);
 
-	if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) {
-		tp->lost_out += tcp_skb_pcount(buff);
-		tp->left_out += tcp_skb_pcount(buff);
+		tp->packets_out -= diff;
+		if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
+			tp->lost_out -= diff;
+			tp->left_out -= diff;
+		}
+		if (diff > 0) {
+			tp->fackets_out -= diff;
+			if ((int)tp->fackets_out < 0)
+				tp->fackets_out = 0;
+		}
 	}
 
 	/* Link BUFF into the send queue.
 	 */
@@ -1350,12 +1361,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
 		if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
 			BUG();
-
-		if (sk->sk_route_caps & NETIF_F_TSO) {
-			sk->sk_route_caps &= ~NETIF_F_TSO;
-			sock_set_flag(sk, SOCK_NO_LARGESEND);
-		}
-
 		if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
 			return -ENOMEM;
 	}
@@ -1370,22 +1375,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		return -EAGAIN;
 
 	if (skb->len > cur_mss) {
-		int old_factor = tcp_skb_pcount(skb);
-		int diff;
-
 		if (tcp_fragment(sk, skb, cur_mss, cur_mss))
 			return -ENOMEM; /* We'll try again later. */
-
-		/* New SKB created, account for it. */
-		diff = old_factor - tcp_skb_pcount(skb) -
-		       tcp_skb_pcount(skb->next);
-		tp->packets_out -= diff;
-
-		if (diff > 0) {
-			tp->fackets_out -= diff;
-			if ((int)tp->fackets_out < 0)
-				tp->fackets_out = 0;
-		}
 	}
 
 	/* Collapse two adjacent packets if worthwhile and we can. */
@@ -1993,12 +1984,6 @@ int tcp_write_wakeup(struct sock *sk)
 			TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
 			if (tcp_fragment(sk, skb, seg_size, mss))
 				return -1;
-			/* SWS override triggered forced fragmentation.
-			 * Disable TSO, the connection is too sick. */
-			if (sk->sk_route_caps & NETIF_F_TSO) {
-				sock_set_flag(sk, SOCK_NO_LARGESEND);
-				sk->sk_route_caps &= ~NETIF_F_TSO;
-			}
 		} else if (!tcp_skb_pcount(skb))
 			tcp_set_skb_tso_segs(sk, skb, mss);
 
-- 
cgit v1.2.3
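
To make the resegmenting rule concrete, a worked example with hypothetical
numbers (mine, not from the changelog), mirroring the pcount > 1 branch added
to tcp_sacktag_write_queue() above: a single TSO skb covers sequence numbers
1000..4000 and a SACK block arrives for 2500..4000. Because the SACK edge
falls inside the skb, the skb is split at that edge before tagging, so the
counters record exactly which packets were SACKed:

#include <stdio.h>

typedef unsigned int u32;

/* Wraparound-safe sequence comparisons, as in include/net/tcp.h. */
static int before(u32 seq1, u32 seq2) { return (int)(seq1 - seq2) < 0; }
static int after(u32 seq1, u32 seq2)  { return before(seq2, seq1); }

int main(void)
{
	u32 skb_seq = 1000, skb_end_seq = 4000;	/* one TSO skb, pcount > 1 */
	u32 start_seq = 2500, end_seq = 4000;	/* incoming SACK block */
	u32 pkt_len;

	if (after(start_seq, skb_seq))
		pkt_len = start_seq - skb_seq;	/* split at the SACK left edge */
	else
		pkt_len = end_seq - skb_seq;	/* split at the SACK right edge */

	/* tcp_fragment() would now split the skb after pkt_len bytes, so
	 * [1000,2500) stays un-SACKed while [2500,4000) is tagged SACKed
	 * with accurate pcounts -- no need to disable TSO. */
	printf("split after %u bytes -> [%u,%u) and [%u,%u)\n",
	       pkt_len, skb_seq, skb_seq + pkt_len,
	       skb_seq + pkt_len, skb_end_seq);
	return 0;
}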