From 7c2330f1afe13b3a934140857bc8060d00103a89 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 16 Sep 2013 18:08:02 -0700 Subject: regulator: fix fatal kernel-doc error Fix fatal kernel-doc error in : Error(include/linux/regulator/driver.h:52): cannot understand prototype: 'struct regulator_linear_range ' Signed-off-by: Randy Dunlap [Rewrote first line -- broonie] Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 67e13aa5a478..9bdad43ad228 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -40,6 +40,8 @@ enum regulator_status { }; /** + * struct regulator_linear_range - specify linear voltage ranges + * * Specify a range of voltages for regulator_map_linar_range() and * regulator_list_linear_range(). * -- cgit v1.2.3 From 5e130367d43ff22836bbae380d197d600fe8ddbb Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 13 Sep 2013 08:58:17 +0300 Subject: Bluetooth: Introduce a new HCI_RFKILLED flag This makes it more convenient to check for rfkill (no need to check for dev->rfkill before calling rfkill_blocked()) and also avoids potential races if the RFKILL state needs to be checked from within the rfkill callback. Signed-off-by: Johan Hedberg Cc: stable@vger.kernel.org Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 1 + net/bluetooth/hci_core.c | 15 ++++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index aaeaf0938ec0..15f10841e2b5 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -104,6 +104,7 @@ enum { enum { HCI_SETUP, HCI_AUTO_OFF, + HCI_RFKILLED, HCI_MGMT, HCI_PAIRABLE, HCI_SERVICE_CACHE, diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 634debab4d54..0d5bc246b607 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1146,7 +1146,7 @@ int hci_dev_open(__u16 dev) goto done; } - if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) { + if (test_bit(HCI_RFKILLED, &hdev->dev_flags)) { ret = -ERFKILL; goto done; } @@ -1566,10 +1566,12 @@ static int hci_rfkill_set_block(void *data, bool blocked) BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked); - if (!blocked) - return 0; - - hci_dev_do_close(hdev); + if (blocked) { + set_bit(HCI_RFKILLED, &hdev->dev_flags); + hci_dev_do_close(hdev); + } else { + clear_bit(HCI_RFKILLED, &hdev->dev_flags); +} return 0; } @@ -2209,6 +2211,9 @@ int hci_register_dev(struct hci_dev *hdev) } } + if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) + set_bit(HCI_RFKILLED, &hdev->dev_flags); + set_bit(HCI_SETUP, &hdev->dev_flags); if (hdev->dev_type != HCI_AMP) -- cgit v1.2.3 From c16526a7b99c1c28e9670a8c8e3dbcf741bb32be Mon Sep 17 00:00:00 2001 From: Simon Kirby Date: Sat, 10 Aug 2013 01:26:18 -0700 Subject: ipvs: fix overflow on dest weight multiply Schedulers such as lblc and lblcr require the weight to be as high as the maximum number of active connections. In commit b552f7e3a9524abcbcdf ("ipvs: unify the formula to estimate the overhead of processing connections"), the consideration of inactconns and activeconns was cleaned up to always count activeconns as 256 times more important than inactconns. In cases where 3000 or more connections are expected, a weight of 3000 * 256 * 3000 connections overflows the 32-bit signed result used to determine if rescheduling is required. On amd64, this merely changes the multiply and comparison instructions to 64-bit. On x86, a 64-bit result is already present from imull, so only a few more comparison instructions are emitted. Signed-off-by: Simon Kirby Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 2 +- net/netfilter/ipvs/ip_vs_lblc.c | 4 ++-- net/netfilter/ipvs/ip_vs_lblcr.c | 12 ++++++------ net/netfilter/ipvs/ip_vs_nq.c | 8 ++++---- net/netfilter/ipvs/ip_vs_sed.c | 8 ++++---- net/netfilter/ipvs/ip_vs_wlc.c | 6 +++--- 6 files changed, 20 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index f0d70f066f3d..fe782ed2fe72 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1649,7 +1649,7 @@ static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) /* CONFIG_IP_VS_NFCT */ #endif -static inline unsigned int +static inline int ip_vs_dest_conn_overhead(struct ip_vs_dest *dest) { /* diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 1383b0eadc0e..eb814bf74e64 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -443,8 +443,8 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc) continue; doh = ip_vs_dest_conn_overhead(dest); - if (loh * atomic_read(&dest->weight) > - doh * atomic_read(&least->weight)) { + if ((__s64)loh * atomic_read(&dest->weight) > + (__s64)doh * atomic_read(&least->weight)) { least = dest; loh = doh; } diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 5199448697f6..e65f7c573090 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -200,8 +200,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) continue; doh = ip_vs_dest_conn_overhead(dest); - if ((loh * atomic_read(&dest->weight) > - doh * atomic_read(&least->weight)) + if (((__s64)loh * atomic_read(&dest->weight) > + (__s64)doh * atomic_read(&least->weight)) && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { least = dest; loh = doh; @@ -246,8 +246,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) dest = rcu_dereference_protected(e->dest, 1); doh = ip_vs_dest_conn_overhead(dest); /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */ - if ((moh * atomic_read(&dest->weight) < - doh * atomic_read(&most->weight)) + if (((__s64)moh * atomic_read(&dest->weight) < + (__s64)doh * atomic_read(&most->weight)) && (atomic_read(&dest->weight) > 0)) { most = dest; moh = doh; @@ -611,8 +611,8 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc) continue; doh = ip_vs_dest_conn_overhead(dest); - if (loh * atomic_read(&dest->weight) > - doh * atomic_read(&least->weight)) { + if ((__s64)loh * atomic_read(&dest->weight) > + (__s64)doh * atomic_read(&least->weight)) { least = dest; loh = doh; } diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index d8d9860934fe..961a6de9bb29 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -40,7 +40,7 @@ #include -static inline unsigned int +static inline int ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) { /* @@ -59,7 +59,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least = NULL; - unsigned int loh = 0, doh; + int loh = 0, doh; IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); @@ -92,8 +92,8 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, } if (!least || - (loh * atomic_read(&dest->weight) > - doh * atomic_read(&least->weight))) { + ((__s64)loh * atomic_read(&dest->weight) > + (__s64)doh * atomic_read(&least->weight))) { least = dest; loh = doh; } diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index a5284cc3d882..e446b9fa7424 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -44,7 +44,7 @@ #include -static inline unsigned int +static inline int ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) { /* @@ -63,7 +63,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least; - unsigned int loh, doh; + int loh, doh; IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); @@ -99,8 +99,8 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; doh = ip_vs_sed_dest_overhead(dest); - if (loh * atomic_read(&dest->weight) > - doh * atomic_read(&least->weight)) { + if ((__s64)loh * atomic_read(&dest->weight) > + (__s64)doh * atomic_read(&least->weight)) { least = dest; loh = doh; } diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index 6dc1fa128840..b5b4650d50a9 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -35,7 +35,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least; - unsigned int loh, doh; + int loh, doh; IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n"); @@ -71,8 +71,8 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; doh = ip_vs_dest_conn_overhead(dest); - if (loh * atomic_read(&dest->weight) > - doh * atomic_read(&least->weight)) { + if ((__s64)loh * atomic_read(&dest->weight) > + (__s64)doh * atomic_read(&least->weight)) { least = dest; loh = doh; } -- cgit v1.2.3 From bcbde4c0a7556cca72874c5e1efa4dccb5198a2b Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 12 Sep 2013 11:21:07 +0300 Subject: ipvs: make the service replacement more robust commit 578bc3ef1e473a ("ipvs: reorganize dest trash") added IP_VS_DEST_STATE_REMOVING flag and RCU callback named ip_vs_dest_wait_readers() to keep dests and services after removal for at least a RCU grace period. But we have the following corner cases: - we can not reuse the same dest if its service is removed while IP_VS_DEST_STATE_REMOVING is still set because another dest removal in the first grace period can not extend this period. It can happen when ipvsadm -C && ipvsadm -R is used. - dest->svc can be replaced but ip_vs_in_stats() and ip_vs_out_stats() have no explicit read memory barriers when accessing dest->svc. It can happen that dest->svc was just freed (replaced) while we use it to update the stats. We solve the problems as follows: - IP_VS_DEST_STATE_REMOVING is removed and we ensure a fixed idle period for the dest (IP_VS_DEST_TRASH_PERIOD). idle_start will remember when for first time after deletion we noticed dest->refcnt=0. Later, the connections can grab a reference while in RCU grace period but if refcnt becomes 0 we can safely free the dest and its svc. - dest->svc becomes RCU pointer. As result, we add explicit RCU locking in ip_vs_in_stats() and ip_vs_out_stats(). - __ip_vs_unbind_svc is renamed to __ip_vs_svc_put(), it now can free the service immediately or after a RCU grace period. dest->svc is not set to NULL anymore. As result, unlinked dests and their services are freed always after IP_VS_DEST_TRASH_PERIOD period, unused services are freed after a RCU grace period. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 7 +--- net/netfilter/ipvs/ip_vs_core.c | 12 +++++- net/netfilter/ipvs/ip_vs_ctl.c | 86 +++++++++++++++++------------------------ 3 files changed, 47 insertions(+), 58 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index fe782ed2fe72..9c4d37ec45a1 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -723,8 +723,6 @@ struct ip_vs_dest_dst { struct rcu_head rcu_head; }; -/* In grace period after removing */ -#define IP_VS_DEST_STATE_REMOVING 0x01 /* * The real server destination forwarding entry * with ip address, port number, and so on. @@ -742,7 +740,7 @@ struct ip_vs_dest { atomic_t refcnt; /* reference counter */ struct ip_vs_stats stats; /* statistics */ - unsigned long state; /* state flags */ + unsigned long idle_start; /* start time, jiffies */ /* connection counters and thresholds */ atomic_t activeconns; /* active connections */ @@ -756,14 +754,13 @@ struct ip_vs_dest { struct ip_vs_dest_dst __rcu *dest_dst; /* cached dst info */ /* for virtual service */ - struct ip_vs_service *svc; /* service it belongs to */ + struct ip_vs_service __rcu *svc; /* service it belongs to */ __u16 protocol; /* which protocol (TCP/UDP) */ __be16 vport; /* virtual port number */ union nf_inet_addr vaddr; /* virtual IP address */ __u32 vfwmark; /* firewall mark of service */ struct list_head t_list; /* in dest_trash */ - struct rcu_head rcu_head; unsigned int in_rs_table:1; /* we are in rs_table */ }; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 4f69e83ff836..74fd00c27210 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -116,6 +116,7 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { struct ip_vs_cpu_stats *s; + struct ip_vs_service *svc; s = this_cpu_ptr(dest->stats.cpustats); s->ustats.inpkts++; @@ -123,11 +124,14 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) s->ustats.inbytes += skb->len; u64_stats_update_end(&s->syncp); - s = this_cpu_ptr(dest->svc->stats.cpustats); + rcu_read_lock(); + svc = rcu_dereference(dest->svc); + s = this_cpu_ptr(svc->stats.cpustats); s->ustats.inpkts++; u64_stats_update_begin(&s->syncp); s->ustats.inbytes += skb->len; u64_stats_update_end(&s->syncp); + rcu_read_unlock(); s = this_cpu_ptr(ipvs->tot_stats.cpustats); s->ustats.inpkts++; @@ -146,6 +150,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { struct ip_vs_cpu_stats *s; + struct ip_vs_service *svc; s = this_cpu_ptr(dest->stats.cpustats); s->ustats.outpkts++; @@ -153,11 +158,14 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) s->ustats.outbytes += skb->len; u64_stats_update_end(&s->syncp); - s = this_cpu_ptr(dest->svc->stats.cpustats); + rcu_read_lock(); + svc = rcu_dereference(dest->svc); + s = this_cpu_ptr(svc->stats.cpustats); s->ustats.outpkts++; u64_stats_update_begin(&s->syncp); s->ustats.outbytes += skb->len; u64_stats_update_end(&s->syncp); + rcu_read_unlock(); s = this_cpu_ptr(ipvs->tot_stats.cpustats); s->ustats.outpkts++; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c8148e487386..a3df9bddc4f7 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -460,7 +460,7 @@ static inline void __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) { atomic_inc(&svc->refcnt); - dest->svc = svc; + rcu_assign_pointer(dest->svc, svc); } static void ip_vs_service_free(struct ip_vs_service *svc) @@ -470,18 +470,25 @@ static void ip_vs_service_free(struct ip_vs_service *svc) kfree(svc); } -static void -__ip_vs_unbind_svc(struct ip_vs_dest *dest) +static void ip_vs_service_rcu_free(struct rcu_head *head) { - struct ip_vs_service *svc = dest->svc; + struct ip_vs_service *svc; + + svc = container_of(head, struct ip_vs_service, rcu_head); + ip_vs_service_free(svc); +} - dest->svc = NULL; +static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay) +{ if (atomic_dec_and_test(&svc->refcnt)) { IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", svc->fwmark, IP_VS_DBG_ADDR(svc->af, &svc->addr), ntohs(svc->port)); - ip_vs_service_free(svc); + if (do_delay) + call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); + else + ip_vs_service_free(svc); } } @@ -667,11 +674,6 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->refcnt)); - /* We can not reuse dest while in grace period - * because conns still can use dest->svc - */ - if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) - continue; if (dest->af == svc->af && ip_vs_addr_equal(svc->af, &dest->addr, daddr) && dest->port == dport && @@ -697,8 +699,10 @@ out: static void ip_vs_dest_free(struct ip_vs_dest *dest) { + struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1); + __ip_vs_dst_cache_reset(dest); - __ip_vs_unbind_svc(dest); + __ip_vs_svc_put(svc, false); free_percpu(dest->stats.cpustats); kfree(dest); } @@ -771,6 +775,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest, int add) { struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct ip_vs_service *old_svc; struct ip_vs_scheduler *sched; int conn_flags; @@ -792,13 +797,14 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, atomic_set(&dest->conn_flags, conn_flags); /* bind the service */ - if (!dest->svc) { + old_svc = rcu_dereference_protected(dest->svc, 1); + if (!old_svc) { __ip_vs_bind_svc(dest, svc); } else { - if (dest->svc != svc) { - __ip_vs_unbind_svc(dest); + if (old_svc != svc) { ip_vs_zero_stats(&dest->stats); __ip_vs_bind_svc(dest, svc); + __ip_vs_svc_put(old_svc, true); } } @@ -998,16 +1004,6 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return 0; } -static void ip_vs_dest_wait_readers(struct rcu_head *head) -{ - struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest, - rcu_head); - - /* End of grace period after unlinking */ - clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); -} - - /* * Delete a destination (must be already unlinked from the service) */ @@ -1023,20 +1019,16 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest, */ ip_vs_rs_unhash(dest); - if (!cleanup) { - set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); - call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers); - } - spin_lock_bh(&ipvs->dest_trash_lock); IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->refcnt)); if (list_empty(&ipvs->dest_trash) && !cleanup) mod_timer(&ipvs->dest_trash_timer, - jiffies + IP_VS_DEST_TRASH_PERIOD); + jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); /* dest lives in trash without reference */ list_add(&dest->t_list, &ipvs->dest_trash); + dest->idle_start = 0; spin_unlock_bh(&ipvs->dest_trash_lock); ip_vs_dest_put(dest); } @@ -1108,24 +1100,30 @@ static void ip_vs_dest_trash_expire(unsigned long data) struct net *net = (struct net *) data; struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_dest *dest, *next; + unsigned long now = jiffies; spin_lock(&ipvs->dest_trash_lock); list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { - /* Skip if dest is in grace period */ - if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) - continue; if (atomic_read(&dest->refcnt) > 0) continue; + if (dest->idle_start) { + if (time_before(now, dest->idle_start + + IP_VS_DEST_TRASH_PERIOD)) + continue; + } else { + dest->idle_start = max(1UL, now); + continue; + } IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n", dest->vfwmark, - IP_VS_DBG_ADDR(dest->svc->af, &dest->addr), + IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port)); list_del(&dest->t_list); ip_vs_dest_free(dest); } if (!list_empty(&ipvs->dest_trash)) mod_timer(&ipvs->dest_trash_timer, - jiffies + IP_VS_DEST_TRASH_PERIOD); + jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); spin_unlock(&ipvs->dest_trash_lock); } @@ -1320,14 +1318,6 @@ out: return ret; } -static void ip_vs_service_rcu_free(struct rcu_head *head) -{ - struct ip_vs_service *svc; - - svc = container_of(head, struct ip_vs_service, rcu_head); - ip_vs_service_free(svc); -} - /* * Delete a service from the service list * - The service must be unlinked, unlocked and not referenced! @@ -1376,13 +1366,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) /* * Free the service if nobody refers to it */ - if (atomic_dec_and_test(&svc->refcnt)) { - IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", - svc->fwmark, - IP_VS_DBG_ADDR(svc->af, &svc->addr), - ntohs(svc->port)); - call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); - } + __ip_vs_svc_put(svc, true); /* decrease the module use count */ ip_vs_use_count_dec(); -- cgit v1.2.3 From c26d436cbf7a9549ec1073480a2e3f0d3f64e02d Mon Sep 17 00:00:00 2001 From: Andre Naujoks Date: Fri, 13 Sep 2013 19:37:12 +0200 Subject: lib: introduce upper case hex ascii helpers To be able to use the hex ascii functions in case sensitive environments the array hex_asc_upper[] and the needed functions for hex_byte_pack_upper() are introduced. Signed-off-by: Andre Naujoks Signed-off-by: David S. Miller --- include/linux/kernel.h | 11 +++++++++++ lib/hexdump.c | 2 ++ 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 482ad2d84a32..672ddc4de4af 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -439,6 +439,17 @@ static inline char *hex_byte_pack(char *buf, u8 byte) return buf; } +extern const char hex_asc_upper[]; +#define hex_asc_upper_lo(x) hex_asc_upper[((x) & 0x0f)] +#define hex_asc_upper_hi(x) hex_asc_upper[((x) & 0xf0) >> 4] + +static inline char *hex_byte_pack_upper(char *buf, u8 byte) +{ + *buf++ = hex_asc_upper_hi(byte); + *buf++ = hex_asc_upper_lo(byte); + return buf; +} + static inline char * __deprecated pack_hex_byte(char *buf, u8 byte) { return hex_byte_pack(buf, byte); diff --git a/lib/hexdump.c b/lib/hexdump.c index 3f0494c9d57a..8499c810909a 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -14,6 +14,8 @@ const char hex_asc[] = "0123456789abcdef"; EXPORT_SYMBOL(hex_asc); +const char hex_asc_upper[] = "0123456789ABCDEF"; +EXPORT_SYMBOL(hex_asc_upper); /** * hex_to_bin - convert a hex digit to its real value -- cgit v1.2.3 From 47d06e532e95b71c0db3839ebdef3fe8812fca2c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 10 Sep 2013 10:52:35 -0400 Subject: random: run random_int_secret_init() run after all late_initcalls The some platforms (e.g., ARM) initializes their clocks as late_initcalls for some unknown reason. So make sure random_int_secret_init() is run after all of the late_initcalls are run. Cc: stable@vger.kernel.org Signed-off-by: "Theodore Ts'o" --- drivers/char/random.c | 3 +-- include/linux/random.h | 1 + init/main.c | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/char/random.c b/drivers/char/random.c index 0d91fe52f3f5..92e6c67e1ae6 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1462,12 +1462,11 @@ struct ctl_table random_table[] = { static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; -static int __init random_int_secret_init(void) +int random_int_secret_init(void) { get_random_bytes(random_int_secret, sizeof(random_int_secret)); return 0; } -late_initcall(random_int_secret_init); /* * Get a random word for internal kernel use only. Similar to urandom but diff --git a/include/linux/random.h b/include/linux/random.h index 3b9377d6b7a5..6312dd9ba449 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -17,6 +17,7 @@ extern void add_interrupt_randomness(int irq, int irq_flags); extern void get_random_bytes(void *buf, int nbytes); extern void get_random_bytes_arch(void *buf, int nbytes); void generate_random_uuid(unsigned char uuid_out[16]); +extern int random_int_secret_init(void); #ifndef MODULE extern const struct file_operations random_fops, urandom_fops; diff --git a/init/main.c b/init/main.c index d03d2ec2eacf..586cd3359c02 100644 --- a/init/main.c +++ b/init/main.c @@ -75,6 +75,7 @@ #include #include #include +#include #include #include @@ -778,6 +779,7 @@ static void __init do_basic_setup(void) do_ctors(); usermodehelper_enable(); do_initcalls(); + random_int_secret_init(); } static void __init do_pre_smp_initcalls(void) -- cgit v1.2.3 From 9fe34f5d920b183ec063550e0f4ec854aa373316 Mon Sep 17 00:00:00 2001 From: Noel Burton-Krahn Date: Wed, 18 Sep 2013 12:24:40 -0700 Subject: mrp: add periodictimer to allow retries when packets get lost MRP doesn't implement the periodictimer in 802.1Q, so it never retries if packets get lost. I ran into this problem when MRP sent a MVRP JoinIn before the interface was fully up. The JoinIn was lost, MRP didn't retry, and MVRP registration failed. Tested against Juniper QFabric switches Signed-off-by: Noel Burton-Krahn Acked-by: David Ward Signed-off-by: David S. Miller --- include/net/mrp.h | 1 + net/802/mrp.c | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'include') diff --git a/include/net/mrp.h b/include/net/mrp.h index 4fbf02aa2ec1..0f7558b638ae 100644 --- a/include/net/mrp.h +++ b/include/net/mrp.h @@ -112,6 +112,7 @@ struct mrp_applicant { struct mrp_application *app; struct net_device *dev; struct timer_list join_timer; + struct timer_list periodic_timer; spinlock_t lock; struct sk_buff_head queue; diff --git a/net/802/mrp.c b/net/802/mrp.c index 1eb05d80b07b..3ed616215870 100644 --- a/net/802/mrp.c +++ b/net/802/mrp.c @@ -24,6 +24,11 @@ static unsigned int mrp_join_time __read_mostly = 200; module_param(mrp_join_time, uint, 0644); MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)"); + +static unsigned int mrp_periodic_time __read_mostly = 1000; +module_param(mrp_periodic_time, uint, 0644); +MODULE_PARM_DESC(mrp_periodic_time, "Periodic time in ms (default 1s)"); + MODULE_LICENSE("GPL"); static const u8 @@ -595,6 +600,24 @@ static void mrp_join_timer(unsigned long data) mrp_join_timer_arm(app); } +static void mrp_periodic_timer_arm(struct mrp_applicant *app) +{ + mod_timer(&app->periodic_timer, + jiffies + msecs_to_jiffies(mrp_periodic_time)); +} + +static void mrp_periodic_timer(unsigned long data) +{ + struct mrp_applicant *app = (struct mrp_applicant *)data; + + spin_lock(&app->lock); + mrp_mad_event(app, MRP_EVENT_PERIODIC); + mrp_pdu_queue(app); + spin_unlock(&app->lock); + + mrp_periodic_timer_arm(app); +} + static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset) { __be16 endmark; @@ -845,6 +868,9 @@ int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl) rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app); setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app); mrp_join_timer_arm(app); + setup_timer(&app->periodic_timer, mrp_periodic_timer, + (unsigned long)app); + mrp_periodic_timer_arm(app); return 0; err3: @@ -870,6 +896,7 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl) * all pending messages before the applicant is gone. */ del_timer_sync(&app->join_timer); + del_timer_sync(&app->periodic_timer); spin_lock_bh(&app->lock); mrp_mad_event(app, MRP_EVENT_TX); -- cgit v1.2.3 From 82aeef0bf03684b377678c00c05e613f30dca39c Mon Sep 17 00:00:00 2001 From: "Li, Zhen-Hua" Date: Fri, 13 Sep 2013 14:27:32 +0800 Subject: x86/iommu: correct ICS register offset According to Intel Vt-D specs, the offset of Invalidation complete status register should be 0x9C, not 0x98. See Intel's VT-d spec, Revision 1.3, Chapter 10.4, Page 98; Signed-off-by: Li, Zhen-Hua Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 78e2ada50cd5..d380c5e68008 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -55,7 +55,7 @@ #define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ #define DMAR_IQ_SHIFT 4 /* Invalidation queue head/tail shift */ #define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ -#define DMAR_ICS_REG 0x98 /* Invalidation complete status register */ +#define DMAR_ICS_REG 0x9c /* Invalidation complete status register */ #define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ #define OFFSET_STRIDE (9) -- cgit v1.2.3 From 19872d20c890073c5207d9e02bb8f14d451a11eb Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Mon, 9 Sep 2013 18:33:54 +0200 Subject: HID: uhid: allocate static minor udev has this nice feature of creating "dead" /dev/ device-nodes if it finds a devnode: modalias. Once the node is accessed, the kernel automatically loads the module that provides the node. However, this requires udev to know the major:minor code to use for the node. This feature was introduced by: commit 578454ff7eab61d13a26b568f99a89a2c9edc881 Author: Kay Sievers Date: Thu May 20 18:07:20 2010 +0200 driver core: add devname module aliases to allow module on-demand auto-loading However, uhid uses dynamic minor numbers so this doesn't actually work. We need to load uhid to know which minor it's going to use. Hence, allocate a static minor (just like uinput does) and we're good to go. Reported-by: Tom Gundersen Signed-off-by: David Herrmann Signed-off-by: Jiri Kosina --- drivers/hid/uhid.c | 3 ++- include/linux/miscdevice.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c index 5bf2fb785844..93b00d76374c 100644 --- a/drivers/hid/uhid.c +++ b/drivers/hid/uhid.c @@ -615,7 +615,7 @@ static const struct file_operations uhid_fops = { static struct miscdevice uhid_misc = { .fops = &uhid_fops, - .minor = MISC_DYNAMIC_MINOR, + .minor = UHID_MINOR, .name = UHID_NAME, }; @@ -634,4 +634,5 @@ module_exit(uhid_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("David Herrmann "); MODULE_DESCRIPTION("User-space I/O driver support for HID subsystem"); +MODULE_ALIAS_MISCDEV(UHID_MINOR); MODULE_ALIAS("devname:" UHID_NAME); diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 09c2300ddb37..cb358355ef43 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -45,6 +45,7 @@ #define MAPPER_CTRL_MINOR 236 #define LOOP_CTRL_MINOR 237 #define VHOST_NET_MINOR 238 +#define UHID_MINOR 239 #define MISC_DYNAMIC_MINOR 255 struct device; -- cgit v1.2.3 From 5bc2afc2b53fc73f154e6344cd898585628e6d27 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 23 Sep 2013 18:01:28 -0400 Subject: NFSv4: Honour the 'opened' parameter in the atomic_open() filesystem method Determine if we've created a new file by examining the directory change attribute and/or the O_EXCL flag. This fixes a regression when doing a non-exclusive create of a new file. If the FILE_CREATED flag is not set, the atomic_open() command will perform full file access permissions checks instead of just checking for MAY_OPEN. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- fs/nfs/nfs4file.c | 3 ++- fs/nfs/nfs4proc.c | 26 +++++++++++++++++++------- include/linux/nfs_xdr.h | 3 ++- 4 files changed, 24 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 854a8f05a610..02b0df769e2d 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1458,7 +1458,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, trace_nfs_atomic_open_enter(dir, ctx, open_flags); nfs_block_sillyrename(dentry->d_parent); - inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr); + inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, opened); nfs_unblock_sillyrename(dentry->d_parent); if (IS_ERR(inode)) { err = PTR_ERR(inode); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index e5b804dd944c..77efaf15ec90 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -19,6 +19,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) struct inode *dir; unsigned openflags = filp->f_flags; struct iattr attr; + int opened = 0; int err; /* @@ -55,7 +56,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) nfs_wb_all(inode); } - inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr); + inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr, &opened); if (IS_ERR(inode)) { err = PTR_ERR(inode); switch (err) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 989bb9d3074d..488ef9b5c51a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -912,6 +912,7 @@ struct nfs4_opendata { struct iattr attrs; unsigned long timestamp; unsigned int rpc_done : 1; + unsigned int file_created : 1; unsigned int is_recover : 1; int rpc_status; int cancelled; @@ -1946,8 +1947,13 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) nfs_fattr_map_and_free_names(server, &data->f_attr); - if (o_arg->open_flags & O_CREAT) + if (o_arg->open_flags & O_CREAT) { update_changeattr(dir, &o_res->cinfo); + if (o_arg->open_flags & O_EXCL) + data->file_created = 1; + else if (o_res->cinfo.before != o_res->cinfo.after) + data->file_created = 1; + } if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) server->caps &= ~NFS_CAP_POSIX_LOCK; if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { @@ -2191,7 +2197,8 @@ static int _nfs4_do_open(struct inode *dir, struct nfs_open_context *ctx, int flags, struct iattr *sattr, - struct nfs4_label *label) + struct nfs4_label *label, + int *opened) { struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; @@ -2261,6 +2268,8 @@ static int _nfs4_do_open(struct inode *dir, nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel); } } + if (opendata->file_created) + *opened |= FILE_CREATED; if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) *ctx_th = opendata->f_attr.mdsthreshold; @@ -2289,7 +2298,8 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct nfs_open_context *ctx, int flags, struct iattr *sattr, - struct nfs4_label *label) + struct nfs4_label *label, + int *opened) { struct nfs_server *server = NFS_SERVER(dir); struct nfs4_exception exception = { }; @@ -2297,7 +2307,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, int status; do { - status = _nfs4_do_open(dir, ctx, flags, sattr, label); + status = _nfs4_do_open(dir, ctx, flags, sattr, label, opened); res = ctx->state; trace_nfs4_open_file(ctx, flags, status); if (status == 0) @@ -2659,7 +2669,8 @@ out: } static struct inode * -nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr) +nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, + int open_flags, struct iattr *attr, int *opened) { struct nfs4_state *state; struct nfs4_label l = {0, 0, 0, NULL}, *label = NULL; @@ -2667,7 +2678,7 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags label = nfs4_label_init_security(dir, ctx->dentry, attr, &l); /* Protect against concurrent sillydeletes */ - state = nfs4_do_open(dir, ctx, open_flags, attr, label); + state = nfs4_do_open(dir, ctx, open_flags, attr, label, opened); nfs4_label_release_security(label); @@ -3332,6 +3343,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, struct nfs4_label l, *ilabel = NULL; struct nfs_open_context *ctx; struct nfs4_state *state; + int opened = 0; int status = 0; ctx = alloc_nfs_open_context(dentry, FMODE_READ); @@ -3341,7 +3353,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, ilabel = nfs4_label_init_security(dir, dentry, sattr, &l); sattr->ia_mode &= ~current_umask(); - state = nfs4_do_open(dir, ctx, flags, sattr, ilabel); + state = nfs4_do_open(dir, ctx, flags, sattr, ilabel, &opened); if (IS_ERR(state)) { status = PTR_ERR(state); goto out; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 01fd84b566f7..49f52c8f4422 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1455,7 +1455,8 @@ struct nfs_rpc_ops { struct inode * (*open_context) (struct inode *dir, struct nfs_open_context *ctx, int open_flags, - struct iattr *iattr); + struct iattr *iattr, + int *); int (*have_delegation)(struct inode *, fmode_t); int (*return_delegation)(struct inode *); struct nfs_client *(*alloc_client) (const struct nfs_client_initdata *); -- cgit v1.2.3 From 2bedea8f26c92e2610f2f67889144990749461e0 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Wed, 25 Sep 2013 12:11:02 +0200 Subject: bcma: make bcma_core_pci_{up,down}() callable from atomic context This patch removes the bcma_core_pci_power_save() call from the bcma_core_pci_{up,down}() functions as it tries to schedule thus requiring to call them from non-atomic context. The function bcma_core_pci_power_save() is now exported so the calling module can explicitly use it in non-atomic context. This fixes the 'scheduling while atomic' issue reported by Tod Jackson and Joe Perches. [ 13.210710] BUG: scheduling while atomic: dhcpcd/1800/0x00000202 [ 13.210718] Modules linked in: brcmsmac nouveau coretemp kvm_intel kvm cordic brcmutil bcma dell_wmi atl1c ttm mxm_wmi wmi [ 13.210756] CPU: 2 PID: 1800 Comm: dhcpcd Not tainted 3.11.0-wl #1 [ 13.210762] Hardware name: Alienware M11x R2/M11x R2, BIOS A04 11/23/2010 [ 13.210767] ffff880177c92c40 ffff880170fd1948 ffffffff8169af5b 0000000000000007 [ 13.210777] ffff880170fd1ab0 ffff880170fd1958 ffffffff81697ee2 ffff880170fd19d8 [ 13.210785] ffffffff816a19f5 00000000000f4240 000000000000d080 ffff880170fd1fd8 [ 13.210794] Call Trace: [ 13.210813] [] dump_stack+0x4f/0x84 [ 13.210826] [] __schedule_bug+0x43/0x51 [ 13.210837] [] __schedule+0x6e5/0x810 [ 13.210845] [] schedule+0x24/0x70 [ 13.210855] [] schedule_hrtimeout_range_clock+0x10c/0x150 [ 13.210867] [] ? update_rmtp+0x60/0x60 [ 13.210877] [] ? hrtimer_start_range_ns+0xf/0x20 [ 13.210887] [] schedule_hrtimeout_range+0xe/0x10 [ 13.210897] [] usleep_range+0x3b/0x40 [ 13.210910] [] bcma_pcie_mdio_set_phy.isra.3+0x4f/0x80 [bcma] [ 13.210921] [] bcma_pcie_mdio_write.isra.4+0xbf/0xd0 [bcma] [ 13.210932] [] bcma_pcie_mdio_writeread.isra.6.constprop.13+0x18/0x30 [bcma] [ 13.210942] [] bcma_core_pci_power_save+0x3e/0x80 [bcma] [ 13.210953] [] bcma_core_pci_up+0x2d/0x60 [bcma] [ 13.210975] [] brcms_c_up+0xfc/0x430 [brcmsmac] [ 13.210989] [] brcms_up+0x1d/0x20 [brcmsmac] [ 13.211003] [] brcms_ops_start+0x298/0x340 [brcmsmac] [ 13.211020] [] ? cfg80211_netdev_notifier_call+0xd2/0x5f0 [ 13.211030] [] ? packet_notifier+0xad/0x1d0 [ 13.211064] [] ieee80211_do_open+0x325/0xf80 [ 13.211076] [] ? __raw_notifier_call_chain+0x9/0x10 [ 13.211086] [] ieee80211_open+0x71/0x80 [ 13.211101] [] __dev_open+0x87/0xe0 [ 13.211109] [] __dev_change_flags+0x9c/0x180 [ 13.211117] [] dev_change_flags+0x23/0x70 [ 13.211127] [] devinet_ioctl+0x5b8/0x6a0 [ 13.211136] [] inet_ioctl+0x75/0x90 [ 13.211147] [] sock_do_ioctl+0x2b/0x70 [ 13.211155] [] sock_ioctl+0x71/0x2a0 [ 13.211169] [] do_vfs_ioctl+0x87/0x520 [ 13.211180] [] ? ____fput+0x9/0x10 [ 13.211198] [] ? task_work_run+0x9c/0xd0 [ 13.211202] [] SyS_ioctl+0x91/0xb0 [ 13.211208] [] system_call_fastpath+0x16/0x1b [ 13.211217] NOHZ: local_softirq_pending 202 The issue was introduced in v3.11 kernel by following commit: commit aa51e598d04c6acf5477934cd6383f5a17ce9029 Author: Hauke Mehrtens Date: Sat Aug 24 00:32:31 2013 +0200 brcmsmac: use bcma PCIe up and down functions replace the calls to bcma_core_pci_extend_L1timer() by calls to the newly introduced bcma_core_pci_ip() and bcma_core_pci_down() Signed-off-by: Hauke Mehrtens Cc: Arend van Spriel Signed-off-by: John W. Linville This fix has been discussed with Hauke Mehrtens [1] selection option 3) and is intended for v3.12. Ref: [1] http://mid.gmane.org/5239B12D.3040206@hauke-m.de Cc: # 3.11.x Cc: Tod Jackson Cc: Joe Perches Cc: Rafal Milecki Cc: Hauke Mehrtens Reviewed-by: Hante Meuleman Signed-off-by: Arend van Spriel Signed-off-by: John W. Linville --- drivers/bcma/driver_pci.c | 49 +++++++++++++++++++----------------- include/linux/bcma/bcma_driver_pci.h | 1 + 2 files changed, 27 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/drivers/bcma/driver_pci.c b/drivers/bcma/driver_pci.c index c9fd6943ce45..50329d1057ed 100644 --- a/drivers/bcma/driver_pci.c +++ b/drivers/bcma/driver_pci.c @@ -210,25 +210,6 @@ static void bcma_core_pci_config_fixup(struct bcma_drv_pci *pc) } } -static void bcma_core_pci_power_save(struct bcma_drv_pci *pc, bool up) -{ - u16 data; - - if (pc->core->id.rev >= 15 && pc->core->id.rev <= 20) { - data = up ? 0x74 : 0x7C; - bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1, - BCMA_CORE_PCI_MDIO_BLK1_MGMT1, 0x7F64); - bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1, - BCMA_CORE_PCI_MDIO_BLK1_MGMT3, data); - } else if (pc->core->id.rev >= 21 && pc->core->id.rev <= 22) { - data = up ? 0x75 : 0x7D; - bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1, - BCMA_CORE_PCI_MDIO_BLK1_MGMT1, 0x7E65); - bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1, - BCMA_CORE_PCI_MDIO_BLK1_MGMT3, data); - } -} - /************************************************** * Init. **************************************************/ @@ -255,6 +236,32 @@ void bcma_core_pci_init(struct bcma_drv_pci *pc) bcma_core_pci_clientmode_init(pc); } +void bcma_core_pci_power_save(struct bcma_bus *bus, bool up) +{ + struct bcma_drv_pci *pc; + u16 data; + + if (bus->hosttype != BCMA_HOSTTYPE_PCI) + return; + + pc = &bus->drv_pci[0]; + + if (pc->core->id.rev >= 15 && pc->core->id.rev <= 20) { + data = up ? 0x74 : 0x7C; + bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1, + BCMA_CORE_PCI_MDIO_BLK1_MGMT1, 0x7F64); + bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1, + BCMA_CORE_PCI_MDIO_BLK1_MGMT3, data); + } else if (pc->core->id.rev >= 21 && pc->core->id.rev <= 22) { + data = up ? 0x75 : 0x7D; + bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1, + BCMA_CORE_PCI_MDIO_BLK1_MGMT1, 0x7E65); + bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1, + BCMA_CORE_PCI_MDIO_BLK1_MGMT3, data); + } +} +EXPORT_SYMBOL_GPL(bcma_core_pci_power_save); + int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc, struct bcma_device *core, bool enable) { @@ -310,8 +317,6 @@ void bcma_core_pci_up(struct bcma_bus *bus) pc = &bus->drv_pci[0]; - bcma_core_pci_power_save(pc, true); - bcma_core_pci_extend_L1timer(pc, true); } EXPORT_SYMBOL_GPL(bcma_core_pci_up); @@ -326,7 +331,5 @@ void bcma_core_pci_down(struct bcma_bus *bus) pc = &bus->drv_pci[0]; bcma_core_pci_extend_L1timer(pc, false); - - bcma_core_pci_power_save(pc, false); } EXPORT_SYMBOL_GPL(bcma_core_pci_down); diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index d66033f418c9..0333e605ea0d 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -242,6 +242,7 @@ extern int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc, struct bcma_device *core, bool enable); extern void bcma_core_pci_up(struct bcma_bus *bus); extern void bcma_core_pci_down(struct bcma_bus *bus); +extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up); extern int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev); extern int bcma_core_pci_plat_dev_init(struct pci_dev *dev); -- cgit v1.2.3 From 60e453a940ac678565b6641d65f8c18541bb9f28 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 23 Sep 2013 20:59:35 +0800 Subject: USBNET: fix handling padding packet Commit 638c5115a7949(USBNET: support DMA SG) introduces DMA SG if the usb host controller is capable of building packet from discontinuous buffers, but missed handling padding packet when building DMA SG. This patch attachs the pre-allocated padding packet at the end of the sg list, so padding packet can be sent to device if drivers require that. Reported-by: David Laight Acked-by: Oliver Neukum Signed-off-by: Ming Lei Signed-off-by: David S. Miller --- drivers/net/usb/usbnet.c | 27 +++++++++++++++++++++------ include/linux/usb/usbnet.h | 1 + 2 files changed, 22 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 7b331e613e02..bf94e10a37c8 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1241,7 +1241,9 @@ static int build_dma_sg(const struct sk_buff *skb, struct urb *urb) if (num_sgs == 1) return 0; - urb->sg = kmalloc(num_sgs * sizeof(struct scatterlist), GFP_ATOMIC); + /* reserve one for zero packet */ + urb->sg = kmalloc((num_sgs + 1) * sizeof(struct scatterlist), + GFP_ATOMIC); if (!urb->sg) return -ENOMEM; @@ -1305,7 +1307,7 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb, if (build_dma_sg(skb, urb) < 0) goto drop; } - entry->length = length = urb->transfer_buffer_length; + length = urb->transfer_buffer_length; /* don't assume the hardware handles USB_ZERO_PACKET * NOTE: strictly conforming cdc-ether devices should expect @@ -1317,15 +1319,18 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb, if (length % dev->maxpacket == 0) { if (!(info->flags & FLAG_SEND_ZLP)) { if (!(info->flags & FLAG_MULTI_PACKET)) { - urb->transfer_buffer_length++; - if (skb_tailroom(skb)) { + length++; + if (skb_tailroom(skb) && !urb->num_sgs) { skb->data[skb->len] = 0; __skb_put(skb, 1); - } + } else if (urb->num_sgs) + sg_set_buf(&urb->sg[urb->num_sgs++], + dev->padding_pkt, 1); } } else urb->transfer_flags |= URB_ZERO_PACKET; } + entry->length = urb->transfer_buffer_length = length; spin_lock_irqsave(&dev->txq.lock, flags); retval = usb_autopm_get_interface_async(dev->intf); @@ -1509,6 +1514,7 @@ void usbnet_disconnect (struct usb_interface *intf) usb_kill_urb(dev->interrupt); usb_free_urb(dev->interrupt); + kfree(dev->padding_pkt); free_netdev(net); } @@ -1679,9 +1685,16 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) /* initialize max rx_qlen and tx_qlen */ usbnet_update_max_qlen(dev); + if (dev->can_dma_sg && !(info->flags & FLAG_SEND_ZLP) && + !(info->flags & FLAG_MULTI_PACKET)) { + dev->padding_pkt = kzalloc(1, GFP_KERNEL); + if (!dev->padding_pkt) + goto out4; + } + status = register_netdev (net); if (status) - goto out4; + goto out5; netif_info(dev, probe, dev->net, "register '%s' at usb-%s-%s, %s, %pM\n", udev->dev.driver->name, @@ -1699,6 +1712,8 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) return 0; +out5: + kfree(dev->padding_pkt); out4: usb_free_urb(dev->interrupt); out3: diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 9cb2fe8ca944..e303eef94dd5 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -42,6 +42,7 @@ struct usbnet { struct usb_host_endpoint *status; unsigned maxpacket; struct timer_list delay; + const char *padding_pkt; /* protocol/interface state */ struct net_device *net; -- cgit v1.2.3 From 7df37ff33dc122f7bd0614d707939fe84322d264 Mon Sep 17 00:00:00 2001 From: "Catalin\\(ux\\) M. BOIE" Date: Mon, 23 Sep 2013 23:04:19 +0300 Subject: IPv6 NAT: Do not drop DNATed 6to4/6rd packets When a router is doing DNAT for 6to4/6rd packets the latest anti-spoofing commit 218774dc ("ipv6: add anti-spoofing checks for 6to4 and 6rd") will drop them because the IPv6 address embedded does not match the IPv4 destination. This patch will allow them to pass by testing if we have an address that matches on 6to4/6rd interface. I have been hit by this problem using Fedora and IPV6TO4_IPV4ADDR. Also, log the dropped packets (with rate limit). Signed-off-by: Catalin(ux) M. BOIE Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/addrconf.h | 4 +++ net/ipv6/addrconf.c | 27 ++++++++++++++++ net/ipv6/sit.c | 84 +++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 100 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index fb314de2b61b..86505bfa5d2c 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -67,6 +67,10 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr); #endif +bool ipv6_chk_custom_prefix(const struct in6_addr *addr, + const unsigned int prefix_len, + struct net_device *dev); + int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev); struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d6ff12617f36..a0c3abe72461 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1499,6 +1499,33 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, return false; } +/* Compares an address/prefix_len with addresses on device @dev. + * If one is found it returns true. + */ +bool ipv6_chk_custom_prefix(const struct in6_addr *addr, + const unsigned int prefix_len, struct net_device *dev) +{ + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + bool ret = false; + + rcu_read_lock(); + idev = __in6_dev_get(dev); + if (idev) { + read_lock_bh(&idev->lock); + list_for_each_entry(ifa, &idev->addr_list, if_list) { + ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len); + if (ret) + break; + } + read_unlock_bh(&idev->lock); + } + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(ipv6_chk_custom_prefix); + int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev) { struct inet6_dev *idev; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 7ee5cb96db34..afd5605aea7c 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -566,6 +566,70 @@ static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr, return false; } +/* Checks if an address matches an address on the tunnel interface. + * Used to detect the NAT of proto 41 packets and let them pass spoofing test. + * Long story: + * This function is called after we considered the packet as spoofed + * in is_spoofed_6rd. + * We may have a router that is doing NAT for proto 41 packets + * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb + * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd + * function will return true, dropping the packet. + * But, we can still check if is spoofed against the IP + * addresses associated with the interface. + */ +static bool only_dnatted(const struct ip_tunnel *tunnel, + const struct in6_addr *v6dst) +{ + int prefix_len; + +#ifdef CONFIG_IPV6_SIT_6RD + prefix_len = tunnel->ip6rd.prefixlen + 32 + - tunnel->ip6rd.relay_prefixlen; +#else + prefix_len = 48; +#endif + return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev); +} + +/* Returns true if a packet is spoofed */ +static bool packet_is_spoofed(struct sk_buff *skb, + const struct iphdr *iph, + struct ip_tunnel *tunnel) +{ + const struct ipv6hdr *ipv6h; + + if (tunnel->dev->priv_flags & IFF_ISATAP) { + if (!isatap_chksrc(skb, iph, tunnel)) + return true; + + return false; + } + + if (tunnel->dev->flags & IFF_POINTOPOINT) + return false; + + ipv6h = ipv6_hdr(skb); + + if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) { + net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n", + &iph->saddr, &ipv6h->saddr, + &iph->daddr, &ipv6h->daddr); + return true; + } + + if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr))) + return false; + + if (only_dnatted(tunnel, &ipv6h->daddr)) + return false; + + net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n", + &iph->saddr, &ipv6h->saddr, + &iph->daddr, &ipv6h->daddr); + return true; +} + static int ipip6_rcv(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); @@ -586,19 +650,9 @@ static int ipip6_rcv(struct sk_buff *skb) IPCB(skb)->flags = 0; skb->protocol = htons(ETH_P_IPV6); - if (tunnel->dev->priv_flags & IFF_ISATAP) { - if (!isatap_chksrc(skb, iph, tunnel)) { - tunnel->dev->stats.rx_errors++; - goto out; - } - } else if (!(tunnel->dev->flags&IFF_POINTOPOINT)) { - if (is_spoofed_6rd(tunnel, iph->saddr, - &ipv6_hdr(skb)->saddr) || - is_spoofed_6rd(tunnel, iph->daddr, - &ipv6_hdr(skb)->daddr)) { - tunnel->dev->stats.rx_errors++; - goto out; - } + if (packet_is_spoofed(skb, iph, tunnel)) { + tunnel->dev->stats.rx_errors++; + goto out; } __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); @@ -748,7 +802,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); if (neigh == NULL) { - net_dbg_ratelimited("sit: nexthop == NULL\n"); + net_dbg_ratelimited("nexthop == NULL\n"); goto tx_error; } @@ -777,7 +831,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); if (neigh == NULL) { - net_dbg_ratelimited("sit: nexthop == NULL\n"); + net_dbg_ratelimited("nexthop == NULL\n"); goto tx_error; } -- cgit v1.2.3 From 50624c934db18ab90aaea4908f60dd39aab4e6e5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 23 Sep 2013 21:19:49 -0700 Subject: net: Delay default_device_exit_batch until no devices are unregistering v2 There is currently serialization network namespaces exiting and network devices exiting as the final part of netdev_run_todo does not happen under the rtnl_lock. This is compounded by the fact that the only list of devices unregistering in netdev_run_todo is local to the netdev_run_todo. This lack of serialization in extreme cases results in network devices unregistering in netdev_run_todo after the loopback device of their network namespace has been freed (making dst_ifdown unsafe), and after the their network namespace has exited (making the NETDEV_UNREGISTER, and NETDEV_UNREGISTER_FINAL callbacks unsafe). Add the missing serialization by a per network namespace count of how many network devices are unregistering and having a wait queue that is woken up whenever the count is decreased. The count and wait queue allow default_device_exit_batch to wait until all of the unregistration activity for a network namespace has finished before proceeding to unregister the loopback device and then allowing the network namespace to exit. Only a single global wait queue is used because there is a single global lock, and there is a single waiter, per network namespace wait queues would be a waste of resources. The per network namespace count of unregistering devices gives a progress guarantee because the number of network devices unregistering in an exiting network namespace must ultimately drop to zero (assuming network device unregistration completes). The basic logic remains the same as in v1. This patch is now half comment and half rtnl_lock_unregistering an expanded version of wait_event performs no extra work in the common case where no network devices are unregistering when we get to default_device_exit_batch. Reported-by: Francesco Ruggeri Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/net/net_namespace.h | 1 + net/core/dev.c | 49 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 49 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 1313456a0994..9d22f08896c6 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -74,6 +74,7 @@ struct net { struct hlist_head *dev_index_head; unsigned int dev_base_seq; /* protected by rtnl_mutex */ int ifindex; + unsigned int dev_unreg_count; /* core fib_rules */ struct list_head rules_ops; diff --git a/net/core/dev.c b/net/core/dev.c index 5c713f2239cc..65f829cfd928 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5247,10 +5247,12 @@ static int dev_new_index(struct net *net) /* Delayed registration/unregisteration */ static LIST_HEAD(net_todo_list); +static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); static void net_set_todo(struct net_device *dev) { list_add_tail(&dev->todo_list, &net_todo_list); + dev_net(dev)->dev_unreg_count++; } static void rollback_registered_many(struct list_head *head) @@ -5918,6 +5920,12 @@ void netdev_run_todo(void) if (dev->destructor) dev->destructor(dev); + /* Report a network device has been unregistered */ + rtnl_lock(); + dev_net(dev)->dev_unreg_count--; + __rtnl_unlock(); + wake_up(&netdev_unregistering_wq); + /* Free network device */ kobject_put(&dev->dev.kobj); } @@ -6603,6 +6611,34 @@ static void __net_exit default_device_exit(struct net *net) rtnl_unlock(); } +static void __net_exit rtnl_lock_unregistering(struct list_head *net_list) +{ + /* Return with the rtnl_lock held when there are no network + * devices unregistering in any network namespace in net_list. + */ + struct net *net; + bool unregistering; + DEFINE_WAIT(wait); + + for (;;) { + prepare_to_wait(&netdev_unregistering_wq, &wait, + TASK_UNINTERRUPTIBLE); + unregistering = false; + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) { + if (net->dev_unreg_count > 0) { + unregistering = true; + break; + } + } + if (!unregistering) + break; + __rtnl_unlock(); + schedule(); + } + finish_wait(&netdev_unregistering_wq, &wait); +} + static void __net_exit default_device_exit_batch(struct list_head *net_list) { /* At exit all network devices most be removed from a network @@ -6614,7 +6650,18 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) struct net *net; LIST_HEAD(dev_kill_list); - rtnl_lock(); + /* To prevent network device cleanup code from dereferencing + * loopback devices or network devices that have been freed + * wait here for all pending unregistrations to complete, + * before unregistring the loopback device and allowing the + * network namespace be freed. + * + * The netdev todo list containing all network devices + * unregistrations that happen in default_device_exit_batch + * will run in the rtnl_unlock() at the end of + * default_device_exit_batch. + */ + rtnl_lock_unregistering(net_list); list_for_each_entry(net, net_list, exit_list) { for_each_netdev_reverse(net, dev) { if (dev->rtnl_link_ops) -- cgit v1.2.3 From 9a3bab6b05383f1e4c3716b3615500c51285959e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Sep 2013 06:19:57 -0700 Subject: net: net_secret should not depend on TCP A host might need net_secret[] and never open a single socket. Problem added in commit aebda156a570782 ("net: defer net_secret[] initialization") Based on prior patch from Hannes Frederic Sowa. Reported-by: Hannes Frederic Sowa Signed-off-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/secure_seq.h | 1 - net/core/secure_seq.c | 27 ++++++++++++++++++++++++--- net/ipv4/af_inet.c | 4 +--- 3 files changed, 25 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index 6ca975bebd37..c2e542b27a5a 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -3,7 +3,6 @@ #include -extern void net_secret_init(void); extern __u32 secure_ip_id(__be32 daddr); extern __u32 secure_ipv6_id(const __be32 daddr[4]); extern u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 6a2f13cee86a..3f1ec1586ae1 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -10,11 +10,24 @@ #include -static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; +#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4) -void net_secret_init(void) +static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned; + +static void net_secret_init(void) { - get_random_bytes(net_secret, sizeof(net_secret)); + u32 tmp; + int i; + + if (likely(net_secret[0])) + return; + + for (i = NET_SECRET_SIZE; i > 0;) { + do { + get_random_bytes(&tmp, sizeof(tmp)); + } while (!tmp); + cmpxchg(&net_secret[--i], 0, tmp); + } } #ifdef CONFIG_INET @@ -42,6 +55,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, u32 hash[MD5_DIGEST_WORDS]; u32 i; + net_secret_init(); memcpy(hash, saddr, 16); for (i = 0; i < 4; i++) secret[i] = net_secret[i] + (__force u32)daddr[i]; @@ -63,6 +77,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, u32 hash[MD5_DIGEST_WORDS]; u32 i; + net_secret_init(); memcpy(hash, saddr, 16); for (i = 0; i < 4; i++) secret[i] = net_secret[i] + (__force u32) daddr[i]; @@ -82,6 +97,7 @@ __u32 secure_ip_id(__be32 daddr) { u32 hash[MD5_DIGEST_WORDS]; + net_secret_init(); hash[0] = (__force __u32) daddr; hash[1] = net_secret[13]; hash[2] = net_secret[14]; @@ -96,6 +112,7 @@ __u32 secure_ipv6_id(const __be32 daddr[4]) { __u32 hash[4]; + net_secret_init(); memcpy(hash, daddr, 16); md5_transform(hash, net_secret); @@ -107,6 +124,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, { u32 hash[MD5_DIGEST_WORDS]; + net_secret_init(); hash[0] = (__force u32)saddr; hash[1] = (__force u32)daddr; hash[2] = ((__force u16)sport << 16) + (__force u16)dport; @@ -121,6 +139,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) { u32 hash[MD5_DIGEST_WORDS]; + net_secret_init(); hash[0] = (__force u32)saddr; hash[1] = (__force u32)daddr; hash[2] = (__force u32)dport ^ net_secret[14]; @@ -140,6 +159,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, u32 hash[MD5_DIGEST_WORDS]; u64 seq; + net_secret_init(); hash[0] = (__force u32)saddr; hash[1] = (__force u32)daddr; hash[2] = ((__force u16)sport << 16) + (__force u16)dport; @@ -164,6 +184,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, u64 seq; u32 i; + net_secret_init(); memcpy(hash, saddr, 16); for (i = 0; i < 4; i++) secret[i] = net_secret[i] + daddr[i]; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 7a1874b7b8fd..cfeb85cff4f0 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -263,10 +263,8 @@ void build_ehash_secret(void) get_random_bytes(&rnd, sizeof(rnd)); } while (rnd == 0); - if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) { + if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); - net_secret_init(); - } } EXPORT_SYMBOL(build_ehash_secret); -- cgit v1.2.3 From f4a87e7bd2eaef26a3ca25437ce8b807de2966ad Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 30 Sep 2013 08:51:46 +0100 Subject: netfilter: synproxy: fix BUG_ON triggered by corrupt TCP packets TCP packets hitting the SYN proxy through the SYNPROXY target are not validated by TCP conntrack. When th->doff is below 5, an underflow happens when calculating the options length, causing skb_header_pointer() to return NULL and triggering the BUG_ON(). Handle this case gracefully by checking for NULL instead of using BUG_ON(). Reported-by: Martin Topholm Tested-by: Martin Topholm Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_synproxy.h | 2 +- net/ipv4/netfilter/ipt_SYNPROXY.c | 10 +++++++--- net/ipv6/netfilter/ip6t_SYNPROXY.c | 10 +++++++--- net/netfilter/nf_synproxy_core.c | 12 +++++++----- 4 files changed, 22 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h index 806f54a290d6..f572f313d6f1 100644 --- a/include/net/netfilter/nf_conntrack_synproxy.h +++ b/include/net/netfilter/nf_conntrack_synproxy.h @@ -56,7 +56,7 @@ struct synproxy_options { struct tcphdr; struct xt_synproxy_info; -extern void synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, +extern bool synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, const struct tcphdr *th, struct synproxy_options *opts); extern unsigned int synproxy_options_size(const struct synproxy_options *opts); diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 67e17dcda65e..b6346bf2fde3 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -267,7 +267,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) if (th == NULL) return NF_DROP; - synproxy_parse_options(skb, par->thoff, th, &opts); + if (!synproxy_parse_options(skb, par->thoff, th, &opts)) + return NF_DROP; if (th->syn && !(th->ack || th->fin || th->rst)) { /* Initial SYN from client */ @@ -350,7 +351,8 @@ static unsigned int ipv4_synproxy_hook(unsigned int hooknum, /* fall through */ case TCP_CONNTRACK_SYN_SENT: - synproxy_parse_options(skb, thoff, th, &opts); + if (!synproxy_parse_options(skb, thoff, th, &opts)) + return NF_DROP; if (!th->syn && th->ack && CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { @@ -373,7 +375,9 @@ static unsigned int ipv4_synproxy_hook(unsigned int hooknum, if (!th->syn || !th->ack) break; - synproxy_parse_options(skb, thoff, th, &opts); + if (!synproxy_parse_options(skb, thoff, th, &opts)) + return NF_DROP; + if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy->tsoff = opts.tsval - synproxy->its; diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 19cfea8dbcaa..2748b042da72 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -282,7 +282,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) if (th == NULL) return NF_DROP; - synproxy_parse_options(skb, par->thoff, th, &opts); + if (!synproxy_parse_options(skb, par->thoff, th, &opts)) + return NF_DROP; if (th->syn && !(th->ack || th->fin || th->rst)) { /* Initial SYN from client */ @@ -372,7 +373,8 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum, /* fall through */ case TCP_CONNTRACK_SYN_SENT: - synproxy_parse_options(skb, thoff, th, &opts); + if (!synproxy_parse_options(skb, thoff, th, &opts)) + return NF_DROP; if (!th->syn && th->ack && CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { @@ -395,7 +397,9 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum, if (!th->syn || !th->ack) break; - synproxy_parse_options(skb, thoff, th, &opts); + if (!synproxy_parse_options(skb, thoff, th, &opts)) + return NF_DROP; + if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy->tsoff = opts.tsval - synproxy->its; diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 6fd967c6278c..cdf4567ba9b3 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -24,7 +24,7 @@ int synproxy_net_id; EXPORT_SYMBOL_GPL(synproxy_net_id); -void +bool synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, const struct tcphdr *th, struct synproxy_options *opts) { @@ -32,7 +32,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, u8 buf[40], *ptr; ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf); - BUG_ON(ptr == NULL); + if (ptr == NULL) + return false; opts->options = 0; while (length > 0) { @@ -41,16 +42,16 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, switch (opcode) { case TCPOPT_EOL: - return; + return true; case TCPOPT_NOP: length--; continue; default: opsize = *ptr++; if (opsize < 2) - return; + return true; if (opsize > length) - return; + return true; switch (opcode) { case TCPOPT_MSS: @@ -84,6 +85,7 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, length -= opsize; } } + return true; } EXPORT_SYMBOL_GPL(synproxy_parse_options); -- cgit v1.2.3 From 559835ea7292e2f09304d81eda16f4209433245e Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 24 Sep 2013 10:25:40 -0700 Subject: vxlan: Use RCU apis to access sk_user_data. Use of RCU api makes vxlan code easier to understand. It also fixes bug due to missing ACCESS_ONCE() on sk_user_data dereference. In rare case without ACCESS_ONCE() compiler might omit vs on sk_user_data dereference. Compiler can use vs as alias for sk->sk_user_data, resulting in multiple sk_user_data dereference in rcu read context which could change. CC: Jesse Gross Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 9 +++------ include/net/sock.h | 5 +++++ 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index d1292fe746bc..2ef5b6219f3f 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -952,8 +952,7 @@ void vxlan_sock_release(struct vxlan_sock *vs) spin_lock(&vn->sock_lock); hlist_del_rcu(&vs->hlist); - smp_wmb(); - vs->sock->sk->sk_user_data = NULL; + rcu_assign_sk_user_data(vs->sock->sk, NULL); vxlan_notify_del_rx_port(sk); spin_unlock(&vn->sock_lock); @@ -1048,8 +1047,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) port = inet_sk(sk)->inet_sport; - smp_read_barrier_depends(); - vs = (struct vxlan_sock *)sk->sk_user_data; + vs = rcu_dereference_sk_user_data(sk); if (!vs) goto drop; @@ -2302,8 +2300,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, atomic_set(&vs->refcnt, 1); vs->rcv = rcv; vs->data = data; - smp_wmb(); - vs->sock->sk->sk_user_data = vs; + rcu_assign_sk_user_data(vs->sock->sk, vs); spin_lock(&vn->sock_lock); hlist_add_head_rcu(&vs->hlist, vs_head(net, port)); diff --git a/include/net/sock.h b/include/net/sock.h index 6ba2e7b0e2b1..1d37a8086bed 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -409,6 +409,11 @@ struct sock { void (*sk_destruct)(struct sock *sk); }; +#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) + +#define rcu_dereference_sk_user_data(sk) rcu_dereference(__sk_user_data((sk))) +#define rcu_assign_sk_user_data(sk, ptr) rcu_assign_pointer(__sk_user_data((sk)), ptr) + /* * SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK * or not whether his port will be reused by someone else. SK_FORCE_REUSE -- cgit v1.2.3 From 2a156a6b52f45355d999b58b745eef93021cc81a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 30 Sep 2013 13:45:13 -0700 Subject: include/asm-generic/vtime.h: avoid zero-length file patch(1) can't handle zero-length files - it appears to simply not create the file, so my powerpc build fails. Put something in here to make life easier. Cc: Hugh Dickins Cc: Frederic Weisbecker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/vtime.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-generic/vtime.h b/include/asm-generic/vtime.h index e69de29bb2d1..b1a49677fe25 100644 --- a/include/asm-generic/vtime.h +++ b/include/asm-generic/vtime.h @@ -0,0 +1 @@ +/* no content, but patch(1) dislikes empty files */ -- cgit v1.2.3 From 117aad1e9e4d97448d1df3f84b08bd65811e6d6a Mon Sep 17 00:00:00 2001 From: Rafael Aquini Date: Mon, 30 Sep 2013 13:45:16 -0700 Subject: mm: avoid reinserting isolated balloon pages into LRU lists Isolated balloon pages can wrongly end up in LRU lists when migrate_pages() finishes its round without draining all the isolated page list. The same issue can happen when reclaim_clean_pages_from_list() tries to reclaim pages from an isolated page list, before migration, in the CMA path. Such balloon page leak opens a race window against LRU lists shrinkers that leads us to the following kernel panic: BUG: unable to handle kernel NULL pointer dereference at 0000000000000028 IP: [] shrink_page_list+0x24e/0x897 PGD 3cda2067 PUD 3d713067 PMD 0 Oops: 0000 [#1] SMP CPU: 0 PID: 340 Comm: kswapd0 Not tainted 3.12.0-rc1-22626-g4367597 #87 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 RIP: shrink_page_list+0x24e/0x897 RSP: 0000:ffff88003da499b8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff88003e82bd60 RCX: 00000000000657d5 RDX: 0000000000000000 RSI: 000000000000031f RDI: ffff88003e82bd40 RBP: ffff88003da49ab0 R08: 0000000000000001 R09: 0000000081121a45 R10: ffffffff81121a45 R11: ffff88003c4a9a28 R12: ffff88003e82bd40 R13: ffff88003da0e800 R14: 0000000000000001 R15: ffff88003da49d58 FS: 0000000000000000(0000) GS:ffff88003fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000067d9000 CR3: 000000003ace5000 CR4: 00000000000407b0 Call Trace: shrink_inactive_list+0x240/0x3de shrink_lruvec+0x3e0/0x566 __shrink_zone+0x94/0x178 shrink_zone+0x3a/0x82 balance_pgdat+0x32a/0x4c2 kswapd+0x2f0/0x372 kthread+0xa2/0xaa ret_from_fork+0x7c/0xb0 Code: 80 7d 8f 01 48 83 95 68 ff ff ff 00 4c 89 e7 e8 5a 7b 00 00 48 85 c0 49 89 c5 75 08 80 7d 8f 00 74 3e eb 31 48 8b 80 18 01 00 00 <48> 8b 74 0d 48 8b 78 30 be 02 00 00 00 ff d2 eb RIP [] shrink_page_list+0x24e/0x897 RSP CR2: 0000000000000028 ---[ end trace 703d2451af6ffbfd ]--- Kernel panic - not syncing: Fatal exception This patch fixes the issue, by assuring the proper tests are made at putback_movable_pages() & reclaim_clean_pages_from_list() to avoid isolated balloon pages being wrongly reinserted in LRU lists. [akpm@linux-foundation.org: clarify awkward comment text] Signed-off-by: Rafael Aquini Reported-by: Luiz Capitulino Tested-by: Luiz Capitulino Cc: Mel Gorman Cc: Rik van Riel Cc: Hugh Dickins Cc: Johannes Weiner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/balloon_compaction.h | 25 +++++++++++++++++++++++++ mm/migrate.c | 2 +- mm/vmscan.c | 4 +++- 3 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index f7f1d7169b11..089743ade734 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -158,6 +158,26 @@ static inline bool balloon_page_movable(struct page *page) return false; } +/* + * isolated_balloon_page - identify an isolated balloon page on private + * compaction/migration page lists. + * + * After a compaction thread isolates a balloon page for migration, it raises + * the page refcount to prevent concurrent compaction threads from re-isolating + * the same page. For that reason putback_movable_pages(), or other routines + * that need to identify isolated balloon pages on private pagelists, cannot + * rely on balloon_page_movable() to accomplish the task. + */ +static inline bool isolated_balloon_page(struct page *page) +{ + /* Already isolated balloon pages, by default, have a raised refcount */ + if (page_flags_cleared(page) && !page_mapped(page) && + page_count(page) >= 2) + return __is_movable_balloon_page(page); + + return false; +} + /* * balloon_page_insert - insert a page into the balloon's page list and make * the page->mapping assignment accordingly. @@ -243,6 +263,11 @@ static inline bool balloon_page_movable(struct page *page) return false; } +static inline bool isolated_balloon_page(struct page *page) +{ + return false; +} + static inline bool balloon_page_isolate(struct page *page) { return false; diff --git a/mm/migrate.c b/mm/migrate.c index 9c8d5f59d30b..a26bccd44ccb 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -107,7 +107,7 @@ void putback_movable_pages(struct list_head *l) list_del(&page->lru); dec_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); - if (unlikely(balloon_page_movable(page))) + if (unlikely(isolated_balloon_page(page))) balloon_page_putback(page); else putback_lru_page(page); diff --git a/mm/vmscan.c b/mm/vmscan.c index beb35778c69f..53f2f82f83ae 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -48,6 +48,7 @@ #include #include +#include #include "internal.h" @@ -1113,7 +1114,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, LIST_HEAD(clean_pages); list_for_each_entry_safe(page, next, page_list, lru) { - if (page_is_file_cache(page) && !PageDirty(page)) { + if (page_is_file_cache(page) && !PageDirty(page) && + !isolated_balloon_page(page)) { ClearPageActive(page); list_move(&page->lru, &clean_pages); } -- cgit v1.2.3 From 45906723578f768d029ba7774cd97b435f2c5125 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 30 Sep 2013 14:16:41 +0200 Subject: skbuff: size of hole is wrong in a comment Since commit c93bdd0e03e8 ("netvm: allow skb allocation to use PFMEMALLOC reserves"), hole size is one bit less than what is written in the comment. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2ddb48d9312c..c2d89335f637 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -498,7 +498,7 @@ struct sk_buff { * headers if needed */ __u8 encapsulation:1; - /* 7/9 bit hole (depending on ndisc_nodetype presence) */ + /* 6/8 bit hole (depending on ndisc_nodetype presence) */ kmemcheck_bitfield_end(flags2); #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL -- cgit v1.2.3 From 26794942461f438a6bc725ec7294b08a6bd782c4 Mon Sep 17 00:00:00 2001 From: David Miller Date: Wed, 2 Oct 2013 14:25:09 -0400 Subject: mm: Fix generic hugetlb pte check return type. The include/asm-generic/hugetlb.h stubs that just vector huge_pte_*() calls to the pte_*() implementations won't work in certain situations. x86 and sparc, for example, return "unsigned long" from the bit checks, and just go "return pte_val(pte) & PTE_BIT_FOO;" But since huge_pte_*() returns 'int', if any high bits on 64-bit are relevant, they get chopped off. The net effect is that we can loop forever trying to COW a huge page, because the huge_pte_write() check signals false all the time. Reported-by: Gurudas Pai Tested-by: Gurudas Pai Signed-off-by: David S. Miller Acked-by: David Rientjes --- include/asm-generic/hugetlb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h index d06079c774a0..99b490b4d05a 100644 --- a/include/asm-generic/hugetlb.h +++ b/include/asm-generic/hugetlb.h @@ -6,12 +6,12 @@ static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) return mk_pte(page, pgprot); } -static inline int huge_pte_write(pte_t pte) +static inline unsigned long huge_pte_write(pte_t pte) { return pte_write(pte); } -static inline int huge_pte_dirty(pte_t pte) +static inline unsigned long huge_pte_dirty(pte_t pte) { return pte_dirty(pte); } -- cgit v1.2.3 From 9886167d20c0720dcfb01e62cdff4d906b226f43 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 3 Oct 2013 16:02:23 +0200 Subject: perf: Fix perf_pmu_migrate_context While auditing the list_entry usage due to a trinity bug I found that perf_pmu_migrate_context violates the rules for perf_event::event_entry. The problem is that perf_event::event_entry is a RCU list element, and hence we must wait for a full RCU grace period before re-using the element after deletion. Therefore the usage in perf_pmu_migrate_context() which re-uses the entry immediately is broken. For now introduce another list_head into perf_event for this specific usage. This doesn't actually fix the trinity report because that never goes through this code. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-mkj72lxagw1z8fvjm648iznw@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 24 +++++++++++++++++++++++- kernel/events/core.c | 6 +++--- 2 files changed, 26 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 866e85c5eb94..c8ba627c1d60 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -294,9 +294,31 @@ struct ring_buffer; */ struct perf_event { #ifdef CONFIG_PERF_EVENTS - struct list_head group_entry; + /* + * entry onto perf_event_context::event_list; + * modifications require ctx->lock + * RCU safe iterations. + */ struct list_head event_entry; + + /* + * XXX: group_entry and sibling_list should be mutually exclusive; + * either you're a sibling on a group, or you're the group leader. + * Rework the code to always use the same list element. + * + * Locked for modification by both ctx->mutex and ctx->lock; holding + * either sufficies for read. + */ + struct list_head group_entry; struct list_head sibling_list; + + /* + * We need storage to track the entries in perf_pmu_migrate_context; we + * cannot use the event_entry because of RCU and we want to keep the + * group in tact which avoids us using the other two entries. + */ + struct list_head migrate_entry; + struct hlist_node hlist_entry; int nr_siblings; int group_flags; diff --git a/kernel/events/core.c b/kernel/events/core.c index cb4238e85b38..d49a9d29334c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7234,15 +7234,15 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) perf_remove_from_context(event); unaccount_event_cpu(event, src_cpu); put_ctx(src_ctx); - list_add(&event->event_entry, &events); + list_add(&event->migrate_entry, &events); } mutex_unlock(&src_ctx->mutex); synchronize_rcu(); mutex_lock(&dst_ctx->mutex); - list_for_each_entry_safe(event, tmp, &events, event_entry) { - list_del(&event->event_entry); + list_for_each_entry_safe(event, tmp, &events, migrate_entry) { + list_del(&event->migrate_entry); if (event->state >= PERF_EVENT_STATE_OFF) event->state = PERF_EVENT_STATE_INACTIVE; account_event_cpu(event, dst_cpu); -- cgit v1.2.3 From d623a0e19dcbc4e44a8db047158815d7f8c2b839 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 7 Oct 2013 10:22:01 -0700 Subject: ARM: dts: Fix pinctrl mask for omap3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The wake-up interrupt bit is available on omap3/4/5 processors unlike what we claim. Without fixing it we cannot use it on omap3 and the system configured for wake-up events will just hang on wake-up. Cc: Grygorii Strashko Cc: Benoît Cousson Cc: devicetree@vger.kernel.org Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3.dtsi | 4 ++-- arch/arm/mach-omap2/mux.h | 4 +--- include/dt-bindings/pinctrl/omap.h | 4 +--- 3 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi index 7d95cda1fae4..b41bd57f4328 100644 --- a/arch/arm/boot/dts/omap3.dtsi +++ b/arch/arm/boot/dts/omap3.dtsi @@ -108,7 +108,7 @@ #address-cells = <1>; #size-cells = <0>; pinctrl-single,register-width = <16>; - pinctrl-single,function-mask = <0x7f1f>; + pinctrl-single,function-mask = <0xff1f>; }; omap3_pmx_wkup: pinmux@0x48002a00 { @@ -117,7 +117,7 @@ #address-cells = <1>; #size-cells = <0>; pinctrl-single,register-width = <16>; - pinctrl-single,function-mask = <0x7f1f>; + pinctrl-single,function-mask = <0xff1f>; }; gpio1: gpio@48310000 { diff --git a/arch/arm/mach-omap2/mux.h b/arch/arm/mach-omap2/mux.h index 5d2080ef7923..16f78a990d04 100644 --- a/arch/arm/mach-omap2/mux.h +++ b/arch/arm/mach-omap2/mux.h @@ -28,7 +28,7 @@ #define OMAP_PULL_UP (1 << 4) #define OMAP_ALTELECTRICALSEL (1 << 5) -/* 34xx specific mux bit defines */ +/* omap3/4/5 specific mux bit defines */ #define OMAP_INPUT_EN (1 << 8) #define OMAP_OFF_EN (1 << 9) #define OMAP_OFFOUT_EN (1 << 10) @@ -36,8 +36,6 @@ #define OMAP_OFF_PULL_EN (1 << 12) #define OMAP_OFF_PULL_UP (1 << 13) #define OMAP_WAKEUP_EN (1 << 14) - -/* 44xx specific mux bit defines */ #define OMAP_WAKEUP_EVENT (1 << 15) /* Active pin states */ diff --git a/include/dt-bindings/pinctrl/omap.h b/include/dt-bindings/pinctrl/omap.h index edbd250809cb..bed35e36fd27 100644 --- a/include/dt-bindings/pinctrl/omap.h +++ b/include/dt-bindings/pinctrl/omap.h @@ -23,7 +23,7 @@ #define PULL_UP (1 << 4) #define ALTELECTRICALSEL (1 << 5) -/* 34xx specific mux bit defines */ +/* omap3/4/5 specific mux bit defines */ #define INPUT_EN (1 << 8) #define OFF_EN (1 << 9) #define OFFOUT_EN (1 << 10) @@ -31,8 +31,6 @@ #define OFF_PULL_EN (1 << 12) #define OFF_PULL_UP (1 << 13) #define WAKEUP_EN (1 << 14) - -/* 44xx specific mux bit defines */ #define WAKEUP_EVENT (1 << 15) /* Active pin states */ -- cgit v1.2.3 From c1868b822515313c72445e70b7d9e47d8815bc52 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 11 Sep 2013 16:35:25 +0300 Subject: mlx5: Remove checksum on command interface commands Checksum calculations consume CPU resources and can be significant to the rate of resource creation/destruction. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 26 ++++++++++++++------------ drivers/net/ethernet/mellanox/mlx5/core/main.c | 21 +++++---------------- include/linux/mlx5/device.h | 2 +- include/linux/mlx5/driver.h | 4 +--- 4 files changed, 21 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 5472cbd34028..db3a6584939f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -180,28 +180,32 @@ static int verify_block_sig(struct mlx5_cmd_prot_block *block) return 0; } -static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token) +static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token, + int csum) { block->token = token; - block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 2); - block->sig = ~xor8_buf(block, sizeof(*block) - 1); + if (csum) { + block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) - + sizeof(block->data) - 2); + block->sig = ~xor8_buf(block, sizeof(*block) - 1); + } } -static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token) +static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token, int csum) { struct mlx5_cmd_mailbox *next = msg->next; while (next) { - calc_block_sig(next->buf, token); + calc_block_sig(next->buf, token, csum); next = next->next; } } -static void set_signature(struct mlx5_cmd_work_ent *ent) +static void set_signature(struct mlx5_cmd_work_ent *ent, int csum) { ent->lay->sig = ~xor8_buf(ent->lay, sizeof(*ent->lay)); - calc_chain_sig(ent->in, ent->token); - calc_chain_sig(ent->out, ent->token); + calc_chain_sig(ent->in, ent->token, csum); + calc_chain_sig(ent->out, ent->token, csum); } static void poll_timeout(struct mlx5_cmd_work_ent *ent) @@ -539,8 +543,7 @@ static void cmd_work_handler(struct work_struct *work) lay->type = MLX5_PCI_CMD_XPORT; lay->token = ent->token; lay->status_own = CMD_OWNER_HW; - if (!cmd->checksum_disabled) - set_signature(ent); + set_signature(ent, !cmd->checksum_disabled); dump_command(dev, ent, 1); ktime_get_ts(&ent->ts1); @@ -773,8 +776,6 @@ static int mlx5_copy_from_msg(void *to, struct mlx5_cmd_msg *from, int size) copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE); block = next->buf; - if (xor8_buf(block, sizeof(*block)) != 0xff) - return -EINVAL; memcpy(to, block->data, copy); to += copy; @@ -1361,6 +1362,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) goto err_map; } + cmd->checksum_disabled = 1; cmd->max_reg_cmds = (1 << cmd->log_sz) - 1; cmd->bitmask = (1 << cmd->max_reg_cmds) - 1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index b47739b0b5f6..bc0f5fb66e24 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -165,9 +165,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) struct mlx5_cmd_set_hca_cap_mbox_in *set_ctx = NULL; struct mlx5_cmd_query_hca_cap_mbox_in query_ctx; struct mlx5_cmd_set_hca_cap_mbox_out set_out; - struct mlx5_profile *prof = dev->profile; u64 flags; - int csum = 1; int err; memset(&query_ctx, 0, sizeof(query_ctx)); @@ -197,20 +195,14 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) memcpy(&set_ctx->hca_cap, &query_out->hca_cap, sizeof(set_ctx->hca_cap)); - if (prof->mask & MLX5_PROF_MASK_CMDIF_CSUM) { - csum = !!prof->cmdif_csum; - flags = be64_to_cpu(set_ctx->hca_cap.flags); - if (csum) - flags |= MLX5_DEV_CAP_FLAG_CMDIF_CSUM; - else - flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM; - - set_ctx->hca_cap.flags = cpu_to_be64(flags); - } - if (dev->profile->mask & MLX5_PROF_MASK_QP_SIZE) set_ctx->hca_cap.log_max_qp = dev->profile->log_max_qp; + flags = be64_to_cpu(query_out->hca_cap.flags); + /* disable checksum */ + flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM; + + set_ctx->hca_cap.flags = cpu_to_be64(flags); memset(&set_out, 0, sizeof(set_out)); set_ctx->hca_cap.log_uar_page_sz = cpu_to_be16(PAGE_SHIFT - 12); set_ctx->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_SET_HCA_CAP); @@ -225,9 +217,6 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) if (err) goto query_ex; - if (!csum) - dev->cmd.checksum_disabled = 1; - query_ex: kfree(query_out); kfree(set_ctx); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 68029b30c3dc..770e3b448b3b 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -181,7 +181,7 @@ enum { MLX5_DEV_CAP_FLAG_TLP_HINTS = 1LL << 39, MLX5_DEV_CAP_FLAG_SIG_HAND_OVER = 1LL << 40, MLX5_DEV_CAP_FLAG_DCT = 1LL << 41, - MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 1LL << 46, + MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 3LL << 46, }; enum { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8888381fc150..2cfc4309d45f 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -747,8 +747,7 @@ static inline u32 mlx5_idx_to_mkey(u32 mkey_idx) enum { MLX5_PROF_MASK_QP_SIZE = (u64)1 << 0, - MLX5_PROF_MASK_CMDIF_CSUM = (u64)1 << 1, - MLX5_PROF_MASK_MR_CACHE = (u64)1 << 2, + MLX5_PROF_MASK_MR_CACHE = (u64)1 << 1, }; enum { @@ -758,7 +757,6 @@ enum { struct mlx5_profile { u64 mask; u32 log_max_qp; - int cmdif_csum; struct { int size; int limit; -- cgit v1.2.3 From 2f6daec14d02deb84e7896a93196d78fbe9956a2 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 11 Sep 2013 16:35:29 +0300 Subject: mlx5: Fix layout of struct mlx5_init_seg The layout of struct health_buffer was not according to firmware specification. Fix it to comply. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- include/linux/mlx5/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 770e3b448b3b..5eb4e31af22b 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -417,7 +417,7 @@ struct mlx5_init_seg { struct health_buffer health; __be32 rsvd2[884]; __be32 health_counter; - __be32 rsvd3[1023]; + __be32 rsvd3[1019]; __be64 ieee1588_clk; __be32 ieee1588_clk_type; __be32 clr_intx; -- cgit v1.2.3 From ada9f5d007971a71d619e2abf66ebd3a9a399413 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 11 Sep 2013 16:35:34 +0300 Subject: IB/mlx5: Fix eq names to display nicely in /proc/interrupts It's helpful for a driver to put the pci slot name in its interrupt names, so /proc/interrupts will show the pci slot of the device. Signed-off-by: Sagi Grimberg Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/main.c | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 4 +++- include/linux/mlx5/driver.h | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b267c65261c0..b1a6cb3a2809 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -164,6 +164,7 @@ int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn) static int alloc_comp_eqs(struct mlx5_ib_dev *dev) { struct mlx5_eq_table *table = &dev->mdev.priv.eq_table; + char name[MLX5_MAX_EQ_NAME]; struct mlx5_eq *eq, *n; int ncomp_vec; int nent; @@ -180,11 +181,10 @@ static int alloc_comp_eqs(struct mlx5_ib_dev *dev) goto clean; } - snprintf(eq->name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i); + snprintf(name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i); err = mlx5_create_map_eq(&dev->mdev, eq, i + MLX5_EQ_VEC_COMP_BASE, nent, 0, - eq->name, - &dev->mdev.priv.uuari.uars[0]); + name, &dev->mdev.priv.uuari.uars[0]); if (err) { kfree(eq); goto clean; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 443cc4d7b024..2231d93cc7ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -366,9 +366,11 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, goto err_in; } + snprintf(eq->name, MLX5_MAX_EQ_NAME, "%s@pci:%s", + name, pci_name(dev->pdev)); eq->eqn = out.eq_number; err = request_irq(table->msix_arr[vecidx].vector, mlx5_msix_handler, 0, - name, eq); + eq->name, eq); if (err) goto err_eq; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2cfc4309d45f..6b8c496572c8 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -82,7 +82,7 @@ enum { }; enum { - MLX5_MAX_EQ_NAME = 20 + MLX5_MAX_EQ_NAME = 32 }; enum { -- cgit v1.2.3 From 61875f30daf60305712e25b209ef41ced2635bad Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 21 Sep 2013 13:58:22 -0400 Subject: random: allow architectures to optionally define random_get_entropy() Allow architectures which have a disabled get_cycles() function to provide a random_get_entropy() function which provides a fine-grained, rapidly changing counter that can be used by the /dev/random driver. For example, an architecture might have a rapidly changing register used to control random TLB cache eviction, or DRAM refresh that doesn't meet the requirements of get_cycles(), but which is good enough for the needs of the random driver. Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- drivers/char/random.c | 8 ++++---- include/linux/timex.h | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/char/random.c b/drivers/char/random.c index 92e6c67e1ae6..2d5daf9b58e9 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -643,7 +643,7 @@ struct timer_rand_state { */ void add_device_randomness(const void *buf, unsigned int size) { - unsigned long time = get_cycles() ^ jiffies; + unsigned long time = random_get_entropy() ^ jiffies; mix_pool_bytes(&input_pool, buf, size, NULL); mix_pool_bytes(&input_pool, &time, sizeof(time), NULL); @@ -680,7 +680,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) goto out; sample.jiffies = jiffies; - sample.cycles = get_cycles(); + sample.cycles = random_get_entropy(); sample.num = num; mix_pool_bytes(&input_pool, &sample, sizeof(sample), NULL); @@ -747,7 +747,7 @@ void add_interrupt_randomness(int irq, int irq_flags) struct fast_pool *fast_pool = &__get_cpu_var(irq_randomness); struct pt_regs *regs = get_irq_regs(); unsigned long now = jiffies; - __u32 input[4], cycles = get_cycles(); + __u32 input[4], cycles = random_get_entropy(); input[0] = cycles ^ jiffies; input[1] = irq; @@ -1485,7 +1485,7 @@ unsigned int get_random_int(void) hash = get_cpu_var(get_random_int_hash); - hash[0] += current->pid + jiffies + get_cycles(); + hash[0] += current->pid + jiffies + random_get_entropy(); md5_transform(hash, random_int_secret); ret = hash[0]; put_cpu_var(get_random_int_hash); diff --git a/include/linux/timex.h b/include/linux/timex.h index b3726e61368e..da4c32dbb2aa 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -64,6 +64,20 @@ #include +#ifndef random_get_entropy +/* + * The random_get_entropy() function is used by the /dev/random driver + * in order to extract entropy via the relative unpredictability of + * when an interrupt takes places versus a high speed, fine-grained + * timing source or cycle counter. Since it will be occurred on every + * single interrupt, it must have a very low cost/overhead. + * + * By default we use get_cycles() for this purpose, but individual + * architectures may override this in their asm/timex.h header file. + */ +#define random_get_entropy() get_cycles() +#endif + /* * SHIFT_PLL is used as a dampening factor to define how much we * adjust the frequency correction for a given offset in PLL mode. -- cgit v1.2.3 From ebff5fa9d545574324095d9c6a3cb80c9157abc5 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 11 Oct 2013 15:12:04 +1000 Subject: Revert "i915: Update VGA arbiter support for newer devices" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 81b5c7bc8de3e6f63419139c2fc91bf81dea8a7d. Adding drm/i915 into the vga arbiter chain means that X (in a piece of well-meant paranoia) will do a get/put on the vga decoding around _every_ accel call down into the ddx. Which results in some nice performance disasters [1]. This really breaks userspace, by disabling DRI for everyone, and stops OpenGL from working, this isn't limited to just the i915 but both the integrated and discrete GPUs on multi-gpu systems, in other words this causes untold worlds of pain, Ville tried to come up with a Great Hack to fiddle the required VGA I/O ops behind everyone's back using stop_machine, but that didn't really work out [2]. Given that we're fairly late in the -rc stage for such games let's just revert this all. One thing we might want to keep is to delay the disabling of the vga decoding until the fbdev emulation and the fbcon screen is set up. If we kill vga mem decoding beforehand fbcon can end up with a white square in the top-left corner it tried to save from the vga memory for a seamless transition. And we have bug reports on older platforms which seem to match these symptoms. But again that's something to play around with in -next. References: [1] http://lists.x.org/archives/xorg-devel/2013-September/037763.html References: [2] http://www.spinics.net/lists/intel-gfx/msg34062.html Cc: Alex Williamson Cc: Ville Syrjälä Cc: Chris Wilson Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_dma.c | 9 +++------ drivers/gpu/drm/i915/intel_display.c | 25 ------------------------- include/linux/vgaarb.h | 7 ------- 3 files changed, 3 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 52f5ad8037cc..d5c784d48671 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1290,12 +1290,9 @@ static int i915_load_modeset_init(struct drm_device *dev) * then we do not take part in VGA arbitration and the * vga_client_register() fails with -ENODEV. */ - if (!HAS_PCH_SPLIT(dev)) { - ret = vga_client_register(dev->pdev, dev, NULL, - i915_vga_set_decode); - if (ret && ret != -ENODEV) - goto out; - } + ret = vga_client_register(dev->pdev, dev, NULL, i915_vga_set_decode); + if (ret && ret != -ENODEV) + goto out; intel_register_dsm_handler(); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index aaea3ec811ed..581fb4b2f766 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10038,15 +10038,6 @@ static void i915_disable_vga(struct drm_device *dev) outb(SR01, VGA_SR_INDEX); sr1 = inb(VGA_SR_DATA); outb(sr1 | 1<<5, VGA_SR_DATA); - - /* Disable VGA memory on Intel HD */ - if (HAS_PCH_SPLIT(dev)) { - outb(inb(VGA_MSR_READ) & ~VGA_MSR_MEM_EN, VGA_MSR_WRITE); - vga_set_legacy_decoding(dev->pdev, VGA_RSRC_LEGACY_IO | - VGA_RSRC_NORMAL_IO | - VGA_RSRC_NORMAL_MEM); - } - vga_put(dev->pdev, VGA_RSRC_LEGACY_IO); udelay(300); @@ -10054,20 +10045,6 @@ static void i915_disable_vga(struct drm_device *dev) POSTING_READ(vga_reg); } -static void i915_enable_vga(struct drm_device *dev) -{ - /* Enable VGA memory on Intel HD */ - if (HAS_PCH_SPLIT(dev)) { - vga_get_uninterruptible(dev->pdev, VGA_RSRC_LEGACY_IO); - outb(inb(VGA_MSR_READ) | VGA_MSR_MEM_EN, VGA_MSR_WRITE); - vga_set_legacy_decoding(dev->pdev, VGA_RSRC_LEGACY_IO | - VGA_RSRC_LEGACY_MEM | - VGA_RSRC_NORMAL_IO | - VGA_RSRC_NORMAL_MEM); - vga_put(dev->pdev, VGA_RSRC_LEGACY_IO); - } -} - void intel_modeset_init_hw(struct drm_device *dev) { intel_init_power_well(dev); @@ -10559,8 +10536,6 @@ void intel_modeset_cleanup(struct drm_device *dev) intel_disable_fbc(dev); - i915_enable_vga(dev); - intel_disable_gt_powersave(dev); ironlake_teardown_rc6(dev); diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index 80cf8173a65b..2c02f3a8d2ba 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -65,15 +65,8 @@ struct pci_dev; * out of the arbitration process (and can be safe to take * interrupts at any time. */ -#if defined(CONFIG_VGA_ARB) extern void vga_set_legacy_decoding(struct pci_dev *pdev, unsigned int decodes); -#else -static inline void vga_set_legacy_decoding(struct pci_dev *pdev, - unsigned int decodes) -{ -} -#endif /** * vga_get - acquire & locks VGA resources -- cgit v1.2.3 From 3f0116c3238a96bc18ad4b4acefe4e7be32fa861 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 10 Oct 2013 10:16:30 +0200 Subject: compiler/gcc4: Add quirk for 'asm goto' miscompilation bug Fengguang Wu, Oleg Nesterov and Peter Zijlstra tracked down a kernel crash to a GCC bug: GCC miscompiles certain 'asm goto' constructs, as outlined here: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 Implement a workaround suggested by Jakub Jelinek. Reported-and-tested-by: Fengguang Wu Reported-by: Oleg Nesterov Reported-by: Peter Zijlstra Suggested-by: Jakub Jelinek Reviewed-by: Richard Henderson Cc: Linus Torvalds Cc: Andrew Morton Cc: Signed-off-by: Ingo Molnar --- arch/arm/include/asm/jump_label.h | 2 +- arch/mips/include/asm/jump_label.h | 2 +- arch/powerpc/include/asm/jump_label.h | 2 +- arch/s390/include/asm/jump_label.h | 2 +- arch/sparc/include/asm/jump_label.h | 2 +- arch/x86/include/asm/cpufeature.h | 6 +++--- arch/x86/include/asm/jump_label.h | 2 +- arch/x86/include/asm/mutex_64.h | 4 ++-- include/linux/compiler-gcc4.h | 15 +++++++++++++++ 9 files changed, 26 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h index bfc198c75913..863c892b4aaa 100644 --- a/arch/arm/include/asm/jump_label.h +++ b/arch/arm/include/asm/jump_label.h @@ -16,7 +16,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:\n\t" + asm_volatile_goto("1:\n\t" JUMP_LABEL_NOP "\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index 4d6d77ed9b9d..e194f957ca8c 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -22,7 +22,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:\tnop\n\t" + asm_volatile_goto("1:\tnop\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index ae098c438f00..f016bb699b5f 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -19,7 +19,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:\n\t" + asm_volatile_goto("1:\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 6c32190dc73e..346b1c85ffb4 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -15,7 +15,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("0: brcl 0,0\n" + asm_volatile_goto("0: brcl 0,0\n" ".pushsection __jump_table, \"aw\"\n" ASM_ALIGN "\n" ASM_PTR " 0b, %l[label], %0\n" diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index 5080d16a832f..ec2e2e2aba7d 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -9,7 +9,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:\n\t" + asm_volatile_goto("1:\n\t" "nop\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index d3f5c63078d8..89270b4318db 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -374,7 +374,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) * Catch too early usage of this before alternatives * have run. */ - asm goto("1: jmp %l[t_warn]\n" + asm_volatile_goto("1: jmp %l[t_warn]\n" "2:\n" ".section .altinstructions,\"a\"\n" " .long 1b - .\n" @@ -388,7 +388,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) #endif - asm goto("1: jmp %l[t_no]\n" + asm_volatile_goto("1: jmp %l[t_no]\n" "2:\n" ".section .altinstructions,\"a\"\n" " .long 1b - .\n" @@ -453,7 +453,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit) * have. Thus, we force the jump to the widest, 4-byte, signed relative * offset even though the last would often fit in less bytes. */ - asm goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n" + asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n" "2:\n" ".section .altinstructions,\"a\"\n" " .long 1b - .\n" /* src offset */ diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 64507f35800c..6a2cefb4395a 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -18,7 +18,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:" + asm_volatile_goto("1:" ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t" ".pushsection __jump_table, \"aw\" \n\t" _ASM_ALIGN "\n\t" diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h index e7e6751648ed..07537a44216e 100644 --- a/arch/x86/include/asm/mutex_64.h +++ b/arch/x86/include/asm/mutex_64.h @@ -20,7 +20,7 @@ static inline void __mutex_fastpath_lock(atomic_t *v, void (*fail_fn)(atomic_t *)) { - asm volatile goto(LOCK_PREFIX " decl %0\n" + asm_volatile_goto(LOCK_PREFIX " decl %0\n" " jns %l[exit]\n" : : "m" (v->counter) : "memory", "cc" @@ -75,7 +75,7 @@ static inline int __mutex_fastpath_lock_retval(atomic_t *count) static inline void __mutex_fastpath_unlock(atomic_t *v, void (*fail_fn)(atomic_t *)) { - asm volatile goto(LOCK_PREFIX " incl %0\n" + asm_volatile_goto(LOCK_PREFIX " incl %0\n" " jg %l[exit]\n" : : "m" (v->counter) : "memory", "cc" diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 842de225055f..ded429966c1f 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -65,6 +65,21 @@ #define __visible __attribute__((externally_visible)) #endif +/* + * GCC 'asm goto' miscompiles certain code sequences: + * + * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 + * + * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. + * Fixed in GCC 4.8.2 and later versions. + * + * (asm goto is automatically volatile - the naming reflects this.) + */ +#if GCC_VERSION <= 40801 +# define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) +#else +# define asm_volatile_goto(x...) do { asm goto(x); } while (0) +#endif #ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP #if GCC_VERSION >= 40400 -- cgit v1.2.3 From 1931ee143b0ab72924944bc06e363d837ba05063 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 11 Oct 2013 09:27:28 +0200 Subject: Revert "drivers: of: add initialization code for dma reserved memory" This reverts commit 9d8eab7af79cb4ce2de5de39f82c455b1f796963. There is still no consensus on the bindings for the reserved memory and various drawbacks of the proposed solution has been shown, so the best now is to revert it completely and start again from scratch later. Signed-off-by: Marek Szyprowski Signed-off-by: Grant Likely --- Documentation/devicetree/bindings/memory.txt | 168 -------------------------- drivers/of/Kconfig | 6 - drivers/of/Makefile | 1 - drivers/of/of_reserved_mem.c | 173 --------------------------- drivers/of/platform.c | 4 - include/linux/of_reserved_mem.h | 14 --- 6 files changed, 366 deletions(-) delete mode 100644 Documentation/devicetree/bindings/memory.txt delete mode 100644 drivers/of/of_reserved_mem.c delete mode 100644 include/linux/of_reserved_mem.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/memory.txt b/Documentation/devicetree/bindings/memory.txt deleted file mode 100644 index eb2469365593..000000000000 --- a/Documentation/devicetree/bindings/memory.txt +++ /dev/null @@ -1,168 +0,0 @@ -*** Memory binding *** - -The /memory node provides basic information about the address and size -of the physical memory. This node is usually filled or updated by the -bootloader, depending on the actual memory configuration of the given -hardware. - -The memory layout is described by the following node: - -/ { - #address-cells = <(n)>; - #size-cells = <(m)>; - memory { - device_type = "memory"; - reg = <(baseaddr1) (size1) - (baseaddr2) (size2) - ... - (baseaddrN) (sizeN)>; - }; - ... -}; - -A memory node follows the typical device tree rules for "reg" property: -n: number of cells used to store base address value -m: number of cells used to store size value -baseaddrX: defines a base address of the defined memory bank -sizeX: the size of the defined memory bank - - -More than one memory bank can be defined. - - -*** Reserved memory regions *** - -In /memory/reserved-memory node one can create child nodes describing -particular reserved (excluded from normal use) memory regions. Such -memory regions are usually designed for the special usage by various -device drivers. A good example are contiguous memory allocations or -memory sharing with other operating system on the same hardware board. -Those special memory regions might depend on the board configuration and -devices used on the target system. - -Parameters for each memory region can be encoded into the device tree -with the following convention: - -[(label):] (name) { - compatible = "linux,contiguous-memory-region", "reserved-memory-region"; - reg = <(address) (size)>; - (linux,default-contiguous-region); -}; - -compatible: one or more of: - - "linux,contiguous-memory-region" - enables binding of this - region to Contiguous Memory Allocator (special region for - contiguous memory allocations, shared with movable system - memory, Linux kernel-specific). - - "reserved-memory-region" - compatibility is defined, given - region is assigned for exclusive usage for by the respective - devices. - -reg: standard property defining the base address and size of - the memory region - -linux,default-contiguous-region: property indicating that the region - is the default region for all contiguous memory - allocations, Linux specific (optional) - -It is optional to specify the base address, so if one wants to use -autoconfiguration of the base address, '0' can be specified as a base -address in the 'reg' property. - -The /memory/reserved-memory node must contain the same #address-cells -and #size-cells value as the root node. - - -*** Device node's properties *** - -Once regions in the /memory/reserved-memory node have been defined, they -may be referenced by other device nodes. Bindings that wish to reference -memory regions should explicitly document their use of the following -property: - -memory-region = <&phandle_to_defined_region>; - -This property indicates that the device driver should use the memory -region pointed by the given phandle. - - -*** Example *** - -This example defines a memory consisting of 4 memory banks. 3 contiguous -regions are defined for Linux kernel, one default of all device drivers -(named contig_mem, placed at 0x72000000, 64MiB), one dedicated to the -framebuffer device (labelled display_mem, placed at 0x78000000, 8MiB) -and one for multimedia processing (labelled multimedia_mem, placed at -0x77000000, 64MiB). 'display_mem' region is then assigned to fb@12300000 -device for DMA memory allocations (Linux kernel drivers will use CMA is -available or dma-exclusive usage otherwise). 'multimedia_mem' is -assigned to scaler@12500000 and codec@12600000 devices for contiguous -memory allocations when CMA driver is enabled. - -The reason for creating a separate region for framebuffer device is to -match the framebuffer base address to the one configured by bootloader, -so once Linux kernel drivers starts no glitches on the displayed boot -logo appears. Scaller and codec drivers should share the memory -allocations. - -/ { - #address-cells = <1>; - #size-cells = <1>; - - /* ... */ - - memory { - reg = <0x40000000 0x10000000 - 0x50000000 0x10000000 - 0x60000000 0x10000000 - 0x70000000 0x10000000>; - - reserved-memory { - #address-cells = <1>; - #size-cells = <1>; - - /* - * global autoconfigured region for contiguous allocations - * (used only with Contiguous Memory Allocator) - */ - contig_region@0 { - compatible = "linux,contiguous-memory-region"; - reg = <0x0 0x4000000>; - linux,default-contiguous-region; - }; - - /* - * special region for framebuffer - */ - display_region: region@78000000 { - compatible = "linux,contiguous-memory-region", "reserved-memory-region"; - reg = <0x78000000 0x800000>; - }; - - /* - * special region for multimedia processing devices - */ - multimedia_region: region@77000000 { - compatible = "linux,contiguous-memory-region"; - reg = <0x77000000 0x4000000>; - }; - }; - }; - - /* ... */ - - fb0: fb@12300000 { - status = "okay"; - memory-region = <&display_region>; - }; - - scaler: scaler@12500000 { - status = "okay"; - memory-region = <&multimedia_region>; - }; - - codec: codec@12600000 { - status = "okay"; - memory-region = <&multimedia_region>; - }; -}; diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index 9d2009a9004d..78cc76053328 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -74,10 +74,4 @@ config OF_MTD depends on MTD def_bool y -config OF_RESERVED_MEM - depends on OF_FLATTREE && (DMA_CMA || (HAVE_GENERIC_DMA_COHERENT && HAVE_MEMBLOCK)) - def_bool y - help - Initialization code for DMA reserved memory - endmenu # OF diff --git a/drivers/of/Makefile b/drivers/of/Makefile index ed9660adad77..efd05102c405 100644 --- a/drivers/of/Makefile +++ b/drivers/of/Makefile @@ -9,4 +9,3 @@ obj-$(CONFIG_OF_MDIO) += of_mdio.o obj-$(CONFIG_OF_PCI) += of_pci.o obj-$(CONFIG_OF_PCI_IRQ) += of_pci_irq.o obj-$(CONFIG_OF_MTD) += of_mtd.o -obj-$(CONFIG_OF_RESERVED_MEM) += of_reserved_mem.o diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c deleted file mode 100644 index 0fe40c7d6904..000000000000 --- a/drivers/of/of_reserved_mem.c +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Device tree based initialization code for reserved memory. - * - * Copyright (c) 2013 Samsung Electronics Co., Ltd. - * http://www.samsung.com - * Author: Marek Szyprowski - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License or (at your optional) any later version of the license. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define MAX_RESERVED_REGIONS 16 -struct reserved_mem { - phys_addr_t base; - unsigned long size; - struct cma *cma; - char name[32]; -}; -static struct reserved_mem reserved_mem[MAX_RESERVED_REGIONS]; -static int reserved_mem_count; - -static int __init fdt_scan_reserved_mem(unsigned long node, const char *uname, - int depth, void *data) -{ - struct reserved_mem *rmem = &reserved_mem[reserved_mem_count]; - phys_addr_t base, size; - int is_cma, is_reserved; - unsigned long len; - const char *status; - __be32 *prop; - - is_cma = IS_ENABLED(CONFIG_DMA_CMA) && - of_flat_dt_is_compatible(node, "linux,contiguous-memory-region"); - is_reserved = of_flat_dt_is_compatible(node, "reserved-memory-region"); - - if (!is_reserved && !is_cma) { - /* ignore node and scan next one */ - return 0; - } - - status = of_get_flat_dt_prop(node, "status", &len); - if (status && strcmp(status, "okay") != 0) { - /* ignore disabled node nad scan next one */ - return 0; - } - - prop = of_get_flat_dt_prop(node, "reg", &len); - if (!prop || (len < (dt_root_size_cells + dt_root_addr_cells) * - sizeof(__be32))) { - pr_err("Reserved mem: node %s, incorrect \"reg\" property\n", - uname); - /* ignore node and scan next one */ - return 0; - } - base = dt_mem_next_cell(dt_root_addr_cells, &prop); - size = dt_mem_next_cell(dt_root_size_cells, &prop); - - if (!size) { - /* ignore node and scan next one */ - return 0; - } - - pr_info("Reserved mem: found %s, memory base %lx, size %ld MiB\n", - uname, (unsigned long)base, (unsigned long)size / SZ_1M); - - if (reserved_mem_count == ARRAY_SIZE(reserved_mem)) - return -ENOSPC; - - rmem->base = base; - rmem->size = size; - strlcpy(rmem->name, uname, sizeof(rmem->name)); - - if (is_cma) { - struct cma *cma; - if (dma_contiguous_reserve_area(size, base, 0, &cma) == 0) { - rmem->cma = cma; - reserved_mem_count++; - if (of_get_flat_dt_prop(node, - "linux,default-contiguous-region", - NULL)) - dma_contiguous_set_default(cma); - } - } else if (is_reserved) { - if (memblock_remove(base, size) == 0) - reserved_mem_count++; - else - pr_err("Failed to reserve memory for %s\n", uname); - } - - return 0; -} - -static struct reserved_mem *get_dma_memory_region(struct device *dev) -{ - struct device_node *node; - const char *name; - int i; - - node = of_parse_phandle(dev->of_node, "memory-region", 0); - if (!node) - return NULL; - - name = kbasename(node->full_name); - for (i = 0; i < reserved_mem_count; i++) - if (strcmp(name, reserved_mem[i].name) == 0) - return &reserved_mem[i]; - return NULL; -} - -/** - * of_reserved_mem_device_init() - assign reserved memory region to given device - * - * This function assign memory region pointed by "memory-region" device tree - * property to the given device. - */ -void of_reserved_mem_device_init(struct device *dev) -{ - struct reserved_mem *region = get_dma_memory_region(dev); - if (!region) - return; - - if (region->cma) { - dev_set_cma_area(dev, region->cma); - pr_info("Assigned CMA %s to %s device\n", region->name, - dev_name(dev)); - } else { - if (dma_declare_coherent_memory(dev, region->base, region->base, - region->size, DMA_MEMORY_MAP | DMA_MEMORY_EXCLUSIVE) != 0) - pr_info("Declared reserved memory %s to %s device\n", - region->name, dev_name(dev)); - } -} - -/** - * of_reserved_mem_device_release() - release reserved memory device structures - * - * This function releases structures allocated for memory region handling for - * the given device. - */ -void of_reserved_mem_device_release(struct device *dev) -{ - struct reserved_mem *region = get_dma_memory_region(dev); - if (!region && !region->cma) - dma_release_declared_memory(dev); -} - -/** - * early_init_dt_scan_reserved_mem() - create reserved memory regions - * - * This function grabs memory from early allocator for device exclusive use - * defined in device tree structures. It should be called by arch specific code - * once the early allocator (memblock) has been activated and all other - * subsystems have already allocated/reserved memory. - */ -void __init early_init_dt_scan_reserved_mem(void) -{ - of_scan_flat_dt_by_path("/memory/reserved-memory", - fdt_scan_reserved_mem, NULL); -} diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 9b439ac63d8e..f6dcde220821 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -21,7 +21,6 @@ #include #include #include -#include #include const struct of_device_id of_default_bus_match_table[] = { @@ -219,8 +218,6 @@ static struct platform_device *of_platform_device_create_pdata( dev->dev.bus = &platform_bus_type; dev->dev.platform_data = platform_data; - of_reserved_mem_device_init(&dev->dev); - /* We do not fill the DMA ops for platform devices by default. * This is currently the responsibility of the platform code * to do such, possibly using a device notifier @@ -228,7 +225,6 @@ static struct platform_device *of_platform_device_create_pdata( if (of_device_add(dev) != 0) { platform_device_put(dev); - of_reserved_mem_device_release(&dev->dev); return NULL; } diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h deleted file mode 100644 index c84128255814..000000000000 --- a/include/linux/of_reserved_mem.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef __OF_RESERVED_MEM_H -#define __OF_RESERVED_MEM_H - -#ifdef CONFIG_OF_RESERVED_MEM -void of_reserved_mem_device_init(struct device *dev); -void of_reserved_mem_device_release(struct device *dev); -void early_init_dt_scan_reserved_mem(void); -#else -static inline void of_reserved_mem_device_init(struct device *dev) { } -static inline void of_reserved_mem_device_release(struct device *dev) { } -static inline void early_init_dt_scan_reserved_mem(void) { } -#endif - -#endif /* __OF_RESERVED_MEM_H */ -- cgit v1.2.3 From 32c37fc30c52508711ea6a108cfd5855b8a07176 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 14 Oct 2013 15:24:55 +0200 Subject: usb-storage: add quirk for mandatory READ_CAPACITY_16 Some USB drive enclosures do not correctly report an overflow condition if they hold a drive with a capacity over 2TB and are confronted with a READ_CAPACITY_10. They answer with their capacity modulo 2TB. The generic layer cannot cope with that. It must be told to use READ_CAPACITY_16 from the beginning. Signed-off-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/scsiglue.c | 5 ++++- drivers/usb/storage/unusual_devs.h | 7 +++++++ include/linux/usb_usual.h | 4 +++- 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index 94d75edef77f..18509e6c21ab 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -211,8 +211,11 @@ static int slave_configure(struct scsi_device *sdev) /* * Many devices do not respond properly to READ_CAPACITY_16. * Tell the SCSI layer to try READ_CAPACITY_10 first. + * However some USB 3.0 drive enclosures return capacity + * modulo 2TB. Those must use READ_CAPACITY_16 */ - sdev->try_rc_10_first = 1; + if (!(us->fflags & US_FL_NEEDS_CAP16)) + sdev->try_rc_10_first = 1; /* assume SPC3 or latter devices support sense size > 18 */ if (sdev->scsi_level > SCSI_SPC_2) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index c015f2c16729..de32cfa5bfa6 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1925,6 +1925,13 @@ UNUSUAL_DEV( 0x1652, 0x6600, 0x0201, 0x0201, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_IGNORE_RESIDUE ), +/* Reported by Oliver Neukum */ +UNUSUAL_DEV( 0x174c, 0x55aa, 0x0100, 0x0100, + "ASMedia", + "AS2105", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NEEDS_CAP16), + /* Reported by Jesse Feddema */ UNUSUAL_DEV( 0x177f, 0x0400, 0x0000, 0x0000, "Yarvik", diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index bf99cd01be20..630356866030 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -66,7 +66,9 @@ US_FLAG(INITIAL_READ10, 0x00100000) \ /* Initial READ(10) (and others) must be retried */ \ US_FLAG(WRITE_CACHE, 0x00200000) \ - /* Write Cache status is not available */ + /* Write Cache status is not available */ \ + US_FLAG(NEEDS_CAP16, 0x00400000) + /* cannot handle READ_CAPACITY_10 */ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; -- cgit v1.2.3 From 4942642080ea82d99ab5b653abb9a12b7ba31f4a Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 16 Oct 2013 13:46:59 -0700 Subject: mm: memcg: handle non-error OOM situations more gracefully Commit 3812c8c8f395 ("mm: memcg: do not trap chargers with full callstack on OOM") assumed that only a few places that can trigger a memcg OOM situation do not return VM_FAULT_OOM, like optional page cache readahead. But there are many more and it's impractical to annotate them all. First of all, we don't want to invoke the OOM killer when the failed allocation is gracefully handled, so defer the actual kill to the end of the fault handling as well. This simplifies the code quite a bit for added bonus. Second, since a failed allocation might not be the abrupt end of the fault, the memcg OOM handler needs to be re-entrant until the fault finishes for subsequent allocation attempts. If an allocation is attempted after the task already OOMed, allow it to bypass the limit so that it can quickly finish the fault and invoke the OOM killer. Reported-by: azurIt Signed-off-by: Johannes Weiner Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 50 ++++------------ include/linux/sched.h | 7 +-- mm/filemap.c | 11 +--- mm/memcontrol.c | 139 +++++++++++++++++---------------------------- mm/memory.c | 18 ++++-- mm/oom_kill.c | 2 +- 6 files changed, 79 insertions(+), 148 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ecc82b37c4cc..b3e7a667e03c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -137,47 +137,24 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, extern void mem_cgroup_replace_page_cache(struct page *oldpage, struct page *newpage); -/** - * mem_cgroup_toggle_oom - toggle the memcg OOM killer for the current task - * @new: true to enable, false to disable - * - * Toggle whether a failed memcg charge should invoke the OOM killer - * or just return -ENOMEM. Returns the previous toggle state. - * - * NOTE: Any path that enables the OOM killer before charging must - * call mem_cgroup_oom_synchronize() afterward to finalize the - * OOM handling and clean up. - */ -static inline bool mem_cgroup_toggle_oom(bool new) +static inline void mem_cgroup_oom_enable(void) { - bool old; - - old = current->memcg_oom.may_oom; - current->memcg_oom.may_oom = new; - - return old; + WARN_ON(current->memcg_oom.may_oom); + current->memcg_oom.may_oom = 1; } -static inline void mem_cgroup_enable_oom(void) +static inline void mem_cgroup_oom_disable(void) { - bool old = mem_cgroup_toggle_oom(true); - - WARN_ON(old == true); -} - -static inline void mem_cgroup_disable_oom(void) -{ - bool old = mem_cgroup_toggle_oom(false); - - WARN_ON(old == false); + WARN_ON(!current->memcg_oom.may_oom); + current->memcg_oom.may_oom = 0; } static inline bool task_in_memcg_oom(struct task_struct *p) { - return p->memcg_oom.in_memcg_oom; + return p->memcg_oom.memcg; } -bool mem_cgroup_oom_synchronize(void); +bool mem_cgroup_oom_synchronize(bool wait); #ifdef CONFIG_MEMCG_SWAP extern int do_swap_account; @@ -402,16 +379,11 @@ static inline void mem_cgroup_end_update_page_stat(struct page *page, { } -static inline bool mem_cgroup_toggle_oom(bool new) -{ - return false; -} - -static inline void mem_cgroup_enable_oom(void) +static inline void mem_cgroup_oom_enable(void) { } -static inline void mem_cgroup_disable_oom(void) +static inline void mem_cgroup_oom_disable(void) { } @@ -420,7 +392,7 @@ static inline bool task_in_memcg_oom(struct task_struct *p) return false; } -static inline bool mem_cgroup_oom_synchronize(void) +static inline bool mem_cgroup_oom_synchronize(bool wait) { return false; } diff --git a/include/linux/sched.h b/include/linux/sched.h index 6682da36b293..e27baeeda3f4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1394,11 +1394,10 @@ struct task_struct { } memcg_batch; unsigned int memcg_kmem_skip_account; struct memcg_oom_info { + struct mem_cgroup *memcg; + gfp_t gfp_mask; + int order; unsigned int may_oom:1; - unsigned int in_memcg_oom:1; - unsigned int oom_locked:1; - int wakeups; - struct mem_cgroup *wait_on_memcg; } memcg_oom; #endif #ifdef CONFIG_UPROBES diff --git a/mm/filemap.c b/mm/filemap.c index 1e6aec4a2d2e..ae4846ff4849 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1616,7 +1616,6 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) struct inode *inode = mapping->host; pgoff_t offset = vmf->pgoff; struct page *page; - bool memcg_oom; pgoff_t size; int ret = 0; @@ -1625,11 +1624,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return VM_FAULT_SIGBUS; /* - * Do we have something in the page cache already? Either - * way, try readahead, but disable the memcg OOM killer for it - * as readahead is optional and no errors are propagated up - * the fault stack. The OOM killer is enabled while trying to - * instantiate the faulting page individually below. + * Do we have something in the page cache already? */ page = find_get_page(mapping, offset); if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) { @@ -1637,14 +1632,10 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) * We found the page, so try async readahead before * waiting for the lock. */ - memcg_oom = mem_cgroup_toggle_oom(false); do_async_mmap_readahead(vma, ra, file, page, offset); - mem_cgroup_toggle_oom(memcg_oom); } else if (!page) { /* No page in the page cache at all */ - memcg_oom = mem_cgroup_toggle_oom(false); do_sync_mmap_readahead(vma, ra, file, offset); - mem_cgroup_toggle_oom(memcg_oom); count_vm_event(PGMAJFAULT); mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); ret = VM_FAULT_MAJOR; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5335b2b6be77..65fc6a449841 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2161,110 +2161,59 @@ static void memcg_oom_recover(struct mem_cgroup *memcg) memcg_wakeup_oom(memcg); } -/* - * try to call OOM killer - */ static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order) { - bool locked; - int wakeups; - if (!current->memcg_oom.may_oom) return; - - current->memcg_oom.in_memcg_oom = 1; - /* - * As with any blocking lock, a contender needs to start - * listening for wakeups before attempting the trylock, - * otherwise it can miss the wakeup from the unlock and sleep - * indefinitely. This is just open-coded because our locking - * is so particular to memcg hierarchies. + * We are in the middle of the charge context here, so we + * don't want to block when potentially sitting on a callstack + * that holds all kinds of filesystem and mm locks. + * + * Also, the caller may handle a failed allocation gracefully + * (like optional page cache readahead) and so an OOM killer + * invocation might not even be necessary. + * + * That's why we don't do anything here except remember the + * OOM context and then deal with it at the end of the page + * fault when the stack is unwound, the locks are released, + * and when we know whether the fault was overall successful. */ - wakeups = atomic_read(&memcg->oom_wakeups); - mem_cgroup_mark_under_oom(memcg); - - locked = mem_cgroup_oom_trylock(memcg); - - if (locked) - mem_cgroup_oom_notify(memcg); - - if (locked && !memcg->oom_kill_disable) { - mem_cgroup_unmark_under_oom(memcg); - mem_cgroup_out_of_memory(memcg, mask, order); - mem_cgroup_oom_unlock(memcg); - /* - * There is no guarantee that an OOM-lock contender - * sees the wakeups triggered by the OOM kill - * uncharges. Wake any sleepers explicitely. - */ - memcg_oom_recover(memcg); - } else { - /* - * A system call can just return -ENOMEM, but if this - * is a page fault and somebody else is handling the - * OOM already, we need to sleep on the OOM waitqueue - * for this memcg until the situation is resolved. - * Which can take some time because it might be - * handled by a userspace task. - * - * However, this is the charge context, which means - * that we may sit on a large call stack and hold - * various filesystem locks, the mmap_sem etc. and we - * don't want the OOM handler to deadlock on them - * while we sit here and wait. Store the current OOM - * context in the task_struct, then return -ENOMEM. - * At the end of the page fault handler, with the - * stack unwound, pagefault_out_of_memory() will check - * back with us by calling - * mem_cgroup_oom_synchronize(), possibly putting the - * task to sleep. - */ - current->memcg_oom.oom_locked = locked; - current->memcg_oom.wakeups = wakeups; - css_get(&memcg->css); - current->memcg_oom.wait_on_memcg = memcg; - } + css_get(&memcg->css); + current->memcg_oom.memcg = memcg; + current->memcg_oom.gfp_mask = mask; + current->memcg_oom.order = order; } /** * mem_cgroup_oom_synchronize - complete memcg OOM handling + * @handle: actually kill/wait or just clean up the OOM state * - * This has to be called at the end of a page fault if the the memcg - * OOM handler was enabled and the fault is returning %VM_FAULT_OOM. + * This has to be called at the end of a page fault if the memcg OOM + * handler was enabled. * - * Memcg supports userspace OOM handling, so failed allocations must + * Memcg supports userspace OOM handling where failed allocations must * sleep on a waitqueue until the userspace task resolves the * situation. Sleeping directly in the charge context with all kinds * of locks held is not a good idea, instead we remember an OOM state * in the task and mem_cgroup_oom_synchronize() has to be called at - * the end of the page fault to put the task to sleep and clean up the - * OOM state. + * the end of the page fault to complete the OOM handling. * * Returns %true if an ongoing memcg OOM situation was detected and - * finalized, %false otherwise. + * completed, %false otherwise. */ -bool mem_cgroup_oom_synchronize(void) +bool mem_cgroup_oom_synchronize(bool handle) { + struct mem_cgroup *memcg = current->memcg_oom.memcg; struct oom_wait_info owait; - struct mem_cgroup *memcg; + bool locked; /* OOM is global, do not handle */ - if (!current->memcg_oom.in_memcg_oom) - return false; - - /* - * We invoked the OOM killer but there is a chance that a kill - * did not free up any charges. Everybody else might already - * be sleeping, so restart the fault and keep the rampage - * going until some charges are released. - */ - memcg = current->memcg_oom.wait_on_memcg; if (!memcg) - goto out; + return false; - if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current)) - goto out_memcg; + if (!handle) + goto cleanup; owait.memcg = memcg; owait.wait.flags = 0; @@ -2273,13 +2222,25 @@ bool mem_cgroup_oom_synchronize(void) INIT_LIST_HEAD(&owait.wait.task_list); prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE); - /* Only sleep if we didn't miss any wakeups since OOM */ - if (atomic_read(&memcg->oom_wakeups) == current->memcg_oom.wakeups) + mem_cgroup_mark_under_oom(memcg); + + locked = mem_cgroup_oom_trylock(memcg); + + if (locked) + mem_cgroup_oom_notify(memcg); + + if (locked && !memcg->oom_kill_disable) { + mem_cgroup_unmark_under_oom(memcg); + finish_wait(&memcg_oom_waitq, &owait.wait); + mem_cgroup_out_of_memory(memcg, current->memcg_oom.gfp_mask, + current->memcg_oom.order); + } else { schedule(); - finish_wait(&memcg_oom_waitq, &owait.wait); -out_memcg: - mem_cgroup_unmark_under_oom(memcg); - if (current->memcg_oom.oom_locked) { + mem_cgroup_unmark_under_oom(memcg); + finish_wait(&memcg_oom_waitq, &owait.wait); + } + + if (locked) { mem_cgroup_oom_unlock(memcg); /* * There is no guarantee that an OOM-lock contender @@ -2288,10 +2249,9 @@ out_memcg: */ memcg_oom_recover(memcg); } +cleanup: + current->memcg_oom.memcg = NULL; css_put(&memcg->css); - current->memcg_oom.wait_on_memcg = NULL; -out: - current->memcg_oom.in_memcg_oom = 0; return true; } @@ -2705,6 +2665,9 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, || fatal_signal_pending(current))) goto bypass; + if (unlikely(task_in_memcg_oom(current))) + goto bypass; + /* * We always charge the cgroup the mm_struct belongs to. * The mm_struct's mem_cgroup changes on task migration if the diff --git a/mm/memory.c b/mm/memory.c index f7b7692c05ed..1311f26497e6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3865,15 +3865,21 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, * space. Kernel faults are handled more gracefully. */ if (flags & FAULT_FLAG_USER) - mem_cgroup_enable_oom(); + mem_cgroup_oom_enable(); ret = __handle_mm_fault(mm, vma, address, flags); - if (flags & FAULT_FLAG_USER) - mem_cgroup_disable_oom(); - - if (WARN_ON(task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM))) - mem_cgroup_oom_synchronize(); + if (flags & FAULT_FLAG_USER) { + mem_cgroup_oom_disable(); + /* + * The task may have entered a memcg OOM situation but + * if the allocation error was handled gracefully (no + * VM_FAULT_OOM), there is no need to kill anything. + * Just clean up the OOM state peacefully. + */ + if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)) + mem_cgroup_oom_synchronize(false); + } return ret; } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 314e9d274381..6738c47f1f72 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -680,7 +680,7 @@ void pagefault_out_of_memory(void) { struct zonelist *zonelist; - if (mem_cgroup_oom_synchronize()) + if (mem_cgroup_oom_synchronize(true)) return; zonelist = node_zonelist(first_online_node, GFP_KERNEL); -- cgit v1.2.3 From 2421ad48f4aed63bc890e8f3c53ed581a542fb66 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 17 Oct 2013 15:44:48 +0200 Subject: ACPI / PM: Drop two functions that are not used any more Two functions defined in device_pm.c, acpi_dev_pm_add_dependent() and acpi_dev_pm_remove_dependent(), have no callers and may be dropped, so drop them. Moreover, they are the only functions adding entries to and removing entries from the power_dependent list in struct acpi_device, so drop that list too. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/device_pm.c | 56 ------------------------------------------------ drivers/acpi/scan.c | 1 - include/acpi/acpi_bus.h | 7 ------ 3 files changed, 64 deletions(-) (limited to 'include') diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 59d3202f6b36..a94383d1f350 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -1025,60 +1025,4 @@ void acpi_dev_pm_detach(struct device *dev, bool power_off) } } EXPORT_SYMBOL_GPL(acpi_dev_pm_detach); - -/** - * acpi_dev_pm_add_dependent - Add physical device depending for PM. - * @handle: Handle of ACPI device node. - * @depdev: Device depending on that node for PM. - */ -void acpi_dev_pm_add_dependent(acpi_handle handle, struct device *depdev) -{ - struct acpi_device_physical_node *dep; - struct acpi_device *adev; - - if (!depdev || acpi_bus_get_device(handle, &adev)) - return; - - mutex_lock(&adev->physical_node_lock); - - list_for_each_entry(dep, &adev->power_dependent, node) - if (dep->dev == depdev) - goto out; - - dep = kzalloc(sizeof(*dep), GFP_KERNEL); - if (dep) { - dep->dev = depdev; - list_add_tail(&dep->node, &adev->power_dependent); - } - - out: - mutex_unlock(&adev->physical_node_lock); -} -EXPORT_SYMBOL_GPL(acpi_dev_pm_add_dependent); - -/** - * acpi_dev_pm_remove_dependent - Remove physical device depending for PM. - * @handle: Handle of ACPI device node. - * @depdev: Device depending on that node for PM. - */ -void acpi_dev_pm_remove_dependent(acpi_handle handle, struct device *depdev) -{ - struct acpi_device_physical_node *dep; - struct acpi_device *adev; - - if (!depdev || acpi_bus_get_device(handle, &adev)) - return; - - mutex_lock(&adev->physical_node_lock); - - list_for_each_entry(dep, &adev->power_dependent, node) - if (dep->dev == depdev) { - list_del(&dep->node); - kfree(dep); - break; - } - - mutex_unlock(&adev->physical_node_lock); -} -EXPORT_SYMBOL_GPL(acpi_dev_pm_remove_dependent); #endif /* CONFIG_PM */ diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 407ad13cac2f..fee8a297c7d9 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -999,7 +999,6 @@ int acpi_device_add(struct acpi_device *device, INIT_LIST_HEAD(&device->wakeup_list); INIT_LIST_HEAD(&device->physical_node_list); mutex_init(&device->physical_node_lock); - INIT_LIST_HEAD(&device->power_dependent); new_bus_id = kzalloc(sizeof(struct acpi_device_bus_id), GFP_KERNEL); if (!new_bus_id) { diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 02e113bb8b7d..d9019821aa60 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -311,7 +311,6 @@ struct acpi_device { unsigned int physical_node_count; struct list_head physical_node_list; struct mutex physical_node_lock; - struct list_head power_dependent; void (*remove)(struct acpi_device *); }; @@ -456,8 +455,6 @@ acpi_status acpi_add_pm_notifier(struct acpi_device *adev, acpi_status acpi_remove_pm_notifier(struct acpi_device *adev, acpi_notify_handler handler); int acpi_pm_device_sleep_state(struct device *, int *, int); -void acpi_dev_pm_add_dependent(acpi_handle handle, struct device *depdev); -void acpi_dev_pm_remove_dependent(acpi_handle handle, struct device *depdev); #else static inline acpi_status acpi_add_pm_notifier(struct acpi_device *adev, acpi_notify_handler handler, @@ -478,10 +475,6 @@ static inline int acpi_pm_device_sleep_state(struct device *d, int *p, int m) return (m >= ACPI_STATE_D0 && m <= ACPI_STATE_D3_COLD) ? m : ACPI_STATE_D0; } -static inline void acpi_dev_pm_add_dependent(acpi_handle handle, - struct device *depdev) {} -static inline void acpi_dev_pm_remove_dependent(acpi_handle handle, - struct device *depdev) {} #endif #ifdef CONFIG_PM_RUNTIME -- cgit v1.2.3 From 94468783cd960aa14b22503dd59afd14efb785aa Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 16 Oct 2013 19:18:41 -0700 Subject: usb: usb_phy_gen: refine conditional declaration of usb_nop_xceiv_register Commit 3fa4d734 (usb: phy: rename nop_usb_xceiv => usb_phy_gen_xceiv) changed the conditional around the declaration of usb_nop_xceiv_register from #if defined(CONFIG_NOP_USB_XCEIV) || (defined(CONFIG_NOP_USB_XCEIV_MODULE) && defined(MODULE)) to #if IS_ENABLED(CONFIG_NOP_USB_XCEIV) While that looks the same, it is semantically different. The first expression is true if CONFIG_NOP_USB_XCEIV is built as module and if the including code is built as module. The second expression is true if code depending on CONFIG_NOP_USB_XCEIV if built as module or into the kernel. As a result, the arm:allmodconfig build fails with arch/arm/mach-omap2/built-in.o: In function `omap3_evm_init': arch/arm/mach-omap2/board-omap3evm.c:703: undefined reference to `usb_nop_xceiv_register' Fix the problem by reverting to the old conditional. Cc: Josh Boyer Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/usb_phy_gen_xceiv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb/usb_phy_gen_xceiv.h b/include/linux/usb/usb_phy_gen_xceiv.h index f9a7e7bc925b..11d85b9c1b08 100644 --- a/include/linux/usb/usb_phy_gen_xceiv.h +++ b/include/linux/usb/usb_phy_gen_xceiv.h @@ -12,7 +12,7 @@ struct usb_phy_gen_xceiv_platform_data { unsigned int needs_reset:1; }; -#if IS_ENABLED(CONFIG_NOP_USB_XCEIV) +#if defined(CONFIG_NOP_USB_XCEIV) || (defined(CONFIG_NOP_USB_XCEIV_MODULE) && defined(MODULE)) /* sometimes transceivers are accessed only through e.g. ULPI */ extern void usb_nop_xceiv_register(void); extern void usb_nop_xceiv_unregister(void); -- cgit v1.2.3