From 0cd3be6e9a46f84ef7a42e1a5645d32ad547b12e Mon Sep 17 00:00:00 2001 From: Sebastian Hesselbarth Date: Mon, 4 May 2015 23:04:16 +0200 Subject: clk: si5351: Do not pass struct clk in platform_data When registering clk-si5351 by platform_data, we should not pass struct clk for the reference clocks. Drop struct clk from platform_data and rework the driver to use devm_clk_get of named clock references. While at it, check for at least one valid input clock and properly prepare/enable valid reference clocks. Signed-off-by: Sebastian Hesselbarth Reported-by: Michael Welling Reported-by: Jean-Francois Moine Reported-by: Russell King Tested-by: Michael Welling Tested-by: Jean-Francois Moine Signed-off-by: Michael Turquette --- include/linux/platform_data/si5351.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/platform_data/si5351.h b/include/linux/platform_data/si5351.h index a947ab8b441a..533d9807e543 100644 --- a/include/linux/platform_data/si5351.h +++ b/include/linux/platform_data/si5351.h @@ -5,8 +5,6 @@ #ifndef __LINUX_PLATFORM_DATA_SI5351_H__ #define __LINUX_PLATFORM_DATA_SI5351_H__ -struct clk; - /** * enum si5351_pll_src - Si5351 pll clock source * @SI5351_PLL_SRC_DEFAULT: default, do not change eeprom config @@ -107,8 +105,6 @@ struct si5351_clkout_config { * @clkout: array of clkout configuration */ struct si5351_platform_data { - struct clk *clk_xtal; - struct clk *clk_clkin; enum si5351_pll_src pll_src[2]; struct si5351_clkout_config clkout[8]; }; -- cgit v1.2.3 From 2d94e5224e81c58986a8cf44a3bf4830ce5cb96e Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 7 May 2015 14:26:34 -0700 Subject: HID: hid-sensor-hub: Fix debug lock warning When CONFIG_DEBUG_LOCK_ALLOC is defined, the mutex magic is compared and a warning is issued for (l->magic != l), where l is the address of the mutex. In hid-sensor-hub, as part of hsdev creation, a per-hsdev mutex is initialized during MFD cell creation. This hsdev, which contains the mutex, is part of the platform data for a cell. But platform_data is copied in platform_device_add_data() in platform.c. This copies the whole hsdev structure, including the mutex. Once copied, the magic no longer matches. So when a client driver calls sensor_hub_input_attr_get_raw_value(), this triggers the mutex warning. To avoid this, allocate the mutex dynamically; the pointer stays valid even after the copy. 
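The failure mode is easy to reproduce in isolation; a minimal sketch, assuming mutex debugging is enabled (hypothetical code, not the driver source):

  #include <linux/mutex.h>
  #include <linux/string.h>

  struct pdata_like {
          struct mutex lock;              /* embedded mutex */
  };

  static void copied_mutex_demo(void)
  {
          struct pdata_like a, b;

          mutex_init(&a.lock);            /* magic is set to &a.lock */
          memcpy(&b, &a, sizeof(a));      /* b.lock's magic still points at a.lock */
          mutex_lock(&b.lock);            /* debug check (l->magic != l) fires here */
          mutex_unlock(&b.lock);
  }

With a dynamically allocated mutex, only the pointer is copied, so the magic keeps pointing at the one real mutex.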
Signed-off-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/hid-sensor-hub.c | 13 ++++++++++--- include/linux/hid-sensor-hub.h | 4 ++-- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c index c3f6f1e311ea..090a1ba0abb6 100644 --- a/drivers/hid/hid-sensor-hub.c +++ b/drivers/hid/hid-sensor-hub.c @@ -294,7 +294,7 @@ int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev, if (!report) return -EINVAL; - mutex_lock(&hsdev->mutex); + mutex_lock(hsdev->mutex_ptr); if (flag == SENSOR_HUB_SYNC) { memset(&hsdev->pending, 0, sizeof(hsdev->pending)); init_completion(&hsdev->pending.ready); @@ -328,7 +328,7 @@ int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev, kfree(hsdev->pending.raw_data); hsdev->pending.status = false; } - mutex_unlock(&hsdev->mutex); + mutex_unlock(hsdev->mutex_ptr); return ret_val; } @@ -667,7 +667,14 @@ static int sensor_hub_probe(struct hid_device *hdev, hsdev->vendor_id = hdev->vendor; hsdev->product_id = hdev->product; hsdev->usage = collection->usage; - mutex_init(&hsdev->mutex); + hsdev->mutex_ptr = devm_kzalloc(&hdev->dev, + sizeof(struct mutex), + GFP_KERNEL); + if (!hsdev->mutex_ptr) { + ret = -ENOMEM; + goto err_stop_hw; + } + mutex_init(hsdev->mutex_ptr); hsdev->start_collection_index = i; if (last_hsdev) last_hsdev->end_collection_index = i; diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index 0408421d885f..0042bf330b99 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -74,7 +74,7 @@ struct sensor_hub_pending { * @usage: Usage id for this hub device instance. * @start_collection_index: Starting index for a phy type collection * @end_collection_index: Last index for a phy type collection - * @mutex: synchronizing mutex. + * @mutex_ptr: synchronizing mutex pointer. * @pending: Holds information of pending sync read request. */ struct hid_sensor_hub_device { @@ -84,7 +84,7 @@ struct hid_sensor_hub_device { u32 usage; int start_collection_index; int end_collection_index; - struct mutex mutex; + struct mutex *mutex_ptr; struct sensor_hub_pending pending; }; -- cgit v1.2.3 From 336b7e1f230912cd8df2497be8dd7be4647d8fc8 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 11 May 2015 14:06:32 -0400 Subject: block: remove export for blk_queue_bio With commit ff36ab345 ("dm: remove request-based logic from make_request_fn wrapper") DM no longer calls blk_queue_bio() directly, so remove its export. Doing so required a forward declaration in blk-core.c. 
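The forward declaration is ordinary C: a function that becomes static but is referenced earlier in the file must be declared before that first use. A generic sketch with hypothetical names (not the blk-core code):

  static void queue_bio_like(int arg);    /* forward declaration */

  static int init_queue_like(int arg)
  {
          queue_bio_like(arg);            /* reference precedes the definition */
          return 0;
  }

  static void queue_bio_like(int arg)
  {
          /* the real work lives here */
  }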
Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-core.c | 5 +++-- include/linux/blkdev.h | 2 -- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 7871603f0a29..03b5f8d77f37 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -734,6 +734,8 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) } EXPORT_SYMBOL(blk_init_queue_node); +static void blk_queue_bio(struct request_queue *q, struct bio *bio); + struct request_queue * blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, spinlock_t *lock) @@ -1578,7 +1580,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) blk_rq_bio_prep(req->q, req, bio); } -void blk_queue_bio(struct request_queue *q, struct bio *bio) +static void blk_queue_bio(struct request_queue *q, struct bio *bio) { const bool sync = !!(bio->bi_rw & REQ_SYNC); struct blk_plug *plug; @@ -1686,7 +1688,6 @@ out_unlock: spin_unlock_irq(q->queue_lock); } } -EXPORT_SYMBOL_GPL(blk_queue_bio); /* for device mapper only */ /* * If bio->bi_dev is a partition, remap the location diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7f9a516f24de..5d93a6645e88 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -821,8 +821,6 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); -extern void blk_queue_bio(struct request_queue *q, struct bio *bio); - /* * A queue has just exitted congestion. Note this in the global counter of * congested queues, and wake up anyone who was waiting for requests to be -- cgit v1.2.3 From f7bcb70ebae0dcdb5a2d859b09e4465784d99029 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 8 May 2015 13:47:23 -0700 Subject: ktime: Fix ktime_divns to do signed division It was noted that the 32bit implementation of ktime_divns() was doing unsigned division and didn't properly handle negative values. And when a ktime helper was changed to utilize ktime_divns, it caused a regression on some IR blasters. See the following bugzilla for details: https://bugzilla.redhat.com/show_bug.cgi?id=1200353 This patch fixes the problem in ktime_divns by checking and preserving the sign bit, and then reapplying it if appropriate after the division. It also changes the return type to s64 to make it more obvious that this is expected. Nicolas also pointed out that negative divisors would cause infinite loops on 32bit systems. Negative divisors are unlikely for users of this function, but out of caution this patch adds checks for them in both the 32-bit (BUG_ON) and 64-bit (WARN_ON) versions to make sure no such use cases creep in. 
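The sign-handling pattern in isolation — a sketch mirroring the hunks below (assumes the kernel's do_div() from asm/div64.h; not a drop-in replacement):

  /* do_div() can only divide an unsigned 64-bit value in place, so take
   * the magnitude, divide, then reapply the sign. */
  static inline s64 signed_divns_sketch(s64 ns, u32 div)
  {
          u64 tmp = ns < 0 ? -ns : ns;    /* magnitude */

          do_div(tmp, div);               /* tmp becomes the quotient */
          return ns < 0 ? -tmp : tmp;     /* restore the sign */
  }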
[ tglx: Hand an u64 to do_div() to avoid the compiler warning ] Fixes: 166afb64511e 'ktime: Sanitize ktime_to_us/ms conversion' Reported-and-tested-by: Trevor Cordes Signed-off-by: John Stultz Acked-by: Nicolas Pitre Cc: Ingo Molnar Cc: Josh Boyer Cc: One Thousand Gnomes Cc: Link: http://lkml.kernel.org/r/1431118043-23452-1-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- include/linux/ktime.h | 27 +++++++++++++++++++++------ kernel/time/hrtimer.c | 14 ++++++++------ 2 files changed, 29 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 5fc3d1083071..2b6a204bd8d4 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -166,19 +166,34 @@ static inline bool ktime_before(const ktime_t cmp1, const ktime_t cmp2) } #if BITS_PER_LONG < 64 -extern u64 __ktime_divns(const ktime_t kt, s64 div); -static inline u64 ktime_divns(const ktime_t kt, s64 div) +extern s64 __ktime_divns(const ktime_t kt, s64 div); +static inline s64 ktime_divns(const ktime_t kt, s64 div) { + /* + * Negative divisors could cause an inf loop, + * so bug out here. + */ + BUG_ON(div < 0); if (__builtin_constant_p(div) && !(div >> 32)) { - u64 ns = kt.tv64; - do_div(ns, div); - return ns; + s64 ns = kt.tv64; + u64 tmp = ns < 0 ? -ns : ns; + + do_div(tmp, div); + return ns < 0 ? -tmp : tmp; } else { return __ktime_divns(kt, div); } } #else /* BITS_PER_LONG < 64 */ -# define ktime_divns(kt, div) (u64)((kt).tv64 / (div)) +static inline s64 ktime_divns(const ktime_t kt, s64 div) +{ + /* + * 32-bit implementation cannot handle negative divisors, + * so catch them on 64bit as well. + */ + WARN_ON(div < 0); + return kt.tv64 / div; +} #endif static inline s64 ktime_to_us(const ktime_t kt) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 76d4bd962b19..93ef7190bdea 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -266,21 +266,23 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) /* * Divide a ktime value by a nanosecond value */ -u64 __ktime_divns(const ktime_t kt, s64 div) +s64 __ktime_divns(const ktime_t kt, s64 div) { - u64 dclc; int sft = 0; + s64 dclc; + u64 tmp; dclc = ktime_to_ns(kt); + tmp = dclc < 0 ? -dclc : dclc; + /* Make sure the divisor is less than 2^32: */ while (div >> 32) { sft++; div >>= 1; } - dclc >>= sft; - do_div(dclc, (unsigned long) div); - - return dclc; + tmp >>= sft; + do_div(tmp, (unsigned long) div); + return dclc < 0 ? -tmp : tmp; } EXPORT_SYMBOL_GPL(__ktime_divns); #endif /* BITS_PER_LONG >= 64 */ -- cgit v1.2.3 From eea39946a1f36e8a5a47c86e7ecfca6076868505 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 13 May 2015 21:17:41 -0700 Subject: rename RTNH_F_EXTERNAL to RTNH_F_OFFLOAD RTNH_F_EXTERNAL today is printed as "offload" in iproute2 output. This patch renames the flag to be consistent with what the user sees. Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- include/uapi/linux/rtnetlink.h | 2 +- net/ipv4/fib_trie.c | 2 +- net/switchdev/switchdev.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 974db03f7b1a..17fb02f488da 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -337,7 +337,7 @@ struct rtnexthop { #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ #define RTNH_F_ONLINK 4 /* Gateway is forced on link */ -#define RTNH_F_EXTERNAL 8 /* Route installed externally */ +#define RTNH_F_OFFLOAD 8 /* offloaded route */ /* Macros to handle hexthops */ diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index e13fcc602da2..64c2076ced54 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1764,7 +1764,7 @@ void fib_table_flush_external(struct fib_table *tb) /* record local slen */ slen = fa->fa_slen; - if (!fi || !(fi->fib_flags & RTNH_F_EXTERNAL)) + if (!fi || !(fi->fib_flags & RTNH_F_OFFLOAD)) continue; netdev_switch_fib_ipv4_del(n->key, diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 46568b85c333..055453d48668 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -338,7 +338,7 @@ int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, fi, tos, type, nlflags, tb_id); if (!err) - fi->fib_flags |= RTNH_F_EXTERNAL; + fi->fib_flags |= RTNH_F_OFFLOAD; } return err; @@ -364,7 +364,7 @@ int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, const struct swdev_ops *ops; int err = 0; - if (!(fi->fib_flags & RTNH_F_EXTERNAL)) + if (!(fi->fib_flags & RTNH_F_OFFLOAD)) return 0; dev = netdev_switch_get_dev_by_nhs(fi); @@ -376,7 +376,7 @@ int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, err = ops->swdev_fib_ipv4_del(dev, htonl(dst), dst_len, fi, tos, type, tb_id); if (!err) - fi->fib_flags &= ~RTNH_F_EXTERNAL; + fi->fib_flags &= ~RTNH_F_OFFLOAD; } return err; -- cgit v1.2.3 From b3cad287d13b5f6695c6b4aab72969cd64bf0171 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 7 May 2015 14:54:16 +0200 Subject: conntrack: RFC5961 challenge ACK confuse conntrack LAST-ACK transition In compliance with RFC5961, the network stack sends a challenge ACK in response to spurious SYN packets, since commit 0c228e833c88 ("tcp: Restore RFC5961-compliant behavior for SYN packets"). This poses a problem for netfilter conntrack in state LAST_ACK, because this challenge ACK is (falsely) seen as ACKing the last FIN, causing a false state transition (into TIME_WAIT). The challenge ACK is hard to distinguish from the real last ACK. Thus, the solution introduces a flag that tracks the potential for seeing a challenge ACK, in case a SYN packet is let through while the current state is LAST_ACK. When the conntrack LAST_ACK to TIME_WAIT transition happens, this flag is used to determine whether we are expecting a challenge ACK. 
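The state-machine idea fits in a few lines of plain C; a toy model with hypothetical names, not the conntrack code (the real hunks follow below):

  #include <stdbool.h>
  #include <stdio.h>

  enum state { LAST_ACK, TIME_WAIT };

  struct track {
          enum state st;
          bool exp_challenge_ack;
  };

  static void on_syn(struct track *t)
  {
          if (t->st == LAST_ACK)
                  t->exp_challenge_ack = true;    /* a challenge ACK may follow */
  }

  static void on_ack(struct track *t)
  {
          if (t->st == LAST_ACK && t->exp_challenge_ack) {
                  t->exp_challenge_ack = false;   /* ignore the challenge ACK */
                  return;                         /* state stays LAST_ACK */
          }
          if (t->st == LAST_ACK)
                  t->st = TIME_WAIT;              /* a genuine last ACK */
  }

  int main(void)
  {
          struct track t = { LAST_ACK, false };

          on_syn(&t);     /* spurious SYN seen while in LAST_ACK */
          on_ack(&t);     /* must not be taken as the last ACK */
          printf("%s\n", t.st == LAST_ACK ? "LAST_ACK" : "TIME_WAIT");
          return 0;
  }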
Scapy based reproducer script avail here: https://github.com/netoptimizer/network-testing/blob/master/scapy/tcp_hacks_3WHS_LAST_ACK.py Fixes: 0c228e833c88 ("tcp: Restore RFC5961-compliant behavior for SYN packets") Signed-off-by: Jesper Dangaard Brouer Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_tcp.h | 3 +++ net/netfilter/nf_conntrack_proto_tcp.c | 35 ++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_conntrack_tcp.h b/include/uapi/linux/netfilter/nf_conntrack_tcp.h index 9993a421201c..ef9f80f0f529 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_tcp.h +++ b/include/uapi/linux/netfilter/nf_conntrack_tcp.h @@ -42,6 +42,9 @@ enum tcp_conntrack { /* The field td_maxack has been set */ #define IP_CT_TCP_FLAG_MAXACK_SET 0x20 +/* Marks possibility for expected RFC5961 challenge ACK */ +#define IP_CT_EXP_CHALLENGE_ACK 0x40 + struct nf_ct_tcp_flags { __u8 flags; __u8 mask; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 5caa0c41bf26..70383de72054 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -202,7 +202,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sES -> sES :-) * sFW -> sCW Normal close request answered by ACK. * sCW -> sCW - * sLA -> sTW Last ACK detected. + * sLA -> sTW Last ACK detected (RFC5961 challenged) * sTW -> sTW Retransmitted last ACK. Remain in the same state. * sCL -> sCL */ @@ -261,7 +261,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sES -> sES :-) * sFW -> sCW Normal close request answered by ACK. * sCW -> sCW - * sLA -> sTW Last ACK detected. + * sLA -> sTW Last ACK detected (RFC5961 challenged) * sTW -> sTW Retransmitted last ACK. * sCL -> sCL */ @@ -906,6 +906,7 @@ static int tcp_packet(struct nf_conn *ct, 1 : ct->proto.tcp.last_win; ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale = ct->proto.tcp.last_wscale; + ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags = ct->proto.tcp.last_flags; memset(&ct->proto.tcp.seen[dir], 0, @@ -923,7 +924,9 @@ static int tcp_packet(struct nf_conn *ct, * may be in sync but we are not. In that case, we annotate * the TCP options and let the packet go through. If it is a * valid SYN packet, the server will reply with a SYN/ACK, and - * then we'll get in sync. Otherwise, the server ignores it. */ + * then we'll get in sync. Otherwise, the server potentially + * responds with a challenge ACK if implementing RFC5961. + */ if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) { struct ip_ct_tcp_state seen = {}; @@ -939,6 +942,13 @@ static int tcp_packet(struct nf_conn *ct, ct->proto.tcp.last_flags |= IP_CT_TCP_FLAG_SACK_PERM; } + /* Mark the potential for RFC5961 challenge ACK, + * this pose a special problem for LAST_ACK state + * as ACK is intrepretated as ACKing last FIN. + */ + if (old_state == TCP_CONNTRACK_LAST_ACK) + ct->proto.tcp.last_flags |= + IP_CT_EXP_CHALLENGE_ACK; } spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) @@ -970,6 +980,25 @@ static int tcp_packet(struct nf_conn *ct, nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid state "); return -NF_ACCEPT; + case TCP_CONNTRACK_TIME_WAIT: + /* RFC5961 compliance cause stack to send "challenge-ACK" + * e.g. in response to spurious SYNs. Conntrack MUST + * not believe this ACK is acking last FIN. 
+ */ + if (old_state == TCP_CONNTRACK_LAST_ACK && + index == TCP_ACK_SET && + ct->proto.tcp.last_dir != dir && + ct->proto.tcp.last_index == TCP_SYN_SET && + (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) { + /* Detected RFC5961 challenge ACK */ + ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; + spin_unlock_bh(&ct->lock); + if (LOG_INVALID(net, IPPROTO_TCP)) + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, + "nf_ct_tcp: challenge-ACK ignored "); + return NF_ACCEPT; /* Don't change state */ + } + break; case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) -- cgit v1.2.3 From 07ee0722bf941960fb3888f9c9b5839473372fd1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 15 May 2015 11:30:47 +0800 Subject: rhashtable: Add cap on number of elements in hash table We currently have no limit on the number of elements in a hash table. This is a problem because some users (tipc) set a ceiling on the maximum table size and when that is reached the hash table may degenerate. Others may encounter OOM when growing and if we allow insertions when that happens the hash table performance may also suffer. This patch adds a new parameter insecure_max_entries which becomes the cap on the table. If unset it defaults to max_size * 2. If it is also zero it means that there is no cap on the number of elements in the table. However, the table will grow whenever the utilisation hits 100% and if that growth fails, you will get ENOMEM on insertion. As allowing oversubscription is potentially dangerous, the name contains the word insecure. Note that the cap is not a hard limit. This is done for performance reasons as enforcing a hard limit will result in use of atomic ops that are heavier than the ones we currently use. The reasoning is that we're only guarding against a gross over-subscription of the table, rather than a small breach of the limit. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/linux/rhashtable.h | 19 +++++++++++++++++++ lib/rhashtable.c | 11 +++++++++++ 2 files changed, 30 insertions(+) (limited to 'include') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index dbcbcc59aa92..843ceca9a21e 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -17,6 +17,7 @@ #ifndef _LINUX_RHASHTABLE_H #define _LINUX_RHASHTABLE_H +#include #include #include #include @@ -100,6 +101,7 @@ struct rhashtable; * @key_len: Length of key * @key_offset: Offset of key in struct to be hashed * @head_offset: Offset of rhash_head in struct to be hashed + * @insecure_max_entries: Maximum number of entries (may be exceeded) * @max_size: Maximum size while expanding * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker @@ -115,6 +117,7 @@ struct rhashtable_params { size_t key_len; size_t key_offset; size_t head_offset; + unsigned int insecure_max_entries; unsigned int max_size; unsigned int min_size; u32 nulls_base; @@ -286,6 +289,18 @@ static inline bool rht_grow_above_100(const struct rhashtable *ht, (!ht->p.max_size || tbl->size < ht->p.max_size); } +/** + * rht_grow_above_max - returns true if table is above maximum + * @ht: hash table + * @tbl: current table + */ +static inline bool rht_grow_above_max(const struct rhashtable *ht, + const struct bucket_table *tbl) +{ + return ht->p.insecure_max_entries && + atomic_read(&ht->nelems) >= ht->p.insecure_max_entries; +} + /* The bucket lock is selected based on the hash and protects mutations * on a group of hash buckets. * @@ -589,6 +604,10 @@ restart: goto out; } + err = -E2BIG; + if (unlikely(rht_grow_above_max(ht, tbl))) + goto out; + if (unlikely(rht_grow_above_100(ht, tbl))) { slow_path: spin_unlock_bh(lock); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index b28df4019ade..4396434e4715 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -14,6 +14,7 @@ * published by the Free Software Foundation. */ +#include #include #include #include @@ -446,6 +447,10 @@ int rhashtable_insert_slow(struct rhashtable *ht, const void *key, if (key && rhashtable_lookup_fast(ht, key, ht->p)) goto exit; + err = -E2BIG; + if (unlikely(rht_grow_above_max(ht, tbl))) + goto exit; + err = -EAGAIN; if (rhashtable_check_elasticity(ht, tbl, hash) || rht_grow_above_100(ht, tbl)) @@ -738,6 +743,12 @@ int rhashtable_init(struct rhashtable *ht, if (params->max_size) ht->p.max_size = rounddown_pow_of_two(params->max_size); + if (params->insecure_max_entries) + ht->p.insecure_max_entries = + rounddown_pow_of_two(params->insecure_max_entries); + else + ht->p.insecure_max_entries = ht->p.max_size * 2; + ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE); /* The maximum (not average) chain length grows with the -- cgit v1.2.3 From b5d721d761fccf491f92eefc611e318561683d0a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 May 2015 17:06:14 -0700 Subject: inet: properly align icsk_ca_priv tcp_illinois and upcoming tcp_cdg require 64bit alignment of icsk_ca_priv. x86 does not care, but other architectures might. Fixes: 05cbc0db03e82 ("ipv4: Create probe timer for tcp PMTU as per RFC4821") Signed-off-by: Eric Dumazet Cc: Fan Du Acked-by: Fan Du Signed-off-by: David S. 
Miller --- include/net/inet_connection_sock.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 48a815823587..497bc14cdb85 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -129,9 +129,10 @@ struct inet_connection_sock { u32 probe_timestamp; } icsk_mtup; - u32 icsk_ca_priv[16]; u32 icsk_user_timeout; -#define ICSK_CA_PRIV_SIZE (16 * sizeof(u32)) + + u64 icsk_ca_priv[64 / sizeof(u64)]; +#define ICSK_CA_PRIV_SIZE (8 * sizeof(u64)) }; #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ -- cgit v1.2.3 From 77bb3dfdc0d554befad58fdefbc41be5bc3ed38a Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 19 May 2015 18:40:49 +0100 Subject: xen/events: don't bind non-percpu VIRQs with percpu chip A non-percpu VIRQ (e.g., VIRQ_CONSOLE) may be freed on a different VCPU than it is bound to. This can result in a race between handle_percpu_irq() and removing the action in __free_irq() because handle_percpu_irq() does not take desc->lock. The interrupt handler sees a NULL action and oopses. Only use the percpu chip/handler for per-CPU VIRQs (like VIRQ_TIMER). # cat /proc/interrupts | grep virq 40: 87246 0 xen-percpu-virq timer0 44: 0 0 xen-percpu-virq debug0 47: 0 20995 xen-percpu-virq timer1 51: 0 0 xen-percpu-virq debug1 69: 0 0 xen-dyn-virq xen-pcpu 74: 0 0 xen-dyn-virq mce 75: 29 0 xen-dyn-virq hvc_console Signed-off-by: David Vrabel Cc: --- drivers/tty/hvc/hvc_xen.c | 2 +- drivers/xen/events/events_base.c | 12 ++++++++---- include/xen/events.h | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 5bab1c684bb1..7a3d146a5f0e 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -289,7 +289,7 @@ static int xen_initial_domain_console_init(void) return -ENOMEM; } - info->irq = bind_virq_to_irq(VIRQ_CONSOLE, 0); + info->irq = bind_virq_to_irq(VIRQ_CONSOLE, 0, false); info->vtermno = HVC_COOKIE; spin_lock(&xencons_lock); diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 2b8553bd8715..38387950490e 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -957,7 +957,7 @@ unsigned xen_evtchn_nr_channels(void) } EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels); -int bind_virq_to_irq(unsigned int virq, unsigned int cpu) +int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) { struct evtchn_bind_virq bind_virq; int evtchn, irq, ret; @@ -971,8 +971,12 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu) if (irq < 0) goto out; - irq_set_chip_and_handler_name(irq, &xen_percpu_chip, - handle_percpu_irq, "virq"); + if (percpu) + irq_set_chip_and_handler_name(irq, &xen_percpu_chip, + handle_percpu_irq, "virq"); + else + irq_set_chip_and_handler_name(irq, &xen_dynamic_chip, + handle_edge_irq, "virq"); bind_virq.virq = virq; bind_virq.vcpu = cpu; @@ -1062,7 +1066,7 @@ int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, { int irq, retval; - irq = bind_virq_to_irq(virq, cpu); + irq = bind_virq_to_irq(virq, cpu, irqflags & IRQF_PERCPU); if (irq < 0) return irq; retval = request_irq(irq, handler, irqflags, devname, dev_id); diff --git a/include/xen/events.h b/include/xen/events.h index 5321cd9636e6..7d95fdf9cf3e 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -17,7 +17,7 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn, 
irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id); -int bind_virq_to_irq(unsigned int virq, unsigned int cpu); +int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu); int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, irq_handler_t handler, unsigned long irqflags, const char *devname, -- cgit v1.2.3 From faecbb45ebefb20260ad4a631e011e93c896cb73 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 May 2015 13:42:25 +0200 Subject: Revert "netfilter: bridge: query conntrack about skb dnat" This reverts commit c055d5b03bb4cb69d349d787c9787c0383abd8b2. There are two issues: 'dnat_took_place' made me think that this is related to -j DNAT/MASQUERADE. But that's only one part of the story. This is also relevant for SNAT when we undo snat translation in reverse/reply direction. Furthermore, I originally wanted to do this mainly to avoid storing ipv6 addresses once we make DNAT/REDIRECT work for ipv6 on bridges. However, I forgot about SNPT/DNPT which is stateless. So we can't escape storing addresses for ipv6 anyway. Might as well do it for ipv4 too. Reported-and-tested-by: Bernhard Thaler Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 1 + net/bridge/br_netfilter.c | 27 +++++++++------------------ 2 files changed, 10 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 66e374d62f64..f15154a879c7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -176,6 +176,7 @@ struct nf_bridge_info { struct net_device *physindev; struct net_device *physoutdev; char neigh_header[8]; + __be32 ipv4_daddr; }; #endif diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index ab55e2472beb..60ddfbeb47f5 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -37,10 +37,6 @@ #include #include -#if IS_ENABLED(CONFIG_NF_CONNTRACK) -#include -#endif - #include #include "br_private.h" #ifdef CONFIG_SYSCTL @@ -350,24 +346,15 @@ free_skb: return 0; } -static bool dnat_took_place(const struct sk_buff *skb) +static bool daddr_was_changed(const struct sk_buff *skb, + const struct nf_bridge_info *nf_bridge) { -#if IS_ENABLED(CONFIG_NF_CONNTRACK) - enum ip_conntrack_info ctinfo; - struct nf_conn *ct; - - ct = nf_ct_get(skb, &ctinfo); - if (!ct || nf_ct_is_untracked(ct)) - return false; - - return test_bit(IPS_DST_NAT_BIT, &ct->status); -#else - return false; -#endif + return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; } /* This requires some explaining. If DNAT has taken place, * we will need to fix up the destination Ethernet address. * This is also true when SNAT takes place (for the reply direction). * * There are two cases to consider: * 1. 
The packet was DNAT'ed to a device in the same bridge @@ -421,7 +408,7 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) nf_bridge->pkt_otherhost = false; } nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; - if (dnat_took_place(skb)) { + if (daddr_was_changed(skb, nf_bridge)) { if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { struct in_device *in_dev = __in_dev_get_rcu(dev); @@ -632,6 +619,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { + struct nf_bridge_info *nf_bridge; struct net_bridge_port *p; struct net_bridge *br; __u32 len = nf_bridge_encap_header_len(skb); @@ -669,6 +657,9 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, if (!setup_pre_routing(skb)) return NF_DROP; + nf_bridge = nf_bridge_info_get(skb); + nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr; + skb->protocol = htons(ETH_P_IP); NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->sk, skb, -- cgit v1.2.3 From d654976cbf852ee20612ee10dbe57cdacda9f452 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 May 2015 21:51:19 -0700 Subject: tcp: fix a potential deadlock in tcp_get_info() Taking socket spinlock in tcp_get_info() can deadlock, as inet_diag_dump_icsk() holds the &hashinfo->ehash_locks[i], while packet processing can use the reverse locking order. We could avoid this locking for TCP_LISTEN states, but lockdep would certainly get confused as all TCP sockets share same lockdep classes. [ 523.722504] ====================================================== [ 523.728706] [ INFO: possible circular locking dependency detected ] [ 523.734990] 4.1.0-dbg-DEV #1676 Not tainted [ 523.739202] ------------------------------------------------------- [ 523.745474] ss/18032 is trying to acquire lock: [ 523.750002] (slock-AF_INET){+.-...}, at: [] tcp_get_info+0x2c4/0x360 [ 523.758129] [ 523.758129] but task is already holding lock: [ 523.763968] (&(&hashinfo->ehash_locks[i])->rlock){+.-...}, at: [] inet_diag_dump_icsk+0x1d5/0x6c0 [ 523.774661] [ 523.774661] which lock already depends on the new lock. [ 523.774661] [ 523.782850] [ 523.782850] the existing dependency chain (in reverse order) is: [ 523.790326] -> #1 (&(&hashinfo->ehash_locks[i])->rlock){+.-...}: [ 523.796599] [] lock_acquire+0xbb/0x270 [ 523.802565] [] _raw_spin_lock+0x38/0x50 [ 523.808628] [] __inet_hash_nolisten+0x78/0x110 [ 523.815273] [] tcp_v4_syn_recv_sock+0x24b/0x350 [ 523.822067] [] tcp_check_req+0x3c1/0x500 [ 523.828199] [] tcp_v4_do_rcv+0x239/0x3d0 [ 523.834331] [] tcp_v4_rcv+0xa8e/0xc10 [ 523.840202] [] ip_local_deliver_finish+0x133/0x3e0 [ 523.847214] [] ip_local_deliver+0xaa/0xc0 [ 523.853440] [] ip_rcv_finish+0x168/0x5c0 [ 523.859624] [] ip_rcv+0x307/0x420 Lets use u64_sync infrastructure instead. As a bonus, 64bit arches get optimized, as these are nop for them. Fixes: 0df48c26d841 ("tcp: add tcpi_bytes_acked to tcp_info") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 ++ net/ipv4/tcp.c | 11 +++++++---- net/ipv4/tcp_fastopen.c | 4 ++++ net/ipv4/tcp_input.c | 4 ++++ 4 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3b2911502a8c..e8bbf403618f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -158,6 +158,8 @@ struct tcp_sock { * sum(delta(snd_una)), or how many bytes * were acked. 
*/ + struct u64_stats_sync syncp; /* protects 64bit vars (cf tcp_get_info()) */ + u32 snd_una; /* First byte we want an ack for */ u32 snd_sml; /* Last byte of the most recently transmitted small packet */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 46efa03d2b11..f1377f2a0472 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -402,6 +402,7 @@ void tcp_init_sock(struct sock *sk) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_clamp = ~0; tp->mss_cache = TCP_MSS_DEFAULT; + u64_stats_init(&tp->syncp); tp->reordering = sysctl_tcp_reordering; tcp_enable_early_retrans(tp); @@ -2598,6 +2599,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; + unsigned int start; u32 rate; memset(info, 0, sizeof(*info)); @@ -2665,10 +2667,11 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) rate = READ_ONCE(sk->sk_max_pacing_rate); info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL; - spin_lock_bh(&sk->sk_lock.slock); - info->tcpi_bytes_acked = tp->bytes_acked; - info->tcpi_bytes_received = tp->bytes_received; - spin_unlock_bh(&sk->sk_lock.slock); + do { + start = u64_stats_fetch_begin_irq(&tp->syncp); + info->tcpi_bytes_acked = tp->bytes_acked; + info->tcpi_bytes_received = tp->bytes_received; + } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 3c673d5e6cff..46b087a27503 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -206,6 +206,10 @@ static bool tcp_fastopen_create_child(struct sock *sk, skb_set_owner_r(skb2, child); __skb_queue_tail(&child->sk_receive_queue, skb2); tp->syn_data_acked = 1; + + /* u64_stats_update_begin(&tp->syncp) not needed here, + * as we certainly are not changing upper 32bit value (0) + */ tp->bytes_received = end_seq - TCP_SKB_CB(skb)->seq - 1; } else { end_seq = TCP_SKB_CB(skb)->seq + 1; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 243d674b3ef5..c9ab964189a0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3286,7 +3286,9 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) { u32 delta = ack - tp->snd_una; + u64_stats_update_begin(&tp->syncp); tp->bytes_acked += delta; + u64_stats_update_end(&tp->syncp); tp->snd_una = ack; } @@ -3295,7 +3297,9 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) { u32 delta = seq - tp->rcv_nxt; + u64_stats_update_begin(&tp->syncp); tp->bytes_received += delta; + u64_stats_update_end(&tp->syncp); tp->rcv_nxt = seq; } -- cgit v1.2.3 From cc4a84c3da6f9dd6a297dc81fe4437643a60fe03 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 22 May 2015 14:07:30 -0700 Subject: net: phy: bcm7xxx: Fix 7425 PHY ID and flags While adding support for 7425 PHY in the 7xxx PHY driver, the ID that was used was actually coming from an external PHY: a BCM5461x. Fix this by using the proper ID for the internal 7425 PHY and set the PHY_IS_INTERNAL flag, otherwise consumers of this PHY driver would not be able to properly identify it as such. Fixes: d068b02cfdfc2 ("net: phy: add BCM7425 and BCM7429 PHYs") Signed-off-by: Florian Fainelli Reviewed-by: Petri Gynther Signed-off-by: David S. 
Miller --- drivers/net/phy/bcm7xxx.c | 2 +- include/linux/brcmphy.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 64c74c6a4828..b5dc59de094e 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -404,7 +404,7 @@ static struct phy_driver bcm7xxx_driver[] = { .name = "Broadcom BCM7425", .features = PHY_GBIT_FEATURES | SUPPORTED_Pause | SUPPORTED_Asym_Pause, - .flags = 0, + .flags = PHY_IS_INTERNAL, .config_init = bcm7xxx_config_init, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index ae2982c0f7a6..656da2a12ffe 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -17,7 +17,7 @@ #define PHY_ID_BCM7250 0xae025280 #define PHY_ID_BCM7364 0xae025260 #define PHY_ID_BCM7366 0x600d8490 -#define PHY_ID_BCM7425 0x03625e60 +#define PHY_ID_BCM7425 0x600d86b0 #define PHY_ID_BCM7429 0x600d8730 #define PHY_ID_BCM7439 0x600d8480 #define PHY_ID_BCM7439_2 0xae025080 -- cgit v1.2.3 From 194ec9368c0dbc421acdb2620d4dfb3cc3d022ff Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 14 May 2015 15:28:24 +0100 Subject: drivers: of/base: move of_init to driver_init Commit 5590f3196b29 ("drivers/core/of: Add symlink to device-tree from devices with an OF node") adds the symlink `of_node` for each device pointing to it's device tree node while creating/initialising it. However the devicetree sysfs is created and setup in of_init which is executed at core_initcall level. For all the devices created before of_init, the following error is thrown: "Error -2(-ENOENT) creating of_node link" Like many other components in driver model, initialize the sysfs support for OF/devicetree from driver_init so that it's ready before any devices are created. 
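The shape of the fix is simply an explicit call replacing implicit initcall ordering; condensed from the hunks below (driver_init() also does more than shown):

  void __init driver_init(void)
  {
          /* ... cpu_dev_init(), memory_dev_init(), container_dev_init() ... */
          of_core_init();         /* was: core_initcall(of_init) */
  }

With this, the devicetree kset exists before the first device is registered, so every device can get its of_node symlink.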
Fixes: 5590f3196b29 ("drivers/core/of: Add symlink to device-tree from devices with an OF node") Suggested-by: Rob Herring Cc: Grant Likely Cc: Pawel Moll Cc: Benjamin Herrenschmidt Signed-off-by: Sudeep Holla Tested-by: Robert Schwebel Acked-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/base/init.c | 2 ++ drivers/of/base.c | 8 +++----- include/linux/of.h | 6 ++++++ 3 files changed, 11 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/base/init.c b/drivers/base/init.c index da033d3bab3c..48c0e220acc0 100644 --- a/drivers/base/init.c +++ b/drivers/base/init.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "base.h" @@ -34,4 +35,5 @@ void __init driver_init(void) cpu_dev_init(); memory_dev_init(); container_dev_init(); + of_core_init(); } diff --git a/drivers/of/base.c b/drivers/of/base.c index 99764db0875a..f0650265febf 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -189,7 +189,7 @@ int __of_attach_node_sysfs(struct device_node *np) return 0; } -static int __init of_init(void) +void __init of_core_init(void) { struct device_node *np; @@ -198,7 +198,8 @@ static int __init of_init(void) of_kset = kset_create_and_add("devicetree", NULL, firmware_kobj); if (!of_kset) { mutex_unlock(&of_mutex); - return -ENOMEM; + pr_err("devicetree: failed to register existing nodes\n"); + return; } for_each_of_allnodes(np) __of_attach_node_sysfs(np); @@ -207,10 +208,7 @@ static int __init of_init(void) /* Symlink in /proc as required by userspace ABI */ if (of_root) proc_symlink("device-tree", NULL, "/sys/firmware/devicetree/base"); - - return 0; } -core_initcall(of_init); static struct property *__of_find_property(const struct device_node *np, const char *name, int *lenp) diff --git a/include/linux/of.h b/include/linux/of.h index ddeaae6d2083..b871ff9d81d7 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -121,6 +121,8 @@ extern struct device_node *of_stdout; extern raw_spinlock_t devtree_lock; #ifdef CONFIG_OF +void of_core_init(void); + static inline bool is_of_node(struct fwnode_handle *fwnode) { return fwnode && fwnode->type == FWNODE_OF; @@ -376,6 +378,10 @@ bool of_console_check(struct device_node *dn, char *name, int index); #else /* CONFIG_OF */ +static inline void of_core_init(void) +{ +} + static inline bool is_of_node(struct fwnode_handle *fwnode) { return false; -- cgit v1.2.3 From b371b594317869971af326adcf7cd65cabdb4087 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 21 May 2015 10:57:13 +0200 Subject: perf/x86: Fix event/group validation Commit 43b4578071c0 ("perf/x86: Reduce stack usage of x86_schedule_events()") violated the rule that 'fake' scheduling; as used for event/group validation; should not change the event state. This went mostly un-noticed because repeated calls of x86_pmu::get_event_constraints() would give the same result. And x86_pmu::put_event_constraints() would mostly not do anything. Commit e979121b1b15 ("perf/x86/intel: Implement cross-HT corruption bug workaround") made the situation much worse by actually setting the event->hw.constraint value to NULL, so when validation and actual scheduling interact we get NULL ptr derefs. Fix it by removing the constraint pointer from the event and move it back to an array, this time in cpuc instead of on the stack. validate_group() x86_schedule_events() event->hw.constraint = c; # store perf_task_event_sched_in() ... x86_schedule_events(); event->hw.constraint = c2; # store ... 
put_event_constraints(event); # assume failure to schedule intel_put_event_constraints() event->hw.constraint = NULL; c = event->hw.constraint; # read -> NULL if (!test_bit(hwc->idx, c->idxmsk)) # <- *BOOM* NULL deref This in particular is possible when the event in question is a cpu-wide event and group-leader, where the validate_group() tries to add an event to the group. Reported-by: Vince Weaver Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Hunter Cc: Linus Torvalds Cc: Maria Dimakopoulou Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 43b4578071c0 ("perf/x86: Reduce stack usage of x86_schedule_events()") Fixes: e979121b1b15 ("perf/x86/intel: Implement cross-HT corruption bug workaround") Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 33 +++++++++++++++------------ arch/x86/kernel/cpu/perf_event.h | 9 ++++---- arch/x86/kernel/cpu/perf_event_intel.c | 15 ++++-------- arch/x86/kernel/cpu/perf_event_intel_ds.c | 4 ++-- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 7 +++--- arch/x86/kernel/cpu/perf_event_intel_uncore.h | 1 + include/linux/perf_event.h | 4 ---- 7 files changed, 33 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 87848ebe2bb7..1664eeea65e0 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -620,7 +620,7 @@ struct sched_state { struct perf_sched { int max_weight; int max_events; - struct perf_event **events; + struct event_constraint **constraints; struct sched_state state; int saved_states; struct sched_state saved[SCHED_STATES_MAX]; @@ -629,7 +629,7 @@ struct perf_sched { /* * Initialize interator that runs through all events and counters. */ -static void perf_sched_init(struct perf_sched *sched, struct perf_event **events, +static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints, int num, int wmin, int wmax) { int idx; @@ -637,10 +637,10 @@ static void perf_sched_init(struct perf_sched *sched, struct perf_event **events memset(sched, 0, sizeof(*sched)); sched->max_events = num; sched->max_weight = wmax; - sched->events = events; + sched->constraints = constraints; for (idx = 0; idx < num; idx++) { - if (events[idx]->hw.constraint->weight == wmin) + if (constraints[idx]->weight == wmin) break; } @@ -687,7 +687,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) if (sched->state.event >= sched->max_events) return false; - c = sched->events[sched->state.event]->hw.constraint; + c = sched->constraints[sched->state.event]; /* Prefer fixed purpose counters */ if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) { idx = INTEL_PMC_IDX_FIXED; @@ -745,7 +745,7 @@ static bool perf_sched_next_event(struct perf_sched *sched) if (sched->state.weight > sched->max_weight) return false; } - c = sched->events[sched->state.event]->hw.constraint; + c = sched->constraints[sched->state.event]; } while (c->weight != sched->state.weight); sched->state.counter = 0; /* start with first counter */ @@ -756,12 +756,12 @@ static bool perf_sched_next_event(struct perf_sched *sched) /* * Assign a counter for each event. 
*/ -int perf_assign_events(struct perf_event **events, int n, +int perf_assign_events(struct event_constraint **constraints, int n, int wmin, int wmax, int *assign) { struct perf_sched sched; - perf_sched_init(&sched, events, n, wmin, wmax); + perf_sched_init(&sched, constraints, n, wmin, wmax); do { if (!perf_sched_find_counter(&sched)) @@ -788,9 +788,9 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) x86_pmu.start_scheduling(cpuc); for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { - hwc = &cpuc->event_list[i]->hw; + cpuc->event_constraint[i] = NULL; c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]); - hwc->constraint = c; + cpuc->event_constraint[i] = c; wmin = min(wmin, c->weight); wmax = max(wmax, c->weight); @@ -801,7 +801,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) */ for (i = 0; i < n; i++) { hwc = &cpuc->event_list[i]->hw; - c = hwc->constraint; + c = cpuc->event_constraint[i]; /* never assigned */ if (hwc->idx == -1) @@ -821,9 +821,10 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) } /* slow path */ - if (i != n) - unsched = perf_assign_events(cpuc->event_list, n, wmin, + if (i != n) { + unsched = perf_assign_events(cpuc->event_constraint, n, wmin, wmax, assign); + } /* * In case of success (unsched = 0), mark events as committed, @@ -840,7 +841,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) e = cpuc->event_list[i]; e->hw.flags |= PERF_X86_EVENT_COMMITTED; if (x86_pmu.commit_scheduling) - x86_pmu.commit_scheduling(cpuc, e, assign[i]); + x86_pmu.commit_scheduling(cpuc, i, assign[i]); } } @@ -1292,8 +1293,10 @@ static void x86_pmu_del(struct perf_event *event, int flags) x86_pmu.put_event_constraints(cpuc, event); /* Delete the array entry. 
*/ - while (++i < cpuc->n_events) + while (++i < cpuc->n_events) { cpuc->event_list[i-1] = cpuc->event_list[i]; + cpuc->event_constraint[i-1] = cpuc->event_constraint[i]; + } --cpuc->n_events; perf_event_update_userpage(event); diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 6ac5cb7a9e14..fdfaab7c5e55 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -172,7 +172,10 @@ struct cpu_hw_events { added in the current transaction */ int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ u64 tags[X86_PMC_IDX_MAX]; + struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ + struct event_constraint *event_constraint[X86_PMC_IDX_MAX]; + unsigned int group_flag; int is_fake; @@ -519,9 +522,7 @@ struct x86_pmu { void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); - void (*commit_scheduling)(struct cpu_hw_events *cpuc, - struct perf_event *event, - int cntr); + void (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr); void (*start_scheduling)(struct cpu_hw_events *cpuc); @@ -717,7 +718,7 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, void x86_pmu_enable_all(int added); -int perf_assign_events(struct perf_event **events, int n, +int perf_assign_events(struct event_constraint **constraints, int n, int wmin, int wmax, int *assign); int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 3998131d1a68..7a58fb5df15c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2106,7 +2106,7 @@ static struct event_constraint * intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) { - struct event_constraint *c1 = event->hw.constraint; + struct event_constraint *c1 = cpuc->event_constraint[idx]; struct event_constraint *c2; /* @@ -2188,8 +2188,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, static void intel_put_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { - struct event_constraint *c = event->hw.constraint; - intel_put_shared_regs_event_constraints(cpuc, event); /* @@ -2197,19 +2195,14 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc, * all events are subject to and must call the * put_excl_constraints() routine */ - if (c && cpuc->excl_cntrs) + if (cpuc->excl_cntrs) intel_put_excl_constraints(cpuc, event); - - /* cleanup dynamic constraint */ - if (c && (c->flags & PERF_X86_EVENT_DYNAMIC)) - event->hw.constraint = NULL; } -static void intel_commit_scheduling(struct cpu_hw_events *cpuc, - struct perf_event *event, int cntr) +static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr) { struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; - struct event_constraint *c = event->hw.constraint; + struct event_constraint *c = cpuc->event_constraint[idx]; struct intel_excl_states *xlo, *xl; int tid = cpuc->excl_thread_id; int o_tid = 1 - tid; diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 813f75d71175..7f73b3553e2e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -706,9 +706,9 @@ void intel_pmu_pebs_disable(struct perf_event *event) cpuc->pebs_enabled &= ~(1ULL << hwc->idx); - if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT) + if 
(event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32)); - else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST) + else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST) cpuc->pebs_enabled &= ~(1ULL << 63); if (cpuc->enabled) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index c635b8b49e93..ec2ba578d286 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -365,9 +365,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX); for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { - hwc = &box->event_list[i]->hw; c = uncore_get_event_constraint(box, box->event_list[i]); - hwc->constraint = c; + box->event_constraint[i] = c; wmin = min(wmin, c->weight); wmax = max(wmax, c->weight); } @@ -375,7 +374,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int /* fastpath, try to reuse previous register */ for (i = 0; i < n; i++) { hwc = &box->event_list[i]->hw; - c = hwc->constraint; + c = box->event_constraint[i]; /* never assigned */ if (hwc->idx == -1) @@ -395,7 +394,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int } /* slow path */ if (i != n) - ret = perf_assign_events(box->event_list, n, + ret = perf_assign_events(box->event_constraint, n, wmin, wmax, assign); if (!assign || ret) { diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 6c8c1e7e69d8..f789ec9a0133 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -97,6 +97,7 @@ struct intel_uncore_box { atomic_t refcnt; struct perf_event *events[UNCORE_PMC_IDX_MAX]; struct perf_event *event_list[UNCORE_PMC_IDX_MAX]; + struct event_constraint *event_constraint[UNCORE_PMC_IDX_MAX]; unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; u64 tags[UNCORE_PMC_IDX_MAX]; struct pci_dev *pci_dev; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 61992cf2e977..d8a82a89f35a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -92,8 +92,6 @@ struct hw_perf_event_extra { int idx; /* index in shared_regs->regs[] */ }; -struct event_constraint; - /** * struct hw_perf_event - performance event hardware details: */ @@ -112,8 +110,6 @@ struct hw_perf_event { struct hw_perf_event_extra extra_reg; struct hw_perf_event_extra branch_reg; - - struct event_constraint *constraint; }; struct { /* software */ struct hrtimer hrtimer; -- cgit v1.2.3 From 9302d7bb0c5cd46be5706859301f18c137b2439f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 26 May 2015 17:30:17 -0600 Subject: sctp: Fix mangled IPv4 addresses on a IPv6 listening socket sctp_v4_map_v6 was subtly writing and reading from members of a union in a way that clobbered data it needed to read before it read it. Zeroing the v6 flowinfo overwrites the v4 sin_addr with 0, meaning that every place that calls sctp_v4_map_v6 gets ::ffff:0.0.0.0 as the result. Reorder things to guarantee correct behaviour no matter what the union layout is. This impacts user space clients that open an IPv6 SCTP socket and receive IPv4 connections. Prior to 299ee user space would see a sockaddr with AF_INET and a correct address; after 299ee the sockaddr is AF_INET6, but the address is wrong. 
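The aliasing bug reproduces outside the kernel; a simplified, hypothetical layout (not the real sockaddr definitions) shows why the reads must happen before any overlapping write:

  #include <stdint.h>
  #include <stdio.h>

  union addr {
          struct { uint16_t family, port; uint32_t s_addr; } v4;
          struct { uint16_t family, port; uint32_t flowinfo; uint32_t a32[4]; } v6;
  };

  static void map_v4_to_v6(union addr *a)
  {
          uint32_t saddr = a->v4.s_addr;  /* read the overlapping fields first... */
          uint16_t port = a->v4.port;

          a->v6.flowinfo = 0;             /* ...then clobbering them is safe */
          a->v6.a32[3] = saddr;
          a->v6.port = port;
  }

  int main(void)
  {
          union addr a = { .v4 = { 2, 80, 0x7f000001 } };

          map_v4_to_v6(&a);               /* zeroing flowinfo first would have
                                           * wiped s_addr, giving 0.0.0.0 */
          printf("%#x %u\n", a.v6.a32[3], a.v6.port);
          return 0;
  }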
Fixes: 299ee123e198 (sctp: Fixup v4mapped behaviour to comply with Sock API) Signed-off-by: Jason Gunthorpe Acked-by: Daniel Borkmann Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index c56a438c3a1e..ce13cf20f625 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -574,11 +574,14 @@ static inline void sctp_v6_map_v4(union sctp_addr *addr) /* Map v4 address to v4-mapped v6 address */ static inline void sctp_v4_map_v6(union sctp_addr *addr) { + __be16 port; + + port = addr->v4.sin_port; + addr->v6.sin6_addr.s6_addr32[3] = addr->v4.sin_addr.s_addr; + addr->v6.sin6_port = port; addr->v6.sin6_family = AF_INET6; addr->v6.sin6_flowinfo = 0; addr->v6.sin6_scope_id = 0; - addr->v6.sin6_port = addr->v4.sin_port; - addr->v6.sin6_addr.s6_addr32[3] = addr->v4.sin_addr.s_addr; addr->v6.sin6_addr.s6_addr32[0] = 0; addr->v6.sin6_addr.s6_addr32[1] = 0; addr->v6.sin6_addr.s6_addr32[2] = htonl(0x0000ffff); -- cgit v1.2.3 From f36963c9d3f6f415732710da3acdd8608a9fa0e5 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 9 May 2015 03:14:13 +0930 Subject: cpumask_set_cpu_local_first => cpumask_local_spread, lament da91309e0a7e (cpumask: Utility function to set n'th cpu...) created a genuinely weird function. I never saw it before; it went through DaveM. (He only does this to make us other maintainers feel better about our own mistakes.) cpumask_set_cpu_local_first's purpose is to say "I need to spread things across N online cpus, choose the ones on this numa node first"; you call it in a loop. It can fail. One of the two callers ignores this, the other aborts and fails the device open. It can fail in two ways: allocating the off-stack cpumask, or through a convoluted codepath which AFAICT can only occur if cpu_online_mask changes. Which shouldn't happen, because if cpu_online_mask can change while you call this, it could return a now-offline cpu anyway. It contains a nonsensical test "!cpumask_of_node(numa_node)". This was drawn to my attention by Geert, who said this causes a warning on Sparc. It sets a single bit in a cpumask instead of returning a cpu number, because that's what the callers want. It could be made more efficient by passing the previous cpu rather than an index, but that would be more invasive to the callers. Fixes: da91309e0a7e8966d916a74cce42ed170fde06bf Signed-off-by: Rusty Russell (then rebased) Tested-by: Amir Vadai Acked-by: Amir Vadai Acked-by: David S. 
Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 6 +-- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 10 ++-- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 6 +-- include/linux/cpumask.h | 6 +-- lib/cpumask.c | 74 +++++++++----------------- 5 files changed, 37 insertions(+), 65 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index a6dcbf850c1f..6f9ffb9026cd 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2358,11 +2358,11 @@ static int be_evt_queues_create(struct be_adapter *adapter) adapter->cfg_num_qs); for_all_evt_queues(adapter, eqo, i) { + int numa_node = dev_to_node(&adapter->pdev->dev); if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL)) return -ENOMEM; - cpumask_set_cpu_local_first(i, dev_to_node(&adapter->pdev->dev), - eqo->affinity_mask); - + cpumask_set_cpu(cpumask_local_spread(i, numa_node), + eqo->affinity_mask); netif_napi_add(adapter->netdev, &eqo->napi, be_poll, BE_NAPI_WEIGHT); napi_hash_add(&eqo->napi); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 32f5ec737472..cf467a9f6cc7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1501,17 +1501,13 @@ static int mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) { struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx]; int numa_node = priv->mdev->dev->numa_node; - int ret = 0; if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL)) return -ENOMEM; - ret = cpumask_set_cpu_local_first(ring_idx, numa_node, - ring->affinity_mask); - if (ret) - free_cpumask_var(ring->affinity_mask); - - return ret; + cpumask_set_cpu(cpumask_local_spread(ring_idx, numa_node), + ring->affinity_mask); + return 0; } static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index f7bf312fb443..7bed3a88579f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -144,9 +144,9 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->queue_index = queue_index; if (queue_index < priv->num_tx_rings_p_up) - cpumask_set_cpu_local_first(queue_index, - priv->mdev->dev->numa_node, - &ring->affinity_mask); + cpumask_set_cpu(cpumask_local_spread(queue_index, + priv->mdev->dev->numa_node), + &ring->affinity_mask); *pring = ring; return 0; diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 27e285b92b5f..59915ea5373c 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -151,10 +151,8 @@ static inline unsigned int cpumask_any_but(const struct cpumask *mask, return 1; } -static inline int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) +static inline unsigned int cpumask_local_spread(unsigned int i, int node) { - set_bit(0, cpumask_bits(dstp)); - return 0; } @@ -208,7 +206,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); -int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp); +unsigned int cpumask_local_spread(unsigned int i, int node); /** * for_each_cpu - iterate over every cpu in a mask diff --git a/lib/cpumask.c b/lib/cpumask.c 
index 830dd5dec40f..5f627084f2e9 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -139,64 +139,42 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask) #endif /** - * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first - * + * cpumask_local_spread - select the i'th cpu with local numa cpu's first * @i: index number - * @numa_node: local numa_node - * @dstp: cpumask with the relevant cpu bit set according to the policy + * @node: local numa_node * - * This function sets the cpumask according to a numa aware policy. - * cpumask could be used as an affinity hint for the IRQ related to a - * queue. When the policy is to spread queues across cores - local cores - * first. + * This function selects an online CPU according to a numa aware policy; + * local cpus are returned first, followed by non-local ones, then it + * wraps around. * - * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set - * the cpu bit and need to re-call the function. + * It's not very efficient, but useful for setup. */ -int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) +unsigned int cpumask_local_spread(unsigned int i, int node) { - cpumask_var_t mask; int cpu; - int ret = 0; - - if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) - return -ENOMEM; + /* Wrap: we always want a cpu. */ i %= num_online_cpus(); - if (numa_node == -1 || !cpumask_of_node(numa_node)) { - /* Use all online cpu's for non numa aware system */ - cpumask_copy(mask, cpu_online_mask); + if (node == -1) { + for_each_cpu(cpu, cpu_online_mask) + if (i-- == 0) + return cpu; } else { - int n; - - cpumask_and(mask, - cpumask_of_node(numa_node), cpu_online_mask); - - n = cpumask_weight(mask); - if (i >= n) { - i -= n; - - /* If index > number of local cpu's, mask out local - * cpu's - */ - cpumask_andnot(mask, cpu_online_mask, mask); + /* NUMA first. */ + for_each_cpu_and(cpu, cpumask_of_node(node), cpu_online_mask) + if (i-- == 0) + return cpu; + + for_each_cpu(cpu, cpu_online_mask) { + /* Skip NUMA nodes, done above. */ + if (cpumask_test_cpu(cpu, cpumask_of_node(node))) + continue; + + if (i-- == 0) + return cpu; } } - - for_each_cpu(cpu, mask) { - if (--i < 0) - goto out; - } - - ret = -EAGAIN; - -out: - free_cpumask_var(mask); - - if (!ret) - cpumask_set_cpu(cpu, dstp); - - return ret; + BUG(); } -EXPORT_SYMBOL(cpumask_set_cpu_local_first); +EXPORT_SYMBOL(cpumask_local_spread); -- cgit v1.2.3 From 3a7af58faa7829faa26026c245d2a8a44e9605c5 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 13 Apr 2015 18:27:35 +0200 Subject: mac80211: Fix mac80211.h docbook comments A couple of enums in mac80211.h became structures recently, but the comments didn't follow suit, leading to errors like: Error(.//include/net/mac80211.h:367): Cannot parse enum! Documentation/DocBook/Makefile:93: recipe for target 'Documentation/DocBook/80211.xml' failed make[1]: *** [Documentation/DocBook/80211.xml] Error 1 Makefile:1361: recipe for target 'mandocs' failed make: *** [mandocs] Error 2 Fix the comments accordingly. Added a couple of other small comment fixes while I was there to silence other recently-added docbook warnings.
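For readers hitting the same class of failure: kernel-doc keys its parsing off the keyword in the comment header, which must match the definition that follows. A minimal sketch of the corrected pattern, using a hypothetical type rather than the real mac80211 ones:

    /**
     * struct foo_event - data attached to a %FOO_EVENT
     * @data: see &enum foo_event_data
     */
    struct foo_event {
            enum foo_event_data data;
    };

Leaving "enum foo_event" above a struct definition is exactly what produces the "Cannot parse enum!" errors quoted above.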
Reported-by: Jim Davis Signed-off-by: Jonathan Corbet Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8e3668b44c29..fc57f6b82fc5 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -354,7 +354,7 @@ enum ieee80211_rssi_event_data { }; /** - * enum ieee80211_rssi_event - data attached to an %RSSI_EVENT + * struct ieee80211_rssi_event - data attached to an %RSSI_EVENT * @data: See &enum ieee80211_rssi_event_data */ struct ieee80211_rssi_event { @@ -388,7 +388,7 @@ enum ieee80211_mlme_event_status { }; /** - * enum ieee80211_mlme_event - data attached to an %MLME_EVENT + * struct ieee80211_mlme_event - data attached to an %MLME_EVENT * @data: See &enum ieee80211_mlme_event_data * @status: See &enum ieee80211_mlme_event_status * @reason: the reason code if applicable @@ -401,9 +401,10 @@ struct ieee80211_mlme_event { /** * struct ieee80211_event - event to be sent to the driver - * @type The event itself. See &enum ieee80211_event_type. + * @type: The event itself. See &enum ieee80211_event_type. * @rssi: relevant if &type is %RSSI_EVENT * @mlme: relevant if &type is %AUTH_EVENT + * @u: union holding the above two fields */ struct ieee80211_event { enum ieee80211_event_type type; -- cgit v1.2.3 From aad653a0bc09dd4ebcb5579f9f835bbae9ef2ba3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 19 May 2015 15:58:37 +1000 Subject: block: discard bdi_unregister() in favour of bdi_destroy() bdi_unregister() now contains very little functionality. It contains a "WARN_ON" if bdi->dev is NULL. This warning is of no real consequence as bdi->dev isn't needed by anything else in the function, and it triggers if blk_cleanup_queue() -> bdi_destroy() is called before bdi_unregister, which happens since Commit: 6cd18e711dd8 ("block: destroy bdi before blockdev is unregistered.") So this isn't wanted. It also calls bdi_set_min_ratio(). This needs to be called after writes through the bdi have all been flushed, and before the bdi is destroyed. Calling it early is better than calling it late as it frees up a global resource. Calling it immediately after bdi_wb_shutdown() in bdi_destroy() perfectly fits these requirements. So bdi_unregister() can be discarded with the important content moved to bdi_destroy(), as can the writeback_bdi_unregister event which is already not used. 
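For background on the global resource mentioned above: a bdi's min_ratio is carved out of a fixed writeback budget shared by all bdis, so holding it until late teardown starves other devices. A condensed sketch of the mechanism, adapted from the mm/page-writeback.c of this era (details may differ slightly by version):

    static unsigned int bdi_min_ratio;  /* sum over all bdis, must stay < 100 */

    int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
    {
            int ret = 0;

            spin_lock_bh(&bdi_lock);
            if (min_ratio > bdi->max_ratio) {
                    ret = -EINVAL;
            } else {
                    /* Compute the delta; unsigned wraparound makes this
                     * correct for decreases too, so passing 0 returns this
                     * bdi's share to the global pool. */
                    min_ratio -= bdi->min_ratio;
                    if (bdi_min_ratio + min_ratio < 100) {
                            bdi_min_ratio += min_ratio;
                            bdi->min_ratio += min_ratio;
                    } else {
                            ret = -EINVAL;
                    }
            }
            spin_unlock_bh(&bdi_lock);
            return ret;
    }

Hence calling bdi_set_min_ratio(bdi, 0) right after bdi_wb_shutdown() in bdi_destroy() releases the share at the earliest safe point.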
Reported-by: Mike Snitzer Cc: stable@vger.kernel.org (v4.0) Fixes: c4db59d31e39 ("fs: don't reassign dirty inodes to default_backing_dev_info") Fixes: 6cd18e711dd8 ("block: destroy bdi before blockdev is unregistered.") Acked-by: Peter Zijlstra (Intel) Acked-by: Dan Williams Tested-by: Nicholas Moulin Signed-off-by: NeilBrown Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/genhd.c | 1 - include/linux/backing-dev.h | 1 - include/trace/events/writeback.h | 1 - mm/backing-dev.c | 18 +----------------- 4 files changed, 1 insertion(+), 20 deletions(-) (limited to 'include') diff --git a/block/genhd.c b/block/genhd.c index 0a536dc05f3b..666e11b83983 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -653,7 +653,6 @@ void del_gendisk(struct gendisk *disk) disk->flags &= ~GENHD_FL_UP; sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); - bdi_unregister(&disk->queue->backing_dev_info); blk_unregister_queue(disk); blk_unregister_region(disk_devt(disk), disk->minors); diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index aff923ae8c4b..d87d8eced064 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -116,7 +116,6 @@ __printf(3, 4) int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...); int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); -void bdi_unregister(struct backing_dev_info *bdi); int __must_check bdi_setup_and_register(struct backing_dev_info *, char *); void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, enum wb_reason reason); diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 880dd7437172..c178d13d6f4c 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -250,7 +250,6 @@ DEFINE_EVENT(writeback_class, name, \ DEFINE_WRITEBACK_EVENT(writeback_nowork); DEFINE_WRITEBACK_EVENT(writeback_wake_background); DEFINE_WRITEBACK_EVENT(writeback_bdi_register); -DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister); DECLARE_EVENT_CLASS(wbc_class, TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 6dc4580df2af..000e7b3b9896 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -359,23 +359,6 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi) flush_delayed_work(&bdi->wb.dwork); } -/* - * Called when the device behind @bdi has been removed or ejected. - * - * We can't really do much here except for reducing the dirty ratio at - * the moment. In the future we should be able to set a flag so that - * the filesystem can handle errors at mark_inode_dirty time instead - * of only at writeback time. 
- */ -void bdi_unregister(struct backing_dev_info *bdi) -{ - if (WARN_ON_ONCE(!bdi->dev)) - return; - - bdi_set_min_ratio(bdi, 0); -} -EXPORT_SYMBOL(bdi_unregister); - static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi) { memset(wb, 0, sizeof(*wb)); @@ -443,6 +426,7 @@ void bdi_destroy(struct backing_dev_info *bdi) int i; bdi_wb_shutdown(bdi); + bdi_set_min_ratio(bdi, 0); WARN_ON(!list_empty(&bdi->work_list)); WARN_ON(delayed_work_pending(&bdi->wb.dwork)); -- cgit v1.2.3 From 80188b0d77d7426b494af739ac129e0e684acb84 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 29 May 2015 07:39:34 +1000 Subject: percpu_counter: batch size aware __percpu_counter_compare() XFS uses non-standard batch sizes for avoiding frequent global counter updates on its allocated inode counters, as they increment or decrement in batches of 64 inodes. Hence the standard percpu counter batch of 32 means that the counter is effectively a global counter. Currently XFS uses a batch size of 128 so that it doesn't take the global lock on every single modification. However, XFS also needs to compare accurately against zero, which means we need to use percpu_counter_compare(), and that has a hard-coded batch size of 32, and hence will spuriously fail to detect when it is supposed to use precise comparisons and hence the accounting goes wrong. Add __percpu_counter_compare() to take a custom batch size so we can use it sanely in XFS and factor percpu_counter_compare() to use it. Signed-off-by: Dave Chinner Acked-by: Tejun Heo Signed-off-by: Dave Chinner --- include/linux/percpu_counter.h | 13 ++++++++++++- lib/percpu_counter.c | 6 +++--- 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 50e50095c8d1..84a109449610 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -41,7 +41,12 @@ void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); s64 __percpu_counter_sum(struct percpu_counter *fbc); -int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs); +int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); + +static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) +{ + return __percpu_counter_compare(fbc, rhs, percpu_counter_batch); +} static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { @@ -116,6 +121,12 @@ static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) return 0; } +static inline int +__percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) +{ + return percpu_counter_compare(fbc, rhs); +} + static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 48144cdae819..f051d69f0910 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -197,13 +197,13 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb, * Compare counter against given value.
* Return 1 if greater, 0 if equal and -1 if less */ -int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) +int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) { s64 count; count = percpu_counter_read(fbc); /* Check to see if rough count will be sufficient for comparison */ - if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) { + if (abs(count - rhs) > (batch * num_online_cpus())) { if (count > rhs) return 1; else @@ -218,7 +218,7 @@ int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) else return 0; } -EXPORT_SYMBOL(percpu_counter_compare); +EXPORT_SYMBOL(__percpu_counter_compare); static int __init percpu_counter_startup(void) { -- cgit v1.2.3 From e5feb1ebaadfb1f5f70a3591e762d780232a4f5d Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Thu, 28 May 2015 15:44:16 -0700 Subject: tracing/mm: don't trace kmem_cache_free on offline cpus Since tracepoints use RCU for protection, they must not be called on offline cpus. trace_kmem_cache_free can be called on an offline cpu in this scenario caught by LOCKDEP: =============================== [ INFO: suspicious RCU usage. ] 4.1.0-rc1+ #9 Not tainted ------------------------------- include/trace/events/kmem.h:148 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from offline CPU! rcu_scheduler_active = 1, debug_locks = 1 no locks held by swapper/1/0. stack backtrace: CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.1.0-rc1+ #9 Call Trace: .dump_stack+0x98/0xd4 (unreliable) .lockdep_rcu_suspicious+0x108/0x170 .kmem_cache_free+0x344/0x4b0 .__mmdrop+0x4c/0x160 .idle_task_exit+0xf0/0x100 .pnv_smp_cpu_kill_self+0x58/0x2c0 .cpu_die+0x34/0x50 .arch_cpu_idle_dead+0x20/0x40 .cpu_startup_entry+0x708/0x7a0 .start_secondary+0x36c/0x3a0 start_secondary_prolog+0x10/0x14 Fix this by converting kmem_cache_free trace point into TRACE_EVENT_CONDITION where condition is cpu_online(smp_processor_id()) Signed-off-by: Shreyas B. Prabhu Reported-by: Aneesh Kumar K.V Reviewed-by: Preeti U Murthy Acked-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/kmem.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 81ea59812117..78efa0a197a9 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -140,11 +140,22 @@ DEFINE_EVENT(kmem_free, kfree, TP_ARGS(call_site, ptr) ); -DEFINE_EVENT(kmem_free, kmem_cache_free, +DEFINE_EVENT_CONDITION(kmem_free, kmem_cache_free, TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr) + TP_ARGS(call_site, ptr), + + /* + * This trace can be potentially called from an offlined cpu. + * Since trace points use RCU and RCU should not be used from + * offline cpus, filter such calls out. + * While this trace can be called from a preemptable section, + * it has no impact on the condition since tasks can migrate + * only from online cpus to other online cpus. Thus its safe + * to use raw_smp_processor_id. + */ + TP_CONDITION(cpu_online(raw_smp_processor_id())) ); TRACE_EVENT(mm_page_free, -- cgit v1.2.3 From 1f0c27b50f4f2567e649f49d77daee2bbf3f40e5 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Thu, 28 May 2015 15:44:19 -0700 Subject: tracing/mm: don't trace mm_page_free on offline cpus Since tracepoints use RCU for protection, they must not be called on offline cpus. 
trace_mm_page_free can be called on an offline cpu in this scenario caught by LOCKDEP: =============================== [ INFO: suspicious RCU usage. ] 4.1.0-rc1+ #9 Not tainted ------------------------------- include/trace/events/kmem.h:170 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from offline CPU! rcu_scheduler_active = 1, debug_locks = 1 no locks held by swapper/1/0. stack backtrace: CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.1.0-rc1+ #9 Call Trace: .dump_stack+0x98/0xd4 (unreliable) .lockdep_rcu_suspicious+0x108/0x170 .free_pages_prepare+0x494/0x680 .free_hot_cold_page+0x50/0x280 .destroy_context+0x90/0xd0 .__mmdrop+0x58/0x160 .idle_task_exit+0xf0/0x100 .pnv_smp_cpu_kill_self+0x58/0x2c0 .cpu_die+0x34/0x50 .arch_cpu_idle_dead+0x20/0x40 .cpu_startup_entry+0x708/0x7a0 .start_secondary+0x36c/0x3a0 start_secondary_prolog+0x10/0x14 Fix this by converting mm_page_free trace point into TRACE_EVENT_CONDITION where condition is cpu_online(smp_processor_id()) Signed-off-by: Shreyas B. Prabhu Reviewed-by: Preeti U Murthy Acked-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/kmem.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 78efa0a197a9..aa863549e77a 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -158,12 +158,24 @@ DEFINE_EVENT_CONDITION(kmem_free, kmem_cache_free, TP_CONDITION(cpu_online(raw_smp_processor_id())) ); -TRACE_EVENT(mm_page_free, +TRACE_EVENT_CONDITION(mm_page_free, TP_PROTO(struct page *page, unsigned int order), TP_ARGS(page, order), + + /* + * This trace can be potentially called from an offlined cpu. + * Since trace points use RCU and RCU should not be used from + * offline cpus, filter such calls out. + * While this trace can be called from a preemptable section, + * it has no impact on the condition since tasks can migrate + * only from online cpus to other online cpus. Thus its safe + * to use raw_smp_processor_id. + */ + TP_CONDITION(cpu_online(raw_smp_processor_id())), + TP_STRUCT__entry( __field( unsigned long, pfn ) __field( unsigned int, order ) -- cgit v1.2.3 From 649b8de2f75145bf14cb1783688e16d51ac9b89a Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Thu, 28 May 2015 15:44:22 -0700 Subject: tracing/mm: don't trace mm_page_pcpu_drain on offline cpus Since tracepoints use RCU for protection, they must not be called on offline cpus. trace_mm_page_pcpu_drain can be called on an offline cpu in this scenario caught by LOCKDEP: =============================== [ INFO: suspicious RCU usage. ] 4.1.0-rc1+ #9 Not tainted ------------------------------- include/trace/events/kmem.h:265 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from offline CPU! 
rcu_scheduler_active = 1, debug_locks = 1 1 lock held by swapper/5/0: #0: (&(&zone->lock)->rlock){..-...}, at: [] .free_pcppages_bulk+0x70/0x920 stack backtrace: CPU: 5 PID: 0 Comm: swapper/5 Not tainted 4.1.0-rc1+ #9 Call Trace: .dump_stack+0x98/0xd4 (unreliable) .lockdep_rcu_suspicious+0x108/0x170 .free_pcppages_bulk+0x60c/0x920 .free_hot_cold_page+0x208/0x280 .destroy_context+0x90/0xd0 .__mmdrop+0x58/0x160 .idle_task_exit+0xf0/0x100 .pnv_smp_cpu_kill_self+0x58/0x2c0 .cpu_die+0x34/0x50 .arch_cpu_idle_dead+0x20/0x40 .cpu_startup_entry+0x708/0x7a0 .start_secondary+0x36c/0x3a0 start_secondary_prolog+0x10/0x14 Fix this by converting mm_page_pcpu_drain trace point into TRACE_EVENT_CONDITION where condition is cpu_online(smp_processor_id()) Signed-off-by: Shreyas B. Prabhu Reviewed-by: Preeti U Murthy Acked-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/kmem.h | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index aa863549e77a..f7554fd7fc62 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -276,12 +276,35 @@ DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked, TP_ARGS(page, order, migratetype) ); -DEFINE_EVENT_PRINT(mm_page, mm_page_pcpu_drain, +TRACE_EVENT_CONDITION(mm_page_pcpu_drain, TP_PROTO(struct page *page, unsigned int order, int migratetype), TP_ARGS(page, order, migratetype), + /* + * This trace can be potentially called from an offlined cpu. + * Since trace points use RCU and RCU should not be used from + * offline cpus, filter such calls out. + * While this trace can be called from a preemptable section, + * it has no impact on the condition since tasks can migrate + * only from online cpus to other online cpus. Thus its safe + * to use raw_smp_processor_id. + */ + TP_CONDITION(cpu_online(raw_smp_processor_id())), + + TP_STRUCT__entry( + __field( unsigned long, pfn ) + __field( unsigned int, order ) + __field( int, migratetype ) + ), + + TP_fast_assign( + __entry->pfn = page ? page_to_pfn(page) : -1UL; + __entry->order = order; + __entry->migratetype = migratetype; + ), + TP_printk("page=%p pfn=%lu order=%d migratetype=%d", pfn_to_page(__entry->pfn), __entry->pfn, __entry->order, __entry->migratetype) -- cgit v1.2.3 From d588cf8f618d7b316743a0bc99fede20f7a01bb7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 May 2015 08:50:52 +0200 Subject: target: Fix se_tpg_tfo->tf_subsys regression + remove tf_subsystem There is just one configfs subsystem in the target code, so we might as well add two helpers to reference / unreference it from the core code instead of passing pointers to it around. This fixes a regression introduced for v4.1-rc1 with commit 9ac8928e6, where configfs_depend_item() callers using se_tpg_tfo->tf_subsys would fail, because the assignment from the original target_core_subsystem[] is no longer happening at target_register_template() time. 
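To illustrate the new interface, a sketch of a converted fabric call site, mirroring the hunks below (error handling abbreviated):

    int ret;

    /* Pin the TPG's config item so configfs cannot remove it while in use. */
    ret = target_depend_item(&se_tpg->tpg_group.cg_item);
    if (ret)
            return ret;
    /* ... use the tpg ... */
    target_undepend_item(&se_tpg->tpg_group.cg_item);

The helpers simply wrap configfs_depend_item()/configfs_undepend_item() with the single target_core_fabrics subsystem, so callers no longer need to carry a subsystem pointer.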
(Fix target_core_exit_configfs pointer dereference - Sagi) Signed-off-by: Christoph Hellwig Reported-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 6 ++---- drivers/target/target_core_configfs.c | 30 ++++++++++++++---------------- drivers/target/target_core_internal.h | 3 --- drivers/target/target_core_pr.c | 32 +++++++------------------------- drivers/target/target_core_xcopy.c | 15 ++++++--------- drivers/vhost/scsi.c | 6 ++---- include/target/target_core_configfs.h | 2 -- include/target/target_core_fabric.h | 4 +++- 8 files changed, 34 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 68c2002e78bf..5c9e680aa375 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -1020,8 +1020,7 @@ static void tcm_qla2xxx_depend_tpg(struct work_struct *work) struct se_portal_group *se_tpg = &base_tpg->se_tpg; struct scsi_qla_host *base_vha = base_tpg->lport->qla_vha; - if (!configfs_depend_item(se_tpg->se_tpg_tfo->tf_subsys, - &se_tpg->tpg_group.cg_item)) { + if (!target_depend_item(&se_tpg->tpg_group.cg_item)) { atomic_set(&base_tpg->lport_tpg_enabled, 1); qlt_enable_vha(base_vha); } @@ -1037,8 +1036,7 @@ static void tcm_qla2xxx_undepend_tpg(struct work_struct *work) if (!qlt_stop_phase1(base_vha->vha_tgt.qla_tgt)) { atomic_set(&base_tpg->lport_tpg_enabled, 0); - configfs_undepend_item(se_tpg->se_tpg_tfo->tf_subsys, - &se_tpg->tpg_group.cg_item); + target_undepend_item(&se_tpg->tpg_group.cg_item); } complete(&base_tpg->tpg_base_comp); } diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index ddaf76a4ac2a..1580077971f8 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -212,10 +212,6 @@ static struct config_group *target_core_register_fabric( pr_debug("Target_Core_ConfigFS: REGISTER -> Allocated Fabric:" " %s\n", tf->tf_group.cg_item.ci_name); - /* - * Setup tf_ops.tf_subsys pointer for usage with configfs_depend_item() - */ - tf->tf_ops.tf_subsys = tf->tf_subsys; tf->tf_fabric = &tf->tf_group.cg_item; pr_debug("Target_Core_ConfigFS: REGISTER -> Set tf->tf_fabric" " for %s\n", name); @@ -291,10 +287,17 @@ static struct configfs_subsystem target_core_fabrics = { }, }; -struct configfs_subsystem *target_core_subsystem[] = { - &target_core_fabrics, - NULL, -}; +int target_depend_item(struct config_item *item) +{ + return configfs_depend_item(&target_core_fabrics, item); +} +EXPORT_SYMBOL(target_depend_item); + +void target_undepend_item(struct config_item *item) +{ + return configfs_undepend_item(&target_core_fabrics, item); +} +EXPORT_SYMBOL(target_undepend_item); /*############################################################################## // Start functions called by external Target Fabrics Modules @@ -467,7 +470,6 @@ int target_register_template(const struct target_core_fabric_ops *fo) * struct target_fabric_configfs->tf_cit_tmpl */ tf->tf_module = fo->module; - tf->tf_subsys = target_core_subsystem[0]; snprintf(tf->tf_name, TARGET_FABRIC_NAME_SIZE, "%s", fo->name); tf->tf_ops = *fo; @@ -2870,7 +2872,7 @@ static int __init target_core_init_configfs(void) { struct config_group *target_cg, *hba_cg = NULL, *alua_cg = NULL; struct config_group *lu_gp_cg = NULL; - struct configfs_subsystem *subsys; + struct configfs_subsystem *subsys = &target_core_fabrics; struct t10_alua_lu_gp *lu_gp; int ret; @@ -2878,7 +2880,6 @@ static int __init 
target_core_init_configfs(void) " Engine: %s on %s/%s on "UTS_RELEASE"\n", TARGET_CORE_VERSION, utsname()->sysname, utsname()->machine); - subsys = target_core_subsystem[0]; config_group_init(&subsys->su_group); mutex_init(&subsys->su_mutex); @@ -3008,13 +3009,10 @@ out_global: static void __exit target_core_exit_configfs(void) { - struct configfs_subsystem *subsys; struct config_group *hba_cg, *alua_cg, *lu_gp_cg; struct config_item *item; int i; - subsys = target_core_subsystem[0]; - lu_gp_cg = &alua_lu_gps_group; for (i = 0; lu_gp_cg->default_groups[i]; i++) { item = &lu_gp_cg->default_groups[i]->cg_item; @@ -3045,8 +3043,8 @@ static void __exit target_core_exit_configfs(void) * We expect subsys->su_group.default_groups to be released * by configfs subsystem provider logic.. */ - configfs_unregister_subsystem(subsys); - kfree(subsys->su_group.default_groups); + configfs_unregister_subsystem(&target_core_fabrics); + kfree(target_core_fabrics.su_group.default_groups); core_alua_free_lu_gp(default_lu_gp); default_lu_gp = NULL; diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 874a9bc988d8..68bd7f5d9f73 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -4,9 +4,6 @@ /* target_core_alua.c */ extern struct t10_alua_lu_gp *default_lu_gp; -/* target_core_configfs.c */ -extern struct configfs_subsystem *target_core_subsystem[]; - /* target_core_device.c */ extern struct mutex g_device_mutex; extern struct list_head g_device_list; diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index c1aa9655e96e..b7c81acf08d0 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -1367,41 +1367,26 @@ void core_scsi3_free_all_registrations( static int core_scsi3_tpg_depend_item(struct se_portal_group *tpg) { - return configfs_depend_item(tpg->se_tpg_tfo->tf_subsys, - &tpg->tpg_group.cg_item); + return target_depend_item(&tpg->tpg_group.cg_item); } static void core_scsi3_tpg_undepend_item(struct se_portal_group *tpg) { - configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys, - &tpg->tpg_group.cg_item); - + target_undepend_item(&tpg->tpg_group.cg_item); atomic_dec_mb(&tpg->tpg_pr_ref_count); } static int core_scsi3_nodeacl_depend_item(struct se_node_acl *nacl) { - struct se_portal_group *tpg = nacl->se_tpg; - if (nacl->dynamic_node_acl) return 0; - - return configfs_depend_item(tpg->se_tpg_tfo->tf_subsys, - &nacl->acl_group.cg_item); + return target_depend_item(&nacl->acl_group.cg_item); } static void core_scsi3_nodeacl_undepend_item(struct se_node_acl *nacl) { - struct se_portal_group *tpg = nacl->se_tpg; - - if (nacl->dynamic_node_acl) { - atomic_dec_mb(&nacl->acl_pr_ref_count); - return; - } - - configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys, - &nacl->acl_group.cg_item); - + if (!nacl->dynamic_node_acl) + target_undepend_item(&nacl->acl_group.cg_item); atomic_dec_mb(&nacl->acl_pr_ref_count); } @@ -1419,8 +1404,7 @@ static int core_scsi3_lunacl_depend_item(struct se_dev_entry *se_deve) nacl = lun_acl->se_lun_nacl; tpg = nacl->se_tpg; - return configfs_depend_item(tpg->se_tpg_tfo->tf_subsys, - &lun_acl->se_lun_group.cg_item); + return target_depend_item(&lun_acl->se_lun_group.cg_item); } static void core_scsi3_lunacl_undepend_item(struct se_dev_entry *se_deve) @@ -1438,9 +1422,7 @@ static void core_scsi3_lunacl_undepend_item(struct se_dev_entry *se_deve) nacl = lun_acl->se_lun_nacl; tpg = nacl->se_tpg; - configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys, - 
&lun_acl->se_lun_group.cg_item); - + target_undepend_item(&lun_acl->se_lun_group.cg_item); atomic_dec_mb(&se_deve->pr_ref_count); } diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index a600ff15dcfd..8fd680ac941b 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -58,7 +58,6 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op bool src) { struct se_device *se_dev; - struct configfs_subsystem *subsys = target_core_subsystem[0]; unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN], *dev_wwn; int rc; @@ -90,8 +89,7 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op " se_dev\n", xop->src_dev); } - rc = configfs_depend_item(subsys, - &se_dev->dev_group.cg_item); + rc = target_depend_item(&se_dev->dev_group.cg_item); if (rc != 0) { pr_err("configfs_depend_item attempt failed:" " %d for se_dev: %p\n", rc, se_dev); @@ -99,8 +97,8 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op return rc; } - pr_debug("Called configfs_depend_item for subsys: %p se_dev: %p" - " se_dev->se_dev_group: %p\n", subsys, se_dev, + pr_debug("Called configfs_depend_item for se_dev: %p" + " se_dev->se_dev_group: %p\n", se_dev, &se_dev->dev_group); mutex_unlock(&g_device_mutex); @@ -373,7 +371,6 @@ static int xcopy_pt_get_cmd_state(struct se_cmd *se_cmd) static void xcopy_pt_undepend_remotedev(struct xcopy_op *xop) { - struct configfs_subsystem *subsys = target_core_subsystem[0]; struct se_device *remote_dev; if (xop->op_origin == XCOL_SOURCE_RECV_OP) @@ -381,11 +378,11 @@ static void xcopy_pt_undepend_remotedev(struct xcopy_op *xop) else remote_dev = xop->src_dev; - pr_debug("Calling configfs_undepend_item for subsys: %p" + pr_debug("Calling configfs_undepend_item for" " remote_dev: %p remote_dev->dev_group: %p\n", - subsys, remote_dev, &remote_dev->dev_group.cg_item); + remote_dev, &remote_dev->dev_group.cg_item); - configfs_undepend_item(subsys, &remote_dev->dev_group.cg_item); + target_undepend_item(&remote_dev->dev_group.cg_item); } static void xcopy_pt_release_cmd(struct se_cmd *se_cmd) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 5e19bb53b3a9..ea32b386797f 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1409,8 +1409,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, * dependency now. */ se_tpg = &tpg->se_tpg; - ret = configfs_depend_item(se_tpg->se_tpg_tfo->tf_subsys, - &se_tpg->tpg_group.cg_item); + ret = target_depend_item(&se_tpg->tpg_group.cg_item); if (ret) { pr_warn("configfs_depend_item() failed: %d\n", ret); kfree(vs_tpg); @@ -1513,8 +1512,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, * to allow vhost-scsi WWPN se_tpg->tpg_group shutdown to occur. 
*/ se_tpg = &tpg->se_tpg; - configfs_undepend_item(se_tpg->se_tpg_tfo->tf_subsys, - &se_tpg->tpg_group.cg_item); + target_undepend_item(&se_tpg->tpg_group.cg_item); } if (match) { for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { diff --git a/include/target/target_core_configfs.h b/include/target/target_core_configfs.h index 25bb04c4209e..b99c01170392 100644 --- a/include/target/target_core_configfs.h +++ b/include/target/target_core_configfs.h @@ -40,8 +40,6 @@ struct target_fabric_configfs { struct config_item *tf_fabric; /* Passed from fabric modules */ struct config_item_type *tf_fabric_cit; - /* Pointer to target core subsystem */ - struct configfs_subsystem *tf_subsys; /* Pointer to fabric's struct module */ struct module *tf_module; struct target_core_fabric_ops tf_ops; diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 17c7f5ac7ea0..0f4dc3768587 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -4,7 +4,6 @@ struct target_core_fabric_ops { struct module *module; const char *name; - struct configfs_subsystem *tf_subsys; char *(*get_fabric_name)(void); u8 (*get_fabric_proto_ident)(struct se_portal_group *); char *(*tpg_get_wwn)(struct se_portal_group *); @@ -109,6 +108,9 @@ struct target_core_fabric_ops { int target_register_template(const struct target_core_fabric_ops *fo); void target_unregister_template(const struct target_core_fabric_ops *fo); +int target_depend_item(struct config_item *item); +void target_undepend_item(struct config_item *item); + struct se_session *transport_init_session(enum target_prot_op); int transport_alloc_session_tags(struct se_session *, unsigned int, unsigned int); -- cgit v1.2.3 From 7bfea53b5c936d706d0bf60ec218fa72cde77121 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 19 May 2015 14:44:40 -0700 Subject: target: Move passthrough CDB parsing into a common function Aside from whether they handle BIDI ops or not, parsing of the CDB by kernel and user SCSI passthrough modules should be identical. Move this into a new passthrough_parse_cdb() and call it from tcm-pscsi and tcm-user. Reported-by: Christoph Hellwig Reviewed-by: Ilias Tsitsimpis Signed-off-by: Andy Grover Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 74 ++++++++++++++++++++++++++++++++++++ drivers/target/target_core_pscsi.c | 53 +------------------------- drivers/target/target_core_user.c | 43 +--------------------- include/target/target_core_backend.h | 2 + 4 files changed, 78 insertions(+), 94 deletions(-) (limited to 'include') diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 7faa6aef9a4d..56b20dc54087 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -33,6 +33,7 @@ #include #include #include +#include <asm/unaligned.h> #include #include #include @@ -1707,3 +1708,76 @@ void core_dev_release_virtual_lun0(void) target_free_device(g_lun0_dev); core_delete_hba(hba); } + +/* + * Common CDB parsing for kernel and user passthrough. + */ +sense_reason_t +passthrough_parse_cdb(struct se_cmd *cmd, + sense_reason_t (*exec_cmd)(struct se_cmd *cmd)) +{ + unsigned char *cdb = cmd->t_task_cdb; + + /* + * Clear a lun set in the cdb if the initiator talking to use spoke + * and old standards version, as we can't assume the underlying device + * won't choke up on it.
+ */ + switch (cdb[0]) { + case READ_10: /* SBC - RDProtect */ + case READ_12: /* SBC - RDProtect */ + case READ_16: /* SBC - RDProtect */ + case SEND_DIAGNOSTIC: /* SPC - SELF-TEST Code */ + case VERIFY: /* SBC - VRProtect */ + case VERIFY_16: /* SBC - VRProtect */ + case WRITE_VERIFY: /* SBC - VRProtect */ + case WRITE_VERIFY_12: /* SBC - VRProtect */ + case MAINTENANCE_IN: /* SPC - Parameter Data Format for SA RTPG */ + break; + default: + cdb[1] &= 0x1f; /* clear logical unit number */ + break; + } + + /* + * For REPORT LUNS we always need to emulate the response, for everything + * else, pass it up. + */ + if (cdb[0] == REPORT_LUNS) { + cmd->execute_cmd = spc_emulate_report_luns; + return TCM_NO_SENSE; + } + + /* Set DATA_CDB flag for ops that should have it */ + switch (cdb[0]) { + case READ_6: + case READ_10: + case READ_12: + case READ_16: + case WRITE_6: + case WRITE_10: + case WRITE_12: + case WRITE_16: + case WRITE_VERIFY: + case WRITE_VERIFY_12: + case 0x8e: /* WRITE_VERIFY_16 */ + case COMPARE_AND_WRITE: + case XDWRITEREAD_10: + cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; + break; + case VARIABLE_LENGTH_CMD: + switch (get_unaligned_be16(&cdb[8])) { + case READ_32: + case WRITE_32: + case 0x0c: /* WRITE_VERIFY_32 */ + case XDWRITEREAD_32: + cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; + break; + } + } + + cmd->execute_cmd = exec_cmd; + + return TCM_NO_SENSE; +} +EXPORT_SYMBOL(passthrough_parse_cdb); diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 4073869d2090..53bd0eb57095 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -973,64 +973,13 @@ fail: return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } -/* - * Clear a lun set in the cdb if the initiator talking to use spoke - * and old standards version, as we can't assume the underlying device - * won't choke up on it. - */ -static inline void pscsi_clear_cdb_lun(unsigned char *cdb) -{ - switch (cdb[0]) { - case READ_10: /* SBC - RDProtect */ - case READ_12: /* SBC - RDProtect */ - case READ_16: /* SBC - RDProtect */ - case SEND_DIAGNOSTIC: /* SPC - SELF-TEST Code */ - case VERIFY: /* SBC - VRProtect */ - case VERIFY_16: /* SBC - VRProtect */ - case WRITE_VERIFY: /* SBC - VRProtect */ - case WRITE_VERIFY_12: /* SBC - VRProtect */ - case MAINTENANCE_IN: /* SPC - Parameter Data Format for SA RTPG */ - break; - default: - cdb[1] &= 0x1f; /* clear logical unit number */ - break; - } -} - static sense_reason_t pscsi_parse_cdb(struct se_cmd *cmd) { - unsigned char *cdb = cmd->t_task_cdb; - if (cmd->se_cmd_flags & SCF_BIDI) return TCM_UNSUPPORTED_SCSI_OPCODE; - pscsi_clear_cdb_lun(cdb); - - /* - * For REPORT LUNS we always need to emulate the response, for everything - * else the default for pSCSI is to pass the command to the underlying - * LLD / physical hardware. 
- */ - switch (cdb[0]) { - case REPORT_LUNS: - cmd->execute_cmd = spc_emulate_report_luns; - return 0; - case READ_6: - case READ_10: - case READ_12: - case READ_16: - case WRITE_6: - case WRITE_10: - case WRITE_12: - case WRITE_16: - case WRITE_VERIFY: - cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - /* FALLTHROUGH*/ - default: - cmd->execute_cmd = pscsi_execute_cmd; - return 0; - } + return passthrough_parse_cdb(cmd, pscsi_execute_cmd); } static sense_reason_t diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 90e084ea7b92..2768ea2cfd7a 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1049,48 +1049,7 @@ tcmu_pass_op(struct se_cmd *se_cmd) static sense_reason_t tcmu_parse_cdb(struct se_cmd *cmd) { - unsigned char *cdb = cmd->t_task_cdb; - - /* - * For REPORT LUNS we always need to emulate the response, for everything - * else, pass it up. - */ - if (cdb[0] == REPORT_LUNS) { - cmd->execute_cmd = spc_emulate_report_luns; - return TCM_NO_SENSE; - } - - /* Set DATA_CDB flag for ops that should have it */ - switch (cdb[0]) { - case READ_6: - case READ_10: - case READ_12: - case READ_16: - case WRITE_6: - case WRITE_10: - case WRITE_12: - case WRITE_16: - case WRITE_VERIFY: - case WRITE_VERIFY_12: - case 0x8e: /* WRITE_VERIFY_16 */ - case COMPARE_AND_WRITE: - case XDWRITEREAD_10: - cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - break; - case VARIABLE_LENGTH_CMD: - switch (get_unaligned_be16(&cdb[8])) { - case READ_32: - case WRITE_32: - case 0x0c: /* WRITE_VERIFY_32 */ - case XDWRITEREAD_32: - cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - break; - } - } - - cmd->execute_cmd = tcmu_pass_op; - - return TCM_NO_SENSE; + return passthrough_parse_cdb(cmd, tcmu_pass_op); } DEF_TB_DEV_ATTRIB_RO(tcmu, hw_pi_prot_type); diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index d61be7297b2c..563e11970152 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -138,5 +138,7 @@ int se_dev_set_queue_depth(struct se_device *, u32); int se_dev_set_max_sectors(struct se_device *, u32); int se_dev_set_optimal_sectors(struct se_device *, u32); int se_dev_set_block_size(struct se_device *, u32); +sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd, + sense_reason_t (*exec_cmd)(struct se_cmd *cmd)); #endif /* TARGET_CORE_BACKEND_H */ -- cgit v1.2.3 From a3541703ebbf99d499656b15987175f6579b42ac Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 19 May 2015 14:44:41 -0700 Subject: target: Use a PASSTHROUGH flag instead of transport_types It seems like we only care if a transport is passthrough or not. Convert transport_type to a flags field and replace TRANSPORT_PLUGIN_* with a flag, TRANSPORT_FLAG_PASSTHROUGH. 
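The effect at call sites, sketched from the conversions below:

    /* Before: exact match against one of three transport types */
    if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
            return 0;

    /* After: test a capability bit; further flags can be OR'd in later */
    if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
            return 0;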
Signed-off-by: Andy Grover Reviewed-by: Ilias Tsitsimpis Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 4 ++-- drivers/target/target_core_configfs.c | 10 +++++----- drivers/target/target_core_device.c | 4 ++-- drivers/target/target_core_file.c | 1 - drivers/target/target_core_iblock.c | 1 - drivers/target/target_core_pr.c | 2 +- drivers/target/target_core_pscsi.c | 2 +- drivers/target/target_core_rd.c | 1 - drivers/target/target_core_transport.c | 6 +++--- drivers/target/target_core_user.c | 2 +- include/target/target_core_backend.h | 6 ++---- 11 files changed, 17 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 75cbde1f7c5b..4f8d4d459aa4 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -704,7 +704,7 @@ target_alua_state_check(struct se_cmd *cmd) if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE) return 0; - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; if (!port) @@ -2377,7 +2377,7 @@ ssize_t core_alua_store_secondary_write_metadata( int core_setup_alua(struct se_device *dev) { - if (dev->transport->transport_type != TRANSPORT_PLUGIN_PHBA_PDEV && + if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) && !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) { struct t10_alua_lu_gp_member *lu_gp_mem; diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 1580077971f8..e7b0430a0575 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -811,7 +811,7 @@ static ssize_t target_core_dev_pr_show_attr_res_holder(struct se_device *dev, { int ret; - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return sprintf(page, "Passthrough\n"); spin_lock(&dev->dev_reservation_lock); @@ -962,7 +962,7 @@ SE_DEV_PR_ATTR_RO(res_pr_type); static ssize_t target_core_dev_pr_show_attr_res_type( struct se_device *dev, char *page) { - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return sprintf(page, "SPC_PASSTHROUGH\n"); else if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS) return sprintf(page, "SPC2_RESERVATIONS\n"); @@ -975,7 +975,7 @@ SE_DEV_PR_ATTR_RO(res_type); static ssize_t target_core_dev_pr_show_attr_res_aptpl_active( struct se_device *dev, char *page) { - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; return sprintf(page, "APTPL Bit Status: %s\n", @@ -990,7 +990,7 @@ SE_DEV_PR_ATTR_RO(res_aptpl_active); static ssize_t target_core_dev_pr_show_attr_res_aptpl_metadata( struct se_device *dev, char *page) { - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; return sprintf(page, "Ready to process PR APTPL metadata..\n"); @@ -1037,7 +1037,7 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata( u16 port_rpti = 0, tpgt = 0; u8 type = 0, scope; - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS) return 0; diff --git 
a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 56b20dc54087..ce5f768181ff 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -528,7 +528,7 @@ static void core_export_port( list_add_tail(&port->sep_list, &dev->dev_sep_list); spin_unlock(&dev->se_port_lock); - if (dev->transport->transport_type != TRANSPORT_PLUGIN_PHBA_PDEV && + if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) && !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) { tg_pt_gp_mem = core_alua_allocate_tg_pt_gp_mem(port); if (IS_ERR(tg_pt_gp_mem) || !tg_pt_gp_mem) { @@ -1604,7 +1604,7 @@ int target_configure_device(struct se_device *dev) * anything virtual (IBLOCK, FILEIO, RAMDISK), but not for TCM/pSCSI * passthrough because this is being provided by the backend LLD. */ - if (dev->transport->transport_type != TRANSPORT_PLUGIN_PHBA_PDEV) { + if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)) { strncpy(&dev->t10_wwn.vendor[0], "LIO-ORG", 8); strncpy(&dev->t10_wwn.model[0], dev->transport->inquiry_prod, 16); diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index f7e6e51aed36..3f27bfd816d8 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -958,7 +958,6 @@ static struct se_subsystem_api fileio_template = { .inquiry_prod = "FILEIO", .inquiry_rev = FD_VERSION, .owner = THIS_MODULE, - .transport_type = TRANSPORT_PLUGIN_VHBA_PDEV, .attach_hba = fd_attach_hba, .detach_hba = fd_detach_hba, .alloc_device = fd_alloc_device, diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 1b7947c2510f..8c965683789f 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -904,7 +904,6 @@ static struct se_subsystem_api iblock_template = { .inquiry_prod = "IBLOCK", .inquiry_rev = IBLOCK_VERSION, .owner = THIS_MODULE, - .transport_type = TRANSPORT_PLUGIN_VHBA_PDEV, .attach_hba = iblock_attach_hba, .detach_hba = iblock_detach_hba, .alloc_device = iblock_alloc_device, diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index b7c81acf08d0..a15411c79ae9 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -4093,7 +4093,7 @@ target_check_reservation(struct se_cmd *cmd) return 0; if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE) return 0; - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; spin_lock(&dev->dev_reservation_lock); diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 53bd0eb57095..ecc5eaef13d6 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -1141,7 +1141,7 @@ static struct configfs_attribute *pscsi_backend_dev_attrs[] = { static struct se_subsystem_api pscsi_template = { .name = "pscsi", .owner = THIS_MODULE, - .transport_type = TRANSPORT_PLUGIN_PHBA_PDEV, + .transport_flags = TRANSPORT_FLAG_PASSTHROUGH, .attach_hba = pscsi_attach_hba, .detach_hba = pscsi_detach_hba, .pmode_enable_hba = pscsi_pmode_enable_hba, diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index a263bf5fab8d..d16489b6a1a4 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -733,7 +733,6 @@ static struct se_subsystem_api rd_mcp_template = { .name = "rd_mcp", .inquiry_prod = "RAMDISK-MCP", .inquiry_rev = RD_MCP_VERSION, - 
.transport_type = TRANSPORT_PLUGIN_VHBA_VDEV, .attach_hba = rd_attach_hba, .detach_hba = rd_detach_hba, .alloc_device = rd_alloc_device, diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 90c92f04a586..675f2d9d1f14 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1196,7 +1196,7 @@ transport_check_alloc_task_attr(struct se_cmd *cmd) * Check if SAM Task Attribute emulation is enabled for this * struct se_device storage object */ - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; if (cmd->sam_task_attr == TCM_ACA_TAG) { @@ -1787,7 +1787,7 @@ static bool target_handle_task_attr(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return false; /* @@ -1912,7 +1912,7 @@ static void transport_complete_task_attr(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; - if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return; if (cmd->sam_task_attr == TCM_SIMPLE_TAG) { diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 2768ea2cfd7a..07d2996d8c1f 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1077,7 +1077,7 @@ static struct se_subsystem_api tcmu_template = { .inquiry_prod = "USER", .inquiry_rev = TCMU_VERSION, .owner = THIS_MODULE, - .transport_type = TRANSPORT_PLUGIN_PHBA_PDEV, + .transport_flags = TRANSPORT_FLAG_PASSTHROUGH, .attach_hba = tcmu_attach_hba, .detach_hba = tcmu_detach_hba, .alloc_device = tcmu_alloc_device, diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 563e11970152..5f1225706993 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -1,9 +1,7 @@ #ifndef TARGET_CORE_BACKEND_H #define TARGET_CORE_BACKEND_H -#define TRANSPORT_PLUGIN_PHBA_PDEV 1 -#define TRANSPORT_PLUGIN_VHBA_PDEV 2 -#define TRANSPORT_PLUGIN_VHBA_VDEV 3 +#define TRANSPORT_FLAG_PASSTHROUGH 1 struct target_backend_cits { struct config_item_type tb_dev_cit; @@ -22,7 +20,7 @@ struct se_subsystem_api { char inquiry_rev[4]; struct module *owner; - u8 transport_type; + u8 transport_flags; int (*attach_hba)(struct se_hba *, u32); void (*detach_hba)(struct se_hba *); -- cgit v1.2.3 From 9f950415e4e28e7cfae2e416b43e862e8101d996 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 29 May 2015 13:47:07 -0400 Subject: tcp: fix child sockets to use system default congestion control if not set Linux 3.17 and earlier are explicitly engineered so that if the app doesn't specifically request a CC module on a listener before the SYN arrives, then the child gets the system default CC when the connection is established. See tcp_init_congestion_control() in 3.17 or earlier, which says "if no choice made yet assign the current value set as default". The change ("net: tcp: assign tcp cong_ops when tcp sk is created") altered these semantics, so that children got their parent listener's congestion control even if the system default had changed after the listener was created. 
This commit returns to those original semantics from 3.17 and earlier, since they are the original semantics from 2007 in 4d4d3d1e8 ("[TCP]: Congestion control initialization."), and some Linux congestion control workflows depend on that. In summary, if a listener socket specifically sets TCP_CONGESTION to "x", or the route locks the CC module to "x", then the child gets "x". Otherwise the child gets current system default from net.ipv4.tcp_congestion_control. That's the behavior in 3.17 and earlier, and this commit restores that. Fixes: 55d8694fa82c ("net: tcp: assign tcp cong_ops when tcp sk is created") Cc: Florian Westphal Cc: Daniel Borkmann Cc: Glenn Judd Cc: Stephen Hemminger Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: Yuchung Cheng Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 3 ++- net/ipv4/tcp_cong.c | 5 ++++- net/ipv4/tcp_minisocks.c | 5 ++++- 3 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 497bc14cdb85..0320bbb7d7b5 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -98,7 +98,8 @@ struct inet_connection_sock { const struct tcp_congestion_ops *icsk_ca_ops; const struct inet_connection_sock_af_ops *icsk_af_ops; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); - __u8 icsk_ca_state:7, + __u8 icsk_ca_state:6, + icsk_ca_setsockopt:1, icsk_ca_dst_locked:1; __u8 icsk_retransmits; __u8 icsk_pending; diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 7a5ae50c80c8..84be008c945c 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -187,6 +187,7 @@ static void tcp_reinit_congestion_control(struct sock *sk, tcp_cleanup_congestion_control(sk); icsk->icsk_ca_ops = ca; + icsk->icsk_ca_setsockopt = 1; if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init) icsk->icsk_ca_ops->init(sk); @@ -335,8 +336,10 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) rcu_read_lock(); ca = __tcp_ca_find_autoload(name); /* No change asking for existing value */ - if (ca == icsk->icsk_ca_ops) + if (ca == icsk->icsk_ca_ops) { + icsk->icsk_ca_setsockopt = 1; goto out; + } if (!ca) err = -ENOENT; else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b5732a54f2ad..17e7339ee5ca 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -420,7 +420,10 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) rcu_read_unlock(); } - if (!ca_got_dst && !try_module_get(icsk->icsk_ca_ops->owner)) + /* If no valid choice made yet, assign current system default ca. */ + if (!ca_got_dst && + (!icsk->icsk_ca_setsockopt || + !try_module_get(icsk->icsk_ca_ops->owner))) tcp_assign_congestion_control(sk); tcp_set_ca_state(sk, TCP_CA_Open); -- cgit v1.2.3 From 8a7b19d8b542b87bccc3eaaf81dcc90a5ca48aea Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Sat, 30 May 2015 17:39:25 +0200 Subject: include/uapi/linux/virtio_balloon.h: include linux/virtio_types.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes userspace compilation error: error: unknown type name ‘__virtio16’ __virtio16 tag; Signed-off-by: Mikko Rapeli Signed-off-by: Michael S. 
Tsirkin --- include/uapi/linux/virtio_balloon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index 984169a819ee..d7f1cbc3766c 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -26,6 +26,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include <linux/types.h> +#include <linux/virtio_types.h> #include <linux/virtio_ids.h> #include <linux/virtio_config.h> -- cgit v1.2.3
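To see the userspace failure this cures, a minimal hypothetical consumer; without the new include, __virtio16 inside the header is undefined and compilation stops with the error quoted in the commit message:

    #include <stdio.h>
    #include <linux/virtio_balloon.h>  /* now pulls in linux/virtio_types.h itself */

    int main(void)
    {
            /* Field layout assumed from the v4.1-era header: a packed pair of
             * __virtio16 tag and __virtio64 val. */
            struct virtio_balloon_stat st = { .tag = 0, .val = 0 };
            printf("stat record is %zu bytes\n", sizeof(st));
            return 0;
    }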