From f1c6f1a7eed963ed233ba4c8b6fa8addb86c6ddc Mon Sep 17 00:00:00 2001
From: Carsten Emde <C.Emde@osadl.org>
Date: Wed, 26 Oct 2011 23:14:16 +0200
Subject: sched: Set the command name of the idle tasks in SMP kernels

In UP systems, the idle task is initialized using the init_task
structure from which the command name is taken (currently "swapper").

In SMP systems, one idle task per CPU is forked by the worker thread
from which the task structure is copied. The command name is, therefore,
"kworker/0:0" or "kworker/0:1", if not updated. Since such update was
lacking, all idle tasks in SMP systems were incorrectly named. This
longtime bug was not discovered immediately, because there is no /proc/0
entry - the bug only becomes apparent when tracing is enabled.

This patch sets the command name of the idle tasks in SMP systems to the
name that is used in the INIT_TASK structure suffixed by a slash and the
number of the CPU.

Signed-off-by: Carsten Emde <C.Emde@osadl.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20111026211708.768925506@osadl.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/init_task.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 08ffab01e76c..b6e5b8b000e0 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -126,6 +126,8 @@ extern struct cred init_cred;
 # define INIT_PERF_EVENTS(tsk)
 #endif
 
+#define INIT_TASK_COMM "swapper"
+
 /*
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -162,7 +164,7 @@ extern struct cred init_cred;
 	.group_leader	= &tsk,						\
 	RCU_INIT_POINTER(.real_cred, &init_cred),			\
 	RCU_INIT_POINTER(.cred, &init_cred),				\
-	.comm		= "swapper",					\
+	.comm		= INIT_TASK_COMM,				\
 	.thread		= INIT_THREAD,					\
 	.fs		= &init_fs,					\
 	.files		= &init_files,					\
-- 
cgit v1.2.3


From 5eccdf5e06eb67779716ae26142402a1ae9b012c Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 21 Nov 2011 06:53:46 +0000
Subject: tc: comment spelling fixes

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_sched.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index c5336705921f..7281d5acf2f9 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -30,7 +30,7 @@
  */
 
 struct tc_stats {
-	__u64	bytes;			/* NUmber of enqueues bytes */
+	__u64	bytes;			/* Number of enqueued bytes */
 	__u32	packets;		/* Number of enqueued packets	*/
 	__u32	drops;			/* Packets dropped because of lack of resources */
 	__u32	overlimits;		/* Number of throttle events when this
@@ -297,7 +297,7 @@ struct tc_htb_glob {
 	__u32 debug;		/* debug flags */
 
 	/* stats */
-	__u32 direct_pkts; /* count of non shapped packets */
+	__u32 direct_pkts; /* count of non shaped packets */
 };
 enum {
 	TCA_HTB_UNSPEC,
@@ -503,7 +503,7 @@ enum {
 };
 #define NETEM_LOSS_MAX (__NETEM_LOSS_MAX - 1)
 
-/* State transition probablities for 4 state model */
+/* State transition probabilities for 4 state model */
 struct tc_netem_gimodel {
 	__u32	p13;
 	__u32	p31;
-- 
cgit v1.2.3


From 5cac98dd06bc43a7baab3523184f70fd359e9f35 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sun, 27 Nov 2011 21:14:46 +0000
Subject: net: Fix corruption in /proc/*/net/dev_mcast

I just hit this during my testing. Isn't there another bug lurking?

BUG kmalloc-8: Redzone overwritten

INFO: 0xc0000000de9dec48-0xc0000000de9dec4b. First byte 0x0 instead of 0xcc
INFO: Allocated in .__seq_open_private+0x30/0xa0 age=0 cpu=5 pid=3896
	.__kmalloc+0x1e0/0x2d0
	.__seq_open_private+0x30/0xa0
	.seq_open_net+0x60/0xe0
	.dev_mc_seq_open+0x4c/0x70
	.proc_reg_open+0xd8/0x260
	.__dentry_open.clone.11+0x2b8/0x400
	.do_last+0xf4/0x950
	.path_openat+0xf8/0x480
	.do_filp_open+0x48/0xc0
	.do_sys_open+0x140/0x250
	syscall_exit+0x0/0x40

dev_mc_seq_ops uses dev_seq_start/next/stop but only allocates
sizeof(struct seq_net_private) of private data, whereas it expects
sizeof(struct dev_iter_state):

struct dev_iter_state {
	struct seq_net_private p;
	unsigned int pos; /* bucket << BUCKET_SPACE + offset */
};

Create dev_seq_open_ops and use it so we don't have to expose
struct dev_iter_state.

[ Problem added by commit f04565ddf52e4 (dev: use name hash for
  dev_seq_ops) -Eric ]

Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 ++
 net/core/dev.c            | 6 ++++++
 net/core/dev_addr_lists.c | 3 +--
 3 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cbeb5867cff7..a82ad4dd306a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2536,6 +2536,8 @@ extern void		net_disable_timestamp(void);
 extern void *dev_seq_start(struct seq_file *seq, loff_t *pos);
 extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos);
 extern void dev_seq_stop(struct seq_file *seq, void *v);
+extern int dev_seq_open_ops(struct inode *inode, struct file *file,
+			    const struct seq_operations *ops);
 #endif
 
 extern int netdev_class_create_file(struct class_attribute *class_attr);
diff --git a/net/core/dev.c b/net/core/dev.c
index 6ba50a1e404c..1482eea0bbf0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4282,6 +4282,12 @@ static int dev_seq_open(struct inode *inode, struct file *file)
 			    sizeof(struct dev_iter_state));
 }
 
+int dev_seq_open_ops(struct inode *inode, struct file *file,
+		     const struct seq_operations *ops)
+{
+	return seq_open_net(inode, file, ops, sizeof(struct dev_iter_state));
+}
+
 static const struct file_operations dev_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open    = dev_seq_open,
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 277faef9148d..febba516db62 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -696,8 +696,7 @@ static const struct seq_operations dev_mc_seq_ops = {
 
 static int dev_mc_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_net(inode, file, &dev_mc_seq_ops,
-			    sizeof(struct seq_net_private));
+	return dev_seq_open_ops(inode, file, &dev_mc_seq_ops);
 }
 
 static const struct file_operations dev_mc_seq_fops = {
-- 
cgit v1.2.3


From 4f718a29fe4908c2cea782f751e9805319684e2b Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Mon, 28 Nov 2011 09:44:14 +0100
Subject: firmware: Sigma: Prevent out of bounds memory access

The SigmaDSP firmware loader currently does not perform enough boundary size
checks when processing the firmware. As a result it is possible that a
malformed firmware can cause an out of bounds memory access.

This patch adds checks which ensure that both the action header and the payload
are completely inside the firmware data boundaries before processing them.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Acked-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Cc: stable@kernel.org
---
 drivers/firmware/sigma.c | 76 +++++++++++++++++++++++++++++++++++-------------
 include/linux/sigma.h    |  5 ----
 2 files changed, 55 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/sigma.c b/drivers/firmware/sigma.c
index f10fc521951b..c780baa59ed9 100644
--- a/drivers/firmware/sigma.c
+++ b/drivers/firmware/sigma.c
@@ -14,13 +14,34 @@
 #include <linux/module.h>
 #include <linux/sigma.h>
 
-/* Return: 0==OK, <0==error, =1 ==no more actions */
+static size_t sigma_action_size(struct sigma_action *sa)
+{
+	size_t payload = 0;
+
+	switch (sa->instr) {
+	case SIGMA_ACTION_WRITEXBYTES:
+	case SIGMA_ACTION_WRITESINGLE:
+	case SIGMA_ACTION_WRITESAFELOAD:
+		payload = sigma_action_len(sa);
+		break;
+	default:
+		break;
+	}
+
+	payload = ALIGN(payload, 2);
+
+	return payload + sizeof(struct sigma_action);
+}
+
+/*
+ * Returns a negative error value in case of an error, 0 if processing of
+ * the firmware should be stopped after this action, 1 otherwise.
+ */
 static int
-process_sigma_action(struct i2c_client *client, struct sigma_firmware *ssfw)
+process_sigma_action(struct i2c_client *client, struct sigma_action *sa)
 {
-	struct sigma_action *sa = (void *)(ssfw->fw->data + ssfw->pos);
 	size_t len = sigma_action_len(sa);
-	int ret = 0;
+	int ret;
 
 	pr_debug("%s: instr:%i addr:%#x len:%zu\n", __func__,
 		sa->instr, sa->addr, len);
@@ -29,44 +50,50 @@ process_sigma_action(struct i2c_client *client, struct sigma_firmware *ssfw)
 	case SIGMA_ACTION_WRITEXBYTES:
 	case SIGMA_ACTION_WRITESINGLE:
 	case SIGMA_ACTION_WRITESAFELOAD:
-		if (ssfw->fw->size < ssfw->pos + len)
-			return -EINVAL;
 		ret = i2c_master_send(client, (void *)&sa->addr, len);
 		if (ret < 0)
 			return -EINVAL;
 		break;
-
 	case SIGMA_ACTION_DELAY:
-		ret = 0;
 		udelay(len);
 		len = 0;
 		break;
-
 	case SIGMA_ACTION_END:
-		return 1;
-
+		return 0;
 	default:
 		return -EINVAL;
 	}
 
-	/* when arrive here ret=0 or sent data */
-	ssfw->pos += sigma_action_size(sa, len);
-	return ssfw->pos == ssfw->fw->size;
+	return 1;
 }
 
 static int
 process_sigma_actions(struct i2c_client *client, struct sigma_firmware *ssfw)
 {
-	pr_debug("%s: processing %p\n", __func__, ssfw);
+	struct sigma_action *sa;
+	size_t size;
+	int ret;
+
+	while (ssfw->pos + sizeof(*sa) <= ssfw->fw->size) {
+		sa = (struct sigma_action *)(ssfw->fw->data + ssfw->pos);
+
+		size = sigma_action_size(sa);
+		ssfw->pos += size;
+		if (ssfw->pos > ssfw->fw->size || size == 0)
+			break;
+
+		ret = process_sigma_action(client, sa);
 
-	while (1) {
-		int ret = process_sigma_action(client, ssfw);
 		pr_debug("%s: action returned %i\n", __func__, ret);
-		if (ret == 1)
-			return 0;
-		else if (ret)
+
+		if (ret <= 0)
 			return ret;
 	}
+
+	if (ssfw->pos != ssfw->fw->size)
+		return -EINVAL;
+
+	return 0;
 }
 
 int process_sigma_firmware(struct i2c_client *client, const char *name)
@@ -89,7 +116,14 @@ int process_sigma_firmware(struct i2c_client *client, const char *name)
 
 	/* then verify the header */
 	ret = -EINVAL;
-	if (fw->size < sizeof(*ssfw_head))
+
+	/*
+	 * Reject too small or unreasonable large files. The upper limit has been
+	 * chosen a bit arbitrarily, but it should be enough for all practical
+	 * purposes and having the limit makes it easier to avoid integer
+	 * overflows later in the loading process.
+	 */
+	if (fw->size < sizeof(*ssfw_head) || fw->size >= 0x4000000)
 		goto done;
 
 	ssfw_head = (void *)fw->data;
diff --git a/include/linux/sigma.h b/include/linux/sigma.h
index e2accb3164d8..9a138c2946bb 100644
--- a/include/linux/sigma.h
+++ b/include/linux/sigma.h
@@ -50,11 +50,6 @@ static inline u32 sigma_action_len(struct sigma_action *sa)
 	return (sa->len_hi << 16) | sa->len;
 }
 
-static inline size_t sigma_action_size(struct sigma_action *sa, u32 payload_len)
-{
-	return sizeof(*sa) + payload_len + (payload_len % 2);
-}
-
 extern int process_sigma_firmware(struct i2c_client *client, const char *name);
 
 #endif
-- 
cgit v1.2.3


From bda63586bc5929e97288cdb371bb6456504867ed Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Mon, 28 Nov 2011 09:44:16 +0100
Subject: firmware: Sigma: Fix endianess issues

Currently the SigmaDSP firmware loader only works correctly on little-endian
systems. Fix this by using the proper endianess conversion functions.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Acked-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Cc: stable@kernel.org
---
 drivers/firmware/sigma.c | 2 +-
 include/linux/sigma.h    | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/sigma.c b/drivers/firmware/sigma.c
index 36265de0a9e8..1eedb6f7fdab 100644
--- a/drivers/firmware/sigma.c
+++ b/drivers/firmware/sigma.c
@@ -133,7 +133,7 @@ int process_sigma_firmware(struct i2c_client *client, const char *name)
 	crc = crc32(0, fw->data + sizeof(*ssfw_head),
 			fw->size - sizeof(*ssfw_head));
 	pr_debug("%s: crc=%x\n", __func__, crc);
-	if (crc != ssfw_head->crc)
+	if (crc != le32_to_cpu(ssfw_head->crc))
 		goto done;
 
 	ssfw.pos = sizeof(*ssfw_head);
diff --git a/include/linux/sigma.h b/include/linux/sigma.h
index 9a138c2946bb..d0de882c0d96 100644
--- a/include/linux/sigma.h
+++ b/include/linux/sigma.h
@@ -24,7 +24,7 @@ struct sigma_firmware {
 struct sigma_firmware_header {
 	unsigned char magic[7];
 	u8 version;
-	u32 crc;
+	__le32 crc;
 };
 
 enum {
@@ -40,14 +40,14 @@ enum {
 struct sigma_action {
 	u8 instr;
 	u8 len_hi;
-	u16 len;
-	u16 addr;
+	__le16 len;
+	__be16 addr;
 	unsigned char payload[];
 };
 
 static inline u32 sigma_action_len(struct sigma_action *sa)
 {
-	return (sa->len_hi << 16) | sa->len;
+	return (sa->len_hi << 16) | le16_to_cpu(sa->len);
 }
 
 extern int process_sigma_firmware(struct i2c_client *client, const char *name);
-- 
cgit v1.2.3


From a67ba43d30bf8c1cfdc2615439455302d2408453 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 1 Dec 2011 12:54:31 -0500
Subject: asm-generic/unistd.h: support new process_vm_{readv,write} syscalls

Also prototype the "compat" functions so they can be referenced
from C code.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/unistd.h | 8 +++++++-
 include/linux/compat.h       | 9 +++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index f4c38d8c6674..2292d1af9d70 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -685,9 +685,15 @@ __SYSCALL(__NR_syncfs, sys_syncfs)
 __SYSCALL(__NR_setns, sys_setns)
 #define __NR_sendmmsg 269
 __SC_COMP(__NR_sendmmsg, sys_sendmmsg, compat_sys_sendmmsg)
+#define __NR_process_vm_readv 270
+__SC_COMP(__NR_process_vm_readv, sys_process_vm_readv, \
+          compat_sys_process_vm_readv)
+#define __NR_process_vm_writev 271
+__SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
+          compat_sys_process_vm_writev)
 
 #undef __NR_syscalls
-#define __NR_syscalls 270
+#define __NR_syscalls 272
 
 /*
  * All syscalls below here should go away really,
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 154bf5683015..66ed067fb729 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -552,5 +552,14 @@ extern ssize_t compat_rw_copy_check_uvector(int type,
 
 extern void __user *compat_alloc_user_space(unsigned long len);
 
+asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid,
+		const struct compat_iovec __user *lvec,
+		unsigned long liovcnt, const struct compat_iovec __user *rvec,
+		unsigned long riovcnt, unsigned long flags);
+asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid,
+		const struct compat_iovec __user *lvec,
+		unsigned long liovcnt, const struct compat_iovec __user *rvec,
+		unsigned long riovcnt, unsigned long flags);
+
 #endif /* CONFIG_COMPAT */
 #endif /* _LINUX_COMPAT_H */
-- 
cgit v1.2.3


From 10c6db110d0eb4466b59812c49088ab56218fc2e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sat, 26 Nov 2011 02:47:31 +0100
Subject: perf: Fix loss of notification with multi-event

When you do:
        $ perf record -e cycles,cycles,cycles noploop 10

You expect about 10,000 samples for each event, i.e., 10s at
1000samples/sec. However, this is not what's happening. You
get much fewer samples, maybe 3700 samples/event:

$ perf report -D | tail -15
Aggregated stats:
           TOTAL events:      10998
            MMAP events:         66
            COMM events:          2
          SAMPLE events:      10930
cycles stats:
           TOTAL events:       3644
          SAMPLE events:       3644
cycles stats:
           TOTAL events:       3642
          SAMPLE events:       3642
cycles stats:
           TOTAL events:       3644
          SAMPLE events:       3644

On a Intel Nehalem or even AMD64, there are 4 counters capable
of measuring cycles, so there is plenty of space to measure those
events without multiplexing (even with the NMI watchdog active).
And even with multiplexing, we'd expect roughly the same number
of samples per event.

The root of the problem was that when the event that caused the buffer
to become full was not the first event passed on the cmdline, the user
notification would get lost. The notification was sent to the file
descriptor of the overflowed event but the perf tool was not polling
on it.  The perf tool aggregates all samples into a single buffer,
i.e., the buffer of the first event. Consequently, it assumes
notifications for any event will come via that descriptor.

The seemingly straight forward solution of moving the waitq into the
ringbuffer object doesn't work because of life-time issues. One could
perf_event_set_output() on a fd that you're also blocking on and cause
the old rb object to be freed while its waitq would still be
referenced by the blocked thread -> FAIL.

Therefore link all events to the ringbuffer and broadcast the wakeup
from the ringbuffer object to all possible events that could be waited
upon. This is rather ugly, and we're open to better solutions but it
works for now.

Reported-by: Stephane Eranian <eranian@google.com>
Finished-by: Stephane Eranian <eranian@google.com>
Reviewed-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20111126014731.GA7030@quad
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h  |  1 +
 kernel/events/core.c        | 86 +++++++++++++++++++++++++++++++++++++++++++--
 kernel/events/internal.h    |  3 ++
 kernel/events/ring_buffer.c |  3 ++
 4 files changed, 91 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1e9ebe5e0091..b1f89122bf6a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -822,6 +822,7 @@ struct perf_event {
 	int				mmap_locked;
 	struct user_struct		*mmap_user;
 	struct ring_buffer		*rb;
+	struct list_head		rb_entry;
 
 	/* poll related */
 	wait_queue_head_t		waitq;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b0c1186fd97b..600c1629b64d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -185,6 +185,9 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
 static void update_context_time(struct perf_event_context *ctx);
 static u64 perf_event_time(struct perf_event *event);
 
+static void ring_buffer_attach(struct perf_event *event,
+			       struct ring_buffer *rb);
+
 void __weak perf_event_print_debug(void)	{ }
 
 extern __weak const char *perf_pmu_name(void)
@@ -3191,12 +3194,33 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 	struct ring_buffer *rb;
 	unsigned int events = POLL_HUP;
 
+	/*
+	 * Race between perf_event_set_output() and perf_poll(): perf_poll()
+	 * grabs the rb reference but perf_event_set_output() overrides it.
+	 * Here is the timeline for two threads T1, T2:
+	 * t0: T1, rb = rcu_dereference(event->rb)
+	 * t1: T2, old_rb = event->rb
+	 * t2: T2, event->rb = new rb
+	 * t3: T2, ring_buffer_detach(old_rb)
+	 * t4: T1, ring_buffer_attach(rb1)
+	 * t5: T1, poll_wait(event->waitq)
+	 *
+	 * To avoid this problem, we grab mmap_mutex in perf_poll()
+	 * thereby ensuring that the assignment of the new ring buffer
+	 * and the detachment of the old buffer appear atomic to perf_poll()
+	 */
+	mutex_lock(&event->mmap_mutex);
+
 	rcu_read_lock();
 	rb = rcu_dereference(event->rb);
-	if (rb)
+	if (rb) {
+		ring_buffer_attach(event, rb);
 		events = atomic_xchg(&rb->poll, 0);
+	}
 	rcu_read_unlock();
 
+	mutex_unlock(&event->mmap_mutex);
+
 	poll_wait(file, &event->waitq, wait);
 
 	return events;
@@ -3497,6 +3521,49 @@ unlock:
 	return ret;
 }
 
+static void ring_buffer_attach(struct perf_event *event,
+			       struct ring_buffer *rb)
+{
+	unsigned long flags;
+
+	if (!list_empty(&event->rb_entry))
+		return;
+
+	spin_lock_irqsave(&rb->event_lock, flags);
+	if (!list_empty(&event->rb_entry))
+		goto unlock;
+
+	list_add(&event->rb_entry, &rb->event_list);
+unlock:
+	spin_unlock_irqrestore(&rb->event_lock, flags);
+}
+
+static void ring_buffer_detach(struct perf_event *event,
+			       struct ring_buffer *rb)
+{
+	unsigned long flags;
+
+	if (list_empty(&event->rb_entry))
+		return;
+
+	spin_lock_irqsave(&rb->event_lock, flags);
+	list_del_init(&event->rb_entry);
+	wake_up_all(&event->waitq);
+	spin_unlock_irqrestore(&rb->event_lock, flags);
+}
+
+static void ring_buffer_wakeup(struct perf_event *event)
+{
+	struct ring_buffer *rb;
+
+	rcu_read_lock();
+	rb = rcu_dereference(event->rb);
+	list_for_each_entry_rcu(event, &rb->event_list, rb_entry) {
+		wake_up_all(&event->waitq);
+	}
+	rcu_read_unlock();
+}
+
 static void rb_free_rcu(struct rcu_head *rcu_head)
 {
 	struct ring_buffer *rb;
@@ -3522,9 +3589,19 @@ static struct ring_buffer *ring_buffer_get(struct perf_event *event)
 
 static void ring_buffer_put(struct ring_buffer *rb)
 {
+	struct perf_event *event, *n;
+	unsigned long flags;
+
 	if (!atomic_dec_and_test(&rb->refcount))
 		return;
 
+	spin_lock_irqsave(&rb->event_lock, flags);
+	list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) {
+		list_del_init(&event->rb_entry);
+		wake_up_all(&event->waitq);
+	}
+	spin_unlock_irqrestore(&rb->event_lock, flags);
+
 	call_rcu(&rb->rcu_head, rb_free_rcu);
 }
 
@@ -3547,6 +3624,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 		atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
 		vma->vm_mm->pinned_vm -= event->mmap_locked;
 		rcu_assign_pointer(event->rb, NULL);
+		ring_buffer_detach(event, rb);
 		mutex_unlock(&event->mmap_mutex);
 
 		ring_buffer_put(rb);
@@ -3701,7 +3779,7 @@ static const struct file_operations perf_fops = {
 
 void perf_event_wakeup(struct perf_event *event)
 {
-	wake_up_all(&event->waitq);
+	ring_buffer_wakeup(event);
 
 	if (event->pending_kill) {
 		kill_fasync(&event->fasync, SIGIO, event->pending_kill);
@@ -5823,6 +5901,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	INIT_LIST_HEAD(&event->group_entry);
 	INIT_LIST_HEAD(&event->event_entry);
 	INIT_LIST_HEAD(&event->sibling_list);
+	INIT_LIST_HEAD(&event->rb_entry);
+
 	init_waitqueue_head(&event->waitq);
 	init_irq_work(&event->pending, perf_pending_event);
 
@@ -6029,6 +6109,8 @@ set:
 
 	old_rb = event->rb;
 	rcu_assign_pointer(event->rb, rb);
+	if (old_rb)
+		ring_buffer_detach(event, old_rb);
 	ret = 0;
 unlock:
 	mutex_unlock(&event->mmap_mutex);
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 09097dd8116c..64568a699375 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -22,6 +22,9 @@ struct ring_buffer {
 	local_t				lost;		/* nr records lost   */
 
 	long				watermark;	/* wakeup watermark  */
+	/* poll crap */
+	spinlock_t			event_lock;
+	struct list_head		event_list;
 
 	struct perf_event_mmap_page	*user_page;
 	void				*data_pages[0];
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index a2a29205cc0f..7f3011c6b57f 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -209,6 +209,9 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 		rb->writable = 1;
 
 	atomic_set(&rb->refcount, 1);
+
+	INIT_LIST_HEAD(&rb->event_list);
+	spin_lock_init(&rb->event_lock);
 }
 
 #ifndef CONFIG_PERF_USE_VMALLOC
-- 
cgit v1.2.3


From f62ef5f3e9cff065aa845e2b7f487e1810b8e57e Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Fri, 2 Dec 2011 08:21:43 +0100
Subject: x86, amd: Fix up numa_node information for AMD CPU family 15h model
 0-0fh northbridge functions

I've received complaints that the numa_node attribute for family
15h model 00-0fh (e.g. Interlagos) northbridge functions shows
-1 instead of the proper node ID.

Correct this with attached quirks (similar to quirks for other
AMD CPU families used in multi-socket systems).

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Frank Arnold <frank.arnold@amd.com>
Cc: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/20111202072143.GA31916@alberich.amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/quirks.c | 13 +++++++++++++
 include/linux/pci_ids.h  |  4 ++++
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index b78643d0f9a5..03920a15a632 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -553,4 +553,17 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC,
 			quirk_amd_nb_node);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_LINK,
 			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F0,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F1,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F2,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F5,
+			quirk_amd_nb_node);
+
 #endif
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 172ba70306d1..2aaee0ca9da8 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -517,8 +517,12 @@
 #define PCI_DEVICE_ID_AMD_11H_NB_DRAM	0x1302
 #define PCI_DEVICE_ID_AMD_11H_NB_MISC	0x1303
 #define PCI_DEVICE_ID_AMD_11H_NB_LINK	0x1304
+#define PCI_DEVICE_ID_AMD_15H_NB_F0	0x1600
+#define PCI_DEVICE_ID_AMD_15H_NB_F1	0x1601
+#define PCI_DEVICE_ID_AMD_15H_NB_F2	0x1602
 #define PCI_DEVICE_ID_AMD_15H_NB_F3	0x1603
 #define PCI_DEVICE_ID_AMD_15H_NB_F4	0x1604
+#define PCI_DEVICE_ID_AMD_15H_NB_F5	0x1605
 #define PCI_DEVICE_ID_AMD_CNB17H_F3	0x1703
 #define PCI_DEVICE_ID_AMD_LANCE		0x2000
 #define PCI_DEVICE_ID_AMD_LANCE_HOME	0x2001
-- 
cgit v1.2.3


From 27b14b56af081ec7edeefb3a38b2c9577cc5ef48 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Tue, 1 Nov 2011 09:09:35 +0800
Subject: tracing: Restore system filter behavior

Though not all events have field 'prev_pid', it was allowed to do this:

  # echo 'prev_pid == 100' > events/sched/filter

but commit 75b8e98263fdb0bfbdeba60d4db463259f1fe8a2 (tracing/filter: Swap
entire filter of events) broke it without any reason.

Link: http://lkml.kernel.org/r/4EAF46CF.8040408@cn.fujitsu.com

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h       | 2 ++
 kernel/trace/trace_events_filter.c | 7 ++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 96efa6794ea5..c3da42dd22ba 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -172,6 +172,7 @@ enum {
 	TRACE_EVENT_FL_FILTERED_BIT,
 	TRACE_EVENT_FL_RECORDED_CMD_BIT,
 	TRACE_EVENT_FL_CAP_ANY_BIT,
+	TRACE_EVENT_FL_NO_SET_FILTER_BIT,
 };
 
 enum {
@@ -179,6 +180,7 @@ enum {
 	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
 	TRACE_EVENT_FL_RECORDED_CMD	= (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT),
 	TRACE_EVENT_FL_CAP_ANY		= (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
+	TRACE_EVENT_FL_NO_SET_FILTER	= (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
 };
 
 struct ftrace_event_call {
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index d6e7926dcd26..95dc31efd6dd 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1649,7 +1649,9 @@ static int replace_system_preds(struct event_subsystem *system,
 		 */
 		err = replace_preds(call, NULL, ps, filter_string, true);
 		if (err)
-			goto fail;
+			call->flags |= TRACE_EVENT_FL_NO_SET_FILTER;
+		else
+			call->flags &= ~TRACE_EVENT_FL_NO_SET_FILTER;
 	}
 
 	list_for_each_entry(call, &ftrace_events, list) {
@@ -1658,6 +1660,9 @@ static int replace_system_preds(struct event_subsystem *system,
 		if (strcmp(call->class->system, system->name) != 0)
 			continue;
 
+		if (call->flags & TRACE_EVENT_FL_NO_SET_FILTER)
+			continue;
+
 		filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);
 		if (!filter_item)
 			goto fail_mem;
-- 
cgit v1.2.3


From 02125a826459a6ad142f8d91c5b6357562f96615 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 5 Dec 2011 08:43:34 -0500
Subject: fix apparmor dereferencing potentially freed dentry, sanitize
 __d_path() API

__d_path() API is asking for trouble and in case of apparmor d_namespace_path()
getting just that.  The root cause is that when __d_path() misses the root
it had been told to look for, it stores the location of the most remote ancestor
in *root.  Without grabbing references.  Sure, at the moment of call it had
been pinned down by what we have in *path.  And if we raced with umount -l, we
could have very well stopped at vfsmount/dentry that got freed as soon as
prepend_path() dropped vfsmount_lock.

It is safe to compare these pointers with pre-existing (and known to be still
alive) vfsmount and dentry, as long as all we are asking is "is it the same
address?".  Dereferencing is not safe and apparmor ended up stepping into
that.  d_namespace_path() really wants to examine the place where we stopped,
even if it's not connected to our namespace.  As the result, it looked
at ->d_sb->s_magic of a dentry that might've been already freed by that point.
All other callers had been careful enough to avoid that, but it's really
a bad interface - it invites that kind of trouble.

The fix is fairly straightforward, even though it's bigger than I'd like:
	* prepend_path() root argument becomes const.
	* __d_path() is never called with NULL/NULL root.  It was a kludge
to start with.  Instead, we have an explicit function - d_absolute_root().
Same as __d_path(), except that it doesn't get root passed and stops where
it stops.  apparmor and tomoyo are using it.
	* __d_path() returns NULL on path outside of root.  The main
caller is show_mountinfo() and that's precisely what we pass root for - to
skip those outside chroot jail.  Those who don't want that can (and do)
use d_path().
	* __d_path() root argument becomes const.  Everyone agrees, I hope.
	* apparmor does *NOT* try to use __d_path() or any of its variants
when it sees that path->mnt is an internal vfsmount.  In that case it's
definitely not mounted anywhere and dentry_path() is exactly what we want
there.  Handling of sysctl()-triggered weirdness is moved to that place.
	* if apparmor is asked to do pathname relative to chroot jail
and __d_path() tells it we it's not in that jail, the sucker just calls
d_absolute_path() instead.  That's the other remaining caller of __d_path(),
BTW.
        * seq_path_root() does _NOT_ return -ENAMETOOLONG (it's stupid anyway -
the normal seq_file logics will take care of growing the buffer and redoing
the call of ->show() just fine).  However, if it gets path not reachable
from root, it returns SEQ_SKIP.  The only caller adjusted (i.e. stopped
ignoring the return value as it used to do).

Reviewed-by: John Johansen <john.johansen@canonical.com>
ACKed-by: John Johansen <john.johansen@canonical.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: stable@vger.kernel.org
---
 fs/dcache.c                | 71 ++++++++++++++++++++++++++++------------------
 fs/namespace.c             | 20 +++++++------
 fs/seq_file.c              |  6 ++--
 include/linux/dcache.h     |  3 +-
 include/linux/fs.h         |  1 +
 security/apparmor/path.c   | 65 ++++++++++++++++++++++++------------------
 security/tomoyo/realpath.c |  3 +-
 7 files changed, 100 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 10ba92def3f6..89509b5a090e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2439,16 +2439,14 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
 /**
  * prepend_path - Prepend path string to a buffer
  * @path: the dentry/vfsmount to report
- * @root: root vfsmnt/dentry (may be modified by this function)
+ * @root: root vfsmnt/dentry
  * @buffer: pointer to the end of the buffer
  * @buflen: pointer to buffer length
  *
  * Caller holds the rename_lock.
- *
- * If path is not reachable from the supplied root, then the value of
- * root is changed (without modifying refcounts).
  */
-static int prepend_path(const struct path *path, struct path *root,
+static int prepend_path(const struct path *path,
+			const struct path *root,
 			char **buffer, int *buflen)
 {
 	struct dentry *dentry = path->dentry;
@@ -2483,10 +2481,10 @@ static int prepend_path(const struct path *path, struct path *root,
 		dentry = parent;
 	}
 
-out:
 	if (!error && !slash)
 		error = prepend(buffer, buflen, "/", 1);
 
+out:
 	br_read_unlock(vfsmount_lock);
 	return error;
 
@@ -2500,15 +2498,17 @@ global_root:
 		WARN(1, "Root dentry has weird name <%.*s>\n",
 		     (int) dentry->d_name.len, dentry->d_name.name);
 	}
-	root->mnt = vfsmnt;
-	root->dentry = dentry;
+	if (!slash)
+		error = prepend(buffer, buflen, "/", 1);
+	if (!error)
+		error = vfsmnt->mnt_ns ? 1 : 2;
 	goto out;
 }
 
 /**
  * __d_path - return the path of a dentry
  * @path: the dentry/vfsmount to report
- * @root: root vfsmnt/dentry (may be modified by this function)
+ * @root: root vfsmnt/dentry
  * @buf: buffer to return value in
  * @buflen: buffer length
  *
@@ -2519,10 +2519,10 @@ global_root:
  *
  * "buflen" should be positive.
  *
- * If path is not reachable from the supplied root, then the value of
- * root is changed (without modifying refcounts).
+ * If the path is not reachable from the supplied root, return %NULL.
  */
-char *__d_path(const struct path *path, struct path *root,
+char *__d_path(const struct path *path,
+	       const struct path *root,
 	       char *buf, int buflen)
 {
 	char *res = buf + buflen;
@@ -2533,7 +2533,28 @@ char *__d_path(const struct path *path, struct path *root,
 	error = prepend_path(path, root, &res, &buflen);
 	write_sequnlock(&rename_lock);
 
-	if (error)
+	if (error < 0)
+		return ERR_PTR(error);
+	if (error > 0)
+		return NULL;
+	return res;
+}
+
+char *d_absolute_path(const struct path *path,
+	       char *buf, int buflen)
+{
+	struct path root = {};
+	char *res = buf + buflen;
+	int error;
+
+	prepend(&res, &buflen, "\0", 1);
+	write_seqlock(&rename_lock);
+	error = prepend_path(path, &root, &res, &buflen);
+	write_sequnlock(&rename_lock);
+
+	if (error > 1)
+		error = -EINVAL;
+	if (error < 0)
 		return ERR_PTR(error);
 	return res;
 }
@@ -2541,8 +2562,9 @@ char *__d_path(const struct path *path, struct path *root,
 /*
  * same as __d_path but appends "(deleted)" for unlinked files.
  */
-static int path_with_deleted(const struct path *path, struct path *root,
-				 char **buf, int *buflen)
+static int path_with_deleted(const struct path *path,
+			     const struct path *root,
+			     char **buf, int *buflen)
 {
 	prepend(buf, buflen, "\0", 1);
 	if (d_unlinked(path->dentry)) {
@@ -2579,7 +2601,6 @@ char *d_path(const struct path *path, char *buf, int buflen)
 {
 	char *res = buf + buflen;
 	struct path root;
-	struct path tmp;
 	int error;
 
 	/*
@@ -2594,9 +2615,8 @@ char *d_path(const struct path *path, char *buf, int buflen)
 
 	get_fs_root(current->fs, &root);
 	write_seqlock(&rename_lock);
-	tmp = root;
-	error = path_with_deleted(path, &tmp, &res, &buflen);
-	if (error)
+	error = path_with_deleted(path, &root, &res, &buflen);
+	if (error < 0)
 		res = ERR_PTR(error);
 	write_sequnlock(&rename_lock);
 	path_put(&root);
@@ -2617,7 +2637,6 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
 {
 	char *res = buf + buflen;
 	struct path root;
-	struct path tmp;
 	int error;
 
 	if (path->dentry->d_op && path->dentry->d_op->d_dname)
@@ -2625,9 +2644,8 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
 
 	get_fs_root(current->fs, &root);
 	write_seqlock(&rename_lock);
-	tmp = root;
-	error = path_with_deleted(path, &tmp, &res, &buflen);
-	if (!error && !path_equal(&tmp, &root))
+	error = path_with_deleted(path, &root, &res, &buflen);
+	if (error > 0)
 		error = prepend_unreachable(&res, &buflen);
 	write_sequnlock(&rename_lock);
 	path_put(&root);
@@ -2758,19 +2776,18 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
 	write_seqlock(&rename_lock);
 	if (!d_unlinked(pwd.dentry)) {
 		unsigned long len;
-		struct path tmp = root;
 		char *cwd = page + PAGE_SIZE;
 		int buflen = PAGE_SIZE;
 
 		prepend(&cwd, &buflen, "\0", 1);
-		error = prepend_path(&pwd, &tmp, &cwd, &buflen);
+		error = prepend_path(&pwd, &root, &cwd, &buflen);
 		write_sequnlock(&rename_lock);
 
-		if (error)
+		if (error < 0)
 			goto out;
 
 		/* Unreachable from current root */
-		if (!path_equal(&tmp, &root)) {
+		if (error > 0) {
 			error = prepend_unreachable(&cwd, &buflen);
 			if (error)
 				goto out;
diff --git a/fs/namespace.c b/fs/namespace.c
index 6d3a1963879b..cfc6d4448aa5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1048,15 +1048,12 @@ static int show_mountinfo(struct seq_file *m, void *v)
 	if (err)
 		goto out;
 	seq_putc(m, ' ');
-	seq_path_root(m, &mnt_path, &root, " \t\n\\");
-	if (root.mnt != p->root.mnt || root.dentry != p->root.dentry) {
-		/*
-		 * Mountpoint is outside root, discard that one.  Ugly,
-		 * but less so than trying to do that in iterator in a
-		 * race-free way (due to renames).
-		 */
-		return SEQ_SKIP;
-	}
+
+	/* mountpoints outside of chroot jail will give SEQ_SKIP on this */
+	err = seq_path_root(m, &mnt_path, &root, " \t\n\\");
+	if (err)
+		goto out;
+
 	seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
 	show_mnt_opts(m, mnt);
 
@@ -2776,3 +2773,8 @@ void kern_unmount(struct vfsmount *mnt)
 	}
 }
 EXPORT_SYMBOL(kern_unmount);
+
+bool our_mnt(struct vfsmount *mnt)
+{
+	return check_mnt(mnt);
+}
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 05d6b0e78c95..dba43c3ea3af 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -449,8 +449,6 @@ EXPORT_SYMBOL(seq_path);
 
 /*
  * Same as seq_path, but relative to supplied root.
- *
- * root may be changed, see __d_path().
  */
 int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
 		  char *esc)
@@ -463,6 +461,8 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
 		char *p;
 
 		p = __d_path(path, root, buf, size);
+		if (!p)
+			return SEQ_SKIP;
 		res = PTR_ERR(p);
 		if (!IS_ERR(p)) {
 			char *end = mangle_path(buf, p, esc);
@@ -474,7 +474,7 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
 	}
 	seq_commit(m, res);
 
-	return res < 0 ? res : 0;
+	return res < 0 && res != -ENAMETOOLONG ? res : 0;
 }
 
 /*
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 4df926199369..ed9f74f6c519 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -339,7 +339,8 @@ extern int d_validate(struct dentry *, struct dentry *);
  */
 extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...);
 
-extern char *__d_path(const struct path *path, struct path *root, char *, int);
+extern char *__d_path(const struct path *, const struct path *, char *, int);
+extern char *d_absolute_path(const struct path *, char *, int);
 extern char *d_path(const struct path *, char *, int);
 extern char *d_path_with_unreachable(const struct path *, char *, int);
 extern char *dentry_path_raw(struct dentry *, char *, int);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e3130220ce3e..019dc558df1a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1942,6 +1942,7 @@ extern int fd_statfs(int, struct kstatfs *);
 extern int statfs_by_dentry(struct dentry *, struct kstatfs *);
 extern int freeze_super(struct super_block *super);
 extern int thaw_super(struct super_block *super);
+extern bool our_mnt(struct vfsmount *mnt);
 
 extern int current_umask(void);
 
diff --git a/security/apparmor/path.c b/security/apparmor/path.c
index 36cc0cc39e78..b566eba4a65c 100644
--- a/security/apparmor/path.c
+++ b/security/apparmor/path.c
@@ -57,23 +57,44 @@ static int prepend(char **buffer, int buflen, const char *str, int namelen)
 static int d_namespace_path(struct path *path, char *buf, int buflen,
 			    char **name, int flags)
 {
-	struct path root, tmp;
 	char *res;
-	int connected, error = 0;
+	int error = 0;
+	int connected = 1;
+
+	if (path->mnt->mnt_flags & MNT_INTERNAL) {
+		/* it's not mounted anywhere */
+		res = dentry_path(path->dentry, buf, buflen);
+		*name = res;
+		if (IS_ERR(res)) {
+			*name = buf;
+			return PTR_ERR(res);
+		}
+		if (path->dentry->d_sb->s_magic == PROC_SUPER_MAGIC &&
+		    strncmp(*name, "/sys/", 5) == 0) {
+			/* TODO: convert over to using a per namespace
+			 * control instead of hard coded /proc
+			 */
+			return prepend(name, *name - buf, "/proc", 5);
+		}
+		return 0;
+	}
 
-	/* Get the root we want to resolve too, released below */
+	/* resolve paths relative to chroot?*/
 	if (flags & PATH_CHROOT_REL) {
-		/* resolve paths relative to chroot */
+		struct path root;
 		get_fs_root(current->fs, &root);
-	} else {
-		/* resolve paths relative to namespace */
-		root.mnt = current->nsproxy->mnt_ns->root;
-		root.dentry = root.mnt->mnt_root;
-		path_get(&root);
+		res = __d_path(path, &root, buf, buflen);
+		if (res && !IS_ERR(res)) {
+			/* everything's fine */
+			*name = res;
+			path_put(&root);
+			goto ok;
+		}
+		path_put(&root);
+		connected = 0;
 	}
 
-	tmp = root;
-	res = __d_path(path, &tmp, buf, buflen);
+	res = d_absolute_path(path, buf, buflen);
 
 	*name = res;
 	/* handle error conditions - and still allow a partial path to
@@ -84,7 +105,10 @@ static int d_namespace_path(struct path *path, char *buf, int buflen,
 		*name = buf;
 		goto out;
 	}
+	if (!our_mnt(path->mnt))
+		connected = 0;
 
+ok:
 	/* Handle two cases:
 	 * 1. A deleted dentry && profile is not allowing mediation of deleted
 	 * 2. On some filesystems, newly allocated dentries appear to the
@@ -97,10 +121,7 @@ static int d_namespace_path(struct path *path, char *buf, int buflen,
 			goto out;
 	}
 
-	/* Determine if the path is connected to the expected root */
-	connected = tmp.dentry == root.dentry && tmp.mnt == root.mnt;
-
-	/* If the path is not connected,
+	/* If the path is not connected to the expected root,
 	 * check if it is a sysctl and handle specially else remove any
 	 * leading / that __d_path may have returned.
 	 * Unless
@@ -112,17 +133,9 @@ static int d_namespace_path(struct path *path, char *buf, int buflen,
 	 *     namespace root.
 	 */
 	if (!connected) {
-		/* is the disconnect path a sysctl? */
-		if (tmp.dentry->d_sb->s_magic == PROC_SUPER_MAGIC &&
-		    strncmp(*name, "/sys/", 5) == 0) {
-			/* TODO: convert over to using a per namespace
-			 * control instead of hard coded /proc
-			 */
-			error = prepend(name, *name - buf, "/proc", 5);
-		} else if (!(flags & PATH_CONNECT_PATH) &&
+		if (!(flags & PATH_CONNECT_PATH) &&
 			   !(((flags & CHROOT_NSCONNECT) == CHROOT_NSCONNECT) &&
-			     (tmp.mnt == current->nsproxy->mnt_ns->root &&
-			      tmp.dentry == tmp.mnt->mnt_root))) {
+			     our_mnt(path->mnt))) {
 			/* disconnected path, don't return pathname starting
 			 * with '/'
 			 */
@@ -133,8 +146,6 @@ static int d_namespace_path(struct path *path, char *buf, int buflen,
 	}
 
 out:
-	path_put(&root);
-
 	return error;
 }
 
diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c
index 738bbdf8d4c7..36fa7c9bedc4 100644
--- a/security/tomoyo/realpath.c
+++ b/security/tomoyo/realpath.c
@@ -101,9 +101,8 @@ static char *tomoyo_get_absolute_path(struct path *path, char * const buffer,
 {
 	char *pos = ERR_PTR(-ENOMEM);
 	if (buflen >= 256) {
-		struct path ns_root = { };
 		/* go to whatever namespace root we are under */
-		pos = __d_path(path, &ns_root, buffer, buflen - 1);
+		pos = d_absolute_path(path, buffer, buflen - 1);
 		if (!IS_ERR(pos) && *pos == '/' && pos[1]) {
 			struct inode *inode = path->dentry->d_inode;
 			if (inode && S_ISDIR(inode->i_mode)) {
-- 
cgit v1.2.3


From 83aeeada7c69f35e5100b27ec354335597a7a488 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Thu, 8 Dec 2011 14:33:54 -0800
Subject: vmscan: use atomic-long for shrinker batching

Use atomic-long operations instead of looping around cmpxchg().

[akpm@linux-foundation.org: massage atomic.h inclusions]
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Dave Chinner <david@fromorbit.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h       |  2 +-
 include/linux/mm.h       |  1 +
 include/linux/shrinker.h |  2 +-
 mm/vmscan.c              | 17 +++++++----------
 4 files changed, 10 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 019dc558df1a..e0bc4ffb8e7f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -393,8 +393,8 @@ struct inodes_stat_t {
 #include <linux/semaphore.h>
 #include <linux/fiemap.h>
 #include <linux/rculist_bl.h>
-#include <linux/shrinker.h>
 #include <linux/atomic.h>
+#include <linux/shrinker.h>
 
 #include <asm/byteorder.h>
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3dc3a8c2c485..4baadd18f4ad 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -10,6 +10,7 @@
 #include <linux/mmzone.h>
 #include <linux/rbtree.h>
 #include <linux/prio_tree.h>
+#include <linux/atomic.h>
 #include <linux/debug_locks.h>
 #include <linux/mm_types.h>
 #include <linux/range.h>
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index a83833a1f7a2..07ceb97d53fa 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -35,7 +35,7 @@ struct shrinker {
 
 	/* These are for internal use */
 	struct list_head list;
-	long nr;	/* objs pending delete */
+	atomic_long_t nr_in_batch; /* objs pending delete */
 };
 #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
 extern void register_shrinker(struct shrinker *);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f5255442ae2b..f54a05b7a61d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -183,7 +183,7 @@ static unsigned long zone_nr_lru_pages(struct zone *zone,
  */
 void register_shrinker(struct shrinker *shrinker)
 {
-	shrinker->nr = 0;
+	atomic_long_set(&shrinker->nr_in_batch, 0);
 	down_write(&shrinker_rwsem);
 	list_add_tail(&shrinker->list, &shrinker_list);
 	up_write(&shrinker_rwsem);
@@ -264,9 +264,7 @@ unsigned long shrink_slab(struct shrink_control *shrink,
 		 * and zero it so that other concurrent shrinker invocations
 		 * don't also do this scanning work.
 		 */
-		do {
-			nr = shrinker->nr;
-		} while (cmpxchg(&shrinker->nr, nr, 0) != nr);
+		nr = atomic_long_xchg(&shrinker->nr_in_batch, 0);
 
 		total_scan = nr;
 		delta = (4 * nr_pages_scanned) / shrinker->seeks;
@@ -328,12 +326,11 @@ unsigned long shrink_slab(struct shrink_control *shrink,
 		 * manner that handles concurrent updates. If we exhausted the
 		 * scan, there is no need to do an update.
 		 */
-		do {
-			nr = shrinker->nr;
-			new_nr = total_scan + nr;
-			if (total_scan <= 0)
-				break;
-		} while (cmpxchg(&shrinker->nr, nr, new_nr) != nr);
+		if (total_scan > 0)
+			new_nr = atomic_long_add_return(total_scan,
+					&shrinker->nr_in_batch);
+		else
+			new_nr = atomic_long_read(&shrinker->nr_in_batch);
 
 		trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr);
 	}
-- 
cgit v1.2.3