From 5bd078dda4d4fbdb4bd138a6bd5b6e274c019ed2 Mon Sep 17 00:00:00 2001
From: Rob Herring <robherring2@gmail.com>
Date: Wed, 14 Sep 2011 11:31:36 -0500
Subject: irq: Add declaration of irq_domain_simple_ops to irqdomain.h

irq_domain_simple_ops is exported, but is not declared in irqdomain.h,
so add it.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: marc.zyngier@arm.com
Cc: thomas.abraham@linaro.org
Cc: jamie@jamieiles.com
Cc: b-cousson@ti.com
Cc: shawn.guo@linaro.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: devicetree-discuss@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1316017900-19918-2-git-send-email-robherring2@gmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqdomain.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index e807ad687a07..3ad553e8eae2 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -80,6 +80,7 @@ extern void irq_domain_del(struct irq_domain *domain);
 #endif /* CONFIG_IRQ_DOMAIN */
 
 #if defined(CONFIG_IRQ_DOMAIN) && defined(CONFIG_OF_IRQ)
+extern struct irq_domain_ops irq_domain_simple_ops;
 extern void irq_domain_add_simple(struct device_node *controller, int irq_base);
 extern void irq_domain_generate_simple(const struct of_device_id *match,
 					u64 phys_base, unsigned int irq_start);
-- 
cgit v1.2.3


From b6cf8788a3382c2000743a0e393bcc8aeb0601cb Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 20 Sep 2011 17:07:29 +0200
Subject: [S390] kvm: extension capability for new address space layout

598841ca9919d008b520114d8a4378c4ce4e40a1 ([S390] use gmap address
spaces for kvm guest images) changed kvm on s390 to use a separate
address space for kvm guests. We can now put KVM guests anywhere
in the user address mode with a size up to 8PB - as long as the
memory is 1MB-aligned. This change was done without KVM extension
capability bit.
The change was added after 3.0, but we still have a chance to add
a feature bit before 3.1 (keeping the releases in a sane state).
We use number 71 to avoid collisions with other pending kvm patches
as requested by Alexander Graf.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Avi Kivity <avi@redhat.com>
Cc: Alexander Graf <agraf@suse.de>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 arch/s390/kvm/kvm-s390.c | 1 +
 include/linux/kvm.h      | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b4eced131e5c..dc2b580e27bc 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -123,6 +123,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 
 	switch (ext) {
 	case KVM_CAP_S390_PSW:
+	case KVM_CAP_S390_GMAP:
 		r = 1;
 		break;
 	default:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 2c366b52f505..aace6b8691a2 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -553,6 +553,7 @@ struct kvm_ppc_pvinfo {
 #define KVM_CAP_SPAPR_TCE 63
 #define KVM_CAP_PPC_SMT 64
 #define KVM_CAP_PPC_RMA	65
+#define KVM_CAP_S390_GMAP 71
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From 983c7db347db8ce2d8453fd1d89b7a4bb6920d56 Mon Sep 17 00:00:00 2001
From: Milan Broz <mbroz@redhat.com>
Date: Sun, 25 Sep 2011 23:26:21 +0100
Subject: dm crypt: always disable discard_zeroes_data

If optional discard support in dm-crypt is enabled, discards requests
bypass the crypt queue and blocks of the underlying device are discarded.
For the read path, discarded blocks are handled the same as normal
ciphertext blocks, thus decrypted.

So if the underlying device announces discarded regions return zeroes,
dm-crypt must disable this flag because after decryption there is just
random noise instead of zeroes.

Signed-off-by: Milan Broz <mbroz@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-crypt.c         |  2 ++
 drivers/md/dm-table.c         | 19 +++++++++++++++++++
 include/linux/device-mapper.h |  5 +++++
 3 files changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 49da55c1528a..8c2a000cf3f5 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1698,6 +1698,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 
 	ti->num_flush_requests = 1;
+	ti->discard_zeroes_data_unsupported = 1;
+
 	return 0;
 
 bad:
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 322669807077..bc04518e9d8b 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1283,6 +1283,22 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned flush)
 	return 0;
 }
 
+static bool dm_table_discard_zeroes_data(struct dm_table *t)
+{
+	struct dm_target *ti;
+	unsigned i = 0;
+
+	/* Ensure that all targets supports discard_zeroes_data. */
+	while (i < dm_table_get_num_targets(t)) {
+		ti = dm_table_get_target(t, i++);
+
+		if (ti->discard_zeroes_data_unsupported)
+			return 0;
+	}
+
+	return 1;
+}
+
 void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 			       struct queue_limits *limits)
 {
@@ -1305,6 +1321,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	}
 	blk_queue_flush(q, flush);
 
+	if (!dm_table_discard_zeroes_data(t))
+		q->limits.discard_zeroes_data = 0;
+
 	dm_table_set_integrity(t);
 
 	/*
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 3fa1f3d90ce0..99e3e50b5c57 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -197,6 +197,11 @@ struct dm_target {
 	 * whether or not its underlying devices have support.
 	 */
 	unsigned discards_supported:1;
+
+	/*
+	 * Set if this target does not return zeroes on discarded blocks.
+	 */
+	unsigned discard_zeroes_data_unsupported:1;
 };
 
 /* Each target can link one of these into the table */
-- 
cgit v1.2.3


From d94c177beeb4469cd4f6e83354ab0223353e98ed Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 26 Sep 2011 17:44:55 -0700
Subject: vfs pathname lookup: Add LOOKUP_AUTOMOUNT flag

Since we've now turned around and made LOOKUP_FOLLOW *not* force an
automount, we want to add the ability to force an automount event on
lookup even if we don't happen to have one of the other flags that force
it implicitly (LOOKUP_OPEN, LOOKUP_DIRECTORY, LOOKUP_PARENT..)

Most cases will never want to use this, since you'd normally want to
delay automounting as long as possible, which usually implies
LOOKUP_OPEN (when we open a file or directory, we really cannot avoid
the automount any more).

But Trond argued sufficiently forcefully that at a minimum bind mounting
a file and quotactl will want to force the automount lookup.  Some other
cases (like nfs_follow_remote_path()) could use it too, although
LOOKUP_DIRECTORY would work there as well.

This commit just adds the flag and logic, no users yet, though.  It also
doesn't actually touch the LOOKUP_NO_AUTOMOUNT flag that is related, and
was made irrelevant by the same change that made us not follow on
LOOKUP_FOLLOW.

Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Ian Kent <raven@themaw.net>
Cc: Jeff Layton <jlayton@redhat.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: David Howells <dhowells@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Greg KH <gregkh@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/namei.c            | 2 +-
 include/linux/namei.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index f4788365ea22..09606fd83d57 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -739,7 +739,7 @@ static int follow_automount(struct path *path, unsigned flags,
 	 * of the daemon to instantiate them before they can be used.
 	 */
 	if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
-		     LOOKUP_OPEN | LOOKUP_CREATE)) &&
+		     LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
 	    path->dentry->d_inode)
 		return -EISDIR;
 
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 76fe2c62ae71..e13dac7caab2 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -48,6 +48,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
  */
 #define LOOKUP_FOLLOW		0x0001
 #define LOOKUP_DIRECTORY	0x0002
+#define LOOKUP_AUTOMOUNT	0x0004
 
 #define LOOKUP_PARENT		0x0010
 #define LOOKUP_REVAL		0x0020
-- 
cgit v1.2.3


From b6c8069d3577481390b3f24a8434ad72a3235594 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 27 Sep 2011 08:12:33 -0700
Subject: vfs: remove LOOKUP_NO_AUTOMOUNT flag

That flag no longer makes sense, since we don't look up automount points
as eagerly any more.  Additionally, it turns out that the NO_AUTOMOUNT
handling was buggy to begin with: it would avoid automounting even for
cases where we really *needed* to do the automount handling, and could
return ENOENT for autofs entries that hadn't been instantiated yet.

With our new non-eager automount semantics, one discussion has been
about adding a AT_AUTOMOUNT flag to vfs_fstatat (and thus the
newfstatat() and fstatat64() system calls), but it's probably not worth
it: you can always force at least directory automounting by simply
adding the final '/' to the filename, which works for *all* of the stat
family system calls, old and new.

So AT_NO_AUTOMOUNT (and thus LOOKUP_NO_AUTOMOUNT) really were just a
result of our bad default behavior.

Acked-by: Ian Kent <raven@themaw.net>
Acked-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/namei.c            | 6 ------
 fs/stat.c             | 2 --
 include/linux/namei.h | 2 +-
 3 files changed, 1 insertion(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index 09606fd83d57..0b3138de2a3b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -721,12 +721,6 @@ static int follow_automount(struct path *path, unsigned flags,
 	if (!path->dentry->d_op || !path->dentry->d_op->d_automount)
 		return -EREMOTE;
 
-	/* We don't want to mount if someone supplied AT_NO_AUTOMOUNT
-	 * and this is the terminal part of the path.
-	 */
-	if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT))
-		return -EISDIR; /* we actually want to stop here */
-
 	/* We don't want to mount if someone's just doing a stat -
 	 * unless they're stat'ing a directory and appended a '/' to
 	 * the name.
diff --git a/fs/stat.c b/fs/stat.c
index ba5316ffac61..78a3aa83c7ea 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -81,8 +81,6 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
 
 	if (!(flag & AT_SYMLINK_NOFOLLOW))
 		lookup_flags |= LOOKUP_FOLLOW;
-	if (flag & AT_NO_AUTOMOUNT)
-		lookup_flags |= LOOKUP_NO_AUTOMOUNT;
 	if (flag & AT_EMPTY_PATH)
 		lookup_flags |= LOOKUP_EMPTY;
 
diff --git a/include/linux/namei.h b/include/linux/namei.h
index e13dac7caab2..409328d1cbbb 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -53,7 +53,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
 #define LOOKUP_PARENT		0x0010
 #define LOOKUP_REVAL		0x0020
 #define LOOKUP_RCU		0x0040
-#define LOOKUP_NO_AUTOMOUNT	0x0080
+
 /*
  * Intent data
  */
-- 
cgit v1.2.3


From f75159e9936143177b442afc78150b7a7ad8aa07 Mon Sep 17 00:00:00 2001
From: Richard Cochran <richardcochran@gmail.com>
Date: Tue, 20 Sep 2011 01:25:41 +0000
Subject: ptp: fix L2 event message recognition

The IEEE 1588 standard defines two kinds of messages, event and general
messages. Event messages require time stamping, and general do not. When
using UDP transport, two separate ports are used for the two message
types.

The BPF designed to recognize event messages incorrectly classifies L2
general messages as event messages. This commit fixes the issue by
extending the filter to check the message type field for L2 PTP packets.
Event messages are be distinguished from general messages by testing
the "general" bit.

Signed-off-by: Richard Cochran <richard.cochran@omicron.at>
Cc: <stable@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ptp_classify.h | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h
index e07e2742a865..1dc420ba213a 100644
--- a/include/linux/ptp_classify.h
+++ b/include/linux/ptp_classify.h
@@ -51,6 +51,7 @@
 #define PTP_CLASS_V2_VLAN (PTP_CLASS_V2 | PTP_CLASS_VLAN)
 
 #define PTP_EV_PORT 319
+#define PTP_GEN_BIT 0x08 /* indicates general message, if set in message type */
 
 #define OFF_ETYPE	12
 #define OFF_IHL		14
@@ -116,14 +117,20 @@ static inline int ptp_filter_init(struct sock_filter *f, int len)
 	{OP_OR,		0,   0, PTP_CLASS_IPV6		}, /*              */ \
 	{OP_RETA,	0,   0, 0			}, /*              */ \
 /*L3x*/	{OP_RETK,	0,   0, PTP_CLASS_NONE		}, /*              */ \
-/*L40*/	{OP_JEQ,	0,   6, ETH_P_8021Q		}, /* f goto L50   */ \
+/*L40*/	{OP_JEQ,	0,   9, ETH_P_8021Q		}, /* f goto L50   */ \
 	{OP_LDH,	0,   0, OFF_ETYPE + 4		}, /*              */ \
-	{OP_JEQ,	0,   9, ETH_P_1588		}, /* f goto L60   */ \
+	{OP_JEQ,	0,  15, ETH_P_1588		}, /* f goto L60   */ \
+	{OP_LDB,	0,   0, ETH_HLEN + VLAN_HLEN	}, /*              */ \
+	{OP_AND,	0,   0, PTP_GEN_BIT		}, /*              */ \
+	{OP_JEQ,	0,  12, 0			}, /* f goto L6x   */ \
 	{OP_LDH,	0,   0, ETH_HLEN + VLAN_HLEN	}, /*              */ \
 	{OP_AND,	0,   0, PTP_CLASS_VMASK		}, /*              */ \
 	{OP_OR,		0,   0, PTP_CLASS_VLAN		}, /*              */ \
 	{OP_RETA,	0,   0, 0			}, /*              */ \
-/*L50*/	{OP_JEQ,	0,   4, ETH_P_1588		}, /* f goto L61   */ \
+/*L50*/	{OP_JEQ,	0,   7, ETH_P_1588		}, /* f goto L61   */ \
+	{OP_LDB,	0,   0, ETH_HLEN		}, /*              */ \
+	{OP_AND,	0,   0, PTP_GEN_BIT		}, /*              */ \
+	{OP_JEQ,	0,   4, 0			}, /* f goto L6x   */ \
 	{OP_LDH,	0,   0, ETH_HLEN		}, /*              */ \
 	{OP_AND,	0,   0, PTP_CLASS_VMASK		}, /*              */ \
 	{OP_OR,		0,   0, PTP_CLASS_L2		}, /*              */ \
-- 
cgit v1.2.3


From d670ec13178d0fd8680e6742a2bc6e04f28f87d8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 1 Sep 2011 12:42:04 +0200
Subject: posix-cpu-timers: Cure SMP wobbles

David reported:

  Attached below is a watered-down version of rt/tst-cpuclock2.c from
  GLIBC.  Just build it with "gcc -o test test.c -lpthread -lrt" or
  similar.

  Run it several times, and you will see cases where the main thread
  will measure a process clock difference before and after the nanosleep
  which is smaller than the cpu-burner thread's individual thread clock
  difference.  This doesn't make any sense since the cpu-burner thread
  is part of the top-level process's thread group.

  I've reproduced this on both x86-64 and sparc64 (using both 32-bit and
  64-bit binaries).

  For example:

  [davem@boricha build-x86_64-linux]$ ./test
  process: before(0.001221967) after(0.498624371) diff(497402404)
  thread:  before(0.000081692) after(0.498316431) diff(498234739)
  self:    before(0.001223521) after(0.001240219) diff(16698)
  [davem@boricha build-x86_64-linux]$

  The diff of 'process' should always be >= the diff of 'thread'.

  I make sure to wrap the 'thread' clock measurements the most tightly
  around the nanosleep() call, and that the 'process' clock measurements
  are the outer-most ones.

  ---
  #include <unistd.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <time.h>
  #include <fcntl.h>
  #include <string.h>
  #include <errno.h>
  #include <pthread.h>

  static pthread_barrier_t barrier;

  static void *chew_cpu(void *arg)
  {
	  pthread_barrier_wait(&barrier);
	  while (1)
		  __asm__ __volatile__("" : : : "memory");
	  return NULL;
  }

  int main(void)
  {
	  clockid_t process_clock, my_thread_clock, th_clock;
	  struct timespec process_before, process_after;
	  struct timespec me_before, me_after;
	  struct timespec th_before, th_after;
	  struct timespec sleeptime;
	  unsigned long diff;
	  pthread_t th;
	  int err;

	  err = clock_getcpuclockid(0, &process_clock);
	  if (err)
		  return 1;

	  err = pthread_getcpuclockid(pthread_self(), &my_thread_clock);
	  if (err)
		  return 1;

	  pthread_barrier_init(&barrier, NULL, 2);
	  err = pthread_create(&th, NULL, chew_cpu, NULL);
	  if (err)
		  return 1;

	  err = pthread_getcpuclockid(th, &th_clock);
	  if (err)
		  return 1;

	  pthread_barrier_wait(&barrier);

	  err = clock_gettime(process_clock, &process_before);
	  if (err)
		  return 1;

	  err = clock_gettime(my_thread_clock, &me_before);
	  if (err)
		  return 1;

	  err = clock_gettime(th_clock, &th_before);
	  if (err)
		  return 1;

	  sleeptime.tv_sec = 0;
	  sleeptime.tv_nsec = 500000000;
	  nanosleep(&sleeptime, NULL);

	  err = clock_gettime(th_clock, &th_after);
	  if (err)
		  return 1;

	  err = clock_gettime(my_thread_clock, &me_after);
	  if (err)
		  return 1;

	  err = clock_gettime(process_clock, &process_after);
	  if (err)
		  return 1;

	  diff = process_after.tv_nsec - process_before.tv_nsec;
	  printf("process: before(%lu.%.9lu) after(%lu.%.9lu) diff(%lu)\n",
		 process_before.tv_sec, process_before.tv_nsec,
		 process_after.tv_sec, process_after.tv_nsec, diff);
	  diff = th_after.tv_nsec - th_before.tv_nsec;
	  printf("thread:  before(%lu.%.9lu) after(%lu.%.9lu) diff(%lu)\n",
		 th_before.tv_sec, th_before.tv_nsec,
		 th_after.tv_sec, th_after.tv_nsec, diff);
	  diff = me_after.tv_nsec - me_before.tv_nsec;
	  printf("self:    before(%lu.%.9lu) after(%lu.%.9lu) diff(%lu)\n",
		 me_before.tv_sec, me_before.tv_nsec,
		 me_after.tv_sec, me_after.tv_nsec, diff);

	  return 0;
  }

This is due to us using p->se.sum_exec_runtime in
thread_group_cputime() where we iterate the thread group and sum all
data. This does not take time since the last schedule operation (tick
or otherwise) into account. We can cure this by using
task_sched_runtime() at the cost of having to take locks.

This also means we can (and must) do away with
thread_group_sched_runtime() since the modified thread_group_cputime()
is now more accurate and would deadlock when called from
thread_group_sched_runtime().

Aside of that it makes the function safe on 32 bit systems. The old
code added t->se.sum_exec_runtime unprotected. sum_exec_runtime is a
64bit value and could be changed on another cpu at the same time.

Reported-by: David Miller <davem@davemloft.net>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: stable@kernel.org
Link: http://lkml.kernel.org/r/1314874459.7945.22.camel@twins
Tested-by: David Miller <davem@davemloft.net>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/sched.h     |  1 -
 kernel/posix-cpu-timers.c |  5 +++--
 kernel/sched.c            | 24 ------------------------
 3 files changed, 3 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4ac2c0578e0f..41d0237fd449 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1956,7 +1956,6 @@ static inline void disable_sched_clock_irqtime(void) {}
 
 extern unsigned long long
 task_sched_runtime(struct task_struct *task);
-extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
 
 /* sched_exec is called by processes performing an exec */
 #ifdef CONFIG_SMP
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 58f405b581e7..c8008dd58ef2 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -250,7 +250,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 	do {
 		times->utime = cputime_add(times->utime, t->utime);
 		times->stime = cputime_add(times->stime, t->stime);
-		times->sum_exec_runtime += t->se.sum_exec_runtime;
+		times->sum_exec_runtime += task_sched_runtime(t);
 	} while_each_thread(tsk, t);
 out:
 	rcu_read_unlock();
@@ -312,7 +312,8 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
 		cpu->cpu = cputime.utime;
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = thread_group_sched_runtime(p);
+		thread_group_cputime(p, &cputime);
+		cpu->sched = cputime.sum_exec_runtime;
 		break;
 	}
 	return 0;
diff --git a/kernel/sched.c b/kernel/sched.c
index d249ea88428c..b50b0f0c9aa9 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3724,30 +3724,6 @@ unsigned long long task_sched_runtime(struct task_struct *p)
 	return ns;
 }
 
-/*
- * Return sum_exec_runtime for the thread group.
- * In case the task is currently running, return the sum plus current's
- * pending runtime that have not been accounted yet.
- *
- * Note that the thread group might have other running tasks as well,
- * so the return value not includes other pending runtime that other
- * running tasks might have.
- */
-unsigned long long thread_group_sched_runtime(struct task_struct *p)
-{
-	struct task_cputime totals;
-	unsigned long flags;
-	struct rq *rq;
-	u64 ns;
-
-	rq = task_rq_lock(p, &flags);
-	thread_group_cputime(p, &totals);
-	ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
-	task_rq_unlock(rq, p, &flags);
-
-	return ns;
-}
-
 /*
  * Account user cpu time to a process.
  * @p: the process that the cpu time gets accounted to
-- 
cgit v1.2.3


From 5f39e6705faade2e89d119958a8c51b9b6e2c53c Mon Sep 17 00:00:00 2001
From: Jon Mason <mason@myri.com>
Date: Mon, 3 Oct 2011 09:50:20 -0500
Subject: PCI: Disable MPS configuration by default

Add the ability to disable PCI-E MPS turning and using the BIOS
configured MPS defaults.  Due to the number of issues recently
discovered on some x86 chipsets, make this the default behavior.

Also, add the option for peer to peer DMA MPS configuration.  Peer to
peer DMA is outside the scope of this patch, but MPS configuration could
prevent it from working by having the MPS on one root port different
than the MPS on another.  To work around this, simply make the system
wide MPS the smallest possible value (128B).

Signed-off-by: Jon Mason <mason@myri.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/pci/pci.c   |  6 +++++-
 drivers/pci/probe.c | 14 +++++++++++++-
 include/linux/pci.h |  3 ++-
 3 files changed, 20 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 4e84fd4a4312..e9651f0a8817 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -77,7 +77,7 @@ unsigned long pci_cardbus_mem_size = DEFAULT_CARDBUS_MEM_SIZE;
 unsigned long pci_hotplug_io_size  = DEFAULT_HOTPLUG_IO_SIZE;
 unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE;
 
-enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_SAFE;
+enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_TUNE_OFF;
 
 /*
  * The default CLS is used if arch didn't set CLS explicitly and not
@@ -3568,10 +3568,14 @@ static int __init pci_setup(char *str)
 				pci_hotplug_io_size = memparse(str + 9, &str);
 			} else if (!strncmp(str, "hpmemsize=", 10)) {
 				pci_hotplug_mem_size = memparse(str + 10, &str);
+			} else if (!strncmp(str, "pcie_bus_tune_off", 17)) {
+				pcie_bus_config = PCIE_BUS_TUNE_OFF;
 			} else if (!strncmp(str, "pcie_bus_safe", 13)) {
 				pcie_bus_config = PCIE_BUS_SAFE;
 			} else if (!strncmp(str, "pcie_bus_perf", 13)) {
 				pcie_bus_config = PCIE_BUS_PERFORMANCE;
+			} else if (!strncmp(str, "pcie_bus_peer2peer", 18)) {
+				pcie_bus_config = PCIE_BUS_PEER2PEER;
 			} else {
 				printk(KERN_ERR "PCI: Unknown option `%s'\n",
 						str);
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index f3f94a5c068f..6ab6bd3df4b2 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1458,12 +1458,24 @@ static int pcie_bus_configure_set(struct pci_dev *dev, void *data)
  */
 void pcie_bus_configure_settings(struct pci_bus *bus, u8 mpss)
 {
-	u8 smpss = mpss;
+	u8 smpss;
 
 	if (!pci_is_pcie(bus->self))
 		return;
 
+	if (pcie_bus_config == PCIE_BUS_TUNE_OFF)
+		return;
+
+	/* FIXME - Peer to peer DMA is possible, though the endpoint would need
+	 * to be aware to the MPS of the destination.  To work around this,
+	 * simply force the MPS of the entire system to the smallest possible.
+	 */
+	if (pcie_bus_config == PCIE_BUS_PEER2PEER)
+		smpss = 0;
+
 	if (pcie_bus_config == PCIE_BUS_SAFE) {
+		smpss = mpss;
+
 		pcie_find_smpss(bus->self, &smpss);
 		pci_walk_bus(bus, pcie_find_smpss, &smpss);
 	}
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 8c230cbcbb48..9fc01226055b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -621,8 +621,9 @@ struct pci_driver {
 extern void pcie_bus_configure_settings(struct pci_bus *bus, u8 smpss);
 
 enum pcie_bus_config_types {
-	PCIE_BUS_PERFORMANCE,
+	PCIE_BUS_TUNE_OFF,
 	PCIE_BUS_SAFE,
+	PCIE_BUS_PERFORMANCE,
 	PCIE_BUS_PEER2PEER,
 };
 
-- 
cgit v1.2.3