From 446411e18b2cb17d153e45f634a3c9a79ada3ac2 Mon Sep 17 00:00:00 2001
From: Andreas Larsson <andreas@gaisler.com>
Date: Wed, 13 Feb 2013 14:20:25 +0100
Subject: spi: Initialize cs_gpio and cs_gpios with -ENOENT

The return value from of_get_named_gpio is -ENOENT when the given index
matches a hole in the "cs-gpios" property phandle list. However, the
default value of cs_gpio in struct spi_device and entries of cs_gpios in
struct spi_master is -EINVAL, which is documented to indicate that a
GPIO line should not be used for the given spi_device.

This sets the default value of cs_gpio in struct spi_device and entries
of cs_gpios in struct spi_master to -ENOENT. Thus, -ENOENT is the only
value used to indicate that no GPIO line should be used.

Signed-off-by: Andreas Larsson <andreas@gaisler.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 include/linux/spi/spi.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 38c2b925923d..c395f32b53b3 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -57,7 +57,7 @@ extern struct bus_type spi_bus_type;
  * @modalias: Name of the driver to use with this device, or an alias
  *	for that name.  This appears in the sysfs "modalias" attribute
  *	for driver coldplugging, and in uevents used for hotplugging
- * @cs_gpio: gpio number of the chipselect line (optional, -EINVAL when
+ * @cs_gpio: gpio number of the chipselect line (optional, -ENOENT when
  *	when not using a GPIO line)
  *
  * A @spi_device is used to interchange data between an SPI slave
@@ -261,7 +261,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  *	queue so the subsystem notifies the driver that it may relax the
  *	hardware by issuing this call
  * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS
- *	number. Any individual value may be -EINVAL for CS lines that
+ *	number. Any individual value may be -ENOENT for CS lines that
  *	are not GPIOs (driven by the SPI controller itself).
  *
  * Each SPI master controller can communicate with one or more @spi_device
-- 
cgit v1.2.3


From d450f445f9a654080a6be4094376c2192d9a1f36 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Tue, 19 Feb 2013 02:58:25 +0300
Subject: <linux/of_platform.h>: fix compilation warnings with DT disabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following compilation warnings (in Simon Horman's renesas.git repo):

In file included from arch/arm/mach-shmobile/setup-r8a7779.c:24:0:
include/linux/of_platform.h:107:13: warning: ‘struct of_device_id’ declared
inside parameter list [enabled by default]
include/linux/of_platform.h:107:13: warning: its scope is only this definition
or declaration, which is probably not what you want [enabled by default]
include/linux/of_platform.h:107:13: warning: ‘struct device_node’ declared
inside parameter list [enabled by default]

<linux/of_platform.h> only #include's headers with definitions of the above
mentioned structures if CONFIG_OF_DEVICE=y but uses them even if not. One
solution is to move some #include's out of #ifdef CONFIG_OF_DEVICE and use
incomplete declarations for the rest of the structures where the #ifdef move
doesn't help...

Reported-by: Vladimir Barinov <vladimir.barinov@cogentembedded.com>
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: Rob Herring <rob.herring@calxeda.com>
---
 include/linux/of_platform.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index 3863a4dbdf18..2a93b64a3869 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -11,9 +11,10 @@
  *
  */
 
-#ifdef CONFIG_OF_DEVICE
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
+
+#ifdef CONFIG_OF_DEVICE
 #include <linux/pm.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
@@ -100,7 +101,7 @@ extern int of_platform_populate(struct device_node *root,
 
 #if !defined(CONFIG_OF_ADDRESS)
 struct of_dev_auxdata;
-struct device;
+struct device_node;
 static inline int of_platform_populate(struct device_node *root,
 					const struct of_device_id *matches,
 					const struct of_dev_auxdata *lookup,
-- 
cgit v1.2.3


From 7677fc965fba41d1386fa3b76a1f00303f02bb2d Mon Sep 17 00:00:00 2001
From: Rony Efraim <ronye@mellanox.com>
Date: Wed, 8 May 2013 22:22:35 +0000
Subject: net/mlx4: Strengthen VLAN tags/priorities enforcement in VST mode

Make sure that the following steps are taken:

- drop packets sent by the VF with vlan tag
- block packets with vlan tag which are steered to the VF
- drop/block tagged packets when the policy is priority-tagged
- make sure VLAN stripping for received packets is set
- make sure force UP bit for the VF QP is set

Use enum values for all the above instead of numerical bit offsets.

Signed-off-by: Rony Efraim <ronye@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_resources.c  |  2 +-
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  | 20 ++++++++++++---
 include/linux/mlx4/qp.h                            | 29 ++++++++++++++++++++--
 3 files changed, 45 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
index 91f2b2c43c12..d3f508697a3d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
@@ -60,7 +60,7 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
 	context->pri_path.sched_queue = 0x83 | (priv->port - 1) << 6;
 	if (user_prio >= 0) {
 		context->pri_path.sched_queue |= user_prio << 3;
-		context->pri_path.feup = 1 << 6;
+		context->pri_path.feup = MLX4_FEUP_FORCE_ETH_UP;
 	}
 	context->pri_path.counter_index = 0xff;
 	context->cqn_send = cpu_to_be32(cqn);
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index d4a9de666fbd..1157f028a90f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -372,14 +372,28 @@ static int update_vport_qp_param(struct mlx4_dev *dev,
 		if (MLX4_QP_ST_RC == qp_type)
 			return -EINVAL;
 
+		/* force strip vlan by clear vsd */
+		qpc->param3 &= ~cpu_to_be32(MLX4_STRIP_VLAN);
+		if (0 != vp_oper->state.default_vlan) {
+			qpc->pri_path.vlan_control =
+				MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
+				MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
+				MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED;
+		} else { /* priority tagged */
+			qpc->pri_path.vlan_control =
+				MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
+				MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED;
+		}
+
+		qpc->pri_path.fvl_rx |= MLX4_FVL_RX_FORCE_ETH_VLAN;
 		qpc->pri_path.vlan_index = vp_oper->vlan_idx;
-		qpc->pri_path.fl = (1 << 6) | (1 << 2); /* set cv bit and hide_cqe_vlan bit*/
-		qpc->pri_path.feup |= 1 << 3; /* set fvl bit */
+		qpc->pri_path.fl |= MLX4_FL_CV | MLX4_FL_ETH_HIDE_CQE_VLAN;
+		qpc->pri_path.feup |= MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN;
 		qpc->pri_path.sched_queue &= 0xC7;
 		qpc->pri_path.sched_queue |= (vp_oper->state.default_qos) << 3;
 	}
 	if (vp_oper->state.spoofchk) {
-		qpc->pri_path.feup |= 1 << 5; /* set fsm bit */;
+		qpc->pri_path.feup |= MLX4_FSM_FORCE_ETH_SRC_MAC;
 		qpc->pri_path.grh_mylmc = (0x80 & qpc->pri_path.grh_mylmc) + vp_oper->mac_idx;
 	}
 	return 0;
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 67f46ad6920a..352eec9df1b8 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -126,7 +126,7 @@ struct mlx4_rss_context {
 
 struct mlx4_qp_path {
 	u8			fl;
-	u8			reserved1[1];
+	u8			vlan_control;
 	u8			disable_pkey_check;
 	u8			pkey_index;
 	u8			counter_index;
@@ -141,11 +141,32 @@ struct mlx4_qp_path {
 	u8			sched_queue;
 	u8			vlan_index;
 	u8			feup;
-	u8			reserved3;
+	u8			fvl_rx;
 	u8			reserved4[2];
 	u8			dmac[6];
 };
 
+enum { /* fl */
+	MLX4_FL_CV      = 1 << 6,
+	MLX4_FL_ETH_HIDE_CQE_VLAN       = 1 << 2
+};
+enum { /* vlan_control */
+	MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED	= 1 << 6,
+	MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED	= 1 << 2,
+	MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED	= 1 << 1, /* 802.1p priority tag */
+	MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED	= 1 << 0
+};
+
+enum { /* feup */
+	MLX4_FEUP_FORCE_ETH_UP          = 1 << 6, /* force Eth UP */
+	MLX4_FSM_FORCE_ETH_SRC_MAC      = 1 << 5, /* force Source MAC */
+	MLX4_FVL_FORCE_ETH_VLAN         = 1 << 3  /* force Eth vlan */
+};
+
+enum { /* fvl_rx */
+	MLX4_FVL_RX_FORCE_ETH_VLAN      = 1 << 0 /* enforce Eth rx vlan */
+};
+
 struct mlx4_qp_context {
 	__be32			flags;
 	__be32			pd;
@@ -185,6 +206,10 @@ struct mlx4_qp_context {
 	u32			reserved5[10];
 };
 
+enum { /* param3 */
+	MLX4_STRIP_VLAN = 1 << 30
+};
+
 /* Which firmware version adds support for NEC (NoErrorCompletion) bit */
 #define MLX4_FW_VER_WQE_CTRL_NEC mlx4_fw_ver(2, 2, 232)
 
-- 
cgit v1.2.3


From e2555fde4159467fb579e6ae3c0a8fc33015d0f5 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 13 May 2013 09:45:01 -0400
Subject: jbd,jbd2: fix oops in jbd2_journal_put_journal_head()

Commit ae4647fb (jbd2: reduce journal_head size) introduced a
regression where we occasionally hit panic in
jbd2_journal_put_journal_head() because of wrong b_jcount. The bug is
caused by gcc making 64-bit access to 32-bit bitfield and thus
clobbering b_jcount.

At least for now, those 8 bytes saved in struct journal_head are not
worth the trouble with gcc bitfield handling so revert that part of
the patch.

Reported-by: EUNBONG SONG <eunb.song@samsung.com>
Reported-by: Tony Luck <tony.luck@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/journal-head.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/journal-head.h b/include/linux/journal-head.h
index 13a3da25ff07..98cd41bb39c8 100644
--- a/include/linux/journal-head.h
+++ b/include/linux/journal-head.h
@@ -30,15 +30,19 @@ struct journal_head {
 
 	/*
 	 * Journalling list for this buffer [jbd_lock_bh_state()]
+	 * NOTE: We *cannot* combine this with b_modified into a bitfield
+	 * as gcc would then (which the C standard allows but which is
+	 * very unuseful) make 64-bit accesses to the bitfield and clobber
+	 * b_jcount if its update races with bitfield modification.
 	 */
-	unsigned b_jlist:4;
+	unsigned b_jlist;
 
 	/*
 	 * This flag signals the buffer has been modified by
 	 * the currently running transaction
 	 * [jbd_lock_bh_state()]
 	 */
-	unsigned b_modified:1;
+	unsigned b_modified;
 
 	/*
 	 * Copy of the buffer data frozen for writing to the log.
-- 
cgit v1.2.3


From ee8209fd026b074bb8eb75bece516a338a281b1b Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 8 May 2013 11:55:48 +0300
Subject: dma: acpi-dma: parse CSRT to extract additional resources

Since we have CSRT only to get additional DMA controller resources, let's get
rid of drivers/acpi/csrt.c and move its logic inside ACPI DMA helpers code.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/acpi/Makefile    |   1 -
 drivers/acpi/csrt.c      | 159 -------------------------------------------
 drivers/acpi/internal.h  |   1 -
 drivers/acpi/scan.c      |   1 -
 drivers/dma/acpi-dma.c   | 172 ++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/acpi_dma.h |   4 ++
 6 files changed, 173 insertions(+), 165 deletions(-)
 delete mode 100644 drivers/acpi/csrt.c

(limited to 'include/linux')

diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index ecb743bf05a5..6050c8028dce 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -38,7 +38,6 @@ acpi-y				+= processor_core.o
 acpi-y				+= ec.o
 acpi-$(CONFIG_ACPI_DOCK)	+= dock.o
 acpi-y				+= pci_root.o pci_link.o pci_irq.o
-acpi-y				+= csrt.o
 acpi-$(CONFIG_X86_INTEL_LPSS)	+= acpi_lpss.o
 acpi-y				+= acpi_platform.o
 acpi-y				+= power.o
diff --git a/drivers/acpi/csrt.c b/drivers/acpi/csrt.c
deleted file mode 100644
index 5c15a91faf0b..000000000000
--- a/drivers/acpi/csrt.c
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Support for Core System Resources Table (CSRT)
- *
- * Copyright (C) 2013, Intel Corporation
- * Authors: Mika Westerberg <mika.westerberg@linux.intel.com>
- *	    Andy Shevchenko <andriy.shevchenko@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#define pr_fmt(fmt) "ACPI: CSRT: " fmt
-
-#include <linux/acpi.h>
-#include <linux/device.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/sizes.h>
-
-ACPI_MODULE_NAME("CSRT");
-
-static int __init acpi_csrt_parse_shared_info(struct platform_device *pdev,
-					      const struct acpi_csrt_group *grp)
-{
-	const struct acpi_csrt_shared_info *si;
-	struct resource res[3];
-	size_t nres;
-	int ret;
-
-	memset(res, 0, sizeof(res));
-	nres = 0;
-
-	si = (const struct acpi_csrt_shared_info *)&grp[1];
-	/*
-	 * The peripherals that are listed on CSRT typically support only
-	 * 32-bit addresses so we only use the low part of MMIO base for
-	 * now.
-	 */
-	if (!si->mmio_base_high && si->mmio_base_low) {
-		/*
-		 * There is no size of the memory resource in shared_info
-		 * so we assume that it is 4k here.
-		 */
-		res[nres].start = si->mmio_base_low;
-		res[nres].end = res[0].start + SZ_4K - 1;
-		res[nres++].flags = IORESOURCE_MEM;
-	}
-
-	if (si->gsi_interrupt) {
-		int irq = acpi_register_gsi(NULL, si->gsi_interrupt,
-					    si->interrupt_mode,
-					    si->interrupt_polarity);
-		res[nres].start = irq;
-		res[nres].end = irq;
-		res[nres++].flags = IORESOURCE_IRQ;
-	}
-
-	if (si->base_request_line || si->num_handshake_signals) {
-		/*
-		 * We pass the driver a DMA resource describing the range
-		 * of request lines the device supports.
-		 */
-		res[nres].start = si->base_request_line;
-		res[nres].end = res[nres].start + si->num_handshake_signals - 1;
-		res[nres++].flags = IORESOURCE_DMA;
-	}
-
-	ret = platform_device_add_resources(pdev, res, nres);
-	if (ret) {
-		if (si->gsi_interrupt)
-			acpi_unregister_gsi(si->gsi_interrupt);
-		return ret;
-	}
-
-	return 0;
-}
-
-static int __init
-acpi_csrt_parse_resource_group(const struct acpi_csrt_group *grp)
-{
-	struct platform_device *pdev;
-	char vendor[5], name[16];
-	int ret, i;
-
-	vendor[0] = grp->vendor_id;
-	vendor[1] = grp->vendor_id >> 8;
-	vendor[2] = grp->vendor_id >> 16;
-	vendor[3] = grp->vendor_id >> 24;
-	vendor[4] = '\0';
-
-	if (grp->shared_info_length != sizeof(struct acpi_csrt_shared_info))
-		return -ENODEV;
-
-	snprintf(name, sizeof(name), "%s%04X", vendor, grp->device_id);
-	pdev = platform_device_alloc(name, PLATFORM_DEVID_AUTO);
-	if (!pdev)
-		return -ENOMEM;
-
-	/* Add resources based on the shared info */
-	ret = acpi_csrt_parse_shared_info(pdev, grp);
-	if (ret)
-		goto fail;
-
-	ret = platform_device_add(pdev);
-	if (ret)
-		goto fail;
-
-	for (i = 0; i < pdev->num_resources; i++)
-		dev_dbg(&pdev->dev, "%pR\n", &pdev->resource[i]);
-
-	return 0;
-
-fail:
-	platform_device_put(pdev);
-	return ret;
-}
-
-/*
- * CSRT or Core System Resources Table is a proprietary ACPI table
- * introduced by Microsoft. This table can contain devices that are not in
- * the system DSDT table. In particular DMA controllers might be described
- * here.
- *
- * We present these devices as normal platform devices that don't have ACPI
- * IDs or handle. The platform device name will be something like
- * <VENDOR><DEVID>.<n>.auto for example: INTL9C06.0.auto.
- */
-void __init acpi_csrt_init(void)
-{
-	struct acpi_csrt_group *grp, *end;
-	struct acpi_table_csrt *csrt;
-	acpi_status status;
-	int ret;
-
-	status = acpi_get_table(ACPI_SIG_CSRT, 0,
-				(struct acpi_table_header **)&csrt);
-	if (ACPI_FAILURE(status)) {
-		if (status != AE_NOT_FOUND)
-			pr_warn("failed to get the CSRT table\n");
-		return;
-	}
-
-	pr_debug("parsing CSRT table for devices\n");
-
-	grp = (struct acpi_csrt_group *)(csrt + 1);
-	end = (struct acpi_csrt_group *)((void *)csrt + csrt->header.length);
-
-	while (grp < end) {
-		ret = acpi_csrt_parse_resource_group(grp);
-		if (ret) {
-			pr_warn("error in parsing resource group: %d\n", ret);
-			return;
-		}
-
-		grp = (struct acpi_csrt_group *)((void *)grp + grp->length);
-	}
-}
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 6f1afd9118c8..297cbf456f86 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -35,7 +35,6 @@ void acpi_pci_link_init(void);
 void acpi_pci_root_hp_init(void);
 void acpi_platform_init(void);
 int acpi_sysfs_init(void);
-void acpi_csrt_init(void);
 #ifdef CONFIG_ACPI_CONTAINER
 void acpi_container_init(void);
 #else
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index fe158fd4f1df..aacc08f951aa 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2042,7 +2042,6 @@ int __init acpi_scan_init(void)
 	acpi_pci_link_init();
 	acpi_platform_init();
 	acpi_lpss_init();
-	acpi_csrt_init();
 	acpi_container_init();
 	acpi_memory_hotplug_init();
 
diff --git a/drivers/dma/acpi-dma.c b/drivers/dma/acpi-dma.c
index ba6fc62e9651..5a18f82f732a 100644
--- a/drivers/dma/acpi-dma.c
+++ b/drivers/dma/acpi-dma.c
@@ -4,7 +4,8 @@
  * Based on of-dma.c
  *
  * Copyright (C) 2013, Intel Corporation
- * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ * Authors: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ *	    Mika Westerberg <mika.westerberg@linux.intel.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -16,12 +17,124 @@
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
+#include <linux/ioport.h>
 #include <linux/acpi.h>
 #include <linux/acpi_dma.h>
 
 static LIST_HEAD(acpi_dma_list);
 static DEFINE_MUTEX(acpi_dma_lock);
 
+/**
+ * acpi_dma_parse_resource_group - match device and parse resource group
+ * @grp:	CSRT resource group
+ * @adev:	ACPI device to match with
+ * @adma:	struct acpi_dma of the given DMA controller
+ *
+ * Returns 1 on success, 0 when no information is available, or appropriate
+ * errno value on error.
+ *
+ * In order to match a device from DSDT table to the corresponding CSRT device
+ * we use MMIO address and IRQ.
+ */
+static int acpi_dma_parse_resource_group(const struct acpi_csrt_group *grp,
+		struct acpi_device *adev, struct acpi_dma *adma)
+{
+	const struct acpi_csrt_shared_info *si;
+	struct list_head resource_list;
+	struct resource_list_entry *rentry;
+	resource_size_t mem = 0, irq = 0;
+	u32 vendor_id;
+	int ret;
+
+	if (grp->shared_info_length != sizeof(struct acpi_csrt_shared_info))
+		return -ENODEV;
+
+	INIT_LIST_HEAD(&resource_list);
+	ret = acpi_dev_get_resources(adev, &resource_list, NULL, NULL);
+	if (ret <= 0)
+		return 0;
+
+	list_for_each_entry(rentry, &resource_list, node) {
+		if (resource_type(&rentry->res) == IORESOURCE_MEM)
+			mem = rentry->res.start;
+		else if (resource_type(&rentry->res) == IORESOURCE_IRQ)
+			irq = rentry->res.start;
+	}
+
+	acpi_dev_free_resource_list(&resource_list);
+
+	/* Consider initial zero values as resource not found */
+	if (mem == 0 && irq == 0)
+		return 0;
+
+	si = (const struct acpi_csrt_shared_info *)&grp[1];
+
+	/* Match device by MMIO and IRQ */
+	if (si->mmio_base_low != mem || si->gsi_interrupt != irq)
+		return 0;
+
+	vendor_id = le32_to_cpu(grp->vendor_id);
+	dev_dbg(&adev->dev, "matches with %.4s%04X (rev %u)\n",
+		(char *)&vendor_id, grp->device_id, grp->revision);
+
+	/* Check if the request line range is available */
+	if (si->base_request_line == 0 && si->num_handshake_signals == 0)
+		return 0;
+
+	adma->base_request_line = si->base_request_line;
+	adma->end_request_line = si->base_request_line +
+				 si->num_handshake_signals - 1;
+
+	dev_dbg(&adev->dev, "request line base: 0x%04x end: 0x%04x\n",
+		adma->base_request_line, adma->end_request_line);
+
+	return 1;
+}
+
+/**
+ * acpi_dma_parse_csrt - parse CSRT to exctract additional DMA resources
+ * @adev:	ACPI device to match with
+ * @adma:	struct acpi_dma of the given DMA controller
+ *
+ * CSRT or Core System Resources Table is a proprietary ACPI table
+ * introduced by Microsoft. This table can contain devices that are not in
+ * the system DSDT table. In particular DMA controllers might be described
+ * here.
+ *
+ * We are using this table to get the request line range of the specific DMA
+ * controller to be used later.
+ *
+ */
+static void acpi_dma_parse_csrt(struct acpi_device *adev, struct acpi_dma *adma)
+{
+	struct acpi_csrt_group *grp, *end;
+	struct acpi_table_csrt *csrt;
+	acpi_status status;
+	int ret;
+
+	status = acpi_get_table(ACPI_SIG_CSRT, 0,
+				(struct acpi_table_header **)&csrt);
+	if (ACPI_FAILURE(status)) {
+		if (status != AE_NOT_FOUND)
+			dev_warn(&adev->dev, "failed to get the CSRT table\n");
+		return;
+	}
+
+	grp = (struct acpi_csrt_group *)(csrt + 1);
+	end = (struct acpi_csrt_group *)((void *)csrt + csrt->header.length);
+
+	while (grp < end) {
+		ret = acpi_dma_parse_resource_group(grp, adev, adma);
+		if (ret < 0) {
+			dev_warn(&adev->dev,
+				 "error in parsing resource group\n");
+			return;
+		}
+
+		grp = (struct acpi_csrt_group *)((void *)grp + grp->length);
+	}
+}
+
 /**
  * acpi_dma_controller_register - Register a DMA controller to ACPI DMA helpers
  * @dev:		struct device of DMA controller
@@ -61,6 +174,8 @@ int acpi_dma_controller_register(struct device *dev,
 	adma->acpi_dma_xlate = acpi_dma_xlate;
 	adma->data = data;
 
+	acpi_dma_parse_csrt(adev, adma);
+
 	/* Now queue acpi_dma controller structure in list */
 	mutex_lock(&acpi_dma_lock);
 	list_add_tail(&adma->dma_controllers, &acpi_dma_list);
@@ -149,6 +264,45 @@ void devm_acpi_dma_controller_free(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(devm_acpi_dma_controller_free);
 
+/**
+ * acpi_dma_update_dma_spec - prepare dma specifier to pass to translation function
+ * @adma:	struct acpi_dma of DMA controller
+ * @dma_spec:	dma specifier to update
+ *
+ * Returns 0, if no information is avaiable, -1 on mismatch, and 1 otherwise.
+ *
+ * Accordingly to ACPI 5.0 Specification Table 6-170 "Fixed DMA Resource
+ * Descriptor":
+ *	DMA Request Line bits is a platform-relative number uniquely
+ *	identifying the request line assigned. Request line-to-Controller
+ *	mapping is done in a controller-specific OS driver.
+ * That's why we can safely adjust slave_id when the appropriate controller is
+ * found.
+ */
+static int acpi_dma_update_dma_spec(struct acpi_dma *adma,
+		struct acpi_dma_spec *dma_spec)
+{
+	/* Set link to the DMA controller device */
+	dma_spec->dev = adma->dev;
+
+	/* Check if the request line range is available */
+	if (adma->base_request_line == 0 && adma->end_request_line == 0)
+		return 0;
+
+	/* Check if slave_id falls to the range */
+	if (dma_spec->slave_id < adma->base_request_line ||
+	    dma_spec->slave_id > adma->end_request_line)
+		return -1;
+
+	/*
+	 * Here we adjust slave_id. It should be a relative number to the base
+	 * request line.
+	 */
+	dma_spec->slave_id -= adma->base_request_line;
+
+	return 1;
+}
+
 struct acpi_dma_parser_data {
 	struct acpi_dma_spec dma_spec;
 	size_t index;
@@ -193,6 +347,7 @@ struct dma_chan *acpi_dma_request_slave_chan_by_index(struct device *dev,
 	struct acpi_device *adev;
 	struct acpi_dma *adma;
 	struct dma_chan *chan = NULL;
+	int found;
 
 	/* Check if the device was enumerated by ACPI */
 	if (!dev || !ACPI_HANDLE(dev))
@@ -219,9 +374,20 @@ struct dma_chan *acpi_dma_request_slave_chan_by_index(struct device *dev,
 	mutex_lock(&acpi_dma_lock);
 
 	list_for_each_entry(adma, &acpi_dma_list, dma_controllers) {
-		dma_spec->dev = adma->dev;
+		/*
+		 * We are not going to call translation function if slave_id
+		 * doesn't fall to the request range.
+		 */
+		found = acpi_dma_update_dma_spec(adma, dma_spec);
+		if (found < 0)
+			continue;
 		chan = adma->acpi_dma_xlate(dma_spec, adma);
-		if (chan)
+		/*
+		 * Try to get a channel only from the DMA controller that
+		 * matches the slave_id. See acpi_dma_update_dma_spec()
+		 * description for the details.
+		 */
+		if (found > 0 || chan)
 			break;
 	}
 
diff --git a/include/linux/acpi_dma.h b/include/linux/acpi_dma.h
index d09deabc7bf6..fb0298082916 100644
--- a/include/linux/acpi_dma.h
+++ b/include/linux/acpi_dma.h
@@ -37,6 +37,8 @@ struct acpi_dma_spec {
  * @dev:		struct device of this controller
  * @acpi_dma_xlate:	callback function to find a suitable channel
  * @data:		private data used by a callback function
+ * @base_request_line:	first supported request line (CSRT)
+ * @end_request_line:	last supported request line (CSRT)
  */
 struct acpi_dma {
 	struct list_head	dma_controllers;
@@ -44,6 +46,8 @@ struct acpi_dma {
 	struct dma_chan		*(*acpi_dma_xlate)
 				(struct acpi_dma_spec *, struct acpi_dma *);
 	void			*data;
+	unsigned short		base_request_line;
+	unsigned short		end_request_line;
 };
 
 /* Used with acpi_dma_simple_xlate() */
-- 
cgit v1.2.3


From b59cc200ac025aca597fb21862c1c9e667f2eff2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 8 May 2013 11:55:49 +0300
Subject: ACPI / LPSS: register clock device for Lynxpoint DMA properly

The DMA controller in Lynxpoint is enumerated as a regular ACPI device now. To
work properly it is using the LPSS root clock as a functional clock. That's why
we have to register the clock device accordingly to the ACPI ID of the DMA
controller. The acpi_lpss.c module is responsible to do the job.

This patch also removes hardcoded name of the DMA device in clk-lpt.c and the
name of the root clock in acpi_lpss.c.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/acpi/acpi_lpss.c               | 26 ++++++++++++++++++++++----
 drivers/clk/x86/clk-lpt.c              | 15 +++++++++++----
 include/linux/platform_data/clk-lpss.h |  5 +++++
 3 files changed, 38 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index b1c95422ce74..652fd5ce303c 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -35,11 +35,16 @@ ACPI_MODULE_NAME("acpi_lpss");
 
 struct lpss_device_desc {
 	bool clk_required;
-	const char *clk_parent;
+	const char *clkdev_name;
 	bool ltr_required;
 	unsigned int prv_offset;
 };
 
+static struct lpss_device_desc lpss_dma_desc = {
+	.clk_required = true,
+	.clkdev_name = "hclk",
+};
+
 struct lpss_private_data {
 	void __iomem *mmio_base;
 	resource_size_t mmio_size;
@@ -49,7 +54,6 @@ struct lpss_private_data {
 
 static struct lpss_device_desc lpt_dev_desc = {
 	.clk_required = true,
-	.clk_parent = "lpss_clk",
 	.prv_offset = 0x800,
 	.ltr_required = true,
 };
@@ -60,6 +64,9 @@ static struct lpss_device_desc lpt_sdio_dev_desc = {
 };
 
 static const struct acpi_device_id acpi_lpss_device_ids[] = {
+	/* Generic LPSS devices */
+	{ "INTL9C60", (unsigned long)&lpss_dma_desc },
+
 	/* Lynxpoint LPSS devices */
 	{ "INT33C0", (unsigned long)&lpt_dev_desc },
 	{ "INT33C1", (unsigned long)&lpt_dev_desc },
@@ -91,16 +98,27 @@ static int register_device_clock(struct acpi_device *adev,
 				 struct lpss_private_data *pdata)
 {
 	const struct lpss_device_desc *dev_desc = pdata->dev_desc;
+	struct lpss_clk_data *clk_data;
 
 	if (!lpss_clk_dev)
 		lpt_register_clock_device();
 
-	if (!dev_desc->clk_parent || !pdata->mmio_base
+	clk_data = platform_get_drvdata(lpss_clk_dev);
+	if (!clk_data)
+		return -ENODEV;
+
+	if (dev_desc->clkdev_name) {
+		clk_register_clkdev(clk_data->clk, dev_desc->clkdev_name,
+				    dev_name(&adev->dev));
+		return 0;
+	}
+
+	if (!pdata->mmio_base
 	    || pdata->mmio_size < dev_desc->prv_offset + LPSS_CLK_SIZE)
 		return -ENODATA;
 
 	pdata->clk = clk_register_gate(NULL, dev_name(&adev->dev),
-				       dev_desc->clk_parent, 0,
+				       clk_data->name, 0,
 				       pdata->mmio_base + dev_desc->prv_offset,
 				       0, 0, NULL);
 	if (IS_ERR(pdata->clk))
diff --git a/drivers/clk/x86/clk-lpt.c b/drivers/clk/x86/clk-lpt.c
index 5cf4f4686406..4f45eee9e33b 100644
--- a/drivers/clk/x86/clk-lpt.c
+++ b/drivers/clk/x86/clk-lpt.c
@@ -15,22 +15,29 @@
 #include <linux/clk-provider.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/platform_data/clk-lpss.h>
 #include <linux/platform_device.h>
 
 #define PRV_CLOCK_PARAMS 0x800
 
 static int lpt_clk_probe(struct platform_device *pdev)
 {
+	struct lpss_clk_data *drvdata;
 	struct clk *clk;
 
+	drvdata = devm_kzalloc(&pdev->dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
 	/* LPSS free running clock */
-	clk = clk_register_fixed_rate(&pdev->dev, "lpss_clk", NULL, CLK_IS_ROOT,
-				      100000000);
+	drvdata->name = "lpss_clk";
+	clk = clk_register_fixed_rate(&pdev->dev, drvdata->name, NULL,
+				      CLK_IS_ROOT, 100000000);
 	if (IS_ERR(clk))
 		return PTR_ERR(clk);
 
-	/* Shared DMA clock */
-	clk_register_clkdev(clk, "hclk", "INTL9C60.0.auto");
+	drvdata->clk = clk;
+	platform_set_drvdata(pdev, drvdata);
 	return 0;
 }
 
diff --git a/include/linux/platform_data/clk-lpss.h b/include/linux/platform_data/clk-lpss.h
index 528e73ce46d2..23901992b9dd 100644
--- a/include/linux/platform_data/clk-lpss.h
+++ b/include/linux/platform_data/clk-lpss.h
@@ -13,6 +13,11 @@
 #ifndef __CLK_LPSS_H
 #define __CLK_LPSS_H
 
+struct lpss_clk_data {
+	const char *name;
+	struct clk *clk;
+};
+
 extern int lpt_clk_init(void);
 
 #endif /* __CLK_LPSS_H */
-- 
cgit v1.2.3


From 63ad9cbff5896dadfee510f631ffcb0176984722 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Tue, 23 Apr 2013 15:28:38 +0200
Subject: pinctrl: generic: Fix typos and clarify comments

Drive strength controls both sink and source currents, clarify the
description accordingly.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/pinctrl/pinconf-generic.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index 72474e18f1e0..6aa238096622 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -37,17 +37,17 @@
  *	if it is 0, pull-down is disabled.
  * @PIN_CONFIG_DRIVE_PUSH_PULL: the pin will be driven actively high and
  *	low, this is the most typical case and is typically achieved with two
- *	active transistors on the output. Sending this config will enabale
+ *	active transistors on the output. Setting this config will enable
  *	push-pull mode, the argument is ignored.
  * @PIN_CONFIG_DRIVE_OPEN_DRAIN: the pin will be driven with open drain (open
  *	collector) which means it is usually wired with other output ports
- *	which are then pulled up with an external resistor. Sending this
- *	config will enabale open drain mode, the argument is ignored.
+ *	which are then pulled up with an external resistor. Setting this
+ *	config will enable open drain mode, the argument is ignored.
  * @PIN_CONFIG_DRIVE_OPEN_SOURCE: the pin will be driven with open source
- *	(open emitter). Sending this config will enabale open drain mode, the
+ *	(open emitter). Setting this config will enable open drain mode, the
  *	argument is ignored.
- * @PIN_CONFIG_DRIVE_STRENGTH: the pin will output the current passed as
- * 	argument. The argument is in mA.
+ * @PIN_CONFIG_DRIVE_STRENGTH: the pin will sink or source at most the current
+ *	passed as argument. The argument is in mA.
  * @PIN_CONFIG_INPUT_SCHMITT_ENABLE: control schmitt-trigger mode on the pin.
  *      If the argument != 0, schmitt-trigger mode is enabled. If it's 0,
  *      schmitt-trigger mode is disabled.
-- 
cgit v1.2.3


From b4f711ee03d28f776fd2324fd0bd999cc428e4d2 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Wed, 24 Apr 2013 11:32:56 -0700
Subject: time: Revert ALWAYS_USE_PERSISTENT_CLOCK compile time optimizaitons

Kay Sievers noted that the ALWAYS_USE_PERSISTENT_CLOCK config,
which enables some minor compile time optimization to avoid
uncessary code in mostly the suspend/resume path could cause
problems for userland.

In particular, the dependency for RTC_HCTOSYS on
!ALWAYS_USE_PERSISTENT_CLOCK, which avoids setting the time
twice and simplifies suspend/resume, has the side effect
of causing the /sys/class/rtc/rtcN/hctosys flag to always be
zero, and this flag is commonly used by udev to setup the
/dev/rtc symlink to /dev/rtcN, which can cause pain for
older applications.

While the udev rules could use some work to be less fragile,
breaking userland should strongly be avoided. Additionally
the compile time optimizations are fairly minor, and the code
being optimized is likely to be reworked in the future, so
lets revert this change.

Reported-by: Kay Sievers <kay@vrfy.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Cc: stable <stable@vger.kernel.org> #3.9
Cc: Feng Tang <feng.tang@intel.com>
Cc: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Link: http://lkml.kernel.org/r/1366828376-18124-1-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig     | 1 -
 drivers/rtc/Kconfig  | 2 --
 include/linux/time.h | 4 ----
 kernel/time/Kconfig  | 5 -----
 4 files changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5db2117ae288..45c41249321f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -108,7 +108,6 @@ config X86
 	select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
 	select GENERIC_TIME_VSYSCALL if X86_64
 	select KTIME_SCALAR if X86_32
-	select ALWAYS_USE_PERSISTENT_CLOCK
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select HAVE_CONTEXT_TRACKING if X86_64
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 0c81915b1997..b9838130a7b0 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -20,7 +20,6 @@ if RTC_CLASS
 config RTC_HCTOSYS
 	bool "Set system time from RTC on startup and resume"
 	default y
-	depends on !ALWAYS_USE_PERSISTENT_CLOCK
 	help
 	  If you say yes here, the system time (wall clock) will be set using
 	  the value read from a specified RTC device. This is useful to avoid
@@ -29,7 +28,6 @@ config RTC_HCTOSYS
 config RTC_SYSTOHC
 	bool "Set the RTC time based on NTP synchronization"
 	default y
-	depends on !ALWAYS_USE_PERSISTENT_CLOCK
 	help
 	  If you say yes here, the system time (wall clock) will be stored
 	  in the RTC specified by RTC_HCTOSYS_DEVICE approximately every 11
diff --git a/include/linux/time.h b/include/linux/time.h
index 22d81b3c955b..d5d229b2e5af 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -117,14 +117,10 @@ static inline bool timespec_valid_strict(const struct timespec *ts)
 
 extern bool persistent_clock_exist;
 
-#ifdef ALWAYS_USE_PERSISTENT_CLOCK
-#define has_persistent_clock()	true
-#else
 static inline bool has_persistent_clock(void)
 {
 	return persistent_clock_exist;
 }
-#endif
 
 extern void read_persistent_clock(struct timespec *ts);
 extern void read_boot_clock(struct timespec *ts);
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 24510d84efd7..b69692250af4 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -12,11 +12,6 @@ config CLOCKSOURCE_WATCHDOG
 config ARCH_CLOCKSOURCE_DATA
 	bool
 
-# Platforms has a persistent clock
-config ALWAYS_USE_PERSISTENT_CLOCK
-	bool
-	default n
-
 # Timekeeping vsyscall support
 config GENERIC_TIME_VSYSCALL
 	bool
-- 
cgit v1.2.3


From ccf5ae83a6cf3d9cfe9a7038bfe7cd38ab03d5e1 Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Mon, 13 May 2013 16:30:06 -0400
Subject: target: close target_put_sess_cmd() vs. core_tmr_abort_task() race

It is possible for one thread to to take se_sess->sess_cmd_lock in
core_tmr_abort_task() before taking a reference count on
se_cmd->cmd_kref, while another thread in target_put_sess_cmd() drops
se_cmd->cmd_kref before taking se_sess->sess_cmd_lock.

This introduces kref_put_spinlock_irqsave() and uses it in
target_put_sess_cmd() to close the race window.

Signed-off-by: Joern Engel <joern@logfs.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_transport.c | 11 +++++------
 include/linux/kref.h                   | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index c3477fa60942..4a793362309d 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -2211,21 +2211,19 @@ static void target_release_cmd_kref(struct kref *kref)
 {
 	struct se_cmd *se_cmd = container_of(kref, struct se_cmd, cmd_kref);
 	struct se_session *se_sess = se_cmd->se_sess;
-	unsigned long flags;
 
-	spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
 	if (list_empty(&se_cmd->se_cmd_list)) {
-		spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
+		spin_unlock(&se_sess->sess_cmd_lock);
 		se_cmd->se_tfo->release_cmd(se_cmd);
 		return;
 	}
 	if (se_sess->sess_tearing_down && se_cmd->cmd_wait_set) {
-		spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
+		spin_unlock(&se_sess->sess_cmd_lock);
 		complete(&se_cmd->cmd_wait_comp);
 		return;
 	}
 	list_del(&se_cmd->se_cmd_list);
-	spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
+	spin_unlock(&se_sess->sess_cmd_lock);
 
 	se_cmd->se_tfo->release_cmd(se_cmd);
 }
@@ -2236,7 +2234,8 @@ static void target_release_cmd_kref(struct kref *kref)
  */
 int target_put_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd)
 {
-	return kref_put(&se_cmd->cmd_kref, target_release_cmd_kref);
+	return kref_put_spinlock_irqsave(&se_cmd->cmd_kref, target_release_cmd_kref,
+			&se_sess->sess_cmd_lock);
 }
 EXPORT_SYMBOL(target_put_sess_cmd);
 
diff --git a/include/linux/kref.h b/include/linux/kref.h
index 4972e6e9ca93..7419c02085d7 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -19,6 +19,7 @@
 #include <linux/atomic.h>
 #include <linux/kernel.h>
 #include <linux/mutex.h>
+#include <linux/spinlock.h>
 
 struct kref {
 	atomic_t refcount;
@@ -95,6 +96,38 @@ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)
 	return kref_sub(kref, 1, release);
 }
 
+/**
+ * kref_put_spinlock_irqsave - decrement refcount for object.
+ * @kref: object.
+ * @release: pointer to the function that will clean up the object when the
+ *	     last reference to the object is released.
+ *	     This pointer is required, and it is not acceptable to pass kfree
+ *	     in as this function.
+ * @lock: lock to take in release case
+ *
+ * Behaves identical to kref_put with one exception.  If the reference count
+ * drops to zero, the lock will be taken atomically wrt dropping the reference
+ * count.  The release function has to call spin_unlock() without _irqrestore.
+ */
+static inline int kref_put_spinlock_irqsave(struct kref *kref,
+		void (*release)(struct kref *kref),
+		spinlock_t *lock)
+{
+	unsigned long flags;
+
+	WARN_ON(release == NULL);
+	if (atomic_add_unless(&kref->refcount, -1, 1))
+		return 0;
+	spin_lock_irqsave(lock, flags);
+	if (atomic_dec_and_test(&kref->refcount)) {
+		release(kref);
+		local_irq_restore(flags);
+		return 1;
+	}
+	spin_unlock_irqrestore(lock, flags);
+	return 0;
+}
+
 static inline int kref_put_mutex(struct kref *kref,
 				 void (*release)(struct kref *kref),
 				 struct mutex *lock)
-- 
cgit v1.2.3


From de97f250394996f5acb07ba9e6dbdfc15ee4316c Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Thu, 2 May 2013 09:51:44 -0400
Subject: Correct typo "supperspeed" to "superspeed".

Tidy up kernel-doc content for USB GADGET. No functional change.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/linux/usb/gadget.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index c454a88abf2e..f1b0dca60f12 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -563,9 +563,8 @@ static inline int gadget_is_dualspeed(struct usb_gadget *g)
 }
 
 /**
- * gadget_is_superspeed() - return true if the hardware handles
- * supperspeed
- * @g: controller that might support supper speed
+ * gadget_is_superspeed() - return true if the hardware handles superspeed
+ * @g: controller that might support superspeed
  */
 static inline int gadget_is_superspeed(struct usb_gadget *g)
 {
-- 
cgit v1.2.3


From 755ccb9d577b95b43537cfeb36da59140412f858 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 15 May 2013 08:00:25 +0000
Subject: broadcom: add include guards to include/linux/brcmphy.h

include/linux/brcmphy.h is currently not protected against double
inclusion, add ifdefs guard to fix that.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index b840a4960282..677b4f01b2d0 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -1,3 +1,6 @@
+#ifndef _LINUX_BRCMPHY_H
+#define _LINUX_BRCMPHY_H
+
 #define PHY_ID_BCM50610			0x0143bd60
 #define PHY_ID_BCM50610M		0x0143bd70
 #define PHY_ID_BCM5241			0x0143bc30
@@ -29,3 +32,5 @@
 #define PHY_BRCM_CLEAR_RGMII_MODE	0x00004000
 #define PHY_BRCM_DIS_TXCRXC_NOENRGY	0x00008000
 #define PHY_BCM_FLAGS_VALID		0x80000000
+
+#endif /* _LINUX_BRCMPHY_H */
-- 
cgit v1.2.3


From fc1f7d5606487ae28d6c84e95401952927d7379e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 25 Apr 2013 20:31:43 +0000
Subject: clocksource: apb_timer: Remove unsused function

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: John Stultz <john.stultz@linaro.org>
Cc: Magnus Damm <magnus.damm@gmail.com>
Acked-by: Jamie Iles <jamie@jamieiles.com>
Link: http://lkml.kernel.org/r/20130425143435.558006195@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/clocksource/dw_apb_timer.c | 12 ------------
 include/linux/dw_apb_timer.h       |  1 -
 2 files changed, 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clocksource/dw_apb_timer.c b/drivers/clocksource/dw_apb_timer.c
index 8c2a35f26d9b..e54ca1062d8e 100644
--- a/drivers/clocksource/dw_apb_timer.c
+++ b/drivers/clocksource/dw_apb_timer.c
@@ -387,15 +387,3 @@ cycle_t dw_apb_clocksource_read(struct dw_apb_clocksource *dw_cs)
 {
 	return (cycle_t)~apbt_readl(&dw_cs->timer, APBTMR_N_CURRENT_VALUE);
 }
-
-/**
- * dw_apb_clocksource_unregister() - unregister and free a clocksource.
- *
- * @dw_cs:	The clocksource to unregister/free.
- */
-void dw_apb_clocksource_unregister(struct dw_apb_clocksource *dw_cs)
-{
-	clocksource_unregister(&dw_cs->cs);
-
-	kfree(dw_cs);
-}
diff --git a/include/linux/dw_apb_timer.h b/include/linux/dw_apb_timer.h
index dd755ce2a5eb..b1cd9597c241 100644
--- a/include/linux/dw_apb_timer.h
+++ b/include/linux/dw_apb_timer.h
@@ -51,7 +51,6 @@ dw_apb_clocksource_init(unsigned rating, const char *name, void __iomem *base,
 void dw_apb_clocksource_register(struct dw_apb_clocksource *dw_cs);
 void dw_apb_clocksource_start(struct dw_apb_clocksource *dw_cs);
 cycle_t dw_apb_clocksource_read(struct dw_apb_clocksource *dw_cs);
-void dw_apb_clocksource_unregister(struct dw_apb_clocksource *dw_cs);
 
 extern void dw_apb_timer_init(void);
 #endif /* __DW_APB_TIMER_H__ */
-- 
cgit v1.2.3


From ba919d1caa2e624eb8c6cae1f2ce0a253e697d45 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 25 Apr 2013 20:31:44 +0000
Subject: clocksource: Let timekeeping_notify return success/error

timekeeping_notify() can fail due cs->enable() failure. Though the
caller does not notice and happily keeps the wrong clocksource as the
current one.

Let the caller know about failure, so the current clocksource will be
shown correctly in sysfs.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: John Stultz <john.stultz@linaro.org>
Cc: Magnus Damm <magnus.damm@gmail.com>
Link: http://lkml.kernel.org/r/20130425143435.696321912@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clocksource.h | 2 +-
 kernel/time/clocksource.c   | 6 +++---
 kernel/time/timekeeping.c   | 5 +++--
 3 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 7279b94c01da..aa6ba44e75d5 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -321,7 +321,7 @@ static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz)
 }
 
 
-extern void timekeeping_notify(struct clocksource *clock);
+extern int timekeeping_notify(struct clocksource *clock);
 
 extern cycle_t clocksource_mmio_readl_up(struct clocksource *);
 extern cycle_t clocksource_mmio_readl_down(struct clocksource *);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index dda5c7130d93..1923a340bd91 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -611,10 +611,10 @@ static void clocksource_select(void)
 			best = cs;
 		break;
 	}
-	if (curr_clocksource != best) {
-		printk(KERN_INFO "Switching to clocksource %s\n", best->name);
+
+	if (curr_clocksource != best && !timekeeping_notify(best)) {
+		pr_info("Switched to clocksource %s\n", best->name);
 		curr_clocksource = best;
-		timekeeping_notify(curr_clocksource);
 	}
 }
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 98cd470bbe49..da6e10c7a378 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -648,14 +648,15 @@ static int change_clocksource(void *data)
  * This function is called from clocksource.c after a new, better clock
  * source has been registered. The caller holds the clocksource_mutex.
  */
-void timekeeping_notify(struct clocksource *clock)
+int timekeeping_notify(struct clocksource *clock)
 {
 	struct timekeeper *tk = &timekeeper;
 
 	if (tk->clock == clock)
-		return;
+		return 0;
 	stop_machine(change_clocksource, clock, NULL);
 	tick_clock_notify();
+	return tk->clock == clock ? 0 : -1;
 }
 
 /**
-- 
cgit v1.2.3


From 09ac369c825d9d593404306d59062d854b321e9b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 25 Apr 2013 20:31:44 +0000
Subject: clocksource: Add module refcount

Add a module refcount, so the current clocksource cannot be removed
unconditionally.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Magnus Damm <magnus.damm@gmail.com>
Link: http://lkml.kernel.org/r/20130425143435.762417789@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clocksource.h |  3 +++
 kernel/time/timekeeping.c   | 19 ++++++++++++++-----
 2 files changed, 17 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index aa6ba44e75d5..32a895b114d8 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -21,6 +21,7 @@
 /* clocksource cycle base type */
 typedef u64 cycle_t;
 struct clocksource;
+struct module;
 
 #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
 #include <asm/clocksource.h>
@@ -162,6 +163,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc,
  * @suspend:		suspend function for the clocksource, if necessary
  * @resume:		resume function for the clocksource, if necessary
  * @cycle_last:		most recent cycle counter value seen by ::read()
+ * @owner:		module reference, must be set by clocksource in modules
  */
 struct clocksource {
 	/*
@@ -195,6 +197,7 @@ struct clocksource {
 	cycle_t cs_last;
 	cycle_t wd_last;
 #endif
+	struct module *owner;
 } ____cacheline_aligned;
 
 /*
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index da6e10c7a378..933efa4071c3 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -627,11 +627,20 @@ static int change_clocksource(void *data)
 	write_seqcount_begin(&timekeeper_seq);
 
 	timekeeping_forward_now(tk);
-	if (!new->enable || new->enable(new) == 0) {
-		old = tk->clock;
-		tk_setup_internals(tk, new);
-		if (old->disable)
-			old->disable(old);
+	/*
+	 * If the cs is in module, get a module reference. Succeeds
+	 * for built-in code (owner == NULL) as well.
+	 */
+	if (try_module_get(new->owner)) {
+		if (!new->enable || new->enable(new) == 0) {
+			old = tk->clock;
+			tk_setup_internals(tk, new);
+			if (old->disable)
+				old->disable(old);
+			module_put(old->owner);
+		} else {
+			module_put(new->owner);
+		}
 	}
 	timekeeping_update(tk, true, true);
 
-- 
cgit v1.2.3


From a89c7edbe7d7aa80f507915f3dd801211b116b79 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 25 Apr 2013 20:31:46 +0000
Subject: clocksource: Let clocksource_unregister() return success/error

The unregister call can fail, if the clocksource is the current one
and there is no replacement clocksource available. It can also fail,
if the clocksource is the watchdog clocksource and I'm not going to
provide support for this.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Magnus Damm <magnus.damm@gmail.com>
Link: http://lkml.kernel.org/r/20130425143436.029915527@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clocksource.h |  2 +-
 kernel/time/clocksource.c   | 33 ++++++++++++---------------------
 2 files changed, 13 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 32a895b114d8..2f39a4911668 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -282,7 +282,7 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift)
 
 
 extern int clocksource_register(struct clocksource*);
-extern void clocksource_unregister(struct clocksource*);
+extern int clocksource_unregister(struct clocksource*);
 extern void clocksource_touch_watchdog(void);
 extern struct clocksource* clocksource_get_next(void);
 extern void clocksource_change_rating(struct clocksource *cs, int rating);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 791d1aeb17ac..31b90332f47b 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -389,28 +389,17 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
 
 static void clocksource_dequeue_watchdog(struct clocksource *cs)
 {
-	struct clocksource *tmp;
 	unsigned long flags;
 
 	spin_lock_irqsave(&watchdog_lock, flags);
-	if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
-		/* cs is a watched clocksource. */
-		list_del_init(&cs->wd_list);
-	} else if (cs == watchdog) {
-		/* Reset watchdog cycles */
-		clocksource_reset_watchdog();
-		/* Current watchdog is removed. Find an alternative. */
-		watchdog = NULL;
-		list_for_each_entry(tmp, &clocksource_list, list) {
-			if (tmp == cs || tmp->flags & CLOCK_SOURCE_MUST_VERIFY)
-				continue;
-			if (!watchdog || tmp->rating > watchdog->rating)
-				watchdog = tmp;
+	if (cs != watchdog) {
+		if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
+			/* cs is a watched clocksource. */
+			list_del_init(&cs->wd_list);
+			/* Check if the watchdog timer needs to be stopped. */
+			clocksource_stop_watchdog();
 		}
 	}
-	cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
-	/* Check if the watchdog timer needs to be stopped. */
-	clocksource_stop_watchdog();
 	spin_unlock_irqrestore(&watchdog_lock, flags);
 }
 
@@ -841,13 +830,15 @@ static int clocksource_unbind(struct clocksource *cs)
  * clocksource_unregister - remove a registered clocksource
  * @cs:	clocksource to be unregistered
  */
-void clocksource_unregister(struct clocksource *cs)
+int clocksource_unregister(struct clocksource *cs)
 {
+	int ret = 0;
+
 	mutex_lock(&clocksource_mutex);
-	clocksource_dequeue_watchdog(cs);
-	list_del(&cs->list);
-	clocksource_select();
+	if (!list_empty(&cs->list))
+		ret = clocksource_unbind(cs);
 	mutex_unlock(&clocksource_mutex);
+	return ret;
 }
 EXPORT_SYMBOL(clocksource_unregister);
 
-- 
cgit v1.2.3


From 7172a286ced0c1f4f239a0fa09db54ed37d3ead2 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 25 Apr 2013 20:31:47 +0000
Subject: clockevents: Get rid of the notifier chain

7+ years and still a single user. Kill it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Magnus Damm <magnus.damm@gmail.com>
Link: http://lkml.kernel.org/r/20130425143436.098520211@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clockchips.h   |  1 -
 kernel/time/clockevents.c    | 35 +++--------------------------------
 kernel/time/tick-broadcast.c |  5 ++---
 kernel/time/tick-common.c    | 30 +++++-------------------------
 kernel/time/tick-internal.h  |  7 ++++---
 5 files changed, 14 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 963d71431388..2f498f66c1bb 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -150,7 +150,6 @@ extern void clockevents_exchange_device(struct clock_event_device *old,
 					struct clock_event_device *new);
 extern void clockevents_set_mode(struct clock_event_device *dev,
 				 enum clock_event_mode mode);
-extern int clockevents_register_notifier(struct notifier_block *nb);
 extern int clockevents_program_event(struct clock_event_device *dev,
 				     ktime_t expires, bool force);
 
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index c6d6400ee137..dd70b4842c62 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -15,7 +15,6 @@
 #include <linux/hrtimer.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/notifier.h>
 #include <linux/smp.h>
 
 #include "tick-internal.h"
@@ -23,10 +22,6 @@
 /* The registered clock event devices */
 static LIST_HEAD(clockevent_devices);
 static LIST_HEAD(clockevents_released);
-
-/* Notification for clock events */
-static RAW_NOTIFIER_HEAD(clockevents_chain);
-
 /* Protection for the above */
 static DEFINE_RAW_SPINLOCK(clockevents_lock);
 
@@ -232,30 +227,6 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
 	return (rc && force) ? clockevents_program_min_delta(dev) : rc;
 }
 
-/**
- * clockevents_register_notifier - register a clock events change listener
- */
-int clockevents_register_notifier(struct notifier_block *nb)
-{
-	unsigned long flags;
-	int ret;
-
-	raw_spin_lock_irqsave(&clockevents_lock, flags);
-	ret = raw_notifier_chain_register(&clockevents_chain, nb);
-	raw_spin_unlock_irqrestore(&clockevents_lock, flags);
-
-	return ret;
-}
-
-/*
- * Notify about a clock event change. Called with clockevents_lock
- * held.
- */
-static void clockevents_do_notify(unsigned long reason, void *dev)
-{
-	raw_notifier_call_chain(&clockevents_chain, reason, dev);
-}
-
 /*
  * Called after a notify add to make devices available which were
  * released from the notifier call.
@@ -269,7 +240,7 @@ static void clockevents_notify_released(void)
 				 struct clock_event_device, list);
 		list_del(&dev->list);
 		list_add(&dev->list, &clockevent_devices);
-		clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
+		tick_check_new_device(dev);
 	}
 }
 
@@ -290,7 +261,7 @@ void clockevents_register_device(struct clock_event_device *dev)
 	raw_spin_lock_irqsave(&clockevents_lock, flags);
 
 	list_add(&dev->list, &clockevent_devices);
-	clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
+	tick_check_new_device(dev);
 	clockevents_notify_released();
 
 	raw_spin_unlock_irqrestore(&clockevents_lock, flags);
@@ -433,7 +404,7 @@ void clockevents_notify(unsigned long reason, void *arg)
 	int cpu;
 
 	raw_spin_lock_irqsave(&clockevents_lock, flags);
-	clockevents_do_notify(reason, arg);
+	tick_notify(reason, arg);
 
 	switch (reason) {
 	case CLOCK_EVT_NOTIFY_CPU_DEAD:
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 24938d577669..3500caaa0bfd 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -64,7 +64,7 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc)
 /*
  * Check, if the device can be utilized as broadcast device:
  */
-int tick_check_broadcast_device(struct clock_event_device *dev)
+void tick_install_broadcast_device(struct clock_event_device *dev)
 {
 	struct clock_event_device *cur = tick_broadcast_device.evtdev;
 
@@ -72,7 +72,7 @@ int tick_check_broadcast_device(struct clock_event_device *dev)
 	    (tick_broadcast_device.evtdev &&
 	     tick_broadcast_device.evtdev->rating >= dev->rating) ||
 	     (dev->features & CLOCK_EVT_FEAT_C3STOP))
-		return 0;
+		return;
 
 	clockevents_exchange_device(tick_broadcast_device.evtdev, dev);
 	if (cur)
@@ -90,7 +90,6 @@ int tick_check_broadcast_device(struct clock_event_device *dev)
 	 */
 	if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
 		tick_clock_notify();
-	return 1;
 }
 
 /*
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 5d3fb100bc06..dbf4e18d5101 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -208,11 +208,11 @@ static void tick_setup_device(struct tick_device *td,
 /*
  * Check, if the new registered device should be used.
  */
-static int tick_check_new_device(struct clock_event_device *newdev)
+void tick_check_new_device(struct clock_event_device *newdev)
 {
 	struct clock_event_device *curdev;
 	struct tick_device *td;
-	int cpu, ret = NOTIFY_OK;
+	int cpu;
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&tick_device_lock, flags);
@@ -275,18 +275,14 @@ static int tick_check_new_device(struct clock_event_device *newdev)
 		tick_oneshot_notify();
 
 	raw_spin_unlock_irqrestore(&tick_device_lock, flags);
-	return NOTIFY_STOP;
+	return;
 
 out_bc:
 	/*
 	 * Can the new device be used as a broadcast device ?
 	 */
-	if (tick_check_broadcast_device(newdev))
-		ret = NOTIFY_STOP;
-
+	tick_install_broadcast_device(newdev);
 	raw_spin_unlock_irqrestore(&tick_device_lock, flags);
-
-	return ret;
 }
 
 /*
@@ -360,17 +356,10 @@ static void tick_resume(void)
 	raw_spin_unlock_irqrestore(&tick_device_lock, flags);
 }
 
-/*
- * Notification about clock event devices
- */
-static int tick_notify(struct notifier_block *nb, unsigned long reason,
-			       void *dev)
+void tick_notify(unsigned long reason, void *dev)
 {
 	switch (reason) {
 
-	case CLOCK_EVT_NOTIFY_ADD:
-		return tick_check_new_device(dev);
-
 	case CLOCK_EVT_NOTIFY_BROADCAST_ON:
 	case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
 	case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
@@ -404,21 +393,12 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason,
 	default:
 		break;
 	}
-
-	return NOTIFY_OK;
 }
 
-static struct notifier_block tick_notifier = {
-	.notifier_call = tick_notify,
-};
-
 /**
  * tick_init - initialize the tick control
- *
- * Register the notifier with the clockevents framework
  */
 void __init tick_init(void)
 {
-	clockevents_register_notifier(&tick_notifier);
 	tick_broadcast_init();
 }
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index f0299eae4602..60742fe6f63d 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -18,6 +18,8 @@ extern int tick_do_timer_cpu __read_mostly;
 
 extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast);
 extern void tick_handle_periodic(struct clock_event_device *dev);
+extern void tick_notify(unsigned long reason, void *dev);
+extern void tick_check_new_device(struct clock_event_device *dev);
 
 extern void clockevents_shutdown(struct clock_event_device *dev);
 
@@ -90,7 +92,7 @@ static inline bool tick_broadcast_oneshot_available(void) { return false; }
  */
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
-extern int tick_check_broadcast_device(struct clock_event_device *dev);
+extern void tick_install_broadcast_device(struct clock_event_device *dev);
 extern int tick_is_broadcast_device(struct clock_event_device *dev);
 extern void tick_broadcast_on_off(unsigned long reason, int *oncpu);
 extern void tick_shutdown_broadcast(unsigned int *cpup);
@@ -102,9 +104,8 @@ tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
 
 #else /* !BROADCAST */
 
-static inline int tick_check_broadcast_device(struct clock_event_device *dev)
+static inline void tick_install_broadcast_device(struct clock_event_device *dev)
 {
-	return 0;
 }
 
 static inline int tick_is_broadcast_device(struct clock_event_device *dev)
-- 
cgit v1.2.3


From ccf33d6880f39a35158fff66db13000ae4943fac Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 25 Apr 2013 20:31:49 +0000
Subject: clockevents: Add module refcount

We want to be able to remove clockevent modules as well. Add a
refcount so we don't remove a module with an active clock event
device.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Magnus Damm <magnus.damm@gmail.com>
Link: http://lkml.kernel.org/r/20130425143436.307435149@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clockchips.h   | 3 +++
 kernel/time/clockevents.c    | 1 +
 kernel/time/tick-broadcast.c | 3 +++
 kernel/time/tick-common.c    | 4 ++++
 4 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 2f498f66c1bb..ae1193bcf074 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -30,6 +30,7 @@ enum clock_event_nofitiers {
 #include <linux/notifier.h>
 
 struct clock_event_device;
+struct module;
 
 /* Clock event mode commands */
 enum clock_event_mode {
@@ -83,6 +84,7 @@ enum clock_event_mode {
  * @irq:		IRQ number (only for non CPU local devices)
  * @cpumask:		cpumask to indicate for which CPUs this device works
  * @list:		list head for the management code
+ * @owner:		module reference
  */
 struct clock_event_device {
 	void			(*event_handler)(struct clock_event_device *);
@@ -112,6 +114,7 @@ struct clock_event_device {
 	int			irq;
 	const struct cpumask	*cpumask;
 	struct list_head	list;
+	struct module		*owner;
 } ____cacheline_aligned;
 
 /*
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 0e3a8448e115..89e394caa769 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -357,6 +357,7 @@ void clockevents_exchange_device(struct clock_event_device *old,
 	 * released list and do a notify add later.
 	 */
 	if (old) {
+		module_put(old->owner);
 		clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED);
 		list_del(&old->list);
 		list_add(&old->list, &clockevents_released);
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 3500caaa0bfd..0e374cd2e0ef 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -19,6 +19,7 @@
 #include <linux/profile.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
+#include <linux/module.h>
 
 #include "tick-internal.h"
 
@@ -73,6 +74,8 @@ void tick_install_broadcast_device(struct clock_event_device *dev)
 	     tick_broadcast_device.evtdev->rating >= dev->rating) ||
 	     (dev->features & CLOCK_EVT_FEAT_C3STOP))
 		return;
+	if (!try_module_get(dev->owner))
+		return;
 
 	clockevents_exchange_device(tick_broadcast_device.evtdev, dev);
 	if (cur)
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 84c7cfca4d7d..433a1e11d13b 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -18,6 +18,7 @@
 #include <linux/percpu.h>
 #include <linux/profile.h>
 #include <linux/sched.h>
+#include <linux/module.h>
 
 #include <asm/irq_regs.h>
 
@@ -257,6 +258,9 @@ void tick_check_new_device(struct clock_event_device *newdev)
 			goto out_bc;
 	}
 
+	if (!try_module_get(newdev->owner))
+		return;
+
 	/*
 	 * Replace the eventually existing device by the new
 	 * device. If the current device is the broadcast device, do
-- 
cgit v1.2.3


From 03e13cf5ee60584fe0c831682c67212effb7fca4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 25 Apr 2013 20:31:50 +0000
Subject: clockevents: Implement unbind functionality

Provide a sysfs interface to allow unbinding of clockevent
devices. The device is unbound if it is unused or if there is a
replacement device available. Unbinding of broadcast devices is not
supported as we don't want to foster that nonsense. If no replacement
device is available the unbind returns -EBUSY. Unbind is available
from the kernel and through sysfs, which is necessary to drop the
module refcount.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Magnus Damm <magnus.damm@gmail.com>
Link: http://lkml.kernel.org/r/20130425143436.499216659@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clockchips.h  |   1 +
 kernel/time/clockevents.c   | 125 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/time/clocksource.c   |   9 ++--
 kernel/time/tick-common.c   |  24 +++++++++
 kernel/time/tick-internal.h |   7 +++
 5 files changed, 162 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index ae1193bcf074..0857922e8ad0 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -141,6 +141,7 @@ static inline unsigned long div_sc(unsigned long ticks, unsigned long nsec,
 extern u64 clockevent_delta2ns(unsigned long latch,
 			       struct clock_event_device *evt);
 extern void clockevents_register_device(struct clock_event_device *dev);
+extern int clockevents_unbind_device(struct clock_event_device *ced, int cpu);
 
 extern void clockevents_config(struct clock_event_device *dev, u32 freq);
 extern void clockevents_config_and_register(struct clock_event_device *dev,
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 0a23f4f29934..38959c866789 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -25,6 +25,13 @@ static LIST_HEAD(clockevent_devices);
 static LIST_HEAD(clockevents_released);
 /* Protection for the above */
 static DEFINE_RAW_SPINLOCK(clockevents_lock);
+/* Protection for unbind operations */
+static DEFINE_MUTEX(clockevents_mutex);
+
+struct ce_unbind {
+	struct clock_event_device *ce;
+	int res;
+};
 
 /**
  * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds
@@ -245,6 +252,90 @@ static void clockevents_notify_released(void)
 	}
 }
 
+/*
+ * Try to install a replacement clock event device
+ */
+static int clockevents_replace(struct clock_event_device *ced)
+{
+	struct clock_event_device *dev, *newdev = NULL;
+
+	list_for_each_entry(dev, &clockevent_devices, list) {
+		if (dev == ced || dev->mode != CLOCK_EVT_MODE_UNUSED)
+			continue;
+
+		if (!tick_check_replacement(newdev, dev))
+			continue;
+
+		if (!try_module_get(dev->owner))
+			continue;
+
+		if (newdev)
+			module_put(newdev->owner);
+		newdev = dev;
+	}
+	if (newdev) {
+		tick_install_replacement(newdev);
+		list_del_init(&ced->list);
+	}
+	return newdev ? 0 : -EBUSY;
+}
+
+/*
+ * Called with clockevents_mutex and clockevents_lock held
+ */
+static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu)
+{
+	/* Fast track. Device is unused */
+	if (ced->mode == CLOCK_EVT_MODE_UNUSED) {
+		list_del_init(&ced->list);
+		return 0;
+	}
+
+	return ced == per_cpu(tick_cpu_device, cpu).evtdev ? -EAGAIN : -EBUSY;
+}
+
+/*
+ * SMP function call to unbind a device
+ */
+static void __clockevents_unbind(void *arg)
+{
+	struct ce_unbind *cu = arg;
+	int res;
+
+	raw_spin_lock(&clockevents_lock);
+	res = __clockevents_try_unbind(cu->ce, smp_processor_id());
+	if (res == -EAGAIN)
+		res = clockevents_replace(cu->ce);
+	cu->res = res;
+	raw_spin_unlock(&clockevents_lock);
+}
+
+/*
+ * Issues smp function call to unbind a per cpu device. Called with
+ * clockevents_mutex held.
+ */
+static int clockevents_unbind(struct clock_event_device *ced, int cpu)
+{
+	struct ce_unbind cu = { .ce = ced, .res = -ENODEV };
+
+	smp_call_function_single(cpu, __clockevents_unbind, &cu, 1);
+	return cu.res;
+}
+
+/*
+ * Unbind a clockevents device.
+ */
+int clockevents_unbind_device(struct clock_event_device *ced, int cpu)
+{
+	int ret;
+
+	mutex_lock(&clockevents_mutex);
+	ret = clockevents_unbind(ced, cpu);
+	mutex_unlock(&clockevents_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(clockevents_unbind);
+
 /**
  * clockevents_register_device - register a clock event device
  * @dev:	device to register
@@ -487,6 +578,38 @@ static ssize_t sysfs_show_current_tick_dev(struct device *dev,
 }
 static DEVICE_ATTR(current_device, 0444, sysfs_show_current_tick_dev, NULL);
 
+/* We don't support the abomination of removable broadcast devices */
+static ssize_t sysfs_unbind_tick_dev(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	char name[CS_NAME_LEN];
+	size_t ret = sysfs_get_uname(buf, name, count);
+	struct clock_event_device *ce;
+
+	if (ret < 0)
+		return ret;
+
+	ret = -ENODEV;
+	mutex_lock(&clockevents_mutex);
+	raw_spin_lock_irq(&clockevents_lock);
+	list_for_each_entry(ce, &clockevent_devices, list) {
+		if (!strcmp(ce->name, name)) {
+			ret = __clockevents_try_unbind(ce, dev->id);
+			break;
+		}
+	}
+	raw_spin_unlock_irq(&clockevents_lock);
+	/*
+	 * We hold clockevents_mutex, so ce can't go away
+	 */
+	if (ret == -EAGAIN)
+		ret = clockevents_unbind(ce, dev->id);
+	mutex_unlock(&clockevents_mutex);
+	return ret ? ret : count;
+}
+static DEVICE_ATTR(unbind_device, 0200, NULL, sysfs_unbind_tick_dev);
+
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 static struct device tick_bc_dev = {
 	.init_name	= "broadcast",
@@ -529,6 +652,8 @@ static int __init tick_init_sysfs(void)
 		err = device_register(dev);
 		if (!err)
 			err = device_create_file(dev, &dev_attr_current_device);
+		if (!err)
+			err = device_create_file(dev, &dev_attr_unbind_device);
 		if (err)
 			return err;
 	}
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 31b90332f47b..6d05b00410cc 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -31,6 +31,8 @@
 #include <linux/tick.h>
 #include <linux/kthread.h>
 
+#include "tick-internal.h"
+
 void timecounter_init(struct timecounter *tc,
 		      const struct cyclecounter *cc,
 		      u64 start_tstamp)
@@ -174,7 +176,6 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
 static struct clocksource *curr_clocksource;
 static LIST_HEAD(clocksource_list);
 static DEFINE_MUTEX(clocksource_mutex);
-#define CS_NAME_LEN		32
 static char override_name[CS_NAME_LEN];
 static int finished_booting;
 
@@ -864,7 +865,7 @@ sysfs_show_current_clocksources(struct device *dev,
 	return count;
 }
 
-static size_t clocksource_get_uname(const char *buf, char *dst, size_t cnt)
+size_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
 {
 	size_t ret = cnt;
 
@@ -899,7 +900,7 @@ static ssize_t sysfs_override_clocksource(struct device *dev,
 
 	mutex_lock(&clocksource_mutex);
 
-	ret = clocksource_get_uname(buf, override_name, count);
+	ret = sysfs_get_uname(buf, override_name, count);
 	if (ret >= 0)
 		clocksource_select();
 
@@ -925,7 +926,7 @@ static ssize_t sysfs_unbind_clocksource(struct device *dev,
 	char name[CS_NAME_LEN];
 	size_t ret;
 
-	ret = clocksource_get_uname(buf, name, count);
+	ret = sysfs_get_uname(buf, name, count);
 	if (ret < 0)
 		return ret;
 
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index c34021650348..5edfb4806032 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -205,6 +205,17 @@ static void tick_setup_device(struct tick_device *td,
 		tick_setup_oneshot(newdev, handler, next_event);
 }
 
+void tick_install_replacement(struct clock_event_device *newdev)
+{
+	struct tick_device *td = &__get_cpu_var(tick_cpu_device);
+	int cpu = smp_processor_id();
+
+	clockevents_exchange_device(td->evtdev, newdev);
+	tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
+	if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
+		tick_oneshot_notify();
+}
+
 static bool tick_check_percpu(struct clock_event_device *curdev,
 			      struct clock_event_device *newdev, int cpu)
 {
@@ -236,6 +247,19 @@ static bool tick_check_preferred(struct clock_event_device *curdev,
 	return !curdev || newdev->rating > curdev->rating;
 }
 
+/*
+ * Check whether the new device is a better fit than curdev. curdev
+ * can be NULL !
+ */
+bool tick_check_replacement(struct clock_event_device *curdev,
+			    struct clock_event_device *newdev)
+{
+	if (tick_check_percpu(curdev, newdev, smp_processor_id()))
+		return false;
+
+	return tick_check_preferred(curdev, newdev);
+}
+
 /*
  * Check, if the new registered device should be used. Called with
  * clockevents_lock held and interrupts disabled.
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 06bfc8802dfb..be1690eaecff 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -11,6 +11,8 @@ extern seqlock_t jiffies_lock;
 #define TICK_DO_TIMER_NONE	-1
 #define TICK_DO_TIMER_BOOT	-2
 
+#define CS_NAME_LEN	32
+
 DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 extern ktime_t tick_next_period;
 extern ktime_t tick_period;
@@ -23,9 +25,14 @@ extern void tick_handover_do_timer(int *cpup);
 extern void tick_shutdown(unsigned int *cpup);
 extern void tick_suspend(void);
 extern void tick_resume(void);
+extern bool tick_check_replacement(struct clock_event_device *curdev,
+				   struct clock_event_device *newdev);
+extern void tick_install_replacement(struct clock_event_device *dev);
 
 extern void clockevents_shutdown(struct clock_event_device *dev);
 
+extern size_t sysfs_get_uname(const char *buf, char *dst, size_t cnt);
+
 /*
  * NO_HZ / high resolution timer shared code
  */
-- 
cgit v1.2.3


From b0ce3508b25ea6fa10ae3ca254de1d695b521702 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 16 May 2013 07:34:53 +0000
Subject: bonding: allow TSO being set on bonding master
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In some situations, we need to disable TSO on bonding slaves.

bonding device automatically unset TSO in bond_fix_features(), and
performance is not good because :

1) We consume more cpu cycles.

2) GSO segmentation has some bugs leading to out of order TCP packets
if this segmentation is done before virtual device. This particular
problem will be addressed in a separate patch.

This patch allows TSO being set/unset on the bonding master,
so that GSO segmentation is done after bonding layer.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Michał Mirosław <mirqus@gmail.com>
Cc: Jay Vosburgh <fubar@us.ibm.com>
Cc: Andy Gospodarek <andy@greyhouse.net>
Cc: Maciej Żenczykowski <maze@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c |  1 +
 include/linux/netdevice.h       | 11 +++++++++++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index d0aade04e49a..449ad9bbe45c 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1362,6 +1362,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev,
 						     slave->dev->features,
 						     mask);
 	}
+	features = netdev_add_tso_features(features, mask);
 
 out:
 	read_unlock(&bond->lock);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a94a5a0ab122..60584b185a0c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2733,6 +2733,17 @@ static inline netdev_features_t netdev_get_wanted_features(
 }
 netdev_features_t netdev_increment_features(netdev_features_t all,
 	netdev_features_t one, netdev_features_t mask);
+
+/* Allow TSO being used on stacked device :
+ * Performing the GSO segmentation before last device
+ * is a performance improvement.
+ */
+static inline netdev_features_t netdev_add_tso_features(netdev_features_t features,
+							netdev_features_t mask)
+{
+	return netdev_increment_features(features, NETIF_F_ALL_TSO, mask);
+}
+
 int __netdev_update_features(struct net_device *dev);
 void netdev_update_features(struct net_device *dev);
 void netdev_change_features(struct net_device *dev);
-- 
cgit v1.2.3


From 0b8ebdb18888c55588b932f4f564b9c9529de627 Mon Sep 17 00:00:00 2001
From: Fabio Baltieri <fabio.baltieri@linaro.org>
Date: Thu, 9 May 2013 13:08:08 +0200
Subject: mfd: ab8500-sysctrl: Always enable pm_power_off handler

AB8500 sysctrl driver implements a pm_power_off handler, but that is
currently not registered until a specific platform data field is
enabled.

This patch drops the platform data field and always registers
ab8500_power_off if no other pm_power_off handler was defined before,
and also introduces the necessary cleanup code in the driver's remove
function.

Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Fabio Baltieri <fabio.baltieri@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/ab8500-sysctrl.c      | 6 +++++-
 include/linux/mfd/abx500/ab8500.h | 2 --
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/ab8500-sysctrl.c b/drivers/mfd/ab8500-sysctrl.c
index b851692b0755..0c20361eae26 100644
--- a/drivers/mfd/ab8500-sysctrl.c
+++ b/drivers/mfd/ab8500-sysctrl.c
@@ -193,7 +193,7 @@ static int ab8500_sysctrl_probe(struct platform_device *pdev)
 
 	sysctrl_dev = &pdev->dev;
 
-	if (plat->pm_power_off)
+	if (!pm_power_off)
 		pm_power_off = ab8500_power_off;
 
 	pdata = plat->sysctrl;
@@ -228,6 +228,10 @@ static int ab8500_sysctrl_probe(struct platform_device *pdev)
 static int ab8500_sysctrl_remove(struct platform_device *pdev)
 {
 	sysctrl_dev = NULL;
+
+	if (pm_power_off == ab8500_power_off)
+		pm_power_off = NULL;
+
 	return 0;
 }
 
diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
index fb1bf7d6a410..0390d5943ed6 100644
--- a/include/linux/mfd/abx500/ab8500.h
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -373,13 +373,11 @@ struct ab8500_sysctrl_platform_data;
 /**
  * struct ab8500_platform_data - AB8500 platform data
  * @irq_base: start of AB8500 IRQs, AB8500_NR_IRQS will be used
- * @pm_power_off: Should machine pm power off hook be registered or not
  * @init: board-specific initialization after detection of ab8500
  * @regulator: machine-specific constraints for regulators
  */
 struct ab8500_platform_data {
 	int irq_base;
-	bool pm_power_off;
 	void (*init) (struct ab8500 *);
 	struct ab8500_regulator_platform_data *regulator;
 	struct abx500_gpio_platform_data *gpio;
-- 
cgit v1.2.3


From 0693196fe7bbb5e6cafd255dfce91ff6d10bc18f Mon Sep 17 00:00:00 2001
From: Johan Hovold <jhovold@gmail.com>
Date: Sun, 5 May 2013 20:32:27 +0200
Subject: USB: serial: add wait_until_sent operation

Add wait_until_sent operation which can be used to wait for hardware
buffers to drain.

Signed-off-by: Johan Hovold <jhovold@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/usb-serial.c | 17 +++++++++++++++++
 include/linux/usb/serial.h      |  1 +
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index cf75beb1251b..31d27686151f 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -375,6 +375,22 @@ static int serial_chars_in_buffer(struct tty_struct *tty)
 	return count;
 }
 
+static void serial_wait_until_sent(struct tty_struct *tty, int timeout)
+{
+	struct usb_serial_port *port = tty->driver_data;
+	struct usb_serial *serial = port->serial;
+
+	dev_dbg(tty->dev, "%s\n", __func__);
+
+	if (!port->serial->type->wait_until_sent)
+		return;
+
+	mutex_lock(&serial->disc_mutex);
+	if (!serial->disconnected)
+		port->serial->type->wait_until_sent(tty, timeout);
+	mutex_unlock(&serial->disc_mutex);
+}
+
 static void serial_throttle(struct tty_struct *tty)
 {
 	struct usb_serial_port *port = tty->driver_data;
@@ -1191,6 +1207,7 @@ static const struct tty_operations serial_ops = {
 	.unthrottle =		serial_unthrottle,
 	.break_ctl =		serial_break,
 	.chars_in_buffer =	serial_chars_in_buffer,
+	.wait_until_sent =	serial_wait_until_sent,
 	.tiocmget =		serial_tiocmget,
 	.tiocmset =		serial_tiocmset,
 	.get_icount =		serial_get_icount,
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index b9b0f7b4e43b..afbb7eeaac5f 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -268,6 +268,7 @@ struct usb_serial_driver {
 			struct usb_serial_port *port, struct ktermios *old);
 	void (*break_ctl)(struct tty_struct *tty, int break_state);
 	int  (*chars_in_buffer)(struct tty_struct *tty);
+	void (*wait_until_sent)(struct tty_struct *tty, long timeout);
 	void (*throttle)(struct tty_struct *tty);
 	void (*unthrottle)(struct tty_struct *tty);
 	int  (*tiocmget)(struct tty_struct *tty);
-- 
cgit v1.2.3


From dcf0105039660e951dfea348d317043d17988dfc Mon Sep 17 00:00:00 2001
From: Johan Hovold <jhovold@gmail.com>
Date: Wed, 8 May 2013 17:51:43 +0200
Subject: USB: serial: add generic wait_until_sent implementation

Add generic wait_until_sent implementation which polls for empty
hardware buffers using the new port-operation tx_empty.

The generic implementation will be used for all sub-drivers that
implement tx_empty but does not define wait_until_sent.

Signed-off-by: Johan Hovold <jhovold@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/generic.c    | 31 +++++++++++++++++++++++++++++++
 drivers/usb/serial/usb-serial.c |  2 ++
 include/linux/usb/serial.h      |  3 +++
 3 files changed, 36 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index 297665fdd16d..ba45170c78e5 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -253,6 +253,37 @@ int usb_serial_generic_chars_in_buffer(struct tty_struct *tty)
 }
 EXPORT_SYMBOL_GPL(usb_serial_generic_chars_in_buffer);
 
+void usb_serial_generic_wait_until_sent(struct tty_struct *tty, long timeout)
+{
+	struct usb_serial_port *port = tty->driver_data;
+	unsigned int bps;
+	unsigned long period;
+	unsigned long expire;
+
+	bps = tty_get_baud_rate(tty);
+	if (!bps)
+		bps = 9600;	/* B0 */
+	/*
+	 * Use a poll-period of roughly the time it takes to send one
+	 * character or at least one jiffy.
+	 */
+	period = max_t(unsigned long, (10 * HZ / bps), 1);
+	period = min_t(unsigned long, period, timeout);
+
+	dev_dbg(&port->dev, "%s - timeout = %u ms, period = %u ms\n",
+					__func__, jiffies_to_msecs(timeout),
+					jiffies_to_msecs(period));
+	expire = jiffies + timeout;
+	while (!port->serial->type->tx_empty(port)) {
+		schedule_timeout_interruptible(period);
+		if (signal_pending(current))
+			break;
+		if (time_after(jiffies, expire))
+			break;
+	}
+}
+EXPORT_SYMBOL_GPL(usb_serial_generic_wait_until_sent);
+
 static int usb_serial_generic_submit_read_urb(struct usb_serial_port *port,
 						int index, gfp_t mem_flags)
 {
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 31d27686151f..60caf9cb99f4 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -1333,6 +1333,8 @@ static void usb_serial_operations_init(struct usb_serial_driver *device)
 	set_to_generic_if_null(device, close);
 	set_to_generic_if_null(device, write_room);
 	set_to_generic_if_null(device, chars_in_buffer);
+	if (device->tx_empty)
+		set_to_generic_if_null(device, wait_until_sent);
 	set_to_generic_if_null(device, read_bulk_callback);
 	set_to_generic_if_null(device, write_bulk_callback);
 	set_to_generic_if_null(device, process_read_urb);
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index afbb7eeaac5f..302ddf55d2da 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -269,6 +269,7 @@ struct usb_serial_driver {
 	void (*break_ctl)(struct tty_struct *tty, int break_state);
 	int  (*chars_in_buffer)(struct tty_struct *tty);
 	void (*wait_until_sent)(struct tty_struct *tty, long timeout);
+	bool (*tx_empty)(struct usb_serial_port *port);
 	void (*throttle)(struct tty_struct *tty);
 	void (*unthrottle)(struct tty_struct *tty);
 	int  (*tiocmget)(struct tty_struct *tty);
@@ -328,6 +329,8 @@ extern void usb_serial_generic_close(struct usb_serial_port *port);
 extern int usb_serial_generic_resume(struct usb_serial *serial);
 extern int usb_serial_generic_write_room(struct tty_struct *tty);
 extern int usb_serial_generic_chars_in_buffer(struct tty_struct *tty);
+extern void usb_serial_generic_wait_until_sent(struct tty_struct *tty,
+								long timeout);
 extern void usb_serial_generic_read_bulk_callback(struct urb *urb);
 extern void usb_serial_generic_write_bulk_callback(struct urb *urb);
 extern void usb_serial_generic_throttle(struct tty_struct *tty);
-- 
cgit v1.2.3


From d4988d4c733ba0b61cb372edd3d1992d26dd10d3 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Thu, 9 May 2013 21:24:24 +0200
Subject: bcma: add more core IDs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PCIe and ARM CR4 cores were found on 14e4:43b1 AKA BCM4352.

Reported-by: Gabriel Thörnblad <gabriel@thornblad.com>
Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/scan.c       | 2 ++
 include/linux/bcma/bcma.h | 5 ++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/bcma/scan.c b/drivers/bcma/scan.c
index bca9c80056fe..8bffa5c9818c 100644
--- a/drivers/bcma/scan.c
+++ b/drivers/bcma/scan.c
@@ -84,6 +84,8 @@ static const struct bcma_device_id_name bcma_bcm_device_names[] = {
 	{ BCMA_CORE_I2S, "I2S" },
 	{ BCMA_CORE_SDR_DDR1_MEM_CTL, "SDR/DDR1 Memory Controller" },
 	{ BCMA_CORE_SHIM, "SHIM" },
+	{ BCMA_CORE_PCIE2, "PCIe Gen2" },
+	{ BCMA_CORE_ARM_CR4, "ARM CR4" },
 	{ BCMA_CORE_DEFAULT, "Default" },
 };
 
diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index f14a98a79c9d..2e34db82a643 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -134,7 +134,10 @@ struct bcma_host_ops {
 #define BCMA_CORE_I2S			0x834
 #define BCMA_CORE_SDR_DDR1_MEM_CTL	0x835	/* SDR/DDR1 memory controller core */
 #define BCMA_CORE_SHIM			0x837	/* SHIM component in ubus/6362 */
-#define BCMA_CORE_ARM_CR4		0x83e
+#define BCMA_CORE_PHY_AC		0x83B
+#define BCMA_CORE_PCIE2			0x83C	/* PCI Express Gen2 */
+#define BCMA_CORE_USB30_DEV		0x83D
+#define BCMA_CORE_ARM_CR4		0x83E
 #define BCMA_CORE_DEFAULT		0xFFF
 
 #define BCMA_MAX_NR_CORES		16
-- 
cgit v1.2.3


From 3f327e39b4b8f760c331bb2836735be6d83fbf53 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Tue, 7 May 2013 11:06:03 -0600
Subject: PCI: acpiphp: Re-enumerate devices when host bridge receives Bus
 Check

When a PCI host bridge device receives a Bus Check notification, we
must re-enumerate starting with the bridge to discover changes (devices
that have been added or removed).

Prior to 668192b678 ("PCI: acpiphp: Move host bridge hotplug to
pci_root.c"), this happened in _handle_hotplug_event_bridge().  After that
commit, _handle_hotplug_event_bridge() is not installed for host bridges,
and the host bridge notify handler, _handle_hotplug_event_root() did not
re-enumerate.

This patch adds re-enumeration to _handle_hotplug_event_root().

This fixes cases where we don't notice the addition or removal of
PCI devices, e.g., the PCI-to-USB ExpressCard in the bugzilla below.

[bhelgaas: changelog, references]
Reference: https://lkml.kernel.org/r/CAAh6nkmbKR3HTqm5ommevsBwhL_u0N8Rk7Wsms_LfP=nBgKNew@mail.gmail.com
Reference: https://bugzilla.kernel.org/show_bug.cgi?id=57961
Reported-by: Gavin Guo <tuffkidtt@gmail.com>
Tested-by: Gavin Guo <tuffkidtt@gmail.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Jiang Liu <jiang.liu@huawei.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
CC: stable@vger.kernel.org	# v3.9+
---
 drivers/acpi/pci_root.c            |  4 +++-
 drivers/pci/hotplug/acpiphp_glue.c | 14 ++++++++++++++
 include/linux/pci-acpi.h           |  2 ++
 3 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 1dd6f6c85874..e427dc516c76 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -641,7 +641,9 @@ static void _handle_hotplug_event_root(struct work_struct *work)
 		/* bus enumerate */
 		printk(KERN_DEBUG "%s: Bus check notify on %s\n", __func__,
 				 (char *)buffer.pointer);
-		if (!root)
+		if (root)
+			acpiphp_check_host_bridge(handle);
+		else
 			handle_root_bridge_insertion(handle);
 
 		break;
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 96fed19c6d90..716aa93fff76 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -950,6 +950,20 @@ check_sub_bridges(acpi_handle handle, u32 lvl, void *context, void **rv)
 	return AE_OK ;
 }
 
+void acpiphp_check_host_bridge(acpi_handle handle)
+{
+	struct acpiphp_bridge *bridge;
+
+	bridge = acpiphp_handle_to_bridge(handle);
+	if (bridge) {
+		acpiphp_check_bridge(bridge);
+		put_bridge(bridge);
+	}
+
+	acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
+		ACPI_UINT32_MAX, check_sub_bridges, NULL, NULL, NULL);
+}
+
 static void _handle_hotplug_event_bridge(struct work_struct *work)
 {
 	struct acpiphp_bridge *bridge;
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index 81b31613eb25..170447977278 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -60,11 +60,13 @@ static inline void acpi_pci_slot_remove(struct pci_bus *bus) { }
 void acpiphp_init(void);
 void acpiphp_enumerate_slots(struct pci_bus *bus, acpi_handle handle);
 void acpiphp_remove_slots(struct pci_bus *bus);
+void acpiphp_check_host_bridge(acpi_handle handle);
 #else
 static inline void acpiphp_init(void) { }
 static inline void acpiphp_enumerate_slots(struct pci_bus *bus,
 					   acpi_handle handle) { }
 static inline void acpiphp_remove_slots(struct pci_bus *bus) { }
+static inline void acpiphp_check_host_bridge(acpi_handle handle) { }
 #endif
 
 #else	/* CONFIG_ACPI */
-- 
cgit v1.2.3


From 7f18d05a1af75142d4856a91ffd2f1d8eb553c12 Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Wed, 15 May 2013 20:18:41 +0530
Subject: SERIAL: OMAP: Remove the slave idle handling from the driver

UART IP slave idle handling now taken care by runtime pm backend(hwmod layer)
so remove the hackery from the driver.

As discussed on the list, in future if dma mode needs to be brought
back to this driver, UART sysc handling needs to be updated in
framework such a way that no-idle/force idle profile can be supported.
Given the broken dma mode for OMAP uarts, its very unlikely.

Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tested-by: Vaibhav Bedia <vaibhav.bedia@ti.com>
Tested-by: Sourav Poddar <sourav.poddar@ti.com>
Signed-off-by: Rajendra nayak <rnayak@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Reviewed-by: Kevin Hilman <khilman@linaro.org>
Tested-by: Kevin Hilman <khilman@linaro.org>  # OMAP4/Panda
Signed-off-by: Paul Walmsley <paul@pwsan.com>
---
 drivers/tty/serial/omap-serial.c          | 23 -----------------------
 include/linux/platform_data/serial-omap.h |  2 --
 2 files changed, 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
index 30d4f7a783cd..f0b9f6b52b32 100644
--- a/drivers/tty/serial/omap-serial.c
+++ b/drivers/tty/serial/omap-serial.c
@@ -202,26 +202,6 @@ static int serial_omap_get_context_loss_count(struct uart_omap_port *up)
 	return pdata->get_context_loss_count(up->dev);
 }
 
-static void serial_omap_set_forceidle(struct uart_omap_port *up)
-{
-	struct omap_uart_port_info *pdata = up->dev->platform_data;
-
-	if (!pdata || !pdata->set_forceidle)
-		return;
-
-	pdata->set_forceidle(up->dev);
-}
-
-static void serial_omap_set_noidle(struct uart_omap_port *up)
-{
-	struct omap_uart_port_info *pdata = up->dev->platform_data;
-
-	if (!pdata || !pdata->set_noidle)
-		return;
-
-	pdata->set_noidle(up->dev);
-}
-
 static void serial_omap_enable_wakeup(struct uart_omap_port *up, bool enable)
 {
 	struct omap_uart_port_info *pdata = up->dev->platform_data;
@@ -298,8 +278,6 @@ static void serial_omap_stop_tx(struct uart_port *port)
 		serial_out(up, UART_IER, up->ier);
 	}
 
-	serial_omap_set_forceidle(up);
-
 	pm_runtime_mark_last_busy(up->dev);
 	pm_runtime_put_autosuspend(up->dev);
 }
@@ -364,7 +342,6 @@ static void serial_omap_start_tx(struct uart_port *port)
 
 	pm_runtime_get_sync(up->dev);
 	serial_omap_enable_ier_thri(up);
-	serial_omap_set_noidle(up);
 	pm_runtime_mark_last_busy(up->dev);
 	pm_runtime_put_autosuspend(up->dev);
 }
diff --git a/include/linux/platform_data/serial-omap.h b/include/linux/platform_data/serial-omap.h
index ff9b0aab5281..c860c1b314c0 100644
--- a/include/linux/platform_data/serial-omap.h
+++ b/include/linux/platform_data/serial-omap.h
@@ -43,8 +43,6 @@ struct omap_uart_port_info {
 	int			DTR_present;
 
 	int (*get_context_loss_count)(struct device *);
-	void (*set_forceidle)(struct device *);
-	void (*set_noidle)(struct device *);
 	void (*enable_wakeup)(struct device *, bool);
 };
 
-- 
cgit v1.2.3


From d2f83e9078b8114e3b9d09082856c1aac299aa37 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 17 May 2013 09:05:21 +0930
Subject: Hoist memcpy_fromiovec/memcpy_toiovec into lib/

ERROR: "memcpy_fromiovec" [drivers/vhost/vhost_scsi.ko] undefined!

That function is only present with CONFIG_NET.  Turns out that
crypto/algif_skcipher.c also uses that outside net, but it actually
needs sockets anyway.

In addition, commit 6d4f0139d642c45411a47879325891ce2a7c164a added
CONFIG_NET dependency to CONFIG_VMCI for memcpy_toiovec, so hoist
that function and revert that commit too.

socket.h already includes uio.h, so no callers need updating; trying
only broke things fo x86_64 randconfig (thanks Fengguang!).

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/misc/vmw_vmci/Kconfig           |  2 +-
 drivers/misc/vmw_vmci/vmci_queue_pair.c |  2 +-
 include/linux/socket.h                  |  2 --
 include/linux/uio.h                     |  3 ++
 lib/Makefile                            |  2 +-
 lib/iovec.c                             | 53 +++++++++++++++++++++++++++++++++
 net/core/iovec.c                        | 50 -------------------------------
 7 files changed, 59 insertions(+), 55 deletions(-)
 create mode 100644 lib/iovec.c

(limited to 'include/linux')

diff --git a/drivers/misc/vmw_vmci/Kconfig b/drivers/misc/vmw_vmci/Kconfig
index ea98f7e9ccd1..39c2ecadb273 100644
--- a/drivers/misc/vmw_vmci/Kconfig
+++ b/drivers/misc/vmw_vmci/Kconfig
@@ -4,7 +4,7 @@
 
 config VMWARE_VMCI
 	tristate "VMware VMCI Driver"
-	depends on X86 && PCI && NET
+	depends on X86 && PCI
 	help
 	  This is VMware's Virtual Machine Communication Interface.  It enables
 	  high-speed communication between host and guest in a virtual
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index d94245dbd765..8ff2e5ee8fb8 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -23,7 +23,7 @@
 #include <linux/pagemap.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/socket.h>
+#include <linux/uio.h>
 #include <linux/wait.h>
 #include <linux/vmalloc.h>
 
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 428c37a1f95c..33bf2dfab19d 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -305,7 +305,6 @@ struct ucred {
 
 extern void cred_to_ucred(struct pid *pid, const struct cred *cred, struct ucred *ucred);
 
-extern int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
 extern int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
 			       int offset, int len);
 extern int csum_partial_copy_fromiovecend(unsigned char *kdata, 
@@ -314,7 +313,6 @@ extern int csum_partial_copy_fromiovecend(unsigned char *kdata,
 					  unsigned int len, __wsum *csump);
 
 extern int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *address, int mode);
-extern int memcpy_toiovec(struct iovec *v, unsigned char *kdata, int len);
 extern int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata,
 			     int offset, int len);
 extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr);
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 629aaf51f30b..c55ce243cc09 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -35,4 +35,7 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs)
 }
 
 unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to);
+
+int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
+int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len);
 #endif
diff --git a/lib/Makefile b/lib/Makefile
index e9c52e1b853a..237721165035 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -9,7 +9,7 @@ endif
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
-	 idr.o int_sqrt.o extable.o \
+	 idr.o int_sqrt.o extable.o iovec.o \
 	 sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
 	 proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
diff --git a/lib/iovec.c b/lib/iovec.c
new file mode 100644
index 000000000000..454baa88bf27
--- /dev/null
+++ b/lib/iovec.c
@@ -0,0 +1,53 @@
+#include <linux/uaccess.h>
+#include <linux/export.h>
+#include <linux/uio.h>
+
+/*
+ *	Copy iovec to kernel. Returns -EFAULT on error.
+ *
+ *	Note: this modifies the original iovec.
+ */
+
+int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
+{
+	while (len > 0) {
+		if (iov->iov_len) {
+			int copy = min_t(unsigned int, len, iov->iov_len);
+			if (copy_from_user(kdata, iov->iov_base, copy))
+				return -EFAULT;
+			len -= copy;
+			kdata += copy;
+			iov->iov_base += copy;
+			iov->iov_len -= copy;
+		}
+		iov++;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(memcpy_fromiovec);
+
+/*
+ *	Copy kernel to iovec. Returns -EFAULT on error.
+ *
+ *	Note: this modifies the original iovec.
+ */
+
+int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
+{
+	while (len > 0) {
+		if (iov->iov_len) {
+			int copy = min_t(unsigned int, iov->iov_len, len);
+			if (copy_to_user(iov->iov_base, kdata, copy))
+				return -EFAULT;
+			kdata += copy;
+			len -= copy;
+			iov->iov_len -= copy;
+			iov->iov_base += copy;
+		}
+		iov++;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(memcpy_toiovec);
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 7e7aeb01de45..de178e462682 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -73,31 +73,6 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a
 	return err;
 }
 
-/*
- *	Copy kernel to iovec. Returns -EFAULT on error.
- *
- *	Note: this modifies the original iovec.
- */
-
-int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
-{
-	while (len > 0) {
-		if (iov->iov_len) {
-			int copy = min_t(unsigned int, iov->iov_len, len);
-			if (copy_to_user(iov->iov_base, kdata, copy))
-				return -EFAULT;
-			kdata += copy;
-			len -= copy;
-			iov->iov_len -= copy;
-			iov->iov_base += copy;
-		}
-		iov++;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(memcpy_toiovec);
-
 /*
  *	Copy kernel to iovec. Returns -EFAULT on error.
  */
@@ -124,31 +99,6 @@ int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
 }
 EXPORT_SYMBOL(memcpy_toiovecend);
 
-/*
- *	Copy iovec to kernel. Returns -EFAULT on error.
- *
- *	Note: this modifies the original iovec.
- */
-
-int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
-{
-	while (len > 0) {
-		if (iov->iov_len) {
-			int copy = min_t(unsigned int, len, iov->iov_len);
-			if (copy_from_user(kdata, iov->iov_base, copy))
-				return -EFAULT;
-			len -= copy;
-			kdata += copy;
-			iov->iov_base += copy;
-			iov->iov_len -= copy;
-		}
-		iov++;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(memcpy_fromiovec);
-
 /*
  *	Copy iovec from kernel. Returns -EFAULT on error.
  */
-- 
cgit v1.2.3


From 421b40a6286ee343d77d5e51f5ee6d04d7a2a90f Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Fri, 17 May 2013 12:41:03 -0400
Subject: tty/vt: Fix vc_deallocate() lock order

Now that the tty port owns the flip buffers and i/o is allowed
from the driver even when no tty is attached, the destruction
of the tty port (and the flip buffers) must ensure that no
outstanding work is pending.

Unfortunately, this creates a lock order problem with the
console_lock (see attached lockdep report [1] below).

For single console deallocation, drop the console_lock prior
to port destruction. When multiple console deallocation,
defer port destruction until the consoles have been
deallocated.

tty_port_destroy() is not required if the port has not
been used; remove from vc_allocate() failure path.

[1] lockdep report from Dave Jones <davej@redhat.com>

 ======================================================
 [ INFO: possible circular locking dependency detected ]
 3.9.0+ #16 Not tainted
 -------------------------------------------------------
 (agetty)/26163 is trying to acquire lock:
 blocked:  ((&buf->work)){+.+...}, instance: ffff88011c8b0020, at: [<ffffffff81062065>] flush_work+0x5/0x2e0

 but task is already holding lock:
 blocked:  (console_lock){+.+.+.}, instance: ffffffff81c2fde0, at: [<ffffffff813bc201>] vt_ioctl+0xb61/0x1230

 which lock already depends on the new lock.

 the existing dependency chain (in reverse order) is:

 -> #1 (console_lock){+.+.+.}:
        [<ffffffff810b3f74>] lock_acquire+0xa4/0x210
        [<ffffffff810416c7>] console_lock+0x77/0x80
        [<ffffffff813c3dcd>] con_flush_chars+0x2d/0x50
        [<ffffffff813b32b2>] n_tty_receive_buf+0x122/0x14d0
        [<ffffffff813b7709>] flush_to_ldisc+0x119/0x170
        [<ffffffff81064381>] process_one_work+0x211/0x700
        [<ffffffff8106498b>] worker_thread+0x11b/0x3a0
        [<ffffffff8106ce5d>] kthread+0xed/0x100
        [<ffffffff81601cac>] ret_from_fork+0x7c/0xb0

 -> #0 ((&buf->work)){+.+...}:
        [<ffffffff810b349a>] __lock_acquire+0x193a/0x1c00
        [<ffffffff810b3f74>] lock_acquire+0xa4/0x210
        [<ffffffff810620ae>] flush_work+0x4e/0x2e0
        [<ffffffff81065305>] __cancel_work_timer+0x95/0x130
        [<ffffffff810653b0>] cancel_work_sync+0x10/0x20
        [<ffffffff813b8212>] tty_port_destroy+0x12/0x20
        [<ffffffff813c65e8>] vc_deallocate+0xf8/0x110
        [<ffffffff813bc20c>] vt_ioctl+0xb6c/0x1230
        [<ffffffff813b01a5>] tty_ioctl+0x285/0xd50
        [<ffffffff811ba825>] do_vfs_ioctl+0x305/0x530
        [<ffffffff811baad1>] sys_ioctl+0x81/0xa0
        [<ffffffff81601d59>] system_call_fastpath+0x16/0x1b

 other info that might help us debug this:

 [ 6760.076175]  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(console_lock);
                                lock((&buf->work));
                                lock(console_lock);
   lock((&buf->work));

  *** DEADLOCK ***

 1 lock on stack by (agetty)/26163:
  #0: blocked:  (console_lock){+.+.+.}, instance: ffffffff81c2fde0, at: [<ffffffff813bc201>] vt_ioctl+0xb61/0x1230
 stack backtrace:
 Pid: 26163, comm: (agetty) Not tainted 3.9.0+ #16
 Call Trace:
  [<ffffffff815edb14>] print_circular_bug+0x200/0x20e
  [<ffffffff810b349a>] __lock_acquire+0x193a/0x1c00
  [<ffffffff8100a269>] ? sched_clock+0x9/0x10
  [<ffffffff8100a269>] ? sched_clock+0x9/0x10
  [<ffffffff8100a200>] ? native_sched_clock+0x20/0x80
  [<ffffffff810b3f74>] lock_acquire+0xa4/0x210
  [<ffffffff81062065>] ? flush_work+0x5/0x2e0
  [<ffffffff810620ae>] flush_work+0x4e/0x2e0
  [<ffffffff81062065>] ? flush_work+0x5/0x2e0
  [<ffffffff810b15db>] ? mark_held_locks+0xbb/0x140
  [<ffffffff8113c8a3>] ? __free_pages_ok.part.57+0x93/0xc0
  [<ffffffff810b15db>] ? mark_held_locks+0xbb/0x140
  [<ffffffff810652f2>] ? __cancel_work_timer+0x82/0x130
  [<ffffffff81065305>] __cancel_work_timer+0x95/0x130
  [<ffffffff810653b0>] cancel_work_sync+0x10/0x20
  [<ffffffff813b8212>] tty_port_destroy+0x12/0x20
  [<ffffffff813c65e8>] vc_deallocate+0xf8/0x110
  [<ffffffff813bc20c>] vt_ioctl+0xb6c/0x1230
  [<ffffffff810aec41>] ? lock_release_holdtime.part.30+0xa1/0x170
  [<ffffffff813b01a5>] tty_ioctl+0x285/0xd50
  [<ffffffff812b00f6>] ? inode_has_perm.isra.46.constprop.61+0x56/0x80
  [<ffffffff811ba825>] do_vfs_ioctl+0x305/0x530
  [<ffffffff812b04db>] ? selinux_file_ioctl+0x5b/0x110
  [<ffffffff811baad1>] sys_ioctl+0x81/0xa0
  [<ffffffff81601d59>] system_call_fastpath+0x16/0x1b

Cc: Dave Jones <davej@redhat.com>
Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/vt/vt.c       | 14 +++++-----
 drivers/tty/vt/vt_ioctl.c | 67 ++++++++++++++++++++++++++++++++++-------------
 include/linux/vt_kern.h   |  2 +-
 3 files changed, 56 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index fbd447b390f7..740202d8a5c4 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -779,7 +779,6 @@ int vc_allocate(unsigned int currcons)	/* return 0 on success */
 		con_set_default_unimap(vc);
 	    vc->vc_screenbuf = kmalloc(vc->vc_screenbuf_size, GFP_KERNEL);
 	    if (!vc->vc_screenbuf) {
-		tty_port_destroy(&vc->port);
 		kfree(vc);
 		vc_cons[currcons].d = NULL;
 		return -ENOMEM;
@@ -986,26 +985,25 @@ static int vt_resize(struct tty_struct *tty, struct winsize *ws)
 	return ret;
 }
 
-void vc_deallocate(unsigned int currcons)
+struct vc_data *vc_deallocate(unsigned int currcons)
 {
+	struct vc_data *vc = NULL;
+
 	WARN_CONSOLE_UNLOCKED();
 
 	if (vc_cons_allocated(currcons)) {
-		struct vc_data *vc = vc_cons[currcons].d;
-		struct vt_notifier_param param = { .vc = vc };
+		struct vt_notifier_param param;
 
+		param.vc = vc = vc_cons[currcons].d;
 		atomic_notifier_call_chain(&vt_notifier_list, VT_DEALLOCATE, &param);
 		vcs_remove_sysfs(currcons);
 		vc->vc_sw->con_deinit(vc);
 		put_pid(vc->vt_pid);
 		module_put(vc->vc_sw->owner);
 		kfree(vc->vc_screenbuf);
-		if (currcons >= MIN_NR_CONSOLES) {
-			tty_port_destroy(&vc->port);
-			kfree(vc);
-		}
 		vc_cons[currcons].d = NULL;
 	}
+	return vc;
 }
 
 /*
diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c
index 98ff1735eafc..fc2c06c66e89 100644
--- a/drivers/tty/vt/vt_ioctl.c
+++ b/drivers/tty/vt/vt_ioctl.c
@@ -283,6 +283,51 @@ do_unimap_ioctl(int cmd, struct unimapdesc __user *user_ud, int perm, struct vc_
 	return 0;
 }
 
+/* deallocate a single console, if possible (leave 0) */
+static int vt_disallocate(unsigned int vc_num)
+{
+	struct vc_data *vc = NULL;
+	int ret = 0;
+
+	if (!vc_num)
+		return 0;
+
+	console_lock();
+	if (VT_BUSY(vc_num))
+		ret = -EBUSY;
+	else
+		vc = vc_deallocate(vc_num);
+	console_unlock();
+
+	if (vc && vc_num >= MIN_NR_CONSOLES) {
+		tty_port_destroy(&vc->port);
+		kfree(vc);
+	}
+
+	return ret;
+}
+
+/* deallocate all unused consoles, but leave 0 */
+static void vt_disallocate_all(void)
+{
+	struct vc_data *vc[MAX_NR_CONSOLES];
+	int i;
+
+	console_lock();
+	for (i = 1; i < MAX_NR_CONSOLES; i++)
+		if (!VT_BUSY(i))
+			vc[i] = vc_deallocate(i);
+		else
+			vc[i] = NULL;
+	console_unlock();
+
+	for (i = 1; i < MAX_NR_CONSOLES; i++) {
+		if (vc[i] && i >= MIN_NR_CONSOLES) {
+			tty_port_destroy(&vc[i]->port);
+			kfree(vc[i]);
+		}
+	}
+}
 
 
 /*
@@ -769,24 +814,10 @@ int vt_ioctl(struct tty_struct *tty,
 			ret = -ENXIO;
 			break;
 		}
-		if (arg == 0) {
-		    /* deallocate all unused consoles, but leave 0 */
-			console_lock();
-			for (i=1; i<MAX_NR_CONSOLES; i++)
-				if (! VT_BUSY(i))
-					vc_deallocate(i);
-			console_unlock();
-		} else {
-			/* deallocate a single console, if possible */
-			arg--;
-			if (VT_BUSY(arg))
-				ret = -EBUSY;
-			else if (arg) {			      /* leave 0 */
-				console_lock();
-				vc_deallocate(arg);
-				console_unlock();
-			}
-		}
+		if (arg == 0)
+			vt_disallocate_all();
+		else
+			ret = vt_disallocate(--arg);
 		break;
 
 	case VT_RESIZE:
diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h
index e8d65718560b..0d33fca48774 100644
--- a/include/linux/vt_kern.h
+++ b/include/linux/vt_kern.h
@@ -36,7 +36,7 @@ extern int fg_console, last_console, want_console;
 int vc_allocate(unsigned int console);
 int vc_cons_allocated(unsigned int console);
 int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int lines);
-void vc_deallocate(unsigned int console);
+struct vc_data *vc_deallocate(unsigned int console);
 void reset_palette(struct vc_data *vc);
 void do_blank_screen(int entering_gfx);
 void do_unblank_screen(int leaving_gfx);
-- 
cgit v1.2.3


From 154c2670087bd7f54688274aca627433e4a7c181 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Tue, 21 May 2013 10:51:10 +0200
Subject: Add include dependencies to <linux/printk.h>.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If <linux/linkage.h> has not been included before <linux/printk.h>,
a build error like the below one will result:

  CC      arch/mips/kernel/idle.o
In file included from arch/mips/kernel/idle.c:17:0:
include/linux/printk.h:109:1: error: data definition has no type or storage class [-Werror]
include/linux/printk.h:109:1: error: type defaults to ‘int’ in declaration of ‘asmlinkage’ [-Werror=implicit-int]
include/linux/printk.h:110:1: error: ‘format’ attribute only applies to function types [-Werror=attributes]
include/linux/printk.h:110:1: error: expected ‘,’ or ‘;’ before ‘int’
include/linux/printk.h:114:1: error: data definition has no type or storage class [-Werror]
include/linux/printk.h:114:1: error: type defaults to ‘int’ in declaration of ‘asmlinkage’ [-Werror=implicit-int]
include/linux/printk.h:115:1: error: ‘format’ attribute only applies to function types [-Werror=attributes]
include/linux/printk.h:115:1: error: expected ‘,’ or ‘;’ before ‘int’
include/linux/printk.h:117:1: error: data definition has no type or storage class [-Werror]
include/linux/printk.h:117:1: error: type defaults to ‘int’ in declaration of ‘asmlinkage’ [-Werror=implicit-int]
include/linux/printk.h:118:1: error: ‘format’ attribute only applies to function types [-Werror=attributes]
include/linux/printk.h:118:1: error: ‘__cold__’ attribute ignored [-Werror=attributes]
include/linux/printk.h:118:1: error: expected ‘,’ or ‘;’ before ‘asmlinkage’
include/linux/printk.h:122:1: error: data definition has no type or storage class [-Werror]
include/linux/printk.h:122:1: error: type defaults to ‘int’ in declaration of ‘asmlinkage’ [-Werror=implicit-int]
include/linux/printk.h:123:1: error: ‘format’ attribute only applies to function types [-Werror=attributes]
include/linux/printk.h:123:1: error: ‘__cold__’ attribute ignored [-Werror=attributes]
include/linux/printk.h:123:1: error: expected ‘,’ or ‘;’ before ‘int’
In file included from include/linux/kernel.h:14:0,
                 from include/linux/sched.h:15,
                 from arch/mips/kernel/idle.c:18:
include/linux/dynamic_debug.h: In function ‘ddebug_dyndbg_module_param_cb’:
include/linux/dynamic_debug.h:124:3: error: implicit declaration of function ‘printk’ [-Werror=implicit-function-declaration]

Fixed by including <linux/linkage.h>.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 include/linux/printk.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 6af944ab38f0..22c7052e9372 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -4,6 +4,7 @@
 #include <stdarg.h>
 #include <linux/init.h>
 #include <linux/kern_levels.h>
+#include <linux/linkage.h>
 
 extern const char linux_banner[];
 extern const char linux_proc_banner[];
-- 
cgit v1.2.3


From 2a7851bffb008ff4882eee673da74718997b4265 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 17 May 2013 03:56:10 +0000
Subject: netfilter: add nf_ipv6_ops hook to fix xt_addrtype with IPv6

Quoting https://bugzilla.netfilter.org/show_bug.cgi?id=812:

[ ip6tables -m addrtype ]
When I tried to use in the nat/PREROUTING it messes up the
routing cache even if the rule didn't matched at all.
[..]
If I remove the --limit-iface-in from the non-working scenario, so just
use the -m addrtype --dst-type LOCAL it works!

This happens when LOCAL type matching is requested with --limit-iface-in,
and the default ipv6 route is via the interface the packet we test
arrived on.

Because xt_addrtype uses ip6_route_output, the ipv6 routing implementation
creates an unwanted cached entry, and the packet won't make it to the
real/expected destination.

Silently ignoring --limit-iface-in makes the routing work but it breaks
rule matching (--dst-type LOCAL with limit-iface-in is supposed to only
match if the dst address is configured on the incoming interface;
without --limit-iface-in it will match if the address is reachable
via lo).

The test should call ipv6_chk_addr() instead.  However, this would add
a link-time dependency on ipv6.

There are two possible solutions:

1) Revert the commit that moved ipt_addrtype to xt_addrtype,
   and put ipv6 specific code into ip6t_addrtype.
2) add new "nf_ipv6_ops" struct to register pointers to ipv6 functions.

While the former might seem preferable, Pablo pointed out that there
are more xt modules with link-time dependeny issues regarding ipv6,
so lets go for 2).

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv6.h | 16 ++++++++++++++++
 include/net/addrconf.h         |  2 +-
 net/ipv6/addrconf.c            |  2 +-
 net/ipv6/netfilter.c           |  7 +++++++
 net/netfilter/core.c           |  2 ++
 net/netfilter/xt_addrtype.c    | 27 ++++++++++++++++-----------
 6 files changed, 43 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 98ffb54988b6..2d4df6ce043e 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -17,6 +17,22 @@ extern __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
 
 extern int ipv6_netfilter_init(void);
 extern void ipv6_netfilter_fini(void);
+
+/*
+ * Hook functions for ipv6 to allow xt_* modules to be built-in even
+ * if IPv6 is a module.
+ */
+struct nf_ipv6_ops {
+	int (*chk_addr)(struct net *net, const struct in6_addr *addr,
+			const struct net_device *dev, int strict);
+};
+
+extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops;
+static inline const struct nf_ipv6_ops *nf_get_ipv6_ops(void)
+{
+	return rcu_dereference(nf_ipv6_ops);
+}
+
 #else /* CONFIG_NETFILTER */
 static inline int ipv6_netfilter_init(void) { return 0; }
 static inline void ipv6_netfilter_fini(void) { return; }
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 84a6440f1f19..21f702704f24 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -65,7 +65,7 @@ extern int			addrconf_set_dstaddr(struct net *net,
 
 extern int			ipv6_chk_addr(struct net *net,
 					      const struct in6_addr *addr,
-					      struct net_device *dev,
+					      const struct net_device *dev,
 					      int strict);
 
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d1ab6ab29a55..d1b2d8034b54 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1487,7 +1487,7 @@ static int ipv6_count_addresses(struct inet6_dev *idev)
 }
 
 int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
-		  struct net_device *dev, int strict)
+		  const struct net_device *dev, int strict)
 {
 	struct inet6_ifaddr *ifp;
 	unsigned int hash = inet6_addr_hash(addr);
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 72836f40b730..95f3f1da0d7f 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -10,6 +10,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
 #include <linux/export.h>
+#include <net/addrconf.h>
 #include <net/dst.h>
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
@@ -186,6 +187,10 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
 	return csum;
 };
 
+static const struct nf_ipv6_ops ipv6ops = {
+	.chk_addr	= ipv6_chk_addr,
+};
+
 static const struct nf_afinfo nf_ip6_afinfo = {
 	.family			= AF_INET6,
 	.checksum		= nf_ip6_checksum,
@@ -198,6 +203,7 @@ static const struct nf_afinfo nf_ip6_afinfo = {
 
 int __init ipv6_netfilter_init(void)
 {
+	RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops);
 	return nf_register_afinfo(&nf_ip6_afinfo);
 }
 
@@ -206,5 +212,6 @@ int __init ipv6_netfilter_init(void)
  */
 void ipv6_netfilter_fini(void)
 {
+	RCU_INIT_POINTER(nf_ipv6_ops, NULL);
 	nf_unregister_afinfo(&nf_ip6_afinfo);
 }
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 07c865a31a3d..857ca9f35177 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -30,6 +30,8 @@ static DEFINE_MUTEX(afinfo_mutex);
 
 const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
 EXPORT_SYMBOL(nf_afinfo);
+const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
+EXPORT_SYMBOL_GPL(nf_ipv6_ops);
 
 int nf_register_afinfo(const struct nf_afinfo *afinfo)
 {
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 49c5ff7f6dd6..68ff29f60867 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -22,6 +22,7 @@
 #include <net/ip6_fib.h>
 #endif
 
+#include <linux/netfilter_ipv6.h>
 #include <linux/netfilter/xt_addrtype.h>
 #include <linux/netfilter/x_tables.h>
 
@@ -33,12 +34,12 @@ MODULE_ALIAS("ip6t_addrtype");
 
 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
 static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
-			    const struct in6_addr *addr)
+			    const struct in6_addr *addr, u16 mask)
 {
 	const struct nf_afinfo *afinfo;
 	struct flowi6 flow;
 	struct rt6_info *rt;
-	u32 ret;
+	u32 ret = 0;
 	int route_err;
 
 	memset(&flow, 0, sizeof(flow));
@@ -49,12 +50,19 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
 	rcu_read_lock();
 
 	afinfo = nf_get_afinfo(NFPROTO_IPV6);
-	if (afinfo != NULL)
+	if (afinfo != NULL) {
+		const struct nf_ipv6_ops *v6ops;
+
+		if (dev && (mask & XT_ADDRTYPE_LOCAL)) {
+			v6ops = nf_get_ipv6_ops();
+			if (v6ops && v6ops->chk_addr(net, addr, dev, true))
+				ret = XT_ADDRTYPE_LOCAL;
+		}
 		route_err = afinfo->route(net, (struct dst_entry **)&rt,
-					flowi6_to_flowi(&flow), !!dev);
-	else
+					  flowi6_to_flowi(&flow), false);
+	} else {
 		route_err = 1;
-
+	}
 	rcu_read_unlock();
 
 	if (route_err)
@@ -62,15 +70,12 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
 
 	if (rt->rt6i_flags & RTF_REJECT)
 		ret = XT_ADDRTYPE_UNREACHABLE;
-	else
-		ret = 0;
 
-	if (rt->rt6i_flags & RTF_LOCAL)
+	if (dev == NULL && rt->rt6i_flags & RTF_LOCAL)
 		ret |= XT_ADDRTYPE_LOCAL;
 	if (rt->rt6i_flags & RTF_ANYCAST)
 		ret |= XT_ADDRTYPE_ANYCAST;
 
-
 	dst_release(&rt->dst);
 	return ret;
 }
@@ -90,7 +95,7 @@ static bool match_type6(struct net *net, const struct net_device *dev,
 
 	if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST |
 	     XT_ADDRTYPE_UNREACHABLE) & mask)
-		return !!(mask & match_lookup_rt6(net, dev, addr));
+		return !!(mask & match_lookup_rt6(net, dev, addr, mask));
 	return true;
 }
 
-- 
cgit v1.2.3


From 7805d000db30a3787a4c969bab6ae4d8a5fd8ce6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 24 May 2013 10:50:24 +0900
Subject: cgroup: fix a subtle bug in descendant pre-order walk

When cgroup_next_descendant_pre() initiates a walk, it checks whether
the subtree root doesn't have any children and if not returns NULL.
Later code assumes that the subtree isn't empty.  This is broken
because the subtree may become empty inbetween, which can lead to the
traversal escaping the subtree by walking to the sibling of the
subtree root.

There's no reason to have the early exit path.  Remove it along with
the later assumption that the subtree isn't empty.  This simplifies
the code a bit and fixes the subtle bug.

While at it, fix the comment of cgroup_for_each_descendant_pre() which
was incorrectly referring to ->css_offline() instead of
->css_online().

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: stable@vger.kernel.org
---
 include/linux/cgroup.h | 2 +-
 kernel/cgroup.c        | 9 +++------
 2 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 5047355b9a0f..8bda1294c035 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -707,7 +707,7 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
  *
  * If a subsystem synchronizes against the parent in its ->css_online() and
  * before starting iterating, and synchronizes against @pos on each
- * iteration, any descendant cgroup which finished ->css_offline() is
+ * iteration, any descendant cgroup which finished ->css_online() is
  * guaranteed to be visible in the future iterations.
  *
  * In other words, the following guarantees that a descendant can't escape
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 38b136553044..31e9ef319070 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2954,11 +2954,8 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
 	/* if first iteration, pretend we just visited @cgroup */
-	if (!pos) {
-		if (list_empty(&cgroup->children))
-			return NULL;
+	if (!pos)
 		pos = cgroup;
-	}
 
 	/* visit the first child if exists */
 	next = list_first_or_null_rcu(&pos->children, struct cgroup, sibling);
@@ -2966,14 +2963,14 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
 		return next;
 
 	/* no child, visit my or the closest ancestor's next sibling */
-	do {
+	while (pos != cgroup) {
 		next = list_entry_rcu(pos->sibling.next, struct cgroup,
 				      sibling);
 		if (&next->sibling != &pos->parent->children)
 			return next;
 
 		pos = pos->parent;
-	} while (pos != cgroup);
+	}
 
 	return NULL;
 }
-- 
cgit v1.2.3


From a11650e11093ed57dca78bf16e7836517c599098 Mon Sep 17 00:00:00 2001
From: Alexandre Bounine <alexandre.bounine@idt.com>
Date: Fri, 24 May 2013 15:55:05 -0700
Subject: rapidio: make enumeration/discovery configurable

Systems that use RapidIO fabric may need to implement their own
enumeration and discovery methods which are better suitable for needs of
a target application.

The following set of patches is intended to simplify process of
introduction of new RapidIO fabric enumeration/discovery methods.

The first patch offers ability to add new RapidIO enumeration/discovery
methods using kernel configuration options.  This new configuration
option mechanism allows to select statically linked or modular
enumeration/discovery method(s) from the list of existing methods or use
external module(s).

This patch also updates the currently existing enumeration/discovery
code to be used as a statically linked or modular method.

The corresponding configuration option is named "Basic
enumeration/discovery" method.  This is the only one configuration
option available today but new methods are expected to be introduced
after adoption of provided patches.

The second patch address a long time complaint of RapidIO subsystem
users regarding fabric enumeration/discovery start sequence.  Existing
implementation offers only a boot-time enumeration/discovery start which
requires synchronized boot of all endpoints in RapidIO network.  While
it works for small closed configurations with limited number of
endpoints, using this approach in systems with large number of endpoints
is quite challenging.

To eliminate requirement for synchronized start the second patch
introduces RapidIO enumeration/discovery start from user space.

For compatibility with the existing RapidIO subsystem implementation,
automatic boot time enumeration/discovery start can be configured in by
specifying "rio-scan.scan=1" command line parameter if statically linked
basic enumeration method is selected.

This patch:

Rework to implement RapidIO enumeration/discovery method selection
combined with ability to use enumeration/discovery as a kernel module.

This patch adds ability to introduce new RapidIO enumeration/discovery
methods using kernel configuration options.  Configuration option
mechanism allows to select statically linked or modular
enumeration/discovery method from the list of existing methods or use
external modules.  If a modular enumeration/discovery is selected each
RapidIO mport device can have its own method attached to it.

The existing enumeration/discovery code was updated to be used as
statically linked or modular method.  This configuration option is named
"Basic enumeration/discovery" method.

Several common routines have been moved from rio-scan.c to make them
available to other enumeration methods and reduce number of exported
symbols.

Signed-off-by: Alexandre Bounine <alexandre.bounine@idt.com>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Li Yang <leoli@freescale.com>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: Andre van Herk <andre.van.herk@Prodrive.nl>
Cc: Micha Nelissen <micha.nelissen@Prodrive.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rapidio/Kconfig      |  20 ++++
 drivers/rapidio/Makefile     |   3 +-
 drivers/rapidio/rio-driver.c |   7 ++
 drivers/rapidio/rio-scan.c   | 166 ++++++++------------------------
 drivers/rapidio/rio.c        | 222 +++++++++++++++++++++++++++++++++++++++++--
 drivers/rapidio/rio.h        |  11 ++-
 include/linux/rio.h          |  13 ++-
 include/linux/rio_drv.h      |   1 +
 8 files changed, 304 insertions(+), 139 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rapidio/Kconfig b/drivers/rapidio/Kconfig
index 6194d35ebb97..5ab056494bbe 100644
--- a/drivers/rapidio/Kconfig
+++ b/drivers/rapidio/Kconfig
@@ -47,4 +47,24 @@ config RAPIDIO_DEBUG
 
 	  If you are unsure about this, say N here.
 
+choice
+	prompt "Enumeration method"
+	depends on RAPIDIO
+	default RAPIDIO_ENUM_BASIC
+	help
+	  There are different enumeration and discovery mechanisms offered
+	  for RapidIO subsystem. You may select single built-in method or
+	  or any number of methods to be built as modules.
+	  Selecting a built-in method disables use of loadable methods.
+
+	  If unsure, select Basic built-in.
+
+config RAPIDIO_ENUM_BASIC
+	tristate "Basic"
+	help
+	  This option includes basic RapidIO fabric enumeration and discovery
+	  mechanism similar to one described in RapidIO specification Annex 1.
+
+endchoice
+
 source "drivers/rapidio/switches/Kconfig"
diff --git a/drivers/rapidio/Makefile b/drivers/rapidio/Makefile
index ec3fb8121004..3036702ffe8b 100644
--- a/drivers/rapidio/Makefile
+++ b/drivers/rapidio/Makefile
@@ -1,7 +1,8 @@
 #
 # Makefile for RapidIO interconnect services
 #
-obj-y += rio.o rio-access.o rio-driver.o rio-scan.o rio-sysfs.o
+obj-y += rio.o rio-access.o rio-driver.o rio-sysfs.o
+obj-$(CONFIG_RAPIDIO_ENUM_BASIC) += rio-scan.o
 
 obj-$(CONFIG_RAPIDIO)		+= switches/
 obj-$(CONFIG_RAPIDIO)		+= devices/
diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c
index 0f4a53bdaa3c..55850bb21480 100644
--- a/drivers/rapidio/rio-driver.c
+++ b/drivers/rapidio/rio-driver.c
@@ -164,6 +164,13 @@ void rio_unregister_driver(struct rio_driver *rdrv)
 	driver_unregister(&rdrv->driver);
 }
 
+void rio_attach_device(struct rio_dev *rdev)
+{
+	rdev->dev.bus = &rio_bus_type;
+	rdev->dev.parent = &rio_bus;
+}
+EXPORT_SYMBOL_GPL(rio_attach_device);
+
 /**
  *  rio_match_bus - Tell if a RIO device structure has a matching RIO driver device id structure
  *  @dev: the standard device structure to match against
diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c
index a965acd3c0e4..7bdc67419cc3 100644
--- a/drivers/rapidio/rio-scan.c
+++ b/drivers/rapidio/rio-scan.c
@@ -37,12 +37,8 @@
 
 #include "rio.h"
 
-LIST_HEAD(rio_devices);
-
 static void rio_init_em(struct rio_dev *rdev);
 
-DEFINE_SPINLOCK(rio_global_list_lock);
-
 static int next_destid = 0;
 static int next_comptag = 1;
 
@@ -326,127 +322,6 @@ static int rio_is_switch(struct rio_dev *rdev)
 	return 0;
 }
 
-/**
- * rio_switch_init - Sets switch operations for a particular vendor switch
- * @rdev: RIO device
- * @do_enum: Enumeration/Discovery mode flag
- *
- * Searches the RIO switch ops table for known switch types. If the vid
- * and did match a switch table entry, then call switch initialization
- * routine to setup switch-specific routines.
- */
-static void rio_switch_init(struct rio_dev *rdev, int do_enum)
-{
-	struct rio_switch_ops *cur = __start_rio_switch_ops;
-	struct rio_switch_ops *end = __end_rio_switch_ops;
-
-	while (cur < end) {
-		if ((cur->vid == rdev->vid) && (cur->did == rdev->did)) {
-			pr_debug("RIO: calling init routine for %s\n",
-				 rio_name(rdev));
-			cur->init_hook(rdev, do_enum);
-			break;
-		}
-		cur++;
-	}
-
-	if ((cur >= end) && (rdev->pef & RIO_PEF_STD_RT)) {
-		pr_debug("RIO: adding STD routing ops for %s\n",
-			rio_name(rdev));
-		rdev->rswitch->add_entry = rio_std_route_add_entry;
-		rdev->rswitch->get_entry = rio_std_route_get_entry;
-		rdev->rswitch->clr_table = rio_std_route_clr_table;
-	}
-
-	if (!rdev->rswitch->add_entry || !rdev->rswitch->get_entry)
-		printk(KERN_ERR "RIO: missing routing ops for %s\n",
-		       rio_name(rdev));
-}
-
-/**
- * rio_add_device- Adds a RIO device to the device model
- * @rdev: RIO device
- *
- * Adds the RIO device to the global device list and adds the RIO
- * device to the RIO device list.  Creates the generic sysfs nodes
- * for an RIO device.
- */
-static int rio_add_device(struct rio_dev *rdev)
-{
-	int err;
-
-	err = device_add(&rdev->dev);
-	if (err)
-		return err;
-
-	spin_lock(&rio_global_list_lock);
-	list_add_tail(&rdev->global_list, &rio_devices);
-	spin_unlock(&rio_global_list_lock);
-
-	rio_create_sysfs_dev_files(rdev);
-
-	return 0;
-}
-
-/**
- * rio_enable_rx_tx_port - enable input receiver and output transmitter of
- * given port
- * @port: Master port associated with the RIO network
- * @local: local=1 select local port otherwise a far device is reached
- * @destid: Destination ID of the device to check host bit
- * @hopcount: Number of hops to reach the target
- * @port_num: Port (-number on switch) to enable on a far end device
- *
- * Returns 0 or 1 from on General Control Command and Status Register
- * (EXT_PTR+0x3C)
- */
-inline int rio_enable_rx_tx_port(struct rio_mport *port,
-				 int local, u16 destid,
-				 u8 hopcount, u8 port_num) {
-#ifdef CONFIG_RAPIDIO_ENABLE_RX_TX_PORTS
-	u32 regval;
-	u32 ext_ftr_ptr;
-
-	/*
-	* enable rx input tx output port
-	*/
-	pr_debug("rio_enable_rx_tx_port(local = %d, destid = %d, hopcount = "
-		 "%d, port_num = %d)\n", local, destid, hopcount, port_num);
-
-	ext_ftr_ptr = rio_mport_get_physefb(port, local, destid, hopcount);
-
-	if (local) {
-		rio_local_read_config_32(port, ext_ftr_ptr +
-				RIO_PORT_N_CTL_CSR(0),
-				&regval);
-	} else {
-		if (rio_mport_read_config_32(port, destid, hopcount,
-		ext_ftr_ptr + RIO_PORT_N_CTL_CSR(port_num), &regval) < 0)
-			return -EIO;
-	}
-
-	if (regval & RIO_PORT_N_CTL_P_TYP_SER) {
-		/* serial */
-		regval = regval | RIO_PORT_N_CTL_EN_RX_SER
-				| RIO_PORT_N_CTL_EN_TX_SER;
-	} else {
-		/* parallel */
-		regval = regval | RIO_PORT_N_CTL_EN_RX_PAR
-				| RIO_PORT_N_CTL_EN_TX_PAR;
-	}
-
-	if (local) {
-		rio_local_write_config_32(port, ext_ftr_ptr +
-					  RIO_PORT_N_CTL_CSR(0), regval);
-	} else {
-		if (rio_mport_write_config_32(port, destid, hopcount,
-		    ext_ftr_ptr + RIO_PORT_N_CTL_CSR(port_num), regval) < 0)
-			return -EIO;
-	}
-#endif
-	return 0;
-}
-
 /**
  * rio_setup_device- Allocates and sets up a RIO device
  * @net: RIO network
@@ -587,8 +462,7 @@ static struct rio_dev *rio_setup_device(struct rio_net *net,
 			     rdev->destid);
 	}
 
-	rdev->dev.bus = &rio_bus_type;
-	rdev->dev.parent = &rio_bus;
+	rio_attach_device(rdev);
 
 	device_initialize(&rdev->dev);
 	rdev->dev.release = rio_release_dev;
@@ -1421,3 +1295,41 @@ enum_done:
 bail:
 	return -EBUSY;
 }
+
+static struct rio_scan rio_scan_ops = {
+	.enumerate = rio_enum_mport,
+	.discover = rio_disc_mport,
+};
+
+static bool scan;
+module_param(scan, bool, 0);
+MODULE_PARM_DESC(scan, "Start RapidIO network enumeration/discovery "
+			"(default = 0)");
+
+/**
+ * rio_basic_attach:
+ *
+ * When this enumeration/discovery method is loaded as a module this function
+ * registers its specific enumeration and discover routines for all available
+ * RapidIO mport devices. The "scan" command line parameter controls ability of
+ * the module to start RapidIO enumeration/discovery automatically.
+ *
+ * Returns 0 for success or -EIO if unable to register itself.
+ *
+ * This enumeration/discovery method cannot be unloaded and therefore does not
+ * provide a matching cleanup_module routine.
+ */
+
+static int __init rio_basic_attach(void)
+{
+	if (rio_register_scan(RIO_MPORT_ANY, &rio_scan_ops))
+		return -EIO;
+	if (scan)
+		rio_init_mports();
+	return 0;
+}
+
+late_initcall(rio_basic_attach);
+
+MODULE_DESCRIPTION("Basic RapidIO enumeration/discovery");
+MODULE_LICENSE("GPL");
diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c
index d553b5d13722..6e75dda34799 100644
--- a/drivers/rapidio/rio.c
+++ b/drivers/rapidio/rio.c
@@ -31,7 +31,11 @@
 
 #include "rio.h"
 
+static LIST_HEAD(rio_devices);
+static DEFINE_SPINLOCK(rio_global_list_lock);
+
 static LIST_HEAD(rio_mports);
+static DEFINE_MUTEX(rio_mport_list_lock);
 static unsigned char next_portid;
 static DEFINE_SPINLOCK(rio_mmap_lock);
 
@@ -52,6 +56,32 @@ u16 rio_local_get_device_id(struct rio_mport *port)
 	return (RIO_GET_DID(port->sys_size, result));
 }
 
+/**
+ * rio_add_device- Adds a RIO device to the device model
+ * @rdev: RIO device
+ *
+ * Adds the RIO device to the global device list and adds the RIO
+ * device to the RIO device list.  Creates the generic sysfs nodes
+ * for an RIO device.
+ */
+int rio_add_device(struct rio_dev *rdev)
+{
+	int err;
+
+	err = device_add(&rdev->dev);
+	if (err)
+		return err;
+
+	spin_lock(&rio_global_list_lock);
+	list_add_tail(&rdev->global_list, &rio_devices);
+	spin_unlock(&rio_global_list_lock);
+
+	rio_create_sysfs_dev_files(rdev);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rio_add_device);
+
 /**
  * rio_request_inb_mbox - request inbound mailbox service
  * @mport: RIO master port from which to allocate the mailbox resource
@@ -489,6 +519,7 @@ rio_mport_get_physefb(struct rio_mport *port, int local,
 
 	return ext_ftr_ptr;
 }
+EXPORT_SYMBOL_GPL(rio_mport_get_physefb);
 
 /**
  * rio_get_comptag - Begin or continue searching for a RIO device by component tag
@@ -521,6 +552,7 @@ exit:
 	spin_unlock(&rio_global_list_lock);
 	return rdev;
 }
+EXPORT_SYMBOL_GPL(rio_get_comptag);
 
 /**
  * rio_set_port_lockout - Sets/clears LOCKOUT bit (RIO EM 1.3) for a switch port.
@@ -545,6 +577,107 @@ int rio_set_port_lockout(struct rio_dev *rdev, u32 pnum, int lock)
 				  regval);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(rio_set_port_lockout);
+
+/**
+ * rio_switch_init - Sets switch operations for a particular vendor switch
+ * @rdev: RIO device
+ * @do_enum: Enumeration/Discovery mode flag
+ *
+ * Searches the RIO switch ops table for known switch types. If the vid
+ * and did match a switch table entry, then call switch initialization
+ * routine to setup switch-specific routines.
+ */
+void rio_switch_init(struct rio_dev *rdev, int do_enum)
+{
+	struct rio_switch_ops *cur = __start_rio_switch_ops;
+	struct rio_switch_ops *end = __end_rio_switch_ops;
+
+	while (cur < end) {
+		if ((cur->vid == rdev->vid) && (cur->did == rdev->did)) {
+			pr_debug("RIO: calling init routine for %s\n",
+				 rio_name(rdev));
+			cur->init_hook(rdev, do_enum);
+			break;
+		}
+		cur++;
+	}
+
+	if ((cur >= end) && (rdev->pef & RIO_PEF_STD_RT)) {
+		pr_debug("RIO: adding STD routing ops for %s\n",
+			rio_name(rdev));
+		rdev->rswitch->add_entry = rio_std_route_add_entry;
+		rdev->rswitch->get_entry = rio_std_route_get_entry;
+		rdev->rswitch->clr_table = rio_std_route_clr_table;
+	}
+
+	if (!rdev->rswitch->add_entry || !rdev->rswitch->get_entry)
+		printk(KERN_ERR "RIO: missing routing ops for %s\n",
+		       rio_name(rdev));
+}
+EXPORT_SYMBOL_GPL(rio_switch_init);
+
+/**
+ * rio_enable_rx_tx_port - enable input receiver and output transmitter of
+ * given port
+ * @port: Master port associated with the RIO network
+ * @local: local=1 select local port otherwise a far device is reached
+ * @destid: Destination ID of the device to check host bit
+ * @hopcount: Number of hops to reach the target
+ * @port_num: Port (-number on switch) to enable on a far end device
+ *
+ * Returns 0 or 1 from on General Control Command and Status Register
+ * (EXT_PTR+0x3C)
+ */
+int rio_enable_rx_tx_port(struct rio_mport *port,
+			  int local, u16 destid,
+			  u8 hopcount, u8 port_num)
+{
+#ifdef CONFIG_RAPIDIO_ENABLE_RX_TX_PORTS
+	u32 regval;
+	u32 ext_ftr_ptr;
+
+	/*
+	* enable rx input tx output port
+	*/
+	pr_debug("rio_enable_rx_tx_port(local = %d, destid = %d, hopcount = "
+		 "%d, port_num = %d)\n", local, destid, hopcount, port_num);
+
+	ext_ftr_ptr = rio_mport_get_physefb(port, local, destid, hopcount);
+
+	if (local) {
+		rio_local_read_config_32(port, ext_ftr_ptr +
+				RIO_PORT_N_CTL_CSR(0),
+				&regval);
+	} else {
+		if (rio_mport_read_config_32(port, destid, hopcount,
+		ext_ftr_ptr + RIO_PORT_N_CTL_CSR(port_num), &regval) < 0)
+			return -EIO;
+	}
+
+	if (regval & RIO_PORT_N_CTL_P_TYP_SER) {
+		/* serial */
+		regval = regval | RIO_PORT_N_CTL_EN_RX_SER
+				| RIO_PORT_N_CTL_EN_TX_SER;
+	} else {
+		/* parallel */
+		regval = regval | RIO_PORT_N_CTL_EN_RX_PAR
+				| RIO_PORT_N_CTL_EN_TX_PAR;
+	}
+
+	if (local) {
+		rio_local_write_config_32(port, ext_ftr_ptr +
+					  RIO_PORT_N_CTL_CSR(0), regval);
+	} else {
+		if (rio_mport_write_config_32(port, destid, hopcount,
+		    ext_ftr_ptr + RIO_PORT_N_CTL_CSR(port_num), regval) < 0)
+			return -EIO;
+	}
+#endif
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rio_enable_rx_tx_port);
+
 
 /**
  * rio_chk_dev_route - Validate route to the specified device.
@@ -610,6 +743,7 @@ rio_mport_chk_dev_access(struct rio_mport *mport, u16 destid, u8 hopcount)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(rio_mport_chk_dev_access);
 
 /**
  * rio_chk_dev_access - Validate access to the specified device.
@@ -941,6 +1075,7 @@ rio_mport_get_efb(struct rio_mport *port, int local, u16 destid,
 		return RIO_GET_BLOCK_ID(reg_val);
 	}
 }
+EXPORT_SYMBOL_GPL(rio_mport_get_efb);
 
 /**
  * rio_mport_get_feature - query for devices' extended features
@@ -997,6 +1132,7 @@ rio_mport_get_feature(struct rio_mport * port, int local, u16 destid,
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(rio_mport_get_feature);
 
 /**
  * rio_get_asm - Begin or continue searching for a RIO device by vid/did/asm_vid/asm_did
@@ -1246,6 +1382,71 @@ EXPORT_SYMBOL_GPL(rio_dma_prep_slave_sg);
 
 #endif /* CONFIG_RAPIDIO_DMA_ENGINE */
 
+/**
+ * rio_register_scan - enumeration/discovery method registration interface
+ * @mport_id: mport device ID for which fabric scan routine has to be set
+ *            (RIO_MPORT_ANY = set for all available mports)
+ * @scan_ops: enumeration/discovery control structure
+ *
+ * Assigns enumeration or discovery method to the specified mport device (or all
+ * available mports if RIO_MPORT_ANY is specified).
+ * Returns error if the mport already has an enumerator attached to it.
+ * In case of RIO_MPORT_ANY ignores ports with valid scan routines and returns
+ * an error if was unable to find at least one available mport.
+ */
+int rio_register_scan(int mport_id, struct rio_scan *scan_ops)
+{
+	struct rio_mport *port;
+	int rc = -EBUSY;
+
+	mutex_lock(&rio_mport_list_lock);
+	list_for_each_entry(port, &rio_mports, node) {
+		if (port->id == mport_id || mport_id == RIO_MPORT_ANY) {
+			if (port->nscan && mport_id == RIO_MPORT_ANY)
+				continue;
+			else if (port->nscan)
+				break;
+
+			port->nscan = scan_ops;
+			rc = 0;
+
+			if (mport_id != RIO_MPORT_ANY)
+				break;
+		}
+	}
+	mutex_unlock(&rio_mport_list_lock);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(rio_register_scan);
+
+/**
+ * rio_unregister_scan - removes enumeration/discovery method from mport
+ * @mport_id: mport device ID for which fabric scan routine has to be
+ *            unregistered (RIO_MPORT_ANY = set for all available mports)
+ *
+ * Removes enumeration or discovery method assigned to the specified mport
+ * device (or all available mports if RIO_MPORT_ANY is specified).
+ */
+int rio_unregister_scan(int mport_id)
+{
+	struct rio_mport *port;
+
+	mutex_lock(&rio_mport_list_lock);
+	list_for_each_entry(port, &rio_mports, node) {
+		if (port->id == mport_id || mport_id == RIO_MPORT_ANY) {
+			if (port->nscan)
+				port->nscan = NULL;
+			if (mport_id != RIO_MPORT_ANY)
+				break;
+		}
+	}
+	mutex_unlock(&rio_mport_list_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rio_unregister_scan);
+
 static void rio_fixup_device(struct rio_dev *dev)
 {
 }
@@ -1274,7 +1475,7 @@ static void disc_work_handler(struct work_struct *_work)
 	work = container_of(_work, struct rio_disc_work, work);
 	pr_debug("RIO: discovery work for mport %d %s\n",
 		 work->mport->id, work->mport->name);
-	rio_disc_mport(work->mport);
+	work->mport->nscan->discover(work->mport);
 }
 
 int rio_init_mports(void)
@@ -1290,12 +1491,15 @@ int rio_init_mports(void)
 	 * First, run enumerations and check if we need to perform discovery
 	 * on any of the registered mports.
 	 */
+	mutex_lock(&rio_mport_list_lock);
 	list_for_each_entry(port, &rio_mports, node) {
-		if (port->host_deviceid >= 0)
-			rio_enum_mport(port);
-		else
+		if (port->host_deviceid >= 0) {
+			if (port->nscan)
+				port->nscan->enumerate(port);
+		} else
 			n++;
 	}
+	mutex_unlock(&rio_mport_list_lock);
 
 	if (!n)
 		goto no_disc;
@@ -1322,14 +1526,16 @@ int rio_init_mports(void)
 	}
 
 	n = 0;
+	mutex_lock(&rio_mport_list_lock);
 	list_for_each_entry(port, &rio_mports, node) {
-		if (port->host_deviceid < 0) {
+		if (port->host_deviceid < 0 && port->nscan) {
 			work[n].mport = port;
 			INIT_WORK(&work[n].work, disc_work_handler);
 			queue_work(rio_wq, &work[n].work);
 			n++;
 		}
 	}
+	mutex_unlock(&rio_mport_list_lock);
 
 	flush_workqueue(rio_wq);
 	pr_debug("RIO: destroy discovery workqueue\n");
@@ -1342,8 +1548,6 @@ no_disc:
 	return 0;
 }
 
-device_initcall_sync(rio_init_mports);
-
 static int hdids[RIO_MAX_MPORTS + 1];
 
 static int rio_get_hdid(int index)
@@ -1371,7 +1575,10 @@ int rio_register_mport(struct rio_mport *port)
 
 	port->id = next_portid++;
 	port->host_deviceid = rio_get_hdid(port->id);
+	port->nscan = NULL;
+	mutex_lock(&rio_mport_list_lock);
 	list_add_tail(&port->node, &rio_mports);
+	mutex_unlock(&rio_mport_list_lock);
 	return 0;
 }
 
@@ -1386,3 +1593,4 @@ EXPORT_SYMBOL_GPL(rio_request_inb_mbox);
 EXPORT_SYMBOL_GPL(rio_release_inb_mbox);
 EXPORT_SYMBOL_GPL(rio_request_outb_mbox);
 EXPORT_SYMBOL_GPL(rio_release_outb_mbox);
+EXPORT_SYMBOL_GPL(rio_init_mports);
diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h
index b1af414f15e6..0afdf482517e 100644
--- a/drivers/rapidio/rio.h
+++ b/drivers/rapidio/rio.h
@@ -15,6 +15,7 @@
 #include <linux/rio.h>
 
 #define RIO_MAX_CHK_RETRY	3
+#define RIO_MPORT_ANY		(-1)
 
 /* Functions internal to the RIO core code */
 
@@ -27,8 +28,6 @@ extern u32 rio_mport_get_efb(struct rio_mport *port, int local, u16 destid,
 extern int rio_mport_chk_dev_access(struct rio_mport *mport, u16 destid,
 				    u8 hopcount);
 extern int rio_create_sysfs_dev_files(struct rio_dev *rdev);
-extern int rio_enum_mport(struct rio_mport *mport);
-extern int rio_disc_mport(struct rio_mport *mport);
 extern int rio_std_route_add_entry(struct rio_mport *mport, u16 destid,
 				   u8 hopcount, u16 table, u16 route_destid,
 				   u8 route_port);
@@ -39,10 +38,16 @@ extern int rio_std_route_clr_table(struct rio_mport *mport, u16 destid,
 				   u8 hopcount, u16 table);
 extern int rio_set_port_lockout(struct rio_dev *rdev, u32 pnum, int lock);
 extern struct rio_dev *rio_get_comptag(u32 comp_tag, struct rio_dev *from);
+extern int rio_add_device(struct rio_dev *rdev);
+extern void rio_switch_init(struct rio_dev *rdev, int do_enum);
+extern int rio_enable_rx_tx_port(struct rio_mport *port, int local, u16 destid,
+				 u8 hopcount, u8 port_num);
+extern int rio_register_scan(int mport_id, struct rio_scan *scan_ops);
+extern int rio_unregister_scan(int mport_id);
+extern void rio_attach_device(struct rio_dev *rdev);
 
 /* Structures internal to the RIO core code */
 extern struct device_attribute rio_dev_attrs[];
-extern spinlock_t rio_global_list_lock;
 
 extern struct rio_switch_ops __start_rio_switch_ops[];
 extern struct rio_switch_ops __end_rio_switch_ops[];
diff --git a/include/linux/rio.h b/include/linux/rio.h
index a3e784278667..cd3796ee7410 100644
--- a/include/linux/rio.h
+++ b/include/linux/rio.h
@@ -83,7 +83,6 @@
 
 extern struct bus_type rio_bus_type;
 extern struct device rio_bus;
-extern struct list_head rio_devices;	/* list of all devices */
 
 struct rio_mport;
 struct rio_dev;
@@ -237,6 +236,7 @@ enum rio_phy_type {
  * @name: Port name string
  * @priv: Master port private data
  * @dma: DMA device associated with mport
+ * @nscan: RapidIO network enumeration/discovery operations
  */
 struct rio_mport {
 	struct list_head dbells;	/* list of doorbell events */
@@ -262,6 +262,7 @@ struct rio_mport {
 #ifdef CONFIG_RAPIDIO_DMA_ENGINE
 	struct dma_device	dma;
 #endif
+	struct rio_scan *nscan;
 };
 
 struct rio_id_table {
@@ -460,6 +461,16 @@ static inline struct rio_mport *dma_to_mport(struct dma_device *ddev)
 }
 #endif /* CONFIG_RAPIDIO_DMA_ENGINE */
 
+/**
+ * struct rio_scan - RIO enumeration and discovery operations
+ * @enumerate: Callback to perform RapidIO fabric enumeration.
+ * @discover: Callback to perform RapidIO fabric discovery.
+ */
+struct rio_scan {
+	int (*enumerate)(struct rio_mport *mport);
+	int (*discover)(struct rio_mport *mport);
+};
+
 /* Architecture and hardware-specific functions */
 extern int rio_register_mport(struct rio_mport *);
 extern int rio_open_inb_mbox(struct rio_mport *, void *, int, int);
diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h
index b75c05920ab5..5059994fe297 100644
--- a/include/linux/rio_drv.h
+++ b/include/linux/rio_drv.h
@@ -433,5 +433,6 @@ extern u16 rio_local_get_device_id(struct rio_mport *port);
 extern struct rio_dev *rio_get_device(u16 vid, u16 did, struct rio_dev *from);
 extern struct rio_dev *rio_get_asm(u16 vid, u16 did, u16 asm_vid, u16 asm_did,
 				   struct rio_dev *from);
+extern int rio_init_mports(void);
 
 #endif				/* LINUX_RIO_DRV_H */
-- 
cgit v1.2.3


From bc8fcfea18249640f2728c46d70999dcb7e4dc49 Mon Sep 17 00:00:00 2001
From: Alexandre Bounine <alexandre.bounine@idt.com>
Date: Fri, 24 May 2013 15:55:06 -0700
Subject: rapidio: add enumeration/discovery start from user space

Add RapidIO enumeration/discovery start from user space.  User space
start allows to defer RapidIO fabric scan until the moment when all
participating endpoints are initialized avoiding mandatory synchronized
start of all endpoints (which may be challenging in systems with large
number of RapidIO endpoints).

Signed-off-by: Alexandre Bounine <alexandre.bounine@idt.com>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Li Yang <leoli@freescale.com>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: Andre van Herk <andre.van.herk@Prodrive.nl>
Cc: Micha Nelissen <micha.nelissen@Prodrive.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rapidio/rio-driver.c |  1 +
 drivers/rapidio/rio-scan.c   | 24 ++++++++++++++++++++---
 drivers/rapidio/rio-sysfs.c  | 45 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/rapidio/rio.c        | 28 +++++++++++++++++++++++++--
 drivers/rapidio/rio.h        |  2 ++
 include/linux/rio.h          |  9 +++++++--
 6 files changed, 102 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c
index 55850bb21480..a0c875563d76 100644
--- a/drivers/rapidio/rio-driver.c
+++ b/drivers/rapidio/rio-driver.c
@@ -207,6 +207,7 @@ struct bus_type rio_bus_type = {
 	.name = "rapidio",
 	.match = rio_match_bus,
 	.dev_attrs = rio_dev_attrs,
+	.bus_attrs = rio_bus_attrs,
 	.probe = rio_device_probe,
 	.remove = rio_device_remove,
 };
diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c
index 7bdc67419cc3..4c15dbf81087 100644
--- a/drivers/rapidio/rio-scan.c
+++ b/drivers/rapidio/rio-scan.c
@@ -1134,19 +1134,30 @@ static void rio_pw_enable(struct rio_mport *port, int enable)
 /**
  * rio_enum_mport- Start enumeration through a master port
  * @mport: Master port to send transactions
+ * @flags: Enumeration control flags
  *
  * Starts the enumeration process. If somebody has enumerated our
  * master port device, then give up. If not and we have an active
  * link, then start recursive peer enumeration. Returns %0 if
  * enumeration succeeds or %-EBUSY if enumeration fails.
  */
-int rio_enum_mport(struct rio_mport *mport)
+int rio_enum_mport(struct rio_mport *mport, u32 flags)
 {
 	struct rio_net *net = NULL;
 	int rc = 0;
 
 	printk(KERN_INFO "RIO: enumerate master port %d, %s\n", mport->id,
 	       mport->name);
+
+	/*
+	 * To avoid multiple start requests (repeat enumeration is not supported
+	 * by this method) check if enumeration/discovery was performed for this
+	 * mport: if mport was added into the list of mports for a net exit
+	 * with error.
+	 */
+	if (mport->nnode.next || mport->nnode.prev)
+		return -EBUSY;
+
 	/* If somebody else enumerated our master port device, bail. */
 	if (rio_enum_host(mport) < 0) {
 		printk(KERN_INFO
@@ -1236,14 +1247,16 @@ static void rio_build_route_tables(struct rio_net *net)
 /**
  * rio_disc_mport- Start discovery through a master port
  * @mport: Master port to send transactions
+ * @flags: discovery control flags
  *
  * Starts the discovery process. If we have an active link,
- * then wait for the signal that enumeration is complete.
+ * then wait for the signal that enumeration is complete (if wait
+ * is allowed).
  * When enumeration completion is signaled, start recursive
  * peer discovery. Returns %0 if discovery succeeds or %-EBUSY
  * on failure.
  */
-int rio_disc_mport(struct rio_mport *mport)
+int rio_disc_mport(struct rio_mport *mport, u32 flags)
 {
 	struct rio_net *net = NULL;
 	unsigned long to_end;
@@ -1253,6 +1266,11 @@ int rio_disc_mport(struct rio_mport *mport)
 
 	/* If master port has an active link, allocate net and discover peers */
 	if (rio_mport_is_active(mport)) {
+		if (rio_enum_complete(mport))
+			goto enum_done;
+		else if (flags & RIO_SCAN_ENUM_NO_WAIT)
+			return -EAGAIN;
+
 		pr_debug("RIO: wait for enumeration to complete...\n");
 
 		to_end = jiffies + CONFIG_RAPIDIO_DISC_TIMEOUT * HZ;
diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c
index 4dbe360989be..66d4acd5e18f 100644
--- a/drivers/rapidio/rio-sysfs.c
+++ b/drivers/rapidio/rio-sysfs.c
@@ -285,3 +285,48 @@ void rio_remove_sysfs_dev_files(struct rio_dev *rdev)
 			rdev->rswitch->sw_sysfs(rdev, RIO_SW_SYSFS_REMOVE);
 	}
 }
+
+static ssize_t bus_scan_store(struct bus_type *bus, const char *buf,
+				size_t count)
+{
+	long val;
+	struct rio_mport *port = NULL;
+	int rc;
+
+	if (kstrtol(buf, 0, &val) < 0)
+		return -EINVAL;
+
+	if (val == RIO_MPORT_ANY) {
+		rc = rio_init_mports();
+		goto exit;
+	}
+
+	if (val < 0 || val >= RIO_MAX_MPORTS)
+		return -EINVAL;
+
+	port = rio_find_mport((int)val);
+
+	if (!port) {
+		pr_debug("RIO: %s: mport_%d not available\n",
+			 __func__, (int)val);
+		return -EINVAL;
+	}
+
+	if (!port->nscan)
+		return -EINVAL;
+
+	if (port->host_deviceid >= 0)
+		rc = port->nscan->enumerate(port, 0);
+	else
+		rc = port->nscan->discover(port, RIO_SCAN_ENUM_NO_WAIT);
+exit:
+	if (!rc)
+		rc = count;
+
+	return rc;
+}
+
+struct bus_attribute rio_bus_attrs[] = {
+	__ATTR(scan, (S_IWUSR|S_IWGRP), NULL, bus_scan_store),
+	__ATTR_NULL
+};
diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c
index 6e75dda34799..cb1c08996fbb 100644
--- a/drivers/rapidio/rio.c
+++ b/drivers/rapidio/rio.c
@@ -1382,6 +1382,30 @@ EXPORT_SYMBOL_GPL(rio_dma_prep_slave_sg);
 
 #endif /* CONFIG_RAPIDIO_DMA_ENGINE */
 
+/**
+ * rio_find_mport - find RIO mport by its ID
+ * @mport_id: number (ID) of mport device
+ *
+ * Given a RIO mport number, the desired mport is located
+ * in the global list of mports. If the mport is found, a pointer to its
+ * data structure is returned.  If no mport is found, %NULL is returned.
+ */
+struct rio_mport *rio_find_mport(int mport_id)
+{
+	struct rio_mport *port;
+
+	mutex_lock(&rio_mport_list_lock);
+	list_for_each_entry(port, &rio_mports, node) {
+		if (port->id == mport_id)
+			goto found;
+	}
+	port = NULL;
+found:
+	mutex_unlock(&rio_mport_list_lock);
+
+	return port;
+}
+
 /**
  * rio_register_scan - enumeration/discovery method registration interface
  * @mport_id: mport device ID for which fabric scan routine has to be set
@@ -1475,7 +1499,7 @@ static void disc_work_handler(struct work_struct *_work)
 	work = container_of(_work, struct rio_disc_work, work);
 	pr_debug("RIO: discovery work for mport %d %s\n",
 		 work->mport->id, work->mport->name);
-	work->mport->nscan->discover(work->mport);
+	work->mport->nscan->discover(work->mport, 0);
 }
 
 int rio_init_mports(void)
@@ -1495,7 +1519,7 @@ int rio_init_mports(void)
 	list_for_each_entry(port, &rio_mports, node) {
 		if (port->host_deviceid >= 0) {
 			if (port->nscan)
-				port->nscan->enumerate(port);
+				port->nscan->enumerate(port, 0);
 		} else
 			n++;
 	}
diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h
index 0afdf482517e..c14f864dea5c 100644
--- a/drivers/rapidio/rio.h
+++ b/drivers/rapidio/rio.h
@@ -45,9 +45,11 @@ extern int rio_enable_rx_tx_port(struct rio_mport *port, int local, u16 destid,
 extern int rio_register_scan(int mport_id, struct rio_scan *scan_ops);
 extern int rio_unregister_scan(int mport_id);
 extern void rio_attach_device(struct rio_dev *rdev);
+extern struct rio_mport *rio_find_mport(int mport_id);
 
 /* Structures internal to the RIO core code */
 extern struct device_attribute rio_dev_attrs[];
+extern struct bus_attribute rio_bus_attrs[];
 
 extern struct rio_switch_ops __start_rio_switch_ops[];
 extern struct rio_switch_ops __end_rio_switch_ops[];
diff --git a/include/linux/rio.h b/include/linux/rio.h
index cd3796ee7410..18e099342e6f 100644
--- a/include/linux/rio.h
+++ b/include/linux/rio.h
@@ -265,6 +265,11 @@ struct rio_mport {
 	struct rio_scan *nscan;
 };
 
+/*
+ * Enumeration/discovery control flags
+ */
+#define RIO_SCAN_ENUM_NO_WAIT	0x00000001 /* Do not wait for enum completed */
+
 struct rio_id_table {
 	u16 start;	/* logical minimal id */
 	u32 max;	/* max number of IDs in table */
@@ -467,8 +472,8 @@ static inline struct rio_mport *dma_to_mport(struct dma_device *ddev)
  * @discover: Callback to perform RapidIO fabric discovery.
  */
 struct rio_scan {
-	int (*enumerate)(struct rio_mport *mport);
-	int (*discover)(struct rio_mport *mport);
+	int (*enumerate)(struct rio_mport *mport, u32 flags);
+	int (*discover)(struct rio_mport *mport, u32 flags);
 };
 
 /* Architecture and hardware-specific functions */
-- 
cgit v1.2.3


From 4c663cfc523a88d97a8309b04a089c27dc57fd7e Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Fri, 24 May 2013 15:55:09 -0700
Subject: wait: fix false timeouts when using wait_event_timeout()

Many callers of the wait_event_timeout() and
wait_event_interruptible_timeout() expect that the return value will be
positive if the specified condition becomes true before the timeout
elapses.  However, at the moment this isn't guaranteed.  If the wake-up
handler is delayed enough, the time remaining until timeout will be
calculated as 0 - and passed back as a return value - even if the
condition became true before the timeout has passed.

Fix this by returning at least 1 if the condition becomes true.  This
semantic is in line with what wait_for_condition_timeout() does; see
commit bb10ed09 ("sched: fix wait_for_completion_timeout() spurious
failure under heavy load").

Daniel said "We have 3 instances of this bug in drm/i915.  One case even
where we switch between the interruptible and not interruptible
wait_event_timeout variants, foolishly presuming they have the same
semantics.  I very much like this."

One such bug is reported at
  https://bugs.freedesktop.org/show_bug.cgi?id=64133

Signed-off-by: Imre Deak <imre.deak@intel.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Acked-by: David Howells <dhowells@redhat.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Cc: "Paul E.  McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Lukas Czerner <lczerner@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/wait.h | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index ac38be2692d8..1133695eb067 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -217,6 +217,8 @@ do {									\
 		if (!ret)						\
 			break;						\
 	}								\
+	if (!ret && (condition))					\
+		ret = 1;						\
 	finish_wait(&wq, &__wait);					\
 } while (0)
 
@@ -233,8 +235,9 @@ do {									\
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
  *
- * The function returns 0 if the @timeout elapsed, and the remaining
- * jiffies if the condition evaluated to true before the timeout elapsed.
+ * The function returns 0 if the @timeout elapsed, or the remaining
+ * jiffies (at least 1) if the @condition evaluated to %true before
+ * the @timeout elapsed.
  */
 #define wait_event_timeout(wq, condition, timeout)			\
 ({									\
@@ -302,6 +305,8 @@ do {									\
 		ret = -ERESTARTSYS;					\
 		break;							\
 	}								\
+	if (!ret && (condition))					\
+		ret = 1;						\
 	finish_wait(&wq, &__wait);					\
 } while (0)
 
@@ -318,9 +323,10 @@ do {									\
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
  *
- * The function returns 0 if the @timeout elapsed, -ERESTARTSYS if it
- * was interrupted by a signal, and the remaining jiffies otherwise
- * if the condition evaluated to true before the timeout elapsed.
+ * Returns:
+ * 0 if the @timeout elapsed, -%ERESTARTSYS if it was interrupted by
+ * a signal, or the remaining jiffies (at least 1) if the @condition
+ * evaluated to %true before the @timeout elapsed.
  */
 #define wait_event_interruptible_timeout(wq, condition, timeout)	\
 ({									\
-- 
cgit v1.2.3


From 7450231fb35492951e78a91b833fd935171f4e66 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 24 May 2013 15:55:20 -0700
Subject: linux/kernel.h: fix kernel-doc warning

Fix kernel-doc warning in <linux/kernel.h>:

  Warning(include/linux/kernel.h:590): No description found for parameter 'ip'

scripts/kernel-doc cannot handle macros, functions, or function
prototypes between the function or macro that is being documented and
its definition, so move these prototypes above the function that is
being documented.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index e96329ceb28c..e9ef6d6b51d5 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -562,6 +562,9 @@ int __trace_bprintk(unsigned long ip, const char *fmt, ...);
 extern __printf(2, 3)
 int __trace_printk(unsigned long ip, const char *fmt, ...);
 
+extern int __trace_bputs(unsigned long ip, const char *str);
+extern int __trace_puts(unsigned long ip, const char *str, int size);
+
 /**
  * trace_puts - write a string into the ftrace buffer
  * @str: the string to record
@@ -587,8 +590,6 @@ int __trace_printk(unsigned long ip, const char *fmt, ...);
  *  (1 when __trace_bputs is used, strlen(str) when __trace_puts is used)
  */
 
-extern int __trace_bputs(unsigned long ip, const char *str);
-extern int __trace_puts(unsigned long ip, const char *str, int size);
 #define trace_puts(str) ({						\
 	static const char *trace_printk_fmt				\
 		__attribute__((section("__trace_printk_fmt"))) =	\
-- 
cgit v1.2.3


From 26cb63ad11e04047a64309362674bcbbd6a6f246 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 28 May 2013 10:55:48 +0200
Subject: perf: Fix perf mmap bugs

Vince reported a problem found by his perf specific trinity
fuzzer.

Al noticed 2 problems with perf's mmap():

 - it has issues against fork() since we use vma->vm_mm for accounting.
 - it has an rb refcount leak on double mmap().

We fix the issues against fork() by using VM_DONTCOPY; I don't
think there's code out there that uses this; we didn't hear
about weird accounting problems/crashes. If we do need this to
work, the previously proposed VM_PINNED could make this work.

Aside from the rb reference leak spotted by Al, Vince's example
prog was indeed doing a double mmap() through the use of
perf_event_set_output().

This exposes another problem, since we now have 2 events with
one buffer, the accounting gets screwy because we account per
event. Fix this by making the buffer responsible for its own
accounting.

Reported-by: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Link: http://lkml.kernel.org/r/20130528085548.GA12193@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h |  3 +--
 kernel/events/core.c       | 37 ++++++++++++++++++++-----------------
 kernel/events/internal.h   |  3 +++
 3 files changed, 24 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f463a46424e2..c5b6dbf9c2fc 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -389,8 +389,7 @@ struct perf_event {
 	/* mmap bits */
 	struct mutex			mmap_mutex;
 	atomic_t			mmap_count;
-	int				mmap_locked;
-	struct user_struct		*mmap_user;
+
 	struct ring_buffer		*rb;
 	struct list_head		rb_entry;
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9dc297faf7c0..ae752cd4a086 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2917,7 +2917,7 @@ static void free_event_rcu(struct rcu_head *head)
 	kfree(event);
 }
 
-static void ring_buffer_put(struct ring_buffer *rb);
+static bool ring_buffer_put(struct ring_buffer *rb);
 
 static void free_event(struct perf_event *event)
 {
@@ -3582,13 +3582,13 @@ static struct ring_buffer *ring_buffer_get(struct perf_event *event)
 	return rb;
 }
 
-static void ring_buffer_put(struct ring_buffer *rb)
+static bool ring_buffer_put(struct ring_buffer *rb)
 {
 	struct perf_event *event, *n;
 	unsigned long flags;
 
 	if (!atomic_dec_and_test(&rb->refcount))
-		return;
+		return false;
 
 	spin_lock_irqsave(&rb->event_lock, flags);
 	list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) {
@@ -3598,6 +3598,7 @@ static void ring_buffer_put(struct ring_buffer *rb)
 	spin_unlock_irqrestore(&rb->event_lock, flags);
 
 	call_rcu(&rb->rcu_head, rb_free_rcu);
+	return true;
 }
 
 static void perf_mmap_open(struct vm_area_struct *vma)
@@ -3612,18 +3613,20 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	struct perf_event *event = vma->vm_file->private_data;
 
 	if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
-		unsigned long size = perf_data_size(event->rb);
-		struct user_struct *user = event->mmap_user;
 		struct ring_buffer *rb = event->rb;
+		struct user_struct *mmap_user = rb->mmap_user;
+		int mmap_locked = rb->mmap_locked;
+		unsigned long size = perf_data_size(rb);
 
-		atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
-		vma->vm_mm->pinned_vm -= event->mmap_locked;
 		rcu_assign_pointer(event->rb, NULL);
 		ring_buffer_detach(event, rb);
 		mutex_unlock(&event->mmap_mutex);
 
-		ring_buffer_put(rb);
-		free_uid(user);
+		if (ring_buffer_put(rb)) {
+			atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
+			vma->vm_mm->pinned_vm -= mmap_locked;
+			free_uid(mmap_user);
+		}
 	}
 }
 
@@ -3676,9 +3679,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	WARN_ON_ONCE(event->ctx->parent_ctx);
 	mutex_lock(&event->mmap_mutex);
 	if (event->rb) {
-		if (event->rb->nr_pages == nr_pages)
-			atomic_inc(&event->rb->refcount);
-		else
+		if (event->rb->nr_pages != nr_pages)
 			ret = -EINVAL;
 		goto unlock;
 	}
@@ -3720,12 +3721,14 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		ret = -ENOMEM;
 		goto unlock;
 	}
-	rcu_assign_pointer(event->rb, rb);
+
+	rb->mmap_locked = extra;
+	rb->mmap_user = get_current_user();
 
 	atomic_long_add(user_extra, &user->locked_vm);
-	event->mmap_locked = extra;
-	event->mmap_user = get_current_user();
-	vma->vm_mm->pinned_vm += event->mmap_locked;
+	vma->vm_mm->pinned_vm += extra;
+
+	rcu_assign_pointer(event->rb, rb);
 
 	perf_event_update_userpage(event);
 
@@ -3734,7 +3737,7 @@ unlock:
 		atomic_inc(&event->mmap_count);
 	mutex_unlock(&event->mmap_mutex);
 
-	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = &perf_mmap_vmops;
 
 	return ret;
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index eb675c4d59df..5bc6c8e9b851 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -31,6 +31,9 @@ struct ring_buffer {
 	spinlock_t			event_lock;
 	struct list_head		event_list;
 
+	int				mmap_locked;
+	struct user_struct		*mmap_user;
+
 	struct perf_event_mmap_page	*user_page;
 	void				*data_pages[0];
 };
-- 
cgit v1.2.3


From a44b8bd6072ac82fc49af941de8c1ac85d94852d Mon Sep 17 00:00:00 2001
From: Liu Ying <Ying.Liu@freescale.com>
Date: Tue, 7 May 2013 16:38:40 +0800
Subject: ktime: Use macro NSEC_PER_USEC where appropriate

We've got the macro NSEC_PER_USEC defined in header file
include/linux/time.h. To make the code decent, this patch
replaces the immediate number 1000 to convert bewteen a
time value in microseconds and one in nanoseconds with the
macro NSEC_PER_USEC.

Signed-off-by: Liu Ying <Ying.Liu@freescale.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Daniel Borkmann <dborkman@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/ktime.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index bbca12804d12..608728a164a9 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -229,7 +229,8 @@ static inline ktime_t timespec_to_ktime(const struct timespec ts)
 static inline ktime_t timeval_to_ktime(const struct timeval tv)
 {
 	return (ktime_t) { .tv = { .sec = (s32)tv.tv_sec,
-				   .nsec = (s32)tv.tv_usec * 1000 } };
+				   .nsec = (s32)(tv.tv_usec *
+						 NSEC_PER_USEC) } };
 }
 
 /**
@@ -320,12 +321,12 @@ static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier)
 
 static inline ktime_t ktime_add_us(const ktime_t kt, const u64 usec)
 {
-	return ktime_add_ns(kt, usec * 1000);
+	return ktime_add_ns(kt, usec * NSEC_PER_USEC);
 }
 
 static inline ktime_t ktime_sub_us(const ktime_t kt, const u64 usec)
 {
-	return ktime_sub_ns(kt, usec * 1000);
+	return ktime_sub_ns(kt, usec * NSEC_PER_USEC);
 }
 
 extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs);
-- 
cgit v1.2.3


From 35b210855069ae34450eeaafc0f1b5bfa4f71c87 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Thu, 16 May 2013 15:47:49 +0200
Subject: ktime: Add __must_check prefix to ktime_to_timespec_cond

The function is currently mainly used in the networking code and
if others start using it, they must check the result, otherwise
it cannot be determined if the timespec conversion suceeded.
Currently no user lacks this check, but make future users aware of
a possible misusage.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/ktime.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 608728a164a9..fc66b301b648 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -339,7 +339,8 @@ extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs);
  *
  * Returns true if there was a successful conversion, false if kt was 0.
  */
-static inline bool ktime_to_timespec_cond(const ktime_t kt, struct timespec *ts)
+static inline __must_check bool ktime_to_timespec_cond(const ktime_t kt,
+						       struct timespec *ts)
 {
 	if (kt.tv64) {
 		*ts = ktime_to_timespec(kt);
-- 
cgit v1.2.3


From 55a68c23e0a675b2b8ac2656fd6edbf98b78e4c6 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@denx.de>
Date: Tue, 7 May 2013 22:11:26 +0200
Subject: dw_apb_timer_of.c: Remove parts that were picoxcell-specific

It seems we made a mistake when creating dw_apb_timer_of.c:
picoxcell sched_clock had parts that were not related to
dw_apb_timer, yet we moved them to dw_apb_timer_of, and tried to
use them on socfpga.

This results in system where user/system time is not measured
properly, as demonstrated by

    time dd if=/dev/urandom of=/dev/zero bs=100000 count=100

So this patch switches sched_clock to hardware that exists on both
platforms, and adds missing of_node_put() in dw_apb_timer_init().

Signed-off-by: Pavel Machek <pavel@denx.de>
Acked-by: Jamie Iles <jamie@jamieiles.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 arch/arm/mach-picoxcell/common.h      |  2 --
 drivers/clocksource/dw_apb_timer.c    |  6 ----
 drivers/clocksource/dw_apb_timer_of.c | 52 ++++++++++++++++-------------------
 include/linux/dw_apb_timer.h          |  6 ++++
 4 files changed, 29 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-picoxcell/common.h b/arch/arm/mach-picoxcell/common.h
index 481b42a4ef15..237fb3bcbd04 100644
--- a/arch/arm/mach-picoxcell/common.h
+++ b/arch/arm/mach-picoxcell/common.h
@@ -12,6 +12,4 @@
 
 #include <asm/mach/time.h>
 
-extern void dw_apb_timer_init(void);
-
 #endif /* __PICOXCELL_COMMON_H__ */
diff --git a/drivers/clocksource/dw_apb_timer.c b/drivers/clocksource/dw_apb_timer.c
index e54ca1062d8e..e7042bc5c7d2 100644
--- a/drivers/clocksource/dw_apb_timer.c
+++ b/drivers/clocksource/dw_apb_timer.c
@@ -21,12 +21,6 @@
 #define APBT_MIN_PERIOD			4
 #define APBT_MIN_DELTA_USEC		200
 
-#define APBTMR_N_LOAD_COUNT		0x00
-#define APBTMR_N_CURRENT_VALUE		0x04
-#define APBTMR_N_CONTROL		0x08
-#define APBTMR_N_EOI			0x0c
-#define APBTMR_N_INT_STATUS		0x10
-
 #define APBTMRS_INT_STATUS		0xa0
 #define APBTMRS_EOI			0xa4
 #define APBTMRS_RAW_INT_STATUS		0xa8
diff --git a/drivers/clocksource/dw_apb_timer_of.c b/drivers/clocksource/dw_apb_timer_of.c
index ab09ed3742ee..44a3b9163c46 100644
--- a/drivers/clocksource/dw_apb_timer_of.c
+++ b/drivers/clocksource/dw_apb_timer_of.c
@@ -57,6 +57,15 @@ static void add_clockevent(struct device_node *event_timer)
 	dw_apb_clockevent_register(ced);
 }
 
+static void __iomem *sched_io_base;
+
+/* This is actually same as __apbt_read_clocksource(), but with
+   different interface */
+static u32 read_sched_clock_sptimer(void)
+{
+	return ~__raw_readl(sched_io_base + APBTMR_N_CURRENT_VALUE);
+}
+
 static void add_clocksource(struct device_node *source_timer)
 {
 	void __iomem *iobase;
@@ -71,41 +80,27 @@ static void add_clocksource(struct device_node *source_timer)
 
 	dw_apb_clocksource_start(cs);
 	dw_apb_clocksource_register(cs);
-}
 
-static void __iomem *sched_io_base;
-
-static u32 read_sched_clock(void)
-{
-	return __raw_readl(sched_io_base);
+	sched_io_base = iobase;
+	setup_sched_clock(read_sched_clock_sptimer, 32, rate);
 }
 
-static const struct of_device_id sptimer_ids[] __initconst = {
-	{ .compatible = "picochip,pc3x2-rtc" },
+static const struct of_device_id osctimer_ids[] __initconst = {
+	{ .compatible = "picochip,pc3x2-timer" },
+	{ .compatible = "snps,dw-apb-timer-osc" },
 	{ .compatible = "snps,dw-apb-timer-sp" },
-	{ /* Sentinel */ },
+	{  /* Sentinel */ },
 };
 
-static void init_sched_clock(void)
-{
-	struct device_node *sched_timer;
-	u32 rate;
-
-	sched_timer = of_find_matching_node(NULL, sptimer_ids);
-	if (!sched_timer)
-		panic("No RTC for sched clock to use");
+/*
+   You don't have to use dw_apb_timer for scheduler clock,
+   this should also work fine on arm:
 
-	timer_get_base_and_rate(sched_timer, &sched_io_base, &rate);
-	of_node_put(sched_timer);
+  twd_local_timer_of_register();
+  arch_timer_of_register();
+  arch_timer_sched_clock_init();
+*/
 
-	setup_sched_clock(read_sched_clock, 32, rate);
-}
-
-static const struct of_device_id osctimer_ids[] __initconst = {
-	{ .compatible = "picochip,pc3x2-timer" },
-	{ .compatible = "snps,dw-apb-timer-osc" },
-	{},
-};
 
 void __init dw_apb_timer_init(void)
 {
@@ -121,7 +116,6 @@ void __init dw_apb_timer_init(void)
 		panic("No timer for clocksource");
 	add_clocksource(source_timer);
 
+	of_node_put(event_timer);
 	of_node_put(source_timer);
-
-	init_sched_clock();
 }
diff --git a/include/linux/dw_apb_timer.h b/include/linux/dw_apb_timer.h
index b1cd9597c241..de0904e38f33 100644
--- a/include/linux/dw_apb_timer.h
+++ b/include/linux/dw_apb_timer.h
@@ -17,6 +17,12 @@
 #include <linux/clocksource.h>
 #include <linux/interrupt.h>
 
+#define APBTMR_N_LOAD_COUNT		0x00
+#define APBTMR_N_CURRENT_VALUE		0x04
+#define APBTMR_N_CONTROL		0x08
+#define APBTMR_N_EOI			0x0c
+#define APBTMR_N_INT_STATUS		0x10
+
 #define APBTMRS_REG_SIZE       0x14
 
 struct dw_apb_timer {
-- 
cgit v1.2.3


From 3565184ed0c1ea46bea5b792da5f72a83c43e49b Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@citrix.com>
Date: Mon, 13 May 2013 18:56:06 +0100
Subject: x86: Increase precision of x86_platform.get/set_wallclock()

All the virtualized platforms (KVM, lguest and Xen) have persistent
wallclocks that have more than one second of precision.

read_persistent_wallclock() and update_persistent_wallclock() allow
for nanosecond precision but their implementation on x86 with
x86_platform.get/set_wallclock() only allows for one second precision.
This means guests may see a wallclock time that is off by up to 1
second.

Make set_wallclock() and get_wallclock() take a struct timespec
parameter (which allows for nanosecond precision) so KVM and Xen
guests may start with a more accurate wallclock time and a Xen dom0
can maintain a more accurate wallclock for guests.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 arch/x86/include/asm/mc146818rtc.h |  4 ++--
 arch/x86/include/asm/x86_init.h    |  6 ++++--
 arch/x86/kernel/kvmclock.c         |  9 +++------
 arch/x86/kernel/rtc.c              | 17 +++++++----------
 arch/x86/lguest/boot.c             |  4 ++--
 arch/x86/platform/efi/efi.c        | 10 ++++++----
 arch/x86/xen/time.c                | 19 ++++++-------------
 include/linux/efi.h                |  4 ++--
 8 files changed, 32 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h
index d354fb781c57..a55c7efcc4ed 100644
--- a/arch/x86/include/asm/mc146818rtc.h
+++ b/arch/x86/include/asm/mc146818rtc.h
@@ -95,8 +95,8 @@ static inline unsigned char current_lock_cmos_reg(void)
 unsigned char rtc_cmos_read(unsigned char addr);
 void rtc_cmos_write(unsigned char val, unsigned char addr);
 
-extern int mach_set_rtc_mmss(unsigned long nowtime);
-extern unsigned long mach_get_cmos_time(void);
+extern int mach_set_rtc_mmss(const struct timespec *now);
+extern void mach_get_cmos_time(struct timespec *now);
 
 #define RTC_IRQ 8
 
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index d8d99222b36a..828a1565ba57 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -142,6 +142,8 @@ struct x86_cpuinit_ops {
 	void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
 };
 
+struct timespec;
+
 /**
  * struct x86_platform_ops - platform specific runtime functions
  * @calibrate_tsc:		calibrate TSC
@@ -156,8 +158,8 @@ struct x86_cpuinit_ops {
  */
 struct x86_platform_ops {
 	unsigned long (*calibrate_tsc)(void);
-	unsigned long (*get_wallclock)(void);
-	int (*set_wallclock)(unsigned long nowtime);
+	void (*get_wallclock)(struct timespec *ts);
+	int (*set_wallclock)(const struct timespec *ts);
 	void (*iommu_shutdown)(void);
 	bool (*is_untracked_pat_range)(u64 start, u64 end);
 	void (*nmi_init)(void);
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index d2c381280e3c..0db81ab511cc 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -48,10 +48,9 @@ static struct pvclock_wall_clock wall_clock;
  * have elapsed since the hypervisor wrote the data. So we try to account for
  * that with system time
  */
-static unsigned long kvm_get_wallclock(void)
+static void kvm_get_wallclock(struct timespec *now)
 {
 	struct pvclock_vcpu_time_info *vcpu_time;
-	struct timespec ts;
 	int low, high;
 	int cpu;
 
@@ -64,14 +63,12 @@ static unsigned long kvm_get_wallclock(void)
 	cpu = smp_processor_id();
 
 	vcpu_time = &hv_clock[cpu].pvti;
-	pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
+	pvclock_read_wallclock(&wall_clock, vcpu_time, now);
 
 	preempt_enable();
-
-	return ts.tv_sec;
 }
 
-static int kvm_set_wallclock(unsigned long now)
+static int kvm_set_wallclock(const struct timespec *now)
 {
 	return -1;
 }
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 198eb201ed3b..0aa29394ed6f 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -38,8 +38,9 @@ EXPORT_SYMBOL(rtc_lock);
  * jump to the next second precisely 500 ms later. Check the Motorola
  * MC146818A or Dallas DS12887 data sheet for details.
  */
-int mach_set_rtc_mmss(unsigned long nowtime)
+int mach_set_rtc_mmss(const struct timespec *now)
 {
+	unsigned long nowtime = now->tv_sec;
 	struct rtc_time tm;
 	int retval = 0;
 
@@ -58,7 +59,7 @@ int mach_set_rtc_mmss(unsigned long nowtime)
 	return retval;
 }
 
-unsigned long mach_get_cmos_time(void)
+void mach_get_cmos_time(struct timespec *now)
 {
 	unsigned int status, year, mon, day, hour, min, sec, century = 0;
 	unsigned long flags;
@@ -107,7 +108,8 @@ unsigned long mach_get_cmos_time(void)
 	} else
 		year += CMOS_YEARS_OFFS;
 
-	return mktime(year, mon, day, hour, min, sec);
+	now->tv_sec = mktime(year, mon, day, hour, min, sec);
+	now->tv_nsec = 0;
 }
 
 /* Routines for accessing the CMOS RAM/RTC. */
@@ -135,18 +137,13 @@ EXPORT_SYMBOL(rtc_cmos_write);
 
 int update_persistent_clock(struct timespec now)
 {
-	return x86_platform.set_wallclock(now.tv_sec);
+	return x86_platform.set_wallclock(&now);
 }
 
 /* not static: needed by APM */
 void read_persistent_clock(struct timespec *ts)
 {
-	unsigned long retval;
-
-	retval = x86_platform.get_wallclock();
-
-	ts->tv_sec = retval;
-	ts->tv_nsec = 0;
+	x86_platform.get_wallclock(ts);
 }
 
 
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 7114c63f047d..8424d5adcfa2 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -882,9 +882,9 @@ int lguest_setup_irq(unsigned int irq)
  * It would be far better for everyone if the Guest had its own clock, but
  * until then the Host gives us the time on every interrupt.
  */
-static unsigned long lguest_get_wallclock(void)
+static void lguest_get_wallclock(struct timespec *now)
 {
-	return lguest_data.time.tv_sec;
+	*now = lguest_data.time;
 }
 
 /*
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 55856b2310d3..dd3b82530145 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -352,8 +352,9 @@ static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
 	return status;
 }
 
-int efi_set_rtc_mmss(unsigned long nowtime)
+int efi_set_rtc_mmss(const struct timespec *now)
 {
+	unsigned long nowtime = now->tv_sec;
 	efi_status_t 	status;
 	efi_time_t 	eft;
 	efi_time_cap_t 	cap;
@@ -388,7 +389,7 @@ int efi_set_rtc_mmss(unsigned long nowtime)
 	return 0;
 }
 
-unsigned long efi_get_time(void)
+void efi_get_time(struct timespec *now)
 {
 	efi_status_t status;
 	efi_time_t eft;
@@ -398,8 +399,9 @@ unsigned long efi_get_time(void)
 	if (status != EFI_SUCCESS)
 		pr_err("Oops: efitime: can't read time!\n");
 
-	return mktime(eft.year, eft.month, eft.day, eft.hour,
-		      eft.minute, eft.second);
+	now->tv_sec = mktime(eft.year, eft.month, eft.day, eft.hour,
+			     eft.minute, eft.second);
+	now->tv_nsec = 0;
 }
 
 /*
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 3d88bfdf9e1c..a1947ac2da82 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -191,32 +191,25 @@ static void xen_read_wallclock(struct timespec *ts)
 	put_cpu_var(xen_vcpu);
 }
 
-static unsigned long xen_get_wallclock(void)
+static void xen_get_wallclock(struct timespec *now)
 {
-	struct timespec ts;
-
-	xen_read_wallclock(&ts);
-	return ts.tv_sec;
+	xen_read_wallclock(now);
 }
 
-static int xen_set_wallclock(unsigned long now)
+static int xen_set_wallclock(const struct timespec *now)
 {
 	struct xen_platform_op op;
-	int rc;
 
 	/* do nothing for domU */
 	if (!xen_initial_domain())
 		return -1;
 
 	op.cmd = XENPF_settime;
-	op.u.settime.secs = now;
-	op.u.settime.nsecs = 0;
+	op.u.settime.secs = now->tv_sec;
+	op.u.settime.nsecs = now->tv_nsec;
 	op.u.settime.system_time = xen_clocksource_read();
 
-	rc = HYPERVISOR_dom0_op(&op);
-	WARN(rc != 0, "XENPF_settime failed: now=%ld\n", now);
-
-	return rc;
+	return HYPERVISOR_dom0_op(&op);
 }
 
 static struct clocksource xen_clocksource __read_mostly = {
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 2bc0ad78d058..0068bba6f8b6 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -594,8 +594,8 @@ extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size);
 extern int __init efi_uart_console_only (void);
 extern void efi_initialize_iomem_resources(struct resource *code_resource,
 		struct resource *data_resource, struct resource *bss_resource);
-extern unsigned long efi_get_time(void);
-extern int efi_set_rtc_mmss(unsigned long nowtime);
+extern void efi_get_time(struct timespec *now);
+extern int efi_set_rtc_mmss(const struct timespec *now);
 extern void efi_reserve_boot_services(void);
 extern struct efi_memory_map memmap;
 
-- 
cgit v1.2.3


From 12bcbe66d7b3cc9f9f86cd02f925666eaa3c2107 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 28 May 2013 14:38:42 -0400
Subject: rcu: Add _notrace variation of rcu_dereference_raw() and
 hlist_for_each_entry_rcu()

As rcu_dereference_raw() under RCU debug config options can add quite a
bit of checks, and that tracing uses rcu_dereference_raw(), these checks
happen with the function tracer. The function tracer also happens to trace
these debug checks too. This added overhead can livelock the system.

Add a new interface to RCU for both rcu_dereference_raw_notrace() as well
as hlist_for_each_entry_rcu_notrace() as the hlist iterator uses the
rcu_dereference_raw() as well, and is used a bit with the function tracer.

Link: http://lkml.kernel.org/r/20130528184209.304356745@goodmis.org

Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/rculist.h  | 20 ++++++++++++++++++++
 include/linux/rcupdate.h |  9 +++++++++
 2 files changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 8089e35d47ac..f4b1001a4676 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -460,6 +460,26 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
 		pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\
 			&(pos)->member)), typeof(*(pos)), member))
 
+/**
+ * hlist_for_each_entry_rcu_notrace - iterate over rcu list of given type (for tracing)
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_node within the struct.
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as hlist_add_head_rcu()
+ * as long as the traversal is guarded by rcu_read_lock().
+ *
+ * This is the same as hlist_for_each_entry_rcu() except that it does
+ * not do any RCU debugging or tracing.
+ */
+#define hlist_for_each_entry_rcu_notrace(pos, head, member)			\
+	for (pos = hlist_entry_safe (rcu_dereference_raw_notrace(hlist_first_rcu(head)),\
+			typeof(*(pos)), member);			\
+		pos;							\
+		pos = hlist_entry_safe(rcu_dereference_raw_notrace(hlist_next_rcu(\
+			&(pos)->member)), typeof(*(pos)), member))
+
 /**
  * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type
  * @pos:	the type * to use as a loop cursor.
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 4ccd68e49b00..ddcc7826d907 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -640,6 +640,15 @@ static inline void rcu_preempt_sleep_check(void)
 
 #define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/
 
+/*
+ * The tracing infrastructure traces RCU (we want that), but unfortunately
+ * some of the RCU checks causes tracing to lock up the system.
+ *
+ * The tracing version of rcu_dereference_raw() must not call
+ * rcu_read_lock_held().
+ */
+#define rcu_dereference_raw_notrace(p) __rcu_dereference_check((p), 1, __rcu)
+
 /**
  * rcu_access_index() - fetch RCU index with no dereferencing
  * @p: The index to read
-- 
cgit v1.2.3


From 6cffe00f7d4e24679eae6b7aae4caaf915288256 Mon Sep 17 00:00:00 2001
From: Todd Poynor <toddpoynor@google.com>
Date: Wed, 15 May 2013 14:38:11 -0700
Subject: alarmtimer: Add functions for timerfd support

Add functions needed for hooking up alarmtimer to timerfd:

* alarm_restart: Similar to hrtimer_restart, restart an alarmtimer after
  the expires time has already been updated (as with alarm_forward).

* alarm_forward_now: Similar to hrtimer_forward_now, move the expires
  time forward to an interval from the current time of the associated clock.

* alarm_start_relative: Start an alarmtimer with an expires time relative to
  the current time of the associated clock.

* alarm_expires_remaining: Similar to hrtimer_expires_remaining, return the
  amount of time remaining until alarm expiry.

Signed-off-by: Todd Poynor <toddpoynor@google.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/alarmtimer.h |  4 ++++
 kernel/time/alarmtimer.c   | 39 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 42 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h
index 9069694e70eb..a899402a5a0e 100644
--- a/include/linux/alarmtimer.h
+++ b/include/linux/alarmtimer.h
@@ -44,10 +44,14 @@ struct alarm {
 void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
 		enum alarmtimer_restart (*function)(struct alarm *, ktime_t));
 int alarm_start(struct alarm *alarm, ktime_t start);
+int alarm_start_relative(struct alarm *alarm, ktime_t start);
+void alarm_restart(struct alarm *alarm);
 int alarm_try_to_cancel(struct alarm *alarm);
 int alarm_cancel(struct alarm *alarm);
 
 u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval);
+u64 alarm_forward_now(struct alarm *alarm, ktime_t interval);
+ktime_t alarm_expires_remaining(const struct alarm *alarm);
 
 /* Provide way to access the rtc device being used by alarmtimers */
 struct rtc_device *alarmtimer_get_rtcdev(void);
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index f11d83b12949..3e5cba274475 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -199,6 +199,12 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
 
 }
 
+ktime_t alarm_expires_remaining(const struct alarm *alarm)
+{
+	struct alarm_base *base = &alarm_bases[alarm->type];
+	return ktime_sub(alarm->node.expires, base->gettime());
+}
+
 #ifdef CONFIG_RTC_CLASS
 /**
  * alarmtimer_suspend - Suspend time callback
@@ -305,7 +311,7 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
 }
 
 /**
- * alarm_start - Sets an alarm to fire
+ * alarm_start - Sets an absolute alarm to fire
  * @alarm: ptr to alarm to set
  * @start: time to run the alarm
  */
@@ -324,6 +330,31 @@ int alarm_start(struct alarm *alarm, ktime_t start)
 	return ret;
 }
 
+/**
+ * alarm_start_relative - Sets a relative alarm to fire
+ * @alarm: ptr to alarm to set
+ * @start: time relative to now to run the alarm
+ */
+int alarm_start_relative(struct alarm *alarm, ktime_t start)
+{
+	struct alarm_base *base = &alarm_bases[alarm->type];
+
+	start = ktime_add(start, base->gettime());
+	return alarm_start(alarm, start);
+}
+
+void alarm_restart(struct alarm *alarm)
+{
+	struct alarm_base *base = &alarm_bases[alarm->type];
+	unsigned long flags;
+
+	spin_lock_irqsave(&base->lock, flags);
+	hrtimer_set_expires(&alarm->timer, alarm->node.expires);
+	hrtimer_restart(&alarm->timer);
+	alarmtimer_enqueue(base, alarm);
+	spin_unlock_irqrestore(&base->lock, flags);
+}
+
 /**
  * alarm_try_to_cancel - Tries to cancel an alarm timer
  * @alarm: ptr to alarm to be canceled
@@ -394,6 +425,12 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
 	return overrun;
 }
 
+u64 alarm_forward_now(struct alarm *alarm, ktime_t interval)
+{
+	struct alarm_base *base = &alarm_bases[alarm->type];
+
+	return alarm_forward(alarm, base->gettime(), interval);
+}
 
 
-- 
cgit v1.2.3


From ac4e97abce9b80c020e7113325f49e58b7b15e3f Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 30 May 2013 09:19:35 +0200
Subject: scatterlist: sg_set_buf() argument must be in linear mapping

Add a check behind CONFIG_DEBUG_SG to verify this.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/scatterlist.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 5951e3f38878..26806775b11b 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -111,6 +111,9 @@ static inline struct page *sg_page(struct scatterlist *sg)
 static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
 			      unsigned int buflen)
 {
+#ifdef CONFIG_DEBUG_SG
+	BUG_ON(!virt_addr_valid(buf));
+#endif
 	sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
 }
 
-- 
cgit v1.2.3


From 37448adfc7ce0d6d5892b87aa8d57edde4126f49 Mon Sep 17 00:00:00 2001
From: Lance Ortiz <lance.ortiz@hp.com>
Date: Thu, 30 May 2013 08:25:12 -0600
Subject: aerdrv: Move cper_print_aer() call out of interrupt context

The following warning was seen on 3.9 when a corrected PCIe error was being
handled by the AER subsystem.

WARNING: at .../drivers/pci/search.c:214 pci_get_dev_by_id+0x8a/0x90()

This occurred because a call to pci_get_domain_bus_and_slot() was added to
cper_print_pcie() to setup for the call to cper_print_aer().  The warning
showed up because cper_print_pcie() is called in an interrupt context and
pci_get* functions are not supposed to be called in that context.

The solution is to move the cper_print_aer() call out of the interrupt
context and into aer_recover_work_func() to avoid any warnings when calling
pci_get* functions.

Signed-off-by: Lance Ortiz <lance.ortiz@hp.com>
Acked-by: Borislav Petkov <bp@suse.de>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/acpi/apei/cper.c               | 18 ------------------
 drivers/acpi/apei/ghes.c               |  4 +++-
 drivers/pci/pcie/aer/aerdrv_core.c     |  5 ++++-
 drivers/pci/pcie/aer/aerdrv_errprint.c |  4 ++--
 include/linux/aer.h                    |  5 +++--
 5 files changed, 12 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
index fefc2ca7cc3e..33dc6a004802 100644
--- a/drivers/acpi/apei/cper.c
+++ b/drivers/acpi/apei/cper.c
@@ -250,10 +250,6 @@ static const char *cper_pcie_port_type_strs[] = {
 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
 			    const struct acpi_hest_generic_data *gdata)
 {
-#ifdef CONFIG_ACPI_APEI_PCIEAER
-	struct pci_dev *dev;
-#endif
-
 	if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
 		printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
 		       pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
@@ -285,20 +281,6 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
 		printk(
 	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
 	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
-#ifdef CONFIG_ACPI_APEI_PCIEAER
-	dev = pci_get_domain_bus_and_slot(pcie->device_id.segment,
-			pcie->device_id.bus, pcie->device_id.function);
-	if (!dev) {
-		pr_err("PCI AER Cannot get PCI device %04x:%02x:%02x.%d\n",
-			pcie->device_id.segment, pcie->device_id.bus,
-			pcie->device_id.slot, pcie->device_id.function);
-		return;
-	}
-	if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO)
-		cper_print_aer(pfx, dev, gdata->error_severity,
-				(struct aer_capability_regs *) pcie->aer_info);
-	pci_dev_put(dev);
-#endif
 }
 
 static const char *apei_estatus_section_flag_strs[] = {
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index d668a8ae602b..403baf4dffc1 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -454,7 +454,9 @@ static void ghes_do_proc(struct ghes *ghes,
 				aer_severity = cper_severity_to_aer(sev);
 				aer_recover_queue(pcie_err->device_id.segment,
 						  pcie_err->device_id.bus,
-						  devfn, aer_severity);
+						  devfn, aer_severity,
+						  (struct aer_capability_regs *)
+						  pcie_err->aer_info);
 			}
 
 		}
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 8ec8b4f48560..0f4554e48cc5 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -580,6 +580,7 @@ struct aer_recover_entry
 	u8	devfn;
 	u16	domain;
 	int	severity;
+	struct aer_capability_regs *regs;
 };
 
 static DEFINE_KFIFO(aer_recover_ring, struct aer_recover_entry,
@@ -593,7 +594,7 @@ static DEFINE_SPINLOCK(aer_recover_ring_lock);
 static DECLARE_WORK(aer_recover_work, aer_recover_work_func);
 
 void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
-		       int severity)
+		       int severity, struct aer_capability_regs *aer_regs)
 {
 	unsigned long flags;
 	struct aer_recover_entry entry = {
@@ -601,6 +602,7 @@ void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
 		.devfn		= devfn,
 		.domain		= domain,
 		.severity	= severity,
+		.regs		= aer_regs,
 	};
 
 	spin_lock_irqsave(&aer_recover_ring_lock, flags);
@@ -627,6 +629,7 @@ static void aer_recover_work_func(struct work_struct *work)
 			       PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn));
 			continue;
 		}
+		cper_print_aer(pdev, entry.severity, entry.regs);
 		do_recovery(pdev, entry.severity);
 		pci_dev_put(pdev);
 	}
diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c
index 5ab14251839d..2c7c9f5f592c 100644
--- a/drivers/pci/pcie/aer/aerdrv_errprint.c
+++ b/drivers/pci/pcie/aer/aerdrv_errprint.c
@@ -220,7 +220,7 @@ int cper_severity_to_aer(int cper_severity)
 }
 EXPORT_SYMBOL_GPL(cper_severity_to_aer);
 
-void cper_print_aer(const char *prefix, struct pci_dev *dev, int cper_severity,
+void cper_print_aer(struct pci_dev *dev, int cper_severity,
 		    struct aer_capability_regs *aer)
 {
 	int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0;
@@ -244,7 +244,7 @@ void cper_print_aer(const char *prefix, struct pci_dev *dev, int cper_severity,
 	agent = AER_GET_AGENT(aer_severity, status);
 	dev_err(&dev->dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n",
 	       status, mask);
-	cper_print_bits(prefix, status, status_strs, status_strs_size);
+	cper_print_bits("", status, status_strs, status_strs_size);
 	dev_err(&dev->dev, "aer_layer=%s, aer_agent=%s\n",
 	       aer_error_layer[layer], aer_agent_string[agent]);
 	if (aer_severity != AER_CORRECTABLE)
diff --git a/include/linux/aer.h b/include/linux/aer.h
index ec10e1b24c1c..737f90ab4b62 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -49,10 +49,11 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
 }
 #endif
 
-extern void cper_print_aer(const char *prefix, struct pci_dev *dev,
+extern void cper_print_aer(struct pci_dev *dev,
 			   int cper_severity, struct aer_capability_regs *aer);
 extern int cper_severity_to_aer(int cper_severity);
 extern void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
-			      int severity);
+			      int severity,
+			      struct aer_capability_regs *aer_regs);
 #endif //_AER_H_
 
-- 
cgit v1.2.3


From 45eacc692771bd2b1ea3d384e6345cab3da10861 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 15 May 2013 22:16:32 +0200
Subject: vtime: Use consistent clocks among nohz accounting

While computing the cputime delta of dynticks CPUs,
we are mixing up clocks of differents natures:

* local_clock() which takes care of unstable clock
sources and fix these if needed.

* sched_clock() which is the weaker version of
local_clock(). It doesn't compute any fixup in case
of unstable source.

If the clock source is stable, those two clocks are the
same and we can safely compute the difference against
two random points.

Otherwise it results in random deltas as sched_clock()
can randomly drift away, back or forward, from local_clock().

As a consequence, some strange behaviour with unstable tsc
has been observed such as non progressing constant zero cputime.
(The 'top' command showing no load).

Fix this by only using local_clock(), or its irq safe/remote
equivalent, in vtime code.

Reported-by: Mike Galbraith <efault@gmx.de>
Suggested-by: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Mike Galbraith <efault@gmx.de>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/vtime.h  | 4 ++--
 kernel/sched/core.c    | 2 +-
 kernel/sched/cputime.c | 6 +++---
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 71a5782d8c59..b1dd2db80076 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -34,7 +34,7 @@ static inline void vtime_user_exit(struct task_struct *tsk)
 }
 extern void vtime_guest_enter(struct task_struct *tsk);
 extern void vtime_guest_exit(struct task_struct *tsk);
-extern void vtime_init_idle(struct task_struct *tsk);
+extern void vtime_init_idle(struct task_struct *tsk, int cpu);
 #else
 static inline void vtime_account_irq_exit(struct task_struct *tsk)
 {
@@ -45,7 +45,7 @@ static inline void vtime_user_enter(struct task_struct *tsk) { }
 static inline void vtime_user_exit(struct task_struct *tsk) { }
 static inline void vtime_guest_enter(struct task_struct *tsk) { }
 static inline void vtime_guest_exit(struct task_struct *tsk) { }
-static inline void vtime_init_idle(struct task_struct *tsk) { }
+static inline void vtime_init_idle(struct task_struct *tsk, int cpu) { }
 #endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 58453b8272fd..e1a27f918723 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4745,7 +4745,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	 */
 	idle->sched_class = &idle_sched_class;
 	ftrace_graph_init_idle_task(idle, cpu);
-	vtime_init_idle(idle);
+	vtime_init_idle(idle, cpu);
 #if defined(CONFIG_SMP)
 	sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
 #endif
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index cc2dc3eea8a3..b5ccba22603b 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -747,17 +747,17 @@ void arch_vtime_task_switch(struct task_struct *prev)
 
 	write_seqlock(&current->vtime_seqlock);
 	current->vtime_snap_whence = VTIME_SYS;
-	current->vtime_snap = sched_clock();
+	current->vtime_snap = sched_clock_cpu(smp_processor_id());
 	write_sequnlock(&current->vtime_seqlock);
 }
 
-void vtime_init_idle(struct task_struct *t)
+void vtime_init_idle(struct task_struct *t, int cpu)
 {
 	unsigned long flags;
 
 	write_seqlock_irqsave(&t->vtime_seqlock, flags);
 	t->vtime_snap_whence = VTIME_SYS;
-	t->vtime_snap = sched_clock();
+	t->vtime_snap = sched_clock_cpu(cpu);
 	write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
 }
 
-- 
cgit v1.2.3


From 521921bad1192fb1b8f9b6a5aa673635848b8b5f Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 16 May 2013 01:21:38 +0200
Subject: kvm: Move guest entry/exit APIs to context_tracking

The kvm_host.h header file doesn't handle well
inclusion when archs don't support KVM.

This results in build crashes for such archs when they
want to implement context tracking because this subsystem
includes kvm_host.h in order to implement the
guest_enter/exit APIs but it doesn't handle KVM off case.

To fix this, move the guest_enter()/guest_exit()
declarations and generic implementation to the context
tracking headers. These generic APIs actually belong to
this subsystem, besides other domains boundary tracking
like user_enter() et al.

KVM now properly becomes a user of this library, not the
other buggy way around.

Reported-by: Kevin Hilman <khilman@linaro.org>
Reviewed-by: Kevin Hilman <khilman@linaro.org>
Tested-by: Kevin Hilman <khilman@linaro.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kevin Hilman <khilman@linaro.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/context_tracking.h | 35 +++++++++++++++++++++++++++++++++++
 include/linux/kvm_host.h         | 37 +------------------------------------
 kernel/context_tracking.c        |  1 -
 3 files changed, 36 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 365f4a61bf04..fc09d7b0dacf 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -3,6 +3,7 @@
 
 #include <linux/sched.h>
 #include <linux/percpu.h>
+#include <linux/vtime.h>
 #include <asm/ptrace.h>
 
 struct context_tracking {
@@ -19,6 +20,26 @@ struct context_tracking {
 	} state;
 };
 
+static inline void __guest_enter(void)
+{
+	/*
+	 * This is running in ioctl context so we can avoid
+	 * the call to vtime_account() with its unnecessary idle check.
+	 */
+	vtime_account_system(current);
+	current->flags |= PF_VCPU;
+}
+
+static inline void __guest_exit(void)
+{
+	/*
+	 * This is running in ioctl context so we can avoid
+	 * the call to vtime_account() with its unnecessary idle check.
+	 */
+	vtime_account_system(current);
+	current->flags &= ~PF_VCPU;
+}
+
 #ifdef CONFIG_CONTEXT_TRACKING
 DECLARE_PER_CPU(struct context_tracking, context_tracking);
 
@@ -35,6 +56,9 @@ static inline bool context_tracking_active(void)
 extern void user_enter(void);
 extern void user_exit(void);
 
+extern void guest_enter(void);
+extern void guest_exit(void);
+
 static inline enum ctx_state exception_enter(void)
 {
 	enum ctx_state prev_ctx;
@@ -57,6 +81,17 @@ extern void context_tracking_task_switch(struct task_struct *prev,
 static inline bool context_tracking_in_user(void) { return false; }
 static inline void user_enter(void) { }
 static inline void user_exit(void) { }
+
+static inline void guest_enter(void)
+{
+	__guest_enter();
+}
+
+static inline void guest_exit(void)
+{
+	__guest_exit();
+}
+
 static inline enum ctx_state exception_enter(void) { return 0; }
 static inline void exception_exit(enum ctx_state prev_ctx) { }
 static inline void context_tracking_task_switch(struct task_struct *prev,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f0eea07d2c2b..8db53cfaccdb 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -23,6 +23,7 @@
 #include <linux/ratelimit.h>
 #include <linux/err.h>
 #include <linux/irqflags.h>
+#include <linux/context_tracking.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -760,42 +761,6 @@ static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
 }
 #endif
 
-static inline void __guest_enter(void)
-{
-	/*
-	 * This is running in ioctl context so we can avoid
-	 * the call to vtime_account() with its unnecessary idle check.
-	 */
-	vtime_account_system(current);
-	current->flags |= PF_VCPU;
-}
-
-static inline void __guest_exit(void)
-{
-	/*
-	 * This is running in ioctl context so we can avoid
-	 * the call to vtime_account() with its unnecessary idle check.
-	 */
-	vtime_account_system(current);
-	current->flags &= ~PF_VCPU;
-}
-
-#ifdef CONFIG_CONTEXT_TRACKING
-extern void guest_enter(void);
-extern void guest_exit(void);
-
-#else /* !CONFIG_CONTEXT_TRACKING */
-static inline void guest_enter(void)
-{
-	__guest_enter();
-}
-
-static inline void guest_exit(void)
-{
-	__guest_exit();
-}
-#endif /* !CONFIG_CONTEXT_TRACKING */
-
 static inline void kvm_guest_enter(void)
 {
 	unsigned long flags;
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 65349f07b878..85bdde1137eb 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -15,7 +15,6 @@
  */
 
 #include <linux/context_tracking.h>
-#include <linux/kvm_host.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
 #include <linux/hardirq.h>
-- 
cgit v1.2.3


From 1e2bd517c108816220f262d7954b697af03b5f9c Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Thu, 30 May 2013 06:45:27 +0000
Subject: udp6: Fix udp fragmentation for tunnel traffic.

udp6 over GRE tunnel does not work after to GRE tso changes. GRE
tso handler passes inner packet but keeps track of outer header
start in SKB_GSO_CB(skb)->mac_offset.  udp6 fragment need to
take care of outer header, which start at the mac_offset, while
adding fragment header.
This bug is introduced by commit 68c3316311 (GRE: Add TCP
segmentation offload for GRE).

Reported-by: Dmitry Kravkov <dkravkov@gmail.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Tested-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 15 +++++++++++++++
 net/ipv6/udp_offload.c | 20 ++++++++++++--------
 2 files changed, 27 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2e0ced1af3b1..9c676eae3968 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2852,6 +2852,21 @@ static inline int skb_tnl_header_len(const struct sk_buff *inner_skb)
 		SKB_GSO_CB(inner_skb)->mac_offset;
 }
 
+static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra)
+{
+	int new_headroom, headroom;
+	int ret;
+
+	headroom = skb_headroom(skb);
+	ret = pskb_expand_head(skb, extra, 0, GFP_ATOMIC);
+	if (ret)
+		return ret;
+
+	new_headroom = skb_headroom(skb);
+	SKB_GSO_CB(skb)->mac_offset += (new_headroom - headroom);
+	return 0;
+}
+
 static inline bool skb_is_gso(const struct sk_buff *skb)
 {
 	return skb_shinfo(skb)->gso_size;
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 3bb3a891a424..d3cfaf9c7a08 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -46,11 +46,12 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 	unsigned int mss;
 	unsigned int unfrag_ip6hlen, unfrag_len;
 	struct frag_hdr *fptr;
-	u8 *mac_start, *prevhdr;
+	u8 *packet_start, *prevhdr;
 	u8 nexthdr;
 	u8 frag_hdr_sz = sizeof(struct frag_hdr);
 	int offset;
 	__wsum csum;
+	int tnl_hlen;
 
 	mss = skb_shinfo(skb)->gso_size;
 	if (unlikely(skb->len <= mss))
@@ -83,9 +84,11 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 	skb->ip_summed = CHECKSUM_NONE;
 
 	/* Check if there is enough headroom to insert fragment header. */
-	if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) &&
-	    pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC))
-		goto out;
+	tnl_hlen = skb_tnl_header_len(skb);
+	if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) {
+		if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
+			goto out;
+	}
 
 	/* Find the unfragmentable header and shift it left by frag_hdr_sz
 	 * bytes to insert fragment header.
@@ -93,11 +96,12 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 	unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
 	nexthdr = *prevhdr;
 	*prevhdr = NEXTHDR_FRAGMENT;
-	unfrag_len = skb_network_header(skb) - skb_mac_header(skb) +
-		     unfrag_ip6hlen;
-	mac_start = skb_mac_header(skb);
-	memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len);
+	unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
+		     unfrag_ip6hlen + tnl_hlen;
+	packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
+	memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
 
+	SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
 	skb->mac_header -= frag_hdr_sz;
 	skb->network_header -= frag_hdr_sz;
 
-- 
cgit v1.2.3


From 6d7581e62f8be462440d7b22c6361f7c9fa4902b Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Wed, 29 May 2013 05:02:56 +0000
Subject: list: introduce list_first_entry_or_null

non-rcu variant of list_first_or_null_rcu

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/list.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/list.h b/include/linux/list.h
index 6a1f8df9144b..b83e5657365a 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -361,6 +361,17 @@ static inline void list_splice_tail_init(struct list_head *list,
 #define list_first_entry(ptr, type, member) \
 	list_entry((ptr)->next, type, member)
 
+/**
+ * list_first_entry_or_null - get the first element from a list
+ * @ptr:	the list head to take the element from.
+ * @type:	the type of the struct this is embedded in.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Note that if the list is empty, it returns NULL.
+ */
+#define list_first_entry_or_null(ptr, type, member) \
+	(!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL)
+
 /**
  * list_for_each	-	iterate over a list
  * @pos:	the &struct list_head to use as a loop cursor.
-- 
cgit v1.2.3


From c87a124a5d5e8cf8e21c4363c3372bcaf53ea190 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 29 May 2013 09:06:27 +0000
Subject: net: force a reload of first item in hlist_nulls_for_each_entry_rcu

Roman Gushchin discovered that udp4_lib_lookup2() was not reloading
first item in the rcu protected list, in case the loop was restarted.

This produced soft lockups as in https://lkml.org/lkml/2013/4/16/37

rcu_dereference(X)/ACCESS_ONCE(X) seem to not work as intended if X is
ptr->field :

In some cases, gcc caches the value or ptr->field in a register.

Use a barrier() to disallow such caching, as documented in
Documentation/atomic_ops.txt line 114

Thanks a lot to Roman for providing analysis and numerous patches.

Diagnosed-by: Roman Gushchin <klamm@yandex-team.ru>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Boris Zhmurov <zhmurov@yandex-team.ru>
Signed-off-by: Roman Gushchin <klamm@yandex-team.ru>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rculist_nulls.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index 2ae13714828b..1c33dd7da4a7 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -105,9 +105,14 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
  * @head:	the head for your list.
  * @member:	the name of the hlist_nulls_node within the struct.
  *
+ * The barrier() is needed to make sure compiler doesn't cache first element [1],
+ * as this loop can be restarted [2]
+ * [1] Documentation/atomic_ops.txt around line 114
+ * [2] Documentation/RCU/rculist_nulls.txt around line 146
  */
 #define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member)			\
-	for (pos = rcu_dereference_raw(hlist_nulls_first_rcu(head));		\
+	for (({barrier();}),							\
+	     pos = rcu_dereference_raw(hlist_nulls_first_rcu(head));		\
 		(!is_a_nulls(pos)) &&						\
 		({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
 		pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)))
-- 
cgit v1.2.3


From a7526eb5d06b0084ef12d7b168d008fcf516caab Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Wed, 5 Jun 2013 19:38:26 +0000
Subject: net: Unbreak compat_sys_{send,recv}msg

I broke them in this commit:

    commit 1be374a0518a288147c6a7398792583200a67261
    Author: Andy Lutomirski <luto@amacapital.net>
    Date:   Wed May 22 14:07:44 2013 -0700

        net: Block MSG_CMSG_COMPAT in send(m)msg and recv(m)msg

This patch adds __sys_sendmsg and __sys_sendmsg as common helpers that accept
MSG_CMSG_COMPAT and blocks MSG_CMSG_COMPAT at the syscall entrypoints.  It
also reverts some unnecessary checks in sys_socketcall.

Apparently I was suffering from underscore blindness the first time around.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Tested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h |  3 +++
 net/compat.c           | 13 +++++++--
 net/socket.c           | 72 +++++++++++++++++++++++---------------------------
 3 files changed, 47 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 33bf2dfab19d..b10ce4b341ea 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -320,6 +320,9 @@ extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
 
 struct timespec;
 
+/* The __sys_...msg variants allow MSG_CMSG_COMPAT */
+extern long __sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags);
+extern long __sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags);
 extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
 			  unsigned int flags, struct timespec *timeout);
 extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg,
diff --git a/net/compat.c b/net/compat.c
index 79ae88485001..f0a1ba6c8086 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -734,19 +734,25 @@ static unsigned char nas[21] = {
 
 asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags)
 {
-	return sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
+	if (flags & MSG_CMSG_COMPAT)
+		return -EINVAL;
+	return __sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
 }
 
 asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg,
 				    unsigned int vlen, unsigned int flags)
 {
+	if (flags & MSG_CMSG_COMPAT)
+		return -EINVAL;
 	return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
 			      flags | MSG_CMSG_COMPAT);
 }
 
 asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags)
 {
-	return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
+	if (flags & MSG_CMSG_COMPAT)
+		return -EINVAL;
+	return __sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
 }
 
 asmlinkage long compat_sys_recv(int fd, void __user *buf, size_t len, unsigned int flags)
@@ -768,6 +774,9 @@ asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
 	int datagrams;
 	struct timespec ktspec;
 
+	if (flags & MSG_CMSG_COMPAT)
+		return -EINVAL;
+
 	if (COMPAT_USE_64BIT_TIME)
 		return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
 				      flags | MSG_CMSG_COMPAT,
diff --git a/net/socket.c b/net/socket.c
index 9ff6366fee13..4ca1526db756 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1956,7 +1956,7 @@ struct used_address {
 	unsigned int name_len;
 };
 
-static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
+static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
 			 struct msghdr *msg_sys, unsigned int flags,
 			 struct used_address *used_address)
 {
@@ -2071,26 +2071,30 @@ out:
  *	BSD sendmsg interface
  */
 
-SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags)
+long __sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
 {
 	int fput_needed, err;
 	struct msghdr msg_sys;
 	struct socket *sock;
 
-	if (flags & MSG_CMSG_COMPAT)
-		return -EINVAL;
-
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (!sock)
 		goto out;
 
-	err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
+	err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
 
 	fput_light(sock->file, fput_needed);
 out:
 	return err;
 }
 
+SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags)
+{
+	if (flags & MSG_CMSG_COMPAT)
+		return -EINVAL;
+	return __sys_sendmsg(fd, msg, flags);
+}
+
 /*
  *	Linux sendmmsg interface
  */
@@ -2121,15 +2125,16 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
 
 	while (datagrams < vlen) {
 		if (MSG_CMSG_COMPAT & flags) {
-			err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
-					    &msg_sys, flags, &used_address);
+			err = ___sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
+					     &msg_sys, flags, &used_address);
 			if (err < 0)
 				break;
 			err = __put_user(err, &compat_entry->msg_len);
 			++compat_entry;
 		} else {
-			err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
-					    &msg_sys, flags, &used_address);
+			err = ___sys_sendmsg(sock,
+					     (struct msghdr __user *)entry,
+					     &msg_sys, flags, &used_address);
 			if (err < 0)
 				break;
 			err = put_user(err, &entry->msg_len);
@@ -2158,7 +2163,7 @@ SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
 	return __sys_sendmmsg(fd, mmsg, vlen, flags);
 }
 
-static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
+static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
 			 struct msghdr *msg_sys, unsigned int flags, int nosec)
 {
 	struct compat_msghdr __user *msg_compat =
@@ -2250,27 +2255,31 @@ out:
  *	BSD recvmsg interface
  */
 
-SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
-		unsigned int, flags)
+long __sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags)
 {
 	int fput_needed, err;
 	struct msghdr msg_sys;
 	struct socket *sock;
 
-	if (flags & MSG_CMSG_COMPAT)
-		return -EINVAL;
-
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (!sock)
 		goto out;
 
-	err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
+	err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
 
 	fput_light(sock->file, fput_needed);
 out:
 	return err;
 }
 
+SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
+		unsigned int, flags)
+{
+	if (flags & MSG_CMSG_COMPAT)
+		return -EINVAL;
+	return __sys_recvmsg(fd, msg, flags);
+}
+
 /*
  *     Linux recvmmsg interface
  */
@@ -2308,17 +2317,18 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
 		 * No need to ask LSM for more than the first datagram.
 		 */
 		if (MSG_CMSG_COMPAT & flags) {
-			err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
-					    &msg_sys, flags & ~MSG_WAITFORONE,
-					    datagrams);
+			err = ___sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
+					     &msg_sys, flags & ~MSG_WAITFORONE,
+					     datagrams);
 			if (err < 0)
 				break;
 			err = __put_user(err, &compat_entry->msg_len);
 			++compat_entry;
 		} else {
-			err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
-					    &msg_sys, flags & ~MSG_WAITFORONE,
-					    datagrams);
+			err = ___sys_recvmsg(sock,
+					     (struct msghdr __user *)entry,
+					     &msg_sys, flags & ~MSG_WAITFORONE,
+					     datagrams);
 			if (err < 0)
 				break;
 			err = put_user(err, &entry->msg_len);
@@ -2505,31 +2515,15 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
 				   (int __user *)a[4]);
 		break;
 	case SYS_SENDMSG:
-		if (a[2] & MSG_CMSG_COMPAT) {
-			err = -EINVAL;
-			break;
-		}
 		err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
 		break;
 	case SYS_SENDMMSG:
-		if (a[3] & MSG_CMSG_COMPAT) {
-			err = -EINVAL;
-			break;
-		}
 		err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
 		break;
 	case SYS_RECVMSG:
-		if (a[2] & MSG_CMSG_COMPAT) {
-			err = -EINVAL;
-			break;
-		}
 		err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
 		break;
 	case SYS_RECVMMSG:
-		if (a[3] & MSG_CMSG_COMPAT) {
-			err = -EINVAL;
-			break;
-		}
 		err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
 				   (struct timespec __user *)a[4]);
 		break;
-- 
cgit v1.2.3


From d62840995a99c9766803d54e9d7923f247a1c1db Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 22 May 2013 02:41:36 -0700
Subject: trace: Allow idle-safe tracepoints to be called from irq

__DECLARE_TRACE_RCU() currently creates an _rcuidle() tracepoint which
may safely be invoked from what RCU considers to be an idle CPU.
However, these _rcuidle() tracepoints may -not- be invoked from the
handler of an irq taken from idle, because rcu_idle_enter() zeroes
RCU's nesting-level counter, so that the rcu_irq_exit() returning to
idle will trigger a WARN_ON_ONCE().

This commit therefore substitutes rcu_irq_enter() for rcu_idle_exit()
and rcu_irq_exit() for rcu_idle_enter() in order to make the _rcuidle()
tracepoints usable from irq handlers as well as from process context.

Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 2f322c38bd4d..f8e084d0fc77 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -145,8 +145,8 @@ static inline void tracepoint_synchronize_unregister(void)
 				TP_PROTO(data_proto),			\
 				TP_ARGS(data_args),			\
 				TP_CONDITION(cond),			\
-				rcu_idle_exit(),			\
-				rcu_idle_enter());			\
+				rcu_irq_enter(),			\
+				rcu_irq_exit());			\
 	}
 #else
 #define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args)
-- 
cgit v1.2.3


From ed13998c319b050fc9abdb73915859dfdbe1fb38 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Wed, 5 Jun 2013 15:30:55 +0200
Subject: sock_diag: fix filter code sent to userspace

Filters need to be translated to real BPF code for userland, like SO_GETFILTER.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 1 +
 net/core/filter.c      | 2 +-
 net/core/sock_diag.c   | 9 +++++++--
 3 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index c050dcc322a4..f65f5a69db8f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -46,6 +46,7 @@ extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 extern int sk_detach_filter(struct sock *sk);
 extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen);
 extern int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned len);
+extern void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to);
 
 #ifdef CONFIG_BPF_JIT
 #include <stdarg.h>
diff --git a/net/core/filter.c b/net/core/filter.c
index dad2a178f9f8..6438f29ff266 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -778,7 +778,7 @@ int sk_detach_filter(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(sk_detach_filter);
 
-static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
+void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
 {
 	static const u16 decodes[] = {
 		[BPF_S_ALU_ADD_K]	= BPF_ALU|BPF_ADD|BPF_K,
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index d5bef0b0f639..a0e9cf6379de 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -73,8 +73,13 @@ int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk,
 		goto out;
 	}
 
-	if (filter)
-		memcpy(nla_data(attr), filter->insns, len);
+	if (filter) {
+		struct sock_filter *fb = (struct sock_filter *)nla_data(attr);
+		int i;
+
+		for (i = 0; i < filter->len; i++, fb++)
+			sk_decode_filter(&filter->insns[i], fb);
+	}
 
 out:
 	rcu_read_unlock();
-- 
cgit v1.2.3


From b79462a8b9f9a452edc20c64a70a89ba3b0a6a88 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Sat, 8 Jun 2013 15:00:55 +0200
Subject: team: fix checks in team_get_first_port_txable_rcu()

should be checked if "cur" is txable, not "port".

Introduced by commit 6e88e1357c "team: use function team_port_txable()
for determing enabled and up port"

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_team.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 4474557904f6..16fae6436d0e 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -249,12 +249,12 @@ team_get_first_port_txable_rcu(struct team *team, struct team_port *port)
 		return port;
 	cur = port;
 	list_for_each_entry_continue_rcu(cur, &team->port_list, list)
-		if (team_port_txable(port))
+		if (team_port_txable(cur))
 			return cur;
 	list_for_each_entry_rcu(cur, &team->port_list, list) {
 		if (cur == port)
 			break;
-		if (team_port_txable(port))
+		if (team_port_txable(cur))
 			return cur;
 	}
 	return NULL;
-- 
cgit v1.2.3


From 38ff87f77af0b5a93fc8581cff1d6e5692ab8970 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Sat, 1 Jun 2013 23:39:40 -0700
Subject: sched_clock: Make ARM's sched_clock generic for all architectures

Nothing about the sched_clock implementation in the ARM port is
specific to the architecture. Generalize the code so that other
architectures can use it by selecting GENERIC_SCHED_CLOCK.

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
[jstultz: Merge minor collisions with other patches in my tree]
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 arch/arm/Kconfig                          |   1 +
 arch/arm/common/timer-sp.c                |   2 +-
 arch/arm/include/asm/sched_clock.h        |  16 ---
 arch/arm/kernel/Makefile                  |   2 +-
 arch/arm/kernel/arch_timer.c              |   2 +-
 arch/arm/kernel/sched_clock.c             | 216 ------------------------------
 arch/arm/kernel/time.c                    |   4 +-
 arch/arm/mach-davinci/time.c              |   2 +-
 arch/arm/mach-imx/time.c                  |   2 +-
 arch/arm/mach-integrator/integrator_ap.c  |   2 +-
 arch/arm/mach-ixp4xx/common.c             |   2 +-
 arch/arm/mach-mmp/time.c                  |   2 +-
 arch/arm/mach-msm/timer.c                 |   2 +-
 arch/arm/mach-omap1/time.c                |   2 +-
 arch/arm/mach-omap2/timer.c               |   2 +-
 arch/arm/mach-pxa/time.c                  |   2 +-
 arch/arm/mach-sa1100/time.c               |   2 +-
 arch/arm/mach-u300/timer.c                |   2 +-
 arch/arm/plat-iop/time.c                  |   2 +-
 arch/arm/plat-omap/counter_32k.c          |   2 +-
 arch/arm/plat-orion/time.c                |   2 +-
 arch/arm/plat-samsung/samsung-time.c      |   2 +-
 arch/arm/plat-versatile/sched-clock.c     |   2 +-
 drivers/clocksource/bcm2835_timer.c       |   2 +-
 drivers/clocksource/clksrc-dbx500-prcmu.c |   3 +-
 drivers/clocksource/dw_apb_timer_of.c     |   3 +-
 drivers/clocksource/mxs_timer.c           |   2 +-
 drivers/clocksource/nomadik-mtu.c         |   2 +-
 drivers/clocksource/samsung_pwm_timer.c   |   2 +-
 drivers/clocksource/tegra20_timer.c       |   2 +-
 drivers/clocksource/time-armada-370-xp.c  |   2 +-
 drivers/clocksource/timer-marco.c         |   2 +-
 drivers/clocksource/timer-prima2.c        |   2 +-
 include/linux/sched_clock.h               |  21 +++
 init/Kconfig                              |   3 +
 init/main.c                               |   2 +
 kernel/time/Makefile                      |   1 +
 kernel/time/sched_clock.c                 | 215 +++++++++++++++++++++++++++++
 38 files changed, 273 insertions(+), 266 deletions(-)
 delete mode 100644 arch/arm/include/asm/sched_clock.h
 delete mode 100644 arch/arm/kernel/sched_clock.c
 create mode 100644 include/linux/sched_clock.h
 create mode 100644 kernel/time/sched_clock.c

(limited to 'include/linux')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 49d993cee512..53d3a356f61f 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -14,6 +14,7 @@ config ARM
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
 	select GENERIC_PCI_IOMAP
+	select GENERIC_SCHED_CLOCK
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_IDLE_POLL_SETUP
 	select GENERIC_STRNCPY_FROM_USER
diff --git a/arch/arm/common/timer-sp.c b/arch/arm/common/timer-sp.c
index ddc740769601..023ee63827a2 100644
--- a/arch/arm/common/timer-sp.c
+++ b/arch/arm/common/timer-sp.c
@@ -28,8 +28,8 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/sched_clock.h>
 
-#include <asm/sched_clock.h>
 #include <asm/hardware/arm_timer.h>
 #include <asm/hardware/timer-sp.h>
 
diff --git a/arch/arm/include/asm/sched_clock.h b/arch/arm/include/asm/sched_clock.h
deleted file mode 100644
index 3d520ddca61b..000000000000
--- a/arch/arm/include/asm/sched_clock.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * sched_clock.h: support for extending counters to full 64-bit ns counter
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#ifndef ASM_SCHED_CLOCK
-#define ASM_SCHED_CLOCK
-
-extern void sched_clock_postinit(void);
-extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate);
-
-extern unsigned long long (*sched_clock_func)(void);
-
-#endif
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 5f3338eacad2..97cb0576d07c 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -16,7 +16,7 @@ CFLAGS_REMOVE_return_address.o = -pg
 # Object file lists.
 
 obj-y		:= elf.o entry-armv.o entry-common.o irq.o opcodes.o \
-		   process.o ptrace.o return_address.o sched_clock.o \
+		   process.o ptrace.o return_address.o \
 		   setup.o signal.o stacktrace.o sys_arm.o time.o traps.o
 
 obj-$(CONFIG_ATAGS)		+= atags_parse.o
diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c
index 59dcdced6e30..221f07b11ccb 100644
--- a/arch/arm/kernel/arch_timer.c
+++ b/arch/arm/kernel/arch_timer.c
@@ -11,9 +11,9 @@
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/errno.h>
+#include <linux/sched_clock.h>
 
 #include <asm/delay.h>
-#include <asm/sched_clock.h>
 
 #include <clocksource/arm_arch_timer.h>
 
diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
deleted file mode 100644
index a781c59b93c0..000000000000
--- a/arch/arm/kernel/sched_clock.c
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
- * sched_clock.c: support for extending counters to full 64-bit ns counter
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/clocksource.h>
-#include <linux/init.h>
-#include <linux/jiffies.h>
-#include <linux/kernel.h>
-#include <linux/moduleparam.h>
-#include <linux/sched.h>
-#include <linux/syscore_ops.h>
-#include <linux/timer.h>
-
-#include <asm/sched_clock.h>
-
-struct clock_data {
-	u64 epoch_ns;
-	u32 epoch_cyc;
-	u32 epoch_cyc_copy;
-	unsigned long rate;
-	u32 mult;
-	u32 shift;
-	bool suspended;
-};
-
-static void sched_clock_poll(unsigned long wrap_ticks);
-static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0);
-static int irqtime = -1;
-
-core_param(irqtime, irqtime, int, 0400);
-
-static struct clock_data cd = {
-	.mult	= NSEC_PER_SEC / HZ,
-};
-
-static u32 __read_mostly sched_clock_mask = 0xffffffff;
-
-static u32 notrace jiffy_sched_clock_read(void)
-{
-	return (u32)(jiffies - INITIAL_JIFFIES);
-}
-
-static u32 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
-
-static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
-{
-	return (cyc * mult) >> shift;
-}
-
-static unsigned long long notrace cyc_to_sched_clock(u32 cyc, u32 mask)
-{
-	u64 epoch_ns;
-	u32 epoch_cyc;
-
-	/*
-	 * Load the epoch_cyc and epoch_ns atomically.  We do this by
-	 * ensuring that we always write epoch_cyc, epoch_ns and
-	 * epoch_cyc_copy in strict order, and read them in strict order.
-	 * If epoch_cyc and epoch_cyc_copy are not equal, then we're in
-	 * the middle of an update, and we should repeat the load.
-	 */
-	do {
-		epoch_cyc = cd.epoch_cyc;
-		smp_rmb();
-		epoch_ns = cd.epoch_ns;
-		smp_rmb();
-	} while (epoch_cyc != cd.epoch_cyc_copy);
-
-	return epoch_ns + cyc_to_ns((cyc - epoch_cyc) & mask, cd.mult, cd.shift);
-}
-
-/*
- * Atomically update the sched_clock epoch.
- */
-static void notrace update_sched_clock(void)
-{
-	unsigned long flags;
-	u32 cyc;
-	u64 ns;
-
-	cyc = read_sched_clock();
-	ns = cd.epoch_ns +
-		cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
-			  cd.mult, cd.shift);
-	/*
-	 * Write epoch_cyc and epoch_ns in a way that the update is
-	 * detectable in cyc_to_fixed_sched_clock().
-	 */
-	raw_local_irq_save(flags);
-	cd.epoch_cyc_copy = cyc;
-	smp_wmb();
-	cd.epoch_ns = ns;
-	smp_wmb();
-	cd.epoch_cyc = cyc;
-	raw_local_irq_restore(flags);
-}
-
-static void sched_clock_poll(unsigned long wrap_ticks)
-{
-	mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks));
-	update_sched_clock();
-}
-
-void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
-{
-	unsigned long r, w;
-	u64 res, wrap;
-	char r_unit;
-
-	if (cd.rate > rate)
-		return;
-
-	BUG_ON(bits > 32);
-	WARN_ON(!irqs_disabled());
-	read_sched_clock = read;
-	sched_clock_mask = (1 << bits) - 1;
-	cd.rate = rate;
-
-	/* calculate the mult/shift to convert counter ticks to ns. */
-	clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC, 0);
-
-	r = rate;
-	if (r >= 4000000) {
-		r /= 1000000;
-		r_unit = 'M';
-	} else if (r >= 1000) {
-		r /= 1000;
-		r_unit = 'k';
-	} else
-		r_unit = ' ';
-
-	/* calculate how many ns until we wrap */
-	wrap = cyc_to_ns((1ULL << bits) - 1, cd.mult, cd.shift);
-	do_div(wrap, NSEC_PER_MSEC);
-	w = wrap;
-
-	/* calculate the ns resolution of this counter */
-	res = cyc_to_ns(1ULL, cd.mult, cd.shift);
-	pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n",
-		bits, r, r_unit, res, w);
-
-	/*
-	 * Start the timer to keep sched_clock() properly updated and
-	 * sets the initial epoch.
-	 */
-	sched_clock_timer.data = msecs_to_jiffies(w - (w / 10));
-	update_sched_clock();
-
-	/*
-	 * Ensure that sched_clock() starts off at 0ns
-	 */
-	cd.epoch_ns = 0;
-
-	/* Enable IRQ time accounting if we have a fast enough sched_clock */
-	if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
-		enable_sched_clock_irqtime();
-
-	pr_debug("Registered %pF as sched_clock source\n", read);
-}
-
-static unsigned long long notrace sched_clock_32(void)
-{
-	u32 cyc = read_sched_clock();
-	return cyc_to_sched_clock(cyc, sched_clock_mask);
-}
-
-unsigned long long __read_mostly (*sched_clock_func)(void) = sched_clock_32;
-
-unsigned long long notrace sched_clock(void)
-{
-	if (cd.suspended)
-		return cd.epoch_ns;
-
-	return sched_clock_func();
-}
-
-void __init sched_clock_postinit(void)
-{
-	/*
-	 * If no sched_clock function has been provided at that point,
-	 * make it the final one one.
-	 */
-	if (read_sched_clock == jiffy_sched_clock_read)
-		setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
-
-	sched_clock_poll(sched_clock_timer.data);
-}
-
-static int sched_clock_suspend(void)
-{
-	sched_clock_poll(sched_clock_timer.data);
-	cd.suspended = true;
-	return 0;
-}
-
-static void sched_clock_resume(void)
-{
-	cd.epoch_cyc = read_sched_clock();
-	cd.epoch_cyc_copy = cd.epoch_cyc;
-	cd.suspended = false;
-}
-
-static struct syscore_ops sched_clock_ops = {
-	.suspend = sched_clock_suspend,
-	.resume = sched_clock_resume,
-};
-
-static int __init sched_clock_syscore_init(void)
-{
-	register_syscore_ops(&sched_clock_ops);
-	return 0;
-}
-device_initcall(sched_clock_syscore_init);
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index abff4e9aaee0..98aee3258398 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -24,9 +24,9 @@
 #include <linux/timer.h>
 #include <linux/clocksource.h>
 #include <linux/irq.h>
+#include <linux/sched_clock.h>
 
 #include <asm/thread_info.h>
-#include <asm/sched_clock.h>
 #include <asm/stacktrace.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/time.h>
@@ -120,6 +120,4 @@ void __init time_init(void)
 		machine_desc->init_time();
 	else
 		clocksource_of_init();
-
-	sched_clock_postinit();
 }
diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c
index bad361ec1666..7a55b5c95971 100644
--- a/arch/arm/mach-davinci/time.c
+++ b/arch/arm/mach-davinci/time.c
@@ -18,8 +18,8 @@
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/platform_device.h>
+#include <linux/sched_clock.h>
 
-#include <asm/sched_clock.h>
 #include <asm/mach/irq.h>
 #include <asm/mach/time.h>
 
diff --git a/arch/arm/mach-imx/time.c b/arch/arm/mach-imx/time.c
index fea91313678b..cd46529e9eaa 100644
--- a/arch/arm/mach-imx/time.c
+++ b/arch/arm/mach-imx/time.c
@@ -26,8 +26,8 @@
 #include <linux/clockchips.h>
 #include <linux/clk.h>
 #include <linux/err.h>
+#include <linux/sched_clock.h>
 
-#include <asm/sched_clock.h>
 #include <asm/mach/time.h>
 
 #include "common.h"
diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c
index b23c8e4f28e8..aa4346227c41 100644
--- a/arch/arm/mach-integrator/integrator_ap.c
+++ b/arch/arm/mach-integrator/integrator_ap.c
@@ -41,6 +41,7 @@
 #include <linux/stat.h>
 #include <linux/sys_soc.h>
 #include <linux/termios.h>
+#include <linux/sched_clock.h>
 #include <video/vga.h>
 
 #include <mach/hardware.h>
@@ -49,7 +50,6 @@
 #include <asm/setup.h>
 #include <asm/param.h>		/* HZ */
 #include <asm/mach-types.h>
-#include <asm/sched_clock.h>
 
 #include <mach/lm.h>
 #include <mach/irqs.h>
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 6600cff6bd92..58307cff1f18 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -30,6 +30,7 @@
 #include <linux/export.h>
 #include <linux/gpio.h>
 #include <linux/cpu.h>
+#include <linux/sched_clock.h>
 
 #include <mach/udc.h>
 #include <mach/hardware.h>
@@ -38,7 +39,6 @@
 #include <asm/pgtable.h>
 #include <asm/page.h>
 #include <asm/irq.h>
-#include <asm/sched_clock.h>
 #include <asm/system_misc.h>
 
 #include <asm/mach/map.h>
diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c
index 86a18b3d252e..7ac41e83cfef 100644
--- a/arch/arm/mach-mmp/time.c
+++ b/arch/arm/mach-mmp/time.c
@@ -28,8 +28,8 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/sched_clock.h>
 
-#include <asm/sched_clock.h>
 #include <mach/addr-map.h>
 #include <mach/regs-timers.h>
 #include <mach/regs-apbc.h>
diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c
index 284313f3e02c..b6418fd5fe0d 100644
--- a/arch/arm/mach-msm/timer.c
+++ b/arch/arm/mach-msm/timer.c
@@ -23,10 +23,10 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/sched_clock.h>
 
 #include <asm/mach/time.h>
 #include <asm/localtimer.h>
-#include <asm/sched_clock.h>
 
 #include "common.h"
 
diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c
index 726ec23d29c7..80603d2fef77 100644
--- a/arch/arm/mach-omap1/time.c
+++ b/arch/arm/mach-omap1/time.c
@@ -43,9 +43,9 @@
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
 #include <linux/io.h>
+#include <linux/sched_clock.h>
 
 #include <asm/irq.h>
-#include <asm/sched_clock.h>
 
 #include <mach/hardware.h>
 #include <asm/mach/irq.h>
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index f8b23b8040d9..4c069b0cab21 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -41,10 +41,10 @@
 #include <linux/of_irq.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/dmtimer-omap.h>
+#include <linux/sched_clock.h>
 
 #include <asm/mach/time.h>
 #include <asm/smp_twd.h>
-#include <asm/sched_clock.h>
 
 #include "omap_hwmod.h"
 #include "omap_device.h"
diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c
index 8f1ee92aea30..9aa852a8fab9 100644
--- a/arch/arm/mach-pxa/time.c
+++ b/arch/arm/mach-pxa/time.c
@@ -16,11 +16,11 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/clockchips.h>
+#include <linux/sched_clock.h>
 
 #include <asm/div64.h>
 #include <asm/mach/irq.h>
 #include <asm/mach/time.h>
-#include <asm/sched_clock.h>
 #include <mach/regs-ost.h>
 #include <mach/irqs.h>
 
diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c
index a59a13a665a6..713c86cd3d64 100644
--- a/arch/arm/mach-sa1100/time.c
+++ b/arch/arm/mach-sa1100/time.c
@@ -14,9 +14,9 @@
 #include <linux/irq.h>
 #include <linux/timex.h>
 #include <linux/clockchips.h>
+#include <linux/sched_clock.h>
 
 #include <asm/mach/time.h>
-#include <asm/sched_clock.h>
 #include <mach/hardware.h>
 #include <mach/irqs.h>
 
diff --git a/arch/arm/mach-u300/timer.c b/arch/arm/mach-u300/timer.c
index d9e73209c9b8..af771b76fe1c 100644
--- a/arch/arm/mach-u300/timer.c
+++ b/arch/arm/mach-u300/timer.c
@@ -18,12 +18,12 @@
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/irq.h>
+#include <linux/sched_clock.h>
 
 #include <mach/hardware.h>
 #include <mach/irqs.h>
 
 /* Generic stuff */
-#include <asm/sched_clock.h>
 #include <asm/mach/map.h>
 #include <asm/mach/time.h>
 
diff --git a/arch/arm/plat-iop/time.c b/arch/arm/plat-iop/time.c
index 837a2d52e9db..29606bd75f3f 100644
--- a/arch/arm/plat-iop/time.c
+++ b/arch/arm/plat-iop/time.c
@@ -22,9 +22,9 @@
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
 #include <linux/export.h>
+#include <linux/sched_clock.h>
 #include <mach/hardware.h>
 #include <asm/irq.h>
-#include <asm/sched_clock.h>
 #include <asm/uaccess.h>
 #include <asm/mach/irq.h>
 #include <asm/mach/time.h>
diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c
index 5b0b86bb34bb..d9bc98eb2a6b 100644
--- a/arch/arm/plat-omap/counter_32k.c
+++ b/arch/arm/plat-omap/counter_32k.c
@@ -18,9 +18,9 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/clocksource.h>
+#include <linux/sched_clock.h>
 
 #include <asm/mach/time.h>
-#include <asm/sched_clock.h>
 
 #include <plat/counter-32k.h>
 
diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c
index 5d5ac0f05422..9d2b2ac74938 100644
--- a/arch/arm/plat-orion/time.c
+++ b/arch/arm/plat-orion/time.c
@@ -16,7 +16,7 @@
 #include <linux/clockchips.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
-#include <asm/sched_clock.h>
+#include <linux/sched_clock.h>
 
 /*
  * MBus bridge block registers.
diff --git a/arch/arm/plat-samsung/samsung-time.c b/arch/arm/plat-samsung/samsung-time.c
index f899cbc9b288..2957075ca836 100644
--- a/arch/arm/plat-samsung/samsung-time.c
+++ b/arch/arm/plat-samsung/samsung-time.c
@@ -15,12 +15,12 @@
 #include <linux/clk.h>
 #include <linux/clockchips.h>
 #include <linux/platform_device.h>
+#include <linux/sched_clock.h>
 
 #include <asm/smp_twd.h>
 #include <asm/mach/time.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
-#include <asm/sched_clock.h>
 
 #include <mach/map.h>
 #include <plat/devs.h>
diff --git a/arch/arm/plat-versatile/sched-clock.c b/arch/arm/plat-versatile/sched-clock.c
index b33b74c87232..51b109e3b6c3 100644
--- a/arch/arm/plat-versatile/sched-clock.c
+++ b/arch/arm/plat-versatile/sched-clock.c
@@ -20,8 +20,8 @@
  */
 #include <linux/kernel.h>
 #include <linux/io.h>
+#include <linux/sched_clock.h>
 
-#include <asm/sched_clock.h>
 #include <plat/sched_clock.h>
 
 static void __iomem *ctr;
diff --git a/drivers/clocksource/bcm2835_timer.c b/drivers/clocksource/bcm2835_timer.c
index 766611d29945..07ea7ce900dc 100644
--- a/drivers/clocksource/bcm2835_timer.c
+++ b/drivers/clocksource/bcm2835_timer.c
@@ -28,8 +28,8 @@
 #include <linux/of_platform.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/sched_clock.h>
 
-#include <asm/sched_clock.h>
 #include <asm/irq.h>
 
 #define REG_CONTROL	0x00
diff --git a/drivers/clocksource/clksrc-dbx500-prcmu.c b/drivers/clocksource/clksrc-dbx500-prcmu.c
index 54f3d119d99c..0a7fb2440e29 100644
--- a/drivers/clocksource/clksrc-dbx500-prcmu.c
+++ b/drivers/clocksource/clksrc-dbx500-prcmu.c
@@ -14,8 +14,7 @@
  */
 #include <linux/clockchips.h>
 #include <linux/clksrc-dbx500-prcmu.h>
-
-#include <asm/sched_clock.h>
+#include <linux/sched_clock.h>
 
 #define RATE_32K		32768
 
diff --git a/drivers/clocksource/dw_apb_timer_of.c b/drivers/clocksource/dw_apb_timer_of.c
index af13b8559b61..a97b4065dacf 100644
--- a/drivers/clocksource/dw_apb_timer_of.c
+++ b/drivers/clocksource/dw_apb_timer_of.c
@@ -20,8 +20,7 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-
-#include <asm/sched_clock.h>
+#include <linux/sched_clock.h>
 
 static void timer_get_base_and_rate(struct device_node *np,
 				    void __iomem **base, u32 *rate)
diff --git a/drivers/clocksource/mxs_timer.c b/drivers/clocksource/mxs_timer.c
index 02af4204af86..0f5e65f74dc3 100644
--- a/drivers/clocksource/mxs_timer.c
+++ b/drivers/clocksource/mxs_timer.c
@@ -29,9 +29,9 @@
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/stmp_device.h>
+#include <linux/sched_clock.h>
 
 #include <asm/mach/time.h>
-#include <asm/sched_clock.h>
 
 /*
  * There are 2 versions of the timrot on Freescale MXS-based SoCs.
diff --git a/drivers/clocksource/nomadik-mtu.c b/drivers/clocksource/nomadik-mtu.c
index e405531e1cc5..8864c17841c8 100644
--- a/drivers/clocksource/nomadik-mtu.c
+++ b/drivers/clocksource/nomadik-mtu.c
@@ -18,8 +18,8 @@
 #include <linux/delay.h>
 #include <linux/err.h>
 #include <linux/platform_data/clocksource-nomadik-mtu.h>
+#include <linux/sched_clock.h>
 #include <asm/mach/time.h>
-#include <asm/sched_clock.h>
 
 /*
  * The MTU device hosts four different counters, with 4 set of
diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c
index 0234c8d2c8f2..584b5472eea3 100644
--- a/drivers/clocksource/samsung_pwm_timer.c
+++ b/drivers/clocksource/samsung_pwm_timer.c
@@ -21,10 +21,10 @@
 #include <linux/of_irq.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
+#include <linux/sched_clock.h>
 
 #include <clocksource/samsung_pwm.h>
 
-#include <asm/sched_clock.h>
 
 /*
  * Clocksource driver
diff --git a/drivers/clocksource/tegra20_timer.c b/drivers/clocksource/tegra20_timer.c
index ae877b021b54..93961703b887 100644
--- a/drivers/clocksource/tegra20_timer.c
+++ b/drivers/clocksource/tegra20_timer.c
@@ -26,10 +26,10 @@
 #include <linux/io.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/sched_clock.h>
 
 #include <asm/mach/time.h>
 #include <asm/smp_twd.h>
-#include <asm/sched_clock.h>
 
 #define RTC_SECONDS            0x08
 #define RTC_SHADOW_SECONDS     0x0c
diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/time-armada-370-xp.c
index 47a673070d70..efdca3263afe 100644
--- a/drivers/clocksource/time-armada-370-xp.c
+++ b/drivers/clocksource/time-armada-370-xp.c
@@ -27,8 +27,8 @@
 #include <linux/of_address.h>
 #include <linux/irq.h>
 #include <linux/module.h>
+#include <linux/sched_clock.h>
 
-#include <asm/sched_clock.h>
 #include <asm/localtimer.h>
 #include <linux/percpu.h>
 /*
diff --git a/drivers/clocksource/timer-marco.c b/drivers/clocksource/timer-marco.c
index 97738dbf3e3b..e5dc9129ca26 100644
--- a/drivers/clocksource/timer-marco.c
+++ b/drivers/clocksource/timer-marco.c
@@ -17,7 +17,7 @@
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_address.h>
-#include <asm/sched_clock.h>
+#include <linux/sched_clock.h>
 #include <asm/localtimer.h>
 #include <asm/mach/time.h>
 
diff --git a/drivers/clocksource/timer-prima2.c b/drivers/clocksource/timer-prima2.c
index 760882665d7a..ef3cfb269d8b 100644
--- a/drivers/clocksource/timer-prima2.c
+++ b/drivers/clocksource/timer-prima2.c
@@ -18,7 +18,7 @@
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_address.h>
-#include <asm/sched_clock.h>
+#include <linux/sched_clock.h>
 #include <asm/mach/time.h>
 
 #define SIRFSOC_TIMER_COUNTER_LO	0x0000
diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h
new file mode 100644
index 000000000000..fa7922c80a41
--- /dev/null
+++ b/include/linux/sched_clock.h
@@ -0,0 +1,21 @@
+/*
+ * sched_clock.h: support for extending counters to full 64-bit ns counter
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef LINUX_SCHED_CLOCK
+#define LINUX_SCHED_CLOCK
+
+#ifdef CONFIG_GENERIC_SCHED_CLOCK
+extern void sched_clock_postinit(void);
+#else
+static inline void sched_clock_postinit(void) { }
+#endif
+
+extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate);
+
+extern unsigned long long (*sched_clock_func)(void);
+
+#endif
diff --git a/init/Kconfig b/init/Kconfig
index 9d3a7887a6d3..1a3f93329a67 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -757,6 +757,9 @@ config LOG_BUF_SHIFT
 config HAVE_UNSTABLE_SCHED_CLOCK
 	bool
 
+config GENERIC_SCHED_CLOCK
+	bool
+
 #
 # For architectures that want to enable the support for NUMA-affine scheduler
 # balancing logic:
diff --git a/init/main.c b/init/main.c
index 9484f4ba88d0..bef4a6ac7c76 100644
--- a/init/main.c
+++ b/init/main.c
@@ -74,6 +74,7 @@
 #include <linux/ptrace.h>
 #include <linux/blkdev.h>
 #include <linux/elevator.h>
+#include <linux/sched_clock.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -555,6 +556,7 @@ asmlinkage void __init start_kernel(void)
 	softirq_init();
 	timekeeping_init();
 	time_init();
+	sched_clock_postinit();
 	profile_init();
 	call_function_init();
 	WARN(!irqs_disabled(), "Interrupts were enabled early\n");
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index d52ac8bf0006..9250130646f5 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -4,6 +4,7 @@ obj-y += timeconv.o posix-clock.o alarmtimer.o
 obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD)		+= clockevents.o
 obj-$(CONFIG_GENERIC_CLOCKEVENTS)		+= tick-common.o
 obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)	+= tick-broadcast.o
+obj-$(CONFIG_GENERIC_SCHED_CLOCK)		+= sched_clock.o
 obj-$(CONFIG_TICK_ONESHOT)			+= tick-oneshot.o
 obj-$(CONFIG_TICK_ONESHOT)			+= tick-sched.o
 obj-$(CONFIG_TIMER_STATS)			+= timer_stats.o
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
new file mode 100644
index 000000000000..aad1ae6077ef
--- /dev/null
+++ b/kernel/time/sched_clock.c
@@ -0,0 +1,215 @@
+/*
+ * sched_clock.c: support for extending counters to full 64-bit ns counter
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/clocksource.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+#include <linux/sched.h>
+#include <linux/syscore_ops.h>
+#include <linux/timer.h>
+#include <linux/sched_clock.h>
+
+struct clock_data {
+	u64 epoch_ns;
+	u32 epoch_cyc;
+	u32 epoch_cyc_copy;
+	unsigned long rate;
+	u32 mult;
+	u32 shift;
+	bool suspended;
+};
+
+static void sched_clock_poll(unsigned long wrap_ticks);
+static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0);
+static int irqtime = -1;
+
+core_param(irqtime, irqtime, int, 0400);
+
+static struct clock_data cd = {
+	.mult	= NSEC_PER_SEC / HZ,
+};
+
+static u32 __read_mostly sched_clock_mask = 0xffffffff;
+
+static u32 notrace jiffy_sched_clock_read(void)
+{
+	return (u32)(jiffies - INITIAL_JIFFIES);
+}
+
+static u32 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
+
+static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
+{
+	return (cyc * mult) >> shift;
+}
+
+static unsigned long long notrace cyc_to_sched_clock(u32 cyc, u32 mask)
+{
+	u64 epoch_ns;
+	u32 epoch_cyc;
+
+	/*
+	 * Load the epoch_cyc and epoch_ns atomically.  We do this by
+	 * ensuring that we always write epoch_cyc, epoch_ns and
+	 * epoch_cyc_copy in strict order, and read them in strict order.
+	 * If epoch_cyc and epoch_cyc_copy are not equal, then we're in
+	 * the middle of an update, and we should repeat the load.
+	 */
+	do {
+		epoch_cyc = cd.epoch_cyc;
+		smp_rmb();
+		epoch_ns = cd.epoch_ns;
+		smp_rmb();
+	} while (epoch_cyc != cd.epoch_cyc_copy);
+
+	return epoch_ns + cyc_to_ns((cyc - epoch_cyc) & mask, cd.mult, cd.shift);
+}
+
+/*
+ * Atomically update the sched_clock epoch.
+ */
+static void notrace update_sched_clock(void)
+{
+	unsigned long flags;
+	u32 cyc;
+	u64 ns;
+
+	cyc = read_sched_clock();
+	ns = cd.epoch_ns +
+		cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
+			  cd.mult, cd.shift);
+	/*
+	 * Write epoch_cyc and epoch_ns in a way that the update is
+	 * detectable in cyc_to_fixed_sched_clock().
+	 */
+	raw_local_irq_save(flags);
+	cd.epoch_cyc_copy = cyc;
+	smp_wmb();
+	cd.epoch_ns = ns;
+	smp_wmb();
+	cd.epoch_cyc = cyc;
+	raw_local_irq_restore(flags);
+}
+
+static void sched_clock_poll(unsigned long wrap_ticks)
+{
+	mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks));
+	update_sched_clock();
+}
+
+void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
+{
+	unsigned long r, w;
+	u64 res, wrap;
+	char r_unit;
+
+	if (cd.rate > rate)
+		return;
+
+	BUG_ON(bits > 32);
+	WARN_ON(!irqs_disabled());
+	read_sched_clock = read;
+	sched_clock_mask = (1 << bits) - 1;
+	cd.rate = rate;
+
+	/* calculate the mult/shift to convert counter ticks to ns. */
+	clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC, 0);
+
+	r = rate;
+	if (r >= 4000000) {
+		r /= 1000000;
+		r_unit = 'M';
+	} else if (r >= 1000) {
+		r /= 1000;
+		r_unit = 'k';
+	} else
+		r_unit = ' ';
+
+	/* calculate how many ns until we wrap */
+	wrap = cyc_to_ns((1ULL << bits) - 1, cd.mult, cd.shift);
+	do_div(wrap, NSEC_PER_MSEC);
+	w = wrap;
+
+	/* calculate the ns resolution of this counter */
+	res = cyc_to_ns(1ULL, cd.mult, cd.shift);
+	pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n",
+		bits, r, r_unit, res, w);
+
+	/*
+	 * Start the timer to keep sched_clock() properly updated and
+	 * sets the initial epoch.
+	 */
+	sched_clock_timer.data = msecs_to_jiffies(w - (w / 10));
+	update_sched_clock();
+
+	/*
+	 * Ensure that sched_clock() starts off at 0ns
+	 */
+	cd.epoch_ns = 0;
+
+	/* Enable IRQ time accounting if we have a fast enough sched_clock */
+	if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
+		enable_sched_clock_irqtime();
+
+	pr_debug("Registered %pF as sched_clock source\n", read);
+}
+
+static unsigned long long notrace sched_clock_32(void)
+{
+	u32 cyc = read_sched_clock();
+	return cyc_to_sched_clock(cyc, sched_clock_mask);
+}
+
+unsigned long long __read_mostly (*sched_clock_func)(void) = sched_clock_32;
+
+unsigned long long notrace sched_clock(void)
+{
+	if (cd.suspended)
+		return cd.epoch_ns;
+
+	return sched_clock_func();
+}
+
+void __init sched_clock_postinit(void)
+{
+	/*
+	 * If no sched_clock function has been provided at that point,
+	 * make it the final one one.
+	 */
+	if (read_sched_clock == jiffy_sched_clock_read)
+		setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
+
+	sched_clock_poll(sched_clock_timer.data);
+}
+
+static int sched_clock_suspend(void)
+{
+	sched_clock_poll(sched_clock_timer.data);
+	cd.suspended = true;
+	return 0;
+}
+
+static void sched_clock_resume(void)
+{
+	cd.epoch_cyc = read_sched_clock();
+	cd.epoch_cyc_copy = cd.epoch_cyc;
+	cd.suspended = false;
+}
+
+static struct syscore_ops sched_clock_ops = {
+	.suspend = sched_clock_suspend,
+	.resume = sched_clock_resume,
+};
+
+static int __init sched_clock_syscore_init(void)
+{
+	register_syscore_ops(&sched_clock_ops);
+	return 0;
+}
+device_initcall(sched_clock_syscore_init);
-- 
cgit v1.2.3


From 16e53dbf10a2d7e228709a7286310e629ede5e45 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Wed, 12 Jun 2013 14:04:36 -0700
Subject: CPU hotplug: provide a generic helper to disable/enable CPU hotplug

There are instances in the kernel where we would like to disable CPU
hotplug (from sysfs) during some important operation.  Today the freezer
code depends on this and the code to do it was kinda tailor-made for
that.

Restructure the code and make it generic enough to be useful for other
usecases too.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Robin Holt <holt@sgi.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Russ Anderson <rja@sgi.com>
Cc: Robin Holt <holt@sgi.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Cc: Shawn Guo <shawn.guo@linaro.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpu.h |  4 ++++
 kernel/cpu.c        | 55 ++++++++++++++++++++++-------------------------------
 2 files changed, 27 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index c6f6e0839b61..9f3c7e81270a 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -175,6 +175,8 @@ extern struct bus_type cpu_subsys;
 
 extern void get_online_cpus(void);
 extern void put_online_cpus(void);
+extern void cpu_hotplug_disable(void);
+extern void cpu_hotplug_enable(void);
 #define hotcpu_notifier(fn, pri)	cpu_notifier(fn, pri)
 #define register_hotcpu_notifier(nb)	register_cpu_notifier(nb)
 #define unregister_hotcpu_notifier(nb)	unregister_cpu_notifier(nb)
@@ -198,6 +200,8 @@ static inline void cpu_hotplug_driver_unlock(void)
 
 #define get_online_cpus()	do { } while (0)
 #define put_online_cpus()	do { } while (0)
+#define cpu_hotplug_disable()	do { } while (0)
+#define cpu_hotplug_enable()	do { } while (0)
 #define hotcpu_notifier(fn, pri)	do { (void)(fn); } while (0)
 /* These aren't inline functions due to a GCC bug. */
 #define register_hotcpu_notifier(nb)	({ (void)(nb); 0; })
diff --git a/kernel/cpu.c b/kernel/cpu.c
index b5e4ab2d427e..198a38883e64 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -133,6 +133,27 @@ static void cpu_hotplug_done(void)
 	mutex_unlock(&cpu_hotplug.lock);
 }
 
+/*
+ * Wait for currently running CPU hotplug operations to complete (if any) and
+ * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
+ * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
+ * hotplug path before performing hotplug operations. So acquiring that lock
+ * guarantees mutual exclusion from any currently running hotplug operations.
+ */
+void cpu_hotplug_disable(void)
+{
+	cpu_maps_update_begin();
+	cpu_hotplug_disabled = 1;
+	cpu_maps_update_done();
+}
+
+void cpu_hotplug_enable(void)
+{
+	cpu_maps_update_begin();
+	cpu_hotplug_disabled = 0;
+	cpu_maps_update_done();
+}
+
 #else /* #if CONFIG_HOTPLUG_CPU */
 static void cpu_hotplug_begin(void) {}
 static void cpu_hotplug_done(void) {}
@@ -540,36 +561,6 @@ static int __init alloc_frozen_cpus(void)
 }
 core_initcall(alloc_frozen_cpus);
 
-/*
- * Prevent regular CPU hotplug from racing with the freezer, by disabling CPU
- * hotplug when tasks are about to be frozen. Also, don't allow the freezer
- * to continue until any currently running CPU hotplug operation gets
- * completed.
- * To modify the 'cpu_hotplug_disabled' flag, we need to acquire the
- * 'cpu_add_remove_lock'. And this same lock is also taken by the regular
- * CPU hotplug path and released only after it is complete. Thus, we
- * (and hence the freezer) will block here until any currently running CPU
- * hotplug operation gets completed.
- */
-void cpu_hotplug_disable_before_freeze(void)
-{
-	cpu_maps_update_begin();
-	cpu_hotplug_disabled = 1;
-	cpu_maps_update_done();
-}
-
-
-/*
- * When tasks have been thawed, re-enable regular CPU hotplug (which had been
- * disabled while beginning to freeze tasks).
- */
-void cpu_hotplug_enable_after_thaw(void)
-{
-	cpu_maps_update_begin();
-	cpu_hotplug_disabled = 0;
-	cpu_maps_update_done();
-}
-
 /*
  * When callbacks for CPU hotplug notifications are being executed, we must
  * ensure that the state of the system with respect to the tasks being frozen
@@ -589,12 +580,12 @@ cpu_hotplug_pm_callback(struct notifier_block *nb,
 
 	case PM_SUSPEND_PREPARE:
 	case PM_HIBERNATION_PREPARE:
-		cpu_hotplug_disable_before_freeze();
+		cpu_hotplug_disable();
 		break;
 
 	case PM_POST_SUSPEND:
 	case PM_POST_HIBERNATION:
-		cpu_hotplug_enable_after_thaw();
+		cpu_hotplug_enable();
 		break;
 
 	default:
-- 
cgit v1.2.3


From 637241a900cbd982f744d44646b48a273d609b34 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 12 Jun 2013 14:04:39 -0700
Subject: kmsg: honor dmesg_restrict sysctl on /dev/kmsg

The dmesg_restrict sysctl currently covers the syslog method for access
dmesg, however /dev/kmsg isn't covered by the same protections.  Most
people haven't noticed because util-linux dmesg(1) defaults to using the
syslog method for access in older versions.  With util-linux dmesg(1)
defaults to reading directly from /dev/kmsg.

To fix /dev/kmsg, let's compare the existing interfaces and what they
allow:

 - /proc/kmsg allows:
  - open (SYSLOG_ACTION_OPEN) if CAP_SYSLOG since it uses a destructive
    single-reader interface (SYSLOG_ACTION_READ).
  - everything, after an open.

 - syslog syscall allows:
  - anything, if CAP_SYSLOG.
  - SYSLOG_ACTION_READ_ALL and SYSLOG_ACTION_SIZE_BUFFER, if
    dmesg_restrict==0.
  - nothing else (EPERM).

The use-cases were:
 - dmesg(1) needs to do non-destructive SYSLOG_ACTION_READ_ALLs.
 - sysklog(1) needs to open /proc/kmsg, drop privs, and still issue the
   destructive SYSLOG_ACTION_READs.

AIUI, dmesg(1) is moving to /dev/kmsg, and systemd-journald doesn't
clear the ring buffer.

Based on the comments in devkmsg_llseek, it sounds like actions besides
reading aren't going to be supported by /dev/kmsg (i.e.
SYSLOG_ACTION_CLEAR), so we have a strict subset of the non-destructive
syslog syscall actions.

To this end, move the check as Josh had done, but also rename the
constants to reflect their new uses (SYSLOG_FROM_CALL becomes
SYSLOG_FROM_READER, and SYSLOG_FROM_FILE becomes SYSLOG_FROM_PROC).
SYSLOG_FROM_READER allows non-destructive actions, and SYSLOG_FROM_PROC
allows destructive actions after a capabilities-constrained
SYSLOG_ACTION_OPEN check.

 - /dev/kmsg allows:
  - open if CAP_SYSLOG or dmesg_restrict==0
  - reading/polling, after open

Addresses https://bugzilla.redhat.com/show_bug.cgi?id=903192

[akpm@linux-foundation.org: use pr_warn_once()]
Signed-off-by: Kees Cook <keescook@chromium.org>
Reported-by: Christian Kujau <lists@nerdbynature.de>
Tested-by: Josh Boyer <jwboyer@redhat.com>
Cc: Kay Sievers <kay@vrfy.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/kmsg.c         | 10 +++---
 include/linux/syslog.h |  4 +--
 kernel/printk.c        | 91 +++++++++++++++++++++++++++-----------------------
 3 files changed, 57 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index bd4b5a740ff1..bdfabdaefdce 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -21,12 +21,12 @@ extern wait_queue_head_t log_wait;
 
 static int kmsg_open(struct inode * inode, struct file * file)
 {
-	return do_syslog(SYSLOG_ACTION_OPEN, NULL, 0, SYSLOG_FROM_FILE);
+	return do_syslog(SYSLOG_ACTION_OPEN, NULL, 0, SYSLOG_FROM_PROC);
 }
 
 static int kmsg_release(struct inode * inode, struct file * file)
 {
-	(void) do_syslog(SYSLOG_ACTION_CLOSE, NULL, 0, SYSLOG_FROM_FILE);
+	(void) do_syslog(SYSLOG_ACTION_CLOSE, NULL, 0, SYSLOG_FROM_PROC);
 	return 0;
 }
 
@@ -34,15 +34,15 @@ static ssize_t kmsg_read(struct file *file, char __user *buf,
 			 size_t count, loff_t *ppos)
 {
 	if ((file->f_flags & O_NONBLOCK) &&
-	    !do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_FILE))
+	    !do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_PROC))
 		return -EAGAIN;
-	return do_syslog(SYSLOG_ACTION_READ, buf, count, SYSLOG_FROM_FILE);
+	return do_syslog(SYSLOG_ACTION_READ, buf, count, SYSLOG_FROM_PROC);
 }
 
 static unsigned int kmsg_poll(struct file *file, poll_table *wait)
 {
 	poll_wait(file, &log_wait, wait);
-	if (do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_FILE))
+	if (do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_PROC))
 		return POLLIN | POLLRDNORM;
 	return 0;
 }
diff --git a/include/linux/syslog.h b/include/linux/syslog.h
index 38911391a139..98a3153c0f96 100644
--- a/include/linux/syslog.h
+++ b/include/linux/syslog.h
@@ -44,8 +44,8 @@
 /* Return size of the log buffer */
 #define SYSLOG_ACTION_SIZE_BUFFER   10
 
-#define SYSLOG_FROM_CALL 0
-#define SYSLOG_FROM_FILE 1
+#define SYSLOG_FROM_READER           0
+#define SYSLOG_FROM_PROC             1
 
 int do_syslog(int type, char __user *buf, int count, bool from_file);
 
diff --git a/kernel/printk.c b/kernel/printk.c
index fa36e1494420..8212c1aef125 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -363,6 +363,53 @@ static void log_store(int facility, int level,
 	log_next_seq++;
 }
 
+#ifdef CONFIG_SECURITY_DMESG_RESTRICT
+int dmesg_restrict = 1;
+#else
+int dmesg_restrict;
+#endif
+
+static int syslog_action_restricted(int type)
+{
+	if (dmesg_restrict)
+		return 1;
+	/*
+	 * Unless restricted, we allow "read all" and "get buffer size"
+	 * for everybody.
+	 */
+	return type != SYSLOG_ACTION_READ_ALL &&
+	       type != SYSLOG_ACTION_SIZE_BUFFER;
+}
+
+static int check_syslog_permissions(int type, bool from_file)
+{
+	/*
+	 * If this is from /proc/kmsg and we've already opened it, then we've
+	 * already done the capabilities checks at open time.
+	 */
+	if (from_file && type != SYSLOG_ACTION_OPEN)
+		return 0;
+
+	if (syslog_action_restricted(type)) {
+		if (capable(CAP_SYSLOG))
+			return 0;
+		/*
+		 * For historical reasons, accept CAP_SYS_ADMIN too, with
+		 * a warning.
+		 */
+		if (capable(CAP_SYS_ADMIN)) {
+			pr_warn_once("%s (%d): Attempt to access syslog with "
+				     "CAP_SYS_ADMIN but no CAP_SYSLOG "
+				     "(deprecated).\n",
+				 current->comm, task_pid_nr(current));
+			return 0;
+		}
+		return -EPERM;
+	}
+	return security_syslog(type);
+}
+
+
 /* /dev/kmsg - userspace message inject/listen interface */
 struct devkmsg_user {
 	u64 seq;
@@ -620,7 +667,8 @@ static int devkmsg_open(struct inode *inode, struct file *file)
 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
 		return 0;
 
-	err = security_syslog(SYSLOG_ACTION_READ_ALL);
+	err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL,
+				       SYSLOG_FROM_READER);
 	if (err)
 		return err;
 
@@ -813,45 +861,6 @@ static inline void boot_delay_msec(int level)
 }
 #endif
 
-#ifdef CONFIG_SECURITY_DMESG_RESTRICT
-int dmesg_restrict = 1;
-#else
-int dmesg_restrict;
-#endif
-
-static int syslog_action_restricted(int type)
-{
-	if (dmesg_restrict)
-		return 1;
-	/* Unless restricted, we allow "read all" and "get buffer size" for everybody */
-	return type != SYSLOG_ACTION_READ_ALL && type != SYSLOG_ACTION_SIZE_BUFFER;
-}
-
-static int check_syslog_permissions(int type, bool from_file)
-{
-	/*
-	 * If this is from /proc/kmsg and we've already opened it, then we've
-	 * already done the capabilities checks at open time.
-	 */
-	if (from_file && type != SYSLOG_ACTION_OPEN)
-		return 0;
-
-	if (syslog_action_restricted(type)) {
-		if (capable(CAP_SYSLOG))
-			return 0;
-		/* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */
-		if (capable(CAP_SYS_ADMIN)) {
-			printk_once(KERN_WARNING "%s (%d): "
-				 "Attempt to access syslog with CAP_SYS_ADMIN "
-				 "but no CAP_SYSLOG (deprecated).\n",
-				 current->comm, task_pid_nr(current));
-			return 0;
-		}
-		return -EPERM;
-	}
-	return 0;
-}
-
 #if defined(CONFIG_PRINTK_TIME)
 static bool printk_time = 1;
 #else
@@ -1249,7 +1258,7 @@ out:
 
 SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
 {
-	return do_syslog(type, buf, len, SYSLOG_FROM_CALL);
+	return do_syslog(type, buf, len, SYSLOG_FROM_READER);
 }
 
 /*
-- 
cgit v1.2.3


From 30dad30922ccc733cfdbfe232090cf674dc374dc Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Wed, 12 Jun 2013 14:05:04 -0700
Subject: mm: migration: add migrate_entry_wait_huge()

When we have a page fault for the address which is backed by a hugepage
under migration, the kernel can't wait correctly and do busy looping on
hugepage fault until the migration finishes.  As a result, users who try
to kick hugepage migration (via soft offlining, for example) occasionally
experience long delay or soft lockup.

This is because pte_offset_map_lock() can't get a correct migration entry
or a correct page table lock for hugepage.  This patch introduces
migration_entry_wait_huge() to solve this.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: <stable@vger.kernel.org>	[2.6.35+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swapops.h |  3 +++
 mm/hugetlb.c            |  2 +-
 mm/migrate.c            | 23 ++++++++++++++++++-----
 3 files changed, 22 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 47ead515c811..c5fd30d2a415 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -137,6 +137,7 @@ static inline void make_migration_entry_read(swp_entry_t *entry)
 
 extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
 					unsigned long address);
+extern void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte);
 #else
 
 #define make_migration_entry(page, write) swp_entry(0, 0)
@@ -148,6 +149,8 @@ static inline int is_migration_entry(swp_entry_t swp)
 static inline void make_migration_entry_read(swp_entry_t *entryp) { }
 static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
 					 unsigned long address) { }
+static inline void migration_entry_wait_huge(struct mm_struct *mm,
+					pte_t *pte) { }
 static inline int is_write_migration_entry(swp_entry_t entry)
 {
 	return 0;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f8feeeca6686..e2bfbf73a551 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2839,7 +2839,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (ptep) {
 		entry = huge_ptep_get(ptep);
 		if (unlikely(is_hugetlb_entry_migration(entry))) {
-			migration_entry_wait(mm, (pmd_t *)ptep, address);
+			migration_entry_wait_huge(mm, ptep);
 			return 0;
 		} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
 			return VM_FAULT_HWPOISON_LARGE |
diff --git a/mm/migrate.c b/mm/migrate.c
index b1f57501de9c..6f0c24438bba 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -200,15 +200,14 @@ static void remove_migration_ptes(struct page *old, struct page *new)
  * get to the page and wait until migration is finished.
  * When we return from this function the fault will be retried.
  */
-void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
-				unsigned long address)
+static void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
+				spinlock_t *ptl)
 {
-	pte_t *ptep, pte;
-	spinlock_t *ptl;
+	pte_t pte;
 	swp_entry_t entry;
 	struct page *page;
 
-	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+	spin_lock(ptl);
 	pte = *ptep;
 	if (!is_swap_pte(pte))
 		goto out;
@@ -236,6 +235,20 @@ out:
 	pte_unmap_unlock(ptep, ptl);
 }
 
+void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
+				unsigned long address)
+{
+	spinlock_t *ptl = pte_lockptr(mm, pmd);
+	pte_t *ptep = pte_offset_map(pmd, address);
+	__migration_entry_wait(mm, ptep, ptl);
+}
+
+void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte)
+{
+	spinlock_t *ptl = &(mm)->page_table_lock;
+	__migration_entry_wait(mm, pte, ptl);
+}
+
 #ifdef CONFIG_BLOCK
 /* Returns true if all buffers are successfully locked */
 static bool buffer_migrate_lock_buffers(struct buffer_head *head,
-- 
cgit v1.2.3


From c2853c8df57f49620d26f317d7d43347c29bfc2e Mon Sep 17 00:00:00 2001
From: Alex Shi <alex.shi@intel.com>
Date: Wed, 12 Jun 2013 14:05:10 -0700
Subject: include/linux/math64.h: add div64_ul()

There is div64_long() to handle the s64/long division, but no mocro do
u64/ul division.  It is necessary in some scenarios, so add this
function.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Alex Shi <alex.shi@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/math64.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/math64.h b/include/linux/math64.h
index b8ba85544721..2913b86eb12a 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -6,7 +6,8 @@
 
 #if BITS_PER_LONG == 64
 
-#define div64_long(x,y) div64_s64((x),(y))
+#define div64_long(x, y) div64_s64((x), (y))
+#define div64_ul(x, y)   div64_u64((x), (y))
 
 /**
  * div_u64_rem - unsigned 64bit divide with 32bit divisor with remainder
@@ -47,7 +48,8 @@ static inline s64 div64_s64(s64 dividend, s64 divisor)
 
 #elif BITS_PER_LONG == 32
 
-#define div64_long(x,y) div_s64((x),(y))
+#define div64_long(x, y) div_s64((x), (y))
+#define div64_ul(x, y)   div_u64((x), (y))
 
 #ifndef div_u64_rem
 static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
-- 
cgit v1.2.3


From f21afc25f9ed45b8ffe200d0f071b0caec3ed2ef Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Fri, 14 Jun 2013 11:13:59 -0700
Subject: smp.h: Use local_irq_{save,restore}() in !SMP version of
 on_each_cpu().

Thanks to commit f91eb62f71b3 ("init: scream bloody murder if interrupts
are enabled too early"), "bloody murder" is now being screamed.

With a MIPS OCTEON config, we use on_each_cpu() in our
irq_chip.irq_bus_sync_unlock() function.  This gets called in early as a
result of the time_init() call.  Because the !SMP version of
on_each_cpu() unconditionally enables irqs, we get:

    WARNING: at init/main.c:560 start_kernel+0x250/0x410()
    Interrupts were enabled early
    CPU: 0 PID: 0 Comm: swapper Not tainted 3.10.0-rc5-Cavium-Octeon+ #801
    Call Trace:
      show_stack+0x68/0x80
      warn_slowpath_common+0x78/0xb0
      warn_slowpath_fmt+0x38/0x48
      start_kernel+0x250/0x410

Suggested fix: Do what we already do in the SMP version of
on_each_cpu(), and use local_irq_save/local_irq_restore.  Because we
need a flags variable, make it a static inline to avoid name space
issues.

[ Change from v1: Convert on_each_cpu to a static inline function, add
  #include <linux/irqflags.h> to avoid build breakage on some files.

  on_each_cpu_mask() and on_each_cpu_cond() suffer the same problem as
  on_each_cpu(), but they are not causing !SMP bugs for me, so I will
  defer changing them to a less urgent patch. ]

Signed-off-by: David Daney <david.daney@cavium.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/smp.h | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/smp.h b/include/linux/smp.h
index e6564c1dc552..c8488763277f 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -11,6 +11,7 @@
 #include <linux/list.h>
 #include <linux/cpumask.h>
 #include <linux/init.h>
+#include <linux/irqflags.h>
 
 extern void cpu_idle(void);
 
@@ -139,13 +140,17 @@ static inline int up_smp_call_function(smp_call_func_t func, void *info)
 }
 #define smp_call_function(func, info, wait) \
 			(up_smp_call_function(func, info))
-#define on_each_cpu(func,info,wait)		\
-	({					\
-		local_irq_disable();		\
-		func(info);			\
-		local_irq_enable();		\
-		0;				\
-	})
+
+static inline int on_each_cpu(smp_call_func_t func, void *info, int wait)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	func(info);
+	local_irq_restore(flags);
+	return 0;
+}
+
 /*
  * Note we still need to test the mask even for UP
  * because we actually can get an empty mask from
-- 
cgit v1.2.3


From d3d8fee4138a06b4b9ca172d25b8412fc33ad3f3 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Mon, 17 Jun 2013 19:34:57 -0700
Subject: Revert "dw_apb_timer_of.c: Remove parts that were picoxcell-specific"

This reverts commit 55a68c23e0a675b2b8ac2656fd6edbf98b78e4c6.

In order to avoid a collision with dw_apb_timer changes in
the arm-soc tree, revert this change.

I'm leaving it to the arm-soc folks to sort out if they want
to keep the other side of the collision or if they're just going
to back it all out and try again during the next release cycle.

Reported-by: Dinh Nguyen <dinguyen@altera.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 arch/arm/mach-picoxcell/common.h      |  2 ++
 drivers/clocksource/dw_apb_timer.c    |  6 ++++
 drivers/clocksource/dw_apb_timer_of.c | 52 +++++++++++++++++++----------------
 include/linux/dw_apb_timer.h          |  6 ----
 4 files changed, 37 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-picoxcell/common.h b/arch/arm/mach-picoxcell/common.h
index 237fb3bcbd04..481b42a4ef15 100644
--- a/arch/arm/mach-picoxcell/common.h
+++ b/arch/arm/mach-picoxcell/common.h
@@ -12,4 +12,6 @@
 
 #include <asm/mach/time.h>
 
+extern void dw_apb_timer_init(void);
+
 #endif /* __PICOXCELL_COMMON_H__ */
diff --git a/drivers/clocksource/dw_apb_timer.c b/drivers/clocksource/dw_apb_timer.c
index e7042bc5c7d2..e54ca1062d8e 100644
--- a/drivers/clocksource/dw_apb_timer.c
+++ b/drivers/clocksource/dw_apb_timer.c
@@ -21,6 +21,12 @@
 #define APBT_MIN_PERIOD			4
 #define APBT_MIN_DELTA_USEC		200
 
+#define APBTMR_N_LOAD_COUNT		0x00
+#define APBTMR_N_CURRENT_VALUE		0x04
+#define APBTMR_N_CONTROL		0x08
+#define APBTMR_N_EOI			0x0c
+#define APBTMR_N_INT_STATUS		0x10
+
 #define APBTMRS_INT_STATUS		0xa0
 #define APBTMRS_EOI			0xa4
 #define APBTMRS_RAW_INT_STATUS		0xa8
diff --git a/drivers/clocksource/dw_apb_timer_of.c b/drivers/clocksource/dw_apb_timer_of.c
index a97b4065dacf..d9a1e8d51751 100644
--- a/drivers/clocksource/dw_apb_timer_of.c
+++ b/drivers/clocksource/dw_apb_timer_of.c
@@ -55,15 +55,6 @@ static void add_clockevent(struct device_node *event_timer)
 	dw_apb_clockevent_register(ced);
 }
 
-static void __iomem *sched_io_base;
-
-/* This is actually same as __apbt_read_clocksource(), but with
-   different interface */
-static u32 read_sched_clock_sptimer(void)
-{
-	return ~__raw_readl(sched_io_base + APBTMR_N_CURRENT_VALUE);
-}
-
 static void add_clocksource(struct device_node *source_timer)
 {
 	void __iomem *iobase;
@@ -78,27 +69,41 @@ static void add_clocksource(struct device_node *source_timer)
 
 	dw_apb_clocksource_start(cs);
 	dw_apb_clocksource_register(cs);
+}
 
-	sched_io_base = iobase;
-	setup_sched_clock(read_sched_clock_sptimer, 32, rate);
+static void __iomem *sched_io_base;
+
+static u32 read_sched_clock(void)
+{
+	return __raw_readl(sched_io_base);
 }
 
-static const struct of_device_id osctimer_ids[] __initconst = {
-	{ .compatible = "picochip,pc3x2-timer" },
-	{ .compatible = "snps,dw-apb-timer-osc" },
+static const struct of_device_id sptimer_ids[] __initconst = {
+	{ .compatible = "picochip,pc3x2-rtc" },
 	{ .compatible = "snps,dw-apb-timer-sp" },
-	{  /* Sentinel */ },
+	{ /* Sentinel */ },
 };
 
-/*
-   You don't have to use dw_apb_timer for scheduler clock,
-   this should also work fine on arm:
+static void init_sched_clock(void)
+{
+	struct device_node *sched_timer;
+	u32 rate;
 
-  twd_local_timer_of_register();
-  arch_timer_of_register();
-  arch_timer_sched_clock_init();
-*/
+	sched_timer = of_find_matching_node(NULL, sptimer_ids);
+	if (!sched_timer)
+		panic("No RTC for sched clock to use");
 
+	timer_get_base_and_rate(sched_timer, &sched_io_base, &rate);
+	of_node_put(sched_timer);
+
+	setup_sched_clock(read_sched_clock, 32, rate);
+}
+
+static const struct of_device_id osctimer_ids[] __initconst = {
+	{ .compatible = "picochip,pc3x2-timer" },
+	{ .compatible = "snps,dw-apb-timer-osc" },
+	{},
+};
 
 void __init dw_apb_timer_init(void)
 {
@@ -114,6 +119,7 @@ void __init dw_apb_timer_init(void)
 		panic("No timer for clocksource");
 	add_clocksource(source_timer);
 
-	of_node_put(event_timer);
 	of_node_put(source_timer);
+
+	init_sched_clock();
 }
diff --git a/include/linux/dw_apb_timer.h b/include/linux/dw_apb_timer.h
index de0904e38f33..b1cd9597c241 100644
--- a/include/linux/dw_apb_timer.h
+++ b/include/linux/dw_apb_timer.h
@@ -17,12 +17,6 @@
 #include <linux/clocksource.h>
 #include <linux/interrupt.h>
 
-#define APBTMR_N_LOAD_COUNT		0x00
-#define APBTMR_N_CURRENT_VALUE		0x04
-#define APBTMR_N_CONTROL		0x08
-#define APBTMR_N_EOI			0x0c
-#define APBTMR_N_INT_STATUS		0x10
-
 #define APBTMRS_REG_SIZE       0x14
 
 struct dw_apb_timer {
-- 
cgit v1.2.3


From 29bb9e5a75684106a37593ad75ec75ff8312731b Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 24 May 2013 15:23:40 -0400
Subject: tracing/context-tracking: Add preempt_schedule_context() for tracing

Dave Jones hit the following bug report:

 ===============================
 [ INFO: suspicious RCU usage. ]
 3.10.0-rc2+ #1 Not tainted
 -------------------------------
 include/linux/rcupdate.h:771 rcu_read_lock() used illegally while idle!
 other info that might help us debug this:
 RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 0
 RCU used illegally from extended quiescent state!
 2 locks held by cc1/63645:
  #0:  (&rq->lock){-.-.-.}, at: [<ffffffff816b39fd>] __schedule+0xed/0x9b0
  #1:  (rcu_read_lock){.+.+..}, at: [<ffffffff8109d645>] cpuacct_charge+0x5/0x1f0

 CPU: 1 PID: 63645 Comm: cc1 Not tainted 3.10.0-rc2+ #1 [loadavg: 40.57 27.55 13.39 25/277 64369]
 Hardware name: Gigabyte Technology Co., Ltd. GA-MA78GM-S2H/GA-MA78GM-S2H, BIOS F12a 04/23/2010
  0000000000000000 ffff88010f78fcf8 ffffffff816ae383 ffff88010f78fd28
  ffffffff810b698d ffff88011c092548 000000000023d073 ffff88011c092500
  0000000000000001 ffff88010f78fd60 ffffffff8109d7c5 ffffffff8109d645
 Call Trace:
  [<ffffffff816ae383>] dump_stack+0x19/0x1b
  [<ffffffff810b698d>] lockdep_rcu_suspicious+0xfd/0x130
  [<ffffffff8109d7c5>] cpuacct_charge+0x185/0x1f0
  [<ffffffff8109d645>] ? cpuacct_charge+0x5/0x1f0
  [<ffffffff8108dffc>] update_curr+0xec/0x240
  [<ffffffff8108f528>] put_prev_task_fair+0x228/0x480
  [<ffffffff816b3a71>] __schedule+0x161/0x9b0
  [<ffffffff816b4721>] preempt_schedule+0x51/0x80
  [<ffffffff816b4800>] ? __cond_resched_softirq+0x60/0x60
  [<ffffffff816b6824>] ? retint_careful+0x12/0x2e
  [<ffffffff810ff3cc>] ftrace_ops_control_func+0x1dc/0x210
  [<ffffffff816be280>] ftrace_call+0x5/0x2f
  [<ffffffff816b681d>] ? retint_careful+0xb/0x2e
  [<ffffffff816b4805>] ? schedule_user+0x5/0x70
  [<ffffffff816b4805>] ? schedule_user+0x5/0x70
  [<ffffffff816b6824>] ? retint_careful+0x12/0x2e
 ------------[ cut here ]------------

What happened was that the function tracer traced the schedule_user() code
that tells RCU that the system is coming back from userspace, and to
add the CPU back to the RCU monitoring.

Because the function tracer does a preempt_disable/enable_notrace() calls
the preempt_enable_notrace() checks the NEED_RESCHED flag. If it is set,
then preempt_schedule() is called. But this is called before the user_exit()
function can inform the kernel that the CPU is no longer in user mode and
needs to be accounted for by RCU.

The fix is to create a new preempt_schedule_context() that checks if
the kernel is still in user mode and if so to switch it to kernel mode
before calling schedule. It also switches back to user mode coming back
from schedule in need be.

The only user of this currently is the preempt_enable_notrace(), which is
only used by the tracing subsystem.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1369423420.6828.226.camel@gandalf.local.home
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/preempt.h   | 18 +++++++++++++++++-
 kernel/context_tracking.c | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 87a03c746f17..f5d4723cdb3d 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -33,9 +33,25 @@ do { \
 		preempt_schedule(); \
 } while (0)
 
+#ifdef CONFIG_CONTEXT_TRACKING
+
+void preempt_schedule_context(void);
+
+#define preempt_check_resched_context() \
+do { \
+	if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
+		preempt_schedule_context(); \
+} while (0)
+#else
+
+#define preempt_check_resched_context() preempt_check_resched()
+
+#endif /* CONFIG_CONTEXT_TRACKING */
+
 #else /* !CONFIG_PREEMPT */
 
 #define preempt_check_resched()		do { } while (0)
+#define preempt_check_resched_context()	do { } while (0)
 
 #endif /* CONFIG_PREEMPT */
 
@@ -88,7 +104,7 @@ do { \
 do { \
 	preempt_enable_no_resched_notrace(); \
 	barrier(); \
-	preempt_check_resched(); \
+	preempt_check_resched_context(); \
 } while (0)
 
 #else /* !CONFIG_PREEMPT_COUNT */
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 65349f07b878..66677003e223 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -71,6 +71,46 @@ void user_enter(void)
 	local_irq_restore(flags);
 }
 
+#ifdef CONFIG_PREEMPT
+/**
+ * preempt_schedule_context - preempt_schedule called by tracing
+ *
+ * The tracing infrastructure uses preempt_enable_notrace to prevent
+ * recursion and tracing preempt enabling caused by the tracing
+ * infrastructure itself. But as tracing can happen in areas coming
+ * from userspace or just about to enter userspace, a preempt enable
+ * can occur before user_exit() is called. This will cause the scheduler
+ * to be called when the system is still in usermode.
+ *
+ * To prevent this, the preempt_enable_notrace will use this function
+ * instead of preempt_schedule() to exit user context if needed before
+ * calling the scheduler.
+ */
+void __sched notrace preempt_schedule_context(void)
+{
+	struct thread_info *ti = current_thread_info();
+	enum ctx_state prev_ctx;
+
+	if (likely(ti->preempt_count || irqs_disabled()))
+		return;
+
+	/*
+	 * Need to disable preemption in case user_exit() is traced
+	 * and the tracer calls preempt_enable_notrace() causing
+	 * an infinite recursion.
+	 */
+	preempt_disable_notrace();
+	prev_ctx = exception_enter();
+	preempt_enable_no_resched_notrace();
+
+	preempt_schedule();
+
+	preempt_disable_notrace();
+	exception_exit(prev_ctx);
+	preempt_enable_notrace();
+}
+EXPORT_SYMBOL_GPL(preempt_schedule_context);
+#endif /* CONFIG_PREEMPT */
 
 /**
  * user_exit - Inform the context tracking that the CPU is
-- 
cgit v1.2.3


From a7bf58040f4e8a7245cb3e24ab93057be3c46363 Mon Sep 17 00:00:00 2001
From: Olaf Hering <olaf@aepfle.de>
Date: Fri, 14 Jun 2013 17:07:01 +0200
Subject: net: vlan: fix comment for vlan_ethhdr->h_vlan_proto

After addition of 8021AD h_vlan_proto can be either ETH_P_8021Q or
ETH_P_8021AD.

Signed-off-by: Olaf Hering <olaf@aepfle.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 52bd03b38962..637fa71de0c7 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -44,7 +44,7 @@ struct vlan_hdr {
  *	struct vlan_ethhdr - vlan ethernet header (ethhdr + vlan_hdr)
  *	@h_dest: destination ethernet address
  *	@h_source: source ethernet address
- *	@h_vlan_proto: ethernet protocol (always 0x8100)
+ *	@h_vlan_proto: ethernet protocol
  *	@h_vlan_TCI: priority and VLAN ID
  *	@h_vlan_encapsulated_proto: packet type ID or len
  */
-- 
cgit v1.2.3


From 7995bd287134f6c8f80d94bebe7396f05a9bc42b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 20 Jun 2013 18:58:36 +0400
Subject: splice: don't pass the address of ->f_pos to methods

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/internal.h          |  6 ++++++
 fs/read_write.c        | 24 ++++++++++++++++--------
 fs/splice.c            | 31 ++++++++++++++++++-------------
 include/linux/fs.h     |  2 --
 include/linux/splice.h |  1 +
 5 files changed, 41 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/fs/internal.h b/fs/internal.h
index eaa75f75b625..68121584ae37 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -131,6 +131,12 @@ extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
  */
 extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *);
 
+/*
+ * splice.c
+ */
+extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
+		loff_t *opos, size_t len, unsigned int flags);
+
 /*
  * pipe.c
  */
diff --git a/fs/read_write.c b/fs/read_write.c
index 03430008704e..2cefa417be34 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1064,6 +1064,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 	struct fd in, out;
 	struct inode *in_inode, *out_inode;
 	loff_t pos;
+	loff_t out_pos;
 	ssize_t retval;
 	int fl;
 
@@ -1077,12 +1078,14 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 	if (!(in.file->f_mode & FMODE_READ))
 		goto fput_in;
 	retval = -ESPIPE;
-	if (!ppos)
-		ppos = &in.file->f_pos;
-	else
+	if (!ppos) {
+		pos = in.file->f_pos;
+	} else {
+		pos = *ppos;
 		if (!(in.file->f_mode & FMODE_PREAD))
 			goto fput_in;
-	retval = rw_verify_area(READ, in.file, ppos, count);
+	}
+	retval = rw_verify_area(READ, in.file, &pos, count);
 	if (retval < 0)
 		goto fput_in;
 	count = retval;
@@ -1099,7 +1102,8 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 	retval = -EINVAL;
 	in_inode = file_inode(in.file);
 	out_inode = file_inode(out.file);
-	retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count);
+	out_pos = out.file->f_pos;
+	retval = rw_verify_area(WRITE, out.file, &out_pos, count);
 	if (retval < 0)
 		goto fput_out;
 	count = retval;
@@ -1107,7 +1111,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 	if (!max)
 		max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
 
-	pos = *ppos;
 	if (unlikely(pos + count > max)) {
 		retval = -EOVERFLOW;
 		if (pos >= max)
@@ -1126,18 +1129,23 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 	if (in.file->f_flags & O_NONBLOCK)
 		fl = SPLICE_F_NONBLOCK;
 #endif
-	retval = do_splice_direct(in.file, ppos, out.file, count, fl);
+	retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl);
 
 	if (retval > 0) {
 		add_rchar(current, retval);
 		add_wchar(current, retval);
 		fsnotify_access(in.file);
 		fsnotify_modify(out.file);
+		out.file->f_pos = out_pos;
+		if (ppos)
+			*ppos = pos;
+		else
+			in.file->f_pos = pos;
 	}
 
 	inc_syscr(current);
 	inc_syscw(current);
-	if (*ppos > max)
+	if (pos > max)
 		retval = -EOVERFLOW;
 
 fput_out:
diff --git a/fs/splice.c b/fs/splice.c
index e6b25598c8c4..9eca476227d5 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1274,7 +1274,7 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,
 {
 	struct file *file = sd->u.file;
 
-	return do_splice_from(pipe, file, &file->f_pos, sd->total_len,
+	return do_splice_from(pipe, file, sd->opos, sd->total_len,
 			      sd->flags);
 }
 
@@ -1294,7 +1294,7 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,
  *
  */
 long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
-		      size_t len, unsigned int flags)
+		      loff_t *opos, size_t len, unsigned int flags)
 {
 	struct splice_desc sd = {
 		.len		= len,
@@ -1302,6 +1302,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
 		.flags		= flags,
 		.pos		= *ppos,
 		.u.file		= out,
+		.opos		= opos,
 	};
 	long ret;
 
@@ -1325,7 +1326,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 {
 	struct pipe_inode_info *ipipe;
 	struct pipe_inode_info *opipe;
-	loff_t offset, *off;
+	loff_t offset;
 	long ret;
 
 	ipipe = get_pipe_info(in);
@@ -1356,13 +1357,15 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 				return -EINVAL;
 			if (copy_from_user(&offset, off_out, sizeof(loff_t)))
 				return -EFAULT;
-			off = &offset;
-		} else
-			off = &out->f_pos;
+		} else {
+			offset = out->f_pos;
+		}
 
-		ret = do_splice_from(ipipe, out, off, len, flags);
+		ret = do_splice_from(ipipe, out, &offset, len, flags);
 
-		if (off_out && copy_to_user(off_out, off, sizeof(loff_t)))
+		if (!off_out)
+			out->f_pos = offset;
+		else if (copy_to_user(off_out, &offset, sizeof(loff_t)))
 			ret = -EFAULT;
 
 		return ret;
@@ -1376,13 +1379,15 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 				return -EINVAL;
 			if (copy_from_user(&offset, off_in, sizeof(loff_t)))
 				return -EFAULT;
-			off = &offset;
-		} else
-			off = &in->f_pos;
+		} else {
+			offset = in->f_pos;
+		}
 
-		ret = do_splice_to(in, off, opipe, len, flags);
+		ret = do_splice_to(in, &offset, opipe, len, flags);
 
-		if (off_in && copy_to_user(off_in, off, sizeof(loff_t)))
+		if (!off_in)
+			in->f_pos = offset;
+		else if (copy_to_user(off_in, &offset, sizeof(loff_t)))
 			ret = -EFAULT;
 
 		return ret;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 43db02e9c9fa..65c2be22b601 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2414,8 +2414,6 @@ extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
 		struct file *, loff_t *, size_t, unsigned int);
 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
 		struct file *out, loff_t *, size_t len, unsigned int flags);
-extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
-		size_t len, unsigned int flags);
 
 extern void
 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
diff --git a/include/linux/splice.h b/include/linux/splice.h
index 09a545a7dfa3..74575cbf2d6f 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -35,6 +35,7 @@ struct splice_desc {
 		void *data;		/* cookie */
 	} u;
 	loff_t pos;			/* file position */
+	loff_t *opos;			/* sendfile: output position */
 	size_t num_spliced;		/* number of bytes already spliced */
 	bool need_wakeup;		/* need to wake up writer */
 };
-- 
cgit v1.2.3


From bd8a7036c06cf15779b31a5397d4afcb12be81ea Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 24 Jun 2013 06:26:00 -0700
Subject: gre: fix a possible skb leak

commit 68c331631143 ("v4 GRE: Add TCP segmentation offload for GRE")
added a possible skb leak, because it frees only the head of segment
list, in case a skb_linearize() call fails.

This patch adds a kfree_skb_list() helper to fix the bug.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Pravin B Shelar <pshelar@nicira.com>
Cc: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  1 +
 net/core/skbuff.c      | 20 ++++++++++++--------
 net/ipv4/gre.c         |  2 +-
 3 files changed, 14 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9c676eae3968..dec1748cd002 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -627,6 +627,7 @@ static inline struct rtable *skb_rtable(const struct sk_buff *skb)
 }
 
 extern void kfree_skb(struct sk_buff *skb);
+extern void kfree_skb_list(struct sk_buff *segs);
 extern void skb_tx_error(struct sk_buff *skb);
 extern void consume_skb(struct sk_buff *skb);
 extern void	       __kfree_skb(struct sk_buff *skb);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index cfd777bd6bd0..1c1738cc4538 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -483,15 +483,8 @@ EXPORT_SYMBOL(skb_add_rx_frag);
 
 static void skb_drop_list(struct sk_buff **listp)
 {
-	struct sk_buff *list = *listp;
-
+	kfree_skb_list(*listp);
 	*listp = NULL;
-
-	do {
-		struct sk_buff *this = list;
-		list = list->next;
-		kfree_skb(this);
-	} while (list);
 }
 
 static inline void skb_drop_fraglist(struct sk_buff *skb)
@@ -651,6 +644,17 @@ void kfree_skb(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(kfree_skb);
 
+void kfree_skb_list(struct sk_buff *segs)
+{
+	while (segs) {
+		struct sk_buff *next = segs->next;
+
+		kfree_skb(segs);
+		segs = next;
+	}
+}
+EXPORT_SYMBOL(kfree_skb_list);
+
 /**
  *	skb_tx_error - report an sk_buff xmit error
  *	@skb: buffer that triggered an error
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index b2e805af9b87..7856d1651d05 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -178,7 +178,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 
 				err = __skb_linearize(skb);
 				if (err) {
-					kfree_skb(segs);
+					kfree_skb_list(segs);
 					segs = ERR_PTR(err);
 					goto out;
 				}
-- 
cgit v1.2.3


From 5dbe7c178d3f0a4634f088d9e729f1909b9ddcd1 Mon Sep 17 00:00:00 2001
From: Nicolas Schichan <nschichan@freebox.fr>
Date: Wed, 26 Jun 2013 17:23:42 +0200
Subject: net: fix kernel deadlock with interface rename and netdev name
 retrieval.

When the kernel (compiled with CONFIG_PREEMPT=n) is performing the
rename of a network interface, it can end up waiting for a workqueue
to complete. If userland is able to invoke a SIOCGIFNAME ioctl or a
SO_BINDTODEVICE getsockopt in between, the kernel will deadlock due to
the fact that read_secklock_begin() will spin forever waiting for the
writer process (the one doing the interface rename) to update the
devnet_rename_seq sequence.

This patch fixes the problem by adding a helper (netdev_get_name())
and using it in the code handling the SIOCGIFNAME ioctl and
SO_BINDTODEVICE setsockopt.

The netdev_get_name() helper uses raw_seqcount_begin() to avoid
spinning forever, waiting for devnet_rename_seq->sequence to become
even. cond_resched() is used in the contended case, before retrying
the access to give the writer process a chance to finish.

The use of raw_seqcount_begin() will incur some unneeded work in the
reader process in the contended case, but this is better than
deadlocking the system.

Signed-off-by: Nicolas Schichan <nschichan@freebox.fr>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 34 ++++++++++++++++++++++++++++++++++
 net/core/dev_ioctl.c      | 19 ++++---------------
 net/core/sock.c           | 17 ++---------------
 4 files changed, 41 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 60584b185a0c..96e4c21e15e0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1695,6 +1695,7 @@ extern int		init_dummy_netdev(struct net_device *dev);
 extern struct net_device	*dev_get_by_index(struct net *net, int ifindex);
 extern struct net_device	*__dev_get_by_index(struct net *net, int ifindex);
 extern struct net_device	*dev_get_by_index_rcu(struct net *net, int ifindex);
+extern int		netdev_get_name(struct net *net, char *name, int ifindex);
 extern int		dev_restart(struct net_device *dev);
 #ifdef CONFIG_NETPOLL_TRAP
 extern int		netpoll_trap(void);
diff --git a/net/core/dev.c b/net/core/dev.c
index fc1e289397f5..faebb398fb46 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -791,6 +791,40 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
 }
 EXPORT_SYMBOL(dev_get_by_index);
 
+/**
+ *	netdev_get_name - get a netdevice name, knowing its ifindex.
+ *	@net: network namespace
+ *	@name: a pointer to the buffer where the name will be stored.
+ *	@ifindex: the ifindex of the interface to get the name from.
+ *
+ *	The use of raw_seqcount_begin() and cond_resched() before
+ *	retrying is required as we want to give the writers a chance
+ *	to complete when CONFIG_PREEMPT is not set.
+ */
+int netdev_get_name(struct net *net, char *name, int ifindex)
+{
+	struct net_device *dev;
+	unsigned int seq;
+
+retry:
+	seq = raw_seqcount_begin(&devnet_rename_seq);
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, ifindex);
+	if (!dev) {
+		rcu_read_unlock();
+		return -ENODEV;
+	}
+
+	strcpy(name, dev->name);
+	rcu_read_unlock();
+	if (read_seqcount_retry(&devnet_rename_seq, seq)) {
+		cond_resched();
+		goto retry;
+	}
+
+	return 0;
+}
+
 /**
  *	dev_getbyhwaddr_rcu - find a device by its hardware address
  *	@net: the applicable net namespace
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 6cc0481faade..5b7d0e1d0664 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -19,9 +19,8 @@
 
 static int dev_ifname(struct net *net, struct ifreq __user *arg)
 {
-	struct net_device *dev;
 	struct ifreq ifr;
-	unsigned seq;
+	int error;
 
 	/*
 	 *	Fetch the caller's info block.
@@ -30,19 +29,9 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
 		return -EFAULT;
 
-retry:
-	seq = read_seqcount_begin(&devnet_rename_seq);
-	rcu_read_lock();
-	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
-	if (!dev) {
-		rcu_read_unlock();
-		return -ENODEV;
-	}
-
-	strcpy(ifr.ifr_name, dev->name);
-	rcu_read_unlock();
-	if (read_seqcount_retry(&devnet_rename_seq, seq))
-		goto retry;
+	error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex);
+	if (error)
+		return error;
 
 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
 		return -EFAULT;
diff --git a/net/core/sock.c b/net/core/sock.c
index 88868a9d21da..d6d024cfaaaf 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -571,9 +571,7 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
 	int ret = -ENOPROTOOPT;
 #ifdef CONFIG_NETDEVICES
 	struct net *net = sock_net(sk);
-	struct net_device *dev;
 	char devname[IFNAMSIZ];
-	unsigned seq;
 
 	if (sk->sk_bound_dev_if == 0) {
 		len = 0;
@@ -584,20 +582,9 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
 	if (len < IFNAMSIZ)
 		goto out;
 
-retry:
-	seq = read_seqcount_begin(&devnet_rename_seq);
-	rcu_read_lock();
-	dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
-	ret = -ENODEV;
-	if (!dev) {
-		rcu_read_unlock();
+	ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
+	if (ret)
 		goto out;
-	}
-
-	strcpy(devname, dev->name);
-	rcu_read_unlock();
-	if (read_seqcount_retry(&devnet_rename_seq, seq))
-		goto retry;
 
 	len = strlen(devname) + 1;
 
-- 
cgit v1.2.3


From 780427f0e113b4c77dfff4d258c05a902cdb0eb9 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@citrix.com>
Date: Thu, 27 Jun 2013 11:35:46 +0100
Subject: timekeeping: Indicate that clock was set in the pvclock gtod notifier

If the clock was set (stepped), set the action parameter to functions
in the pvclock gtod notifier chain to non-zero.  This allows the
callee to only do work if the clock was stepped.

This will be used on Xen as the synchronization of the Xen wallclock
to the control domain's (dom0) system time will be done with this
notifier and updating on every timer tick is unnecessary and too
expensive.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: <xen-devel@lists.xen.org>
Link: http://lkml.kernel.org/r/1372329348-20841-4-git-send-email-david.vrabel@citrix.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/pvclock_gtod.h |  7 +++++++
 kernel/time/timekeeping.c    | 30 ++++++++++++++++++------------
 2 files changed, 25 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pvclock_gtod.h b/include/linux/pvclock_gtod.h
index 0ca75825b60d..a71d2dbd3610 100644
--- a/include/linux/pvclock_gtod.h
+++ b/include/linux/pvclock_gtod.h
@@ -3,6 +3,13 @@
 
 #include <linux/notifier.h>
 
+/*
+ * The pvclock gtod notifier is called when the system time is updated
+ * and is used to keep guest time synchronized with host time.
+ *
+ * The 'action' parameter in the notifier function is false (0), or
+ * true (non-zero) if system time was stepped.
+ */
 extern int pvclock_gtod_register_notifier(struct notifier_block *nb);
 extern int pvclock_gtod_unregister_notifier(struct notifier_block *nb);
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d8b23a929e66..846d0a1f235e 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -29,6 +29,7 @@
 
 #define TK_CLEAR_NTP		(1 << 0)
 #define TK_MIRROR		(1 << 1)
+#define TK_CLOCK_WAS_SET	(1 << 2)
 
 static struct timekeeper timekeeper;
 static DEFINE_RAW_SPINLOCK(timekeeper_lock);
@@ -204,9 +205,9 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
 
 static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);
 
-static void update_pvclock_gtod(struct timekeeper *tk)
+static void update_pvclock_gtod(struct timekeeper *tk, bool was_set)
 {
-	raw_notifier_call_chain(&pvclock_gtod_chain, 0, tk);
+	raw_notifier_call_chain(&pvclock_gtod_chain, was_set, tk);
 }
 
 /**
@@ -220,7 +221,7 @@ int pvclock_gtod_register_notifier(struct notifier_block *nb)
 
 	raw_spin_lock_irqsave(&timekeeper_lock, flags);
 	ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
-	update_pvclock_gtod(tk);
+	update_pvclock_gtod(tk, true);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
 	return ret;
@@ -252,7 +253,7 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
 		ntp_clear();
 	}
 	update_vsyscall(tk);
-	update_pvclock_gtod(tk);
+	update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
 
 	if (action & TK_MIRROR)
 		memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper));
@@ -512,7 +513,7 @@ int do_settimeofday(const struct timespec *tv)
 
 	tk_set_xtime(tk, tv);
 
-	timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR);
+	timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
 
 	write_seqcount_end(&timekeeper_seq);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -556,7 +557,7 @@ int timekeeping_inject_offset(struct timespec *ts)
 	tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts));
 
 error: /* even if we error out, we forwarded the time, so call update */
-	timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR);
+	timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
 
 	write_seqcount_end(&timekeeper_seq);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -646,7 +647,7 @@ static int change_clocksource(void *data)
 			module_put(new->owner);
 		}
 	}
-	timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR);
+	timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
 
 	write_seqcount_end(&timekeeper_seq);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -887,7 +888,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
 
 	__timekeeping_inject_sleeptime(tk, delta);
 
-	timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR);
+	timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
 
 	write_seqcount_end(&timekeeper_seq);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -969,7 +970,7 @@ static void timekeeping_resume(void)
 	tk->cycle_last = clock->cycle_last = cycle_now;
 	tk->ntp_error = 0;
 	timekeeping_suspended = 0;
-	timekeeping_update(tk, TK_MIRROR);
+	timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
 	write_seqcount_end(&timekeeper_seq);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
@@ -1243,9 +1244,10 @@ out_adjust:
  * It also calls into the NTP code to handle leapsecond processing.
  *
  */
-static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
+static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
 {
 	u64 nsecps = (u64)NSEC_PER_SEC << tk->shift;
+	unsigned int action = 0;
 
 	while (tk->xtime_nsec >= nsecps) {
 		int leap;
@@ -1268,8 +1270,10 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
 			__timekeeping_set_tai_offset(tk, tk->tai_offset - leap);
 
 			clock_was_set_delayed();
+			action = TK_CLOCK_WAS_SET;
 		}
 	}
+	return action;
 }
 
 /**
@@ -1354,6 +1358,7 @@ static void update_wall_time(void)
 	struct timekeeper *tk = &shadow_timekeeper;
 	cycle_t offset;
 	int shift = 0, maxshift;
+	unsigned int action;
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&timekeeper_lock, flags);
@@ -1406,7 +1411,7 @@ static void update_wall_time(void)
 	 * Finally, make sure that after the rounding
 	 * xtime_nsec isn't larger than NSEC_PER_SEC
 	 */
-	accumulate_nsecs_to_secs(tk);
+	action = accumulate_nsecs_to_secs(tk);
 
 	write_seqcount_begin(&timekeeper_seq);
 	/* Update clock->cycle_last with the new value */
@@ -1422,7 +1427,7 @@ static void update_wall_time(void)
 	 * updating.
 	 */
 	memcpy(real_tk, tk, sizeof(*tk));
-	timekeeping_update(real_tk, 0);
+	timekeeping_update(real_tk, action);
 	write_seqcount_end(&timekeeper_seq);
 out:
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -1684,6 +1689,7 @@ int do_adjtimex(struct timex *txc)
 
 	if (tai != orig_tai) {
 		__timekeeping_set_tai_offset(tk, tai);
+		update_pvclock_gtod(tk, true);
 		clock_was_set_delayed();
 	}
 	write_seqcount_end(&timekeeper_seq);
-- 
cgit v1.2.3


From 55ccb616a6e42052edb37e9c4f82cf8854a59429 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 28 Jun 2013 00:06:42 +0000
Subject: posix_cpu_timer: consolidate expiry time type

The posix cpu timer expiry time is stored in a union of two types: a 64
bits field if we rely on scheduler precise accounting, or a cputime_t if
we rely on jiffies.

This results in quite some duplicate code and special cases to handle the
two types.

Just unify this into a single 64 bits field.  cputime_t can always fit
into it.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
Cc: Olivier Langlois <olivier@trillion01.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/posix-timers.h |  16 ++-
 kernel/posix-cpu-timers.c    | 266 +++++++++++++++++--------------------------
 2 files changed, 117 insertions(+), 165 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 7794d75ed155..907f3fd191ac 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -7,14 +7,20 @@
 #include <linux/timex.h>
 #include <linux/alarmtimer.h>
 
-union cpu_time_count {
-	cputime_t cpu;
-	unsigned long long sched;
-};
+
+static inline unsigned long long cputime_to_expires(cputime_t expires)
+{
+	return (__force unsigned long long)expires;
+}
+
+static inline cputime_t expires_to_cputime(unsigned long long expires)
+{
+	return (__force cputime_t)expires;
+}
 
 struct cpu_timer_list {
 	struct list_head entry;
-	union cpu_time_count expires, incr;
+	unsigned long long expires, incr;
 	struct task_struct *task;
 	int firing;
 };
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 42670e9b44e0..c3c4ea1225a4 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -51,59 +51,28 @@ static int check_clock(const clockid_t which_clock)
 	return error;
 }
 
-static inline union cpu_time_count
+static inline unsigned long long
 timespec_to_sample(const clockid_t which_clock, const struct timespec *tp)
 {
-	union cpu_time_count ret;
-	ret.sched = 0;		/* high half always zero when .cpu used */
+	unsigned long long ret;
+
+	ret = 0;		/* high half always zero when .cpu used */
 	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
-		ret.sched = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
+		ret = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
 	} else {
-		ret.cpu = timespec_to_cputime(tp);
+		ret = cputime_to_expires(timespec_to_cputime(tp));
 	}
 	return ret;
 }
 
 static void sample_to_timespec(const clockid_t which_clock,
-			       union cpu_time_count cpu,
+			       unsigned long long expires,
 			       struct timespec *tp)
 {
 	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED)
-		*tp = ns_to_timespec(cpu.sched);
+		*tp = ns_to_timespec(expires);
 	else
-		cputime_to_timespec(cpu.cpu, tp);
-}
-
-static inline int cpu_time_before(const clockid_t which_clock,
-				  union cpu_time_count now,
-				  union cpu_time_count then)
-{
-	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
-		return now.sched < then.sched;
-	}  else {
-		return now.cpu < then.cpu;
-	}
-}
-static inline void cpu_time_add(const clockid_t which_clock,
-				union cpu_time_count *acc,
-			        union cpu_time_count val)
-{
-	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
-		acc->sched += val.sched;
-	}  else {
-		acc->cpu += val.cpu;
-	}
-}
-static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
-						union cpu_time_count a,
-						union cpu_time_count b)
-{
-	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
-		a.sched -= b.sched;
-	}  else {
-		a.cpu -= b.cpu;
-	}
-	return a;
+		cputime_to_timespec((__force cputime_t)expires, tp);
 }
 
 /*
@@ -111,47 +80,31 @@ static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
  * given the current clock sample.
  */
 static void bump_cpu_timer(struct k_itimer *timer,
-				  union cpu_time_count now)
+			   unsigned long long now)
 {
 	int i;
+	unsigned long long delta, incr;
 
-	if (timer->it.cpu.incr.sched == 0)
+	if (timer->it.cpu.incr == 0)
 		return;
 
-	if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) {
-		unsigned long long delta, incr;
+	if (now < timer->it.cpu.expires)
+		return;
 
-		if (now.sched < timer->it.cpu.expires.sched)
-			return;
-		incr = timer->it.cpu.incr.sched;
-		delta = now.sched + incr - timer->it.cpu.expires.sched;
-		/* Don't use (incr*2 < delta), incr*2 might overflow. */
-		for (i = 0; incr < delta - incr; i++)
-			incr = incr << 1;
-		for (; i >= 0; incr >>= 1, i--) {
-			if (delta < incr)
-				continue;
-			timer->it.cpu.expires.sched += incr;
-			timer->it_overrun += 1 << i;
-			delta -= incr;
-		}
-	} else {
-		cputime_t delta, incr;
+	incr = timer->it.cpu.incr;
+	delta = now + incr - timer->it.cpu.expires;
 
-		if (now.cpu < timer->it.cpu.expires.cpu)
-			return;
-		incr = timer->it.cpu.incr.cpu;
-		delta = now.cpu + incr - timer->it.cpu.expires.cpu;
-		/* Don't use (incr*2 < delta), incr*2 might overflow. */
-		for (i = 0; incr < delta - incr; i++)
-			     incr += incr;
-		for (; i >= 0; incr = incr >> 1, i--) {
-			if (delta < incr)
-				continue;
-			timer->it.cpu.expires.cpu += incr;
-			timer->it_overrun += 1 << i;
-			delta -= incr;
-		}
+	/* Don't use (incr*2 < delta), incr*2 might overflow. */
+	for (i = 0; incr < delta - incr; i++)
+		incr = incr << 1;
+
+	for (; i >= 0; incr >>= 1, i--) {
+		if (delta < incr)
+			continue;
+
+		timer->it.cpu.expires += incr;
+		timer->it_overrun += 1 << i;
+		delta -= incr;
 	}
 }
 
@@ -170,21 +123,21 @@ static inline int task_cputime_zero(const struct task_cputime *cputime)
 	return 0;
 }
 
-static inline cputime_t prof_ticks(struct task_struct *p)
+static inline unsigned long long prof_ticks(struct task_struct *p)
 {
 	cputime_t utime, stime;
 
 	task_cputime(p, &utime, &stime);
 
-	return utime + stime;
+	return cputime_to_expires(utime + stime);
 }
-static inline cputime_t virt_ticks(struct task_struct *p)
+static inline unsigned long long virt_ticks(struct task_struct *p)
 {
 	cputime_t utime;
 
 	task_cputime(p, &utime, NULL);
 
-	return utime;
+	return cputime_to_expires(utime);
 }
 
 static int
@@ -225,19 +178,19 @@ posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
  * Sample a per-thread clock for the given task.
  */
 static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
-			    union cpu_time_count *cpu)
+			    unsigned long long *sample)
 {
 	switch (CPUCLOCK_WHICH(which_clock)) {
 	default:
 		return -EINVAL;
 	case CPUCLOCK_PROF:
-		cpu->cpu = prof_ticks(p);
+		*sample = prof_ticks(p);
 		break;
 	case CPUCLOCK_VIRT:
-		cpu->cpu = virt_ticks(p);
+		*sample = virt_ticks(p);
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = task_sched_runtime(p);
+		*sample = task_sched_runtime(p);
 		break;
 	}
 	return 0;
@@ -284,7 +237,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
  */
 static int cpu_clock_sample_group(const clockid_t which_clock,
 				  struct task_struct *p,
-				  union cpu_time_count *cpu)
+				  unsigned long long *sample)
 {
 	struct task_cputime cputime;
 
@@ -293,15 +246,15 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
 		return -EINVAL;
 	case CPUCLOCK_PROF:
 		thread_group_cputime(p, &cputime);
-		cpu->cpu = cputime.utime + cputime.stime;
+		*sample = cputime_to_expires(cputime.utime + cputime.stime);
 		break;
 	case CPUCLOCK_VIRT:
 		thread_group_cputime(p, &cputime);
-		cpu->cpu = cputime.utime;
+		*sample = cputime_to_expires(cputime.utime);
 		break;
 	case CPUCLOCK_SCHED:
 		thread_group_cputime(p, &cputime);
-		cpu->sched = cputime.sum_exec_runtime;
+		*sample = cputime.sum_exec_runtime;
 		break;
 	}
 	return 0;
@@ -312,7 +265,7 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
 {
 	const pid_t pid = CPUCLOCK_PID(which_clock);
 	int error = -EINVAL;
-	union cpu_time_count rtn;
+	unsigned long long rtn;
 
 	if (pid == 0) {
 		/*
@@ -461,30 +414,30 @@ static void cleanup_timers(struct list_head *head,
 
 	list_for_each_entry_safe(timer, next, head, entry) {
 		list_del_init(&timer->entry);
-		if (timer->expires.cpu < ptime) {
-			timer->expires.cpu = 0;
+		if (timer->expires < cputime_to_expires(ptime)) {
+			timer->expires = 0;
 		} else {
-			timer->expires.cpu -= ptime;
+			timer->expires -= cputime_to_expires(ptime);
 		}
 	}
 
 	++head;
 	list_for_each_entry_safe(timer, next, head, entry) {
 		list_del_init(&timer->entry);
-		if (timer->expires.cpu < utime) {
-			timer->expires.cpu = 0;
+		if (timer->expires < cputime_to_expires(utime)) {
+			timer->expires = 0;
 		} else {
-			timer->expires.cpu -= utime;
+			timer->expires -= cputime_to_expires(utime);
 		}
 	}
 
 	++head;
 	list_for_each_entry_safe(timer, next, head, entry) {
 		list_del_init(&timer->entry);
-		if (timer->expires.sched < sum_exec_runtime) {
-			timer->expires.sched = 0;
+		if (timer->expires < sum_exec_runtime) {
+			timer->expires = 0;
 		} else {
-			timer->expires.sched -= sum_exec_runtime;
+			timer->expires -= sum_exec_runtime;
 		}
 	}
 }
@@ -516,7 +469,7 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
 		       tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
 }
 
-static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
+static void clear_dead_task(struct k_itimer *timer, unsigned long long now)
 {
 	/*
 	 * That's all for this thread or process.
@@ -524,9 +477,7 @@ static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
 	 */
 	put_task_struct(timer->it.cpu.task);
 	timer->it.cpu.task = NULL;
-	timer->it.cpu.expires = cpu_time_sub(timer->it_clock,
-					     timer->it.cpu.expires,
-					     now);
+	timer->it.cpu.expires -= now;
 }
 
 static inline int expires_gt(cputime_t expires, cputime_t new_exp)
@@ -558,14 +509,14 @@ static void arm_timer(struct k_itimer *timer)
 
 	listpos = head;
 	list_for_each_entry(next, head, entry) {
-		if (cpu_time_before(timer->it_clock, nt->expires, next->expires))
+		if (nt->expires < next->expires)
 			break;
 		listpos = &next->entry;
 	}
 	list_add(&nt->entry, listpos);
 
 	if (listpos == head) {
-		union cpu_time_count *exp = &nt->expires;
+		unsigned long long exp = nt->expires;
 
 		/*
 		 * We are the new earliest-expiring POSIX 1.b timer, hence
@@ -576,17 +527,17 @@ static void arm_timer(struct k_itimer *timer)
 
 		switch (CPUCLOCK_WHICH(timer->it_clock)) {
 		case CPUCLOCK_PROF:
-			if (expires_gt(cputime_expires->prof_exp, exp->cpu))
-				cputime_expires->prof_exp = exp->cpu;
+			if (expires_gt(cputime_expires->prof_exp, expires_to_cputime(exp)))
+				cputime_expires->prof_exp = expires_to_cputime(exp);
 			break;
 		case CPUCLOCK_VIRT:
-			if (expires_gt(cputime_expires->virt_exp, exp->cpu))
-				cputime_expires->virt_exp = exp->cpu;
+			if (expires_gt(cputime_expires->virt_exp, expires_to_cputime(exp)))
+				cputime_expires->virt_exp = expires_to_cputime(exp);
 			break;
 		case CPUCLOCK_SCHED:
 			if (cputime_expires->sched_exp == 0 ||
-			    cputime_expires->sched_exp > exp->sched)
-				cputime_expires->sched_exp = exp->sched;
+			    cputime_expires->sched_exp > exp)
+				cputime_expires->sched_exp = exp;
 			break;
 		}
 	}
@@ -601,20 +552,20 @@ static void cpu_timer_fire(struct k_itimer *timer)
 		/*
 		 * User don't want any signal.
 		 */
-		timer->it.cpu.expires.sched = 0;
+		timer->it.cpu.expires = 0;
 	} else if (unlikely(timer->sigq == NULL)) {
 		/*
 		 * This a special case for clock_nanosleep,
 		 * not a normal timer from sys_timer_create.
 		 */
 		wake_up_process(timer->it_process);
-		timer->it.cpu.expires.sched = 0;
-	} else if (timer->it.cpu.incr.sched == 0) {
+		timer->it.cpu.expires = 0;
+	} else if (timer->it.cpu.incr == 0) {
 		/*
 		 * One-shot timer.  Clear it as soon as it's fired.
 		 */
 		posix_timer_event(timer, 0);
-		timer->it.cpu.expires.sched = 0;
+		timer->it.cpu.expires = 0;
 	} else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
 		/*
 		 * The signal did not get queued because the signal
@@ -632,7 +583,7 @@ static void cpu_timer_fire(struct k_itimer *timer)
  */
 static int cpu_timer_sample_group(const clockid_t which_clock,
 				  struct task_struct *p,
-				  union cpu_time_count *cpu)
+				  unsigned long long *sample)
 {
 	struct task_cputime cputime;
 
@@ -641,13 +592,13 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
 	default:
 		return -EINVAL;
 	case CPUCLOCK_PROF:
-		cpu->cpu = cputime.utime + cputime.stime;
+		*sample = cputime_to_expires(cputime.utime + cputime.stime);
 		break;
 	case CPUCLOCK_VIRT:
-		cpu->cpu = cputime.utime;
+		*sample = cputime_to_expires(cputime.utime);
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
+		*sample = cputime.sum_exec_runtime + task_delta_exec(p);
 		break;
 	}
 	return 0;
@@ -694,7 +645,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 			       struct itimerspec *new, struct itimerspec *old)
 {
 	struct task_struct *p = timer->it.cpu.task;
-	union cpu_time_count old_expires, new_expires, old_incr, val;
+	unsigned long long old_expires, new_expires, old_incr, val;
 	int ret;
 
 	if (unlikely(p == NULL)) {
@@ -749,7 +700,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 	}
 
 	if (old) {
-		if (old_expires.sched == 0) {
+		if (old_expires == 0) {
 			old->it_value.tv_sec = 0;
 			old->it_value.tv_nsec = 0;
 		} else {
@@ -764,11 +715,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 			 * new setting.
 			 */
 			bump_cpu_timer(timer, val);
-			if (cpu_time_before(timer->it_clock, val,
-					    timer->it.cpu.expires)) {
-				old_expires = cpu_time_sub(
-					timer->it_clock,
-					timer->it.cpu.expires, val);
+			if (val < timer->it.cpu.expires) {
+				old_expires = timer->it.cpu.expires - val;
 				sample_to_timespec(timer->it_clock,
 						   old_expires,
 						   &old->it_value);
@@ -791,8 +739,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 		goto out;
 	}
 
-	if (new_expires.sched != 0 && !(flags & TIMER_ABSTIME)) {
-		cpu_time_add(timer->it_clock, &new_expires, val);
+	if (new_expires != 0 && !(flags & TIMER_ABSTIME)) {
+		new_expires += val;
 	}
 
 	/*
@@ -801,8 +749,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 	 * arm the timer (we'll just fake it for timer_gettime).
 	 */
 	timer->it.cpu.expires = new_expires;
-	if (new_expires.sched != 0 &&
-	    cpu_time_before(timer->it_clock, val, new_expires)) {
+	if (new_expires != 0 && val < new_expires) {
 		arm_timer(timer);
 	}
 
@@ -826,8 +773,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 	timer->it_overrun_last = 0;
 	timer->it_overrun = -1;
 
-	if (new_expires.sched != 0 &&
-	    !cpu_time_before(timer->it_clock, val, new_expires)) {
+	if (new_expires != 0 && !(val < new_expires)) {
 		/*
 		 * The designated time already passed, so we notify
 		 * immediately, even if the thread never runs to
@@ -849,7 +795,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 
 static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 {
-	union cpu_time_count now;
+	unsigned long long now;
 	struct task_struct *p = timer->it.cpu.task;
 	int clear_dead;
 
@@ -859,7 +805,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 	sample_to_timespec(timer->it_clock,
 			   timer->it.cpu.incr, &itp->it_interval);
 
-	if (timer->it.cpu.expires.sched == 0) {	/* Timer not armed at all.  */
+	if (timer->it.cpu.expires == 0) {	/* Timer not armed at all.  */
 		itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
 		return;
 	}
@@ -891,7 +837,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 			 */
 			put_task_struct(p);
 			timer->it.cpu.task = NULL;
-			timer->it.cpu.expires.sched = 0;
+			timer->it.cpu.expires = 0;
 			read_unlock(&tasklist_lock);
 			goto dead;
 		} else {
@@ -912,10 +858,9 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 		goto dead;
 	}
 
-	if (cpu_time_before(timer->it_clock, now, timer->it.cpu.expires)) {
+	if (now < timer->it.cpu.expires) {
 		sample_to_timespec(timer->it_clock,
-				   cpu_time_sub(timer->it_clock,
-						timer->it.cpu.expires, now),
+				   timer->it.cpu.expires - now,
 				   &itp->it_value);
 	} else {
 		/*
@@ -946,8 +891,8 @@ static void check_thread_timers(struct task_struct *tsk,
 		struct cpu_timer_list *t = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
-		if (!--maxfire || prof_ticks(tsk) < t->expires.cpu) {
-			tsk->cputime_expires.prof_exp = t->expires.cpu;
+		if (!--maxfire || prof_ticks(tsk) < t->expires) {
+			tsk->cputime_expires.prof_exp = expires_to_cputime(t->expires);
 			break;
 		}
 		t->firing = 1;
@@ -961,8 +906,8 @@ static void check_thread_timers(struct task_struct *tsk,
 		struct cpu_timer_list *t = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
-		if (!--maxfire || virt_ticks(tsk) < t->expires.cpu) {
-			tsk->cputime_expires.virt_exp = t->expires.cpu;
+		if (!--maxfire || virt_ticks(tsk) < t->expires) {
+			tsk->cputime_expires.virt_exp = expires_to_cputime(t->expires);
 			break;
 		}
 		t->firing = 1;
@@ -976,8 +921,8 @@ static void check_thread_timers(struct task_struct *tsk,
 		struct cpu_timer_list *t = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
-		if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) {
-			tsk->cputime_expires.sched_exp = t->expires.sched;
+		if (!--maxfire || tsk->se.sum_exec_runtime < t->expires) {
+			tsk->cputime_expires.sched_exp = t->expires;
 			break;
 		}
 		t->firing = 1;
@@ -1030,7 +975,8 @@ static void stop_process_timers(struct signal_struct *sig)
 static u32 onecputick;
 
 static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
-			     cputime_t *expires, cputime_t cur_time, int signo)
+			     unsigned long long *expires,
+			     unsigned long long cur_time, int signo)
 {
 	if (!it->expires)
 		return;
@@ -1068,7 +1014,7 @@ static void check_process_timers(struct task_struct *tsk,
 {
 	int maxfire;
 	struct signal_struct *const sig = tsk->signal;
-	cputime_t utime, ptime, virt_expires, prof_expires;
+	unsigned long long utime, ptime, virt_expires, prof_expires;
 	unsigned long long sum_sched_runtime, sched_expires;
 	struct list_head *timers = sig->cpu_timers;
 	struct task_cputime cputime;
@@ -1078,8 +1024,8 @@ static void check_process_timers(struct task_struct *tsk,
 	 * Collect the current process totals.
 	 */
 	thread_group_cputimer(tsk, &cputime);
-	utime = cputime.utime;
-	ptime = utime + cputime.stime;
+	utime = cputime_to_expires(cputime.utime);
+	ptime = utime + cputime_to_expires(cputime.stime);
 	sum_sched_runtime = cputime.sum_exec_runtime;
 	maxfire = 20;
 	prof_expires = 0;
@@ -1087,8 +1033,8 @@ static void check_process_timers(struct task_struct *tsk,
 		struct cpu_timer_list *tl = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
-		if (!--maxfire || ptime < tl->expires.cpu) {
-			prof_expires = tl->expires.cpu;
+		if (!--maxfire || ptime < tl->expires) {
+			prof_expires = tl->expires;
 			break;
 		}
 		tl->firing = 1;
@@ -1102,8 +1048,8 @@ static void check_process_timers(struct task_struct *tsk,
 		struct cpu_timer_list *tl = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
-		if (!--maxfire || utime < tl->expires.cpu) {
-			virt_expires = tl->expires.cpu;
+		if (!--maxfire || utime < tl->expires) {
+			virt_expires = tl->expires;
 			break;
 		}
 		tl->firing = 1;
@@ -1117,8 +1063,8 @@ static void check_process_timers(struct task_struct *tsk,
 		struct cpu_timer_list *tl = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
-		if (!--maxfire || sum_sched_runtime < tl->expires.sched) {
-			sched_expires = tl->expires.sched;
+		if (!--maxfire || sum_sched_runtime < tl->expires) {
+			sched_expires = tl->expires;
 			break;
 		}
 		tl->firing = 1;
@@ -1162,8 +1108,8 @@ static void check_process_timers(struct task_struct *tsk,
 		}
 	}
 
-	sig->cputime_expires.prof_exp = prof_expires;
-	sig->cputime_expires.virt_exp = virt_expires;
+	sig->cputime_expires.prof_exp = expires_to_cputime(prof_expires);
+	sig->cputime_expires.virt_exp = expires_to_cputime(virt_expires);
 	sig->cputime_expires.sched_exp = sched_expires;
 	if (task_cputime_zero(&sig->cputime_expires))
 		stop_process_timers(sig);
@@ -1176,7 +1122,7 @@ static void check_process_timers(struct task_struct *tsk,
 void posix_cpu_timer_schedule(struct k_itimer *timer)
 {
 	struct task_struct *p = timer->it.cpu.task;
-	union cpu_time_count now;
+	unsigned long long now;
 
 	if (unlikely(p == NULL))
 		/*
@@ -1205,7 +1151,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 			 */
 			put_task_struct(p);
 			timer->it.cpu.task = p = NULL;
-			timer->it.cpu.expires.sched = 0;
+			timer->it.cpu.expires = 0;
 			goto out_unlock;
 		} else if (unlikely(p->exit_state) && thread_group_empty(p)) {
 			/*
@@ -1387,7 +1333,7 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 			   cputime_t *newval, cputime_t *oldval)
 {
-	union cpu_time_count now;
+	unsigned long long now;
 
 	BUG_ON(clock_idx == CPUCLOCK_SCHED);
 	cpu_timer_sample_group(clock_idx, tsk, &now);
@@ -1399,17 +1345,17 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 		 * it to be absolute.
 		 */
 		if (*oldval) {
-			if (*oldval <= now.cpu) {
+			if (*oldval <= now) {
 				/* Just about to fire. */
 				*oldval = cputime_one_jiffy;
 			} else {
-				*oldval -= now.cpu;
+				*oldval -= now;
 			}
 		}
 
 		if (!*newval)
 			goto out;
-		*newval += now.cpu;
+		*newval += now;
 	}
 
 	/*
@@ -1459,7 +1405,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
 		}
 
 		while (!signal_pending(current)) {
-			if (timer.it.cpu.expires.sched == 0) {
+			if (timer.it.cpu.expires == 0) {
 				/*
 				 * Our timer fired and was reset, below
 				 * deletion can not fail.
-- 
cgit v1.2.3


From 332962f2c88868ed3cdab466870baaa34dd58612 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 4 Jul 2013 22:46:45 +0200
Subject: clocksource: Reselect clocksource when watchdog validated high-res
 capability

Up to commit 5d33b883a (clocksource: Always verify highres capability)
we had no sanity check when selecting a clocksource, which prevented
that a non highres capable clocksource is used when the system already
switched to highres/nohz mode.

The new sanity check works as Alex and Tim found out. It prevents the
TSC from being used. This happens because on x86 the boot process
looks like this:

 tsc_start_freqency_validation(TSC);
 clocksource_register(HPET);
 clocksource_done_booting();
	clocksource_select()
		Selects HPET which is valid for high-res

 switch_to_highres();

 clocksource_register(TSC);
 	TSC is not selected, because it is not yet
	flagged as VALID_HIGH_RES

 clocksource_watchdog()
	Validates TSC for highres, but that does not make TSC
	the current clocksource.

Before the sanity check was added, we installed TSC unvalidated which
worked most of the time. If the TSC was really detected as unstable,
then the unstable logic removed it and installed HPET again.

The sanity check is correct and needed. So the watchdog needs to kick
a reselection of the clocksource, when it qualifies TSC as a valid
high res clocksource.

To solve this, we mark the clocksource which got the flag
CLOCK_SOURCE_VALID_FOR_HRES set by the watchdog with an new flag
CLOCK_SOURCE_RESELECT and trigger the watchdog thread. The watchdog
thread evaluates the flag and invokes clocksource_select() when set.

To avoid that the clocksource_done_booting() code, which is about to
install the first real clocksource anyway, needs to go through
clocksource_select and tick_oneshot_notify() pointlessly, split out
the clocksource_watchdog_kthread() list walk code and invoke the
select/notify only when called from clocksource_watchdog_kthread().

So clocksource_done_booting() can utilize the same splitout code
without the select/notify invocation and the clocksource_mutex
unlock/relock dance.

Reported-and-tested-by: Alex Shi <alex.shi@intel.com>
Cc: Hans Peter Anvin <hpa@linux.intel.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <andi.kleen@intel.com>
Tested-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Davidlohr Bueso <davidlohr.bueso@hp.com>
Cc: John Stultz <john.stultz@linaro.org>
Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1307042239150.11637@ionos.tec.linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clocksource.h |  1 +
 kernel/time/clocksource.c   | 57 +++++++++++++++++++++++++++++++++------------
 2 files changed, 43 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 2f39a4911668..dbbf8aa7731b 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -210,6 +210,7 @@ struct clocksource {
 #define CLOCK_SOURCE_VALID_FOR_HRES		0x20
 #define CLOCK_SOURCE_UNSTABLE			0x40
 #define CLOCK_SOURCE_SUSPEND_NONSTOP		0x80
+#define CLOCK_SOURCE_RESELECT			0x100
 
 /* simplify initialization of mask field */
 #define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1)
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index e713ef7d19a7..50a8736757f3 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -181,6 +181,7 @@ static int finished_booting;
 
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
 static void clocksource_watchdog_work(struct work_struct *work);
+static void clocksource_select(void);
 
 static LIST_HEAD(watchdog_list);
 static struct clocksource *watchdog;
@@ -301,13 +302,30 @@ static void clocksource_watchdog(unsigned long data)
 		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
 		    (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
 		    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
+			/* Mark it valid for high-res. */
 			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
+
+			/*
+			 * clocksource_done_booting() will sort it if
+			 * finished_booting is not set yet.
+			 */
+			if (!finished_booting)
+				continue;
+
 			/*
-			 * We just marked the clocksource as highres-capable,
-			 * notify the rest of the system as well so that we
-			 * transition into high-res mode:
+			 * If this is not the current clocksource let
+			 * the watchdog thread reselect it. Due to the
+			 * change to high res this clocksource might
+			 * be preferred now. If it is the current
+			 * clocksource let the tick code know about
+			 * that change.
 			 */
-			tick_clock_notify();
+			if (cs != curr_clocksource) {
+				cs->flags |= CLOCK_SOURCE_RESELECT;
+				schedule_work(&watchdog_work);
+			} else {
+				tick_clock_notify();
+			}
 		}
 	}
 
@@ -404,19 +422,25 @@ static void clocksource_dequeue_watchdog(struct clocksource *cs)
 	spin_unlock_irqrestore(&watchdog_lock, flags);
 }
 
-static int clocksource_watchdog_kthread(void *data)
+static int __clocksource_watchdog_kthread(void)
 {
 	struct clocksource *cs, *tmp;
 	unsigned long flags;
 	LIST_HEAD(unstable);
+	int select = 0;
 
-	mutex_lock(&clocksource_mutex);
 	spin_lock_irqsave(&watchdog_lock, flags);
-	list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list)
+	list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
 		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
 			list_del_init(&cs->wd_list);
 			list_add(&cs->wd_list, &unstable);
+			select = 1;
+		}
+		if (cs->flags & CLOCK_SOURCE_RESELECT) {
+			cs->flags &= ~CLOCK_SOURCE_RESELECT;
+			select = 1;
 		}
+	}
 	/* Check if the watchdog timer needs to be stopped. */
 	clocksource_stop_watchdog();
 	spin_unlock_irqrestore(&watchdog_lock, flags);
@@ -426,6 +450,14 @@ static int clocksource_watchdog_kthread(void *data)
 		list_del_init(&cs->wd_list);
 		__clocksource_change_rating(cs, 0);
 	}
+	return select;
+}
+
+static int clocksource_watchdog_kthread(void *data)
+{
+	mutex_lock(&clocksource_mutex);
+	if (__clocksource_watchdog_kthread())
+		clocksource_select();
 	mutex_unlock(&clocksource_mutex);
 	return 0;
 }
@@ -445,7 +477,7 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
 
 static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
 static inline void clocksource_resume_watchdog(void) { }
-static inline int clocksource_watchdog_kthread(void *data) { return 0; }
+static inline int __clocksource_watchdog_kthread(void) { return 0; }
 static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
 
 #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
@@ -647,16 +679,11 @@ static int __init clocksource_done_booting(void)
 {
 	mutex_lock(&clocksource_mutex);
 	curr_clocksource = clocksource_default_clock();
-	mutex_unlock(&clocksource_mutex);
-
 	finished_booting = 1;
-
 	/*
 	 * Run the watchdog first to eliminate unstable clock sources
 	 */
-	clocksource_watchdog_kthread(NULL);
-
-	mutex_lock(&clocksource_mutex);
+	__clocksource_watchdog_kthread();
 	clocksource_select();
 	mutex_unlock(&clocksource_mutex);
 	return 0;
@@ -789,7 +816,6 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating)
 	list_del(&cs->list);
 	cs->rating = rating;
 	clocksource_enqueue(cs);
-	clocksource_select();
 }
 
 /**
@@ -801,6 +827,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating)
 {
 	mutex_lock(&clocksource_mutex);
 	__clocksource_change_rating(cs, rating);
+	clocksource_select();
 	mutex_unlock(&clocksource_mutex);
 }
 EXPORT_SYMBOL(clocksource_change_rating);
-- 
cgit v1.2.3