From 36d8593ec74dc04d3bd7c1c897a7b7cfbd0b0dc6 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Sat, 19 Feb 2011 15:34:50 +0000 Subject: Make clocksource name const As nothing should be writing to the clocksource name string, make the clocksource name pointer const. Build-tested on ARM Versatile Express. Signed-off-by: Russell King Signed-off-by: John Stultz --- include/linux/clocksource.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index c37b21ad5a3b..94c1f38e922a 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -161,7 +161,7 @@ struct clocksource { /* * First part of structure is read mostly */ - char *name; + const char *name; struct list_head list; int rating; cycle_t (*read)(struct clocksource *cs); -- cgit v1.2.3 From 78b7280cce23293f7570ad52c1ffe1485c6d9669 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 Mar 2011 17:57:23 +0000 Subject: KEYS: Improve /proc/keys Improve /proc/keys by: (1) Don't attempt to summarise the payload of a negated key. It won't have one. To this end, a helper function - key_is_instantiated() has been added that allows the caller to find out whether the key is positively instantiated (as opposed to being uninstantiated or negatively instantiated). (2) Do show keys that are negative, expired or revoked rather than hiding them. This requires an override flag (no_state_check) to be passed to search_my_process_keyrings() and keyring_search_aux() to suppress this check. Without this, keys that are possessed by the caller, but only grant permissions to the caller if possessed are skipped as the possession check fails. Keys that are visible due to user, group or other checks are visible with or without this patch. Signed-off-by: David Howells Signed-off-by: James Morris --- include/linux/key.h | 13 +++++++++++++ net/dns_resolver/dns_key.c | 10 ++++++---- security/keys/internal.h | 4 +++- security/keys/keyring.c | 37 ++++++++++++++++++++++++------------- security/keys/proc.c | 2 +- security/keys/process_keys.c | 12 +++++++----- security/keys/request_key.c | 3 +-- security/keys/request_key_auth.c | 3 ++- security/keys/user_defined.c | 4 ++-- 9 files changed, 59 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index b2bb01719561..ef19b99aff98 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -276,6 +276,19 @@ static inline key_serial_t key_serial(struct key *key) return key ? key->serial : 0; } +/** + * key_is_instantiated - Determine if a key has been positively instantiated + * @key: The key to check. + * + * Return true if the specified key has been positively instantiated, false + * otherwise. 
+ */ +static inline bool key_is_instantiated(const struct key *key) +{ + return test_bit(KEY_FLAG_INSTANTIATED, &key->flags) && + !test_bit(KEY_FLAG_NEGATIVE, &key->flags); +} + #define rcu_dereference_key(KEY) \ (rcu_dereference_protected((KEY)->payload.rcudata, \ rwsem_is_locked(&((struct key *)(KEY))->sem))) diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index cfa7a5e1c5c9..fa000d26dc60 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -212,10 +212,12 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m) int err = key->type_data.x[0]; seq_puts(m, key->description); - if (err) - seq_printf(m, ": %d", err); - else - seq_printf(m, ": %u", key->datalen); + if (key_is_instantiated(key)) { + if (err) + seq_printf(m, ": %d", err); + else + seq_printf(m, ": %u", key->datalen); + } } /* diff --git a/security/keys/internal.h b/security/keys/internal.h index 07a025f81902..f375152a2500 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -109,11 +109,13 @@ extern key_ref_t keyring_search_aux(key_ref_t keyring_ref, const struct cred *cred, struct key_type *type, const void *description, - key_match_func_t match); + key_match_func_t match, + bool no_state_check); extern key_ref_t search_my_process_keyrings(struct key_type *type, const void *description, key_match_func_t match, + bool no_state_check, const struct cred *cred); extern key_ref_t search_process_keyrings(struct key_type *type, const void *description, diff --git a/security/keys/keyring.c b/security/keys/keyring.c index cdd2f3f88c88..a06ffab38568 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -176,13 +176,15 @@ static void keyring_describe(const struct key *keyring, struct seq_file *m) else seq_puts(m, "[anon]"); - rcu_read_lock(); - klist = rcu_dereference(keyring->payload.subscriptions); - if (klist) - seq_printf(m, ": %u/%u", klist->nkeys, klist->maxkeys); - else - seq_puts(m, ": empty"); - rcu_read_unlock(); + if (key_is_instantiated(keyring)) { + rcu_read_lock(); + klist = rcu_dereference(keyring->payload.subscriptions); + if (klist) + seq_printf(m, ": %u/%u", klist->nkeys, klist->maxkeys); + else + seq_puts(m, ": empty"); + rcu_read_unlock(); + } } /* @@ -271,6 +273,7 @@ struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, * @type: The type of key to search for. * @description: Parameter for @match. * @match: Function to rule on whether or not a key is the one required. + * @no_state_check: Don't check if a matching key is bad * * Search the supplied keyring tree for a key that matches the criteria given. 
* The root keyring and any linked keyrings must grant Search permission to the @@ -303,7 +306,8 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref, const struct cred *cred, struct key_type *type, const void *description, - key_match_func_t match) + key_match_func_t match, + bool no_state_check) { struct { struct keyring_list *keylist; @@ -345,6 +349,8 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref, kflags = keyring->flags; if (keyring->type == type && match(keyring, description)) { key = keyring; + if (no_state_check) + goto found; /* check it isn't negative and hasn't expired or been * revoked */ @@ -384,11 +390,13 @@ descend: continue; /* skip revoked keys and expired keys */ - if (kflags & (1 << KEY_FLAG_REVOKED)) - continue; + if (!no_state_check) { + if (kflags & (1 << KEY_FLAG_REVOKED)) + continue; - if (key->expiry && now.tv_sec >= key->expiry) - continue; + if (key->expiry && now.tv_sec >= key->expiry) + continue; + } /* keys that don't match */ if (!match(key, description)) @@ -399,6 +407,9 @@ descend: cred, KEY_SEARCH) < 0) continue; + if (no_state_check) + goto found; + /* we set a different error code if we pass a negative key */ if (kflags & (1 << KEY_FLAG_NEGATIVE)) { err = key->type_data.reject_error; @@ -478,7 +489,7 @@ key_ref_t keyring_search(key_ref_t keyring, return ERR_PTR(-ENOKEY); return keyring_search_aux(keyring, current->cred, - type, description, type->match); + type, description, type->match, false); } EXPORT_SYMBOL(keyring_search); diff --git a/security/keys/proc.c b/security/keys/proc.c index 525cf8a29cdd..49bbc97943ad 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -199,7 +199,7 @@ static int proc_keys_show(struct seq_file *m, void *v) if (key->perm & KEY_POS_VIEW) { skey_ref = search_my_process_keyrings(key->type, key, lookup_user_key_possessed, - cred); + true, cred); if (!IS_ERR(skey_ref)) { key_ref_put(skey_ref); key_ref = make_key_ref(key, 1); diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 930634e45149..6c0480db8885 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -331,6 +331,7 @@ void key_fsgid_changed(struct task_struct *tsk) key_ref_t search_my_process_keyrings(struct key_type *type, const void *description, key_match_func_t match, + bool no_state_check, const struct cred *cred) { key_ref_t key_ref, ret, err; @@ -350,7 +351,7 @@ key_ref_t search_my_process_keyrings(struct key_type *type, if (cred->thread_keyring) { key_ref = keyring_search_aux( make_key_ref(cred->thread_keyring, 1), - cred, type, description, match); + cred, type, description, match, no_state_check); if (!IS_ERR(key_ref)) goto found; @@ -371,7 +372,7 @@ key_ref_t search_my_process_keyrings(struct key_type *type, if (cred->tgcred->process_keyring) { key_ref = keyring_search_aux( make_key_ref(cred->tgcred->process_keyring, 1), - cred, type, description, match); + cred, type, description, match, no_state_check); if (!IS_ERR(key_ref)) goto found; @@ -395,7 +396,7 @@ key_ref_t search_my_process_keyrings(struct key_type *type, make_key_ref(rcu_dereference( cred->tgcred->session_keyring), 1), - cred, type, description, match); + cred, type, description, match, no_state_check); rcu_read_unlock(); if (!IS_ERR(key_ref)) @@ -417,7 +418,7 @@ key_ref_t search_my_process_keyrings(struct key_type *type, else if (cred->user->session_keyring) { key_ref = keyring_search_aux( make_key_ref(cred->user->session_keyring, 1), - cred, type, description, match); + cred, type, description, match, no_state_check); if 
(!IS_ERR(key_ref)) goto found; @@ -459,7 +460,8 @@ key_ref_t search_process_keyrings(struct key_type *type, might_sleep(); - key_ref = search_my_process_keyrings(type, description, match, cred); + key_ref = search_my_process_keyrings(type, description, match, + false, cred); if (!IS_ERR(key_ref)) goto found; err = key_ref; diff --git a/security/keys/request_key.c b/security/keys/request_key.c index df3c0417ee40..b18a71745901 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -530,8 +530,7 @@ struct key *request_key_and_link(struct key_type *type, dest_keyring, flags); /* search all the process keyrings for a key */ - key_ref = search_process_keyrings(type, description, type->match, - cred); + key_ref = search_process_keyrings(type, description, type->match, cred); if (!IS_ERR(key_ref)) { key = key_ref_to_ptr(key_ref); diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 68164031a74e..f6337c9082eb 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -59,7 +59,8 @@ static void request_key_auth_describe(const struct key *key, seq_puts(m, "key:"); seq_puts(m, key->description); - seq_printf(m, " pid:%d ci:%zu", rka->pid, rka->callout_len); + if (key_is_instantiated(key)) + seq_printf(m, " pid:%d ci:%zu", rka->pid, rka->callout_len); } /* diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c index c6ca8662a468..63bb1aaffc0a 100644 --- a/security/keys/user_defined.c +++ b/security/keys/user_defined.c @@ -169,8 +169,8 @@ EXPORT_SYMBOL_GPL(user_destroy); void user_describe(const struct key *key, struct seq_file *m) { seq_puts(m, key->description); - - seq_printf(m, ": %u", key->datalen); + if (key_is_instantiated(key)) + seq_printf(m, ": %u", key->datalen); } EXPORT_SYMBOL_GPL(user_describe); -- cgit v1.2.3 From 09e10d7fe509408d15818db6a0299f563668a7ba Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 16 Mar 2011 22:57:47 +0000 Subject: ASoC: Add WM8958 VSS support With appropriate firmware the WM8958 can support Virtual Surround Sound or VSS, widening the stereo audio image for improved user experience. Enable support for this mode of operation when the appropriate firmware can be loaded at runtime. Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- include/linux/mfd/wm8994/pdata.h | 34 ++++ sound/soc/codecs/wm8958-dsp2.c | 363 ++++++++++++++++++++++++++++++++++++++- sound/soc/codecs/wm8994.c | 2 + sound/soc/codecs/wm8994.h | 14 ++ 4 files changed, 405 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h index 466b1c777aff..c72174aff1fe 100644 --- a/include/linux/mfd/wm8994/pdata.h +++ b/include/linux/mfd/wm8994/pdata.h @@ -32,6 +32,9 @@ struct wm8994_ldo_pdata { #define WM8994_EQ_REGS 20 #define WM8958_MBC_CUTOFF_REGS 20 #define WM8958_MBC_COEFF_REGS 48 +#define WM8958_MBC_COMBINED_REGS 56 +#define WM8958_VSS_HPF_REGS 2 +#define WM8958_VSS_REGS 148 /** * DRC configurations are specified with a label and a set of register @@ -71,6 +74,31 @@ struct wm8958_mbc_cfg { const char *name; u16 cutoff_regs[WM8958_MBC_CUTOFF_REGS]; u16 coeff_regs[WM8958_MBC_COEFF_REGS]; + + /* Coefficient layout when using MBC+VSS firmware */ + u16 combined_regs[WM8958_MBC_COMBINED_REGS]; +}; + +/** + * VSS HPF configurations are specified with a label and two values to + * write. 
Configurations are expected to be generated using the + * multiband compressor configuration panel in WISCE - see + * http://www.wolfsonmicro.com/wisce/ + */ +struct wm8958_vss_hpf_cfg { + const char *name; + u16 regs[WM8958_VSS_HPF_REGS]; +}; + +/** + * VSS configurations are specified with a label and array of values + * to write. Configurations are expected to be generated using the + * multiband compressor configuration panel in WISCE - see + * http://www.wolfsonmicro.com/wisce/ + */ +struct wm8958_vss_cfg { + const char *name; + u16 regs[WM8958_VSS_REGS]; }; struct wm8994_pdata { @@ -95,6 +123,12 @@ struct wm8994_pdata { int num_mbc_cfgs; struct wm8958_mbc_cfg *mbc_cfgs; + int num_vss_cfgs; + struct wm8958_vss_cfg *vss_cfgs; + + int num_vss_hpf_cfgs; + struct wm8958_vss_hpf_cfg *vss_hpf_cfgs; + /* LINEOUT can be differential or single ended */ unsigned int lineout1_diff:1; unsigned int lineout2_diff:1; diff --git a/sound/soc/codecs/wm8958-dsp2.c b/sound/soc/codecs/wm8958-dsp2.c index 9c1cbe5b61ae..d0e257315d97 100644 --- a/sound/soc/codecs/wm8958-dsp2.c +++ b/sound/soc/codecs/wm8958-dsp2.c @@ -233,6 +233,68 @@ static void wm8958_dsp_start_mbc(struct snd_soc_codec *codec, int path) WM8958_MBC_ENA); } +static void wm8958_dsp_start_vss(struct snd_soc_codec *codec, int path) +{ + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + struct wm8994_pdata *pdata = wm8994->pdata; + int i, ena; + + if (wm8994->mbc_vss) + wm8958_dsp2_fw(codec, "MBC+VSS", wm8994->mbc_vss, false); + + snd_soc_update_bits(codec, WM8958_DSP2_PROGRAM, + WM8958_DSP2_ENA, WM8958_DSP2_ENA); + + /* If we've got user supplied settings use them */ + if (pdata && pdata->num_mbc_cfgs) { + struct wm8958_mbc_cfg *cfg + = &pdata->mbc_cfgs[wm8994->mbc_cfg]; + + for (i = 0; i < ARRAY_SIZE(cfg->combined_regs); i++) + snd_soc_write(codec, i + 0x2800, + cfg->combined_regs[i]); + } + + if (pdata && pdata->num_vss_cfgs) { + struct wm8958_vss_cfg *cfg + = &pdata->vss_cfgs[wm8994->vss_cfg]; + + for (i = 0; i < ARRAY_SIZE(cfg->regs); i++) + snd_soc_write(codec, i + 0x2600, cfg->regs[i]); + } + + if (pdata && pdata->num_vss_hpf_cfgs) { + struct wm8958_vss_hpf_cfg *cfg + = &pdata->vss_hpf_cfgs[wm8994->vss_hpf_cfg]; + + for (i = 0; i < ARRAY_SIZE(cfg->regs); i++) + snd_soc_write(codec, i + 0x2400, cfg->regs[i]); + } + + /* Run the DSP */ + snd_soc_write(codec, WM8958_DSP2_EXECCONTROL, + WM8958_DSP2_RUNR); + + /* Enable the algorithms we've selected */ + ena = 0; + if (wm8994->mbc_ena[path]) + ena |= 0x8; + if (wm8994->hpf2_ena[path]) + ena |= 0x4; + if (wm8994->hpf1_ena[path]) + ena |= 0x2; + if (wm8994->vss_ena[path]) + ena |= 0x1; + + snd_soc_write(codec, 0x2201, ena); + + /* Switch the DSP into the data path */ + snd_soc_update_bits(codec, WM8958_DSP2_CONFIG, + WM8958_MBC_SEL_MASK | WM8958_MBC_ENA, + path << WM8958_MBC_SEL_SHIFT | WM8958_MBC_ENA); +} + + static void wm8958_dsp_apply(struct snd_soc_codec *codec, int path, int start) { struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); @@ -258,7 +320,8 @@ static void wm8958_dsp_apply(struct snd_soc_codec *codec, int path, int start) } /* Do we have both an active AIF and an active algorithm? 
*/ - ena = wm8994->mbc_ena[path]; + ena = wm8994->mbc_ena[path] || wm8994->vss_ena[path] || + wm8994->hpf1_ena[path] || wm8994->hpf2_ena[path]; if (!pwr_reg) ena = 0; @@ -281,11 +344,18 @@ static void wm8958_dsp_apply(struct snd_soc_codec *codec, int path, int start) aif << WM8958_DSP2CLK_SRC_SHIFT | WM8958_DSP2CLK_ENA); - if (wm8994->mbc_ena[path]) + if (wm8994->vss_ena[path] || wm8994->hpf1_ena[path] || + wm8994->hpf2_ena[path]) + wm8958_dsp_start_vss(codec, path); + else if (wm8994->mbc_ena[path]) wm8958_dsp_start_mbc(codec, path); - dev_dbg(codec->dev, "DSP running\n"); - } else { + wm8994->dsp_active = path; + + dev_dbg(codec->dev, "DSP running in path %d\n", path); + } + + if (!start && wm8994->dsp_active == path) { /* If the DSP is already stopped then noop */ if (!(reg & WM8958_DSP2_ENA)) return; @@ -335,7 +405,8 @@ static int wm8958_dsp2_busy(struct wm8994_priv *wm8994, int aif) for (i = 0; i < ARRAY_SIZE(wm8994->mbc_ena); i++) { if (i == aif) continue; - if (wm8994->mbc_ena[i]) + if (wm8994->mbc_ena[i] || wm8994->vss_ena[i] || + wm8994->hpf1_ena[i] || wm8994->hpf2_ena[i]) return 1; } @@ -426,22 +497,239 @@ static int wm8958_mbc_put(struct snd_kcontrol *kcontrol, .get = wm8958_mbc_get, .put = wm8958_mbc_put, \ .private_value = xval } +static int wm8958_put_vss_enum(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + struct wm8994_pdata *pdata = wm8994->pdata; + int value = ucontrol->value.integer.value[0]; + int reg; + + /* Don't allow on the fly reconfiguration */ + reg = snd_soc_read(codec, WM8994_CLOCKING_1); + if (reg < 0 || reg & WM8958_DSP2CLK_ENA) + return -EBUSY; + + if (value >= pdata->num_vss_cfgs) + return -EINVAL; + + wm8994->vss_cfg = value; + + return 0; +} + +static int wm8958_get_vss_enum(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + + ucontrol->value.enumerated.item[0] = wm8994->vss_cfg; + + return 0; +} + +static int wm8958_put_vss_hpf_enum(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + struct wm8994_pdata *pdata = wm8994->pdata; + int value = ucontrol->value.integer.value[0]; + int reg; + + /* Don't allow on the fly reconfiguration */ + reg = snd_soc_read(codec, WM8994_CLOCKING_1); + if (reg < 0 || reg & WM8958_DSP2CLK_ENA) + return -EBUSY; + + if (value >= pdata->num_vss_hpf_cfgs) + return -EINVAL; + + wm8994->vss_hpf_cfg = value; + + return 0; +} + +static int wm8958_get_vss_hpf_enum(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + + ucontrol->value.enumerated.item[0] = wm8994->vss_hpf_cfg; + + return 0; +} + +static int wm8958_vss_info(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; + uinfo->count = 1; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = 1; + return 0; +} + +static int wm8958_vss_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + int vss = kcontrol->private_value; + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv 
*wm8994 = snd_soc_codec_get_drvdata(codec); + + ucontrol->value.integer.value[0] = wm8994->vss_ena[vss]; + + return 0; +} + +static int wm8958_vss_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + int vss = kcontrol->private_value; + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + + if (ucontrol->value.integer.value[0] > 1) + return -EINVAL; + + if (!wm8994->mbc_vss) + return -ENODEV; + + if (wm8958_dsp2_busy(wm8994, vss)) { + dev_dbg(codec->dev, "DSP2 active on %d already\n", vss); + return -EBUSY; + } + + wm8994->vss_ena[vss] = ucontrol->value.integer.value[0]; + + wm8958_dsp_apply(codec, vss, wm8994->vss_ena[vss]); + + return 0; +} + + +#define WM8958_VSS_SWITCH(xname, xval) {\ + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = (xname), \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE,\ + .info = wm8958_vss_info, \ + .get = wm8958_vss_get, .put = wm8958_vss_put, \ + .private_value = xval } + +static int wm8958_hpf_info(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; + uinfo->count = 1; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = 1; + return 0; +} + +static int wm8958_hpf_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + int hpf = kcontrol->private_value; + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + + if (hpf < 3) + ucontrol->value.integer.value[0] = wm8994->hpf1_ena[hpf % 3]; + else + ucontrol->value.integer.value[0] = wm8994->hpf2_ena[hpf % 3]; + + return 0; +} + +static int wm8958_hpf_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + int hpf = kcontrol->private_value; + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + + if (ucontrol->value.integer.value[0] > 1) + return -EINVAL; + + if (!wm8994->mbc_vss) + return -ENODEV; + + if (wm8958_dsp2_busy(wm8994, hpf % 3)) { + dev_dbg(codec->dev, "DSP2 active on %d already\n", hpf); + return -EBUSY; + } + + if (wm8994->eq[hpf % 3]) + return -EBUSY; + + if (hpf < 3) + wm8994->hpf1_ena[hpf % 3] = ucontrol->value.integer.value[0]; + else + wm8994->hpf2_ena[hpf % 3] = ucontrol->value.integer.value[0]; + + wm8958_dsp_apply(codec, hpf % 3, ucontrol->value.integer.value[0]); + + return 0; +} + +#define WM8958_HPF_SWITCH(xname, xval) {\ + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = (xname), \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE,\ + .info = wm8958_hpf_info, \ + .get = wm8958_hpf_get, .put = wm8958_hpf_put, \ + .private_value = xval } + static const struct snd_kcontrol_new wm8958_mbc_snd_controls[] = { WM8958_MBC_SWITCH("AIF1DAC1 MBC Switch", 0), WM8958_MBC_SWITCH("AIF1DAC2 MBC Switch", 1), WM8958_MBC_SWITCH("AIF2DAC MBC Switch", 2), }; -static void wm8958_mbc_loaded(const struct firmware *fw, void *context) +static const struct snd_kcontrol_new wm8958_vss_snd_controls[] = { +WM8958_VSS_SWITCH("AIF1DAC1 VSS Switch", 0), +WM8958_VSS_SWITCH("AIF1DAC2 VSS Switch", 1), +WM8958_VSS_SWITCH("AIF2DAC VSS Switch", 2), +WM8958_HPF_SWITCH("AIF1DAC1 HPF1 Switch", 0), +WM8958_HPF_SWITCH("AIF1DAC2 HPF1 Switch", 1), +WM8958_HPF_SWITCH("AIF2DAC HPF1 Switch", 2), +WM8958_HPF_SWITCH("AIF1DAC1 HPF2 Switch", 3), +WM8958_HPF_SWITCH("AIF1DAC2 HPF2 Switch", 4), +WM8958_HPF_SWITCH("AIF2DAC HPF2 Switch", 5), +}; + +static void wm8958_mbc_vss_loaded(const struct firmware *fw, 
void *context) { struct snd_soc_codec *codec = context; struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); - if (fw && wm8958_dsp2_fw(codec, "MBC", fw, true) != 0) { + if (fw && (wm8958_dsp2_fw(codec, "MBC+VSS", fw, true) == 0)) { mutex_lock(&codec->mutex); - wm8994->mbc = fw; + wm8994->mbc_vss = fw; mutex_unlock(&codec->mutex); } + +} + +static void wm8958_mbc_loaded(const struct firmware *fw, void *context) +{ + struct snd_soc_codec *codec = context; + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + + if (wm8958_dsp2_fw(codec, "MBC", fw, true) != 0) + return; + + mutex_lock(&codec->mutex); + wm8994->mbc = fw; + mutex_unlock(&codec->mutex); + + /* We can't have more than one request outstanding at once so + * we daisy chain. + */ + request_firmware_nowait(THIS_MODULE, FW_ACTION_HOTPLUG, + "wm8958_mbc_vss.wfw", codec->dev, GFP_KERNEL, + codec, wm8958_mbc_vss_loaded); } void wm8958_dsp2_init(struct snd_soc_codec *codec) @@ -454,6 +742,9 @@ void wm8958_dsp2_init(struct snd_soc_codec *codec) snd_soc_add_controls(codec, wm8958_mbc_snd_controls, ARRAY_SIZE(wm8958_mbc_snd_controls)); + snd_soc_add_controls(codec, wm8958_vss_snd_controls, + ARRAY_SIZE(wm8958_vss_snd_controls)); + /* We don't *require* firmware and don't want to delay boot */ request_firmware_nowait(THIS_MODULE, FW_ACTION_HOTPLUG, @@ -491,5 +782,61 @@ void wm8958_dsp2_init(struct snd_soc_codec *codec) "Failed to add MBC mode controls: %d\n", ret); } + if (pdata->num_vss_cfgs) { + struct snd_kcontrol_new control[] = { + SOC_ENUM_EXT("VSS Mode", wm8994->vss_enum, + wm8958_get_vss_enum, wm8958_put_vss_enum), + }; + /* We need an array of texts for the enum API */ + wm8994->vss_texts = kmalloc(sizeof(char *) + * pdata->num_vss_cfgs, GFP_KERNEL); + if (!wm8994->vss_texts) { + dev_err(wm8994->codec->dev, + "Failed to allocate %d VSS config texts\n", + pdata->num_vss_cfgs); + return; + } + + for (i = 0; i < pdata->num_vss_cfgs; i++) + wm8994->vss_texts[i] = pdata->vss_cfgs[i].name; + + wm8994->vss_enum.max = pdata->num_vss_cfgs; + wm8994->vss_enum.texts = wm8994->vss_texts; + + ret = snd_soc_add_controls(wm8994->codec, control, 1); + if (ret != 0) + dev_err(wm8994->codec->dev, + "Failed to add VSS mode controls: %d\n", ret); + } + + if (pdata->num_vss_hpf_cfgs) { + struct snd_kcontrol_new control[] = { + SOC_ENUM_EXT("VSS HPF Mode", wm8994->vss_hpf_enum, + wm8958_get_vss_hpf_enum, + wm8958_put_vss_hpf_enum), + }; + + /* We need an array of texts for the enum API */ + wm8994->vss_hpf_texts = kmalloc(sizeof(char *) + * pdata->num_vss_hpf_cfgs, GFP_KERNEL); + if (!wm8994->vss_hpf_texts) { + dev_err(wm8994->codec->dev, + "Failed to allocate %d VSS HPF config texts\n", + pdata->num_vss_hpf_cfgs); + return; + } + + for (i = 0; i < pdata->num_vss_hpf_cfgs; i++) + wm8994->vss_hpf_texts[i] = pdata->vss_hpf_cfgs[i].name; + + wm8994->vss_hpf_enum.max = pdata->num_vss_hpf_cfgs; + wm8994->vss_hpf_enum.texts = wm8994->vss_hpf_texts; + + ret = snd_soc_add_controls(wm8994->codec, control, 1); + if (ret != 0) + dev_err(wm8994->codec->dev, + "Failed to add VSS HPFmode controls: %d\n", + ret); + } } diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index f622ff691b41..01ef5704091e 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -3140,6 +3140,8 @@ static int wm8994_codec_remove(struct snd_soc_codec *codec) } if (wm8994->mbc) release_firmware(wm8994->mbc); + if (wm8994->mbc_vss) + release_firmware(wm8994->mbc_vss); kfree(wm8994->retune_mobile_texts); kfree(wm8994->drc_texts); 
kfree(wm8994); diff --git a/sound/soc/codecs/wm8994.h b/sound/soc/codecs/wm8994.h index a4bfde83065f..f337f3d50590 100644 --- a/sound/soc/codecs/wm8994.h +++ b/sound/soc/codecs/wm8994.h @@ -84,6 +84,9 @@ struct wm8994_priv { int lrclk_shared[2]; int mbc_ena[3]; + int hpf1_ena[3]; + int hpf2_ena[3]; + int vss_ena[3]; /* Platform dependant DRC configuration */ const char **drc_texts; @@ -101,6 +104,16 @@ struct wm8994_priv { const char **mbc_texts; struct soc_enum mbc_enum; + /* Platform dependant VSS configuration */ + int vss_cfg; + const char **vss_texts; + struct soc_enum vss_enum; + + /* Platform dependant VSS HPF configuration */ + int vss_hpf_cfg; + const char **vss_hpf_texts; + struct soc_enum vss_hpf_enum; + struct wm8994_micdet micdet[2]; wm8958_micdet_cb jack_cb; @@ -119,6 +132,7 @@ struct wm8994_priv { int dsp_active; const struct firmware *cur_fw; const struct firmware *mbc; + const struct firmware *mbc_vss; }; #endif -- cgit v1.2.3 From 312158718fe2056703b2744801165a9574560495 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 17 Mar 2011 20:23:43 +0000 Subject: ASoC: Add WM8958 enhanced EQ support DSP2 in the WM8958 can be used to support an upgraded EQ for use in demanding applications. Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- include/linux/mfd/wm8994/pdata.h | 15 +++ sound/soc/codecs/wm8958-dsp2.c | 192 ++++++++++++++++++++++++++++++++++++++- sound/soc/codecs/wm8994.c | 2 + sound/soc/codecs/wm8994.h | 7 ++ 4 files changed, 213 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h index c72174aff1fe..d12f8d635a81 100644 --- a/include/linux/mfd/wm8994/pdata.h +++ b/include/linux/mfd/wm8994/pdata.h @@ -35,6 +35,7 @@ struct wm8994_ldo_pdata { #define WM8958_MBC_COMBINED_REGS 56 #define WM8958_VSS_HPF_REGS 2 #define WM8958_VSS_REGS 148 +#define WM8958_ENH_EQ_REGS 32 /** * DRC configurations are specified with a label and a set of register @@ -101,6 +102,17 @@ struct wm8958_vss_cfg { u16 regs[WM8958_VSS_REGS]; }; +/** + * Enhanced EQ configurations are specified with a label and array of + * values to write. 
Configurations are expected to be generated using + * the multiband compressor configuration panel in WISCE - see + * http://www.wolfsonmicro.com/wisce/ + */ +struct wm8958_enh_eq_cfg { + const char *name; + u16 regs[WM8958_ENH_EQ_REGS]; +}; + struct wm8994_pdata { int gpio_base; @@ -129,6 +141,9 @@ struct wm8994_pdata { int num_vss_hpf_cfgs; struct wm8958_vss_hpf_cfg *vss_hpf_cfgs; + int num_enh_eq_cfgs; + struct wm8958_enh_eq_cfg *enh_eq_cfgs; + /* LINEOUT can be differential or single ended */ unsigned int lineout1_diff:1; unsigned int lineout2_diff:1; diff --git a/sound/soc/codecs/wm8958-dsp2.c b/sound/soc/codecs/wm8958-dsp2.c index d0e257315d97..74983ee2b87f 100644 --- a/sound/soc/codecs/wm8958-dsp2.c +++ b/sound/soc/codecs/wm8958-dsp2.c @@ -294,6 +294,36 @@ static void wm8958_dsp_start_vss(struct snd_soc_codec *codec, int path) path << WM8958_MBC_SEL_SHIFT | WM8958_MBC_ENA); } +static void wm8958_dsp_start_enh_eq(struct snd_soc_codec *codec, int path) +{ + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + struct wm8994_pdata *pdata = wm8994->pdata; + int i; + + wm8958_dsp2_fw(codec, "ENH_EQ", wm8994->enh_eq, false); + + snd_soc_update_bits(codec, WM8958_DSP2_PROGRAM, + WM8958_DSP2_ENA, WM8958_DSP2_ENA); + + /* If we've got user supplied settings use them */ + if (pdata && pdata->num_enh_eq_cfgs) { + struct wm8958_enh_eq_cfg *cfg + = &pdata->enh_eq_cfgs[wm8994->enh_eq_cfg]; + + for (i = 0; i < ARRAY_SIZE(cfg->regs); i++) + snd_soc_write(codec, i + 0x2200, + cfg->regs[i]); + } + + /* Run the DSP */ + snd_soc_write(codec, WM8958_DSP2_EXECCONTROL, + WM8958_DSP2_RUNR); + + /* Switch the DSP into the data path */ + snd_soc_update_bits(codec, WM8958_DSP2_CONFIG, + WM8958_MBC_SEL_MASK | WM8958_MBC_ENA, + path << WM8958_MBC_SEL_SHIFT | WM8958_MBC_ENA); +} static void wm8958_dsp_apply(struct snd_soc_codec *codec, int path, int start) { @@ -321,7 +351,8 @@ static void wm8958_dsp_apply(struct snd_soc_codec *codec, int path, int start) /* Do we have both an active AIF and an active algorithm? 
*/ ena = wm8994->mbc_ena[path] || wm8994->vss_ena[path] || - wm8994->hpf1_ena[path] || wm8994->hpf2_ena[path]; + wm8994->hpf1_ena[path] || wm8994->hpf2_ena[path] || + wm8994->enh_eq_ena[path]; if (!pwr_reg) ena = 0; @@ -344,7 +375,9 @@ static void wm8958_dsp_apply(struct snd_soc_codec *codec, int path, int start) aif << WM8958_DSP2CLK_SRC_SHIFT | WM8958_DSP2CLK_ENA); - if (wm8994->vss_ena[path] || wm8994->hpf1_ena[path] || + if (wm8994->enh_eq_ena[path]) + wm8958_dsp_start_enh_eq(codec, path); + else if (wm8994->vss_ena[path] || wm8994->hpf1_ena[path] || wm8994->hpf2_ena[path]) wm8958_dsp_start_vss(codec, path); else if (wm8994->mbc_ena[path]) @@ -483,6 +516,9 @@ static int wm8958_mbc_put(struct snd_kcontrol *kcontrol, return -EBUSY; } + if (wm8994->enh_eq_ena[mbc]) + return -EBUSY; + wm8994->mbc_ena[mbc] = ucontrol->value.integer.value[0]; wm8958_dsp_apply(codec, mbc, wm8994->mbc_ena[mbc]); @@ -603,6 +639,9 @@ static int wm8958_vss_put(struct snd_kcontrol *kcontrol, return -EBUSY; } + if (wm8994->enh_eq_ena[vss]) + return -EBUSY; + wm8994->vss_ena[vss] = ucontrol->value.integer.value[0]; wm8958_dsp_apply(codec, vss, wm8994->vss_ena[vss]); @@ -661,7 +700,7 @@ static int wm8958_hpf_put(struct snd_kcontrol *kcontrol, return -EBUSY; } - if (wm8994->eq[hpf % 3]) + if (wm8994->enh_eq_ena[hpf % 3]) return -EBUSY; if (hpf < 3) @@ -681,6 +720,97 @@ static int wm8958_hpf_put(struct snd_kcontrol *kcontrol, .get = wm8958_hpf_get, .put = wm8958_hpf_put, \ .private_value = xval } +static int wm8958_put_enh_eq_enum(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + struct wm8994_pdata *pdata = wm8994->pdata; + int value = ucontrol->value.integer.value[0]; + int reg; + + /* Don't allow on the fly reconfiguration */ + reg = snd_soc_read(codec, WM8994_CLOCKING_1); + if (reg < 0 || reg & WM8958_DSP2CLK_ENA) + return -EBUSY; + + if (value >= pdata->num_enh_eq_cfgs) + return -EINVAL; + + wm8994->enh_eq_cfg = value; + + return 0; +} + +static int wm8958_get_enh_eq_enum(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + + ucontrol->value.enumerated.item[0] = wm8994->enh_eq_cfg; + + return 0; +} + +static int wm8958_enh_eq_info(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; + uinfo->count = 1; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = 1; + return 0; +} + +static int wm8958_enh_eq_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + int eq = kcontrol->private_value; + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + + ucontrol->value.integer.value[0] = wm8994->enh_eq_ena[eq]; + + return 0; +} + +static int wm8958_enh_eq_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + int eq = kcontrol->private_value; + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + + if (ucontrol->value.integer.value[0] > 1) + return -EINVAL; + + if (!wm8994->enh_eq) + return -ENODEV; + + if (wm8958_dsp2_busy(wm8994, eq)) { + dev_dbg(codec->dev, "DSP2 active on %d already\n", eq); + return -EBUSY; + } + + if (wm8994->mbc_ena[eq] || wm8994->vss_ena[eq] || + 
wm8994->hpf1_ena[eq] || wm8994->hpf2_ena[eq]) + return -EBUSY; + + wm8994->enh_eq_ena[eq] = ucontrol->value.integer.value[0]; + + wm8958_dsp_apply(codec, eq, ucontrol->value.integer.value[0]); + + return 0; +} + +#define WM8958_ENH_EQ_SWITCH(xname, xval) {\ + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = (xname), \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE,\ + .info = wm8958_enh_eq_info, \ + .get = wm8958_enh_eq_get, .put = wm8958_enh_eq_put, \ + .private_value = xval } + static const struct snd_kcontrol_new wm8958_mbc_snd_controls[] = { WM8958_MBC_SWITCH("AIF1DAC1 MBC Switch", 0), WM8958_MBC_SWITCH("AIF1DAC2 MBC Switch", 1), @@ -699,6 +829,24 @@ WM8958_HPF_SWITCH("AIF1DAC2 HPF2 Switch", 4), WM8958_HPF_SWITCH("AIF2DAC HPF2 Switch", 5), }; +static const struct snd_kcontrol_new wm8958_enh_eq_snd_controls[] = { +WM8958_ENH_EQ_SWITCH("AIF1DAC1 Enhanced EQ Switch", 0), +WM8958_ENH_EQ_SWITCH("AIF1DAC2 Enhanced EQ Switch", 1), +WM8958_ENH_EQ_SWITCH("AIF2DAC Enhanced EQ Switch", 2), +}; + +static void wm8958_enh_eq_loaded(const struct firmware *fw, void *context) +{ + struct snd_soc_codec *codec = context; + struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); + + if (fw && (wm8958_dsp2_fw(codec, "ENH_EQ", fw, true) == 0)) { + mutex_lock(&codec->mutex); + wm8994->enh_eq = fw; + mutex_unlock(&codec->mutex); + } +} + static void wm8958_mbc_vss_loaded(const struct firmware *fw, void *context) { struct snd_soc_codec *codec = context; @@ -710,6 +858,12 @@ static void wm8958_mbc_vss_loaded(const struct firmware *fw, void *context) mutex_unlock(&codec->mutex); } + /* We can't have more than one request outstanding at once so + * we daisy chain. + */ + request_firmware_nowait(THIS_MODULE, FW_ACTION_HOTPLUG, + "wm8958_enh_eq.wfw", codec->dev, GFP_KERNEL, + codec, wm8958_enh_eq_loaded); } static void wm8958_mbc_loaded(const struct firmware *fw, void *context) @@ -744,6 +898,8 @@ void wm8958_dsp2_init(struct snd_soc_codec *codec) ARRAY_SIZE(wm8958_mbc_snd_controls)); snd_soc_add_controls(codec, wm8958_vss_snd_controls, ARRAY_SIZE(wm8958_vss_snd_controls)); + snd_soc_add_controls(codec, wm8958_enh_eq_snd_controls, + ARRAY_SIZE(wm8958_enh_eq_snd_controls)); /* We don't *require* firmware and don't want to delay boot */ @@ -839,4 +995,34 @@ void wm8958_dsp2_init(struct snd_soc_codec *codec) "Failed to add VSS HPFmode controls: %d\n", ret); } + + if (pdata->num_enh_eq_cfgs) { + struct snd_kcontrol_new control[] = { + SOC_ENUM_EXT("Enhanced EQ Mode", wm8994->enh_eq_enum, + wm8958_get_enh_eq_enum, + wm8958_put_enh_eq_enum), + }; + + /* We need an array of texts for the enum API */ + wm8994->enh_eq_texts = kmalloc(sizeof(char *) + * pdata->num_enh_eq_cfgs, GFP_KERNEL); + if (!wm8994->enh_eq_texts) { + dev_err(wm8994->codec->dev, + "Failed to allocate %d enhanced EQ config texts\n", + pdata->num_enh_eq_cfgs); + return; + } + + for (i = 0; i < pdata->num_enh_eq_cfgs; i++) + wm8994->enh_eq_texts[i] = pdata->enh_eq_cfgs[i].name; + + wm8994->enh_eq_enum.max = pdata->num_enh_eq_cfgs; + wm8994->enh_eq_enum.texts = wm8994->enh_eq_texts; + + ret = snd_soc_add_controls(wm8994->codec, control, 1); + if (ret != 0) + dev_err(wm8994->codec->dev, + "Failed to add enhanced EQ controls: %d\n", + ret); + } } diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index 01ef5704091e..03ef62462bab 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -3142,6 +3142,8 @@ static int wm8994_codec_remove(struct snd_soc_codec *codec) release_firmware(wm8994->mbc); if (wm8994->mbc_vss) 
release_firmware(wm8994->mbc_vss); + if (wm8994->enh_eq) + release_firmware(wm8994->enh_eq); kfree(wm8994->retune_mobile_texts); kfree(wm8994->drc_texts); kfree(wm8994); diff --git a/sound/soc/codecs/wm8994.h b/sound/soc/codecs/wm8994.h index f337f3d50590..0a1db04b73bd 100644 --- a/sound/soc/codecs/wm8994.h +++ b/sound/soc/codecs/wm8994.h @@ -87,6 +87,7 @@ struct wm8994_priv { int hpf1_ena[3]; int hpf2_ena[3]; int vss_ena[3]; + int enh_eq_ena[3]; /* Platform dependant DRC configuration */ const char **drc_texts; @@ -114,6 +115,11 @@ struct wm8994_priv { const char **vss_hpf_texts; struct soc_enum vss_hpf_enum; + /* Platform dependant enhanced EQ configuration */ + int enh_eq_cfg; + const char **enh_eq_texts; + struct soc_enum enh_eq_enum; + struct wm8994_micdet micdet[2]; wm8958_micdet_cb jack_cb; @@ -133,6 +139,7 @@ struct wm8994_priv { const struct firmware *cur_fw; const struct firmware *mbc; const struct firmware *mbc_vss; + const struct firmware *enh_eq; }; #endif -- cgit v1.2.3 From edf2ed153bcae52de70db00a98b0e81a5668e563 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:00 +0100 Subject: ptrace: Kill tracehook_notify_jctl() tracehook_notify_jctl() aids in determining whether and what to report to the parent when a task is stopped or continued. The function also adds an extra requirement that siglock may be released across it, which is currently unused and quite difficult to satisfy in well-defined manner. As job control and the notifications are about to receive major overhaul, remove the tracehook and open code it. If ever necessary, let's factor it out after the overhaul. * Oleg spotted incorrect CLD_CONTINUED/STOPPED selection when ptraced. Fixed. Signed-off-by: Tejun Heo Cc: Oleg Nesterov Cc: Roland McGrath --- include/linux/tracehook.h | 27 --------------------------- kernel/signal.c | 34 ++++++++++++++-------------------- 2 files changed, 14 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 3a2e66d88a32..b073f3c8adc3 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -468,33 +468,6 @@ static inline int tracehook_get_signal(struct task_struct *task, return 0; } -/** - * tracehook_notify_jctl - report about job control stop/continue - * @notify: zero, %CLD_STOPPED or %CLD_CONTINUED - * @why: %CLD_STOPPED or %CLD_CONTINUED - * - * This is called when we might call do_notify_parent_cldstop(). - * - * @notify is zero if we would not ordinarily send a %SIGCHLD, - * or is the %CLD_STOPPED or %CLD_CONTINUED .si_code for %SIGCHLD. - * - * @why is %CLD_STOPPED when about to stop for job control; - * we are already in %TASK_STOPPED state, about to call schedule(). - * It might also be that we have just exited (check %PF_EXITING), - * but need to report that a group-wide stop is complete. - * - * @why is %CLD_CONTINUED when waking up after job control stop and - * ready to make a delayed @notify report. - * - * Return the %CLD_* value for %SIGCHLD, or zero to generate no signal. - * - * Called with the siglock held. - */ -static inline int tracehook_notify_jctl(int notify, int why) -{ - return notify ?: (current->ptrace & PT_PTRACED) ? 
why : 0; -} - /** * tracehook_finish_jctl - report about return from job control stop * diff --git a/kernel/signal.c b/kernel/signal.c index f4db76986ec1..03d874e1058f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1727,7 +1727,7 @@ void ptrace_notify(int exit_code) static int do_signal_stop(int signr) { struct signal_struct *sig = current->signal; - int notify; + int notify = 0; if (!sig->group_stop_count) { struct task_struct *t; @@ -1759,19 +1759,16 @@ static int do_signal_stop(int signr) * a group stop in progress and we are the last to stop, report * to the parent. When ptraced, every thread reports itself. */ - notify = sig->group_stop_count == 1 ? CLD_STOPPED : 0; - notify = tracehook_notify_jctl(notify, CLD_STOPPED); - /* - * tracehook_notify_jctl() can drop and reacquire siglock, so - * we keep ->group_stop_count != 0 before the call. If SIGCONT - * or SIGKILL comes in between ->group_stop_count == 0. - */ - if (sig->group_stop_count) { - if (!--sig->group_stop_count) - sig->flags = SIGNAL_STOP_STOPPED; - current->exit_code = sig->group_exit_code; - __set_current_state(TASK_STOPPED); + if (!--sig->group_stop_count) { + sig->flags = SIGNAL_STOP_STOPPED; + notify = CLD_STOPPED; } + if (task_ptrace(current)) + notify = CLD_STOPPED; + + current->exit_code = sig->group_exit_code; + __set_current_state(TASK_STOPPED); + spin_unlock_irq(¤t->sighand->siglock); if (notify) { @@ -1860,14 +1857,11 @@ relock: signal->flags &= ~SIGNAL_CLD_MASK; - why = tracehook_notify_jctl(why, CLD_CONTINUED); spin_unlock_irq(&sighand->siglock); - if (why) { - read_lock(&tasklist_lock); - do_notify_parent_cldstop(current->group_leader, why); - read_unlock(&tasklist_lock); - } + read_lock(&tasklist_lock); + do_notify_parent_cldstop(current->group_leader, why); + read_unlock(&tasklist_lock); goto relock; } @@ -2034,7 +2028,7 @@ void exit_signals(struct task_struct *tsk) if (unlikely(tsk->signal->group_stop_count) && !--tsk->signal->group_stop_count) { tsk->signal->flags = SIGNAL_STOP_STOPPED; - group_stop = tracehook_notify_jctl(CLD_STOPPED, CLD_STOPPED); + group_stop = CLD_STOPPED; } out: spin_unlock_irq(&tsk->sighand->siglock); -- cgit v1.2.3 From e5c1902e9260a0075ea52cb5ef627a8d9aaede89 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:00 +0100 Subject: signal: Fix premature completion of group stop when interfered by ptrace task->signal->group_stop_count is used to track the progress of group stop. It's initialized to the number of tasks which need to stop for group stop to finish and each stopping or trapping task decrements. However, each task doesn't keep track of whether it decremented the counter or not and if woken up before the group stop is complete and stops again, it can decrement the counter multiple times. Please consider the following example code. static void *worker(void *arg) { while (1) ; return NULL; } int main(void) { pthread_t thread; pid_t pid; int i; pid = fork(); if (!pid) { for (i = 0; i < 5; i++) pthread_create(&thread, NULL, worker, NULL); while (1) ; return 0; } ptrace(PTRACE_ATTACH, pid, NULL, NULL); while (1) { waitid(P_PID, pid, NULL, WSTOPPED); ptrace(PTRACE_SINGLESTEP, pid, NULL, (void *)(long)SIGSTOP); } return 0; } The child creates five threads and the parent continuously traps the first thread and whenever the child gets a signal, SIGSTOP is delivered. If an external process sends SIGSTOP to the child, all other threads in the process should reliably stop. 
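The reproduction case described above, laid out as a complete program for readability (the include lines and comments are added here as assumptions — the commit message gives only the function bodies — and building it needs -pthread):

	#include <pthread.h>
	#include <signal.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/wait.h>
	#include <unistd.h>

	static void *worker(void *arg)
	{
		while (1)
			;
		return NULL;
	}

	int main(void)
	{
		pthread_t thread;
		pid_t pid;
		int i;

		pid = fork();
		if (!pid) {
			/* child: five spinning threads plus the spinning main thread */
			for (i = 0; i < 5; i++)
				pthread_create(&thread, NULL, worker, NULL);
			while (1)
				;
			return 0;
		}

		/* parent: continuously trap the first thread, delivering SIGSTOP
		 * on every stop so the child keeps re-entering group stop */
		ptrace(PTRACE_ATTACH, pid, NULL, NULL);
		while (1) {
			waitid(P_PID, pid, NULL, WSTOPPED);
			ptrace(PTRACE_SINGLESTEP, pid, NULL,
			       (void *)(long)SIGSTOP);
		}
		return 0;
	}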
However, due to the above bug, the first thread will often end up consuming group_stop_count multiple times and SIGSTOP often ends up stopping none or part of the other four threads. This patch adds a new field task->group_stop which is protected by siglock and uses GROUP_STOP_CONSUME flag to track which task is still to consume group_stop_count to fix this bug. task_clear_group_stop_pending() and task_participate_group_stop() are added to help manipulating group stop states. As ptrace_stop() now also uses task_participate_group_stop(), it will set SIGNAL_STOP_STOPPED if it completes a group stop. There still are many issues regarding the interaction between group stop and ptrace. Patches to address them will follow. - Oleg spotted duplicate GROUP_STOP_CONSUME. Dropped. Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov Cc: Roland McGrath --- include/linux/sched.h | 6 +++++ kernel/signal.c | 62 ++++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 60 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4b601be3dace..85f51042c2b8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1260,6 +1260,7 @@ struct task_struct { int exit_state; int exit_code, exit_signal; int pdeath_signal; /* The signal sent when the parent dies */ + unsigned int group_stop; /* GROUP_STOP_*, siglock protected */ /* ??? */ unsigned int personality; unsigned did_exec:1; @@ -1771,6 +1772,11 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) +/* + * task->group_stop flags + */ +#define GROUP_STOP_CONSUME (1 << 17) /* consume group stop count */ + #ifdef CONFIG_PREEMPT_RCU #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ diff --git a/kernel/signal.c b/kernel/signal.c index 95ac42dc3bcb..ecb20089eaff 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -223,6 +223,52 @@ static inline void print_dropped_signal(int sig) current->comm, current->pid, sig); } +/** + * task_clear_group_stop_pending - clear pending group stop + * @task: target task + * + * Clear group stop states for @task. + * + * CONTEXT: + * Must be called with @task->sighand->siglock held. + */ +static void task_clear_group_stop_pending(struct task_struct *task) +{ + task->group_stop &= ~GROUP_STOP_CONSUME; +} + +/** + * task_participate_group_stop - participate in a group stop + * @task: task participating in a group stop + * + * @task is participating in a group stop. Group stop states are cleared + * and the group stop count is consumed if %GROUP_STOP_CONSUME was set. If + * the consumption completes the group stop, the appropriate %SIGNAL_* + * flags are set. + * + * CONTEXT: + * Must be called with @task->sighand->siglock held. 
+ */ +static bool task_participate_group_stop(struct task_struct *task) +{ + struct signal_struct *sig = task->signal; + bool consume = task->group_stop & GROUP_STOP_CONSUME; + + task_clear_group_stop_pending(task); + + if (!consume) + return false; + + if (!WARN_ON_ONCE(sig->group_stop_count == 0)) + sig->group_stop_count--; + + if (!sig->group_stop_count) { + sig->flags = SIGNAL_STOP_STOPPED; + return true; + } + return false; +} + /* * allocate a new signal queue record * - this may be called without locks if and only if t == current, otherwise an @@ -1645,7 +1691,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) * we must participate in the bookkeeping. */ if (current->signal->group_stop_count > 0) - --current->signal->group_stop_count; + task_participate_group_stop(current); current->last_siginfo = info; current->exit_code = exit_code; @@ -1730,6 +1776,7 @@ static int do_signal_stop(int signr) int notify = 0; if (!sig->group_stop_count) { + unsigned int gstop = GROUP_STOP_CONSUME; struct task_struct *t; if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || @@ -1741,6 +1788,7 @@ static int do_signal_stop(int signr) */ sig->group_exit_code = signr; + current->group_stop = gstop; sig->group_stop_count = 1; for (t = next_thread(current); t != current; t = next_thread(t)) /* @@ -1750,19 +1798,19 @@ static int do_signal_stop(int signr) */ if (!(t->flags & PF_EXITING) && !task_is_stopped_or_traced(t)) { + t->group_stop = gstop; sig->group_stop_count++; signal_wake_up(t, 0); - } + } else + task_clear_group_stop_pending(t); } /* * If there are no other threads in the group, or if there is * a group stop in progress and we are the last to stop, report * to the parent. When ptraced, every thread reports itself. */ - if (!--sig->group_stop_count) { - sig->flags = SIGNAL_STOP_STOPPED; + if (task_participate_group_stop(current)) notify = CLD_STOPPED; - } if (task_ptrace(current)) notify = CLD_STOPPED; @@ -2026,10 +2074,8 @@ void exit_signals(struct task_struct *tsk) recalc_sigpending_and_wake(t); if (unlikely(tsk->signal->group_stop_count) && - !--tsk->signal->group_stop_count) { - tsk->signal->flags = SIGNAL_STOP_STOPPED; + task_participate_group_stop(tsk)) group_stop = CLD_STOPPED; - } out: spin_unlock_irq(&tsk->sighand->siglock); -- cgit v1.2.3 From 39efa3ef3a376a4e53de2f82fc91182459d34200 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:00 +0100 Subject: signal: Use GROUP_STOP_PENDING to stop once for a single group stop Currently task->signal->group_stop_count is used to decide whether to stop for group stop. However, if there is a task in the group which is taking a long time to stop, other tasks which are continued by ptrace would repeatedly stop for the same group stop until the group stop is complete. Conversely, if a ptraced task is in TASK_TRACED state, the debugger won't get notified of group stops which is inconsistent compared to the ptraced task in any other state. This patch introduces GROUP_STOP_PENDING which tracks whether a task is yet to stop for the group stop in progress. The flag is set when a group stop starts and cleared when the task stops the first time for the group stop, and consulted whenever whether the task should participate in a group stop needs to be determined. Note that now tasks in TASK_TRACED also participate in group stop. This results in the following behavior changes. * For a single group stop, a ptracer would see at most one stop reported. 
* A ptracee in TASK_TRACED now also participates in group stop and the tracer would get the notification. However, as a ptraced task could be in TASK_STOPPED state or any ptrace trap could consume group stop, the notification may still be missing. These will be addressed with further patches. * A ptracee may start a group stop while one is still in progress if the tracer let it continue with stop signal delivery. Group stop code handles this correctly. Oleg: * Spotted that a task might skip signal check even when its GROUP_STOP_PENDING is set. Fixed by updating recalc_sigpending_tsk() to check GROUP_STOP_PENDING instead of group_stop_count. * Pointed out that task->group_stop should be cleared whenever task->signal->group_stop_count is cleared. Fixed accordingly. * Pointed out the behavior inconsistency between TASK_TRACED and RUNNING and the last behavior change. Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov Cc: Roland McGrath --- fs/exec.c | 1 + include/linux/sched.h | 3 +++ kernel/signal.c | 36 +++++++++++++++++++++--------------- 3 files changed, 25 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 5e62d26a4fec..8328beb9016f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1659,6 +1659,7 @@ static int zap_process(struct task_struct *start, int exit_code) t = start; do { + task_clear_group_stop_pending(t); if (t != current && t->mm) { sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); diff --git a/include/linux/sched.h b/include/linux/sched.h index 85f51042c2b8..b2a17dfbdbad 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1775,8 +1775,11 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * /* * task->group_stop flags */ +#define GROUP_STOP_PENDING (1 << 16) /* task should stop for group stop */ #define GROUP_STOP_CONSUME (1 << 17) /* consume group stop count */ +extern void task_clear_group_stop_pending(struct task_struct *task); + #ifdef CONFIG_PREEMPT_RCU #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ diff --git a/kernel/signal.c b/kernel/signal.c index ecb20089eaff..a2e7a6527d24 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -124,7 +124,7 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked) static int recalc_sigpending_tsk(struct task_struct *t) { - if (t->signal->group_stop_count > 0 || + if ((t->group_stop & GROUP_STOP_PENDING) || PENDING(&t->pending, &t->blocked) || PENDING(&t->signal->shared_pending, &t->blocked)) { set_tsk_thread_flag(t, TIF_SIGPENDING); @@ -232,19 +232,19 @@ static inline void print_dropped_signal(int sig) * CONTEXT: * Must be called with @task->sighand->siglock held. */ -static void task_clear_group_stop_pending(struct task_struct *task) +void task_clear_group_stop_pending(struct task_struct *task) { - task->group_stop &= ~GROUP_STOP_CONSUME; + task->group_stop &= ~(GROUP_STOP_PENDING | GROUP_STOP_CONSUME); } /** * task_participate_group_stop - participate in a group stop * @task: task participating in a group stop * - * @task is participating in a group stop. Group stop states are cleared - * and the group stop count is consumed if %GROUP_STOP_CONSUME was set. If - * the consumption completes the group stop, the appropriate %SIGNAL_* - * flags are set. + * @task has GROUP_STOP_PENDING set and is participating in a group stop. + * Group stop states are cleared and the group stop count is consumed if + * %GROUP_STOP_CONSUME was set. 
If the consumption completes the group + * stop, the appropriate %SIGNAL_* flags are set. * * CONTEXT: * Must be called with @task->sighand->siglock held. @@ -254,6 +254,8 @@ static bool task_participate_group_stop(struct task_struct *task) struct signal_struct *sig = task->signal; bool consume = task->group_stop & GROUP_STOP_CONSUME; + WARN_ON_ONCE(!(task->group_stop & GROUP_STOP_PENDING)); + task_clear_group_stop_pending(task); if (!consume) @@ -765,6 +767,9 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) t = p; do { unsigned int state; + + task_clear_group_stop_pending(t); + rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending); /* * If there is a handler for SIGCONT, we must make @@ -906,6 +911,7 @@ static void complete_signal(int sig, struct task_struct *p, int group) signal->group_stop_count = 0; t = p; do { + task_clear_group_stop_pending(t); sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); } while_each_thread(p, t); @@ -1139,6 +1145,7 @@ int zap_other_threads(struct task_struct *p) p->signal->group_stop_count = 0; while_each_thread(p, t) { + task_clear_group_stop_pending(t); count++; /* Don't bother with already dead threads */ @@ -1690,7 +1697,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) * If there is a group stop in progress, * we must participate in the bookkeeping. */ - if (current->signal->group_stop_count > 0) + if (current->group_stop & GROUP_STOP_PENDING) task_participate_group_stop(current); current->last_siginfo = info; @@ -1775,8 +1782,8 @@ static int do_signal_stop(int signr) struct signal_struct *sig = current->signal; int notify = 0; - if (!sig->group_stop_count) { - unsigned int gstop = GROUP_STOP_CONSUME; + if (!(current->group_stop & GROUP_STOP_PENDING)) { + unsigned int gstop = GROUP_STOP_PENDING | GROUP_STOP_CONSUME; struct task_struct *t; if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || @@ -1796,8 +1803,7 @@ static int do_signal_stop(int signr) * stop is always done with the siglock held, * so this check has no races. */ - if (!(t->flags & PF_EXITING) && - !task_is_stopped_or_traced(t)) { + if (!(t->flags & PF_EXITING) && !task_is_stopped(t)) { t->group_stop = gstop; sig->group_stop_count++; signal_wake_up(t, 0); @@ -1926,8 +1932,8 @@ relock: if (unlikely(signr != 0)) ka = return_ka; else { - if (unlikely(signal->group_stop_count > 0) && - do_signal_stop(0)) + if (unlikely(current->group_stop & + GROUP_STOP_PENDING) && do_signal_stop(0)) goto relock; signr = dequeue_signal(current, ¤t->blocked, @@ -2073,7 +2079,7 @@ void exit_signals(struct task_struct *tsk) if (!signal_pending(t) && !(t->flags & PF_EXITING)) recalc_sigpending_and_wake(t); - if (unlikely(tsk->signal->group_stop_count) && + if (unlikely(tsk->group_stop & GROUP_STOP_PENDING) && task_participate_group_stop(tsk)) group_stop = CLD_STOPPED; out: -- cgit v1.2.3 From d79fdd6d96f46fabb779d86332e3677c6f5c2a4f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:00 +0100 Subject: ptrace: Clean transitions between TASK_STOPPED and TRACED Currently, if the task is STOPPED on ptrace attach, it's left alone and the state is silently changed to TRACED on the next ptrace call. The behavior breaks the assumption that arch_ptrace_stop() is called before any task is poked by ptrace and is ugly in that a task manipulates the state of another task directly. With GROUP_STOP_PENDING, the transitions between TASK_STOPPED and TRACED can be made clean. 
The tracer can use the flag to tell the tracee to retry stop on attach and detach. On retry, the tracee will enter the desired state in the correct way. The lower 16bits of task->group_stop is used to remember the signal number which caused the last group stop. This is used while retrying for ptrace attach as the original group_exit_code could have been consumed with wait(2) by then. As the real parent may wait(2) and consume the group_exit_code anytime, the group_exit_code needs to be saved separately so that it can be used when switching from regular sleep to ptrace_stop(). This is recorded in the lower 16bits of task->group_stop. If a task is already stopped and there's no intervening SIGCONT, a ptrace request immediately following a successful PTRACE_ATTACH should always succeed even if the tracer doesn't wait(2) for attach completion; however, with this change, the tracee might still be TASK_RUNNING trying to enter TASK_TRACED which would cause the following request to fail with -ESRCH. This intermediate state is hidden from the ptracer by setting GROUP_STOP_TRAPPING on attach and making ptrace_check_attach() wait for it to clear on its signal->wait_chldexit. Completing the transition or getting killed clears TRAPPING and wakes up the tracer. Note that the STOPPED -> RUNNING -> TRACED transition is still visible to other threads which are in the same group as the ptracer and the reverse transition is visible to all. Please read the comments for details. Oleg: * Spotted a race condition where a task may retry group stop without proper bookkeeping. Fixed by redoing bookkeeping on retry. * Spotted that the transition is visible to userland in several different ways. Most are fixed with GROUP_STOP_TRAPPING. Unhandled corner case is documented. * Pointed out not setting GROUP_STOP_SIGMASK on an already stopped task would result in more consistent behavior. * Pointed out that calling ptrace_stop() from do_signal_stop() in TASK_STOPPED can race with group stop start logic and then confuse the TRAPPING wait in ptrace_check_attach(). ptrace_stop() is now called with TASK_RUNNING. * Suggested using signal->wait_chldexit instead of bit wait. * Spotted a race condition between TRACED transition and clearing of TRAPPING. 
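For illustration only (not part of this patch), a minimal userspace sketch of the attach-then-request sequence the guarantee above covers; it assumes standard Linux ptrace(2) behaviour and trims most error handling:

	#include <errno.h>
	#include <signal.h>
	#include <stdio.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/wait.h>
	#include <unistd.h>

	static long probe = 0x1234;	/* same address in the child after fork() */

	int main(void)
	{
		long val;
		pid_t pid = fork();

		if (pid == 0) {		/* tracee: put itself into group stop */
			raise(SIGSTOP);
			_exit(0);
		}

		/* wait until the child is actually in TASK_STOPPED */
		waitpid(pid, NULL, WUNTRACED);

		/*
		 * Attach and immediately issue a request without wait(2)ing
		 * for attach completion.  Per the changelog above, this is
		 * expected to succeed because PTRACE_ATTACH does not return
		 * until GROUP_STOP_TRAPPING clears, i.e. the tracee has
		 * reached TRACED.
		 */
		if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) == 0) {
			errno = 0;
			val = ptrace(PTRACE_PEEKDATA, pid, &probe, NULL);
			printf("peek: %#lx (%s)\n", val, errno ? "failed" : "ok");
			ptrace(PTRACE_DETACH, pid, NULL, NULL);
		}

		kill(pid, SIGKILL);
		waitpid(pid, NULL, 0);
		return 0;
	}
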
Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov Cc: Roland McGrath Cc: Jan Kratochvil --- include/linux/sched.h | 2 ++ kernel/ptrace.c | 49 ++++++++++++++++++++++++++++---- kernel/signal.c | 79 ++++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 112 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b2a17dfbdbad..456d80ed3b78 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1775,8 +1775,10 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * /* * task->group_stop flags */ +#define GROUP_STOP_SIGMASK 0xffff /* signr of the last group stop */ #define GROUP_STOP_PENDING (1 << 16) /* task should stop for group stop */ #define GROUP_STOP_CONSUME (1 << 17) /* consume group stop count */ +#define GROUP_STOP_TRAPPING (1 << 18) /* switching from STOPPED to TRACED */ extern void task_clear_group_stop_pending(struct task_struct *task); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 6acf8954017c..745fc2dd00c5 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -49,14 +49,22 @@ static void ptrace_untrace(struct task_struct *child) spin_lock(&child->sighand->siglock); if (task_is_traced(child)) { /* - * If the group stop is completed or in progress, - * this thread was already counted as stopped. + * If group stop is completed or in progress, it should + * participate in the group stop. Set GROUP_STOP_PENDING + * before kicking it. + * + * This involves TRACED -> RUNNING -> STOPPED transition + * which is similar to but in the opposite direction of + * what happens while attaching to a stopped task. + * However, in this direction, the intermediate RUNNING + * state is not hidden even from the current ptracer and if + * it immediately re-attaches and performs a WNOHANG + * wait(2), it may fail. */ if (child->signal->flags & SIGNAL_STOP_STOPPED || child->signal->group_stop_count) - __set_task_state(child, TASK_STOPPED); - else - signal_wake_up(child, 1); + child->group_stop |= GROUP_STOP_PENDING; + signal_wake_up(child, 1); } spin_unlock(&child->sighand->siglock); } @@ -165,6 +173,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode) static int ptrace_attach(struct task_struct *task) { + bool wait_trap = false; int retval; audit_ptrace(task); @@ -204,12 +213,42 @@ static int ptrace_attach(struct task_struct *task) __ptrace_link(task, current); send_sig_info(SIGSTOP, SEND_SIG_FORCED, task); + spin_lock(&task->sighand->siglock); + + /* + * If the task is already STOPPED, set GROUP_STOP_PENDING and + * TRAPPING, and kick it so that it transits to TRACED. TRAPPING + * will be cleared if the child completes the transition or any + * event which clears the group stop states happens. We'll wait + * for the transition to complete before returning from this + * function. + * + * This hides STOPPED -> RUNNING -> TRACED transition from the + * attaching thread but a different thread in the same group can + * still observe the transient RUNNING state. IOW, if another + * thread's WNOHANG wait(2) on the stopped tracee races against + * ATTACH, the wait(2) may fail due to the transient RUNNING. + * + * The following task_is_stopped() test is safe as both transitions + * in and out of STOPPED are protected by siglock. 
+ */ + if (task_is_stopped(task)) { + task->group_stop |= GROUP_STOP_PENDING | GROUP_STOP_TRAPPING; + signal_wake_up(task, 1); + wait_trap = true; + } + + spin_unlock(&task->sighand->siglock); + retval = 0; unlock_tasklist: write_unlock_irq(&tasklist_lock); unlock_creds: mutex_unlock(&task->signal->cred_guard_mutex); out: + if (wait_trap) + wait_event(current->signal->wait_chldexit, + !(task->group_stop & GROUP_STOP_TRAPPING)); return retval; } diff --git a/kernel/signal.c b/kernel/signal.c index 418776c41d24..1e199919ae09 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -223,6 +223,26 @@ static inline void print_dropped_signal(int sig) current->comm, current->pid, sig); } +/** + * task_clear_group_stop_trapping - clear group stop trapping bit + * @task: target task + * + * If GROUP_STOP_TRAPPING is set, a ptracer is waiting for us. Clear it + * and wake up the ptracer. Note that we don't need any further locking. + * @task->siglock guarantees that @task->parent points to the ptracer. + * + * CONTEXT: + * Must be called with @task->sighand->siglock held. + */ +static void task_clear_group_stop_trapping(struct task_struct *task) +{ + if (unlikely(task->group_stop & GROUP_STOP_TRAPPING)) { + task->group_stop &= ~GROUP_STOP_TRAPPING; + __wake_up_sync(&task->parent->signal->wait_chldexit, + TASK_UNINTERRUPTIBLE, 1); + } +} + /** * task_clear_group_stop_pending - clear pending group stop * @task: target task @@ -1706,8 +1726,20 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) current->last_siginfo = info; current->exit_code = exit_code; - /* Let the debugger run. */ - __set_current_state(TASK_TRACED); + /* + * TRACED should be visible before TRAPPING is cleared; otherwise, + * the tracer might fail do_wait(). + */ + set_current_state(TASK_TRACED); + + /* + * We're committing to trapping. Clearing GROUP_STOP_TRAPPING and + * transition to TASK_TRACED should be atomic with respect to + * siglock. This hsould be done after the arch hook as siglock is + * released and regrabbed across it. + */ + task_clear_group_stop_trapping(current); + spin_unlock_irq(¤t->sighand->siglock); read_lock(&tasklist_lock); if (may_ptrace_stop()) { @@ -1788,6 +1820,9 @@ static int do_signal_stop(int signr) unsigned int gstop = GROUP_STOP_PENDING | GROUP_STOP_CONSUME; struct task_struct *t; + /* signr will be recorded in task->group_stop for retries */ + WARN_ON_ONCE(signr & ~GROUP_STOP_SIGMASK); + if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || unlikely(signal_group_exit(sig))) return 0; @@ -1797,25 +1832,27 @@ static int do_signal_stop(int signr) */ sig->group_exit_code = signr; - current->group_stop = gstop; + current->group_stop &= ~GROUP_STOP_SIGMASK; + current->group_stop |= signr | gstop; sig->group_stop_count = 1; - for (t = next_thread(current); t != current; t = next_thread(t)) + for (t = next_thread(current); t != current; + t = next_thread(t)) { + t->group_stop &= ~GROUP_STOP_SIGMASK; /* * Setting state to TASK_STOPPED for a group * stop is always done with the siglock held, * so this check has no races. 
*/ if (!(t->flags & PF_EXITING) && !task_is_stopped(t)) { - t->group_stop = gstop; + t->group_stop |= signr | gstop; sig->group_stop_count++; signal_wake_up(t, 0); - } else + } else { task_clear_group_stop_pending(t); + } + } } - - current->exit_code = sig->group_exit_code; - __set_current_state(TASK_STOPPED); - +retry: if (likely(!task_ptrace(current))) { int notify = 0; @@ -1827,6 +1864,7 @@ static int do_signal_stop(int signr) if (task_participate_group_stop(current)) notify = CLD_STOPPED; + __set_current_state(TASK_STOPPED); spin_unlock_irq(¤t->sighand->siglock); if (notify) { @@ -1839,13 +1877,28 @@ static int do_signal_stop(int signr) schedule(); spin_lock_irq(¤t->sighand->siglock); - } else - ptrace_stop(current->exit_code, CLD_STOPPED, 0, NULL); + } else { + ptrace_stop(current->group_stop & GROUP_STOP_SIGMASK, + CLD_STOPPED, 0, NULL); + current->exit_code = 0; + } + + /* + * GROUP_STOP_PENDING could be set if another group stop has + * started since being woken up or ptrace wants us to transit + * between TASK_STOPPED and TRACED. Retry group stop. + */ + if (current->group_stop & GROUP_STOP_PENDING) { + WARN_ON_ONCE(!(current->group_stop & GROUP_STOP_SIGMASK)); + goto retry; + } + + /* PTRACE_ATTACH might have raced with task killing, clear trapping */ + task_clear_group_stop_trapping(current); spin_unlock_irq(¤t->sighand->siglock); tracehook_finish_jctl(); - current->exit_code = 0; return 1; } -- cgit v1.2.3 From 6fb1b1e18fe3d141c54182c5d5b3af823bed455f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 19 Mar 2011 13:55:39 +0100 Subject: ath9k: add support for overriding the MAC address through platform data On some devices the correct MAC address is not in the EEPROM data, but stored somewhere else. Signed-off-by: Felix Fietkau Signed-off-by: John W. 
Linville --- drivers/net/wireless/ath/ath9k/init.c | 7 ++++++- include/linux/ath9k_platform.h | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index 79aec983279f..e22e8215d941 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -15,6 +15,7 @@ */ #include +#include #include "ath9k.h" @@ -537,6 +538,7 @@ static void ath9k_init_misc(struct ath_softc *sc) static int ath9k_init_softc(u16 devid, struct ath_softc *sc, u16 subsysid, const struct ath_bus_ops *bus_ops) { + struct ath9k_platform_data *pdata = sc->dev->platform_data; struct ath_hw *ah = NULL; struct ath_common *common; int ret = 0, i; @@ -551,7 +553,7 @@ static int ath9k_init_softc(u16 devid, struct ath_softc *sc, u16 subsysid, ah->hw_version.subsysid = subsysid; sc->sc_ah = ah; - if (!sc->dev->platform_data) + if (!pdata) ah->ah_flags |= AH_USE_EEPROM; common = ath9k_hw_common(ah); @@ -587,6 +589,9 @@ static int ath9k_init_softc(u16 devid, struct ath_softc *sc, u16 subsysid, if (ret) goto err_hw; + if (pdata && pdata->macaddr) + memcpy(common->macaddr, pdata->macaddr, ETH_ALEN); + ret = ath9k_init_queues(sc); if (ret) goto err_queues; diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h index b847fc7b93f9..b5f06583a1ba 100644 --- a/include/linux/ath9k_platform.h +++ b/include/linux/ath9k_platform.h @@ -23,6 +23,7 @@ struct ath9k_platform_data { u16 eeprom_data[ATH9K_PLAT_EEP_MAX_WORDS]; + u8 *macaddr; }; #endif /* _LINUX_ATH9K_PLATFORM_H */ -- cgit v1.2.3 From 6de66dd963ddd669667a81a2401f2fd6472ff55c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 19 Mar 2011 13:55:40 +0100 Subject: ath9k: add support for overriding LED pin and GPIO settings from platform data Signed-off-by: Felix Fietkau Signed-off-by: John W. 
Linville --- drivers/net/wireless/ath/ath9k/gpio.c | 14 ++++++++------ drivers/net/wireless/ath/ath9k/hw.h | 2 +- drivers/net/wireless/ath/ath9k/init.c | 8 +++++++- include/linux/ath9k_platform.h | 4 ++++ 4 files changed, 20 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/ath/ath9k/gpio.c b/drivers/net/wireless/ath/ath9k/gpio.c index 0fb8f8ac275a..44a0a886124d 100644 --- a/drivers/net/wireless/ath/ath9k/gpio.c +++ b/drivers/net/wireless/ath/ath9k/gpio.c @@ -41,12 +41,14 @@ void ath_init_leds(struct ath_softc *sc) { int ret; - if (AR_SREV_9287(sc->sc_ah)) - sc->sc_ah->led_pin = ATH_LED_PIN_9287; - else if (AR_SREV_9485(sc->sc_ah)) - sc->sc_ah->led_pin = ATH_LED_PIN_9485; - else - sc->sc_ah->led_pin = ATH_LED_PIN_DEF; + if (sc->sc_ah->led_pin < 0) { + if (AR_SREV_9287(sc->sc_ah)) + sc->sc_ah->led_pin = ATH_LED_PIN_9287; + else if (AR_SREV_9485(sc->sc_ah)) + sc->sc_ah->led_pin = ATH_LED_PIN_9485; + else + sc->sc_ah->led_pin = ATH_LED_PIN_DEF; + } /* Configure gpio 1 for output */ ath9k_hw_cfg_output(sc->sc_ah, sc->sc_ah->led_pin, diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h index 775c0eb10b95..3d9fc6e391a7 100644 --- a/drivers/net/wireless/ath/ath9k/hw.h +++ b/drivers/net/wireless/ath/ath9k/hw.h @@ -798,7 +798,7 @@ struct ath_hw { u32 originalGain[22]; int initPDADC; int PDADCdelta; - u8 led_pin; + int led_pin; u32 gpio_mask; u32 gpio_val; diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index e22e8215d941..cdb0f1c89a0e 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -553,8 +553,14 @@ static int ath9k_init_softc(u16 devid, struct ath_softc *sc, u16 subsysid, ah->hw_version.subsysid = subsysid; sc->sc_ah = ah; - if (!pdata) + if (!pdata) { ah->ah_flags |= AH_USE_EEPROM; + sc->sc_ah->led_pin = -1; + } else { + sc->sc_ah->gpio_mask = pdata->gpio_mask; + sc->sc_ah->gpio_val = pdata->gpio_val; + sc->sc_ah->led_pin = pdata->led_pin; + } common = ath9k_hw_common(ah); common->ops = &ath9k_common_ops; diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h index b5f06583a1ba..020387a114e3 100644 --- a/include/linux/ath9k_platform.h +++ b/include/linux/ath9k_platform.h @@ -24,6 +24,10 @@ struct ath9k_platform_data { u16 eeprom_data[ATH9K_PLAT_EEP_MAX_WORDS]; u8 *macaddr; + + int led_pin; + u32 gpio_mask; + u32 gpio_val; }; #endif /* _LINUX_ATH9K_PLATFORM_H */ -- cgit v1.2.3 From 1cb7b1e0de6a1f8f071f4a146e3d10f3a662f707 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Thu, 31 Mar 2011 13:36:38 +0200 Subject: ACPI EC: remove dead code static void acpi_ec_gpe_query(void *ec_cxt); -> The function is right above this declaration -> not needed. poll_force is also not used, cleaned up in ec.c and its users: compal-laptop and msi-laptop. Signed-off-by: Thomas Renninger Signed-off-by: Len Brown --- drivers/acpi/ec.c | 6 +----- drivers/platform/x86/compal-laptop.c | 12 ++++++------ drivers/platform/x86/msi-laptop.c | 12 ++++++------ include/linux/acpi.h | 3 +-- 4 files changed, 14 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index fa848c4116a8..b3f1d6f52a89 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -69,7 +69,6 @@ enum ec_command { #define ACPI_EC_DELAY 500 /* Wait 500ms max. during EC ops */ #define ACPI_EC_UDELAY_GLK 1000 /* Wait 1ms max. 
to get global lock */ -#define ACPI_EC_CDELAY 10 /* Wait 10us before polling EC */ #define ACPI_EC_MSI_UDELAY 550 /* Wait 550us for MSI EC */ #define ACPI_EC_STORM_THRESHOLD 8 /* number of false interrupts @@ -433,8 +432,7 @@ EXPORT_SYMBOL(ec_write); int ec_transaction(u8 command, const u8 * wdata, unsigned wdata_len, - u8 * rdata, unsigned rdata_len, - int force_poll) + u8 * rdata, unsigned rdata_len) { struct transaction t = {.command = command, .wdata = wdata, .rdata = rdata, @@ -592,8 +590,6 @@ static void acpi_ec_gpe_query(void *ec_cxt) mutex_unlock(&ec->lock); } -static void acpi_ec_gpe_query(void *ec_cxt); - static int ec_check_sci(struct acpi_ec *ec, u8 state) { if (state & ACPI_EC_FLAG_SCI) { diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c index 034572b980c9..f4f43e65475f 100644 --- a/drivers/platform/x86/compal-laptop.c +++ b/drivers/platform/x86/compal-laptop.c @@ -200,7 +200,7 @@ static bool extra_features; * watching the output of address 0x4F (do an ec_transaction writing 0x33 * into 0x4F and read a few bytes from the output, like so: * u8 writeData = 0x33; - * ec_transaction(0x4F, &writeData, 1, buffer, 32, 0); + * ec_transaction(0x4F, &writeData, 1, buffer, 32); * That address is labled "fan1 table information" in the service manual. * It should be clear which value in 'buffer' changes). This seems to be * related to fan speed. It isn't a proper 'realtime' fan speed value @@ -286,7 +286,7 @@ static int get_backlight_level(void) static void set_backlight_state(bool on) { u8 data = on ? BACKLIGHT_STATE_ON_DATA : BACKLIGHT_STATE_OFF_DATA; - ec_transaction(BACKLIGHT_STATE_ADDR, &data, 1, NULL, 0, 0); + ec_transaction(BACKLIGHT_STATE_ADDR, &data, 1, NULL, 0); } @@ -294,24 +294,24 @@ static void set_backlight_state(bool on) static void pwm_enable_control(void) { unsigned char writeData = PWM_ENABLE_DATA; - ec_transaction(PWM_ENABLE_ADDR, &writeData, 1, NULL, 0, 0); + ec_transaction(PWM_ENABLE_ADDR, &writeData, 1, NULL, 0); } static void pwm_disable_control(void) { unsigned char writeData = PWM_DISABLE_DATA; - ec_transaction(PWM_DISABLE_ADDR, &writeData, 1, NULL, 0, 0); + ec_transaction(PWM_DISABLE_ADDR, &writeData, 1, NULL, 0); } static void set_pwm(int pwm) { - ec_transaction(PWM_ADDRESS, &pwm_lookup_table[pwm], 1, NULL, 0, 0); + ec_transaction(PWM_ADDRESS, &pwm_lookup_table[pwm], 1, NULL, 0); } static int get_fan_rpm(void) { u8 value, data = FAN_DATA; - ec_transaction(FAN_ADDRESS, &data, 1, &value, 1, 0); + ec_transaction(FAN_ADDRESS, &data, 1, &value, 1); return 100 * (int)value; } diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c index 7e9bb6df9d39..918a65dd2ab3 100644 --- a/drivers/platform/x86/msi-laptop.c +++ b/drivers/platform/x86/msi-laptop.c @@ -120,7 +120,7 @@ static int set_lcd_level(int level) buf[1] = (u8) (level*31); return ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, buf, sizeof(buf), - NULL, 0, 1); + NULL, 0); } static int get_lcd_level(void) @@ -129,7 +129,7 @@ static int get_lcd_level(void) int result; result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, &wdata, 1, - &rdata, 1, 1); + &rdata, 1); if (result < 0) return result; @@ -142,7 +142,7 @@ static int get_auto_brightness(void) int result; result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, &wdata, 1, - &rdata, 1, 1); + &rdata, 1); if (result < 0) return result; @@ -157,7 +157,7 @@ static int set_auto_brightness(int enable) wdata[0] = 4; result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, wdata, 1, - &rdata, 1, 1); + &rdata, 1); if (result < 0) 
return result; @@ -165,7 +165,7 @@ static int set_auto_brightness(int enable) wdata[1] = (rdata & 0xF7) | (enable ? 8 : 0); return ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, wdata, 2, - NULL, 0, 1); + NULL, 0); } static ssize_t set_device_state(const char *buf, size_t count, u8 mask) @@ -202,7 +202,7 @@ static int get_wireless_state(int *wlan, int *bluetooth) u8 wdata = 0, rdata; int result; - result = ec_transaction(MSI_EC_COMMAND_WIRELESS, &wdata, 1, &rdata, 1, 1); + result = ec_transaction(MSI_EC_COMMAND_WIRELESS, &wdata, 1, &rdata, 1); if (result < 0) return -1; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index a2e910e01293..1deb2a73c2da 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -150,8 +150,7 @@ extern int ec_read(u8 addr, u8 *val); extern int ec_write(u8 addr, u8 val); extern int ec_transaction(u8 command, const u8 *wdata, unsigned wdata_len, - u8 *rdata, unsigned rdata_len, - int force_poll); + u8 *rdata, unsigned rdata_len); #if defined(CONFIG_ACPI_WMI) || defined(CONFIG_ACPI_WMI_MODULE) -- cgit v1.2.3 From 6cb6a27c45cec9184302c2e350b3593c64bc7f6c Mon Sep 17 00:00:00 2001 From: MichaÅ‚ MirosÅ‚aw Date: Sat, 2 Apr 2011 22:48:47 -0700 Subject: net: Call netdev_features_change() from netdev_update_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue FEAT_CHANGE notification when features are changed by netdev_update_features(). This will allow changes made by extra constraints on e.g. MTU change to be properly propagated like changes via ethtool. Signed-off-by: MichaÅ‚ MirosÅ‚aw Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 23 +++++++++++++++++------ net/core/ethtool.c | 6 +++--- 3 files changed, 21 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5eeb2cd3631c..423a5447d2ed 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2550,6 +2550,7 @@ static inline u32 netdev_get_wanted_features(struct net_device *dev) } u32 netdev_increment_features(u32 all, u32 one, u32 mask); u32 netdev_fix_features(struct net_device *dev, u32 features); +int __netdev_update_features(struct net_device *dev); void netdev_update_features(struct net_device *dev); void netif_stacked_transfer_operstate(const struct net_device *rootdev, diff --git a/net/core/dev.c b/net/core/dev.c index 3da9fb06d47a..02f56376fe99 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5236,7 +5236,7 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) } EXPORT_SYMBOL(netdev_fix_features); -void netdev_update_features(struct net_device *dev) +int __netdev_update_features(struct net_device *dev) { u32 features; int err = 0; @@ -5250,7 +5250,7 @@ void netdev_update_features(struct net_device *dev) features = netdev_fix_features(dev, features); if (dev->features == features) - return; + return 0; netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n", dev->features, features); @@ -5258,12 +5258,23 @@ void netdev_update_features(struct net_device *dev) if (dev->netdev_ops->ndo_set_features) err = dev->netdev_ops->ndo_set_features(dev, features); - if (!err) - dev->features = features; - else if (err < 0) + if (unlikely(err < 0)) { netdev_err(dev, "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n", err, features, dev->features); + return -1; + } + + if (!err) + dev->features = features; + + return 1; +} + +void netdev_update_features(struct net_device *dev) +{ + if 
(__netdev_update_features(dev)) + netdev_features_change(dev); } EXPORT_SYMBOL(netdev_update_features); @@ -5430,7 +5441,7 @@ int register_netdevice(struct net_device *dev) goto err_uninit; dev->reg_state = NETREG_REGISTERED; - netdev_update_features(dev); + __netdev_update_features(dev); /* * Default initial state at registry is that the diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 74ead9eca126..439e4b0e1312 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -317,7 +317,7 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr) dev->wanted_features &= ~features[0].valid; dev->wanted_features |= features[0].valid & features[0].requested; - netdev_update_features(dev); + __netdev_update_features(dev); if ((dev->wanted_features ^ dev->features) & features[0].valid) ret |= ETHTOOL_F_WISH; @@ -499,7 +499,7 @@ static int ethtool_set_one_feature(struct net_device *dev, else dev->wanted_features &= ~mask; - netdev_update_features(dev); + __netdev_update_features(dev); return 0; } @@ -551,7 +551,7 @@ int __ethtool_set_flags(struct net_device *dev, u32 data) dev->wanted_features = (dev->wanted_features & ~changed) | data; - netdev_update_features(dev); + __netdev_update_features(dev); return 0; } -- cgit v1.2.3 From ee77f075921730b2b465880f9fd4367003bdab39 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 1 Apr 2011 20:12:38 +0200 Subject: signal: Turn SIGNAL_STOP_DEQUEUED into GROUP_STOP_DEQUEUED This patch moves SIGNAL_STOP_DEQUEUED from signal_struct->flags to task_struct->group_stop, and thus makes it per-thread. Like SIGNAL_STOP_DEQUEUED, GROUP_STOP_DEQUEUED can be false-positive after return from get_signal_to_deliver(), this is fine. The only purpose of this bit is: we can drop ->siglock after __dequeue_signal() returns the sig_kernel_stop() signal and before we call do_signal_stop(), in this case we must not miss SIGCONT if it comes in between. But, unlike SIGNAL_STOP_DEQUEUED, GROUP_STOP_DEQUEUED can not be false-positive in do_signal_stop() if multiple threads dequeue the sig_kernel_stop() signal at the same time. Consider two threads T1 and T2, SIGTTIN has a hanlder. - T1 dequeues SIGTSTP and sets SIGNAL_STOP_DEQUEUED, then it drops ->siglock - SIGCONT comes and clears SIGNAL_STOP_DEQUEUED, SIGTSTP should be cancelled. - T2 dequeues SIGTTIN and sets SIGNAL_STOP_DEQUEUED again. Since we have a handler we should not stop, T2 returns to usermode to run the handler. - T1 continues, calls do_signal_stop() and wrongly starts the group stop because SIGNAL_STOP_DEQUEUED was restored in between. With or without this change: - we need to do something with ptrace_signal() which can return SIGSTOP, but this needs another discussion - SIGSTOP can be lost if it races with the mt exec, will be fixed later. Signed-off-by: Oleg Nesterov Signed-off-by: Tejun Heo --- include/linux/sched.h | 6 +++--- kernel/signal.c | 14 ++++---------- 2 files changed, 7 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 456d80ed3b78..8cef82d4cf77 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -652,9 +652,8 @@ struct signal_struct { * Bits in flags field of signal_struct. 
*/ #define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ -#define SIGNAL_STOP_DEQUEUED 0x00000002 /* stop signal dequeued */ -#define SIGNAL_STOP_CONTINUED 0x00000004 /* SIGCONT since WCONTINUED reap */ -#define SIGNAL_GROUP_EXIT 0x00000008 /* group exit in progress */ +#define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ +#define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ /* * Pending notifications to parent. */ @@ -1779,6 +1778,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define GROUP_STOP_PENDING (1 << 16) /* task should stop for group stop */ #define GROUP_STOP_CONSUME (1 << 17) /* consume group stop count */ #define GROUP_STOP_TRAPPING (1 << 18) /* switching from STOPPED to TRACED */ +#define GROUP_STOP_DEQUEUED (1 << 19) /* stop signal dequeued */ extern void task_clear_group_stop_pending(struct task_struct *task); diff --git a/kernel/signal.c b/kernel/signal.c index e9abc69dc0d8..4f7312b49b2d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -254,7 +254,8 @@ static void task_clear_group_stop_trapping(struct task_struct *task) */ void task_clear_group_stop_pending(struct task_struct *task) { - task->group_stop &= ~(GROUP_STOP_PENDING | GROUP_STOP_CONSUME); + task->group_stop &= ~(GROUP_STOP_PENDING | GROUP_STOP_CONSUME | + GROUP_STOP_DEQUEUED); } /** @@ -602,7 +603,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) * is to alert stop-signal processing code when another * processor has come along and cleared the flag. */ - tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; + current->group_stop |= GROUP_STOP_DEQUEUED; } if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) { /* @@ -821,13 +822,6 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) signal->flags = why | SIGNAL_STOP_CONTINUED; signal->group_stop_count = 0; signal->group_exit_code = 0; - } else { - /* - * We are not stopped, but there could be a stop - * signal in the middle of being processed after - * being removed from the queue. Clear that too. - */ - signal->flags &= ~SIGNAL_STOP_DEQUEUED; } } @@ -1855,7 +1849,7 @@ static int do_signal_stop(int signr) /* signr will be recorded in task->group_stop for retries */ WARN_ON_ONCE(signr & ~GROUP_STOP_SIGMASK); - if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || + if (!likely(current->group_stop & GROUP_STOP_DEQUEUED) || unlikely(signal_group_exit(sig))) return 0; /* -- cgit v1.2.3 From 17f60a7da150fdd0cfb9756f86a262daa72c835f Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:07:50 -0400 Subject: capabilites: allow the application of capability limits to usermode helpers There is no way to limit the capabilities of usermodehelpers. This problem reared its head recently when someone complained that any user with cap_net_admin was able to load arbitrary kernel modules, even though the user didn't have cap_sys_module. The reason is because the actual load is done by a usermode helper and those always have the full cap set. This patch addes new sysctls which allow us to bound the permissions of usermode helpers. /proc/sys/kernel/usermodehelper/bset /proc/sys/kernel/usermodehelper/inheritable You must have CAP_SYS_MODULE and CAP_SETPCAP to change these (changes are &= ONLY). When the kernel launches a usermodehelper it will do so with these as the bset and pI. 
-v2: make globals static create spinlock to protect globals -v3: require both CAP_SETPCAP and CAP_SYS_MODULE -v4: fix the typo s/CAP_SET_PCAP/CAP_SETPCAP/ because I didn't commit Signed-off-by: Eric Paris No-objection-from: Serge E. Hallyn Acked-by: David Howells Acked-by: Serge E. Hallyn Acked-by: Andrew G. Morgan Signed-off-by: James Morris --- include/linux/kmod.h | 3 ++ kernel/kmod.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/sysctl.c | 6 ++++ 3 files changed, 109 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 6efd7a78de6a..79bb98d71858 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -24,6 +24,7 @@ #include #include #include +#include #define KMOD_PATH_LEN 256 @@ -109,6 +110,8 @@ call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait) NULL, NULL, NULL); } +extern struct ctl_table usermodehelper_table[]; + extern void usermodehelper_init(void); extern int usermodehelper_disable(void); diff --git a/kernel/kmod.c b/kernel/kmod.c index 9cd0591c96a2..06fdea2819b6 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +44,13 @@ extern int max_threads; static struct workqueue_struct *khelper_wq; +#define CAP_BSET (void *)1 +#define CAP_PI (void *)2 + +static kernel_cap_t usermodehelper_bset = CAP_FULL_SET; +static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET; +static DEFINE_SPINLOCK(umh_sysctl_lock); + #ifdef CONFIG_MODULES /* @@ -132,6 +140,7 @@ EXPORT_SYMBOL(__request_module); static int ____call_usermodehelper(void *data) { struct subprocess_info *sub_info = data; + struct cred *new; int retval; spin_lock_irq(¤t->sighand->siglock); @@ -153,6 +162,19 @@ static int ____call_usermodehelper(void *data) goto fail; } + retval = -ENOMEM; + new = prepare_kernel_cred(current); + if (!new) + goto fail; + + spin_lock(&umh_sysctl_lock); + new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset); + new->cap_inheritable = cap_intersect(usermodehelper_inheritable, + new->cap_inheritable); + spin_unlock(&umh_sysctl_lock); + + commit_creds(new); + retval = kernel_execve(sub_info->path, (const char *const *)sub_info->argv, (const char *const *)sub_info->envp); @@ -418,6 +440,84 @@ unlock: } EXPORT_SYMBOL(call_usermodehelper_exec); +static int proc_cap_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table t; + unsigned long cap_array[_KERNEL_CAPABILITY_U32S]; + kernel_cap_t new_cap; + int err, i; + + if (write && (!capable(CAP_SETPCAP) || + !capable(CAP_SYS_MODULE))) + return -EPERM; + + /* + * convert from the global kernel_cap_t to the ulong array to print to + * userspace if this is a read. + */ + spin_lock(&umh_sysctl_lock); + for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) { + if (table->data == CAP_BSET) + cap_array[i] = usermodehelper_bset.cap[i]; + else if (table->data == CAP_PI) + cap_array[i] = usermodehelper_inheritable.cap[i]; + else + BUG(); + } + spin_unlock(&umh_sysctl_lock); + + t = *table; + t.data = &cap_array; + + /* + * actually read or write and array of ulongs from userspace. 
Remember + * these are least significant 32 bits first + */ + err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos); + if (err < 0) + return err; + + /* + * convert from the sysctl array of ulongs to the kernel_cap_t + * internal representation + */ + for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) + new_cap.cap[i] = cap_array[i]; + + /* + * Drop everything not in the new_cap (but don't add things) + */ + spin_lock(&umh_sysctl_lock); + if (write) { + if (table->data == CAP_BSET) + usermodehelper_bset = cap_intersect(usermodehelper_bset, new_cap); + if (table->data == CAP_PI) + usermodehelper_inheritable = cap_intersect(usermodehelper_inheritable, new_cap); + } + spin_unlock(&umh_sysctl_lock); + + return 0; +} + +struct ctl_table usermodehelper_table[] = { + { + .procname = "bset", + .data = CAP_BSET, + .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long), + .mode = 0600, + .proc_handler = proc_cap_handler, + }, + { + .procname = "inheritable", + .data = CAP_PI, + .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long), + .mode = 0600, + .proc_handler = proc_cap_handler, + }, + { } +}; + void __init usermodehelper_init(void) { khelper_wq = create_singlethread_workqueue("khelper"); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c0bb32414b17..965134bed6cd 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include @@ -615,6 +616,11 @@ static struct ctl_table kern_table[] = { .mode = 0555, .child = random_table, }, + { + .procname = "usermodehelper", + .mode = 0555, + .child = usermodehelper_table, + }, { .procname = "overflowuid", .data = &overflowuid, -- cgit v1.2.3 From ffa8e59df047d57e812a04f7d6baf6a25c652c0c Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:08:34 -0400 Subject: capabilities: do not drop CAP_SETPCAP from the initial task In olden' days of yore CAP_SETPCAP had special meaning for the init task. We actually have code to make sure that CAP_SETPCAP wasn't in pE of things using the init_cred. But CAP_SETPCAP isn't so special any more and we don't have a reason to special case dropping it for init or kthreads.... Signed-off-by: Eric Paris Acked-by: Andrew G. 
Morgan Signed-off-by: James Morris --- include/linux/capability.h | 6 ++++-- kernel/capability.c | 2 -- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 16ee8b49a200..11d562863e49 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -412,7 +412,6 @@ extern const kernel_cap_t __cap_init_eff_set; # define CAP_EMPTY_SET ((kernel_cap_t){{ 0, 0 }}) # define CAP_FULL_SET ((kernel_cap_t){{ ~0, ~0 }}) -# define CAP_INIT_EFF_SET ((kernel_cap_t){{ ~CAP_TO_MASK(CAP_SETPCAP), ~0 }}) # define CAP_FS_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \ | CAP_TO_MASK(CAP_LINUX_IMMUTABLE), \ CAP_FS_MASK_B1 } }) @@ -423,10 +422,10 @@ extern const kernel_cap_t __cap_init_eff_set; #endif /* _KERNEL_CAPABILITY_U32S != 2 */ #define CAP_INIT_INH_SET CAP_EMPTY_SET +#define CAP_INIT_EFF_SET CAP_FULL_SET # define cap_clear(c) do { (c) = __cap_empty_set; } while (0) # define cap_set_full(c) do { (c) = __cap_full_set; } while (0) -# define cap_set_init_eff(c) do { (c) = __cap_init_eff_set; } while (0) #define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag)) #define cap_lower(c, flag) ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag)) @@ -547,6 +546,9 @@ extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool task_ns_capable(struct task_struct *t, int cap); +extern const kernel_cap_t __cap_empty_set; +extern const kernel_cap_t __cap_full_set; + /** * nsown_capable - Check superior capability to one's own user_ns * @cap: The capability in question diff --git a/kernel/capability.c b/kernel/capability.c index bf0c734d0c12..2a374d512ead 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -23,11 +23,9 @@ const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET; const kernel_cap_t __cap_full_set = CAP_FULL_SET; -const kernel_cap_t __cap_init_eff_set = CAP_INIT_EFF_SET; EXPORT_SYMBOL(__cap_empty_set); EXPORT_SYMBOL(__cap_full_set); -EXPORT_SYMBOL(__cap_init_eff_set); int file_caps_enabled = 1; -- cgit v1.2.3 From 5163b583a036b103c3cec7171d6731c125773ed6 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:08:39 -0400 Subject: capabilities: delete unused cap_set_full unused code. Clean it up. Signed-off-by: Eric Paris Acked-by: David Howells Acked-by: Andrew G. 
Morgan Signed-off-by: James Morris --- include/linux/capability.h | 2 -- kernel/capability.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 11d562863e49..8d0da30dad23 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -425,7 +425,6 @@ extern const kernel_cap_t __cap_init_eff_set; #define CAP_INIT_EFF_SET CAP_FULL_SET # define cap_clear(c) do { (c) = __cap_empty_set; } while (0) -# define cap_set_full(c) do { (c) = __cap_full_set; } while (0) #define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag)) #define cap_lower(c, flag) ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag)) @@ -547,7 +546,6 @@ extern bool ns_capable(struct user_namespace *ns, int cap); extern bool task_ns_capable(struct task_struct *t, int cap); extern const kernel_cap_t __cap_empty_set; -extern const kernel_cap_t __cap_full_set; /** * nsown_capable - Check superior capability to one's own user_ns diff --git a/kernel/capability.c b/kernel/capability.c index 2a374d512ead..14ea4210a530 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -22,10 +22,8 @@ */ const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET; -const kernel_cap_t __cap_full_set = CAP_FULL_SET; EXPORT_SYMBOL(__cap_empty_set); -EXPORT_SYMBOL(__cap_full_set); int file_caps_enabled = 1; -- cgit v1.2.3 From a3232d2fa2e3cbab3e76d91cdae5890fee8a4034 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:08:45 -0400 Subject: capabilities: delete all CAP_INIT macros The CAP_INIT macros of INH, BSET, and EFF made sense at one point in time, but now days they aren't helping. Just open code the logic in the init_cred. Signed-off-by: Eric Paris Acked-by: David Howells Signed-off-by: James Morris --- include/linux/capability.h | 3 --- include/linux/init_task.h | 7 ------- kernel/cred.c | 6 +++--- 3 files changed, 3 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 8d0da30dad23..04fed72809de 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -421,9 +421,6 @@ extern const kernel_cap_t __cap_init_eff_set; #endif /* _KERNEL_CAPABILITY_U32S != 2 */ -#define CAP_INIT_INH_SET CAP_EMPTY_SET -#define CAP_INIT_EFF_SET CAP_FULL_SET - # define cap_clear(c) do { (c) = __cap_empty_set; } while (0) #define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag)) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index caa151fbebb7..1f277204de34 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -83,13 +83,6 @@ extern struct group_info init_groups; #define INIT_IDS #endif -/* - * Because of the reduced scope of CAP_SETPCAP when filesystem - * capabilities are in effect, it is safe to allow CAP_SETPCAP to - * be available in the default configuration. 
- */ -# define CAP_INIT_BSET CAP_FULL_SET - #ifdef CONFIG_RCU_BOOST #define INIT_TASK_RCU_BOOST() \ .rcu_boost_mutex = NULL, diff --git a/kernel/cred.c b/kernel/cred.c index 5557b55048df..b982f0863ae9 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -49,10 +49,10 @@ struct cred init_cred = { .magic = CRED_MAGIC, #endif .securebits = SECUREBITS_DEFAULT, - .cap_inheritable = CAP_INIT_INH_SET, + .cap_inheritable = CAP_EMPTY_SET, .cap_permitted = CAP_FULL_SET, - .cap_effective = CAP_INIT_EFF_SET, - .cap_bset = CAP_INIT_BSET, + .cap_effective = CAP_FULL_SET, + .cap_bset = CAP_FULL_SET, .user = INIT_USER, .group_info = &init_groups, #ifdef CONFIG_KEYS -- cgit v1.2.3 From 7f5c6d4f665bb57a19a34ce1fb16cc708c04f219 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 4 Apr 2011 17:04:03 +0200 Subject: netfilter: get rid of atomic ops in fast path We currently use a percpu spinlock to 'protect' rule bytes/packets counters, after various attempts to use RCU instead. Lately we added a seqlock so that get_counters() can run without blocking BH or 'writers'. But we really only need the seqcount in it. Spinlock itself is only locked by the current/owner cpu, so we can remove it completely. This cleanups api, using correct 'writer' vs 'reader' semantic. At replace time, the get_counters() call makes sure all cpus are done using the old table. Signed-off-by: Eric Dumazet Cc: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 96 +++++++++++++++++--------------------- net/ipv4/netfilter/arp_tables.c | 18 ++++--- net/ipv4/netfilter/ip_tables.c | 28 +++++------ net/ipv6/netfilter/ip6_tables.c | 19 +++++--- net/netfilter/x_tables.c | 9 ++-- 5 files changed, 80 insertions(+), 90 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 37219525ff6f..32cddf78b13e 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -456,72 +456,60 @@ extern void xt_proto_fini(struct net *net, u_int8_t af); extern struct xt_table_info *xt_alloc_table_info(unsigned int size); extern void xt_free_table_info(struct xt_table_info *info); -/* - * Per-CPU spinlock associated with per-cpu table entries, and - * with a counter for the "reading" side that allows a recursive - * reader to avoid taking the lock and deadlocking. - * - * "reading" is used by ip/arp/ip6 tables rule processing which runs per-cpu. - * It needs to ensure that the rules are not being changed while the packet - * is being processed. In some cases, the read lock will be acquired - * twice on the same CPU; this is okay because of the count. - * - * "writing" is used when reading counters. - * During replace any readers that are using the old tables have to complete - * before freeing the old table. This is handled by the write locking - * necessary for reading the counters. +/** + * xt_recseq - recursive seqcount for netfilter use + * + * Packet processing changes the seqcount only if no recursion happened + * get_counters() can use read_seqcount_begin()/read_seqcount_retry(), + * because we use the normal seqcount convention : + * Low order bit set to 1 if a writer is active. */ -struct xt_info_lock { - seqlock_t lock; - unsigned char readers; -}; -DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks); +DECLARE_PER_CPU(seqcount_t, xt_recseq); -/* - * Note: we need to ensure that preemption is disabled before acquiring - * the per-cpu-variable, so we do it as a two step process rather than - * using "spin_lock_bh()". 
- * - * We _also_ need to disable bottom half processing before updating our - * nesting count, to make sure that the only kind of re-entrancy is this - * code being called by itself: since the count+lock is not an atomic - * operation, we can allow no races. +/** + * xt_write_recseq_begin - start of a write section * - * _Only_ that special combination of being per-cpu and never getting - * re-entered asynchronously means that the count is safe. + * Begin packet processing : all readers must wait the end + * 1) Must be called with preemption disabled + * 2) softirqs must be disabled too (or we should use irqsafe_cpu_add()) + * Returns : + * 1 if no recursion on this cpu + * 0 if recursion detected */ -static inline void xt_info_rdlock_bh(void) +static inline unsigned int xt_write_recseq_begin(void) { - struct xt_info_lock *lock; + unsigned int addend; - local_bh_disable(); - lock = &__get_cpu_var(xt_info_locks); - if (likely(!lock->readers++)) - write_seqlock(&lock->lock); -} + /* + * Low order bit of sequence is set if we already + * called xt_write_recseq_begin(). + */ + addend = (__this_cpu_read(xt_recseq.sequence) + 1) & 1; -static inline void xt_info_rdunlock_bh(void) -{ - struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks); + /* + * This is kind of a write_seqcount_begin(), but addend is 0 or 1 + * We dont check addend value to avoid a test and conditional jump, + * since addend is most likely 1 + */ + __this_cpu_add(xt_recseq.sequence, addend); + smp_wmb(); - if (likely(!--lock->readers)) - write_sequnlock(&lock->lock); - local_bh_enable(); + return addend; } -/* - * The "writer" side needs to get exclusive access to the lock, - * regardless of readers. This must be called with bottom half - * processing (and thus also preemption) disabled. +/** + * xt_write_recseq_end - end of a write section + * @addend: return value from previous xt_write_recseq_begin() + * + * End packet processing : all readers can proceed + * 1) Must be called with preemption disabled + * 2) softirqs must be disabled too (or we should use irqsafe_cpu_add()) */ -static inline void xt_info_wrlock(unsigned int cpu) -{ - write_seqlock(&per_cpu(xt_info_locks, cpu).lock); -} - -static inline void xt_info_wrunlock(unsigned int cpu) +static inline void xt_write_recseq_end(unsigned int addend) { - write_sequnlock(&per_cpu(xt_info_locks, cpu).lock); + /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */ + smp_wmb(); + __this_cpu_add(xt_recseq.sequence, addend); } /* diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 4b5d457c2d76..2ea743336836 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -260,6 +260,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, void *table_base; const struct xt_table_info *private; struct xt_action_param acpar; + unsigned int addend; if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) return NF_DROP; @@ -267,7 +268,8 @@ unsigned int arpt_do_table(struct sk_buff *skb, indev = in ? in->name : nulldevname; outdev = out ? 
out->name : nulldevname; - xt_info_rdlock_bh(); + local_bh_disable(); + addend = xt_write_recseq_begin(); private = table->private; table_base = private->entries[smp_processor_id()]; @@ -338,7 +340,8 @@ unsigned int arpt_do_table(struct sk_buff *skb, /* Verdict */ break; } while (!acpar.hotdrop); - xt_info_rdunlock_bh(); + xt_write_recseq_end(addend); + local_bh_enable(); if (acpar.hotdrop) return NF_DROP; @@ -712,7 +715,7 @@ static void get_counters(const struct xt_table_info *t, unsigned int i; for_each_possible_cpu(cpu) { - seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; + seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; xt_entry_foreach(iter, t->entries[cpu], t->size) { @@ -720,10 +723,10 @@ static void get_counters(const struct xt_table_info *t, unsigned int start; do { - start = read_seqbegin(lock); + start = read_seqcount_begin(s); bcnt = iter->counters.bcnt; pcnt = iter->counters.pcnt; - } while (read_seqretry(lock, start)); + } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); ++i; @@ -1115,6 +1118,7 @@ static int do_add_counters(struct net *net, const void __user *user, int ret = 0; void *loc_cpu_entry; struct arpt_entry *iter; + unsigned int addend; #ifdef CONFIG_COMPAT struct compat_xt_counters_info compat_tmp; @@ -1171,12 +1175,12 @@ static int do_add_counters(struct net *net, const void __user *user, /* Choose the copy that is on our node */ curcpu = smp_processor_id(); loc_cpu_entry = private->entries[curcpu]; - xt_info_wrlock(curcpu); + addend = xt_write_recseq_begin(); xt_entry_foreach(iter, loc_cpu_entry, private->size) { ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); ++i; } - xt_info_wrunlock(curcpu); + xt_write_recseq_end(addend); unlock_up_free: local_bh_enable(); xt_table_unlock(t); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index ffcea0d1678e..2b6b700949eb 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -68,15 +68,6 @@ void *ipt_alloc_initial_table(const struct xt_table *info) } EXPORT_SYMBOL_GPL(ipt_alloc_initial_table); -/* - We keep a set of rules for each CPU, so we can avoid write-locking - them in the softirq when updating the counters and therefore - only need to read-lock in the softirq; doing a write_lock_bh() in user - context stops packets coming through and allows user context to read - the counters or update the rules. - - Hence the start of any table is given by get_table() below. */ - /* Returns whether matches rule or not. 
*/ /* Performance critical - called for every packet */ static inline bool @@ -311,6 +302,7 @@ ipt_do_table(struct sk_buff *skb, unsigned int *stackptr, origptr, cpu; const struct xt_table_info *private; struct xt_action_param acpar; + unsigned int addend; /* Initialization */ ip = ip_hdr(skb); @@ -331,7 +323,8 @@ ipt_do_table(struct sk_buff *skb, acpar.hooknum = hook; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - xt_info_rdlock_bh(); + local_bh_disable(); + addend = xt_write_recseq_begin(); private = table->private; cpu = smp_processor_id(); table_base = private->entries[cpu]; @@ -430,7 +423,9 @@ ipt_do_table(struct sk_buff *skb, pr_debug("Exiting %s; resetting sp from %u to %u\n", __func__, *stackptr, origptr); *stackptr = origptr; - xt_info_rdunlock_bh(); + xt_write_recseq_end(addend); + local_bh_enable(); + #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; #else @@ -886,7 +881,7 @@ get_counters(const struct xt_table_info *t, unsigned int i; for_each_possible_cpu(cpu) { - seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; + seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; xt_entry_foreach(iter, t->entries[cpu], t->size) { @@ -894,10 +889,10 @@ get_counters(const struct xt_table_info *t, unsigned int start; do { - start = read_seqbegin(lock); + start = read_seqcount_begin(s); bcnt = iter->counters.bcnt; pcnt = iter->counters.pcnt; - } while (read_seqretry(lock, start)); + } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); ++i; /* macro does multi eval of i */ @@ -1312,6 +1307,7 @@ do_add_counters(struct net *net, const void __user *user, int ret = 0; void *loc_cpu_entry; struct ipt_entry *iter; + unsigned int addend; #ifdef CONFIG_COMPAT struct compat_xt_counters_info compat_tmp; @@ -1368,12 +1364,12 @@ do_add_counters(struct net *net, const void __user *user, /* Choose the copy that is on our node */ curcpu = smp_processor_id(); loc_cpu_entry = private->entries[curcpu]; - xt_info_wrlock(curcpu); + addend = xt_write_recseq_begin(); xt_entry_foreach(iter, loc_cpu_entry, private->size) { ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); ++i; } - xt_info_wrunlock(curcpu); + xt_write_recseq_end(addend); unlock_up_free: local_bh_enable(); xt_table_unlock(t); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 0b2af9b85cec..ec7cf579cdd4 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -340,6 +340,7 @@ ip6t_do_table(struct sk_buff *skb, unsigned int *stackptr, origptr, cpu; const struct xt_table_info *private; struct xt_action_param acpar; + unsigned int addend; /* Initialization */ indev = in ? 
in->name : nulldevname; @@ -358,7 +359,8 @@ ip6t_do_table(struct sk_buff *skb, IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - xt_info_rdlock_bh(); + local_bh_disable(); + addend = xt_write_recseq_begin(); private = table->private; cpu = smp_processor_id(); table_base = private->entries[cpu]; @@ -442,7 +444,9 @@ ip6t_do_table(struct sk_buff *skb, } while (!acpar.hotdrop); *stackptr = origptr; - xt_info_rdunlock_bh(); + + xt_write_recseq_end(addend); + local_bh_enable(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -899,7 +903,7 @@ get_counters(const struct xt_table_info *t, unsigned int i; for_each_possible_cpu(cpu) { - seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; + seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; xt_entry_foreach(iter, t->entries[cpu], t->size) { @@ -907,10 +911,10 @@ get_counters(const struct xt_table_info *t, unsigned int start; do { - start = read_seqbegin(lock); + start = read_seqcount_begin(s); bcnt = iter->counters.bcnt; pcnt = iter->counters.pcnt; - } while (read_seqretry(lock, start)); + } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); ++i; @@ -1325,6 +1329,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, int ret = 0; const void *loc_cpu_entry; struct ip6t_entry *iter; + unsigned int addend; #ifdef CONFIG_COMPAT struct compat_xt_counters_info compat_tmp; @@ -1381,13 +1386,13 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, i = 0; /* Choose the copy that is on our node */ curcpu = smp_processor_id(); - xt_info_wrlock(curcpu); + addend = xt_write_recseq_begin(); loc_cpu_entry = private->entries[curcpu]; xt_entry_foreach(iter, loc_cpu_entry, private->size) { ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); ++i; } - xt_info_wrunlock(curcpu); + xt_write_recseq_end(addend); unlock_up_free: local_bh_enable(); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index a9adf4c6b299..52959efca858 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -762,8 +762,8 @@ void xt_compat_unlock(u_int8_t af) EXPORT_SYMBOL_GPL(xt_compat_unlock); #endif -DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks); -EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks); +DEFINE_PER_CPU(seqcount_t, xt_recseq); +EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq); static int xt_jumpstack_alloc(struct xt_table_info *i) { @@ -1362,10 +1362,7 @@ static int __init xt_init(void) int rv; for_each_possible_cpu(i) { - struct xt_info_lock *lock = &per_cpu(xt_info_locks, i); - - seqlock_init(&lock->lock); - lock->readers = 0; + seqcount_init(&per_cpu(xt_recseq, i)); } xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL); -- cgit v1.2.3 From d430d3d7e646eb1eac2bb4aa244a644312e67c76 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Wed, 16 Mar 2011 17:29:47 -0400 Subject: jump label: Introduce static_branch() interface Introduce: static __always_inline bool static_branch(struct jump_label_key *key); instead of the old JUMP_LABEL(key, label) macro. In this way, jump labels become really easy to use: Define: struct jump_label_key jump_key; Can be used as: if (static_branch(&jump_key)) do unlikely code enable/disale via: jump_label_inc(&jump_key); jump_label_dec(&jump_key); that's it! For the jump labels disabled case, the static_branch() becomes an atomic_read(), and jump_label_inc()/dec() are simply atomic_inc(), atomic_dec() operations. We show testing results for this change below. Thanks to H. Peter Anvin for suggesting the 'static_branch()' construct. 
Since we now require a 'struct jump_label_key *key', we can store a pointer into the jump table addresses. In this way, we can enable/disable jump labels, in basically constant time. This change allows us to completely remove the previous hashtable scheme. Thanks to Peter Zijlstra for this re-write. Testing: I ran a series of 'tbench 20' runs 5 times (with reboots) for 3 configurations, where tracepoints were disabled. jump label configured in avg: 815.6 jump label *not* configured in (using atomic reads) avg: 800.1 jump label *not* configured in (regular reads) avg: 803.4 Signed-off-by: Peter Zijlstra LKML-Reference: <20110316212947.GA8792@redhat.com> Signed-off-by: Jason Baron Suggested-by: H. Peter Anvin Tested-by: David Daney Acked-by: Ralf Baechle Acked-by: David S. Miller Acked-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- arch/mips/include/asm/jump_label.h | 22 +- arch/sparc/include/asm/jump_label.h | 25 +- arch/x86/include/asm/alternative.h | 3 +- arch/x86/include/asm/jump_label.h | 26 +- arch/x86/kernel/alternative.c | 2 +- arch/x86/kernel/module.c | 1 + include/asm-generic/vmlinux.lds.h | 14 +- include/linux/dynamic_debug.h | 2 - include/linux/jump_label.h | 89 +++--- include/linux/jump_label_ref.h | 44 --- include/linux/perf_event.h | 26 +- include/linux/tracepoint.h | 22 +- kernel/jump_label.c | 539 +++++++++++++++--------------------- kernel/perf_event.c | 4 +- kernel/tracepoint.c | 23 +- 15 files changed, 356 insertions(+), 486 deletions(-) delete mode 100644 include/linux/jump_label_ref.h (limited to 'include/linux') diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index 7622ccf75076..1881b316ca45 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -20,16 +20,18 @@ #define WORD_INSN ".word" #endif -#define JUMP_LABEL(key, label) \ - do { \ - asm goto("1:\tnop\n\t" \ - "nop\n\t" \ - ".pushsection __jump_table, \"a\"\n\t" \ - WORD_INSN " 1b, %l[" #label "], %0\n\t" \ - ".popsection\n\t" \ - : : "i" (key) : : label); \ - } while (0) - +static __always_inline bool arch_static_branch(struct jump_label_key *key) +{ + asm goto("1:\tnop\n\t" + "nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + WORD_INSN " 1b, %l[l_yes], %0\n\t" + ".popsection\n\t" + : : "i" (key) : : l_yes); + return false; +l_yes: + return true; +} #endif /* __KERNEL__ */ diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index 427d4684e0d2..fc73a82366f8 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -7,17 +7,20 @@ #define JUMP_LABEL_NOP_SIZE 4 -#define JUMP_LABEL(key, label) \ - do { \ - asm goto("1:\n\t" \ - "nop\n\t" \ - "nop\n\t" \ - ".pushsection __jump_table, \"a\"\n\t"\ - ".align 4\n\t" \ - ".word 1b, %l[" #label "], %c0\n\t" \ - ".popsection \n\t" \ - : : "i" (key) : : label);\ - } while (0) +static __always_inline bool arch_static_branch(struct jump_label_key *key) +{ + asm goto("1:\n\t" + "nop\n\t" + "nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".align 4\n\t" + ".word 1b, %l[l_yes], %c0\n\t" + ".popsection \n\t" + : : "i" (key) : : l_yes); + return false; +l_yes: + return true; +} #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 13009d1af99a..8cdd1e247975 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -4,7 +4,6 @@ #include #include #include -#include #include /* @@ -191,7 +190,7 @@ extern void *text_poke(void *addr, 
const void *opcode, size_t len); extern void *text_poke_smp(void *addr, const void *opcode, size_t len); extern void text_poke_smp_batch(struct text_poke_param *params, int n); -#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_JUMP_LABEL) #define IDEAL_NOP_SIZE_5 5 extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5]; extern void arch_init_ideal_nop5(void); diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 574dbc22893a..f217cee86533 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -5,20 +5,24 @@ #include #include +#include #define JUMP_LABEL_NOP_SIZE 5 -# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" - -# define JUMP_LABEL(key, label) \ - do { \ - asm goto("1:" \ - JUMP_LABEL_INITIAL_NOP \ - ".pushsection __jump_table, \"aw\" \n\t"\ - _ASM_PTR "1b, %l[" #label "], %c0 \n\t" \ - ".popsection \n\t" \ - : : "i" (key) : : label); \ - } while (0) +#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" + +static __always_inline bool arch_static_branch(struct jump_label_key *key) +{ + asm goto("1:" + JUMP_LABEL_INITIAL_NOP + ".pushsection __jump_table, \"aw\" \n\t" + _ASM_PTR "1b, %l[l_yes], %c0 \n\t" + ".popsection \n\t" + : : "i" (key) : : l_yes); + return false; +l_yes: + return true; +} #endif /* __KERNEL__ */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 4a234677e213..651454b0c811 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -679,7 +679,7 @@ void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n) __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); } -#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_JUMP_LABEL) #ifdef CONFIG_X86_64 unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 }; diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index ab23f1ad4bf1..52f256f2cc81 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 32c45e5fe0ab..79522166d7f1 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -170,6 +170,10 @@ STRUCT_ALIGN(); \ *(__tracepoints) \ /* implement dynamic printk debug */ \ + . = ALIGN(8); \ + VMLINUX_SYMBOL(__start___jump_table) = .; \ + *(__jump_table) \ + VMLINUX_SYMBOL(__stop___jump_table) = .; \ . = ALIGN(8); \ VMLINUX_SYMBOL(__start___verbose) = .; \ *(__verbose) \ @@ -228,8 +232,6 @@ \ BUG_TABLE \ \ - JUMP_TABLE \ - \ /* PCI quirks */ \ .pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \ @@ -589,14 +591,6 @@ #define BUG_TABLE #endif -#define JUMP_TABLE \ - . = ALIGN(8); \ - __jump_table : AT(ADDR(__jump_table) - LOAD_OFFSET) { \ - VMLINUX_SYMBOL(__start___jump_table) = .; \ - *(__jump_table) \ - VMLINUX_SYMBOL(__stop___jump_table) = .; \ - } - #ifdef CONFIG_PM_TRACE #define TRACEDATA \ . 
= ALIGN(4); \ diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index 0c9653f11c18..e747ecd48e1c 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -1,8 +1,6 @@ #ifndef _DYNAMIC_DEBUG_H #define _DYNAMIC_DEBUG_H -#include - /* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They * use independent hash functions, to reduce the chance of false positives. diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 7880f18e4b86..83e745f3ead7 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -1,20 +1,43 @@ #ifndef _LINUX_JUMP_LABEL_H #define _LINUX_JUMP_LABEL_H +#include +#include + #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) + +struct jump_label_key { + atomic_t enabled; + struct jump_entry *entries; +#ifdef CONFIG_MODULES + struct jump_label_mod *next; +#endif +}; + # include # define HAVE_JUMP_LABEL #endif enum jump_label_type { + JUMP_LABEL_DISABLE = 0, JUMP_LABEL_ENABLE, - JUMP_LABEL_DISABLE }; struct module; #ifdef HAVE_JUMP_LABEL +#ifdef CONFIG_MODULES +#define JUMP_LABEL_INIT {{ 0 }, NULL, NULL} +#else +#define JUMP_LABEL_INIT {{ 0 }, NULL} +#endif + +static __always_inline bool static_branch(struct jump_label_key *key) +{ + return arch_static_branch(key); +} + extern struct jump_entry __start___jump_table[]; extern struct jump_entry __stop___jump_table[]; @@ -23,37 +46,37 @@ extern void jump_label_unlock(void); extern void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_text_poke_early(jump_label_t addr); -extern void jump_label_update(unsigned long key, enum jump_label_type type); -extern void jump_label_apply_nops(struct module *mod); extern int jump_label_text_reserved(void *start, void *end); +extern void jump_label_inc(struct jump_label_key *key); +extern void jump_label_dec(struct jump_label_key *key); +extern bool jump_label_enabled(struct jump_label_key *key); +extern void jump_label_apply_nops(struct module *mod); -#define jump_label_enable(key) \ - jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE); +#else -#define jump_label_disable(key) \ - jump_label_update((unsigned long)key, JUMP_LABEL_DISABLE); +#include -#else +#define JUMP_LABEL_INIT {ATOMIC_INIT(0)} -#define JUMP_LABEL(key, label) \ -do { \ - if (unlikely(*key)) \ - goto label; \ -} while (0) +struct jump_label_key { + atomic_t enabled; +}; -#define jump_label_enable(cond_var) \ -do { \ - *(cond_var) = 1; \ -} while (0) +static __always_inline bool static_branch(struct jump_label_key *key) +{ + if (unlikely(atomic_read(&key->enabled))) + return true; + return false; +} -#define jump_label_disable(cond_var) \ -do { \ - *(cond_var) = 0; \ -} while (0) +static inline void jump_label_inc(struct jump_label_key *key) +{ + atomic_inc(&key->enabled); +} -static inline int jump_label_apply_nops(struct module *mod) +static inline void jump_label_dec(struct jump_label_key *key) { - return 0; + atomic_dec(&key->enabled); } static inline int jump_label_text_reserved(void *start, void *end) @@ -64,16 +87,16 @@ static inline int jump_label_text_reserved(void *start, void *end) static inline void jump_label_lock(void) {} static inline void jump_label_unlock(void) {} -#endif +static inline bool jump_label_enabled(struct jump_label_key *key) +{ + return !!atomic_read(&key->enabled); +} -#define COND_STMT(key, stmt) \ -do { \ - __label__ jl_enabled; \ - 
JUMP_LABEL(key, jl_enabled); \ - if (0) { \ -jl_enabled: \ - stmt; \ - } \ -} while (0) +static inline int jump_label_apply_nops(struct module *mod) +{ + return 0; +} + +#endif #endif diff --git a/include/linux/jump_label_ref.h b/include/linux/jump_label_ref.h deleted file mode 100644 index e5d012ad92c6..000000000000 --- a/include/linux/jump_label_ref.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef _LINUX_JUMP_LABEL_REF_H -#define _LINUX_JUMP_LABEL_REF_H - -#include -#include - -#ifdef HAVE_JUMP_LABEL - -static inline void jump_label_inc(atomic_t *key) -{ - if (atomic_add_return(1, key) == 1) - jump_label_enable(key); -} - -static inline void jump_label_dec(atomic_t *key) -{ - if (atomic_dec_and_test(key)) - jump_label_disable(key); -} - -#else /* !HAVE_JUMP_LABEL */ - -static inline void jump_label_inc(atomic_t *key) -{ - atomic_inc(key); -} - -static inline void jump_label_dec(atomic_t *key) -{ - atomic_dec(key); -} - -#undef JUMP_LABEL -#define JUMP_LABEL(key, label) \ -do { \ - if (unlikely(__builtin_choose_expr( \ - __builtin_types_compatible_p(typeof(key), atomic_t *), \ - atomic_read((atomic_t *)(key)), *(key)))) \ - goto label; \ -} while (0) - -#endif /* HAVE_JUMP_LABEL */ - -#endif /* _LINUX_JUMP_LABEL_REF_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 311b4dc785a1..730b7821690f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -505,7 +505,7 @@ struct perf_guest_info_callbacks { #include #include #include -#include +#include #include #include @@ -1034,7 +1034,7 @@ static inline int is_software_event(struct perf_event *event) return event->pmu->task_ctx_nr == perf_sw_context; } -extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; +extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); @@ -1063,22 +1063,21 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { struct pt_regs hot_regs; - JUMP_LABEL(&perf_swevent_enabled[event_id], have_event); - return; - -have_event: - if (!regs) { - perf_fetch_caller_regs(&hot_regs); - regs = &hot_regs; + if (static_branch(&perf_swevent_enabled[event_id])) { + if (!regs) { + perf_fetch_caller_regs(&hot_regs); + regs = &hot_regs; + } + __perf_sw_event(event_id, nr, nmi, regs, addr); } - __perf_sw_event(event_id, nr, nmi, regs, addr); } -extern atomic_t perf_sched_events; +extern struct jump_label_key perf_sched_events; static inline void perf_event_task_sched_in(struct task_struct *task) { - COND_STMT(&perf_sched_events, __perf_event_task_sched_in(task)); + if (static_branch(&perf_sched_events)) + __perf_event_task_sched_in(task); } static inline @@ -1086,7 +1085,8 @@ void perf_event_task_sched_out(struct task_struct *task, struct task_struct *nex { perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); - COND_STMT(&perf_sched_events, __perf_event_task_sched_out(task, next)); + if (static_branch(&perf_sched_events)) + __perf_event_task_sched_out(task, next); } extern void perf_event_mmap(struct vm_area_struct *vma); diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 97c84a58efb8..d530a4460a0b 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -29,7 +29,7 @@ struct tracepoint_func { struct tracepoint { const char *name; /* Tracepoint name */ - int state; /* State. 
*/ + struct jump_label_key key; void (*regfunc)(void); void (*unregfunc)(void); struct tracepoint_func __rcu *funcs; @@ -146,9 +146,7 @@ void tracepoint_update_probe_range(struct tracepoint * const *begin, extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ - JUMP_LABEL(&__tracepoint_##name.state, do_trace); \ - return; \ -do_trace: \ + if (static_branch(&__tracepoint_##name.key)) \ __DO_TRACE(&__tracepoint_##name, \ TP_PROTO(data_proto), \ TP_ARGS(data_args), \ @@ -176,14 +174,14 @@ do_trace: \ * structures, so we create an array of pointers that will be used for iteration * on the tracepoints. */ -#define DEFINE_TRACE_FN(name, reg, unreg) \ - static const char __tpstrtab_##name[] \ - __attribute__((section("__tracepoints_strings"))) = #name; \ - struct tracepoint __tracepoint_##name \ - __attribute__((section("__tracepoints"))) = \ - { __tpstrtab_##name, 0, reg, unreg, NULL }; \ - static struct tracepoint * const __tracepoint_ptr_##name __used \ - __attribute__((section("__tracepoints_ptrs"))) = \ +#define DEFINE_TRACE_FN(name, reg, unreg) \ + static const char __tpstrtab_##name[] \ + __attribute__((section("__tracepoints_strings"))) = #name; \ + struct tracepoint __tracepoint_##name \ + __attribute__((section("__tracepoints"))) = \ + { __tpstrtab_##name, JUMP_LABEL_INIT, reg, unreg, NULL };\ + static struct tracepoint * const __tracepoint_ptr_##name __used \ + __attribute__((section("__tracepoints_ptrs"))) = \ &__tracepoint_##name; #define DEFINE_TRACE(name) \ diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 3b79bd938330..74d1c099fbd1 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -2,43 +2,23 @@ * jump label support * * Copyright (C) 2009 Jason Baron + * Copyright (C) 2011 Peter Zijlstra * */ -#include #include #include #include #include -#include #include #include #include +#include #ifdef HAVE_JUMP_LABEL -#define JUMP_LABEL_HASH_BITS 6 -#define JUMP_LABEL_TABLE_SIZE (1 << JUMP_LABEL_HASH_BITS) -static struct hlist_head jump_label_table[JUMP_LABEL_TABLE_SIZE]; - /* mutex to protect coming/going of the the jump_label table */ static DEFINE_MUTEX(jump_label_mutex); -struct jump_label_entry { - struct hlist_node hlist; - struct jump_entry *table; - int nr_entries; - /* hang modules off here */ - struct hlist_head modules; - unsigned long key; -}; - -struct jump_label_module_entry { - struct hlist_node hlist; - struct jump_entry *table; - int nr_entries; - struct module *mod; -}; - void jump_label_lock(void) { mutex_lock(&jump_label_mutex); @@ -49,6 +29,11 @@ void jump_label_unlock(void) mutex_unlock(&jump_label_mutex); } +bool jump_label_enabled(struct jump_label_key *key) +{ + return !!atomic_read(&key->enabled); +} + static int jump_label_cmp(const void *a, const void *b) { const struct jump_entry *jea = a; @@ -64,7 +49,7 @@ static int jump_label_cmp(const void *a, const void *b) } static void -sort_jump_label_entries(struct jump_entry *start, struct jump_entry *stop) +jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop) { unsigned long size; @@ -73,118 +58,25 @@ sort_jump_label_entries(struct jump_entry *start, struct jump_entry *stop) sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL); } -static struct jump_label_entry *get_jump_label_entry(jump_label_t key) -{ - struct hlist_head *head; - struct hlist_node *node; - struct jump_label_entry *e; - u32 hash = jhash((void *)&key, sizeof(jump_label_t), 0); - - head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)]; - 
hlist_for_each_entry(e, node, head, hlist) { - if (key == e->key) - return e; - } - return NULL; -} +static void jump_label_update(struct jump_label_key *key, int enable); -static struct jump_label_entry * -add_jump_label_entry(jump_label_t key, int nr_entries, struct jump_entry *table) +void jump_label_inc(struct jump_label_key *key) { - struct hlist_head *head; - struct jump_label_entry *e; - u32 hash; - - e = get_jump_label_entry(key); - if (e) - return ERR_PTR(-EEXIST); - - e = kmalloc(sizeof(struct jump_label_entry), GFP_KERNEL); - if (!e) - return ERR_PTR(-ENOMEM); - - hash = jhash((void *)&key, sizeof(jump_label_t), 0); - head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)]; - e->key = key; - e->table = table; - e->nr_entries = nr_entries; - INIT_HLIST_HEAD(&(e->modules)); - hlist_add_head(&e->hlist, head); - return e; -} + if (atomic_inc_not_zero(&key->enabled)) + return; -static int -build_jump_label_hashtable(struct jump_entry *start, struct jump_entry *stop) -{ - struct jump_entry *iter, *iter_begin; - struct jump_label_entry *entry; - int count; - - sort_jump_label_entries(start, stop); - iter = start; - while (iter < stop) { - entry = get_jump_label_entry(iter->key); - if (!entry) { - iter_begin = iter; - count = 0; - while ((iter < stop) && - (iter->key == iter_begin->key)) { - iter++; - count++; - } - entry = add_jump_label_entry(iter_begin->key, - count, iter_begin); - if (IS_ERR(entry)) - return PTR_ERR(entry); - } else { - WARN_ONCE(1, KERN_ERR "build_jump_hashtable: unexpected entry!\n"); - return -1; - } - } - return 0; + jump_label_lock(); + if (atomic_add_return(1, &key->enabled) == 1) + jump_label_update(key, JUMP_LABEL_ENABLE); + jump_label_unlock(); } -/*** - * jump_label_update - update jump label text - * @key - key value associated with a a jump label - * @type - enum set to JUMP_LABEL_ENABLE or JUMP_LABEL_DISABLE - * - * Will enable/disable the jump for jump label @key, depending on the - * value of @type. 
- * - */ - -void jump_label_update(unsigned long key, enum jump_label_type type) +void jump_label_dec(struct jump_label_key *key) { - struct jump_entry *iter; - struct jump_label_entry *entry; - struct hlist_node *module_node; - struct jump_label_module_entry *e_module; - int count; + if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) + return; - jump_label_lock(); - entry = get_jump_label_entry((jump_label_t)key); - if (entry) { - count = entry->nr_entries; - iter = entry->table; - while (count--) { - if (kernel_text_address(iter->code)) - arch_jump_label_transform(iter, type); - iter++; - } - /* eanble/disable jump labels in modules */ - hlist_for_each_entry(e_module, module_node, &(entry->modules), - hlist) { - count = e_module->nr_entries; - iter = e_module->table; - while (count--) { - if (iter->key && - kernel_text_address(iter->code)) - arch_jump_label_transform(iter, type); - iter++; - } - } - } + jump_label_update(key, JUMP_LABEL_DISABLE); jump_label_unlock(); } @@ -197,77 +89,33 @@ static int addr_conflict(struct jump_entry *entry, void *start, void *end) return 0; } -#ifdef CONFIG_MODULES - -static int module_conflict(void *start, void *end) +static int __jump_label_text_reserved(struct jump_entry *iter_start, + struct jump_entry *iter_stop, void *start, void *end) { - struct hlist_head *head; - struct hlist_node *node, *node_next, *module_node, *module_node_next; - struct jump_label_entry *e; - struct jump_label_module_entry *e_module; struct jump_entry *iter; - int i, count; - int conflict = 0; - - for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) { - head = &jump_label_table[i]; - hlist_for_each_entry_safe(e, node, node_next, head, hlist) { - hlist_for_each_entry_safe(e_module, module_node, - module_node_next, - &(e->modules), hlist) { - count = e_module->nr_entries; - iter = e_module->table; - while (count--) { - if (addr_conflict(iter, start, end)) { - conflict = 1; - goto out; - } - iter++; - } - } - } - } -out: - return conflict; -} - -#endif - -/*** - * jump_label_text_reserved - check if addr range is reserved - * @start: start text addr - * @end: end text addr - * - * checks if the text addr located between @start and @end - * overlaps with any of the jump label patch addresses. Code - * that wants to modify kernel text should first verify that - * it does not overlap with any of the jump label addresses. - * Caller must hold jump_label_mutex. - * - * returns 1 if there is an overlap, 0 otherwise - */ -int jump_label_text_reserved(void *start, void *end) -{ - struct jump_entry *iter; - struct jump_entry *iter_start = __start___jump_table; - struct jump_entry *iter_stop = __start___jump_table; - int conflict = 0; iter = iter_start; while (iter < iter_stop) { - if (addr_conflict(iter, start, end)) { - conflict = 1; - goto out; - } + if (addr_conflict(iter, start, end)) + return 1; iter++; } - /* now check modules */ -#ifdef CONFIG_MODULES - conflict = module_conflict(start, end); -#endif -out: - return conflict; + return 0; +} + +static void __jump_label_update(struct jump_label_key *key, + struct jump_entry *entry, int enable) +{ + for (; entry->key == (jump_label_t)(unsigned long)key; entry++) { + /* + * entry->code set to 0 invalidates module init text sections + * kernel_text_address() verifies we are not in core kernel + * init code, see jump_label_invalidate_module_init(). 
+ */ + if (entry->code && kernel_text_address(entry->code)) + arch_jump_label_transform(entry, enable); + } } /* @@ -277,142 +125,173 @@ void __weak arch_jump_label_text_poke_early(jump_label_t addr) { } -static __init int init_jump_label(void) +static __init int jump_label_init(void) { - int ret; struct jump_entry *iter_start = __start___jump_table; struct jump_entry *iter_stop = __stop___jump_table; + struct jump_label_key *key = NULL; struct jump_entry *iter; jump_label_lock(); - ret = build_jump_label_hashtable(__start___jump_table, - __stop___jump_table); - iter = iter_start; - while (iter < iter_stop) { + jump_label_sort_entries(iter_start, iter_stop); + + for (iter = iter_start; iter < iter_stop; iter++) { arch_jump_label_text_poke_early(iter->code); - iter++; + if (iter->key == (jump_label_t)(unsigned long)key) + continue; + + key = (struct jump_label_key *)(unsigned long)iter->key; + atomic_set(&key->enabled, 0); + key->entries = iter; +#ifdef CONFIG_MODULES + key->next = NULL; +#endif } jump_label_unlock(); - return ret; + + return 0; } -early_initcall(init_jump_label); +early_initcall(jump_label_init); #ifdef CONFIG_MODULES -static struct jump_label_module_entry * -add_jump_label_module_entry(struct jump_label_entry *entry, - struct jump_entry *iter_begin, - int count, struct module *mod) +struct jump_label_mod { + struct jump_label_mod *next; + struct jump_entry *entries; + struct module *mod; +}; + +static int __jump_label_mod_text_reserved(void *start, void *end) +{ + struct module *mod; + + mod = __module_text_address((unsigned long)start); + if (!mod) + return 0; + + WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod); + + return __jump_label_text_reserved(mod->jump_entries, + mod->jump_entries + mod->num_jump_entries, + start, end); +} + +static void __jump_label_mod_update(struct jump_label_key *key, int enable) +{ + struct jump_label_mod *mod = key->next; + + while (mod) { + __jump_label_update(key, mod->entries, enable); + mod = mod->next; + } +} + +/*** + * apply_jump_label_nops - patch module jump labels with arch_get_jump_label_nop() + * @mod: module to patch + * + * Allow for run-time selection of the optimal nops. Before the module + * loads patch these with arch_get_jump_label_nop(), which is specified by + * the arch specific jump label code. 
+ */ +void jump_label_apply_nops(struct module *mod) { - struct jump_label_module_entry *e; - - e = kmalloc(sizeof(struct jump_label_module_entry), GFP_KERNEL); - if (!e) - return ERR_PTR(-ENOMEM); - e->mod = mod; - e->nr_entries = count; - e->table = iter_begin; - hlist_add_head(&e->hlist, &entry->modules); - return e; + struct jump_entry *iter_start = mod->jump_entries; + struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; + struct jump_entry *iter; + + /* if the module doesn't have jump label entries, just return */ + if (iter_start == iter_stop) + return; + + for (iter = iter_start; iter < iter_stop; iter++) + arch_jump_label_text_poke_early(iter->code); } -static int add_jump_label_module(struct module *mod) +static int jump_label_add_module(struct module *mod) { - struct jump_entry *iter, *iter_begin; - struct jump_label_entry *entry; - struct jump_label_module_entry *module_entry; - int count; + struct jump_entry *iter_start = mod->jump_entries; + struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; + struct jump_entry *iter; + struct jump_label_key *key = NULL; + struct jump_label_mod *jlm; /* if the module doesn't have jump label entries, just return */ - if (!mod->num_jump_entries) + if (iter_start == iter_stop) return 0; - sort_jump_label_entries(mod->jump_entries, - mod->jump_entries + mod->num_jump_entries); - iter = mod->jump_entries; - while (iter < mod->jump_entries + mod->num_jump_entries) { - entry = get_jump_label_entry(iter->key); - iter_begin = iter; - count = 0; - while ((iter < mod->jump_entries + mod->num_jump_entries) && - (iter->key == iter_begin->key)) { - iter++; - count++; - } - if (!entry) { - entry = add_jump_label_entry(iter_begin->key, 0, NULL); - if (IS_ERR(entry)) - return PTR_ERR(entry); + jump_label_sort_entries(iter_start, iter_stop); + + for (iter = iter_start; iter < iter_stop; iter++) { + if (iter->key == (jump_label_t)(unsigned long)key) + continue; + + key = (struct jump_label_key *)(unsigned long)iter->key; + + if (__module_address(iter->key) == mod) { + atomic_set(&key->enabled, 0); + key->entries = iter; + key->next = NULL; + continue; } - module_entry = add_jump_label_module_entry(entry, iter_begin, - count, mod); - if (IS_ERR(module_entry)) - return PTR_ERR(module_entry); + + jlm = kzalloc(sizeof(struct jump_label_mod), GFP_KERNEL); + if (!jlm) + return -ENOMEM; + + jlm->mod = mod; + jlm->entries = iter; + jlm->next = key->next; + key->next = jlm; + + if (jump_label_enabled(key)) + __jump_label_update(key, iter, JUMP_LABEL_ENABLE); } + return 0; } -static void remove_jump_label_module(struct module *mod) +static void jump_label_del_module(struct module *mod) { - struct hlist_head *head; - struct hlist_node *node, *node_next, *module_node, *module_node_next; - struct jump_label_entry *e; - struct jump_label_module_entry *e_module; - int i; + struct jump_entry *iter_start = mod->jump_entries; + struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; + struct jump_entry *iter; + struct jump_label_key *key = NULL; + struct jump_label_mod *jlm, **prev; - /* if the module doesn't have jump label entries, just return */ - if (!mod->num_jump_entries) - return; + for (iter = iter_start; iter < iter_stop; iter++) { + if (iter->key == (jump_label_t)(unsigned long)key) + continue; + + key = (struct jump_label_key *)(unsigned long)iter->key; + + if (__module_address(iter->key) == mod) + continue; + + prev = &key->next; + jlm = key->next; - for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) { - head = &jump_label_table[i]; 
- hlist_for_each_entry_safe(e, node, node_next, head, hlist) { - hlist_for_each_entry_safe(e_module, module_node, - module_node_next, - &(e->modules), hlist) { - if (e_module->mod == mod) { - hlist_del(&e_module->hlist); - kfree(e_module); - } - } - if (hlist_empty(&e->modules) && (e->nr_entries == 0)) { - hlist_del(&e->hlist); - kfree(e); - } + while (jlm && jlm->mod != mod) { + prev = &jlm->next; + jlm = jlm->next; + } + + if (jlm) { + *prev = jlm->next; + kfree(jlm); } } } -static void remove_jump_label_module_init(struct module *mod) +static void jump_label_invalidate_module_init(struct module *mod) { - struct hlist_head *head; - struct hlist_node *node, *node_next, *module_node, *module_node_next; - struct jump_label_entry *e; - struct jump_label_module_entry *e_module; + struct jump_entry *iter_start = mod->jump_entries; + struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; struct jump_entry *iter; - int i, count; - - /* if the module doesn't have jump label entries, just return */ - if (!mod->num_jump_entries) - return; - for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) { - head = &jump_label_table[i]; - hlist_for_each_entry_safe(e, node, node_next, head, hlist) { - hlist_for_each_entry_safe(e_module, module_node, - module_node_next, - &(e->modules), hlist) { - if (e_module->mod != mod) - continue; - count = e_module->nr_entries; - iter = e_module->table; - while (count--) { - if (within_module_init(iter->code, mod)) - iter->key = 0; - iter++; - } - } - } + for (iter = iter_start; iter < iter_stop; iter++) { + if (within_module_init(iter->code, mod)) + iter->code = 0; } } @@ -426,59 +305,77 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val, switch (val) { case MODULE_STATE_COMING: jump_label_lock(); - ret = add_jump_label_module(mod); + ret = jump_label_add_module(mod); if (ret) - remove_jump_label_module(mod); + jump_label_del_module(mod); jump_label_unlock(); break; case MODULE_STATE_GOING: jump_label_lock(); - remove_jump_label_module(mod); + jump_label_del_module(mod); jump_label_unlock(); break; case MODULE_STATE_LIVE: jump_label_lock(); - remove_jump_label_module_init(mod); + jump_label_invalidate_module_init(mod); jump_label_unlock(); break; } - return ret; -} -/*** - * apply_jump_label_nops - patch module jump labels with arch_get_jump_label_nop() - * @mod: module to patch - * - * Allow for run-time selection of the optimal nops. Before the module - * loads patch these with arch_get_jump_label_nop(), which is specified by - * the arch specific jump label code. 
- */ -void jump_label_apply_nops(struct module *mod) -{ - struct jump_entry *iter; - - /* if the module doesn't have jump label entries, just return */ - if (!mod->num_jump_entries) - return; - - iter = mod->jump_entries; - while (iter < mod->jump_entries + mod->num_jump_entries) { - arch_jump_label_text_poke_early(iter->code); - iter++; - } + return notifier_from_errno(ret); } struct notifier_block jump_label_module_nb = { .notifier_call = jump_label_module_notify, - .priority = 0, + .priority = 1, /* higher than tracepoints */ }; -static __init int init_jump_label_module(void) +static __init int jump_label_init_module(void) { return register_module_notifier(&jump_label_module_nb); } -early_initcall(init_jump_label_module); +early_initcall(jump_label_init_module); #endif /* CONFIG_MODULES */ +/*** + * jump_label_text_reserved - check if addr range is reserved + * @start: start text addr + * @end: end text addr + * + * checks if the text addr located between @start and @end + * overlaps with any of the jump label patch addresses. Code + * that wants to modify kernel text should first verify that + * it does not overlap with any of the jump label addresses. + * Caller must hold jump_label_mutex. + * + * returns 1 if there is an overlap, 0 otherwise + */ +int jump_label_text_reserved(void *start, void *end) +{ + int ret = __jump_label_text_reserved(__start___jump_table, + __stop___jump_table, start, end); + + if (ret) + return ret; + +#ifdef CONFIG_MODULES + ret = __jump_label_mod_text_reserved(start, end); +#endif + return ret; +} + +static void jump_label_update(struct jump_label_key *key, int enable) +{ + struct jump_entry *entry = key->entries; + + /* if there are no users, entry can be NULL */ + if (entry) + __jump_label_update(key, entry, enable); + +#ifdef CONFIG_MODULES + __jump_label_mod_update(key, enable); +#endif +} + #endif diff --git a/kernel/perf_event.c b/kernel/perf_event.c index c75925c4d1e2..d665e92fbd44 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -125,7 +125,7 @@ enum event_type_t { * perf_sched_events : >0 events exist * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu */ -atomic_t perf_sched_events __read_mostly; +struct jump_label_key perf_sched_events __read_mostly; static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); static atomic_t nr_mmap_events __read_mostly; @@ -5417,7 +5417,7 @@ fail: return err; } -atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; +struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; static void sw_perf_event_destroy(struct perf_event *event) { diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 68187af4889e..b219f1449c54 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -251,9 +251,9 @@ static void set_tracepoint(struct tracepoint_entry **entry, { WARN_ON(strcmp((*entry)->name, elem->name) != 0); - if (elem->regfunc && !elem->state && active) + if (elem->regfunc && !jump_label_enabled(&elem->key) && active) elem->regfunc(); - else if (elem->unregfunc && elem->state && !active) + else if (elem->unregfunc && jump_label_enabled(&elem->key) && !active) elem->unregfunc(); /* @@ -264,13 +264,10 @@ static void set_tracepoint(struct tracepoint_entry **entry, * is used. 
*/ rcu_assign_pointer(elem->funcs, (*entry)->funcs); - if (!elem->state && active) { - jump_label_enable(&elem->state); - elem->state = active; - } else if (elem->state && !active) { - jump_label_disable(&elem->state); - elem->state = active; - } + if (active && !jump_label_enabled(&elem->key)) + jump_label_inc(&elem->key); + else if (!active && jump_label_enabled(&elem->key)) + jump_label_dec(&elem->key); } /* @@ -281,13 +278,11 @@ static void set_tracepoint(struct tracepoint_entry **entry, */ static void disable_tracepoint(struct tracepoint *elem) { - if (elem->unregfunc && elem->state) + if (elem->unregfunc && jump_label_enabled(&elem->key)) elem->unregfunc(); - if (elem->state) { - jump_label_disable(&elem->state); - elem->state = 0; - } + if (jump_label_enabled(&elem->key)) + jump_label_dec(&elem->key); rcu_assign_pointer(elem->funcs, NULL); } -- cgit v1.2.3 From 0545a3037773512d3448557ba048cebb73b3e4af Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 4 Apr 2011 05:30:58 +0000 Subject: pkt_sched: QFQ - quick fair queue scheduler This is an implementation of the Quick Fair Queue scheduler developed by Fabio Checconi. The same algorithm is already implemented in ipfw in FreeBSD. Fabio had an earlier version developed on Linux, I just cleaned it up. Thanks to Eric Dumazet for testing this under load. Signed-off-by: Stephen Hemminger Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 15 + net/sched/Kconfig | 11 + net/sched/Makefile | 1 + net/sched/sch_qfq.c | 1137 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 1164 insertions(+) create mode 100644 net/sched/sch_qfq.c (limited to 'include/linux') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index b1032a3fafdc..8062e0a68dda 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -588,4 +588,19 @@ struct tc_sfb_xstats { #define SFB_MAX_PROB 0xFFFF +/* QFQ */ +enum { + TCA_QFQ_UNSPEC, + TCA_QFQ_WEIGHT, + TCA_QFQ_LMAX, + __TCA_QFQ_MAX +}; + +#define TCA_QFQ_MAX (__TCA_QFQ_MAX - 1) + +struct tc_qfq_stats { + __u32 weight; + __u32 lmax; +}; + #endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index a7a5583d4f68..aeaa2110b699 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -239,6 +239,17 @@ config NET_SCH_CHOKE To compile this code as a module, choose M here: the module will be called sch_choke. +config NET_SCH_QFQ + tristate "Quick Fair Queueing scheduler (QFQ)" + help + Say Y here if you want to use the Quick Fair Queueing Scheduler (QFQ) + packet scheduling algorithm. + + To compile this driver as a module, choose M here: the module + will be called sch_qfq. + + If unsure, say N. + config NET_SCH_INGRESS tristate "Ingress Qdisc" depends on NET_CLS_ACT diff --git a/net/sched/Makefile b/net/sched/Makefile index 2e77b8dba22e..dc5889c0a15a 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -35,6 +35,7 @@ obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o +obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c new file mode 100644 index 000000000000..103343408593 --- /dev/null +++ b/net/sched/sch_qfq.c @@ -0,0 +1,1137 @@ +/* + * net/sched/sch_qfq.c Quick Fair Queueing Scheduler. + * + * Copyright (c) 2009 Fabio Checconi, Luigi Rizzo, and Paolo Valente. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Quick Fair Queueing + =================== + + Sources: + + Fabio Checconi, Luigi Rizzo, and Paolo Valente: "QFQ: Efficient + Packet Scheduling with Tight Bandwidth Distribution Guarantees." + + See also: + http://retis.sssup.it/~fabio/linux/qfq/ + */ + +/* + + Virtual time computations. + + S, F and V are all computed in fixed point arithmetic with + FRAC_BITS decimal bits. + + QFQ_MAX_INDEX is the maximum index allowed for a group. We need + one bit per index. + QFQ_MAX_WSHIFT is the maximum power of two supported as a weight. + + The layout of the bits is as below: + + [ MTU_SHIFT ][ FRAC_BITS ] + [ MAX_INDEX ][ MIN_SLOT_SHIFT ] + ^.__grp->index = 0 + *.__grp->slot_shift + + where MIN_SLOT_SHIFT is derived by difference from the others. + + The max group index corresponds to Lmax/w_min, where + Lmax=1<group mapping. We allow class weights that are + * in the range [1, 2^MAX_WSHIFT], and we try to map each class i to the + * group with the smallest index that can support the L_i / r_i configured + * for the class. + * + * grp->index is the index of the group; and grp->slot_shift + * is the shift for the corresponding (scaled) sigma_i. + */ +#define QFQ_MAX_INDEX 19 +#define QFQ_MAX_WSHIFT 16 + +#define QFQ_MAX_WEIGHT (1<clhash, classid); + if (clc == NULL) + return NULL; + return container_of(clc, struct qfq_class, common); +} + +static void qfq_purge_queue(struct qfq_class *cl) +{ + unsigned int len = cl->qdisc->q.qlen; + + qdisc_reset(cl->qdisc); + qdisc_tree_decrease_qlen(cl->qdisc, len); +} + +static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = { + [TCA_QFQ_WEIGHT] = { .type = NLA_U32 }, + [TCA_QFQ_LMAX] = { .type = NLA_U32 }, +}; + +/* + * Calculate a flow index, given its weight and maximum packet length. + * index = log_2(maxlen/weight) but we need to apply the scaling. + * This is used only once at flow creation. 
+ */ +static int qfq_calc_index(u32 inv_w, unsigned int maxlen) +{ + u64 slot_size = (u64)maxlen * inv_w; + unsigned long size_map; + int index = 0; + + size_map = slot_size >> QFQ_MIN_SLOT_SHIFT; + if (!size_map) + goto out; + + index = __fls(size_map) + 1; /* basically a log_2 */ + index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1))); + + if (index < 0) + index = 0; +out: + pr_debug("qfq calc_index: W = %lu, L = %u, I = %d\n", + (unsigned long) ONE_FP/inv_w, maxlen, index); + + return index; +} + +static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, + struct nlattr **tca, unsigned long *arg) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_class *cl = (struct qfq_class *)*arg; + struct nlattr *tb[TCA_QFQ_MAX + 1]; + u32 weight, lmax, inv_w; + int i, err; + + if (tca[TCA_OPTIONS] == NULL) { + pr_notice("qfq: no options\n"); + return -EINVAL; + } + + err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy); + if (err < 0) + return err; + + if (tb[TCA_QFQ_WEIGHT]) { + weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]); + if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) { + pr_notice("qfq: invalid weight %u\n", weight); + return -EINVAL; + } + } else + weight = 1; + + inv_w = ONE_FP / weight; + weight = ONE_FP / inv_w; + if (q->wsum + weight > QFQ_MAX_WSUM) { + pr_notice("qfq: total weight out of range (%u + %u)\n", + weight, q->wsum); + return -EINVAL; + } + + if (tb[TCA_QFQ_LMAX]) { + lmax = nla_get_u32(tb[TCA_QFQ_LMAX]); + if (!lmax || lmax > (1UL << QFQ_MTU_SHIFT)) { + pr_notice("qfq: invalid max length %u\n", lmax); + return -EINVAL; + } + } else + lmax = 1UL << QFQ_MTU_SHIFT; + + if (cl != NULL) { + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) + return err; + } + + sch_tree_lock(sch); + if (tb[TCA_QFQ_WEIGHT]) { + q->wsum = weight - ONE_FP / cl->inv_w; + cl->inv_w = inv_w; + } + sch_tree_unlock(sch); + + return 0; + } + + cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL); + if (cl == NULL) + return -ENOBUFS; + + cl->refcnt = 1; + cl->common.classid = classid; + cl->lmax = lmax; + cl->inv_w = inv_w; + i = qfq_calc_index(cl->inv_w, cl->lmax); + + cl->grp = &q->groups[i]; + q->wsum += weight; + + cl->qdisc = qdisc_create_dflt(sch->dev_queue, + &pfifo_qdisc_ops, classid); + if (cl->qdisc == NULL) + cl->qdisc = &noop_qdisc; + + if (tca[TCA_RATE]) { + err = gen_new_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) { + qdisc_destroy(cl->qdisc); + kfree(cl); + return err; + } + } + + sch_tree_lock(sch); + qdisc_class_hash_insert(&q->clhash, &cl->common); + sch_tree_unlock(sch); + + qdisc_class_hash_grow(sch, &q->clhash); + + *arg = (unsigned long)cl; + return 0; +} + +static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl) +{ + struct qfq_sched *q = qdisc_priv(sch); + + if (cl->inv_w) { + q->wsum -= ONE_FP / cl->inv_w; + cl->inv_w = 0; + } + + gen_kill_estimator(&cl->bstats, &cl->rate_est); + qdisc_destroy(cl->qdisc); + kfree(cl); +} + +static int qfq_delete_class(struct Qdisc *sch, unsigned long arg) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_class *cl = (struct qfq_class *)arg; + + if (cl->filter_cnt > 0) + return -EBUSY; + + sch_tree_lock(sch); + + qfq_purge_queue(cl); + qdisc_class_hash_remove(&q->clhash, &cl->common); + + BUG_ON(--cl->refcnt == 0); + /* + * This shouldn't happen: we "hold" one cops->get() when called + * from tc_ctl_tclass; the destroy method is done 
from cops->put(). + */ + + sch_tree_unlock(sch); + return 0; +} + +static unsigned long qfq_get_class(struct Qdisc *sch, u32 classid) +{ + struct qfq_class *cl = qfq_find_class(sch, classid); + + if (cl != NULL) + cl->refcnt++; + + return (unsigned long)cl; +} + +static void qfq_put_class(struct Qdisc *sch, unsigned long arg) +{ + struct qfq_class *cl = (struct qfq_class *)arg; + + if (--cl->refcnt == 0) + qfq_destroy_class(sch, cl); +} + +static struct tcf_proto **qfq_tcf_chain(struct Qdisc *sch, unsigned long cl) +{ + struct qfq_sched *q = qdisc_priv(sch); + + if (cl) + return NULL; + + return &q->filter_list; +} + +static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent, + u32 classid) +{ + struct qfq_class *cl = qfq_find_class(sch, classid); + + if (cl != NULL) + cl->filter_cnt++; + + return (unsigned long)cl; +} + +static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg) +{ + struct qfq_class *cl = (struct qfq_class *)arg; + + cl->filter_cnt--; +} + +static int qfq_graft_class(struct Qdisc *sch, unsigned long arg, + struct Qdisc *new, struct Qdisc **old) +{ + struct qfq_class *cl = (struct qfq_class *)arg; + + if (new == NULL) { + new = qdisc_create_dflt(sch->dev_queue, + &pfifo_qdisc_ops, cl->common.classid); + if (new == NULL) + new = &noop_qdisc; + } + + sch_tree_lock(sch); + qfq_purge_queue(cl); + *old = cl->qdisc; + cl->qdisc = new; + sch_tree_unlock(sch); + return 0; +} + +static struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg) +{ + struct qfq_class *cl = (struct qfq_class *)arg; + + return cl->qdisc; +} + +static int qfq_dump_class(struct Qdisc *sch, unsigned long arg, + struct sk_buff *skb, struct tcmsg *tcm) +{ + struct qfq_class *cl = (struct qfq_class *)arg; + struct nlattr *nest; + + tcm->tcm_parent = TC_H_ROOT; + tcm->tcm_handle = cl->common.classid; + tcm->tcm_info = cl->qdisc->handle; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (nest == NULL) + goto nla_put_failure; + NLA_PUT_U32(skb, TCA_QFQ_WEIGHT, ONE_FP/cl->inv_w); + NLA_PUT_U32(skb, TCA_QFQ_LMAX, cl->lmax); + return nla_nest_end(skb, nest); + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg, + struct gnet_dump *d) +{ + struct qfq_class *cl = (struct qfq_class *)arg; + struct tc_qfq_stats xstats; + + memset(&xstats, 0, sizeof(xstats)); + cl->qdisc->qstats.qlen = cl->qdisc->q.qlen; + + xstats.weight = ONE_FP/cl->inv_w; + xstats.lmax = cl->lmax; + + if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || + gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || + gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0) + return -1; + + return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); +} + +static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_class *cl; + struct hlist_node *n; + unsigned int i; + + if (arg->stop) + return; + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + if (arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(sch, (unsigned long)cl, arg) < 0) { + arg->stop = 1; + return; + } + arg->count++; + } + } +} + +static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch, + int *qerr) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_class *cl; + struct tcf_result res; + int result; + + if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) { + pr_debug("qfq_classify: found %d\n", skb->priority); + 
cl = qfq_find_class(sch, skb->priority); + if (cl != NULL) + return cl; + } + + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + result = tc_classify(skb, q->filter_list, &res); + if (result >= 0) { +#ifdef CONFIG_NET_CLS_ACT + switch (result) { + case TC_ACT_QUEUED: + case TC_ACT_STOLEN: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + case TC_ACT_SHOT: + return NULL; + } +#endif + cl = (struct qfq_class *)res.class; + if (cl == NULL) + cl = qfq_find_class(sch, res.classid); + return cl; + } + + return NULL; +} + +/* Generic comparison function, handling wraparound. */ +static inline int qfq_gt(u64 a, u64 b) +{ + return (s64)(a - b) > 0; +} + +/* Round a precise timestamp to its slotted value. */ +static inline u64 qfq_round_down(u64 ts, unsigned int shift) +{ + return ts & ~((1ULL << shift) - 1); +} + +/* return the pointer to the group with lowest index in the bitmap */ +static inline struct qfq_group *qfq_ffs(struct qfq_sched *q, + unsigned long bitmap) +{ + int index = __ffs(bitmap); + return &q->groups[index]; +} +/* Calculate a mask to mimic what would be ffs_from(). */ +static inline unsigned long mask_from(unsigned long bitmap, int from) +{ + return bitmap & ~((1UL << from) - 1); +} + +/* + * The state computation relies on ER=0, IR=1, EB=2, IB=3 + * First compute eligibility comparing grp->S, q->V, + * then check if someone is blocking us and possibly add EB + */ +static int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp) +{ + /* if S > V we are not eligible */ + unsigned int state = qfq_gt(grp->S, q->V); + unsigned long mask = mask_from(q->bitmaps[ER], grp->index); + struct qfq_group *next; + + if (mask) { + next = qfq_ffs(q, mask); + if (qfq_gt(grp->F, next->F)) + state |= EB; + } + + return state; +} + + +/* + * In principle + * q->bitmaps[dst] |= q->bitmaps[src] & mask; + * q->bitmaps[src] &= ~mask; + * but we should make sure that src != dst + */ +static inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask, + int src, int dst) +{ + q->bitmaps[dst] |= q->bitmaps[src] & mask; + q->bitmaps[src] &= ~mask; +} + +static void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F) +{ + unsigned long mask = mask_from(q->bitmaps[ER], index + 1); + struct qfq_group *next; + + if (mask) { + next = qfq_ffs(q, mask); + if (!qfq_gt(next->F, old_F)) + return; + } + + mask = (1UL << index) - 1; + qfq_move_groups(q, mask, EB, ER); + qfq_move_groups(q, mask, IB, IR); +} + +/* + * perhaps + * + old_V ^= q->V; + old_V >>= QFQ_MIN_SLOT_SHIFT; + if (old_V) { + ... + } + * + */ +static void qfq_make_eligible(struct qfq_sched *q, u64 old_V) +{ + unsigned long vslot = q->V >> QFQ_MIN_SLOT_SHIFT; + unsigned long old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT; + + if (vslot != old_vslot) { + unsigned long mask = (1UL << fls(vslot ^ old_vslot)) - 1; + qfq_move_groups(q, mask, IR, ER); + qfq_move_groups(q, mask, IB, EB); + } +} + + +/* + * XXX we should make sure that slot becomes less than 32. + * This is guaranteed by the input values. + * roundedS is always cl->S rounded on grp->slot_shift bits. + */ +static void qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl, + u64 roundedS) +{ + u64 slot = (roundedS - grp->S) >> grp->slot_shift; + unsigned int i = (grp->front + slot) % QFQ_MAX_SLOTS; + + hlist_add_head(&cl->next, &grp->slots[i]); + __set_bit(slot, &grp->full_slots); +} + +/* Maybe introduce hlist_first_entry?? 
*/ +static struct qfq_class *qfq_slot_head(struct qfq_group *grp) +{ + return hlist_entry(grp->slots[grp->front].first, + struct qfq_class, next); +} + +/* + * remove the entry from the slot + */ +static void qfq_front_slot_remove(struct qfq_group *grp) +{ + struct qfq_class *cl = qfq_slot_head(grp); + + BUG_ON(!cl); + hlist_del(&cl->next); + if (hlist_empty(&grp->slots[grp->front])) + __clear_bit(0, &grp->full_slots); +} + +/* + * Returns the first full queue in a group. As a side effect, + * adjust the bucket list so the first non-empty bucket is at + * position 0 in full_slots. + */ +static struct qfq_class *qfq_slot_scan(struct qfq_group *grp) +{ + unsigned int i; + + pr_debug("qfq slot_scan: grp %u full %#lx\n", + grp->index, grp->full_slots); + + if (grp->full_slots == 0) + return NULL; + + i = __ffs(grp->full_slots); /* zero based */ + if (i > 0) { + grp->front = (grp->front + i) % QFQ_MAX_SLOTS; + grp->full_slots >>= i; + } + + return qfq_slot_head(grp); +} + +/* + * adjust the bucket list. When the start time of a group decreases, + * we move the index down (modulo QFQ_MAX_SLOTS) so we don't need to + * move the objects. The mask of occupied slots must be shifted + * because we use ffs() to find the first non-empty slot. + * This covers decreases in the group's start time, but what about + * increases of the start time ? + * Here too we should make sure that i is less than 32 + */ +static void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS) +{ + unsigned int i = (grp->S - roundedS) >> grp->slot_shift; + + grp->full_slots <<= i; + grp->front = (grp->front - i) % QFQ_MAX_SLOTS; +} + +static void qfq_update_eligible(struct qfq_sched *q, u64 old_V) +{ + struct qfq_group *grp; + unsigned long ineligible; + + ineligible = q->bitmaps[IR] | q->bitmaps[IB]; + if (ineligible) { + if (!q->bitmaps[ER]) { + grp = qfq_ffs(q, ineligible); + if (qfq_gt(grp->S, q->V)) + q->V = grp->S; + } + qfq_make_eligible(q, old_V); + } +} + +/* What is length of next packet in queue (0 if queue is empty) */ +static unsigned int qdisc_peek_len(struct Qdisc *sch) +{ + struct sk_buff *skb; + + skb = sch->ops->peek(sch); + return skb ? qdisc_pkt_len(skb) : 0; +} + +/* + * Updates the class, returns true if also the group needs to be updated. 
+ */ +static bool qfq_update_class(struct qfq_group *grp, struct qfq_class *cl) +{ + unsigned int len = qdisc_peek_len(cl->qdisc); + + cl->S = cl->F; + if (!len) + qfq_front_slot_remove(grp); /* queue is empty */ + else { + u64 roundedS; + + cl->F = cl->S + (u64)len * cl->inv_w; + roundedS = qfq_round_down(cl->S, grp->slot_shift); + if (roundedS == grp->S) + return false; + + qfq_front_slot_remove(grp); + qfq_slot_insert(grp, cl, roundedS); + } + + return true; +} + +static struct sk_buff *qfq_dequeue(struct Qdisc *sch) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_group *grp; + struct qfq_class *cl; + struct sk_buff *skb; + unsigned int len; + u64 old_V; + + if (!q->bitmaps[ER]) + return NULL; + + grp = qfq_ffs(q, q->bitmaps[ER]); + + cl = qfq_slot_head(grp); + skb = qdisc_dequeue_peeked(cl->qdisc); + if (!skb) { + WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n"); + return NULL; + } + + sch->q.qlen--; + qdisc_bstats_update(sch, skb); + + old_V = q->V; + len = qdisc_pkt_len(skb); + q->V += (u64)len * IWSUM; + pr_debug("qfq dequeue: len %u F %lld now %lld\n", + len, (unsigned long long) cl->F, (unsigned long long) q->V); + + if (qfq_update_class(grp, cl)) { + u64 old_F = grp->F; + + cl = qfq_slot_scan(grp); + if (!cl) + __clear_bit(grp->index, &q->bitmaps[ER]); + else { + u64 roundedS = qfq_round_down(cl->S, grp->slot_shift); + unsigned int s; + + if (grp->S == roundedS) + goto skip_unblock; + grp->S = roundedS; + grp->F = roundedS + (2ULL << grp->slot_shift); + __clear_bit(grp->index, &q->bitmaps[ER]); + s = qfq_calc_state(q, grp); + __set_bit(grp->index, &q->bitmaps[s]); + } + + qfq_unblock_groups(q, grp->index, old_F); + } + +skip_unblock: + qfq_update_eligible(q, old_V); + + return skb; +} + +/* + * Assign a reasonable start time for a new flow k in group i. + * Admissible values for \hat(F) are multiples of \sigma_i + * no greater than V+\sigma_i . Larger values mean that + * we had a wraparound so we consider the timestamp to be stale. + * + * If F is not stale and F >= V then we set S = F. + * Otherwise we should assign S = V, but this may violate + * the ordering in ER. So, if we have groups in ER, set S to + * the F_j of the first group j which would be blocking us. + * We are guaranteed not to move S backward because + * otherwise our group i would still be blocked. 
+ */ +static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl) +{ + unsigned long mask; + uint32_t limit, roundedF; + int slot_shift = cl->grp->slot_shift; + + roundedF = qfq_round_down(cl->F, slot_shift); + limit = qfq_round_down(q->V, slot_shift) + (1UL << slot_shift); + + if (!qfq_gt(cl->F, q->V) || qfq_gt(roundedF, limit)) { + /* timestamp was stale */ + mask = mask_from(q->bitmaps[ER], cl->grp->index); + if (mask) { + struct qfq_group *next = qfq_ffs(q, mask); + if (qfq_gt(roundedF, next->F)) { + cl->S = next->F; + return; + } + } + cl->S = q->V; + } else /* timestamp is not stale */ + cl->S = cl->F; +} + +static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_group *grp; + struct qfq_class *cl; + int err; + u64 roundedS; + int s; + + cl = qfq_classify(skb, sch, &err); + if (cl == NULL) { + if (err & __NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return err; + } + pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid); + + err = qdisc_enqueue(skb, cl->qdisc); + if (unlikely(err != NET_XMIT_SUCCESS)) { + pr_debug("qfq_enqueue: enqueue failed %d\n", err); + if (net_xmit_drop_count(err)) { + cl->qstats.drops++; + sch->qstats.drops++; + } + return err; + } + + bstats_update(&cl->bstats, skb); + ++sch->q.qlen; + + /* If the new skb is not the head of queue, then done here. */ + if (cl->qdisc->q.qlen != 1) + return err; + + /* If reach this point, queue q was idle */ + grp = cl->grp; + qfq_update_start(q, cl); + + /* compute new finish time and rounded start. */ + cl->F = cl->S + (u64)qdisc_pkt_len(skb) * cl->inv_w; + roundedS = qfq_round_down(cl->S, grp->slot_shift); + + /* + * insert cl in the correct bucket. + * If cl->S >= grp->S we don't need to adjust the + * bucket list and simply go to the insertion phase. + * Otherwise grp->S is decreasing, we must make room + * in the bucket list, and also recompute the group state. + * Finally, if there were no flows in this group and nobody + * was in ER make sure to adjust V. + */ + if (grp->full_slots) { + if (!qfq_gt(grp->S, cl->S)) + goto skip_update; + + /* create a slot for this cl->S */ + qfq_slot_rotate(grp, roundedS); + /* group was surely ineligible, remove */ + __clear_bit(grp->index, &q->bitmaps[IR]); + __clear_bit(grp->index, &q->bitmaps[IB]); + } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V)) + q->V = roundedS; + + grp->S = roundedS; + grp->F = roundedS + (2ULL << grp->slot_shift); + s = qfq_calc_state(q, grp); + __set_bit(grp->index, &q->bitmaps[s]); + + pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld\n", + s, q->bitmaps[s], + (unsigned long long) cl->S, + (unsigned long long) cl->F, + (unsigned long long) q->V); + +skip_update: + qfq_slot_insert(grp, cl, roundedS); + + return err; +} + + +static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp, + struct qfq_class *cl) +{ + unsigned int i, offset; + u64 roundedS; + + roundedS = qfq_round_down(cl->S, grp->slot_shift); + offset = (roundedS - grp->S) >> grp->slot_shift; + i = (grp->front + offset) % QFQ_MAX_SLOTS; + + hlist_del(&cl->next); + if (hlist_empty(&grp->slots[i])) + __clear_bit(offset, &grp->full_slots); +} + +/* + * called to forcibly destroy a queue. + * If the queue is not in the front bucket, or if it has + * other queues in the front bucket, we can simply remove + * the queue with no other side effects. + * Otherwise we must propagate the event up. 
+ */ +static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl) +{ + struct qfq_group *grp = cl->grp; + unsigned long mask; + u64 roundedS; + int s; + + cl->F = cl->S; + qfq_slot_remove(q, grp, cl); + + if (!grp->full_slots) { + __clear_bit(grp->index, &q->bitmaps[IR]); + __clear_bit(grp->index, &q->bitmaps[EB]); + __clear_bit(grp->index, &q->bitmaps[IB]); + + if (test_bit(grp->index, &q->bitmaps[ER]) && + !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) { + mask = q->bitmaps[ER] & ((1UL << grp->index) - 1); + if (mask) + mask = ~((1UL << __fls(mask)) - 1); + else + mask = ~0UL; + qfq_move_groups(q, mask, EB, ER); + qfq_move_groups(q, mask, IB, IR); + } + __clear_bit(grp->index, &q->bitmaps[ER]); + } else if (hlist_empty(&grp->slots[grp->front])) { + cl = qfq_slot_scan(grp); + roundedS = qfq_round_down(cl->S, grp->slot_shift); + if (grp->S != roundedS) { + __clear_bit(grp->index, &q->bitmaps[ER]); + __clear_bit(grp->index, &q->bitmaps[IR]); + __clear_bit(grp->index, &q->bitmaps[EB]); + __clear_bit(grp->index, &q->bitmaps[IB]); + grp->S = roundedS; + grp->F = roundedS + (2ULL << grp->slot_shift); + s = qfq_calc_state(q, grp); + __set_bit(grp->index, &q->bitmaps[s]); + } + } + + qfq_update_eligible(q, q->V); +} + +static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_class *cl = (struct qfq_class *)arg; + + if (cl->qdisc->q.qlen == 0) + qfq_deactivate_class(q, cl); +} + +static unsigned int qfq_drop(struct Qdisc *sch) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_group *grp; + unsigned int i, j, len; + + for (i = 0; i <= QFQ_MAX_INDEX; i++) { + grp = &q->groups[i]; + for (j = 0; j < QFQ_MAX_SLOTS; j++) { + struct qfq_class *cl; + struct hlist_node *n; + + hlist_for_each_entry(cl, n, &grp->slots[j], next) { + + if (!cl->qdisc->ops->drop) + continue; + + len = cl->qdisc->ops->drop(cl->qdisc); + if (len > 0) { + sch->q.qlen--; + if (!cl->qdisc->q.qlen) + qfq_deactivate_class(q, cl); + + return len; + } + } + } + } + + return 0; +} + +static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_group *grp; + int i, j, err; + + err = qdisc_class_hash_init(&q->clhash); + if (err < 0) + return err; + + for (i = 0; i <= QFQ_MAX_INDEX; i++) { + grp = &q->groups[i]; + grp->index = i; + grp->slot_shift = QFQ_MTU_SHIFT + FRAC_BITS + - (QFQ_MAX_INDEX - i); + for (j = 0; j < QFQ_MAX_SLOTS; j++) + INIT_HLIST_HEAD(&grp->slots[j]); + } + + return 0; +} + +static void qfq_reset_qdisc(struct Qdisc *sch) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_group *grp; + struct qfq_class *cl; + struct hlist_node *n, *tmp; + unsigned int i, j; + + for (i = 0; i <= QFQ_MAX_INDEX; i++) { + grp = &q->groups[i]; + for (j = 0; j < QFQ_MAX_SLOTS; j++) { + hlist_for_each_entry_safe(cl, n, tmp, + &grp->slots[j], next) { + qfq_deactivate_class(q, cl); + } + } + } + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) + qdisc_reset(cl->qdisc); + } + sch->q.qlen = 0; +} + +static void qfq_destroy_qdisc(struct Qdisc *sch) +{ + struct qfq_sched *q = qdisc_priv(sch); + struct qfq_class *cl; + struct hlist_node *n, *next; + unsigned int i; + + tcf_destroy_chain(&q->filter_list); + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + common.hnode) { + qfq_destroy_class(sch, cl); + } + } + qdisc_class_hash_destroy(&q->clhash); +} + +static const struct 
Qdisc_class_ops qfq_class_ops = { + .change = qfq_change_class, + .delete = qfq_delete_class, + .get = qfq_get_class, + .put = qfq_put_class, + .tcf_chain = qfq_tcf_chain, + .bind_tcf = qfq_bind_tcf, + .unbind_tcf = qfq_unbind_tcf, + .graft = qfq_graft_class, + .leaf = qfq_class_leaf, + .qlen_notify = qfq_qlen_notify, + .dump = qfq_dump_class, + .dump_stats = qfq_dump_class_stats, + .walk = qfq_walk, +}; + +static struct Qdisc_ops qfq_qdisc_ops __read_mostly = { + .cl_ops = &qfq_class_ops, + .id = "qfq", + .priv_size = sizeof(struct qfq_sched), + .enqueue = qfq_enqueue, + .dequeue = qfq_dequeue, + .peek = qdisc_peek_dequeued, + .drop = qfq_drop, + .init = qfq_init_qdisc, + .reset = qfq_reset_qdisc, + .destroy = qfq_destroy_qdisc, + .owner = THIS_MODULE, +}; + +static int __init qfq_init(void) +{ + return register_qdisc(&qfq_qdisc_ops); +} + +static void __exit qfq_exit(void) +{ + unregister_qdisc(&qfq_qdisc_ops); +} + +module_init(qfq_init); +module_exit(qfq_exit); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 4dd365fd55991b4e54a1d1c255081e6370b9da29 Mon Sep 17 00:00:00 2001 From: Bing Zhao Date: Wed, 30 Mar 2011 18:01:15 -0700 Subject: ieee80211: add HT extended capabilities masks IEEE Std 802.11n, Oct. 29, 2009: 7.3.2.56.5 HT Extended Capabilities field Signed-off-by: Bing Zhao Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 2d1c6117d92c..79690b710665 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -884,6 +884,15 @@ struct ieee80211_ht_cap { #define IEEE80211_HT_CAP_40MHZ_INTOLERANT 0x4000 #define IEEE80211_HT_CAP_LSIG_TXOP_PROT 0x8000 +/* 802.11n HT extended capabilities masks (for extended_ht_cap_info) */ +#define IEEE80211_HT_EXT_CAP_PCO 0x0001 +#define IEEE80211_HT_EXT_CAP_PCO_TIME 0x0006 +#define IEEE80211_HT_EXT_CAP_PCO_TIME_SHIFT 1 +#define IEEE80211_HT_EXT_CAP_MCS_FB 0x0300 +#define IEEE80211_HT_EXT_CAP_MCS_FB_SHIFT 8 +#define IEEE80211_HT_EXT_CAP_HTC_SUP 0x0400 +#define IEEE80211_HT_EXT_CAP_RD_RESPONDER 0x0800 + /* 802.11n HT capability AMPDU settings (for ampdu_params_info) */ #define IEEE80211_HT_AMPDU_PARM_FACTOR 0x03 #define IEEE80211_HT_AMPDU_PARM_DENSITY 0x1C -- cgit v1.2.3 From ce57d9e694d98e421e329fbac5d6f5dc5e9e101e Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Fri, 1 Apr 2011 12:06:48 +0200 Subject: ssb: trivial: use u8 for chip_rev (its mask is 0xF) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rafał Miłecki Signed-off-by: John W. Linville --- drivers/ssb/scan.c | 2 +- include/linux/ssb/ssb.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/ssb/scan.c b/drivers/ssb/scan.c index 29884c00c4d5..7dca719fbcfb 100644 --- a/drivers/ssb/scan.c +++ b/drivers/ssb/scan.c @@ -307,7 +307,7 @@ int ssb_bus_scan(struct ssb_bus *bus, } else { if (bus->bustype == SSB_BUSTYPE_PCI) { bus->chip_id = pcidev_to_chipid(bus->host_pci); - pci_read_config_word(bus->host_pci, PCI_REVISION_ID, + pci_read_config_byte(bus->host_pci, PCI_REVISION_ID, &bus->chip_rev); bus->chip_package = 0; } else { diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 9659eff52ca2..7e99b348834c 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -308,7 +308,7 @@ struct ssb_bus { /* ID information about the Chip.
*/ u16 chip_id; - u16 chip_rev; + u8 chip_rev; u16 sprom_offset; u16 sprom_size; /* number of words in sprom */ u8 chip_package; -- cgit v1.2.3 From c6e1a0d12ca7b4f22c58e55a16beacfb7d3d8462 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 4 Apr 2011 22:30:30 -0700 Subject: net: Allow no-cache copy from user on transmit This patch uses __copy_from_user_nocache on transmit to bypass data cache for a performance improvement. skb_add_data_nocache and skb_copy_to_page_nocache can be called by sendmsg functions to use this feature; initial support is in tcp_sendmsg. This functionality is configurable per device using ethtool. Presumably, this feature would only be useful when the driver does not touch the data. The feature is turned on by default if a device indicates that it does some form of checksum offload; it is off by default for devices that do no checksum offload or indicate no checksum is necessary. For the former case copy-checksum is probably done anyway; in the latter case the device is likely loopback, in which case the no-cache copy is probably not beneficial. This patch was tested using 200 instances of netperf TCP_RR with 1400 byte request and one byte reply. Platform is 16 core AMD x86. No-cache copy disabled: 672703 tps, 97.13% utilization, 50/90/99% latency 244.31 484.205 1028.41 No-cache copy enabled: 702113 tps, 96.16% utilization, 50/90/99% latency 238.56 467.56 956.955 Using 14000 byte request and response sizes demonstrates the effects more dramatically: No-cache copy disabled: 79571 tps, 34.34% utilization, 50/90/95% latency 1584.46 2319.59 5001.76 No-cache copy enabled: 83856 tps, 34.81% utilization, 50/90/95% latency 2508.42 2622.62 2735.88 Note especially the effect on the latency tail (95th percentile). This seems to provide a nice performance improvement and is consistent in the tests I ran. Presumably, this would provide the greatest benefits in the presence of an application workload stressing the cache and a lot of transmit data happening. Signed-off-by: Tom Herbert Signed-off-by: David S.
Miller --- drivers/net/bonding/bond_main.c | 2 +- include/linux/netdevice.h | 3 ++- include/net/sock.h | 53 +++++++++++++++++++++++++++++++++++++++++ net/core/dev.c | 12 ++++++++++ net/core/ethtool.c | 2 +- net/ipv4/tcp.c | 7 +++--- 6 files changed, 73 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 16d6fe954695..b51e021354b5 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1407,7 +1407,7 @@ static int bond_compute_features(struct bonding *bond) int i; features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES); - features |= NETIF_F_GSO_MASK | NETIF_F_NO_CSUM; + features |= NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_NOCACHE_COPY; if (!bond->first_slave) goto done; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a4664cc68e2b..09d262415769 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1066,6 +1066,7 @@ struct net_device { #define NETIF_F_NTUPLE (1 << 27) /* N-tuple filters supported */ #define NETIF_F_RXHASH (1 << 28) /* Receive hashing offload */ #define NETIF_F_RXCSUM (1 << 29) /* Receive checksumming offload */ +#define NETIF_F_NOCACHE_COPY (1 << 30) /* Use no-cache copyfromuser */ /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 @@ -1081,7 +1082,7 @@ struct net_device { /* = all defined minus driver/device-class-related */ #define NETIF_F_NEVER_CHANGE (NETIF_F_HIGHDMA | NETIF_F_VLAN_CHALLENGED | \ NETIF_F_LLTX | NETIF_F_NETNS_LOCAL) -#define NETIF_F_ETHTOOL_BITS (0x3f3fffff & ~NETIF_F_NEVER_CHANGE) +#define NETIF_F_ETHTOOL_BITS (0x7f3fffff & ~NETIF_F_NEVER_CHANGE) /* List of features with software fallbacks. */ #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ diff --git a/include/net/sock.h b/include/net/sock.h index da0534d3401c..43bd515e92fd 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -52,6 +52,7 @@ #include #include #include +#include #include #include @@ -1389,6 +1390,58 @@ static inline void sk_nocaps_add(struct sock *sk, int flags) sk->sk_route_caps &= ~flags; } +static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb, + char __user *from, char *to, + int copy) +{ + if (skb->ip_summed == CHECKSUM_NONE) { + int err = 0; + __wsum csum = csum_and_copy_from_user(from, to, copy, 0, &err); + if (err) + return err; + skb->csum = csum_block_add(skb->csum, csum, skb->len); + } else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) { + if (!access_ok(VERIFY_READ, from, copy) || + __copy_from_user_nocache(to, from, copy)) + return -EFAULT; + } else if (copy_from_user(to, from, copy)) + return -EFAULT; + + return 0; +} + +static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb, + char __user *from, int copy) +{ + int err; + + err = skb_do_copy_data_nocache(sk, skb, from, skb_put(skb, copy), copy); + if (err) + __skb_trim(skb, skb->len); + + return err; +} + +static inline int skb_copy_to_page_nocache(struct sock *sk, char __user *from, + struct sk_buff *skb, + struct page *page, + int off, int copy) +{ + int err; + + err = skb_do_copy_data_nocache(sk, skb, from, + page_address(page) + off, copy); + if (err) + return err; + + skb->len += copy; + skb->data_len += copy; + skb->truesize += copy; + sk->sk_wmem_queued += copy; + sk_mem_charge(sk, copy); + return 0; +} + static inline int skb_copy_to_page(struct sock *sk, char __user *from, struct sk_buff *skb, struct page *page, int off, int copy) diff --git a/net/core/dev.c 
b/net/core/dev.c index 02f56376fe99..5d0b4f6f1a72 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5425,6 +5425,14 @@ int register_netdevice(struct net_device *dev) dev->features &= ~NETIF_F_GSO; } + /* Turn on no cache copy if HW is doing checksum */ + dev->hw_features |= NETIF_F_NOCACHE_COPY; + if ((dev->features & NETIF_F_ALL_CSUM) && + !(dev->features & NETIF_F_NO_CSUM)) { + dev->wanted_features |= NETIF_F_NOCACHE_COPY; + dev->features |= NETIF_F_NOCACHE_COPY; + } + /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, * vlan_dev_init() will do the dev->features check, so these features * are enabled only if supported by underlying device. @@ -6182,6 +6190,10 @@ u32 netdev_increment_features(u32 all, u32 one, u32 mask) } } + /* If device can't no cache copy, don't do for all */ + if (!(one & NETIF_F_NOCACHE_COPY)) + all &= ~NETIF_F_NOCACHE_COPY; + one |= NETIF_F_ALL_CSUM; one |= all & NETIF_F_ONE_FOR_ALL; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 439e4b0e1312..719670ae199c 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -359,7 +359,7 @@ static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GS /* NETIF_F_NTUPLE */ "rx-ntuple-filter", /* NETIF_F_RXHASH */ "rx-hashing", /* NETIF_F_RXCSUM */ "rx-checksum", - "", + /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy" "", }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b22d45010545..054a59d21eb0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -999,7 +999,8 @@ new_segment: /* We have some space in skb head. Superb! */ if (copy > skb_tailroom(skb)) copy = skb_tailroom(skb); - if ((err = skb_add_data(skb, from, copy)) != 0) + err = skb_add_data_nocache(sk, skb, from, copy); + if (err) goto do_fault; } else { int merge = 0; @@ -1042,8 +1043,8 @@ new_segment: /* Time to copy data. We are close to * the end! */ - err = skb_copy_to_page(sk, from, skb, page, - off, copy); + err = skb_copy_to_page_nocache(sk, from, skb, + page, off, copy); if (err) { /* If this page was new, give it to the * socket so it does not get leaked. 
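As a minimal usage sketch (not part of the patch above), this is how another protocol's sendmsg path could adopt the new helper in the same way as the tcp_sendmsg() hunk; the caller below is hypothetical. skb_do_copy_data_nocache(), reached via skb_add_data_nocache(), chooses between a copy-and-checksum, a cache-bypassing copy (when sk->sk_route_caps has NETIF_F_NOCACHE_COPY), and a plain copy_from_user(), so callers need no feature test of their own.

#include <net/sock.h>

/* Hypothetical caller: append one chunk of user data to the tail of skb. */
static int example_append_data(struct sock *sk, struct sk_buff *skb,
                               char __user *from, int copy)
{
        if (copy > skb_tailroom(skb))
                copy = skb_tailroom(skb);       /* bound by available tailroom */
        return skb_add_data_nocache(sk, skb, from, copy);
}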
-- cgit v1.2.3 From e20b5b61a36bd5b80eea064c0f2e73285dbe0d3b Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 1 Apr 2011 22:52:34 +0100 Subject: ethtool: Convert struct ethtool_ops comment to kernel-doc format Signed-off-by: Ben Hutchings --- include/linux/ethtool.h | 80 ++++++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c8fcbdd2b0e7..ab12f84c17bd 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -683,63 +683,53 @@ void ethtool_ntuple_flush(struct net_device *dev); bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); /** - * &ethtool_ops - Alter and report network device settings - * get_settings: Get device-specific settings - * set_settings: Set device-specific settings - * get_drvinfo: Report driver information - * get_regs: Get device registers - * get_wol: Report whether Wake-on-Lan is enabled - * set_wol: Turn Wake-on-Lan on or off - * get_msglevel: Report driver message level - * set_msglevel: Set driver message level - * nway_reset: Restart autonegotiation - * get_link: Get link status - * get_eeprom: Read data from the device EEPROM - * set_eeprom: Write data to the device EEPROM - * get_coalesce: Get interrupt coalescing parameters - * set_coalesce: Set interrupt coalescing parameters - * get_ringparam: Report ring sizes - * set_ringparam: Set ring sizes - * get_pauseparam: Report pause parameters - * set_pauseparam: Set pause parameters - * get_rx_csum: Report whether receive checksums are turned on or off - * set_rx_csum: Turn receive checksum on or off - * get_tx_csum: Report whether transmit checksums are turned on or off - * set_tx_csum: Turn transmit checksums on or off - * get_sg: Report whether scatter-gather is enabled - * set_sg: Turn scatter-gather on or off - * get_tso: Report whether TCP segmentation offload is enabled - * set_tso: Turn TCP segmentation offload on or off - * get_ufo: Report whether UDP fragmentation offload is enabled - * set_ufo: Turn UDP fragmentation offload on or off - * self_test: Run specified self-tests - * get_strings: Return a set of strings that describe the requested objects - * phys_id: Identify the device - * get_stats: Return statistics about the device - * get_flags: get 32-bit flags bitmap - * set_flags: set 32-bit flags bitmap - * - * Description: - * - * get_settings: + * struct ethtool_ops - Alter and report network device settings + * @get_settings: Get device-specific settings. * @get_settings is passed an &ethtool_cmd to fill in. It returns * an negative errno or zero. - * - * set_settings: + * @set_settings: Set device-specific settings. * @set_settings is passed an &ethtool_cmd and should attempt to set * all the settings this device supports. It may return an error value * if something goes wrong (otherwise 0). - * - * get_eeprom: + * @get_drvinfo: Report driver information + * @get_regs: Get device registers + * @get_wol: Report whether Wake-on-Lan is enabled + * @set_wol: Turn Wake-on-Lan on or off + * @get_msglevel: Report driver message level + * @set_msglevel: Set driver message level + * @nway_reset: Restart autonegotiation + * @get_link: Get link status + * @get_eeprom: Read data from the device EEPROM. * Should fill in the magic field. Don't need to check len for zero * or wraparound. Fill in the data argument with the eeprom values * from offset to offset + len. Update len to the amount read. * Returns an error or zero.
- * - * set_eeprom: + * @set_eeprom: Write data to the device EEPROM. * Should validate the magic field. Don't need to check len for zero * or wraparound. Update len to the amount written. Returns an error * or zero. + * @get_coalesce: Get interrupt coalescing parameters + * @set_coalesce: Set interrupt coalescing parameters + * @get_ringparam: Report ring sizes + * @set_ringparam: Set ring sizes + * @get_pauseparam: Report pause parameters + * @set_pauseparam: Set pause parameters + * @get_rx_csum: Report whether receive checksums are turned on or off + * @set_rx_csum: Turn receive checksum on or off + * @get_tx_csum: Report whether transmit checksums are turned on or off + * @set_tx_csum: Turn transmit checksums on or off + * @get_sg: Report whether scatter-gather is enabled + * @set_sg: Turn scatter-gather on or off + * @get_tso: Report whether TCP segmentation offload is enabled + * @set_tso: Turn TCP segmentation offload on or off + * @get_ufo: Report whether UDP fragmentation offload is enabled + * @set_ufo: Turn UDP fragmentation offload on or off + * @self_test: Run specified self-tests + * @get_strings: Return a set of strings that describe the requested objects + * @phys_id: Identify the device + * @get_stats: Return statistics about the device + * @get_flags: get 32-bit flags bitmap + * @set_flags: set 32-bit flags bitmap */ struct ethtool_ops { int (*get_settings)(struct net_device *, struct ethtool_cmd *); -- cgit v1.2.3 From 8717d07b1143e0f150921f5bd7cfe7af579a995a Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 1 Apr 2011 23:57:41 +0100 Subject: ethtool: Fill out and update comment for struct ethtool_ops Briefly document all operations (except get_rx_ntuple), including whether they may return an error code and whether they are deprecated. Also mention some things that should be handled by the ethtool core rather than by drivers. Briefly document general requirements for callers. Signed-off-by: Ben Hutchings --- include/linux/ethtool.h | 124 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 93 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index ab12f84c17bd..ead7dcb1bf1e 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -683,22 +683,28 @@ void ethtool_ntuple_flush(struct net_device *dev); bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); /** - * struct ethtool_ops - Alter and report network device settings - * @get_settings: Get device-specific settings. - * @get_settings is passed an &ethtool_cmd to fill in. It returns - * an negative errno or zero. - * @set_settings: Set device-specific settings. - * @set_settings is passed an &ethtool_cmd and should attempt to set - * all the settings this device supports. It may return an error value - * if something goes wrong (otherwise 0). - * @get_drvinfo: Report driver information + * struct ethtool_ops - optional netdev operations + * @get_settings: Get various device settings including Ethernet link + * settings. Returns a negative error code or zero. + * @set_settings: Set various device settings including Ethernet link + * settings. Returns a negative error code or zero. + * @get_drvinfo: Report driver/device information. Should only set the + * @driver, @version, @fw_version and @bus_info fields. If not + * implemented, the @driver and @bus_info fields will be filled in + * according to the netdev's parent device.
+ * @get_regs_len: Get buffer length required for @get_regs * @get_regs: Get device registers * @get_wol: Report whether Wake-on-Lan is enabled - * @set_wol: Turn Wake-on-Lan on or off - * @get_msglevel: Report driver message level + * @set_wol: Turn Wake-on-Lan on or off. Returns a negative error code + * or zero. + * @get_msglevel: Report driver message level. This should be the value + * of the @msg_enable field used by netif logging functions. * @set_msglevel: Set driver message level - * @nway_reset: Restart autonegotiation - * @get_link: Get link status + * @nway_reset: Restart autonegotiation. Returns a negative error code + * or zero. + * @get_link: Report whether physical link is up. Will only be called if + * the netdev is up. Should usually be set to ethtool_op_get_link(), + * which uses netif_carrier_ok(). * @get_eeprom: Read data from the device EEPROM. * Should fill in the magic field. Don't need to check len for zero * or wraparound. Fill in the data argument with the eeprom values @@ -708,28 +714,84 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); * Should validate the magic field. Don't need to check len for zero * or wraparound. Update len to the amount written. Returns an error * or zero. - * @get_coalesce: Get interrupt coalescing parameters - * @set_coalesce: Set interrupt coalescing parameters + * @get_coalesce: Get interrupt coalescing parameters. Returns a negative + * error code or zero. + * @set_coalesce: Set interrupt coalescing parameters. Returns a negative + * error code or zero. * @get_ringparam: Report ring sizes - * @set_ringparam: Set ring sizes + * @set_ringparam: Set ring sizes. Returns a negative error code or zero. * @get_pauseparam: Report pause parameters - * @set_pauseparam: Set pause parameters - * @get_rx_csum: Report whether receive checksums are turned on or off - * @set_rx_csum: Turn receive checksum on or off - * @get_tx_csum: Report whether transmit checksums are turned on or off - * @set_tx_csum: Turn transmit checksums on or off - * @get_sg: Report whether scatter-gather is enabled - * @set_sg: Turn scatter-gather on or off - * @get_tso: Report whether TCP segmentation offload is enabled - * @set_tso: Turn TCP segmentation offload on or off - * @get_ufo: Report whether UDP fragmentation offload is enabled - * @set_ufo: Turn UDP fragmentation offload on or off + * @set_pauseparam: Set pause parameters. Returns a negative error code + * or zero. + * @get_rx_csum: Deprecated in favour of the netdev feature %NETIF_F_RXCSUM. + * Report whether receive checksums are turned on or off. + * @set_rx_csum: Deprecated in favour of generic netdev features. Turn + * receive checksum on or off. Returns a negative error code or zero. + * @get_tx_csum: Deprecated as redundant. Report whether transmit checksums + * are turned on or off. + * @set_tx_csum: Deprecated in favour of generic netdev features. Turn + * transmit checksums on or off. Returns a egative error code or zero. + * @get_sg: Deprecated as redundant. Report whether scatter-gather is + * enabled. + * @set_sg: Deprecated in favour of generic netdev features. Turn + * scatter-gather on or off. Returns a negative error code or zero. + * @get_tso: Deprecated as redundant. Report whether TCP segmentation + * offload is enabled. + * @set_tso: Deprecated in favour of generic netdev features. Turn TCP + * segmentation offload on or off. Returns a negative error code or zero. 
* @self_test: Run specified self-tests * @get_strings: Return a set of strings that describe the requested objects - * @phys_id: Identify the device - * @get_stats: Return statistics about the device - * @get_flags: get 32-bit flags bitmap - * @set_flags: set 32-bit flags bitmap + * @phys_id: Identify the physical device, e.g. by flashing an LED + * attached to it until interrupted by a signal or the given time + * (in seconds) elapses. If the given time is zero, use a default + * time limit. Returns a negative error code or zero. Being + * interrupted by a signal is not an error. + * @get_ethtool_stats: Return extended statistics about the device. + * This is only useful if the device maintains statistics not + * included in &struct rtnl_link_stats64. + * @begin: Function to be called before any other operation. Returns a + * negative error code or zero. + * @complete: Function to be called after any other operation except + * @begin. Will be called even if the other operation failed. + * @get_ufo: Deprecated as redundant. Report whether UDP fragmentation + * offload is enabled. + * @set_ufo: Deprecated in favour of generic netdev features. Turn UDP + * fragmentation offload on or off. Returns a negative error code or zero. + * @get_flags: Deprecated as redundant. Report features included in + * &enum ethtool_flags that are enabled. + * @set_flags: Deprecated in favour of generic netdev features. Turn + * features included in &enum ethtool_flags on or off. Returns a + * negative error code or zero. + * @get_priv_flags: Report driver-specific feature flags. + * @set_priv_flags: Set driver-specific feature flags. Returns a negative + * error code or zero. + * @get_sset_count: Get number of strings that @get_strings will write. + * @get_rxnfc: Get RX flow classification rules. Returns a negative + * error code or zero. + * @set_rxnfc: Set RX flow classification rules. Returns a negative + * error code or zero. + * @flash_device: Write a firmware image to device's flash memory. + * Returns a negative error code or zero. + * @reset: Reset (part of) the device, as specified by a bitmask of + * flags from &enum ethtool_reset_flags. Returns a negative + * error code or zero. + * @set_rx_ntuple: Set an RX n-tuple rule. Returns a negative error code + * or zero. + * @get_rx_ntuple: Deprecated. + * @get_rxfh_indir: Get the contents of the RX flow hash indirection table. + * Returns a negative error code or zero. + * @set_rxfh_indir: Set the contents of the RX flow hash indirection table. + * Returns a negative error code or zero. + * + * All operations are optional (i.e. the function pointer may be set + * to %NULL) and callers must take this into account. Callers must + * hold the RTNL, except that for @get_drvinfo the caller may or may + * not hold the RTNL. + * + * See the structures used by these operations for further documentation. + * + * See &struct net_device and &struct net_device_ops for documentation + * of the generic netdev features interface. */ struct ethtool_ops { int (*get_settings)(struct net_device *, struct ethtool_cmd *); -- cgit v1.2.3 From 68f512f21a64c9b264df6c61a9333e7890faf74b Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 2 Apr 2011 00:35:15 +0100 Subject: ethtool: Change ETHTOOL_PHYS_ID implementation to allow dropping RTNL The ethtool ETHTOOL_PHYS_ID command runs for an arbitrarily long period of time, holding the RTNL lock. 
This blocks routing updates, device enumeration, and various important operations that one might want to keep running while hunting for the flashing LED. We need to drop the RTNL lock during this operation, but currently the core implementation is a thin wrapper around a driver operation and drivers may well depend upon holding the lock. Define a new driver operation 'set_phys_id' with an argument that sets the ID indicator on/off/inactive/active (the last optional, for any driver or firmware that prefers to handle blinking asynchronously). When this is defined, the ethtool core drops the lock while waiting and only acquires it around calls to this operation. Deprecate the 'phys_id' operation in favour of this. It can be removed once all in-tree drivers are converted. Signed-off-by: Ben Hutchings --- include/linux/ethtool.h | 30 ++++++++++++++++++++++++++- net/core/ethtool.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 82 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index ead7dcb1bf1e..c04d1316d221 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -663,6 +663,22 @@ struct ethtool_rx_ntuple_list { unsigned int count; }; +/** + * enum ethtool_phys_id_state - indicator state for physical identification + * @ETHTOOL_ID_INACTIVE: Physical ID indicator should be deactivated + * @ETHTOOL_ID_ACTIVE: Physical ID indicator should be activated + * @ETHTOOL_ID_ON: LED should be turned on (used iff %ETHTOOL_ID_ACTIVE + * is not supported) + * @ETHTOOL_ID_OFF: LED should be turned off (used iff %ETHTOOL_ID_ACTIVE + * is not supported) + */ +enum ethtool_phys_id_state { + ETHTOOL_ID_INACTIVE, + ETHTOOL_ID_ACTIVE, + ETHTOOL_ID_ON, + ETHTOOL_ID_OFF +}; + struct net_device; /* Some generic methods drivers may use in their ethtool_ops */ @@ -741,7 +757,18 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); * segmentation offload on or off. Returns a negative error code or zero. * @self_test: Run specified self-tests * @get_strings: Return a set of strings that describe the requested objects - * @phys_id: Identify the physical device, e.g. by flashing an LED + * @set_phys_id: Identify the physical devices, e.g. by flashing an LED + * attached to it. The implementation may update the indicator + * asynchronously or synchronously, but in either case it must return + * quickly. It is initially called with the argument %ETHTOOL_ID_ACTIVE, + * and must either activate asynchronous updates or return -%EINVAL. + * If it returns -%EINVAL then it will be called again at intervals with + * argument %ETHTOOL_ID_ON or %ETHTOOL_ID_OFF and should set the state of + * the indicator accordingly. Finally, it is called with the argument + * %ETHTOOL_ID_INACTIVE and must deactivate the indicator. Returns a + * negative error code or zero. + * @phys_id: Deprecated in favour of @set_phys_id. + * Identify the physical device, e.g. by flashing an LED * attached to it until interrupted by a signal or the given time * (in seconds) elapses. If the given time is zero, use a default * time limit. Returns a negative error code or zero. 
Being @@ -830,6 +857,7 @@ struct ethtool_ops { int (*set_tso)(struct net_device *, u32); void (*self_test)(struct net_device *, struct ethtool_test *, u64 *); void (*get_strings)(struct net_device *, u32 stringset, u8 *); + int (*set_phys_id)(struct net_device *, enum ethtool_phys_id_state); int (*phys_id)(struct net_device *, u32); void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, u64 *); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 74ead9eca126..1c95f0fb8b3a 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include /* * Some useful ethtool_ops methods that're device independent. @@ -1618,14 +1620,63 @@ out: static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) { struct ethtool_value id; + static bool busy; + int rc; - if (!dev->ethtool_ops->phys_id) + if (!dev->ethtool_ops->set_phys_id && !dev->ethtool_ops->phys_id) return -EOPNOTSUPP; + if (busy) + return -EBUSY; + if (copy_from_user(&id, useraddr, sizeof(id))) return -EFAULT; - return dev->ethtool_ops->phys_id(dev, id.data); + if (!dev->ethtool_ops->set_phys_id) + /* Do it the old way */ + return dev->ethtool_ops->phys_id(dev, id.data); + + rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE); + if (rc && rc != -EINVAL) + return rc; + + /* Drop the RTNL lock while waiting, but prevent reentry or + * removal of the device. + */ + busy = true; + dev_hold(dev); + rtnl_unlock(); + + if (rc == 0) { + /* Driver will handle this itself */ + schedule_timeout_interruptible( + id.data ? id.data : MAX_SCHEDULE_TIMEOUT); + } else { + /* Driver expects to be called periodically */ + do { + rtnl_lock(); + rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ON); + rtnl_unlock(); + if (rc) + break; + schedule_timeout_interruptible(HZ / 2); + + rtnl_lock(); + rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_OFF); + rtnl_unlock(); + if (rc) + break; + schedule_timeout_interruptible(HZ / 2); + } while (!signal_pending(current) && + (id.data == 0 || --id.data != 0)); + } + + rtnl_lock(); + dev_put(dev); + busy = false; + + (void)dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE); + return rc; } static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) -- cgit v1.2.3 From 31d68ef65c7d49def19c1bae4e01b87d66cf5a56 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 24 Feb 2011 11:25:33 -0800 Subject: SUNRPC: Don't wait for full record to receive tcp data Ensure that we immediately read and buffer data from the incoming TCP stream so that we grow the receive window quickly, and don't deadlock on large READ or WRITE requests. Also do some minor exit cleanup. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svcsock.h | 1 + net/sunrpc/svcsock.c | 144 ++++++++++++++++++++++++++++++++--------- 2 files changed, 113 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 04dba23c59f2..85c50b40759d 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -28,6 +28,7 @@ struct svc_sock { /* private TCP part */ u32 sk_reclen; /* length of record */ u32 sk_tcplen; /* current read length */ + struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ }; /* diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 40b502b11442..a4fafcbc6ea0 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -387,6 +387,33 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, return len; } +static int svc_partial_recvfrom(struct svc_rqst *rqstp, + struct kvec *iov, int nr, + int buflen, unsigned int base) +{ + size_t save_iovlen; + void __user *save_iovbase; + unsigned int i; + int ret; + + if (base == 0) + return svc_recvfrom(rqstp, iov, nr, buflen); + + for (i = 0; i < nr; i++) { + if (iov[i].iov_len > base) + break; + base -= iov[i].iov_len; + } + save_iovlen = iov[i].iov_len; + save_iovbase = iov[i].iov_base; + iov[i].iov_len -= base; + iov[i].iov_base += base; + ret = svc_recvfrom(rqstp, &iov[i], nr - i, buflen); + iov[i].iov_len = save_iovlen; + iov[i].iov_base = save_iovbase; + return ret; +} + /* * Set socket snd and rcv buffer lengths */ @@ -884,6 +911,56 @@ failed: return NULL; } +static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + return 0; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + if (rqstp->rq_pages[i] != NULL) + put_page(rqstp->rq_pages[i]); + BUG_ON(svsk->sk_pages[i] == NULL); + rqstp->rq_pages[i] = svsk->sk_pages[i]; + svsk->sk_pages[i] = NULL; + } + rqstp->rq_arg.head[0].iov_base = page_address(rqstp->rq_pages[0]); + return len; +} + +static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + return; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + svsk->sk_pages[i] = rqstp->rq_pages[i]; + rqstp->rq_pages[i] = NULL; + } +} + +static void svc_tcp_clear_pages(struct svc_sock *svsk) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + goto out; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + BUG_ON(svsk->sk_pages[i] == NULL); + put_page(svsk->sk_pages[i]); + svsk->sk_pages[i] = NULL; + } +out: + svsk->sk_tcplen = 0; +} + /* * Receive data. * If we haven't gotten the record length yet, get the next four bytes. @@ -928,7 +1005,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) if (len < want) { dprintk("svc: short recvfrom while reading record " "length (%d of %d)\n", len, want); - goto err_again; /* record header not complete */ + return -EAGAIN; } svsk->sk_reclen = ntohl(svsk->sk_reclen); @@ -958,26 +1035,14 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) if (svsk->sk_reclen < 8) goto err_delete; /* client is nuts. 
*/ - /* Check whether enough data is available */ - len = svc_recv_available(svsk); - if (len < 0) - goto error; - - if (len < svsk->sk_reclen) { - dprintk("svc: incomplete TCP record (%d of %d)\n", - len, svsk->sk_reclen); - goto err_again; /* record not complete */ - } len = svsk->sk_reclen; return len; - error: - if (len == -EAGAIN) - dprintk("RPC: TCP recv_record got EAGAIN\n"); +error: + dprintk("RPC: TCP recv_record got %d\n", len); return len; - err_delete: +err_delete: set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); - err_again: return -EAGAIN; } @@ -1035,6 +1100,7 @@ static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) return i; } + /* * Receive data from a TCP socket. */ @@ -1045,6 +1111,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) struct svc_serv *serv = svsk->sk_xprt.xpt_server; int len; struct kvec *vec; + unsigned int want, base; __be32 *p; __be32 calldir; int pnum; @@ -1058,6 +1125,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (len < 0) goto error; + base = svc_tcp_restore_pages(svsk, rqstp); + want = svsk->sk_reclen - base; + vec = rqstp->rq_vec; pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], @@ -1066,11 +1136,18 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_respages = &rqstp->rq_pages[pnum]; /* Now receive data */ - len = svc_recvfrom(rqstp, vec, pnum, svsk->sk_reclen); - if (len < 0) - goto err_again; + len = svc_partial_recvfrom(rqstp, vec, pnum, want, base); + if (len >= 0) + svsk->sk_tcplen += len; + if (len != want) { + if (len < 0 && len != -EAGAIN) + goto err_other; + svc_tcp_save_pages(svsk, rqstp); + dprintk("svc: incomplete TCP record (%d of %d)\n", + svsk->sk_tcplen, svsk->sk_reclen); + goto err_noclose; + } - dprintk("svc: TCP complete record (%d bytes)\n", svsk->sk_reclen); rqstp->rq_arg.len = svsk->sk_reclen; rqstp->rq_arg.page_base = 0; if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) { @@ -1087,7 +1164,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (calldir) { len = receive_cb_reply(svsk, rqstp); if (len < 0) - goto err_again; + goto error; } /* Reset TCP read info */ @@ -1102,20 +1179,20 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (serv->sv_stats) serv->sv_stats->nettcpcnt++; + dprintk("svc: TCP complete record (%d bytes)\n", rqstp->rq_arg.len); return rqstp->rq_arg.len; -err_again: - if (len == -EAGAIN) { - dprintk("RPC: TCP recvfrom got EAGAIN\n"); - return len; - } error: - if (len != -EAGAIN) { - printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", - svsk->sk_xprt.xpt_server->sv_name, -len); - set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); - } + if (len != -EAGAIN) + goto err_other; + dprintk("RPC: TCP recvfrom got EAGAIN\n"); return -EAGAIN; +err_other: + printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", + svsk->sk_xprt.xpt_server->sv_name, -len); + set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); +err_noclose: + return -EAGAIN; /* record not complete */ } /* @@ -1286,6 +1363,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) svsk->sk_reclen = 0; svsk->sk_tcplen = 0; + memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages)); tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; @@ -1544,8 +1622,10 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt) svc_sock_detach(xprt); - if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) + if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) { + svc_tcp_clear_pages(svsk); kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR); + } } /* -- cgit v1.2.3 From f4263c9857e6411ef2388868cc6c79a1602a654e Mon Sep 17 
00:00:00 2001 From: Paul Stewart Date: Thu, 31 Mar 2011 09:25:41 -0700 Subject: nl80211: Add BSS parameters to station This allows user-space monitoring of BSS parameters for the associated station. This is useful for debugging and verifying that the paramaters are as expected. [Exactly the same as before but bundled into a single message] Signed-off-by: Paul Stewart Cc: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 35 ++++++++++++++++++++++++++++++++++- include/net/cfg80211.h | 34 ++++++++++++++++++++++++++++++++++ net/mac80211/cfg.c | 13 ++++++++++++- net/wireless/nl80211.c | 21 ++++++++++++++++++++- 4 files changed, 100 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 30022189104d..16eea7229e99 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1221,6 +1221,36 @@ enum nl80211_rate_info { NL80211_RATE_INFO_MAX = __NL80211_RATE_INFO_AFTER_LAST - 1 }; +/** + * enum nl80211_sta_bss_param - BSS information collected by STA + * + * These attribute types are used with %NL80211_STA_INFO_BSS_PARAM + * when getting information about the bitrate of a station. + * + * @__NL80211_STA_BSS_PARAM_INVALID: attribute number 0 is reserved + * @NL80211_STA_BSS_PARAM_CTS_PROT: whether CTS protection is enabled (flag) + * @NL80211_STA_BSS_PARAM_SHORT_PREAMBLE: whether short preamble is enabled + * (flag) + * @NL80211_STA_BSS_PARAM_SHORT_SLOT_TIME: whether short slot time is enabled + * (flag) + * @NL80211_STA_BSS_PARAM_DTIM_PERIOD: DTIM period for beaconing (u8) + * @NL80211_STA_BSS_PARAM_BEACON_INTERVAL: Beacon interval (u16) + * @NL80211_STA_BSS_PARAM_MAX: highest sta_bss_param number currently defined + * @__NL80211_STA_BSS_PARAM_AFTER_LAST: internal use + */ +enum nl80211_sta_bss_param { + __NL80211_STA_BSS_PARAM_INVALID, + NL80211_STA_BSS_PARAM_CTS_PROT, + NL80211_STA_BSS_PARAM_SHORT_PREAMBLE, + NL80211_STA_BSS_PARAM_SHORT_SLOT_TIME, + NL80211_STA_BSS_PARAM_DTIM_PERIOD, + NL80211_STA_BSS_PARAM_BEACON_INTERVAL, + + /* keep last */ + __NL80211_STA_BSS_PARAM_AFTER_LAST, + NL80211_STA_BSS_PARAM_MAX = __NL80211_STA_BSS_PARAM_AFTER_LAST - 1 +}; + /** * enum nl80211_sta_info - station information * @@ -1233,7 +1263,7 @@ enum nl80211_rate_info { * @NL80211_STA_INFO_TX_BYTES: total transmitted bytes (u32, to this station) * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm) * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute - * containing info as possible, see &enum nl80211_sta_info_txrate. + * containing info as possible, see &enum nl80211_rate_info * @NL80211_STA_INFO_RX_PACKETS: total received packet (u32, from this station) * @NL80211_STA_INFO_TX_PACKETS: total transmitted packets (u32, to this * station) @@ -1245,6 +1275,8 @@ enum nl80211_rate_info { * @NL80211_STA_INFO_PLINK_STATE: peer link state for the station * @NL80211_STA_INFO_RX_BITRATE: last unicast data frame rx rate, nested * attribute, like NL80211_STA_INFO_TX_BITRATE. 
+ * @NL80211_STA_INFO_BSS_PARAM: current station's view of BSS, nested attribute + * containing info as possible, see &enum nl80211_sta_bss_param * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -1264,6 +1296,7 @@ enum nl80211_sta_info { NL80211_STA_INFO_TX_FAILED, NL80211_STA_INFO_SIGNAL_AVG, NL80211_STA_INFO_RX_BITRATE, + NL80211_STA_INFO_BSS_PARAM, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2c4530451721..ba7384acf4e0 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -422,6 +422,7 @@ struct station_parameters { * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled * @STATION_INFO_SIGNAL_AVG: @signal_avg filled * @STATION_INFO_RX_BITRATE: @rxrate fields are filled + * @STATION_INFO_BSS_PARAM: @bss_param filled */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -439,6 +440,7 @@ enum station_info_flags { STATION_INFO_RX_DROP_MISC = 1<<12, STATION_INFO_SIGNAL_AVG = 1<<13, STATION_INFO_RX_BITRATE = 1<<14, + STATION_INFO_BSS_PARAM = 1<<15, }; /** @@ -472,6 +474,37 @@ struct rate_info { u16 legacy; }; +/** + * enum station_info_rate_flags - bitrate info flags + * + * Used by the driver to indicate the specific rate transmission + * type for 802.11n transmissions. + * + * @BSS_PARAM_FLAGS_CTS_PROT: whether CTS protection is enabled + * @BSS_PARAM_FLAGS_SHORT_PREAMBLE: whether short preamble is enabled + * @BSS_PARAM_FLAGS_SHORT_SLOT_TIME: whether short slot time is enabled + */ +enum bss_param_flags { + BSS_PARAM_FLAGS_CTS_PROT = 1<<0, + BSS_PARAM_FLAGS_SHORT_PREAMBLE = 1<<1, + BSS_PARAM_FLAGS_SHORT_SLOT_TIME = 1<<2, +}; + +/** + * struct sta_bss_parameters - BSS parameters for the attached station + * + * Information about the currently associated BSS + * + * @flags: bitflag of flags from &enum bss_param_flags + * @dtim_period: DTIM period for the BSS + * @beacon_interval: beacon interval + */ +struct sta_bss_parameters { + u8 flags; + u8 dtim_period; + u16 beacon_interval; +}; + /** * struct station_info - station information * @@ -515,6 +548,7 @@ struct station_info { u32 tx_retries; u32 tx_failed; u32 rx_dropped_misc; + struct sta_bss_parameters bss_param; int generation; }; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 334213571ad0..bf5d28da46e6 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -342,7 +342,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) STATION_INFO_TX_FAILED | STATION_INFO_TX_BITRATE | STATION_INFO_RX_BITRATE | - STATION_INFO_RX_DROP_MISC; + STATION_INFO_RX_DROP_MISC | + STATION_INFO_BSS_PARAM; sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); sinfo->rx_bytes = sta->rx_bytes; @@ -389,6 +390,16 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->plink_state = sta->plink_state; #endif } + + sinfo->bss_param.flags = 0; + if (sdata->vif.bss_conf.use_cts_prot) + sinfo->bss_param.flags |= BSS_PARAM_FLAGS_CTS_PROT; + if (sdata->vif.bss_conf.use_short_preamble) + sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_PREAMBLE; + if (sdata->vif.bss_conf.use_short_slot) + sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_SLOT_TIME; + sinfo->bss_param.dtim_period = sdata->local->hw.conf.ps_dtim_period; + sinfo->bss_param.beacon_interval = sdata->vif.bss_conf.beacon_int; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 40c90fb461c4..297d7ce4117b 100644 --- a/net/wireless/nl80211.c +++ 
b/net/wireless/nl80211.c @@ -2002,7 +2002,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, const u8 *mac_addr, struct station_info *sinfo) { void *hdr; - struct nlattr *sinfoattr; + struct nlattr *sinfoattr, *bss_param; hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION); if (!hdr) @@ -2062,6 +2062,25 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, if (sinfo->filled & STATION_INFO_TX_FAILED) NLA_PUT_U32(msg, NL80211_STA_INFO_TX_FAILED, sinfo->tx_failed); + if (sinfo->filled & STATION_INFO_BSS_PARAM) { + bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM); + if (!bss_param) + goto nla_put_failure; + + if (sinfo->bss_param.flags & BSS_PARAM_FLAGS_CTS_PROT) + NLA_PUT_FLAG(msg, NL80211_STA_BSS_PARAM_CTS_PROT); + if (sinfo->bss_param.flags & BSS_PARAM_FLAGS_SHORT_PREAMBLE) + NLA_PUT_FLAG(msg, NL80211_STA_BSS_PARAM_SHORT_PREAMBLE); + if (sinfo->bss_param.flags & BSS_PARAM_FLAGS_SHORT_SLOT_TIME) + NLA_PUT_FLAG(msg, + NL80211_STA_BSS_PARAM_SHORT_SLOT_TIME); + NLA_PUT_U8(msg, NL80211_STA_BSS_PARAM_DTIM_PERIOD, + sinfo->bss_param.dtim_period); + NLA_PUT_U16(msg, NL80211_STA_BSS_PARAM_BEACON_INTERVAL, + sinfo->bss_param.beacon_interval); + + nla_nest_end(msg, bss_param); + } nla_nest_end(msg, sinfoattr); return genlmsg_end(msg, hdr); -- cgit v1.2.3 From 0e028465d18b7c6797fcbdea632299d16097c5cd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 6 Mar 2011 18:02:54 +0100 Subject: exec: unify do_execve/compat_do_execve code Add the appropriate members into struct user_arg_ptr and teach get_user_arg_ptr() to handle is_compat = T case correctly. This allows us to remove the compat_do_execve() code from fs/compat.c and reimplement compat_do_execve() as the trivial wrapper on top of do_execve_common(is_compat => true). In fact, this fixes another (minor) bug. "compat_uptr_t str" can overflow after "str += len" in compat_copy_strings() if a 64bit application execs via sys32_execve(). Unexport acct_arg_size() and get_arg_page(), fs/compat.c doesn't need them any longer. Signed-off-by: Oleg Nesterov Reviewed-by: KOSAKI Motohiro Tested-by: KOSAKI Motohiro --- fs/compat.c | 235 ------------------------------------------------ fs/exec.c | 62 ++++++++++--- include/linux/binfmts.h | 4 - 3 files changed, 50 insertions(+), 251 deletions(-) (limited to 'include/linux') diff --git a/fs/compat.c b/fs/compat.c index 72fe6cda9108..0ea00832de23 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1306,241 +1306,6 @@ compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, int return do_sys_open(dfd, filename, flags, mode); } -/* - * compat_count() counts the number of arguments/envelopes. It is basically - * a copy of count() from fs/exec.c, except that it works with 32 bit argv - * and envp pointers. - */ -static int compat_count(compat_uptr_t __user *argv, int max) -{ - int i = 0; - - if (argv != NULL) { - for (;;) { - compat_uptr_t p; - - if (get_user(p, argv)) - return -EFAULT; - if (!p) - break; - argv++; - if (i++ >= max) - return -E2BIG; - - if (fatal_signal_pending(current)) - return -ERESTARTNOHAND; - cond_resched(); - } - } - return i; -} - -/* - * compat_copy_strings() is basically a copy of copy_strings() from fs/exec.c - * except that it works with 32 bit argv and envp pointers. 
- */ -static int compat_copy_strings(int argc, compat_uptr_t __user *argv, - struct linux_binprm *bprm) -{ - struct page *kmapped_page = NULL; - char *kaddr = NULL; - unsigned long kpos = 0; - int ret; - - while (argc-- > 0) { - compat_uptr_t str; - int len; - unsigned long pos; - - if (get_user(str, argv+argc) || - !(len = strnlen_user(compat_ptr(str), MAX_ARG_STRLEN))) { - ret = -EFAULT; - goto out; - } - - if (len > MAX_ARG_STRLEN) { - ret = -E2BIG; - goto out; - } - - /* We're going to work our way backwords. */ - pos = bprm->p; - str += len; - bprm->p -= len; - - while (len > 0) { - int offset, bytes_to_copy; - - if (fatal_signal_pending(current)) { - ret = -ERESTARTNOHAND; - goto out; - } - cond_resched(); - - offset = pos % PAGE_SIZE; - if (offset == 0) - offset = PAGE_SIZE; - - bytes_to_copy = offset; - if (bytes_to_copy > len) - bytes_to_copy = len; - - offset -= bytes_to_copy; - pos -= bytes_to_copy; - str -= bytes_to_copy; - len -= bytes_to_copy; - - if (!kmapped_page || kpos != (pos & PAGE_MASK)) { - struct page *page; - - page = get_arg_page(bprm, pos, 1); - if (!page) { - ret = -E2BIG; - goto out; - } - - if (kmapped_page) { - flush_kernel_dcache_page(kmapped_page); - kunmap(kmapped_page); - put_page(kmapped_page); - } - kmapped_page = page; - kaddr = kmap(kmapped_page); - kpos = pos & PAGE_MASK; - flush_cache_page(bprm->vma, kpos, - page_to_pfn(kmapped_page)); - } - if (copy_from_user(kaddr+offset, compat_ptr(str), - bytes_to_copy)) { - ret = -EFAULT; - goto out; - } - } - } - ret = 0; -out: - if (kmapped_page) { - flush_kernel_dcache_page(kmapped_page); - kunmap(kmapped_page); - put_page(kmapped_page); - } - return ret; -} - -/* - * compat_do_execve() is mostly a copy of do_execve(), with the exception - * that it processes 32 bit argv and envp pointers. 
- */ -int compat_do_execve(char * filename, - compat_uptr_t __user *argv, - compat_uptr_t __user *envp, - struct pt_regs * regs) -{ - struct linux_binprm *bprm; - struct file *file; - struct files_struct *displaced; - bool clear_in_exec; - int retval; - - retval = unshare_files(&displaced); - if (retval) - goto out_ret; - - retval = -ENOMEM; - bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); - if (!bprm) - goto out_files; - - retval = prepare_bprm_creds(bprm); - if (retval) - goto out_free; - - retval = check_unsafe_exec(bprm); - if (retval < 0) - goto out_free; - clear_in_exec = retval; - current->in_execve = 1; - - file = open_exec(filename); - retval = PTR_ERR(file); - if (IS_ERR(file)) - goto out_unmark; - - sched_exec(); - - bprm->file = file; - bprm->filename = filename; - bprm->interp = filename; - - retval = bprm_mm_init(bprm); - if (retval) - goto out_file; - - bprm->argc = compat_count(argv, MAX_ARG_STRINGS); - if ((retval = bprm->argc) < 0) - goto out; - - bprm->envc = compat_count(envp, MAX_ARG_STRINGS); - if ((retval = bprm->envc) < 0) - goto out; - - retval = prepare_binprm(bprm); - if (retval < 0) - goto out; - - retval = copy_strings_kernel(1, &bprm->filename, bprm); - if (retval < 0) - goto out; - - bprm->exec = bprm->p; - retval = compat_copy_strings(bprm->envc, envp, bprm); - if (retval < 0) - goto out; - - retval = compat_copy_strings(bprm->argc, argv, bprm); - if (retval < 0) - goto out; - - retval = search_binary_handler(bprm, regs); - if (retval < 0) - goto out; - - /* execve succeeded */ - current->fs->in_exec = 0; - current->in_execve = 0; - acct_update_integrals(current); - free_bprm(bprm); - if (displaced) - put_files_struct(displaced); - return retval; - -out: - if (bprm->mm) { - acct_arg_size(bprm, 0); - mmput(bprm->mm); - } - -out_file: - if (bprm->file) { - allow_write_access(bprm->file); - fput(bprm->file); - } - -out_unmark: - if (clear_in_exec) - current->fs->in_exec = 0; - current->in_execve = 0; - -out_free: - free_bprm(bprm); - -out_files: - if (displaced) - reset_files_struct(displaced); -out_ret: - return retval; -} - #define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) static int poll_select_copy_remaining(struct timespec *end_time, void __user *p, diff --git a/fs/exec.c b/fs/exec.c index 526a0399d963..89d788ca7829 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include @@ -167,7 +168,7 @@ out: #ifdef CONFIG_MMU -void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) +static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) { struct mm_struct *mm = current->mm; long diff = (long)(pages - bprm->vma_pages); @@ -186,7 +187,7 @@ void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) #endif } -struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, +static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, int write) { struct page *page; @@ -305,11 +306,11 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len) #else -void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) +static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) { } -struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, +static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, int write) { struct page *page; @@ -399,17 +400,36 @@ err: } struct user_arg_ptr { - const char __user *const __user *native; +#ifdef CONFIG_COMPAT + bool is_compat; +#endif + union { + const char __user 
*const __user *native; +#ifdef CONFIG_COMPAT + compat_uptr_t __user *compat; +#endif + } ptr; }; static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr) { - const char __user *ptr; + const char __user *native; + +#ifdef CONFIG_COMPAT + if (unlikely(argv.is_compat)) { + compat_uptr_t compat; + + if (get_user(compat, argv.ptr.compat + nr)) + return ERR_PTR(-EFAULT); - if (get_user(ptr, argv.native + nr)) + return compat_ptr(compat); + } +#endif + + if (get_user(native, argv.ptr.native + nr)) return ERR_PTR(-EFAULT); - return ptr; + return native; } /* @@ -419,7 +439,7 @@ static int count(struct user_arg_ptr argv, int max) { int i = 0; - if (argv.native != NULL) { + if (argv.ptr.native != NULL) { for (;;) { const char __user *p = get_user_arg_ptr(argv, i); @@ -542,7 +562,7 @@ int copy_strings_kernel(int argc, const char *const *__argv, int r; mm_segment_t oldfs = get_fs(); struct user_arg_ptr argv = { - .native = (const char __user *const __user *)__argv, + .ptr.native = (const char __user *const __user *)__argv, }; set_fs(KERNEL_DS); @@ -1516,10 +1536,28 @@ int do_execve(const char *filename, const char __user *const __user *__envp, struct pt_regs *regs) { - struct user_arg_ptr argv = { .native = __argv }; - struct user_arg_ptr envp = { .native = __envp }; + struct user_arg_ptr argv = { .ptr.native = __argv }; + struct user_arg_ptr envp = { .ptr.native = __envp }; + return do_execve_common(filename, argv, envp, regs); +} + +#ifdef CONFIG_COMPAT +int compat_do_execve(char *filename, + compat_uptr_t __user *__argv, + compat_uptr_t __user *__envp, + struct pt_regs *regs) +{ + struct user_arg_ptr argv = { + .is_compat = true, + .ptr.compat = __argv, + }; + struct user_arg_ptr envp = { + .is_compat = true, + .ptr.compat = __envp, + }; return do_execve_common(filename, argv, envp, regs); } +#endif void set_binfmt(struct linux_binfmt *new) { diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index c3d6512eded1..8845613fd7e3 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -60,10 +60,6 @@ struct linux_binprm { unsigned long loader, exec; }; -extern void acct_arg_size(struct linux_binprm *bprm, unsigned long pages); -extern struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, - int write); - #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0 #define BINPRM_FLAGS_ENFORCE_NONDUMP (1 << BINPRM_FLAGS_ENFORCE_NONDUMP_BIT) -- cgit v1.2.3 From 5cdede2408e80f190c5595e592c24e77c1bf44b2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 15:55:18 +0200 Subject: PCI: Move ATS declarations in seperate header file This patch moves the relevant declarations from the local header file in drivers/pci to a more accessible locations so that it can be used by the AMD IOMMU driver too. The file is named pci-ats.h because support for the PCI PRI capability will also be added there in a later patch-set. 
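As a minimal usage sketch (not part of the patch), moving the declarations into <linux/pci-ats.h> lets code outside drivers/pci call the ATS helpers directly; the function below is hypothetical and the PAGE_SHIFT argument is only an assumed smallest-translation-unit value.

#include <linux/pci.h>
#include <linux/pci-ats.h>

/* Hypothetical consumer, e.g. in an IOMMU driver: enable ATS on a device
 * and report its invalidate queue depth. */
static void example_setup_ats(struct pci_dev *pdev)
{
        if (pci_enable_ats(pdev, PAGE_SHIFT))
                return;         /* no ATS capability or enable failed */

        if (pci_ats_enabled(pdev))
                dev_info(&pdev->dev, "ATS enabled, invalidate queue depth %d\n",
                         pci_ats_queue_depth(pdev));
}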
Signed-off-by: Joerg Roedel Acked-by: Jesse Barnes --- drivers/pci/intel-iommu.c | 1 + drivers/pci/iov.c | 1 + drivers/pci/pci.h | 37 --------------------------------- include/linux/pci-ats.h | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 54 insertions(+), 37 deletions(-) create mode 100644 include/linux/pci-ats.h (limited to 'include/linux') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 7da3bef60d87..fdb2cef2b908 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include "pci.h" diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 553d8ee55c1c..42fae4776515 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "pci.h" #define VIRTFN_ID_LEN 16 diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index a6ec200fe5ee..4020025f854e 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -250,15 +250,6 @@ struct pci_sriov { u8 __iomem *mstate; /* VF Migration State Array */ }; -/* Address Translation Service */ -struct pci_ats { - int pos; /* capability position */ - int stu; /* Smallest Translation Unit */ - int qdep; /* Invalidate Queue Depth */ - int ref_cnt; /* Physical Function reference count */ - unsigned int is_enabled:1; /* Enable bit is set */ -}; - #ifdef CONFIG_PCI_IOV extern int pci_iov_init(struct pci_dev *dev); extern void pci_iov_release(struct pci_dev *dev); @@ -269,19 +260,6 @@ extern resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, extern void pci_restore_iov_state(struct pci_dev *dev); extern int pci_iov_bus_range(struct pci_bus *bus); -extern int pci_enable_ats(struct pci_dev *dev, int ps); -extern void pci_disable_ats(struct pci_dev *dev); -extern int pci_ats_queue_depth(struct pci_dev *dev); -/** - * pci_ats_enabled - query the ATS status - * @dev: the PCI device - * - * Returns 1 if ATS capability is enabled, or 0 if not. - */ -static inline int pci_ats_enabled(struct pci_dev *dev) -{ - return dev->ats && dev->ats->is_enabled; -} #else static inline int pci_iov_init(struct pci_dev *dev) { @@ -304,21 +282,6 @@ static inline int pci_iov_bus_range(struct pci_bus *bus) return 0; } -static inline int pci_enable_ats(struct pci_dev *dev, int ps) -{ - return -ENODEV; -} -static inline void pci_disable_ats(struct pci_dev *dev) -{ -} -static inline int pci_ats_queue_depth(struct pci_dev *dev) -{ - return -ENODEV; -} -static inline int pci_ats_enabled(struct pci_dev *dev) -{ - return 0; -} #endif /* CONFIG_PCI_IOV */ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h new file mode 100644 index 000000000000..655824fa4c76 --- /dev/null +++ b/include/linux/pci-ats.h @@ -0,0 +1,52 @@ +#ifndef LINUX_PCI_ATS_H +#define LINUX_PCI_ATS_H + +/* Address Translation Service */ +struct pci_ats { + int pos; /* capability position */ + int stu; /* Smallest Translation Unit */ + int qdep; /* Invalidate Queue Depth */ + int ref_cnt; /* Physical Function reference count */ + unsigned int is_enabled:1; /* Enable bit is set */ +}; + +#ifdef CONFIG_PCI_IOV + +extern int pci_enable_ats(struct pci_dev *dev, int ps); +extern void pci_disable_ats(struct pci_dev *dev); +extern int pci_ats_queue_depth(struct pci_dev *dev); +/** + * pci_ats_enabled - query the ATS status + * @dev: the PCI device + * + * Returns 1 if ATS capability is enabled, or 0 if not. 
+ */ +static inline int pci_ats_enabled(struct pci_dev *dev) +{ + return dev->ats && dev->ats->is_enabled; +} + +#else /* CONFIG_PCI_IOV */ + +static inline int pci_enable_ats(struct pci_dev *dev, int ps) +{ + return -ENODEV; +} + +static inline void pci_disable_ats(struct pci_dev *dev) +{ +} + +static inline int pci_ats_queue_depth(struct pci_dev *dev) +{ + return -ENODEV; +} + +static inline int pci_ats_enabled(struct pci_dev *dev) +{ + return 0; +} + +#endif /* CONFIG_PCI_IOV */ + +#endif /* LINUX_PCI_ATS_H*/ -- cgit v1.2.3 From dce840a08702bd13a9a186e07e63d1ef82256b5e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Apr 2011 14:09:50 +0200 Subject: sched: Dynamically allocate sched_domain/sched_group data-structures Instead of relying on static allocations for the sched_domain and sched_group trees, dynamically allocate and RCU-free them. Allocating this dynamically also allows for some build_sched_groups() simplification since we can now (like with other simplifications) rely on the sched_domain tree instead of hard-coded knowledge. One tricky thing to note is that detach_destroy_domains() needs to hold rcu_read_lock() over the entire tear-down; doing it per-cpu is not sufficient since that can lead to partial sched_group existence (this could possibly be solved by doing the tear-down backwards, but holding the lock throughout is much more robust). A consequence of the above is that we can no longer print the sched_domain debug output from cpu_attach_domain() since that might now run with preemption disabled (due to classic RCU etc.) while sched_domain_debug() does some GFP_KERNEL allocations. Another thing to note is that we now fully rely on normal RCU and not RCU-sched; this is because with the new and existing RCU flavours we grew over the years, BH doesn't necessarily hold off RCU-sched grace periods (-rt is known to break this). This would in fact already cause us grief since we do sched_domain/sched_group iterations from softirq context. This patch is somewhat larger than I would like it to be, but I didn't find any means of shrinking or splitting it.
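The reader-side consequence can be condensed into the pattern below; it is a minimal sketch of what the sched_fair.c hunks later in this patch do, not a new API. Because the domain tree is now freed with plain call_rcu(), every walk of it has to sit inside an RCU read-side critical section instead of relying on disabled preemption or softirq context.

	struct sched_domain *sd;

	rcu_read_lock();
	for_each_domain(cpu, sd) {
		if (!(sd->flags & SD_LOAD_BALANCE))
			continue;
		/* sd and sd->groups are only guaranteed to stay valid
		 * while the read-side critical section is held */
	}
	rcu_read_unlock();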
Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110407122942.245307941@chello.nl Signed-off-by: Ingo Molnar --- include/linux/sched.h | 5 + kernel/sched.c | 479 ++++++++++++++++++++------------------------------ kernel/sched_fair.c | 30 +++- 3 files changed, 218 insertions(+), 296 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ec2c027e92c..020b79d6c486 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -868,6 +868,7 @@ static inline int sd_power_saving_flags(void) struct sched_group { struct sched_group *next; /* Must be a circular list */ + atomic_t ref; /* * CPU power of this group, SCHED_LOAD_SCALE being max power for a @@ -973,6 +974,10 @@ struct sched_domain { #ifdef CONFIG_SCHED_DEBUG char *name; #endif + union { + void *private; /* used during construction */ + struct rcu_head rcu; /* used during destruction */ + }; unsigned int span_weight; /* diff --git a/kernel/sched.c b/kernel/sched.c index 1cca59ec4a49..65204845063e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -417,6 +417,7 @@ struct rt_rq { */ struct root_domain { atomic_t refcount; + struct rcu_head rcu; cpumask_var_t span; cpumask_var_t online; @@ -571,7 +572,7 @@ static inline int cpu_of(struct rq *rq) #define rcu_dereference_check_sched_domain(p) \ rcu_dereference_check((p), \ - rcu_read_lock_sched_held() || \ + rcu_read_lock_held() || \ lockdep_is_held(&sched_domains_mutex)) /* @@ -6572,12 +6573,11 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) return 1; } -static void free_rootdomain(struct root_domain *rd) +static void free_rootdomain(struct rcu_head *rcu) { - synchronize_sched(); + struct root_domain *rd = container_of(rcu, struct root_domain, rcu); cpupri_cleanup(&rd->cpupri); - free_cpumask_var(rd->rto_mask); free_cpumask_var(rd->online); free_cpumask_var(rd->span); @@ -6618,7 +6618,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) raw_spin_unlock_irqrestore(&rq->lock, flags); if (old_rd) - free_rootdomain(old_rd); + call_rcu_sched(&old_rd->rcu, free_rootdomain); } static int init_rootdomain(struct root_domain *rd) @@ -6669,6 +6669,25 @@ static struct root_domain *alloc_rootdomain(void) return rd; } +static void free_sched_domain(struct rcu_head *rcu) +{ + struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu); + if (atomic_dec_and_test(&sd->groups->ref)) + kfree(sd->groups); + kfree(sd); +} + +static void destroy_sched_domain(struct sched_domain *sd, int cpu) +{ + call_rcu(&sd->rcu, free_sched_domain); +} + +static void destroy_sched_domains(struct sched_domain *sd, int cpu) +{ + for (; sd; sd = sd->parent) + destroy_sched_domain(sd, cpu); +} + /* * Attach the domain 'sd' to 'cpu' as its base domain. Callers must * hold the hotplug lock. 
@@ -6689,20 +6708,25 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) tmp->parent = parent->parent; if (parent->parent) parent->parent->child = tmp; + destroy_sched_domain(parent, cpu); } else tmp = tmp->parent; } if (sd && sd_degenerate(sd)) { + tmp = sd; sd = sd->parent; + destroy_sched_domain(tmp, cpu); if (sd) sd->child = NULL; } - sched_domain_debug(sd, cpu); + /* sched_domain_debug(sd, cpu); */ rq_attach_root(rq, rd); + tmp = rq->sd; rcu_assign_pointer(rq->sd, sd); + destroy_sched_domains(tmp, cpu); } /* cpus with isolated domains */ @@ -6718,56 +6742,6 @@ static int __init isolated_cpu_setup(char *str) __setup("isolcpus=", isolated_cpu_setup); -/* - * init_sched_build_groups takes the cpumask we wish to span, and a pointer - * to a function which identifies what group(along with sched group) a CPU - * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids - * (due to the fact that we keep track of groups covered with a struct cpumask). - * - * init_sched_build_groups will build a circular linked list of the groups - * covered by the given span, and will set each group's ->cpumask correctly, - * and ->cpu_power to 0. - */ -static void -init_sched_build_groups(const struct cpumask *span, - const struct cpumask *cpu_map, - int (*group_fn)(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, - struct cpumask *tmpmask), - struct cpumask *covered, struct cpumask *tmpmask) -{ - struct sched_group *first = NULL, *last = NULL; - int i; - - cpumask_clear(covered); - - for_each_cpu(i, span) { - struct sched_group *sg; - int group = group_fn(i, cpu_map, &sg, tmpmask); - int j; - - if (cpumask_test_cpu(i, covered)) - continue; - - cpumask_clear(sched_group_cpus(sg)); - sg->cpu_power = 0; - - for_each_cpu(j, span) { - if (group_fn(j, cpu_map, NULL, tmpmask) != group) - continue; - - cpumask_set_cpu(j, covered); - cpumask_set_cpu(j, sched_group_cpus(sg)); - } - if (!first) - first = sg; - if (last) - last->next = sg; - last = sg; - } - last->next = first; -} - #define SD_NODES_PER_DOMAIN 16 #ifdef CONFIG_NUMA @@ -6858,154 +6832,96 @@ struct static_sched_domain { DECLARE_BITMAP(span, CONFIG_NR_CPUS); }; +struct sd_data { + struct sched_domain **__percpu sd; + struct sched_group **__percpu sg; +}; + struct s_data { #ifdef CONFIG_NUMA int sd_allnodes; #endif cpumask_var_t nodemask; cpumask_var_t send_covered; - cpumask_var_t tmpmask; struct sched_domain ** __percpu sd; + struct sd_data sdd[SD_LV_MAX]; struct root_domain *rd; }; enum s_alloc { sa_rootdomain, sa_sd, - sa_tmpmask, + sa_sd_storage, sa_send_covered, sa_nodemask, sa_none, }; /* - * SMT sched-domains: + * Assumes the sched_domain tree is fully constructed */ -#ifdef CONFIG_SCHED_SMT -static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_groups); - -static int -cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *unused) +static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg) { - if (sg) - *sg = &per_cpu(sched_groups, cpu).sg; - return cpu; -} -#endif /* CONFIG_SCHED_SMT */ + struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); + struct sched_domain *child = sd->child; -/* - * multi-core sched-domains: - */ -#ifdef CONFIG_SCHED_MC -static DEFINE_PER_CPU(struct static_sched_domain, core_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_core); + if (child) + cpu = cpumask_first(sched_domain_span(child)); -static int 
-cpu_to_core_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *mask) -{ - int group; -#ifdef CONFIG_SCHED_SMT - cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); - group = cpumask_first(mask); -#else - group = cpu; -#endif if (sg) - *sg = &per_cpu(sched_group_core, group).sg; - return group; + *sg = *per_cpu_ptr(sdd->sg, cpu); + + return cpu; } -#endif /* CONFIG_SCHED_MC */ /* - * book sched-domains: + * build_sched_groups takes the cpumask we wish to span, and a pointer + * to a function which identifies what group(along with sched group) a CPU + * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids + * (due to the fact that we keep track of groups covered with a struct cpumask). + * + * build_sched_groups will build a circular linked list of the groups + * covered by the given span, and will set each group's ->cpumask correctly, + * and ->cpu_power to 0. */ -#ifdef CONFIG_SCHED_BOOK -static DEFINE_PER_CPU(struct static_sched_domain, book_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_book); - -static int -cpu_to_book_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *mask) -{ - int group = cpu; -#ifdef CONFIG_SCHED_MC - cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); - group = cpumask_first(mask); -#elif defined(CONFIG_SCHED_SMT) - cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); - group = cpumask_first(mask); -#endif - if (sg) - *sg = &per_cpu(sched_group_book, group).sg; - return group; -} -#endif /* CONFIG_SCHED_BOOK */ - -static DEFINE_PER_CPU(struct static_sched_domain, phys_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys); - -static int -cpu_to_phys_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *mask) +static void +build_sched_groups(struct sched_domain *sd, struct cpumask *covered) { - int group; -#ifdef CONFIG_SCHED_BOOK - cpumask_and(mask, cpu_book_mask(cpu), cpu_map); - group = cpumask_first(mask); -#elif defined(CONFIG_SCHED_MC) - cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); - group = cpumask_first(mask); -#elif defined(CONFIG_SCHED_SMT) - cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); - group = cpumask_first(mask); -#else - group = cpu; -#endif - if (sg) - *sg = &per_cpu(sched_group_phys, group).sg; - return group; -} - -#ifdef CONFIG_NUMA -static DEFINE_PER_CPU(struct static_sched_domain, node_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_node); + struct sched_group *first = NULL, *last = NULL; + struct sd_data *sdd = sd->private; + const struct cpumask *span = sched_domain_span(sd); + int i; -static int cpu_to_node_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, - struct cpumask *nodemask) -{ - int group; + cpumask_clear(covered); - cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map); - group = cpumask_first(nodemask); + for_each_cpu(i, span) { + struct sched_group *sg; + int group = get_group(i, sdd, &sg); + int j; - if (sg) - *sg = &per_cpu(sched_group_node, group).sg; - return group; -} + if (cpumask_test_cpu(i, covered)) + continue; -static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes); + cpumask_clear(sched_group_cpus(sg)); + sg->cpu_power = 0; -static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, - struct cpumask *nodemask) -{ - int group; + 
for_each_cpu(j, span) { + if (get_group(j, sdd, NULL) != group) + continue; - cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map); - group = cpumask_first(nodemask); + cpumask_set_cpu(j, covered); + cpumask_set_cpu(j, sched_group_cpus(sg)); + } - if (sg) - *sg = &per_cpu(sched_group_allnodes, group).sg; - return group; + if (!first) + first = sg; + if (last) + last->next = sg; + last = sg; + } + last->next = first; } -#endif /* CONFIG_NUMA */ - /* * Initialize sched groups cpu_power. * @@ -7039,15 +6955,15 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) # define SD_INIT_NAME(sd, type) do { } while (0) #endif -#define SD_INIT(sd, type) sd_init_##type(sd) - -#define SD_INIT_FUNC(type) \ -static noinline void sd_init_##type(struct sched_domain *sd) \ -{ \ - memset(sd, 0, sizeof(*sd)); \ - *sd = SD_##type##_INIT; \ - sd->level = SD_LV_##type; \ - SD_INIT_NAME(sd, type); \ +#define SD_INIT_FUNC(type) \ +static noinline struct sched_domain *sd_init_##type(struct s_data *d, int cpu) \ +{ \ + struct sched_domain *sd = *per_cpu_ptr(d->sdd[SD_LV_##type].sd, cpu); \ + *sd = SD_##type##_INIT; \ + sd->level = SD_LV_##type; \ + SD_INIT_NAME(sd, type); \ + sd->private = &d->sdd[SD_LV_##type]; \ + return sd; \ } SD_INIT_FUNC(CPU) @@ -7103,13 +7019,22 @@ static void set_domain_attribute(struct sched_domain *sd, static void __free_domain_allocs(struct s_data *d, enum s_alloc what, const struct cpumask *cpu_map) { + int i, j; + switch (what) { case sa_rootdomain: - free_rootdomain(d->rd); /* fall through */ + free_rootdomain(&d->rd->rcu); /* fall through */ case sa_sd: free_percpu(d->sd); /* fall through */ - case sa_tmpmask: - free_cpumask_var(d->tmpmask); /* fall through */ + case sa_sd_storage: + for (i = 0; i < SD_LV_MAX; i++) { + for_each_cpu(j, cpu_map) { + kfree(*per_cpu_ptr(d->sdd[i].sd, j)); + kfree(*per_cpu_ptr(d->sdd[i].sg, j)); + } + free_percpu(d->sdd[i].sd); + free_percpu(d->sdd[i].sg); + } /* fall through */ case sa_send_covered: free_cpumask_var(d->send_covered); /* fall through */ case sa_nodemask: @@ -7122,25 +7047,70 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what, static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, const struct cpumask *cpu_map) { + int i, j; + + memset(d, 0, sizeof(*d)); + if (!alloc_cpumask_var(&d->nodemask, GFP_KERNEL)) return sa_none; if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL)) return sa_nodemask; - if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL)) - return sa_send_covered; - d->sd = alloc_percpu(struct sched_domain *); - if (!d->sd) { - printk(KERN_WARNING "Cannot alloc per-cpu pointers\n"); - return sa_tmpmask; + for (i = 0; i < SD_LV_MAX; i++) { + d->sdd[i].sd = alloc_percpu(struct sched_domain *); + if (!d->sdd[i].sd) + return sa_sd_storage; + + d->sdd[i].sg = alloc_percpu(struct sched_group *); + if (!d->sdd[i].sg) + return sa_sd_storage; + + for_each_cpu(j, cpu_map) { + struct sched_domain *sd; + struct sched_group *sg; + + sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(), + GFP_KERNEL, cpu_to_node(j)); + if (!sd) + return sa_sd_storage; + + *per_cpu_ptr(d->sdd[i].sd, j) = sd; + + sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), + GFP_KERNEL, cpu_to_node(j)); + if (!sg) + return sa_sd_storage; + + *per_cpu_ptr(d->sdd[i].sg, j) = sg; + } } + d->sd = alloc_percpu(struct sched_domain *); + if (!d->sd) + return sa_sd_storage; d->rd = alloc_rootdomain(); - if (!d->rd) { - printk(KERN_WARNING "Cannot alloc root domain\n"); + if (!d->rd) return sa_sd; - 
} return sa_rootdomain; } +/* + * NULL the sd_data elements we've used to build the sched_domain and + * sched_group structure so that the subsequent __free_domain_allocs() + * will not free the data we're using. + */ +static void claim_allocations(int cpu, struct sched_domain *sd) +{ + struct sd_data *sdd = sd->private; + struct sched_group *sg = sd->groups; + + WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd); + *per_cpu_ptr(sdd->sd, cpu) = NULL; + + if (cpu == cpumask_first(sched_group_cpus(sg))) { + WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg); + *per_cpu_ptr(sdd->sg, cpu) = NULL; + } +} + static struct sched_domain *__build_numa_sched_domains(struct s_data *d, const struct cpumask *cpu_map, struct sched_domain_attr *attr, int i) { @@ -7151,24 +7121,20 @@ static struct sched_domain *__build_numa_sched_domains(struct s_data *d, d->sd_allnodes = 0; if (cpumask_weight(cpu_map) > SD_NODES_PER_DOMAIN * cpumask_weight(d->nodemask)) { - sd = &per_cpu(allnodes_domains, i).sd; - SD_INIT(sd, ALLNODES); + sd = sd_init_ALLNODES(d, i); set_domain_attribute(sd, attr); cpumask_copy(sched_domain_span(sd), cpu_map); - cpu_to_allnodes_group(i, cpu_map, &sd->groups, d->tmpmask); d->sd_allnodes = 1; } parent = sd; - sd = &per_cpu(node_domains, i).sd; - SD_INIT(sd, NODE); + sd = sd_init_NODE(d, i); set_domain_attribute(sd, attr); sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd)); sd->parent = parent; if (parent) parent->child = sd; cpumask_and(sched_domain_span(sd), sched_domain_span(sd), cpu_map); - cpu_to_node_group(i, cpu_map, &sd->groups, d->tmpmask); #endif return sd; } @@ -7178,14 +7144,12 @@ static struct sched_domain *__build_cpu_sched_domain(struct s_data *d, struct sched_domain *parent, int i) { struct sched_domain *sd; - sd = &per_cpu(phys_domains, i).sd; - SD_INIT(sd, CPU); + sd = sd_init_CPU(d, i); set_domain_attribute(sd, attr); cpumask_copy(sched_domain_span(sd), d->nodemask); sd->parent = parent; if (parent) parent->child = sd; - cpu_to_phys_group(i, cpu_map, &sd->groups, d->tmpmask); return sd; } @@ -7195,13 +7159,11 @@ static struct sched_domain *__build_book_sched_domain(struct s_data *d, { struct sched_domain *sd = parent; #ifdef CONFIG_SCHED_BOOK - sd = &per_cpu(book_domains, i).sd; - SD_INIT(sd, BOOK); + sd = sd_init_BOOK(d, i); set_domain_attribute(sd, attr); cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i)); sd->parent = parent; parent->child = sd; - cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask); #endif return sd; } @@ -7212,13 +7174,11 @@ static struct sched_domain *__build_mc_sched_domain(struct s_data *d, { struct sched_domain *sd = parent; #ifdef CONFIG_SCHED_MC - sd = &per_cpu(core_domains, i).sd; - SD_INIT(sd, MC); + sd = sd_init_MC(d, i); set_domain_attribute(sd, attr); cpumask_and(sched_domain_span(sd), cpu_map, cpu_coregroup_mask(i)); sd->parent = parent; parent->child = sd; - cpu_to_core_group(i, cpu_map, &sd->groups, d->tmpmask); #endif return sd; } @@ -7229,92 +7189,32 @@ static struct sched_domain *__build_smt_sched_domain(struct s_data *d, { struct sched_domain *sd = parent; #ifdef CONFIG_SCHED_SMT - sd = &per_cpu(cpu_domains, i).sd; - SD_INIT(sd, SIBLING); + sd = sd_init_SIBLING(d, i); set_domain_attribute(sd, attr); cpumask_and(sched_domain_span(sd), cpu_map, topology_thread_cpumask(i)); sd->parent = parent; parent->child = sd; - cpu_to_cpu_group(i, cpu_map, &sd->groups, d->tmpmask); #endif return sd; } -static void build_sched_groups(struct s_data *d, struct sched_domain *sd, - const struct cpumask *cpu_map, int cpu) -{ - switch 
(sd->level) { -#ifdef CONFIG_SCHED_SMT - case SD_LV_SIBLING: /* set up CPU (sibling) groups */ - if (cpu == cpumask_first(sched_domain_span(sd))) - init_sched_build_groups(sched_domain_span(sd), cpu_map, - &cpu_to_cpu_group, - d->send_covered, d->tmpmask); - break; -#endif -#ifdef CONFIG_SCHED_MC - case SD_LV_MC: /* set up multi-core groups */ - if (cpu == cpumask_first(sched_domain_span(sd))) - init_sched_build_groups(sched_domain_span(sd), cpu_map, - &cpu_to_core_group, - d->send_covered, d->tmpmask); - break; -#endif -#ifdef CONFIG_SCHED_BOOK - case SD_LV_BOOK: /* set up book groups */ - if (cpu == cpumask_first(sched_domain_span(sd))) - init_sched_build_groups(sched_domain_span(sd), cpu_map, - &cpu_to_book_group, - d->send_covered, d->tmpmask); - break; -#endif - case SD_LV_CPU: /* set up physical groups */ - if (cpu == cpumask_first(sched_domain_span(sd))) - init_sched_build_groups(sched_domain_span(sd), cpu_map, - &cpu_to_phys_group, - d->send_covered, d->tmpmask); - break; -#ifdef CONFIG_NUMA - case SD_LV_NODE: - if (cpu == cpumask_first(sched_domain_span(sd))) - init_sched_build_groups(sched_domain_span(sd), cpu_map, - &cpu_to_node_group, - d->send_covered, d->tmpmask); - - case SD_LV_ALLNODES: - if (cpu == cpumask_first(cpu_map)) - init_sched_build_groups(cpu_map, cpu_map, - &cpu_to_allnodes_group, - d->send_covered, d->tmpmask); - break; -#endif - default: - break; - } -} - /* * Build sched domains for a given set of cpus and attach the sched domains * to the individual cpus */ -static int __build_sched_domains(const struct cpumask *cpu_map, - struct sched_domain_attr *attr) +static int build_sched_domains(const struct cpumask *cpu_map, + struct sched_domain_attr *attr) { enum s_alloc alloc_state = sa_none; + struct sched_domain *sd; struct s_data d; - struct sched_domain *sd, *tmp; int i; -#ifdef CONFIG_NUMA - d.sd_allnodes = 0; -#endif alloc_state = __visit_domain_allocation_hell(&d, cpu_map); if (alloc_state != sa_rootdomain) goto error; - /* - * Set up domains for cpus specified by the cpu_map. - */ + /* Set up domains for cpus specified by the cpu_map. 
*/ for_each_cpu(i, cpu_map) { cpumask_and(d.nodemask, cpumask_of_node(cpu_to_node(i)), cpu_map); @@ -7326,10 +7226,19 @@ static int __build_sched_domains(const struct cpumask *cpu_map, sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i); *per_cpu_ptr(d.sd, i) = sd; + } + + /* Build the groups for the domains */ + for_each_cpu(i, cpu_map) { + for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { + sd->span_weight = cpumask_weight(sched_domain_span(sd)); + get_group(i, sd->private, &sd->groups); + atomic_inc(&sd->groups->ref); - for (tmp = sd; tmp; tmp = tmp->parent) { - tmp->span_weight = cpumask_weight(sched_domain_span(tmp)); - build_sched_groups(&d, tmp, cpu_map, i); + if (i != cpumask_first(sched_domain_span(sd))) + continue; + + build_sched_groups(sd, d.send_covered); } } @@ -7338,18 +7247,21 @@ static int __build_sched_domains(const struct cpumask *cpu_map, if (!cpumask_test_cpu(i, cpu_map)) continue; - sd = *per_cpu_ptr(d.sd, i); - for (; sd; sd = sd->parent) + for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { + claim_allocations(i, sd); init_sched_groups_power(i, sd); + } } /* Attach the domains */ + rcu_read_lock(); for_each_cpu(i, cpu_map) { sd = *per_cpu_ptr(d.sd, i); cpu_attach_domain(sd, d.rd, i); } + rcu_read_unlock(); - __free_domain_allocs(&d, sa_tmpmask, cpu_map); + __free_domain_allocs(&d, sa_sd, cpu_map); return 0; error: @@ -7357,11 +7269,6 @@ error: return -ENOMEM; } -static int build_sched_domains(const struct cpumask *cpu_map) -{ - return __build_sched_domains(cpu_map, NULL); -} - static cpumask_var_t *doms_cur; /* current sched domains */ static int ndoms_cur; /* number of sched domains in 'doms_cur' */ static struct sched_domain_attr *dattr_cur; @@ -7425,31 +7332,24 @@ static int init_sched_domains(const struct cpumask *cpu_map) doms_cur = &fallback_doms; cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); dattr_cur = NULL; - err = build_sched_domains(doms_cur[0]); + err = build_sched_domains(doms_cur[0], NULL); register_sched_domain_sysctl(); return err; } -static void destroy_sched_domains(const struct cpumask *cpu_map, - struct cpumask *tmpmask) -{ -} - /* * Detach sched domains from a group of cpus specified in cpu_map * These cpus will now be attached to the NULL domain */ static void detach_destroy_domains(const struct cpumask *cpu_map) { - /* Save because hotplug lock held. */ - static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS); int i; + rcu_read_lock(); for_each_cpu(i, cpu_map) cpu_attach_domain(NULL, &def_root_domain, i); - synchronize_sched(); - destroy_sched_domains(cpu_map, to_cpumask(tmpmask)); + rcu_read_unlock(); } /* handle null as "default" */ @@ -7538,8 +7438,7 @@ match1: goto match2; } /* no match - add a new doms_new */ - __build_sched_domains(doms_new[i], - dattr_new ? dattr_new + i : NULL); + build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL); match2: ; } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 4ee50f0af8d1..4a8ac7c2a18e 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1622,6 +1622,7 @@ static int select_idle_sibling(struct task_struct *p, int target) /* * Otherwise, iterate the domains and find an elegible idle cpu. 
*/ + rcu_read_lock(); for_each_domain(target, sd) { if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) break; @@ -1641,6 +1642,7 @@ static int select_idle_sibling(struct task_struct *p, int target) cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) break; } + rcu_read_unlock(); return target; } @@ -1673,6 +1675,7 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_ new_cpu = prev_cpu; } + rcu_read_lock(); for_each_domain(cpu, tmp) { if (!(tmp->flags & SD_LOAD_BALANCE)) continue; @@ -1723,9 +1726,10 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_ if (affine_sd) { if (cpu == prev_cpu || wake_affine(affine_sd, p, sync)) - return select_idle_sibling(p, cpu); - else - return select_idle_sibling(p, prev_cpu); + prev_cpu = cpu; + + new_cpu = select_idle_sibling(p, prev_cpu); + goto unlock; } while (sd) { @@ -1766,6 +1770,8 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_ } /* while loop will break here if sd == NULL */ } +unlock: + rcu_read_unlock(); return new_cpu; } @@ -3462,6 +3468,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq) raw_spin_unlock(&this_rq->lock); update_shares(this_cpu); + rcu_read_lock(); for_each_domain(this_cpu, sd) { unsigned long interval; int balance = 1; @@ -3483,6 +3490,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq) break; } } + rcu_read_unlock(); raw_spin_lock(&this_rq->lock); @@ -3531,6 +3539,7 @@ static int active_load_balance_cpu_stop(void *data) double_lock_balance(busiest_rq, target_rq); /* Search for an sd spanning us and the target CPU. */ + rcu_read_lock(); for_each_domain(target_cpu, sd) { if ((sd->flags & SD_LOAD_BALANCE) && cpumask_test_cpu(busiest_cpu, sched_domain_span(sd))) @@ -3546,6 +3555,7 @@ static int active_load_balance_cpu_stop(void *data) else schedstat_inc(sd, alb_failed); } + rcu_read_unlock(); double_unlock_balance(busiest_rq, target_rq); out_unlock: busiest_rq->active_balance = 0; @@ -3672,6 +3682,7 @@ static int find_new_ilb(int cpu) { struct sched_domain *sd; struct sched_group *ilb_group; + int ilb = nr_cpu_ids; /* * Have idle load balancer selection from semi-idle packages only @@ -3687,20 +3698,25 @@ static int find_new_ilb(int cpu) if (cpumask_weight(nohz.idle_cpus_mask) < 2) goto out_done; + rcu_read_lock(); for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) { ilb_group = sd->groups; do { - if (is_semi_idle_group(ilb_group)) - return cpumask_first(nohz.grp_idle_mask); + if (is_semi_idle_group(ilb_group)) { + ilb = cpumask_first(nohz.grp_idle_mask); + goto unlock; + } ilb_group = ilb_group->next; } while (ilb_group != sd->groups); } +unlock: + rcu_read_unlock(); out_done: - return nr_cpu_ids; + return ilb; } #else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */ static inline int find_new_ilb(int call_cpu) @@ -3845,6 +3861,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) update_shares(cpu); + rcu_read_lock(); for_each_domain(cpu, sd) { if (!(sd->flags & SD_LOAD_BALANCE)) continue; @@ -3890,6 +3907,7 @@ out: if (!balance) break; } + rcu_read_unlock(); /* * next_balance will be updated only when there is a need. -- cgit v1.2.3 From 3859173d43658d51a749bc0201b943922577d39c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Apr 2011 14:09:53 +0200 Subject: sched: Reduce some allocation pressure Since we now allocate SD_LV_MAX * nr_cpu_ids sched_domain/sched_group structures when rebuilding the scheduler toplogy it might make sense to shrink that depending on the CONFIG_ options. 
This is only needed until we get rid of SD_LV_* alltogether and provide a full dynamic topology interface. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110407122942.406226449@chello.nl Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 020b79d6c486..5a9168b01db8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -897,12 +897,20 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg) enum sched_domain_level { SD_LV_NONE = 0, +#ifdef CONFIG_SCHED_SMT SD_LV_SIBLING, +#endif +#ifdef CONFIG_SCHED_MC SD_LV_MC, +#endif +#ifdef CONFIG_SCHED_BOOK SD_LV_BOOK, +#endif SD_LV_CPU, +#ifdef CONFIG_NUMA SD_LV_NODE, SD_LV_ALLNODES, +#endif SD_LV_MAX }; -- cgit v1.2.3 From 7dd04b730749f957c116f363524fd622b05e5141 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Apr 2011 14:09:56 +0200 Subject: sched: Remove some dead code Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110407122942.553814623@chello.nl Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ------ kernel/sched.c | 16 ---------------- 2 files changed, 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5a9168b01db8..09d9e02f2b61 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -883,9 +883,6 @@ struct sched_group { * NOTE: this field is variable length. (Allocated dynamically * by attaching extra space to the end of the structure, * depending on how many CPUs the kernel has booted up with) - * - * It is also be embedded into static data structures at build - * time. (See 'struct static_sched_group' in kernel/sched.c) */ unsigned long cpumask[0]; }; @@ -994,9 +991,6 @@ struct sched_domain { * NOTE: this field is variable length. (Allocated dynamically * by attaching extra space to the end of the structure, * depending on how many CPUs the kernel has booted up with) - * - * It is also be embedded into static data structures at build - * time. (See 'struct static_sched_domain' in kernel/sched.c) */ unsigned long span[0]; }; diff --git a/kernel/sched.c b/kernel/sched.c index f4d3a624c50a..5ec685ce516a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6816,22 +6816,6 @@ static void sched_domain_node_span(int node, struct cpumask *span) int sched_smt_power_savings = 0, sched_mc_power_savings = 0; -/* - * The cpus mask in sched_group and sched_domain hangs off the end. - * - * ( See the the comments in include/linux/sched.h:struct sched_group - * and struct sched_domain. ) - */ -struct static_sched_group { - struct sched_group sg; - DECLARE_BITMAP(cpus, CONFIG_NR_CPUS); -}; - -struct static_sched_domain { - struct sched_domain sd; - DECLARE_BITMAP(span, CONFIG_NR_CPUS); -}; - struct sd_data { struct sched_domain **__percpu sd; struct sched_group **__percpu sg; -- cgit v1.2.3 From 60495e7760d8ee364695006af37309b0755e0e17 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Apr 2011 14:10:04 +0200 Subject: sched: Dynamic sched_domain::level Remove the SD_LV_ enum and use dynamic level assignments. 
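Conceptually the change boils down to the fragment below, a sketch of the build_sched_domain() hunk that follows (the base level of a leaf domain is assumed to be zero): the level is derived from the child's depth as the tree is built bottom-up, and the running maximum replaces SD_LV_MAX as the bound checked by cpuset's relax_domain_level.

	/* Sketch only: levels fall out of tree depth at build time. */
	sd->level = child ? child->level + 1 : 0;
	sched_domain_level_max = max(sched_domain_level_max, sd->level);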
Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110407122942.969433965@chello.nl Signed-off-by: Ingo Molnar --- include/linux/sched.h | 23 +++-------------------- kernel/cpuset.c | 2 +- kernel/sched.c | 9 ++++++--- 3 files changed, 10 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 09d9e02f2b61..e43e5b0ab0b5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -892,25 +892,6 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg) return to_cpumask(sg->cpumask); } -enum sched_domain_level { - SD_LV_NONE = 0, -#ifdef CONFIG_SCHED_SMT - SD_LV_SIBLING, -#endif -#ifdef CONFIG_SCHED_MC - SD_LV_MC, -#endif -#ifdef CONFIG_SCHED_BOOK - SD_LV_BOOK, -#endif - SD_LV_CPU, -#ifdef CONFIG_NUMA - SD_LV_NODE, - SD_LV_ALLNODES, -#endif - SD_LV_MAX -}; - struct sched_domain_attr { int relax_domain_level; }; @@ -919,6 +900,8 @@ struct sched_domain_attr { .relax_domain_level = -1, \ } +extern int sched_domain_level_max; + struct sched_domain { /* These fields must be setup */ struct sched_domain *parent; /* top domain must be null terminated */ @@ -936,7 +919,7 @@ struct sched_domain { unsigned int forkexec_idx; unsigned int smt_gain; int flags; /* See SD_* */ - enum sched_domain_level level; + int level; /* Runtime fields. */ unsigned long last_balance; /* init to jiffies. units in jiffies */ diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 33eee16addb8..2bb8c2e98fff 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1159,7 +1159,7 @@ int current_cpuset_is_being_rebound(void) static int update_relax_domain_level(struct cpuset *cs, s64 val) { #ifdef CONFIG_SMP - if (val < -1 || val >= SD_LV_MAX) + if (val < -1 || val >= sched_domain_level_max) return -EINVAL; #endif diff --git a/kernel/sched.c b/kernel/sched.c index 3231e1997426..506cb8147c70 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6966,7 +6966,6 @@ sd_init_##type(struct sched_domain_topology_level *tl, int cpu) \ { \ struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu); \ *sd = SD_##type##_INIT; \ - sd->level = SD_LV_##type; \ SD_INIT_NAME(sd, type); \ sd->private = &tl->data; \ return sd; \ @@ -6988,13 +6987,14 @@ SD_INIT_FUNC(CPU) #endif static int default_relax_domain_level = -1; +int sched_domain_level_max; static int __init setup_relax_domain_level(char *str) { unsigned long val; val = simple_strtoul(str, NULL, 0); - if (val < SD_LV_MAX) + if (val < sched_domain_level_max) default_relax_domain_level = val; return 1; @@ -7173,8 +7173,11 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, set_domain_attribute(sd, attr); cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu)); - if (child) + if (child) { + sd->level = child->level + 1; + sched_domain_level_max = max(sched_domain_level_max, sd->level); child->parent = sd; + } sd->child = child; return sd; -- cgit v1.2.3 From 127fe533ae56d7f4e7b5011869870982eba25723 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 8 Apr 2011 18:01:59 +0000 Subject: v3 ethtool: add ntuple flow specifier data to network flow classifier This change is meant to add an ntuple data extensions to the rx network flow classification specifiers. The idea is to allow ntuple to be displayed via the network flow classification interface. The first patch had some left over stuff from the original flow extension flags I had added. That bit is removed in this patch. 
The second had some left over comments that stated we ignored bits in the masks when we actually match them. This work is based on input from Ben Hutchings. Signed-off-by: Alexander Duyck Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 53 +++++++++++++++++++++++++++++-------------------- net/socket.c | 14 ++++++------- 2 files changed, 39 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c04d1316d221..c7eff13d2f34 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -380,27 +380,42 @@ struct ethtool_usrip4_spec { __u8 proto; }; +union ethtool_flow_union { + struct ethtool_tcpip4_spec tcp_ip4_spec; + struct ethtool_tcpip4_spec udp_ip4_spec; + struct ethtool_tcpip4_spec sctp_ip4_spec; + struct ethtool_ah_espip4_spec ah_ip4_spec; + struct ethtool_ah_espip4_spec esp_ip4_spec; + struct ethtool_usrip4_spec usr_ip4_spec; + struct ethhdr ether_spec; + __u8 hdata[60]; +}; + +struct ethtool_flow_ext { + __be16 vlan_etype; + __be16 vlan_tci; + __be32 data[2]; +}; + /** * struct ethtool_rx_flow_spec - specification for RX flow filter * @flow_type: Type of match to perform, e.g. %TCP_V4_FLOW * @h_u: Flow fields to match (dependent on @flow_type) - * @m_u: Masks for flow field bits to be ignored + * @h_ext: Additional fields to match + * @m_u: Masks for flow field bits to be matched + * @m_ext: Masks for additional field bits to be matched + * Note, all additional fields must be ignored unless @flow_type + * includes the %FLOW_EXT flag. * @ring_cookie: RX ring/queue index to deliver to, or %RX_CLS_FLOW_DISC * if packets should be discarded * @location: Index of filter in hardware table */ struct ethtool_rx_flow_spec { __u32 flow_type; - union { - struct ethtool_tcpip4_spec tcp_ip4_spec; - struct ethtool_tcpip4_spec udp_ip4_spec; - struct ethtool_tcpip4_spec sctp_ip4_spec; - struct ethtool_ah_espip4_spec ah_ip4_spec; - struct ethtool_ah_espip4_spec esp_ip4_spec; - struct ethtool_usrip4_spec usr_ip4_spec; - struct ethhdr ether_spec; - __u8 hdata[72]; - } h_u, m_u; + union ethtool_flow_union h_u; + struct ethtool_flow_ext h_ext; + union ethtool_flow_union m_u; + struct ethtool_flow_ext m_ext; __u64 ring_cookie; __u32 location; }; @@ -458,16 +473,10 @@ struct ethtool_rxnfc { struct compat_ethtool_rx_flow_spec { u32 flow_type; - union { - struct ethtool_tcpip4_spec tcp_ip4_spec; - struct ethtool_tcpip4_spec udp_ip4_spec; - struct ethtool_tcpip4_spec sctp_ip4_spec; - struct ethtool_ah_espip4_spec ah_ip4_spec; - struct ethtool_ah_espip4_spec esp_ip4_spec; - struct ethtool_usrip4_spec usr_ip4_spec; - struct ethhdr ether_spec; - u8 hdata[72]; - } h_u, m_u; + union ethtool_flow_union h_u; + struct ethtool_flow_ext h_ext; + union ethtool_flow_union m_u; + struct ethtool_flow_ext m_ext; compat_u64 ring_cookie; u32 location; }; @@ -1072,6 +1081,8 @@ struct ethtool_ops { #define IPV4_FLOW 0x10 /* hash only */ #define IPV6_FLOW 0x11 /* hash only */ #define ETHER_FLOW 0x12 /* spec only (ether_spec) */ +/* Flag to enable additional fields in struct ethtool_rx_flow_spec */ +#define FLOW_EXT 0x80000000 /* L3-L4 network traffic flow hash options */ #define RXH_L2DA (1 << 1) diff --git a/net/socket.c b/net/socket.c index 5212447c86e7..575c84f2af19 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2643,13 +2643,13 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) return -EFAULT; if (convert_in) { - /* We expect there to be holes between fs.m_u and + /* We expect 
there to be holes between fs.m_ext and * fs.ring_cookie and at the end of fs, but nowhere else. */ - BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_u) + - sizeof(compat_rxnfc->fs.m_u) != - offsetof(struct ethtool_rxnfc, fs.m_u) + - sizeof(rxnfc->fs.m_u)); + BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) + + sizeof(compat_rxnfc->fs.m_ext) != + offsetof(struct ethtool_rxnfc, fs.m_ext) + + sizeof(rxnfc->fs.m_ext)); BUILD_BUG_ON( offsetof(struct compat_ethtool_rxnfc, fs.location) - offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) != @@ -2657,7 +2657,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) offsetof(struct ethtool_rxnfc, fs.ring_cookie)); if (copy_in_user(rxnfc, compat_rxnfc, - (void *)(&rxnfc->fs.m_u + 1) - + (void *)(&rxnfc->fs.m_ext + 1) - (void *)rxnfc) || copy_in_user(&rxnfc->fs.ring_cookie, &compat_rxnfc->fs.ring_cookie, @@ -2674,7 +2674,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) if (convert_out) { if (copy_in_user(compat_rxnfc, rxnfc, - (const void *)(&rxnfc->fs.m_u + 1) - + (const void *)(&rxnfc->fs.m_ext + 1) - (const void *)rxnfc) || copy_in_user(&compat_rxnfc->fs.ring_cookie, &rxnfc->fs.ring_cookie, -- cgit v1.2.3 From 581a8b0feeed8877aab3a8ca4c972419790cd07f Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Thu, 7 Apr 2011 15:08:27 -0700 Subject: nl80211: rename NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE To NL80211_MESH_SETUP_IE. This reflects our ability to insert any ie into a mesh beacon, not simply path selection ies. Signed-off-by: Javier Cardona Signed-off-by: John W. Linville --- include/linux/nl80211.h | 9 +++++---- include/net/cfg80211.h | 8 ++++---- net/mac80211/cfg.c | 15 +++++++-------- net/mac80211/ieee80211_i.h | 4 ++-- net/mac80211/mesh.c | 6 +++--- net/mac80211/mesh_plink.c | 2 +- net/mac80211/tx.c | 2 +- net/wireless/mesh.c | 4 ++-- net/wireless/nl80211.c | 11 ++++++----- 9 files changed, 31 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 16eea7229e99..ecf6b68a96da 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -545,6 +545,7 @@ enum nl80211_commands { /* source-level API compatibility */ #define NL80211_CMD_GET_MESH_PARAMS NL80211_CMD_GET_MESH_CONFIG #define NL80211_CMD_SET_MESH_PARAMS NL80211_CMD_SET_MESH_CONFIG +#define NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE NL80211_MESH_SETUP_IE /** * enum nl80211_attrs - nl80211 netlink attributes @@ -1719,9 +1720,9 @@ enum nl80211_meshconf_params { * vendor specific path metric or disable it to use the default Airtime * metric. * - * @NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE: A vendor specific information - * element that vendors will use to identify the path selection methods and - * metrics in use. + * @NL80211_MESH_SETUP_IE: Information elements for this mesh, for instance, a + * robust security network ie, or a vendor specific information element that + * vendors will use to identify the path selection methods and metrics in use. 
* * @NL80211_MESH_SETUP_ATTR_MAX: highest possible mesh setup attribute number * @__NL80211_MESH_SETUP_ATTR_AFTER_LAST: Internal use @@ -1730,7 +1731,7 @@ enum nl80211_mesh_setup_params { __NL80211_MESH_SETUP_INVALID, NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL, NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC, - NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE, + NL80211_MESH_SETUP_IE, /* keep last */ __NL80211_MESH_SETUP_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ba7384acf4e0..1d02ddf5a8a3 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -689,8 +689,8 @@ struct mesh_config { * @mesh_id_len: length of the mesh ID, at least 1 and at most 32 bytes * @path_sel_proto: which path selection protocol to use * @path_metric: which metric to use - * @vendor_ie: vendor information elements (optional) - * @vendor_ie_len: length of vendor information elements + * @ie: vendor information elements (optional) + * @ie_len: length of vendor information elements * * These parameters are fixed when the mesh is created. */ @@ -699,8 +699,8 @@ struct mesh_setup { u8 mesh_id_len; u8 path_sel_proto; u8 path_metric; - const u8 *vendor_ie; - u8 vendor_ie_len; + const u8 *ie; + u8 ie_len; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index bf5d28da46e6..d9428afd8bf6 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1034,26 +1034,25 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, u8 *new_ie; const u8 *old_ie; - /* first allocate the new vendor information element */ + /* allocate information elements */ new_ie = NULL; - old_ie = ifmsh->vendor_ie; + old_ie = ifmsh->ie; - ifmsh->vendor_ie_len = setup->vendor_ie_len; - if (setup->vendor_ie_len) { - new_ie = kmemdup(setup->vendor_ie, setup->vendor_ie_len, + if (setup->ie_len) { + new_ie = kmemdup(setup->ie, setup->ie_len, GFP_KERNEL); if (!new_ie) return -ENOMEM; } + ifmsh->ie_len = setup->ie_len; + ifmsh->ie = new_ie; + kfree(old_ie); /* now copy the rest of the setup parameters */ ifmsh->mesh_id_len = setup->mesh_id_len; memcpy(ifmsh->mesh_id, setup->mesh_id, ifmsh->mesh_id_len); ifmsh->mesh_pp_id = setup->path_sel_proto; ifmsh->mesh_pm_id = setup->path_metric; - ifmsh->vendor_ie = new_ie; - - kfree(old_ie); return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 6eb2c8523eeb..6450100594ba 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -488,8 +488,8 @@ struct ieee80211_if_mesh { struct mesh_config mshcfg; u32 mesh_seqnum; bool accepting_plinks; - const u8 *vendor_ie; - u8 vendor_ie_len; + const u8 *ie; + u8 ie_len; }; #ifdef CONFIG_MAC80211_MESH diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 2a57cc02c618..1c244c0c7664 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -279,9 +279,9 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; *pos++ = 0x00; - if (sdata->u.mesh.vendor_ie) { - int len = sdata->u.mesh.vendor_ie_len; - const u8 *data = sdata->u.mesh.vendor_ie; + if (sdata->u.mesh.ie) { + int len = sdata->u.mesh.ie_len; + const u8 *data = sdata->u.mesh.ie; if (skb_tailroom(skb) > len) memcpy(skb_put(skb, len), data, len); } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 44b53931ba5e..c705b20e1acb 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -161,7 +161,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, __le16 reason) { struct ieee80211_local *local = 
sdata->local; struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400 + - sdata->u.mesh.vendor_ie_len); + sdata->u.mesh.ie_len); struct ieee80211_mgmt *mgmt; bool include_plid = false; static const u8 meshpeeringproto[] = { 0x00, 0x0F, 0xAC, 0x2A }; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ce4596ed1268..17b10be31f55 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2262,7 +2262,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, /* headroom, head length, tail length and maximum TIM length */ skb = dev_alloc_skb(local->tx_headroom + 400 + - sdata->u.mesh.vendor_ie_len); + sdata->u.mesh.ie_len); if (!skb) goto out; diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 73e39c171ffb..0d4b2260f96f 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -53,8 +53,8 @@ const struct mesh_config default_mesh_config = { const struct mesh_setup default_mesh_setup = { .path_sel_proto = IEEE80211_PATH_PROTOCOL_HWMP, .path_metric = IEEE80211_PATH_METRIC_AIRTIME, - .vendor_ie = NULL, - .vendor_ie_len = 0, + .ie = NULL, + .ie_len = 0, }; int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 297d7ce4117b..ccd825a5857e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2823,7 +2823,7 @@ static const struct nla_policy nl80211_mesh_setup_params_policy[NL80211_MESH_SETUP_ATTR_MAX+1] = { [NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 }, - [NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE] = { .type = NLA_BINARY, + [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, }; @@ -2925,13 +2925,14 @@ static int nl80211_parse_mesh_setup(struct genl_info *info, IEEE80211_PATH_METRIC_VENDOR : IEEE80211_PATH_METRIC_AIRTIME; - if (tb[NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE]) { + + if (tb[NL80211_MESH_SETUP_IE]) { struct nlattr *ieattr = - tb[NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE]; + tb[NL80211_MESH_SETUP_IE]; if (!is_valid_ie_attr(ieattr)) return -EINVAL; - setup->vendor_ie = nla_data(ieattr); - setup->vendor_ie_len = nla_len(ieattr); + setup->ie = nla_data(ieattr); + setup->ie_len = nla_len(ieattr); } return 0; -- cgit v1.2.3 From 15d5dda623139bbf6165030fc251bbd5798f4130 Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Thu, 7 Apr 2011 15:08:28 -0700 Subject: cfg80211/nl80211: Add userspace authentication flag to mesh setup During mesh setup, use NL80211_MESH_SETUP_USERSPACE_AUTH flag to create a secure mesh and route management frames to userspace. Also, NL80211_CMD_GET_WIPHY now returns a flag NL80211_SUPPORT_MESH_AUTH if the wiphy's mesh implementation supports routing of mesh auth frames to userspace. This is useful for forward compatibility between old kernels and new userspace tools. Signed-off-by: Javier Cardona Signed-off-by: Thomas Pedersen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 9 +++++++++ include/net/cfg80211.h | 5 +++++ net/wireless/mesh.c | 4 ++++ net/wireless/nl80211.c | 5 +++++ 4 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index ecf6b68a96da..0e652d860819 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -887,6 +887,9 @@ enum nl80211_commands { * changed once the mesh is active. * @NL80211_ATTR_MESH_CONFIG: Mesh configuration parameters, a nested attribute * containing attributes from &enum nl80211_meshconf_params. 
+ * @NL80211_ATTR_SUPPORT_MESH_AUTH: Currently, this means the underlying driver + * allows auth frames in a mesh to be passed to userspace for processing via + * the @NL80211_MESH_SETUP_USERSPACE_AUTH flag. * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -1075,6 +1078,8 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX, + NL80211_ATTR_SUPPORT_MESH_AUTH, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1724,6 +1729,9 @@ enum nl80211_meshconf_params { * robust security network ie, or a vendor specific information element that * vendors will use to identify the path selection methods and metrics in use. * + * @NL80211_MESH_SETUP_USERSPACE_AUTH: Enable this option if an authentication + * daemon will be authenticating mesh candidates. + * * @NL80211_MESH_SETUP_ATTR_MAX: highest possible mesh setup attribute number * @__NL80211_MESH_SETUP_ATTR_AFTER_LAST: Internal use */ @@ -1732,6 +1740,7 @@ enum nl80211_mesh_setup_params { NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL, NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC, NL80211_MESH_SETUP_IE, + NL80211_MESH_SETUP_USERSPACE_AUTH, /* keep last */ __NL80211_MESH_SETUP_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1d02ddf5a8a3..e77603bd1630 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -691,6 +691,7 @@ struct mesh_config { * @path_metric: which metric to use * @ie: vendor information elements (optional) * @ie_len: length of vendor information elements + * @is_secure: or not * * These parameters are fixed when the mesh is created. */ @@ -701,6 +702,7 @@ struct mesh_setup { u8 path_metric; const u8 *ie; u8 ie_len; + bool is_secure; }; /** @@ -1451,6 +1453,8 @@ struct cfg80211_ops { * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN. * @WIPHY_FLAG_SUPPORTS_SEPARATE_DEFAULT_KEYS: The device supports separate * unicast and multicast TX keys. + * @WIPHY_FLAG_MESH_AUTH: The device supports mesh authentication by routing + * auth frames to userspace. See @NL80211_MESH_SETUP_USERSPACE_AUTH. 
*/ enum wiphy_flags { WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), @@ -1463,6 +1467,7 @@ enum wiphy_flags { WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), WIPHY_FLAG_IBSS_RSN = BIT(8), WIPHY_FLAG_SUPPORTS_SEPARATE_DEFAULT_KEYS= BIT(9), + WIPHY_FLAG_MESH_AUTH = BIT(10), }; struct mac_address { diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 0d4b2260f96f..0e5c122ce324 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -72,6 +72,10 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) return -EOPNOTSUPP; + if (!(rdev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) && + setup->is_secure) + return -EOPNOTSUPP; + if (wdev->mesh_id_len) return -EALREADY; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ccd825a5857e..cbedfc2a42a2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -124,6 +124,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_BSS_HT_OPMODE] = { .type = NLA_U16 }, [NL80211_ATTR_MESH_CONFIG] = { .type = NLA_NESTED }, + [NL80211_ATTR_SUPPORT_MESH_AUTH] = { .type = NLA_FLAG }, [NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY, .len = NL80211_HT_CAPABILITY_LEN }, @@ -594,6 +595,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) NLA_PUT_FLAG(msg, NL80211_ATTR_SUPPORT_IBSS_RSN); + if (dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) + NLA_PUT_FLAG(msg, NL80211_ATTR_SUPPORT_MESH_AUTH); NLA_PUT(msg, NL80211_ATTR_CIPHER_SUITES, sizeof(u32) * dev->wiphy.n_cipher_suites, @@ -2823,6 +2826,7 @@ static const struct nla_policy nl80211_mesh_setup_params_policy[NL80211_MESH_SETUP_ATTR_MAX+1] = { [NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 }, + [NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG }, [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, }; @@ -2934,6 +2938,7 @@ static int nl80211_parse_mesh_setup(struct genl_info *info, setup->ie = nla_data(ieattr); setup->ie_len = nla_len(ieattr); } + setup->is_secure = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AUTH]); return 0; } -- cgit v1.2.3 From b39c48fac1fc915a5dcd024bf6e9aabc855ed591 Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Thu, 7 Apr 2011 15:08:30 -0700 Subject: nl80211/mac80211: let userspace authenticate stations Signed-off-by: Javier Cardona Signed-off-by: John W. 
Linville --- include/linux/nl80211.h | 2 ++ net/mac80211/cfg.c | 6 ++++++ net/wireless/nl80211.c | 5 ++++- 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 0e652d860819..5ec4ac3a0ef4 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1174,6 +1174,7 @@ enum nl80211_iftype { * with short barker preamble * @NL80211_STA_FLAG_WME: station is WME/QoS capable * @NL80211_STA_FLAG_MFP: station uses management frame protection + * @NL80211_STA_FLAG_AUTHENTICATED: station is authenticated * @NL80211_STA_FLAG_MAX: highest station flag number currently defined * @__NL80211_STA_FLAG_AFTER_LAST: internal use */ @@ -1183,6 +1184,7 @@ enum nl80211_sta_flags { NL80211_STA_FLAG_SHORT_PREAMBLE, NL80211_STA_FLAG_WME, NL80211_STA_FLAG_MFP, + NL80211_STA_FLAG_AUTHENTICATED, /* keep last */ __NL80211_STA_FLAG_AFTER_LAST, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index dc623d884d02..1c25723eacda 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -686,6 +686,12 @@ static void sta_apply_parameters(struct ieee80211_local *local, if (set & BIT(NL80211_STA_FLAG_MFP)) sta->flags |= WLAN_STA_MFP; } + + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) { + sta->flags &= ~WLAN_STA_AUTH; + if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) + sta->flags |= WLAN_STA_AUTH; + } spin_unlock_irqrestore(&sta->flaglock, flags); /* diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index cbedfc2a42a2..ce29a0d0e88e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1925,6 +1925,7 @@ static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = { [NL80211_STA_FLAG_SHORT_PREAMBLE] = { .type = NLA_FLAG }, [NL80211_STA_FLAG_WME] = { .type = NLA_FLAG }, [NL80211_STA_FLAG_MFP] = { .type = NLA_FLAG }, + [NL80211_STA_FLAG_AUTHENTICATED] = { .type = NLA_FLAG }, }; static int parse_station_flags(struct genl_info *info, @@ -2284,7 +2285,9 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) err = -EINVAL; if (params.supported_rates) err = -EINVAL; - if (params.sta_flags_mask) + if (params.sta_flags_mask & + ~(BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_AUTHORIZED))) err = -EINVAL; break; default: -- cgit v1.2.3 From c93b5e717ec47b57abfe0229360bc11e77520984 Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Thu, 7 Apr 2011 15:08:34 -0700 Subject: nl80211: New notification to discover mesh peer candidates. Notify userspace when a beacon/presp is received from a suitable mesh peer candidate for whom no sta information exists. Userspace can then decide to create a sta info for the candidate. If userspace is not ready to authenticate the peer right away, it can create the sta info with the authenticated flag unset and set it later. Signed-off-by: Javier Cardona Signed-off-by: John W. Linville --- include/linux/nl80211.h | 12 ++++++++++++ include/net/cfg80211.h | 16 ++++++++++++++++ net/wireless/mesh.c | 14 ++++++++++++++ net/wireless/nl80211.c | 38 ++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.h | 4 ++++ 5 files changed, 84 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 5ec4ac3a0ef4..b87481866dde 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -410,6 +410,16 @@ * notification. This event is used to indicate that an unprotected * disassociation frame was dropped when MFP is in use. 
* + * @NL80211_CMD_NEW_PEER_CANDIDATE: Notification on the reception of a + * beacon or probe response from a compatible mesh peer. This is only + * sent while no station information (sta_info) exists for the new peer + * candidate and when @NL80211_MESH_SETUP_USERSPACE_AUTH is set. On + * reception of this notification, userspace may decide to create a new + * station (@NL80211_CMD_NEW_STATION). To stop this notification from + * reoccurring, the userspace authentication daemon may want to create the + * new station with the AUTHENTICATED flag unset and maybe change it later + * depending on the authentication result. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -522,6 +532,8 @@ enum nl80211_commands { NL80211_CMD_UNPROT_DEAUTHENTICATE, NL80211_CMD_UNPROT_DISASSOCIATE, + NL80211_CMD_NEW_PEER_CANDIDATE, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e77603bd1630..f40cd30847de 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2488,6 +2488,22 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, */ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp); +/** + * cfg80211_notify_new_candidate - notify cfg80211 of a new mesh peer candidate + * + * @dev: network device + * @macaddr: the MAC address of the new candidate + * @ie: information elements advertised by the peer candidate + * @ie_len: lenght of the information elements buffer + * @gfp: allocation flags + * + * This function notifies cfg80211 that the mesh peer candidate has been + * detected, most likely via a beacon or, less likely, via a probe response. + * cfg80211 then sends a notification to userspace. 
+ */ +void cfg80211_notify_new_peer_candidate(struct net_device *dev, + const u8 *macaddr, const u8 *ie, u8 ie_len, gfp_t gfp); + /** * DOC: RFkill integration * diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index e0226e8265a3..5c116083eeca 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -1,5 +1,6 @@ #include #include +#include "nl80211.h" #include "core.h" /* Default values, timeouts in ms */ @@ -110,6 +111,19 @@ int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, return err; } +void cfg80211_notify_new_peer_candidate(struct net_device *dev, + const u8 *macaddr, const u8* ie, u8 ie_len, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_MESH_POINT)) + return; + + nl80211_send_new_peer_candidate(wiphy_to_dev(wdev->wiphy), dev, + macaddr, ie, ie_len, gfp); +} +EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate); + static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f4cb8efe2e5f..58f501a35022 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5818,6 +5818,44 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } +void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + const u8 *macaddr, const u8* ie, u8 ie_len, + gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NEW_PEER_CANDIDATE); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, macaddr); + if (ie_len && ie) + NLA_PUT(msg, NL80211_ATTR_IE, ie_len , ie); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, enum nl80211_key_type key_type, int key_id, diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index dcac5cd6f017..f2af6955a665 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -50,6 +50,10 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, struct net_device *netdev, u16 reason, const u8 *ie, size_t ie_len, bool from_ap); +void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + const u8 *macaddr, const u8* ie, u8 ie_len, + gfp_t gfp); void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, -- cgit v1.2.3 From ebe27c91af8b7f4810ae906fbd3eeb2d87850026 Mon Sep 17 00:00:00 2001 From: Mohammed Shafi Shajakhan Date: Fri, 8 Apr 2011 21:24:24 +0530 Subject: {mac|nl}80211: Add station connected time Add station connected time in debugfs. This will be helpful to get a measure of stability of the connection and for debugging stress issues Cc: Senthilkumar Balasubramanian Signed-off-by: Mohammed Shafi Shajakhan Signed-off-by: John W. 
Linville --- include/linux/nl80211.h | 2 ++ include/net/cfg80211.h | 4 ++++ net/mac80211/cfg.c | 7 ++++++- net/mac80211/debugfs_sta.c | 26 ++++++++++++++++++++++++++ net/mac80211/sta_info.c | 3 +++ net/mac80211/sta_info.h | 2 ++ net/wireless/nl80211.c | 3 +++ 7 files changed, 46 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index b87481866dde..be8df57b789d 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1297,6 +1297,7 @@ enum nl80211_sta_bss_param { * attribute, like NL80211_STA_INFO_TX_BITRATE. * @NL80211_STA_INFO_BSS_PARAM: current station's view of BSS, nested attribute * containing info as possible, see &enum nl80211_sta_bss_param + * @NL80211_STA_INFO_CONNECTED_TIME: time since the station is last connected * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -1317,6 +1318,7 @@ enum nl80211_sta_info { NL80211_STA_INFO_SIGNAL_AVG, NL80211_STA_INFO_RX_BITRATE, NL80211_STA_INFO_BSS_PARAM, + NL80211_STA_INFO_CONNECTED_TIME, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f40cd30847de..d30eada7c6cd 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -423,6 +423,7 @@ struct station_parameters { * @STATION_INFO_SIGNAL_AVG: @signal_avg filled * @STATION_INFO_RX_BITRATE: @rxrate fields are filled * @STATION_INFO_BSS_PARAM: @bss_param filled + * @STATION_INFO_CONNECTED_TIME: @connected_time filled */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -441,6 +442,7 @@ enum station_info_flags { STATION_INFO_SIGNAL_AVG = 1<<13, STATION_INFO_RX_BITRATE = 1<<14, STATION_INFO_BSS_PARAM = 1<<15, + STATION_INFO_CONNECTED_TIME = 1<<16 }; /** @@ -511,6 +513,7 @@ struct sta_bss_parameters { * Station information filled by driver for get_station() and dump_station. 
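mac80211 fills the new field in sta_set_sinfo() further down; a full-MAC cfg80211 driver would report it from its own get_station() callback instead. A minimal sketch follows, in which foo_sta, foo_find_sta() and assoc_time are hypothetical driver state, not part of this patch:

        static int foo_get_station(struct wiphy *wiphy, struct net_device *dev,
                                   u8 *mac, struct station_info *sinfo)
        {
                struct foo_sta *sta = foo_find_sta(dev, mac); /* hypothetical lookup */
                struct timespec uptime;

                if (!sta)
                        return -ENOENT;

                /* report seconds of monotonic time since association */
                do_posix_clock_monotonic_gettime(&uptime);
                sinfo->connected_time = uptime.tv_sec - sta->assoc_time;
                sinfo->filled |= STATION_INFO_CONNECTED_TIME;
                return 0;
        }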
* * @filled: bitflag of flags from &enum station_info_flags + * @connected_time: time(in secs) since a station is last connected * @inactive_time: time since last station activity (tx/rx) in milliseconds * @rx_bytes: bytes received from this station * @tx_bytes: bytes transmitted to this station @@ -533,6 +536,7 @@ struct sta_bss_parameters { */ struct station_info { u32 filled; + u32 connected_time; u32 inactive_time; u32 rx_bytes; u32 tx_bytes; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 1c25723eacda..a6d191f2a0fe 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -330,6 +330,7 @@ static void rate_idx_to_bitrate(struct rate_info *rate, struct sta_info *sta, in static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = sta->sdata; + struct timespec uptime; sinfo->generation = sdata->local->sta_generation; @@ -343,7 +344,11 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) STATION_INFO_TX_BITRATE | STATION_INFO_RX_BITRATE | STATION_INFO_RX_DROP_MISC | - STATION_INFO_BSS_PARAM; + STATION_INFO_BSS_PARAM | + STATION_INFO_CONNECTED_TIME; + + do_posix_clock_monotonic_gettime(&uptime); + sinfo->connected_time = uptime.tv_sec - sta->last_connected; sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); sinfo->rx_bytes = sta->rx_bytes; diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index c04a1396cf8d..c008232731eb 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -92,6 +92,31 @@ static ssize_t sta_inactive_ms_read(struct file *file, char __user *userbuf, } STA_OPS(inactive_ms); + +static ssize_t sta_connected_time_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct sta_info *sta = file->private_data; + struct timespec uptime; + struct tm result; + long connected_time_secs; + char buf[100]; + int res; + do_posix_clock_monotonic_gettime(&uptime); + connected_time_secs = uptime.tv_sec - sta->last_connected; + time_to_tm(connected_time_secs, 0, &result); + result.tm_year -= 70; + result.tm_mday -= 1; + res = scnprintf(buf, sizeof(buf), + "years - %d\nmonths - %d\ndays - %d\nclock - %d:%d:%d\n\n", + result.tm_year, result.tm_mon, result.tm_mday, + result.tm_hour, result.tm_min, result.tm_sec); + return simple_read_from_buffer(userbuf, count, ppos, buf, res); +} +STA_OPS(connected_time); + + + static ssize_t sta_last_seq_ctrl_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { @@ -324,6 +349,7 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(flags); DEBUGFS_ADD(num_ps_buf_frames); DEBUGFS_ADD(inactive_ms); + DEBUGFS_ADD(connected_time); DEBUGFS_ADD(last_seq_ctrl); DEBUGFS_ADD(agg_status); DEBUGFS_ADD(dev); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 999f8fbf0b4b..8a9068ac0673 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -228,6 +228,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct sta_info *sta; + struct timespec uptime; int i; sta = kzalloc(sizeof(*sta) + local->hw.sta_data_size, gfp); @@ -245,6 +246,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->sdata = sdata; sta->last_rx = jiffies; + do_posix_clock_monotonic_gettime(&uptime); + sta->last_connected = uptime.tv_sec; ewma_init(&sta->avg_signal, 1024, 8); if (sta_prepare_rate_control(local, sta, gfp)) { diff --git a/net/mac80211/sta_info.h 
b/net/mac80211/sta_info.h index 43238e99cfb3..984a03db3553 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -226,6 +226,7 @@ enum plink_state { * @rx_bytes: Number of bytes received from this STA * @wep_weak_iv_count: number of weak WEP IVs received from this station * @last_rx: time (in jiffies) when last frame was received from this STA + * @last_connected: time (in seconds) when a station got connected * @num_duplicates: number of duplicate frames received from this STA * @rx_fragments: number of received MPDUs * @rx_dropped: number of dropped MPDUs from this STA @@ -295,6 +296,7 @@ struct sta_info { unsigned long rx_packets, rx_bytes; unsigned long wep_weak_iv_count; unsigned long last_rx; + long last_connected; unsigned long num_duplicates; unsigned long rx_fragments; unsigned long rx_dropped; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 58f501a35022..0efa7fd01150 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2020,6 +2020,9 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, sinfoattr = nla_nest_start(msg, NL80211_ATTR_STA_INFO); if (!sinfoattr) goto nla_put_failure; + if (sinfo->filled & STATION_INFO_CONNECTED_TIME) + NLA_PUT_U32(msg, NL80211_STA_INFO_CONNECTED_TIME, + sinfo->connected_time); if (sinfo->filled & STATION_INFO_INACTIVE_TIME) NLA_PUT_U32(msg, NL80211_STA_INFO_INACTIVE_TIME, sinfo->inactive_time); -- cgit v1.2.3 From bcc6d47903612c3861201cc3a866fb604f26b8b2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 7 Apr 2011 19:48:33 +0000 Subject: net: vlan: make non-hw-accel rx path similar to hw-accel Now there are 2 paths for rx vlan frames. When rx-vlan-hw-accel is enabled, the skb is untagged by the NIC, vlan_tci is set and the skb gets into vlan code in __netif_receive_skb - vlan_hwaccel_do_receive. For non-rx-vlan-hw-accel however, a tagged skb goes through the whole __netif_receive_skb, is untagged in the ptype_base handler and reinjected. This inconsistency is fixed by this patch. Vlan untagging happens early in __netif_receive_skb so the rest of the code (ptype_all handlers, rx_handlers) sees the skb as if it had been untagged by hw. Signed-off-by: Jiri Pirko v1->v2: remove "inline" from vlan_core.c functions Signed-off-by: David S.
Miller --- include/linux/if_vlan.h | 10 ++- net/8021q/vlan.c | 8 --- net/8021q/vlan.h | 2 - net/8021q/vlan_core.c | 85 +++++++++++++++++++++++- net/8021q/vlan_dev.c | 173 ------------------------------------------------ net/core/dev.c | 8 ++- 6 files changed, 99 insertions(+), 187 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 635e1faec412..998b29930b80 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -132,7 +132,8 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling); -extern bool vlan_hwaccel_do_receive(struct sk_buff **skb); +extern bool vlan_do_receive(struct sk_buff **skb); +extern struct sk_buff *vlan_untag(struct sk_buff *skb); extern gro_result_t vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, unsigned int vlan_tci, struct sk_buff *skb); @@ -166,13 +167,18 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, return NET_XMIT_SUCCESS; } -static inline bool vlan_hwaccel_do_receive(struct sk_buff **skb) +static inline bool vlan_do_receive(struct sk_buff **skb) { if ((*skb)->vlan_tci & VLAN_VID_MASK) (*skb)->pkt_type = PACKET_OTHERHOST; return false; } +inline struct sk_buff *vlan_untag(struct sk_buff *skb) +{ + return skb; +} + static inline gro_result_t vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, unsigned int vlan_tci, struct sk_buff *skb) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index e47600b4e2e3..14ef5efbc653 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -49,11 +49,6 @@ const char vlan_version[] = DRV_VERSION; static const char vlan_copyright[] = "Ben Greear "; static const char vlan_buggyright[] = "David S. Miller "; -static struct packet_type vlan_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_8021Q), - .func = vlan_skb_recv, /* VLAN receive method */ -}; - /* End of global variables definitions. 
*/ static void vlan_group_free(struct vlan_group *grp) @@ -684,7 +679,6 @@ static int __init vlan_proto_init(void) if (err < 0) goto err4; - dev_add_pack(&vlan_packet_type); vlan_ioctl_set(vlan_ioctl_handler); return 0; @@ -705,8 +699,6 @@ static void __exit vlan_cleanup_module(void) unregister_netdevice_notifier(&vlan_notifier_block); - dev_remove_pack(&vlan_packet_type); - unregister_pernet_subsys(&vlan_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 5687c9b95f33..c3408def8a19 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -75,8 +75,6 @@ static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev) } /* found in vlan_dev.c */ -int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype, struct net_device *orig_dev); void vlan_dev_set_ingress_priority(const struct net_device *dev, u32 skb_prio, u16 vlan_prio); int vlan_dev_set_egress_priority(const struct net_device *dev, diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index ce8e3ab3e7a5..41495dc2a4c9 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -4,7 +4,7 @@ #include #include "vlan.h" -bool vlan_hwaccel_do_receive(struct sk_buff **skbp) +bool vlan_do_receive(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK; @@ -88,3 +88,86 @@ gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, return napi_gro_frags(napi); } EXPORT_SYMBOL(vlan_gro_frags); + +static struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) +{ + if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) { + if (skb_cow(skb, skb_headroom(skb)) < 0) + skb = NULL; + if (skb) { + /* Lifted from Gleb's VLAN code... */ + memmove(skb->data - ETH_HLEN, + skb->data - VLAN_ETH_HLEN, 12); + skb->mac_header += VLAN_HLEN; + } + } + return skb; +} + +static void vlan_set_encap_proto(struct sk_buff *skb, struct vlan_hdr *vhdr) +{ + __be16 proto; + unsigned char *rawp; + + /* + * Was a VLAN packet, grab the encapsulated protocol, which the layer + * three protocols care about. + */ + + proto = vhdr->h_vlan_encapsulated_proto; + if (ntohs(proto) >= 1536) { + skb->protocol = proto; + return; + } + + rawp = skb->data; + if (*(unsigned short *) rawp == 0xFFFF) + /* + * This is a magic hack to spot IPX packets. Older Novell + * breaks the protocol design and runs IPX over 802.3 without + * an 802.2 LLC layer. We look for FFFF which isn't a used + * 802.2 SSAP/DSAP. This won't work for fault tolerant netware + * but does for the rest. 
+ */ + skb->protocol = htons(ETH_P_802_3); + else + /* + * Real 802.2 LLC + */ + skb->protocol = htons(ETH_P_802_2); +} + +struct sk_buff *vlan_untag(struct sk_buff *skb) +{ + struct vlan_hdr *vhdr; + u16 vlan_tci; + + if (unlikely(vlan_tx_tag_present(skb))) { + /* vlan_tci is already set-up so leave this for another time */ + return skb; + } + + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + goto err_free; + + if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) + goto err_free; + + vhdr = (struct vlan_hdr *) skb->data; + vlan_tci = ntohs(vhdr->h_vlan_TCI); + __vlan_hwaccel_put_tag(skb, vlan_tci); + + skb_pull_rcsum(skb, VLAN_HLEN); + vlan_set_encap_proto(skb, vhdr); + + skb = vlan_check_reorder_header(skb); + if (unlikely(!skb)) + goto err_free; + + return skb; + +err_free: + kfree_skb(skb); + return NULL; +} diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index b84a46b30c0c..d174c312b7f1 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -65,179 +65,6 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb) return 0; } -static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) -{ - if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) { - if (skb_cow(skb, skb_headroom(skb)) < 0) - skb = NULL; - if (skb) { - /* Lifted from Gleb's VLAN code... */ - memmove(skb->data - ETH_HLEN, - skb->data - VLAN_ETH_HLEN, 12); - skb->mac_header += VLAN_HLEN; - } - } - - return skb; -} - -static inline void vlan_set_encap_proto(struct sk_buff *skb, - struct vlan_hdr *vhdr) -{ - __be16 proto; - unsigned char *rawp; - - /* - * Was a VLAN packet, grab the encapsulated protocol, which the layer - * three protocols care about. - */ - - proto = vhdr->h_vlan_encapsulated_proto; - if (ntohs(proto) >= 1536) { - skb->protocol = proto; - return; - } - - rawp = skb->data; - if (*(unsigned short *)rawp == 0xFFFF) - /* - * This is a magic hack to spot IPX packets. Older Novell - * breaks the protocol design and runs IPX over 802.3 without - * an 802.2 LLC layer. We look for FFFF which isn't a used - * 802.2 SSAP/DSAP. This won't work for fault tolerant netware - * but does for the rest. - */ - skb->protocol = htons(ETH_P_802_3); - else - /* - * Real 802.2 LLC - */ - skb->protocol = htons(ETH_P_802_2); -} - -/* - * Determine the packet's protocol ID. The rule here is that we - * assume 802.3 if the type field is short enough to be a length. - * This is normal practice and works for any 'now in use' protocol. - * - * Also, at this point we assume that we ARE dealing exclusively with - * VLAN packets, or packets that should be made into VLAN packets based - * on a default VLAN ID. - * - * NOTE: Should be similar to ethernet/eth.c. - * - * SANITY NOTE: This method is called when a packet is moving up the stack - * towards userland. To get here, it would have already passed - * through the ethernet/eth.c eth_type_trans() method. - * SANITY NOTE 2: We are referencing to the VLAN_HDR frields, which MAY be - * stored UNALIGNED in the memory. RISC systems don't like - * such cases very much... - * SANITY NOTE 2a: According to Dave Miller & Alexey, it will always be - * aligned, so there doesn't need to be any of the unaligned - * stuff. It has been commented out now... 
--Ben - * - */ -int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype, struct net_device *orig_dev) -{ - struct vlan_hdr *vhdr; - struct vlan_pcpu_stats *rx_stats; - struct net_device *vlan_dev; - u16 vlan_id; - u16 vlan_tci; - - skb = skb_share_check(skb, GFP_ATOMIC); - if (skb == NULL) - goto err_free; - - if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) - goto err_free; - - vhdr = (struct vlan_hdr *)skb->data; - vlan_tci = ntohs(vhdr->h_vlan_TCI); - vlan_id = vlan_tci & VLAN_VID_MASK; - - rcu_read_lock(); - vlan_dev = vlan_find_dev(dev, vlan_id); - - /* If the VLAN device is defined, we use it. - * If not, and the VID is 0, it is a 802.1p packet (not - * really a VLAN), so we will just netif_rx it later to the - * original interface, but with the skb->proto set to the - * wrapped proto: we do nothing here. - */ - - if (!vlan_dev) { - if (vlan_id) { - pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n", - __func__, vlan_id, dev->name); - goto err_unlock; - } - rx_stats = NULL; - } else { - skb->dev = vlan_dev; - - rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_pcpu_stats); - - u64_stats_update_begin(&rx_stats->syncp); - rx_stats->rx_packets++; - rx_stats->rx_bytes += skb->len; - - skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci); - - pr_debug("%s: priority: %u for TCI: %hu\n", - __func__, skb->priority, vlan_tci); - - switch (skb->pkt_type) { - case PACKET_BROADCAST: - /* Yeah, stats collect these together.. */ - /* stats->broadcast ++; // no such counter :-( */ - break; - - case PACKET_MULTICAST: - rx_stats->rx_multicast++; - break; - - case PACKET_OTHERHOST: - /* Our lower layer thinks this is not local, let's make - * sure. - * This allows the VLAN to have a different MAC than the - * underlying device, and still route correctly. - */ - if (!compare_ether_addr(eth_hdr(skb)->h_dest, - skb->dev->dev_addr)) - skb->pkt_type = PACKET_HOST; - break; - default: - break; - } - u64_stats_update_end(&rx_stats->syncp); - } - - skb_pull_rcsum(skb, VLAN_HLEN); - vlan_set_encap_proto(skb, vhdr); - - if (vlan_dev) { - skb = vlan_check_reorder_header(skb); - if (!skb) { - rx_stats->rx_errors++; - goto err_unlock; - } - } - - netif_rx(skb); - - rcu_read_unlock(); - return NET_RX_SUCCESS; - -err_unlock: - rcu_read_unlock(); -err_free: - atomic_long_inc(&dev->rx_dropped); - kfree_skb(skb); - return NET_RX_DROP; -} - static inline u16 vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb) { diff --git a/net/core/dev.c b/net/core/dev.c index 95897ff3a76f..d1aebf7c6494 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3130,6 +3130,12 @@ another_round: __this_cpu_inc(softnet_data.processed); + if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { + skb = vlan_untag(skb); + if (unlikely(!skb)) + goto out; + } + #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); @@ -3177,7 +3183,7 @@ ncls: ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } - if (vlan_hwaccel_do_receive(&skb)) { + if (vlan_do_receive(&skb)) { ret = __netif_receive_skb(skb); goto out; } else if (unlikely(!skb)) -- cgit v1.2.3 From 1aac62671686e6234c91b5f6fc4caaa850419d5d Mon Sep 17 00:00:00 2001 From: MichaÅ‚ MirosÅ‚aw Date: Tue, 12 Apr 2011 04:07:39 +0000 Subject: net: vlan_features comment clarification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: MichaÅ‚ MirosÅ‚aw Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 09d262415769..cb8178ab3c52 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1035,7 +1035,7 @@ struct net_device { u32 hw_features; /* user-requested features */ u32 wanted_features; - /* VLAN feature mask */ + /* mask of features inheritable by VLAN devices */ u32 vlan_features; /* Net device feature bits; if you change something, -- cgit v1.2.3 From 6139e75f4a413bdc8f366fc11e437347be8abc59 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 12 Apr 2011 19:27:51 -0700 Subject: net: Missing 'inline' in vlan-disabled vlan_untag() Reported-by: Stephen Rothwell Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 998b29930b80..546d9d35fbd4 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -174,7 +174,7 @@ static inline bool vlan_do_receive(struct sk_buff **skb) return false; } -inline struct sk_buff *vlan_untag(struct sk_buff *skb) +static inline struct sk_buff *vlan_untag(struct sk_buff *skb) { return skb; } -- cgit v1.2.3 From edb2fd9524cc2a55fb5a2e878b6e4e83f9e63fd0 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Wed, 13 Apr 2011 09:45:45 +0200 Subject: include/linux/leds-regulator.h: fix syntax in example code Fix struct field initializer syntax in some example code from a comment, this will make copying and pasting the code more straightforward. Signed-off-by: Antonio Ospite Signed-off-by: Jiri Kosina --- include/linux/leds-regulator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/leds-regulator.h b/include/linux/leds-regulator.h index 5a8eb389aab8..e2337a8c90b0 100644 --- a/include/linux/leds-regulator.h +++ b/include/linux/leds-regulator.h @@ -16,7 +16,7 @@ * Use "vled" as supply id when declaring the regulator consumer: * * static struct regulator_consumer_supply pcap_regulator_VVIB_consumers [] = { - * { .dev_name = "leds-regulator.0", supply = "vled" }, + * { .dev_name = "leds-regulator.0", .supply = "vled" }, * }; * * If you have several regulator driven LEDs, you can append a numerical id to -- cgit v1.2.3 From 91eb7c08c6cb3b8eeba1c61f5753c56dcb77f018 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 13 Apr 2011 13:51:38 +0200 Subject: netfilter: ipset: SCTP, UDPLITE support added SCTP and UDPLITE port support added to the hash:*port* set types. 
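The .revision bump from 0 to 1 in each set type below is what lets userspace discover that the kernel side now understands the extra protocols; with a matching ipset binary an entry such as 192.0.2.1,sctp:5060 then becomes valid for a hash:ip,port set (the concrete address and port are only an example). In-kernel callers can keep their dispatch generic by asking the helper first, along the lines of this sketch (the wrapper name is hypothetical):

        /* Only attempt port extraction for protocols that carry ports;
         * after this patch that is TCP, UDP, UDPLITE and SCTP. */
        static bool example_proto_has_ports(u8 proto)
        {
                return ip_set_proto_with_ports(proto);
        }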
Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- include/linux/netfilter/ipset/ip_set_getport.h | 2 ++ net/netfilter/ipset/ip_set_getport.c | 16 +++++++++++++++- net/netfilter/ipset/ip_set_hash_ipport.c | 2 +- net/netfilter/ipset/ip_set_hash_ipportip.c | 2 +- net/netfilter/ipset/ip_set_hash_ipportnet.c | 2 +- net/netfilter/ipset/ip_set_hash_netport.c | 2 +- 6 files changed, 21 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set_getport.h b/include/linux/netfilter/ipset/ip_set_getport.h index 5aebd170f899..90d09300e954 100644 --- a/include/linux/netfilter/ipset/ip_set_getport.h +++ b/include/linux/netfilter/ipset/ip_set_getport.h @@ -22,7 +22,9 @@ static inline bool ip_set_proto_with_ports(u8 proto) { switch (proto) { case IPPROTO_TCP: + case IPPROTO_SCTP: case IPPROTO_UDP: + case IPPROTO_UDPLITE: return true; } return false; diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c index 8d5227212686..757143b2240a 100644 --- a/net/netfilter/ipset/ip_set_getport.c +++ b/net/netfilter/ipset/ip_set_getport.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -35,7 +36,20 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, *port = src ? th->source : th->dest; break; } - case IPPROTO_UDP: { + case IPPROTO_SCTP: { + sctp_sctphdr_t _sh; + const sctp_sctphdr_t *sh; + + sh = skb_header_pointer(skb, protooff, sizeof(_sh), &_sh); + if (sh == NULL) + /* No choice either */ + return false; + + *port = src ? sh->source : sh->dest; + break; + } + case IPPROTO_UDP: + case IPPROTO_UDPLITE: { struct udphdr _udph; const struct udphdr *uh; diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index b9214145d357..14281b6b8074 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -491,7 +491,7 @@ static struct ip_set_type hash_ipport_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, .dimension = IPSET_DIM_TWO, .family = AF_UNSPEC, - .revision = 0, + .revision = 1, .create = hash_ipport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 4642872df6e1..401c8a2531db 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -509,7 +509,7 @@ static struct ip_set_type hash_ipportip_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, .dimension = IPSET_DIM_THREE, .family = AF_UNSPEC, - .revision = 0, + .revision = 1, .create = hash_ipportip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 2cb84a54b7ad..4743e5402522 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -574,7 +574,7 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, .dimension = IPSET_DIM_THREE, .family = AF_UNSPEC, - .revision = 0, + .revision = 1, .create = hash_ipportnet_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 8598676f2a05..d2a40362dd3a 100644 --- 
a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -526,7 +526,7 @@ static struct ip_set_type hash_netport_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, .dimension = IPSET_DIM_TWO, .family = AF_UNSPEC, - .revision = 0, + .revision = 1, .create = hash_netport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, -- cgit v1.2.3 From 8b5933c380fc66a6311739f9b36a812383f82141 Mon Sep 17 00:00:00 2001 From: amit salecha Date: Thu, 7 Apr 2011 01:58:42 +0000 Subject: net: ethtool support to configure number of channels Ethtool support to configure RX, TX and other channels. combined field in struct ethtool_channels to reflect set of channel (RX, TX or other). Other channel can be link interrupts, SR-IOV coordination etc. ETHTOOL_GCHANNELS will report max and current number of RX channels, max and current number of TX channels, max and current number of other channel or max and current number of combined channel. Number of channel can be modify upto max number of channel through ETHTOOL_SCHANNELS command. Ben Hutchings: o define 'combined' and 'other' types. Most multiqueue drivers pair up RX and TX queues so that most channels combine RX and TX work. o Please could you use a kernel-doc comment to describe the structure. Signed-off-by: Amit Kumar Salecha Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 36 ++++++++++++++++++++++++++++++++++++ net/core/ethtool.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 12cfbd0be2ee..ad22a68c2e5d 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -229,6 +229,34 @@ struct ethtool_ringparam { __u32 tx_pending; }; +/** + * struct ethtool_channels - configuring number of network channel + * @cmd: ETHTOOL_{G,S}CHANNELS + * @max_rx: Read only. Maximum number of receive channel the driver support. + * @max_tx: Read only. Maximum number of transmit channel the driver support. + * @max_other: Read only. Maximum number of other channel the driver support. + * @max_combined: Read only. Maximum number of combined channel the driver + * support. Set of queues RX, TX or other. + * @rx_count: Valid values are in the range 1 to the max_rx. + * @tx_count: Valid values are in the range 1 to the max_tx. + * @other_count: Valid values are in the range 1 to the max_other. + * @combined_count: Valid values are in the range 1 to the max_combined. + * + * This can be used to configure RX, TX and other channels. + */ + +struct ethtool_channels { + __u32 cmd; + __u32 max_rx; + __u32 max_tx; + __u32 max_other; + __u32 max_combined; + __u32 rx_count; + __u32 tx_count; + __u32 other_count; + __u32 combined_count; +}; + /* for configuring link flow control parameters */ struct ethtool_pauseparam { __u32 cmd; /* ETHTOOL_{G,S}PAUSEPARAM */ @@ -818,6 +846,9 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); * Returns a negative error code or zero. * @set_rxfh_indir: Set the contents of the RX flow hash indirection table. * Returns a negative error code or zero. + * @get_channels: Get number of channels. + * @set_channels: Set number of channels. Returns a negative error code or + * zero. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. 
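A driver wires the two new operations up roughly as in the hedged sketch below; the foo_* names, FOO_MAX_QUEUE_PAIRS and the queue-pair model are assumptions, only the callback signatures and the struct ethtool_channels semantics come from this patch. Note that ethtool_set_channels() in net/core/ethtool.c passes the user's request straight through, so the driver must validate the counts itself.

        static void foo_get_channels(struct net_device *dev,
                                     struct ethtool_channels *ch)
        {
                struct foo_priv *priv = netdev_priv(dev);

                /* this hypothetical device only has combined RX/TX queue pairs */
                ch->max_combined = FOO_MAX_QUEUE_PAIRS;
                ch->combined_count = priv->num_queue_pairs;
        }

        static int foo_set_channels(struct net_device *dev,
                                    struct ethtool_channels *ch)
        {
                struct foo_priv *priv = netdev_priv(dev);

                if (ch->rx_count || ch->tx_count || ch->other_count)
                        return -EINVAL;         /* only combined channels here */
                if (!ch->combined_count ||
                    ch->combined_count > FOO_MAX_QUEUE_PAIRS)
                        return -EINVAL;

                priv->num_queue_pairs = ch->combined_count;
                return 0;       /* a real driver would now reallocate its queues */
        }

        static const struct ethtool_ops foo_ethtool_ops = {
                .get_channels = foo_get_channels,
                .set_channels = foo_set_channels,
        };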
Callers must @@ -891,6 +922,9 @@ struct ethtool_ops { struct ethtool_rxfh_indir *); int (*set_rxfh_indir)(struct net_device *, const struct ethtool_rxfh_indir *); + void (*get_channels)(struct net_device *, struct ethtool_channels *); + int (*set_channels)(struct net_device *, struct ethtool_channels *); + }; #endif /* __KERNEL__ */ @@ -959,6 +993,8 @@ struct ethtool_ops { #define ETHTOOL_GFEATURES 0x0000003a /* Get device offload settings */ #define ETHTOOL_SFEATURES 0x0000003b /* Change device offload settings */ +#define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */ +#define ETHTOOL_SCHANNELS 0x0000003d /* Set no of channels */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 43ef09fedd6e..41dee2de13ad 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1446,6 +1446,35 @@ static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) return dev->ethtool_ops->set_ringparam(dev, &ringparam); } +static noinline_for_stack int ethtool_get_channels(struct net_device *dev, + void __user *useraddr) +{ + struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS }; + + if (!dev->ethtool_ops->get_channels) + return -EOPNOTSUPP; + + dev->ethtool_ops->get_channels(dev, &channels); + + if (copy_to_user(useraddr, &channels, sizeof(channels))) + return -EFAULT; + return 0; +} + +static noinline_for_stack int ethtool_set_channels(struct net_device *dev, + void __user *useraddr) +{ + struct ethtool_channels channels; + + if (!dev->ethtool_ops->set_channels) + return -EOPNOTSUPP; + + if (copy_from_user(&channels, useraddr, sizeof(channels))) + return -EFAULT; + + return dev->ethtool_ops->set_channels(dev, &channels); +} + static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr) { struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM }; @@ -2007,6 +2036,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SGRO: rc = ethtool_set_one_feature(dev, useraddr, ethcmd); break; + case ETHTOOL_GCHANNELS: + rc = ethtool_get_channels(dev, useraddr); + break; + case ETHTOOL_SCHANNELS: + rc = ethtool_set_channels(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.2.3 From 4d42d417be75d750b82798922b6e775915e11bce Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 13 Apr 2011 14:48:55 -0700 Subject: rndis_host: Poll status before control channel where necessary Some RNDIS devices don't respond on the control channel until polled on the status channel. In particular, this was reported to be the case for the 2Wire HomePortal 1000SW and for some Windows Mobile devices. This is roughly based on a patch by John Carr which is currently applied by Mandriva. Reported-by: Mark Glassberg Signed-off-by: Ben Hutchings Signed-off-by: David S. 
Miller --- drivers/net/usb/rndis_host.c | 39 ++++++++++++++++++++++++++++++++------- include/linux/usb/rndis_host.h | 2 ++ 2 files changed, 34 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index 5994a25c56ac..6d6c1da68a36 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -104,8 +104,10 @@ static void rndis_msg_indicate(struct usbnet *dev, struct rndis_indicate *msg, int rndis_command(struct usbnet *dev, struct rndis_msg_hdr *buf, int buflen) { struct cdc_state *info = (void *) &dev->data; + struct usb_cdc_notification notification; int master_ifnum; int retval; + int partial; unsigned count; __le32 rsp; u32 xid = 0, msg_len, request_id; @@ -133,13 +135,20 @@ int rndis_command(struct usbnet *dev, struct rndis_msg_hdr *buf, int buflen) if (unlikely(retval < 0 || xid == 0)) return retval; - // FIXME Seems like some devices discard responses when - // we time out and cancel our "get response" requests... - // so, this is fragile. Probably need to poll for status. + /* Some devices don't respond on the control channel until + * polled on the status channel, so do that first. */ + if (dev->driver_info->data & RNDIS_DRIVER_DATA_POLL_STATUS) { + retval = usb_interrupt_msg( + dev->udev, + usb_rcvintpipe(dev->udev, + dev->status->desc.bEndpointAddress), + ¬ification, sizeof(notification), &partial, + RNDIS_CONTROL_TIMEOUT_MS); + if (unlikely(retval < 0)) + return retval; + } - /* ignore status endpoint, just poll the control channel; - * the request probably completed immediately - */ + /* Poll the control channel; the request probably completed immediately */ rsp = buf->msg_type | RNDIS_MSG_COMPLETION; for (count = 0; count < 10; count++) { memset(buf, 0, CONTROL_BUFFER_SIZE); @@ -581,17 +590,33 @@ static const struct driver_info rndis_info = { .tx_fixup = rndis_tx_fixup, }; +static const struct driver_info rndis_poll_status_info = { + .description = "RNDIS device (poll status before control)", + .flags = FLAG_ETHER | FLAG_FRAMING_RN | FLAG_NO_SETINT, + .data = RNDIS_DRIVER_DATA_POLL_STATUS, + .bind = rndis_bind, + .unbind = rndis_unbind, + .status = rndis_status, + .rx_fixup = rndis_rx_fixup, + .tx_fixup = rndis_tx_fixup, +}; + /*-------------------------------------------------------------------------*/ static const struct usb_device_id products [] = { { + /* 2Wire HomePortal 1000SW */ + USB_DEVICE_AND_INTERFACE_INFO(0x1630, 0x0042, + USB_CLASS_COMM, 2 /* ACM */, 0x0ff), + .driver_info = (unsigned long) &rndis_poll_status_info, +}, { /* RNDIS is MSFT's un-official variant of CDC ACM */ USB_INTERFACE_INFO(USB_CLASS_COMM, 2 /* ACM */, 0x0ff), .driver_info = (unsigned long) &rndis_info, }, { /* "ActiveSync" is an undocumented variant of RNDIS, used in WM5 */ USB_INTERFACE_INFO(USB_CLASS_MISC, 1, 1), - .driver_info = (unsigned long) &rndis_info, + .driver_info = (unsigned long) &rndis_poll_status_info, }, { /* RNDIS for tethering */ USB_INTERFACE_INFO(USB_CLASS_WIRELESS_CONTROLLER, 1, 3), diff --git a/include/linux/usb/rndis_host.h b/include/linux/usb/rndis_host.h index 05ef52861988..88fceb718c77 100644 --- a/include/linux/usb/rndis_host.h +++ b/include/linux/usb/rndis_host.h @@ -256,6 +256,8 @@ struct rndis_keepalive_c { /* IN (optionally OUT) */ #define FLAG_RNDIS_PHYM_NOT_WIRELESS 0x0001 #define FLAG_RNDIS_PHYM_WIRELESS 0x0002 +/* Flags for driver_info::data */ +#define RNDIS_DRIVER_DATA_POLL_STATUS 1 /* poll status before control */ extern void rndis_status(struct usbnet *dev, 
struct urb *urb); extern int -- cgit v1.2.3 From f1407d5c66240b33d11a7f1a41d55ccf6a9d7647 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 4 Apr 2011 13:44:59 +0900 Subject: usb: renesas_usbhs: Add Renesas USBHS common code Renesas SuperH has USBHS IP which can switch Host / Function. This driver is designed so that Host / Function may dynamically change. This patch add usb/renesas_usbhs and common code for SuperH USBHS. Signed-off-by: Kuninori Morimoto Signed-off-by: Greg Kroah-Hartman --- drivers/Makefile | 1 + drivers/usb/Kconfig | 2 + drivers/usb/renesas_usbhs/Kconfig | 15 + drivers/usb/renesas_usbhs/Makefile | 7 + drivers/usb/renesas_usbhs/common.c | 394 +++++++++++++++++ drivers/usb/renesas_usbhs/common.h | 225 ++++++++++ drivers/usb/renesas_usbhs/mod.c | 261 +++++++++++ drivers/usb/renesas_usbhs/mod.h | 106 +++++ drivers/usb/renesas_usbhs/pipe.c | 880 +++++++++++++++++++++++++++++++++++++ drivers/usb/renesas_usbhs/pipe.h | 105 +++++ include/linux/usb/renesas_usbhs.h | 149 +++++++ 11 files changed, 2145 insertions(+) create mode 100644 drivers/usb/renesas_usbhs/Kconfig create mode 100644 drivers/usb/renesas_usbhs/Makefile create mode 100644 drivers/usb/renesas_usbhs/common.c create mode 100644 drivers/usb/renesas_usbhs/common.h create mode 100644 drivers/usb/renesas_usbhs/mod.c create mode 100644 drivers/usb/renesas_usbhs/mod.h create mode 100644 drivers/usb/renesas_usbhs/pipe.c create mode 100644 drivers/usb/renesas_usbhs/pipe.h create mode 100644 include/linux/usb/renesas_usbhs.h (limited to 'include/linux') diff --git a/drivers/Makefile b/drivers/Makefile index 3f135b6fb014..ad67b7d4c271 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -67,6 +67,7 @@ obj-$(CONFIG_UWB) += uwb/ obj-$(CONFIG_USB_OTG_UTILS) += usb/otg/ obj-$(CONFIG_USB) += usb/ obj-$(CONFIG_USB_MUSB_HDRC) += usb/musb/ +obj-$(CONFIG_USB_RENESAS_USBHS) += usb/renesas_usbhs/ obj-$(CONFIG_PCI) += usb/ obj-$(CONFIG_USB_GADGET) += usb/gadget/ obj-$(CONFIG_SERIO) += input/serio/ diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig index 41b6e51188e4..d299906e4f00 100644 --- a/drivers/usb/Kconfig +++ b/drivers/usb/Kconfig @@ -115,6 +115,8 @@ source "drivers/usb/host/Kconfig" source "drivers/usb/musb/Kconfig" +source "drivers/usb/renesas_usbhs/Kconfig" + source "drivers/usb/class/Kconfig" source "drivers/usb/storage/Kconfig" diff --git a/drivers/usb/renesas_usbhs/Kconfig b/drivers/usb/renesas_usbhs/Kconfig new file mode 100644 index 000000000000..481490e5500a --- /dev/null +++ b/drivers/usb/renesas_usbhs/Kconfig @@ -0,0 +1,15 @@ +# +# Renesas USB Controller Drivers +# + +config USB_RENESAS_USBHS + tristate 'Renesas USBHS controller' + default n + help + Renesas USBHS is a discrete USB host and peripheral controller chip + that supports both full and high speed USB 2.0 data transfers. + It has nine or more configurable endpoints, and endpoint zero. + + Say "y" to link the driver statically, or "m" to build a + dynamically linked module called "renesas_usbhs" and force all + gadget drivers to also be dynamically linked. 
diff --git a/drivers/usb/renesas_usbhs/Makefile b/drivers/usb/renesas_usbhs/Makefile new file mode 100644 index 000000000000..d76f3dd3b9d1 --- /dev/null +++ b/drivers/usb/renesas_usbhs/Makefile @@ -0,0 +1,7 @@ +# +# for Renesas USB +# + +obj-$(CONFIG_USB_RENESAS_USBHS) += renesas_usbhs.o + +renesas_usbhs-y := common.o mod.o pipe.o diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c new file mode 100644 index 000000000000..d9ad60d1c156 --- /dev/null +++ b/drivers/usb/renesas_usbhs/common.c @@ -0,0 +1,394 @@ +/* + * Renesas USB driver + * + * Copyright (C) 2011 Renesas Solutions Corp. + * Kuninori Morimoto + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#include +#include +#include +#include +#include +#include "./common.h" + +/* + * platform call back + * + * renesas usb support platform callback function. + * Below macro call it. + * if platform doesn't have callback, it return 0 (no error) + */ +#define usbhs_platform_call(priv, func, args...)\ + (!(priv) ? -ENODEV : \ + !((priv)->pfunc->func) ? 0 : \ + (priv)->pfunc->func(args)) + +/* + * common functions + */ +u16 usbhs_read(struct usbhs_priv *priv, u32 reg) +{ + return ioread16(priv->base + reg); +} + +void usbhs_write(struct usbhs_priv *priv, u32 reg, u16 data) +{ + iowrite16(data, priv->base + reg); +} + +void usbhs_bset(struct usbhs_priv *priv, u32 reg, u16 mask, u16 data) +{ + u16 val = usbhs_read(priv, reg); + + val &= ~mask; + val |= data & mask; + + usbhs_write(priv, reg, val); +} + +/* + * syscfg functions + */ +void usbhs_sys_clock_ctrl(struct usbhs_priv *priv, int enable) +{ + usbhs_bset(priv, SYSCFG, SCKE, enable ? SCKE : 0); +} + +void usbhs_sys_hispeed_ctrl(struct usbhs_priv *priv, int enable) +{ + usbhs_bset(priv, SYSCFG, HSE, enable ? HSE : 0); +} + +void usbhs_sys_usb_ctrl(struct usbhs_priv *priv, int enable) +{ + usbhs_bset(priv, SYSCFG, USBE, enable ? USBE : 0); +} + +void usbhs_sys_host_ctrl(struct usbhs_priv *priv, int enable) +{ + u16 mask = DCFM | DRPD | DPRPU; + u16 val = DCFM | DRPD; + + /* + * if enable + * + * - select Host mode + * - D+ Line/D- Line Pull-down + */ + usbhs_bset(priv, SYSCFG, mask, enable ? val : 0); +} + +void usbhs_sys_function_ctrl(struct usbhs_priv *priv, int enable) +{ + u16 mask = DCFM | DRPD | DPRPU; + u16 val = DPRPU; + + /* + * if enable + * + * - select Function mode + * - D+ Line Pull-up + */ + usbhs_bset(priv, SYSCFG, mask, enable ? 
val : 0); +} + +/* + * frame functions + */ +int usbhs_frame_get_num(struct usbhs_priv *priv) +{ + return usbhs_read(priv, FRMNUM) & FRNM_MASK; +} + +/* + * local functions + */ +static struct usbhs_priv *usbhsc_pdev_to_priv(struct platform_device *pdev) +{ + return dev_get_drvdata(&pdev->dev); +} + +static void usbhsc_bus_ctrl(struct usbhs_priv *priv, int enable) +{ + int wait = usbhs_get_dparam(priv, buswait_bwait); + u16 data = 0; + + if (enable) { + /* set bus wait if platform have */ + if (wait) + usbhs_bset(priv, BUSWAIT, 0x000F, wait); + } + usbhs_write(priv, DVSTCTR, data); +} + +/* + * platform default param + */ +static u32 usbhsc_default_pipe_type[] = { + USB_ENDPOINT_XFER_CONTROL, + USB_ENDPOINT_XFER_ISOC, + USB_ENDPOINT_XFER_ISOC, + USB_ENDPOINT_XFER_BULK, + USB_ENDPOINT_XFER_BULK, + USB_ENDPOINT_XFER_BULK, + USB_ENDPOINT_XFER_INT, + USB_ENDPOINT_XFER_INT, + USB_ENDPOINT_XFER_INT, + USB_ENDPOINT_XFER_INT, +}; + +/* + * driver callback functions + */ +static void usbhsc_notify_hotplug(struct work_struct *work) +{ + struct usbhs_priv *priv = container_of(work, + struct usbhs_priv, + notify_hotplug_work); + struct platform_device *pdev = usbhs_priv_to_pdev(priv); + struct usbhs_mod *mod = usbhs_mod_get_current(priv); + int id; + int enable; + int ret; + + /* + * get vbus status from platform + */ + enable = usbhs_platform_call(priv, get_vbus, pdev); + + /* + * get id from platform + */ + id = usbhs_platform_call(priv, get_id, pdev); + + if (enable && !mod) { + ret = usbhs_mod_change(priv, id); + if (ret < 0) + return; + + dev_dbg(&pdev->dev, "%s enable\n", __func__); + + /* enable PM */ + pm_runtime_get_sync(&pdev->dev); + + /* USB on */ + usbhs_sys_clock_ctrl(priv, enable); + usbhsc_bus_ctrl(priv, enable); + + /* module start */ + usbhs_mod_call(priv, start, priv); + + } else if (!enable && mod) { + dev_dbg(&pdev->dev, "%s disable\n", __func__); + + /* module stop */ + usbhs_mod_call(priv, stop, priv); + + /* USB off */ + usbhsc_bus_ctrl(priv, enable); + usbhs_sys_clock_ctrl(priv, enable); + + /* disable PM */ + pm_runtime_put_sync(&pdev->dev); + + usbhs_mod_change(priv, -1); + + /* reset phy for next connection */ + usbhs_platform_call(priv, phy_reset, pdev); + } +} + +static int usbhsc_drvcllbck_notify_hotplug(struct platform_device *pdev) +{ + struct usbhs_priv *priv = usbhsc_pdev_to_priv(pdev); + + /* + * This functions will be called in interrupt. 
+ * To make sure safety context, + * use workqueue for usbhs_notify_hotplug + */ + schedule_work(&priv->notify_hotplug_work); + return 0; +} + +/* + * platform functions + */ +static int __devinit usbhs_probe(struct platform_device *pdev) +{ + struct renesas_usbhs_platform_info *info = pdev->dev.platform_data; + struct renesas_usbhs_driver_callback *dfunc; + struct usbhs_priv *priv; + struct resource *res; + unsigned int irq; + int ret; + + /* check platform information */ + if (!info || + !info->platform_callback.get_id || + !info->platform_callback.get_vbus) { + dev_err(&pdev->dev, "no platform information\n"); + return -EINVAL; + } + + /* platform data */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + irq = platform_get_irq(pdev, 0); + if (!res || (int)irq <= 0) { + dev_err(&pdev->dev, "Not enough Renesas USB platform resources.\n"); + return -ENODEV; + } + + /* usb private data */ + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + dev_err(&pdev->dev, "Could not allocate priv\n"); + return -ENOMEM; + } + + priv->base = ioremap_nocache(res->start, resource_size(res)); + if (!priv->base) { + dev_err(&pdev->dev, "ioremap error.\n"); + ret = -ENOMEM; + goto probe_end_kfree; + } + + /* + * care platform info + */ + priv->pfunc = &info->platform_callback; + priv->dparam = &info->driver_param; + + /* set driver callback functions for platform */ + dfunc = &info->driver_callback; + dfunc->notify_hotplug = usbhsc_drvcllbck_notify_hotplug; + + /* set default param if platform doesn't have */ + if (!priv->dparam->pipe_type) { + priv->dparam->pipe_type = usbhsc_default_pipe_type; + priv->dparam->pipe_size = ARRAY_SIZE(usbhsc_default_pipe_type); + } + + /* + * priv settings + */ + priv->irq = irq; + priv->pdev = pdev; + INIT_WORK(&priv->notify_hotplug_work, usbhsc_notify_hotplug); + spin_lock_init(usbhs_priv_to_lock(priv)); + + /* call pipe and module init */ + ret = usbhs_pipe_probe(priv); + if (ret < 0) + goto probe_end_mod_exit; + + ret = usbhs_mod_probe(priv); + if (ret < 0) + goto probe_end_iounmap; + + /* dev_set_drvdata should be called after usbhs_mod_init */ + dev_set_drvdata(&pdev->dev, priv); + + /* + * deviece reset here because + * USB device might be used in boot loader. + */ + usbhs_sys_clock_ctrl(priv, 0); + + /* + * platform call + * + * USB phy setup might depend on CPU/Board. + * If platform has its callback functions, + * call it here. 
+ */ + ret = usbhs_platform_call(priv, hardware_init, pdev); + if (ret < 0) { + dev_err(&pdev->dev, "platform prove failed.\n"); + goto probe_end_pipe_exit; + } + + /* reset phy for connection */ + usbhs_platform_call(priv, phy_reset, pdev); + + /* + * manual call notify_hotplug for cold plug + */ + pm_runtime_enable(&pdev->dev); + ret = usbhsc_drvcllbck_notify_hotplug(pdev); + if (ret < 0) + goto probe_end_call_remove; + + dev_info(&pdev->dev, "probed\n"); + + return ret; + +probe_end_call_remove: + usbhs_platform_call(priv, hardware_exit, pdev); +probe_end_pipe_exit: + usbhs_pipe_remove(priv); +probe_end_mod_exit: + usbhs_mod_remove(priv); +probe_end_iounmap: + iounmap(priv->base); +probe_end_kfree: + kfree(priv); + + dev_info(&pdev->dev, "probe failed\n"); + + return ret; +} + +static int __devexit usbhs_remove(struct platform_device *pdev) +{ + struct usbhs_priv *priv = usbhsc_pdev_to_priv(pdev); + + dev_dbg(&pdev->dev, "usb remove\n"); + + pm_runtime_disable(&pdev->dev); + + usbhsc_bus_ctrl(priv, 0); + + usbhs_platform_call(priv, hardware_exit, pdev); + usbhs_pipe_remove(priv); + usbhs_mod_remove(priv); + iounmap(priv->base); + kfree(priv); + + return 0; +} + +static struct platform_driver renesas_usbhs_driver = { + .driver = { + .name = "renesas_usbhs", + }, + .probe = usbhs_probe, + .remove = __devexit_p(usbhs_remove), +}; + +static int __init usbhs_init(void) +{ + return platform_driver_register(&renesas_usbhs_driver); +} + +static void __exit usbhs_exit(void) +{ + platform_driver_unregister(&renesas_usbhs_driver); +} + +module_init(usbhs_init); +module_exit(usbhs_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Renesas USB driver"); +MODULE_AUTHOR("Kuninori Morimoto "); diff --git a/drivers/usb/renesas_usbhs/common.h b/drivers/usb/renesas_usbhs/common.h new file mode 100644 index 000000000000..f1a2b62f93f9 --- /dev/null +++ b/drivers/usb/renesas_usbhs/common.h @@ -0,0 +1,225 @@ +/* + * Renesas USB driver + * + * Copyright (C) 2011 Renesas Solutions Corp. + * Kuninori Morimoto + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#ifndef RENESAS_USB_DRIVER_H +#define RENESAS_USB_DRIVER_H + +#include +#include + +struct usbhs_priv; + +#include "./mod.h" +#include "./pipe.h" + +/* + * + * register define + * + */ +#define SYSCFG 0x0000 +#define BUSWAIT 0x0002 +#define DVSTCTR 0x0008 +#define CFIFO 0x0014 +#define CFIFOSEL 0x0020 +#define CFIFOCTR 0x0022 +#define INTENB0 0x0030 +#define INTENB1 0x0032 +#define BRDYENB 0x0036 +#define NRDYENB 0x0038 +#define BEMPENB 0x003A +#define INTSTS0 0x0040 +#define INTSTS1 0x0042 +#define BRDYSTS 0x0046 +#define NRDYSTS 0x0048 +#define BEMPSTS 0x004A +#define FRMNUM 0x004C +#define USBREQ 0x0054 /* USB request type register */ +#define USBVAL 0x0056 /* USB request value register */ +#define USBINDX 0x0058 /* USB request index register */ +#define USBLENG 0x005A /* USB request length register */ +#define DCPCFG 0x005C +#define DCPMAXP 0x005E +#define DCPCTR 0x0060 +#define PIPESEL 0x0064 +#define PIPECFG 0x0068 +#define PIPEBUF 0x006A +#define PIPEMAXP 0x006C +#define PIPEPERI 0x006E +#define PIPEnCTR 0x0070 + +/* SYSCFG */ +#define SCKE (1 << 10) /* USB Module Clock Enable */ +#define HSE (1 << 7) /* High-Speed Operation Enable */ +#define DCFM (1 << 6) /* Controller Function Select */ +#define DRPD (1 << 5) /* D+ Line/D- Line Resistance Control */ +#define DPRPU (1 << 4) /* D+ Line Resistance Control */ +#define USBE (1 << 0) /* USB Module Operation Enable */ + +/* DVSTCTR */ +#define EXTLP (1 << 10) /* Controls the EXTLP pin output state */ +#define PWEN (1 << 9) /* Controls the PWEN pin output state */ +#define RHST (0x7) /* Reset Handshake */ +#define RHST_LOW_SPEED 1 /* Low-speed connection */ +#define RHST_FULL_SPEED 2 /* Full-speed connection */ +#define RHST_HIGH_SPEED 3 /* High-speed connection */ + +/* CFIFOSEL */ +#define MBW_32 (0x2 << 10) /* CFIFO Port Access Bit Width */ + +/* CFIFOCTR */ +#define BVAL (1 << 15) /* Buffer Memory Enable Flag */ +#define BCLR (1 << 14) /* CPU buffer clear */ +#define FRDY (1 << 13) /* FIFO Port Ready */ +#define DTLN_MASK (0x0FFF) /* Receive Data Length */ + +/* INTENB0 */ +#define VBSE (1 << 15) /* Enable IRQ VBUS_0 and VBUSIN_0 */ +#define RSME (1 << 14) /* Enable IRQ Resume */ +#define SOFE (1 << 13) /* Enable IRQ Frame Number Update */ +#define DVSE (1 << 12) /* Enable IRQ Device State Transition */ +#define CTRE (1 << 11) /* Enable IRQ Control Stage Transition */ +#define BEMPE (1 << 10) /* Enable IRQ Buffer Empty */ +#define NRDYE (1 << 9) /* Enable IRQ Buffer Not Ready Response */ +#define BRDYE (1 << 8) /* Enable IRQ Buffer Ready */ + +/* INTENB1 */ +#define BCHGE (1 << 14) /* USB Bus Change Interrupt Enable */ +#define DTCHE (1 << 12) /* Disconnection Detect Interrupt Enable */ +#define ATTCHE (1 << 11) /* Connection Detect Interrupt Enable */ +#define EOFERRE (1 << 6) /* EOF Error Detect Interrupt Enable */ +#define SIGNE (1 << 5) /* Setup Transaction Error Interrupt Enable */ +#define SACKE (1 << 4) /* Setup Transaction ACK Interrupt Enable */ + +/* INTSTS0 */ +#define DVST (1 << 12) /* Device State Transition Interrupt Status */ +#define CTRT (1 << 11) /* Control Stage Interrupt Status */ +#define BEMP (1 << 10) /* Buffer Empty Interrupt Status */ +#define BRDY (1 << 8) /* Buffer Ready Interrupt Status */ +#define VBSTS (1 << 7) /* VBUS_0 and VBUSIN_0 Input Status */ +#define VALID (1 << 3) /* USB 
Request Receive */ + +#define DVSQ_MASK (0x3 << 4) /* Device State */ +#define POWER_STATE (0 << 4) +#define DEFAULT_STATE (1 << 4) +#define ADDRESS_STATE (2 << 4) +#define CONFIGURATION_STATE (3 << 4) + +#define CTSQ_MASK (0x7) /* Control Transfer Stage */ +#define IDLE_SETUP_STAGE 0 /* Idle stage or setup stage */ +#define READ_DATA_STAGE 1 /* Control read data stage */ +#define READ_STATUS_STAGE 2 /* Control read status stage */ +#define WRITE_DATA_STAGE 3 /* Control write data stage */ +#define WRITE_STATUS_STAGE 4 /* Control write status stage */ +#define NODATA_STATUS_STAGE 5 /* Control write NoData status stage */ +#define SEQUENCE_ERROR 6 /* Control transfer sequence error */ + +/* PIPECFG */ +/* DCPCFG */ +#define TYPE_NONE (0 << 14) /* Transfer Type */ +#define TYPE_BULK (1 << 14) +#define TYPE_INT (2 << 14) +#define TYPE_ISO (3 << 14) +#define DBLB (1 << 9) /* Double Buffer Mode */ +#define SHTNAK (1 << 7) /* Pipe Disable in Transfer End */ +#define DIR_OUT (1 << 4) /* Transfer Direction */ + +/* PIPEMAXP */ +/* DCPMAXP */ +#define DEVSEL_MASK (0xF << 12) /* Device Select */ +#define DCP_MAXP_MASK (0x7F) +#define PIPE_MAXP_MASK (0x7FF) + +/* PIPEBUF */ +#define BUFSIZE_SHIFT 10 +#define BUFSIZE_MASK (0x1F << BUFSIZE_SHIFT) +#define BUFNMB_MASK (0xFF) + +/* PIPEnCTR */ +/* DCPCTR */ +#define BSTS (1 << 15) /* Buffer Status */ +#define CSSTS (1 << 12) /* CSSTS Status */ +#define SQCLR (1 << 8) /* Toggle Bit Clear */ +#define ACLRM (1 << 9) /* Buffer Auto-Clear Mode */ +#define PBUSY (1 << 5) /* Pipe Busy */ +#define PID_MASK (0x3) /* Response PID */ +#define PID_NAK 0 +#define PID_BUF 1 +#define PID_STALL10 2 +#define PID_STALL11 3 + +#define CCPL (1 << 2) /* Control Transfer End Enable */ + +/* FRMNUM */ +#define FRNM_MASK (0x7FF) + +/* + * struct + */ +struct usbhs_priv { + + void __iomem *base; + unsigned int irq; + + struct renesas_usbhs_platform_callback *pfunc; + struct renesas_usbhs_driver_param *dparam; + + struct work_struct notify_hotplug_work; + struct platform_device *pdev; + + spinlock_t lock; + + /* + * module control + */ + struct usbhs_mod_info mod_info; + + /* + * pipe control + */ + struct usbhs_pipe_info pipe_info; +}; + +/* + * common + */ +u16 usbhs_read(struct usbhs_priv *priv, u32 reg); +void usbhs_write(struct usbhs_priv *priv, u32 reg, u16 data); +void usbhs_bset(struct usbhs_priv *priv, u32 reg, u16 mask, u16 data); + +/* + * sysconfig + */ +void usbhs_sys_clock_ctrl(struct usbhs_priv *priv, int enable); +void usbhs_sys_hispeed_ctrl(struct usbhs_priv *priv, int enable); +void usbhs_sys_usb_ctrl(struct usbhs_priv *priv, int enable); +void usbhs_sys_host_ctrl(struct usbhs_priv *priv, int enable); +void usbhs_sys_function_ctrl(struct usbhs_priv *priv, int enable); + +/* + * frame + */ +int usbhs_frame_get_num(struct usbhs_priv *priv); + +/* + * data + */ +#define usbhs_get_dparam(priv, param) (priv->dparam->param) +#define usbhs_priv_to_pdev(priv) (priv->pdev) +#define usbhs_priv_to_dev(priv) (&priv->pdev->dev) +#define usbhs_priv_to_lock(priv) (&priv->lock) + +#endif /* RENESAS_USB_DRIVER_H */ diff --git a/drivers/usb/renesas_usbhs/mod.c b/drivers/usb/renesas_usbhs/mod.c new file mode 100644 index 000000000000..4a3398484cd7 --- /dev/null +++ b/drivers/usb/renesas_usbhs/mod.c @@ -0,0 +1,261 @@ +/* + * Renesas USB driver + * + * Copyright (C) 2011 Renesas Solutions Corp. 
+ * Kuninori Morimoto + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#include + +#include "./common.h" +#include "./mod.h" + +#define usbhs_priv_to_modinfo(priv) (&priv->mod_info) + +/* + * host / gadget functions + * + * renesas_usbhs host/gadget can register itself by below functions. + * these functions are called when probe + * + */ +void usbhs_mod_register(struct usbhs_priv *priv, struct usbhs_mod *mod, int id) +{ + struct usbhs_mod_info *info = usbhs_priv_to_modinfo(priv); + + info->mod[id] = mod; + mod->priv = priv; +} + +struct usbhs_mod *usbhs_mod_get(struct usbhs_priv *priv, int id) +{ + struct usbhs_mod_info *info = usbhs_priv_to_modinfo(priv); + struct usbhs_mod *ret = NULL; + + switch (id) { + case USBHS_HOST: + case USBHS_GADGET: + ret = info->mod[id]; + break; + } + + return ret; +} + +int usbhs_mod_is_host(struct usbhs_priv *priv, struct usbhs_mod *mod) +{ + struct usbhs_mod_info *info = usbhs_priv_to_modinfo(priv); + + if (!mod) + return -EINVAL; + + return info->mod[USBHS_HOST] == mod; +} + +struct usbhs_mod *usbhs_mod_get_current(struct usbhs_priv *priv) +{ + struct usbhs_mod_info *info = usbhs_priv_to_modinfo(priv); + + return info->curt; +} + +int usbhs_mod_change(struct usbhs_priv *priv, int id) +{ + struct usbhs_mod_info *info = usbhs_priv_to_modinfo(priv); + struct usbhs_mod *mod = NULL; + int ret = 0; + + /* id < 0 mean no current */ + switch (id) { + case USBHS_HOST: + case USBHS_GADGET: + mod = info->mod[id]; + break; + default: + ret = -EINVAL; + } + info->curt = mod; + + return ret; +} + +static irqreturn_t usbhs_interrupt(int irq, void *data); +int usbhs_mod_probe(struct usbhs_priv *priv) +{ + struct device *dev = usbhs_priv_to_dev(priv); + int ret; + + /* irq settings */ + ret = request_irq(priv->irq, usbhs_interrupt, + IRQF_DISABLED, dev_name(dev), priv); + if (ret) + dev_err(dev, "irq request err\n"); + + return ret; +} + +void usbhs_mod_remove(struct usbhs_priv *priv) +{ + free_irq(priv->irq, priv); +} + +/* + * status functions + */ +int usbhs_status_get_usb_speed(struct usbhs_irq_state *irq_state) +{ + switch (irq_state->dvstctr & RHST) { + case RHST_LOW_SPEED: + return USB_SPEED_LOW; + case RHST_FULL_SPEED: + return USB_SPEED_FULL; + case RHST_HIGH_SPEED: + return USB_SPEED_HIGH; + } + + return USB_SPEED_UNKNOWN; +} + +int usbhs_status_get_device_state(struct usbhs_irq_state *irq_state) +{ + int state = irq_state->intsts0 & DVSQ_MASK; + + switch (state) { + case POWER_STATE: + case DEFAULT_STATE: + case ADDRESS_STATE: + case CONFIGURATION_STATE: + return state; + } + + return -EIO; +} + +int usbhs_status_get_ctrl_stage(struct usbhs_irq_state *irq_state) +{ + /* + * return value + * + * IDLE_SETUP_STAGE + * READ_DATA_STAGE + * READ_STATUS_STAGE + * WRITE_DATA_STAGE + * WRITE_STATUS_STAGE + * NODATA_STATUS_STAGE + * SEQUENCE_ERROR + */ + return (int)irq_state->intsts0 & CTSQ_MASK; +} + +static void usbhs_status_get_each_irq(struct usbhs_priv *priv, + struct usbhs_irq_state *state) +{ + struct usbhs_mod *mod = usbhs_mod_get_current(priv); + + state->intsts0 = usbhs_read(priv, INTSTS0); + state->intsts1 = usbhs_read(priv, INTSTS1); + 
+ state->brdysts = usbhs_read(priv, BRDYSTS); + state->nrdysts = usbhs_read(priv, NRDYSTS); + state->bempsts = usbhs_read(priv, BEMPSTS); + + state->dvstctr = usbhs_read(priv, DVSTCTR); + + /* mask */ + state->bempsts &= mod->irq_bempsts; + state->brdysts &= mod->irq_brdysts; +} + +/* + * interrupt + */ +#define INTSTS0_MAGIC 0xF800 /* acknowledge magical interrupt sources */ +#define INTSTS1_MAGIC 0xA870 /* acknowledge magical interrupt sources */ +static irqreturn_t usbhs_interrupt(int irq, void *data) +{ + struct usbhs_priv *priv = data; + struct usbhs_irq_state irq_state; + + usbhs_status_get_each_irq(priv, &irq_state); + + /* + * clear interrupt + * + * The hardware is _very_ picky to clear interrupt bit. + * Especially INTSTS0_MAGIC, INTSTS1_MAGIC value. + * + * see + * "Operation" + * - "Control Transfer (DCP)" + * - Function :: VALID bit should 0 + */ + usbhs_write(priv, INTSTS0, ~irq_state.intsts0 & INTSTS0_MAGIC); + usbhs_write(priv, INTSTS1, ~irq_state.intsts1 & INTSTS1_MAGIC); + + usbhs_write(priv, BRDYSTS, 0); + usbhs_write(priv, NRDYSTS, 0); + usbhs_write(priv, BEMPSTS, 0); + + /* + * call irq callback functions + * see also + * usbhs_irq_setting_update + */ + if (irq_state.intsts0 & DVST) + usbhs_mod_call(priv, irq_dev_state, priv, &irq_state); + + if (irq_state.intsts0 & CTRT) + usbhs_mod_call(priv, irq_ctrl_stage, priv, &irq_state); + + if (irq_state.intsts0 & BEMP) + usbhs_mod_call(priv, irq_empty, priv, &irq_state); + + if (irq_state.intsts0 & BRDY) + usbhs_mod_call(priv, irq_ready, priv, &irq_state); + + return IRQ_HANDLED; +} + +void usbhs_irq_callback_update(struct usbhs_priv *priv, struct usbhs_mod *mod) +{ + u16 intenb0 = 0; + + usbhs_write(priv, INTENB0, 0); + + usbhs_write(priv, BEMPENB, 0); + usbhs_write(priv, BRDYENB, 0); + + /* + * see also + * usbhs_interrupt + */ + + /* + * it don't enable DVSE (intenb0) here + * but "mod->irq_dev_state" will be called. + */ + + if (mod->irq_ctrl_stage) + intenb0 |= CTRE; + + if (mod->irq_empty && mod->irq_bempsts) { + usbhs_write(priv, BEMPENB, mod->irq_bempsts); + intenb0 |= BEMPE; + } + + if (mod->irq_ready && mod->irq_brdysts) { + usbhs_write(priv, BRDYENB, mod->irq_brdysts); + intenb0 |= BRDYE; + } + + usbhs_write(priv, INTENB0, intenb0); +} diff --git a/drivers/usb/renesas_usbhs/mod.h b/drivers/usb/renesas_usbhs/mod.h new file mode 100644 index 000000000000..bd873d5b432a --- /dev/null +++ b/drivers/usb/renesas_usbhs/mod.h @@ -0,0 +1,106 @@ +/* + * Renesas USB driver + * + * Copyright (C) 2011 Renesas Solutions Corp. + * Kuninori Morimoto + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#ifndef RENESAS_USB_MOD_H +#define RENESAS_USB_MOD_H + +#include +#include +#include "./common.h" + +/* + * struct + */ +struct usbhs_irq_state { + u16 intsts0; + u16 intsts1; + u16 brdysts; + u16 nrdysts; + u16 bempsts; + u16 dvstctr; +}; + +struct usbhs_mod { + char *name; + + /* + * entry point from common.c + */ + int (*start)(struct usbhs_priv *priv); + int (*stop)(struct usbhs_priv *priv); + + /* INTSTS0 :: DVST (DVSQ) */ + int (*irq_dev_state)(struct usbhs_priv *priv, + struct usbhs_irq_state *irq_state); + + /* INTSTS0 :: CTRT (CTSQ) */ + int (*irq_ctrl_stage)(struct usbhs_priv *priv, + struct usbhs_irq_state *irq_state); + + /* INTSTS0 :: BEMP */ + /* BEMPSTS */ + int (*irq_empty)(struct usbhs_priv *priv, + struct usbhs_irq_state *irq_state); + u16 irq_bempsts; + + /* INTSTS0 :: BRDY */ + /* BRDYSTS */ + int (*irq_ready)(struct usbhs_priv *priv, + struct usbhs_irq_state *irq_state); + u16 irq_brdysts; + + struct usbhs_priv *priv; +}; + +struct usbhs_mod_info { + struct usbhs_mod *mod[USBHS_MAX]; + struct usbhs_mod *curt; /* current mod */ +}; + +/* + * for host/gadget module + */ +struct usbhs_mod *usbhs_mod_get(struct usbhs_priv *priv, int id); +struct usbhs_mod *usbhs_mod_get_current(struct usbhs_priv *priv); +void usbhs_mod_register(struct usbhs_priv *priv, struct usbhs_mod *usb, int id); +int usbhs_mod_is_host(struct usbhs_priv *priv, struct usbhs_mod *mod); +int usbhs_mod_change(struct usbhs_priv *priv, int id); +int usbhs_mod_probe(struct usbhs_priv *priv); +void usbhs_mod_remove(struct usbhs_priv *priv); + +/* + * status functions + */ +int usbhs_status_get_usb_speed(struct usbhs_irq_state *irq_state); +int usbhs_status_get_device_state(struct usbhs_irq_state *irq_state); +int usbhs_status_get_ctrl_stage(struct usbhs_irq_state *irq_state); + +/* + * callback functions + */ +void usbhs_irq_callback_update(struct usbhs_priv *priv, struct usbhs_mod *mod); + + +#define usbhs_mod_call(priv, func, param...) \ + ({ \ + struct usbhs_mod *mod; \ + mod = usbhs_mod_get_current(priv); \ + !mod ? -ENODEV : \ + !mod->func ? 0 : \ + mod->func(param); \ + }) + +#endif /* RENESAS_USB_MOD_H */ diff --git a/drivers/usb/renesas_usbhs/pipe.c b/drivers/usb/renesas_usbhs/pipe.c new file mode 100644 index 000000000000..b7a9137f599b --- /dev/null +++ b/drivers/usb/renesas_usbhs/pipe.c @@ -0,0 +1,880 @@ +/* + * Renesas USB driver + * + * Copyright (C) 2011 Renesas Solutions Corp. + * Kuninori Morimoto + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#include +#include +#include +#include "./common.h" +#include "./pipe.h" + +/* + * macros + */ +#define usbhsp_priv_to_pipeinfo(pr) (&(pr)->pipe_info) +#define usbhsp_pipe_to_priv(p) ((p)->priv) + +#define usbhsp_addr_offset(p) ((usbhs_pipe_number(p) - 1) * 2) + +#define usbhsp_is_dcp(p) ((p)->priv->pipe_info.pipe == (p)) + +#define usbhsp_flags_set(p, f) ((p)->flags |= USBHS_PIPE_FLAGS_##f) +#define usbhsp_flags_clr(p, f) ((p)->flags &= ~USBHS_PIPE_FLAGS_##f) +#define usbhsp_flags_has(p, f) ((p)->flags & USBHS_PIPE_FLAGS_##f) +#define usbhsp_flags_init(p) do {(p)->flags = 0; } while (0) + +#define usbhsp_type(p) ((p)->pipe_type) +#define usbhsp_type_is(p, t) ((p)->pipe_type == t) + +/* + * for debug + */ +static char *usbhsp_pipe_name[] = { + [USB_ENDPOINT_XFER_CONTROL] = "DCP", + [USB_ENDPOINT_XFER_BULK] = "BULK", + [USB_ENDPOINT_XFER_INT] = "INT", + [USB_ENDPOINT_XFER_ISOC] = "ISO", +}; + +/* + * usb request functions + */ +void usbhs_usbreq_get_val(struct usbhs_priv *priv, struct usb_ctrlrequest *req) +{ + u16 val; + + val = usbhs_read(priv, USBREQ); + req->bRequest = (val >> 8) & 0xFF; + req->bRequestType = (val >> 0) & 0xFF; + + req->wValue = usbhs_read(priv, USBVAL); + req->wIndex = usbhs_read(priv, USBINDX); + req->wLength = usbhs_read(priv, USBLENG); +} + +void usbhs_usbreq_set_val(struct usbhs_priv *priv, struct usb_ctrlrequest *req) +{ + usbhs_write(priv, USBREQ, (req->bRequest << 8) | req->bRequestType); + usbhs_write(priv, USBVAL, req->wValue); + usbhs_write(priv, USBINDX, req->wIndex); + usbhs_write(priv, USBLENG, req->wLength); +} + +/* + * DCPCTR/PIPEnCTR functions + */ +static void usbhsp_pipectrl_set(struct usbhs_pipe *pipe, u16 mask, u16 val) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + int offset = usbhsp_addr_offset(pipe); + + if (usbhsp_is_dcp(pipe)) + usbhs_bset(priv, DCPCTR, mask, val); + else + usbhs_bset(priv, PIPEnCTR + offset, mask, val); +} + +static u16 usbhsp_pipectrl_get(struct usbhs_pipe *pipe) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + int offset = usbhsp_addr_offset(pipe); + + if (usbhsp_is_dcp(pipe)) + return usbhs_read(priv, DCPCTR); + else + return usbhs_read(priv, PIPEnCTR + offset); +} + +/* + * DCP/PIPE functions + */ +static void __usbhsp_pipe_xxx_set(struct usbhs_pipe *pipe, + u16 dcp_reg, u16 pipe_reg, + u16 mask, u16 val) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + + if (usbhsp_is_dcp(pipe)) + usbhs_bset(priv, dcp_reg, mask, val); + else + usbhs_bset(priv, pipe_reg, mask, val); +} + +static u16 __usbhsp_pipe_xxx_get(struct usbhs_pipe *pipe, + u16 dcp_reg, u16 pipe_reg) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + + if (usbhsp_is_dcp(pipe)) + return usbhs_read(priv, dcp_reg); + else + return usbhs_read(priv, pipe_reg); +} + +/* + * DCPCFG/PIPECFG functions + */ +static void usbhsp_pipe_cfg_set(struct usbhs_pipe *pipe, u16 mask, u16 val) +{ + __usbhsp_pipe_xxx_set(pipe, DCPCFG, PIPECFG, mask, val); +} + +/* + * PIPEBUF + */ +static void usbhsp_pipe_buf_set(struct usbhs_pipe *pipe, u16 mask, u16 val) +{ + if (usbhsp_is_dcp(pipe)) + return; + + __usbhsp_pipe_xxx_set(pipe, 0, PIPEBUF, mask, val); +} + +/* + * DCPMAXP/PIPEMAXP + */ +static void usbhsp_pipe_maxp_set(struct usbhs_pipe *pipe, u16 mask, u16 val) +{ + __usbhsp_pipe_xxx_set(pipe, DCPMAXP, PIPEMAXP, mask, 
val); +} + +static u16 usbhsp_pipe_maxp_get(struct usbhs_pipe *pipe) +{ + return __usbhsp_pipe_xxx_get(pipe, DCPMAXP, PIPEMAXP); +} + +/* + * pipe control functions + */ +static void usbhsp_pipe_select(struct usbhs_pipe *pipe) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + + /* + * On pipe, this is necessary before + * accesses to below registers. + * + * PIPESEL : usbhsp_pipe_select + * PIPECFG : usbhsp_pipe_cfg_xxx + * PIPEBUF : usbhsp_pipe_buf_xxx + * PIPEMAXP : usbhsp_pipe_maxp_xxx + * PIPEPERI + */ + + /* + * if pipe is dcp, no pipe is selected. + * it is no problem, because dcp have its register + */ + usbhs_write(priv, PIPESEL, 0xF & usbhs_pipe_number(pipe)); +} + +static int usbhsp_pipe_barrier(struct usbhs_pipe *pipe) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + struct device *dev = usbhs_priv_to_dev(priv); + int timeout = 1024; + u16 val; + + /* + * make sure.... + * + * Modify these bits when CSSTS = 0, PID = NAK, and no pipe number is + * specified by the CURPIPE bits. + * When changing the setting of this bit after changing + * the PID bits for the selected pipe from BUF to NAK, + * check that CSSTS = 0 and PBUSY = 0. + */ + + /* + * CURPIPE bit = 0 + * + * see also + * "Operation" + * - "Pipe Control" + * - "Pipe Control Registers Switching Procedure" + */ + usbhs_write(priv, CFIFOSEL, 0); + + do { + val = usbhsp_pipectrl_get(pipe); + val &= CSSTS | PID_MASK; + if (!val) + return 0; + + udelay(10); + + } while (timeout--); + + /* + * force NAK + */ + timeout = 1024; + usbhs_fifo_disable(pipe); + do { + val = usbhsp_pipectrl_get(pipe); + val &= PBUSY; + if (!val) + return 0; + + } while (timeout--); + + dev_err(dev, "pipe barrier failed\n"); + + return -EBUSY; +} + +static int usbhsp_pipe_is_accessible(struct usbhs_pipe *pipe) +{ + u16 val; + + val = usbhsp_pipectrl_get(pipe); + if (val & BSTS) + return 0; + + return -EBUSY; +} + +/* + * PID ctrl + */ +static void __usbhsp_pid_try_nak_if_stall(struct usbhs_pipe *pipe) +{ + u16 pid = usbhsp_pipectrl_get(pipe); + + pid &= PID_MASK; + + /* + * see + * "Pipe n Control Register" - "PID" + */ + switch (pid) { + case PID_STALL11: + usbhsp_pipectrl_set(pipe, PID_MASK, PID_STALL10); + /* fall-through */ + case PID_STALL10: + usbhsp_pipectrl_set(pipe, PID_MASK, PID_NAK); + } +} + +void usbhs_fifo_disable(struct usbhs_pipe *pipe) +{ + /* see "Pipe n Control Register" - "PID" */ + __usbhsp_pid_try_nak_if_stall(pipe); + + usbhsp_pipectrl_set(pipe, PID_MASK, PID_NAK); +} + +void usbhs_fifo_enable(struct usbhs_pipe *pipe) +{ + /* see "Pipe n Control Register" - "PID" */ + __usbhsp_pid_try_nak_if_stall(pipe); + + usbhsp_pipectrl_set(pipe, PID_MASK, PID_BUF); +} + +void usbhs_fifo_stall(struct usbhs_pipe *pipe) +{ + u16 pid = usbhsp_pipectrl_get(pipe); + + pid &= PID_MASK; + + /* + * see + * "Pipe n Control Register" - "PID" + */ + switch (pid) { + case PID_NAK: + usbhsp_pipectrl_set(pipe, PID_MASK, PID_STALL10); + break; + case PID_BUF: + usbhsp_pipectrl_set(pipe, PID_MASK, PID_STALL11); + break; + } +} + +/* + * CFIFO ctrl + */ +void usbhs_fifo_send_terminator(struct usbhs_pipe *pipe) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + + usbhs_bset(priv, CFIFOCTR, BVAL, BVAL); +} + +static void usbhsp_fifo_clear(struct usbhs_pipe *pipe) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + + usbhs_write(priv, CFIFOCTR, BCLR); +} + +static int usbhsp_fifo_barrier(struct usbhs_priv *priv) +{ + int timeout = 1024; + + do { + /* The FIFO port is accessible */ + if (usbhs_read(priv, CFIFOCTR) & FRDY) + 
return 0; + + udelay(10); + } while (timeout--); + + return -EBUSY; +} + +static int usbhsp_fifo_rcv_len(struct usbhs_priv *priv) +{ + return usbhs_read(priv, CFIFOCTR) & DTLN_MASK; +} + +static int usbhsp_fifo_select(struct usbhs_pipe *pipe, int write) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + struct device *dev = usbhs_priv_to_dev(priv); + int timeout = 1024; + u16 mask = ((1 << 5) | 0xF); /* mask of ISEL | CURPIPE */ + u16 base = usbhs_pipe_number(pipe); /* CURPIPE */ + + if (usbhsp_is_dcp(pipe)) + base |= (1 == write) << 5; /* ISEL */ + + /* "base" will be used below */ + usbhs_write(priv, CFIFOSEL, base | MBW_32); + + /* check ISEL and CURPIPE value */ + while (timeout--) { + if (base == (mask & usbhs_read(priv, CFIFOSEL))) + return 0; + udelay(10); + } + + dev_err(dev, "fifo select error\n"); + + return -EIO; +} + +int usbhs_fifo_prepare_write(struct usbhs_pipe *pipe) +{ + int ret; + + ret = usbhsp_fifo_select(pipe, 1); + if (ret < 0) + return ret; + + usbhsp_fifo_clear(pipe); + + return ret; +} + +int usbhs_fifo_write(struct usbhs_pipe *pipe, u8 *buf, int len) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + void __iomem *addr = priv->base + CFIFO; + int maxp = usbhs_pipe_get_maxpacket(pipe); + int total_len; + int i, ret; + + ret = usbhsp_pipe_is_accessible(pipe); + if (ret < 0) + return ret; + + ret = usbhs_fifo_prepare_write(pipe); + if (ret < 0) + return ret; + + ret = usbhsp_fifo_barrier(priv); + if (ret < 0) + return ret; + + len = min(len, maxp); + total_len = len; + + /* + * FIXME + * + * 32-bit access only + */ + if (len >= 4 && + !((unsigned long)buf & 0x03)) { + iowrite32_rep(addr, buf, len / 4); + len %= 4; + buf += total_len - len; + } + + /* the rest operation */ + for (i = 0; i < len; i++) + iowrite8(buf[i], addr + (0x03 - (i & 0x03))); + + if (total_len < maxp) + usbhs_fifo_send_terminator(pipe); + + return total_len; +} + +int usbhs_fifo_prepare_read(struct usbhs_pipe *pipe) +{ + int ret; + + /* + * select pipe and enable it to prepare packet receive + */ + ret = usbhsp_fifo_select(pipe, 0); + if (ret < 0) + return ret; + + usbhs_fifo_enable(pipe); + + return ret; +} + +int usbhs_fifo_read(struct usbhs_pipe *pipe, u8 *buf, int len) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + void __iomem *addr = priv->base + CFIFO; + int rcv_len; + int i, ret; + int total_len; + u32 data = 0; + + ret = usbhsp_fifo_select(pipe, 0); + if (ret < 0) + return ret; + + ret = usbhsp_fifo_barrier(priv); + if (ret < 0) + return ret; + + rcv_len = usbhsp_fifo_rcv_len(priv); + + /* + * Buffer clear if Zero-Length packet + * + * see + * "Operation" - "FIFO Buffer Memory" - "FIFO Port Function" + */ + if (0 == rcv_len) { + usbhsp_fifo_clear(pipe); + return 0; + } + + len = min(rcv_len, len); + total_len = len; + + /* + * FIXME + * + * 32-bit access only + */ + if (len >= 4 && + !((unsigned long)buf & 0x03)) { + ioread32_rep(addr, buf, len / 4); + len %= 4; + buf += rcv_len - len; + } + + /* the rest operation */ + for (i = 0; i < len; i++) { + if (!(i & 0x03)) + data = ioread32(addr); + + buf[i] = (data >> ((i & 0x03) * 8)) & 0xff; + } + + return total_len; +} + +/* + * pipe setup + */ +static int usbhsp_possible_double_buffer(struct usbhs_pipe *pipe) +{ + /* + * only ISO / BULK pipe can use double buffer + */ + if (usbhsp_type_is(pipe, USB_ENDPOINT_XFER_BULK) || + usbhsp_type_is(pipe, USB_ENDPOINT_XFER_ISOC)) + return 1; + + return 0; +} + +static u16 usbhsp_setup_pipecfg(struct usbhs_pipe *pipe, + const struct usb_endpoint_descriptor *desc, + int 
is_host) +{ + u16 type = 0; + u16 bfre = 0; + u16 dblb = 0; + u16 cntmd = 0; + u16 dir = 0; + u16 epnum = 0; + u16 shtnak = 0; + u16 type_array[] = { + [USB_ENDPOINT_XFER_BULK] = TYPE_BULK, + [USB_ENDPOINT_XFER_INT] = TYPE_INT, + [USB_ENDPOINT_XFER_ISOC] = TYPE_ISO, + }; + int is_double = usbhsp_possible_double_buffer(pipe); + + if (usbhsp_is_dcp(pipe)) + return -EINVAL; + + /* + * PIPECFG + * + * see + * - "Register Descriptions" - "PIPECFG" register + * - "Features" - "Pipe configuration" + * - "Operation" - "Pipe Control" + */ + + /* TYPE */ + type = type_array[usbhsp_type(pipe)]; + + /* BFRE */ + if (usbhsp_type_is(pipe, USB_ENDPOINT_XFER_ISOC) || + usbhsp_type_is(pipe, USB_ENDPOINT_XFER_BULK)) + bfre = 0; /* FIXME */ + + /* DBLB */ + if (usbhsp_type_is(pipe, USB_ENDPOINT_XFER_ISOC) || + usbhsp_type_is(pipe, USB_ENDPOINT_XFER_BULK)) + dblb = (is_double) ? DBLB : 0; + + /* CNTMD */ + if (usbhsp_type_is(pipe, USB_ENDPOINT_XFER_BULK)) + cntmd = 0; /* FIXME */ + + /* DIR */ + if (usb_endpoint_dir_in(desc)) + usbhsp_flags_set(pipe, IS_DIR_IN); + + if ((is_host && usb_endpoint_dir_out(desc)) || + (!is_host && usb_endpoint_dir_in(desc))) + dir |= DIR_OUT; + + /* SHTNAK */ + if (usbhsp_type_is(pipe, USB_ENDPOINT_XFER_BULK) && + !dir) + shtnak = SHTNAK; + + /* EPNUM */ + epnum = 0xF & usb_endpoint_num(desc); + + return type | + bfre | + dblb | + cntmd | + dir | + shtnak | + epnum; +} + +static u16 usbhsp_setup_pipemaxp(struct usbhs_pipe *pipe, + const struct usb_endpoint_descriptor *desc, + int is_host) +{ + /* host should set DEVSEL */ + + /* reutn MXPS */ + return PIPE_MAXP_MASK & le16_to_cpu(desc->wMaxPacketSize); +} + +static u16 usbhsp_setup_pipebuff(struct usbhs_pipe *pipe, + const struct usb_endpoint_descriptor *desc, + int is_host) +{ + struct usbhs_priv *priv = usbhsp_pipe_to_priv(pipe); + struct usbhs_pipe_info *info = usbhsp_priv_to_pipeinfo(priv); + struct device *dev = usbhs_priv_to_dev(priv); + int pipe_num = usbhs_pipe_number(pipe); + int is_double = usbhsp_possible_double_buffer(pipe); + u16 buff_size; + u16 bufnmb; + u16 bufnmb_cnt; + + /* + * PIPEBUF + * + * see + * - "Register Descriptions" - "PIPEBUF" register + * - "Features" - "Pipe configuration" + * - "Operation" - "FIFO Buffer Memory" + * - "Operation" - "Pipe Control" + * + * ex) if pipe6 - pipe9 are USB_ENDPOINT_XFER_INT (SH7724) + * + * BUFNMB: PIPE + * 0: pipe0 (DCP 256byte) + * 1: - + * 2: - + * 3: - + * 4: pipe6 (INT 64byte) + * 5: pipe7 (INT 64byte) + * 6: pipe8 (INT 64byte) + * 7: pipe9 (INT 64byte) + * 8 - xx: free (for BULK, ISOC) + */ + + /* + * FIXME + * + * it doesn't have good buffer allocator + * + * DCP : 256 byte + * BULK: 512 byte + * INT : 64 byte + * ISOC: 512 byte + */ + if (usbhsp_type_is(pipe, USB_ENDPOINT_XFER_CONTROL)) + buff_size = 256; + else if (usbhsp_type_is(pipe, USB_ENDPOINT_XFER_INT)) + buff_size = 64; + else + buff_size = 512; + + /* change buff_size to register value */ + bufnmb_cnt = (buff_size / 64) - 1; + + /* BUFNMB has been reserved for INT pipe + * see above */ + if (usbhsp_type_is(pipe, USB_ENDPOINT_XFER_INT)) { + bufnmb = pipe_num - 2; + } else { + bufnmb = info->bufnmb_last; + info->bufnmb_last += bufnmb_cnt + 1; + + /* + * double buffer + */ + if (is_double) + info->bufnmb_last += bufnmb_cnt + 1; + } + + dev_dbg(dev, "pipe : %d : buff_size 0x%x: bufnmb 0x%x\n", + pipe_num, buff_size, bufnmb); + + return (0x1f & bufnmb_cnt) << 10 | + (0xff & bufnmb) << 0; +} + +/* + * pipe control + */ +int usbhs_pipe_get_maxpacket(struct usbhs_pipe *pipe) +{ + u16 mask = usbhsp_is_dcp(pipe) ? 
DCP_MAXP_MASK : PIPE_MAXP_MASK; + + usbhsp_pipe_select(pipe); + + return (int)(usbhsp_pipe_maxp_get(pipe) & mask); +} + +int usbhs_pipe_is_dir_in(struct usbhs_pipe *pipe) +{ + return usbhsp_flags_has(pipe, IS_DIR_IN); +} + +void usbhs_pipe_clear_sequence(struct usbhs_pipe *pipe) +{ + usbhsp_pipectrl_set(pipe, SQCLR, SQCLR); +} + +static struct usbhs_pipe *usbhsp_get_pipe(struct usbhs_priv *priv, u32 type) +{ + struct usbhs_pipe *pos, *pipe; + int i; + + /* + * find target pipe + */ + pipe = NULL; + usbhs_for_each_pipe_with_dcp(pos, priv, i) { + if (!usbhsp_type_is(pos, type)) + continue; + if (usbhsp_flags_has(pos, IS_USED)) + continue; + + pipe = pos; + break; + } + + if (!pipe) + return NULL; + + /* + * initialize pipe flags + */ + usbhsp_flags_init(pipe); + usbhsp_flags_set(pipe, IS_USED); + + return pipe; +} + +void usbhs_pipe_init(struct usbhs_priv *priv) +{ + struct usbhs_pipe_info *info = usbhsp_priv_to_pipeinfo(priv); + struct usbhs_pipe *pipe; + int i; + + /* + * FIXME + * + * driver needs good allocator. + * + * find first free buffer area (BULK, ISOC) + * (DCP, INT area is fixed) + * + * buffer number 0 - 3 have been reserved for DCP + * see + * usbhsp_to_bufnmb + */ + info->bufnmb_last = 4; + usbhs_for_each_pipe_with_dcp(pipe, priv, i) { + if (usbhsp_type_is(pipe, USB_ENDPOINT_XFER_INT)) + info->bufnmb_last++; + + usbhsp_flags_init(pipe); + pipe->mod_private = NULL; + } +} + +struct usbhs_pipe *usbhs_pipe_malloc(struct usbhs_priv *priv, + const struct usb_endpoint_descriptor *desc) +{ + struct device *dev = usbhs_priv_to_dev(priv); + struct usbhs_mod *mod = usbhs_mod_get_current(priv); + struct usbhs_pipe *pipe; + int is_host = usbhs_mod_is_host(priv, mod); + int ret; + u16 pipecfg, pipebuf, pipemaxp; + + pipe = usbhsp_get_pipe(priv, usb_endpoint_type(desc)); + if (!pipe) + return NULL; + + usbhs_fifo_disable(pipe); + + /* make sure pipe is not busy */ + ret = usbhsp_pipe_barrier(pipe); + if (ret < 0) { + dev_err(dev, "pipe setup failed %d\n", usbhs_pipe_number(pipe)); + return NULL; + } + + pipecfg = usbhsp_setup_pipecfg(pipe, desc, is_host); + pipebuf = usbhsp_setup_pipebuff(pipe, desc, is_host); + pipemaxp = usbhsp_setup_pipemaxp(pipe, desc, is_host); + + /* buffer clear + * see PIPECFG :: BFRE */ + usbhsp_pipectrl_set(pipe, ACLRM, ACLRM); + usbhsp_pipectrl_set(pipe, ACLRM, 0); + + usbhsp_pipe_select(pipe); + usbhsp_pipe_cfg_set(pipe, 0xFFFF, pipecfg); + usbhsp_pipe_buf_set(pipe, 0xFFFF, pipebuf); + usbhsp_pipe_maxp_set(pipe, 0xFFFF, pipemaxp); + + usbhs_pipe_clear_sequence(pipe); + + dev_dbg(dev, "enable pipe %d : %s (%s)\n", + usbhs_pipe_number(pipe), + usbhsp_pipe_name[usb_endpoint_type(desc)], + usbhs_pipe_is_dir_in(pipe) ? 
"in" : "out"); + + return pipe; +} + +/* + * dcp control + */ +struct usbhs_pipe *usbhs_dcp_malloc(struct usbhs_priv *priv) +{ + struct usbhs_pipe *pipe; + + pipe = usbhsp_get_pipe(priv, USB_ENDPOINT_XFER_CONTROL); + if (!pipe) + return NULL; + + /* + * dcpcfg : default + * dcpmaxp : default + * pipebuf : nothing to do + */ + + usbhsp_pipe_select(pipe); + usbhs_pipe_clear_sequence(pipe); + + return pipe; +} + +void usbhs_dcp_control_transfer_done(struct usbhs_pipe *pipe) +{ + WARN_ON(!usbhsp_is_dcp(pipe)); + + usbhs_fifo_enable(pipe); + usbhsp_pipectrl_set(pipe, CCPL, CCPL); +} + + +/* + * pipe module function + */ +int usbhs_pipe_probe(struct usbhs_priv *priv) +{ + struct usbhs_pipe_info *info = usbhsp_priv_to_pipeinfo(priv); + struct usbhs_pipe *pipe; + struct device *dev = usbhs_priv_to_dev(priv); + u32 *pipe_type = usbhs_get_dparam(priv, pipe_type); + int pipe_size = usbhs_get_dparam(priv, pipe_size); + int i; + + /* This driver expects 1st pipe is DCP */ + if (pipe_type[0] != USB_ENDPOINT_XFER_CONTROL) { + dev_err(dev, "1st PIPE is not DCP\n"); + return -EINVAL; + } + + info->pipe = kzalloc(sizeof(struct usbhs_pipe) * pipe_size, GFP_KERNEL); + if (!info->pipe) { + dev_err(dev, "Could not allocate pipe\n"); + return -ENOMEM; + } + + info->size = pipe_size; + + /* + * init pipe + */ + usbhs_for_each_pipe_with_dcp(pipe, priv, i) { + pipe->priv = priv; + usbhsp_type(pipe) = pipe_type[i] & USB_ENDPOINT_XFERTYPE_MASK; + + dev_dbg(dev, "pipe %x\t: %s\n", + i, usbhsp_pipe_name[pipe_type[i]]); + } + + return 0; +} + +void usbhs_pipe_remove(struct usbhs_priv *priv) +{ + struct usbhs_pipe_info *info = usbhsp_priv_to_pipeinfo(priv); + + kfree(info->pipe); +} diff --git a/drivers/usb/renesas_usbhs/pipe.h b/drivers/usb/renesas_usbhs/pipe.h new file mode 100644 index 000000000000..4a60dcef9676 --- /dev/null +++ b/drivers/usb/renesas_usbhs/pipe.h @@ -0,0 +1,105 @@ +/* + * Renesas USB driver + * + * Copyright (C) 2011 Renesas Solutions Corp. + * Kuninori Morimoto + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#ifndef RENESAS_USB_PIPE_H +#define RENESAS_USB_PIPE_H + +#include "./common.h" + +/* + * struct + */ +struct usbhs_pipe { + u32 pipe_type; /* USB_ENDPOINT_XFER_xxx */ + + struct usbhs_priv *priv; + + u32 flags; +#define USBHS_PIPE_FLAGS_IS_USED (1 << 0) +#define USBHS_PIPE_FLAGS_IS_DIR_IN (1 << 1) + + void *mod_private; +}; + +struct usbhs_pipe_info { + struct usbhs_pipe *pipe; + int size; /* array size of "pipe" */ + int bufnmb_last; /* FIXME : driver needs good allocator */ +}; + +/* + * pipe list + */ +#define __usbhs_for_each_pipe(start, pos, info, i) \ + for (i = start, pos = (info)->pipe; \ + i < (info)->size; \ + i++, pos = (info)->pipe + i) + +#define usbhs_for_each_pipe(pos, priv, i) \ + __usbhs_for_each_pipe(1, pos, &((priv)->pipe_info), i) + +#define usbhs_for_each_pipe_with_dcp(pos, priv, i) \ + __usbhs_for_each_pipe(0, pos, &((priv)->pipe_info), i) + +/* + * pipe module probe / remove + */ +int usbhs_pipe_probe(struct usbhs_priv *priv); +void usbhs_pipe_remove(struct usbhs_priv *priv); + +/* + * cfifo + */ +int usbhs_fifo_write(struct usbhs_pipe *pipe, u8 *buf, int len); +int usbhs_fifo_read(struct usbhs_pipe *pipe, u8 *buf, int len); +int usbhs_fifo_prepare_write(struct usbhs_pipe *pipe); +int usbhs_fifo_prepare_read(struct usbhs_pipe *pipe); + +void usbhs_fifo_enable(struct usbhs_pipe *pipe); +void usbhs_fifo_disable(struct usbhs_pipe *pipe); +void usbhs_fifo_stall(struct usbhs_pipe *pipe); + +void usbhs_fifo_send_terminator(struct usbhs_pipe *pipe); + + +/* + * usb request + */ +void usbhs_usbreq_get_val(struct usbhs_priv *priv, struct usb_ctrlrequest *req); +void usbhs_usbreq_set_val(struct usbhs_priv *priv, struct usb_ctrlrequest *req); + +/* + * pipe control + */ +struct usbhs_pipe +*usbhs_pipe_malloc(struct usbhs_priv *priv, + const struct usb_endpoint_descriptor *desc); + +int usbhs_pipe_is_dir_in(struct usbhs_pipe *pipe); +void usbhs_pipe_init(struct usbhs_priv *priv); +int usbhs_pipe_get_maxpacket(struct usbhs_pipe *pipe); +void usbhs_pipe_clear_sequence(struct usbhs_pipe *pipe); + +#define usbhs_pipe_number(p) (((u32)(p) - (u32)(p)->priv->pipe_info.pipe) / \ + sizeof(struct usbhs_pipe)) + +/* + * dcp control + */ +struct usbhs_pipe *usbhs_dcp_malloc(struct usbhs_priv *priv); +void usbhs_dcp_control_transfer_done(struct usbhs_pipe *pipe); + +#endif /* RENESAS_USB_PIPE_H */ diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h new file mode 100644 index 000000000000..565bca3aa440 --- /dev/null +++ b/include/linux/usb/renesas_usbhs.h @@ -0,0 +1,149 @@ +/* + * Renesas USB + * + * Copyright (C) 2011 Renesas Solutions Corp. + * Kuninori Morimoto + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#ifndef RENESAS_USB_H +#define RENESAS_USB_H +#include +#include + +/* + * module type + * + * it will be return value from get_id + */ +enum { + USBHS_HOST = 0, + USBHS_GADGET, + USBHS_MAX, +}; + +/* + * callback functions table for driver + * + * These functions are called from platform for driver. + * Callback function's pointer will be set before + * renesas_usbhs_platform_callback :: hardware_init was called + */ +struct renesas_usbhs_driver_callback { + int (*notify_hotplug)(struct platform_device *pdev); +}; + +/* + * callback functions for platform + * + * These functions are called from driver for platform + */ +struct renesas_usbhs_platform_callback { + + /* + * option: + * + * Hardware init function for platform. + * it is called when driver was probed. + */ + int (*hardware_init)(struct platform_device *pdev); + + /* + * option: + * + * Hardware exit function for platform. + * it is called when driver was removed + */ + void (*hardware_exit)(struct platform_device *pdev); + + /* + * option: + * + * Phy reset for platform + */ + void (*phy_reset)(struct platform_device *pdev); + + /* + * get USB ID function + * - USBHS_HOST + * - USBHS_GADGET + */ + int (*get_id)(struct platform_device *pdev); + + /* + * get VBUS status function. + */ + int (*get_vbus)(struct platform_device *pdev); +}; + +/* + * parameters for renesas usbhs + * + * some register needs USB chip specific parameters. + * This struct show it to driver + */ +struct renesas_usbhs_driver_param { + /* + * pipe settings + */ + u32 *pipe_type; /* array of USB_ENDPOINT_XFER_xxx (from ep0) */ + int pipe_size; /* pipe_type array size */ + + /* + * option: + * + * for BUSWAIT :: BWAIT + * */ + int buswait_bwait; +}; + +/* + * option: + * + * platform information for renesas_usbhs driver. + */ +struct renesas_usbhs_platform_info { + /* + * option: + * + * platform set these functions before + * call platform_add_devices if needed + */ + struct renesas_usbhs_platform_callback platform_callback; + + /* + * driver set these callback functions pointer. + * platform can use it on callback functions + */ + struct renesas_usbhs_driver_callback driver_callback; + + /* + * option: + * + * driver use these param for some register + */ + struct renesas_usbhs_driver_param driver_param; +}; + +/* + * macro for platform + */ +#define renesas_usbhs_get_info(pdev)\ + ((struct renesas_usbhs_platform_info *)(pdev)->dev.platform_data) + +#define renesas_usbhs_call_notify_hotplug(pdev) \ + ({ \ + struct renesas_usbhs_driver_callback *dc; \ + dc = &(renesas_usbhs_get_info(pdev)->driver_callback); \ + if (dc) \ + dc->notify_hotplug(pdev); \ + }) +#endif /* RENESAS_USB_H */ -- cgit v1.2.3 From 3ab810f19d71f4083be44b41770bcd784ff82e51 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 1 Apr 2011 11:24:30 -0700 Subject: usb gadget: fix all Section mismatch warnings Fix 41 occurrences of this type of Section mismatch warning in g_mass_storage, g_serial, g_cdc, g_multi, g_nokia, g_ether, g_ffs: (the 75 number reported earlier contained some duplicates.) WARNING: drivers/usb/gadget/g_mass_storage.o(.text+0x687a): Section mismatch in reference from the function fsg_bind() to the function .devinit.text:usb_ep_autoconfig() The function fsg_bind() references the function __devinit usb_ep_autoconfig(). 
This is often because fsg_bind lacks a __devinit annotation or the annotation of usb_ep_autoconfig is wrong. Also remove __devinit from usb_ep_autoconfig_reset() to prevent possible section mismatch problems with it. Signed-off-by: Randy Dunlap Cc: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/gadget.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index e538172c0f64..dd1571db55e7 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -890,8 +890,8 @@ static inline void usb_free_descriptors(struct usb_descriptor_header **v) /* utility wrapping a simple endpoint selection policy */ extern struct usb_ep *usb_ep_autoconfig(struct usb_gadget *, - struct usb_endpoint_descriptor *) __devinit; + struct usb_endpoint_descriptor *); -extern void usb_ep_autoconfig_reset(struct usb_gadget *) __devinit; +extern void usb_ep_autoconfig_reset(struct usb_gadget *); #endif /* __LINUX_USB_GADGET_H */ -- cgit v1.2.3 From c326de88b8ac7ed1cd1027017ba6079dbe91be49 Mon Sep 17 00:00:00 2001 From: "Mathieu J. Poirier" Date: Wed, 13 Apr 2011 17:13:00 -0700 Subject: net: allow shifted access in smsc911x V2 This is a revised patch that permits a shifted access to the LAN9221 registers. More specifically: It adds a shift parameter in the platform_data. It introduces an ops in smsc911x_data. A choice of access function to use at run-time. Four new shifted access function. Signed-off-by: Mathieu Poirier Signed-off-by: Alessandro Rubini Signed-off-by: David S. Miller --- drivers/net/smsc911x.c | 156 ++++++++++++++++++++++++++++++++++++++++++++--- include/linux/smsc911x.h | 1 + 2 files changed, 150 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c index b8faab7780da..c6d47d10590c 100644 --- a/drivers/net/smsc911x.c +++ b/drivers/net/smsc911x.c @@ -71,6 +71,17 @@ static int debug = 3; module_param(debug, int, 0); MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); +struct smsc911x_data; + +struct smsc911x_ops { + u32 (*reg_read)(struct smsc911x_data *pdata, u32 reg); + void (*reg_write)(struct smsc911x_data *pdata, u32 reg, u32 val); + void (*rx_readfifo)(struct smsc911x_data *pdata, + unsigned int *buf, unsigned int wordcount); + void (*tx_writefifo)(struct smsc911x_data *pdata, + unsigned int *buf, unsigned int wordcount); +}; + struct smsc911x_data { void __iomem *ioaddr; @@ -118,8 +129,14 @@ struct smsc911x_data { unsigned int clear_bits_mask; unsigned int hashhi; unsigned int hashlo; + + /* register access functions */ + const struct smsc911x_ops *ops; }; +/* Easy access to information */ +#define __smsc_shift(pdata, reg) ((reg) << ((pdata)->config.shift)) + static inline u32 __smsc911x_reg_read(struct smsc911x_data *pdata, u32 reg) { if (pdata->config.flags & SMSC911X_USE_32BIT) @@ -133,13 +150,29 @@ static inline u32 __smsc911x_reg_read(struct smsc911x_data *pdata, u32 reg) return 0; } +static inline u32 +__smsc911x_reg_read_shift(struct smsc911x_data *pdata, u32 reg) +{ + if (pdata->config.flags & SMSC911X_USE_32BIT) + return readl(pdata->ioaddr + __smsc_shift(pdata, reg)); + + if (pdata->config.flags & SMSC911X_USE_16BIT) + return (readw(pdata->ioaddr + + __smsc_shift(pdata, reg)) & 0xFFFF) | + ((readw(pdata->ioaddr + + __smsc_shift(pdata, reg + 2)) & 0xFFFF) << 16); + + BUG(); + return 0; +} + static inline u32 smsc911x_reg_read(struct smsc911x_data *pdata, u32 reg) { u32 data; unsigned 
long flags; spin_lock_irqsave(&pdata->dev_lock, flags); - data = __smsc911x_reg_read(pdata, reg); + data = pdata->ops->reg_read(pdata, reg); spin_unlock_irqrestore(&pdata->dev_lock, flags); return data; @@ -162,13 +195,32 @@ static inline void __smsc911x_reg_write(struct smsc911x_data *pdata, u32 reg, BUG(); } +static inline void +__smsc911x_reg_write_shift(struct smsc911x_data *pdata, u32 reg, u32 val) +{ + if (pdata->config.flags & SMSC911X_USE_32BIT) { + writel(val, pdata->ioaddr + __smsc_shift(pdata, reg)); + return; + } + + if (pdata->config.flags & SMSC911X_USE_16BIT) { + writew(val & 0xFFFF, + pdata->ioaddr + __smsc_shift(pdata, reg)); + writew((val >> 16) & 0xFFFF, + pdata->ioaddr + __smsc_shift(pdata, reg + 2)); + return; + } + + BUG(); +} + static inline void smsc911x_reg_write(struct smsc911x_data *pdata, u32 reg, u32 val) { unsigned long flags; spin_lock_irqsave(&pdata->dev_lock, flags); - __smsc911x_reg_write(pdata, reg, val); + pdata->ops->reg_write(pdata, reg, val); spin_unlock_irqrestore(&pdata->dev_lock, flags); } @@ -204,6 +256,40 @@ out: spin_unlock_irqrestore(&pdata->dev_lock, flags); } +/* Writes a packet to the TX_DATA_FIFO - shifted version */ +static inline void +smsc911x_tx_writefifo_shift(struct smsc911x_data *pdata, unsigned int *buf, + unsigned int wordcount) +{ + unsigned long flags; + + spin_lock_irqsave(&pdata->dev_lock, flags); + + if (pdata->config.flags & SMSC911X_SWAP_FIFO) { + while (wordcount--) + __smsc911x_reg_write_shift(pdata, TX_DATA_FIFO, + swab32(*buf++)); + goto out; + } + + if (pdata->config.flags & SMSC911X_USE_32BIT) { + writesl(pdata->ioaddr + __smsc_shift(pdata, + TX_DATA_FIFO), buf, wordcount); + goto out; + } + + if (pdata->config.flags & SMSC911X_USE_16BIT) { + while (wordcount--) + __smsc911x_reg_write_shift(pdata, + TX_DATA_FIFO, *buf++); + goto out; + } + + BUG(); +out: + spin_unlock_irqrestore(&pdata->dev_lock, flags); +} + /* Reads a packet out of the RX_DATA_FIFO */ static inline void smsc911x_rx_readfifo(struct smsc911x_data *pdata, unsigned int *buf, @@ -236,6 +322,40 @@ out: spin_unlock_irqrestore(&pdata->dev_lock, flags); } +/* Reads a packet out of the RX_DATA_FIFO - shifted version */ +static inline void +smsc911x_rx_readfifo_shift(struct smsc911x_data *pdata, unsigned int *buf, + unsigned int wordcount) +{ + unsigned long flags; + + spin_lock_irqsave(&pdata->dev_lock, flags); + + if (pdata->config.flags & SMSC911X_SWAP_FIFO) { + while (wordcount--) + *buf++ = swab32(__smsc911x_reg_read_shift(pdata, + RX_DATA_FIFO)); + goto out; + } + + if (pdata->config.flags & SMSC911X_USE_32BIT) { + readsl(pdata->ioaddr + __smsc_shift(pdata, + RX_DATA_FIFO), buf, wordcount); + goto out; + } + + if (pdata->config.flags & SMSC911X_USE_16BIT) { + while (wordcount--) + *buf++ = __smsc911x_reg_read_shift(pdata, + RX_DATA_FIFO); + goto out; + } + + BUG(); +out: + spin_unlock_irqrestore(&pdata->dev_lock, flags); +} + /* waits for MAC not busy, with timeout. 
Only called by smsc911x_mac_read * and smsc911x_mac_write, so assumes mac_lock is held */ static int smsc911x_mac_complete(struct smsc911x_data *pdata) @@ -500,7 +620,7 @@ static int smsc911x_phy_check_loopbackpkt(struct smsc911x_data *pdata) wrsz += (u32)((ulong)pdata->loopback_tx_pkt & 0x3); wrsz >>= 2; - smsc911x_tx_writefifo(pdata, (unsigned int *)bufp, wrsz); + pdata->ops->tx_writefifo(pdata, (unsigned int *)bufp, wrsz); /* Wait till transmit is done */ i = 60; @@ -544,7 +664,7 @@ static int smsc911x_phy_check_loopbackpkt(struct smsc911x_data *pdata) rdsz += (u32)((ulong)pdata->loopback_rx_pkt & 0x3); rdsz >>= 2; - smsc911x_rx_readfifo(pdata, (unsigned int *)bufp, rdsz); + pdata->ops->rx_readfifo(pdata, (unsigned int *)bufp, rdsz); if (pktlength != (MIN_PACKET_SIZE + 4)) { SMSC_WARN(pdata, hw, "Unexpected packet size " @@ -1046,8 +1166,8 @@ static int smsc911x_poll(struct napi_struct *napi, int budget) /* Align IP on 16B boundary */ skb_reserve(skb, NET_IP_ALIGN); skb_put(skb, pktlength - 4); - smsc911x_rx_readfifo(pdata, (unsigned int *)skb->head, - pktwords); + pdata->ops->rx_readfifo(pdata, + (unsigned int *)skb->head, pktwords); skb->protocol = eth_type_trans(skb, dev); skb_checksum_none_assert(skb); netif_receive_skb(skb); @@ -1351,7 +1471,7 @@ static int smsc911x_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) wrsz += (u32)((ulong)skb->data & 0x3); wrsz >>= 2; - smsc911x_tx_writefifo(pdata, (unsigned int *)bufp, wrsz); + pdata->ops->tx_writefifo(pdata, (unsigned int *)bufp, wrsz); freespace -= (skb->len + 32); dev_kfree_skb(skb); @@ -1957,6 +2077,22 @@ static int __devexit smsc911x_drv_remove(struct platform_device *pdev) return 0; } +/* standard register acces */ +static const struct smsc911x_ops standard_smsc911x_ops = { + .reg_read = __smsc911x_reg_read, + .reg_write = __smsc911x_reg_write, + .rx_readfifo = smsc911x_rx_readfifo, + .tx_writefifo = smsc911x_tx_writefifo, +}; + +/* shifted register access */ +static const struct smsc911x_ops shifted_smsc911x_ops = { + .reg_read = __smsc911x_reg_read_shift, + .reg_write = __smsc911x_reg_write_shift, + .rx_readfifo = smsc911x_rx_readfifo_shift, + .tx_writefifo = smsc911x_tx_writefifo_shift, +}; + static int __devinit smsc911x_drv_probe(struct platform_device *pdev) { struct net_device *dev; @@ -2026,6 +2162,12 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev) goto out_free_netdev_2; } + /* assume standard, non-shifted, access to HW registers */ + pdata->ops = &standard_smsc911x_ops; + /* apply the right access if shifting is needed */ + if (config->shift) + pdata->ops = &shifted_smsc911x_ops; + retval = smsc911x_init(dev); if (retval < 0) goto out_unmap_io_3; diff --git a/include/linux/smsc911x.h b/include/linux/smsc911x.h index 7144e8aa1e41..4dde70e74822 100644 --- a/include/linux/smsc911x.h +++ b/include/linux/smsc911x.h @@ -29,6 +29,7 @@ struct smsc911x_platform_config { unsigned int irq_polarity; unsigned int irq_type; unsigned int flags; + unsigned int shift; phy_interface_t phy_interface; unsigned char mac[6]; }; -- cgit v1.2.3 From 184748cc50b2dceb8287f9fb657eda48ff8fcfe7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:39 +0200 Subject: sched: Provide scheduler_ipi() callback in response to smp_send_reschedule() For future rework of try_to_wake_up() we'd like to push part of that function onto the CPU the task is actually going to run on. In order to do so we need a generic callback from the existing scheduler IPI. 
This patch introduces such a generic callback: scheduler_ipi() and implements it as a NOP. BenH notes: PowerPC might use this IPI on offline CPUs under rare conditions! Acked-by: Russell King Acked-by: Martin Schwidefsky Acked-by: Chris Metcalf Acked-by: Jesper Nilsson Acked-by: Benjamin Herrenschmidt Signed-off-by: Ralf Baechle Reviewed-by: Frank Rowand Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110405152728.744338123@chello.nl --- arch/alpha/kernel/smp.c | 3 +-- arch/arm/kernel/smp.c | 5 +---- arch/blackfin/mach-common/smp.c | 3 +++ arch/cris/arch-v32/kernel/smp.c | 13 ++++++++----- arch/ia64/kernel/irq_ia64.c | 2 ++ arch/ia64/xen/irq_xen.c | 10 +++++++++- arch/m32r/kernel/smp.c | 4 +--- arch/mips/cavium-octeon/smp.c | 2 ++ arch/mips/kernel/smtc.c | 2 +- arch/mips/mti-malta/malta-int.c | 2 ++ arch/mips/pmc-sierra/yosemite/smp.c | 4 ++++ arch/mips/sgi-ip27/ip27-irq.c | 2 ++ arch/mips/sibyte/bcm1480/smp.c | 7 +++---- arch/mips/sibyte/sb1250/smp.c | 7 +++---- arch/mn10300/kernel/smp.c | 5 +---- arch/parisc/kernel/smp.c | 5 +---- arch/powerpc/kernel/smp.c | 4 ++-- arch/s390/kernel/smp.c | 6 +++--- arch/sh/kernel/smp.c | 2 ++ arch/sparc/kernel/smp_32.c | 4 +++- arch/sparc/kernel/smp_64.c | 1 + arch/tile/kernel/smp.c | 6 +----- arch/um/kernel/smp.c | 2 +- arch/x86/kernel/smp.c | 5 ++--- arch/x86/xen/smp.c | 5 ++--- include/linux/sched.h | 2 ++ 26 files changed, 63 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 42aa078a5e4d..5a621c6d22ab 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -585,8 +585,7 @@ handle_ipi(struct pt_regs *regs) switch (which) { case IPI_RESCHEDULE: - /* Reschedule callback. Everything to be done - is done by the interrupt return path. 
*/ + scheduler_ipi(); break; case IPI_CALL_FUNC: diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 8fe05ad932e4..7a561eb731ea 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -560,10 +560,7 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) break; case IPI_RESCHEDULE: - /* - * nothing more to do - eveything is - * done on the interrupt return path - */ + scheduler_ipi(); break; case IPI_CALL_FUNC: diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c index 6e17a265c4d3..326bb86f4d29 100644 --- a/arch/blackfin/mach-common/smp.c +++ b/arch/blackfin/mach-common/smp.c @@ -164,6 +164,9 @@ static irqreturn_t ipi_handler_int1(int irq, void *dev_instance) while (msg_queue->count) { msg = &msg_queue->ipi_message[msg_queue->head]; switch (msg->type) { + case BFIN_IPI_RESCHEDULE: + scheduler_ipi(); + break; case BFIN_IPI_CALL_FUNC: spin_unlock_irqrestore(&msg_queue->lock, flags); ipi_call_function(cpu, msg); diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c index 4c9e3e1ba5d1..66cc75657e2f 100644 --- a/arch/cris/arch-v32/kernel/smp.c +++ b/arch/cris/arch-v32/kernel/smp.c @@ -342,15 +342,18 @@ irqreturn_t crisv32_ipi_interrupt(int irq, void *dev_id) ipi = REG_RD(intr_vect, irq_regs[smp_processor_id()], rw_ipi); + if (ipi.vector & IPI_SCHEDULE) { + scheduler_ipi(); + } if (ipi.vector & IPI_CALL) { - func(info); + func(info); } if (ipi.vector & IPI_FLUSH_TLB) { - if (flush_mm == FLUSH_ALL) - __flush_tlb_all(); - else if (flush_vma == FLUSH_ALL) + if (flush_mm == FLUSH_ALL) + __flush_tlb_all(); + else if (flush_vma == FLUSH_ALL) __flush_tlb_mm(flush_mm); - else + else __flush_tlb_page(flush_vma, flush_addr); } diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 5b704740f160..782c3a357f24 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -496,6 +497,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) smp_local_flush_tlb(); kstat_incr_irqs_this_cpu(irq, desc); } else if (unlikely(IS_RESCHEDULE(vector))) { + scheduler_ipi(); kstat_incr_irqs_this_cpu(irq, desc); } else { ia64_setreg(_IA64_REG_CR_TPR, vector); diff --git a/arch/ia64/xen/irq_xen.c b/arch/ia64/xen/irq_xen.c index 108bb858acf2..b279e142c633 100644 --- a/arch/ia64/xen/irq_xen.c +++ b/arch/ia64/xen/irq_xen.c @@ -92,6 +92,8 @@ static unsigned short saved_irq_cnt; static int xen_slab_ready; #ifdef CONFIG_SMP +#include + /* Dummy stub. Though we may check XEN_RESCHEDULE_VECTOR before __do_IRQ, * it ends up to issue several memory accesses upon percpu data and * thus adds unnecessary traffic to other paths. @@ -99,7 +101,13 @@ static int xen_slab_ready; static irqreturn_t xen_dummy_handler(int irq, void *dev_id) { + return IRQ_HANDLED; +} +static irqreturn_t +xen_resched_handler(int irq, void *dev_id) +{ + scheduler_ipi(); return IRQ_HANDLED; } @@ -110,7 +118,7 @@ static struct irqaction xen_ipi_irqaction = { }; static struct irqaction xen_resched_irqaction = { - .handler = xen_dummy_handler, + .handler = xen_resched_handler, .flags = IRQF_DISABLED, .name = "resched" }; diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c index 31cef20b2996..fc10b39893d4 100644 --- a/arch/m32r/kernel/smp.c +++ b/arch/m32r/kernel/smp.c @@ -122,8 +122,6 @@ void smp_send_reschedule(int cpu_id) * * Description: This routine executes on CPU which received * 'RESCHEDULE_IPI'. 
- * Rescheduling is processed at the exit of interrupt - * operation. * * Born on Date: 2002.02.05 * @@ -138,7 +136,7 @@ void smp_send_reschedule(int cpu_id) *==========================================================================*/ void smp_reschedule_interrupt(void) { - /* nothing to do */ + scheduler_ipi(); } /*==========================================================================* diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index ba78b21cc8d0..76923eeb58b9 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -44,6 +44,8 @@ static irqreturn_t mailbox_interrupt(int irq, void *dev_id) if (action & SMP_CALL_FUNCTION) smp_call_function_interrupt(); + if (action & SMP_RESCHEDULE_YOURSELF) + scheduler_ipi(); /* Check if we've been told to flush the icache */ if (action & SMP_ICACHE_FLUSH) diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index 5a88cc4ccd5a..cedac4633741 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -929,7 +929,7 @@ static void post_direct_ipi(int cpu, struct smtc_ipi *pipi) static void ipi_resched_interrupt(void) { - /* Return from interrupt should be enough to cause scheduler check */ + scheduler_ipi(); } static void ipi_call_interrupt(void) diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c index 9027061f0ead..7d93e6fbfa5a 100644 --- a/arch/mips/mti-malta/malta-int.c +++ b/arch/mips/mti-malta/malta-int.c @@ -309,6 +309,8 @@ static void ipi_call_dispatch(void) static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id) { + scheduler_ipi(); + return IRQ_HANDLED; } diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c index efc9e889b349..2608752898c0 100644 --- a/arch/mips/pmc-sierra/yosemite/smp.c +++ b/arch/mips/pmc-sierra/yosemite/smp.c @@ -55,6 +55,8 @@ void titan_mailbox_irq(void) if (status & 0x2) smp_call_function_interrupt(); + if (status & 0x4) + scheduler_ipi(); break; case 1: @@ -63,6 +65,8 @@ void titan_mailbox_irq(void) if (status & 0x2) smp_call_function_interrupt(); + if (status & 0x4) + scheduler_ipi(); break; } } diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c index 0a04603d577c..b18b04e48577 100644 --- a/arch/mips/sgi-ip27/ip27-irq.c +++ b/arch/mips/sgi-ip27/ip27-irq.c @@ -147,8 +147,10 @@ static void ip27_do_irq_mask0(void) #ifdef CONFIG_SMP if (pend0 & (1UL << CPU_RESCHED_A_IRQ)) { LOCAL_HUB_CLR_INTR(CPU_RESCHED_A_IRQ); + scheduler_ipi(); } else if (pend0 & (1UL << CPU_RESCHED_B_IRQ)) { LOCAL_HUB_CLR_INTR(CPU_RESCHED_B_IRQ); + scheduler_ipi(); } else if (pend0 & (1UL << CPU_CALL_A_IRQ)) { LOCAL_HUB_CLR_INTR(CPU_CALL_A_IRQ); smp_call_function_interrupt(); diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c index 47b347c992ea..d667875be564 100644 --- a/arch/mips/sibyte/bcm1480/smp.c +++ b/arch/mips/sibyte/bcm1480/smp.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -189,10 +190,8 @@ void bcm1480_mailbox_interrupt(void) /* Clear the mailbox to clear the interrupt */ __raw_writeq(((u64)action)<<48, mailbox_0_clear_regs[cpu]); - /* - * Nothing to do for SMP_RESCHEDULE_YOURSELF; returning from the - * interrupt will do the reschedule for us - */ + if (action & SMP_RESCHEDULE_YOURSELF) + scheduler_ipi(); if (action & SMP_CALL_FUNCTION) smp_call_function_interrupt(); diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c index c00a5cb1128d..38e7f6bd7922 100644 --- a/arch/mips/sibyte/sb1250/smp.c +++ 
b/arch/mips/sibyte/sb1250/smp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -177,10 +178,8 @@ void sb1250_mailbox_interrupt(void) /* Clear the mailbox to clear the interrupt */ ____raw_writeq(((u64)action) << 48, mailbox_clear_regs[cpu]); - /* - * Nothing to do for SMP_RESCHEDULE_YOURSELF; returning from the - * interrupt will do the reschedule for us - */ + if (action & SMP_RESCHEDULE_YOURSELF) + scheduler_ipi(); if (action & SMP_CALL_FUNCTION) smp_call_function_interrupt(); diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c index 226c826a2194..83fb27912231 100644 --- a/arch/mn10300/kernel/smp.c +++ b/arch/mn10300/kernel/smp.c @@ -494,14 +494,11 @@ void smp_send_stop(void) * @irq: The interrupt number. * @dev_id: The device ID. * - * We need do nothing here, since the scheduling will be effected on our way - * back through entry.S. - * * Returns IRQ_HANDLED to indicate we handled the interrupt successfully. */ static irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id) { - /* do nothing */ + scheduler_ipi(); return IRQ_HANDLED; } diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 69d63d354ef0..828305f19cff 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -155,10 +155,7 @@ ipi_interrupt(int irq, void *dev_id) case IPI_RESCHEDULE: smp_debug(100, KERN_DEBUG "CPU%d IPI_RESCHEDULE\n", this_cpu); - /* - * Reschedule callback. Everything to be - * done is done by the interrupt return path. - */ + scheduler_ipi(); break; case IPI_CALL_FUNC: diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index cbdbb14be4b0..9f9c204bef69 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -116,7 +116,7 @@ void smp_message_recv(int msg) generic_smp_call_function_interrupt(); break; case PPC_MSG_RESCHEDULE: - /* we notice need_resched on exit */ + scheduler_ipi(); break; case PPC_MSG_CALL_FUNC_SINGLE: generic_smp_call_function_single_interrupt(); @@ -146,7 +146,7 @@ static irqreturn_t call_function_action(int irq, void *data) static irqreturn_t reschedule_action(int irq, void *data) { - /* we just need the return path side effect of checking need_resched */ + scheduler_ipi(); return IRQ_HANDLED; } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 63a97db83f96..63c7d9ff220d 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -165,12 +165,12 @@ static void do_ext_call_interrupt(unsigned int ext_int_code, kstat_cpu(smp_processor_id()).irqs[EXTINT_IPI]++; /* * handle bit signal external calls - * - * For the ec_schedule signal we have to do nothing. All the work - * is done automatically when we return from the interrupt. 
*/ bits = xchg(&S390_lowcore.ext_call_fast, 0); + if (test_bit(ec_schedule, &bits)) + scheduler_ipi(); + if (test_bit(ec_call_function, &bits)) generic_smp_call_function_interrupt(); diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 509b36b45115..6207561ea34a 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -323,6 +324,7 @@ void smp_message_recv(unsigned int msg) generic_smp_call_function_interrupt(); break; case SMP_MSG_RESCHEDULE: + scheduler_ipi(); break; case SMP_MSG_FUNCTION_SINGLE: generic_smp_call_function_single_interrupt(); diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index 91c10fb70858..f95690c167b6 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@ -125,7 +125,9 @@ struct linux_prom_registers smp_penguin_ctable __cpuinitdata = { 0 }; void smp_send_reschedule(int cpu) { - /* See sparc64 */ + /* + * XXX missing reschedule IPI, see scheduler_ipi() + */ } void smp_send_stop(void) diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 3e94a8c23238..9478da7fdb3e 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1368,6 +1368,7 @@ void smp_send_reschedule(int cpu) void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs) { clear_softint(1 << irq); + scheduler_ipi(); } /* This is a nop because we capture all other cpus diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index a4293102ef81..c52224d5ed45 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c @@ -189,12 +189,8 @@ void flush_icache_range(unsigned long start, unsigned long end) /* Called when smp_send_reschedule() triggers IRQ_RESCHEDULE. */ static irqreturn_t handle_reschedule_ipi(int irq, void *token) { - /* - * Nothing to do here; when we return from interrupt, the - * rescheduling will occur there. But do bump the interrupt - * profiler count in the meantime. - */ __get_cpu_var(irq_stat).irq_resched_count++; + scheduler_ipi(); return IRQ_HANDLED; } diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c index 106bf27e2a9a..eefb107d2d73 100644 --- a/arch/um/kernel/smp.c +++ b/arch/um/kernel/smp.c @@ -173,7 +173,7 @@ void IPI_handler(int cpu) break; case 'R': - set_tsk_need_resched(current); + scheduler_ipi(); break; case 'S': diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 513deac7228d..013e7eba83bb 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -194,14 +194,13 @@ static void native_stop_other_cpus(int wait) } /* - * Reschedule call back. Nothing to do, - * all the work is done automatically when - * we return from the interrupt. + * Reschedule call back. */ void smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); inc_irq_stat(irq_resched_count); + scheduler_ipi(); /* * KVM uses this interrupt to force a cpu out of guest mode */ diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 30612441ed99..762b46ab14d5 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -46,13 +46,12 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); /* - * Reschedule call back. Nothing to do, - * all the work is done automatically when - * we return from the interrupt. + * Reschedule call back. 
*/ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) { inc_irq_stat(irq_resched_count); + scheduler_ipi(); return IRQ_HANDLED; } diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ec2c027e92c..758e27afcda5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2189,8 +2189,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from); extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP +static inline void scheduler_ipi(void) { } extern unsigned long wait_task_inactive(struct task_struct *, long match_state); #else +static inline void scheduler_ipi(void) { } static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state) { -- cgit v1.2.3 From 3ca7a440da394808571dad32d33d3bc0389982e6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:40 +0200 Subject: sched: Always provide p->on_cpu Always provide p->on_cpu so that we can determine if its on a cpu without having to lock the rq. Reviewed-by: Frank Rowand Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110405152728.785452014@chello.nl Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 +--- kernel/sched.c | 46 +++++++++++++++++++++++++++++----------------- 2 files changed, 30 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 758e27afcda5..3435837e89ff 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1200,9 +1200,7 @@ struct task_struct { int lock_depth; /* BKL lock depth */ #ifdef CONFIG_SMP -#ifdef __ARCH_WANT_UNLOCKED_CTXSW - int oncpu; -#endif + int on_cpu; #endif int prio, static_prio, normal_prio; diff --git a/kernel/sched.c b/kernel/sched.c index a187c3fe027b..cd2593e1a3ec 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -838,18 +838,39 @@ static inline int task_current(struct rq *rq, struct task_struct *p) return rq->curr == p; } -#ifndef __ARCH_WANT_UNLOCKED_CTXSW static inline int task_running(struct rq *rq, struct task_struct *p) { +#ifdef CONFIG_SMP + return p->on_cpu; +#else return task_current(rq, p); +#endif } +#ifndef __ARCH_WANT_UNLOCKED_CTXSW static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) { +#ifdef CONFIG_SMP + /* + * We can optimise this out completely for !SMP, because the + * SMP rebalancing from interrupt is the only thing that cares + * here. + */ + next->on_cpu = 1; +#endif } static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) { +#ifdef CONFIG_SMP + /* + * After ->on_cpu is cleared, the task can be moved to a different CPU. + * We must ensure this doesn't happen until the switch is completely + * finished. + */ + smp_wmb(); + prev->on_cpu = 0; +#endif #ifdef CONFIG_DEBUG_SPINLOCK /* this is a valid case when another task releases the spinlock */ rq->lock.owner = current; @@ -865,15 +886,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) } #else /* __ARCH_WANT_UNLOCKED_CTXSW */ -static inline int task_running(struct rq *rq, struct task_struct *p) -{ -#ifdef CONFIG_SMP - return p->oncpu; -#else - return task_current(rq, p); -#endif -} - static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) { #ifdef CONFIG_SMP @@ -882,7 +894,7 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) * SMP rebalancing from interrupt is the only thing that cares * here. 
*/ - next->oncpu = 1; + next->on_cpu = 1; #endif #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW raw_spin_unlock_irq(&rq->lock); @@ -895,12 +907,12 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) { #ifdef CONFIG_SMP /* - * After ->oncpu is cleared, the task can be moved to a different CPU. + * After ->on_cpu is cleared, the task can be moved to a different CPU. * We must ensure this doesn't happen until the switch is completely * finished. */ smp_wmb(); - prev->oncpu = 0; + prev->on_cpu = 0; #endif #ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW local_irq_enable(); @@ -2686,8 +2698,8 @@ void sched_fork(struct task_struct *p, int clone_flags) if (likely(sched_info_on())) memset(&p->sched_info, 0, sizeof(p->sched_info)); #endif -#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) - p->oncpu = 0; +#if defined(CONFIG_SMP) + p->on_cpu = 0; #endif #ifdef CONFIG_PREEMPT /* Want to start with kernel preemption disabled. */ @@ -5776,8 +5788,8 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) rcu_read_unlock(); rq->curr = rq->idle = idle; -#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) - idle->oncpu = 1; +#if defined(CONFIG_SMP) + idle->on_cpu = 1; #endif raw_spin_unlock_irqrestore(&rq->lock, flags); -- cgit v1.2.3 From c6eb3dda25892f1f974f5420f63e6721aab02f6f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:41 +0200 Subject: mutex: Use p->on_cpu for the adaptive spin Since we now have p->on_cpu unconditionally available, use it to re-implement mutex_spin_on_owner. Requested-by: Thomas Gleixner Reviewed-by: Frank Rowand Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110405152728.826338173@chello.nl --- include/linux/mutex.h | 2 +- include/linux/sched.h | 2 +- kernel/mutex-debug.c | 2 +- kernel/mutex-debug.h | 2 +- kernel/mutex.c | 2 +- kernel/mutex.h | 2 +- kernel/sched.c | 83 ++++++++++++++++++++------------------------------- 7 files changed, 39 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 94b48bd40dd7..c75471db576e 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -51,7 +51,7 @@ struct mutex { spinlock_t wait_lock; struct list_head wait_list; #if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP) - struct thread_info *owner; + struct task_struct *owner; #endif #ifdef CONFIG_DEBUG_MUTEXES const char *name; diff --git a/include/linux/sched.h b/include/linux/sched.h index 3435837e89ff..173850479e2c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -360,7 +360,7 @@ extern signed long schedule_timeout_interruptible(signed long timeout); extern signed long schedule_timeout_killable(signed long timeout); extern signed long schedule_timeout_uninterruptible(signed long timeout); asmlinkage void schedule(void); -extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner); +extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner); struct nsproxy; struct user_namespace; diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index ec815a960b5d..73da83aff418 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c @@ -75,7 +75,7 @@ void debug_mutex_unlock(struct mutex *lock) return; DEBUG_LOCKS_WARN_ON(lock->magic != lock); - DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); + DEBUG_LOCKS_WARN_ON(lock->owner != current); 
DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); mutex_clear_owner(lock); } diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h index 57d527a16f9d..0799fd3e4cfa 100644 --- a/kernel/mutex-debug.h +++ b/kernel/mutex-debug.h @@ -29,7 +29,7 @@ extern void debug_mutex_init(struct mutex *lock, const char *name, static inline void mutex_set_owner(struct mutex *lock) { - lock->owner = current_thread_info(); + lock->owner = current; } static inline void mutex_clear_owner(struct mutex *lock) diff --git a/kernel/mutex.c b/kernel/mutex.c index c4195fa98900..fe4706cb0c5b 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -160,7 +160,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, */ for (;;) { - struct thread_info *owner; + struct task_struct *owner; /* * If we own the BKL, then don't spin. The owner of diff --git a/kernel/mutex.h b/kernel/mutex.h index 67578ca48f94..4115fbf83b12 100644 --- a/kernel/mutex.h +++ b/kernel/mutex.h @@ -19,7 +19,7 @@ #ifdef CONFIG_SMP static inline void mutex_set_owner(struct mutex *lock) { - lock->owner = current_thread_info(); + lock->owner = current; } static inline void mutex_clear_owner(struct mutex *lock) diff --git a/kernel/sched.c b/kernel/sched.c index cd2593e1a3ec..55cc50323ce1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4173,70 +4173,53 @@ need_resched: EXPORT_SYMBOL(schedule); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER -/* - * Look out! "owner" is an entirely speculative pointer - * access and not reliable. - */ -int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner) -{ - unsigned int cpu; - struct rq *rq; - if (!sched_feat(OWNER_SPIN)) - return 0; +static inline bool owner_running(struct mutex *lock, struct task_struct *owner) +{ + bool ret = false; -#ifdef CONFIG_DEBUG_PAGEALLOC - /* - * Need to access the cpu field knowing that - * DEBUG_PAGEALLOC could have unmapped it if - * the mutex owner just released it and exited. - */ - if (probe_kernel_address(&owner->cpu, cpu)) - return 0; -#else - cpu = owner->cpu; -#endif + rcu_read_lock(); + if (lock->owner != owner) + goto fail; /* - * Even if the access succeeded (likely case), - * the cpu field may no longer be valid. + * Ensure we emit the owner->on_cpu, dereference _after_ checking + * lock->owner still matches owner, if that fails, owner might + * point to free()d memory, if it still matches, the rcu_read_lock() + * ensures the memory stays valid. */ - if (cpu >= nr_cpumask_bits) - return 0; + barrier(); - /* - * We need to validate that we can do a - * get_cpu() and that we have the percpu area. - */ - if (!cpu_online(cpu)) - return 0; + ret = owner->on_cpu; +fail: + rcu_read_unlock(); - rq = cpu_rq(cpu); + return ret; +} - for (;;) { - /* - * Owner changed, break to re-assess state. - */ - if (lock->owner != owner) { - /* - * If the lock has switched to a different owner, - * we likely have heavy contention. Return 0 to quit - * optimistic spinning and not contend further: - */ - if (lock->owner) - return 0; - break; - } +/* + * Look out! "owner" is an entirely speculative pointer + * access and not reliable. + */ +int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) +{ + if (!sched_feat(OWNER_SPIN)) + return 0; - /* - * Is that owner really running on that cpu? 
- */ - if (task_thread_info(rq->curr) != owner || need_resched()) + while (owner_running(lock, owner)) { + if (need_resched()) return 0; arch_mutex_cpu_relax(); } + /* + * If the owner changed to another task there is likely + * heavy contention, stop spinning. + */ + if (lock->owner) + return 0; + return 1; } #endif -- cgit v1.2.3 From fd2f4419b4cbe8fe90796df9617c355762afd6a4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:44 +0200 Subject: sched: Provide p->on_rq Provide a generic p->on_rq because the p->se.on_rq semantics are unfavourable for lockless wakeups but needed for sched_fair. In particular, p->on_rq is only cleared when we actually dequeue the task in schedule() and not on any random dequeue as done by things like __migrate_task() and __sched_setscheduler(). This also allows us to remove p->se usage from !sched_fair code. Reviewed-by: Frank Rowand Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110405152728.949545047@chello.nl --- include/linux/sched.h | 1 + kernel/sched.c | 38 ++++++++++++++++++++------------------ kernel/sched_debug.c | 2 +- kernel/sched_rt.c | 16 ++++++++-------- kernel/sched_stoptask.c | 2 +- 5 files changed, 31 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 173850479e2c..b33a700652dc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1202,6 +1202,7 @@ struct task_struct { #ifdef CONFIG_SMP int on_cpu; #endif + int on_rq; int prio, static_prio, normal_prio; unsigned int rt_priority; diff --git a/kernel/sched.c b/kernel/sched.c index 4481638f9178..dece28e505c9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1785,7 +1785,6 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int flags) update_rq_clock(rq); sched_info_queued(p); p->sched_class->enqueue_task(rq, p, flags); - p->se.on_rq = 1; } static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) @@ -1793,7 +1792,6 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) update_rq_clock(rq); sched_info_dequeued(p); p->sched_class->dequeue_task(rq, p, flags); - p->se.on_rq = 0; } /* @@ -2128,7 +2126,7 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) * A queue event has occurred, and we're going to schedule. In * this case, we can save a useless back to back clock update. */ - if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr)) + if (rq->curr->on_rq && test_tsk_need_resched(rq->curr)) rq->skip_clock_update = 1; } @@ -2203,7 +2201,7 @@ static bool migrate_task(struct task_struct *p, struct rq *rq) * If the task is not on a runqueue (and not running), then * the next wake-up will properly place the task. 
*/ - return p->se.on_rq || task_running(rq, p); + return p->on_rq || task_running(rq, p); } /* @@ -2263,7 +2261,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) rq = task_rq_lock(p, &flags); trace_sched_wait_task(p); running = task_running(rq, p); - on_rq = p->se.on_rq; + on_rq = p->on_rq; ncsw = 0; if (!match_state || p->state == match_state) ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ @@ -2444,6 +2442,7 @@ ttwu_stat(struct rq *rq, struct task_struct *p, int cpu, int wake_flags) static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags) { activate_task(rq, p, en_flags); + p->on_rq = 1; /* if a worker is waking up, notify workqueue */ if (p->flags & PF_WQ_WORKER) @@ -2506,7 +2505,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, cpu = task_cpu(p); - if (p->se.on_rq) + if (p->on_rq) goto out_running; orig_cpu = cpu; @@ -2583,7 +2582,7 @@ static void try_to_wake_up_local(struct task_struct *p) if (!(p->state & TASK_NORMAL)) return; - if (!p->se.on_rq) + if (!p->on_rq) ttwu_activate(rq, p, ENQUEUE_WAKEUP); ttwu_post_activation(p, rq, 0); @@ -2620,19 +2619,21 @@ int wake_up_state(struct task_struct *p, unsigned int state) */ static void __sched_fork(struct task_struct *p) { + p->on_rq = 0; + + p->se.on_rq = 0; p->se.exec_start = 0; p->se.sum_exec_runtime = 0; p->se.prev_sum_exec_runtime = 0; p->se.nr_migrations = 0; p->se.vruntime = 0; + INIT_LIST_HEAD(&p->se.group_node); #ifdef CONFIG_SCHEDSTATS memset(&p->se.statistics, 0, sizeof(p->se.statistics)); #endif INIT_LIST_HEAD(&p->rt.run_list); - p->se.on_rq = 0; - INIT_LIST_HEAD(&p->se.group_node); #ifdef CONFIG_PREEMPT_NOTIFIERS INIT_HLIST_HEAD(&p->preempt_notifiers); @@ -2750,6 +2751,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) rq = task_rq_lock(p, &flags); activate_task(rq, p, 0); + p->on_rq = 1; trace_sched_wakeup_new(p, true); check_preempt_curr(rq, p, WF_FORK); #ifdef CONFIG_SMP @@ -4051,7 +4053,7 @@ static inline void schedule_debug(struct task_struct *prev) static void put_prev_task(struct rq *rq, struct task_struct *prev) { - if (prev->se.on_rq) + if (prev->on_rq) update_rq_clock(rq); prev->sched_class->put_prev_task(rq, prev); } @@ -4126,7 +4128,9 @@ need_resched: if (to_wakeup) try_to_wake_up_local(to_wakeup); } + deactivate_task(rq, prev, DEQUEUE_SLEEP); + prev->on_rq = 0; /* * If we are going to sleep and we have plugged IO queued, make @@ -4695,7 +4699,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) trace_sched_pi_setprio(p, prio); oldprio = p->prio; prev_class = p->sched_class; - on_rq = p->se.on_rq; + on_rq = p->on_rq; running = task_current(rq, p); if (on_rq) dequeue_task(rq, p, 0); @@ -4743,7 +4747,7 @@ void set_user_nice(struct task_struct *p, long nice) p->static_prio = NICE_TO_PRIO(nice); goto out_unlock; } - on_rq = p->se.on_rq; + on_rq = p->on_rq; if (on_rq) dequeue_task(rq, p, 0); @@ -4877,8 +4881,6 @@ static struct task_struct *find_process_by_pid(pid_t pid) static void __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) { - BUG_ON(p->se.on_rq); - p->policy = policy; p->rt_priority = prio; p->normal_prio = normal_prio(p); @@ -5044,7 +5046,7 @@ recheck: raw_spin_unlock_irqrestore(&p->pi_lock, flags); goto recheck; } - on_rq = p->se.on_rq; + on_rq = p->on_rq; running = task_current(rq, p); if (on_rq) deactivate_task(rq, p, 0); @@ -5965,7 +5967,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) * If we're not on a rq, the next wake-up will ensure we're * 
placed properly. */ - if (p->se.on_rq) { + if (p->on_rq) { deactivate_task(rq_src, p, 0); set_task_cpu(p, dest_cpu); activate_task(rq_dest, p, 0); @@ -8339,7 +8341,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p) int old_prio = p->prio; int on_rq; - on_rq = p->se.on_rq; + on_rq = p->on_rq; if (on_rq) deactivate_task(rq, p, 0); __setscheduler(rq, p, SCHED_NORMAL, 0); @@ -8682,7 +8684,7 @@ void sched_move_task(struct task_struct *tsk) rq = task_rq_lock(tsk, &flags); running = task_current(rq, tsk); - on_rq = tsk->se.on_rq; + on_rq = tsk->on_rq; if (on_rq) dequeue_task(rq, tsk, 0); diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 7bacd83a4158..3669bec6e130 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -152,7 +152,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) read_lock_irqsave(&tasklist_lock, flags); do_each_thread(g, p) { - if (!p->se.on_rq || task_cpu(p) != rq_cpu) + if (!p->on_rq || task_cpu(p) != rq_cpu) continue; print_task(m, rq, p); diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index e7cebdc65f82..9ca4f5f879c4 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1136,7 +1136,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) * The previous task needs to be made eligible for pushing * if it is still active */ - if (p->se.on_rq && p->rt.nr_cpus_allowed > 1) + if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); } @@ -1287,7 +1287,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) || task_running(rq, task) || - !task->se.on_rq)) { + !task->on_rq)) { raw_spin_unlock(&lowest_rq->lock); lowest_rq = NULL; @@ -1321,7 +1321,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq) BUG_ON(task_current(rq, p)); BUG_ON(p->rt.nr_cpus_allowed <= 1); - BUG_ON(!p->se.on_rq); + BUG_ON(!p->on_rq); BUG_ON(!rt_task(p)); return p; @@ -1467,7 +1467,7 @@ static int pull_rt_task(struct rq *this_rq) */ if (p && (p->prio < this_rq->rt.highest_prio.curr)) { WARN_ON(p == src_rq->curr); - WARN_ON(!p->se.on_rq); + WARN_ON(!p->on_rq); /* * There's a chance that p is higher in priority @@ -1538,7 +1538,7 @@ static void set_cpus_allowed_rt(struct task_struct *p, * Update the migration status of the RQ if we have an RT task * which is running AND changing its weight value. */ - if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) { + if (p->on_rq && (weight != p->rt.nr_cpus_allowed)) { struct rq *rq = task_rq(p); if (!task_current(rq, p)) { @@ -1608,7 +1608,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p) * we may need to handle the pulling of RT tasks * now. */ - if (p->se.on_rq && !rq->rt.rt_nr_running) + if (p->on_rq && !rq->rt.rt_nr_running) pull_rt_task(rq); } @@ -1638,7 +1638,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) * If that current running task is also an RT task * then see if we can move to another run queue. 
*/ - if (p->se.on_rq && rq->curr != p) { + if (p->on_rq && rq->curr != p) { #ifdef CONFIG_SMP if (rq->rt.overloaded && push_rt_task(rq) && /* Don't resched if we changed runqueues */ @@ -1657,7 +1657,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) static void prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) { - if (!p->se.on_rq) + if (!p->on_rq) return; if (rq->curr == p) { diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c index 1ba2bd40fdac..f607de42e6fc 100644 --- a/kernel/sched_stoptask.c +++ b/kernel/sched_stoptask.c @@ -26,7 +26,7 @@ static struct task_struct *pick_next_task_stop(struct rq *rq) { struct task_struct *stop = rq->stop; - if (stop && stop->se.on_rq) + if (stop && stop->on_rq) return stop; return NULL; -- cgit v1.2.3 From 7608dec2ce2004c234339bef8c8074e5e601d0e9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:46 +0200 Subject: sched: Drop the rq argument to sched_class::select_task_rq() In preparation of calling select_task_rq() without rq->lock held, drop the dependency on the rq argument. Reviewed-by: Frank Rowand Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110405152729.031077745@chello.nl Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +-- kernel/sched.c | 20 +++++++++++--------- kernel/sched_fair.c | 2 +- kernel/sched_idletask.c | 2 +- kernel/sched_rt.c | 38 ++++++++++++++++++++++++++------------ kernel/sched_stoptask.c | 3 +-- 6 files changed, 41 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b33a700652dc..ff4e2f9c24a7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1067,8 +1067,7 @@ struct sched_class { void (*put_prev_task) (struct rq *rq, struct task_struct *p); #ifdef CONFIG_SMP - int (*select_task_rq)(struct rq *rq, struct task_struct *p, - int sd_flag, int flags); + int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags); void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); void (*post_schedule) (struct rq *this_rq); diff --git a/kernel/sched.c b/kernel/sched.c index d398f2f0a3c9..d4b815d345b3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2195,13 +2195,15 @@ static int migration_cpu_stop(void *data); * The task's runqueue lock must be held. * Returns true if you have to wait for migration thread. */ -static bool migrate_task(struct task_struct *p, struct rq *rq) +static bool need_migrate_task(struct task_struct *p) { /* * If the task is not on a runqueue (and not running), then * the next wake-up will properly place the task. */ - return p->on_rq || task_running(rq, p); + bool running = p->on_rq || p->on_cpu; + smp_rmb(); /* finish_lock_switch() */ + return running; } /* @@ -2376,9 +2378,9 @@ static int select_fallback_rq(int cpu, struct task_struct *p) * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable. 
*/ static inline -int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags) +int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) { - int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags); + int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags); /* * In order not to call set_task_cpu() on a blocking task we need @@ -2533,7 +2535,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, en_flags |= ENQUEUE_WAKING; } - cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags); + cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); if (cpu != orig_cpu) set_task_cpu(p, cpu); __task_rq_unlock(rq); @@ -2744,7 +2746,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) * We set TASK_WAKING so that select_task_rq() can drop rq->lock * without people poking at ->cpus_allowed. */ - cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0); + cpu = select_task_rq(p, SD_BALANCE_FORK, 0); set_task_cpu(p, cpu); p->state = TASK_RUNNING; @@ -3474,7 +3476,7 @@ void sched_exec(void) int dest_cpu; rq = task_rq_lock(p, &flags); - dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0); + dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0); if (dest_cpu == smp_processor_id()) goto unlock; @@ -3482,7 +3484,7 @@ void sched_exec(void) * select_task_rq() can race against ->cpus_allowed */ if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) && - likely(cpu_active(dest_cpu)) && migrate_task(p, rq)) { + likely(cpu_active(dest_cpu)) && need_migrate_task(p)) { struct migration_arg arg = { p, dest_cpu }; task_rq_unlock(rq, &flags); @@ -5911,7 +5913,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) goto out; dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); - if (migrate_task(p, rq)) { + if (need_migrate_task(p)) { struct migration_arg arg = { p, dest_cpu }; /* Need help from migration thread: drop lock and wait. */ __task_rq_unlock(rq); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 4ee50f0af8d1..96b2c95ac356 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1657,7 +1657,7 @@ static int select_idle_sibling(struct task_struct *p, int target) * preempt must be disabled. 
*/ static int -select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_flags) +select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) { struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL; int cpu = smp_processor_id(); diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index a776a6396427..0a51882534ea 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -7,7 +7,7 @@ #ifdef CONFIG_SMP static int -select_task_rq_idle(struct rq *rq, struct task_struct *p, int sd_flag, int flags) +select_task_rq_idle(struct task_struct *p, int sd_flag, int flags) { return task_cpu(p); /* IDLE tasks as never migrated */ } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 9ca4f5f879c4..19ecb3127379 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -977,13 +977,23 @@ static void yield_task_rt(struct rq *rq) static int find_lowest_rq(struct task_struct *task); static int -select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags) +select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) { + struct task_struct *curr; + struct rq *rq; + int cpu; + if (sd_flag != SD_BALANCE_WAKE) return smp_processor_id(); + cpu = task_cpu(p); + rq = cpu_rq(cpu); + + rcu_read_lock(); + curr = ACCESS_ONCE(rq->curr); /* unlocked access */ + /* - * If the current task is an RT task, then + * If the current task on @p's runqueue is an RT task, then * try to see if we can wake this RT task up on another * runqueue. Otherwise simply start this RT task * on its current runqueue. @@ -997,21 +1007,25 @@ select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags) * lock? * * For equal prio tasks, we just let the scheduler sort it out. + * + * Otherwise, just let it ride on the affined RQ and the + * post-schedule router will push the preempted task away + * + * This test is optimistic, if we get it wrong the load-balancer + * will have to sort it out. */ - if (unlikely(rt_task(rq->curr)) && - (rq->curr->rt.nr_cpus_allowed < 2 || - rq->curr->prio < p->prio) && + if (curr && unlikely(rt_task(curr)) && + (curr->rt.nr_cpus_allowed < 2 || + curr->prio < p->prio) && (p->rt.nr_cpus_allowed > 1)) { - int cpu = find_lowest_rq(p); + int target = find_lowest_rq(p); - return (cpu == -1) ? task_cpu(p) : cpu; + if (target != -1) + cpu = target; } + rcu_read_unlock(); - /* - * Otherwise, just let it ride on the affined RQ and the - * post-schedule router will push the preempted task away - */ - return task_cpu(p); + return cpu; } static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c index f607de42e6fc..6f437632afab 100644 --- a/kernel/sched_stoptask.c +++ b/kernel/sched_stoptask.c @@ -9,8 +9,7 @@ #ifdef CONFIG_SMP static int -select_task_rq_stop(struct rq *rq, struct task_struct *p, - int sd_flag, int flags) +select_task_rq_stop(struct task_struct *p, int sd_flag, int flags) { return task_cpu(p); /* stop tasks as never migrate */ } -- cgit v1.2.3 From 74f8e4b2335de45485b8d5b31a504747f13c8070 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:47 +0200 Subject: sched: Remove rq argument to sched_class::task_waking() In preparation of calling this without rq->lock held, remove the dependency on the rq argument. 
Reviewed-by: Frank Rowand Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110405152729.071474242@chello.nl Signed-off-by: Ingo Molnar --- include/linux/sched.h | 10 +++++++--- kernel/sched.c | 2 +- kernel/sched_fair.c | 4 +++- 3 files changed, 11 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ff4e2f9c24a7..7f5732f8c618 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1048,8 +1048,12 @@ struct sched_domain; #define WF_FORK 0x02 /* child wakeup after fork */ #define ENQUEUE_WAKEUP 1 -#define ENQUEUE_WAKING 2 -#define ENQUEUE_HEAD 4 +#define ENQUEUE_HEAD 2 +#ifdef CONFIG_SMP +#define ENQUEUE_WAKING 4 /* sched_class::task_waking was called */ +#else +#define ENQUEUE_WAKING 0 +#endif #define DEQUEUE_SLEEP 1 @@ -1071,7 +1075,7 @@ struct sched_class { void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); void (*post_schedule) (struct rq *this_rq); - void (*task_waking) (struct rq *this_rq, struct task_struct *task); + void (*task_waking) (struct task_struct *task); void (*task_woken) (struct rq *this_rq, struct task_struct *task); void (*set_cpus_allowed)(struct task_struct *p, diff --git a/kernel/sched.c b/kernel/sched.c index d4b815d345b3..46f42cac4eb1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2531,7 +2531,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, p->state = TASK_WAKING; if (p->sched_class->task_waking) { - p->sched_class->task_waking(rq, p); + p->sched_class->task_waking(p); en_flags |= ENQUEUE_WAKING; } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 96b2c95ac356..ad4c414f456d 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1372,11 +1372,13 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) #ifdef CONFIG_SMP -static void task_waking_fair(struct rq *rq, struct task_struct *p) +static void task_waking_fair(struct task_struct *p) { struct sched_entity *se = &p->se; struct cfs_rq *cfs_rq = cfs_rq_of(se); + lockdep_assert_held(&task_rq(p)->lock); + se->vruntime -= cfs_rq->min_vruntime; } -- cgit v1.2.3 From a8e4f2eaecc9bfa4954adf79a04f4f22fddd829c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:49 +0200 Subject: sched: Delay task_contributes_to_load() In prepratation of having to call task_contributes_to_load() without holding rq->lock, we need to store the result until we do and can update the rq accounting accordingly. 
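
For illustration only, the pattern being prepared for can be sketched as follows (simplified from the try_to_wake_up() hunk below): the flag caches the predicate while it is still cheap to evaluate, and it is applied once the runqueue accounting can actually be updated.

    p->sched_contributes_to_load = !!task_contributes_to_load(p);
    p->state = TASK_WAKING;

    /* ... later, once the target rq is locked ... */
    if (p->sched_contributes_to_load)
            rq->nr_uninterruptible--;
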
Reviewed-by: Frank Rowand Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110405152729.151523907@chello.nl --- include/linux/sched.h | 1 + kernel/sched.c | 16 ++++------------ 2 files changed, 5 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 7f5732f8c618..25c50317ddc1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1273,6 +1273,7 @@ struct task_struct { /* Revert to default priority/policy when forking */ unsigned sched_reset_on_fork:1; + unsigned sched_contributes_to_load:1; pid_t pid; pid_t tgid; diff --git a/kernel/sched.c b/kernel/sched.c index 7a5eb2620785..fd32b78c123c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2519,18 +2519,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, if (unlikely(task_running(rq, p))) goto out_activate; - /* - * In order to handle concurrent wakeups and release the rq->lock - * we put the task in TASK_WAKING state. - * - * First fix up the nr_uninterruptible count: - */ - if (task_contributes_to_load(p)) { - if (likely(cpu_online(orig_cpu))) - rq->nr_uninterruptible--; - else - this_rq()->nr_uninterruptible--; - } + p->sched_contributes_to_load = !!task_contributes_to_load(p); p->state = TASK_WAKING; if (p->sched_class->task_waking) { @@ -2555,6 +2544,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, WARN_ON(task_cpu(p) != cpu); WARN_ON(p->state != TASK_WAKING); + if (p->sched_contributes_to_load) + rq->nr_uninterruptible--; + out_activate: #endif /* CONFIG_SMP */ ttwu_activate(rq, p, en_flags); -- cgit v1.2.3 From 317f394160e9beb97d19a84c39b7e5eb3d7815a8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:58 +0200 Subject: sched: Move the second half of ttwu() to the remote cpu Now that we've removed the rq->lock requirement from the first part of ttwu() and can compute placement without holding any rq->lock, ensure we execute the second half of ttwu() on the actual cpu we want the task to run on. This avoids having to take rq->lock and doing the task enqueue remotely, saving lots on cacheline transfers. 
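
Reduced to a sketch (taken from the mechanism added below, with statistics and feature checks omitted): the waker pushes the task onto the target CPU's lock-free wake_list and sends an IPI only when the list was previously empty; the target CPU drains the list under its own rq->lock from scheduler_ipi().

    /* waker side */
    for (;;) {
            struct task_struct *old = rq->wake_list;

            p->wake_entry = old;
            if (cmpxchg(&rq->wake_list, old, p) == old) {
                    if (!old)
                            smp_send_reschedule(cpu);
                    break;
            }
    }

    /* target CPU, called from scheduler_ipi() */
    struct task_struct *list = xchg(&rq->wake_list, NULL);

    raw_spin_lock(&rq->lock);
    while (list) {
            struct task_struct *p = list;

            list = list->wake_entry;
            ttwu_do_activate(rq, p, 0);
    }
    raw_spin_unlock(&rq->lock);
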
As measured using: http://oss.oracle.com/~mason/sembench.c $ for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor ; do echo performance > $i; done $ echo 4096 32000 64 128 > /proc/sys/kernel/sem $ ./sembench -t 2048 -w 1900 -o 0 unpatched: run time 30 seconds 647278 worker burns per second patched: run time 30 seconds 816715 worker burns per second Reviewed-by: Frank Rowand Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110405152729.515897185@chello.nl --- include/linux/sched.h | 3 ++- init/Kconfig | 5 +++++ kernel/sched.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ kernel/sched_features.h | 6 ++++++ 4 files changed, 69 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 25c50317ddc1..e09dafa6e149 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1203,6 +1203,7 @@ struct task_struct { int lock_depth; /* BKL lock depth */ #ifdef CONFIG_SMP + struct task_struct *wake_entry; int on_cpu; #endif int on_rq; @@ -2192,7 +2193,7 @@ extern void set_task_comm(struct task_struct *tsk, char *from); extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP -static inline void scheduler_ipi(void) { } +void scheduler_ipi(void); extern unsigned long wait_task_inactive(struct task_struct *, long match_state); #else static inline void scheduler_ipi(void) { } diff --git a/init/Kconfig b/init/Kconfig index 56240e724d9a..32745bfe059e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -827,6 +827,11 @@ config SCHED_AUTOGROUP desktop applications. Task group autogeneration is currently based upon task session. +config SCHED_TTWU_QUEUE + bool + depends on !SPARC32 + default y + config MM_OWNER bool diff --git a/kernel/sched.c b/kernel/sched.c index 7d8b85fcdf06..9e3ede120e81 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -556,6 +556,10 @@ struct rq { unsigned int ttwu_count; unsigned int ttwu_local; #endif + +#ifdef CONFIG_SMP + struct task_struct *wake_list; +#endif }; static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); @@ -2516,10 +2520,61 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) return ret; } +#ifdef CONFIG_SMP +static void sched_ttwu_pending(void) +{ + struct rq *rq = this_rq(); + struct task_struct *list = xchg(&rq->wake_list, NULL); + + if (!list) + return; + + raw_spin_lock(&rq->lock); + + while (list) { + struct task_struct *p = list; + list = list->wake_entry; + ttwu_do_activate(rq, p, 0); + } + + raw_spin_unlock(&rq->lock); +} + +void scheduler_ipi(void) +{ + sched_ttwu_pending(); +} + +static void ttwu_queue_remote(struct task_struct *p, int cpu) +{ + struct rq *rq = cpu_rq(cpu); + struct task_struct *next = rq->wake_list; + + for (;;) { + struct task_struct *old = next; + + p->wake_entry = next; + next = cmpxchg(&rq->wake_list, old, p); + if (next == old) + break; + } + + if (!next) + smp_send_reschedule(cpu); +} +#endif + static void ttwu_queue(struct task_struct *p, int cpu) { struct rq *rq = cpu_rq(cpu); +#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE) + if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { + ttwu_queue_remote(p, cpu); + return; + } +#endif + raw_spin_lock(&rq->lock); ttwu_do_activate(rq, p, 0); raw_spin_unlock(&rq->lock); @@ -6331,6 +6386,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) #ifdef CONFIG_HOTPLUG_CPU case CPU_DYING: + sched_ttwu_pending(); /* 
Update our root-domain */ raw_spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 68e69acc29b9..be40f7371ee1 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -64,3 +64,9 @@ SCHED_FEAT(OWNER_SPIN, 1) * Decrement CPU power based on irq activity */ SCHED_FEAT(NONIRQ_POWER, 1) + +/* + * Queue remote wakeups on the target CPU and process them + * using the scheduler IPI. Reduces rq->lock contention/bounces. + */ +SCHED_FEAT(TTWU_QUEUE, 1) -- cgit v1.2.3 From 38a2f37258f9e2ae3f6e4241e01088be8dfaf4e9 Mon Sep 17 00:00:00 2001 From: huajun li Date: Wed, 13 Apr 2011 15:43:32 +0000 Subject: usbnet: Fix up 'FLAG_POINTTOPOINT' and 'FLAG_MULTI_PACKET' overlaps. USB tethering does not work anymore since 2.6.39-rc2, but it's okay in -rc1. The root cause is the new added mask code 'FLAG_POINTTOPOINT' overlaps 'FLAG_MULTI_PACKET' in include/linux/usb/usbnet.h, this causes logic issue in rx_process(). This patch cleans up the overlap. Reported-and-Tested-by: Gottfried Haider Signed-off-by: Huajun Li Signed-off-by: David S. Miller --- include/linux/usb/usbnet.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 3c7329b8ea0e..0e1855079fbb 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -103,8 +103,8 @@ struct driver_info { * Indicates to usbnet, that USB driver accumulates multiple IP packets. * Affects statistic (counters) and short packet handling. */ -#define FLAG_MULTI_PACKET 0x1000 -#define FLAG_RX_ASSEMBLE 0x2000 /* rx packets may span >1 frames */ +#define FLAG_MULTI_PACKET 0x2000 +#define FLAG_RX_ASSEMBLE 0x4000 /* rx packets may span >1 frames */ /* init device ... 
can sleep, or cause probe() failure */ int (*bind)(struct usbnet *, struct usb_interface *); -- cgit v1.2.3 From feddbb34ebd75e9b6bf573b852079e327a88c07a Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 28 Mar 2011 10:12:25 +0300 Subject: UBI: fix minor stylistic issues Fix checkpatch.pl errors and warnings: * space before tab * line over 80 characters * include linux/ioctl.h instead of asm/ioctl.h Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/cdev.c | 4 ++-- drivers/mtd/ubi/debug.c | 18 +++++++++--------- drivers/mtd/ubi/io.c | 4 ++-- drivers/mtd/ubi/scan.c | 2 +- drivers/mtd/ubi/ubi.h | 4 ++-- drivers/mtd/ubi/wl.c | 3 ++- include/linux/mtd/ubi.h | 4 ++-- include/mtd/ubi-user.h | 6 +++--- 8 files changed, 23 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index 4119cb857c97..191f3bb3c41a 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -115,7 +115,7 @@ static int vol_cdev_open(struct inode *inode, struct file *file) mode = UBI_READONLY; dbg_gen("open device %d, volume %d, mode %d", - ubi_num, vol_id, mode); + ubi_num, vol_id, mode); desc = ubi_open_volume(ubi_num, vol_id, mode); if (IS_ERR(desc)) @@ -158,7 +158,7 @@ static loff_t vol_cdev_llseek(struct file *file, loff_t offset, int origin) loff_t new_offset; if (vol->updating) { - /* Update is in progress, seeking is prohibited */ + /* Update is in progress, seeking is prohibited */ dbg_err("updating"); return -EBUSY; } diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c index d4d07e5f138f..0cd5beabe9c9 100644 --- a/drivers/mtd/ubi/debug.c +++ b/drivers/mtd/ubi/debug.c @@ -75,15 +75,15 @@ void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) { printk(KERN_DEBUG "Volume identifier header dump:\n"); printk(KERN_DEBUG "\tmagic %08x\n", be32_to_cpu(vid_hdr->magic)); - printk(KERN_DEBUG "\tversion %d\n", (int)vid_hdr->version); - printk(KERN_DEBUG "\tvol_type %d\n", (int)vid_hdr->vol_type); - printk(KERN_DEBUG "\tcopy_flag %d\n", (int)vid_hdr->copy_flag); - printk(KERN_DEBUG "\tcompat %d\n", (int)vid_hdr->compat); - printk(KERN_DEBUG "\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id)); - printk(KERN_DEBUG "\tlnum %d\n", be32_to_cpu(vid_hdr->lnum)); - printk(KERN_DEBUG "\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size)); - printk(KERN_DEBUG "\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs)); - printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad)); + printk(KERN_DEBUG "\tversion %d\n", (int)vid_hdr->version); + printk(KERN_DEBUG "\tvol_type %d\n", (int)vid_hdr->vol_type); + printk(KERN_DEBUG "\tcopy_flag %d\n", (int)vid_hdr->copy_flag); + printk(KERN_DEBUG "\tcompat %d\n", (int)vid_hdr->compat); + printk(KERN_DEBUG "\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id)); + printk(KERN_DEBUG "\tlnum %d\n", be32_to_cpu(vid_hdr->lnum)); + printk(KERN_DEBUG "\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size)); + printk(KERN_DEBUG "\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs)); + printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad)); printk(KERN_DEBUG "\tsqnum %llu\n", (unsigned long long)be64_to_cpu(vid_hdr->sqnum)); printk(KERN_DEBUG "\thdr_crc %08x\n", be32_to_cpu(vid_hdr->hdr_crc)); diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index e347cc4388ed..d58ceb1ca8fd 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -189,8 +189,8 @@ retry: } if (retries++ < UBI_IO_RETRIES) { - dbg_io("error %d%s while reading %d bytes from PEB %d:%d," - " read only %zd bytes, retry", + dbg_io("error 
%d%s while reading %d bytes from PEB " + "%d:%d, read only %zd bytes, retry", err, errstr, len, pnum, offset, read); yield(); goto retry; diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index d2d12ab7def4..2135a53732ff 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -1103,7 +1103,7 @@ static int check_what_we_have(struct ubi_device *ubi, struct ubi_scan_info *si) * otherwise, only print a warning. */ if (si->corr_peb_count >= max_corr) { - ubi_err("too many corrupted PEBs, refusing this device"); + ubi_err("too many corrupted PEBs, refusing"); return -EINVAL; } } diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index f1be8b79663c..c6c22295898e 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -341,8 +341,8 @@ struct ubi_wl_entry; * protected from the wear-leveling worker) * @pq_head: protection queue head * @wl_lock: protects the @used, @free, @pq, @pq_head, @lookuptbl, @move_from, - * @move_to, @move_to_put @erase_pending, @wl_scheduled, @works, - * @erroneous, and @erroneous_peb_count fields + * @move_to, @move_to_put @erase_pending, @wl_scheduled, @works, + * @erroneous, and @erroneous_peb_count fields * @move_mutex: serializes eraseblock moves * @work_sem: synchronizes the WL worker with use tasks * @wl_scheduled: non-zero if the wear-leveling was scheduled diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index b4cf57db2556..ff2c4956eeff 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1570,7 +1570,8 @@ void ubi_wl_close(struct ubi_device *ubi) * @ec: the erase counter to check * * This function returns zero if the erase counter of physical eraseblock @pnum - * is equivalent to @ec, and a negative error code if not or if an error occurred. + * is equivalent to @ec, and a negative error code if not or if an error + * occurred. */ static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec) { diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h index 84854edf4436..15da0e99f48a 100644 --- a/include/linux/mtd/ubi.h +++ b/include/linux/mtd/ubi.h @@ -21,7 +21,7 @@ #ifndef __LINUX_UBI_H__ #define __LINUX_UBI_H__ -#include +#include #include #include @@ -87,7 +87,7 @@ enum { * physical eraseblock size and on how much bytes UBI headers consume. But * because of the volume alignment (@alignment), the usable size of logical * eraseblocks if a volume may be less. The following equation is true: - * @usable_leb_size = LEB size - (LEB size mod @alignment), + * @usable_leb_size = LEB size - (LEB size mod @alignment), * where LEB size is the logical eraseblock size defined by the UBI device. * * The alignment is multiple to the minimal flash input/output unit size or %1 diff --git a/include/mtd/ubi-user.h b/include/mtd/ubi-user.h index a3903423c005..3c4109777aff 100644 --- a/include/mtd/ubi-user.h +++ b/include/mtd/ubi-user.h @@ -406,9 +406,9 @@ struct ubi_map_req { * @value: value to set */ struct ubi_set_vol_prop_req { - __u8 property; - __u8 padding[7]; - __u64 value; + __u8 property; + __u8 padding[7]; + __u64 value; } __packed; #endif /* __UBI_USER_H__ */ -- cgit v1.2.3 From fce55922f5299a04c0a56b170a141fab34f13465 Mon Sep 17 00:00:00 2001 From: "Allan, Bruce W" Date: Wed, 13 Apr 2011 13:09:10 +0000 Subject: ethtool: allow custom interval for physical identification When physical identification of an adapter is done by toggling the mechanism on and off through software utilizing the set_phys_id operation, it is done with a fixed duration for both on and off states. 
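
As a hypothetical driver-side illustration of the convention this patch introduces (described just below); the foo_* names are made up, only the ETHTOOL_ID_* handling matters:

    static int foo_set_phys_id(struct net_device *dev,
                               enum ethtool_phys_id_state state)
    {
            switch (state) {
            case ETHTOOL_ID_ACTIVE:
                    return 2;       /* ask for two on/off cycles per second */
            case ETHTOOL_ID_ON:
                    foo_led(dev, true);
                    break;
            case ETHTOOL_ID_OFF:
                    foo_led(dev, false);
                    break;
            case ETHTOOL_ID_INACTIVE:
                    foo_led_restore(dev);
                    break;
            }
            return 0;
    }
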
Some drivers may want to set a custom duration for the on/off intervals. This patch changes the API so the return code from the driver's entry point when it is called with ETHTOOL_ID_ACTIVE can specify the frequency at which to cycle the on/off states, and updates the drivers that have already been converted to use the new set_phys_id and use the synchronous method for identifying an adapter. The physical identification frequency set in the updated drivers is based on how it was done prior to the introduction of set_phys_id. Compile tested only. Also fixes a compiler warning in sfc. v2: drivers do not return -EINVAL for ETHOOL_ID_ACTIVE v3: fold patchset into single patch and cleanup per Ben's feedback Signed-off-by: Bruce Allan Cc: Ben Hutchings Cc: Sathya Perla Cc: Subbu Seetharaman Cc: Ajit Khaparde Cc: Michael Chan Cc: Eilon Greenstein Cc: Divy Le Ray Cc: Don Fry Cc: Jon Mason Cc: Solarflare linux maintainers Cc: Steve Hodgson Cc: Stephen Hemminger Cc: Matt Carlson Acked-by: Jon Mason Acked-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/benet/be_ethtool.c | 2 +- drivers/net/bnx2.c | 2 +- drivers/net/bnx2x/bnx2x_ethtool.c | 2 +- drivers/net/cxgb3/cxgb3_main.c | 2 +- drivers/net/ewrk3.c | 2 +- drivers/net/niu.c | 2 +- drivers/net/pcnet32.c | 2 +- drivers/net/s2io.c | 2 +- drivers/net/sfc/ethtool.c | 6 +++--- drivers/net/skge.c | 2 +- drivers/net/sky2.c | 2 +- drivers/net/tg3.c | 2 +- include/linux/ethtool.h | 6 ++++-- net/core/ethtool.c | 31 ++++++++++++++++--------------- 14 files changed, 34 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/benet/be_ethtool.c b/drivers/net/benet/be_ethtool.c index 96f5502e0ef7..80226e4801f3 100644 --- a/drivers/net/benet/be_ethtool.c +++ b/drivers/net/benet/be_ethtool.c @@ -516,7 +516,7 @@ be_set_phys_id(struct net_device *netdev, case ETHTOOL_ID_ACTIVE: be_cmd_get_beacon_state(adapter, adapter->hba_port_num, &adapter->beacon_state); - return -EINVAL; + return 1; /* cycle on/off once per second */ case ETHTOOL_ID_ON: be_cmd_set_beacon_state(adapter, adapter->hba_port_num, 0, 0, diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 0a52079bafef..bf729ee6acbd 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -7473,7 +7473,7 @@ bnx2_set_phys_id(struct net_device *dev, enum ethtool_phys_id_state state) bp->leds_save = REG_RD(bp, BNX2_MISC_CFG); REG_WR(bp, BNX2_MISC_CFG, BNX2_MISC_CFG_LEDMODE_MAC); - return -EINVAL; + return 1; /* cycle on/off once per second */ case ETHTOOL_ID_ON: REG_WR(bp, BNX2_EMAC_LED, BNX2_EMAC_LED_OVERRIDE | diff --git a/drivers/net/bnx2x/bnx2x_ethtool.c b/drivers/net/bnx2x/bnx2x_ethtool.c index ad7d91e499f4..0a5e88d6ba2c 100644 --- a/drivers/net/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/bnx2x/bnx2x_ethtool.c @@ -2025,7 +2025,7 @@ static int bnx2x_set_phys_id(struct net_device *dev, switch (state) { case ETHTOOL_ID_ACTIVE: - return -EINVAL; + return 1; /* cycle on/off once per second */ case ETHTOOL_ID_ON: bnx2x_set_led(&bp->link_params, &bp->link_vars, diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c index 802c7a7c3b25..a087e0691dce 100644 --- a/drivers/net/cxgb3/cxgb3_main.c +++ b/drivers/net/cxgb3/cxgb3_main.c @@ -1757,7 +1757,7 @@ static int set_phys_id(struct net_device *dev, switch (state) { case ETHTOOL_ID_ACTIVE: - return -EINVAL; + return 1; /* cycle on/off once per second */ case ETHTOOL_ID_OFF: t3_set_reg_field(adapter, A_T3DBG_GPIO_EN, F_GPIO0_OUT_VAL, 0); diff --git a/drivers/net/ewrk3.c b/drivers/net/ewrk3.c index 
c7ce4438e923..17b6027d8be8 100644 --- a/drivers/net/ewrk3.c +++ b/drivers/net/ewrk3.c @@ -1618,7 +1618,7 @@ static int ewrk3_set_phys_id(struct net_device *dev, /* Prevent ISR from twiddling the LED */ lp->led_mask = 0; spin_unlock_irq(&lp->hw_lock); - return -EINVAL; + return 2; /* cycle on/off twice per second */ case ETHTOOL_ID_ON: cr = inb(EWRK3_CR); diff --git a/drivers/net/niu.c b/drivers/net/niu.c index 3fa1e9cdb4a8..ea2272f0f37e 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -7896,7 +7896,7 @@ static int niu_set_phys_id(struct net_device *dev, switch (state) { case ETHTOOL_ID_ACTIVE: np->orig_led_state = niu_led_state_save(np); - return -EINVAL; + return 1; /* cycle on/off once per second */ case ETHTOOL_ID_ON: niu_force_led(np, 1); diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c index e89afb929740..0a1efbae1bc0 100644 --- a/drivers/net/pcnet32.c +++ b/drivers/net/pcnet32.c @@ -1038,7 +1038,7 @@ static int pcnet32_set_phys_id(struct net_device *dev, for (i = 4; i < 8; i++) lp->save_regs[i - 4] = a->read_bcr(ioaddr, i); spin_unlock_irqrestore(&lp->lock, flags); - return -EINVAL; + return 2; /* cycle on/off twice per second */ case ETHTOOL_ID_ON: case ETHTOOL_ID_OFF: diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 2d5cc6142c04..2302d9743744 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -5541,7 +5541,7 @@ static int s2io_ethtool_set_led(struct net_device *dev, switch (state) { case ETHTOOL_ID_ACTIVE: sp->adapt_ctrl_org = readq(&bar0->gpio_control); - return -EINVAL; + return 1; /* cycle on/off once per second */ case ETHTOOL_ID_ON: s2io_set_led(sp, true); diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c index 644f7c1d6e7b..5d8468fc5804 100644 --- a/drivers/net/sfc/ethtool.c +++ b/drivers/net/sfc/ethtool.c @@ -182,7 +182,7 @@ static int efx_ethtool_phys_id(struct net_device *net_dev, enum ethtool_phys_id_state state) { struct efx_nic *efx = netdev_priv(net_dev); - enum efx_led_mode mode; + enum efx_led_mode mode = EFX_LED_DEFAULT; switch (state) { case ETHTOOL_ID_ON: @@ -194,8 +194,8 @@ static int efx_ethtool_phys_id(struct net_device *net_dev, case ETHTOOL_ID_INACTIVE: mode = EFX_LED_DEFAULT; break; - default: - return -EINVAL; + case ETHTOOL_ID_ACTIVE: + return 1; /* cycle on/off once per second */ } efx->type->set_id_led(efx, mode); diff --git a/drivers/net/skge.c b/drivers/net/skge.c index 310dcbce2519..176d784cbb54 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -753,7 +753,7 @@ static int skge_set_phys_id(struct net_device *dev, switch (state) { case ETHTOOL_ID_ACTIVE: - return -EINVAL; + return 2; /* cycle on/off twice per second */ case ETHTOOL_ID_ON: skge_led(skge, LED_MODE_TST); diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index a4b8fe564eb0..c8d045114c66 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -3813,7 +3813,7 @@ static int sky2_set_phys_id(struct net_device *dev, switch (state) { case ETHTOOL_ID_ACTIVE: - return -EINVAL; + return 1; /* cycle on/off once per second */ case ETHTOOL_ID_INACTIVE: sky2_led(sky2, MO_LED_NORM); break; diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 10fa476fede3..9915734ac3e9 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -10384,7 +10384,7 @@ static int tg3_set_phys_id(struct net_device *dev, switch (state) { case ETHTOOL_ID_ACTIVE: - return -EINVAL; + return 1; /* cycle on/off once per second */ case ETHTOOL_ID_ON: tw32(MAC_LED_CTRL, LED_CTRL_LNKLED_OVERRIDE | diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h 
index ad22a68c2e5d..9de31274341d 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -798,8 +798,10 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); * attached to it. The implementation may update the indicator * asynchronously or synchronously, but in either case it must return * quickly. It is initially called with the argument %ETHTOOL_ID_ACTIVE, - * and must either activate asynchronous updates or return -%EINVAL. - * If it returns -%EINVAL then it will be called again at intervals with + * and must either activate asynchronous updates and return zero, return + * a negative error or return a positive frequency for synchronous + * indication (e.g. 1 for one on/off cycle per second). If it returns + * a frequency then it will be called again at intervals with the * argument %ETHTOOL_ID_ON or %ETHTOOL_ID_OFF and should set the state of * the indicator accordingly. Finally, it is called with the argument * %ETHTOOL_ID_INACTIVE and must deactivate the indicator. Returns a diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 41dee2de13ad..13d79f5a86e5 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1669,7 +1669,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) return dev->ethtool_ops->phys_id(dev, id.data); rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE); - if (rc && rc != -EINVAL) + if (rc < 0) return rc; /* Drop the RTNL lock while waiting, but prevent reentry or @@ -1684,21 +1684,22 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) schedule_timeout_interruptible( id.data ? (id.data * HZ) : MAX_SCHEDULE_TIMEOUT); } else { - /* Driver expects to be called periodically */ + /* Driver expects to be called at twice the frequency in rc */ + int n = rc * 2, i, interval = HZ / n; + + /* Count down seconds */ do { - rtnl_lock(); - rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ON); - rtnl_unlock(); - if (rc) - break; - schedule_timeout_interruptible(HZ / 2); - - rtnl_lock(); - rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_OFF); - rtnl_unlock(); - if (rc) - break; - schedule_timeout_interruptible(HZ / 2); + /* Count down iterations per second */ + i = n; + do { + rtnl_lock(); + rc = dev->ethtool_ops->set_phys_id(dev, + (i & 1) ? ETHTOOL_ID_OFF : ETHTOOL_ID_ON); + rtnl_unlock(); + if (rc) + break; + schedule_timeout_interruptible(interval); + } while (!signal_pending(current) && --i != 0); } while (!signal_pending(current) && (id.data == 0 || --id.data != 0)); } -- cgit v1.2.3 From 1791f881435fab951939ad700e947b66c062e083 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Wed, 30 Mar 2011 15:24:21 +0200 Subject: posix clocks: Replace mutex with reader/writer semaphore A dynamic posix clock is protected from asynchronous removal by a mutex. However, using a mutex has the unwanted effect that a long running clock operation in one process will unnecessarily block other processes. For example, one process might call read() to get an external time stamp coming in at one pulse per second. A second process calling clock_gettime would have to wait for almost a whole second. This patch fixes the issue by using a reader/writer semaphore instead of a mutex. 
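
In sketch form (simplified from the hunks below), every clock operation now takes the read side, so readers no longer serialize against each other, while unregistration takes the write side to mark the clock dead exclusively:

    /* any clock operation */
    down_read(&clk->rwsem);
    if (!clk->zombie)
            err = clk->ops.clock_gettime(clk, &ts);
    up_read(&clk->rwsem);

    /* posix_clock_unregister() */
    down_write(&clk->rwsem);
    clk->zombie = true;
    up_write(&clk->rwsem);
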
Signed-off-by: Richard Cochran Cc: John Stultz Link: http://lkml.kernel.org/r/%3C20110330132421.GA31771%40riccoc20.at.omicron.at%3E Signed-off-by: Thomas Gleixner --- include/linux/posix-clock.h | 5 +++-- kernel/time/posix-clock.c | 24 +++++++++--------------- 2 files changed, 12 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 369e19d3750b..7f1183dcd119 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -24,6 +24,7 @@ #include #include #include +#include struct posix_clock; @@ -104,7 +105,7 @@ struct posix_clock_operations { * @ops: Functional interface to the clock * @cdev: Character device instance for this clock * @kref: Reference count. - * @mutex: Protects the 'zombie' field from concurrent access. + * @rwsem: Protects the 'zombie' field from concurrent access. * @zombie: If 'zombie' is true, then the hardware has disappeared. * @release: A function to free the structure when the reference count reaches * zero. May be NULL if structure is statically allocated. @@ -117,7 +118,7 @@ struct posix_clock { struct posix_clock_operations ops; struct cdev cdev; struct kref kref; - struct mutex mutex; + struct rw_semaphore rwsem; bool zombie; void (*release)(struct posix_clock *clk); }; diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 25028dd4fa18..c340ca658f37 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -19,7 +19,6 @@ */ #include #include -#include #include #include #include @@ -34,19 +33,19 @@ static struct posix_clock *get_posix_clock(struct file *fp) { struct posix_clock *clk = fp->private_data; - mutex_lock(&clk->mutex); + down_read(&clk->rwsem); if (!clk->zombie) return clk; - mutex_unlock(&clk->mutex); + up_read(&clk->rwsem); return NULL; } static void put_posix_clock(struct posix_clock *clk) { - mutex_unlock(&clk->mutex); + up_read(&clk->rwsem); } static ssize_t posix_clock_read(struct file *fp, char __user *buf, @@ -156,7 +155,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp) struct posix_clock *clk = container_of(inode->i_cdev, struct posix_clock, cdev); - mutex_lock(&clk->mutex); + down_read(&clk->rwsem); if (clk->zombie) { err = -ENODEV; @@ -172,7 +171,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp) fp->private_data = clk; } out: - mutex_unlock(&clk->mutex); + up_read(&clk->rwsem); return err; } @@ -211,25 +210,20 @@ int posix_clock_register(struct posix_clock *clk, dev_t devid) int err; kref_init(&clk->kref); - mutex_init(&clk->mutex); + init_rwsem(&clk->rwsem); cdev_init(&clk->cdev, &posix_clock_file_operations); clk->cdev.owner = clk->ops.owner; err = cdev_add(&clk->cdev, devid, 1); - if (err) - goto no_cdev; return err; -no_cdev: - mutex_destroy(&clk->mutex); - return err; } EXPORT_SYMBOL_GPL(posix_clock_register); static void delete_clock(struct kref *kref) { struct posix_clock *clk = container_of(kref, struct posix_clock, kref); - mutex_destroy(&clk->mutex); + if (clk->release) clk->release(clk); } @@ -238,9 +232,9 @@ void posix_clock_unregister(struct posix_clock *clk) { cdev_del(&clk->cdev); - mutex_lock(&clk->mutex); + down_write(&clk->rwsem); clk->zombie = true; - mutex_unlock(&clk->mutex); + up_write(&clk->rwsem); kref_put(&clk->kref, delete_clock); } -- cgit v1.2.3 From 468f86134ee515234afe5c5b3f39f266c50e61a5 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 18 Apr 2011 15:57:32 -0400 Subject: NFSv4.1: Don't update sequence number if rpc_task is not 
sent If we fail to contact the gss upcall program, then no message will be sent to the server. The client still updated the sequence number, however, and this lead to NFS4ERR_SEQ_MISMATCH for the next several RPC calls. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 ++-- include/linux/sunrpc/sched.h | 2 ++ net/sunrpc/xprt.c | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b8e1ac69b743..e7e2077eebd9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -444,8 +444,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * if (res->sr_status == 1) res->sr_status = NFS_OK; - /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */ - if (!res->sr_slot) + /* don't increment the sequence number if the task wasn't sent */ + if (!RPC_WAS_SENT(task)) goto out; /* Check the SEQUENCE operation status */ diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index d81db8012c63..3b94f804b852 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -127,6 +127,7 @@ struct rpc_task_setup { #define RPC_TASK_KILLED 0x0100 /* task was killed */ #define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ #define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */ +#define RPC_TASK_SENT 0x0800 /* message was sent */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) @@ -134,6 +135,7 @@ struct rpc_task_setup { #define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED) #define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT) #define RPC_IS_SOFTCONN(t) ((t)->tk_flags & RPC_TASK_SOFTCONN) +#define RPC_WAS_SENT(t) ((t)->tk_flags & RPC_TASK_SENT) #define RPC_TASK_RUNNING 0 #define RPC_TASK_QUEUED 1 diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 9494c3767356..ce5eb68a9664 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -906,6 +906,7 @@ void xprt_transmit(struct rpc_task *task) } dprintk("RPC: %5u xmit complete\n", task->tk_pid); + task->tk_flags |= RPC_TASK_SENT; spin_lock_bh(&xprt->transport_lock); xprt->ops->set_retrans_timeout(task); -- cgit v1.2.3 From c21e6beba8835d09bb80e34961430b13e60381c5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 19 Apr 2011 13:32:46 +0200 Subject: block: get rid of QUEUE_FLAG_REENTER We are currently using this flag to check whether it's safe to call into ->request_fn(). If it is set, we punt to kblockd. But we get a lot of false positives and excessive punts to kblockd, which hurts performance. The only real abuser of this infrastructure is SCSI. So export the async queue run and convert SCSI over to use that. There's room for improvement in that SCSI need not always use the async call, but this fixes our performance issue and they can fix that up in due time. 
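The conversion pattern for other drivers mirrors the SCSI hunks below: where code used to test-and-set QUEUE_FLAG_REENTER around __blk_run_queue() to avoid recursing into ->request_fn(), it now simply hands the queue to kblockd. Because blk_run_queue_async() only schedules the queue's delayed work, it may also be called with other spinlocks held, which is why scsi_run_queue() no longer has to drop the host lock first. A minimal sketch, with an invented function name:

/*
 * Illustrative only: restart a queue from a context that may already be
 * on the ->request_fn() path, such as a completion or error handler.
 */
static void example_lld_kick_queue(struct request_queue *q)
{
	/* schedules q->delay_work with zero delay instead of recursing */
	blk_run_queue_async(q);
}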
Signed-off-by: Jens Axboe --- block/blk-core.c | 11 ++--------- block/blk.h | 1 - drivers/scsi/scsi_lib.c | 17 +---------------- drivers/scsi/scsi_transport_fc.c | 19 ++++--------------- include/linux/blkdev.h | 26 +++++++++++++------------- 5 files changed, 20 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 580eee5743e5..40725b9091f1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -303,15 +303,7 @@ void __blk_run_queue(struct request_queue *q) if (unlikely(blk_queue_stopped(q))) return; - /* - * Only recurse once to avoid overrunning the stack, let the unplug - * handling reinvoke the handler shortly if we already got there. - */ - if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { - q->request_fn(q); - queue_flag_clear(QUEUE_FLAG_REENTER, q); - } else - queue_delayed_work(kblockd_workqueue, &q->delay_work, 0); + q->request_fn(q); } EXPORT_SYMBOL(__blk_run_queue); @@ -328,6 +320,7 @@ void blk_run_queue_async(struct request_queue *q) if (likely(!blk_queue_stopped(q))) queue_delayed_work(kblockd_workqueue, &q->delay_work, 0); } +EXPORT_SYMBOL(blk_run_queue_async); /** * blk_run_queue - run a single device queue diff --git a/block/blk.h b/block/blk.h index c9df8fc3c999..61263463e38e 100644 --- a/block/blk.h +++ b/block/blk.h @@ -22,7 +22,6 @@ void blk_rq_timed_out_timer(unsigned long data); void blk_delete_timer(struct request *); void blk_add_timer(struct request *); void __generic_unplug_device(struct request_queue *); -void blk_run_queue_async(struct request_queue *q); /* * Internal atomic flags for request handling diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index ab55c2fa7ce2..e9901b8f8443 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -411,8 +411,6 @@ static void scsi_run_queue(struct request_queue *q) list_splice_init(&shost->starved_list, &starved_list); while (!list_empty(&starved_list)) { - int flagset; - /* * As long as shost is accepting commands and we have * starved queues, call blk_run_queue. 
scsi_request_fn @@ -435,20 +433,7 @@ static void scsi_run_queue(struct request_queue *q) continue; } - spin_unlock(shost->host_lock); - - spin_lock(sdev->request_queue->queue_lock); - flagset = test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) && - !test_bit(QUEUE_FLAG_REENTER, - &sdev->request_queue->queue_flags); - if (flagset) - queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue); - __blk_run_queue(sdev->request_queue); - if (flagset) - queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue); - spin_unlock(sdev->request_queue->queue_lock); - - spin_lock(shost->host_lock); + blk_run_queue_async(sdev->request_queue); } /* put any unprocessed entries back */ list_splice(&starved_list, &shost->starved_list); diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 28c33506e4ad..815069d13f9b 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -3816,28 +3816,17 @@ fail_host_msg: static void fc_bsg_goose_queue(struct fc_rport *rport) { - int flagset; - unsigned long flags; - if (!rport->rqst_q) return; + /* + * This get/put dance makes no sense + */ get_device(&rport->dev); - - spin_lock_irqsave(rport->rqst_q->queue_lock, flags); - flagset = test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags) && - !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags); - if (flagset) - queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q); - __blk_run_queue(rport->rqst_q); - if (flagset) - queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q); - spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags); - + blk_run_queue_async(rport->rqst_q); put_device(&rport->dev); } - /** * fc_bsg_rport_dispatch - process rport bsg requests and dispatch to LLDD * @q: rport request queue diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cbbfd98ad4a3..2ad95fa1d130 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -388,20 +388,19 @@ struct request_queue #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ -#define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ -#define QUEUE_FLAG_ELVSWITCH 7 /* don't use elevator, just do FIFO */ -#define QUEUE_FLAG_BIDI 8 /* queue supports bidi requests */ -#define QUEUE_FLAG_NOMERGES 9 /* disable merge attempts */ -#define QUEUE_FLAG_SAME_COMP 10 /* force complete on same CPU */ -#define QUEUE_FLAG_FAIL_IO 11 /* fake timeout */ -#define QUEUE_FLAG_STACKABLE 12 /* supports request stacking */ -#define QUEUE_FLAG_NONROT 13 /* non-rotational device (SSD) */ +#define QUEUE_FLAG_ELVSWITCH 6 /* don't use elevator, just do FIFO */ +#define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */ +#define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */ +#define QUEUE_FLAG_SAME_COMP 9 /* force complete on same CPU */ +#define QUEUE_FLAG_FAIL_IO 10 /* fake timeout */ +#define QUEUE_FLAG_STACKABLE 11 /* supports request stacking */ +#define QUEUE_FLAG_NONROT 12 /* non-rotational device (SSD) */ #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ -#define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ -#define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */ -#define QUEUE_FLAG_NOXMERGES 17 /* No extended merges */ -#define QUEUE_FLAG_ADD_RANDOM 18 /* Contributes to random pool */ -#define QUEUE_FLAG_SECDISCARD 19 /* supports SECDISCARD */ +#define QUEUE_FLAG_IO_STAT 13 /* do IO stats */ +#define QUEUE_FLAG_DISCARD 14 /* supports DISCARD */ +#define QUEUE_FLAG_NOXMERGES 
15 /* No extended merges */ +#define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */ +#define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -699,6 +698,7 @@ extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); extern void __blk_run_queue(struct request_queue *q); extern void blk_run_queue(struct request_queue *); +extern void blk_run_queue_async(struct request_queue *q); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, gfp_t); -- cgit v1.2.3 From 62c7d085e1f2a1f2b4d89560551eff18d703b3b1 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Thu, 10 Mar 2011 16:42:47 +0200 Subject: wl12xx: add new board_tcxo_clock element to the platform data This new value is a new type of clock setting that is used by wl128x chipsets. Signed-off-by: Luciano Coelho --- include/linux/wl12xx.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h index bebb8efea0a6..eb8aacab8d4e 100644 --- a/include/linux/wl12xx.h +++ b/include/linux/wl12xx.h @@ -24,7 +24,7 @@ #ifndef _LINUX_WL12XX_H #define _LINUX_WL12XX_H -/* The board reference clock values */ +/* Reference clock values */ enum { WL12XX_REFCLOCK_19 = 0, /* 19.2 MHz */ WL12XX_REFCLOCK_26 = 1, /* 26 MHz */ @@ -32,12 +32,25 @@ enum { WL12XX_REFCLOCK_54 = 3, /* 54 MHz */ }; +/* TCXO clock values */ +enum { + WL12XX_TCXOCLOCK_19_2 = 0, /* 19.2MHz */ + WL12XX_TCXOCLOCK_26 = 1, /* 26 MHz */ + WL12XX_TCXOCLOCK_38_4 = 2, /* 38.4MHz */ + WL12XX_TCXOCLOCK_52 = 3, /* 52 MHz */ + WL12XX_TCXOCLOCK_16_368 = 4, /* 16.368 MHz */ + WL12XX_TCXOCLOCK_32_736 = 5, /* 32.736 MHz */ + WL12XX_TCXOCLOCK_16_8 = 6, /* 16.8 MHz */ + WL12XX_TCXOCLOCK_33_6 = 7, /* 33.6 MHz */ +}; + struct wl12xx_platform_data { void (*set_power)(bool enable); /* SDIO only: IRQ number if WLAN_IRQ line is used, 0 for SDIO IRQs */ int irq; bool use_eeprom; int board_ref_clock; + int board_tcxo_clock; }; #ifdef CONFIG_WL12XX_PLATFORM_DATA -- cgit v1.2.3 From d29633b40e6afc6b4276a4e381bc532cc84be104 Mon Sep 17 00:00:00 2001 From: Ido Yariv Date: Thu, 31 Mar 2011 10:06:57 +0200 Subject: wl12xx: Clean up and fix the 128x boot sequence Clean up the boot sequence code & fix the following issues: 1. Always read the registers' values and set the relevant bits instead of zeroing all other bits 2. Handle cases where wl1271_top_reg_read returns an error 3. Verify that the HW can detect the selected clock source 4. Remove 128x PG10 initialization code 5. Configure the MCS PLL to work in HP mode Signed-off-by: Ido Yariv Reviewed-by: Luciano Coelho Signed-off-by: Luciano Coelho --- drivers/net/wireless/wl12xx/boot.c | 235 ++++++++++++++++++------------------- drivers/net/wireless/wl12xx/boot.h | 1 + include/linux/wl12xx.h | 10 +- 3 files changed, 123 insertions(+), 123 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/wl12xx/boot.c b/drivers/net/wireless/wl12xx/boot.c index 34bf2fe47dc7..b5ec2c2b6f78 100644 --- a/drivers/net/wireless/wl12xx/boot.c +++ b/drivers/net/wireless/wl12xx/boot.c @@ -523,137 +523,137 @@ static void wl1271_boot_hw_version(struct wl1271 *wl) wl->quirks |= WL12XX_QUIRK_END_OF_TRANSACTION; } -/* - * WL128x has two clocks input - TCXO and FREF. 
- * TCXO is the main clock of the device, while FREF is used to sync - * between the GPS and the cellular modem. - * In cases where TCXO is 32.736MHz or 16.368MHz, the FREF will be used - * as the WLAN/BT main clock. - */ -static int wl128x_switch_fref(struct wl1271 *wl, bool *is_ref_clk) +static int wl128x_switch_tcxo_to_fref(struct wl1271 *wl) { - u16 sys_clk_cfg_val; + u16 spare_reg; - /* if working on XTAL-only mode go directly to TCXO TO FREF SWITCH */ - if ((wl->ref_clock == CONF_REF_CLK_38_4_M_XTAL) || - (wl->ref_clock == CONF_REF_CLK_26_M_XTAL)) - return true; + /* Mask bits [2] & [8:4] in the sys_clk_cfg register */ + spare_reg = wl1271_top_reg_read(wl, WL_SPARE_REG); + if (spare_reg == 0xFFFF) + return -EFAULT; + spare_reg |= (BIT(3) | BIT(5) | BIT(6)); + wl1271_top_reg_write(wl, WL_SPARE_REG, spare_reg); - /* Read clock source FREF or TCXO */ - sys_clk_cfg_val = wl1271_top_reg_read(wl, SYS_CLK_CFG_REG); + /* Enable FREF_CLK_REQ & mux MCS and coex PLLs to FREF */ + wl1271_top_reg_write(wl, SYS_CLK_CFG_REG, + WL_CLK_REQ_TYPE_PG2 | MCS_PLL_CLK_SEL_FREF); - if (sys_clk_cfg_val & PRCM_CM_EN_MUX_WLAN_FREF) { - /* if bit 3 is set - working with FREF clock */ - wl1271_debug(DEBUG_BOOT, "working with FREF clock, skip" - " to FREF"); + /* Delay execution for 15msec, to let the HW settle */ + mdelay(15); - *is_ref_clk = true; - } else { - /* if bit 3 is clear - working with TCXO clock */ - wl1271_debug(DEBUG_BOOT, "working with TCXO clock"); - - /* TCXO to FREF switch, check TXCO clock config */ - if ((wl->tcxo_clock != WL12XX_TCXOCLOCK_16_368) && - (wl->tcxo_clock != WL12XX_TCXOCLOCK_32_736)) { - /* - * not 16.368Mhz and not 32.736Mhz - skip to - * configure ELP stage - */ - wl1271_debug(DEBUG_BOOT, "NEW PLL ALGO:" - " TcxoRefClk=%d - not 16.368Mhz and not" - " 32.736Mhz - skip to configure ELP" - " stage", wl->tcxo_clock); - - *is_ref_clk = false; - } else { - wl1271_debug(DEBUG_BOOT, "NEW PLL ALGO:" - "TcxoRefClk=%d - 16.368Mhz or 32.736Mhz" - " - TCXO to FREF switch", - wl->tcxo_clock); + return 0; +} - return true; - } - } +static bool wl128x_is_tcxo_valid(struct wl1271 *wl) +{ + u16 tcxo_detection; + + tcxo_detection = wl1271_top_reg_read(wl, TCXO_CLK_DETECT_REG); + if (tcxo_detection & TCXO_DET_FAILED) + return false; - return false; + return true; } -static int wl128x_boot_clk(struct wl1271 *wl, bool *is_ref_clk) +static bool wl128x_is_fref_valid(struct wl1271 *wl) { - if (wl128x_switch_fref(wl, is_ref_clk)) { - wl1271_debug(DEBUG_BOOT, "XTAL-only mode go directly to" - " TCXO TO FREF SWITCH"); - /* TCXO to FREF switch - for PG2.0 */ - wl1271_top_reg_write(wl, WL_SPARE_REG, - WL_SPARE_MASK_8526); - - wl1271_top_reg_write(wl, SYS_CLK_CFG_REG, - WL_CLK_REQ_TYPE_PG2 | MCS_PLL_CLK_SEL_FREF); - - *is_ref_clk = true; - mdelay(15); - } + u16 fref_detection; - /* Set bit 2 in spare register to avoid illegal access */ - wl1271_top_reg_write(wl, WL_SPARE_REG, WL_SPARE_VAL); + fref_detection = wl1271_top_reg_read(wl, FREF_CLK_DETECT_REG); + if (fref_detection & FREF_CLK_DETECT_FAIL) + return false; - /* working with TCXO clock */ - if ((*is_ref_clk == false) && - ((wl->tcxo_clock == WL12XX_TCXOCLOCK_16_8) || - (wl->tcxo_clock == WL12XX_TCXOCLOCK_33_6))) { - wl1271_debug(DEBUG_BOOT, "16_8_M or 33_6_M TCXO detected"); + return true; +} - /* Manually Configure MCS PLL settings PG2.0 Only */ - wl1271_top_reg_write(wl, MCS_PLL_M_REG, MCS_PLL_M_REG_VAL); - wl1271_top_reg_write(wl, MCS_PLL_N_REG, MCS_PLL_N_REG_VAL); - wl1271_top_reg_write(wl, MCS_PLL_CONFIG_REG, - MCS_PLL_CONFIG_REG_VAL); - } else { 
- int pll_config; - u16 mcs_pll_config_val; +static int wl128x_manually_configure_mcs_pll(struct wl1271 *wl) +{ + wl1271_top_reg_write(wl, MCS_PLL_M_REG, MCS_PLL_M_REG_VAL); + wl1271_top_reg_write(wl, MCS_PLL_N_REG, MCS_PLL_N_REG_VAL); + wl1271_top_reg_write(wl, MCS_PLL_CONFIG_REG, MCS_PLL_CONFIG_REG_VAL); - /* - * Configure MCS PLL settings to FREF Freq - * Set the values that determine the time elapse since the PLL's - * get their enable signal until the lock indication is set - */ - wl1271_top_reg_write(wl, PLL_LOCK_COUNTERS_REG, - PLL_LOCK_COUNTERS_COEX | PLL_LOCK_COUNTERS_MCS); + return 0; +} - mcs_pll_config_val = wl1271_top_reg_read(wl, - MCS_PLL_CONFIG_REG); - /* - * Set the MCS PLL input frequency value according to the - * reference clock value detected/read - */ - if (*is_ref_clk == false) { - if ((wl->tcxo_clock == WL12XX_TCXOCLOCK_19_2) || - (wl->tcxo_clock == WL12XX_TCXOCLOCK_38_4)) - pll_config = 1; - else if ((wl->tcxo_clock == WL12XX_TCXOCLOCK_26) - || - (wl->tcxo_clock == WL12XX_TCXOCLOCK_52)) - pll_config = 2; - else - return -EINVAL; - } else { - if ((wl->ref_clock == CONF_REF_CLK_19_2_E) || - (wl->ref_clock == CONF_REF_CLK_38_4_E)) - pll_config = 1; - else if ((wl->ref_clock == CONF_REF_CLK_26_E) || - (wl->ref_clock == CONF_REF_CLK_52_E)) - pll_config = 2; - else - return -EINVAL; - } +static int wl128x_configure_mcs_pll(struct wl1271 *wl, int clk) +{ + u16 spare_reg; + u16 pll_config; + u8 input_freq; + + /* Mask bits [3:1] in the sys_clk_cfg register */ + spare_reg = wl1271_top_reg_read(wl, WL_SPARE_REG); + if (spare_reg == 0xFFFF) + return -EFAULT; + spare_reg |= BIT(2); + wl1271_top_reg_write(wl, WL_SPARE_REG, spare_reg); + + /* Handle special cases of the TCXO clock */ + if (wl->tcxo_clock == WL12XX_TCXOCLOCK_16_8 || + wl->tcxo_clock == WL12XX_TCXOCLOCK_33_6) + return wl128x_manually_configure_mcs_pll(wl); + + /* Set the input frequency according to the selected clock source */ + input_freq = (clk & 1) + 1; + + pll_config = wl1271_top_reg_read(wl, MCS_PLL_CONFIG_REG); + if (pll_config == 0xFFFF) + return -EFAULT; + pll_config |= (input_freq << MCS_SEL_IN_FREQ_SHIFT); + pll_config |= MCS_PLL_ENABLE_HP; + wl1271_top_reg_write(wl, MCS_PLL_CONFIG_REG, pll_config); - mcs_pll_config_val |= (pll_config << (MCS_SEL_IN_FREQ_SHIFT)) & - (MCS_SEL_IN_FREQ_MASK); - wl1271_top_reg_write(wl, MCS_PLL_CONFIG_REG, - mcs_pll_config_val); + return 0; +} + +/* + * WL128x has two clocks input - TCXO and FREF. + * TCXO is the main clock of the device, while FREF is used to sync + * between the GPS and the cellular modem. + * In cases where TCXO is 32.736MHz or 16.368MHz, the FREF will be used + * as the WLAN/BT main clock. 
+ */ +static int wl128x_boot_clk(struct wl1271 *wl, int *selected_clock) +{ + u16 sys_clk_cfg; + + /* For XTAL-only modes, FREF will be used after switching from TCXO */ + if (wl->ref_clock == WL12XX_REFCLOCK_26_XTAL || + wl->ref_clock == WL12XX_REFCLOCK_38_XTAL) { + if (!wl128x_switch_tcxo_to_fref(wl)) + return -EINVAL; + goto fref_clk; } - return 0; + /* Query the HW, to determine which clock source we should use */ + sys_clk_cfg = wl1271_top_reg_read(wl, SYS_CLK_CFG_REG); + if (sys_clk_cfg == 0xFFFF) + return -EINVAL; + if (sys_clk_cfg & PRCM_CM_EN_MUX_WLAN_FREF) + goto fref_clk; + + /* If TCXO is either 32.736MHz or 16.368MHz, switch to FREF */ + if (wl->tcxo_clock == WL12XX_TCXOCLOCK_16_368 || + wl->tcxo_clock == WL12XX_TCXOCLOCK_32_736) { + if (!wl128x_switch_tcxo_to_fref(wl)) + return -EINVAL; + goto fref_clk; + } + + /* TCXO clock is selected */ + if (!wl128x_is_tcxo_valid(wl)) + return -EINVAL; + *selected_clock = wl->tcxo_clock; + goto config_mcs_pll; + +fref_clk: + /* FREF clock is selected */ + if (!wl128x_is_fref_valid(wl)) + return -EINVAL; + *selected_clock = wl->ref_clock; + +config_mcs_pll: + return wl128x_configure_mcs_pll(wl, *selected_clock); } static int wl127x_boot_clk(struct wl1271 *wl) @@ -713,10 +713,10 @@ int wl1271_load_firmware(struct wl1271 *wl) { int ret = 0; u32 tmp, clk; - bool is_ref_clk = false; + int selected_clock = -1; if (wl->chip.id == CHIP_ID_1283_PG20) { - ret = wl128x_boot_clk(wl, &is_ref_clk); + ret = wl128x_boot_clk(wl, &selected_clock); if (ret < 0) goto out; } else { @@ -741,10 +741,7 @@ int wl1271_load_firmware(struct wl1271 *wl) wl1271_debug(DEBUG_BOOT, "clk2 0x%x", clk); if (wl->chip.id == CHIP_ID_1283_PG20) { - if (is_ref_clk == false) - clk |= ((wl->tcxo_clock & 0x3) << 1) << 4; - else - clk |= ((wl->ref_clock & 0x3) << 1) << 4; + clk |= ((selected_clock & 0x3) << 1) << 4; } else { clk |= (wl->ref_clock << 1) << 4; } diff --git a/drivers/net/wireless/wl12xx/boot.h b/drivers/net/wireless/wl12xx/boot.h index 1f5ee31dc0b1..d9de64ac1442 100644 --- a/drivers/net/wireless/wl12xx/boot.h +++ b/drivers/net/wireless/wl12xx/boot.h @@ -107,6 +107,7 @@ struct wl1271_static_data { #define MCS_SEL_IN_FREQ_MASK 0x0070 #define MCS_SEL_IN_FREQ_SHIFT 4 #define MCS_PLL_CONFIG_REG_VAL 0x73 +#define MCS_PLL_ENABLE_HP (BIT(0) | BIT(1)) #define MCS_PLL_M_REG 0xD94 #define MCS_PLL_N_REG 0xD96 diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h index eb8aacab8d4e..c1a743ea7470 100644 --- a/include/linux/wl12xx.h +++ b/include/linux/wl12xx.h @@ -26,10 +26,12 @@ /* Reference clock values */ enum { - WL12XX_REFCLOCK_19 = 0, /* 19.2 MHz */ - WL12XX_REFCLOCK_26 = 1, /* 26 MHz */ - WL12XX_REFCLOCK_38 = 2, /* 38.4 MHz */ - WL12XX_REFCLOCK_54 = 3, /* 54 MHz */ + WL12XX_REFCLOCK_19 = 0, /* 19.2 MHz */ + WL12XX_REFCLOCK_26 = 1, /* 26 MHz */ + WL12XX_REFCLOCK_38 = 2, /* 38.4 MHz */ + WL12XX_REFCLOCK_52 = 3, /* 52 MHz */ + WL12XX_REFCLOCK_38_XTAL = 4, /* 38.4 MHz, XTAL */ + WL12XX_REFCLOCK_26_XTAL = 5, /* 26 MHz, XTAL */ }; /* TCXO clock values */ -- cgit v1.2.3 From 341b7cde6ccc60672fcd7fc84dd24a1b7c0b8d94 Mon Sep 17 00:00:00 2001 From: Ido Yariv Date: Thu, 31 Mar 2011 10:07:01 +0200 Subject: wl12xx: Handle platforms without level trigger interrupts Some platforms are incapable of triggering on level interrupts. Add a platform quirks member in the platform data structure, as well as an edge interrupt quirk which can be set on such platforms. 
When the interrupt is requested with IRQF_TRIGGER_RISING, IRQF_ONESHOT cannot be used, as we might miss interrupts that occur after the FW status is cleared and before the threaded interrupt handler exits. Moreover, when IRQF_ONESHOT is not set, iterating more than once in the threaded interrupt handler introduces a few race conditions between this handler and the hardirq handler. Currently this is worked around by limiting the loop to one iteration only. This workaround has an impact on performance. To remove to this restriction, the race conditions will need to be addressed. Signed-off-by: Ido Yariv Signed-off-by: Luciano Coelho --- drivers/net/wireless/wl12xx/main.c | 9 +++++++++ drivers/net/wireless/wl12xx/sdio.c | 9 ++++++++- drivers/net/wireless/wl12xx/spi.c | 9 ++++++++- drivers/net/wireless/wl12xx/wl12xx.h | 3 +++ include/linux/wl12xx.h | 4 ++++ 5 files changed, 32 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/wl12xx/main.c b/drivers/net/wireless/wl12xx/main.c index 1feb9551ef8f..7126506611c1 100644 --- a/drivers/net/wireless/wl12xx/main.c +++ b/drivers/net/wireless/wl12xx/main.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "wl12xx.h" #include "wl12xx_80211.h" @@ -719,6 +720,13 @@ irqreturn_t wl1271_irq(int irq, void *cookie) set_bit(WL1271_FLAG_TX_PENDING, &wl->flags); cancel_work_sync(&wl->tx_work); + /* + * In case edge triggered interrupt must be used, we cannot iterate + * more than once without introducing race conditions with the hardirq. + */ + if (wl->platform_quirks & WL12XX_PLATFORM_QUIRK_EDGE_IRQ) + loopcount = 1; + mutex_lock(&wl->mutex); wl1271_debug(DEBUG_IRQ, "IRQ work"); @@ -3648,6 +3656,7 @@ struct ieee80211_hw *wl1271_alloc_hw(void) wl->ap_ps_map = 0; wl->ap_fw_ps_map = 0; wl->quirks = 0; + wl->platform_quirks = 0; memset(wl->tx_frames_map, 0, sizeof(wl->tx_frames_map)); for (i = 0; i < ACX_TX_DESCRIPTORS; i++) diff --git a/drivers/net/wireless/wl12xx/sdio.c b/drivers/net/wireless/wl12xx/sdio.c index 8246e9de4306..bcd4ad7ba90d 100644 --- a/drivers/net/wireless/wl12xx/sdio.c +++ b/drivers/net/wireless/wl12xx/sdio.c @@ -220,6 +220,7 @@ static int __devinit wl1271_probe(struct sdio_func *func, struct ieee80211_hw *hw; const struct wl12xx_platform_data *wlan_data; struct wl1271 *wl; + unsigned long irqflags; int ret; /* We are only able to handle the wlan function */ @@ -251,9 +252,15 @@ static int __devinit wl1271_probe(struct sdio_func *func, wl->irq = wlan_data->irq; wl->ref_clock = wlan_data->board_ref_clock; wl->tcxo_clock = wlan_data->board_tcxo_clock; + wl->platform_quirks = wlan_data->platform_quirks; + + if (wl->platform_quirks & WL12XX_PLATFORM_QUIRK_EDGE_IRQ) + irqflags = IRQF_TRIGGER_RISING; + else + irqflags = IRQF_TRIGGER_HIGH | IRQF_ONESHOT; ret = request_threaded_irq(wl->irq, wl1271_hardirq, wl1271_irq, - IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + irqflags, DRIVER_NAME, wl); if (ret < 0) { wl1271_error("request_irq() failed: %d", ret); diff --git a/drivers/net/wireless/wl12xx/spi.c b/drivers/net/wireless/wl12xx/spi.c index 7b82b5f0e490..51662bb68019 100644 --- a/drivers/net/wireless/wl12xx/spi.c +++ b/drivers/net/wireless/wl12xx/spi.c @@ -364,6 +364,7 @@ static int __devinit wl1271_probe(struct spi_device *spi) struct wl12xx_platform_data *pdata; struct ieee80211_hw *hw; struct wl1271 *wl; + unsigned long irqflags; int ret; pdata = spi->dev.platform_data; @@ -402,6 +403,12 @@ static int __devinit wl1271_probe(struct spi_device *spi) wl->ref_clock = pdata->board_ref_clock; wl->tcxo_clock = 
pdata->board_tcxo_clock; + wl->platform_quirks = pdata->platform_quirks; + + if (wl->platform_quirks & WL12XX_PLATFORM_QUIRK_EDGE_IRQ) + irqflags = IRQF_TRIGGER_RISING; + else + irqflags = IRQF_TRIGGER_HIGH | IRQF_ONESHOT; wl->irq = spi->irq; if (wl->irq < 0) { @@ -411,7 +418,7 @@ static int __devinit wl1271_probe(struct spi_device *spi) } ret = request_threaded_irq(wl->irq, wl1271_hardirq, wl1271_irq, - IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + irqflags, DRIVER_NAME, wl); if (ret < 0) { wl1271_error("request_irq() failed: %d", ret); diff --git a/drivers/net/wireless/wl12xx/wl12xx.h b/drivers/net/wireless/wl12xx/wl12xx.h index 9ccbcfdd0802..fb2b79fa42b4 100644 --- a/drivers/net/wireless/wl12xx/wl12xx.h +++ b/drivers/net/wireless/wl12xx/wl12xx.h @@ -579,6 +579,9 @@ struct wl1271 { /* Quirks of specific hardware revisions */ unsigned int quirks; + + /* Platform limitations */ + unsigned int platform_quirks; }; struct wl1271_station { diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h index c1a743ea7470..4b697395326e 100644 --- a/include/linux/wl12xx.h +++ b/include/linux/wl12xx.h @@ -53,8 +53,12 @@ struct wl12xx_platform_data { bool use_eeprom; int board_ref_clock; int board_tcxo_clock; + unsigned long platform_quirks; }; +/* Platform does not support level trigger interrupts */ +#define WL12XX_PLATFORM_QUIRK_EDGE_IRQ BIT(0) + #ifdef CONFIG_WL12XX_PLATFORM_DATA int wl12xx_set_platform_data(const struct wl12xx_platform_data *data); -- cgit v1.2.3 From d924de09cac6e18bdfbe9461a2ab2adeb36e77b0 Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Tue, 29 Mar 2011 05:19:06 -0300 Subject: [media] v4l: add V4L2_PIX_FMT_Y12 format Y12 is a grey-scale format with a depth of 12 bits per pixel stored in 16-bit words. Signed-off-by: Michael Jones Acked-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media-entities.tmpl | 1 + Documentation/DocBook/v4l/pixfmt-y12.xml | 79 +++++++++++++++++++++++++++++++ Documentation/DocBook/v4l/pixfmt.xml | 1 + include/linux/videodev2.h | 1 + 4 files changed, 82 insertions(+) create mode 100644 Documentation/DocBook/v4l/pixfmt-y12.xml (limited to 'include/linux') diff --git a/Documentation/DocBook/media-entities.tmpl b/Documentation/DocBook/media-entities.tmpl index 5d259c632cdf..fea63b45471a 100644 --- a/Documentation/DocBook/media-entities.tmpl +++ b/Documentation/DocBook/media-entities.tmpl @@ -294,6 +294,7 @@ + diff --git a/Documentation/DocBook/v4l/pixfmt-y12.xml b/Documentation/DocBook/v4l/pixfmt-y12.xml new file mode 100644 index 000000000000..ff417b858cc9 --- /dev/null +++ b/Documentation/DocBook/v4l/pixfmt-y12.xml @@ -0,0 +1,79 @@ + + + V4L2_PIX_FMT_Y12 ('Y12 ') + &manvol; + + + V4L2_PIX_FMT_Y12 + Grey-scale image + + + Description + + This is a grey-scale image with a depth of 12 bits per pixel. Pixels +are stored in 16-bit words with unused high bits padded with 0. The least +significant byte is stored at lower memory addresses (little-endian). + + + <constant>V4L2_PIX_FMT_Y12</constant> 4 × 4 +pixel image + + + Byte Order. + Each cell is one byte. 
+ + + + + + start + 0: + Y'00low + Y'00high + Y'01low + Y'01high + Y'02low + Y'02high + Y'03low + Y'03high + + + start + 8: + Y'10low + Y'10high + Y'11low + Y'11high + Y'12low + Y'12high + Y'13low + Y'13high + + + start + 16: + Y'20low + Y'20high + Y'21low + Y'21high + Y'22low + Y'22high + Y'23low + Y'23high + + + start + 24: + Y'30low + Y'30high + Y'31low + Y'31high + Y'32low + Y'32high + Y'33low + Y'33high + + + + + + + + + diff --git a/Documentation/DocBook/v4l/pixfmt.xml b/Documentation/DocBook/v4l/pixfmt.xml index c6fdcbbd1b41..40af4beb48b9 100644 --- a/Documentation/DocBook/v4l/pixfmt.xml +++ b/Documentation/DocBook/v4l/pixfmt.xml @@ -696,6 +696,7 @@ information. &sub-packed-yuv; &sub-grey; &sub-y10; + &sub-y12; &sub-y16; &sub-yuyv; &sub-uyvy; diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index aa6c393b7ae9..be82c8ead1af 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -308,6 +308,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_Y4 v4l2_fourcc('Y', '0', '4', ' ') /* 4 Greyscale */ #define V4L2_PIX_FMT_Y6 v4l2_fourcc('Y', '0', '6', ' ') /* 6 Greyscale */ #define V4L2_PIX_FMT_Y10 v4l2_fourcc('Y', '1', '0', ' ') /* 10 Greyscale */ +#define V4L2_PIX_FMT_Y12 v4l2_fourcc('Y', '1', '2', ' ') /* 12 Greyscale */ #define V4L2_PIX_FMT_Y16 v4l2_fourcc('Y', '1', '6', ' ') /* 16 Greyscale */ /* Palette formats */ -- cgit v1.2.3 From cbbc69a4a98081740f0e3d7717fbfa0b584b983d Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Tue, 29 Mar 2011 05:19:07 -0300 Subject: [media] media: add missing 8-bit bayer formats and Y12 8-bit SGBRG and SRGGB media bus formats are missing, as well as the 12-bit grey format. Add them. Signed-off-by: Michael Jones Acked-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/v4l/subdev-formats.xml | 59 ++++++++++++++++++++++++++++ include/linux/v4l2-mediabus.h | 7 +++- 2 files changed, 64 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/DocBook/v4l/subdev-formats.xml b/Documentation/DocBook/v4l/subdev-formats.xml index 7041127d6dfc..d7ccd25edcc1 100644 --- a/Documentation/DocBook/v4l/subdev-formats.xml +++ b/Documentation/DocBook/v4l/subdev-formats.xml @@ -456,6 +456,23 @@ b1 b0 + + V4L2_MBUS_FMT_SGBRG8_1X8 + 0x3013 + + - + - + - + - + g7 + g6 + g5 + g4 + g3 + g2 + g1 + g0 + V4L2_MBUS_FMT_SGRBG8_1X8 0x3002 @@ -473,6 +490,23 @@ g1 g0 + + V4L2_MBUS_FMT_SRGGB8_1X8 + 0x3014 + + - + - + - + - + r7 + r6 + r5 + r4 + r3 + r2 + r1 + r0 + V4L2_MBUS_FMT_SBGGR10_DPCM8_1X8 0x300b @@ -2159,6 +2193,31 @@ u1 u0 + + V4L2_MBUS_FMT_Y12_1X12 + 0x2013 + + - + - + - + - + - + - + - + - + y11 + y10 + y9 + y8 + y7 + y6 + y5 + y4 + y3 + y2 + y1 + y0 + V4L2_MBUS_FMT_UYVY8_1X16 0x200f diff --git a/include/linux/v4l2-mediabus.h b/include/linux/v4l2-mediabus.h index 7054a7a8065e..de5c15921025 100644 --- a/include/linux/v4l2-mediabus.h +++ b/include/linux/v4l2-mediabus.h @@ -47,7 +47,7 @@ enum v4l2_mbus_pixelcode { V4L2_MBUS_FMT_RGB565_2X8_BE = 0x1007, V4L2_MBUS_FMT_RGB565_2X8_LE = 0x1008, - /* YUV (including grey) - next is 0x2013 */ + /* YUV (including grey) - next is 0x2014 */ V4L2_MBUS_FMT_Y8_1X8 = 0x2001, V4L2_MBUS_FMT_UYVY8_1_5X8 = 0x2002, V4L2_MBUS_FMT_VYUY8_1_5X8 = 0x2003, @@ -60,6 +60,7 @@ enum v4l2_mbus_pixelcode { V4L2_MBUS_FMT_Y10_1X10 = 0x200a, V4L2_MBUS_FMT_YUYV10_2X10 = 0x200b, V4L2_MBUS_FMT_YVYU10_2X10 = 0x200c, + V4L2_MBUS_FMT_Y12_1X12 = 0x2013, V4L2_MBUS_FMT_UYVY8_1X16 = 0x200f, V4L2_MBUS_FMT_VYUY8_1X16 = 0x2010, V4L2_MBUS_FMT_YUYV8_1X16 = 0x2011, @@ -67,9 +68,11 @@ enum 
v4l2_mbus_pixelcode { V4L2_MBUS_FMT_YUYV10_1X20 = 0x200d, V4L2_MBUS_FMT_YVYU10_1X20 = 0x200e, - /* Bayer - next is 0x3013 */ + /* Bayer - next is 0x3015 */ V4L2_MBUS_FMT_SBGGR8_1X8 = 0x3001, + V4L2_MBUS_FMT_SGBRG8_1X8 = 0x3013, V4L2_MBUS_FMT_SGRBG8_1X8 = 0x3002, + V4L2_MBUS_FMT_SRGGB8_1X8 = 0x3014, V4L2_MBUS_FMT_SBGGR10_DPCM8_1X8 = 0x300b, V4L2_MBUS_FMT_SGBRG10_DPCM8_1X8 = 0x300c, V4L2_MBUS_FMT_SGRBG10_DPCM8_1X8 = 0x3009, -- cgit v1.2.3 From cbc6a6ed0900aed789b5ca77192845f2f987af70 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Wed, 13 Apr 2011 21:40:45 +0200 Subject: rfkill: Regulator consumer driver for rfkill Add a regulator consumer driver for rfkill to enable controlling radio transmitters connected to voltage regulators using the regulator framework. A new "vrfkill" virtual supply is provided to use in platform code. Signed-off-by: Guiming Zhuo Signed-off-by: Antonio Ospite Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/rfkill-regulator.h | 48 ++++++++++++ net/rfkill/Kconfig | 11 +++ net/rfkill/Makefile | 1 + net/rfkill/rfkill-regulator.c | 164 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 224 insertions(+) create mode 100644 include/linux/rfkill-regulator.h create mode 100644 net/rfkill/rfkill-regulator.c (limited to 'include/linux') diff --git a/include/linux/rfkill-regulator.h b/include/linux/rfkill-regulator.h new file mode 100644 index 000000000000..aca36bc83315 --- /dev/null +++ b/include/linux/rfkill-regulator.h @@ -0,0 +1,48 @@ +/* + * rfkill-regulator.c - Regulator consumer driver for rfkill + * + * Copyright (C) 2009 Guiming Zhuo + * Copyright (C) 2011 Antonio Ospite + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef __LINUX_RFKILL_REGULATOR_H +#define __LINUX_RFKILL_REGULATOR_H + +/* + * Use "vrfkill" as supply id when declaring the regulator consumer: + * + * static struct regulator_consumer_supply pcap_regulator_V6_consumers [] = { + * { .dev_name = "rfkill-regulator.0", .supply = "vrfkill" }, + * }; + * + * If you have several regulator driven rfkill, you can append a numerical id to + * .dev_name as done above, and use the same id when declaring the platform + * device: + * + * static struct rfkill_regulator_platform_data ezx_rfkill_bt_data = { + * .name = "ezx-bluetooth", + * .type = RFKILL_TYPE_BLUETOOTH, + * }; + * + * static struct platform_device a910_rfkill = { + * .name = "rfkill-regulator", + * .id = 0, + * .dev = { + * .platform_data = &ezx_rfkill_bt_data, + * }, + * }; + */ + +#include + +struct rfkill_regulator_platform_data { + char *name; /* the name for the rfkill switch */ + enum rfkill_type type; /* the type as specified in rfkill.h */ +}; + +#endif /* __LINUX_RFKILL_REGULATOR_H */ diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig index 7fce6dfd2180..48464ca13b24 100644 --- a/net/rfkill/Kconfig +++ b/net/rfkill/Kconfig @@ -22,3 +22,14 @@ config RFKILL_INPUT depends on RFKILL depends on INPUT = y || RFKILL = INPUT default y if !EXPERT + +config RFKILL_REGULATOR + tristate "Generic rfkill regulator driver" + depends on RFKILL || !RFKILL + depends on REGULATOR + help + This options enable controlling radio transmitters connected to + voltage regulator using the regulator framework. + + To compile this driver as a module, choose M here: the module will + be called rfkill-regulator. 
diff --git a/net/rfkill/Makefile b/net/rfkill/Makefile index 662105352691..d9a5a58ffd8c 100644 --- a/net/rfkill/Makefile +++ b/net/rfkill/Makefile @@ -5,3 +5,4 @@ rfkill-y += core.o rfkill-$(CONFIG_RFKILL_INPUT) += input.o obj-$(CONFIG_RFKILL) += rfkill.o +obj-$(CONFIG_RFKILL_REGULATOR) += rfkill-regulator.o diff --git a/net/rfkill/rfkill-regulator.c b/net/rfkill/rfkill-regulator.c new file mode 100644 index 000000000000..18dc512a10f3 --- /dev/null +++ b/net/rfkill/rfkill-regulator.c @@ -0,0 +1,164 @@ +/* + * rfkill-regulator.c - Regulator consumer driver for rfkill + * + * Copyright (C) 2009 Guiming Zhuo + * Copyright (C) 2011 Antonio Ospite + * + * Implementation inspired by leds-regulator driver. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +struct rfkill_regulator_data { + struct rfkill *rf_kill; + bool reg_enabled; + + struct regulator *vcc; +}; + +static int rfkill_regulator_set_block(void *data, bool blocked) +{ + struct rfkill_regulator_data *rfkill_data = data; + + pr_debug("%s: blocked: %d\n", __func__, blocked); + + if (blocked) { + if (rfkill_data->reg_enabled) { + regulator_disable(rfkill_data->vcc); + rfkill_data->reg_enabled = 0; + } + } else { + if (!rfkill_data->reg_enabled) { + regulator_enable(rfkill_data->vcc); + rfkill_data->reg_enabled = 1; + } + } + + pr_debug("%s: regulator_is_enabled after set_block: %d\n", __func__, + regulator_is_enabled(rfkill_data->vcc)); + + return 0; +} + +struct rfkill_ops rfkill_regulator_ops = { + .set_block = rfkill_regulator_set_block, +}; + +static int __devinit rfkill_regulator_probe(struct platform_device *pdev) +{ + struct rfkill_regulator_platform_data *pdata = pdev->dev.platform_data; + struct rfkill_regulator_data *rfkill_data; + struct regulator *vcc; + struct rfkill *rf_kill; + int ret = 0; + + if (pdata == NULL) { + dev_err(&pdev->dev, "no platform data\n"); + return -ENODEV; + } + + if (pdata->name == NULL || pdata->type == 0) { + dev_err(&pdev->dev, "invalid name or type in platform data\n"); + return -EINVAL; + } + + vcc = regulator_get_exclusive(&pdev->dev, "vrfkill"); + if (IS_ERR(vcc)) { + dev_err(&pdev->dev, "Cannot get vcc for %s\n", pdata->name); + ret = PTR_ERR(vcc); + goto out; + } + + rfkill_data = kzalloc(sizeof(*rfkill_data), GFP_KERNEL); + if (rfkill_data == NULL) { + ret = -ENOMEM; + goto err_data_alloc; + } + + rf_kill = rfkill_alloc(pdata->name, &pdev->dev, + pdata->type, + &rfkill_regulator_ops, rfkill_data); + if (rf_kill == NULL) { + dev_err(&pdev->dev, "Cannot alloc rfkill device\n"); + ret = -ENOMEM; + goto err_rfkill_alloc; + } + + if (regulator_is_enabled(vcc)) { + dev_dbg(&pdev->dev, "Regulator already enabled\n"); + rfkill_data->reg_enabled = 1; + } + rfkill_data->vcc = vcc; + rfkill_data->rf_kill = rf_kill; + + ret = rfkill_register(rf_kill); + if (ret) { + dev_err(&pdev->dev, "Cannot register rfkill device\n"); + goto err_rfkill_register; + } + + platform_set_drvdata(pdev, rfkill_data); + dev_info(&pdev->dev, "%s initialized\n", pdata->name); + + return 0; + +err_rfkill_register: + rfkill_destroy(rf_kill); +err_rfkill_alloc: + kfree(rfkill_data); +err_data_alloc: + regulator_put(vcc); +out: + return ret; +} + +static int __devexit rfkill_regulator_remove(struct platform_device *pdev) +{ + struct rfkill_regulator_data *rfkill_data = platform_get_drvdata(pdev); + struct 
rfkill *rf_kill = rfkill_data->rf_kill; + + rfkill_unregister(rf_kill); + rfkill_destroy(rf_kill); + regulator_put(rfkill_data->vcc); + kfree(rfkill_data); + + return 0; +} + +static struct platform_driver rfkill_regulator_driver = { + .probe = rfkill_regulator_probe, + .remove = __devexit_p(rfkill_regulator_remove), + .driver = { + .name = "rfkill-regulator", + .owner = THIS_MODULE, + }, +}; + +static int __init rfkill_regulator_init(void) +{ + return platform_driver_register(&rfkill_regulator_driver); +} +module_init(rfkill_regulator_init); + +static void __exit rfkill_regulator_exit(void) +{ + platform_driver_unregister(&rfkill_regulator_driver); +} +module_exit(rfkill_regulator_exit); + +MODULE_AUTHOR("Guiming Zhuo "); +MODULE_AUTHOR("Antonio Ospite "); +MODULE_DESCRIPTION("Regulator consumer driver for rfkill"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:rfkill-regulator"); -- cgit v1.2.3 From 6716671d8c1c07a8072098764d1b7cbfef7412ad Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 23 Mar 2011 10:48:35 +0100 Subject: TTY: introduce deinit helpers for proper ldisc shutdown Introduce deinitialize_tty_struct which should be called after initialize_tty_struct and before successfull tty_ldisc_setup. It calls tty_ldisc_deinit which is opposite of tty_ldisc_init. It only puts a reference to ldisc and assigns NULL to tty->ldisc. It will be used to shut down ldisc when tty_release cannot be called yet. Signed-off-by: Jiri Slaby Cc: Alan Cox Cc: Julian Anastasov Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 14 ++++++++++++++ drivers/tty/tty_ldisc.c | 13 +++++++++++++ include/linux/tty.h | 2 ++ 3 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 026bf2f6f5f2..f5dd23520fe3 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2886,6 +2886,20 @@ void initialize_tty_struct(struct tty_struct *tty, tty->dev = tty_get_device(tty); } +/** + * deinitialize_tty_struct + * @tty: tty to deinitialize + * + * This subroutine deinitializes a tty structure that has been newly + * allocated but tty_release cannot be called on that yet. + * + * Locking: none - tty in question must not be exposed at this point + */ +void deinitialize_tty_struct(struct tty_struct *tty) +{ + tty_ldisc_deinit(tty); +} + /** * tty_put_char - write one character to a tty * @tty: tty diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index e19e13647116..5d01d32e2cf0 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -956,6 +956,19 @@ void tty_ldisc_init(struct tty_struct *tty) tty_ldisc_assign(tty, ld); } +/** + * tty_ldisc_init - ldisc cleanup for new tty + * @tty: tty that was allocated recently + * + * The tty structure must not becompletely set up (tty_ldisc_setup) when + * this call is made. + */ +void tty_ldisc_deinit(struct tty_struct *tty) +{ + put_ldisc(tty->ldisc); + tty_ldisc_assign(tty, NULL); +} + void tty_ldisc_begin(void) { /* Setup the default TTY line discipline. 
*/ diff --git a/include/linux/tty.h b/include/linux/tty.h index 9f469c700550..4db4ca79f895 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -472,6 +472,7 @@ extern int tty_add_file(struct tty_struct *tty, struct file *file); extern void free_tty_struct(struct tty_struct *tty); extern void initialize_tty_struct(struct tty_struct *tty, struct tty_driver *driver, int idx); +extern void deinitialize_tty_struct(struct tty_struct *tty); extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx, int first_ok); extern int tty_release(struct inode *inode, struct file *filp); @@ -525,6 +526,7 @@ extern int tty_set_ldisc(struct tty_struct *tty, int ldisc); extern int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty); extern void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty); extern void tty_ldisc_init(struct tty_struct *tty); +extern void tty_ldisc_deinit(struct tty_struct *tty); extern void tty_ldisc_begin(void); /* This last one is just for the tty layer internals and shouldn't be used elsewhere */ extern void tty_ldisc_enable(struct tty_struct *tty); -- cgit v1.2.3 From bcdd323b893ad3c9b7ef26b5e4a0bef974238501 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 16 Mar 2011 15:59:35 +0200 Subject: device: add dev_WARN_ONCE it's quite useful to print the device name on the stack dump caused by WARN(), but there are other cases where we might want to use WARN_ONCE. Introduce a helper similar to dev_WARN() for that case too. Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index ab8dfc095709..d4840511e877 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -742,13 +742,17 @@ do { \ #endif /* - * dev_WARN() acts like dev_printk(), but with the key difference + * dev_WARN*() acts like dev_printk(), but with the key difference * of using a WARN/WARN_ON to get the message out, including the * file/line information and a backtrace. */ #define dev_WARN(dev, format, arg...) \ WARN(1, "Device: %s\n" format, dev_driver_string(dev), ## arg); +#define dev_WARN_ONCE(dev, condition, format, arg...) \ + WARN_ONCE(condition, "Device %s\n" format, \ + dev_driver_string(dev), ## arg) + /* Create alias, so I can be autoloaded. */ #define MODULE_ALIAS_CHARDEV(major,minor) \ MODULE_ALIAS("char-major-" __stringify(major) "-" __stringify(minor)) -- cgit v1.2.3 From aed65af1cc2f6fc9ded5a8158f1405a02cf6d2ff Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 28 Mar 2011 09:12:52 -0700 Subject: drivers: make device_type const The device_type structure does not contain data that changes during usage and should be const. This allows devices to declare the struct const. I have patches to change all the subsystems, but need the infra structure change first. 
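With the core now taking a const pointer, the per-subsystem follow-ups become mechanical: each subsystem marks its device_type object const and assigns its address exactly as before. A sketch of such a declaration, with invented names:

#include <linux/device.h>
#include <linux/slab.h>

struct example_port {
	struct device dev;
	/* ... */
};

static void example_port_release(struct device *dev)
{
	kfree(container_of(dev, struct example_port, dev));
}

/* read-only type object; dev->type is now const-qualified to match */
static const struct device_type example_port_type = {
	.name		= "example_port",
	.release	= example_port_release,
};

static void example_port_setup(struct example_port *port)
{
	device_initialize(&port->dev);
	port->dev.type = &example_port_type;
}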
Signed-off-by: Stephen Hemminger Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 4 ++-- include/linux/device.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/core.c b/drivers/base/core.c index 81b78ede37c4..fb8130ceea10 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -400,7 +400,7 @@ static void device_remove_groups(struct device *dev, static int device_add_attrs(struct device *dev) { struct class *class = dev->class; - struct device_type *type = dev->type; + const struct device_type *type = dev->type; int error; if (class) { @@ -440,7 +440,7 @@ static int device_add_attrs(struct device *dev) static void device_remove_attrs(struct device *dev) { struct class *class = dev->class; - struct device_type *type = dev->type; + const struct device_type *type = dev->type; device_remove_groups(dev, dev->groups); diff --git a/include/linux/device.h b/include/linux/device.h index d4840511e877..350ceda4de97 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -408,7 +408,7 @@ struct device { struct kobject kobj; const char *init_name; /* initial name of the device */ - struct device_type *type; + const struct device_type *type; struct mutex mutex; /* mutex to synchronize calls to * its driver. -- cgit v1.2.3 From 0b2e9a8e10ad2d191e5c37e77f1ce23e148e7a0b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 14 Apr 2011 22:31:57 +0000 Subject: of: Export of_irq_find_parent() We have platform code that needs to find a node's interrupt parent, so export of_irq_find_parent() so we can use it. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- drivers/of/irq.c | 2 +- include/linux/of_irq.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 75b0d3cb7676..9f689f1da0fc 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -56,7 +56,7 @@ EXPORT_SYMBOL_GPL(irq_of_parse_and_map); * Returns a pointer to the interrupt parent node, or NULL if the interrupt * parent could not be determined. */ -static struct device_node *of_irq_find_parent(struct device_node *child) +struct device_node *of_irq_find_parent(struct device_node *child) { struct device_node *p; const __be32 *parp; diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 109e013b1772..e6955f5d1f08 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -68,6 +68,7 @@ extern int of_irq_to_resource(struct device_node *dev, int index, extern int of_irq_count(struct device_node *dev); extern int of_irq_to_resource_table(struct device_node *dev, struct resource *res, int nr_irqs); +extern struct device_node *of_irq_find_parent(struct device_node *child); #endif /* CONFIG_OF_IRQ */ #endif /* CONFIG_OF */ -- cgit v1.2.3 From b14a9ccc1ddddfbc76b7cae06d02db4adf0ae1db Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Tue, 29 Mar 2011 10:10:16 +0900 Subject: power_supply: Add driver for MAX8903 charger MAX8903 is an integrated battery charger and selector with two power inputs (USB and AC adapter). This driver enables the charger, handles interrupts, and provides power-supply-class information to userland. Tested on Exynos4 NURI / S5PC210 SLP7 boards. 
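The driver is entirely platform-data driven, so a board enables it by describing its GPIO routing and registering a "max8903-charger" platform device. A sketch of the board-file side follows; the GPIO numbers are invented for illustration, and pins the board does not route are simply left unset:

#include <linux/platform_device.h>
#include <linux/power/max8903_charger.h>

static struct max8903_pdata example_charger_pdata = {
	.dok		= 42,	/* DC (adapter) power-OK, hypothetical GPIO */
	.dcm		= 43,	/* current-limit mode select */
	.uok		= 44,	/* USB power-OK */
	.cen		= 45,	/* charger enable (active low) */
	.chg		= 46,	/* charger status */
	.flt		= 47,	/* fault */
	.dc_valid	= true,
	.usb_valid	= true,
};

static struct platform_device example_charger_device = {
	.name	= "max8903-charger",
	.id	= -1,
	.dev	= {
		.platform_data = &example_charger_pdata,
	},
};

Registering example_charger_device from the board init code then lets the probe routine below claim the DOK, UOK and FLT lines as threaded interrupts.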
Signed-off-by: MyungJoo Ham Signed-off-by: Kyungmin Park Signed-off-by: Anton Vorontsov --- drivers/power/Kconfig | 8 + drivers/power/Makefile | 1 + drivers/power/max8903_charger.c | 391 ++++++++++++++++++++++++++++++++++ include/linux/power/max8903_charger.h | 57 +++++ 4 files changed, 457 insertions(+) create mode 100644 drivers/power/max8903_charger.c create mode 100644 include/linux/power/max8903_charger.h (limited to 'include/linux') diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig index 52a462fc6b84..1f50ebcc6399 100644 --- a/drivers/power/Kconfig +++ b/drivers/power/Kconfig @@ -203,6 +203,14 @@ config CHARGER_ISP1704 Say Y to enable support for USB Charger Detection with ISP1707/ISP1704 USB transceivers. +config CHARGER_MAX8903 + tristate "MAX8903 Battery DC-DC Charger for USB and Adapter Power" + help + Say Y to enable support for the MAX8903 DC-DC charger and sysfs. + The driver supports controlling charger-enable and current-limit + pins based on the status of charger connections with interrupt + handlers. + config CHARGER_TWL4030 tristate "OMAP TWL4030 BCI charger driver" depends on TWL4030_CORE diff --git a/drivers/power/Makefile b/drivers/power/Makefile index 8385bfae8728..8fcd93ff2353 100644 --- a/drivers/power/Makefile +++ b/drivers/power/Makefile @@ -32,5 +32,6 @@ obj-$(CONFIG_CHARGER_PCF50633) += pcf50633-charger.o obj-$(CONFIG_BATTERY_JZ4740) += jz4740-battery.o obj-$(CONFIG_BATTERY_INTEL_MID) += intel_mid_battery.o obj-$(CONFIG_CHARGER_ISP1704) += isp1704_charger.o +obj-$(CONFIG_CHARGER_MAX8903) += max8903_charger.o obj-$(CONFIG_CHARGER_TWL4030) += twl4030_charger.o obj-$(CONFIG_CHARGER_GPIO) += gpio-charger.o diff --git a/drivers/power/max8903_charger.c b/drivers/power/max8903_charger.c new file mode 100644 index 000000000000..33ff0e37809e --- /dev/null +++ b/drivers/power/max8903_charger.c @@ -0,0 +1,391 @@ +/* + * max8903_charger.c - Maxim 8903 USB/Adapter Charger Driver + * + * Copyright (C) 2011 Samsung Electronics + * MyungJoo Ham + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include +#include + +struct max8903_data { + struct max8903_pdata *pdata; + struct device *dev; + struct power_supply psy; + bool fault; + bool usb_in; + bool ta_in; +}; + +static enum power_supply_property max8903_charger_props[] = { + POWER_SUPPLY_PROP_STATUS, /* Charger status output */ + POWER_SUPPLY_PROP_ONLINE, /* External power source */ + POWER_SUPPLY_PROP_HEALTH, /* Fault or OK */ +}; + +static int max8903_get_property(struct power_supply *psy, + enum power_supply_property psp, + union power_supply_propval *val) +{ + struct max8903_data *data = container_of(psy, + struct max8903_data, psy); + + switch (psp) { + case POWER_SUPPLY_PROP_STATUS: + val->intval = POWER_SUPPLY_STATUS_UNKNOWN; + if (data->pdata->chg) { + if (gpio_get_value(data->pdata->chg) == 0) + val->intval = POWER_SUPPLY_STATUS_CHARGING; + else if (data->usb_in || data->ta_in) + val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING; + else + val->intval = POWER_SUPPLY_STATUS_DISCHARGING; + } + break; + case POWER_SUPPLY_PROP_ONLINE: + val->intval = 0; + if (data->usb_in || data->ta_in) + val->intval = 1; + break; + case POWER_SUPPLY_PROP_HEALTH: + val->intval = POWER_SUPPLY_HEALTH_GOOD; + if (data->fault) + val->intval = POWER_SUPPLY_HEALTH_UNSPEC_FAILURE; + break; + default: + return -EINVAL; + } + return 0; +} + +static irqreturn_t max8903_dcin(int irq, void *_data) +{ + struct max8903_data *data = _data; + struct max8903_pdata *pdata = data->pdata; + bool ta_in; + enum power_supply_type old_type; + + ta_in = gpio_get_value(pdata->dok) ? false : true; + + if (ta_in == data->ta_in) + return IRQ_HANDLED; + + data->ta_in = ta_in; + + /* Set Current-Limit-Mode 1:DC 0:USB */ + if (pdata->dcm) + gpio_set_value(pdata->dcm, ta_in ? 1 : 0); + + /* Charger Enable / Disable (cen is negated) */ + if (pdata->cen) + gpio_set_value(pdata->cen, ta_in ? 0 : + (data->usb_in ? 0 : 1)); + + dev_dbg(data->dev, "TA(DC-IN) Charger %s.\n", ta_in ? + "Connected" : "Disconnected"); + + old_type = data->psy.type; + + if (data->ta_in) + data->psy.type = POWER_SUPPLY_TYPE_MAINS; + else if (data->usb_in) + data->psy.type = POWER_SUPPLY_TYPE_USB; + else + data->psy.type = POWER_SUPPLY_TYPE_BATTERY; + + if (old_type != data->psy.type) + power_supply_changed(&data->psy); + + return IRQ_HANDLED; +} + +static irqreturn_t max8903_usbin(int irq, void *_data) +{ + struct max8903_data *data = _data; + struct max8903_pdata *pdata = data->pdata; + bool usb_in; + enum power_supply_type old_type; + + usb_in = gpio_get_value(pdata->uok) ? false : true; + + if (usb_in == data->usb_in) + return IRQ_HANDLED; + + data->usb_in = usb_in; + + /* Do not touch Current-Limit-Mode */ + + /* Charger Enable / Disable (cen is negated) */ + if (pdata->cen) + gpio_set_value(pdata->cen, usb_in ? 0 : + (data->ta_in ? 0 : 1)); + + dev_dbg(data->dev, "USB Charger %s.\n", usb_in ? 
+ "Connected" : "Disconnected"); + + old_type = data->psy.type; + + if (data->ta_in) + data->psy.type = POWER_SUPPLY_TYPE_MAINS; + else if (data->usb_in) + data->psy.type = POWER_SUPPLY_TYPE_USB; + else + data->psy.type = POWER_SUPPLY_TYPE_BATTERY; + + if (old_type != data->psy.type) + power_supply_changed(&data->psy); + + return IRQ_HANDLED; +} + +static irqreturn_t max8903_fault(int irq, void *_data) +{ + struct max8903_data *data = _data; + struct max8903_pdata *pdata = data->pdata; + bool fault; + + fault = gpio_get_value(pdata->flt) ? false : true; + + if (fault == data->fault) + return IRQ_HANDLED; + + data->fault = fault; + + if (fault) + dev_err(data->dev, "Charger suffers a fault and stops.\n"); + else + dev_err(data->dev, "Charger recovered from a fault.\n"); + + return IRQ_HANDLED; +} + +static __devinit int max8903_probe(struct platform_device *pdev) +{ + struct max8903_data *data; + struct device *dev = &pdev->dev; + struct max8903_pdata *pdata = pdev->dev.platform_data; + int ret = 0; + int gpio; + int ta_in = 0; + int usb_in = 0; + + data = kzalloc(sizeof(struct max8903_data), GFP_KERNEL); + if (data == NULL) { + dev_err(dev, "Cannot allocate memory.\n"); + return -ENOMEM; + } + data->pdata = pdata; + data->dev = dev; + platform_set_drvdata(pdev, data); + + if (pdata->dc_valid == false && pdata->usb_valid == false) { + dev_err(dev, "No valid power sources.\n"); + ret = -EINVAL; + goto err; + } + + if (pdata->dc_valid) { + if (pdata->dok && gpio_is_valid(pdata->dok) && + pdata->dcm && gpio_is_valid(pdata->dcm)) { + gpio = pdata->dok; /* PULL_UPed Interrupt */ + ta_in = gpio_get_value(gpio) ? 0 : 1; + + gpio = pdata->dcm; /* Output */ + gpio_set_value(gpio, ta_in); + } else { + dev_err(dev, "When DC is wired, DOK and DCM should" + " be wired as well.\n"); + ret = -EINVAL; + goto err; + } + } else { + if (pdata->dcm) { + if (gpio_is_valid(pdata->dcm)) + gpio_set_value(pdata->dcm, 0); + else { + dev_err(dev, "Invalid pin: dcm.\n"); + ret = -EINVAL; + goto err; + } + } + } + + if (pdata->usb_valid) { + if (pdata->uok && gpio_is_valid(pdata->uok)) { + gpio = pdata->uok; + usb_in = gpio_get_value(gpio) ? 0 : 1; + } else { + dev_err(dev, "When USB is wired, UOK should be wired." + "as well.\n"); + ret = -EINVAL; + goto err; + } + } + + if (pdata->cen) { + if (gpio_is_valid(pdata->cen)) { + gpio_set_value(pdata->cen, (ta_in || usb_in) ? 0 : 1); + } else { + dev_err(dev, "Invalid pin: cen.\n"); + ret = -EINVAL; + goto err; + } + } + + if (pdata->chg) { + if (!gpio_is_valid(pdata->chg)) { + dev_err(dev, "Invalid pin: chg.\n"); + ret = -EINVAL; + goto err; + } + } + + if (pdata->flt) { + if (!gpio_is_valid(pdata->flt)) { + dev_err(dev, "Invalid pin: flt.\n"); + ret = -EINVAL; + goto err; + } + } + + if (pdata->usus) { + if (!gpio_is_valid(pdata->usus)) { + dev_err(dev, "Invalid pin: usus.\n"); + ret = -EINVAL; + goto err; + } + } + + data->fault = false; + data->ta_in = ta_in; + data->usb_in = usb_in; + + data->psy.name = "max8903_charger"; + data->psy.type = (ta_in) ? POWER_SUPPLY_TYPE_MAINS : + ((usb_in) ? 
POWER_SUPPLY_TYPE_USB : + POWER_SUPPLY_TYPE_BATTERY); + data->psy.get_property = max8903_get_property; + data->psy.properties = max8903_charger_props; + data->psy.num_properties = ARRAY_SIZE(max8903_charger_props); + + ret = power_supply_register(dev, &data->psy); + if (ret) { + dev_err(dev, "failed: power supply register.\n"); + goto err; + } + + if (pdata->dc_valid) { + ret = request_threaded_irq(gpio_to_irq(pdata->dok), + NULL, max8903_dcin, + IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING, + "MAX8903 DC IN", data); + if (ret) { + dev_err(dev, "Cannot request irq %d for DC (%d)\n", + gpio_to_irq(pdata->dok), ret); + goto err_psy; + } + } + + if (pdata->usb_valid) { + ret = request_threaded_irq(gpio_to_irq(pdata->uok), + NULL, max8903_usbin, + IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING, + "MAX8903 USB IN", data); + if (ret) { + dev_err(dev, "Cannot request irq %d for USB (%d)\n", + gpio_to_irq(pdata->uok), ret); + goto err_dc_irq; + } + } + + if (pdata->flt) { + ret = request_threaded_irq(gpio_to_irq(pdata->flt), + NULL, max8903_fault, + IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING, + "MAX8903 Fault", data); + if (ret) { + dev_err(dev, "Cannot request irq %d for Fault (%d)\n", + gpio_to_irq(pdata->flt), ret); + goto err_usb_irq; + } + } + + return 0; + +err_usb_irq: + if (pdata->usb_valid) + free_irq(gpio_to_irq(pdata->uok), data); +err_dc_irq: + if (pdata->dc_valid) + free_irq(gpio_to_irq(pdata->dok), data); +err_psy: + power_supply_unregister(&data->psy); +err: + kfree(data); + return ret; +} + +static __devexit int max8903_remove(struct platform_device *pdev) +{ + struct max8903_data *data = platform_get_drvdata(pdev); + + if (data) { + struct max8903_pdata *pdata = data->pdata; + + if (pdata->flt) + free_irq(gpio_to_irq(pdata->flt), data); + if (pdata->usb_valid) + free_irq(gpio_to_irq(pdata->uok), data); + if (pdata->dc_valid) + free_irq(gpio_to_irq(pdata->dok), data); + power_supply_unregister(&data->psy); + kfree(data); + } + + return 0; +} + +static struct platform_driver max8903_driver = { + .probe = max8903_probe, + .remove = __devexit_p(max8903_remove), + .driver = { + .name = "max8903-charger", + .owner = THIS_MODULE, + }, +}; + +static int __init max8903_init(void) +{ + return platform_driver_register(&max8903_driver); +} +module_init(max8903_init); + +static void __exit max8903_exit(void) +{ + platform_driver_unregister(&max8903_driver); +} +module_exit(max8903_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("MAX8903 Charger Driver"); +MODULE_AUTHOR("MyungJoo Ham "); +MODULE_ALIAS("max8903-charger"); diff --git a/include/linux/power/max8903_charger.h b/include/linux/power/max8903_charger.h new file mode 100644 index 000000000000..24f51db8a83f --- /dev/null +++ b/include/linux/power/max8903_charger.h @@ -0,0 +1,57 @@ +/* + * max8903_charger.h - Maxim 8903 USB/Adapter Charger Driver + * + * Copyright (C) 2011 Samsung Electronics + * MyungJoo Ham + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __MAX8903_CHARGER_H__ +#define __MAX8903_CHARGER_H__ + +struct max8903_pdata { + /* + * GPIOs + * cen, chg, flt, and usus are optional. + * dok, dcm, and uok are not optional depending on the status of + * dc_valid and usb_valid. + */ + int cen; /* Charger Enable input */ + int dok; /* DC(Adapter) Power OK output */ + int uok; /* USB Power OK output */ + int chg; /* Charger status output */ + int flt; /* Fault output */ + int dcm; /* Current-Limit Mode input (1: DC, 2: USB) */ + int usus; /* USB Suspend Input (1: suspended) */ + + /* + * DC(Adapter/TA) is wired + * When dc_valid is true, + * dok and dcm should be valid. + * + * At least one of dc_valid or usb_valid should be true. + */ + bool dc_valid; + /* + * USB is wired + * When usb_valid is true, + * uok should be valid. + */ + bool usb_valid; +}; + +#endif /* __MAX8903_CHARGER_H__ */ -- cgit v1.2.3 From 2785cefc98051646bd1d36a627822a3f43736697 Mon Sep 17 00:00:00 2001 From: Kalle Jokiniemi Date: Tue, 29 Mar 2011 16:27:59 +0300 Subject: isp1704_charger: Allow board specific powering routine The ISP1704/1707 chip can be put to full power down state by asserting the CHIP_SEL line. This patch enables platform or board specific hooks to put the device into power down mode in case not needed. This patch is a preparation for enabling this powering routine in n900 (rx-51) devices. Thanks to Heikki Krogerus for helping out with the patch. Signed-off-by: Kalle Jokiniemi Acked-By: Heikki Krogerus Signed-off-by: Anton Vorontsov --- drivers/power/isp1704_charger.c | 22 ++++++++++++++++++++++ include/linux/power/isp1704_charger.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 include/linux/power/isp1704_charger.h (limited to 'include/linux') diff --git a/drivers/power/isp1704_charger.c b/drivers/power/isp1704_charger.c index 2ad9b14a5ce3..f6d72b402a8e 100644 --- a/drivers/power/isp1704_charger.c +++ b/drivers/power/isp1704_charger.c @@ -33,6 +33,7 @@ #include #include #include +#include /* Vendor specific Power Control register */ #define ISP1704_PWR_CTRL 0x3d @@ -70,6 +71,18 @@ struct isp1704_charger { unsigned max_power; }; +/* + * Disable/enable the power from the isp1704 if a function for it + * has been provided with platform data. + */ +static void isp1704_charger_set_power(struct isp1704_charger *isp, bool on) +{ + struct isp1704_charger_data *board = isp->dev->platform_data; + + if (board->set_power) + board->set_power(on); +} + /* * Determine is the charging port DCP (dedicated charger) or CDP (Host/HUB * chargers). 
@@ -222,6 +235,9 @@ static void isp1704_charger_work(struct work_struct *data) mutex_lock(&lock); + if (event != USB_EVENT_NONE) + isp1704_charger_set_power(isp, 1); + switch (event) { case USB_EVENT_VBUS: isp->online = true; @@ -269,6 +285,8 @@ static void isp1704_charger_work(struct work_struct *data) */ if (isp->otg->gadget) usb_gadget_disconnect(isp->otg->gadget); + + isp1704_charger_set_power(isp, 0); break; case USB_EVENT_ENUMERATED: if (isp->present) @@ -394,6 +412,8 @@ static int __devinit isp1704_charger_probe(struct platform_device *pdev) isp->dev = &pdev->dev; platform_set_drvdata(pdev, isp); + isp1704_charger_set_power(isp, 1); + ret = isp1704_test_ulpi(isp); if (ret < 0) goto fail1; @@ -434,6 +454,7 @@ static int __devinit isp1704_charger_probe(struct platform_device *pdev) /* Detect charger if VBUS is valid (the cable was already plugged). */ ret = otg_io_read(isp->otg, ULPI_USB_INT_STS); + isp1704_charger_set_power(isp, 0); if ((ret & ULPI_INT_VBUS_VALID) && !isp->otg->default_a) { isp->event = USB_EVENT_VBUS; schedule_work(&isp->work); @@ -459,6 +480,7 @@ static int __devexit isp1704_charger_remove(struct platform_device *pdev) otg_unregister_notifier(isp->otg, &isp->nb); power_supply_unregister(&isp->psy); otg_put_transceiver(isp->otg); + isp1704_charger_set_power(isp, 0); kfree(isp); return 0; diff --git a/include/linux/power/isp1704_charger.h b/include/linux/power/isp1704_charger.h new file mode 100644 index 000000000000..68096a6aa2d7 --- /dev/null +++ b/include/linux/power/isp1704_charger.h @@ -0,0 +1,29 @@ +/* + * ISP1704 USB Charger Detection driver + * + * Copyright (C) 2011 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#ifndef __ISP1704_CHARGER_H +#define __ISP1704_CHARGER_H + +struct isp1704_charger_data { + void (*set_power)(bool on); +}; + +#endif -- cgit v1.2.3 From d4dc210f69bcb0b4bef5a83b1c323817be89bad1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 21 Apr 2011 20:54:46 +0200 Subject: block: don't block events on excl write for non-optical devices Disk event code automatically blocks events on excl write. This is primarily to avoid issuing polling commands while burning is in progress. This behavior doesn't fit other types of devices with removeable media where polling commands don't have adverse side effects and door locking usually doesn't exist. This patch introduces new genhd flag which controls the auto-blocking behavior and uses it to enable auto-blocking only on optical devices. 
Note for stable: 2.6.38 and later only Cc: stable@kernel.org Signed-off-by: Tejun Heo Reported-by: Kay Sievers Signed-off-by: Jens Axboe --- drivers/block/paride/pcd.c | 1 + drivers/cdrom/viocd.c | 3 ++- drivers/ide/ide-cd.c | 2 +- drivers/scsi/sr.c | 2 +- fs/block_dev.c | 17 ++++++++++------- include/linux/genhd.h | 1 + 6 files changed, 16 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 2f2ccf686251..a0aabd904a51 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -320,6 +320,7 @@ static void pcd_init_units(void) disk->first_minor = unit; strcpy(disk->disk_name, cd->name); /* umm... */ disk->fops = &pcd_bdops; + disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; disk->events = DISK_EVENT_MEDIA_CHANGE; } } diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c index 4e874c5fa605..ae15a4ddaa9b 100644 --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c @@ -625,7 +625,8 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id) blk_queue_max_hw_sectors(q, 4096 / 512); gendisk->queue = q; gendisk->fops = &viocd_fops; - gendisk->flags = GENHD_FL_CD|GENHD_FL_REMOVABLE; + gendisk->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE | + GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; gendisk->events = DISK_EVENT_MEDIA_CHANGE; set_capacity(gendisk, 0); gendisk->private_data = d; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index fd1e11799137..6e5123b1d341 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1781,7 +1781,7 @@ static int ide_cd_probe(ide_drive_t *drive) ide_cd_read_toc(drive, &sense); g->fops = &idecd_ops; - g->flags |= GENHD_FL_REMOVABLE; + g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; g->events = DISK_EVENT_MEDIA_CHANGE; add_disk(g); return 0; diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 95019c747cc1..4778e2707168 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -636,7 +636,7 @@ static int sr_probe(struct device *dev) disk->first_minor = minor; sprintf(disk->disk_name, "sr%d", minor); disk->fops = &sr_bdops; - disk->flags = GENHD_FL_CD; + disk->flags = GENHD_FL_CD | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; disk->events = DISK_EVENT_MEDIA_CHANGE | DISK_EVENT_EJECT_REQUEST; blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT); diff --git a/fs/block_dev.c b/fs/block_dev.c index 257b00e98428..d7c2e0fddc6f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1237,6 +1237,8 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) res = __blkdev_get(bdev, mode, 0); if (whole) { + struct gendisk *disk = whole->bd_disk; + /* finish claiming */ mutex_lock(&bdev->bd_mutex); spin_lock(&bdev_lock); @@ -1263,15 +1265,16 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) spin_unlock(&bdev_lock); /* - * Block event polling for write claims. Any write - * holder makes the write_holder state stick until all - * are released. This is good enough and tracking - * individual writeable reference is too fragile given - * the way @mode is used in blkdev_get/put(). + * Block event polling for write claims if requested. Any + * write holder makes the write_holder state stick until + * all are released. This is good enough and tracking + * individual writeable reference is too fragile given the + * way @mode is used in blkdev_get/put(). 
*/ - if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) { + if ((disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE) && + !res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) { bdev->bd_write_holder = true; - disk_block_events(bdev->bd_disk); + disk_block_events(disk); } mutex_unlock(&bdev->bd_mutex); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index d764a426e9fd..300d7582006e 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -127,6 +127,7 @@ struct hd_struct { #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 #define GENHD_FL_EXT_DEVT 64 /* allow extended devt */ #define GENHD_FL_NATIVE_CAPACITY 128 +#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE 256 enum { DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */ -- cgit v1.2.3 From 8c9e80ed276fc4b9c9fadf29d8bf6b3576112f1a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 21 Apr 2011 17:23:19 -0700 Subject: SECURITY: Move exec_permission RCU checks into security modules Right now all RCU walks fall back to reference walk when CONFIG_SECURITY is enabled, even though just the standard capability module is active. This is because security_inode_exec_permission unconditionally fails RCU walks. Move this decision to the low level security module. This requires passing the RCU flags down the security hook. This way at least the capability module and a few easy cases in selinux/smack work with RCU walks with CONFIG_SECURITY=y Signed-off-by: Andi Kleen Acked-by: Eric Paris Signed-off-by: Linus Torvalds --- include/linux/security.h | 2 +- security/capability.c | 2 +- security/security.c | 6 ++---- security/selinux/hooks.c | 6 +++++- security/smack/smack_lsm.c | 6 +++++- 5 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index ca02f1716736..8ce59ef3e5af 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1456,7 +1456,7 @@ struct security_operations { struct inode *new_dir, struct dentry *new_dentry); int (*inode_readlink) (struct dentry *dentry); int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd); - int (*inode_permission) (struct inode *inode, int mask); + int (*inode_permission) (struct inode *inode, int mask, unsigned flags); int (*inode_setattr) (struct dentry *dentry, struct iattr *attr); int (*inode_getattr) (struct vfsmount *mnt, struct dentry *dentry); int (*inode_setxattr) (struct dentry *dentry, const char *name, diff --git a/security/capability.c b/security/capability.c index 2984ea4f776f..bbb51156261b 100644 --- a/security/capability.c +++ b/security/capability.c @@ -181,7 +181,7 @@ static int cap_inode_follow_link(struct dentry *dentry, return 0; } -static int cap_inode_permission(struct inode *inode, int mask) +static int cap_inode_permission(struct inode *inode, int mask, unsigned flags) { return 0; } diff --git a/security/security.c b/security/security.c index 101142369db4..4ba6d4cc061f 100644 --- a/security/security.c +++ b/security/security.c @@ -518,16 +518,14 @@ int security_inode_permission(struct inode *inode, int mask) { if (unlikely(IS_PRIVATE(inode))) return 0; - return security_ops->inode_permission(inode, mask); + return security_ops->inode_permission(inode, mask, 0); } int security_inode_exec_permission(struct inode *inode, unsigned int flags) { if (unlikely(IS_PRIVATE(inode))) return 0; - if (flags) - return -ECHILD; - return security_ops->inode_permission(inode, MAY_EXEC); + return security_ops->inode_permission(inode, MAY_EXEC, flags); } int 
security_inode_setattr(struct dentry *dentry, struct iattr *attr) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index f9c3764e4859..a73f4e463774 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2635,7 +2635,7 @@ static int selinux_inode_follow_link(struct dentry *dentry, struct nameidata *na return dentry_has_perm(cred, NULL, dentry, FILE__READ); } -static int selinux_inode_permission(struct inode *inode, int mask) +static int selinux_inode_permission(struct inode *inode, int mask, unsigned flags) { const struct cred *cred = current_cred(); struct common_audit_data ad; @@ -2649,6 +2649,10 @@ static int selinux_inode_permission(struct inode *inode, int mask) if (!mask) return 0; + /* May be droppable after audit */ + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.inode = inode; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index c6f8fcadae07..400a5d5cde61 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -686,7 +686,7 @@ static int smack_inode_rename(struct inode *old_inode, * * Returns 0 if access is permitted, -EACCES otherwise */ -static int smack_inode_permission(struct inode *inode, int mask) +static int smack_inode_permission(struct inode *inode, int mask, unsigned flags) { struct smk_audit_info ad; @@ -696,6 +696,10 @@ static int smack_inode_permission(struct inode *inode, int mask) if (mask == 0) return 0; + + /* May be droppable after audit */ + if (flags & IPERM_FLAG_RCU) + return -ECHILD; smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); smk_ad_setfield_u_fs_inode(&ad, inode); return smk_curacc(smk_of_inode(inode), mask, &ad); -- cgit v1.2.3 From 764b0c4b3256ad4431cb52eaf99c0abe6df0a085 Mon Sep 17 00:00:00 2001 From: Pavan Savoy Date: Fri, 8 Apr 2011 04:57:42 -0500 Subject: drivers:misc:ti-st: handle delayed tty receive When certain technologies shut down their interface without waiting for the acknowledgement from the chip, the receive_buf from the TTY may be invoked a while after the relevant technology has been unregistered. This patch introduces a new flag "is_registered" which maintains the state of the BT, FM and GPS protocols and thereby removes the need to clear the protocol data from ST when a protocol gets unregistered. This fixes corner cases where an HCI RESET is sent down from the Bluetooth stack and the receive_buf is called from the tty some 250ms later, by which time Bluetooth has already unregistered from the system, or where the FM application closes the device without sending a power-off FM command, so that RDS or interrupt data arrives after the driver is unregistered.
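For illustration only (this sketch is not part of the patch), the state-tracking idea reduces to a small user-space C program: the registration flag lives in a separate array from the per-channel data, so a receive callback that fires after unregistration can still be recognised and dropped cleanly. All names and channel numbers below are invented.

#include <stdbool.h>
#include <stdio.h>

#define MAX_CHANNELS 16

struct proto { const char *name; };

static struct proto *chan_list[MAX_CHANNELS];   /* stale pointer may remain */
static bool chan_registered[MAX_CHANNELS];      /* authoritative state */

static void deliver(int chnl, const char *data)
{
	/* Late data: channel known but no longer registered -> drop cleanly. */
	if (!chan_registered[chnl]) {
		printf("chnl %d unregistered, dropping \"%s\"\n", chnl, data);
		return;
	}
	printf("chnl %d (%s) received \"%s\"\n", chnl, chan_list[chnl]->name, data);
}

int main(void)
{
	static struct proto bt = { "BT" };

	chan_list[4] = &bt;
	chan_registered[4] = true;
	deliver(4, "HCI event");

	/* Unregister by clearing the flag only; the pointer is left in place. */
	chan_registered[4] = false;
	deliver(4, "delayed HCI RESET completion");
	return 0;
}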
Signed-off-by: Pavan Savoy Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_core.c | 23 +++++++++++++---------- include/linux/ti_wilink_st.h | 3 ++- 2 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c index 486117f72c9f..f91f82eabda7 100644 --- a/drivers/misc/ti-st/st_core.c +++ b/drivers/misc/ti-st/st_core.c @@ -43,13 +43,15 @@ static void add_channel_to_table(struct st_data_s *st_gdata, pr_info("%s: id %d\n", __func__, new_proto->chnl_id); /* list now has the channel id as index itself */ st_gdata->list[new_proto->chnl_id] = new_proto; + st_gdata->is_registered[new_proto->chnl_id] = true; } static void remove_channel_from_table(struct st_data_s *st_gdata, struct st_proto_s *proto) { pr_info("%s: id %d\n", __func__, proto->chnl_id); - st_gdata->list[proto->chnl_id] = NULL; +/* st_gdata->list[proto->chnl_id] = NULL; */ + st_gdata->is_registered[proto->chnl_id] = false; } /* @@ -104,7 +106,7 @@ void st_send_frame(unsigned char chnl_id, struct st_data_s *st_gdata) if (unlikely (st_gdata == NULL || st_gdata->rx_skb == NULL - || st_gdata->list[chnl_id] == NULL)) { + || st_gdata->is_registered[chnl_id] == false)) { pr_err("chnl_id %d not registered, no data to send?", chnl_id); kfree_skb(st_gdata->rx_skb); @@ -141,14 +143,15 @@ void st_reg_complete(struct st_data_s *st_gdata, char err) unsigned char i = 0; pr_info(" %s ", __func__); for (i = 0; i < ST_MAX_CHANNELS; i++) { - if (likely(st_gdata != NULL && st_gdata->list[i] != NULL && - st_gdata->list[i]->reg_complete_cb != NULL)) { + if (likely(st_gdata != NULL && + st_gdata->is_registered[i] == true && + st_gdata->list[i]->reg_complete_cb != NULL)) { st_gdata->list[i]->reg_complete_cb (st_gdata->list[i]->priv_data, err); pr_info("protocol %d's cb sent %d\n", i, err); if (err) { /* cleanup registered protocol */ st_gdata->protos_registered--; - st_gdata->list[i] = NULL; + st_gdata->is_registered[i] = false; } } } @@ -475,9 +478,9 @@ void kim_st_list_protocols(struct st_data_s *st_gdata, void *buf) { seq_printf(buf, "[%d]\nBT=%c\nFM=%c\nGPS=%c\n", st_gdata->protos_registered, - st_gdata->list[0x04] != NULL ? 'R' : 'U', - st_gdata->list[0x08] != NULL ? 'R' : 'U', - st_gdata->list[0x09] != NULL ? 'R' : 'U'); + st_gdata->is_registered[0x04] == true ? 'R' : 'U', + st_gdata->is_registered[0x08] == true ? 'R' : 'U', + st_gdata->is_registered[0x09] == true ? 
'R' : 'U'); } /********************************************************************/ @@ -504,7 +507,7 @@ long st_register(struct st_proto_s *new_proto) return -EPROTONOSUPPORT; } - if (st_gdata->list[new_proto->chnl_id] != NULL) { + if (st_gdata->is_registered[new_proto->chnl_id] == true) { pr_err("chnl_id %d already registered", new_proto->chnl_id); return -EALREADY; } @@ -563,7 +566,7 @@ long st_register(struct st_proto_s *new_proto) /* check for already registered once more, * since the above check is old */ - if (st_gdata->list[new_proto->chnl_id] != NULL) { + if (st_gdata->is_registered[new_proto->chnl_id] == true) { pr_err(" proto %d already registered ", new_proto->chnl_id); return -EALREADY; diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h index 7071ec5d0118..b004e557caa9 100644 --- a/include/linux/ti_wilink_st.h +++ b/include/linux/ti_wilink_st.h @@ -140,12 +140,12 @@ extern long st_unregister(struct st_proto_s *); */ struct st_data_s { unsigned long st_state; - struct tty_struct *tty; struct sk_buff *tx_skb; #define ST_TX_SENDING 1 #define ST_TX_WAKEUP 2 unsigned long tx_state; struct st_proto_s *list[ST_MAX_CHANNELS]; + bool is_registered[ST_MAX_CHANNELS]; unsigned long rx_state; unsigned long rx_count; struct sk_buff *rx_skb; @@ -155,6 +155,7 @@ struct st_data_s { unsigned char protos_registered; unsigned long ll_state; void *kim_data; + struct tty_struct *tty; }; /* -- cgit v1.2.3 From c8705082404823a5bb3e02a32ba0764399b9e6f2 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Wed, 20 Apr 2011 09:44:46 +0200 Subject: driver core: let dev_set_drvdata return int instead of void as it can fail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before commit b402843 (Driver core: move dev_get/set_drvdata to drivers/base/dd.c) calling dev_set_drvdata with dev=NULL was an unchecked error. After some discussion about what to return in this case removing the check (and so producing a null pointer exception) seems fine. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 7 +++---- include/linux/device.h | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 7e9219b02796..e3a3eff1dacc 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -413,17 +413,16 @@ void *dev_get_drvdata(const struct device *dev) } EXPORT_SYMBOL(dev_get_drvdata); -void dev_set_drvdata(struct device *dev, void *data) +int dev_set_drvdata(struct device *dev, void *data) { int error; - if (!dev) - return; if (!dev->p) { error = device_private_init(dev); if (error) - return; + return error; } dev->p->driver_data = data; + return 0; } EXPORT_SYMBOL(dev_set_drvdata); diff --git a/include/linux/device.h b/include/linux/device.h index 350ceda4de97..2215d013ca96 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -557,7 +557,7 @@ extern int device_move(struct device *dev, struct device *new_parent, extern const char *device_get_devnode(struct device *dev, mode_t *mode, const char **tmp); extern void *dev_get_drvdata(const struct device *dev); -extern void dev_set_drvdata(struct device *dev, void *data); +extern int dev_set_drvdata(struct device *dev, void *data); /* * Root device objects for grouping under /sys/devices -- cgit v1.2.3 From b1c43f82c5aa265442f82dba31ce985ebb7aa71c Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Mon, 21 Mar 2011 12:25:08 +0200 Subject: tty: make receive_buf() return the amout of bytes received it makes it simpler to keep track of the amount of bytes received and simplifies how flush_to_ldisc counts the remaining bytes. It also fixes a bug of lost bytes on n_tty when flushing too many bytes via the USB serial gadget driver. Tested-by: Stefan Bigler Tested-by: Toby Gray Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_ldisc.c | 12 +++++--- drivers/input/serio/serport.c | 10 +++++-- drivers/isdn/gigaset/ser-gigaset.c | 8 +++-- drivers/misc/ti-st/st_core.c | 6 ++-- drivers/net/caif/caif_serial.c | 6 ++-- drivers/net/can/slcan.c | 9 ++++-- drivers/net/hamradio/6pack.c | 8 +++-- drivers/net/hamradio/mkiss.c | 11 ++++--- drivers/net/irda/irtty-sir.c | 16 +++++----- drivers/net/ppp_async.c | 6 ++-- drivers/net/ppp_synctty.c | 6 ++-- drivers/net/slip.c | 11 ++++--- drivers/net/wan/x25_asy.c | 7 +++-- drivers/tty/n_gsm.c | 6 ++-- drivers/tty/n_hdlc.c | 18 ++++++----- drivers/tty/n_r3964.c | 10 ++++--- drivers/tty/n_tty.c | 61 +++++++++----------------------------- drivers/tty/tty_buffer.c | 15 ++++++---- drivers/tty/vt/selection.c | 3 +- include/linux/tty_ldisc.h | 9 +++--- 20 files changed, 125 insertions(+), 113 deletions(-) (limited to 'include/linux') diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 48ad2a7ab080..0d4da5e14ba0 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -357,22 +357,26 @@ static void hci_uart_tty_wakeup(struct tty_struct *tty) * * Return Value: None */ -static void hci_uart_tty_receive(struct tty_struct *tty, const u8 *data, char *flags, int count) +static unsigned int hci_uart_tty_receive(struct tty_struct *tty, + const u8 *data, char *flags, int count) { struct hci_uart *hu = (void *)tty->disc_data; + int received; if (!hu || tty != hu->tty) - return; + return -ENODEV; if (!test_bit(HCI_UART_PROTO_SET, &hu->flags)) - return; + return -EINVAL; spin_lock(&hu->rx_lock); - hu->proto->recv(hu, (void *) data, count); + received = hu->proto->recv(hu, 
(void *) data, count); hu->hdev->stat.byte_rx += count; spin_unlock(&hu->rx_lock); tty_unthrottle(tty); + + return received; } static int hci_uart_register_dev(struct hci_uart *hu) diff --git a/drivers/input/serio/serport.c b/drivers/input/serio/serport.c index 8755f5f3ad37..f3698967edf6 100644 --- a/drivers/input/serio/serport.c +++ b/drivers/input/serio/serport.c @@ -120,17 +120,21 @@ static void serport_ldisc_close(struct tty_struct *tty) * 'interrupt' routine. */ -static void serport_ldisc_receive(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) +static unsigned int serport_ldisc_receive(struct tty_struct *tty, + const unsigned char *cp, char *fp, int count) { struct serport *serport = (struct serport*) tty->disc_data; unsigned long flags; unsigned int ch_flags; + int ret = 0; int i; spin_lock_irqsave(&serport->lock, flags); - if (!test_bit(SERPORT_ACTIVE, &serport->flags)) + if (!test_bit(SERPORT_ACTIVE, &serport->flags)) { + ret = -EINVAL; goto out; + } for (i = 0; i < count; i++) { switch (fp[i]) { @@ -152,6 +156,8 @@ static void serport_ldisc_receive(struct tty_struct *tty, const unsigned char *c out: spin_unlock_irqrestore(&serport->lock, flags); + + return ret == 0 ? count : ret; } /* diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c index 86a5c4f7775e..1d44d470897c 100644 --- a/drivers/isdn/gigaset/ser-gigaset.c +++ b/drivers/isdn/gigaset/ser-gigaset.c @@ -674,7 +674,7 @@ gigaset_tty_ioctl(struct tty_struct *tty, struct file *file, * cflags buffer containing error flags for received characters (ignored) * count number of received characters */ -static void +static unsigned int gigaset_tty_receive(struct tty_struct *tty, const unsigned char *buf, char *cflags, int count) { @@ -683,12 +683,12 @@ gigaset_tty_receive(struct tty_struct *tty, const unsigned char *buf, struct inbuf_t *inbuf; if (!cs) - return; + return -ENODEV; inbuf = cs->inbuf; if (!inbuf) { dev_err(cs->dev, "%s: no inbuf\n", __func__); cs_put(cs); - return; + return -EINVAL; } tail = inbuf->tail; @@ -725,6 +725,8 @@ gigaset_tty_receive(struct tty_struct *tty, const unsigned char *buf, gig_dbg(DEBUG_INTR, "%s-->BH", __func__); gigaset_schedule_event(cs); cs_put(cs); + + return count; } /* diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c index 486117f72c9f..cb98a7da98ef 100644 --- a/drivers/misc/ti-st/st_core.c +++ b/drivers/misc/ti-st/st_core.c @@ -744,8 +744,8 @@ static void st_tty_close(struct tty_struct *tty) pr_debug("%s: done ", __func__); } -static void st_tty_receive(struct tty_struct *tty, const unsigned char *data, - char *tty_flags, int count) +static unsigned int st_tty_receive(struct tty_struct *tty, + const unsigned char *data, char *tty_flags, int count) { #ifdef VERBOSE print_hex_dump(KERN_DEBUG, ">in>", DUMP_PREFIX_NONE, @@ -758,6 +758,8 @@ static void st_tty_receive(struct tty_struct *tty, const unsigned char *data, */ st_recv(tty->disc_data, data, count); pr_debug("done %s", __func__); + + return count; } /* wake-up function called in from the TTY layer diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index 3df0c0f8b8bf..73c7e03617ec 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -167,8 +167,8 @@ static inline void debugfs_tx(struct ser_device *ser, const u8 *data, int size) #endif -static void ldisc_receive(struct tty_struct *tty, const u8 *data, - char *flags, int count) +static unsigned int ldisc_receive(struct tty_struct *tty, + const u8 *data, char 
*flags, int count) { struct sk_buff *skb = NULL; struct ser_device *ser; @@ -215,6 +215,8 @@ static void ldisc_receive(struct tty_struct *tty, const u8 *data, } else ++ser->dev->stats.rx_dropped; update_tty_status(ser); + + return count; } static int handle_tx(struct ser_device *ser) diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index b423965a78d1..c600954998d5 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -425,16 +425,17 @@ static void slc_setup(struct net_device *dev) * in parallel */ -static void slcan_receive_buf(struct tty_struct *tty, +static unsigned int slcan_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct slcan *sl = (struct slcan *) tty->disc_data; + int bytes = count; if (!sl || sl->magic != SLCAN_MAGIC || !netif_running(sl->dev)) - return; + return -ENODEV; /* Read the characters out of the buffer */ - while (count--) { + while (bytes--) { if (fp && *fp++) { if (!test_and_set_bit(SLF_ERROR, &sl->flags)) sl->dev->stats.rx_errors++; @@ -443,6 +444,8 @@ static void slcan_receive_buf(struct tty_struct *tty, } slcan_unesc(sl, *cp++); } + + return count; } /************************************ diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 3e5d0b6b6516..992089639ea4 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -456,7 +456,7 @@ out: * a block of 6pack data has been received, which can now be decapsulated * and sent on to some IP layer for further processing. */ -static void sixpack_receive_buf(struct tty_struct *tty, +static unsigned int sixpack_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct sixpack *sp; @@ -464,11 +464,11 @@ static void sixpack_receive_buf(struct tty_struct *tty, int count1; if (!count) - return; + return 0; sp = sp_get(tty); if (!sp) - return; + return -ENODEV; memcpy(buf, cp, count < sizeof(buf) ? count : sizeof(buf)); @@ -487,6 +487,8 @@ static void sixpack_receive_buf(struct tty_struct *tty, sp_put(sp); tty_unthrottle(tty); + + return count1; } /* diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 4c628393c8b1..0e4f23531140 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -923,13 +923,14 @@ static long mkiss_compat_ioctl(struct tty_struct *tty, struct file *file, * a block of data has been received, which can now be decapsulated * and sent on to the AX.25 layer for further processing. */ -static void mkiss_receive_buf(struct tty_struct *tty, const unsigned char *cp, - char *fp, int count) +static unsigned int mkiss_receive_buf(struct tty_struct *tty, + const unsigned char *cp, char *fp, int count) { struct mkiss *ax = mkiss_get(tty); + int bytes = count; if (!ax) - return; + return -ENODEV; /* * Argh! mtu change time! 
- costs us the packet part received @@ -939,7 +940,7 @@ static void mkiss_receive_buf(struct tty_struct *tty, const unsigned char *cp, ax_changedmtu(ax); /* Read the characters out of the buffer */ - while (count--) { + while (bytes--) { if (fp != NULL && *fp++) { if (!test_and_set_bit(AXF_ERROR, &ax->flags)) ax->dev->stats.rx_errors++; @@ -952,6 +953,8 @@ static void mkiss_receive_buf(struct tty_struct *tty, const unsigned char *cp, mkiss_put(ax); tty_unthrottle(tty); + + return count; } /* diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c index 3352b2443e58..035861d8acb1 100644 --- a/drivers/net/irda/irtty-sir.c +++ b/drivers/net/irda/irtty-sir.c @@ -216,23 +216,23 @@ static int irtty_do_write(struct sir_dev *dev, const unsigned char *ptr, size_t * usbserial: urb-complete-interrupt / softint */ -static void irtty_receive_buf(struct tty_struct *tty, const unsigned char *cp, - char *fp, int count) +static unsigned int irtty_receive_buf(struct tty_struct *tty, + const unsigned char *cp, char *fp, int count) { struct sir_dev *dev; struct sirtty_cb *priv = tty->disc_data; int i; - IRDA_ASSERT(priv != NULL, return;); - IRDA_ASSERT(priv->magic == IRTTY_MAGIC, return;); + IRDA_ASSERT(priv != NULL, return -ENODEV;); + IRDA_ASSERT(priv->magic == IRTTY_MAGIC, return -EINVAL;); if (unlikely(count==0)) /* yes, this happens */ - return; + return 0; dev = priv->dev; if (!dev) { IRDA_WARNING("%s(), not ready yet!\n", __func__); - return; + return -ENODEV; } for (i = 0; i < count; i++) { @@ -242,11 +242,13 @@ static void irtty_receive_buf(struct tty_struct *tty, const unsigned char *cp, if (fp && *fp++) { IRDA_DEBUG(0, "Framing or parity error!\n"); sirdev_receive(dev, NULL, 0); /* notify sir_dev (updating stats) */ - return; + return -EINVAL; } } sirdev_receive(dev, cp, count); + + return count; } /* diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c index a1b82c9c67d2..53872d7d7382 100644 --- a/drivers/net/ppp_async.c +++ b/drivers/net/ppp_async.c @@ -340,7 +340,7 @@ ppp_asynctty_poll(struct tty_struct *tty, struct file *file, poll_table *wait) } /* May sleep, don't call from interrupt level or with interrupts disabled */ -static void +static unsigned int ppp_asynctty_receive(struct tty_struct *tty, const unsigned char *buf, char *cflags, int count) { @@ -348,7 +348,7 @@ ppp_asynctty_receive(struct tty_struct *tty, const unsigned char *buf, unsigned long flags; if (!ap) - return; + return -ENODEV; spin_lock_irqsave(&ap->recv_lock, flags); ppp_async_input(ap, buf, cflags, count); spin_unlock_irqrestore(&ap->recv_lock, flags); @@ -356,6 +356,8 @@ ppp_asynctty_receive(struct tty_struct *tty, const unsigned char *buf, tasklet_schedule(&ap->tsk); ap_put(ap); tty_unthrottle(tty); + + return count; } static void diff --git a/drivers/net/ppp_synctty.c b/drivers/net/ppp_synctty.c index 2573f525f11c..0815790a5cf9 100644 --- a/drivers/net/ppp_synctty.c +++ b/drivers/net/ppp_synctty.c @@ -381,7 +381,7 @@ ppp_sync_poll(struct tty_struct *tty, struct file *file, poll_table *wait) } /* May sleep, don't call from interrupt level or with interrupts disabled */ -static void +static unsigned int ppp_sync_receive(struct tty_struct *tty, const unsigned char *buf, char *cflags, int count) { @@ -389,7 +389,7 @@ ppp_sync_receive(struct tty_struct *tty, const unsigned char *buf, unsigned long flags; if (!ap) - return; + return -ENODEV; spin_lock_irqsave(&ap->recv_lock, flags); ppp_sync_input(ap, buf, cflags, count); spin_unlock_irqrestore(&ap->recv_lock, flags); @@ -397,6 +397,8 @@ 
ppp_sync_receive(struct tty_struct *tty, const unsigned char *buf, tasklet_schedule(&ap->tsk); sp_put(ap); tty_unthrottle(tty); + + return count; } static void diff --git a/drivers/net/slip.c b/drivers/net/slip.c index 86cbb9ea2f26..86718d358395 100644 --- a/drivers/net/slip.c +++ b/drivers/net/slip.c @@ -670,16 +670,17 @@ static void sl_setup(struct net_device *dev) * in parallel */ -static void slip_receive_buf(struct tty_struct *tty, const unsigned char *cp, - char *fp, int count) +static unsigned int slip_receive_buf(struct tty_struct *tty, + const unsigned char *cp, char *fp, int count) { struct slip *sl = tty->disc_data; + int bytes = count; if (!sl || sl->magic != SLIP_MAGIC || !netif_running(sl->dev)) - return; + return -ENODEV; /* Read the characters out of the buffer */ - while (count--) { + while (bytes--) { if (fp && *fp++) { if (!test_and_set_bit(SLF_ERROR, &sl->flags)) sl->dev->stats.rx_errors++; @@ -693,6 +694,8 @@ static void slip_receive_buf(struct tty_struct *tty, const unsigned char *cp, #endif slip_unesc(sl, *cp++); } + + return count; } /************************************ diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 24297b274cd4..40398bf7d036 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -517,17 +517,18 @@ static int x25_asy_close(struct net_device *dev) * and sent on to some IP layer for further processing. */ -static void x25_asy_receive_buf(struct tty_struct *tty, +static unsigned int x25_asy_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct x25_asy *sl = tty->disc_data; + int bytes = count; if (!sl || sl->magic != X25_ASY_MAGIC || !netif_running(sl->dev)) return; /* Read the characters out of the buffer */ - while (count--) { + while (bytes--) { if (fp && *fp++) { if (!test_and_set_bit(SLF_ERROR, &sl->flags)) sl->dev->stats.rx_errors++; @@ -536,6 +537,8 @@ static void x25_asy_receive_buf(struct tty_struct *tty, } x25_asy_unesc(sl, *cp++); } + + return count; } /* diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 47f8cdb207f1..6abc73598847 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2138,8 +2138,8 @@ static void gsmld_detach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) gsm->tty = NULL; } -static void gsmld_receive_buf(struct tty_struct *tty, const unsigned char *cp, - char *fp, int count) +static unsigned int gsmld_receive_buf(struct tty_struct *tty, + const unsigned char *cp, char *fp, int count) { struct gsm_mux *gsm = tty->disc_data; const unsigned char *dp; @@ -2173,6 +2173,8 @@ static void gsmld_receive_buf(struct tty_struct *tty, const unsigned char *cp, } /* FASYNC if needed ? 
*/ /* If clogged call tty_throttle(tty); */ + + return count; } /** diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index cea56033b34c..cac666314aef 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -188,8 +188,8 @@ static unsigned int n_hdlc_tty_poll(struct tty_struct *tty, struct file *filp, poll_table *wait); static int n_hdlc_tty_open(struct tty_struct *tty); static void n_hdlc_tty_close(struct tty_struct *tty); -static void n_hdlc_tty_receive(struct tty_struct *tty, const __u8 *cp, - char *fp, int count); +static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, + const __u8 *cp, char *fp, int count); static void n_hdlc_tty_wakeup(struct tty_struct *tty); #define bset(p,b) ((p)[(b) >> 5] |= (1 << ((b) & 0x1f))) @@ -509,8 +509,8 @@ static void n_hdlc_tty_wakeup(struct tty_struct *tty) * Called by tty low level driver when receive data is available. Data is * interpreted as one HDLC frame. */ -static void n_hdlc_tty_receive(struct tty_struct *tty, const __u8 *data, - char *flags, int count) +static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, + const __u8 *data, char *flags, int count) { register struct n_hdlc *n_hdlc = tty2n_hdlc (tty); register struct n_hdlc_buf *buf; @@ -521,20 +521,20 @@ static void n_hdlc_tty_receive(struct tty_struct *tty, const __u8 *data, /* This can happen if stuff comes in on the backup tty */ if (!n_hdlc || tty != n_hdlc->tty) - return; + return -ENODEV; /* verify line is using HDLC discipline */ if (n_hdlc->magic != HDLC_MAGIC) { printk("%s(%d) line not using HDLC discipline\n", __FILE__,__LINE__); - return; + return -EINVAL; } if ( count>maxframe ) { if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d) rx count>maxframesize, data discarded\n", __FILE__,__LINE__); - return; + return -EINVAL; } /* get a free HDLC buffer */ @@ -550,7 +550,7 @@ static void n_hdlc_tty_receive(struct tty_struct *tty, const __u8 *data, if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d) no more rx buffers, data discarded\n", __FILE__,__LINE__); - return; + return -EINVAL; } /* copy received data to HDLC buffer */ @@ -565,6 +565,8 @@ static void n_hdlc_tty_receive(struct tty_struct *tty, const __u8 *data, if (n_hdlc->tty->fasync != NULL) kill_fasync (&n_hdlc->tty->fasync, SIGIO, POLL_IN); + return count; + } /* end of n_hdlc_tty_receive() */ /** diff --git a/drivers/tty/n_r3964.c b/drivers/tty/n_r3964.c index 5c6c31459a2f..a4bc39c21a43 100644 --- a/drivers/tty/n_r3964.c +++ b/drivers/tty/n_r3964.c @@ -139,8 +139,8 @@ static int r3964_ioctl(struct tty_struct *tty, struct file *file, static void r3964_set_termios(struct tty_struct *tty, struct ktermios *old); static unsigned int r3964_poll(struct tty_struct *tty, struct file *file, struct poll_table_struct *wait); -static void r3964_receive_buf(struct tty_struct *tty, const unsigned char *cp, - char *fp, int count); +static unsigned int r3964_receive_buf(struct tty_struct *tty, + const unsigned char *cp, char *fp, int count); static struct tty_ldisc_ops tty_ldisc_N_R3964 = { .owner = THIS_MODULE, @@ -1239,8 +1239,8 @@ static unsigned int r3964_poll(struct tty_struct *tty, struct file *file, return result; } -static void r3964_receive_buf(struct tty_struct *tty, const unsigned char *cp, - char *fp, int count) +static unsigned int r3964_receive_buf(struct tty_struct *tty, + const unsigned char *cp, char *fp, int count) { struct r3964_info *pInfo = tty->disc_data; const unsigned char *p; @@ -1257,6 +1257,8 @@ static void r3964_receive_buf(struct tty_struct *tty, const unsigned char *cp, } } + + 
return count; } MODULE_LICENSE("GPL"); diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 0ad32888091c..95d0a9c2dd13 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -81,38 +81,6 @@ static inline int tty_put_user(struct tty_struct *tty, unsigned char x, return put_user(x, ptr); } -/** - * n_tty_set__room - receive space - * @tty: terminal - * - * Called by the driver to find out how much data it is - * permitted to feed to the line discipline without any being lost - * and thus to manage flow control. Not serialized. Answers for the - * "instant". - */ - -static void n_tty_set_room(struct tty_struct *tty) -{ - /* tty->read_cnt is not read locked ? */ - int left = N_TTY_BUF_SIZE - tty->read_cnt - 1; - int old_left; - - /* - * If we are doing input canonicalization, and there are no - * pending newlines, let characters through without limit, so - * that erase characters will be handled. Other excess - * characters will be beeped. - */ - if (left <= 0) - left = tty->icanon && !tty->canon_data; - old_left = tty->receive_room; - tty->receive_room = left; - - /* Did this open up the receive buffer? We may need to flip */ - if (left && !old_left) - schedule_work(&tty->buf.work); -} - static void put_tty_queue_nolock(unsigned char c, struct tty_struct *tty) { if (tty->read_cnt < N_TTY_BUF_SIZE) { @@ -184,7 +152,6 @@ static void reset_buffer_flags(struct tty_struct *tty) tty->canon_head = tty->canon_data = tty->erasing = 0; memset(&tty->read_flags, 0, sizeof tty->read_flags); - n_tty_set_room(tty); check_unthrottle(tty); } @@ -1360,17 +1327,19 @@ static void n_tty_write_wakeup(struct tty_struct *tty) * calls one at a time and in order (or using flush_to_ldisc) */ -static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp, - char *fp, int count) +static unsigned int n_tty_receive_buf(struct tty_struct *tty, + const unsigned char *cp, char *fp, int count) { const unsigned char *p; char *f, flags = TTY_NORMAL; int i; char buf[64]; unsigned long cpuflags; + int left; + int ret = 0; if (!tty->read_buf) - return; + return 0; if (tty->real_raw) { spin_lock_irqsave(&tty->read_lock, cpuflags); @@ -1380,6 +1349,7 @@ static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp, memcpy(tty->read_buf + tty->read_head, cp, i); tty->read_head = (tty->read_head + i) & (N_TTY_BUF_SIZE-1); tty->read_cnt += i; + ret += i; cp += i; count -= i; @@ -1389,8 +1359,10 @@ static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp, memcpy(tty->read_buf + tty->read_head, cp, i); tty->read_head = (tty->read_head + i) & (N_TTY_BUF_SIZE-1); tty->read_cnt += i; + ret += i; spin_unlock_irqrestore(&tty->read_lock, cpuflags); } else { + ret = count; for (i = count, p = cp, f = fp; i; i--, p++) { if (f) flags = *f++; @@ -1418,8 +1390,6 @@ static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp, tty->ops->flush_chars(tty); } - n_tty_set_room(tty); - if ((!tty->icanon && (tty->read_cnt >= tty->minimum_to_wake)) || L_EXTPROC(tty)) { kill_fasync(&tty->fasync, SIGIO, POLL_IN); @@ -1432,8 +1402,12 @@ static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp, * mode. We don't want to throttle the driver if we're in * canonical mode and don't have a newline yet! 
*/ - if (tty->receive_room < TTY_THRESHOLD_THROTTLE) + left = N_TTY_BUF_SIZE - tty->read_cnt - 1; + + if (left < TTY_THRESHOLD_THROTTLE) tty_throttle(tty); + + return ret; } int is_ignored(int sig) @@ -1477,7 +1451,6 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old) if (test_bit(TTY_HW_COOK_IN, &tty->flags)) { tty->raw = 1; tty->real_raw = 1; - n_tty_set_room(tty); return; } if (I_ISTRIP(tty) || I_IUCLC(tty) || I_IGNCR(tty) || @@ -1530,7 +1503,6 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old) else tty->real_raw = 0; } - n_tty_set_room(tty); /* The termios change make the tty ready for I/O */ wake_up_interruptible(&tty->write_wait); wake_up_interruptible(&tty->read_wait); @@ -1812,8 +1784,6 @@ do_it_again: retval = -ERESTARTSYS; break; } - /* FIXME: does n_tty_set_room need locking ? */ - n_tty_set_room(tty); timeout = schedule_timeout(timeout); continue; } @@ -1885,10 +1855,8 @@ do_it_again: * longer than TTY_THRESHOLD_UNTHROTTLE in canonical mode, * we won't get any more characters. */ - if (n_tty_chars_in_buffer(tty) <= TTY_THRESHOLD_UNTHROTTLE) { - n_tty_set_room(tty); + if (n_tty_chars_in_buffer(tty) <= TTY_THRESHOLD_UNTHROTTLE) check_unthrottle(tty); - } if (b - buf >= minimum) break; @@ -1910,7 +1878,6 @@ do_it_again: } else if (test_and_clear_bit(TTY_PUSH, &tty->flags)) goto do_it_again; - n_tty_set_room(tty); return retval; } diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index f1a7918d71aa..46de2e075dac 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -416,6 +416,7 @@ static void flush_to_ldisc(struct work_struct *work) struct tty_buffer *head, *tail = tty->buf.tail; int seen_tail = 0; while ((head = tty->buf.head) != NULL) { + int copied; int count; char *char_buf; unsigned char *flag_buf; @@ -442,17 +443,19 @@ static void flush_to_ldisc(struct work_struct *work) line discipline as we want to empty the queue */ if (test_bit(TTY_FLUSHPENDING, &tty->flags)) break; - if (!tty->receive_room || seen_tail) - break; - if (count > tty->receive_room) - count = tty->receive_room; char_buf = head->char_buf_ptr + head->read; flag_buf = head->flag_buf_ptr + head->read; - head->read += count; spin_unlock_irqrestore(&tty->buf.lock, flags); - disc->ops->receive_buf(tty, char_buf, + copied = disc->ops->receive_buf(tty, char_buf, flag_buf, count); spin_lock_irqsave(&tty->buf.lock, flags); + + head->read += copied; + + if (copied == 0 || seen_tail) { + schedule_work(&tty->buf.work); + break; + } } clear_bit(TTY_FLUSHING, &tty->flags); } diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index fb864e7fcd13..67b1d0d7c8ac 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -332,8 +332,7 @@ int paste_selection(struct tty_struct *tty) continue; } count = sel_buffer_lth - pasted; - count = min(count, tty->receive_room); - tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted, + count = tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted, NULL, count); pasted += count; } diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index ff7dc08696a8..5b07792ccb46 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -76,7 +76,7 @@ * tty device. It is solely the responsibility of the line * discipline to handle poll requests. 
* - * void (*receive_buf)(struct tty_struct *, const unsigned char *cp, + * unsigned int (*receive_buf)(struct tty_struct *, const unsigned char *cp, * char *fp, int count); * * This function is called by the low-level tty driver to send @@ -84,7 +84,8 @@ * processing. is a pointer to the buffer of input * character received by the device. is a pointer to a * pointer of flag bytes which indicate whether a character was - * received with a parity error, etc. + * received with a parity error, etc. Returns the amount of bytes + * received. * * void (*write_wakeup)(struct tty_struct *); * @@ -140,8 +141,8 @@ struct tty_ldisc_ops { /* * The following routines are called from below. */ - void (*receive_buf)(struct tty_struct *, const unsigned char *cp, - char *fp, int count); + unsigned int (*receive_buf)(struct tty_struct *, + const unsigned char *cp, char *fp, int count); void (*write_wakeup)(struct tty_struct *); void (*dcd_change)(struct tty_struct *, unsigned int, struct pps_event_time *); -- cgit v1.2.3 From 0911f124bf55357803d53197cc1ae5479f5e37e2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 10 Apr 2011 11:01:51 +0200 Subject: genirq: Forgotten updates/deletions after removal of compat code commit 0c6f8a8b917ad361319c8ace3e9f28e69bfdb4c1 ("genirq: Remove compat code") removed the compat code, but forgot to update some references in comments and delete some of its documentation. Signed-off-by: Geert Uytterhoeven Link: http://lkml.kernel.org/r/%3C1302426113-13808-1-git-send-email-geert%40linux-m68k.org%3E Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 19 +------------------ include/linux/irqdesc.h | 2 +- 2 files changed, 2 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 09a308072f56..a71dd18639fb 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -53,7 +53,7 @@ typedef void (*irq_preflow_handler_t)(struct irq_data *data); * Bits which can be modified via irq_set/clear/modify_status_flags() * IRQ_LEVEL - Interrupt is level type. Will be also * updated in the code when the above trigger - * bits are modified via set_irq_type() + * bits are modified via irq_set_irq_type() * IRQ_PER_CPU - Mark an interrupt PER_CPU. 
Will protect * it from affinity setting * IRQ_NOPROBE - Interrupt cannot be probed by autoprobing @@ -261,23 +261,6 @@ static inline void irqd_clr_chained_irq_inprogress(struct irq_data *d) * struct irq_chip - hardware interrupt chip descriptor * * @name: name for /proc/interrupts - * @startup: deprecated, replaced by irq_startup - * @shutdown: deprecated, replaced by irq_shutdown - * @enable: deprecated, replaced by irq_enable - * @disable: deprecated, replaced by irq_disable - * @ack: deprecated, replaced by irq_ack - * @mask: deprecated, replaced by irq_mask - * @mask_ack: deprecated, replaced by irq_mask_ack - * @unmask: deprecated, replaced by irq_unmask - * @eoi: deprecated, replaced by irq_eoi - * @end: deprecated, will go away with __do_IRQ() - * @set_affinity: deprecated, replaced by irq_set_affinity - * @retrigger: deprecated, replaced by irq_retrigger - * @set_type: deprecated, replaced by irq_set_type - * @set_wake: deprecated, replaced by irq_wake - * @bus_lock: deprecated, replaced by irq_bus_lock - * @bus_sync_unlock: deprecated, replaced by irq_bus_sync_unlock - * * @irq_startup: start up the interrupt (defaults to ->enable if NULL) * @irq_shutdown: shut down the interrupt (defaults to ->disable if NULL) * @irq_enable: enable the interrupt (defaults to chip->unmask if NULL) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index a082905b5ebe..8e1dc8ea5471 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -21,7 +21,7 @@ struct timer_rand_state; * @status: status information * @core_internal_state__do_not_mess_with_it: core internal status information * @depth: disable-depth, for nested irq_disable() calls - * @wake_depth: enable depth, for multiple set_irq_wake() callers + * @wake_depth: enable depth, for multiple irq_set_irq_wake() callers * @irq_count: stats field to detect stalled irqs * @last_unhandled: aging timer for unhandled count * @irqs_unhandled: stats field for spurious unhandled interrupts -- cgit v1.2.3 From 770767787c23040dc152e7ae230597ff55b39470 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 10 Apr 2011 11:01:52 +0200 Subject: genirq: irq_desc: Document preflow_handler and affinity_hint [ tglx: Filled in the FIXME place holders ] Signed-off-by: Geert Uytterhoeven Link: http://lkml.kernel.org/r/%3C1302426113-13808-2-git-send-email-geert%40linux-m68k.org%3E Signed-off-by: Thomas Gleixner --- include/linux/irqdesc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 8e1dc8ea5471..c70b1aa4b93a 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -16,7 +16,8 @@ struct timer_rand_state; * @irq_data: per irq and chip data passed down to chip functions * @timer_rand_state: pointer to timer rand state struct * @kstat_irqs: irq stats per cpu - * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()] + * @handle_irq: highlevel irq-events handler + * @preflow_handler: handler called before the flow handler (currently used by sparc) * @action: the irq action chain * @status: status information * @core_internal_state__do_not_mess_with_it: core internal status information @@ -26,6 +27,7 @@ struct timer_rand_state; * @last_unhandled: aging timer for unhandled count * @irqs_unhandled: stats field for spurious unhandled interrupts * @lock: locking for SMP + * @affinity_hint: hint to user space for preferred irq affinity * @affinity_notify: context for notification of affinity changes * @pending_mask: 
pending rebalanced interrupts * @threads_oneshot: bitfield to handle shared oneshot threads -- cgit v1.2.3 From 7f1b1244e159a8490d7fb13667c6cb7e1e75046b Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 7 Apr 2011 06:01:44 +0900 Subject: genirq: Support per-IRQ thread disabling. This adds support for disabling threading on a per-IRQ basis via the IRQ status instead of the IRQ flow, which is necessary for interrupts that don't follow the natural IRQ flow channels, such as those that are virtually created. The new APIs added are simply: irq_set_thread() irq_set_nothread() which follow the rest of the IRQ status routines. Chained handlers also have IRQ_NOTHREAD set on them automatically, making the lack of threading explicit rather than implicit. Subsequently, the nothread flag can be viewed through the standard genirq debugging facilities. [ tglx: Fixed cleanup fallout ] Signed-off-by: Paul Mundt Link: http://lkml.kernel.org/r/%3C20110406210135.GF18426%40linux-sh.org%3E Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 14 +++++++++++++- kernel/irq/chip.c | 1 + kernel/irq/debug.h | 1 + kernel/irq/manage.c | 3 ++- kernel/irq/settings.h | 17 +++++++++++++++++ 5 files changed, 34 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index a71dd18639fb..39c23786c1db 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -59,6 +59,7 @@ typedef void (*irq_preflow_handler_t)(struct irq_data *data); * IRQ_NOPROBE - Interrupt cannot be probed by autoprobing * IRQ_NOREQUEST - Interrupt cannot be requested via * request_irq() + * IRQ_NOTHREAD - Interrupt cannot be threaded * IRQ_NOAUTOEN - Interrupt is not automatically enabled in * request/setup_irq() * IRQ_NO_BALANCING - Interrupt cannot be balanced (affinity set) @@ -85,6 +86,7 @@ enum { IRQ_NO_BALANCING = (1 << 13), IRQ_MOVE_PCNTXT = (1 << 14), IRQ_NESTED_THREAD = (1 << 15), + IRQ_NOTHREAD = (1 << 16), }; #define IRQF_MODIFY_MASK \ @@ -422,7 +424,7 @@ irq_set_handler(unsigned int irq, irq_flow_handler_t handle) /* * Set a highlevel chained flow handler for a given IRQ. 
* (a chained handler is automatically enabled and set to - * IRQ_NOREQUEST and IRQ_NOPROBE) + * IRQ_NOREQUEST, IRQ_NOPROBE, and IRQ_NOTHREAD) */ static inline void irq_set_chained_handler(unsigned int irq, irq_flow_handler_t handle) @@ -452,6 +454,16 @@ static inline void irq_set_probe(unsigned int irq) irq_modify_status(irq, IRQ_NOPROBE, 0); } +static inline void irq_set_nothread(unsigned int irq) +{ + irq_modify_status(irq, 0, IRQ_NOTHREAD); +} + +static inline void irq_set_thread(unsigned int irq) +{ + irq_modify_status(irq, IRQ_NOTHREAD, 0); +} + static inline void irq_set_nested_thread(unsigned int irq, bool nest) { if (nest) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 4af1e2b244cb..52d856d513ff 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -573,6 +573,7 @@ __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, if (handle != handle_bad_irq && is_chained) { irq_settings_set_noprobe(desc); irq_settings_set_norequest(desc); + irq_settings_set_nothread(desc); irq_startup(desc); } out: diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h index 306cba37e9a5..97a8bfadc88a 100644 --- a/kernel/irq/debug.h +++ b/kernel/irq/debug.h @@ -27,6 +27,7 @@ static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) P(IRQ_PER_CPU); P(IRQ_NOPROBE); P(IRQ_NOREQUEST); + P(IRQ_NOTHREAD); P(IRQ_NOAUTOEN); PS(IRQS_AUTODETECT); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 07c1611f3899..f7ce0021e1c4 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -900,7 +900,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) */ new->handler = irq_nested_primary_handler; } else { - irq_setup_forced_threading(new); + if (irq_settings_can_thread(desc)) + irq_setup_forced_threading(new); } /* diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h index 0d91730b6330..f1667833d444 100644 --- a/kernel/irq/settings.h +++ b/kernel/irq/settings.h @@ -8,6 +8,7 @@ enum { _IRQ_LEVEL = IRQ_LEVEL, _IRQ_NOPROBE = IRQ_NOPROBE, _IRQ_NOREQUEST = IRQ_NOREQUEST, + _IRQ_NOTHREAD = IRQ_NOTHREAD, _IRQ_NOAUTOEN = IRQ_NOAUTOEN, _IRQ_MOVE_PCNTXT = IRQ_MOVE_PCNTXT, _IRQ_NO_BALANCING = IRQ_NO_BALANCING, @@ -20,6 +21,7 @@ enum { #define IRQ_LEVEL GOT_YOU_MORON #define IRQ_NOPROBE GOT_YOU_MORON #define IRQ_NOREQUEST GOT_YOU_MORON +#define IRQ_NOTHREAD GOT_YOU_MORON #define IRQ_NOAUTOEN GOT_YOU_MORON #define IRQ_NESTED_THREAD GOT_YOU_MORON #undef IRQF_MODIFY_MASK @@ -94,6 +96,21 @@ static inline void irq_settings_set_norequest(struct irq_desc *desc) desc->status_use_accessors |= _IRQ_NOREQUEST; } +static inline bool irq_settings_can_thread(struct irq_desc *desc) +{ + return !(desc->status_use_accessors & _IRQ_NOTHREAD); +} + +static inline void irq_settings_clr_nothread(struct irq_desc *desc) +{ + desc->status_use_accessors &= ~_IRQ_NOTHREAD; +} + +static inline void irq_settings_set_nothread(struct irq_desc *desc) +{ + desc->status_use_accessors |= _IRQ_NOTHREAD; +} + static inline bool irq_settings_can_probe(struct irq_desc *desc) { return !(desc->status_use_accessors & _IRQ_NOPROBE); -- cgit v1.2.3 From 7d8280624797bbe2f5170bd3c85c75a8c9c74242 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 3 Apr 2011 11:42:53 +0200 Subject: genirq: Implement a generic interrupt chip Implement a generic interrupt chip, which is configurable and is able to handle the most common irq chip implementations. 
Signed-off-by: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Tested-by: H Hartley Sweeten Tested-by: Tony Lindgren Tested-by; Kevin Hilman --- include/linux/irq.h | 135 ++++++++++++++++++++++++ kernel/irq/Makefile | 1 + kernel/irq/generic-chip.c | 261 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 397 insertions(+) create mode 100644 kernel/irq/generic-chip.c (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 39c23786c1db..2ba2f1216790 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -568,6 +568,141 @@ static inline int irq_reserve_irq(unsigned int irq) return irq_reserve_irqs(irq, 1); } +#ifndef irq_reg_writel +# define irq_reg_writel(val, addr) writel(val, addr) +#endif +#ifndef irq_reg_readl +# define irq_reg_readl(addr) readl(addr) +#endif + +/** + * struct irq_chip_regs - register offsets for struct irq_gci + * @enable: Enable register offset to reg_base + * @disable: Disable register offset to reg_base + * @mask: Mask register offset to reg_base + * @ack: Ack register offset to reg_base + * @eoi: Eoi register offset to reg_base + * @type: Type configuration register offset to reg_base + * @polarity: Polarity configuration register offset to reg_base + */ +struct irq_chip_regs { + unsigned long enable; + unsigned long disable; + unsigned long mask; + unsigned long ack; + unsigned long eoi; + unsigned long type; + unsigned long polarity; +}; + +/** + * struct irq_chip_type - Generic interrupt chip instance for a flow type + * @chip: The real interrupt chip which provides the callbacks + * @regs: Register offsets for this chip + * @handler: Flow handler associated with this chip + * @type: Chip can handle these flow types + * + * A irq_generic_chip can have several instances of irq_chip_type when + * it requires different functions and register offsets for different + * flow types. + */ +struct irq_chip_type { + struct irq_chip chip; + struct irq_chip_regs regs; + irq_flow_handler_t handler; + u32 type; +}; + +/** + * struct irq_chip_generic - Generic irq chip data structure + * @lock: Lock to protect register and cache data access + * @reg_base: Register base address (virtual) + * @irq_base: Interrupt base nr for this chip + * @irq_cnt: Number of interrupts handled by this chip + * @mask_cache: Cached mask register + * @type_cache: Cached type register + * @polarity_cache: Cached polarity register + * @wake_enabled: Interrupt can wakeup from suspend + * @wake_active: Interrupt is marked as an wakeup from suspend source + * @num_ct: Number of available irq_chip_type instances (usually 1) + * @private: Private data for non generic chip callbacks + * @chip_types: Array of interrupt irq_chip_types + * + * Note, that irq_chip_generic can have multiple irq_chip_type + * implementations which can be associated to a particular irq line of + * an irq_chip_generic instance. That allows to share and protect + * state in an irq_chip_generic instance when we need to implement + * different flow mechanisms (level/edge) for it. 
+ */ +struct irq_chip_generic { + raw_spinlock_t lock; + void __iomem *reg_base; + unsigned int irq_base; + unsigned int irq_cnt; + u32 mask_cache; + u32 type_cache; + u32 polarity_cache; + u32 wake_enabled; + u32 wake_active; + unsigned int num_ct; + void *private; + struct irq_chip_type chip_types[0]; +}; + +/** + * enum irq_gc_flags - Initialization flags for generic irq chips + * @IRQ_GC_INIT_MASK_CACHE: Initialize the mask_cache by reading mask reg + * @IRQ_GC_INIT_NESTED_LOCK: Set the lock class of the irqs to nested for + * irq chips which need to call irq_set_wake() on + * the parent irq. Usually GPIO implementations + */ +enum irq_gc_flags { + IRQ_GC_INIT_MASK_CACHE = 1 << 0, + IRQ_GC_INIT_NESTED_LOCK = 1 << 1, +}; + +/* Generic chip callback functions */ +void irq_gc_noop(struct irq_data *d); +void irq_gc_mask_disable_reg(struct irq_data *d); +void irq_gc_mask_set_bit(struct irq_data *d); +void irq_gc_mask_clr_bit(struct irq_data *d); +void irq_gc_unmask_enable_reg(struct irq_data *d); +void irq_gc_ack(struct irq_data *d); +void irq_gc_mask_disable_reg_and_ack(struct irq_data *d); +void irq_gc_eoi(struct irq_data *d); +int irq_gc_set_wake(struct irq_data *d, unsigned int on); + +/* Setup functions for irq_chip_generic */ +struct irq_chip_generic * +irq_alloc_generic_chip(const char *name, int nr_ct, unsigned int irq_base, + void __iomem *reg_base, irq_flow_handler_t handler); +void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, + enum irq_gc_flags flags, unsigned int clr, + unsigned int set); +int irq_setup_alt_chip(struct irq_data *d, unsigned int type); + +static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d) +{ + return container_of(d->chip, struct irq_chip_type, chip); +} + +#define IRQ_MSK(n) (u32)((n) < 32 ? ((1 << (n)) - 1) : UINT_MAX) + +#ifdef CONFIG_SMP +static inline void irq_gc_lock(struct irq_chip_generic *gc) +{ + raw_spin_lock(&gc->lock); +} + +static inline void irq_gc_unlock(struct irq_chip_generic *gc) +{ + raw_spin_unlock(&gc->lock); +} +#else +static inline void irq_gc_lock(struct irq_chip_generic *gc) { } +static inline void irq_gc_unlock(struct irq_chip_generic *gc) { } +#endif + #endif /* CONFIG_GENERIC_HARDIRQS */ #endif /* !CONFIG_S390 */ diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index 54329cd7b3ee..e7a13bd3316a 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -1,5 +1,6 @@ obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o +obj-y += generic-chip.o obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c new file mode 100644 index 000000000000..eb23e5924260 --- /dev/null +++ b/kernel/irq/generic-chip.c @@ -0,0 +1,261 @@ +/* + * Library implementing the most common irq chip callback functions + * + * Copyright (C) 2011, Thomas Gleixner + */ +#include +#include +#include +#include +#include + +#include "internals.h" + +static inline struct irq_chip_regs *cur_regs(struct irq_data *d) +{ + return &container_of(d->chip, struct irq_chip_type, chip)->regs; +} + +/** + * irq_gc_noop - NOOP function + * @d: irq_data + */ +void irq_gc_noop(struct irq_data *d) +{ +} + +/** + * irq_gc_mask_disable_reg - Mask chip via disable register + * @d: irq_data + * + * Chip has separate enable/disable registers instead of a single mask + * register. 
+ */ +void irq_gc_mask_disable_reg(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->disable); + gc->mask_cache &= ~mask; + irq_gc_unlock(gc); +} + +/** + * irq_gc_mask_set_mask_bit - Mask chip via setting bit in mask register + * @d: irq_data + * + * Chip has a single mask register. Values of this register are cached + * and protected by gc->lock + */ +void irq_gc_mask_set_bit(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + gc->mask_cache |= mask; + irq_reg_writel(gc->mask_cache, gc->reg_base + cur_regs(d)->mask); + irq_gc_unlock(gc); +} + +/** + * irq_gc_mask_set_mask_bit - Mask chip via clearing bit in mask register + * @d: irq_data + * + * Chip has a single mask register. Values of this register are cached + * and protected by gc->lock + */ +void irq_gc_mask_clr_bit(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + gc->mask_cache &= ~mask; + irq_reg_writel(gc->mask_cache, gc->reg_base + cur_regs(d)->mask); + irq_gc_unlock(gc); +} + +/** + * irq_gc_unmask_enable_reg - Unmask chip via enable register + * @d: irq_data + * + * Chip has separate enable/disable registers instead of a single mask + * register. + */ +void irq_gc_unmask_enable_reg(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->enable); + gc->mask_cache |= mask; + irq_gc_unlock(gc); +} + +/** + * irq_gc_ack - Ack pending interrupt + * @d: irq_data + */ +void irq_gc_ack(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->ack); + irq_gc_unlock(gc); +} + +/** + * irq_gc_mask_disable_reg_and_ack- Mask and ack pending interrupt + * @d: irq_data + */ +void irq_gc_mask_disable_reg_and_ack(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->mask); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->ack); + irq_gc_unlock(gc); +} + +/** + * irq_gc_eoi - EOI interrupt + * @d: irq_data + */ +void irq_gc_eoi(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->eoi); + irq_gc_unlock(gc); +} + +/** + * irq_gc_set_wake - Set/clr wake bit for an interrupt + * @d: irq_data + * + * For chips where the wake from suspend functionality is not + * configured in a separate register and the wakeup active state is + * just stored in a bitmask. 
+ */ +int irq_gc_set_wake(struct irq_data *d, unsigned int on) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + if (!(mask & gc->wake_enabled)) + return -EINVAL; + + irq_gc_lock(gc); + if (on) + gc->wake_active |= mask; + else + gc->wake_active &= ~mask; + irq_gc_unlock(gc); + return 0; +} + +/** + * irq_alloc_generic_chip - Allocate a generic chip and initialize it + * @name: Name of the irq chip + * @num_ct: Number of irq_chip_type instances associated with this + * @irq_base: Interrupt base nr for this chip + * @reg_base: Register base address (virtual) + * @handler: Default flow handler associated with this chip + * + * Returns an initialized irq_chip_generic structure. The chip defaults + * to the primary (index 0) irq_chip_type and @handler + */ +struct irq_chip_generic * +irq_alloc_generic_chip(const char *name, int num_ct, unsigned int irq_base, + void __iomem *reg_base, irq_flow_handler_t handler) +{ + struct irq_chip_generic *gc; + unsigned long sz = sizeof(*gc) + num_ct * sizeof(struct irq_chip_type); + + gc = kzalloc(sz, GFP_KERNEL); + if (gc) { + raw_spin_lock_init(&gc->lock); + gc->num_ct = num_ct; + gc->irq_base = irq_base; + gc->reg_base = reg_base; + gc->chip_types->chip.name = name; + gc->chip_types->handler = handler; + } + return gc; +} + +/* + * Separate lockdep class for interrupt chip which can nest irq_desc + * lock. + */ +static struct lock_class_key irq_nested_lock_class; + +/** + * irq_setup_generic_chip - Setup a range of interrupts with a generic chip + * @gc: Generic irq chip holding all data + * @msk: Bitmask holding the irqs to initialize relative to gc->irq_base + * @flags: Flags for initialization + * @clr: IRQ_* bits to clear + * @set: IRQ_* bits to set + * + * Set up max. 32 interrupts starting from gc->irq_base. Note, this + * initializes all interrupts to the primary irq_chip_type and its + * associated handler. + */ +void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, + enum irq_gc_flags flags, unsigned int clr, + unsigned int set) +{ + struct irq_chip_type *ct = gc->chip_types; + unsigned int i; + + /* Init mask cache ? */ + if (flags & IRQ_GC_INIT_MASK_CACHE) + gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask); + + for (i = gc->irq_base; msk; msk >>= 1, i++) { + if (!msk & 0x01) + continue; + + if (flags & IRQ_GC_INIT_NESTED_LOCK) + irq_set_lockdep_class(i, &irq_nested_lock_class); + + irq_set_chip_and_handler(i, &ct->chip, ct->handler); + irq_set_chip_data(i, gc); + irq_modify_status(i, clr, set); + } + gc->irq_cnt = i - gc->irq_base; +} + +/** + * irq_setup_alt_chip - Switch to alternative chip + * @d: irq_data for this interrupt + * @type Flow type to be initialized + * + * Only to be called from chip->irq_set_type() callbacks. + */ +int irq_setup_alt_chip(struct irq_data *d, unsigned int type) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = gc->chip_types; + unsigned int i; + + for (i = 0; i < gc->num_ct; i++, ct++) { + if (ct->type & type) { + d->chip = &ct->chip; + irq_data_to_desc(d)->handle_irq = ct->handler; + return 0; + } + } + return -EINVAL; +} -- cgit v1.2.3 From cfefd21e693dca791bf9ecfc9dd3794facad533c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Apr 2011 22:36:08 +0200 Subject: genirq: Add chip suspend and resume callbacks These callbacks are only called in the syscore suspend/resume code on interrupt chips which have been registered via the generic irq chip mechanism. 
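As an illustration of how a driver might use these hooks (a sketch only: the foo_* functions and FOO_ENABLE are invented, and the chip is assumed to have a single enable register whose cached value lives in gc->mask_cache):

static void foo_irq_suspend(struct irq_data *d)
{
	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);

	/* Leave only the wakeup sources enabled while suspended */
	irq_reg_writel(gc->wake_active, gc->reg_base + FOO_ENABLE);
}

static void foo_irq_resume(struct irq_data *d)
{
	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);

	/* Restore the enable bits cached by the generic chip helpers */
	irq_reg_writel(gc->mask_cache, gc->reg_base + FOO_ENABLE);
}

	/* in the chip setup code: */
	ct->chip.irq_suspend = foo_irq_suspend;
	ct->chip.irq_resume = foo_irq_resume;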
Calling those callbacks per irq would be rather icky, but with the generic irq chip mechanism we can call this per registered chip. Signed-off-by: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org --- include/linux/irq.h | 11 ++++++ kernel/irq/generic-chip.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 2ba2f1216790..8b4538446636 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -280,6 +280,9 @@ static inline void irqd_clr_chained_irq_inprogress(struct irq_data *d) * @irq_bus_sync_unlock:function to sync and unlock slow bus (i2c) chips * @irq_cpu_online: configure an interrupt source for a secondary CPU * @irq_cpu_offline: un-configure an interrupt source for a secondary CPU + * @irq_suspend: function called from core code on suspend once per chip + * @irq_resume: function called from core code on resume once per chip + * @irq_pm_shutdown: function called from core code on shutdown once per chip * @irq_print_chip: optional to print special chip info in show_interrupts * @flags: chip specific flags * @@ -309,6 +312,10 @@ struct irq_chip { void (*irq_cpu_online)(struct irq_data *data); void (*irq_cpu_offline)(struct irq_data *data); + void (*irq_suspend)(struct irq_data *data); + void (*irq_resume)(struct irq_data *data); + void (*irq_pm_shutdown)(struct irq_data *data); + void (*irq_print_chip)(struct irq_data *data, struct seq_file *p); unsigned long flags; @@ -626,6 +633,7 @@ struct irq_chip_type { * @wake_active: Interrupt is marked as an wakeup from suspend source * @num_ct: Number of available irq_chip_type instances (usually 1) * @private: Private data for non generic chip callbacks + * @list: List head for keeping track of instances * @chip_types: Array of interrupt irq_chip_types * * Note, that irq_chip_generic can have multiple irq_chip_type @@ -646,6 +654,7 @@ struct irq_chip_generic { u32 wake_active; unsigned int num_ct; void *private; + struct list_head list; struct irq_chip_type chip_types[0]; }; @@ -680,6 +689,8 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, enum irq_gc_flags flags, unsigned int clr, unsigned int set); int irq_setup_alt_chip(struct irq_data *d, unsigned int type); +void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, + unsigned int clr, unsigned int set); static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d) { diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index eb23e5924260..31a9db711906 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -8,9 +8,13 @@ #include #include #include +#include #include "internals.h" +static LIST_HEAD(gc_list); +static DEFINE_RAW_SPINLOCK(gc_lock); + static inline struct irq_chip_regs *cur_regs(struct irq_data *d) { return &container_of(d->chip, struct irq_chip_type, chip)->regs; @@ -219,6 +223,10 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, struct irq_chip_type *ct = gc->chip_types; unsigned int i; + raw_spin_lock(&gc_lock); + list_add_tail(&gc->list, &gc_list); + raw_spin_unlock(&gc_lock); + /* Init mask cache ? 
*/ if (flags & IRQ_GC_INIT_MASK_CACHE) gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask); @@ -259,3 +267,88 @@ int irq_setup_alt_chip(struct irq_data *d, unsigned int type) } return -EINVAL; } + +/** + * irq_remove_generic_chip - Remove a chip + * @gc: Generic irq chip holding all data + * @msk: Bitmask holding the irqs to initialize relative to gc->irq_base + * @clr: IRQ_* bits to clear + * @set: IRQ_* bits to set + * + * Remove up to 32 interrupts starting from gc->irq_base. + */ +void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, + unsigned int clr, unsigned int set) +{ + unsigned int i = gc->irq_base; + + raw_spin_lock(&gc_lock); + list_del(&gc->list); + raw_spin_unlock(&gc_lock); + + for (; msk; msk >>= 1, i++) { + if (!msk & 0x01) + continue; + + /* Remove handler first. That will mask the irq line */ + irq_set_handler(i, NULL); + irq_set_chip(i, &no_irq_chip); + irq_set_chip_data(i, NULL); + irq_modify_status(i, clr, set); + } +} + +#ifdef CONFIG_PM +static int irq_gc_suspend(void) +{ + struct irq_chip_generic *gc; + + list_for_each_entry(gc, &gc_list, list) { + struct irq_chip_type *ct = gc->chip_types; + + if (ct->chip.irq_suspend) + ct->chip.irq_suspend(irq_get_irq_data(gc->irq_base)); + } + return 0; +} + +static void irq_gc_resume(void) +{ + struct irq_chip_generic *gc; + + list_for_each_entry(gc, &gc_list, list) { + struct irq_chip_type *ct = gc->chip_types; + + if (ct->chip.irq_resume) + ct->chip.irq_resume(irq_get_irq_data(gc->irq_base)); + } +} +#else +#define irq_gc_suspend NULL +#define irq_gc_resume NULL +#endif + +static void irq_gc_shutdown(void) +{ + struct irq_chip_generic *gc; + + list_for_each_entry(gc, &gc_list, list) { + struct irq_chip_type *ct = gc->chip_types; + + if (ct->chip.irq_pm_shutdown) + ct->chip.irq_pm_shutdown(irq_get_irq_data(gc->irq_base)); + } +} + +static struct syscore_ops irq_gc_syscore_ops = { + .suspend = irq_gc_suspend, + .resume = irq_gc_resume, + .shutdown = irq_gc_shutdown, +}; + +static int __init irq_gc_init_ops(void) +{ + register_syscore_ops(&irq_gc_syscore_ops); + return 0; +} +device_initcall(irq_gc_init_ops); -- cgit v1.2.3 From 625f2a378e5a10f45fdc37932fc9f8a21676de9e Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 22 Apr 2011 11:19:10 -0600 Subject: sched: Get rid of lock_depth Neil Brown pointed out that lock_depth somehow escaped the BKL removal work. Let's get rid of it now. Note that the perf scripting utilities still have a bunch of code for dealing with common_lock_depth in tracepoints; I have left that in place in case anybody wants to use that code with older kernels. 
Suggested-by: Neil Brown Signed-off-by: Jonathan Corbet Cc: Arnd Bergmann Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110422111910.456c0e84@bike.lwn.net Signed-off-by: Ingo Molnar --- Documentation/trace/kprobetrace.txt | 1 - include/linux/init_task.h | 1 - include/linux/sched.h | 6 ------ kernel/fork.c | 1 - kernel/mutex.c | 7 ------- kernel/sched.c | 11 +---------- kernel/sched_debug.c | 4 ---- kernel/trace/trace_kprobe.c | 1 - tools/perf/Documentation/perf-script-perl.txt | 1 - tools/perf/Documentation/perf-script-python.txt | 1 - 10 files changed, 1 insertion(+), 33 deletions(-) (limited to 'include/linux') diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index 6d27ab8d6e9f..c83bd6b4e6e8 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -120,7 +120,6 @@ format: field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1;signed:0; field:int common_pid; offset:4; size:4; signed:1; - field:int common_lock_depth; offset:8; size:4; signed:1; field:unsigned long __probe_ip; offset:12; size:4; signed:0; field:int __probe_nargs; offset:16; size:4; signed:1; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index caa151fbebb7..689496bb6654 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -134,7 +134,6 @@ extern struct cred init_cred; .stack = &init_thread_info, \ .usage = ATOMIC_INIT(2), \ .flags = PF_KTHREAD, \ - .lock_depth = -1, \ .prio = MAX_PRIO-20, \ .static_prio = MAX_PRIO-20, \ .normal_prio = MAX_PRIO-20, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 171ba24b08a7..013314a56105 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -731,10 +731,6 @@ struct sched_info { /* timestamps */ unsigned long long last_arrival,/* when we last ran on a cpu */ last_queued; /* when we were last queued to run */ -#ifdef CONFIG_SCHEDSTATS - /* BKL stats */ - unsigned int bkl_count; -#endif }; #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ @@ -1190,8 +1186,6 @@ struct task_struct { unsigned int flags; /* per process flags, defined below */ unsigned int ptrace; - int lock_depth; /* BKL lock depth */ - #ifdef CONFIG_SMP struct task_struct *wake_entry; int on_cpu; diff --git a/kernel/fork.c b/kernel/fork.c index e7548dee636b..aca62871a4f9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1103,7 +1103,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, posix_cpu_timers_init(p); - p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); diff --git a/kernel/mutex.c b/kernel/mutex.c index fe4706cb0c5b..2c938e2337cd 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -162,13 +162,6 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, for (;;) { struct task_struct *owner; - /* - * If we own the BKL, then don't spin. The owner of - * the mutex might be waiting on us to release the BKL. - */ - if (unlikely(current->lock_depth >= 0)) - break; - /* * If there's an owner, wait for it to either * release the lock or go to sleep. 
diff --git a/kernel/sched.c b/kernel/sched.c index 8cb0a5769a16..9cde2dd229c9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4121,12 +4121,6 @@ static inline void schedule_debug(struct task_struct *prev) profile_hit(SCHED_PROFILING, __builtin_return_address(0)); schedstat_inc(this_rq(), sched_count); -#ifdef CONFIG_SCHEDSTATS - if (unlikely(prev->lock_depth >= 0)) { - schedstat_inc(this_rq(), rq_sched_info.bkl_count); - schedstat_inc(prev, sched_info.bkl_count); - } -#endif } static void put_prev_task(struct rq *rq, struct task_struct *prev) @@ -5852,11 +5846,8 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) raw_spin_unlock_irqrestore(&rq->lock, flags); /* Set the preempt count _outside_ the spinlocks! */ -#if defined(CONFIG_PREEMPT) - task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0); -#else task_thread_info(idle)->preempt_count = 0; -#endif + /* * The idle tasks have their own, simple scheduling class: */ diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 3669bec6e130..a6710a112b4f 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -296,9 +296,6 @@ static void print_cpu(struct seq_file *m, int cpu) P(ttwu_count); P(ttwu_local); - SEQ_printf(m, " .%-30s: %d\n", "bkl_count", - rq->rq_sched_info.bkl_count); - #undef P #undef P64 #endif @@ -441,7 +438,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) P(se.statistics.wait_count); PN(se.statistics.iowait_sum); P(se.statistics.iowait_count); - P(sched_info.bkl_count); P(se.nr_migrations); P(se.statistics.nr_migrations_cold); P(se.statistics.nr_failed_migrations_affine); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 35d55a386145..f925c45f0afa 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -53,7 +53,6 @@ const char *reserved_field_names[] = { "common_preempt_count", "common_pid", "common_tgid", - "common_lock_depth", FIELD_STRING_IP, FIELD_STRING_RETIP, FIELD_STRING_FUNC, diff --git a/tools/perf/Documentation/perf-script-perl.txt b/tools/perf/Documentation/perf-script-perl.txt index 5bb41e55a3ac..3152cca15501 100644 --- a/tools/perf/Documentation/perf-script-perl.txt +++ b/tools/perf/Documentation/perf-script-perl.txt @@ -63,7 +63,6 @@ The format file for the sched_wakep event defines the following fields field:unsigned char common_flags; field:unsigned char common_preempt_count; field:int common_pid; - field:int common_lock_depth; field:char comm[TASK_COMM_LEN]; field:pid_t pid; diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt index 36b38277422c..471022069119 100644 --- a/tools/perf/Documentation/perf-script-python.txt +++ b/tools/perf/Documentation/perf-script-python.txt @@ -463,7 +463,6 @@ The format file for the sched_wakep event defines the following fields field:unsigned char common_flags; field:unsigned char common_preempt_count; field:int common_pid; - field:int common_lock_depth; field:char comm[TASK_COMM_LEN]; field:pid_t pid; -- cgit v1.2.3 From dea3667bc3c2a0521e8d8855e407a49d9d70028c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 24 Apr 2011 07:58:46 -0700 Subject: vfs: get rid of insane dentry hashing rules The dentry hashing rules have been really quite complicated for a long while, in odd ways. That made functions like __d_drop() very fragile and non-obvious. In particular, whether a dentry was hashed or not was indicated with an explicit DCACHE_UNHASHED bit. 
That's despite the fact that the hash abstraction that the dentries use actually has an 'is this entry hashed or not' model (which is a simple test of the 'pprev' pointer). The reason that was done is that we used the normal 'is this entry unhashed' model to mark whether the dentry had _ever_ been hashed in the dentry hash tables, and that logic goes back many years (commit b3423415fbc2: "dcache: avoid RCU for never-hashed dentries"). That, in turn, meant that __d_drop had totally different unhashing logic for the dentry hash table case and for the anonymous dcache case, because in order to use the "is this dentry hashed" logic as a flag for whether it had ever been on the RCU hash table, we had to unhash such a dentry differently so that we'd never think that it wasn't 'unhashed' and wouldn't be freed correctly. That's just insane. It made the logic really hard to follow, when there were two different kinds of "unhashed" states, and one of them (the one that used "hlist_bl_unhashed()") really had nothing at all to do with being unhashed per se, but with a very subtle lifetime rule instead. So turn all of it around, and make it logical. Instead of having a DCACHE_UNHASHED bit in d_flags to indicate whether the dentry is on the hash chains or not, use the hash chain unhashed logic for that. Suddenly "d_unhashed()" just uses "hlist_bl_unhashed()", and everything makes sense. And for the lifetime rule, just use an explicit DCACHE_RCUACCESS bit. If we ever insert the dentry into the dentry hash table so that it is visible to RCU lookup, we mark it DCACHE_RCUACCESS to show that it now needs the RCU lifetime rules. Now suddenly that test at dentry free time makes sense too. And because unhashing now is sane and doesn't depend on where the dentry got unhashed from (because the dentry hash chain details don't have subtle side effects), we can re-unify the __d_drop() logic and use common code for the unhashing. Also fix one more open-coded hash chain bit_spin_lock() that I missed in the previous chain locking cleanup commit.
Signed-off-by: Linus Torvalds --- fs/dcache.c | 42 ++++++++++++++++-------------------------- include/linux/dcache.h | 4 ++-- 2 files changed, 18 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index 7108c15685dd..d600a0af3b2e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -164,8 +164,8 @@ static void d_free(struct dentry *dentry) if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); - /* if dentry was never inserted into hash, immediate free is OK */ - if (hlist_bl_unhashed(&dentry->d_hash)) + /* if dentry was never visible to RCU, immediate free is OK */ + if (!(dentry->d_flags & DCACHE_RCUACCESS)) __d_free(&dentry->d_u.d_rcu); else call_rcu(&dentry->d_u.d_rcu, __d_free); @@ -327,28 +327,19 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) */ void __d_drop(struct dentry *dentry) { - if (!(dentry->d_flags & DCACHE_UNHASHED)) { + if (!d_unhashed(dentry)) { struct hlist_bl_head *b; - if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) { + if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) b = &dentry->d_sb->s_anon; - spin_lock_bucket(b); - dentry->d_flags |= DCACHE_UNHASHED; - hlist_bl_del_init(&dentry->d_hash); - spin_unlock_bucket(b); - } else { - struct hlist_bl_head *b; + else b = d_hash(dentry->d_parent, dentry->d_name.hash); - spin_lock_bucket(b); - /* - * We may not actually need to put DCACHE_UNHASHED - * manipulations under the hash lock, but follow - * the principle of least surprise. - */ - dentry->d_flags |= DCACHE_UNHASHED; - hlist_bl_del_rcu(&dentry->d_hash); - spin_unlock_bucket(b); - dentry_rcuwalk_barrier(dentry); - } + + spin_lock_bucket(b); + __hlist_bl_del(&dentry->d_hash); + dentry->d_hash.pprev = NULL; + spin_unlock_bucket(b); + + dentry_rcuwalk_barrier(dentry); } } EXPORT_SYMBOL(__d_drop); @@ -1301,7 +1292,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) dname[name->len] = 0; dentry->d_count = 1; - dentry->d_flags = DCACHE_UNHASHED; + dentry->d_flags = 0; spin_lock_init(&dentry->d_lock); seqcount_init(&dentry->d_seq); dentry->d_inode = NULL; @@ -1603,10 +1594,9 @@ struct dentry *d_obtain_alias(struct inode *inode) tmp->d_inode = inode; tmp->d_flags |= DCACHE_DISCONNECTED; list_add(&tmp->d_alias, &inode->i_dentry); - bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first); - tmp->d_flags &= ~DCACHE_UNHASHED; + spin_lock_bucket(&tmp->d_sb->s_anon); hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); - __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first); + spin_unlock_bucket(&tmp->d_sb->s_anon); spin_unlock(&tmp->d_lock); spin_unlock(&inode->i_lock); security_d_instantiate(tmp, inode); @@ -2087,7 +2077,7 @@ static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b) { BUG_ON(!d_unhashed(entry)); spin_lock_bucket(b); - entry->d_flags &= ~DCACHE_UNHASHED; + entry->d_flags |= DCACHE_RCUACCESS; hlist_bl_add_head_rcu(&entry->d_hash, b); spin_unlock_bucket(b); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index f2afed4fa945..19d90a55541d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -197,7 +197,7 @@ struct dentry_operations { * typically using d_splice_alias. */ #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. 
*/ -#define DCACHE_UNHASHED 0x0010 +#define DCACHE_RCUACCESS 0x0010 /* Entry has ever been RCU-visible */ #define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched by inotify */ @@ -384,7 +384,7 @@ extern struct dentry *dget_parent(struct dentry *dentry); static inline int d_unhashed(struct dentry *dentry) { - return (dentry->d_flags & DCACHE_UNHASHED); + return hlist_bl_unhashed(&dentry->d_hash); } static inline int d_unlinked(struct dentry *dentry) -- cgit v1.2.3 From 3f7ac1d6671ebca7a955853f7127c937f7befbd3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Mar 2011 11:14:25 +0100 Subject: libata: Kill unused ATA_DFLAG_{H|D}IPM flags ATA_DFLAG_{H|D}IPM flags are no longer used. Kill them. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 7f675aa81d87..3b4d223a6aff 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -137,8 +137,6 @@ enum { ATA_DFLAG_ACPI_PENDING = (1 << 5), /* ACPI resume action pending */ ATA_DFLAG_ACPI_FAILED = (1 << 6), /* ACPI on devcfg has failed */ ATA_DFLAG_AN = (1 << 7), /* AN configured */ - ATA_DFLAG_HIPM = (1 << 8), /* device supports HIPM */ - ATA_DFLAG_DIPM = (1 << 9), /* device supports DIPM */ ATA_DFLAG_DMADIR = (1 << 10), /* device requires DMADIR */ ATA_DFLAG_CFG_MASK = (1 << 12) - 1, -- cgit v1.2.3 From ae01b2493c3bf03c504c32ac4ebb01d528508db3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Mar 2011 11:14:55 +0100 Subject: libata: Implement ATA_FLAG_NO_DIPM and apply it to mcp65 NVIDIA mcp65 family of controllers cause command timeouts when DIPM is used. Implement ATA_FLAG_NO_DIPM and apply it. This problem was reported by Stefan Bader in the following thread. http://thread.gmane.org/gmane.linux.ide/48841 stable: applicable to 2.6.37 and 38.
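Other drivers that run into the same problem can opt out in the same way from their struct ata_port_info. A hypothetical example (the driver name, ops and the other flags are made up; only ATA_FLAG_NO_DIPM comes from this patch):

static const struct ata_port_info bar_port_info = {
	.flags		= ATA_FLAG_SATA | ATA_FLAG_NO_DIPM,	/* never enable DIPM */
	.pio_mask	= ATA_PIO4,
	.udma_mask	= ATA_UDMA6,
	.port_ops	= &bar_ops,	/* assumed defined elsewhere in the driver */
};

With the flag set, the EH code below skips both the host-side check and the device-side SETFEATURES that would otherwise enable DIPM.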
Signed-off-by: Tejun Heo Reported-by: Stefan Bader Cc: stable@kernel.org Signed-off-by: Jeff Garzik --- drivers/ata/ahci.c | 2 +- drivers/ata/libata-eh.c | 6 ++++-- include/linux/libata.h | 1 + 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 39d829cd82dd..cbd38e130f05 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -150,7 +150,7 @@ static const struct ata_port_info ahci_port_info[] = { { AHCI_HFLAGS (AHCI_HFLAG_NO_FPDMA_AA | AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ), - .flags = AHCI_FLAG_COMMON, + .flags = AHCI_FLAG_COMMON | ATA_FLAG_NO_DIPM, .pio_mask = ATA_PIO4, .udma_mask = ATA_UDMA6, .port_ops = &ahci_ops, diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 88cd22fa65cd..f26f2fe3480a 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -3316,6 +3316,7 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, struct ata_eh_context *ehc = &link->eh_context; struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL; enum ata_lpm_policy old_policy = link->lpm_policy; + bool no_dipm = ap->flags & ATA_FLAG_NO_DIPM; unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM; unsigned int err_mask; int rc; @@ -3332,7 +3333,7 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, */ ata_for_each_dev(dev, link, ENABLED) { bool hipm = ata_id_has_hipm(dev->id); - bool dipm = ata_id_has_dipm(dev->id); + bool dipm = ata_id_has_dipm(dev->id) && !no_dipm; /* find the first enabled and LPM enabled devices */ if (!link_dev) @@ -3389,7 +3390,8 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, /* host config updated, enable DIPM if transitioning to MIN_POWER */ ata_for_each_dev(dev, link, ENABLED) { - if (policy == ATA_LPM_MIN_POWER && ata_id_has_dipm(dev->id)) { + if (policy == ATA_LPM_MIN_POWER && !no_dipm && + ata_id_has_dipm(dev->id)) { err_mask = ata_dev_set_feature(dev, SETFEATURES_SATA_ENABLE, SATA_DIPM); if (err_mask && err_mask != AC_ERR_DEV) { diff --git a/include/linux/libata.h b/include/linux/libata.h index 3b4d223a6aff..04f32a3eb26b 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -196,6 +196,7 @@ enum { * management */ ATA_FLAG_SW_ACTIVITY = (1 << 22), /* driver supports sw activity * led */ + ATA_FLAG_NO_DIPM = (1 << 23), /* host not happy with DIPM */ /* bits 24:31 of ap->flags are reserved for LLD specific flags */ -- cgit v1.2.3 From fd954ae124e8a866e9cc1bc3de9a07be5492f608 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 24 Apr 2011 14:28:18 -0400 Subject: NFSv4.1: Don't loop forever in nfs4_proc_create_session If a server for some reason keeps sending NFS4ERR_DELAY errors, we can end up looping forever inside nfs4_proc_create_session, and so the usual mechanisms for detecting if the nfs_client is dead don't work. Fix this by ensuring that we loop inside the nfs4_state_manager thread instead. 
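Roughly, the retry now looks like this from the state manager's point of view (a paraphrase, not the literal nfs4_state_manager() code; the NFSv4.1 path is shown and the surrounding loop is simplified):

	if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
		status = nfs41_init_clientid(clp, cred); /* EXCHANGE_ID, then CREATE_SESSION */
		if (status < 0) {
			/*
			 * -NFS4ERR_DELAY now sleeps one second and re-arms
			 * NFS4CLNT_LEASE_EXPIRED, so the retry happens out
			 * here where a dying nfs_client can be noticed.
			 * NFS4CLNT_LEASE_CONFIRM lets the retry resume at the
			 * confirm/CREATE_SESSION step if the first call
			 * already succeeded.
			 */
			nfs4_set_lease_expired(clp, status);
		}
	}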
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 45 +++++++-------------------------------------- fs/nfs/nfs4state.c | 46 +++++++++++++++++++++++++++++++--------------- include/linux/nfs_fs_sb.h | 1 + 4 files changed, 40 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index e1c261ddd65d..c4a69833dd0d 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -47,6 +47,7 @@ enum nfs4_client_state { NFS4CLNT_LAYOUTRECALL, NFS4CLNT_SESSION_RESET, NFS4CLNT_RECALL_SLOT, + NFS4CLNT_LEASE_CONFIRM, }; enum nfs4_session_state { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 628e35f75307..50c814828fcb 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3754,18 +3754,17 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); if (status != -NFS4ERR_CLID_INUSE) break; - if (signalled()) + if (loop != 0) { + ++clp->cl_id_uniquifier; break; - if (loop++ & 1) - ssleep(clp->cl_lease_time / HZ + 1); - else - if (++clp->cl_id_uniquifier == 0) - break; + } + ++loop; + ssleep(clp->cl_lease_time / HZ + 1); } return status; } -static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, +int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct nfs4_setclientid_res *arg, struct rpc_cred *cred) { @@ -3790,26 +3789,6 @@ static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, return status; } -int nfs4_proc_setclientid_confirm(struct nfs_client *clp, - struct nfs4_setclientid_res *arg, - struct rpc_cred *cred) -{ - long timeout = 0; - int err; - do { - err = _nfs4_proc_setclientid_confirm(clp, arg, cred); - switch (err) { - case 0: - return err; - case -NFS4ERR_RESOURCE: - /* The IBM lawyers misread another document! 
*/ - case -NFS4ERR_DELAY: - err = nfs4_delay(clp->cl_rpcclient, &timeout); - } - } while (err == 0); - return err; -} - struct nfs4_delegreturndata { struct nfs4_delegreturnargs args; struct nfs4_delegreturnres res; @@ -5222,20 +5201,10 @@ int nfs4_proc_create_session(struct nfs_client *clp) int status; unsigned *ptr; struct nfs4_session *session = clp->cl_session; - long timeout = 0; - int err; dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); - do { - status = _nfs4_proc_create_session(clp); - if (status == -NFS4ERR_DELAY) { - err = nfs4_delay(clp->cl_rpcclient, &timeout); - if (err) - status = err; - } - } while (status == -NFS4ERR_DELAY); - + status = _nfs4_proc_create_session(clp); if (status) goto out; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 4dfb34b43ffb..4a810c8b0753 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -64,10 +64,15 @@ static LIST_HEAD(nfs4_clientid_list); int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) { - struct nfs4_setclientid_res clid; + struct nfs4_setclientid_res clid = { + .clientid = clp->cl_clientid, + .confirm = clp->cl_confirm, + }; unsigned short port; int status; + if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state)) + goto do_confirm; port = nfs_callback_tcpport; if (clp->cl_addr.ss_family == AF_INET6) port = nfs_callback_tcpport6; @@ -75,10 +80,14 @@ int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid); if (status != 0) goto out; + clp->cl_clientid = clid.clientid; + clp->cl_confirm = clid.confirm; + set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); +do_confirm: status = nfs4_proc_setclientid_confirm(clp, &clid, cred); if (status != 0) goto out; - clp->cl_clientid = clid.clientid; + clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); nfs4_schedule_state_renewal(clp); out: return status; @@ -230,13 +239,18 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) { int status; + if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state)) + goto do_confirm; nfs4_begin_drain_session(clp); status = nfs4_proc_exchange_id(clp, cred); if (status != 0) goto out; + set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); +do_confirm: status = nfs4_proc_create_session(clp); if (status != 0) goto out; + clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); nfs41_setup_state_renewal(clp); nfs_mark_client_ready(clp, NFS_CS_READY); out: @@ -1584,20 +1598,22 @@ static int nfs4_recall_slot(struct nfs_client *clp) { return 0; } */ static void nfs4_set_lease_expired(struct nfs_client *clp, int status) { - if (nfs4_has_session(clp)) { - switch (status) { - case -NFS4ERR_DELAY: - case -NFS4ERR_CLID_INUSE: - case -EAGAIN: - break; + switch (status) { + case -NFS4ERR_CLID_INUSE: + case -NFS4ERR_STALE_CLIENTID: + clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); + break; + case -NFS4ERR_DELAY: + case -EAGAIN: + ssleep(1); + break; - case -EKEYEXPIRED: - nfs4_warn_keyexpired(clp->cl_hostname); - case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery - * in nfs4_exchange_id */ - default: - return; - } + case -EKEYEXPIRED: + nfs4_warn_keyexpired(clp->cl_hostname); + case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery + * in nfs4_exchange_id */ + default: + return; } set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 216cea5db0aa..87694ca86914 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -47,6 +47,7 @@ struct nfs_client { #ifdef 
CONFIG_NFS_V4 u64 cl_clientid; /* constant */ + nfs4_verifier cl_confirm; /* Clientid verifier */ unsigned long cl_state; spinlock_t cl_lock; -- cgit v1.2.3 From 7494d00c7b826b6ceb79ec33892bd0ef59be5614 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 24 Apr 2011 14:28:45 -0400 Subject: SUNRPC: Allow RPC calls to return ETIMEDOUT instead of EIO On occasion, it is useful for the NFS layer to distinguish between soft timeouts and other EIO errors due to (say) encoding errors, or authentication errors. The following patch ensures that the default behaviour of the RPC layer remains to return EIO on soft timeouts (until we have audited all the callers). Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 3 ++- net/sunrpc/clnt.c | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 3b94f804b852..f73c482ec9c6 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -128,12 +128,13 @@ struct rpc_task_setup { #define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ #define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */ #define RPC_TASK_SENT 0x0800 /* message was sent */ +#define RPC_TASK_TIMEOUT 0x1000 /* fail with ETIMEDOUT on timeout */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) #define RPC_DO_ROOTOVERRIDE(t) ((t)->tk_flags & RPC_TASK_ROOTCREDS) #define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED) -#define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT) +#define RPC_IS_SOFT(t) ((t)->tk_flags & (RPC_TASK_SOFT|RPC_TASK_TIMEOUT)) #define RPC_IS_SOFTCONN(t) ((t)->tk_flags & RPC_TASK_SOFTCONN) #define RPC_WAS_SENT(t) ((t)->tk_flags & RPC_TASK_SENT) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e7a96e478f63..8d83f9d48713 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1508,7 +1508,10 @@ call_timeout(struct rpc_task *task) if (clnt->cl_chatty) printk(KERN_NOTICE "%s: server %s not responding, timed out\n", clnt->cl_protname, clnt->cl_server); - rpc_exit(task, -EIO); + if (task->tk_flags & RPC_TASK_TIMEOUT) + rpc_exit(task, -ETIMEDOUT); + else + rpc_exit(task, -EIO); return; } -- cgit v1.2.3 From 1c9904297451f558191e211a48d8838b4bf792b0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 21 Apr 2011 17:23:19 -0700 Subject: SECURITY: Move exec_permission RCU checks into security modules Right now all RCU walks fall back to reference walk when CONFIG_SECURITY is enabled, even though just the standard capability module is active. This is because security_inode_exec_permission unconditionally fails RCU walks. Move this decision to the low level security module. This requires passing the RCU flags down the security hook. 
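As a sketch of what this looks like in a module (the "foo" LSM and foo_check_access() are invented; the pattern matches the selinux and smack hunks below), a hook that cannot yet do its work under RCU simply bails out so the VFS falls back to ref-walk:

static int foo_inode_permission(struct inode *inode, int mask, unsigned flags)
{
	/* In RCU-walk mode nothing may block; ask the VFS to retry on ref-walk */
	if (flags & IPERM_FLAG_RCU)
		return -ECHILD;

	return foo_check_access(inode, mask);	/* may sleep, audit, etc. */
}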
This way at least the capability module and a few easy cases in selinux/smack work with RCU walks with CONFIG_SECURITY=y Signed-off-by: Andi Kleen Signed-off-by: Eric Paris --- include/linux/security.h | 2 +- security/capability.c | 2 +- security/security.c | 6 ++---- security/selinux/hooks.c | 6 +++++- security/smack/smack_lsm.c | 6 +++++- 5 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 84a202ac3de9..2f99ecd0fb2a 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1454,7 +1454,7 @@ struct security_operations { struct inode *new_dir, struct dentry *new_dentry); int (*inode_readlink) (struct dentry *dentry); int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd); - int (*inode_permission) (struct inode *inode, int mask); + int (*inode_permission) (struct inode *inode, int mask, unsigned flags); int (*inode_setattr) (struct dentry *dentry, struct iattr *attr); int (*inode_getattr) (struct vfsmount *mnt, struct dentry *dentry); int (*inode_setxattr) (struct dentry *dentry, const char *name, diff --git a/security/capability.c b/security/capability.c index ab3d807accc3..56bb1605fd79 100644 --- a/security/capability.c +++ b/security/capability.c @@ -181,7 +181,7 @@ static int cap_inode_follow_link(struct dentry *dentry, return 0; } -static int cap_inode_permission(struct inode *inode, int mask) +static int cap_inode_permission(struct inode *inode, int mask, unsigned flags) { return 0; } diff --git a/security/security.c b/security/security.c index 47b8a447118f..7e34f98bf433 100644 --- a/security/security.c +++ b/security/security.c @@ -514,16 +514,14 @@ int security_inode_permission(struct inode *inode, int mask) { if (unlikely(IS_PRIVATE(inode))) return 0; - return security_ops->inode_permission(inode, mask); + return security_ops->inode_permission(inode, mask, 0); } int security_inode_exec_permission(struct inode *inode, unsigned int flags) { if (unlikely(IS_PRIVATE(inode))) return 0; - if (flags) - return -ECHILD; - return security_ops->inode_permission(inode, MAY_EXEC); + return security_ops->inode_permission(inode, MAY_EXEC, flags); } int security_inode_setattr(struct dentry *dentry, struct iattr *attr) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 7a630a8a5cef..9a220be17a3f 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2635,7 +2635,7 @@ static int selinux_inode_follow_link(struct dentry *dentry, struct nameidata *na return dentry_has_perm(cred, NULL, dentry, FILE__READ); } -static int selinux_inode_permission(struct inode *inode, int mask) +static int selinux_inode_permission(struct inode *inode, int mask, unsigned flags) { const struct cred *cred = current_cred(); struct common_audit_data ad; @@ -2649,6 +2649,10 @@ static int selinux_inode_permission(struct inode *inode, int mask) if (!mask) return 0; + /* May be droppable after audit */ + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.inode = inode; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 23c7a6d0c80c..42fcb47747a3 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -686,7 +686,7 @@ static int smack_inode_rename(struct inode *old_inode, * * Returns 0 if access is permitted, -EACCES otherwise */ -static int smack_inode_permission(struct inode *inode, int mask) +static int smack_inode_permission(struct inode *inode, int mask, unsigned flags) { struct 
smk_audit_info ad; @@ -696,6 +696,10 @@ static int smack_inode_permission(struct inode *inode, int mask) */ if (mask == 0) return 0; + + /* May be droppable after audit */ + if (flags & IPERM_FLAG_RCU) + return -ECHILD; smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); smk_ad_setfield_u_fs_inode(&ad, inode); return smk_curacc(smk_of_inode(inode), mask, &ad); -- cgit v1.2.3 From bf26c018490c2fce7fe9b629083b96ce0e6ad019 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 7 Apr 2011 16:53:20 +0200 Subject: ptrace: Prepare to fix racy accesses on task breakpoints When a task is traced and is in a stopped state, the tracer may execute a ptrace request to examine the tracee state and get its task struct. Right after, the tracee can be killed and thus its breakpoints released. This can happen concurrently when the tracer is in the middle of reading or modifying these breakpoints, leading to dereferencing a freed pointer. Hence, to prepare the fix, create a generic breakpoint reference holding API. When a reference on the breakpoints of a task is held, the breakpoints won't be released until the last reference is dropped. After that, no more ptrace request on the task's breakpoints can be serviced for the tracer. Reported-by: Oleg Nesterov Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Will Deacon Cc: Prasad Cc: Paul Mundt Cc: v2.6.33.. Link: http://lkml.kernel.org/r/1302284067-7860-2-git-send-email-fweisbec@gmail.com --- include/linux/ptrace.h | 13 ++++++++++++- include/linux/sched.h | 3 +++ kernel/exit.c | 2 +- kernel/ptrace.c | 17 +++++++++++++++++ 4 files changed, 33 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index a1147e5dd245..9178d5cc0b01 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -189,6 +189,10 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) child->ptrace = current->ptrace; __ptrace_link(child, current->parent); } + +#ifdef CONFIG_HAVE_HW_BREAKPOINT + atomic_set(&child->ptrace_bp_refcnt, 1); +#endif } /** @@ -350,6 +354,13 @@ extern int task_current_syscall(struct task_struct *target, long *callno, unsigned long args[6], unsigned int maxargs, unsigned long *sp, unsigned long *pc); -#endif +#ifdef CONFIG_HAVE_HW_BREAKPOINT +extern int ptrace_get_breakpoints(struct task_struct *tsk); +extern void ptrace_put_breakpoints(struct task_struct *tsk); +#else +static inline void ptrace_put_breakpoints(struct task_struct *tsk) { } +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + +#endif /* __KERNEL */ #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 18d63cea2848..781abd137673 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1537,6 +1537,9 @@ struct task_struct { unsigned long memsw_nr_pages; /* uncharged mem+swap usage */ } memcg_batch; #endif +#ifdef CONFIG_HAVE_HW_BREAKPOINT + atomic_t ptrace_bp_refcnt; +#endif }; /* Future-safe accessor for struct task_struct's cpus_allowed. 
*/ diff --git a/kernel/exit.c b/kernel/exit.c index f5d2f63bae0b..8dd874181542 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1016,7 +1016,7 @@ NORET_TYPE void do_exit(long code) /* * FIXME: do that only when needed, using sched_exit tracepoint */ - flush_ptrace_hw_breakpoint(tsk); + ptrace_put_breakpoints(tsk); exit_notify(tsk, group_dead); #ifdef CONFIG_NUMA diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 0fc1eed28d27..dc7ab65f3b36 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -22,6 +22,7 @@ #include #include #include +#include /* @@ -879,3 +880,19 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, return ret; } #endif /* CONFIG_COMPAT */ + +#ifdef CONFIG_HAVE_HW_BREAKPOINT +int ptrace_get_breakpoints(struct task_struct *tsk) +{ + if (atomic_inc_not_zero(&tsk->ptrace_bp_refcnt)) + return 0; + + return -1; +} + +void ptrace_put_breakpoints(struct task_struct *tsk) +{ + if (atomic_dec_and_test(&tsk->ptrace_bp_refcnt)) + flush_ptrace_hw_breakpoint(tsk); +} +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ -- cgit v1.2.3 From f2f5f2a1cedc803a5a517557d436e6cb10c007de Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Tue, 19 Apr 2011 19:29:01 +0530 Subject: ath9k_hw: Get AHB clock information from ath9k_platform_data Add a bool in ath9k_platform_data to pass AHB clock speed information. Driver needs this to configure PLL on some SOCs. Signed-off-by: Vasanthakumar Thiagarajan Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/hw.h | 2 ++ drivers/net/wireless/ath/ath9k/init.c | 1 + include/linux/ath9k_platform.h | 2 ++ 3 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h index 450b64263bc9..5a4ba09a2f1c 100644 --- a/drivers/net/wireless/ath/ath9k/hw.h +++ b/drivers/net/wireless/ath/ath9k/hw.h @@ -846,6 +846,8 @@ struct ath_hw { /* Enterprise mode cap */ u32 ent_mode; + + bool is_clk_25mhz; }; struct ath_bus_ops { diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index 1ac8318d82a3..e78b6aefa108 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -574,6 +574,7 @@ static int ath9k_init_softc(u16 devid, struct ath_softc *sc, u16 subsysid, sc->sc_ah->gpio_mask = pdata->gpio_mask; sc->sc_ah->gpio_val = pdata->gpio_val; sc->sc_ah->led_pin = pdata->led_pin; + ah->is_clk_25mhz = pdata->is_clk_25mhz; } common = ath9k_hw_common(ah); diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h index 020387a114e3..60a7c49dcb49 100644 --- a/include/linux/ath9k_platform.h +++ b/include/linux/ath9k_platform.h @@ -28,6 +28,8 @@ struct ath9k_platform_data { int led_pin; u32 gpio_mask; u32 gpio_val; + + bool is_clk_25mhz; }; #endif /* _LINUX_ATH9K_PLATFORM_H */ -- cgit v1.2.3 From 9f2e731d1d278d853def1567735d8a823668a3c8 Mon Sep 17 00:00:00 2001 From: RafaÅ‚ MiÅ‚ecki Date: Wed, 20 Apr 2011 11:12:30 +0200 Subject: ssb: cc: add & fix defines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We probably got false positive results for checking PLL being down. Signed-off-by: RafaÅ‚ MiÅ‚ecki Signed-off-by: John W. 
Linville --- include/linux/ssb/ssb_driver_chipcommon.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ssb/ssb_driver_chipcommon.h b/include/linux/ssb/ssb_driver_chipcommon.h index 2cdf249b4e5f..4f2d77a0c021 100644 --- a/include/linux/ssb/ssb_driver_chipcommon.h +++ b/include/linux/ssb/ssb_driver_chipcommon.h @@ -131,6 +131,9 @@ #define SSB_CHIPCO_GPIOIRQ 0x0074 #define SSB_CHIPCO_WATCHDOG 0x0080 #define SSB_CHIPCO_GPIOTIMER 0x0088 /* LED powersave (corerev >= 16) */ +#define SSB_CHIPCO_GPIOTIMER_OFFTIME 0x0000FFFF +#define SSB_CHIPCO_GPIOTIMER_OFFTIME_SHIFT 0 +#define SSB_CHIPCO_GPIOTIMER_ONTIME 0xFFFF0000 #define SSB_CHIPCO_GPIOTIMER_ONTIME_SHIFT 16 #define SSB_CHIPCO_GPIOTOUTM 0x008C /* LED powersave (corerev >= 16) */ #define SSB_CHIPCO_CLOCK_N 0x0090 @@ -189,8 +192,10 @@ #define SSB_CHIPCO_CLKCTLST_HAVEALPREQ 0x00000008 /* ALP available request */ #define SSB_CHIPCO_CLKCTLST_HAVEHTREQ 0x00000010 /* HT available request */ #define SSB_CHIPCO_CLKCTLST_HWCROFF 0x00000020 /* Force HW clock request off */ -#define SSB_CHIPCO_CLKCTLST_HAVEHT 0x00010000 /* HT available */ -#define SSB_CHIPCO_CLKCTLST_HAVEALP 0x00020000 /* APL available */ +#define SSB_CHIPCO_CLKCTLST_HAVEALP 0x00010000 /* ALP available */ +#define SSB_CHIPCO_CLKCTLST_HAVEHT 0x00020000 /* HT available */ +#define SSB_CHIPCO_CLKCTLST_4328A0_HAVEHT 0x00010000 /* 4328a0 has reversed bits */ +#define SSB_CHIPCO_CLKCTLST_4328A0_HAVEALP 0x00020000 /* 4328a0 has reversed bits */ #define SSB_CHIPCO_HW_WORKAROUND 0x01E4 /* Hardware workaround (rev >= 20) */ #define SSB_CHIPCO_UART0_DATA 0x0300 #define SSB_CHIPCO_UART0_IMR 0x0304 -- cgit v1.2.3 From 9835a30e980561082beb02ce724f6e555787bc19 Mon Sep 17 00:00:00 2001 From: RafaÅ‚ MiÅ‚ecki Date: Sun, 24 Apr 2011 11:04:19 +0200 Subject: ssb: cc: clear GPIOPULL registers on init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: RafaÅ‚ MiÅ‚ecki Signed-off-by: John W. 
Linville --- drivers/ssb/driver_chipcommon.c | 6 ++++++ include/linux/ssb/ssb_driver_chipcommon.h | 2 ++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/drivers/ssb/driver_chipcommon.c b/drivers/ssb/driver_chipcommon.c index 7c031fdc8205..b4b3733aefcf 100644 --- a/drivers/ssb/driver_chipcommon.c +++ b/drivers/ssb/driver_chipcommon.c @@ -260,6 +260,12 @@ void ssb_chipcommon_init(struct ssb_chipcommon *cc) if (cc->dev->id.revision >= 11) cc->status = chipco_read32(cc, SSB_CHIPCO_CHIPSTAT); ssb_dprintk(KERN_INFO PFX "chipcommon status is 0x%x\n", cc->status); + + if (cc->dev->id.revision >= 20) { + chipco_write32(cc, SSB_CHIPCO_GPIOPULLUP, 0); + chipco_write32(cc, SSB_CHIPCO_GPIOPULLDOWN, 0); + } + ssb_pmu_init(cc); chipco_powercontrol_init(cc); ssb_chipco_set_clockmode(cc, SSB_CLKMODE_FAST); diff --git a/include/linux/ssb/ssb_driver_chipcommon.h b/include/linux/ssb/ssb_driver_chipcommon.h index 4f2d77a0c021..a08d693d8324 100644 --- a/include/linux/ssb/ssb_driver_chipcommon.h +++ b/include/linux/ssb/ssb_driver_chipcommon.h @@ -123,6 +123,8 @@ #define SSB_CHIPCO_FLASHDATA 0x0048 #define SSB_CHIPCO_BCAST_ADDR 0x0050 #define SSB_CHIPCO_BCAST_DATA 0x0054 +#define SSB_CHIPCO_GPIOPULLUP 0x0058 /* Rev >= 20 only */ +#define SSB_CHIPCO_GPIOPULLDOWN 0x005C /* Rev >= 20 only */ #define SSB_CHIPCO_GPIOIN 0x0060 #define SSB_CHIPCO_GPIOOUT 0x0064 #define SSB_CHIPCO_GPIOOUTEN 0x0068 -- cgit v1.2.3 From f48b7399840b453e7282b523f535561fe9638a2d Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 25 Apr 2011 12:54:27 -0400 Subject: LSM: split LSM_AUDIT_DATA_FS into _PATH and _INODE The lsm common audit code has wacky contortions making sure which pieces of information are set based on if it was given a path, dentry, or inode. Split this into path and inode to get rid of some of the code complexity. Signed-off-by: Eric Paris Acked-by: Casey Schaufler --- include/linux/lsm_audit.h | 9 ++++----- security/lsm_audit.c | 50 ++++++++++++++++++++++++++-------------------- security/selinux/avc.c | 2 +- security/selinux/hooks.c | 50 +++++++++++++++++++++++----------------------- security/smack/smack.h | 8 ++++---- security/smack/smack_lsm.c | 32 ++++++++++++++--------------- 6 files changed, 78 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 112a55033352..bbaceab83a65 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -27,7 +27,7 @@ /* Auxiliary data to use in generating the audit record. 
*/ struct common_audit_data { char type; -#define LSM_AUDIT_DATA_FS 1 +#define LSM_AUDIT_DATA_PATH 1 #define LSM_AUDIT_DATA_NET 2 #define LSM_AUDIT_DATA_CAP 3 #define LSM_AUDIT_DATA_IPC 4 @@ -35,12 +35,11 @@ struct common_audit_data { #define LSM_AUDIT_DATA_KEY 6 #define LSM_AUDIT_DATA_NONE 7 #define LSM_AUDIT_DATA_KMOD 8 +#define LSM_AUDIT_DATA_INODE 9 struct task_struct *tsk; union { - struct { - struct path path; - struct inode *inode; - } fs; + struct path path; + struct inode *inode; struct { int netif; struct sock *sk; diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 908aa712816a..2e846052cbf4 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -210,7 +210,6 @@ static inline void print_ipv4_addr(struct audit_buffer *ab, __be32 addr, static void dump_common_audit_data(struct audit_buffer *ab, struct common_audit_data *a) { - struct inode *inode = NULL; struct task_struct *tsk = current; if (a->tsk) @@ -229,33 +228,40 @@ static void dump_common_audit_data(struct audit_buffer *ab, case LSM_AUDIT_DATA_CAP: audit_log_format(ab, " capability=%d ", a->u.cap); break; - case LSM_AUDIT_DATA_FS: - if (a->u.fs.path.dentry) { - struct dentry *dentry = a->u.fs.path.dentry; - if (a->u.fs.path.mnt) { - audit_log_d_path(ab, "path=", &a->u.fs.path); - } else { - audit_log_format(ab, " name="); - audit_log_untrustedstring(ab, - dentry->d_name.name); - } - inode = dentry->d_inode; - } else if (a->u.fs.inode) { - struct dentry *dentry; - inode = a->u.fs.inode; - dentry = d_find_alias(inode); - if (dentry) { - audit_log_format(ab, " name="); - audit_log_untrustedstring(ab, - dentry->d_name.name); - dput(dentry); - } + case LSM_AUDIT_DATA_PATH: { + struct dentry *dentry = a->u.path.dentry; + struct inode *inode; + + if (a->u.path.mnt) { + audit_log_d_path(ab, "path=", &a->u.path); + } else { + audit_log_format(ab, " name="); + audit_log_untrustedstring(ab, + dentry->d_name.name); } + inode = dentry->d_inode; if (inode) audit_log_format(ab, " dev=%s ino=%lu", inode->i_sb->s_id, inode->i_ino); break; + } + case LSM_AUDIT_DATA_INODE: { + struct dentry *dentry; + struct inode *inode; + + inode = a->u.inode; + dentry = d_find_alias(inode); + if (dentry) { + audit_log_format(ab, " name="); + audit_log_untrustedstring(ab, + dentry->d_name.name); + dput(dentry); + } + audit_log_format(ab, " dev=%s ino=%lu", inode->i_sb->s_id, + inode->i_ino); + break; + } case LSM_AUDIT_DATA_TASK: tsk = a->u.tsk; if (tsk && tsk->pid) { diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 1d027e29ce8d..ce742f1778e1 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -531,7 +531,7 @@ int avc_audit(u32 ssid, u32 tsid, * during retry. However this is logically just as if the operation * happened a little later. 
*/ - if ((a->type == LSM_AUDIT_DATA_FS) && + if ((a->type == LSM_AUDIT_DATA_INODE) && (flags & IPERM_FLAG_RCU)) return -ECHILD; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index ed5f29aa0a38..ad664d3056eb 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1488,8 +1488,8 @@ static int inode_has_perm(const struct cred *cred, if (!adp) { adp = &ad; - COMMON_AUDIT_DATA_INIT(&ad, FS); - ad.u.fs.inode = inode; + COMMON_AUDIT_DATA_INIT(&ad, INODE); + ad.u.inode = inode; } return avc_has_perm_flags(sid, isec->sid, isec->sclass, perms, adp, flags); @@ -1506,9 +1506,9 @@ static inline int dentry_has_perm(const struct cred *cred, struct inode *inode = dentry->d_inode; struct common_audit_data ad; - COMMON_AUDIT_DATA_INIT(&ad, FS); - ad.u.fs.path.mnt = mnt; - ad.u.fs.path.dentry = dentry; + COMMON_AUDIT_DATA_INIT(&ad, PATH); + ad.u.path.mnt = mnt; + ad.u.path.dentry = dentry; return inode_has_perm(cred, inode, av, &ad, 0); } @@ -1530,8 +1530,8 @@ static int file_has_perm(const struct cred *cred, u32 sid = cred_sid(cred); int rc; - COMMON_AUDIT_DATA_INIT(&ad, FS); - ad.u.fs.path = file->f_path; + COMMON_AUDIT_DATA_INIT(&ad, PATH); + ad.u.path = file->f_path; if (sid != fsec->sid) { rc = avc_has_perm(sid, fsec->sid, @@ -1569,8 +1569,8 @@ static int may_create(struct inode *dir, sid = tsec->sid; newsid = tsec->create_sid; - COMMON_AUDIT_DATA_INIT(&ad, FS); - ad.u.fs.path.dentry = dentry; + COMMON_AUDIT_DATA_INIT(&ad, PATH); + ad.u.path.dentry = dentry; rc = avc_has_perm(sid, dsec->sid, SECCLASS_DIR, DIR__ADD_NAME | DIR__SEARCH, @@ -1621,8 +1621,8 @@ static int may_link(struct inode *dir, dsec = dir->i_security; isec = dentry->d_inode->i_security; - COMMON_AUDIT_DATA_INIT(&ad, FS); - ad.u.fs.path.dentry = dentry; + COMMON_AUDIT_DATA_INIT(&ad, PATH); + ad.u.path.dentry = dentry; av = DIR__SEARCH; av |= (kind ? DIR__REMOVE_NAME : DIR__ADD_NAME); @@ -1667,9 +1667,9 @@ static inline int may_rename(struct inode *old_dir, old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode); new_dsec = new_dir->i_security; - COMMON_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, PATH); - ad.u.fs.path.dentry = old_dentry; + ad.u.path.dentry = old_dentry; rc = avc_has_perm(sid, old_dsec->sid, SECCLASS_DIR, DIR__REMOVE_NAME | DIR__SEARCH, &ad); if (rc) @@ -1685,7 +1685,7 @@ static inline int may_rename(struct inode *old_dir, return rc; } - ad.u.fs.path.dentry = new_dentry; + ad.u.path.dentry = new_dentry; av = DIR__ADD_NAME | DIR__SEARCH; if (new_dentry->d_inode) av |= DIR__REMOVE_NAME; @@ -1991,8 +1991,8 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm) return rc; } - COMMON_AUDIT_DATA_INIT(&ad, FS); - ad.u.fs.path = bprm->file->f_path; + COMMON_AUDIT_DATA_INIT(&ad, PATH); + ad.u.path = bprm->file->f_path; if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) new_tsec->sid = old_tsec->sid; @@ -2120,7 +2120,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, /* Revalidate access to inherited open files. 
*/ - COMMON_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, INODE); spin_lock(&files->file_lock); for (;;) { @@ -2468,8 +2468,8 @@ static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data) if (flags & MS_KERNMOUNT) return 0; - COMMON_AUDIT_DATA_INIT(&ad, FS); - ad.u.fs.path.dentry = sb->s_root; + COMMON_AUDIT_DATA_INIT(&ad, PATH); + ad.u.path.dentry = sb->s_root; return superblock_has_perm(cred, sb, FILESYSTEM__MOUNT, &ad); } @@ -2478,8 +2478,8 @@ static int selinux_sb_statfs(struct dentry *dentry) const struct cred *cred = current_cred(); struct common_audit_data ad; - COMMON_AUDIT_DATA_INIT(&ad, FS); - ad.u.fs.path.dentry = dentry->d_sb->s_root; + COMMON_AUDIT_DATA_INIT(&ad, PATH); + ad.u.path.dentry = dentry->d_sb->s_root; return superblock_has_perm(cred, dentry->d_sb, FILESYSTEM__GETATTR, &ad); } @@ -2653,8 +2653,8 @@ static int selinux_inode_permission(struct inode *inode, int mask, unsigned flag if (!mask) return 0; - COMMON_AUDIT_DATA_INIT(&ad, FS); - ad.u.fs.inode = inode; + COMMON_AUDIT_DATA_INIT(&ad, INODE); + ad.u.inode = inode; if (from_access) ad.selinux_audit_data.auditdeny |= FILE__AUDIT_ACCESS; @@ -2732,8 +2732,8 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name, if (!is_owner_or_cap(inode)) return -EPERM; - COMMON_AUDIT_DATA_INIT(&ad, FS); - ad.u.fs.path.dentry = dentry; + COMMON_AUDIT_DATA_INIT(&ad, PATH); + ad.u.path.dentry = dentry; rc = avc_has_perm(sid, isec->sid, isec->sclass, FILE__RELABELFROM, &ad); diff --git a/security/smack/smack.h b/security/smack/smack.h index b449cfdad21c..a16925c0e91a 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -316,22 +316,22 @@ static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a, static inline void smk_ad_setfield_u_fs_path_dentry(struct smk_audit_info *a, struct dentry *d) { - a->a.u.fs.path.dentry = d; + a->a.u.path.dentry = d; } static inline void smk_ad_setfield_u_fs_path_mnt(struct smk_audit_info *a, struct vfsmount *m) { - a->a.u.fs.path.mnt = m; + a->a.u.path.mnt = m; } static inline void smk_ad_setfield_u_fs_inode(struct smk_audit_info *a, struct inode *i) { - a->a.u.fs.inode = i; + a->a.u.inode = i; } static inline void smk_ad_setfield_u_fs_path(struct smk_audit_info *a, struct path p) { - a->a.u.fs.path = p; + a->a.u.path = p; } static inline void smk_ad_setfield_u_net_sk(struct smk_audit_info *a, struct sock *sk) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 42fcb47747a3..eeb393fbf925 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -383,7 +383,7 @@ static int smack_sb_statfs(struct dentry *dentry) int rc; struct smk_audit_info ad; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path_dentry(&ad, dentry); rc = smk_curacc(sbp->smk_floor, MAY_READ, &ad); @@ -407,7 +407,7 @@ static int smack_sb_mount(char *dev_name, struct path *path, struct superblock_smack *sbp = path->mnt->mnt_sb->s_security; struct smk_audit_info ad; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path(&ad, *path); return smk_curacc(sbp->smk_floor, MAY_WRITE, &ad); @@ -426,7 +426,7 @@ static int smack_sb_umount(struct vfsmount *mnt, int flags) struct superblock_smack *sbp; struct smk_audit_info ad; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path_dentry(&ad, mnt->mnt_root); 
smk_ad_setfield_u_fs_path_mnt(&ad, mnt); @@ -563,7 +563,7 @@ static int smack_inode_link(struct dentry *old_dentry, struct inode *dir, struct smk_audit_info ad; int rc; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path_dentry(&ad, old_dentry); isp = smk_of_inode(old_dentry->d_inode); @@ -592,7 +592,7 @@ static int smack_inode_unlink(struct inode *dir, struct dentry *dentry) struct smk_audit_info ad; int rc; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path_dentry(&ad, dentry); /* @@ -623,7 +623,7 @@ static int smack_inode_rmdir(struct inode *dir, struct dentry *dentry) struct smk_audit_info ad; int rc; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path_dentry(&ad, dentry); /* @@ -663,7 +663,7 @@ static int smack_inode_rename(struct inode *old_inode, char *isp; struct smk_audit_info ad; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path_dentry(&ad, old_dentry); isp = smk_of_inode(old_dentry->d_inode); @@ -700,7 +700,7 @@ static int smack_inode_permission(struct inode *inode, int mask, unsigned flags) /* May be droppable after audit */ if (flags & IPERM_FLAG_RCU) return -ECHILD; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_INODE); smk_ad_setfield_u_fs_inode(&ad, inode); return smk_curacc(smk_of_inode(inode), mask, &ad); } @@ -720,7 +720,7 @@ static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr) */ if (iattr->ia_valid & ATTR_FORCE) return 0; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path_dentry(&ad, dentry); return smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad); @@ -737,7 +737,7 @@ static int smack_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) { struct smk_audit_info ad; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path_dentry(&ad, dentry); smk_ad_setfield_u_fs_path_mnt(&ad, mnt); return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ, &ad); @@ -784,7 +784,7 @@ static int smack_inode_setxattr(struct dentry *dentry, const char *name, } else rc = cap_inode_setxattr(dentry, name, value, size, flags); - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path_dentry(&ad, dentry); if (rc == 0) @@ -845,7 +845,7 @@ static int smack_inode_getxattr(struct dentry *dentry, const char *name) { struct smk_audit_info ad; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path_dentry(&ad, dentry); return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ, &ad); @@ -877,7 +877,7 @@ static int smack_inode_removexattr(struct dentry *dentry, const char *name) } else rc = cap_inode_removexattr(dentry, name); - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path_dentry(&ad, dentry); if (rc == 0) rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad); @@ -1047,7 +1047,7 @@ static int smack_file_ioctl(struct file *file, unsigned int cmd, int rc = 0; struct smk_audit_info ad; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_init(&ad, __func__, 
LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path(&ad, file->f_path); if (_IOC_DIR(cmd) & _IOC_WRITE) @@ -1070,7 +1070,7 @@ static int smack_file_lock(struct file *file, unsigned int cmd) { struct smk_audit_info ad; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path_dentry(&ad, file->f_path.dentry); return smk_curacc(file->f_security, MAY_WRITE, &ad); } @@ -1089,7 +1089,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, struct smk_audit_info ad; int rc; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path(&ad, file->f_path); switch (cmd) { -- cgit v1.2.3 From a269434d2fb48a4d66c1d7bf821b7874b59c5b41 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 25 Apr 2011 13:10:27 -0400 Subject: LSM: separate LSM_AUDIT_DATA_DENTRY from LSM_AUDIT_DATA_PATH This patch separates and audit message that only contains a dentry from one that contains a full path. This allows us to make it harder to misuse the interfaces or for the interfaces to be implemented wrong. Signed-off-by: Eric Paris Acked-by: Casey Schaufler --- include/linux/lsm_audit.h | 2 ++ security/lsm_audit.c | 25 ++++++++++++++++--------- security/selinux/hooks.c | 26 +++++++++++++------------- security/smack/smack.h | 7 +------ security/smack/smack_lsm.c | 34 ++++++++++++++++++++-------------- 5 files changed, 52 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index bbaceab83a65..88e78dedc2e8 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -36,9 +36,11 @@ struct common_audit_data { #define LSM_AUDIT_DATA_NONE 7 #define LSM_AUDIT_DATA_KMOD 8 #define LSM_AUDIT_DATA_INODE 9 +#define LSM_AUDIT_DATA_DENTRY 10 struct task_struct *tsk; union { struct path path; + struct dentry *dentry; struct inode *inode; struct { int netif; diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 2e846052cbf4..893af8a2fa1e 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -229,17 +229,24 @@ static void dump_common_audit_data(struct audit_buffer *ab, audit_log_format(ab, " capability=%d ", a->u.cap); break; case LSM_AUDIT_DATA_PATH: { - struct dentry *dentry = a->u.path.dentry; struct inode *inode; - if (a->u.path.mnt) { - audit_log_d_path(ab, "path=", &a->u.path); - } else { - audit_log_format(ab, " name="); - audit_log_untrustedstring(ab, - dentry->d_name.name); - } - inode = dentry->d_inode; + audit_log_d_path(ab, "path=", &a->u.path); + + inode = a->u.path.dentry->d_inode; + if (inode) + audit_log_format(ab, " dev=%s ino=%lu", + inode->i_sb->s_id, + inode->i_ino); + break; + } + case LSM_AUDIT_DATA_DENTRY: { + struct inode *inode; + + audit_log_format(ab, " name="); + audit_log_untrustedstring(ab, a->u.dentry->d_name.name); + + inode = a->u.dentry->d_inode; if (inode) audit_log_format(ab, " dev=%s ino=%lu", inode->i_sb->s_id, diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index ad664d3056eb..9e8078a42a94 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1569,8 +1569,8 @@ static int may_create(struct inode *dir, sid = tsec->sid; newsid = tsec->create_sid; - COMMON_AUDIT_DATA_INIT(&ad, PATH); - ad.u.path.dentry = dentry; + COMMON_AUDIT_DATA_INIT(&ad, DENTRY); + ad.u.dentry = dentry; rc = avc_has_perm(sid, dsec->sid, SECCLASS_DIR, DIR__ADD_NAME | DIR__SEARCH, @@ -1621,8 +1621,8 @@ static int may_link(struct inode *dir, dsec = 
dir->i_security; isec = dentry->d_inode->i_security; - COMMON_AUDIT_DATA_INIT(&ad, PATH); - ad.u.path.dentry = dentry; + COMMON_AUDIT_DATA_INIT(&ad, DENTRY); + ad.u.dentry = dentry; av = DIR__SEARCH; av |= (kind ? DIR__REMOVE_NAME : DIR__ADD_NAME); @@ -1667,9 +1667,9 @@ static inline int may_rename(struct inode *old_dir, old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode); new_dsec = new_dir->i_security; - COMMON_AUDIT_DATA_INIT(&ad, PATH); + COMMON_AUDIT_DATA_INIT(&ad, DENTRY); - ad.u.path.dentry = old_dentry; + ad.u.dentry = old_dentry; rc = avc_has_perm(sid, old_dsec->sid, SECCLASS_DIR, DIR__REMOVE_NAME | DIR__SEARCH, &ad); if (rc) @@ -1685,7 +1685,7 @@ static inline int may_rename(struct inode *old_dir, return rc; } - ad.u.path.dentry = new_dentry; + ad.u.dentry = new_dentry; av = DIR__ADD_NAME | DIR__SEARCH; if (new_dentry->d_inode) av |= DIR__REMOVE_NAME; @@ -2468,8 +2468,8 @@ static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data) if (flags & MS_KERNMOUNT) return 0; - COMMON_AUDIT_DATA_INIT(&ad, PATH); - ad.u.path.dentry = sb->s_root; + COMMON_AUDIT_DATA_INIT(&ad, DENTRY); + ad.u.dentry = sb->s_root; return superblock_has_perm(cred, sb, FILESYSTEM__MOUNT, &ad); } @@ -2478,8 +2478,8 @@ static int selinux_sb_statfs(struct dentry *dentry) const struct cred *cred = current_cred(); struct common_audit_data ad; - COMMON_AUDIT_DATA_INIT(&ad, PATH); - ad.u.path.dentry = dentry->d_sb->s_root; + COMMON_AUDIT_DATA_INIT(&ad, DENTRY); + ad.u.dentry = dentry->d_sb->s_root; return superblock_has_perm(cred, dentry->d_sb, FILESYSTEM__GETATTR, &ad); } @@ -2732,8 +2732,8 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name, if (!is_owner_or_cap(inode)) return -EPERM; - COMMON_AUDIT_DATA_INIT(&ad, PATH); - ad.u.path.dentry = dentry; + COMMON_AUDIT_DATA_INIT(&ad, DENTRY); + ad.u.dentry = dentry; rc = avc_has_perm(sid, isec->sid, isec->sclass, FILE__RELABELFROM, &ad); diff --git a/security/smack/smack.h b/security/smack/smack.h index a16925c0e91a..2b6c6a516123 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -316,12 +316,7 @@ static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a, static inline void smk_ad_setfield_u_fs_path_dentry(struct smk_audit_info *a, struct dentry *d) { - a->a.u.path.dentry = d; -} -static inline void smk_ad_setfield_u_fs_path_mnt(struct smk_audit_info *a, - struct vfsmount *m) -{ - a->a.u.path.mnt = m; + a->a.u.dentry = d; } static inline void smk_ad_setfield_u_fs_inode(struct smk_audit_info *a, struct inode *i) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index eeb393fbf925..a3bdd1383928 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -383,7 +383,7 @@ static int smack_sb_statfs(struct dentry *dentry) int rc; struct smk_audit_info ad; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_DENTRY); smk_ad_setfield_u_fs_path_dentry(&ad, dentry); rc = smk_curacc(sbp->smk_floor, MAY_READ, &ad); @@ -425,10 +425,13 @@ static int smack_sb_umount(struct vfsmount *mnt, int flags) { struct superblock_smack *sbp; struct smk_audit_info ad; + struct path path; + + path.dentry = mnt->mnt_root; + path.mnt = mnt; smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); - smk_ad_setfield_u_fs_path_dentry(&ad, mnt->mnt_root); - smk_ad_setfield_u_fs_path_mnt(&ad, mnt); + smk_ad_setfield_u_fs_path(&ad, path); sbp = mnt->mnt_sb->s_security; return smk_curacc(sbp->smk_floor, MAY_WRITE, &ad); @@ -563,7 +566,7 @@ static int 
smack_inode_link(struct dentry *old_dentry, struct inode *dir, struct smk_audit_info ad; int rc; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_DENTRY); smk_ad_setfield_u_fs_path_dentry(&ad, old_dentry); isp = smk_of_inode(old_dentry->d_inode); @@ -592,7 +595,7 @@ static int smack_inode_unlink(struct inode *dir, struct dentry *dentry) struct smk_audit_info ad; int rc; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_DENTRY); smk_ad_setfield_u_fs_path_dentry(&ad, dentry); /* @@ -623,7 +626,7 @@ static int smack_inode_rmdir(struct inode *dir, struct dentry *dentry) struct smk_audit_info ad; int rc; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_DENTRY); smk_ad_setfield_u_fs_path_dentry(&ad, dentry); /* @@ -663,7 +666,7 @@ static int smack_inode_rename(struct inode *old_inode, char *isp; struct smk_audit_info ad; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_DENTRY); smk_ad_setfield_u_fs_path_dentry(&ad, old_dentry); isp = smk_of_inode(old_dentry->d_inode); @@ -720,7 +723,7 @@ static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr) */ if (iattr->ia_valid & ATTR_FORCE) return 0; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_DENTRY); smk_ad_setfield_u_fs_path_dentry(&ad, dentry); return smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad); @@ -736,10 +739,13 @@ static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr) static int smack_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) { struct smk_audit_info ad; + struct path path; + + path.dentry = dentry; + path.mnt = mnt; smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); - smk_ad_setfield_u_fs_path_dentry(&ad, dentry); - smk_ad_setfield_u_fs_path_mnt(&ad, mnt); + smk_ad_setfield_u_fs_path(&ad, path); return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ, &ad); } @@ -784,7 +790,7 @@ static int smack_inode_setxattr(struct dentry *dentry, const char *name, } else rc = cap_inode_setxattr(dentry, name, value, size, flags); - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_DENTRY); smk_ad_setfield_u_fs_path_dentry(&ad, dentry); if (rc == 0) @@ -845,7 +851,7 @@ static int smack_inode_getxattr(struct dentry *dentry, const char *name) { struct smk_audit_info ad; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_DENTRY); smk_ad_setfield_u_fs_path_dentry(&ad, dentry); return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ, &ad); @@ -877,7 +883,7 @@ static int smack_inode_removexattr(struct dentry *dentry, const char *name) } else rc = cap_inode_removexattr(dentry, name); - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_DENTRY); smk_ad_setfield_u_fs_path_dentry(&ad, dentry); if (rc == 0) rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad); @@ -1070,7 +1076,7 @@ static int smack_file_lock(struct file *file, unsigned int cmd) { struct smk_audit_info ad; - smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_DENTRY); smk_ad_setfield_u_fs_path_dentry(&ad, file->f_path.dentry); return smk_curacc(file->f_security, MAY_WRITE, &ad); } -- cgit v1.2.3 From ad58671cf32c74a8d6e8f51e63e9cf4e7a73bf1e Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 19 Apr 2011 12:43:45 +0100 Subject: 
Add a strtobool function matching semantics of existing in kernel equivalents This is a rename of the usr_strtobool proposal, which was a renamed, relocated and fixed version of previous kstrtobool RFC Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- include/linux/string.h | 1 + lib/string.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index a716ee2a8adb..a176db2f2c85 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -123,6 +123,7 @@ extern char **argv_split(gfp_t gfp, const char *str, int *argcp); extern void argv_free(char **argv); extern bool sysfs_streq(const char *s1, const char *s2); +extern int strtobool(const char *s, bool *res); #ifdef CONFIG_BINARY_PRINTF int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); diff --git a/lib/string.c b/lib/string.c index f71bead1be3e..01fad9b203e1 100644 --- a/lib/string.c +++ b/lib/string.c @@ -535,6 +535,35 @@ bool sysfs_streq(const char *s1, const char *s2) } EXPORT_SYMBOL(sysfs_streq); +/** + * strtobool - convert common user inputs into boolean values + * @s: input string + * @res: result + * + * This routine returns 0 iff the first character is one of 'Yy1Nn0'. + * Otherwise it will return -EINVAL. Value pointed to by res is + * updated upon finding a match. + */ +int strtobool(const char *s, bool *res) +{ + switch (s[0]) { + case 'y': + case 'Y': + case '1': + *res = true; + break; + case 'n': + case 'N': + case '0': + *res = false; + break; + default: + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL(strtobool); + #ifndef __HAVE_ARCH_MEMSET /** * memset - Fill a region of memory with the given value -- cgit v1.2.3 From 3dd2ee4824b668a635d6d2bb6bc73f33708cab9f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 25 Apr 2011 18:10:58 -0700 Subject: bit_spinlock: don't play preemption games inside the busy loop When we are waiting for the bit-lock to be released, and are looping over the 'cpu_relax()' should not be doing anything else - otherwise we miss the point of trying to do the whole 'cpu_relax()'. Do the preemption enable/disable around the loop, rather than inside of it. Noticed when I was looking at the code generation for the dcache __d_drop usage, and the code just looked very odd. Signed-off-by: Linus Torvalds --- include/linux/bit_spinlock.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bit_spinlock.h b/include/linux/bit_spinlock.h index e612575a2596..b4326bfa684f 100644 --- a/include/linux/bit_spinlock.h +++ b/include/linux/bit_spinlock.h @@ -23,11 +23,11 @@ static inline void bit_spin_lock(int bitnum, unsigned long *addr) preempt_disable(); #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) while (unlikely(test_and_set_bit_lock(bitnum, addr))) { - while (test_bit(bitnum, addr)) { - preempt_enable(); + preempt_enable(); + do { cpu_relax(); - preempt_disable(); - } + } while (test_bit(bitnum, addr)); + preempt_disable(); } #endif __acquire(bitlock); -- cgit v1.2.3 From 1879fd6a26571fd4e8e1f4bb3e7537bc936b1fe7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 25 Apr 2011 14:01:36 -0400 Subject: add hlist_bl_lock/unlock helpers Now that the whole dcache_hash_bucket crap is gone, go all the way and also remove the weird locking layering violations for locking the hash buckets. 
Add hlist_bl_lock/unlock helpers to move the locking into the list abstraction instead of requiring each caller to open code it. After all allowing for the bit locks is the whole point of these helpers over the plain hlist variant. Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- fs/dcache.c | 22 ++++++---------------- fs/gfs2/glock.c | 6 ++---- include/linux/list_bl.h | 11 +++++++++++ 3 files changed, 19 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index d600a0af3b2e..22a0ef41bad1 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -109,16 +109,6 @@ static inline struct hlist_bl_head *d_hash(struct dentry *parent, return dentry_hashtable + (hash & D_HASHMASK); } -static inline void spin_lock_bucket(struct hlist_bl_head *b) -{ - bit_spin_lock(0, (unsigned long *)&b->first); -} - -static inline void spin_unlock_bucket(struct hlist_bl_head *b) -{ - __bit_spin_unlock(0, (unsigned long *)&b->first); -} - /* Statistics gathering. */ struct dentry_stat_t dentry_stat = { .age_limit = 45, @@ -334,10 +324,10 @@ void __d_drop(struct dentry *dentry) else b = d_hash(dentry->d_parent, dentry->d_name.hash); - spin_lock_bucket(b); + hlist_bl_lock(b); __hlist_bl_del(&dentry->d_hash); dentry->d_hash.pprev = NULL; - spin_unlock_bucket(b); + hlist_bl_unlock(b); dentry_rcuwalk_barrier(dentry); } @@ -1594,9 +1584,9 @@ struct dentry *d_obtain_alias(struct inode *inode) tmp->d_inode = inode; tmp->d_flags |= DCACHE_DISCONNECTED; list_add(&tmp->d_alias, &inode->i_dentry); - spin_lock_bucket(&tmp->d_sb->s_anon); + hlist_bl_lock(&tmp->d_sb->s_anon); hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); - spin_unlock_bucket(&tmp->d_sb->s_anon); + hlist_bl_unlock(&tmp->d_sb->s_anon); spin_unlock(&tmp->d_lock); spin_unlock(&inode->i_lock); security_d_instantiate(tmp, inode); @@ -2076,10 +2066,10 @@ EXPORT_SYMBOL(d_delete); static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b) { BUG_ON(!d_unhashed(entry)); - spin_lock_bucket(b); + hlist_bl_lock(b); entry->d_flags |= DCACHE_RCUACCESS; hlist_bl_add_head_rcu(&entry->d_hash, b); - spin_unlock_bucket(b); + hlist_bl_unlock(b); } static void _d_rehash(struct dentry * entry) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index f07643e21bfa..7a4fb630a320 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -93,14 +93,12 @@ static unsigned int gl_hash(const struct gfs2_sbd *sdp, static inline void spin_lock_bucket(unsigned int hash) { - struct hlist_bl_head *bl = &gl_hash_table[hash]; - bit_spin_lock(0, (unsigned long *)bl); + hlist_bl_lock(&gl_hash_table[hash]); } static inline void spin_unlock_bucket(unsigned int hash) { - struct hlist_bl_head *bl = &gl_hash_table[hash]; - __bit_spin_unlock(0, (unsigned long *)bl); + hlist_bl_unlock(&gl_hash_table[hash]); } static void gfs2_glock_dealloc(struct rcu_head *rcu) diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h index 5bad17d1acde..31f9d75adc5b 100644 --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h @@ -2,6 +2,7 @@ #define _LINUX_LIST_BL_H #include +#include /* * Special version of lists, where head of the list has a lock in the lowest @@ -114,6 +115,16 @@ static inline void hlist_bl_del_init(struct hlist_bl_node *n) } } +static inline void hlist_bl_lock(struct hlist_bl_head *b) +{ + bit_spin_lock(0, (unsigned long *)b); +} + +static inline void hlist_bl_unlock(struct hlist_bl_head *b) +{ + __bit_spin_unlock(0, (unsigned long *)b); +} + /** * hlist_bl_for_each_entry - iterate over list of given type * @tpos: the type * to use 
as a loop cursor. -- cgit v1.2.3 From 94403f8863d0d1d2005291b2ef0719c2534aa303 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 24 Apr 2011 08:18:31 +0200 Subject: perf events: Add stalled cycles generic event - PERF_COUNT_HW_STALLED_CYCLES The new PERF_COUNT_HW_STALLED_CYCLES event tries to approximate cycles the CPU does nothing useful, because it is stalled on a cache-miss or some other condition. Acked-by: Peter Zijlstra Acked-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-fue11vymwqsoo5to72jxxjyl@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 3 +++ include/linux/perf_event.h | 1 + tools/perf/util/parse-events.c | 1 + tools/perf/util/python.c | 1 + 4 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 9ae4a2aa7398..efa2704c9dfd 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1413,6 +1413,9 @@ static __init int intel_pmu_init(void) x86_pmu.enable_all = intel_pmu_nhm_enable_all; x86_pmu.extra_regs = intel_nehalem_extra_regs; + /* Install the stalled-cycles event: 0xff: All reasons, 0xa2: Resource stalls */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES] = 0xffa2; + if (ebx & 0x40) { /* * Erratum AAJ80 detected, we work it around by using diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ee9f1e782800..ac636dd20a0c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -52,6 +52,7 @@ enum perf_hw_id { PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, PERF_COUNT_HW_BRANCH_MISSES = 5, PERF_COUNT_HW_BUS_CYCLES = 6, + PERF_COUNT_HW_STALLED_CYCLES = 7, PERF_COUNT_HW_MAX, /* non-ABI */ }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 952b4ae3d954..1869e4c646db 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -38,6 +38,7 @@ static struct event_symbol event_symbols[] = { { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, { CHW(BRANCH_MISSES), "branch-misses", "" }, { CHW(BUS_CYCLES), "bus-cycles", "" }, + { CHW(STALLED_CYCLES), "stalled-cycles", "" }, { CSW(CPU_CLOCK), "cpu-clock", "" }, { CSW(TASK_CLOCK), "task-clock", "" }, diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index f5e38451fdc5..406f613ee619 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -798,6 +798,7 @@ static struct { { "COUNT_HW_BRANCH_INSTRUCTIONS", PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, { "COUNT_HW_BRANCH_MISSES", PERF_COUNT_HW_BRANCH_MISSES }, { "COUNT_HW_BUS_CYCLES", PERF_COUNT_HW_BUS_CYCLES }, + { "COUNT_HW_STALLED_CYCLES", PERF_COUNT_HW_STALLED_CYCLES }, { "COUNT_HW_CACHE_L1D", PERF_COUNT_HW_CACHE_L1D }, { "COUNT_HW_CACHE_L1I", PERF_COUNT_HW_CACHE_L1I }, { "COUNT_HW_CACHE_LL", PERF_COUNT_HW_CACHE_LL }, -- cgit v1.2.3 From 04ad1fb2640a4f23e99ccb705c179d64abac03f2 Mon Sep 17 00:00:00 2001 From: RafaÅ‚ MiÅ‚ecki Date: Sat, 23 Apr 2011 19:30:29 +0200 Subject: ssb: update reject bit for Target State Low MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My 14e4:4315 is SSB_IDLOW_SSBREV_26: read32 0xfaafcff8 -> 0x600422d5 My 14e4:4328 is SSB_IDLOW_SSBREV_24: read32 0xfaafcff8 -> 0x400422c5 My 14e4:432b is SSB_IDLOW_SSBREV_26 again: read32 0xfaafcff8 -> 0x600422d5 For all of them wl driver is using 0x2 reject bit: write32(0xf98) <- 0x00010002 So it seems SSB 2.3 is the exception using 
another bit. Signed-off-by: RafaÅ‚ MiÅ‚ecki Signed-off-by: John W. Linville --- drivers/ssb/main.c | 15 +++++++-------- include/linux/ssb/ssb_regs.h | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c index 74aa2cca7d8c..ad3da93a428c 100644 --- a/drivers/ssb/main.c +++ b/drivers/ssb/main.c @@ -1117,23 +1117,22 @@ static u32 ssb_tmslow_reject_bitmask(struct ssb_device *dev) { u32 rev = ssb_read32(dev, SSB_IDLOW) & SSB_IDLOW_SSBREV; - /* The REJECT bit changed position in TMSLOW between - * Backplane revisions. */ + /* The REJECT bit seems to be different for Backplane rev 2.3 */ switch (rev) { case SSB_IDLOW_SSBREV_22: - return SSB_TMSLOW_REJECT_22; + case SSB_IDLOW_SSBREV_24: + case SSB_IDLOW_SSBREV_26: + return SSB_TMSLOW_REJECT; case SSB_IDLOW_SSBREV_23: return SSB_TMSLOW_REJECT_23; - case SSB_IDLOW_SSBREV_24: /* TODO - find the proper REJECT bits */ - case SSB_IDLOW_SSBREV_25: /* same here */ - case SSB_IDLOW_SSBREV_26: /* same here */ + case SSB_IDLOW_SSBREV_25: /* TODO - find the proper REJECT bit */ case SSB_IDLOW_SSBREV_27: /* same here */ - return SSB_TMSLOW_REJECT_23; /* this is a guess */ + return SSB_TMSLOW_REJECT; /* this is a guess */ default: printk(KERN_INFO "ssb: Backplane Revision 0x%.8X\n", rev); WARN_ON(1); } - return (SSB_TMSLOW_REJECT_22 | SSB_TMSLOW_REJECT_23); + return (SSB_TMSLOW_REJECT | SSB_TMSLOW_REJECT_23); } int ssb_device_is_enabled(struct ssb_device *dev) diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index 402955ae48ce..efbf459d571c 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -97,7 +97,7 @@ #define SSB_INTVEC_ENET1 0x00000040 /* Enable interrupts for enet 1 */ #define SSB_TMSLOW 0x0F98 /* SB Target State Low */ #define SSB_TMSLOW_RESET 0x00000001 /* Reset */ -#define SSB_TMSLOW_REJECT_22 0x00000002 /* Reject (Backplane rev 2.2) */ +#define SSB_TMSLOW_REJECT 0x00000002 /* Reject (Standard Backplane) */ #define SSB_TMSLOW_REJECT_23 0x00000004 /* Reject (Backplane rev 2.3) */ #define SSB_TMSLOW_CLOCK 0x00010000 /* Clock Enable */ #define SSB_TMSLOW_FGC 0x00020000 /* Force Gated Clocks On */ -- cgit v1.2.3 From 304529b1b6f8612ccbb4582e997051b48b94f4a4 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 1 Apr 2011 14:32:09 -0700 Subject: time: Add timekeeping_inject_sleeptime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some platforms cannot implement read_persistent_clock, as their RTC devices are only accessible when interrupts are enabled. This keeps them from being used by the timekeeping code on resume to measure the time in suspend. The RTC layer tries to work around this, by calling do_settimeofday on resume after irqs are reenabled to set the time properly. However, this only corrects CLOCK_REALTIME, and does not properly adjust the sleep time value. This causes btime in /proc/stat to be incorrect as well as making the new CLOCK_BOTTTIME inaccurate. This patch resolves the issue by introducing a new timekeeping hook to allow the RTC layer to inject the sleep time on resume. The code also checks to make sure that read_persistent_clock is nonfunctional before setting the sleep time, so that should the RTC's HCTOSYS option be configured in on a system that does support read_persistent_clock we will not increase the total_sleep_time twice. 
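For illustration only, here is a minimal sketch of how a driver whose persistent clock is unreadable at timekeeping-resume time could feed the measured sleep interval into the new hook. The suspend/resume callbacks and read_my_rtc_seconds() are hypothetical; the actual wiring for the RTC class is in the rtc_suspend()/rtc_resume() changes in the diff that follows.

#include <linux/time.h>
#include <linux/device.h>

/* Hypothetical accessor for an RTC readable only with IRQs enabled. */
extern time_t read_my_rtc_seconds(void);

static struct timespec kernel_ts_at_suspend;
static time_t rtc_secs_at_suspend;

static int example_suspend(struct device *dev)
{
	ktime_get_ts(&kernel_ts_at_suspend);		/* kernel's view of time */
	rtc_secs_at_suspend = read_my_rtc_seconds();	/* hardware's view of time */
	return 0;
}

static int example_resume(struct device *dev)
{
	struct timespec kernel_ts, rtc_delta, sleep;

	ktime_get_ts(&kernel_ts);
	set_normalized_timespec(&rtc_delta,
				read_my_rtc_seconds() - rtc_secs_at_suspend, 0);
	/* Time the RTC saw pass minus time the kernel clock saw pass. */
	sleep = timespec_sub(rtc_delta,
			     timespec_sub(kernel_ts, kernel_ts_at_suspend));
	/* No-op if read_persistent_clock() already accounted for the sleep. */
	timekeeping_inject_sleeptime(&sleep);
	return 0;
}

The subtraction of the kernel-side delta mirrors the rtc_resume() change below, so the time spent running before and after the actual suspend is not counted twice.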
CC: Arve HjønnevÃ¥g CC: Thomas Gleixner Acked-by: Arnd Bergmann Signed-off-by: John Stultz --- drivers/rtc/class.c | 23 ++++++++----------- include/linux/time.h | 1 + kernel/time/timekeeping.c | 56 ++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 63 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 39013867cbd6..4194e59e14cd 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -41,26 +41,21 @@ static void rtc_device_release(struct device *dev) * system's wall clock; restore it on resume(). */ -static struct timespec delta; static time_t oldtime; +static struct timespec oldts; static int rtc_suspend(struct device *dev, pm_message_t mesg) { struct rtc_device *rtc = to_rtc_device(dev); struct rtc_time tm; - struct timespec ts = current_kernel_time(); if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0) return 0; rtc_read_time(rtc, &tm); + ktime_get_ts(&oldts); rtc_tm_to_time(&tm, &oldtime); - /* RTC precision is 1 second; adjust delta for avg 1/2 sec err */ - set_normalized_timespec(&delta, - ts.tv_sec - oldtime, - ts.tv_nsec - (NSEC_PER_SEC >> 1)); - return 0; } @@ -70,10 +65,12 @@ static int rtc_resume(struct device *dev) struct rtc_time tm; time_t newtime; struct timespec time; + struct timespec newts; if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0) return 0; + ktime_get_ts(&newts); rtc_read_time(rtc, &tm); if (rtc_valid_tm(&tm) != 0) { pr_debug("%s: bogus resume time\n", dev_name(&rtc->dev)); @@ -85,15 +82,13 @@ static int rtc_resume(struct device *dev) pr_debug("%s: time travel!\n", dev_name(&rtc->dev)); return 0; } + /* calculate the RTC time delta */ + set_normalized_timespec(&time, newtime - oldtime, 0); - /* restore wall clock using delta against this RTC; - * adjust again for avg 1/2 second RTC sampling error - */ - set_normalized_timespec(&time, - newtime + delta.tv_sec, - (NSEC_PER_SEC >> 1) + delta.tv_nsec); - do_settimeofday(&time); + /* subtract kernel time between rtc_suspend to rtc_resume */ + time = timespec_sub(time, timespec_sub(newts, oldts)); + timekeeping_inject_sleeptime(&time); return 0; } diff --git a/include/linux/time.h b/include/linux/time.h index 454a26205787..4ea5a75fcacd 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -126,6 +126,7 @@ struct timespec __current_kernel_time(void); /* does not take xtime_lock */ struct timespec get_monotonic_coarse(void); void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, struct timespec *wtom, struct timespec *sleep); +void timekeeping_inject_sleeptime(struct timespec *delta); #define CURRENT_TIME (current_kernel_time()) #define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 8ad5d576755e..8e6a05a5915a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -595,6 +595,58 @@ void __init timekeeping_init(void) /* time in seconds when suspend began */ static struct timespec timekeeping_suspend_time; +/** + * __timekeeping_inject_sleeptime - Internal function to add sleep interval + * @delta: pointer to a timespec delta value + * + * Takes a timespec offset measuring a suspend interval and properly + * adds the sleep offset to the timekeeping variables. 
+ */ +static void __timekeeping_inject_sleeptime(struct timespec *delta) +{ + xtime = timespec_add(xtime, *delta); + wall_to_monotonic = timespec_sub(wall_to_monotonic, *delta); + total_sleep_time = timespec_add(total_sleep_time, *delta); +} + + +/** + * timekeeping_inject_sleeptime - Adds suspend interval to timeekeeping values + * @delta: pointer to a timespec delta value + * + * This hook is for architectures that cannot support read_persistent_clock + * because their RTC/persistent clock is only accessible when irqs are enabled. + * + * This function should only be called by rtc_resume(), and allows + * a suspend offset to be injected into the timekeeping values. + */ +void timekeeping_inject_sleeptime(struct timespec *delta) +{ + unsigned long flags; + struct timespec ts; + + /* Make sure we don't set the clock twice */ + read_persistent_clock(&ts); + if (!(ts.tv_sec == 0 && ts.tv_nsec == 0)) + return; + + write_seqlock_irqsave(&xtime_lock, flags); + timekeeping_forward_now(); + + __timekeeping_inject_sleeptime(delta); + + timekeeper.ntp_error = 0; + ntp_clear(); + update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, + timekeeper.mult); + + write_sequnlock_irqrestore(&xtime_lock, flags); + + /* signal hrtimers about time change */ + clock_was_set(); +} + + /** * timekeeping_resume - Resumes the generic timekeeping subsystem. * @@ -615,9 +667,7 @@ static void timekeeping_resume(void) if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { ts = timespec_sub(ts, timekeeping_suspend_time); - xtime = timespec_add(xtime, ts); - wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); - total_sleep_time = timespec_add(total_sleep_time, ts); + __timekeeping_inject_sleeptime(&ts); } /* re-base the last cycle value */ timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); -- cgit v1.2.3 From 88d19cf37952a7e1e38b2bf87a00f0e857e63180 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 3 Jan 2011 18:59:43 -0800 Subject: timers: Add rb_init_node() to allow for stack allocated rb nodes In cases where a timerqueue_node or some structure that utilizes a timerqueue_node is allocated on the stack, gcc would give warnings caused by the timerqueue_init()'s calling RB_CLEAR_NODE, which self-references the nodes uninitialized data. The solution is to create an rb_init_node() function that zeros the rb_node structure out and then calls RB_CLEAR_NODE(), and then call the new init function from timerqueue_init(). 
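As a rough sketch of the case being fixed (the waiter structure and the surrounding wait logic are invented for illustration), a stack-allocated timerqueue user can now be initialized without gcc warning about RB_CLEAR_NODE() reading uninitialized stack data, because timerqueue_init() goes through the new rb_init_node():

#include <linux/timerqueue.h>
#include <linux/ktime.h>

struct example_waiter {
	struct timerqueue_node node;
	int done;
};

static void example_queue_on_stack(struct timerqueue_head *head, ktime_t expires)
{
	struct example_waiter w;	/* lives on the stack */

	timerqueue_init(&w.node);	/* zeroes the rb_node first, no warning */
	w.node.expires = expires;
	w.done = 0;

	timerqueue_add(head, &w.node);
	/* ... sleep until some callback marks w.done ... */
	timerqueue_del(head, &w.node);
}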
CC: Thomas Gleixner Acked-by: Arnd Bergmann Signed-off-by: John Stultz --- include/linux/rbtree.h | 8 ++++++++ include/linux/timerqueue.h | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 7066acb2c530..033b507b33b1 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -136,6 +136,14 @@ static inline void rb_set_color(struct rb_node *rb, int color) #define RB_EMPTY_NODE(node) (rb_parent(node) == node) #define RB_CLEAR_NODE(node) (rb_set_parent(node, node)) +static inline void rb_init_node(struct rb_node *rb) +{ + rb->rb_parent_color = 0; + rb->rb_right = NULL; + rb->rb_left = NULL; + RB_CLEAR_NODE(rb); +} + extern void rb_insert_color(struct rb_node *, struct rb_root *); extern void rb_erase(struct rb_node *, struct rb_root *); diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h index a520fd70a59f..5088727478fd 100644 --- a/include/linux/timerqueue.h +++ b/include/linux/timerqueue.h @@ -39,7 +39,7 @@ struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head) static inline void timerqueue_init(struct timerqueue_node *node) { - RB_CLEAR_NODE(&node->node); + rb_init_node(&node->node); } static inline void timerqueue_init_head(struct timerqueue_head *head) -- cgit v1.2.3 From ff3ead96d17f47ee70c294a5cc2cce9b61e82f0f Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 11 Jan 2011 09:42:13 -0800 Subject: timers: Introduce in-kernel alarm-timer interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This provides the in kernel interface and infrastructure for alarm-timers. Alarm-timers are a hybrid style timer, similar to hrtimers, but when the system is suspended, the RTC device is set to fire and wake the system for when the soonest alarm-timer expires. The concept for Alarm-timers was inspired by the Android Alarm driver (by Arve HjønnevÃ¥g) found in the Android kernel tree. See: http://android.git.kernel.org/?p=kernel/common.git;a=blob;f=drivers/rtc/alarm.c;h=1250edfbdf3302f5e4ea6194847c6ef4bb7beb1c;hb=android-2.6.36 This in-kernel interface should be fairly compatible with the Android alarm driver in-kernel interface, but has the advantage of utilizing the new RTC timerqueue code instead of doing direct RTC manipulation. 
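A minimal usage sketch of the in-kernel interface added below; the callback, the static alarm, and the ten-second values are illustrative rather than taken from the patch:

#include <linux/alarmtimer.h>
#include <linux/ktime.h>
#include <linux/kernel.h>

static struct alarm example_alarm;

static void example_alarm_fired(struct alarm *a)
{
	pr_info("alarmtimer example: fired\n");
}

static void example_arm(void)
{
	/* Absolute expiry on the REALTIME base, then recurring every 10s. */
	alarm_init(&example_alarm, ALARM_REALTIME, example_alarm_fired);
	alarm_start(&example_alarm,
		    ktime_add(ktime_get_real(), ktime_set(10, 0)),
		    ktime_set(10, 0));
}

static void example_disarm(void)
{
	alarm_cancel(&example_alarm);
}

If the system suspends while the alarm is pending, the suspend hook in the patch programs the RTC so the box wakes in time for the expiry.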
CC: Arve HjønnevÃ¥g CC: Thomas Gleixner CC: Alessandro Zummo Acked-by: Arnd Bergmann Signed-off-by: John Stultz --- include/linux/alarmtimer.h | 30 ++++ kernel/time/Makefile | 2 +- kernel/time/alarmtimer.c | 375 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 406 insertions(+), 1 deletion(-) create mode 100644 include/linux/alarmtimer.h create mode 100644 kernel/time/alarmtimer.c (limited to 'include/linux') diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h new file mode 100644 index 000000000000..6b364b2e2074 --- /dev/null +++ b/include/linux/alarmtimer.h @@ -0,0 +1,30 @@ +#ifndef _LINUX_ALARMTIMER_H +#define _LINUX_ALARMTIMER_H + +#include +#include +#include +#include + +enum alarmtimer_type { + ALARM_REALTIME, + ALARM_BOOTTIME, + + ALARM_NUMTYPE, +}; + +struct alarm { + struct timerqueue_node node; + ktime_t period; + void (*function)(struct alarm *); + enum alarmtimer_type type; + char enabled; + void *data; +}; + +void alarm_init(struct alarm *alarm, enum alarmtimer_type type, + void (*function)(struct alarm *)); +void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period); +void alarm_cancel(struct alarm *alarm); + +#endif diff --git a/kernel/time/Makefile b/kernel/time/Makefile index b0425991e9ac..e2fd74b8e8c2 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -1,5 +1,5 @@ obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o -obj-y += timeconv.o posix-clock.o +obj-y += timeconv.o posix-clock.o alarmtimer.o obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c new file mode 100644 index 000000000000..48c2ee949e61 --- /dev/null +++ b/kernel/time/alarmtimer.c @@ -0,0 +1,375 @@ +/* + * Alarmtimer interface + * + * This interface provides a timer which is similarto hrtimers, + * but triggers a RTC alarm if the box is suspend. + * + * This interface is influenced by the Android RTC Alarm timer + * interface. + * + * Copyright (C) 2010 IBM Corperation + * + * Author: John Stultz + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +static struct alarm_base { + spinlock_t lock; + struct timerqueue_head timerqueue; + struct hrtimer timer; + ktime_t (*gettime)(void); + clockid_t base_clockid; + struct work_struct irqwork; +} alarm_bases[ALARM_NUMTYPE]; + +static struct rtc_timer rtctimer; +static struct rtc_device *rtcdev; + +static ktime_t freezer_delta; +static DEFINE_SPINLOCK(freezer_delta_lock); + + +/************************************************************************** + * alarmtimer management code + */ + +/* + * alarmtimer_enqueue - Adds an alarm timer to an alarm_base timerqueue + * @base: pointer to the base where the timer is being run + * @alarm: pointer to alarm being enqueued. + * + * Adds alarm to a alarm_base timerqueue and if necessary sets + * an hrtimer to run. + * + * Must hold base->lock when calling. 
+ */ +static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) +{ + timerqueue_add(&base->timerqueue, &alarm->node); + if (&alarm->node == timerqueue_getnext(&base->timerqueue)) { + hrtimer_try_to_cancel(&base->timer); + hrtimer_start(&base->timer, alarm->node.expires, + HRTIMER_MODE_ABS); + } +} + +/* + * alarmtimer_remove - Removes an alarm timer from an alarm_base timerqueue + * @base: pointer to the base where the timer is running + * @alarm: pointer to alarm being removed + * + * Removes alarm to a alarm_base timerqueue and if necessary sets + * a new timer to run. + * + * Must hold base->lock when calling. + */ +static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm) +{ + struct timerqueue_node *next = timerqueue_getnext(&base->timerqueue); + + timerqueue_del(&base->timerqueue, &alarm->node); + if (next == &alarm->node) { + hrtimer_try_to_cancel(&base->timer); + next = timerqueue_getnext(&base->timerqueue); + if (!next) + return; + hrtimer_start(&base->timer, next->expires, HRTIMER_MODE_ABS); + } +} + +/* + * alarmtimer_do_work - Handles alarm being fired. + * @work: pointer to workqueue being run + * + * When a timer fires, this runs through the timerqueue to see + * which alarm timers, and run those that expired. If there are + * more alarm timers queued, we set the hrtimer to fire in the + * future. + */ +void alarmtimer_do_work(struct work_struct *work) +{ + struct alarm_base *base = container_of(work, struct alarm_base, + irqwork); + struct timerqueue_node *next; + unsigned long flags; + ktime_t now; + + spin_lock_irqsave(&base->lock, flags); + now = base->gettime(); + while ((next = timerqueue_getnext(&base->timerqueue))) { + struct alarm *alarm; + ktime_t expired = next->expires; + + if (expired.tv64 >= now.tv64) + break; + + alarm = container_of(next, struct alarm, node); + + timerqueue_del(&base->timerqueue, &alarm->node); + alarm->enabled = 0; + /* Re-add periodic timers */ + if (alarm->period.tv64) { + alarm->node.expires = ktime_add(expired, alarm->period); + timerqueue_add(&base->timerqueue, &alarm->node); + alarm->enabled = 1; + } + spin_unlock_irqrestore(&base->lock, flags); + if (alarm->function) + alarm->function(alarm); + spin_lock_irqsave(&base->lock, flags); + } + + if (next) { + hrtimer_start(&base->timer, next->expires, + HRTIMER_MODE_ABS); + } + spin_unlock_irqrestore(&base->lock, flags); +} + + +/* + * alarmtimer_fired - Handles alarm hrtimer being fired. + * @timer: pointer to hrtimer being run + * + * When a timer fires, this schedules the do_work function to + * be run. + */ +static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) +{ + struct alarm_base *base = container_of(timer, struct alarm_base, timer); + schedule_work(&base->irqwork); + return HRTIMER_NORESTART; +} + + +/* + * alarmtimer_suspend - Suspend time callback + * @dev: unused + * @state: unused + * + * When we are going into suspend, we look through the bases + * to see which is the soonest timer to expire. We then + * set an rtc timer to fire that far into the future, which + * will wake us from suspend. 
+ */ +static int alarmtimer_suspend(struct device *dev) +{ + struct rtc_time tm; + ktime_t min, now; + unsigned long flags; + int i; + + spin_lock_irqsave(&freezer_delta_lock, flags); + min = freezer_delta; + freezer_delta = ktime_set(0, 0); + spin_unlock_irqrestore(&freezer_delta_lock, flags); + + /* If we have no rtcdev, just return */ + if (!rtcdev) + return 0; + + /* Find the soonest timer to expire*/ + for (i = 0; i < ALARM_NUMTYPE; i++) { + struct alarm_base *base = &alarm_bases[i]; + struct timerqueue_node *next; + ktime_t delta; + + spin_lock_irqsave(&base->lock, flags); + next = timerqueue_getnext(&base->timerqueue); + spin_unlock_irqrestore(&base->lock, flags); + if (!next) + continue; + delta = ktime_sub(next->expires, base->gettime()); + if (!min.tv64 || (delta.tv64 < min.tv64)) + min = delta; + } + if (min.tv64 == 0) + return 0; + + /* XXX - Should we enforce a minimum sleep time? */ + WARN_ON(min.tv64 < NSEC_PER_SEC); + + /* Setup an rtc timer to fire that far in the future */ + rtc_timer_cancel(rtcdev, &rtctimer); + rtc_read_time(rtcdev, &tm); + now = rtc_tm_to_ktime(tm); + now = ktime_add(now, min); + + rtc_timer_start(rtcdev, &rtctimer, now, ktime_set(0, 0)); + + return 0; +} + + +/************************************************************************** + * alarm kernel interface code + */ + +/* + * alarm_init - Initialize an alarm structure + * @alarm: ptr to alarm to be initialized + * @type: the type of the alarm + * @function: callback that is run when the alarm fires + * + * In-kernel interface to initializes the alarm structure. + */ +void alarm_init(struct alarm *alarm, enum alarmtimer_type type, + void (*function)(struct alarm *)) +{ + timerqueue_init(&alarm->node); + alarm->period = ktime_set(0, 0); + alarm->function = function; + alarm->type = type; + alarm->enabled = 0; +} + +/* + * alarm_start - Sets an alarm to fire + * @alarm: ptr to alarm to set + * @start: time to run the alarm + * @period: period at which the alarm will recur + * + * In-kernel interface set an alarm timer. + */ +void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period) +{ + struct alarm_base *base = &alarm_bases[alarm->type]; + unsigned long flags; + + spin_lock_irqsave(&base->lock, flags); + if (alarm->enabled) + alarmtimer_remove(base, alarm); + alarm->node.expires = start; + alarm->period = period; + alarmtimer_enqueue(base, alarm); + alarm->enabled = 1; + spin_unlock_irqrestore(&base->lock, flags); +} + +/* + * alarm_cancel - Tries to cancel an alarm timer + * @alarm: ptr to alarm to be canceled + * + * In-kernel interface to cancel an alarm timer. + */ +void alarm_cancel(struct alarm *alarm) +{ + struct alarm_base *base = &alarm_bases[alarm->type]; + unsigned long flags; + + spin_lock_irqsave(&base->lock, flags); + if (alarm->enabled) + alarmtimer_remove(base, alarm); + alarm->enabled = 0; + spin_unlock_irqrestore(&base->lock, flags); +} + + + +/************************************************************************** + * alarmtimer initialization code + */ + +/* Suspend hook structures */ +static const struct dev_pm_ops alarmtimer_pm_ops = { + .suspend = alarmtimer_suspend, +}; + +static struct platform_driver alarmtimer_driver = { + .driver = { + .name = "alarmtimer", + .pm = &alarmtimer_pm_ops, + } +}; + +/** + * alarmtimer_init - Initialize alarm timer code + * + * This function initializes the alarm bases and registers + * the posix clock ids. 
+ */ +static int __init alarmtimer_init(void) +{ + int error = 0; + int i; + + /* Initialize alarm bases */ + alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME; + alarm_bases[ALARM_REALTIME].gettime = &ktime_get_real; + alarm_bases[ALARM_BOOTTIME].base_clockid = CLOCK_BOOTTIME; + alarm_bases[ALARM_BOOTTIME].gettime = &ktime_get_boottime; + for (i = 0; i < ALARM_NUMTYPE; i++) { + timerqueue_init_head(&alarm_bases[i].timerqueue); + spin_lock_init(&alarm_bases[i].lock); + hrtimer_init(&alarm_bases[i].timer, + alarm_bases[i].base_clockid, + HRTIMER_MODE_ABS); + alarm_bases[i].timer.function = alarmtimer_fired; + INIT_WORK(&alarm_bases[i].irqwork, alarmtimer_do_work); + } + error = platform_driver_register(&alarmtimer_driver); + platform_device_register_simple("alarmtimer", -1, NULL, 0); + + return error; +} +device_initcall(alarmtimer_init); + +/** + * has_wakealarm - check rtc device has wakealarm ability + * @dev: current device + * @name_ptr: name to be returned + * + * This helper function checks to see if the rtc device can wake + * from suspend. + */ +static int __init has_wakealarm(struct device *dev, void *name_ptr) +{ + struct rtc_device *candidate = to_rtc_device(dev); + + if (!candidate->ops->set_alarm) + return 0; + if (!device_may_wakeup(candidate->dev.parent)) + return 0; + + *(const char **)name_ptr = dev_name(dev); + return 1; +} + +/** + * alarmtimer_init_late - Late initializing of alarmtimer code + * + * This function locates a rtc device to use for wakealarms. + * Run as late_initcall to make sure rtc devices have been + * registered. + */ +static int __init alarmtimer_init_late(void) +{ + char *str; + + /* Find an rtc device and init the rtc_timer */ + class_find_device(rtc_class, NULL, &str, has_wakealarm); + if (str) + rtcdev = rtc_class_open(str); + if (!rtcdev) { + printk(KERN_WARNING "No RTC device found, ALARM timers will" + " not wake from suspend"); + } + rtc_timer_init(&rtctimer, NULL, NULL); + + return 0; +} +late_initcall(alarmtimer_init_late); -- cgit v1.2.3 From 9a7adcf5c6dea63d2e47e6f6d2f7a6c9f48b9337 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 11 Jan 2011 09:54:33 -0800 Subject: timers: Posix interface for alarm-timers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch exposes alarm-timers to userland via the posix clock and timers interface, using two new clockids: CLOCK_REALTIME_ALARM and CLOCK_BOOTTIME_ALARM. Both clockids behave identically to CLOCK_REALTIME and CLOCK_BOOTTIME, respectively, but timers set against the _ALARM suffixed clockids will wake the system if it is suspended. Some background can be found here: https://lwn.net/Articles/429925/ The concept for Alarm-timers was inspired by the Android Alarm driver (by Arve HjønnevÃ¥g) found in the Android kernel tree. See: http://android.git.kernel.org/?p=kernel/common.git;a=blob;f=drivers/rtc/alarm.c;h=1250edfbdf3302f5e4ea6194847c6ef4bb7beb1c;hb=android-2.6.36 While the in-kernel interface is pretty similar between alarm-timers and Android alarm driver, the user-space interface for the Android alarm driver is via ioctls to a new char device. As mentioned above, I've instead chosen to export this functionality via the posix interface, as it seemed a little simpler and avoids creating duplicate interfaces to things like CLOCK_REALTIME and CLOCK_MONOTONIC under alternate names (ie:ANDROID_ALARM_RTC and ANDROID_ALARM_SYSTEMTIME). The semantics of the Android alarm driver are different from what this posix interface provides. 
For instance, threads other than the thread waiting on the Android alarm driver are able to modify the alarm being waited on. Also this interface does not allow the same wakelock semantics that the Android driver provides (ie: kernel takes a wakelock on RTC alarm-interrupt, and holds it through process wakeup, and while the process runs, until the process either closes the char device or calls back in to wait on a new alarm). One potential way to implement similar semantics may be via the timerfd infrastructure, but this needs more research. There may also need to be some sort of sysfs system level policy hooks that allow alarm timers to be disabled to keep them from firing at inappropriate times (ie: laptop in a well insulated bag, mid-flight). CC: Arve Hjønnevåg CC: Thomas Gleixner CC: Alessandro Zummo Acked-by: Arnd Bergmann Signed-off-by: John Stultz --- include/linux/capability.h | 7 +- include/linux/posix-timers.h | 2 + include/linux/time.h | 2 + kernel/time/alarmtimer.c | 330 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 340 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 16ee8b49a200..7cb23eae693d 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -355,7 +355,12 @@ struct cpu_vfs_cap_data { #define CAP_SYSLOG 34 -#define CAP_LAST_CAP CAP_SYSLOG +/* Allow triggering something that will wake the system */ + +#define CAP_WAKE_ALARM 35 + + +#define CAP_LAST_CAP CAP_WAKE_ALARM #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index d51243ae0726..808227d40a64 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -5,6 +5,7 @@ #include #include #include +#include union cpu_time_count { cputime_t cpu; @@ -80,6 +81,7 @@ struct k_itimer { unsigned long incr; unsigned long expires; } mmtimer; + struct alarm alarmtimer; } it; }; diff --git a/include/linux/time.h b/include/linux/time.h index 4ea5a75fcacd..b3061782dec3 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -295,6 +295,8 @@ struct itimerval { #define CLOCK_REALTIME_COARSE 5 #define CLOCK_MONOTONIC_COARSE 6 #define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 /* * The IDs of various hardware clocks: diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 48c2ee949e61..4058ad79d55f 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -215,6 +215,21 @@ static int alarmtimer_suspend(struct device *dev) } +static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type) +{ + ktime_t delta; + unsigned long flags; + struct alarm_base *base = &alarm_bases[type]; + + delta = ktime_sub(absexp, base->gettime()); + + spin_lock_irqsave(&freezer_delta_lock, flags); + if (!freezer_delta.tv64 || (delta.tv64 < freezer_delta.tv64)) + freezer_delta = delta; + spin_unlock_irqrestore(&freezer_delta_lock, flags); +} + + /************************************************************************** * alarm kernel interface code */ @@ -279,6 +294,309 @@ void alarm_cancel(struct alarm *alarm) } +/************************************************************************** + * alarm posix interface code + */ + +/* + * clock2alarm - helper that converts from clockid to alarmtypes + * @clockid: clockid. 
+ * + * Helper function that converts from clockids to alarmtypes + */ +static enum alarmtimer_type clock2alarm(clockid_t clockid) +{ + if (clockid == CLOCK_REALTIME_ALARM) + return ALARM_REALTIME; + if (clockid == CLOCK_BOOTTIME_ALARM) + return ALARM_BOOTTIME; + return -1; +} + +/* + * alarm_handle_timer - Callback for posix timers + * @alarm: alarm that fired + * + * Posix timer callback for expired alarm timers. + */ +static void alarm_handle_timer(struct alarm *alarm) +{ + struct k_itimer *ptr = container_of(alarm, struct k_itimer, + it.alarmtimer); + if (posix_timer_event(ptr, 0) != 0) + ptr->it_overrun++; +} + +/* + * alarm_clock_getres - posix getres interface + * @which_clock: clockid + * @tp: timespec to fill + * + * Returns the granularity of underlying alarm base clock + */ +static int alarm_clock_getres(const clockid_t which_clock, struct timespec *tp) +{ + clockid_t baseid = alarm_bases[clock2alarm(which_clock)].base_clockid; + + return hrtimer_get_res(baseid, tp); +} + +/** + * alarm_clock_get - posix clock_get interface + * @which_clock: clockid + * @tp: timespec to fill. + * + * Provides the underlying alarm base time. + */ +static int alarm_clock_get(clockid_t which_clock, struct timespec *tp) +{ + struct alarm_base *base = &alarm_bases[clock2alarm(which_clock)]; + + *tp = ktime_to_timespec(base->gettime()); + return 0; +} + +/** + * alarm_timer_create - posix timer_create interface + * @new_timer: k_itimer pointer to manage + * + * Initializes the k_itimer structure. + */ +static int alarm_timer_create(struct k_itimer *new_timer) +{ + enum alarmtimer_type type; + struct alarm_base *base; + + if (!capable(CAP_WAKE_ALARM)) + return -EPERM; + + type = clock2alarm(new_timer->it_clock); + base = &alarm_bases[type]; + alarm_init(&new_timer->it.alarmtimer, type, alarm_handle_timer); + return 0; +} + +/** + * alarm_timer_get - posix timer_get interface + * @new_timer: k_itimer pointer + * @cur_setting: itimerspec data to fill + * + * Copies the itimerspec data out from the k_itimer + */ +static void alarm_timer_get(struct k_itimer *timr, + struct itimerspec *cur_setting) +{ + cur_setting->it_interval = + ktime_to_timespec(timr->it.alarmtimer.period); + cur_setting->it_value = + ktime_to_timespec(timr->it.alarmtimer.node.expires); + return; +} + +/** + * alarm_timer_del - posix timer_del interface + * @timr: k_itimer pointer to be deleted + * + * Cancels any programmed alarms for the given timer. + */ +static int alarm_timer_del(struct k_itimer *timr) +{ + alarm_cancel(&timr->it.alarmtimer); + return 0; +} + +/** + * alarm_timer_set - posix timer_set interface + * @timr: k_itimer pointer to be deleted + * @flags: timer flags + * @new_setting: itimerspec to be used + * @old_setting: itimerspec being replaced + * + * Sets the timer to new_setting, and starts the timer. 
+ */ +static int alarm_timer_set(struct k_itimer *timr, int flags, + struct itimerspec *new_setting, + struct itimerspec *old_setting) +{ + /* Save old values */ + old_setting->it_interval = + ktime_to_timespec(timr->it.alarmtimer.period); + old_setting->it_value = + ktime_to_timespec(timr->it.alarmtimer.node.expires); + + /* If the timer was already set, cancel it */ + alarm_cancel(&timr->it.alarmtimer); + + /* start the timer */ + alarm_start(&timr->it.alarmtimer, + timespec_to_ktime(new_setting->it_value), + timespec_to_ktime(new_setting->it_interval)); + return 0; +} + +/** + * alarmtimer_nsleep_wakeup - Wakeup function for alarm_timer_nsleep + * @alarm: ptr to alarm that fired + * + * Wakes up the task that set the alarmtimer + */ +static void alarmtimer_nsleep_wakeup(struct alarm *alarm) +{ + struct task_struct *task = (struct task_struct *)alarm->data; + + alarm->data = NULL; + if (task) + wake_up_process(task); +} + +/** + * alarmtimer_do_nsleep - Internal alarmtimer nsleep implementation + * @alarm: ptr to alarmtimer + * @absexp: absolute expiration time + * + * Sets the alarm timer and sleeps until it is fired or interrupted. + */ +static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp) +{ + alarm->data = (void *)current; + do { + set_current_state(TASK_INTERRUPTIBLE); + alarm_start(alarm, absexp, ktime_set(0, 0)); + if (likely(alarm->data)) + schedule(); + + alarm_cancel(alarm); + } while (alarm->data && !signal_pending(current)); + + __set_current_state(TASK_RUNNING); + + return (alarm->data == NULL); +} + + +/** + * update_rmtp - Update remaining timespec value + * @exp: expiration time + * @type: timer type + * @rmtp: user pointer to remaining timepsec value + * + * Helper function that fills in rmtp value with time between + * now and the exp value + */ +static int update_rmtp(ktime_t exp, enum alarmtimer_type type, + struct timespec __user *rmtp) +{ + struct timespec rmt; + ktime_t rem; + + rem = ktime_sub(exp, alarm_bases[type].gettime()); + + if (rem.tv64 <= 0) + return 0; + rmt = ktime_to_timespec(rem); + + if (copy_to_user(rmtp, &rmt, sizeof(*rmtp))) + return -EFAULT; + + return 1; + +} + +/** + * alarm_timer_nsleep_restart - restartblock alarmtimer nsleep + * @restart: ptr to restart block + * + * Handles restarted clock_nanosleep calls + */ +static long __sched alarm_timer_nsleep_restart(struct restart_block *restart) +{ + enum alarmtimer_type type = restart->nanosleep.index; + ktime_t exp; + struct timespec __user *rmtp; + struct alarm alarm; + int ret = 0; + + exp.tv64 = restart->nanosleep.expires; + alarm_init(&alarm, type, alarmtimer_nsleep_wakeup); + + if (alarmtimer_do_nsleep(&alarm, exp)) + goto out; + + if (freezing(current)) + alarmtimer_freezerset(exp, type); + + rmtp = restart->nanosleep.rmtp; + if (rmtp) { + ret = update_rmtp(exp, type, rmtp); + if (ret <= 0) + goto out; + } + + + /* The other values in restart are already filled in */ + ret = -ERESTART_RESTARTBLOCK; +out: + return ret; +} + +/** + * alarm_timer_nsleep - alarmtimer nanosleep + * @which_clock: clockid + * @flags: determins abstime or relative + * @tsreq: requested sleep time (abs or rel) + * @rmtp: remaining sleep time saved + * + * Handles clock_nanosleep calls against _ALARM clockids + */ +static int alarm_timer_nsleep(const clockid_t which_clock, int flags, + struct timespec *tsreq, struct timespec __user *rmtp) +{ + enum alarmtimer_type type = clock2alarm(which_clock); + struct alarm alarm; + ktime_t exp; + int ret = 0; + struct restart_block *restart; + + if 
(!capable(CAP_WAKE_ALARM)) + return -EPERM; + + alarm_init(&alarm, type, alarmtimer_nsleep_wakeup); + + exp = timespec_to_ktime(*tsreq); + /* Convert (if necessary) to absolute time */ + if (flags != TIMER_ABSTIME) { + ktime_t now = alarm_bases[type].gettime(); + exp = ktime_add(now, exp); + } + + if (alarmtimer_do_nsleep(&alarm, exp)) + goto out; + + if (freezing(current)) + alarmtimer_freezerset(exp, type); + + /* abs timers don't set remaining time or restart */ + if (flags == TIMER_ABSTIME) { + ret = -ERESTARTNOHAND; + goto out; + } + + if (rmtp) { + ret = update_rmtp(exp, type, rmtp); + if (ret <= 0) + goto out; + } + + restart = &current_thread_info()->restart_block; + restart->fn = alarm_timer_nsleep_restart; + restart->nanosleep.index = type; + restart->nanosleep.expires = exp.tv64; + restart->nanosleep.rmtp = rmtp; + ret = -ERESTART_RESTARTBLOCK; + +out: + return ret; +} /************************************************************************** * alarmtimer initialization code @@ -306,6 +624,18 @@ static int __init alarmtimer_init(void) { int error = 0; int i; + struct k_clock alarm_clock = { + .clock_getres = alarm_clock_getres, + .clock_get = alarm_clock_get, + .timer_create = alarm_timer_create, + .timer_set = alarm_timer_set, + .timer_del = alarm_timer_del, + .timer_get = alarm_timer_get, + .nsleep = alarm_timer_nsleep, + }; + + posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock); + posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock); /* Initialize alarm bases */ alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME; -- cgit v1.2.3 From 28331a46d88459788c8fca72dbb0415cd7f514c9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 27 Apr 2011 13:47:52 -0400 Subject: NFSv4: Ensure we request the ordinary fileid when doing readdirplus When readdir() returns a directory entry for the root of a mounted filesystem, Linux follows the old convention of returning the inode number of the covered directory (despite newer versions of POSIX declaring that this is a bug). To ensure this continues to work, the NFSv4 readdir implementation requests the 'mounted-on-fileid' from the server. However, readdirplus also needs to instantiate an inode for this entry, and for that, we also need to request the real fileid as per this patch. 
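As an illustrative sketch only (not part of the patch), with both attributes requested a readdirplus decoder can prefer the mounted-on fileid for the directory entry's inode number while still having the ordinary fileid available for inode instantiation; the helper name below is hypothetical, the actual selection is done in nfs4_decode_dirent():

	/* hypothetical helper, mirroring the selection done in nfs4_decode_dirent() */
	static u64 nfs_pick_dirent_ino(const struct nfs_fattr *fattr)
	{
		if (fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
			return fattr->mounted_on_fileid;	/* root of a mounted filesystem */
		if (fattr->valid & NFS_ATTR_FATTR_FILEID)
			return fattr->fileid;			/* ordinary entry */
		return 0;					/* server returned neither attribute */
	}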
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 31 ++++++++++++++----------------- include/linux/nfs_xdr.h | 2 ++ 2 files changed, 16 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index ba952bdc4d62..7310d2ec5de8 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1452,26 +1452,25 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) { - uint32_t attrs[2] = {0, 0}; + uint32_t attrs[2] = { + FATTR4_WORD0_RDATTR_ERROR, + FATTR4_WORD1_MOUNTED_ON_FILEID, + }; uint32_t dircount = readdir->count >> 1; __be32 *p; if (readdir->plus) { attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE| - FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE; + FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE|FATTR4_WORD0_FILEID; attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER| FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV| FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS| FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; dircount >>= 1; } - attrs[0] |= FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID; - attrs[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID; - /* Switch to mounted_on_fileid if the server supports it */ - if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) - attrs[0] &= ~FATTR4_WORD0_FILEID; - else - attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; + /* Use mounted_on_fileid only if the server supports it */ + if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) + attrs[0] |= FATTR4_WORD0_FILEID; p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20); *p++ = cpu_to_be32(OP_READDIR); @@ -3140,7 +3139,7 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma goto out_overflow; xdr_decode_hyper(p, fileid); bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; - ret = NFS_ATTR_FATTR_FILEID; + ret = NFS_ATTR_FATTR_MOUNTED_ON_FILEID; } dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); return ret; @@ -4002,7 +4001,6 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, { int status; umode_t fmode = 0; - uint64_t fileid; uint32_t type; status = decode_attr_type(xdr, bitmap, &type); @@ -4101,13 +4099,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, goto xdr_error; fattr->valid |= status; - status = decode_attr_mounted_on_fileid(xdr, bitmap, &fileid); + status = decode_attr_mounted_on_fileid(xdr, bitmap, &fattr->mounted_on_fileid); if (status < 0) goto xdr_error; - if (status != 0 && !(fattr->valid & status)) { - fattr->fileid = fileid; - fattr->valid |= status; - } + fattr->valid |= status; xdr_error: dprintk("%s: xdr returned %d\n", __func__, -status); @@ -6411,7 +6406,9 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, entry->server, 1) < 0) goto out_overflow; - if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID) + if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) + entry->ino = entry->fattr->mounted_on_fileid; + else if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID) entry->ino = entry->fattr->fileid; entry->d_type = DT_UNKNOWN; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 78b101e487ea..890dce242639 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -50,6 +50,7 @@ struct nfs_fattr { } du; struct nfs_fsid fsid; __u64 fileid; + 
__u64 mounted_on_fileid; struct timespec atime; struct timespec mtime; struct timespec ctime; @@ -83,6 +84,7 @@ struct nfs_fattr { #define NFS_ATTR_FATTR_PRECHANGE (1U << 18) #define NFS_ATTR_FATTR_V4_REFERRAL (1U << 19) /* NFSv4 referral */ #define NFS_ATTR_FATTR_MOUNTPOINT (1U << 20) /* Treat as mountpoint */ +#define NFS_ATTR_FATTR_MOUNTED_ON_FILEID (1U << 21) #define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \ | NFS_ATTR_FATTR_MODE \ -- cgit v1.2.3 From 26fc8775b51484d8c0a671198639c6d5ae60533e Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 15 Apr 2011 20:08:19 +0200 Subject: mmc: fix a race between card-detect rescan and clock-gate work instances Currently there is a race in the MMC core between a card-detect rescan work and the clock-gating work, scheduled from a command completion. Fix it by removing the dedicated clock-gating mutex and using the MMC standard locking mechanism instead. Signed-off-by: Guennadi Liakhovetski Cc: Simon Horman Cc: Magnus Damm Acked-by: Linus Walleij Cc: Signed-off-by: Chris Ball --- drivers/mmc/core/host.c | 9 ++++----- include/linux/mmc/host.h | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 461e6a17fb90..2b200c1cfbba 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -94,7 +94,7 @@ static void mmc_host_clk_gate_delayed(struct mmc_host *host) spin_unlock_irqrestore(&host->clk_lock, flags); return; } - mutex_lock(&host->clk_gate_mutex); + mmc_claim_host(host); spin_lock_irqsave(&host->clk_lock, flags); if (!host->clk_requests) { spin_unlock_irqrestore(&host->clk_lock, flags); @@ -104,7 +104,7 @@ static void mmc_host_clk_gate_delayed(struct mmc_host *host) pr_debug("%s: gated MCI clock\n", mmc_hostname(host)); } spin_unlock_irqrestore(&host->clk_lock, flags); - mutex_unlock(&host->clk_gate_mutex); + mmc_release_host(host); } /* @@ -130,7 +130,7 @@ void mmc_host_clk_ungate(struct mmc_host *host) { unsigned long flags; - mutex_lock(&host->clk_gate_mutex); + mmc_claim_host(host); spin_lock_irqsave(&host->clk_lock, flags); if (host->clk_gated) { spin_unlock_irqrestore(&host->clk_lock, flags); @@ -140,7 +140,7 @@ void mmc_host_clk_ungate(struct mmc_host *host) } host->clk_requests++; spin_unlock_irqrestore(&host->clk_lock, flags); - mutex_unlock(&host->clk_gate_mutex); + mmc_release_host(host); } /** @@ -215,7 +215,6 @@ static inline void mmc_host_clk_init(struct mmc_host *host) host->clk_gated = false; INIT_WORK(&host->clk_gate_work, mmc_host_clk_gate_work); spin_lock_init(&host->clk_lock); - mutex_init(&host->clk_gate_mutex); } /** diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index bcb793ec7374..eb792cb6d745 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -183,7 +183,6 @@ struct mmc_host { struct work_struct clk_gate_work; /* delayed clock gate */ unsigned int clk_old; /* old clock value cache */ spinlock_t clk_lock; /* lock for clk fields */ - struct mutex clk_gate_mutex; /* mutex for clock gating */ #endif /* host specific block data */ -- cgit v1.2.3 From 0a14842f5a3c0e88a1e59fac5c3025db39721f74 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Apr 2011 09:27:32 +0000 Subject: net: filter: Just In Time compiler for x86-64 In order to speedup packet filtering, here is an implementation of a JIT compiler for x86_64 It is disabled by default, and must be enabled by the admin. 
echo 1 >/proc/sys/net/core/bpf_jit_enable It uses module_alloc() and module_free() to get memory in the 2GB text kernel range since we call helpers functions from the generated code. EAX : BPF A accumulator EBX : BPF X accumulator RDI : pointer to skb (first argument given to JIT function) RBP : frame pointer (even if CONFIG_FRAME_POINTER=n) r9d : skb->len - skb->data_len (headlen) r8 : skb->data To get a trace of generated code, use : echo 2 >/proc/sys/net/core/bpf_jit_enable Example of generated code : # tcpdump -p -n -s 0 -i eth1 host 192.168.20.0/24 flen=18 proglen=147 pass=3 image=ffffffffa00b5000 JIT code: ffffffffa00b5000: 55 48 89 e5 48 83 ec 60 48 89 5d f8 44 8b 4f 60 JIT code: ffffffffa00b5010: 44 2b 4f 64 4c 8b 87 b8 00 00 00 be 0c 00 00 00 JIT code: ffffffffa00b5020: e8 24 7b f7 e0 3d 00 08 00 00 75 28 be 1a 00 00 JIT code: ffffffffa00b5030: 00 e8 fe 7a f7 e0 24 00 3d 00 14 a8 c0 74 49 be JIT code: ffffffffa00b5040: 1e 00 00 00 e8 eb 7a f7 e0 24 00 3d 00 14 a8 c0 JIT code: ffffffffa00b5050: 74 36 eb 3b 3d 06 08 00 00 74 07 3d 35 80 00 00 JIT code: ffffffffa00b5060: 75 2d be 1c 00 00 00 e8 c8 7a f7 e0 24 00 3d 00 JIT code: ffffffffa00b5070: 14 a8 c0 74 13 be 26 00 00 00 e8 b5 7a f7 e0 24 JIT code: ffffffffa00b5080: 00 3d 00 14 a8 c0 75 07 b8 ff ff 00 00 eb 02 31 JIT code: ffffffffa00b5090: c0 c9 c3 BPF program is 144 bytes long, so native program is almost same size ;) (000) ldh [12] (001) jeq #0x800 jt 2 jf 8 (002) ld [26] (003) and #0xffffff00 (004) jeq #0xc0a81400 jt 16 jf 5 (005) ld [30] (006) and #0xffffff00 (007) jeq #0xc0a81400 jt 16 jf 17 (008) jeq #0x806 jt 10 jf 9 (009) jeq #0x8035 jt 10 jf 17 (010) ld [28] (011) and #0xffffff00 (012) jeq #0xc0a81400 jt 16 jf 13 (013) ld [38] (014) and #0xffffff00 (015) jeq #0xc0a81400 jt 16 jf 17 (016) ret #65535 (017) ret #0 Signed-off-by: Eric Dumazet Cc: Arnaldo Carvalho de Melo Cc: Ben Hutchings Cc: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- Documentation/sysctl/net.txt | 11 + MAINTAINERS | 1 + arch/x86/Kbuild | 1 + arch/x86/Kconfig | 1 + arch/x86/net/Makefile | 4 + arch/x86/net/bpf_jit.S | 140 +++++++++ arch/x86/net/bpf_jit_comp.c | 654 +++++++++++++++++++++++++++++++++++++++++++ include/linux/filter.h | 76 +++++ include/linux/netdevice.h | 1 + include/linux/skbuff.h | 2 +- net/Kconfig | 13 + net/core/filter.c | 65 +---- net/core/sysctl_net_core.c | 9 + net/packet/af_packet.c | 2 +- 14 files changed, 918 insertions(+), 62 deletions(-) create mode 100644 arch/x86/net/Makefile create mode 100644 arch/x86/net/bpf_jit.S create mode 100644 arch/x86/net/bpf_jit_comp.c (limited to 'include/linux') diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index cbd05ffc606b..3201a7097e4d 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -32,6 +32,17 @@ Table : Subdirectories in /proc/sys/net 1. /proc/sys/net/core - Network core options ------------------------------------------------------- +bpf_jit_enable +-------------- + +This enables Berkeley Packet Filter Just in Time compiler. +Currently supported on x86_64 architecture, bpf_jit provides a framework +to speed packet filtering, the one used by tcpdump/libpcap for example. +Values : + 0 - disable the JIT (default value) + 1 - enable the JIT + 2 - enable the JIT and ask the compiler to emit traces on kernel log. 
+ rmem_default ------------ diff --git a/MAINTAINERS b/MAINTAINERS index b5266ad50167..17c0917a26ea 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4372,6 +4372,7 @@ S: Maintained F: net/ipv4/ F: net/ipv6/ F: include/net/ip* +F: arch/x86/net/* NETWORKING [LABELED] (NetLabel, CIPSO, Labeled IPsec, SECMARK) M: Paul Moore diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index 0e103236b754..0e9dec6cadd1 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -15,3 +15,4 @@ obj-y += vdso/ obj-$(CONFIG_IA32_EMULATION) += ia32/ obj-y += platform/ +obj-y += net/ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cc6c53a95bfd..855a1bdc437d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -72,6 +72,7 @@ config X86 select IRQ_FORCED_THREADING select USE_GENERIC_SMP_HELPERS if SMP select ARCH_NO_SYSDEV_OPS + select HAVE_BPF_JIT if X86_64 config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS) diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile new file mode 100644 index 000000000000..90568c33ddb0 --- /dev/null +++ b/arch/x86/net/Makefile @@ -0,0 +1,4 @@ +# +# Arch-specific network modules +# +obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S new file mode 100644 index 000000000000..66870223f8c5 --- /dev/null +++ b/arch/x86/net/bpf_jit.S @@ -0,0 +1,140 @@ +/* bpf_jit.S : BPF JIT helper functions + * + * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#include +#include + +/* + * Calling convention : + * rdi : skb pointer + * esi : offset of byte(s) to fetch in skb (can be scratched) + * r8 : copy of skb->data + * r9d : hlen = skb->len - skb->data_len + */ +#define SKBDATA %r8 + +sk_load_word_ind: + .globl sk_load_word_ind + + add %ebx,%esi /* offset += X */ +# test %esi,%esi /* if (offset < 0) goto bpf_error; */ + js bpf_error + +sk_load_word: + .globl sk_load_word + + mov %r9d,%eax # hlen + sub %esi,%eax # hlen - offset + cmp $3,%eax + jle bpf_slow_path_word + mov (SKBDATA,%rsi),%eax + bswap %eax /* ntohl() */ + ret + + +sk_load_half_ind: + .globl sk_load_half_ind + + add %ebx,%esi /* offset += X */ + js bpf_error + +sk_load_half: + .globl sk_load_half + + mov %r9d,%eax + sub %esi,%eax # hlen - offset + cmp $1,%eax + jle bpf_slow_path_half + movzwl (SKBDATA,%rsi),%eax + rol $8,%ax # ntohs() + ret + +sk_load_byte_ind: + .globl sk_load_byte_ind + add %ebx,%esi /* offset += X */ + js bpf_error + +sk_load_byte: + .globl sk_load_byte + + cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */ + jle bpf_slow_path_byte + movzbl (SKBDATA,%rsi),%eax + ret + +/** + * sk_load_byte_msh - BPF_S_LDX_B_MSH helper + * + * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf) + * Must preserve A accumulator (%eax) + * Inputs : %esi is the offset value, already known positive + */ +ENTRY(sk_load_byte_msh) + CFI_STARTPROC + cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */ + jle bpf_slow_path_byte_msh + movzbl (SKBDATA,%rsi),%ebx + and $15,%bl + shl $2,%bl + ret + CFI_ENDPROC +ENDPROC(sk_load_byte_msh) + +bpf_error: +# force a return 0 from jit handler + xor %eax,%eax + mov -8(%rbp),%rbx + leaveq + ret + +/* rsi contains offset and can be scratched */ +#define bpf_slow_path_common(LEN) \ + push %rdi; /* save skb */ \ + push %r9; \ + push SKBDATA; \ +/* rsi already has offset */ \ + mov 
$LEN,%ecx; /* len */ \ + lea -12(%rbp),%rdx; \ + call skb_copy_bits; \ + test %eax,%eax; \ + pop SKBDATA; \ + pop %r9; \ + pop %rdi + + +bpf_slow_path_word: + bpf_slow_path_common(4) + js bpf_error + mov -12(%rbp),%eax + bswap %eax + ret + +bpf_slow_path_half: + bpf_slow_path_common(2) + js bpf_error + mov -12(%rbp),%ax + rol $8,%ax + movzwl %ax,%eax + ret + +bpf_slow_path_byte: + bpf_slow_path_common(1) + js bpf_error + movzbl -12(%rbp),%eax + ret + +bpf_slow_path_byte_msh: + xchg %eax,%ebx /* dont lose A , X is about to be scratched */ + bpf_slow_path_common(1) + js bpf_error + movzbl -12(%rbp),%eax + and $15,%al + shl $2,%al + xchg %eax,%ebx + ret diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c new file mode 100644 index 000000000000..bfab3fa10edc --- /dev/null +++ b/arch/x86/net/bpf_jit_comp.c @@ -0,0 +1,654 @@ +/* bpf_jit_comp.c : BPF JIT compiler + * + * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#include +#include +#include +#include + +/* + * Conventions : + * EAX : BPF A accumulator + * EBX : BPF X accumulator + * RDI : pointer to skb (first argument given to JIT function) + * RBP : frame pointer (even if CONFIG_FRAME_POINTER=n) + * ECX,EDX,ESI : scratch registers + * r9d : skb->len - skb->data_len (headlen) + * r8 : skb->data + * -8(RBP) : saved RBX value + * -16(RBP)..-80(RBP) : BPF_MEMWORDS values + */ +int bpf_jit_enable __read_mostly; + +/* + * assembly code in arch/x86/net/bpf_jit.S + */ +extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; +extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[]; + +static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) +{ + if (len == 1) + *ptr = bytes; + else if (len == 2) + *(u16 *)ptr = bytes; + else { + *(u32 *)ptr = bytes; + barrier(); + } + return ptr + len; +} + +#define EMIT(bytes, len) do { prog = emit_code(prog, bytes, len); } while (0) + +#define EMIT1(b1) EMIT(b1, 1) +#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) +#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) +#define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) +#define EMIT1_off32(b1, off) do { EMIT1(b1); EMIT(off, 4);} while (0) + +#define CLEAR_A() EMIT2(0x31, 0xc0) /* xor %eax,%eax */ +#define CLEAR_X() EMIT2(0x31, 0xdb) /* xor %ebx,%ebx */ + +static inline bool is_imm8(int value) +{ + return value <= 127 && value >= -128; +} + +static inline bool is_near(int offset) +{ + return offset <= 127 && offset >= -128; +} + +#define EMIT_JMP(offset) \ +do { \ + if (offset) { \ + if (is_near(offset)) \ + EMIT2(0xeb, offset); /* jmp .+off8 */ \ + else \ + EMIT1_off32(0xe9, offset); /* jmp .+off32 */ \ + } \ +} while (0) + +/* list of x86 cond jumps opcodes (. + s8) + * Add 0x10 (and an extra 0x0f) to generate far jumps (. 
+ s32) + */ +#define X86_JB 0x72 +#define X86_JAE 0x73 +#define X86_JE 0x74 +#define X86_JNE 0x75 +#define X86_JBE 0x76 +#define X86_JA 0x77 + +#define EMIT_COND_JMP(op, offset) \ +do { \ + if (is_near(offset)) \ + EMIT2(op, offset); /* jxx .+off8 */ \ + else { \ + EMIT2(0x0f, op + 0x10); \ + EMIT(offset, 4); /* jxx .+off32 */ \ + } \ +} while (0) + +#define COND_SEL(CODE, TOP, FOP) \ + case CODE: \ + t_op = TOP; \ + f_op = FOP; \ + goto cond_branch + + +#define SEEN_DATAREF 1 /* might call external helpers */ +#define SEEN_XREG 2 /* ebx is used */ +#define SEEN_MEM 4 /* use mem[] for temporary storage */ + +static inline void bpf_flush_icache(void *start, void *end) +{ + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + smp_wmb(); + flush_icache_range((unsigned long)start, (unsigned long)end); + set_fs(old_fs); +} + + +void bpf_jit_compile(struct sk_filter *fp) +{ + u8 temp[64]; + u8 *prog; + unsigned int proglen, oldproglen = 0; + int ilen, i; + int t_offset, f_offset; + u8 t_op, f_op, seen = 0, pass; + u8 *image = NULL; + u8 *func; + int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */ + unsigned int cleanup_addr; /* epilogue code offset */ + unsigned int *addrs; + const struct sock_filter *filter = fp->insns; + int flen = fp->len; + + if (!bpf_jit_enable) + return; + + addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL); + if (addrs == NULL) + return; + + /* Before first pass, make a rough estimation of addrs[] + * each bpf instruction is translated to less than 64 bytes + */ + for (proglen = 0, i = 0; i < flen; i++) { + proglen += 64; + addrs[i] = proglen; + } + cleanup_addr = proglen; /* epilogue address */ + + for (pass = 0; pass < 10; pass++) { + /* no prologue/epilogue for trivial filters (RET something) */ + proglen = 0; + prog = temp; + + if (seen) { + EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */ + EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */ + /* note : must save %rbx in case bpf_error is hit */ + if (seen & (SEEN_XREG | SEEN_DATAREF)) + EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */ + if (seen & SEEN_XREG) + CLEAR_X(); /* make sure we dont leek kernel memory */ + + /* + * If this filter needs to access skb data, + * loads r9 and r8 with : + * r9 = skb->len - skb->data_len + * r8 = skb->data + */ + if (seen & SEEN_DATAREF) { + if (offsetof(struct sk_buff, len) <= 127) + /* mov off8(%rdi),%r9d */ + EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len)); + else { + /* mov off32(%rdi),%r9d */ + EMIT3(0x44, 0x8b, 0x8f); + EMIT(offsetof(struct sk_buff, len), 4); + } + if (is_imm8(offsetof(struct sk_buff, data_len))) + /* sub off8(%rdi),%r9d */ + EMIT4(0x44, 0x2b, 0x4f, offsetof(struct sk_buff, data_len)); + else { + EMIT3(0x44, 0x2b, 0x8f); + EMIT(offsetof(struct sk_buff, data_len), 4); + } + + if (is_imm8(offsetof(struct sk_buff, data))) + /* mov off8(%rdi),%r8 */ + EMIT4(0x4c, 0x8b, 0x47, offsetof(struct sk_buff, data)); + else { + /* mov off32(%rdi),%r8 */ + EMIT3(0x4c, 0x8b, 0x87); + EMIT(offsetof(struct sk_buff, data), 4); + } + } + } + + switch (filter[0].code) { + case BPF_S_RET_K: + case BPF_S_LD_W_LEN: + case BPF_S_ANC_PROTOCOL: + case BPF_S_ANC_IFINDEX: + case BPF_S_ANC_MARK: + case BPF_S_ANC_RXHASH: + case BPF_S_ANC_CPU: + case BPF_S_ANC_QUEUE: + case BPF_S_LD_W_ABS: + case BPF_S_LD_H_ABS: + case BPF_S_LD_B_ABS: + /* first instruction sets A register (or is RET 'constant') */ + break; + default: + /* make sure we dont leak kernel information to user */ + CLEAR_A(); /* A = 0 */ + } + + for (i = 0; i < flen; i++) { + 
unsigned int K = filter[i].k; + + switch (filter[i].code) { + case BPF_S_ALU_ADD_X: /* A += X; */ + seen |= SEEN_XREG; + EMIT2(0x01, 0xd8); /* add %ebx,%eax */ + break; + case BPF_S_ALU_ADD_K: /* A += K; */ + if (!K) + break; + if (is_imm8(K)) + EMIT3(0x83, 0xc0, K); /* add imm8,%eax */ + else + EMIT1_off32(0x05, K); /* add imm32,%eax */ + break; + case BPF_S_ALU_SUB_X: /* A -= X; */ + seen |= SEEN_XREG; + EMIT2(0x29, 0xd8); /* sub %ebx,%eax */ + break; + case BPF_S_ALU_SUB_K: /* A -= K */ + if (!K) + break; + if (is_imm8(K)) + EMIT3(0x83, 0xe8, K); /* sub imm8,%eax */ + else + EMIT1_off32(0x2d, K); /* sub imm32,%eax */ + break; + case BPF_S_ALU_MUL_X: /* A *= X; */ + seen |= SEEN_XREG; + EMIT3(0x0f, 0xaf, 0xc3); /* imul %ebx,%eax */ + break; + case BPF_S_ALU_MUL_K: /* A *= K */ + if (is_imm8(K)) + EMIT3(0x6b, 0xc0, K); /* imul imm8,%eax,%eax */ + else { + EMIT2(0x69, 0xc0); /* imul imm32,%eax */ + EMIT(K, 4); + } + break; + case BPF_S_ALU_DIV_X: /* A /= X; */ + seen |= SEEN_XREG; + EMIT2(0x85, 0xdb); /* test %ebx,%ebx */ + if (pc_ret0 != -1) + EMIT_COND_JMP(X86_JE, addrs[pc_ret0] - (addrs[i] - 4)); + else { + EMIT_COND_JMP(X86_JNE, 2 + 5); + CLEAR_A(); + EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */ + } + EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */ + break; + case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */ + EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */ + EMIT(K, 4); + EMIT4(0x48, 0xc1, 0xe8, 0x20); /* shr $0x20,%rax */ + break; + case BPF_S_ALU_AND_X: + seen |= SEEN_XREG; + EMIT2(0x21, 0xd8); /* and %ebx,%eax */ + break; + case BPF_S_ALU_AND_K: + if (K >= 0xFFFFFF00) { + EMIT2(0x24, K & 0xFF); /* and imm8,%al */ + } else if (K >= 0xFFFF0000) { + EMIT2(0x66, 0x25); /* and imm16,%ax */ + EMIT2(K, 2); + } else { + EMIT1_off32(0x25, K); /* and imm32,%eax */ + } + break; + case BPF_S_ALU_OR_X: + seen |= SEEN_XREG; + EMIT2(0x09, 0xd8); /* or %ebx,%eax */ + break; + case BPF_S_ALU_OR_K: + if (is_imm8(K)) + EMIT3(0x83, 0xc8, K); /* or imm8,%eax */ + else + EMIT1_off32(0x0d, K); /* or imm32,%eax */ + break; + case BPF_S_ALU_LSH_X: /* A <<= X; */ + seen |= SEEN_XREG; + EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */ + break; + case BPF_S_ALU_LSH_K: + if (K == 0) + break; + else if (K == 1) + EMIT2(0xd1, 0xe0); /* shl %eax */ + else + EMIT3(0xc1, 0xe0, K); + break; + case BPF_S_ALU_RSH_X: /* A >>= X; */ + seen |= SEEN_XREG; + EMIT4(0x89, 0xd9, 0xd3, 0xe8); /* mov %ebx,%ecx; shr %cl,%eax */ + break; + case BPF_S_ALU_RSH_K: /* A >>= K; */ + if (K == 0) + break; + else if (K == 1) + EMIT2(0xd1, 0xe8); /* shr %eax */ + else + EMIT3(0xc1, 0xe8, K); + break; + case BPF_S_ALU_NEG: + EMIT2(0xf7, 0xd8); /* neg %eax */ + break; + case BPF_S_RET_K: + if (!K) { + if (pc_ret0 == -1) + pc_ret0 = i; + CLEAR_A(); + } else { + EMIT1_off32(0xb8, K); /* mov $imm32,%eax */ + } + /* fallinto */ + case BPF_S_RET_A: + if (seen) { + if (i != flen - 1) { + EMIT_JMP(cleanup_addr - addrs[i]); + break; + } + if (seen & SEEN_XREG) + EMIT4(0x48, 0x8b, 0x5d, 0xf8); /* mov -8(%rbp),%rbx */ + EMIT1(0xc9); /* leaveq */ + } + EMIT1(0xc3); /* ret */ + break; + case BPF_S_MISC_TAX: /* X = A */ + seen |= SEEN_XREG; + EMIT2(0x89, 0xc3); /* mov %eax,%ebx */ + break; + case BPF_S_MISC_TXA: /* A = X */ + seen |= SEEN_XREG; + EMIT2(0x89, 0xd8); /* mov %ebx,%eax */ + break; + case BPF_S_LD_IMM: /* A = K */ + if (!K) + CLEAR_A(); + else + EMIT1_off32(0xb8, K); /* mov $imm32,%eax */ + break; + case BPF_S_LDX_IMM: /* X = K */ + seen |= SEEN_XREG; + if (!K) + CLEAR_X(); + 
else + EMIT1_off32(0xbb, K); /* mov $imm32,%ebx */ + break; + case BPF_S_LD_MEM: /* A = mem[K] : mov off8(%rbp),%eax */ + seen |= SEEN_MEM; + EMIT3(0x8b, 0x45, 0xf0 - K*4); + break; + case BPF_S_LDX_MEM: /* X = mem[K] : mov off8(%rbp),%ebx */ + seen |= SEEN_XREG | SEEN_MEM; + EMIT3(0x8b, 0x5d, 0xf0 - K*4); + break; + case BPF_S_ST: /* mem[K] = A : mov %eax,off8(%rbp) */ + seen |= SEEN_MEM; + EMIT3(0x89, 0x45, 0xf0 - K*4); + break; + case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */ + seen |= SEEN_XREG | SEEN_MEM; + EMIT3(0x89, 0x5d, 0xf0 - K*4); + break; + case BPF_S_LD_W_LEN: /* A = skb->len; */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); + if (is_imm8(offsetof(struct sk_buff, len))) + /* mov off8(%rdi),%eax */ + EMIT3(0x8b, 0x47, offsetof(struct sk_buff, len)); + else { + EMIT2(0x8b, 0x87); + EMIT(offsetof(struct sk_buff, len), 4); + } + break; + case BPF_S_LDX_W_LEN: /* X = skb->len; */ + seen |= SEEN_XREG; + if (is_imm8(offsetof(struct sk_buff, len))) + /* mov off8(%rdi),%ebx */ + EMIT3(0x8b, 0x5f, offsetof(struct sk_buff, len)); + else { + EMIT2(0x8b, 0x9f); + EMIT(offsetof(struct sk_buff, len), 4); + } + break; + case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); + if (is_imm8(offsetof(struct sk_buff, protocol))) { + /* movzwl off8(%rdi),%eax */ + EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, protocol)); + } else { + EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */ + EMIT(offsetof(struct sk_buff, protocol), 4); + } + EMIT2(0x86, 0xc4); /* ntohs() : xchg %al,%ah */ + break; + case BPF_S_ANC_IFINDEX: + if (is_imm8(offsetof(struct sk_buff, dev))) { + /* movq off8(%rdi),%rax */ + EMIT4(0x48, 0x8b, 0x47, offsetof(struct sk_buff, dev)); + } else { + EMIT3(0x48, 0x8b, 0x87); /* movq off32(%rdi),%rax */ + EMIT(offsetof(struct sk_buff, dev), 4); + } + EMIT3(0x48, 0x85, 0xc0); /* test %rax,%rax */ + EMIT_COND_JMP(X86_JE, cleanup_addr - (addrs[i] - 6)); + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); + EMIT2(0x8b, 0x80); /* mov off32(%rax),%eax */ + EMIT(offsetof(struct net_device, ifindex), 4); + break; + case BPF_S_ANC_MARK: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + if (is_imm8(offsetof(struct sk_buff, mark))) { + /* mov off8(%rdi),%eax */ + EMIT3(0x8b, 0x47, offsetof(struct sk_buff, mark)); + } else { + EMIT2(0x8b, 0x87); + EMIT(offsetof(struct sk_buff, mark), 4); + } + break; + case BPF_S_ANC_RXHASH: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); + if (is_imm8(offsetof(struct sk_buff, rxhash))) { + /* mov off8(%rdi),%eax */ + EMIT3(0x8b, 0x47, offsetof(struct sk_buff, rxhash)); + } else { + EMIT2(0x8b, 0x87); + EMIT(offsetof(struct sk_buff, rxhash), 4); + } + break; + case BPF_S_ANC_QUEUE: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); + if (is_imm8(offsetof(struct sk_buff, queue_mapping))) { + /* movzwl off8(%rdi),%eax */ + EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, queue_mapping)); + } else { + EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */ + EMIT(offsetof(struct sk_buff, queue_mapping), 4); + } + break; + case BPF_S_ANC_CPU: +#ifdef CONFIG_SMP + EMIT4(0x65, 0x8b, 0x04, 0x25); /* mov %gs:off32,%eax */ + EMIT((u32)(unsigned long)&cpu_number, 4); /* A = smp_processor_id(); */ +#else + CLEAR_A(); +#endif + break; + case BPF_S_LD_W_ABS: + func = sk_load_word; +common_load: seen |= SEEN_DATAREF; + if ((int)K < 0) + goto out; + t_offset = func - (image + addrs[i]); + EMIT1_off32(0xbe, K); /* mov imm32,%esi */ + 
EMIT1_off32(0xe8, t_offset); /* call */ + break; + case BPF_S_LD_H_ABS: + func = sk_load_half; + goto common_load; + case BPF_S_LD_B_ABS: + func = sk_load_byte; + goto common_load; + case BPF_S_LDX_B_MSH: + if ((int)K < 0) { + if (pc_ret0 != -1) { + EMIT_JMP(addrs[pc_ret0] - addrs[i]); + break; + } + CLEAR_A(); + EMIT_JMP(cleanup_addr - addrs[i]); + break; + } + seen |= SEEN_DATAREF | SEEN_XREG; + t_offset = sk_load_byte_msh - (image + addrs[i]); + EMIT1_off32(0xbe, K); /* mov imm32,%esi */ + EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */ + break; + case BPF_S_LD_W_IND: + func = sk_load_word_ind; +common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; + t_offset = func - (image + addrs[i]); + EMIT1_off32(0xbe, K); /* mov imm32,%esi */ + EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */ + break; + case BPF_S_LD_H_IND: + func = sk_load_half_ind; + goto common_load_ind; + case BPF_S_LD_B_IND: + func = sk_load_byte_ind; + goto common_load_ind; + case BPF_S_JMP_JA: + t_offset = addrs[i + K] - addrs[i]; + EMIT_JMP(t_offset); + break; + COND_SEL(BPF_S_JMP_JGT_K, X86_JA, X86_JBE); + COND_SEL(BPF_S_JMP_JGE_K, X86_JAE, X86_JB); + COND_SEL(BPF_S_JMP_JEQ_K, X86_JE, X86_JNE); + COND_SEL(BPF_S_JMP_JSET_K,X86_JNE, X86_JE); + COND_SEL(BPF_S_JMP_JGT_X, X86_JA, X86_JBE); + COND_SEL(BPF_S_JMP_JGE_X, X86_JAE, X86_JB); + COND_SEL(BPF_S_JMP_JEQ_X, X86_JE, X86_JNE); + COND_SEL(BPF_S_JMP_JSET_X,X86_JNE, X86_JE); + +cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; + t_offset = addrs[i + filter[i].jt] - addrs[i]; + + /* same targets, can avoid doing the test :) */ + if (filter[i].jt == filter[i].jf) { + EMIT_JMP(t_offset); + break; + } + + switch (filter[i].code) { + case BPF_S_JMP_JGT_X: + case BPF_S_JMP_JGE_X: + case BPF_S_JMP_JEQ_X: + seen |= SEEN_XREG; + EMIT2(0x39, 0xd8); /* cmp %ebx,%eax */ + break; + case BPF_S_JMP_JSET_X: + seen |= SEEN_XREG; + EMIT2(0x85, 0xd8); /* test %ebx,%eax */ + break; + case BPF_S_JMP_JEQ_K: + if (K == 0) { + EMIT2(0x85, 0xc0); /* test %eax,%eax */ + break; + } + case BPF_S_JMP_JGT_K: + case BPF_S_JMP_JGE_K: + if (K <= 127) + EMIT3(0x83, 0xf8, K); /* cmp imm8,%eax */ + else + EMIT1_off32(0x3d, K); /* cmp imm32,%eax */ + break; + case BPF_S_JMP_JSET_K: + if (K <= 0xFF) + EMIT2(0xa8, K); /* test imm8,%al */ + else if (!(K & 0xFFFF00FF)) + EMIT3(0xf6, 0xc4, K >> 8); /* test imm8,%ah */ + else if (K <= 0xFFFF) { + EMIT2(0x66, 0xa9); /* test imm16,%ax */ + EMIT(K, 2); + } else { + EMIT1_off32(0xa9, K); /* test imm32,%eax */ + } + break; + } + if (filter[i].jt != 0) { + if (filter[i].jf) + t_offset += is_near(f_offset) ? 
2 : 6; + EMIT_COND_JMP(t_op, t_offset); + if (filter[i].jf) + EMIT_JMP(f_offset); + break; + } + EMIT_COND_JMP(f_op, f_offset); + break; + default: + /* hmm, too complex filter, give up with jit compiler */ + goto out; + } + ilen = prog - temp; + if (image) { + if (unlikely(proglen + ilen > oldproglen)) { + pr_err("bpb_jit_compile fatal error\n"); + kfree(addrs); + module_free(NULL, image); + return; + } + memcpy(image + proglen, temp, ilen); + } + proglen += ilen; + addrs[i] = proglen; + prog = temp; + } + /* last bpf instruction is always a RET : + * use it to give the cleanup instruction(s) addr + */ + cleanup_addr = proglen - 1; /* ret */ + if (seen) + cleanup_addr -= 1; /* leaveq */ + if (seen & SEEN_XREG) + cleanup_addr -= 4; /* mov -8(%rbp),%rbx */ + + if (image) { + WARN_ON(proglen != oldproglen); + break; + } + if (proglen == oldproglen) { + image = module_alloc(max_t(unsigned int, + proglen, + sizeof(struct work_struct))); + if (!image) + goto out; + } + oldproglen = proglen; + } + if (bpf_jit_enable > 1) + pr_err("flen=%d proglen=%u pass=%d image=%p\n", + flen, proglen, pass, image); + + if (image) { + if (bpf_jit_enable > 1) + print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS, + 16, 1, image, proglen, false); + + bpf_flush_icache(image, image + proglen); + + fp->bpf_func = (void *)image; + } +out: + kfree(addrs); + return; +} + +static void jit_free_defer(struct work_struct *arg) +{ + module_free(NULL, arg); +} + +/* run from softirq, we must use a work_struct to call + * module_free() from process context + */ +void bpf_jit_free(struct sk_filter *fp) +{ + if (fp->bpf_func != sk_run_filter) { + struct work_struct *work = (struct work_struct *)fp->bpf_func; + + INIT_WORK(work, jit_free_defer); + schedule_work(work); + } +} diff --git a/include/linux/filter.h b/include/linux/filter.h index 45266b75409a..4609b85e559d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -135,6 +135,8 @@ struct sk_filter { atomic_t refcnt; unsigned int len; /* Number of filter blocks */ + unsigned int (*bpf_func)(const struct sk_buff *skb, + const struct sock_filter *filter); struct rcu_head rcu; struct sock_filter insns[0]; }; @@ -153,6 +155,80 @@ extern unsigned int sk_run_filter(const struct sk_buff *skb, extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); extern int sk_detach_filter(struct sock *sk); extern int sk_chk_filter(struct sock_filter *filter, int flen); + +#ifdef CONFIG_BPF_JIT +extern void bpf_jit_compile(struct sk_filter *fp); +extern void bpf_jit_free(struct sk_filter *fp); +#define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns) +#else +static inline void bpf_jit_compile(struct sk_filter *fp) +{ +} +static inline void bpf_jit_free(struct sk_filter *fp) +{ +} +#define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns) +#endif + +enum { + BPF_S_RET_K = 1, + BPF_S_RET_A, + BPF_S_ALU_ADD_K, + BPF_S_ALU_ADD_X, + BPF_S_ALU_SUB_K, + BPF_S_ALU_SUB_X, + BPF_S_ALU_MUL_K, + BPF_S_ALU_MUL_X, + BPF_S_ALU_DIV_X, + BPF_S_ALU_AND_K, + BPF_S_ALU_AND_X, + BPF_S_ALU_OR_K, + BPF_S_ALU_OR_X, + BPF_S_ALU_LSH_K, + BPF_S_ALU_LSH_X, + BPF_S_ALU_RSH_K, + BPF_S_ALU_RSH_X, + BPF_S_ALU_NEG, + BPF_S_LD_W_ABS, + BPF_S_LD_H_ABS, + BPF_S_LD_B_ABS, + BPF_S_LD_W_LEN, + BPF_S_LD_W_IND, + BPF_S_LD_H_IND, + BPF_S_LD_B_IND, + BPF_S_LD_IMM, + BPF_S_LDX_W_LEN, + BPF_S_LDX_B_MSH, + BPF_S_LDX_IMM, + BPF_S_MISC_TAX, + BPF_S_MISC_TXA, + BPF_S_ALU_DIV_K, + BPF_S_LD_MEM, + BPF_S_LDX_MEM, + BPF_S_ST, + BPF_S_STX, + BPF_S_JMP_JA, + BPF_S_JMP_JEQ_K, + 
BPF_S_JMP_JEQ_X, + BPF_S_JMP_JGE_K, + BPF_S_JMP_JGE_X, + BPF_S_JMP_JGT_K, + BPF_S_JMP_JGT_X, + BPF_S_JMP_JSET_K, + BPF_S_JMP_JSET_X, + /* Ancillary data */ + BPF_S_ANC_PROTOCOL, + BPF_S_ANC_PKTTYPE, + BPF_S_ANC_IFINDEX, + BPF_S_ANC_NLATTR, + BPF_S_ANC_NLATTR_NEST, + BPF_S_ANC_MARK, + BPF_S_ANC_QUEUE, + BPF_S_ANC_HATYPE, + BPF_S_ANC_RXHASH, + BPF_S_ANC_CPU, +}; + #endif /* __KERNEL__ */ #endif /* __LINUX_FILTER_H__ */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cb8178ab3c52..364bcf212f71 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2514,6 +2514,7 @@ extern struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, extern int netdev_max_backlog; extern int netdev_tstamp_prequeue; extern int weight_p; +extern int bpf_jit_enable; extern int netdev_set_master(struct net_device *dev, struct net_device *master); extern int netdev_set_bond_master(struct net_device *dev, struct net_device *master); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d0ae90af0b40..79aafbbf430a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -391,8 +391,8 @@ struct sk_buff { __u32 rxhash; + __u16 queue_mapping; kmemcheck_bitfield_begin(flags2); - __u16 queue_mapping:16; #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif diff --git a/net/Kconfig b/net/Kconfig index 79cabf1ee68b..745fb02d2fda 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -232,6 +232,19 @@ config XPS depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS default y +config HAVE_BPF_JIT + bool + +config BPF_JIT + bool "enable BPF Just In Time compiler" + depends on HAVE_BPF_JIT + ---help--- + Berkeley Packet Filter filtering capabilities are normally handled + by an interpreter. This option allows kernel to generate a native + code when filter is loaded in memory. This should speedup + packet sniffing (libpcap/tcpdump). 
Note : Admin should enable + this feature changing /proc/sys/net/core/bpf_jit_enable + menu "Network testing" config NET_PKTGEN diff --git a/net/core/filter.c b/net/core/filter.c index afb8afb066bb..0eb8c4466eaa 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -39,65 +39,6 @@ #include #include -enum { - BPF_S_RET_K = 1, - BPF_S_RET_A, - BPF_S_ALU_ADD_K, - BPF_S_ALU_ADD_X, - BPF_S_ALU_SUB_K, - BPF_S_ALU_SUB_X, - BPF_S_ALU_MUL_K, - BPF_S_ALU_MUL_X, - BPF_S_ALU_DIV_X, - BPF_S_ALU_AND_K, - BPF_S_ALU_AND_X, - BPF_S_ALU_OR_K, - BPF_S_ALU_OR_X, - BPF_S_ALU_LSH_K, - BPF_S_ALU_LSH_X, - BPF_S_ALU_RSH_K, - BPF_S_ALU_RSH_X, - BPF_S_ALU_NEG, - BPF_S_LD_W_ABS, - BPF_S_LD_H_ABS, - BPF_S_LD_B_ABS, - BPF_S_LD_W_LEN, - BPF_S_LD_W_IND, - BPF_S_LD_H_IND, - BPF_S_LD_B_IND, - BPF_S_LD_IMM, - BPF_S_LDX_W_LEN, - BPF_S_LDX_B_MSH, - BPF_S_LDX_IMM, - BPF_S_MISC_TAX, - BPF_S_MISC_TXA, - BPF_S_ALU_DIV_K, - BPF_S_LD_MEM, - BPF_S_LDX_MEM, - BPF_S_ST, - BPF_S_STX, - BPF_S_JMP_JA, - BPF_S_JMP_JEQ_K, - BPF_S_JMP_JEQ_X, - BPF_S_JMP_JGE_K, - BPF_S_JMP_JGE_X, - BPF_S_JMP_JGT_K, - BPF_S_JMP_JGT_X, - BPF_S_JMP_JSET_K, - BPF_S_JMP_JSET_X, - /* Ancillary data */ - BPF_S_ANC_PROTOCOL, - BPF_S_ANC_PKTTYPE, - BPF_S_ANC_IFINDEX, - BPF_S_ANC_NLATTR, - BPF_S_ANC_NLATTR_NEST, - BPF_S_ANC_MARK, - BPF_S_ANC_QUEUE, - BPF_S_ANC_HATYPE, - BPF_S_ANC_RXHASH, - BPF_S_ANC_CPU, -}; - /* No hurry in this branch */ static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size) { @@ -145,7 +86,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); if (filter) { - unsigned int pkt_len = sk_run_filter(skb, filter->insns); + unsigned int pkt_len = SK_RUN_FILTER(filter, skb); err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; } @@ -638,6 +579,7 @@ void sk_filter_release_rcu(struct rcu_head *rcu) { struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); + bpf_jit_free(fp); kfree(fp); } EXPORT_SYMBOL(sk_filter_release_rcu); @@ -672,6 +614,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) atomic_set(&fp->refcnt, 1); fp->len = fprog->len; + fp->bpf_func = sk_run_filter; err = sk_chk_filter(fp->insns, fp->len); if (err) { @@ -679,6 +622,8 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) return err; } + bpf_jit_compile(fp); + old_fp = rcu_dereference_protected(sk->sk_filter, sock_owned_by_user(sk)); rcu_assign_pointer(sk->sk_filter, fp); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 385b6095fdc4..a829e3f60aeb 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -122,6 +122,15 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, +#ifdef CONFIG_BPF_JIT + { + .procname = "bpf_jit_enable", + .data = &bpf_jit_enable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, +#endif { .procname = "netdev_tstamp_prequeue", .data = &netdev_tstamp_prequeue, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index b5362e96022b..549527bca87a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -538,7 +538,7 @@ static inline unsigned int run_filter(const struct sk_buff *skb, rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); if (filter != NULL) - res = sk_run_filter(skb, filter->insns); + res = SK_RUN_FILTER(filter, skb); rcu_read_unlock(); return res; -- cgit v1.2.3 From e6fa16ab9c1e9b344428e6fea4d29e3cc4b28fb0 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 27 Apr 2011 20:59:41 +0200 
Subject: signal: sigprocmask() should do retarget_shared_pending() In short, almost every change to current->blocked is wrong, or at least can lead to unexpected results. For example: two threads T1 and T2, where T1 sleeps in sigtimedwait/pause/etc. kill(tgid, SIG) can pick T2 for TIF_SIGPENDING. If T2 calls sigprocmask() and blocks SIG before it notices the pending signal, nobody else can handle this pending shared signal. I am not sure this is a bug, but at least this looks strange imho. T1 should not sleep forever, there is a signal which should wake it up. This patch moves the code which actually changes ->blocked into the new helper, set_current_blocked(), and changes this code to call retarget_shared_pending() as exit_signals() does. We should only care about the signals we just blocked, so we use "newset & ~current->blocked" as a mask. We do not check !sigisemptyset(newblocked); retarget_shared_pending() is cheap unless mask & shared_pending. Note: for this particular case we could simply change sigprocmask() to return -EINTR if signal_pending(), but then we should change other callers and, more importantly, if we need this fix then set_current_blocked() will have more callers and some of them can't restart. See the next patch as a random example. Signed-off-by: Oleg Nesterov Reviewed-by: Matt Fleming Acked-by: Tejun Heo --- include/linux/signal.h | 1 + kernel/signal.c | 29 ++++++++++++++++++++++++----- 2 files changed, 25 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index fcd2b14b1932..ba009c167275 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -243,6 +243,7 @@ extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info); extern long do_sigpending(void __user *, unsigned long); extern int sigprocmask(int, sigset_t *, sigset_t *); +extern void set_current_blocked(const sigset_t *); extern int show_unhandled_signals; struct pt_regs; diff --git a/kernel/signal.c b/kernel/signal.c index e8308e3238c1..8aa3a2e226af 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2299,6 +2299,29 @@ long do_no_restart_syscall(struct restart_block *param) return -EINTR; } +/** + * set_current_blocked - change current->blocked mask + * @newset: new mask + * + * It is wrong to change ->blocked directly, this helper should be used + * to ensure the process can't miss a shared signal we are going to block. + */ +void set_current_blocked(const sigset_t *newset) +{ + struct task_struct *tsk = current; + + spin_lock_irq(&tsk->sighand->siglock); + if (signal_pending(tsk) && !thread_group_empty(tsk)) { + sigset_t newblocked; + /* A set of now blocked but previously unblocked signals. */ + signandsets(&newblocked, newset, &current->blocked); + retarget_shared_pending(tsk, &newblocked); + } + tsk->blocked = *newset; + recalc_sigpending(); + spin_unlock_irq(&tsk->sighand->siglock); +} + /* * This is also useful for kernel threads that want to temporarily * (or permanently) block certain signals. 
@@ -2330,11 +2353,7 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) return -EINVAL; } - spin_lock_irq(&tsk->sighand->siglock); - tsk->blocked = newset; - recalc_sigpending(); - spin_unlock_irq(&tsk->sighand->siglock); - + set_current_blocked(&newset); return 0; } -- cgit v1.2.3 From 943df1485a8ff0e600729e082e568ece04d4de9e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 27 Apr 2011 21:44:14 +0200 Subject: signal: introduce do_sigtimedwait() to factor out compat/native code Factor out the common code in sys_rt_sigtimedwait/compat_sys_rt_sigtimedwait to the new helper, do_sigtimedwait(). Add the comment to document the extra tick we add to timespec_to_jiffies(ts), thanks to Linus who explained this to me. Perhaps it would be better to move compat_sys_rt_sigtimedwait() into signal.c under CONFIG_COMPAT, then we can make do_sigtimedwait() static. Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo Reviewed-by: Matt Fleming --- include/linux/signal.h | 2 + kernel/compat.c | 41 ++++-------------- kernel/signal.c | 110 +++++++++++++++++++++++++++++-------------------- 3 files changed, 74 insertions(+), 79 deletions(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index ba009c167275..782546d661ba 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -242,6 +242,8 @@ extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info); extern long do_sigpending(void __user *, unsigned long); +extern int do_sigtimedwait(const sigset_t *, siginfo_t *, + const struct timespec *); extern int sigprocmask(int, sigset_t *, sigset_t *); extern void set_current_blocked(const sigset_t *); extern int show_unhandled_signals; diff --git a/kernel/compat.c b/kernel/compat.c index 06cbb0619531..9214dcd087b7 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -890,10 +890,9 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, { compat_sigset_t s32; sigset_t s; - int sig; struct timespec t; siginfo_t info; - long ret, timeout; + long ret; if (sigsetsize != sizeof(sigset_t)) return -EINVAL; @@ -901,45 +900,19 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, if (copy_from_user(&s32, uthese, sizeof(compat_sigset_t))) return -EFAULT; sigset_from_compat(&s, &s32); - sigdelsetmask(&s,sigmask(SIGKILL)|sigmask(SIGSTOP)); - signotset(&s); - timeout = MAX_SCHEDULE_TIMEOUT; if (uts) { - if (get_compat_timespec (&t, uts)) + if (get_compat_timespec(&t, uts)) return -EFAULT; - if (!timespec_valid(&t)) - return -EINVAL; - timeout = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec); } - spin_lock_irq(&current->sighand->siglock); - sig = dequeue_signal(current, &s, &info); - if (!sig && timeout) { - current->real_blocked = current->blocked; - sigandsets(&current->blocked, &current->blocked, &s); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - - timeout = schedule_timeout_interruptible(timeout); - - spin_lock_irq(&current->sighand->siglock); - sig = dequeue_signal(current, &s, &info); - current->blocked = current->real_blocked; - siginitset(&current->real_blocked, 0); - recalc_sigpending(); - } - spin_unlock_irq(&current->sighand->siglock); + ret = do_sigtimedwait(&s, &info, uts ? 
&t : NULL); - if (sig) { - ret = sig; - if (uinfo) { - if (copy_siginfo_to_user32(uinfo, &info)) - ret = -EFAULT; - } - } else { - ret = timeout?-EINTR:-EAGAIN; + if (ret > 0 && uinfo) { + if (copy_siginfo_to_user32(uinfo, &info)) + ret = -EFAULT; } + return ret; } diff --git a/kernel/signal.c b/kernel/signal.c index c734619554f6..1ab89f677424 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2503,6 +2503,66 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from) #endif +/** + * do_sigtimedwait - wait for queued signals specified in @which + * @which: queued signals to wait for + * @info: if non-null, the signal's siginfo is returned here + * @ts: upper bound on process time suspension + */ +int do_sigtimedwait(const sigset_t *which, siginfo_t *info, + const struct timespec *ts) +{ + struct task_struct *tsk = current; + long timeout = MAX_SCHEDULE_TIMEOUT; + sigset_t mask = *which; + int sig; + + if (ts) { + if (!timespec_valid(ts)) + return -EINVAL; + timeout = timespec_to_jiffies(ts); + /* + * We can be close to the next tick, add another one + * to ensure we will wait at least the time asked for. + */ + if (ts->tv_sec || ts->tv_nsec) + timeout++; + } + + /* + * Invert the set of allowed signals to get those we want to block. + */ + sigdelsetmask(&mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); + signotset(&mask); + + spin_lock_irq(&tsk->sighand->siglock); + sig = dequeue_signal(tsk, &mask, info); + if (!sig && timeout) { + /* + * None ready, temporarily unblock those we're interested + * while we are sleeping in so that we'll be awakened when + * they arrive. + */ + tsk->real_blocked = tsk->blocked; + sigandsets(&tsk->blocked, &tsk->blocked, &mask); + recalc_sigpending(); + spin_unlock_irq(&tsk->sighand->siglock); + + timeout = schedule_timeout_interruptible(timeout); + + spin_lock_irq(&tsk->sighand->siglock); + sig = dequeue_signal(tsk, &mask, info); + tsk->blocked = tsk->real_blocked; + siginitset(&tsk->real_blocked, 0); + recalc_sigpending(); + } + spin_unlock_irq(&tsk->sighand->siglock); + + if (sig) + return sig; + return timeout ? -EINTR : -EAGAIN; +} + /** * sys_rt_sigtimedwait - synchronously wait for queued signals specified * in @uthese @@ -2515,11 +2575,10 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, siginfo_t __user *, uinfo, const struct timespec __user *, uts, size_t, sigsetsize) { - int ret, sig; sigset_t these; struct timespec ts; siginfo_t info; - long timeout; + int ret; /* XXX: Don't preclude handling different sized sigset_t's. */ if (sigsetsize != sizeof(sigset_t)) @@ -2528,55 +2587,16 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, if (copy_from_user(&these, uthese, sizeof(these))) return -EFAULT; - /* - * Invert the set of allowed signals to get those we - * want to block. - */ - sigdelsetmask(&these, sigmask(SIGKILL)|sigmask(SIGSTOP)); - signotset(&these); - - timeout = MAX_SCHEDULE_TIMEOUT; if (uts) { if (copy_from_user(&ts, uts, sizeof(ts))) return -EFAULT; - if (!timespec_valid(&ts)) - return -EINVAL; - timeout = timespec_to_jiffies(&ts) + (ts.tv_sec || ts.tv_nsec); } - spin_lock_irq(¤t->sighand->siglock); - sig = dequeue_signal(current, &these, &info); - if (!sig && timeout) { - /* - * None ready -- temporarily unblock those we're - * interested while we are sleeping in so that we'll - * be awakened when they arrive. 
- */ - current->real_blocked = current->blocked; - sigandsets(&current->blocked, &current->blocked, &these); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - - timeout = schedule_timeout_interruptible(timeout); - - spin_lock_irq(&current->sighand->siglock); - sig = dequeue_signal(current, &these, &info); - current->blocked = current->real_blocked; - siginitset(&current->real_blocked, 0); - recalc_sigpending(); - } - spin_unlock_irq(&current->sighand->siglock); + ret = do_sigtimedwait(&these, &info, uts ? &ts : NULL); - if (sig) { - ret = sig; - if (uinfo) { - if (copy_siginfo_to_user(uinfo, &info)) - ret = -EFAULT; - } - } else { - ret = -EAGAIN; - if (timeout) - ret = -EINTR; + if (ret > 0 && uinfo) { + if (copy_siginfo_to_user(uinfo, &info)) + ret = -EFAULT; } return ret; -- cgit v1.2.3 From 702a5073fdb71eb29cd4912575289fb5044c1894 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 27 Apr 2011 22:01:27 +0200 Subject: signal: rename signandsets() to sigandnsets() As Tejun and Linus pointed out, "nand" is the wrong name for "x & ~y", it should be "andn". Rename signandsets() as suggested. Suggested-by: Tejun Heo Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo --- include/linux/signal.h | 6 +++--- kernel/signal.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index 782546d661ba..7e2526374fd7 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -123,13 +123,13 @@ _SIG_SET_BINOP(sigorsets, _sig_or) #define _sig_and(x,y) ((x) & (y)) _SIG_SET_BINOP(sigandsets, _sig_and) -#define _sig_nand(x,y) ((x) & ~(y)) -_SIG_SET_BINOP(signandsets, _sig_nand) +#define _sig_andn(x,y) ((x) & ~(y)) +_SIG_SET_BINOP(sigandnsets, _sig_andn) #undef _SIG_SET_BINOP #undef _sig_or #undef _sig_and -#undef _sig_nand +#undef _sig_andn #define _SIG_SET_OP(name, op) \ static inline void name(sigset_t *set) \ diff --git a/kernel/signal.c b/kernel/signal.c index 4d97e11d7672..e7ee4e642c5a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -669,7 +669,7 @@ static int rm_from_queue_full(sigset_t *mask, struct sigpending *s) if (sigisemptyset(&m)) return 0; - signandsets(&s->signal, &s->signal, mask); + sigandnsets(&s->signal, &s->signal, mask); list_for_each_entry_safe(q, n, &s->list, list) { if (sigismember(mask, q->info.si_signo)) { list_del_init(&q->list); @@ -2304,7 +2304,7 @@ static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset) if (signal_pending(tsk) && !thread_group_empty(tsk)) { sigset_t newblocked; /* A set of now blocked but previously unblocked signals. 
*/ - signandsets(&newblocked, newset, &current->blocked); + sigandnsets(&newblocked, newset, &current->blocked); retarget_shared_pending(tsk, &newblocked); } tsk->blocked = *newset; @@ -2349,7 +2349,7 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) sigorsets(&newset, &tsk->blocked, set); break; case SIG_UNBLOCK: - signandsets(&newset, &tsk->blocked, set); + sigandnsets(&newset, &tsk->blocked, set); break; case SIG_SETMASK: newset = *set; -- cgit v1.2.3 From 78f11a255749d09025f54d4e2df4fbcb031530e2 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 27 Apr 2011 15:26:45 -0700 Subject: mm: thp: fix /dev/zero MAP_PRIVATE and vm_flags cleanups The huge_memory.c THP page fault was allowed to run if vm_ops was null (which would succeed for /dev/zero MAP_PRIVATE, as the f_op->mmap wouldn't set up a special vma->vm_ops and it would fall back to regular anonymous memory), but the rest of the THP logic wasn't fully activated for vmas with vm_file not NULL (/dev/zero has a non-NULL vma->vm_file). So this removes the vm_file checks so that /dev/zero can also safely use THP (the other, albeit safer, approach to fix this bug would have been to prevent the THP initial page fault from running if vm_file was set). After removing the vm_file checks, this also makes huge_memory.c stricter in khugepaged for the DEBUG_VM=y case. It doesn't replace the vm_file check with an is_pfn_mapping check (but it keeps checking for VM_PFNMAP under VM_BUG_ON) because for an is_cow_mapping() mapping VM_PFNMAP should only be allowed to exist before the first page fault, and in turn when vma->anon_vma is null (so preventing khugepaged registration). So I tend to think the previous comment, saying that if vm_file was set, VM_PFNMAP might have been set and we could still be registered in khugepaged (despite anon_vma having to be non-NULL to be registered in khugepaged), was too paranoid. I think the is_linear_pfn_mapping check is also superfluous (as described by the comment), but under DEBUG_VM it can safely stay. Addresses https://bugzilla.kernel.org/show_bug.cgi?id=33682 Signed-off-by: Andrea Arcangeli Reported-by: Caspar Zhang Acked-by: Mel Gorman Acked-by: Rik van Riel Cc: [2.6.38.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 2 +- include/linux/mm.h | 3 ++- mm/huge_memory.c | 43 ++++++++++++++++++++++++------------------- 3 files changed, 27 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index df29c8fde36b..8847c8c29791 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -117,7 +117,7 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long end, long adjust_next) { - if (!vma->anon_vma || vma->vm_ops || vma->vm_file) + if (!vma->anon_vma || vma->vm_ops) return; __vma_adjust_trans_huge(vma, start, end, adjust_next); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 692dbae6ffa7..2348db26bc3d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -137,7 +137,8 @@ extern unsigned int kobjsize(const void *objp); #define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ) /* - * special vmas that are non-mergable, non-mlock()able + * Special vmas that are non-mergable, non-mlock()able. + * Note: mm/huge_memory.c VM_NO_THP depends on this definition. 
*/ #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 470dcda10add..83326ad66d9b 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1408,6 +1408,9 @@ out: return ret; } +#define VM_NO_THP (VM_SPECIAL|VM_INSERTPAGE|VM_MIXEDMAP|VM_SAO| \ + VM_HUGETLB|VM_SHARED|VM_MAYSHARE) + int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice) { @@ -1416,11 +1419,7 @@ int hugepage_madvise(struct vm_area_struct *vma, /* * Be somewhat over-protective like KSM for now! */ - if (*vm_flags & (VM_HUGEPAGE | - VM_SHARED | VM_MAYSHARE | - VM_PFNMAP | VM_IO | VM_DONTEXPAND | - VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | - VM_MIXEDMAP | VM_SAO)) + if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP)) return -EINVAL; *vm_flags &= ~VM_NOHUGEPAGE; *vm_flags |= VM_HUGEPAGE; @@ -1436,11 +1435,7 @@ int hugepage_madvise(struct vm_area_struct *vma, /* * Be somewhat over-protective like KSM for now! */ - if (*vm_flags & (VM_NOHUGEPAGE | - VM_SHARED | VM_MAYSHARE | - VM_PFNMAP | VM_IO | VM_DONTEXPAND | - VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | - VM_MIXEDMAP | VM_SAO)) + if (*vm_flags & (VM_NOHUGEPAGE | VM_NO_THP)) return -EINVAL; *vm_flags &= ~VM_HUGEPAGE; *vm_flags |= VM_NOHUGEPAGE; @@ -1574,10 +1569,14 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma) * page fault if needed. */ return 0; - if (vma->vm_file || vma->vm_ops) + if (vma->vm_ops) /* khugepaged not yet working on file or special mappings */ return 0; - VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); + /* + * If is_pfn_mapping() is true is_learn_pfn_mapping() must be + * true too, verify it here. + */ + VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP); hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; hend = vma->vm_end & HPAGE_PMD_MASK; if (hstart < hend) @@ -1828,12 +1827,15 @@ static void collapse_huge_page(struct mm_struct *mm, (vma->vm_flags & VM_NOHUGEPAGE)) goto out; - /* VM_PFNMAP vmas may have vm_ops null but vm_file set */ - if (!vma->anon_vma || vma->vm_ops || vma->vm_file) + if (!vma->anon_vma || vma->vm_ops) goto out; if (is_vma_temporary_stack(vma)) goto out; - VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); + /* + * If is_pfn_mapping() is true is_learn_pfn_mapping() must be + * true too, verify it here. + */ + VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP); pgd = pgd_offset(mm, address); if (!pgd_present(*pgd)) @@ -2066,13 +2068,16 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, progress++; continue; } - /* VM_PFNMAP vmas may have vm_ops null but vm_file set */ - if (!vma->anon_vma || vma->vm_ops || vma->vm_file) + if (!vma->anon_vma || vma->vm_ops) goto skip; if (is_vma_temporary_stack(vma)) goto skip; - - VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); + /* + * If is_pfn_mapping() is true is_learn_pfn_mapping() + * must be true too, verify it here. + */ + VM_BUG_ON(is_linear_pfn_mapping(vma) || + vma->vm_flags & VM_NO_THP); hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; hend = vma->vm_end & HPAGE_PMD_MASK; -- cgit v1.2.3 From 5a3ea8782c63d3501cb764c176f153c0d9a400e1 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Apr 2011 15:55:52 -0400 Subject: flex_array: flex_array_prealloc takes a number of elements, not an end Change flex_array_prealloc to take the number of elements for which space should be allocated instead of the last (inclusive) element. Users and documentation are updated accordingly. 
flex_arrays got introduced before they had users. When folks started using it, they ended up needing a different API than was coded up originally. This swaps over to the API that folks apparently need. Based-on-patch-by: Steffen Klassert Signed-off-by: Eric Paris Tested-by: Chris Richards Acked-by: Dave Hansen Cc: stable@kernel.org [2.6.38+] --- Documentation/flexible-arrays.txt | 4 ++-- include/linux/flex_array.h | 2 +- lib/flex_array.c | 13 ++++++++----- security/selinux/ss/policydb.c | 6 +++--- 4 files changed, 14 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/Documentation/flexible-arrays.txt b/Documentation/flexible-arrays.txt index cb8a3a00cc92..df904aec9904 100644 --- a/Documentation/flexible-arrays.txt +++ b/Documentation/flexible-arrays.txt @@ -66,10 +66,10 @@ trick is to ensure that any needed memory allocations are done before entering atomic context, using: int flex_array_prealloc(struct flex_array *array, unsigned int start, - unsigned int end, gfp_t flags); + unsigned int nr_elements, gfp_t flags); This function will ensure that memory for the elements indexed in the range -defined by start and end has been allocated. Thereafter, a +defined by start and nr_elements has been allocated. Thereafter, a flex_array_put() call on an element in that range is guaranteed not to block. diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h index 70e4efabe0fb..ebeb2f3ad068 100644 --- a/include/linux/flex_array.h +++ b/include/linux/flex_array.h @@ -61,7 +61,7 @@ struct flex_array { struct flex_array *flex_array_alloc(int element_size, unsigned int total, gfp_t flags); int flex_array_prealloc(struct flex_array *fa, unsigned int start, - unsigned int end, gfp_t flags); + unsigned int nr_elements, gfp_t flags); void flex_array_free(struct flex_array *fa); void flex_array_free_parts(struct flex_array *fa); int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, diff --git a/lib/flex_array.c b/lib/flex_array.c index c0ea40ba2082..0c33b24498ba 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -232,10 +232,10 @@ EXPORT_SYMBOL(flex_array_clear); /** * flex_array_prealloc - guarantee that array space exists - * @fa: the flex array for which to preallocate parts - * @start: index of first array element for which space is allocated - * @end: index of last (inclusive) element for which space is allocated - * @flags: page allocation flags + * @fa: the flex array for which to preallocate parts + * @start: index of first array element for which space is allocated + * @nr_elements: number of elements for which space is allocated + * @flags: page allocation flags * * This will guarantee that no future calls to flex_array_put() * will allocate memory. It can be used if you are expecting to @@ -245,13 +245,16 @@ EXPORT_SYMBOL(flex_array_clear); * Locking must be provided by the caller. 
*/ int flex_array_prealloc(struct flex_array *fa, unsigned int start, - unsigned int end, gfp_t flags) + unsigned int nr_elements, gfp_t flags) { int start_part; int end_part; int part_nr; + unsigned int end; struct flex_array_part *part; + end = start + nr_elements - 1; + if (start >= fa->total_nr_elements || end >= fa->total_nr_elements) return -ENOSPC; if (elements_fit_in_base(fa)) diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 549120c56edd..102e9ec1b77a 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -545,7 +545,7 @@ static int policydb_index(struct policydb *p) goto out; rc = flex_array_prealloc(p->type_val_to_struct_array, 0, - p->p_types.nprim - 1, GFP_KERNEL | __GFP_ZERO); + p->p_types.nprim, GFP_KERNEL | __GFP_ZERO); if (rc) goto out; @@ -562,7 +562,7 @@ static int policydb_index(struct policydb *p) goto out; rc = flex_array_prealloc(p->sym_val_to_name[i], - 0, p->symtab[i].nprim - 1, + 0, p->symtab[i].nprim, GFP_KERNEL | __GFP_ZERO); if (rc) goto out; @@ -2439,7 +2439,7 @@ int policydb_read(struct policydb *p, void *fp) goto bad; /* preallocate so we don't have to worry about the put ever failing */ - rc = flex_array_prealloc(p->type_attr_map_array, 0, p->p_types.nprim - 1, + rc = flex_array_prealloc(p->type_attr_map_array, 0, p->p_types.nprim, GFP_KERNEL | __GFP_ZERO); if (rc) goto bad; -- cgit v1.2.3 From 68972efa657040f891c7eda07c7da8c8dd576788 Mon Sep 17 00:00:00 2001 From: Paul Stewart Date: Thu, 28 Apr 2011 05:43:37 +0000 Subject: usbnet: Resubmit interrupt URB if device is open Resubmit interrupt URB if device is open. Use a flag set in usbnet_open() to determine this state. Also kill and free interrupt URB in usbnet_disconnect(). [Rebased off git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git] Signed-off-by: Paul Stewart Signed-off-by: David S. 
Miller --- drivers/net/usb/usbnet.c | 8 ++++++++ include/linux/usb/usbnet.h | 1 + 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 069c1cf0fdf7..009bba3d753e 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -736,6 +736,7 @@ int usbnet_open (struct net_device *net) } } + set_bit(EVENT_DEV_OPEN, &dev->flags); netif_start_queue (net); netif_info(dev, ifup, dev->net, "open: enable queueing (rx %d, tx %d) mtu %d %s framing\n", @@ -1259,6 +1260,9 @@ void usbnet_disconnect (struct usb_interface *intf) if (dev->driver_info->unbind) dev->driver_info->unbind (dev, intf); + usb_kill_urb(dev->interrupt); + usb_free_urb(dev->interrupt); + free_netdev(net); usb_put_dev (xdev); } @@ -1498,6 +1502,10 @@ int usbnet_resume (struct usb_interface *intf) int retval; if (!--dev->suspend_count) { + /* resume interrupt URBs */ + if (dev->interrupt && test_bit(EVENT_DEV_OPEN, &dev->flags)) + usb_submit_urb(dev->interrupt, GFP_NOIO); + spin_lock_irq(&dev->txq.lock); while ((res = usb_get_from_anchor(&dev->deferred))) { diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 0e1855079fbb..605b0aa8d852 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -68,6 +68,7 @@ struct usbnet { # define EVENT_RX_PAUSED 5 # define EVENT_DEV_WAKING 6 # define EVENT_DEV_ASLEEP 7 +# define EVENT_DEV_OPEN 8 }; static inline struct usb_driver *driver_of(struct usb_interface *intf) -- cgit v1.2.3 From 5d30b10bd68df007e7ae21e77d1e0ce184b53040 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Apr 2011 15:55:52 -0400 Subject: flex_array: flex_array_prealloc takes a number of elements, not an end Change flex_array_prealloc to take the number of elements for which space should be allocated instead of the last (inclusive) element. Users and documentation are updated accordingly. flex_arrays got introduced before they had users. When folks started using it, they ended up needing a different API than was coded up originally. This swaps over to the API that folks apparently need. Based-on-patch-by: Steffen Klassert Signed-off-by: Eric Paris Tested-by: Chris Richards Acked-by: Dave Hansen Cc: stable@kernel.org [2.6.38+] --- Documentation/flexible-arrays.txt | 4 ++-- include/linux/flex_array.h | 2 +- lib/flex_array.c | 13 ++++++++----- security/selinux/ss/policydb.c | 6 +++--- 4 files changed, 14 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/Documentation/flexible-arrays.txt b/Documentation/flexible-arrays.txt index cb8a3a00cc92..df904aec9904 100644 --- a/Documentation/flexible-arrays.txt +++ b/Documentation/flexible-arrays.txt @@ -66,10 +66,10 @@ trick is to ensure that any needed memory allocations are done before entering atomic context, using: int flex_array_prealloc(struct flex_array *array, unsigned int start, - unsigned int end, gfp_t flags); + unsigned int nr_elements, gfp_t flags); This function will ensure that memory for the elements indexed in the range -defined by start and end has been allocated. Thereafter, a +defined by start and nr_elements has been allocated. Thereafter, a flex_array_put() call on an element in that range is guaranteed not to block. 
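For callers, the practical difference is that the third argument of flex_array_prealloc() is now a count rather than a last (inclusive) index. A minimal sketch of the converted call pattern follows; the element type, sizes, and error codes are illustrative and not taken from the patch (assumes linux/flex_array.h):

	/* Illustrative only: reserve room for 128 u32 elements up front. */
	static int example_prealloc(void)
	{
		struct flex_array *fa;

		fa = flex_array_alloc(sizeof(u32), 128, GFP_KERNEL);
		if (!fa)
			return -ENOMEM;

		/* old API: index of the last (inclusive) element */
		/* flex_array_prealloc(fa, 0, 127, GFP_KERNEL); */

		/* new API: number of elements */
		return flex_array_prealloc(fa, 0, 128, GFP_KERNEL);
	}
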
diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h index 70e4efabe0fb..ebeb2f3ad068 100644 --- a/include/linux/flex_array.h +++ b/include/linux/flex_array.h @@ -61,7 +61,7 @@ struct flex_array { struct flex_array *flex_array_alloc(int element_size, unsigned int total, gfp_t flags); int flex_array_prealloc(struct flex_array *fa, unsigned int start, - unsigned int end, gfp_t flags); + unsigned int nr_elements, gfp_t flags); void flex_array_free(struct flex_array *fa); void flex_array_free_parts(struct flex_array *fa); int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, diff --git a/lib/flex_array.c b/lib/flex_array.c index c0ea40ba2082..0c33b24498ba 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -232,10 +232,10 @@ EXPORT_SYMBOL(flex_array_clear); /** * flex_array_prealloc - guarantee that array space exists - * @fa: the flex array for which to preallocate parts - * @start: index of first array element for which space is allocated - * @end: index of last (inclusive) element for which space is allocated - * @flags: page allocation flags + * @fa: the flex array for which to preallocate parts + * @start: index of first array element for which space is allocated + * @nr_elements: number of elements for which space is allocated + * @flags: page allocation flags * * This will guarantee that no future calls to flex_array_put() * will allocate memory. It can be used if you are expecting to @@ -245,13 +245,16 @@ EXPORT_SYMBOL(flex_array_clear); * Locking must be provided by the caller. */ int flex_array_prealloc(struct flex_array *fa, unsigned int start, - unsigned int end, gfp_t flags) + unsigned int nr_elements, gfp_t flags) { int start_part; int end_part; int part_nr; + unsigned int end; struct flex_array_part *part; + end = start + nr_elements - 1; + if (start >= fa->total_nr_elements || end >= fa->total_nr_elements) return -ENOSPC; if (elements_fit_in_base(fa)) diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index e7b850ad57ee..e6e7ce0d3d55 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -502,7 +502,7 @@ static int policydb_index(struct policydb *p) goto out; rc = flex_array_prealloc(p->type_val_to_struct_array, 0, - p->p_types.nprim - 1, GFP_KERNEL | __GFP_ZERO); + p->p_types.nprim, GFP_KERNEL | __GFP_ZERO); if (rc) goto out; @@ -519,7 +519,7 @@ static int policydb_index(struct policydb *p) goto out; rc = flex_array_prealloc(p->sym_val_to_name[i], - 0, p->symtab[i].nprim - 1, + 0, p->symtab[i].nprim, GFP_KERNEL | __GFP_ZERO); if (rc) goto out; @@ -2375,7 +2375,7 @@ int policydb_read(struct policydb *p, void *fp) goto bad; /* preallocate so we don't have to worry about the put ever failing */ - rc = flex_array_prealloc(p->type_attr_map_array, 0, p->p_types.nprim - 1, + rc = flex_array_prealloc(p->type_attr_map_array, 0, p->p_types.nprim, GFP_KERNEL | __GFP_ZERO); if (rc) goto bad; -- cgit v1.2.3 From 1742f183fc218798dab6fcf0ded25b6608fc0a48 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Fri, 22 Apr 2011 06:31:16 +0000 Subject: net: fix netdev_increment_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify and fix netdev_increment_features() to conform to what is stated in netdevice.h comments about NETIF_F_ONE_FOR_ALL. Include FCoE segmentation and VLAN-challenged flags in computation. Signed-off-by: Michał Mirosław Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 7 ++++++- net/core/dev.c | 35 +++++++++++------------------------ 2 files changed, 17 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 364bcf212f71..b7d0304762aa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1106,7 +1106,12 @@ struct net_device { */ #define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \ NETIF_F_SG | NETIF_F_HIGHDMA | \ - NETIF_F_FRAGLIST) + NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED) + /* + * If one device doesn't support one of these features, then disable it + * for all in netdev_increment_features. + */ +#define NETIF_F_ALL_FOR_ALL (NETIF_F_NOCACHE_COPY | NETIF_F_FSO) /* changeable features with no special hardware requirements */ #define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO) diff --git a/net/core/dev.c b/net/core/dev.c index 3bbb4c2ce92e..7db99b52679f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6164,33 +6164,20 @@ static int dev_cpu_callback(struct notifier_block *nfb, */ u32 netdev_increment_features(u32 all, u32 one, u32 mask) { - /* If device needs checksumming, downgrade to it. */ - if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) - all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM); - else if (mask & NETIF_F_ALL_CSUM) { - /* If one device supports v4/v6 checksumming, set for all. */ - if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) && - !(all & NETIF_F_GEN_CSUM)) { - all &= ~NETIF_F_ALL_CSUM; - all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); - } - - /* If one device supports hw checksumming, set for all. */ - if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) { - all &= ~NETIF_F_ALL_CSUM; - all |= NETIF_F_HW_CSUM; - } - } + if (mask & NETIF_F_GEN_CSUM) + mask |= NETIF_F_ALL_CSUM; + mask |= NETIF_F_VLAN_CHALLENGED; - /* If device can't no cache copy, don't do for all */ - if (!(one & NETIF_F_NOCACHE_COPY)) - all &= ~NETIF_F_NOCACHE_COPY; + all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask; + all &= one | ~NETIF_F_ALL_FOR_ALL; - one |= NETIF_F_ALL_CSUM; + /* If device needs checksumming, downgrade to it. */ + if (all & (NETIF_F_ALL_CSUM & ~NETIF_F_NO_CSUM)) + all &= ~NETIF_F_NO_CSUM; - one |= all & NETIF_F_ONE_FOR_ALL; - all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO; - all |= one & mask & NETIF_F_ONE_FOR_ALL; + /* If one device supports hw checksumming, set for all. */ + if (all & NETIF_F_GEN_CSUM) + all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); return all; } -- cgit v1.2.3 From fa2bd7ff9247f4218dfc907db14d000cd7edd862 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Fri, 22 Apr 2011 06:31:16 +0000 Subject: net: allow user to change NETIF_F_HIGHDMA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NETIF_F_HIGHDMA is like any other TX offload, so allow the user to toggle it. This is needed later for bridge and bonding conversion to hw_features. Signed-off-by: Michał Mirosław Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b7d0304762aa..e03af35843bc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1080,7 +1080,7 @@ struct net_device { /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ -#define NETIF_F_NEVER_CHANGE (NETIF_F_HIGHDMA | NETIF_F_VLAN_CHALLENGED | \ +#define NETIF_F_NEVER_CHANGE (NETIF_F_VLAN_CHALLENGED | \ NETIF_F_LLTX | NETIF_F_NETNS_LOCAL) #define NETIF_F_ETHTOOL_BITS (0x7f3fffff & ~NETIF_F_NEVER_CHANGE) @@ -1098,6 +1098,7 @@ struct net_device { #define NETIF_F_ALL_TX_OFFLOADS (NETIF_F_ALL_CSUM | NETIF_F_SG | \ NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ + NETIF_F_HIGHDMA | \ NETIF_F_SCTP_CSUM | NETIF_F_FCOE_CRC) /* -- cgit v1.2.3 From 180bf812ceaf01eb8ac69b86f3be0bd57f697668 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 28 Apr 2011 12:58:11 -0700 Subject: timers: Improve alarmtimer comments and minor fixes This patch addresses a number of minor comment improvements and other minor issues from Thomas' review of the alarmtimers code. CC: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/alarmtimer.h | 12 ++++++++- kernel/time/alarmtimer.c | 67 +++++++++++++++++++--------------------------- 2 files changed, 38 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h index 6b364b2e2074..c5d6095b46f8 100644 --- a/include/linux/alarmtimer.h +++ b/include/linux/alarmtimer.h @@ -13,12 +13,22 @@ enum alarmtimer_type { ALARM_NUMTYPE, }; +/** + * struct alarm - Alarm timer structure + * @node: timerqueue node for adding to the event list this value + * also includes the expiration time. + * @period: Period for recuring alarms + * @function: Function pointer to be executed when the timer fires. + * @type: Alarm type (BOOTTIME/REALTIME) + * @enabled: Flag that represents if the alarm is set to fire or not + * @data: Internal data value. 
+ */ struct alarm { struct timerqueue_node node; ktime_t period; void (*function)(struct alarm *); enum alarmtimer_type type; - char enabled; + bool enabled; void *data; }; diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 4058ad79d55f..bed98004ae1a 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -26,7 +26,15 @@ #include #include - +/** + * struct alarm_base - Alarm timer bases + * @lock: Lock for syncrhonized access to the base + * @timerqueue: Timerqueue head managing the list of events + * @timer: hrtimer used to schedule events while running + * @gettime: Function to read the time correlating to the base + * @base_clockid: clockid for the base + * @irqwork Delayed work structure for expiring timers + */ static struct alarm_base { spinlock_t lock; struct timerqueue_head timerqueue; @@ -36,18 +44,16 @@ static struct alarm_base { struct work_struct irqwork; } alarm_bases[ALARM_NUMTYPE]; +/* rtc timer and device for setting alarm wakeups at suspend */ static struct rtc_timer rtctimer; static struct rtc_device *rtcdev; +/* freezer delta & lock used to handle clock_nanosleep triggered wakeups */ static ktime_t freezer_delta; static DEFINE_SPINLOCK(freezer_delta_lock); -/************************************************************************** - * alarmtimer management code - */ - -/* +/** * alarmtimer_enqueue - Adds an alarm timer to an alarm_base timerqueue * @base: pointer to the base where the timer is being run * @alarm: pointer to alarm being enqueued. @@ -67,7 +73,7 @@ static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) } } -/* +/** * alarmtimer_remove - Removes an alarm timer from an alarm_base timerqueue * @base: pointer to the base where the timer is running * @alarm: pointer to alarm being removed @@ -91,16 +97,16 @@ static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm) } } -/* +/** * alarmtimer_do_work - Handles alarm being fired. * @work: pointer to workqueue being run * - * When a timer fires, this runs through the timerqueue to see - * which alarm timers, and run those that expired. If there are - * more alarm timers queued, we set the hrtimer to fire in the - * future. + * When a alarm timer fires, this runs through the timerqueue to + * see which alarms expired, and runs those. If there are more alarm + * timers queued for the future, we set the hrtimer to fire when + * when the next future alarm timer expires. */ -void alarmtimer_do_work(struct work_struct *work) +static void alarmtimer_do_work(struct work_struct *work) { struct alarm_base *base = container_of(work, struct alarm_base, irqwork); @@ -141,7 +147,7 @@ void alarmtimer_do_work(struct work_struct *work) } -/* +/** * alarmtimer_fired - Handles alarm hrtimer being fired. * @timer: pointer to hrtimer being run * @@ -156,7 +162,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) } -/* +/** * alarmtimer_suspend - Suspend time callback * @dev: unused * @state: unused @@ -230,17 +236,11 @@ static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type) } -/************************************************************************** - * alarm kernel interface code - */ - -/* +/** * alarm_init - Initialize an alarm structure * @alarm: ptr to alarm to be initialized * @type: the type of the alarm * @function: callback that is run when the alarm fires - * - * In-kernel interface to initializes the alarm structure. 
*/ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, void (*function)(struct alarm *)) @@ -252,13 +252,11 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, alarm->enabled = 0; } -/* +/** * alarm_start - Sets an alarm to fire * @alarm: ptr to alarm to set * @start: time to run the alarm * @period: period at which the alarm will recur - * - * In-kernel interface set an alarm timer. */ void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period) { @@ -275,11 +273,9 @@ void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period) spin_unlock_irqrestore(&base->lock, flags); } -/* +/** * alarm_cancel - Tries to cancel an alarm timer * @alarm: ptr to alarm to be canceled - * - * In-kernel interface to cancel an alarm timer. */ void alarm_cancel(struct alarm *alarm) { @@ -294,15 +290,9 @@ void alarm_cancel(struct alarm *alarm) } -/************************************************************************** - * alarm posix interface code - */ - -/* +/** * clock2alarm - helper that converts from clockid to alarmtypes * @clockid: clockid. - * - * Helper function that converts from clockids to alarmtypes */ static enum alarmtimer_type clock2alarm(clockid_t clockid) { @@ -313,7 +303,7 @@ static enum alarmtimer_type clock2alarm(clockid_t clockid) return -1; } -/* +/** * alarm_handle_timer - Callback for posix timers * @alarm: alarm that fired * @@ -327,7 +317,7 @@ static void alarm_handle_timer(struct alarm *alarm) ptr->it_overrun++; } -/* +/** * alarm_clock_getres - posix getres interface * @which_clock: clockid * @tp: timespec to fill @@ -598,9 +588,6 @@ out: return ret; } -/************************************************************************** - * alarmtimer initialization code - */ /* Suspend hook structures */ static const struct dev_pm_ops alarmtimer_pm_ops = { -- cgit v1.2.3 From 69c9dd1ecf446ad8a830e4afc539a2a1adc85b78 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 29 Apr 2011 00:36:05 +0200 Subject: PM: Export platform bus type's default PM callbacks Export the default PM callbacks defined for the platform bus type so that they can be used by power domains for suspending and resuming platform devices in the future. Signed-off-by: Rafael J. 
Wysocki --- drivers/base/platform.c | 72 +++++++++++------------------------------ include/linux/platform_device.h | 60 ++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 9e0e4fc24c46..313556f28c9e 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -667,7 +667,7 @@ static int platform_legacy_resume(struct device *dev) return ret; } -static int platform_pm_prepare(struct device *dev) +int platform_pm_prepare(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -678,7 +678,7 @@ static int platform_pm_prepare(struct device *dev) return ret; } -static void platform_pm_complete(struct device *dev) +void platform_pm_complete(struct device *dev) { struct device_driver *drv = dev->driver; @@ -686,16 +686,11 @@ static void platform_pm_complete(struct device *dev) drv->pm->complete(dev); } -#else /* !CONFIG_PM_SLEEP */ - -#define platform_pm_prepare NULL -#define platform_pm_complete NULL - -#endif /* !CONFIG_PM_SLEEP */ +#endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_SUSPEND -int __weak platform_pm_suspend(struct device *dev) +int platform_pm_suspend(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -713,7 +708,7 @@ int __weak platform_pm_suspend(struct device *dev) return ret; } -int __weak platform_pm_suspend_noirq(struct device *dev) +int platform_pm_suspend_noirq(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -729,7 +724,7 @@ int __weak platform_pm_suspend_noirq(struct device *dev) return ret; } -int __weak platform_pm_resume(struct device *dev) +int platform_pm_resume(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -747,7 +742,7 @@ int __weak platform_pm_resume(struct device *dev) return ret; } -int __weak platform_pm_resume_noirq(struct device *dev) +int platform_pm_resume_noirq(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -763,18 +758,11 @@ int __weak platform_pm_resume_noirq(struct device *dev) return ret; } -#else /* !CONFIG_SUSPEND */ - -#define platform_pm_suspend NULL -#define platform_pm_resume NULL -#define platform_pm_suspend_noirq NULL -#define platform_pm_resume_noirq NULL - -#endif /* !CONFIG_SUSPEND */ +#endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATE_CALLBACKS -static int platform_pm_freeze(struct device *dev) +int platform_pm_freeze(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -792,7 +780,7 @@ static int platform_pm_freeze(struct device *dev) return ret; } -static int platform_pm_freeze_noirq(struct device *dev) +int platform_pm_freeze_noirq(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -808,7 +796,7 @@ static int platform_pm_freeze_noirq(struct device *dev) return ret; } -static int platform_pm_thaw(struct device *dev) +int platform_pm_thaw(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -826,7 +814,7 @@ static int platform_pm_thaw(struct device *dev) return ret; } -static int platform_pm_thaw_noirq(struct device *dev) +int platform_pm_thaw_noirq(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -842,7 +830,7 @@ static int platform_pm_thaw_noirq(struct device *dev) return ret; } -static int platform_pm_poweroff(struct device *dev) +int platform_pm_poweroff(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -860,7 +848,7 @@ 
static int platform_pm_poweroff(struct device *dev) return ret; } -static int platform_pm_poweroff_noirq(struct device *dev) +int platform_pm_poweroff_noirq(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -876,7 +864,7 @@ static int platform_pm_poweroff_noirq(struct device *dev) return ret; } -static int platform_pm_restore(struct device *dev) +int platform_pm_restore(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -894,7 +882,7 @@ static int platform_pm_restore(struct device *dev) return ret; } -static int platform_pm_restore_noirq(struct device *dev) +int platform_pm_restore_noirq(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; @@ -910,18 +898,7 @@ static int platform_pm_restore_noirq(struct device *dev) return ret; } -#else /* !CONFIG_HIBERNATE_CALLBACKS */ - -#define platform_pm_freeze NULL -#define platform_pm_thaw NULL -#define platform_pm_poweroff NULL -#define platform_pm_restore NULL -#define platform_pm_freeze_noirq NULL -#define platform_pm_thaw_noirq NULL -#define platform_pm_poweroff_noirq NULL -#define platform_pm_restore_noirq NULL - -#endif /* !CONFIG_HIBERNATE_CALLBACKS */ +#endif /* CONFIG_HIBERNATE_CALLBACKS */ #ifdef CONFIG_PM_RUNTIME @@ -949,23 +926,10 @@ int __weak platform_pm_runtime_idle(struct device *dev) #endif /* !CONFIG_PM_RUNTIME */ static const struct dev_pm_ops platform_dev_pm_ops = { - .prepare = platform_pm_prepare, - .complete = platform_pm_complete, - .suspend = platform_pm_suspend, - .resume = platform_pm_resume, - .freeze = platform_pm_freeze, - .thaw = platform_pm_thaw, - .poweroff = platform_pm_poweroff, - .restore = platform_pm_restore, - .suspend_noirq = platform_pm_suspend_noirq, - .resume_noirq = platform_pm_resume_noirq, - .freeze_noirq = platform_pm_freeze_noirq, - .thaw_noirq = platform_pm_thaw_noirq, - .poweroff_noirq = platform_pm_poweroff_noirq, - .restore_noirq = platform_pm_restore_noirq, .runtime_suspend = platform_pm_runtime_suspend, .runtime_resume = platform_pm_runtime_resume, .runtime_idle = platform_pm_runtime_idle, + USE_PLATFORM_PM_SLEEP_OPS }; struct bus_type platform_bus_type = { diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 744942c95fec..e0093e061b08 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -205,4 +205,64 @@ static inline char *early_platform_driver_setup_func(void) \ } #endif /* MODULE */ +#ifdef CONFIG_PM_SLEEP +extern int platform_pm_prepare(struct device *dev); +extern void platform_pm_complete(struct device *dev); +#else +#define platform_pm_prepare NULL +#define platform_pm_complete NULL +#endif + +#ifdef CONFIG_SUSPEND +extern int platform_pm_suspend(struct device *dev); +extern int platform_pm_suspend_noirq(struct device *dev); +extern int platform_pm_resume(struct device *dev); +extern int platform_pm_resume_noirq(struct device *dev); +#else +#define platform_pm_suspend NULL +#define platform_pm_resume NULL +#define platform_pm_suspend_noirq NULL +#define platform_pm_resume_noirq NULL +#endif + +#ifdef CONFIG_HIBERNATE_CALLBACKS +extern int platform_pm_freeze(struct device *dev); +extern int platform_pm_freeze_noirq(struct device *dev); +extern int platform_pm_thaw(struct device *dev); +extern int platform_pm_thaw_noirq(struct device *dev); +extern int platform_pm_poweroff(struct device *dev); +extern int platform_pm_poweroff_noirq(struct device *dev); +extern int platform_pm_restore(struct device *dev); +extern int platform_pm_restore_noirq(struct 
device *dev); +#else +#define platform_pm_freeze NULL +#define platform_pm_thaw NULL +#define platform_pm_poweroff NULL +#define platform_pm_restore NULL +#define platform_pm_freeze_noirq NULL +#define platform_pm_thaw_noirq NULL +#define platform_pm_poweroff_noirq NULL +#define platform_pm_restore_noirq NULL +#endif + +#ifdef CONFIG_PM_SLEEP +#define USE_PLATFORM_PM_SLEEP_OPS \ + .prepare = platform_pm_prepare, \ + .complete = platform_pm_complete, \ + .suspend = platform_pm_suspend, \ + .resume = platform_pm_resume, \ + .freeze = platform_pm_freeze, \ + .thaw = platform_pm_thaw, \ + .poweroff = platform_pm_poweroff, \ + .restore = platform_pm_restore, \ + .suspend_noirq = platform_pm_suspend_noirq, \ + .resume_noirq = platform_pm_resume_noirq, \ + .freeze_noirq = platform_pm_freeze_noirq, \ + .thaw_noirq = platform_pm_thaw_noirq, \ + .poweroff_noirq = platform_pm_poweroff_noirq, \ + .restore_noirq = platform_pm_restore_noirq, +#else +#define USE_PLATFORM_PM_SLEEP_OPS +#endif + #endif /* _PLATFORM_DEVICE_H_ */ -- cgit v1.2.3 From 1d2b71f61b6a10216274e27b717becf9ae101fc7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 29 Apr 2011 00:36:53 +0200 Subject: PM / Runtime: Add subsystem data field to struct dev_pm_info Some subsystems need to attach PM-related data to struct device and they need to use devres for this purpose. For their convenience and to make code more straightforward, add a new field called subsys_data to struct dev_pm_info and let subsystems use it for attaching PM-related information to devices. Convert the ARM shmobile platform to using the new field. Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-shmobile/pm_runtime.c | 34 +++++++++++++++++----------------- include/linux/pm.h | 1 + 2 files changed, 18 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-shmobile/pm_runtime.c b/arch/arm/mach-shmobile/pm_runtime.c index 12bb504c7f49..30bbe9a99ae1 100644 --- a/arch/arm/mach-shmobile/pm_runtime.c +++ b/arch/arm/mach-shmobile/pm_runtime.c @@ -18,6 +18,7 @@ #include #include #include +#include #ifdef CONFIG_PM_RUNTIME #define BIT_ONCE 0 @@ -29,22 +30,9 @@ struct pm_runtime_data { struct clk *clk; }; -static void __devres_release(struct device *dev, void *res) -{ - struct pm_runtime_data *prd = res; - - dev_dbg(dev, "__devres_release()\n"); - - if (test_bit(BIT_CLK_ENABLED, &prd->flags)) - clk_disable(prd->clk); - - if (test_bit(BIT_ACTIVE, &prd->flags)) - clk_put(prd->clk); -} - static struct pm_runtime_data *__to_prd(struct device *dev) { - return devres_find(dev, __devres_release, NULL, NULL); + return dev ? 
dev->power.subsys_data : NULL; } static void platform_pm_runtime_init(struct device *dev, @@ -121,14 +109,26 @@ static int platform_bus_notify(struct notifier_block *nb, dev_dbg(dev, "platform_bus_notify() %ld !\n", action); - if (action == BUS_NOTIFY_BIND_DRIVER) { - prd = devres_alloc(__devres_release, sizeof(*prd), GFP_KERNEL); + switch (action) { + case BUS_NOTIFY_BIND_DRIVER: + prd = kzalloc(sizeof(*prd), GFP_KERNEL); if (prd) { - devres_add(dev, prd); + dev->power.subsys_data = prd; dev->pwr_domain = &default_power_domain; } else { dev_err(dev, "unable to alloc memory for runtime pm\n"); } + break; + case BUS_NOTIFY_UNBOUND_DRIVER: + prd = __to_prd(dev); + if (prd) { + if (test_bit(BIT_CLK_ENABLED, &prd->flags)) + clk_disable(prd->clk); + + if (test_bit(BIT_ACTIVE, &prd->flags)) + clk_put(prd->clk); + } + break; } return 0; diff --git a/include/linux/pm.h b/include/linux/pm.h index 512e09177e57..f4167d0faa67 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -460,6 +460,7 @@ struct dev_pm_info { unsigned long active_jiffies; unsigned long suspended_jiffies; unsigned long accounting_timestamp; + void *subsys_data; /* Owned by the subsystem. */ #endif }; -- cgit v1.2.3 From 8f62242246351b5a4bc0c1f00c0c7003edea128a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 29 Apr 2011 13:19:47 +0200 Subject: perf events: Add generic front-end and back-end stalled cycle event definitions Add two generic hardware events: front-end and back-end stalled cycles. These events measure conditions when the CPU is executing code but its capabilities are not fully utilized. Understanding such situations and analyzing them is an important sub-task of code optimization workflows. Both events limit performance: most front-end stalls tend to be caused by branch misprediction or instruction-fetch cache misses, while back-end stalls can be caused by various resource shortages or inefficient instruction scheduling. Front-end stalls are the more important ones: code cannot run fast if the instruction stream cannot keep up. An over-utilized back-end can cause front-end stalls, so it has to be watched as well. The exact composition depends heavily on program logic and instruction mix. We use the terms 'stall', 'front-end' and 'back-end' loosely and try to use the best available events from specific CPUs that approximate these concepts. 
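To illustrate how the new generic events are meant to be consumed, a small user-space sketch using the perf_event_open() ABI to count back-end stalls around a region of code follows; the surrounding program is illustrative only, the event selection is simply attr.config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND (swap in _FRONTEND for front-end stalls):

	#include <linux/perf_event.h>
	#include <sys/ioctl.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <string.h>
	#include <stdio.h>

	int main(void)
	{
		struct perf_event_attr attr;
		long long count = 0;
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.type = PERF_TYPE_HARDWARE;
		attr.size = sizeof(attr);
		attr.config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND;
		attr.disabled = 1;

		/* perf_event_open() has no glibc wrapper, hence syscall() */
		fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
		if (fd < 0) {
			perror("perf_event_open");
			return 1;
		}

		ioctl(fd, PERF_EVENT_IOC_RESET, 0);
		ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
		/* ... code under test runs here ... */
		ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

		if (read(fd, &count, sizeof(count)) != sizeof(count))
			perror("read");
		printf("stalled-cycles-backend: %lld\n", count);
		close(fd);
		return 0;
	}
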
Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-7y40wib8n000io7hjpn1dsrm@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 2 +- include/linux/perf_event.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 1ea94224f62e..393085b87a2c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1414,7 +1414,7 @@ static __init int intel_pmu_init(void) x86_pmu.extra_regs = intel_nehalem_extra_regs; /* Install the stalled-cycles event: UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ - intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES] = 0x1803fb1; + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; if (ebx & 0x40) { /* diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ac636dd20a0c..4e2d7ae71499 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -52,7 +52,8 @@ enum perf_hw_id { PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, PERF_COUNT_HW_BRANCH_MISSES = 5, PERF_COUNT_HW_BUS_CYCLES = 6, - PERF_COUNT_HW_STALLED_CYCLES = 7, + PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7, + PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8, PERF_COUNT_HW_MAX, /* non-ABI */ }; -- cgit v1.2.3 From 36504605432996590f889e33d47e2d9c581f7569 Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Wed, 27 Apr 2011 18:32:37 +0000 Subject: ethtool: cosmetics: enforce const-ness in ethtool_cmd_speed The 'ep' argument of ethtool_cmd_speed is not altered: advertise it in protoype. +Indentation fix. Also add comments to advise using the ethtool_cmd_speed API to get/set the link speed. Signed-off-by: David Decotigny Signed-off-by: David S. Miller --- include/linux/ethtool.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 9de31274341d..7e6e0a89ca26 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -24,7 +24,10 @@ struct ethtool_cmd { __u32 cmd; __u32 supported; /* Features this interface supports */ __u32 advertising; /* Features this interface advertises */ - __u16 speed; /* The forced speed, 10Mb, 100Mb, gigabit */ + __u16 speed; /* The forced speed (lower bits) in + * Mbps. Please use + * ethtool_cmd_speed()/_set() to + * access it */ __u8 duplex; /* Duplex, half or full */ __u8 port; /* Which connector port */ __u8 phy_address; @@ -33,7 +36,10 @@ struct ethtool_cmd { __u8 mdio_support; __u32 maxtxpkt; /* Tx pkts before generating tx int */ __u32 maxrxpkt; /* Rx pkts before generating rx int */ - __u16 speed_hi; + __u16 speed_hi; /* The forced speed (upper + * bits) in Mbps. 
Please use + * ethtool_cmd_speed()/_set() to + * access it */ __u8 eth_tp_mdix; __u8 reserved2; __u32 lp_advertising; /* Features the link partner advertises */ @@ -41,14 +47,14 @@ struct ethtool_cmd { }; static inline void ethtool_cmd_speed_set(struct ethtool_cmd *ep, - __u32 speed) + __u32 speed) { ep->speed = (__u16)speed; ep->speed_hi = (__u16)(speed >> 16); } -static inline __u32 ethtool_cmd_speed(struct ethtool_cmd *ep) +static inline __u32 ethtool_cmd_speed(const struct ethtool_cmd *ep) { return (ep->speed_hi << 16) | ep->speed; } -- cgit v1.2.3 From 8ae6daca85c8bbd6a32c382db5e2a2a989f8bed2 Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Wed, 27 Apr 2011 18:32:38 +0000 Subject: ethtool: Call ethtool's get/set_settings callbacks with cleaned data This makes sure that when a driver calls the ethtool's get/set_settings() callback of another driver, the data passed to it is clean. This guarantees that speed_hi will be zeroed correctly if the called callback doesn't explicitely set it: we are sure we don't get a corrupted speed from the underlying driver. We also take care of setting the cmd field appropriately (ETHTOOL_GSET/SSET). This applies to dev_ethtool_get_settings(), which now makes sure it sets up that ethtool command parameter correctly before passing it to drivers. This also means that whoever calls dev_ethtool_get_settings() does not have to clean the ethtool command parameter. This function also becomes an exported symbol instead of an inline. All drivers visible to make allyesconfig under x86_64 have been updated. Signed-off-by: David Decotigny Signed-off-by: David S. Miller --- arch/mips/txx9/generic/setup_tx4939.c | 21 ++++++++------------- drivers/net/e100.c | 2 +- drivers/net/mdio.c | 3 +++ drivers/net/mii.c | 3 +++ drivers/net/pch_gbe/pch_gbe_main.c | 6 +++--- drivers/net/pch_gbe/pch_gbe_phy.c | 2 +- drivers/net/pcnet32.c | 16 ++++++++-------- drivers/net/sfc/mdio_10g.c | 4 ++-- drivers/net/stmmac/stmmac_ethtool.c | 5 ++--- drivers/net/usb/asix.c | 28 +++++++++++++++------------- drivers/net/usb/dm9601.c | 6 +++--- drivers/net/usb/smsc75xx.c | 7 ++++--- drivers/net/usb/smsc95xx.c | 7 ++++--- drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 11 +++++++---- drivers/scsi/fcoe/fcoe.c | 11 +++++++---- include/linux/ethtool.h | 4 +++- include/linux/netdevice.h | 9 ++------- include/rdma/ib_addr.h | 13 +++++++------ net/core/dev.c | 24 ++++++++++++++++++++++++ net/core/net-sysfs.c | 24 ++++++++++-------------- 20 files changed, 117 insertions(+), 89 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/txx9/generic/setup_tx4939.c b/arch/mips/txx9/generic/setup_tx4939.c index 3dc19f482959..e9f95dcde379 100644 --- a/arch/mips/txx9/generic/setup_tx4939.c +++ b/arch/mips/txx9/generic/setup_tx4939.c @@ -318,19 +318,15 @@ void __init tx4939_sio_init(unsigned int sclk, unsigned int cts_mask) } #if defined(CONFIG_TC35815) || defined(CONFIG_TC35815_MODULE) -static int tx4939_get_eth_speed(struct net_device *dev) +static u32 tx4939_get_eth_speed(struct net_device *dev) { - struct ethtool_cmd cmd = { ETHTOOL_GSET }; - int speed = 100; /* default 100Mbps */ - int err; - if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) - return speed; - err = dev->ethtool_ops->get_settings(dev, &cmd); - if (err < 0) - return speed; - speed = cmd.speed == SPEED_100 ? 
100 : 10; - return speed; + struct ethtool_cmd cmd; + if (dev_ethtool_get_settings(dev, &cmd)) + return 100; /* default 100Mbps */ + + return ethtool_cmd_speed(&cmd); } + static int tx4939_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) @@ -343,8 +339,7 @@ static int tx4939_netdev_event(struct notifier_block *this, else if (dev->irq == TXX9_IRQ_BASE + TX4939_IR_ETH(1)) bit = TX4939_PCFG_SPEED1; if (bit) { - int speed = tx4939_get_eth_speed(dev); - if (speed == 100) + if (tx4939_get_eth_speed(dev) == 100) txx9_set64(&tx4939_ccfgptr->pcfg, bit); else txx9_clear64(&tx4939_ccfgptr->pcfg, bit); diff --git a/drivers/net/e100.c b/drivers/net/e100.c index b0aa9e68990a..66ba596a4d37 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -1668,7 +1668,7 @@ static void e100_adjust_adaptive_ifs(struct nic *nic, int speed, int duplex) static void e100_watchdog(unsigned long data) { struct nic *nic = (struct nic *)data; - struct ethtool_cmd cmd; + struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; netif_printk(nic, timer, KERN_DEBUG, nic->netdev, "right now = %ld\n", jiffies); diff --git a/drivers/net/mdio.c b/drivers/net/mdio.c index e85bf04cf813..f2d10abd0403 100644 --- a/drivers/net/mdio.c +++ b/drivers/net/mdio.c @@ -176,6 +176,9 @@ static u32 mdio45_get_an(const struct mdio_if_info *mdio, u16 addr) * @npage_adv: Modes currently advertised on next pages * @npage_lpa: Modes advertised by link partner on next pages * + * The @ecmd parameter is expected to have been cleared before calling + * mdio45_ethtool_gset_npage(). + * * Since the CSRs for auto-negotiation using next pages are not fully * standardised, this function does not attempt to decode them. The * caller must pass them in. diff --git a/drivers/net/mii.c b/drivers/net/mii.c index 0a6c6a2e7550..05acca78f63a 100644 --- a/drivers/net/mii.c +++ b/drivers/net/mii.c @@ -58,6 +58,9 @@ static u32 mii_get_an(struct mii_if_info *mii, u16 addr) * @mii: MII interface * @ecmd: requested ethtool_cmd * + * The @ecmd parameter is expected to have been cleared before calling + * mii_ethtool_gset(). + * * Returns 0 for success, negative on error. */ int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd) diff --git a/drivers/net/pch_gbe/pch_gbe_main.c b/drivers/net/pch_gbe/pch_gbe_main.c index 4cc9872f5ec4..f3e4b0adae93 100644 --- a/drivers/net/pch_gbe/pch_gbe_main.c +++ b/drivers/net/pch_gbe/pch_gbe_main.c @@ -888,12 +888,12 @@ static void pch_gbe_watchdog(unsigned long data) struct pch_gbe_adapter *adapter = (struct pch_gbe_adapter *)data; struct net_device *netdev = adapter->netdev; struct pch_gbe_hw *hw = &adapter->hw; - struct ethtool_cmd cmd; pr_debug("right now = %ld\n", jiffies); pch_gbe_update_stats(adapter); if ((mii_link_ok(&adapter->mii)) && (!netif_carrier_ok(netdev))) { + struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; netdev->tx_queue_len = adapter->tx_queue_len; /* mii library handles link maintenance tasks */ if (mii_ethtool_gset(&adapter->mii, &cmd)) { @@ -903,7 +903,7 @@ static void pch_gbe_watchdog(unsigned long data) PCH_GBE_WATCHDOG_PERIOD)); return; } - hw->mac.link_speed = cmd.speed; + hw->mac.link_speed = ethtool_cmd_speed(&cmd); hw->mac.link_duplex = cmd.duplex; /* Set the RGMII control. */ pch_gbe_set_rgmii_ctrl(adapter, hw->mac.link_speed, @@ -913,7 +913,7 @@ static void pch_gbe_watchdog(unsigned long data) hw->mac.link_duplex); netdev_dbg(netdev, "Link is Up %d Mbps %s-Duplex\n", - cmd.speed, + hw->mac.link_speed, cmd.duplex == DUPLEX_FULL ? 
"Full" : "Half"); netif_carrier_on(netdev); netif_wake_queue(netdev); diff --git a/drivers/net/pch_gbe/pch_gbe_phy.c b/drivers/net/pch_gbe/pch_gbe_phy.c index 923a687acd30..9a8207f686fd 100644 --- a/drivers/net/pch_gbe/pch_gbe_phy.c +++ b/drivers/net/pch_gbe/pch_gbe_phy.c @@ -247,7 +247,7 @@ inline void pch_gbe_phy_set_rgmii(struct pch_gbe_hw *hw) void pch_gbe_phy_init_setting(struct pch_gbe_hw *hw) { struct pch_gbe_adapter *adapter; - struct ethtool_cmd cmd; + struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; int ret; u16 mii_reg; diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c index 0a1efbae1bc0..b48aba9e4227 100644 --- a/drivers/net/pcnet32.c +++ b/drivers/net/pcnet32.c @@ -2099,7 +2099,7 @@ static int pcnet32_open(struct net_device *dev) int first_phy = -1; u16 bmcr; u32 bcr9; - struct ethtool_cmd ecmd; + struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; /* * There is really no good other way to handle multiple PHYs @@ -2115,9 +2115,9 @@ static int pcnet32_open(struct net_device *dev) ecmd.port = PORT_MII; ecmd.transceiver = XCVR_INTERNAL; ecmd.autoneg = AUTONEG_DISABLE; - ecmd.speed = - lp-> - options & PCNET32_PORT_100 ? SPEED_100 : SPEED_10; + ethtool_cmd_speed_set(&ecmd, + (lp->options & PCNET32_PORT_100) ? + SPEED_100 : SPEED_10); bcr9 = lp->a.read_bcr(ioaddr, 9); if (lp->options & PCNET32_PORT_FD) { @@ -2763,11 +2763,11 @@ static void pcnet32_check_media(struct net_device *dev, int verbose) netif_carrier_on(dev); if (lp->mii) { if (netif_msg_link(lp)) { - struct ethtool_cmd ecmd; + struct ethtool_cmd ecmd = { + .cmd = ETHTOOL_GSET }; mii_ethtool_gset(&lp->mii_if, &ecmd); - netdev_info(dev, "link up, %sMbps, %s-duplex\n", - (ecmd.speed == SPEED_100) - ? "100" : "10", + netdev_info(dev, "link up, %uMbps, %s-duplex\n", + ethtool_cmd_speed(&ecmd), (ecmd.duplex == DUPLEX_FULL) ? 
"full" : "half"); } diff --git a/drivers/net/sfc/mdio_10g.c b/drivers/net/sfc/mdio_10g.c index 19e68c26d103..71159145b4bf 100644 --- a/drivers/net/sfc/mdio_10g.c +++ b/drivers/net/sfc/mdio_10g.c @@ -232,12 +232,12 @@ void efx_mdio_set_mmds_lpower(struct efx_nic *efx, */ int efx_mdio_set_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd) { - struct ethtool_cmd prev; + struct ethtool_cmd prev = { .cmd = ETHTOOL_GSET }; efx->phy_op->get_settings(efx, &prev); if (ecmd->advertising == prev.advertising && - ecmd->speed == prev.speed && + ethtool_cmd_speed(ecmd) == ethtool_cmd_speed(&prev) && ecmd->duplex == prev.duplex && ecmd->port == prev.port && ecmd->autoneg == prev.autoneg) diff --git a/drivers/net/stmmac/stmmac_ethtool.c b/drivers/net/stmmac/stmmac_ethtool.c index 0e61ac8707cb..6f5aaeb986ff 100644 --- a/drivers/net/stmmac/stmmac_ethtool.c +++ b/drivers/net/stmmac/stmmac_ethtool.c @@ -237,13 +237,12 @@ stmmac_set_pauseparam(struct net_device *netdev, if (phy->autoneg) { if (netif_running(netdev)) { - struct ethtool_cmd cmd; + struct ethtool_cmd cmd = { .cmd = ETHTOOL_SSET }; /* auto-negotiation automatically restarted */ - cmd.cmd = ETHTOOL_NWAY_RST; cmd.supported = phy->supported; cmd.advertising = phy->advertising; cmd.autoneg = phy->autoneg; - cmd.speed = phy->speed; + ethtool_cmd_speed_set(&cmd, phy->speed); cmd.duplex = phy->duplex; cmd.phy_address = phy->addr; ret = phy_ethtool_sset(phy, &cmd); diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c index 6140b56cce53..6998aa6b7bb7 100644 --- a/drivers/net/usb/asix.c +++ b/drivers/net/usb/asix.c @@ -847,7 +847,7 @@ static void ax88172_set_multicast(struct net_device *net) static int ax88172_link_reset(struct usbnet *dev) { u8 mode; - struct ethtool_cmd ecmd; + struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; mii_check_media(&dev->mii, 1, 1); mii_ethtool_gset(&dev->mii, &ecmd); @@ -856,8 +856,8 @@ static int ax88172_link_reset(struct usbnet *dev) if (ecmd.duplex != DUPLEX_FULL) mode |= ~AX88172_MEDIUM_FD; - netdev_dbg(dev->net, "ax88172_link_reset() speed: %d duplex: %d setting mode to 0x%04x\n", - ecmd.speed, ecmd.duplex, mode); + netdev_dbg(dev->net, "ax88172_link_reset() speed: %u duplex: %d setting mode to 0x%04x\n", + ethtool_cmd_speed(&ecmd), ecmd.duplex, mode); asix_write_medium_mode(dev, mode); @@ -947,20 +947,20 @@ static const struct ethtool_ops ax88772_ethtool_ops = { static int ax88772_link_reset(struct usbnet *dev) { u16 mode; - struct ethtool_cmd ecmd; + struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; mii_check_media(&dev->mii, 1, 1); mii_ethtool_gset(&dev->mii, &ecmd); mode = AX88772_MEDIUM_DEFAULT; - if (ecmd.speed != SPEED_100) + if (ethtool_cmd_speed(&ecmd) != SPEED_100) mode &= ~AX_MEDIUM_PS; if (ecmd.duplex != DUPLEX_FULL) mode &= ~AX_MEDIUM_FD; - netdev_dbg(dev->net, "ax88772_link_reset() speed: %d duplex: %d setting mode to 0x%04x\n", - ecmd.speed, ecmd.duplex, mode); + netdev_dbg(dev->net, "ax88772_link_reset() speed: %u duplex: %d setting mode to 0x%04x\n", + ethtool_cmd_speed(&ecmd), ecmd.duplex, mode); asix_write_medium_mode(dev, mode); @@ -1173,18 +1173,20 @@ static int marvell_led_status(struct usbnet *dev, u16 speed) static int ax88178_link_reset(struct usbnet *dev) { u16 mode; - struct ethtool_cmd ecmd; + struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; struct asix_data *data = (struct asix_data *)&dev->data; + u32 speed; netdev_dbg(dev->net, "ax88178_link_reset()\n"); mii_check_media(&dev->mii, 1, 1); mii_ethtool_gset(&dev->mii, &ecmd); mode = AX88178_MEDIUM_DEFAULT; + speed = 
ethtool_cmd_speed(&ecmd); - if (ecmd.speed == SPEED_1000) + if (speed == SPEED_1000) mode |= AX_MEDIUM_GM; - else if (ecmd.speed == SPEED_100) + else if (speed == SPEED_100) mode |= AX_MEDIUM_PS; else mode &= ~(AX_MEDIUM_PS | AX_MEDIUM_GM); @@ -1196,13 +1198,13 @@ static int ax88178_link_reset(struct usbnet *dev) else mode &= ~AX_MEDIUM_FD; - netdev_dbg(dev->net, "ax88178_link_reset() speed: %d duplex: %d setting mode to 0x%04x\n", - ecmd.speed, ecmd.duplex, mode); + netdev_dbg(dev->net, "ax88178_link_reset() speed: %u duplex: %d setting mode to 0x%04x\n", + speed, ecmd.duplex, mode); asix_write_medium_mode(dev, mode); if (data->phymode == PHY_MODE_MARVELL && data->ledmode) - marvell_led_status(dev, ecmd.speed); + marvell_led_status(dev, speed); return 0; } diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 5002f5be47be..1d93133e9b74 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -599,13 +599,13 @@ static void dm9601_status(struct usbnet *dev, struct urb *urb) static int dm9601_link_reset(struct usbnet *dev) { - struct ethtool_cmd ecmd; + struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; mii_check_media(&dev->mii, 1, 1); mii_ethtool_gset(&dev->mii, &ecmd); - netdev_dbg(dev->net, "link_reset() speed: %d duplex: %d\n", - ecmd.speed, ecmd.duplex); + netdev_dbg(dev->net, "link_reset() speed: %u duplex: %d\n", + ethtool_cmd_speed(&ecmd), ecmd.duplex); return 0; } diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 860a20c938b4..15b3d6888ae9 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -503,7 +503,7 @@ static int smsc75xx_update_flowcontrol(struct usbnet *dev, u8 duplex, static int smsc75xx_link_reset(struct usbnet *dev) { struct mii_if_info *mii = &dev->mii; - struct ethtool_cmd ecmd; + struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; u16 lcladv, rmtadv; int ret; @@ -519,8 +519,9 @@ static int smsc75xx_link_reset(struct usbnet *dev) lcladv = smsc75xx_mdio_read(dev->net, mii->phy_id, MII_ADVERTISE); rmtadv = smsc75xx_mdio_read(dev->net, mii->phy_id, MII_LPA); - netif_dbg(dev, link, dev->net, "speed: %d duplex: %d lcladv: %04x" - " rmtadv: %04x", ecmd.speed, ecmd.duplex, lcladv, rmtadv); + netif_dbg(dev, link, dev->net, "speed: %u duplex: %d lcladv: %04x" + " rmtadv: %04x", ethtool_cmd_speed(&ecmd), + ecmd.duplex, lcladv, rmtadv); return smsc75xx_update_flowcontrol(dev, ecmd.duplex, lcladv, rmtadv); } diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 24f4b3739dd2..b374a9997908 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -457,7 +457,7 @@ static int smsc95xx_link_reset(struct usbnet *dev) { struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); struct mii_if_info *mii = &dev->mii; - struct ethtool_cmd ecmd; + struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; unsigned long flags; u16 lcladv, rmtadv; u32 intdata; @@ -472,8 +472,9 @@ static int smsc95xx_link_reset(struct usbnet *dev) lcladv = smsc95xx_mdio_read(dev->net, mii->phy_id, MII_ADVERTISE); rmtadv = smsc95xx_mdio_read(dev->net, mii->phy_id, MII_LPA); - netif_dbg(dev, link, dev->net, "speed: %d duplex: %d lcladv: %04x rmtadv: %04x\n", - ecmd.speed, ecmd.duplex, lcladv, rmtadv); + netif_dbg(dev, link, dev->net, + "speed: %u duplex: %d lcladv: %04x rmtadv: %04x\n", + ethtool_cmd_speed(&ecmd), ecmd.duplex, lcladv, rmtadv); spin_lock_irqsave(&pdata->mac_cr_lock, flags); if (ecmd.duplex != DUPLEX_FULL) { diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c 
b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index e2e647509a73..cd050196a163 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -664,7 +664,7 @@ static void bnx2fc_link_speed_update(struct fc_lport *lport) struct fcoe_port *port = lport_priv(lport); struct bnx2fc_hba *hba = port->priv; struct net_device *netdev = hba->netdev; - struct ethtool_cmd ecmd = { ETHTOOL_GSET }; + struct ethtool_cmd ecmd; if (!dev_ethtool_get_settings(netdev, &ecmd)) { lport->link_supported_speeds &= @@ -675,12 +675,15 @@ static void bnx2fc_link_speed_update(struct fc_lport *lport) if (ecmd.supported & SUPPORTED_10000baseT_Full) lport->link_supported_speeds |= FC_PORTSPEED_10GBIT; - if (ecmd.speed == SPEED_1000) + switch (ethtool_cmd_speed(&ecmd)) { + case SPEED_1000: lport->link_speed = FC_PORTSPEED_1GBIT; - if (ecmd.speed == SPEED_10000) + break; + case SPEED_10000: lport->link_speed = FC_PORTSPEED_10GBIT; + break; + } } - return; } static int bnx2fc_link_ok(struct fc_lport *lport) { diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index bde6ee5333eb..04f346b562da 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -2026,7 +2026,7 @@ out_nodev: int fcoe_link_speed_update(struct fc_lport *lport) { struct net_device *netdev = fcoe_netdev(lport); - struct ethtool_cmd ecmd = { ETHTOOL_GSET }; + struct ethtool_cmd ecmd; if (!dev_ethtool_get_settings(netdev, &ecmd)) { lport->link_supported_speeds &= @@ -2037,11 +2037,14 @@ int fcoe_link_speed_update(struct fc_lport *lport) if (ecmd.supported & SUPPORTED_10000baseT_Full) lport->link_supported_speeds |= FC_PORTSPEED_10GBIT; - if (ecmd.speed == SPEED_1000) + switch (ethtool_cmd_speed(&ecmd)) { + case SPEED_1000: lport->link_speed = FC_PORTSPEED_1GBIT; - if (ecmd.speed == SPEED_10000) + break; + case SPEED_10000: lport->link_speed = FC_PORTSPEED_10GBIT; - + break; + } return 0; } return -1; diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 7e6e0a89ca26..4194a2067a14 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -744,7 +744,9 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); /** * struct ethtool_ops - optional netdev operations * @get_settings: Get various device settings including Ethernet link - * settings. Returns a negative error code or zero. + * settings. The @cmd parameter is expected to have been cleared + * before get_settings is called. Returns a negative error code or + * zero. * @set_settings: Set various device settings including Ethernet link * settings. Returns a negative error code or zero. * @get_drvinfo: Report driver/device information. 
Should only set the diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e03af35843bc..d5de66af46f9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2597,13 +2597,8 @@ static inline int netif_is_bond_slave(struct net_device *dev) extern struct pernet_operations __net_initdata loopback_net_ops; -static inline int dev_ethtool_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) -{ - if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) - return -EOPNOTSUPP; - return dev->ethtool_ops->get_settings(dev, cmd); -} +int dev_ethtool_get_settings(struct net_device *dev, + struct ethtool_cmd *cmd); static inline u32 dev_ethtool_get_rx_csum(struct net_device *dev) { diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index b5fc9f39122b..ae8c68f30f1b 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -217,18 +217,19 @@ static inline enum ib_mtu iboe_get_mtu(int mtu) static inline int iboe_get_rate(struct net_device *dev) { struct ethtool_cmd cmd; + u32 speed; - if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings || - dev->ethtool_ops->get_settings(dev, &cmd)) + if (dev_ethtool_get_settings(dev, &cmd)) return IB_RATE_PORT_CURRENT; - if (cmd.speed >= 40000) + speed = ethtool_cmd_speed(&cmd); + if (speed >= 40000) return IB_RATE_40_GBPS; - else if (cmd.speed >= 30000) + else if (speed >= 30000) return IB_RATE_30_GBPS; - else if (cmd.speed >= 20000) + else if (speed >= 20000) return IB_RATE_20_GBPS; - else if (cmd.speed >= 10000) + else if (speed >= 10000) return IB_RATE_10_GBPS; else return IB_RATE_PORT_CURRENT; diff --git a/net/core/dev.c b/net/core/dev.c index 7db99b52679f..e95dc30110eb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4495,6 +4495,30 @@ void dev_set_rx_mode(struct net_device *dev) netif_addr_unlock_bh(dev); } +/** + * dev_ethtool_get_settings - call device's ethtool_ops::get_settings() + * @dev: device + * @cmd: memory area for ethtool_ops::get_settings() result + * + * The cmd arg is initialized properly (cleared and + * ethtool_cmd::cmd field set to ETHTOOL_GSET). + * + * Return device's ethtool_ops::get_settings() result value or + * -EOPNOTSUPP when device doesn't expose + * ethtool_ops::get_settings() operation. 
+ */ +int dev_ethtool_get_settings(struct net_device *dev, + struct ethtool_cmd *cmd) +{ + if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) + return -EOPNOTSUPP; + + memset(cmd, 0, sizeof(struct ethtool_cmd)); + cmd->cmd = ETHTOOL_GSET; + return dev->ethtool_ops->get_settings(dev, cmd); +} +EXPORT_SYMBOL(dev_ethtool_get_settings); + /** * dev_get_flags - get flags reported to userspace * @dev: device diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 5ceb257e860c..381813eae46c 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -28,6 +28,7 @@ static const char fmt_hex[] = "%#x\n"; static const char fmt_long_hex[] = "%#lx\n"; static const char fmt_dec[] = "%d\n"; +static const char fmt_udec[] = "%u\n"; static const char fmt_ulong[] = "%lu\n"; static const char fmt_u64[] = "%llu\n"; @@ -145,13 +146,10 @@ static ssize_t show_speed(struct device *dev, if (!rtnl_trylock()) return restart_syscall(); - if (netif_running(netdev) && - netdev->ethtool_ops && - netdev->ethtool_ops->get_settings) { - struct ethtool_cmd cmd = { ETHTOOL_GSET }; - - if (!netdev->ethtool_ops->get_settings(netdev, &cmd)) - ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd)); + if (netif_running(netdev)) { + struct ethtool_cmd cmd; + if (!dev_ethtool_get_settings(netdev, &cmd)) + ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd)); } rtnl_unlock(); return ret; @@ -166,13 +164,11 @@ static ssize_t show_duplex(struct device *dev, if (!rtnl_trylock()) return restart_syscall(); - if (netif_running(netdev) && - netdev->ethtool_ops && - netdev->ethtool_ops->get_settings) { - struct ethtool_cmd cmd = { ETHTOOL_GSET }; - - if (!netdev->ethtool_ops->get_settings(netdev, &cmd)) - ret = sprintf(buf, "%s\n", cmd.duplex ? "full" : "half"); + if (netif_running(netdev)) { + struct ethtool_cmd cmd; + if (!dev_ethtool_get_settings(netdev, &cmd)) + ret = sprintf(buf, "%s\n", + cmd.duplex ? "full" : "half"); } rtnl_unlock(); return ret; -- cgit v1.2.3 From 85eb8c8d0b0900c073b0e6f89979ac9c439ade1a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 30 Apr 2011 00:25:44 +0200 Subject: PM / Runtime: Generic clock manipulation rountines for runtime PM (v6) Many different platforms and subsystems may want to disable device clocks during suspend and enable them during resume which is going to be done in a very similar way in all those cases. For this reason, provide generic routines for the manipulation of device clocks during suspend and resume. Convert the ARM shmobile platform to using the new routines. Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-shmobile/pm_runtime.c | 140 +----------- drivers/base/power/Makefile | 1 + drivers/base/power/clock_ops.c | 430 ++++++++++++++++++++++++++++++++++++ include/linux/pm_runtime.h | 42 ++++ kernel/power/Kconfig | 4 + 5 files changed, 486 insertions(+), 131 deletions(-) create mode 100644 drivers/base/power/clock_ops.c (limited to 'include/linux') diff --git a/arch/arm/mach-shmobile/pm_runtime.c b/arch/arm/mach-shmobile/pm_runtime.c index 30bbe9a99ae1..2d1b67a59e4a 100644 --- a/arch/arm/mach-shmobile/pm_runtime.c +++ b/arch/arm/mach-shmobile/pm_runtime.c @@ -21,70 +21,6 @@ #include #ifdef CONFIG_PM_RUNTIME -#define BIT_ONCE 0 -#define BIT_ACTIVE 1 -#define BIT_CLK_ENABLED 2 - -struct pm_runtime_data { - unsigned long flags; - struct clk *clk; -}; - -static struct pm_runtime_data *__to_prd(struct device *dev) -{ - return dev ? 
dev->power.subsys_data : NULL; -} - -static void platform_pm_runtime_init(struct device *dev, - struct pm_runtime_data *prd) -{ - if (prd && !test_and_set_bit(BIT_ONCE, &prd->flags)) { - prd->clk = clk_get(dev, NULL); - if (!IS_ERR(prd->clk)) { - set_bit(BIT_ACTIVE, &prd->flags); - dev_info(dev, "clocks managed by runtime pm\n"); - } - } -} - -static void platform_pm_runtime_bug(struct device *dev, - struct pm_runtime_data *prd) -{ - if (prd && !test_and_set_bit(BIT_ONCE, &prd->flags)) - dev_err(dev, "runtime pm suspend before resume\n"); -} - -static int default_platform_runtime_suspend(struct device *dev) -{ - struct pm_runtime_data *prd = __to_prd(dev); - - dev_dbg(dev, "%s()\n", __func__); - - platform_pm_runtime_bug(dev, prd); - - if (prd && test_bit(BIT_ACTIVE, &prd->flags)) { - clk_disable(prd->clk); - clear_bit(BIT_CLK_ENABLED, &prd->flags); - } - - return 0; -} - -static int default_platform_runtime_resume(struct device *dev) -{ - struct pm_runtime_data *prd = __to_prd(dev); - - dev_dbg(dev, "%s()\n", __func__); - - platform_pm_runtime_init(dev, prd); - - if (prd && test_bit(BIT_ACTIVE, &prd->flags)) { - clk_enable(prd->clk); - set_bit(BIT_CLK_ENABLED, &prd->flags); - } - - return 0; -} static int default_platform_runtime_idle(struct device *dev) { @@ -94,87 +30,29 @@ static int default_platform_runtime_idle(struct device *dev) static struct dev_power_domain default_power_domain = { .ops = { - .runtime_suspend = default_platform_runtime_suspend, - .runtime_resume = default_platform_runtime_resume, + .runtime_suspend = pm_runtime_clk_suspend, + .runtime_resume = pm_runtime_clk_resume, .runtime_idle = default_platform_runtime_idle, USE_PLATFORM_PM_SLEEP_OPS }, }; -static int platform_bus_notify(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct device *dev = data; - struct pm_runtime_data *prd; - - dev_dbg(dev, "platform_bus_notify() %ld !\n", action); - - switch (action) { - case BUS_NOTIFY_BIND_DRIVER: - prd = kzalloc(sizeof(*prd), GFP_KERNEL); - if (prd) { - dev->power.subsys_data = prd; - dev->pwr_domain = &default_power_domain; - } else { - dev_err(dev, "unable to alloc memory for runtime pm\n"); - } - break; - case BUS_NOTIFY_UNBOUND_DRIVER: - prd = __to_prd(dev); - if (prd) { - if (test_bit(BIT_CLK_ENABLED, &prd->flags)) - clk_disable(prd->clk); +#define DEFAULT_PWR_DOMAIN_PTR (&default_power_domain) - if (test_bit(BIT_ACTIVE, &prd->flags)) - clk_put(prd->clk); - } - break; - } +#else - return 0; -} - -#else /* CONFIG_PM_RUNTIME */ - -static int platform_bus_notify(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct device *dev = data; - struct clk *clk; - - dev_dbg(dev, "platform_bus_notify() %ld !\n", action); - - switch (action) { - case BUS_NOTIFY_BIND_DRIVER: - clk = clk_get(dev, NULL); - if (!IS_ERR(clk)) { - clk_enable(clk); - clk_put(clk); - dev_info(dev, "runtime pm disabled, clock forced on\n"); - } - break; - case BUS_NOTIFY_UNBOUND_DRIVER: - clk = clk_get(dev, NULL); - if (!IS_ERR(clk)) { - clk_disable(clk); - clk_put(clk); - dev_info(dev, "runtime pm disabled, clock forced off\n"); - } - break; - } - - return 0; -} +#define DEFAULT_PWR_DOMAIN_PTR NULL #endif /* CONFIG_PM_RUNTIME */ -static struct notifier_block platform_bus_notifier = { - .notifier_call = platform_bus_notify +static struct pm_clk_notifier_block platform_bus_notifier = { + .pwr_domain = DEFAULT_PWR_DOMAIN_PTR, + .con_ids = { NULL, }, }; static int __init sh_pm_runtime_init(void) { - bus_register_notifier(&platform_bus_type, &platform_bus_notifier); + 
pm_runtime_clk_add_notifier(&platform_bus_type, &platform_bus_notifier); return 0; } core_initcall(sh_pm_runtime_init); diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile index 118c1b92a511..06a7073f9027 100644 --- a/drivers/base/power/Makefile +++ b/drivers/base/power/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_PM_SLEEP) += main.o wakeup.o obj-$(CONFIG_PM_RUNTIME) += runtime.o obj-$(CONFIG_PM_TRACE_RTC) += trace.o obj-$(CONFIG_PM_OPP) += opp.o +obj-$(CONFIG_HAVE_CLK) += clock_ops.o ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG ccflags-$(CONFIG_PM_VERBOSE) += -DDEBUG diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c new file mode 100644 index 000000000000..d74abf334391 --- /dev/null +++ b/drivers/base/power/clock_ops.c @@ -0,0 +1,430 @@ +/* + * drivers/base/power/clock_ops.c - Generic clock manipulation PM callbacks + * + * Copyright (c) 2011 Rafael J. Wysocki , Renesas Electronics Corp. + * + * This file is released under the GPLv2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_PM_RUNTIME + +struct pm_runtime_clk_data { + struct list_head clock_list; + struct mutex lock; +}; + +enum pce_status { + PCE_STATUS_NONE = 0, + PCE_STATUS_ACQUIRED, + PCE_STATUS_ENABLED, + PCE_STATUS_ERROR, +}; + +struct pm_clock_entry { + struct list_head node; + char *con_id; + struct clk *clk; + enum pce_status status; +}; + +static struct pm_runtime_clk_data *__to_prd(struct device *dev) +{ + return dev ? dev->power.subsys_data : NULL; +} + +/** + * pm_runtime_clk_add - Start using a device clock for runtime PM. + * @dev: Device whose clock is going to be used for runtime PM. + * @con_id: Connection ID of the clock. + * + * Add the clock represented by @con_id to the list of clocks used for + * the runtime PM of @dev. + */ +int pm_runtime_clk_add(struct device *dev, const char *con_id) +{ + struct pm_runtime_clk_data *prd = __to_prd(dev); + struct pm_clock_entry *ce; + + if (!prd) + return -EINVAL; + + ce = kzalloc(sizeof(*ce), GFP_KERNEL); + if (!ce) { + dev_err(dev, "Not enough memory for clock entry.\n"); + return -ENOMEM; + } + + if (con_id) { + ce->con_id = kstrdup(con_id, GFP_KERNEL); + if (!ce->con_id) { + dev_err(dev, + "Not enough memory for clock connection ID.\n"); + kfree(ce); + return -ENOMEM; + } + } + + mutex_lock(&prd->lock); + list_add_tail(&ce->node, &prd->clock_list); + mutex_unlock(&prd->lock); + return 0; +} + +/** + * __pm_runtime_clk_remove - Destroy runtime PM clock entry. + * @ce: Runtime PM clock entry to destroy. + * + * This routine must be called under the mutex protecting the runtime PM list + * of clocks corresponding the the @ce's device. + */ +static void __pm_runtime_clk_remove(struct pm_clock_entry *ce) +{ + if (!ce) + return; + + list_del(&ce->node); + + if (ce->status < PCE_STATUS_ERROR) { + if (ce->status == PCE_STATUS_ENABLED) + clk_disable(ce->clk); + + if (ce->status >= PCE_STATUS_ACQUIRED) + clk_put(ce->clk); + } + + if (ce->con_id) + kfree(ce->con_id); + + kfree(ce); +} + +/** + * pm_runtime_clk_remove - Stop using a device clock for runtime PM. + * @dev: Device whose clock should not be used for runtime PM any more. + * @con_id: Connection ID of the clock. + * + * Remove the clock represented by @con_id from the list of clocks used for + * the runtime PM of @dev. 
+ */ +void pm_runtime_clk_remove(struct device *dev, const char *con_id) +{ + struct pm_runtime_clk_data *prd = __to_prd(dev); + struct pm_clock_entry *ce; + + if (!prd) + return; + + mutex_lock(&prd->lock); + + list_for_each_entry(ce, &prd->clock_list, node) { + if (!con_id && !ce->con_id) { + __pm_runtime_clk_remove(ce); + break; + } else if (!con_id || !ce->con_id) { + continue; + } else if (!strcmp(con_id, ce->con_id)) { + __pm_runtime_clk_remove(ce); + break; + } + } + + mutex_unlock(&prd->lock); +} + +/** + * pm_runtime_clk_init - Initialize a device's list of runtime PM clocks. + * @dev: Device to initialize the list of runtime PM clocks for. + * + * Allocate a struct pm_runtime_clk_data object, initialize its lock member and + * make the @dev's power.subsys_data field point to it. + */ +int pm_runtime_clk_init(struct device *dev) +{ + struct pm_runtime_clk_data *prd; + + prd = kzalloc(sizeof(*prd), GFP_KERNEL); + if (!prd) { + dev_err(dev, "Not enough memory fo runtime PM data.\n"); + return -ENOMEM; + } + + INIT_LIST_HEAD(&prd->clock_list); + mutex_init(&prd->lock); + dev->power.subsys_data = prd; + return 0; +} + +/** + * pm_runtime_clk_destroy - Destroy a device's list of runtime PM clocks. + * @dev: Device to destroy the list of runtime PM clocks for. + * + * Clear the @dev's power.subsys_data field, remove the list of clock entries + * from the struct pm_runtime_clk_data object pointed to by it before and free + * that object. + */ +void pm_runtime_clk_destroy(struct device *dev) +{ + struct pm_runtime_clk_data *prd = __to_prd(dev); + struct pm_clock_entry *ce, *c; + + if (!prd) + return; + + dev->power.subsys_data = NULL; + + mutex_lock(&prd->lock); + + list_for_each_entry_safe_reverse(ce, c, &prd->clock_list, node) + __pm_runtime_clk_remove(ce); + + mutex_unlock(&prd->lock); + + kfree(prd); +} + +/** + * pm_runtime_clk_acquire - Acquire a device clock. + * @dev: Device whose clock is to be acquired. + * @con_id: Connection ID of the clock. + */ +static void pm_runtime_clk_acquire(struct device *dev, + struct pm_clock_entry *ce) +{ + ce->clk = clk_get(dev, ce->con_id); + if (IS_ERR(ce->clk)) { + ce->status = PCE_STATUS_ERROR; + } else { + ce->status = PCE_STATUS_ACQUIRED; + dev_dbg(dev, "Clock %s managed by runtime PM.\n", ce->con_id); + } +} + +/** + * pm_runtime_clk_suspend - Disable clocks in a device's runtime PM clock list. + * @dev: Device to disable the clocks for. + */ +int pm_runtime_clk_suspend(struct device *dev) +{ + struct pm_runtime_clk_data *prd = __to_prd(dev); + struct pm_clock_entry *ce; + + dev_dbg(dev, "%s()\n", __func__); + + if (!prd) + return 0; + + mutex_lock(&prd->lock); + + list_for_each_entry_reverse(ce, &prd->clock_list, node) { + if (ce->status == PCE_STATUS_NONE) + pm_runtime_clk_acquire(dev, ce); + + if (ce->status < PCE_STATUS_ERROR) { + clk_disable(ce->clk); + ce->status = PCE_STATUS_ACQUIRED; + } + } + + mutex_unlock(&prd->lock); + + return 0; +} + +/** + * pm_runtime_clk_resume - Enable clocks in a device's runtime PM clock list. + * @dev: Device to enable the clocks for. 
+ */ +int pm_runtime_clk_resume(struct device *dev) +{ + struct pm_runtime_clk_data *prd = __to_prd(dev); + struct pm_clock_entry *ce; + + dev_dbg(dev, "%s()\n", __func__); + + if (!prd) + return 0; + + mutex_lock(&prd->lock); + + list_for_each_entry(ce, &prd->clock_list, node) { + if (ce->status == PCE_STATUS_NONE) + pm_runtime_clk_acquire(dev, ce); + + if (ce->status < PCE_STATUS_ERROR) { + clk_enable(ce->clk); + ce->status = PCE_STATUS_ENABLED; + } + } + + mutex_unlock(&prd->lock); + + return 0; +} + +/** + * pm_runtime_clk_notify - Notify routine for device addition and removal. + * @nb: Notifier block object this function is a member of. + * @action: Operation being carried out by the caller. + * @data: Device the routine is being run for. + * + * For this function to work, @nb must be a member of an object of type + * struct pm_clk_notifier_block containing all of the requisite data. + * Specifically, the pwr_domain member of that object is copied to the device's + * pwr_domain field and its con_ids member is used to populate the device's list + * of runtime PM clocks, depending on @action. + * + * If the device's pwr_domain field is already populated with a value different + * from the one stored in the struct pm_clk_notifier_block object, the function + * does nothing. + */ +static int pm_runtime_clk_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct pm_clk_notifier_block *clknb; + struct device *dev = data; + char *con_id; + int error; + + dev_dbg(dev, "%s() %ld\n", __func__, action); + + clknb = container_of(nb, struct pm_clk_notifier_block, nb); + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + if (dev->pwr_domain) + break; + + error = pm_runtime_clk_init(dev); + if (error) + break; + + dev->pwr_domain = clknb->pwr_domain; + if (clknb->con_ids[0]) { + for (con_id = clknb->con_ids[0]; *con_id; con_id++) + pm_runtime_clk_add(dev, con_id); + } else { + pm_runtime_clk_add(dev, NULL); + } + + break; + case BUS_NOTIFY_DEL_DEVICE: + if (dev->pwr_domain != clknb->pwr_domain) + break; + + dev->pwr_domain = NULL; + pm_runtime_clk_destroy(dev); + break; + } + + return 0; +} + +#else /* !CONFIG_PM_RUNTIME */ + +/** + * enable_clock - Enable a device clock. + * @dev: Device whose clock is to be enabled. + * @con_id: Connection ID of the clock. + */ +static void enable_clock(struct device *dev, const char *con_id) +{ + struct clk *clk; + + clk = clk_get(dev, con_id); + if (!IS_ERR(clk)) { + clk_enable(clk); + clk_put(clk); + dev_info(dev, "Runtime PM disabled, clock forced on.\n"); + } +} + +/** + * disable_clock - Disable a device clock. + * @dev: Device whose clock is to be disabled. + * @con_id: Connection ID of the clock. + */ +static void disable_clock(struct device *dev, const char *con_id) +{ + struct clk *clk; + + clk = clk_get(dev, con_id); + if (!IS_ERR(clk)) { + clk_disable(clk); + clk_put(clk); + dev_info(dev, "Runtime PM disabled, clock forced off.\n"); + } +} + +/** + * pm_runtime_clk_notify - Notify routine for device addition and removal. + * @nb: Notifier block object this function is a member of. + * @action: Operation being carried out by the caller. + * @data: Device the routine is being run for. + * + * For this function to work, @nb must be a member of an object of type + * struct pm_clk_notifier_block containing all of the requisite data. + * Specifically, the con_ids member of that object is used to enable or disable + * the device's clocks, depending on @action. 
+ */ +static int pm_runtime_clk_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct pm_clk_notifier_block *clknb; + struct device *dev = data; + + dev_dbg(dev, "%s() %ld\n", __func__, action); + + clknb = container_of(nb, struct pm_clk_notifier_block, nb); + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + if (clknb->con_ids[0]) { + for (con_id = clknb->con_ids[0]; *con_id; con_id++) + enable_clock(dev, con_id); + } else { + enable_clock(dev, NULL); + } + break; + case BUS_NOTIFY_DEL_DEVICE: + if (clknb->con_ids[0]) { + for (con_id = clknb->con_ids[0]; *con_id; con_id++) + disable_clock(dev, con_id); + } else { + disable_clock(dev, NULL); + } + break; + } + + return 0; +} + +#endif /* !CONFIG_PM_RUNTIME */ + +/** + * pm_runtime_clk_add_notifier - Add bus type notifier for runtime PM clocks. + * @bus: Bus type to add the notifier to. + * @clknb: Notifier to be added to the given bus type. + * + * The nb member of @clknb is not expected to be initialized and its + * notifier_call member will be replaced with pm_runtime_clk_notify(). However, + * the remaining members of @clknb should be populated prior to calling this + * routine. + */ +void pm_runtime_clk_add_notifier(struct bus_type *bus, + struct pm_clk_notifier_block *clknb) +{ + if (!bus || !clknb) + return; + + clknb->nb.notifier_call = pm_runtime_clk_notify; + bus_register_notifier(bus, &clknb->nb); +} diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 8de9aa6e7def..878cf84baeb1 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -245,4 +245,46 @@ static inline void pm_runtime_dont_use_autosuspend(struct device *dev) __pm_runtime_use_autosuspend(dev, false); } +struct pm_clk_notifier_block { + struct notifier_block nb; + struct dev_power_domain *pwr_domain; + char *con_ids[]; +}; + +#ifdef CONFIG_PM_RUNTIME_CLK +extern int pm_runtime_clk_init(struct device *dev); +extern void pm_runtime_clk_destroy(struct device *dev); +extern int pm_runtime_clk_add(struct device *dev, const char *con_id); +extern void pm_runtime_clk_remove(struct device *dev, const char *con_id); +extern int pm_runtime_clk_suspend(struct device *dev); +extern int pm_runtime_clk_resume(struct device *dev); +#else +static inline int pm_runtime_clk_init(struct device *dev) +{ + return -EINVAL; +} +static inline void pm_runtime_clk_destroy(struct device *dev) +{ +} +static inline int pm_runtime_clk_add(struct device *dev, const char *con_id) +{ + return -EINVAL; +} +static inline void pm_runtime_clk_remove(struct device *dev, const char *con_id) +{ +} +#define pm_runtime_clock_suspend NULL +#define pm_runtime_clock_resume NULL +#endif + +#ifdef CONFIG_HAVE_CLK +extern void pm_runtime_clk_add_notifier(struct bus_type *bus, + struct pm_clk_notifier_block *clknb); +#else +static inline void pm_runtime_clk_add_notifier(struct bus_type *bus, + struct pm_clk_notifier_block *clknb) +{ +} +#endif + #endif diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 6de9a8fc3417..d74ad4a90695 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -229,3 +229,7 @@ config PM_OPP representing individual voltage domains and provides SOC implementations a ready to use framework to manage OPPs. 
For more information, read + +config PM_RUNTIME_CLK + def_bool y + depends on PM_RUNTIME && HAVE_CLK -- cgit v1.2.3 From 6498d9db6d2dad4cf5deb2dd09e0816904f41ca5 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 28 Apr 2011 10:45:24 -0400 Subject: USB: documentation update for the pre_reset method This patch (as1459) updates the documentation for the pre_reset method in struct usb_driver. When a driver is notified of an impending reset, it must cancel all outstanding I/O and not start any new I/O until it has been notified that the reset is complete. As far as I know, most existing drivers that implement pre_reset do this now. The major exceptions appear to be the SpeedTouch and CDC-WDM drivers. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- Documentation/usb/callbacks.txt | 8 +++++--- include/linux/usb.h | 6 ++++-- 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/Documentation/usb/callbacks.txt b/Documentation/usb/callbacks.txt index bfb36b34b79e..9e85846bdb98 100644 --- a/Documentation/usb/callbacks.txt +++ b/Documentation/usb/callbacks.txt @@ -95,9 +95,11 @@ pre_reset int (*pre_reset)(struct usb_interface *intf); -Another driver or user space is triggering a reset on the device which -contains the interface passed as an argument. Cease IO and save any -device state you need to restore. +A driver or user space is triggering a reset on the device which +contains the interface passed as an argument. Cease IO, wait for all +outstanding URBs to complete, and save any device state you need to +restore. No more URBs may be submitted until the post_reset method +is called. If you need to allocate memory here, use GFP_NOIO or GFP_ATOMIC, if you are in atomic context. diff --git a/include/linux/usb.h b/include/linux/usb.h index 65f78ca5d88e..73c7df489607 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -806,8 +806,10 @@ struct usbdrv_wrap { * @resume: Called when the device is being resumed by the system. * @reset_resume: Called when the suspended device has been reset instead * of being resumed. - * @pre_reset: Called by usb_reset_device() when the device - * is about to be reset. + * @pre_reset: Called by usb_reset_device() when the device is about to be + * reset. This routine must not return until the driver has no active + * URBs for the device, and no more URBs may be submitted until the + * post_reset method is called. * @post_reset: Called by usb_reset_device() after the device * has been reset * @id_table: USB drivers use ID table to support hotplugging. -- cgit v1.2.3 From af32fe511374f17feb137d7fbfe2f4c73a8f531c Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 21 Apr 2011 14:10:16 +0900 Subject: usb: renesas_usbhs: remove callback when module removed. The callback function which is called from platform must be removed if module removed. 
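As a rough illustration of the rule this fix enforces (the type and function names below are hypothetical, not taken from the driver): the callback pointer is cleared at remove time and NULL-checked before every invocation, so a hotplug notification that arrives after the module has been removed becomes a no-op instead of a call into unloaded code.

/* Hypothetical sketch, not part of the patch above. */
#include <stddef.h>

struct hotplug_callbacks {
	void (*notify_hotplug)(void *pdev);	/* cleared when the driver is removed */
};

static struct hotplug_callbacks cb;

static void platform_report_hotplug(void *pdev)
{
	/* Guarded call: once notify_hotplug is NULL this does nothing. */
	if (cb.notify_hotplug)
		cb.notify_hotplug(pdev);
}

static void example_driver_remove(void)
{
	/* Detach the hook before the module code goes away. */
	cb.notify_hotplug = NULL;
}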
Signed-off-by: Kuninori Morimoto Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/common.c | 4 ++++ include/linux/usb/renesas_usbhs.h | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index d9ad60d1c156..fda586d1f7d8 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -352,9 +352,13 @@ probe_end_kfree: static int __devexit usbhs_remove(struct platform_device *pdev) { struct usbhs_priv *priv = usbhsc_pdev_to_priv(pdev); + struct renesas_usbhs_platform_info *info = pdev->dev.platform_data; + struct renesas_usbhs_driver_callback *dfunc = &info->driver_callback; dev_dbg(&pdev->dev, "usb remove\n"); + dfunc->notify_hotplug = NULL; + pm_runtime_disable(&pdev->dev); usbhsc_bus_ctrl(priv, 0); diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index 565bca3aa440..66bbdd12d153 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -143,7 +143,7 @@ struct renesas_usbhs_platform_info { ({ \ struct renesas_usbhs_driver_callback *dc; \ dc = &(renesas_usbhs_get_info(pdev)->driver_callback); \ - if (dc) \ + if (dc && dc->notify_hotplug) \ dc->notify_hotplug(pdev); \ }) #endif /* RENESAS_USB_H */ -- cgit v1.2.3 From bc57381e634782009b1cb2e86b18013699ada576 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 28 Apr 2011 16:41:14 +0900 Subject: usb: renesas_usbhs: use delayed_work instead of work_struct This delay is used to overjump debounce. And, this patch also move usbhsc_drvcllbck_notify_hotplug to global, because it will be called from other files. Signed-off-by: Kuninori Morimoto Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/common.c | 9 +++++---- drivers/usb/renesas_usbhs/common.h | 3 ++- include/linux/usb/renesas_usbhs.h | 7 +++++++ 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index db13cef9effe..9a75a45687bb 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -178,7 +178,7 @@ static void usbhsc_notify_hotplug(struct work_struct *work) { struct usbhs_priv *priv = container_of(work, struct usbhs_priv, - notify_hotplug_work); + notify_hotplug_work.work); struct platform_device *pdev = usbhs_priv_to_pdev(priv); struct usbhs_mod *mod = usbhs_mod_get_current(priv); int id; @@ -224,16 +224,17 @@ static void usbhsc_notify_hotplug(struct work_struct *work) } } -static int usbhsc_drvcllbck_notify_hotplug(struct platform_device *pdev) +int usbhsc_drvcllbck_notify_hotplug(struct platform_device *pdev) { struct usbhs_priv *priv = usbhs_pdev_to_priv(pdev); + int delay = usbhs_get_dparam(priv, detection_delay); /* * This functions will be called in interrupt. 
* To make sure safety context, * use workqueue for usbhs_notify_hotplug */ - schedule_work(&priv->notify_hotplug_work); + schedule_delayed_work(&priv->notify_hotplug_work, delay); return 0; } @@ -300,7 +301,7 @@ static int __devinit usbhs_probe(struct platform_device *pdev) */ priv->irq = irq; priv->pdev = pdev; - INIT_WORK(&priv->notify_hotplug_work, usbhsc_notify_hotplug); + INIT_DELAYED_WORK(&priv->notify_hotplug_work, usbhsc_notify_hotplug); spin_lock_init(usbhs_priv_to_lock(priv)); /* call pipe and module init */ diff --git a/drivers/usb/renesas_usbhs/common.h b/drivers/usb/renesas_usbhs/common.h index f3b907d26082..0157eb805cf6 100644 --- a/drivers/usb/renesas_usbhs/common.h +++ b/drivers/usb/renesas_usbhs/common.h @@ -177,7 +177,7 @@ struct usbhs_priv { struct renesas_usbhs_platform_callback *pfunc; struct renesas_usbhs_driver_param *dparam; - struct work_struct notify_hotplug_work; + struct delayed_work notify_hotplug_work; struct platform_device *pdev; spinlock_t lock; @@ -200,6 +200,7 @@ u16 usbhs_read(struct usbhs_priv *priv, u32 reg); void usbhs_write(struct usbhs_priv *priv, u32 reg, u16 data); void usbhs_bset(struct usbhs_priv *priv, u32 reg, u16 mask, u16 data); +int usbhsc_drvcllbck_notify_hotplug(struct platform_device *pdev); /* * sysconfig */ diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index 66bbdd12d153..3a7f1d982dd6 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -103,6 +103,13 @@ struct renesas_usbhs_driver_param { * for BUSWAIT :: BWAIT * */ int buswait_bwait; + + /* + * option: + * + * delay time from notify_hotplug callback + */ + int detection_delay; }; /* -- cgit v1.2.3 From a62573dc353b255dbbc8520bfdcb1c8a8c1ada87 Mon Sep 17 00:00:00 2001 From: Mi Jinlong Date: Wed, 23 Mar 2011 17:57:07 +0800 Subject: nfsd41: add flag checking for create_session Teach the NFS server to reject invalid create_session flags. Also do some minor formatting adjustments. Signed-off-by: Mi Jinlong Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 3 +++ include/linux/nfs4.h | 8 +++++--- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index fbde6f79922e..6dbaa379c863 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1515,6 +1515,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, bool confirm_me = false; int status = 0; + if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) + return nfserr_inval; + nfs4_lock_state(); unconf = find_unconfirmed_client(&cr_ses->clientid); conf = find_confirmed_client(&cr_ses->clientid); diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index b528f6d4b860..8937727e7039 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -570,9 +570,11 @@ struct nfs4_sessionid { }; /* Create Session Flags */ -#define SESSION4_PERSIST 0x001 -#define SESSION4_BACK_CHAN 0x002 -#define SESSION4_RDMA 0x004 +#define SESSION4_PERSIST 0x001 +#define SESSION4_BACK_CHAN 0x002 +#define SESSION4_RDMA 0x004 + +#define SESSION4_FLAG_MASK_A 0x007 enum state_protect_how4 { SP4_NONE = 0, -- cgit v1.2.3 From 45a4a2372b364107cabea79f255b333236626416 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 21 Apr 2011 23:16:46 -0400 Subject: ftrace: Remove FTRACE_FL_FAILED flag Since we disable all function tracer processing if we detect that a modification of a instruction had failed, we do not need to track that the record has failed. 
No more ftrace processing is allowed, and the FTRACE_FL_FAILED flag is pointless. Removing this flag simplifies some of the code, but some ftrace_disabled checks needed to be added or move around a little. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 9 +++--- kernel/trace/ftrace.c | 76 +++++++++++++++++++++++++++++++------------------- 2 files changed, 51 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ca29e03c1fac..2a195ffd4269 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -147,11 +147,10 @@ extern int ftrace_text_reserved(void *start, void *end); enum { FTRACE_FL_FREE = (1 << 0), - FTRACE_FL_FAILED = (1 << 1), - FTRACE_FL_FILTER = (1 << 2), - FTRACE_FL_ENABLED = (1 << 3), - FTRACE_FL_NOTRACE = (1 << 4), - FTRACE_FL_CONVERTED = (1 << 5), + FTRACE_FL_FILTER = (1 << 1), + FTRACE_FL_ENABLED = (1 << 2), + FTRACE_FL_NOTRACE = (1 << 3), + FTRACE_FL_CONVERTED = (1 << 4), }; struct dyn_ftrace { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 97b30f818642..eb19fae2c54a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1083,19 +1083,20 @@ static void ftrace_replace_code(int enable) struct ftrace_page *pg; int failed; + if (unlikely(ftrace_disabled)) + return; + do_for_each_ftrace_rec(pg, rec) { /* * Skip over free records, records that have * failed and not converted. */ if (rec->flags & FTRACE_FL_FREE || - rec->flags & FTRACE_FL_FAILED || !(rec->flags & FTRACE_FL_CONVERTED)) continue; failed = __ftrace_replace_code(rec, enable); if (failed) { - rec->flags |= FTRACE_FL_FAILED; ftrace_bug(failed, rec->ip); /* Stop processing */ return; @@ -1111,10 +1112,12 @@ ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) ip = rec->ip; + if (unlikely(ftrace_disabled)) + return 0; + ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR); if (ret) { ftrace_bug(ret, ip); - rec->flags |= FTRACE_FL_FAILED; return 0; } return 1; @@ -1466,6 +1469,9 @@ t_next(struct seq_file *m, void *v, loff_t *pos) struct ftrace_iterator *iter = m->private; struct dyn_ftrace *rec = NULL; + if (unlikely(ftrace_disabled)) + return NULL; + if (iter->flags & FTRACE_ITER_HASH) return t_hash_next(m, pos); @@ -1518,6 +1524,10 @@ static void *t_start(struct seq_file *m, loff_t *pos) loff_t l; mutex_lock(&ftrace_lock); + + if (unlikely(ftrace_disabled)) + return NULL; + /* * If an lseek was done, then reset and start from beginning. 
*/ @@ -1636,8 +1646,6 @@ static void ftrace_filter_reset(int enable) if (enable) ftrace_filtered = 0; do_for_each_ftrace_rec(pg, rec) { - if (rec->flags & FTRACE_FL_FAILED) - continue; rec->flags &= ~type; } while_for_each_ftrace_rec(); mutex_unlock(&ftrace_lock); @@ -1767,9 +1775,6 @@ static int ftrace_match_records(char *buff, int len, int enable) mutex_lock(&ftrace_lock); do_for_each_ftrace_rec(pg, rec) { - if (rec->flags & FTRACE_FL_FAILED) - continue; - if (ftrace_match_record(rec, search, search_len, type)) { if (not) rec->flags &= ~flag; @@ -1837,10 +1842,11 @@ static int ftrace_match_module_records(char *buff, char *mod, int enable) } mutex_lock(&ftrace_lock); - do_for_each_ftrace_rec(pg, rec) { - if (rec->flags & FTRACE_FL_FAILED) - continue; + if (unlikely(ftrace_disabled)) + goto out_unlock; + + do_for_each_ftrace_rec(pg, rec) { if (ftrace_match_module_record(rec, mod, search, search_len, type)) { @@ -1854,6 +1860,7 @@ static int ftrace_match_module_records(char *buff, char *mod, int enable) ftrace_filtered = 1; } while_for_each_ftrace_rec(); + out_unlock: mutex_unlock(&ftrace_lock); return found; @@ -2008,10 +2015,11 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, return -EINVAL; mutex_lock(&ftrace_lock); - do_for_each_ftrace_rec(pg, rec) { - if (rec->flags & FTRACE_FL_FAILED) - continue; + if (unlikely(ftrace_disabled)) + goto out_unlock; + + do_for_each_ftrace_rec(pg, rec) { if (!ftrace_match_record(rec, search, len, type)) continue; @@ -2218,6 +2226,10 @@ ftrace_regex_write(struct file *file, const char __user *ubuf, mutex_lock(&ftrace_regex_lock); + ret = -ENODEV; + if (unlikely(ftrace_disabled)) + goto out_unlock; + if (file->f_mode & FMODE_READ) { struct seq_file *m = file->private_data; iter = m->private; @@ -2545,9 +2557,6 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer) bool exists; int i; - if (ftrace_disabled) - return -ENODEV; - /* decode regex */ type = filter_parse_regex(buffer, strlen(buffer), &search, ¬); if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS) @@ -2556,9 +2565,15 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer) search_len = strlen(search); mutex_lock(&ftrace_lock); + + if (unlikely(ftrace_disabled)) { + mutex_unlock(&ftrace_lock); + return -ENODEV; + } + do_for_each_ftrace_rec(pg, rec) { - if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) + if (rec->flags & FTRACE_FL_FREE) continue; if (ftrace_match_record(rec, search, search_len, type)) { @@ -2700,10 +2715,11 @@ void ftrace_release_mod(struct module *mod) struct dyn_ftrace *rec; struct ftrace_page *pg; + mutex_lock(&ftrace_lock); + if (ftrace_disabled) - return; + goto out_unlock; - mutex_lock(&ftrace_lock); do_for_each_ftrace_rec(pg, rec) { if (within_module_core(rec->ip, mod)) { /* @@ -2714,6 +2730,7 @@ void ftrace_release_mod(struct module *mod) ftrace_free_rec(rec); } } while_for_each_ftrace_rec(); + out_unlock: mutex_unlock(&ftrace_lock); } @@ -3108,16 +3125,17 @@ void ftrace_kill(void) */ int register_ftrace_function(struct ftrace_ops *ops) { - int ret; - - if (unlikely(ftrace_disabled)) - return -1; + int ret = -1; mutex_lock(&ftrace_lock); + if (unlikely(ftrace_disabled)) + goto out_unlock; + ret = __register_ftrace_function(ops); ftrace_startup(0); + out_unlock: mutex_unlock(&ftrace_lock); return ret; } @@ -3145,14 +3163,14 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret; - - if (unlikely(ftrace_disabled)) - return -ENODEV; + int ret = -ENODEV; 
mutex_lock(&ftrace_lock); - ret = proc_dointvec(table, write, buffer, lenp, ppos); + if (unlikely(ftrace_disabled)) + goto out; + + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled)) goto out; -- cgit v1.2.3 From d2c8c3eafbf715306ec891e7ca52d3d999acbe31 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 25 Apr 2011 14:32:42 -0400 Subject: ftrace: Remove FTRACE_FL_CONVERTED flag Since we disable all function tracer processing if we detect that a modification of a instruction had failed, we do not need to track that the record has failed. No more ftrace processing is allowed, and the FTRACE_FL_CONVERTED flag is pointless. The FTRACE_FL_CONVERTED flag was used to denote records that were successfully converted from mcount calls into nops. But if a single record fails, all of ftrace is disabled. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 1 - kernel/trace/ftrace.c | 12 ++++-------- 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2a195ffd4269..32047449b309 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -150,7 +150,6 @@ enum { FTRACE_FL_FILTER = (1 << 1), FTRACE_FL_ENABLED = (1 << 2), FTRACE_FL_NOTRACE = (1 << 3), - FTRACE_FL_CONVERTED = (1 << 4), }; struct dyn_ftrace { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index eb19fae2c54a..9abaaf46f212 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1087,12 +1087,8 @@ static void ftrace_replace_code(int enable) return; do_for_each_ftrace_rec(pg, rec) { - /* - * Skip over free records, records that have - * failed and not converted. - */ - if (rec->flags & FTRACE_FL_FREE || - !(rec->flags & FTRACE_FL_CONVERTED)) + /* Skip over free records */ + if (rec->flags & FTRACE_FL_FREE) continue; failed = __ftrace_replace_code(rec, enable); @@ -1280,10 +1276,10 @@ static int ftrace_update_code(struct module *mod) */ if (!ftrace_code_disable(mod, p)) { ftrace_free_rec(p); - continue; + /* Game over */ + break; } - p->flags |= FTRACE_FL_CONVERTED; ftrace_update_cnt++; /* -- cgit v1.2.3 From a6e5e2be44616c8400f9ec2f635b10f8e579217c Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 1 May 2011 18:18:49 +0200 Subject: i2c-i801: Move device ID definitions to driver Move the SMBus device ID definitions of recent devices from pci_ids.h to the i2c-i801.c driver file. They don't have to be shared, as they are clearly identified and only used in this driver. In the future, such IDs will go to i2c-i801 directly. This will make adding support for new devices much faster and easier, as it will avoid cross- subsystem patch sets and merge conflicts. 
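A minimal sketch of the resulting pattern, with an invented device ID and table name (the real IDs are the ones listed in the diff below): an ID that only one driver cares about is defined next to that driver's PCI ID table instead of in the shared pci_ids.h.

/* Hypothetical example only; the ID value and names are made up. */
#include <linux/module.h>
#include <linux/pci.h>

#define PCI_DEVICE_ID_EXAMPLE_SMBUS	0x1234	/* driver-local, not in pci_ids.h */

static const struct pci_device_id example_smbus_ids[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_EXAMPLE_SMBUS) },
	{ 0, }
};
MODULE_DEVICE_TABLE(pci, example_smbus_ids);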
Signed-off-by: Jean Delvare Cc: Seth Heasley Acked-by: Jesse Barnes --- drivers/i2c/busses/i2c-i801.c | 5 +++++ include/linux/pci_ids.h | 4 ---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 72c0415f6f94..455e909bc768 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -134,10 +134,15 @@ SMBHSTSTS_BUS_ERR | SMBHSTSTS_DEV_ERR | \ SMBHSTSTS_INTR) +/* Older devices have their ID defined in */ +#define PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS 0x1c22 +#define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS 0x1d22 /* Patsburg also has three 'Integrated Device Function' SMBus controllers */ #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS_IDF0 0x1d70 #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS_IDF1 0x1d71 #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS_IDF2 0x1d72 +#define PCI_DEVICE_ID_INTEL_DH89XXCC_SMBUS 0x2330 +#define PCI_DEVICE_ID_INTEL_5_3400_SERIES_SMBUS 0x3b30 struct i801_priv { struct i2c_adapter adapter; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 4e2c9150a785..8abe8d78c4bf 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2477,15 +2477,12 @@ #define PCI_DEVICE_ID_INTEL_82840_HB 0x1a21 #define PCI_DEVICE_ID_INTEL_82845_HB 0x1a30 #define PCI_DEVICE_ID_INTEL_IOAT 0x1a38 -#define PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS 0x1c22 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN 0x1c41 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX 0x1c5f -#define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS 0x1d22 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0 0x1d40 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1 0x1d41 #define PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MIN 0x2310 #define PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MAX 0x231f -#define PCI_DEVICE_ID_INTEL_DH89XXCC_SMBUS 0x2330 #define PCI_DEVICE_ID_INTEL_82801AA_0 0x2410 #define PCI_DEVICE_ID_INTEL_82801AA_1 0x2411 #define PCI_DEVICE_ID_INTEL_82801AA_3 0x2413 @@ -2696,7 +2693,6 @@ #define PCI_DEVICE_ID_INTEL_ICH10_5 0x3a60 #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN 0x3b00 #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MAX 0x3b1f -#define PCI_DEVICE_ID_INTEL_5_3400_SERIES_SMBUS 0x3b30 #define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f #define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 #define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 -- cgit v1.2.3 From b12a03ce4880bd13786a98db6de494a3e0123129 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 2 May 2011 16:48:57 +0200 Subject: hrtimers: Prepare for cancel on clock was set timers Make clock_was_set() unconditional and rename hres_timers_resume to hrtimers_resume. This is a preparatory patch for hrtimers which are cancelled when clock realtime was set. 
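For context, a small user-space sketch (illustrative only, not part of this patch) of the problem the series works toward solving: a wait on an absolute CLOCK_REALTIME deadline keeps targeting the old instant even if the wall clock is later set backwards, so without a cancellation mechanism the caller can sleep far longer than the user expects.

/* Illustration of absolute CLOCK_REALTIME waits; nothing here is kernel code. */
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec deadline;

	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += 60;	/* one minute from now, in wall-clock terms */

	/*
	 * If the wall clock is set back by an hour while this call is blocked,
	 * it keeps waiting for the old absolute deadline and returns roughly
	 * an hour and a minute later in real time, not one minute later.
	 */
	clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &deadline, NULL);

	puts("deadline reached");
	return 0;
}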
Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 16 ++---- kernel/hrtimer.c | 125 ++++++++++++++++++++++------------------------ kernel/time/timekeeping.c | 2 +- 3 files changed, 65 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 62f500c724f9..4135c88fe4fa 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -148,9 +148,7 @@ struct hrtimer_clock_base { ktime_t resolution; ktime_t (*get_time)(void); ktime_t softirq_time; -#ifdef CONFIG_HIGH_RES_TIMERS ktime_t offset; -#endif }; enum hrtimer_base_type { @@ -256,8 +254,6 @@ static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) #ifdef CONFIG_HIGH_RES_TIMERS struct clock_event_device; -extern void clock_was_set(void); -extern void hres_timers_resume(void); extern void hrtimer_interrupt(struct clock_event_device *dev); /* @@ -291,16 +287,8 @@ extern void hrtimer_peek_ahead_timers(void); # define MONOTONIC_RES_NSEC LOW_RES_NSEC # define KTIME_MONOTONIC_RES KTIME_LOW_RES -/* - * clock_was_set() is a NOP for non- high-resolution systems. The - * time-sorted order guarantees that a timer does not expire early and - * is expired in the next softirq when the clock was advanced. - */ -static inline void clock_was_set(void) { } static inline void hrtimer_peek_ahead_timers(void) { } -static inline void hres_timers_resume(void) { } - /* * In non high resolution mode the time reference is taken from * the base softirq time variable. @@ -316,11 +304,13 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) } #endif +extern void clock_was_set(void); +extern void hrtimers_resume(void); + extern ktime_t ktime_get(void); extern ktime_t ktime_get_real(void); extern ktime_t ktime_get_boottime(void); - DECLARE_PER_CPU(struct tick_device, tick_cpu_device); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index dbbbf7d43080..c145ed643bca 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -621,66 +621,6 @@ static int hrtimer_reprogram(struct hrtimer *timer, return res; } - -/* - * Retrigger next event is called after clock was set - * - * Called with interrupts disabled via on_each_cpu() - */ -static void retrigger_next_event(void *arg) -{ - struct hrtimer_cpu_base *base; - struct timespec realtime_offset, wtm, sleep; - - if (!hrtimer_hres_active()) - return; - - get_xtime_and_monotonic_and_sleep_offset(&realtime_offset, &wtm, - &sleep); - set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); - - base = &__get_cpu_var(hrtimer_bases); - - /* Adjust CLOCK_REALTIME offset */ - raw_spin_lock(&base->lock); - base->clock_base[HRTIMER_BASE_REALTIME].offset = - timespec_to_ktime(realtime_offset); - base->clock_base[HRTIMER_BASE_BOOTTIME].offset = - timespec_to_ktime(sleep); - - hrtimer_force_reprogram(base, 0); - raw_spin_unlock(&base->lock); -} - -/* - * Clock realtime was set - * - * Change the offset of the realtime clock vs. the monotonic - * clock. - * - * We might have to reprogram the high resolution timer interrupt. On - * SMP we call the architecture specific code to retrigger _all_ high - * resolution timer interrupts. On UP we just disable interrupts and - * call the high resolution interrupt code. 
- */ -void clock_was_set(void) -{ - /* Retrigger the CPU local events everywhere */ - on_each_cpu(retrigger_next_event, NULL, 1); -} - -/* - * During resume we might have to reprogram the high resolution timer - * interrupt (on the local CPU): - */ -void hres_timers_resume(void) -{ - WARN_ONCE(!irqs_disabled(), - KERN_INFO "hres_timers_resume() called with IRQs enabled!"); - - retrigger_next_event(NULL); -} - /* * Initialize the high resolution related parts of cpu_base */ @@ -714,12 +654,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, return 0; } +static void retrigger_next_event(void *arg); + /* * Switch to high resolution mode */ static int hrtimer_switch_to_hres(void) { - int cpu = smp_processor_id(); + int i, cpu = smp_processor_id(); struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu); unsigned long flags; @@ -735,9 +677,8 @@ static int hrtimer_switch_to_hres(void) return 0; } base->hres_active = 1; - base->clock_base[HRTIMER_BASE_REALTIME].resolution = KTIME_HIGH_RES; - base->clock_base[HRTIMER_BASE_MONOTONIC].resolution = KTIME_HIGH_RES; - base->clock_base[HRTIMER_BASE_BOOTTIME].resolution = KTIME_HIGH_RES; + for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) + base->clock_base[i].resolution = KTIME_HIGH_RES; tick_setup_sched_timer(); @@ -764,6 +705,62 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } #endif /* CONFIG_HIGH_RES_TIMERS */ +/* + * Retrigger next event is called after clock was set + * + * Called with interrupts disabled via on_each_cpu() + */ +static void retrigger_next_event(void *arg) +{ + struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); + struct timespec realtime_offset, xtim, wtm, sleep; + + if (!hrtimer_hres_active()) + return; + + get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); + set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); + + /* Adjust CLOCK_REALTIME offset */ + raw_spin_lock(&base->lock); + base->clock_base[HRTIMER_BASE_REALTIME].offset = + timespec_to_ktime(realtime_offset); + base->clock_base[HRTIMER_BASE_BOOTTIME].offset = + timespec_to_ktime(sleep); + + hrtimer_force_reprogram(base, 0); + raw_spin_unlock(&base->lock); +} + +/* + * Clock realtime was set + * + * Change the offset of the realtime clock vs. the monotonic + * clock. + * + * We might have to reprogram the high resolution timer interrupt. On + * SMP we call the architecture specific code to retrigger _all_ high + * resolution timer interrupts. On UP we just disable interrupts and + * call the high resolution interrupt code. 
+ */ +void clock_was_set(void) +{ + /* Retrigger the CPU local events everywhere */ + on_each_cpu(retrigger_next_event, NULL, 1); +} + +/* + * During resume we might have to reprogram the high resolution timer + * interrupt (on the local CPU): + */ +void hrtimers_resume(void) +{ + WARN_ONCE(!irqs_disabled(), + KERN_INFO "hrtimers_resume() called with IRQs enabled!"); + + retrigger_next_event(NULL); +} + static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) { #ifdef CONFIG_TIMER_STATS diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 8e6a05a5915a..a61b8fa2d39a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -680,7 +680,7 @@ static void timekeeping_resume(void) clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); /* Resume hrtimers */ - hres_timers_resume(); + hrtimers_resume(); } static int timekeeping_suspend(void) -- cgit v1.2.3 From 99ee5315dac6211e972fa3f23bcc9a0343ff58c4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 27 Apr 2011 14:16:42 +0200 Subject: timerfd: Allow timers to be cancelled when clock was set Some applications must be aware of clock realtime being set backward. A simple example is a clock applet which arms a timer for the next minute display. If clock realtime is set backward then the applet displays a stale time for the amount of time which the clock was set backwards. Due to that applications poll the time because we don't have an interface. Extend the timerfd interface by adding a flag which puts the timer onto a different internal realtime clock. All timers on this clock are expired whenever the clock was set. The timerfd core records the monotonic offset when the timer is created. When the timer is armed, then the current offset is compared to the previous recorded offset. When it has changed, then timerfd_settime returns -ECANCELED. When a timer is read the offset is compared and if it changed -ECANCELED returned to user space. Periodic timers are not rearmed in the cancelation case. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Cc: Chris Friesen Tested-by: Kay Sievers Cc: "Kirill A. Shutemov" Cc: Peter Zijlstra Cc: Davide Libenzi Reviewed-by: Alexander Shishkin Link: http://lkml.kernel.org/r/%3Calpine.LFD.2.02.1104271359580.3323%40ionos%3E Signed-off-by: Thomas Gleixner --- fs/timerfd.c | 57 ++++++++++++++++++++++++++++++++++++++++++----- include/linux/hrtimer.h | 2 ++ include/linux/time.h | 6 +++++ include/linux/timerfd.h | 3 ++- kernel/hrtimer.c | 36 +++++++++++++++++++++++++++++- kernel/time/timekeeping.c | 15 +++++++++++++ 6 files changed, 111 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/timerfd.c b/fs/timerfd.c index 8c4fc1425b3e..7e14c9e7c4ee 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -26,10 +26,12 @@ struct timerfd_ctx { struct hrtimer tmr; ktime_t tintv; + ktime_t moffs; wait_queue_head_t wqh; u64 ticks; int expired; int clockid; + bool might_cancel; }; /* @@ -59,24 +61,52 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) return remaining.tv64 < 0 ? 
ktime_set(0, 0): remaining; } -static void timerfd_setup(struct timerfd_ctx *ctx, int flags, - const struct itimerspec *ktmr) +static bool timerfd_canceled(struct timerfd_ctx *ctx) +{ + ktime_t moffs; + + if (!ctx->might_cancel) + return false; + + moffs = ktime_get_monotonic_offset(); + + if (moffs.tv64 == ctx->moffs.tv64) + return false; + + ctx->moffs = moffs; + return true; +} + +static int timerfd_setup(struct timerfd_ctx *ctx, int flags, + const struct itimerspec *ktmr) { enum hrtimer_mode htmode; ktime_t texp; + int clockid = ctx->clockid; htmode = (flags & TFD_TIMER_ABSTIME) ? HRTIMER_MODE_ABS: HRTIMER_MODE_REL; + ctx->might_cancel = false; + if (htmode == HRTIMER_MODE_ABS && ctx->clockid == CLOCK_REALTIME && + (flags & TFD_TIMER_CANCELON_SET)) { + clockid = CLOCK_REALTIME_COS; + ctx->might_cancel = true; + } + texp = timespec_to_ktime(ktmr->it_value); ctx->expired = 0; ctx->ticks = 0; ctx->tintv = timespec_to_ktime(ktmr->it_interval); - hrtimer_init(&ctx->tmr, ctx->clockid, htmode); + hrtimer_init(&ctx->tmr, clockid, htmode); hrtimer_set_expires(&ctx->tmr, texp); ctx->tmr.function = timerfd_tmrproc; - if (texp.tv64 != 0) + if (texp.tv64 != 0) { hrtimer_start(&ctx->tmr, texp, htmode); + if (timerfd_canceled(ctx)) + return -ECANCELED; + } + return 0; } static int timerfd_release(struct inode *inode, struct file *file) @@ -118,8 +148,21 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, res = -EAGAIN; else res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); + if (ctx->ticks) { ticks = ctx->ticks; + + /* + * If clock has changed, we do not care about the + * ticks and we do not rearm the timer. Userspace must + * reevaluate anyway. + */ + if (timerfd_canceled(ctx)) { + ticks = 0; + ctx->expired = 0; + res = -ECANCELED; + } + if (ctx->expired && ctx->tintv.tv64) { /* * If tintv.tv64 != 0, this is a periodic timer that @@ -183,6 +226,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) init_waitqueue_head(&ctx->wqh); ctx->clockid = clockid; hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); + ctx->moffs = ktime_get_monotonic_offset(); ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); @@ -199,6 +243,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, struct file *file; struct timerfd_ctx *ctx; struct itimerspec ktmr, kotmr; + int ret; if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) return -EFAULT; @@ -240,14 +285,14 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, /* * Re-program the timer to the new value ... 
*/ - timerfd_setup(ctx, flags, &ktmr); + ret = timerfd_setup(ctx, flags, &ktmr); spin_unlock_irq(&ctx->wqh.lock); fput(file); if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) return -EFAULT; - return 0; + return ret; } SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 4135c88fe4fa..eda4ccde0730 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -155,6 +155,7 @@ enum hrtimer_base_type { HRTIMER_BASE_REALTIME, HRTIMER_BASE_MONOTONIC, HRTIMER_BASE_BOOTTIME, + HRTIMER_BASE_REALTIME_COS, HRTIMER_MAX_CLOCK_BASES, }; @@ -310,6 +311,7 @@ extern void hrtimers_resume(void); extern ktime_t ktime_get(void); extern ktime_t ktime_get_real(void); extern ktime_t ktime_get_boottime(void); +extern ktime_t ktime_get_monotonic_offset(void); DECLARE_PER_CPU(struct tick_device, tick_cpu_device); diff --git a/include/linux/time.h b/include/linux/time.h index b3061782dec3..a9242773eb24 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -302,6 +302,12 @@ struct itimerval { * The IDs of various hardware clocks: */ #define CLOCK_SGI_CYCLE 10 + +#ifdef __KERNEL__ +/* This clock is not exposed to user space */ +#define CLOCK_REALTIME_COS 15 +#endif + #define MAX_CLOCKS 16 #define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC) #define CLOCKS_MONO CLOCK_MONOTONIC diff --git a/include/linux/timerfd.h b/include/linux/timerfd.h index 2d0792983f8c..e9571fc8f1a0 100644 --- a/include/linux/timerfd.h +++ b/include/linux/timerfd.h @@ -19,6 +19,7 @@ * shared O_* flags. */ #define TFD_TIMER_ABSTIME (1 << 0) +#define TFD_TIMER_CANCELON_SET (1 << 1) #define TFD_CLOEXEC O_CLOEXEC #define TFD_NONBLOCK O_NONBLOCK @@ -26,6 +27,6 @@ /* Flags for timerfd_create. */ #define TFD_CREATE_FLAGS TFD_SHARED_FCNTL_FLAGS /* Flags for timerfd_settime. 
*/ -#define TFD_SETTIME_FLAGS TFD_TIMER_ABSTIME +#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCELON_SET) #endif /* _LINUX_TIMERFD_H */ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index c145ed643bca..eabcbd781433 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -78,6 +78,11 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .get_time = &ktime_get_boottime, .resolution = KTIME_LOW_RES, }, + { + .index = CLOCK_REALTIME_COS, + .get_time = &ktime_get_real, + .resolution = KTIME_LOW_RES, + }, } }; @@ -85,6 +90,7 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, + [CLOCK_REALTIME_COS] = HRTIMER_BASE_REALTIME_COS, }; static inline int hrtimer_clockid_to_base(clockid_t clock_id) @@ -110,6 +116,7 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim; base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono; base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot; + base->clock_base[HRTIMER_BASE_REALTIME_COS].softirq_time = xtim; } /* @@ -479,6 +486,8 @@ static inline void debug_deactivate(struct hrtimer *timer) trace_hrtimer_cancel(timer); } +static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base); + /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -715,9 +724,14 @@ static void retrigger_next_event(void *arg) struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); struct timespec realtime_offset, xtim, wtm, sleep; - if (!hrtimer_hres_active()) + if (!hrtimer_hres_active()) { + raw_spin_lock(&base->lock); + hrtimer_expire_cancelable(base); + raw_spin_unlock(&base->lock); return; + } + /* Optimized out for !HIGH_RES */ get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); @@ -727,6 +741,10 @@ static void retrigger_next_event(void *arg) timespec_to_ktime(realtime_offset); base->clock_base[HRTIMER_BASE_BOOTTIME].offset = timespec_to_ktime(sleep); + base->clock_base[HRTIMER_BASE_REALTIME_COS].offset = + timespec_to_ktime(realtime_offset); + + hrtimer_expire_cancelable(base); hrtimer_force_reprogram(base, 0); raw_spin_unlock(&base->lock); @@ -1222,6 +1240,22 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) timer->state &= ~HRTIMER_STATE_CALLBACK; } +static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base) +{ + struct timerqueue_node *node; + struct hrtimer_clock_base *base; + ktime_t now = ktime_get_real(); + + base = &cpu_base->clock_base[HRTIMER_BASE_REALTIME_COS]; + + while ((node = timerqueue_getnext(&base->active))) { + struct hrtimer *timer; + + timer = container_of(node, struct hrtimer, node); + __run_hrtimer(timer, &now); + } +} + #ifdef CONFIG_HIGH_RES_TIMERS /* diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index a61b8fa2d39a..342408cf68dd 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1098,6 +1098,21 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, } while (read_seqretry(&xtime_lock, seq)); } +/** + * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format + */ +ktime_t ktime_get_monotonic_offset(void) +{ + unsigned long seq; + struct timespec wtom; + + do { + seq = read_seqbegin(&xtime_lock); + wtom = wall_to_monotonic; + } while (read_seqretry(&xtime_lock, seq)); + return 
timespec_to_ktime(wtom); +} + /** * xtime_update() - advances the timekeeping infrastructure * @ticks: number of ticks, that have elapsed since the last call. -- cgit v1.2.3 From 3dacdf11f1f82b98d301d5e1d42cdaea9a39968a Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Fri, 15 Apr 2011 16:18:38 +0200 Subject: usb: factor out state_string() on otg drivers Provide common otg_state_string() and use it in drivers. Signed-off-by: Anatolij Gustschin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/otg/isp1301_omap.c | 26 +++----------------------- drivers/usb/otg/langwell_otg.c | 40 +++------------------------------------- drivers/usb/otg/otg.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/usb/otg.h | 1 + 4 files changed, 42 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/otg/isp1301_omap.c b/drivers/usb/otg/isp1301_omap.c index e25700f44b6f..8c282258e1bd 100644 --- a/drivers/usb/otg/isp1301_omap.c +++ b/drivers/usb/otg/isp1301_omap.c @@ -234,29 +234,9 @@ isp1301_clear_bits(struct isp1301 *isp, u8 reg, u8 bits) /*-------------------------------------------------------------------------*/ -static const char *state_string(enum usb_otg_state state) -{ - switch (state) { - case OTG_STATE_A_IDLE: return "a_idle"; - case OTG_STATE_A_WAIT_VRISE: return "a_wait_vrise"; - case OTG_STATE_A_WAIT_BCON: return "a_wait_bcon"; - case OTG_STATE_A_HOST: return "a_host"; - case OTG_STATE_A_SUSPEND: return "a_suspend"; - case OTG_STATE_A_PERIPHERAL: return "a_peripheral"; - case OTG_STATE_A_WAIT_VFALL: return "a_wait_vfall"; - case OTG_STATE_A_VBUS_ERR: return "a_vbus_err"; - case OTG_STATE_B_IDLE: return "b_idle"; - case OTG_STATE_B_SRP_INIT: return "b_srp_init"; - case OTG_STATE_B_PERIPHERAL: return "b_peripheral"; - case OTG_STATE_B_WAIT_ACON: return "b_wait_acon"; - case OTG_STATE_B_HOST: return "b_host"; - default: return "UNDEFINED"; - } -} - static inline const char *state_name(struct isp1301 *isp) { - return state_string(isp->otg.state); + return otg_state_string(isp->otg.state); } /*-------------------------------------------------------------------------*/ @@ -501,7 +481,7 @@ static void check_state(struct isp1301 *isp, const char *tag) if (isp->otg.state == state && !extra) return; pr_debug("otg: %s FSM %s/%02x, %s, %06x\n", tag, - state_string(state), fsm, state_name(isp), + otg_state_string(state), fsm, state_name(isp), omap_readl(OTG_CTRL)); } @@ -1095,7 +1075,7 @@ static void isp_update_otg(struct isp1301 *isp, u8 stat) if (state != isp->otg.state) pr_debug(" isp, %s -> %s\n", - state_string(state), state_name(isp)); + otg_state_string(state), state_name(isp)); #ifdef CONFIG_USB_OTG /* update the OTG controller state to match the isp1301; may diff --git a/drivers/usb/otg/langwell_otg.c b/drivers/usb/otg/langwell_otg.c index e973ff19c55a..f08f784086f7 100644 --- a/drivers/usb/otg/langwell_otg.c +++ b/drivers/usb/otg/langwell_otg.c @@ -82,40 +82,6 @@ static struct pci_driver otg_pci_driver = { .resume = langwell_otg_resume, }; -static const char *state_string(enum usb_otg_state state) -{ - switch (state) { - case OTG_STATE_A_IDLE: - return "a_idle"; - case OTG_STATE_A_WAIT_VRISE: - return "a_wait_vrise"; - case OTG_STATE_A_WAIT_BCON: - return "a_wait_bcon"; - case OTG_STATE_A_HOST: - return "a_host"; - case OTG_STATE_A_SUSPEND: - return "a_suspend"; - case OTG_STATE_A_PERIPHERAL: - return "a_peripheral"; - case OTG_STATE_A_WAIT_VFALL: - return "a_wait_vfall"; - case OTG_STATE_A_VBUS_ERR: - return "a_vbus_err"; - case OTG_STATE_B_IDLE: - return 
"b_idle"; - case OTG_STATE_B_SRP_INIT: - return "b_srp_init"; - case OTG_STATE_B_PERIPHERAL: - return "b_peripheral"; - case OTG_STATE_B_WAIT_ACON: - return "b_wait_acon"; - case OTG_STATE_B_HOST: - return "b_host"; - default: - return "UNDEFINED"; - } -} - /* HSM timers */ static inline struct langwell_otg_timer *otg_timer_initializer (void (*function)(unsigned long), unsigned long expires, unsigned long data) @@ -968,7 +934,7 @@ static void langwell_otg_work(struct work_struct *work) pdev = to_pci_dev(lnw->dev); dev_dbg(lnw->dev, "%s: old state = %s\n", __func__, - state_string(iotg->otg.state)); + otg_state_string(iotg->otg.state)); switch (iotg->otg.state) { case OTG_STATE_UNDEFINED: @@ -1703,7 +1669,7 @@ static void langwell_otg_work(struct work_struct *work) } dev_dbg(lnw->dev, "%s: new state = %s\n", __func__, - state_string(iotg->otg.state)); + otg_state_string(iotg->otg.state)); } static ssize_t @@ -1789,7 +1755,7 @@ show_hsm(struct device *_dev, struct device_attribute *attr, char *buf) "b_bus_req = \t%d\n" "b_bus_suspend_tmout = \t%d\n" "b_bus_suspend_vld = \t%d\n", - state_string(iotg->otg.state), + otg_state_string(iotg->otg.state), iotg->hsm.a_bus_resume, iotg->hsm.a_bus_suspend, iotg->hsm.a_conn, diff --git a/drivers/usb/otg/otg.c b/drivers/usb/otg/otg.c index 0a43a7db750f..fb7adeff9ffa 100644 --- a/drivers/usb/otg/otg.c +++ b/drivers/usb/otg/otg.c @@ -64,3 +64,38 @@ int otg_set_transceiver(struct otg_transceiver *x) return 0; } EXPORT_SYMBOL(otg_set_transceiver); + +const char *otg_state_string(enum usb_otg_state state) +{ + switch (state) { + case OTG_STATE_A_IDLE: + return "a_idle"; + case OTG_STATE_A_WAIT_VRISE: + return "a_wait_vrise"; + case OTG_STATE_A_WAIT_BCON: + return "a_wait_bcon"; + case OTG_STATE_A_HOST: + return "a_host"; + case OTG_STATE_A_SUSPEND: + return "a_suspend"; + case OTG_STATE_A_PERIPHERAL: + return "a_peripheral"; + case OTG_STATE_A_WAIT_VFALL: + return "a_wait_vfall"; + case OTG_STATE_A_VBUS_ERR: + return "a_vbus_err"; + case OTG_STATE_B_IDLE: + return "b_idle"; + case OTG_STATE_B_SRP_INIT: + return "b_srp_init"; + case OTG_STATE_B_PERIPHERAL: + return "b_peripheral"; + case OTG_STATE_B_WAIT_ACON: + return "b_wait_acon"; + case OTG_STATE_B_HOST: + return "b_host"; + default: + return "UNDEFINED"; + } +} +EXPORT_SYMBOL(otg_state_string); diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h index 6e40718f5abe..bc84858b3a4d 100644 --- a/include/linux/usb/otg.h +++ b/include/linux/usb/otg.h @@ -246,5 +246,6 @@ otg_unregister_notifier(struct otg_transceiver *otg, struct notifier_block *nb) /* for OTG controller drivers (and maybe other stuff) */ extern int usb_bus_start_enum(struct usb_bus *bus, unsigned port_num); +extern const char *otg_state_string(enum usb_otg_state state); #endif /* __LINUX_USB_OTG_H */ -- cgit v1.2.3 From 64b3c304bed25388fed48dbdc098dfcad7063d9c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 11 Apr 2011 20:19:12 +0200 Subject: usb/ch9: use proper endianess for wBytesPerInterval while going through Tatyana's changes for the gadget framework I noticed that this type is not defined as __le16. 
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Sarah Sharp --- drivers/usb/core/config.c | 2 +- drivers/usb/host/xhci-mem.c | 2 +- include/linux/usb/ch9.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 83126b03e7cf..c962608b4b9a 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -129,7 +129,7 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, max_tx = ep->desc.wMaxPacketSize * (desc->bMaxBurst + 1); else max_tx = 999999; - if (desc->wBytesPerInterval > max_tx) { + if (le16_to_cpu(desc->wBytesPerInterval) > max_tx) { dev_warn(ddev, "%s endpoint with wBytesPerInterval of %d in " "config %d interface %d altsetting %d ep %d: " "setting to %d\n", diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 500ec7a9eb8a..a4fc4d929385 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1130,7 +1130,7 @@ static u32 xhci_get_max_esit_payload(struct xhci_hcd *xhci, return 0; if (udev->speed == USB_SPEED_SUPER) - return ep->ss_ep_comp.wBytesPerInterval; + return le16_to_cpu(ep->ss_ep_comp.wBytesPerInterval); max_packet = GET_MAX_PACKET(le16_to_cpu(ep->desc.wMaxPacketSize)); max_burst = (le16_to_cpu(ep->desc.wMaxPacketSize) & 0x1800) >> 11; diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index b72f305ce6bd..0fd3fbdd8283 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -579,7 +579,7 @@ struct usb_ss_ep_comp_descriptor { __u8 bMaxBurst; __u8 bmAttributes; - __u16 wBytesPerInterval; + __le16 wBytesPerInterval; } __attribute__ ((packed)); #define USB_DT_SS_EP_COMP_SIZE 6 -- cgit v1.2.3 From 13b7ee2a953f07d994b6bc3439cdd4a718de6f80 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Mon, 18 Apr 2011 22:01:55 +0200 Subject: USB: ehci-fsl: add MPC5121E specific suspend and resume Signed-off-by: Anatolij Gustschin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-fsl.c | 153 ++++++++++++++++++++++++++++++++++++++++++++ drivers/usb/host/ehci-fsl.h | 4 ++ include/linux/fsl_devices.h | 15 +++++ 3 files changed, 172 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c index 5c761df7fa83..caf3d4ac42bd 100644 --- a/drivers/usb/host/ehci-fsl.c +++ b/drivers/usb/host/ehci-fsl.c @@ -328,6 +328,149 @@ struct ehci_fsl { #ifdef CONFIG_PM +#ifdef CONFIG_PPC_MPC512x +static int ehci_fsl_mpc512x_drv_suspend(struct device *dev) +{ + struct usb_hcd *hcd = dev_get_drvdata(dev); + struct ehci_hcd *ehci = hcd_to_ehci(hcd); + struct fsl_usb2_platform_data *pdata = dev->platform_data; + u32 tmp; + +#ifdef DEBUG + u32 mode = ehci_readl(ehci, hcd->regs + FSL_SOC_USB_USBMODE); + mode &= USBMODE_CM_MASK; + tmp = ehci_readl(ehci, hcd->regs + 0x140); /* usbcmd */ + + dev_dbg(dev, "suspend=%d already_suspended=%d " + "mode=%d usbcmd %08x\n", pdata->suspended, + pdata->already_suspended, mode, tmp); +#endif + + /* + * If the controller is already suspended, then this must be a + * PM suspend. Remember this fact, so that we will leave the + * controller suspended at PM resume time. 
+ */ + if (pdata->suspended) { + dev_dbg(dev, "already suspended, leaving early\n"); + pdata->already_suspended = 1; + return 0; + } + + dev_dbg(dev, "suspending...\n"); + + hcd->state = HC_STATE_SUSPENDED; + dev->power.power_state = PMSG_SUSPEND; + + /* ignore non-host interrupts */ + clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); + + /* stop the controller */ + tmp = ehci_readl(ehci, &ehci->regs->command); + tmp &= ~CMD_RUN; + ehci_writel(ehci, tmp, &ehci->regs->command); + + /* save EHCI registers */ + pdata->pm_command = ehci_readl(ehci, &ehci->regs->command); + pdata->pm_command &= ~CMD_RUN; + pdata->pm_status = ehci_readl(ehci, &ehci->regs->status); + pdata->pm_intr_enable = ehci_readl(ehci, &ehci->regs->intr_enable); + pdata->pm_frame_index = ehci_readl(ehci, &ehci->regs->frame_index); + pdata->pm_segment = ehci_readl(ehci, &ehci->regs->segment); + pdata->pm_frame_list = ehci_readl(ehci, &ehci->regs->frame_list); + pdata->pm_async_next = ehci_readl(ehci, &ehci->regs->async_next); + pdata->pm_configured_flag = + ehci_readl(ehci, &ehci->regs->configured_flag); + pdata->pm_portsc = ehci_readl(ehci, &ehci->regs->port_status[0]); + pdata->pm_usbgenctrl = ehci_readl(ehci, + hcd->regs + FSL_SOC_USB_USBGENCTRL); + + /* clear the W1C bits */ + pdata->pm_portsc &= cpu_to_hc32(ehci, ~PORT_RWC_BITS); + + pdata->suspended = 1; + + /* clear PP to cut power to the port */ + tmp = ehci_readl(ehci, &ehci->regs->port_status[0]); + tmp &= ~PORT_POWER; + ehci_writel(ehci, tmp, &ehci->regs->port_status[0]); + + return 0; +} + +static int ehci_fsl_mpc512x_drv_resume(struct device *dev) +{ + struct usb_hcd *hcd = dev_get_drvdata(dev); + struct ehci_hcd *ehci = hcd_to_ehci(hcd); + struct fsl_usb2_platform_data *pdata = dev->platform_data; + u32 tmp; + + dev_dbg(dev, "suspend=%d already_suspended=%d\n", + pdata->suspended, pdata->already_suspended); + + /* + * If the controller was already suspended at suspend time, + * then don't resume it now. + */ + if (pdata->already_suspended) { + dev_dbg(dev, "already suspended, leaving early\n"); + pdata->already_suspended = 0; + return 0; + } + + if (!pdata->suspended) { + dev_dbg(dev, "not suspended, leaving early\n"); + return 0; + } + + pdata->suspended = 0; + + dev_dbg(dev, "resuming...\n"); + + /* set host mode */ + tmp = USBMODE_CM_HOST | (pdata->es ? 
USBMODE_ES : 0); + ehci_writel(ehci, tmp, hcd->regs + FSL_SOC_USB_USBMODE); + + ehci_writel(ehci, pdata->pm_usbgenctrl, + hcd->regs + FSL_SOC_USB_USBGENCTRL); + ehci_writel(ehci, ISIPHYCTRL_PXE | ISIPHYCTRL_PHYE, + hcd->regs + FSL_SOC_USB_ISIPHYCTRL); + + /* restore EHCI registers */ + ehci_writel(ehci, pdata->pm_command, &ehci->regs->command); + ehci_writel(ehci, pdata->pm_intr_enable, &ehci->regs->intr_enable); + ehci_writel(ehci, pdata->pm_frame_index, &ehci->regs->frame_index); + ehci_writel(ehci, pdata->pm_segment, &ehci->regs->segment); + ehci_writel(ehci, pdata->pm_frame_list, &ehci->regs->frame_list); + ehci_writel(ehci, pdata->pm_async_next, &ehci->regs->async_next); + ehci_writel(ehci, pdata->pm_configured_flag, + &ehci->regs->configured_flag); + ehci_writel(ehci, pdata->pm_portsc, &ehci->regs->port_status[0]); + + set_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); + hcd->state = HC_STATE_RUNNING; + dev->power.power_state = PMSG_ON; + + tmp = ehci_readl(ehci, &ehci->regs->command); + tmp |= CMD_RUN; + ehci_writel(ehci, tmp, &ehci->regs->command); + + usb_hcd_resume_root_hub(hcd); + + return 0; +} +#else +static inline int ehci_fsl_mpc512x_drv_suspend(struct device *dev) +{ + return 0; +} + +static inline int ehci_fsl_mpc512x_drv_resume(struct device *dev) +{ + return 0; +} +#endif /* CONFIG_PPC_MPC512x */ + static struct ehci_fsl *hcd_to_ehci_fsl(struct usb_hcd *hcd) { struct ehci_hcd *ehci = hcd_to_ehci(hcd); @@ -341,6 +484,11 @@ static int ehci_fsl_drv_suspend(struct device *dev) struct ehci_fsl *ehci_fsl = hcd_to_ehci_fsl(hcd); void __iomem *non_ehci = hcd->regs; + if (of_device_is_compatible(dev->parent->of_node, + "fsl,mpc5121-usb2-dr")) { + return ehci_fsl_mpc512x_drv_suspend(dev); + } + ehci_prepare_ports_for_controller_suspend(hcd_to_ehci(hcd), device_may_wakeup(dev)); if (!fsl_deep_sleep()) @@ -357,6 +505,11 @@ static int ehci_fsl_drv_resume(struct device *dev) struct ehci_hcd *ehci = hcd_to_ehci(hcd); void __iomem *non_ehci = hcd->regs; + if (of_device_is_compatible(dev->parent->of_node, + "fsl,mpc5121-usb2-dr")) { + return ehci_fsl_mpc512x_drv_resume(dev); + } + ehci_prepare_ports_for_controller_resume(ehci); if (!fsl_deep_sleep()) return 0; diff --git a/drivers/usb/host/ehci-fsl.h b/drivers/usb/host/ehci-fsl.h index 3fabed33d940..491806221165 100644 --- a/drivers/usb/host/ehci-fsl.h +++ b/drivers/usb/host/ehci-fsl.h @@ -27,6 +27,10 @@ #define PORT_PTS_SERIAL (3<<30) #define PORT_PTS_PTW (1<<28) #define FSL_SOC_USB_PORTSC2 0x188 +#define FSL_SOC_USB_USBMODE 0x1a8 +#define USBMODE_CM_MASK (3 << 0) /* controller mode mask */ +#define USBMODE_CM_HOST (3 << 0) /* controller mode: host */ +#define USBMODE_ES (1 << 2) /* (Big) Endian Select */ #define FSL_SOC_USB_USBGENCTRL 0x200 #define USBGENCTRL_PPP (1 << 3) diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 4eb56ed75fbc..3773c5dab8f5 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -79,6 +79,21 @@ struct fsl_usb2_platform_data { unsigned have_sysif_regs:1; unsigned invert_drvvbus:1; unsigned invert_pwr_fault:1; + + unsigned suspended:1; + unsigned already_suspended:1; + + /* register save area for suspend/resume */ + u32 pm_command; + u32 pm_status; + u32 pm_intr_enable; + u32 pm_frame_index; + u32 pm_segment; + u32 pm_frame_list; + u32 pm_async_next; + u32 pm_configured_flag; + u32 pm_portsc; + u32 pm_usbgenctrl; }; /* Flags in fsl_usb2_mph_platform_data */ -- cgit v1.2.3 From 83722bc9430424de1614ff31696f73a40b3d81a9 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin 
Date: Mon, 18 Apr 2011 22:02:00 +0200 Subject: USB: extend ehci-fsl and fsl_udc_core driver for OTG operation Signed-off-by: Anatolij Gustschin Cc: Li Yang Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/fsl_udc_core.c | 147 ++++++++++++++++++++++++++++++++++---- drivers/usb/gadget/fsl_usb2_udc.h | 2 + drivers/usb/host/ehci-fsl.c | 66 +++++++++++++++++ drivers/usb/host/ehci-hub.c | 8 +++ drivers/usb/host/ehci.h | 4 ++ include/linux/fsl_devices.h | 1 + 6 files changed, 213 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/gadget/fsl_udc_core.c b/drivers/usb/gadget/fsl_udc_core.c index 28b3a9f25f3b..999eafe89653 100644 --- a/drivers/usb/gadget/fsl_udc_core.c +++ b/drivers/usb/gadget/fsl_udc_core.c @@ -353,6 +353,19 @@ static void dr_controller_stop(struct fsl_udc *udc) { unsigned int tmp; + pr_debug("%s\n", __func__); + + /* if we're in OTG mode, and the Host is currently using the port, + * stop now and don't rip the controller out from under the + * ehci driver + */ + if (udc->gadget.is_otg) { + if (!(fsl_readl(&dr_regs->otgsc) & OTGSC_STS_USB_ID)) { + pr_debug("udc: Leaving early\n"); + return; + } + } + /* disable all INTR */ fsl_writel(0, &dr_regs->usbintr); @@ -1668,6 +1681,9 @@ static void port_change_irq(struct fsl_udc *udc) { u32 speed; + if (udc->bus_reset) + udc->bus_reset = 0; + /* Bus resetting is finished */ if (!(fsl_readl(&dr_regs->portsc1) & PORTSCX_PORT_RESET)) { /* Get the speed */ @@ -1775,6 +1791,8 @@ static void reset_irq(struct fsl_udc *udc) if (fsl_readl(&dr_regs->portsc1) & PORTSCX_PORT_RESET) { VDBG("Bus reset"); + /* Bus is reseting */ + udc->bus_reset = 1; /* Reset all the queues, include XD, dTD, EP queue * head and TR Queue */ reset_queues(udc); @@ -1852,6 +1870,7 @@ static irqreturn_t fsl_udc_irq(int irq, void *_udc) /* Reset Received */ if (irq_src & USB_STS_RESET) { + VDBG("reset int"); reset_irq(udc); status = IRQ_HANDLED; } @@ -1909,11 +1928,30 @@ int usb_gadget_probe_driver(struct usb_gadget_driver *driver, goto out; } - /* Enable DR IRQ reg and Set usbcmd reg Run bit */ - dr_controller_run(udc_controller); - udc_controller->usb_state = USB_STATE_ATTACHED; - udc_controller->ep0_state = WAIT_FOR_SETUP; - udc_controller->ep0_dir = 0; + if (udc_controller->transceiver) { + /* Suspend the controller until OTG enable it */ + udc_controller->stopped = 1; + printk(KERN_INFO "Suspend udc for OTG auto detect\n"); + + /* connect to bus through transceiver */ + if (udc_controller->transceiver) { + retval = otg_set_peripheral(udc_controller->transceiver, + &udc_controller->gadget); + if (retval < 0) { + ERR("can't bind to transceiver\n"); + driver->unbind(&udc_controller->gadget); + udc_controller->gadget.dev.driver = 0; + udc_controller->driver = 0; + return retval; + } + } + } else { + /* Enable DR IRQ reg and set USBCMD reg Run bit */ + dr_controller_run(udc_controller); + udc_controller->usb_state = USB_STATE_ATTACHED; + udc_controller->ep0_state = WAIT_FOR_SETUP; + udc_controller->ep0_dir = 0; + } printk(KERN_INFO "%s: bind to driver %s\n", udc_controller->gadget.name, driver->driver.name); @@ -2374,17 +2412,30 @@ static int __init fsl_udc_probe(struct platform_device *pdev) spin_lock_init(&udc_controller->lock); udc_controller->stopped = 1; +#ifdef CONFIG_USB_OTG + if (pdata->operating_mode == FSL_USB2_DR_OTG) { + udc_controller->transceiver = otg_get_transceiver(); + if (!udc_controller->transceiver) { + ERR("Can't find OTG driver!\n"); + ret = -ENODEV; + goto err_kfree; + } + } +#endif + res = 
platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { ret = -ENXIO; goto err_kfree; } - if (!request_mem_region(res->start, res->end - res->start + 1, - driver_name)) { - ERR("request mem region for %s failed\n", pdev->name); - ret = -EBUSY; - goto err_kfree; + if (pdata->operating_mode == FSL_USB2_DR_DEVICE) { + if (!request_mem_region(res->start, res->end - res->start + 1, + driver_name)) { + ERR("request mem region for %s failed\n", pdev->name); + ret = -EBUSY; + goto err_kfree; + } } dr_regs = ioremap(res->start, resource_size(res)); @@ -2455,9 +2506,11 @@ static int __init fsl_udc_probe(struct platform_device *pdev) goto err_free_irq; } - /* initialize usb hw reg except for regs for EP, - * leave usbintr reg untouched */ - dr_controller_setup(udc_controller); + if (!udc_controller->transceiver) { + /* initialize usb hw reg except for regs for EP, + * leave usbintr reg untouched */ + dr_controller_setup(udc_controller); + } fsl_udc_clk_finalize(pdev); @@ -2477,6 +2530,9 @@ static int __init fsl_udc_probe(struct platform_device *pdev) if (ret < 0) goto err_free_irq; + if (udc_controller->transceiver) + udc_controller->gadget.is_otg = 1; + /* setup QH and epctrl for ep0 */ ep0_setup(udc_controller); @@ -2521,7 +2577,8 @@ err_iounmap: err_iounmap_noclk: iounmap(dr_regs); err_release_mem_region: - release_mem_region(res->start, res->end - res->start + 1); + if (pdata->operating_mode == FSL_USB2_DR_DEVICE) + release_mem_region(res->start, res->end - res->start + 1); err_kfree: kfree(udc_controller); udc_controller = NULL; @@ -2555,7 +2612,8 @@ static int __exit fsl_udc_remove(struct platform_device *pdev) dma_pool_destroy(udc_controller->td_pool); free_irq(udc_controller->irq, udc_controller); iounmap(dr_regs); - release_mem_region(res->start, res->end - res->start + 1); + if (pdata->operating_mode == FSL_USB2_DR_DEVICE) + release_mem_region(res->start, res->end - res->start + 1); device_unregister(&udc_controller->gadget.dev); /* free udc --wait for the release() finished */ @@ -2598,6 +2656,62 @@ static int fsl_udc_resume(struct platform_device *pdev) return 0; } +static int fsl_udc_otg_suspend(struct device *dev, pm_message_t state) +{ + struct fsl_udc *udc = udc_controller; + u32 mode, usbcmd; + + mode = fsl_readl(&dr_regs->usbmode) & USB_MODE_CTRL_MODE_MASK; + + pr_debug("%s(): mode 0x%x stopped %d\n", __func__, mode, udc->stopped); + + /* + * If the controller is already stopped, then this must be a + * PM suspend. Remember this fact, so that we will leave the + * controller stopped at PM resume time. + */ + if (udc->stopped) { + pr_debug("gadget already stopped, leaving early\n"); + udc->already_stopped = 1; + return 0; + } + + if (mode != USB_MODE_CTRL_MODE_DEVICE) { + pr_debug("gadget not in device mode, leaving early\n"); + return 0; + } + + /* stop the controller */ + usbcmd = fsl_readl(&dr_regs->usbcmd) & ~USB_CMD_RUN_STOP; + fsl_writel(usbcmd, &dr_regs->usbcmd); + + udc->stopped = 1; + + pr_info("USB Gadget suspended\n"); + + return 0; +} + +static int fsl_udc_otg_resume(struct device *dev) +{ + pr_debug("%s(): stopped %d already_stopped %d\n", __func__, + udc_controller->stopped, udc_controller->already_stopped); + + /* + * If the controller was stopped at suspend time, then + * don't resume it now. 
+ */ + if (udc_controller->already_stopped) { + udc_controller->already_stopped = 0; + pr_debug("gadget was already stopped, leaving early\n"); + return 0; + } + + pr_info("USB Gadget resume\n"); + + return fsl_udc_resume(NULL); +} + /*------------------------------------------------------------------------- Register entry point for the peripheral controller driver --------------------------------------------------------------------------*/ @@ -2610,6 +2724,9 @@ static struct platform_driver udc_driver = { .driver = { .name = (char *)driver_name, .owner = THIS_MODULE, + /* udc suspend/resume called from OTG driver */ + .suspend = fsl_udc_otg_suspend, + .resume = fsl_udc_otg_resume, }, }; diff --git a/drivers/usb/gadget/fsl_usb2_udc.h b/drivers/usb/gadget/fsl_usb2_udc.h index 5647cc21b84c..1d51be83fda8 100644 --- a/drivers/usb/gadget/fsl_usb2_udc.h +++ b/drivers/usb/gadget/fsl_usb2_udc.h @@ -476,6 +476,7 @@ struct fsl_udc { unsigned vbus_active:1; unsigned stopped:1; unsigned remote_wakeup:1; + unsigned already_stopped:1; unsigned big_endian_desc:1; struct ep_queue_head *ep_qh; /* Endpoints Queue-Head */ @@ -487,6 +488,7 @@ struct fsl_udc { dma_addr_t ep_qh_dma; /* dma address of QH */ u32 max_pipes; /* Device max pipes */ + u32 bus_reset; /* Device is bus resetting */ u32 resume_state; /* USB state to resume */ u32 usb_state; /* USB current state */ u32 ep0_state; /* Endpoint zero state */ diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c index caf3d4ac42bd..623732a312dd 100644 --- a/drivers/usb/host/ehci-fsl.c +++ b/drivers/usb/host/ehci-fsl.c @@ -117,6 +117,9 @@ static int usb_hcd_fsl_probe(const struct hc_driver *driver, pdata->regs = hcd->regs; + if (pdata->power_budget) + hcd->power_budget = pdata->power_budget; + /* * do platform specific init: check the clock, grab/config pins, etc. 
*/ @@ -134,6 +137,30 @@ static int usb_hcd_fsl_probe(const struct hc_driver *driver, retval = usb_add_hcd(hcd, irq, IRQF_DISABLED | IRQF_SHARED); if (retval != 0) goto err4; + +#ifdef CONFIG_USB_OTG + if (pdata->operating_mode == FSL_USB2_DR_OTG) { + struct ehci_hcd *ehci = hcd_to_ehci(hcd); + + ehci->transceiver = otg_get_transceiver(); + dev_dbg(&pdev->dev, "hcd=0x%p ehci=0x%p, transceiver=0x%p\n", + hcd, ehci, ehci->transceiver); + + if (ehci->transceiver) { + retval = otg_set_host(ehci->transceiver, + &ehci_to_hcd(ehci)->self); + if (retval) { + if (ehci->transceiver) + put_device(ehci->transceiver->dev); + goto err4; + } + } else { + dev_err(&pdev->dev, "can't find transceiver\n"); + retval = -ENODEV; + goto err4; + } + } +#endif return retval; err4: @@ -164,6 +191,12 @@ static void usb_hcd_fsl_remove(struct usb_hcd *hcd, struct platform_device *pdev) { struct fsl_usb2_platform_data *pdata = pdev->dev.platform_data; + struct ehci_hcd *ehci = hcd_to_ehci(hcd); + + if (ehci->transceiver) { + otg_set_host(ehci->transceiver, NULL); + put_device(ehci->transceiver->dev); + } usb_remove_hcd(hcd); @@ -544,6 +577,38 @@ static struct dev_pm_ops ehci_fsl_pm_ops = { #define EHCI_FSL_PM_OPS NULL #endif /* CONFIG_PM */ +#ifdef CONFIG_USB_OTG +static int ehci_start_port_reset(struct usb_hcd *hcd, unsigned port) +{ + struct ehci_hcd *ehci = hcd_to_ehci(hcd); + u32 status; + + if (!port) + return -EINVAL; + + port--; + + /* start port reset before HNP protocol time out */ + status = readl(&ehci->regs->port_status[port]); + if (!(status & PORT_CONNECT)) + return -ENODEV; + + /* khubd will finish the reset later */ + if (ehci_is_TDI(ehci)) { + writel(PORT_RESET | + (status & ~(PORT_CSC | PORT_PEC | PORT_OCC)), + &ehci->regs->port_status[port]); + } else { + writel(PORT_RESET, &ehci->regs->port_status[port]); + } + + return 0; +} +#else +#define ehci_start_port_reset NULL +#endif /* CONFIG_USB_OTG */ + + static const struct hc_driver ehci_fsl_hc_driver = { .description = hcd_name, .product_desc = "Freescale On-Chip EHCI Host Controller", @@ -583,6 +648,7 @@ static const struct hc_driver ehci_fsl_hc_driver = { .hub_control = ehci_hub_control, .bus_suspend = ehci_bus_suspend, .bus_resume = ehci_bus_resume, + .start_port_reset = ehci_start_port_reset, .relinquish_port = ehci_relinquish_port, .port_handed_over = ehci_port_handed_over, diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c index 1a21799195af..ea6184bf48d0 100644 --- a/drivers/usb/host/ehci-hub.c +++ b/drivers/usb/host/ehci-hub.c @@ -27,6 +27,7 @@ */ /*-------------------------------------------------------------------------*/ +#include #define PORT_WAKE_BITS (PORT_WKOC_E|PORT_WKDISC_E|PORT_WKCONN_E) @@ -801,6 +802,13 @@ static int ehci_hub_control ( goto error; if (ehci->no_selective_suspend) break; +#ifdef CONFIG_USB_OTG + if ((hcd->self.otg_port == (wIndex + 1)) + && hcd->self.b_hnp_enable) { + otg_start_hnp(ehci->transceiver); + break; + } +#endif if (!(temp & PORT_SUSPEND)) break; if ((temp & PORT_PE) == 0) diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h index 168f1a88c4d0..e9ba8e252489 100644 --- a/drivers/usb/host/ehci.h +++ b/drivers/usb/host/ehci.h @@ -161,6 +161,10 @@ struct ehci_hcd { /* one per controller */ #ifdef DEBUG struct dentry *debug_dir; #endif + /* + * OTG controllers and transceivers need software interaction + */ + struct otg_transceiver *transceiver; }; /* convert between an HCD pointer and the corresponding EHCI_HCD */ diff --git a/include/linux/fsl_devices.h 
b/include/linux/fsl_devices.h index 3773c5dab8f5..fffdf00f87b9 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -72,6 +72,7 @@ struct fsl_usb2_platform_data { void (*exit)(struct platform_device *); void __iomem *regs; /* ioremap'd register base */ struct clk *clk; + unsigned power_budget; /* hcd->power_budget */ unsigned big_endian_mmio:1; unsigned big_endian_desc:1; unsigned es:1; /* need USBMODE:ES */ -- cgit v1.2.3 From 139540170d9d9b7ead3caaf540f161756b356d56 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Wed, 27 Apr 2011 21:07:28 +0530 Subject: USB: ehci: remove structure packing from ehci_def As pointed out by Arnd Bergmann, in include/linux/usb/ehci_def.h, struct ehci_caps is defined with __attribute__((packed)) for no good reason, and this triggers undefined behaviour when using ARM's readl() on pointers to elements of this structure: http://lkml.kernel.org/r/201102021700.20683.arnd@arndb.de The same problem exists with the other two structures in ehci_def.h too, so remove the __attribute__((packed)) from all of them. Cc: Arnd Bergmann Signed-off-by: Rabin Vincent Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/ehci_def.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h index e49dfd45baa4..78799432008e 100644 --- a/include/linux/usb/ehci_def.h +++ b/include/linux/usb/ehci_def.h @@ -52,7 +52,7 @@ struct ehci_caps { #define HCC_PGM_FRAMELISTLEN(p) ((p)&(1 << 1)) /* true: periodic_size changes*/ #define HCC_64BIT_ADDR(p) ((p)&(1)) /* true: can use 64-bit addr */ u8 portroute[8]; /* nibbles for routing - offset 0xC */ -} __attribute__ ((packed)); +}; /* Section 2.3 Host Controller Operational Registers */ @@ -150,7 +150,7 @@ struct ehci_regs { #define PORT_CSC (1<<1) /* connect status change */ #define PORT_CONNECT (1<<0) /* device connected */ #define PORT_RWC_BITS (PORT_CSC | PORT_PEC | PORT_OCC) -} __attribute__ ((packed)); +}; #define USBMODE 0x68 /* USB Device mode */ #define USBMODE_SDIS (1<<3) /* Stream disable */ @@ -194,7 +194,7 @@ struct ehci_dbg_port { u32 data47; u32 address; #define DBGP_EPADDR(dev, ep) (((dev)<<8)|(ep)) -} __attribute__ ((packed)); +}; #ifdef CONFIG_EARLY_PRINTK_DBGP #include -- cgit v1.2.3 From 61ec9016988f5c030e96e3c8a42ee9e11b8517aa Mon Sep 17 00:00:00 2001 From: John Linn Date: Sat, 30 Apr 2011 00:07:43 -0400 Subject: tty/serial: add support for Xilinx PS UART The Xilinx PS Uart is used on the new ARM based SoC. This UART is not compatible with others such that a seperate driver is required. Signed-off-by: John Linn Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/Kconfig | 13 + drivers/tty/serial/Makefile | 1 + drivers/tty/serial/xilinx_uartps.c | 1113 ++++++++++++++++++++++++++++++++++++ include/linux/serial_core.h | 3 + 4 files changed, 1130 insertions(+) create mode 100644 drivers/tty/serial/xilinx_uartps.c (limited to 'include/linux') diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 80484af781e1..84876ec2ed97 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -1612,4 +1612,17 @@ config SERIAL_MXS_AUART_CONSOLE help Enable a MXS AUART port to be the system console. +config SERIAL_XILINX_PS_UART + tristate "Xilinx PS UART support" + select SERIAL_CORE + help + This driver supports the Xilinx PS UART port. 
+ +config SERIAL_XILINX_PS_UART_CONSOLE + bool "Xilinx PS UART console support" + depends on SERIAL_XILINX_PS_UART=y + select SERIAL_CORE_CONSOLE + help + Enable a Xilinx PS UART port to be the system console. + endmenu diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile index fee0690ef8e3..aafddf15992f 100644 --- a/drivers/tty/serial/Makefile +++ b/drivers/tty/serial/Makefile @@ -94,3 +94,4 @@ obj-$(CONFIG_SERIAL_IFX6X60) += ifx6x60.o obj-$(CONFIG_SERIAL_PCH_UART) += pch_uart.o obj-$(CONFIG_SERIAL_MSM_SMD) += msm_smd_tty.o obj-$(CONFIG_SERIAL_MXS_AUART) += mxs-auart.o +obj-$(CONFIG_SERIAL_XILINX_PS_UART) += xilinx_uartps.o diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c new file mode 100644 index 000000000000..19cc1e8149dd --- /dev/null +++ b/drivers/tty/serial/xilinx_uartps.c @@ -0,0 +1,1113 @@ +/* + * Xilinx PS UART driver + * + * 2011 (c) Xilinx Inc. + * + * This program is free software; you can redistribute it + * and/or modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; + * either version 2 of the License, or (at your option) any + * later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#define XUARTPS_TTY_NAME "ttyPS" +#define XUARTPS_NAME "xuartps" +#define XUARTPS_MAJOR 0 /* use dynamic node allocation */ +#define XUARTPS_MINOR 0 /* works best with devtmpfs */ +#define XUARTPS_NR_PORTS 2 +#define XUARTPS_FIFO_SIZE 16 /* FIFO size */ +#define XUARTPS_REGISTER_SPACE 0xFFF + +#define xuartps_readl(offset) ioread32(port->membase + offset) +#define xuartps_writel(val, offset) iowrite32(val, port->membase + offset) + +/********************************Register Map********************************/ +/** UART + * + * Register offsets for the UART. + * + */ +#define XUARTPS_CR_OFFSET 0x00 /* Control Register [8:0] */ +#define XUARTPS_MR_OFFSET 0x04 /* Mode Register [10:0] */ +#define XUARTPS_IER_OFFSET 0x08 /* Interrupt Enable [10:0] */ +#define XUARTPS_IDR_OFFSET 0x0C /* Interrupt Disable [10:0] */ +#define XUARTPS_IMR_OFFSET 0x10 /* Interrupt Mask [10:0] */ +#define XUARTPS_ISR_OFFSET 0x14 /* Interrupt Status [10:0]*/ +#define XUARTPS_BAUDGEN_OFFSET 0x18 /* Baud Rate Generator [15:0] */ +#define XUARTPS_RXTOUT_OFFSET 0x1C /* RX Timeout [7:0] */ +#define XUARTPS_RXWM_OFFSET 0x20 /* RX FIFO Trigger Level [5:0] */ +#define XUARTPS_MODEMCR_OFFSET 0x24 /* Modem Control [5:0] */ +#define XUARTPS_MODEMSR_OFFSET 0x28 /* Modem Status [8:0] */ +#define XUARTPS_SR_OFFSET 0x2C /* Channel Status [11:0] */ +#define XUARTPS_FIFO_OFFSET 0x30 /* FIFO [15:0] or [7:0] */ +#define XUARTPS_BAUDDIV_OFFSET 0x34 /* Baud Rate Divider [7:0] */ +#define XUARTPS_FLOWDEL_OFFSET 0x38 /* Flow Delay [15:0] */ +#define XUARTPS_IRRX_PWIDTH_OFFSET 0x3C /* IR Minimum Received Pulse + Width [15:0] */ +#define XUARTPS_IRTX_PWIDTH_OFFSET 0x40 /* IR Transmitted pulse + Width [7:0] */ +#define XUARTPS_TXWM_OFFSET 0x44 /* TX FIFO Trigger Level [5:0] */ + +/** Control Register + * + * The Control register (CR) controls the major functions of the device. + * + * Control Register Bit Definitions + */ +#define XUARTPS_CR_STOPBRK 0x00000100 /* Stop TX break */ +#define XUARTPS_CR_STARTBRK 0x00000080 /* Set TX break */ +#define XUARTPS_CR_TX_DIS 0x00000020 /* TX disabled. */ +#define XUARTPS_CR_TX_EN 0x00000010 /* TX enabled */ +#define XUARTPS_CR_RX_DIS 0x00000008 /* RX disabled. 
*/ +#define XUARTPS_CR_RX_EN 0x00000004 /* RX enabled */ +#define XUARTPS_CR_TXRST 0x00000002 /* TX logic reset */ +#define XUARTPS_CR_RXRST 0x00000001 /* RX logic reset */ +#define XUARTPS_CR_RST_TO 0x00000040 /* Restart Timeout Counter */ + +/** Mode Register + * + * The mode register (MR) defines the mode of transfer as well as the data + * format. If this register is modified during transmission or reception, + * data validity cannot be guaranteed. + * + * Mode Register Bit Definitions + * + */ +#define XUARTPS_MR_CLKSEL 0x00000001 /* Pre-scalar selection */ +#define XUARTPS_MR_CHMODE_L_LOOP 0x00000200 /* Local loop back mode */ +#define XUARTPS_MR_CHMODE_NORM 0x00000000 /* Normal mode */ + +#define XUARTPS_MR_STOPMODE_2_BIT 0x00000080 /* 2 stop bits */ +#define XUARTPS_MR_STOPMODE_1_BIT 0x00000000 /* 1 stop bit */ + +#define XUARTPS_MR_PARITY_NONE 0x00000020 /* No parity mode */ +#define XUARTPS_MR_PARITY_MARK 0x00000018 /* Mark parity mode */ +#define XUARTPS_MR_PARITY_SPACE 0x00000010 /* Space parity mode */ +#define XUARTPS_MR_PARITY_ODD 0x00000008 /* Odd parity mode */ +#define XUARTPS_MR_PARITY_EVEN 0x00000000 /* Even parity mode */ + +#define XUARTPS_MR_CHARLEN_6_BIT 0x00000006 /* 6 bits data */ +#define XUARTPS_MR_CHARLEN_7_BIT 0x00000004 /* 7 bits data */ +#define XUARTPS_MR_CHARLEN_8_BIT 0x00000000 /* 8 bits data */ + +/** Interrupt Registers + * + * Interrupt control logic uses the interrupt enable register (IER) and the + * interrupt disable register (IDR) to set the value of the bits in the + * interrupt mask register (IMR). The IMR determines whether to pass an + * interrupt to the interrupt status register (ISR). + * Writing a 1 to IER Enables an interrupt, writing a 1 to IDR disables an + * interrupt. IMR and ISR are read only, and IER and IDR are write only. + * Reading either IER or IDR returns 0x00. + * + * All four registers have the same bit definitions. + */ +#define XUARTPS_IXR_TOUT 0x00000100 /* RX Timeout error interrupt */ +#define XUARTPS_IXR_PARITY 0x00000080 /* Parity error interrupt */ +#define XUARTPS_IXR_FRAMING 0x00000040 /* Framing error interrupt */ +#define XUARTPS_IXR_OVERRUN 0x00000020 /* Overrun error interrupt */ +#define XUARTPS_IXR_TXFULL 0x00000010 /* TX FIFO Full interrupt */ +#define XUARTPS_IXR_TXEMPTY 0x00000008 /* TX FIFO empty interrupt */ +#define XUARTPS_ISR_RXEMPTY 0x00000002 /* RX FIFO empty interrupt */ +#define XUARTPS_IXR_RXTRIG 0x00000001 /* RX FIFO trigger interrupt */ +#define XUARTPS_IXR_RXFULL 0x00000004 /* RX FIFO full interrupt. */ +#define XUARTPS_IXR_RXEMPTY 0x00000002 /* RX FIFO empty interrupt. */ +#define XUARTPS_IXR_MASK 0x00001FFF /* Valid bit mask */ + +/** Channel Status Register + * + * The channel status register (CSR) is provided to enable the control logic + * to monitor the status of bits in the channel interrupt status register, + * even if these are masked out by the interrupt mask register. 
+ */ +#define XUARTPS_SR_RXEMPTY 0x00000002 /* RX FIFO empty */ +#define XUARTPS_SR_TXEMPTY 0x00000008 /* TX FIFO empty */ +#define XUARTPS_SR_TXFULL 0x00000010 /* TX FIFO full */ +#define XUARTPS_SR_RXTRIG 0x00000001 /* Rx Trigger */ + +/** + * xuartps_isr - Interrupt handler + * @irq: Irq number + * @dev_id: Id of the port + * + * Returns IRQHANDLED + **/ +static irqreturn_t xuartps_isr(int irq, void *dev_id) +{ + struct uart_port *port = (struct uart_port *)dev_id; + struct tty_struct *tty; + unsigned long flags; + unsigned int isrstatus, numbytes; + unsigned int data; + char status = TTY_NORMAL; + + /* Get the tty which could be NULL so don't assume it's valid */ + tty = tty_port_tty_get(&port->state->port); + + spin_lock_irqsave(&port->lock, flags); + + /* Read the interrupt status register to determine which + * interrupt(s) is/are active. + */ + isrstatus = xuartps_readl(XUARTPS_ISR_OFFSET); + + /* drop byte with parity error if IGNPAR specified */ + if (isrstatus & port->ignore_status_mask & XUARTPS_IXR_PARITY) + isrstatus &= ~(XUARTPS_IXR_RXTRIG | XUARTPS_IXR_TOUT); + + isrstatus &= port->read_status_mask; + isrstatus &= ~port->ignore_status_mask; + + if ((isrstatus & XUARTPS_IXR_TOUT) || + (isrstatus & XUARTPS_IXR_RXTRIG)) { + /* Receive Timeout Interrupt */ + while ((xuartps_readl(XUARTPS_SR_OFFSET) & + XUARTPS_SR_RXEMPTY) != XUARTPS_SR_RXEMPTY) { + data = xuartps_readl(XUARTPS_FIFO_OFFSET); + port->icount.rx++; + + if (isrstatus & XUARTPS_IXR_PARITY) { + port->icount.parity++; + status = TTY_PARITY; + } else if (isrstatus & XUARTPS_IXR_FRAMING) { + port->icount.frame++; + status = TTY_FRAME; + } else if (isrstatus & XUARTPS_IXR_OVERRUN) + port->icount.overrun++; + + if (tty) + uart_insert_char(port, isrstatus, + XUARTPS_IXR_OVERRUN, data, + status); + } + spin_unlock(&port->lock); + if (tty) + tty_flip_buffer_push(tty); + spin_lock(&port->lock); + } + + /* Dispatch an appropriate handler */ + if ((isrstatus & XUARTPS_IXR_TXEMPTY) == XUARTPS_IXR_TXEMPTY) { + if (uart_circ_empty(&port->state->xmit)) { + xuartps_writel(XUARTPS_IXR_TXEMPTY, + XUARTPS_IDR_OFFSET); + } else { + numbytes = port->fifosize; + /* Break if no more data available in the UART buffer */ + while (numbytes--) { + if (uart_circ_empty(&port->state->xmit)) + break; + /* Get the data from the UART circular buffer + * and write it to the xuartps's TX_FIFO + * register. + */ + xuartps_writel( + port->state->xmit.buf[port->state->xmit. + tail], XUARTPS_FIFO_OFFSET); + + port->icount.tx++; + + /* Adjust the tail of the UART buffer and wrap + * the buffer if it reaches limit. 
+ */ + port->state->xmit.tail = + (port->state->xmit.tail + 1) & \ + (UART_XMIT_SIZE - 1); + } + + if (uart_circ_chars_pending( + &port->state->xmit) < WAKEUP_CHARS) + uart_write_wakeup(port); + } + } + + xuartps_writel(isrstatus, XUARTPS_ISR_OFFSET); + + /* be sure to release the lock and tty before leaving */ + spin_unlock_irqrestore(&port->lock, flags); + tty_kref_put(tty); + + return IRQ_HANDLED; +} + +/** + * xuartps_set_baud_rate - Calculate and set the baud rate + * @port: Handle to the uart port structure + * @baud: Baud rate to set + * + * Returns baud rate, requested baud when possible, or actual baud when there + * was too much error + **/ +static unsigned int xuartps_set_baud_rate(struct uart_port *port, + unsigned int baud) +{ + unsigned int sel_clk; + unsigned int calc_baud = 0; + unsigned int brgr_val, brdiv_val; + unsigned int bauderror; + + /* Formula to obtain baud rate is + * baud_tx/rx rate = sel_clk/CD * (BDIV + 1) + * input_clk = (Uart User Defined Clock or Apb Clock) + * depends on UCLKEN in MR Reg + * sel_clk = input_clk or input_clk/8; + * depends on CLKS in MR reg + * CD and BDIV depends on values in + * baud rate generate register + * baud rate clock divisor register + */ + sel_clk = port->uartclk; + if (xuartps_readl(XUARTPS_MR_OFFSET) & XUARTPS_MR_CLKSEL) + sel_clk = sel_clk / 8; + + /* Find the best values for baud generation */ + for (brdiv_val = 4; brdiv_val < 255; brdiv_val++) { + + brgr_val = sel_clk / (baud * (brdiv_val + 1)); + if (brgr_val < 2 || brgr_val > 65535) + continue; + + calc_baud = sel_clk / (brgr_val * (brdiv_val + 1)); + + if (baud > calc_baud) + bauderror = baud - calc_baud; + else + bauderror = calc_baud - baud; + + /* use the values when percent error is acceptable */ + if (((bauderror * 100) / baud) < 3) { + calc_baud = baud; + break; + } + } + + /* Set the values for the new baud rate */ + xuartps_writel(brgr_val, XUARTPS_BAUDGEN_OFFSET); + xuartps_writel(brdiv_val, XUARTPS_BAUDDIV_OFFSET); + + return calc_baud; +} + +/*----------------------Uart Operations---------------------------*/ + +/** + * xuartps_start_tx - Start transmitting bytes + * @port: Handle to the uart port structure + * + **/ +static void xuartps_start_tx(struct uart_port *port) +{ + unsigned int status, numbytes = port->fifosize; + + if (uart_circ_empty(&port->state->xmit) || uart_tx_stopped(port)) + return; + + status = xuartps_readl(XUARTPS_CR_OFFSET); + /* Set the TX enable bit and clear the TX disable bit to enable the + * transmitter. + */ + xuartps_writel((status & ~XUARTPS_CR_TX_DIS) | XUARTPS_CR_TX_EN, + XUARTPS_CR_OFFSET); + + while (numbytes-- && ((xuartps_readl(XUARTPS_SR_OFFSET) + & XUARTPS_SR_TXFULL)) != XUARTPS_SR_TXFULL) { + + /* Break if no more data available in the UART buffer */ + if (uart_circ_empty(&port->state->xmit)) + break; + + /* Get the data from the UART circular buffer and + * write it to the xuartps's TX_FIFO register. + */ + xuartps_writel( + port->state->xmit.buf[port->state->xmit.tail], + XUARTPS_FIFO_OFFSET); + port->icount.tx++; + + /* Adjust the tail of the UART buffer and wrap + * the buffer if it reaches limit. 
+ */ + port->state->xmit.tail = (port->state->xmit.tail + 1) & + (UART_XMIT_SIZE - 1); + } + + /* Enable the TX Empty interrupt */ + xuartps_writel(XUARTPS_IXR_TXEMPTY, XUARTPS_IER_OFFSET); + + if (uart_circ_chars_pending(&port->state->xmit) < WAKEUP_CHARS) + uart_write_wakeup(port); +} + +/** + * xuartps_stop_tx - Stop TX + * @port: Handle to the uart port structure + * + **/ +static void xuartps_stop_tx(struct uart_port *port) +{ + unsigned int regval; + + regval = xuartps_readl(XUARTPS_CR_OFFSET); + regval |= XUARTPS_CR_TX_DIS; + /* Disable the transmitter */ + xuartps_writel(regval, XUARTPS_CR_OFFSET); +} + +/** + * xuartps_stop_rx - Stop RX + * @port: Handle to the uart port structure + * + **/ +static void xuartps_stop_rx(struct uart_port *port) +{ + unsigned int regval; + + regval = xuartps_readl(XUARTPS_CR_OFFSET); + regval |= XUARTPS_CR_RX_DIS; + /* Disable the receiver */ + xuartps_writel(regval, XUARTPS_CR_OFFSET); +} + +/** + * xuartps_tx_empty - Check whether TX is empty + * @port: Handle to the uart port structure + * + * Returns TIOCSER_TEMT on success, 0 otherwise + **/ +static unsigned int xuartps_tx_empty(struct uart_port *port) +{ + unsigned int status; + + status = xuartps_readl(XUARTPS_ISR_OFFSET) & XUARTPS_IXR_TXEMPTY; + return status ? TIOCSER_TEMT : 0; +} + +/** + * xuartps_break_ctl - Based on the input ctl we have to start or stop + * transmitting char breaks + * @port: Handle to the uart port structure + * @ctl: Value based on which start or stop decision is taken + * + **/ +static void xuartps_break_ctl(struct uart_port *port, int ctl) +{ + unsigned int status; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + + status = xuartps_readl(XUARTPS_CR_OFFSET); + + if (ctl == -1) + xuartps_writel(XUARTPS_CR_STARTBRK | status, + XUARTPS_CR_OFFSET); + else { + if ((status & XUARTPS_CR_STOPBRK) == 0) + xuartps_writel(XUARTPS_CR_STOPBRK | status, + XUARTPS_CR_OFFSET); + } + spin_unlock_irqrestore(&port->lock, flags); +} + +/** + * xuartps_set_termios - termios operations, handling data length, parity, + * stop bits, flow control, baud rate + * @port: Handle to the uart port structure + * @termios: Handle to the input termios structure + * @old: Values of the previously saved termios structure + * + **/ +static void xuartps_set_termios(struct uart_port *port, + struct ktermios *termios, struct ktermios *old) +{ + unsigned int cval = 0; + unsigned int baud; + unsigned long flags; + unsigned int ctrl_reg, mode_reg; + + spin_lock_irqsave(&port->lock, flags); + + /* Empty the receive FIFO 1st before making changes */ + while ((xuartps_readl(XUARTPS_SR_OFFSET) & + XUARTPS_SR_RXEMPTY) != XUARTPS_SR_RXEMPTY) { + xuartps_readl(XUARTPS_FIFO_OFFSET); + } + + /* Disable the TX and RX to set baud rate */ + xuartps_writel(xuartps_readl(XUARTPS_CR_OFFSET) | + (XUARTPS_CR_TX_DIS | XUARTPS_CR_RX_DIS), + XUARTPS_CR_OFFSET); + + /* Min baud rate = 6bps and Max Baud Rate is 10Mbps for 100Mhz clk */ + baud = uart_get_baud_rate(port, termios, old, 0, 10000000); + baud = xuartps_set_baud_rate(port, baud); + if (tty_termios_baud_rate(termios)) + tty_termios_encode_baud_rate(termios, baud, baud); + + /* + * Update the per-port timeout. 
+ */ + uart_update_timeout(port, termios->c_cflag, baud); + + /* Set TX/RX Reset */ + xuartps_writel(xuartps_readl(XUARTPS_CR_OFFSET) | + (XUARTPS_CR_TXRST | XUARTPS_CR_RXRST), + XUARTPS_CR_OFFSET); + + ctrl_reg = xuartps_readl(XUARTPS_CR_OFFSET); + + /* Clear the RX disable and TX disable bits and then set the TX enable + * bit and RX enable bit to enable the transmitter and receiver. + */ + xuartps_writel( + (ctrl_reg & ~(XUARTPS_CR_TX_DIS | XUARTPS_CR_RX_DIS)) + | (XUARTPS_CR_TX_EN | XUARTPS_CR_RX_EN), + XUARTPS_CR_OFFSET); + + xuartps_writel(10, XUARTPS_RXTOUT_OFFSET); + + port->read_status_mask = XUARTPS_IXR_TXEMPTY | XUARTPS_IXR_RXTRIG | + XUARTPS_IXR_OVERRUN | XUARTPS_IXR_TOUT; + port->ignore_status_mask = 0; + + if (termios->c_iflag & INPCK) + port->read_status_mask |= XUARTPS_IXR_PARITY | + XUARTPS_IXR_FRAMING; + + if (termios->c_iflag & IGNPAR) + port->ignore_status_mask |= XUARTPS_IXR_PARITY | + XUARTPS_IXR_FRAMING | XUARTPS_IXR_OVERRUN; + + /* ignore all characters if CREAD is not set */ + if ((termios->c_cflag & CREAD) == 0) + port->ignore_status_mask |= XUARTPS_IXR_RXTRIG | + XUARTPS_IXR_TOUT | XUARTPS_IXR_PARITY | + XUARTPS_IXR_FRAMING | XUARTPS_IXR_OVERRUN; + + mode_reg = xuartps_readl(XUARTPS_MR_OFFSET); + + /* Handling Data Size */ + switch (termios->c_cflag & CSIZE) { + case CS6: + cval |= XUARTPS_MR_CHARLEN_6_BIT; + break; + case CS7: + cval |= XUARTPS_MR_CHARLEN_7_BIT; + break; + default: + case CS8: + cval |= XUARTPS_MR_CHARLEN_8_BIT; + termios->c_cflag &= ~CSIZE; + termios->c_cflag |= CS8; + break; + } + + /* Handling Parity and Stop Bits length */ + if (termios->c_cflag & CSTOPB) + cval |= XUARTPS_MR_STOPMODE_2_BIT; /* 2 STOP bits */ + else + cval |= XUARTPS_MR_STOPMODE_1_BIT; /* 1 STOP bit */ + + if (termios->c_cflag & PARENB) { + /* Mark or Space parity */ + if (termios->c_cflag & CMSPAR) { + if (termios->c_cflag & PARODD) + cval |= XUARTPS_MR_PARITY_MARK; + else + cval |= XUARTPS_MR_PARITY_SPACE; + } else if (termios->c_cflag & PARODD) + cval |= XUARTPS_MR_PARITY_ODD; + else + cval |= XUARTPS_MR_PARITY_EVEN; + } else + cval |= XUARTPS_MR_PARITY_NONE; + xuartps_writel(cval , XUARTPS_MR_OFFSET); + + spin_unlock_irqrestore(&port->lock, flags); +} + +/** + * xuartps_startup - Called when an application opens a xuartps port + * @port: Handle to the uart port structure + * + * Returns 0 on success, negative error otherwise + **/ +static int xuartps_startup(struct uart_port *port) +{ + unsigned int retval = 0, status = 0; + + retval = request_irq(port->irq, xuartps_isr, 0, XUARTPS_NAME, + (void *)port); + if (retval) + return retval; + + /* Disable the TX and RX */ + xuartps_writel(XUARTPS_CR_TX_DIS | XUARTPS_CR_RX_DIS, + XUARTPS_CR_OFFSET); + + /* Set the Control Register with TX/RX Enable, TX/RX Reset, + * no break chars. + */ + xuartps_writel(XUARTPS_CR_TXRST | XUARTPS_CR_RXRST, + XUARTPS_CR_OFFSET); + + status = xuartps_readl(XUARTPS_CR_OFFSET); + + /* Clear the RX disable and TX disable bits and then set the TX enable + * bit and RX enable bit to enable the transmitter and receiver. + */ + xuartps_writel((status & ~(XUARTPS_CR_TX_DIS | XUARTPS_CR_RX_DIS)) + | (XUARTPS_CR_TX_EN | XUARTPS_CR_RX_EN | + XUARTPS_CR_STOPBRK), XUARTPS_CR_OFFSET); + + /* Set the Mode Register with normal mode,8 data bits,1 stop bit, + * no parity. 
+ */ + xuartps_writel(XUARTPS_MR_CHMODE_NORM | XUARTPS_MR_STOPMODE_1_BIT + | XUARTPS_MR_PARITY_NONE | XUARTPS_MR_CHARLEN_8_BIT, + XUARTPS_MR_OFFSET); + + /* Set the RX FIFO Trigger level to 14 assuming FIFO size as 16 */ + xuartps_writel(14, XUARTPS_RXWM_OFFSET); + + /* Receive Timeout register is enabled with value of 10 */ + xuartps_writel(10, XUARTPS_RXTOUT_OFFSET); + + + /* Set the Interrupt Registers with desired interrupts */ + xuartps_writel(XUARTPS_IXR_TXEMPTY | XUARTPS_IXR_PARITY | + XUARTPS_IXR_FRAMING | XUARTPS_IXR_OVERRUN | + XUARTPS_IXR_RXTRIG | XUARTPS_IXR_TOUT, XUARTPS_IER_OFFSET); + xuartps_writel(~(XUARTPS_IXR_TXEMPTY | XUARTPS_IXR_PARITY | + XUARTPS_IXR_FRAMING | XUARTPS_IXR_OVERRUN | + XUARTPS_IXR_RXTRIG | XUARTPS_IXR_TOUT), XUARTPS_IDR_OFFSET); + + return retval; +} + +/** + * xuartps_shutdown - Called when an application closes a xuartps port + * @port: Handle to the uart port structure + * + **/ +static void xuartps_shutdown(struct uart_port *port) +{ + int status; + + /* Disable interrupts */ + status = xuartps_readl(XUARTPS_IMR_OFFSET); + xuartps_writel(status, XUARTPS_IDR_OFFSET); + + /* Disable the TX and RX */ + xuartps_writel(XUARTPS_CR_TX_DIS | XUARTPS_CR_RX_DIS, + XUARTPS_CR_OFFSET); + free_irq(port->irq, port); +} + +/** + * xuartps_type - Set UART type to xuartps port + * @port: Handle to the uart port structure + * + * Returns string on success, NULL otherwise + **/ +static const char *xuartps_type(struct uart_port *port) +{ + return port->type == PORT_XUARTPS ? XUARTPS_NAME : NULL; +} + +/** + * xuartps_verify_port - Verify the port params + * @port: Handle to the uart port structure + * @ser: Handle to the structure whose members are compared + * + * Returns 0 if success otherwise -EINVAL + **/ +static int xuartps_verify_port(struct uart_port *port, + struct serial_struct *ser) +{ + if (ser->type != PORT_UNKNOWN && ser->type != PORT_XUARTPS) + return -EINVAL; + if (port->irq != ser->irq) + return -EINVAL; + if (ser->io_type != UPIO_MEM) + return -EINVAL; + if (port->iobase != ser->port) + return -EINVAL; + if (ser->hub6 != 0) + return -EINVAL; + return 0; +} + +/** + * xuartps_request_port - Claim the memory region attached to xuartps port, + * called when the driver adds a xuartps port via + * uart_add_one_port() + * @port: Handle to the uart port structure + * + * Returns 0, -ENOMEM if request fails + **/ +static int xuartps_request_port(struct uart_port *port) +{ + if (!request_mem_region(port->mapbase, XUARTPS_REGISTER_SPACE, + XUARTPS_NAME)) { + return -ENOMEM; + } + + port->membase = ioremap(port->mapbase, XUARTPS_REGISTER_SPACE); + if (!port->membase) { + dev_err(port->dev, "Unable to map registers\n"); + release_mem_region(port->mapbase, XUARTPS_REGISTER_SPACE); + return -ENOMEM; + } + return 0; +} + +/** + * xuartps_release_port - Release the memory region attached to a xuartps + * port, called when the driver removes a xuartps + * port via uart_remove_one_port(). 
+ * @port: Handle to the uart port structure + * + **/ +static void xuartps_release_port(struct uart_port *port) +{ + release_mem_region(port->mapbase, XUARTPS_REGISTER_SPACE); + iounmap(port->membase); + port->membase = NULL; +} + +/** + * xuartps_config_port - Configure xuartps, called when the driver adds a + * xuartps port + * @port: Handle to the uart port structure + * @flags: If any + * + **/ +static void xuartps_config_port(struct uart_port *port, int flags) +{ + if (flags & UART_CONFIG_TYPE && xuartps_request_port(port) == 0) + port->type = PORT_XUARTPS; +} + +/** + * xuartps_get_mctrl - Get the modem control state + * + * @port: Handle to the uart port structure + * + * Returns the modem control state + * + **/ +static unsigned int xuartps_get_mctrl(struct uart_port *port) +{ + return TIOCM_CTS | TIOCM_DSR | TIOCM_CAR; +} + +static void xuartps_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ + /* N/A */ +} + +static void xuartps_enable_ms(struct uart_port *port) +{ + /* N/A */ +} + +/** The UART operations structure + */ +static struct uart_ops xuartps_ops = { + .set_mctrl = xuartps_set_mctrl, + .get_mctrl = xuartps_get_mctrl, + .enable_ms = xuartps_enable_ms, + + .start_tx = xuartps_start_tx, /* Start transmitting */ + .stop_tx = xuartps_stop_tx, /* Stop transmission */ + .stop_rx = xuartps_stop_rx, /* Stop reception */ + .tx_empty = xuartps_tx_empty, /* Transmitter busy? */ + .break_ctl = xuartps_break_ctl, /* Start/stop + * transmitting break + */ + .set_termios = xuartps_set_termios, /* Set termios */ + .startup = xuartps_startup, /* App opens xuartps */ + .shutdown = xuartps_shutdown, /* App closes xuartps */ + .type = xuartps_type, /* Set UART type */ + .verify_port = xuartps_verify_port, /* Verification of port + * params + */ + .request_port = xuartps_request_port, /* Claim resources + * associated with a + * xuartps port + */ + .release_port = xuartps_release_port, /* Release resources + * associated with a + * xuartps port + */ + .config_port = xuartps_config_port, /* Configure when driver + * adds a xuartps port + */ +}; + +static struct uart_port xuartps_port[2]; + +/** + * xuartps_get_port - Configure the port from the platform device resource + * info + * + * Returns a pointer to a uart_port or NULL for failure + **/ +static struct uart_port *xuartps_get_port(void) +{ + struct uart_port *port; + int id; + + /* Find the next unused port */ + for (id = 0; id < XUARTPS_NR_PORTS; id++) + if (xuartps_port[id].mapbase == 0) + break; + + if (id >= XUARTPS_NR_PORTS) + return NULL; + + port = &xuartps_port[id]; + + /* At this point, we've got an empty uart_port struct, initialize it */ + spin_lock_init(&port->lock); + port->membase = NULL; + port->iobase = 1; /* mark port in use */ + port->irq = 0; + port->type = PORT_UNKNOWN; + port->iotype = UPIO_MEM32; + port->flags = UPF_BOOT_AUTOCONF; + port->ops = &xuartps_ops; + port->fifosize = XUARTPS_FIFO_SIZE; + port->line = id; + port->dev = NULL; + return port; +} + +/*-----------------------Console driver operations--------------------------*/ + +#ifdef CONFIG_SERIAL_XILINX_PS_UART_CONSOLE +/** + * xuartps_console_wait_tx - Wait for the TX to be full + * @port: Handle to the uart port structure + * + **/ +static void xuartps_console_wait_tx(struct uart_port *port) +{ + while ((xuartps_readl(XUARTPS_SR_OFFSET) & XUARTPS_SR_TXEMPTY) + != XUARTPS_SR_TXEMPTY) + barrier(); +} + +/** + * xuartps_console_putchar - write the character to the FIFO buffer + * @port: Handle to the uart port structure + * @ch: Character to be 
written + * + **/ +static void xuartps_console_putchar(struct uart_port *port, int ch) +{ + xuartps_console_wait_tx(port); + xuartps_writel(ch, XUARTPS_FIFO_OFFSET); +} + +/** + * xuartps_console_write - perform write operation + * @port: Handle to the uart port structure + * @s: Pointer to character array + * @count: No of characters + **/ +static void xuartps_console_write(struct console *co, const char *s, + unsigned int count) +{ + struct uart_port *port = &xuartps_port[co->index]; + unsigned long flags; + unsigned int imr; + int locked = 1; + + if (oops_in_progress) + locked = spin_trylock_irqsave(&port->lock, flags); + else + spin_lock_irqsave(&port->lock, flags); + + /* save and disable interrupt */ + imr = xuartps_readl(XUARTPS_IMR_OFFSET); + xuartps_writel(imr, XUARTPS_IDR_OFFSET); + + uart_console_write(port, s, count, xuartps_console_putchar); + xuartps_console_wait_tx(port); + + /* restore interrupt state, it seems like there may be a h/w bug + * in that the interrupt enable register should not need to be + * written based on the data sheet + */ + xuartps_writel(~imr, XUARTPS_IDR_OFFSET); + xuartps_writel(imr, XUARTPS_IER_OFFSET); + + if (locked) + spin_unlock_irqrestore(&port->lock, flags); +} + +/** + * xuartps_console_setup - Initialize the uart to default config + * @co: Console handle + * @options: Initial settings of uart + * + * Returns 0, -ENODEV if no device + **/ +static int __init xuartps_console_setup(struct console *co, char *options) +{ + struct uart_port *port = &xuartps_port[co->index]; + int baud = 9600; + int bits = 8; + int parity = 'n'; + int flow = 'n'; + + if (co->index < 0 || co->index >= XUARTPS_NR_PORTS) + return -EINVAL; + + if (!port->mapbase) { + pr_debug("console on ttyPS%i not present\n", co->index); + return -ENODEV; + } + + if (options) + uart_parse_options(options, &baud, &parity, &bits, &flow); + + return uart_set_options(port, co, baud, parity, bits, flow); +} + +static struct uart_driver xuartps_uart_driver; + +static struct console xuartps_console = { + .name = XUARTPS_TTY_NAME, + .write = xuartps_console_write, + .device = uart_console_device, + .setup = xuartps_console_setup, + .flags = CON_PRINTBUFFER, + .index = -1, /* Specified on the cmdline (e.g. 
console=ttyPS ) */ + .data = &xuartps_uart_driver, +}; + +/** + * xuartps_console_init - Initialization call + * + * Returns 0 on success, negative error otherwise + **/ +static int __init xuartps_console_init(void) +{ + register_console(&xuartps_console); + return 0; +} + +console_initcall(xuartps_console_init); + +#endif /* CONFIG_SERIAL_XILINX_PS_UART_CONSOLE */ + +/** Structure Definitions + */ +static struct uart_driver xuartps_uart_driver = { + .owner = THIS_MODULE, /* Owner */ + .driver_name = XUARTPS_NAME, /* Driver name */ + .dev_name = XUARTPS_TTY_NAME, /* Node name */ + .major = XUARTPS_MAJOR, /* Major number */ + .minor = XUARTPS_MINOR, /* Minor number */ + .nr = XUARTPS_NR_PORTS, /* Number of UART ports */ +#ifdef CONFIG_SERIAL_XILINX_PS_UART_CONSOLE + .cons = &xuartps_console, /* Console */ +#endif +}; + +/* --------------------------------------------------------------------- + * Platform bus binding + */ +/** + * xuartps_probe - Platform driver probe + * @pdev: Pointer to the platform device structure + * + * Returns 0 on success, negative error otherwise + **/ +static int __devinit xuartps_probe(struct platform_device *pdev) +{ + int rc; + struct uart_port *port; + struct resource *res, *res2; + int clk = 0; + +#ifdef CONFIG_OF + const unsigned int *prop; + + prop = of_get_property(pdev->dev.of_node, "clock", NULL); + if (prop) + clk = be32_to_cpup(prop); +#else + clk = *((unsigned int *)(pdev->dev.platform_data)); +#endif + if (!clk) { + dev_err(&pdev->dev, "no clock specified\n"); + return -ENODEV; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + res2 = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!res2) + return -ENODEV; + + /* Initialize the port structure */ + port = xuartps_get_port(); + + if (!port) { + dev_err(&pdev->dev, "Cannot get uart_port structure\n"); + return -ENODEV; + } else { + /* Register the port. + * This function also registers this device with the tty layer + * and triggers invocation of the config_port() entry point. + */ + port->mapbase = res->start; + port->irq = res2->start; + port->dev = &pdev->dev; + port->uartclk = clk; + dev_set_drvdata(&pdev->dev, port); + rc = uart_add_one_port(&xuartps_uart_driver, port); + if (rc) { + dev_err(&pdev->dev, + "uart_add_one_port() failed; err=%i\n", rc); + dev_set_drvdata(&pdev->dev, NULL); + return rc; + } + return 0; + } +} + +/** + * xuartps_remove - called when the platform driver is unregistered + * @pdev: Pointer to the platform device structure + * + * Returns 0 on success, negative error otherwise + **/ +static int __devexit xuartps_remove(struct platform_device *pdev) +{ + struct uart_port *port = dev_get_drvdata(&pdev->dev); + int rc = 0; + + /* Remove the xuartps port from the serial core */ + if (port) { + rc = uart_remove_one_port(&xuartps_uart_driver, port); + dev_set_drvdata(&pdev->dev, NULL); + port->mapbase = 0; + } + return rc; +} + +/** + * xuartps_suspend - suspend event + * @pdev: Pointer to the platform device structure + * @state: State of the device + * + * Returns 0 + **/ +static int xuartps_suspend(struct platform_device *pdev, pm_message_t state) +{ + /* Call the API provided in serial_core.c file which handles + * the suspend. 
+ */ + uart_suspend_port(&xuartps_uart_driver, &xuartps_port[pdev->id]); + return 0; +} + +/** + * xuartps_resume - Resume after a previous suspend + * @pdev: Pointer to the platform device structure + * + * Returns 0 + **/ +static int xuartps_resume(struct platform_device *pdev) +{ + uart_resume_port(&xuartps_uart_driver, &xuartps_port[pdev->id]); + return 0; +} + +/* Match table for of_platform binding */ + +#ifdef CONFIG_OF +static struct of_device_id xuartps_of_match[] __devinitdata = { + { .compatible = "xlnx,xuartps", }, + {} +}; +MODULE_DEVICE_TABLE(of, xuartps_of_match); +#else +#define xuartps_of_match NULL +#endif + +static struct platform_driver xuartps_platform_driver = { + .probe = xuartps_probe, /* Probe method */ + .remove = __exit_p(xuartps_remove), /* Detach method */ + .suspend = xuartps_suspend, /* Suspend */ + .resume = xuartps_resume, /* Resume after a suspend */ + .driver = { + .owner = THIS_MODULE, + .name = XUARTPS_NAME, /* Driver name */ + .of_match_table = xuartps_of_match, + }, +}; + +/* --------------------------------------------------------------------- + * Module Init and Exit + */ +/** + * xuartps_init - Initial driver registration call + * + * Returns whether the registration was successful or not + **/ +static int __init xuartps_init(void) +{ + int retval = 0; + + /* Register the xuartps driver with the serial core */ + retval = uart_register_driver(&xuartps_uart_driver); + if (retval) + return retval; + + /* Register the platform driver */ + retval = platform_driver_register(&xuartps_platform_driver); + if (retval) + uart_unregister_driver(&xuartps_uart_driver); + + return retval; +} + +/** + * xuartps_exit - Driver unregistration call + **/ +static void __exit xuartps_exit(void) +{ + /* The order of unregistration is important. Unregistering the + * UART driver before the platform driver would crash the system. + */ + + /* Unregister the platform driver */ + platform_driver_unregister(&xuartps_platform_driver); + + /* Unregister the xuartps driver */ + uart_unregister_driver(&xuartps_uart_driver); +} + +module_init(xuartps_init); +module_exit(xuartps_exit); + +MODULE_DESCRIPTION("Driver for PS UART"); +MODULE_AUTHOR("Xilinx Inc."); +MODULE_LICENSE("GPL"); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 758c5b0c6fd3..95d479ba514e 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -202,6 +202,9 @@ /* VIA VT8500 SoC */ #define PORT_VT8500 97 +/* Xilinx PSS UART */ +#define PORT_XUARTPS 98 + #ifdef __KERNEL__ #include -- cgit v1.2.3 From c430131a02d677aa708f56342c1565edfdacb3c0 Mon Sep 17 00:00:00 2001 From: Jan Andersson Date: Tue, 3 May 2011 20:11:57 +0200 Subject: USB: EHCI: Support controllers with big endian capability regs The first two HC capability registers (CAPLENGTH and HCIVERSION) are defined as one 8-bit and one 16-bit register. Most HC implementations have selected to treat these registers as part of a 32-bit register, giving the same layout for both big- and little-endian systems. This patch adds a new quirk, big_endian_capbase, to support controllers with big-endian register interfaces that treat HCIVERSION and CAPLENGTH as individual registers.
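To make the layout difference concrete, here is a minimal, self-contained C sketch of how the same 32-bit read of hc_capbase decodes under the two layouts. It is not the kernel code itself; the decode_capbase() helper and the example register values are invented for illustration, but the bit positions mirror the HC_LENGTH()/HC_VERSION() macro changes in the patch below.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static void decode_capbase(uint32_t capbase, bool big_endian_capbase,
                           unsigned int *caplength, unsigned int *hciversion)
{
        if (big_endian_capbase) {
                /* Individual big-endian registers: the byte at offset 0
                 * (CAPLENGTH) lands in bits 31:24 of the 32-bit read and
                 * HCIVERSION (offset 2) lands in bits 15:0.
                 */
                *caplength  = (capbase >> 24) & 0xff;
                *hciversion = capbase & 0xffff;
        } else {
                /* Combined 32-bit register (the common case): CAPLENGTH
                 * sits in bits 7:0 and HCIVERSION in bits 31:16.
                 */
                *caplength  = capbase & 0xff;
                *hciversion = (capbase >> 16) & 0xffff;
        }
}

int main(void)
{
        unsigned int len, ver;

        /* Hypothetical EHCI 1.00 controller: CAPLENGTH 0x20, HCIVERSION 0x0100 */
        decode_capbase(0x01000020, false, &len, &ver);
        printf("combined layout:    CAPLENGTH=0x%02x HCIVERSION=0x%04x\n", len, ver);

        /* Same registers seen through a big-endian capability interface */
        decode_capbase(0x20000100, true, &len, &ver);
        printf("big-endian capbase: CAPLENGTH=0x%02x HCIVERSION=0x%04x\n", len, ver);
        return 0;
}

Keying the shift off a per-controller flag keeps the common combined-register case unchanged while letting the quirky hardware reuse the same code path, which is why the patch threads the ehci pointer through the two macros.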
Signed-off-by: Jan Andersson Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/early/ehci-dbgp.c | 5 ++++- drivers/usb/host/ehci-ath79.c | 8 ++++---- drivers/usb/host/ehci-atmel.c | 2 +- drivers/usb/host/ehci-au1xxx.c | 3 ++- drivers/usb/host/ehci-cns3xxx.c | 2 +- drivers/usb/host/ehci-dbg.c | 2 +- drivers/usb/host/ehci-fsl.c | 2 +- drivers/usb/host/ehci-hcd.c | 2 +- drivers/usb/host/ehci-ixp4xx.c | 2 +- drivers/usb/host/ehci-msm.c | 2 +- drivers/usb/host/ehci-mxc.c | 2 +- drivers/usb/host/ehci-octeon.c | 2 +- drivers/usb/host/ehci-omap.c | 2 +- drivers/usb/host/ehci-orion.c | 2 +- drivers/usb/host/ehci-pci.c | 2 +- drivers/usb/host/ehci-pmcmsp.c | 2 +- drivers/usb/host/ehci-ppc-of.c | 2 +- drivers/usb/host/ehci-ps3.c | 2 +- drivers/usb/host/ehci-s5p.c | 3 ++- drivers/usb/host/ehci-sh.c | 2 +- drivers/usb/host/ehci-spear.c | 2 +- drivers/usb/host/ehci-tegra.c | 2 +- drivers/usb/host/ehci-vt8500.c | 3 ++- drivers/usb/host/ehci-w90x900.c | 2 +- drivers/usb/host/ehci-xilinx-of.c | 2 +- drivers/usb/host/ehci.h | 7 +++++++ include/linux/usb/ehci_def.h | 9 +++++++-- 27 files changed, 48 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/early/ehci-dbgp.c b/drivers/usb/early/ehci-dbgp.c index a6a350f5827b..1fc8f1249806 100644 --- a/drivers/usb/early/ehci-dbgp.c +++ b/drivers/usb/early/ehci-dbgp.c @@ -102,6 +102,9 @@ static struct kgdb_io kgdbdbgp_io_ops; #define dbgp_kgdb_mode (0) #endif +/* Local version of HC_LENGTH macro as ehci struct is not available here */ +#define EARLY_HC_LENGTH(p) (0x00ff & (p)) /* bits 7 : 0 */ + /* * USB Packet IDs (PIDs) */ @@ -892,7 +895,7 @@ int __init early_dbgp_init(char *s) dbgp_printk("ehci_bar: %p\n", ehci_bar); ehci_caps = ehci_bar; - ehci_regs = ehci_bar + HC_LENGTH(readl(&ehci_caps->hc_capbase)); + ehci_regs = ehci_bar + EARLY_HC_LENGTH(readl(&ehci_caps->hc_capbase)); ehci_debug = ehci_bar + offset; ehci_dev.bus = bus; ehci_dev.slot = slot; diff --git a/drivers/usb/host/ehci-ath79.c b/drivers/usb/host/ehci-ath79.c index 7ea23b50f5d8..98cc8a13169c 100644 --- a/drivers/usb/host/ehci-ath79.c +++ b/drivers/usb/host/ehci-ath79.c @@ -44,6 +44,7 @@ static int ehci_ath79_init(struct usb_hcd *hcd) struct ehci_hcd *ehci = hcd_to_ehci(hcd); struct platform_device *pdev = to_platform_device(hcd->self.controller); const struct platform_device_id *id; + int hclength; int ret; id = platform_get_device_id(pdev); @@ -52,21 +53,20 @@ static int ehci_ath79_init(struct usb_hcd *hcd) return -EINVAL; } + hclength = HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); switch (id->driver_data) { case EHCI_ATH79_IP_V1: ehci->has_synopsys_hc_bug = 1; ehci->caps = hcd->regs; - ehci->regs = hcd->regs + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + ehci->regs = hcd->regs + hclength; break; case EHCI_ATH79_IP_V2: hcd->has_tt = 1; ehci->caps = hcd->regs + 0x100; - ehci->regs = hcd->regs + 0x100 + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + ehci->regs = hcd->regs + 0x100 + hclength; break; default: diff --git a/drivers/usb/host/ehci-atmel.c b/drivers/usb/host/ehci-atmel.c index b2ed55cb811d..a5a3ef1f0096 100644 --- a/drivers/usb/host/ehci-atmel.c +++ b/drivers/usb/host/ehci-atmel.c @@ -56,7 +56,7 @@ static int ehci_atmel_setup(struct usb_hcd *hcd) /* registers start at offset 0x0 */ ehci->caps = hcd->regs; ehci->regs = hcd->regs + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); dbg_hcs_params(ehci, "reset"); dbg_hcc_params(ehci, 
"reset"); diff --git a/drivers/usb/host/ehci-au1xxx.c b/drivers/usb/host/ehci-au1xxx.c index a869e3c103d3..40b002869ac2 100644 --- a/drivers/usb/host/ehci-au1xxx.c +++ b/drivers/usb/host/ehci-au1xxx.c @@ -175,7 +175,8 @@ static int ehci_hcd_au1xxx_drv_probe(struct platform_device *pdev) ehci = hcd_to_ehci(hcd); ehci->caps = hcd->regs; - ehci->regs = hcd->regs + HC_LENGTH(readl(&ehci->caps->hc_capbase)); + ehci->regs = hcd->regs + + HC_LENGTH(ehci, readl(&ehci->caps->hc_capbase)); /* cache this readonly data; minimize chip reads */ ehci->hcs_params = readl(&ehci->caps->hcs_params); diff --git a/drivers/usb/host/ehci-cns3xxx.c b/drivers/usb/host/ehci-cns3xxx.c index 708a05b5d258..d41745c6f0c4 100644 --- a/drivers/usb/host/ehci-cns3xxx.c +++ b/drivers/usb/host/ehci-cns3xxx.c @@ -34,7 +34,7 @@ static int cns3xxx_ehci_init(struct usb_hcd *hcd) ehci->caps = hcd->regs; ehci->regs = hcd->regs - + HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); ehci->hcs_params = ehci_readl(ehci, &ehci->caps->hcs_params); hcd->has_tt = 0; diff --git a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c index 693c29b30521..40a844c1dbb4 100644 --- a/drivers/usb/host/ehci-dbg.c +++ b/drivers/usb/host/ehci-dbg.c @@ -726,7 +726,7 @@ static ssize_t fill_registers_buffer(struct debug_buffer *buf) } /* Capability Registers */ - i = HC_VERSION(ehci_readl(ehci, &ehci->caps->hc_capbase)); + i = HC_VERSION(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); temp = scnprintf (next, size, "bus %s, device %s\n" "%s\n" diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c index 623732a312dd..f380bf97e5af 100644 --- a/drivers/usb/host/ehci-fsl.c +++ b/drivers/usb/host/ehci-fsl.c @@ -324,7 +324,7 @@ static int ehci_fsl_setup(struct usb_hcd *hcd) /* EHCI registers start at offset 0x100 */ ehci->caps = hcd->regs + 0x100; ehci->regs = hcd->regs + 0x100 + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); dbg_hcs_params(ehci, "reset"); dbg_hcc_params(ehci, "reset"); diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 83b7d5f02a15..8164ffafd10a 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -739,7 +739,7 @@ static int ehci_run (struct usb_hcd *hcd) up_write(&ehci_cf_port_reset_rwsem); ehci->last_periodic_enable = ktime_get_real(); - temp = HC_VERSION(ehci_readl(ehci, &ehci->caps->hc_capbase)); + temp = HC_VERSION(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); ehci_info (ehci, "USB %x.%x started, EHCI %x.%02x%s\n", ((ehci->sbrn & 0xf0)>>4), (ehci->sbrn & 0x0f), diff --git a/drivers/usb/host/ehci-ixp4xx.c b/drivers/usb/host/ehci-ixp4xx.c index 89b7c70c6ed6..50e600d26e28 100644 --- a/drivers/usb/host/ehci-ixp4xx.c +++ b/drivers/usb/host/ehci-ixp4xx.c @@ -23,7 +23,7 @@ static int ixp4xx_ehci_init(struct usb_hcd *hcd) ehci->caps = hcd->regs + 0x100; ehci->regs = hcd->regs + 0x100 - + HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); ehci->hcs_params = ehci_readl(ehci, &ehci->caps->hcs_params); hcd->has_tt = 1; diff --git a/drivers/usb/host/ehci-msm.c b/drivers/usb/host/ehci-msm.c index 9ce1b0bc186d..b5a0bf649c95 100644 --- a/drivers/usb/host/ehci-msm.c +++ b/drivers/usb/host/ehci-msm.c @@ -41,7 +41,7 @@ static int ehci_msm_reset(struct usb_hcd *hcd) ehci->caps = USB_CAPLENGTH; ehci->regs = USB_CAPLENGTH + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + 
HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); dbg_hcs_params(ehci, "reset"); dbg_hcc_params(ehci, "reset"); diff --git a/drivers/usb/host/ehci-mxc.c b/drivers/usb/host/ehci-mxc.c index 25c8c10bb689..0c058be35a38 100644 --- a/drivers/usb/host/ehci-mxc.c +++ b/drivers/usb/host/ehci-mxc.c @@ -208,7 +208,7 @@ static int ehci_mxc_drv_probe(struct platform_device *pdev) /* EHCI registers start at offset 0x100 */ ehci->caps = hcd->regs + 0x100; ehci->regs = hcd->regs + 0x100 + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); /* set up the PORTSCx register */ ehci_writel(ehci, pdata->portsc, &ehci->regs->port_status[0]); diff --git a/drivers/usb/host/ehci-octeon.c b/drivers/usb/host/ehci-octeon.c index a31a031178a8..ff55757ba7d8 100644 --- a/drivers/usb/host/ehci-octeon.c +++ b/drivers/usb/host/ehci-octeon.c @@ -151,7 +151,7 @@ static int ehci_octeon_drv_probe(struct platform_device *pdev) ehci->caps = hcd->regs; ehci->regs = hcd->regs + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); /* cache this readonly data; minimize chip reads */ ehci->hcs_params = ehci_readl(ehci, &ehci->caps->hcs_params); diff --git a/drivers/usb/host/ehci-omap.c b/drivers/usb/host/ehci-omap.c index 7e41a95c5ceb..3c482dc99ece 100644 --- a/drivers/usb/host/ehci-omap.c +++ b/drivers/usb/host/ehci-omap.c @@ -188,7 +188,7 @@ static int ehci_hcd_omap_probe(struct platform_device *pdev) /* we know this is the memory we want, no need to ioremap again */ omap_ehci->caps = hcd->regs; omap_ehci->regs = hcd->regs - + HC_LENGTH(readl(&omap_ehci->caps->hc_capbase)); + + HC_LENGTH(ehci, readl(&omap_ehci->caps->hc_capbase)); dbg_hcs_params(omap_ehci, "reset"); dbg_hcc_params(omap_ehci, "reset"); diff --git a/drivers/usb/host/ehci-orion.c b/drivers/usb/host/ehci-orion.c index 281e094e1c18..395bdb0248d5 100644 --- a/drivers/usb/host/ehci-orion.c +++ b/drivers/usb/host/ehci-orion.c @@ -251,7 +251,7 @@ static int __devinit ehci_orion_drv_probe(struct platform_device *pdev) ehci = hcd_to_ehci(hcd); ehci->caps = hcd->regs + 0x100; ehci->regs = hcd->regs + 0x100 + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); ehci->hcs_params = ehci_readl(ehci, &ehci->caps->hcs_params); hcd->has_tt = 1; ehci->sbrn = 0x20; diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index d5eaea7caf89..660b80a75cac 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -70,7 +70,7 @@ static int ehci_pci_setup(struct usb_hcd *hcd) ehci->caps = hcd->regs; ehci->regs = hcd->regs + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); dbg_hcs_params(ehci, "reset"); dbg_hcc_params(ehci, "reset"); diff --git a/drivers/usb/host/ehci-pmcmsp.c b/drivers/usb/host/ehci-pmcmsp.c index a2168642175b..cd69099cda19 100644 --- a/drivers/usb/host/ehci-pmcmsp.c +++ b/drivers/usb/host/ehci-pmcmsp.c @@ -83,7 +83,7 @@ static int ehci_msp_setup(struct usb_hcd *hcd) ehci->caps = hcd->regs; ehci->regs = hcd->regs + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); dbg_hcs_params(ehci, "reset"); dbg_hcc_params(ehci, "reset"); diff --git a/drivers/usb/host/ehci-ppc-of.c b/drivers/usb/host/ehci-ppc-of.c index 1f09f253697e..8552db6c29c9 100644 --- a/drivers/usb/host/ehci-ppc-of.c +++ 
b/drivers/usb/host/ehci-ppc-of.c @@ -179,7 +179,7 @@ static int __devinit ehci_hcd_ppc_of_probe(struct platform_device *op) ehci->caps = hcd->regs; ehci->regs = hcd->regs + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); /* cache this readonly data; minimize chip reads */ ehci->hcs_params = ehci_readl(ehci, &ehci->caps->hcs_params); diff --git a/drivers/usb/host/ehci-ps3.c b/drivers/usb/host/ehci-ps3.c index 1dee33b9139e..64626a777d61 100644 --- a/drivers/usb/host/ehci-ps3.c +++ b/drivers/usb/host/ehci-ps3.c @@ -29,7 +29,7 @@ static int ps3_ehci_hc_reset(struct usb_hcd *hcd) ehci->big_endian_mmio = 1; ehci->caps = hcd->regs; - ehci->regs = hcd->regs + HC_LENGTH(ehci_readl(ehci, + ehci->regs = hcd->regs + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); dbg_hcs_params(ehci, "reset"); diff --git a/drivers/usb/host/ehci-s5p.c b/drivers/usb/host/ehci-s5p.c index 0c18f280bf4c..321a03301ad2 100644 --- a/drivers/usb/host/ehci-s5p.c +++ b/drivers/usb/host/ehci-s5p.c @@ -126,7 +126,8 @@ static int s5p_ehci_probe(struct platform_device *pdev) ehci = hcd_to_ehci(hcd); ehci->caps = hcd->regs; - ehci->regs = hcd->regs + HC_LENGTH(readl(&ehci->caps->hc_capbase)); + ehci->regs = hcd->regs + + HC_LENGTH(ehci, readl(&ehci->caps->hc_capbase)); dbg_hcs_params(ehci, "reset"); dbg_hcc_params(ehci, "reset"); diff --git a/drivers/usb/host/ehci-sh.c b/drivers/usb/host/ehci-sh.c index 595f70f42b52..86a95bb80a61 100644 --- a/drivers/usb/host/ehci-sh.c +++ b/drivers/usb/host/ehci-sh.c @@ -23,7 +23,7 @@ static int ehci_sh_reset(struct usb_hcd *hcd) int ret; ehci->caps = hcd->regs; - ehci->regs = hcd->regs + HC_LENGTH(ehci_readl(ehci, + ehci->regs = hcd->regs + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); dbg_hcs_params(ehci, "reset"); diff --git a/drivers/usb/host/ehci-spear.c b/drivers/usb/host/ehci-spear.c index 75c00873443d..dbf1e4ef3c17 100644 --- a/drivers/usb/host/ehci-spear.c +++ b/drivers/usb/host/ehci-spear.c @@ -38,7 +38,7 @@ static int ehci_spear_setup(struct usb_hcd *hcd) /* registers start at offset 0x0 */ ehci->caps = hcd->regs; - ehci->regs = hcd->regs + HC_LENGTH(ehci_readl(ehci, + ehci->regs = hcd->regs + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); /* cache this readonly data; minimize chip reads */ ehci->hcs_params = ehci_readl(ehci, &ehci->caps->hcs_params); diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c index 7359bcbe4176..02b2bfd49a10 100644 --- a/drivers/usb/host/ehci-tegra.c +++ b/drivers/usb/host/ehci-tegra.c @@ -400,7 +400,7 @@ static int tegra_ehci_setup(struct usb_hcd *hcd) /* EHCI registers start at offset 0x100 */ ehci->caps = hcd->regs + 0x100; ehci->regs = hcd->regs + 0x100 + - HC_LENGTH(readl(&ehci->caps->hc_capbase)); + HC_LENGTH(ehci, readl(&ehci->caps->hc_capbase)); dbg_hcs_params(ehci, "reset"); dbg_hcc_params(ehci, "reset"); diff --git a/drivers/usb/host/ehci-vt8500.c b/drivers/usb/host/ehci-vt8500.c index 20168062035a..47d749631bc7 100644 --- a/drivers/usb/host/ehci-vt8500.c +++ b/drivers/usb/host/ehci-vt8500.c @@ -121,7 +121,8 @@ static int vt8500_ehci_drv_probe(struct platform_device *pdev) ehci = hcd_to_ehci(hcd); ehci->caps = hcd->regs; - ehci->regs = hcd->regs + HC_LENGTH(readl(&ehci->caps->hc_capbase)); + ehci->regs = hcd->regs + + HC_LENGTH(ehci, readl(&ehci->caps->hc_capbase)); dbg_hcs_params(ehci, "reset"); dbg_hcc_params(ehci, "reset"); diff --git a/drivers/usb/host/ehci-w90x900.c b/drivers/usb/host/ehci-w90x900.c index 
6bc35809a5c6..52a027aaa370 100644 --- a/drivers/usb/host/ehci-w90x900.c +++ b/drivers/usb/host/ehci-w90x900.c @@ -57,7 +57,7 @@ static int __devinit usb_w90x900_probe(const struct hc_driver *driver, ehci = hcd_to_ehci(hcd); ehci->caps = hcd->regs; ehci->regs = hcd->regs + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); /* enable PHY 0,1,the regs only apply to w90p910 * 0xA4,0xA8 were offsets of PHY0 and PHY1 controller of diff --git a/drivers/usb/host/ehci-xilinx-of.c b/drivers/usb/host/ehci-xilinx-of.c index effc58d7af8b..a64d6d66d760 100644 --- a/drivers/usb/host/ehci-xilinx-of.c +++ b/drivers/usb/host/ehci-xilinx-of.c @@ -220,7 +220,7 @@ static int __devinit ehci_hcd_xilinx_of_probe(struct platform_device *op) */ ehci->caps = hcd->regs + 0x100; ehci->regs = hcd->regs + 0x100 + - HC_LENGTH(ehci_readl(ehci, &ehci->caps->hc_capbase)); + HC_LENGTH(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); /* cache this readonly data; minimize chip reads */ ehci->hcs_params = ehci_readl(ehci, &ehci->caps->hcs_params); diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h index e9ba8e252489..d0792f591590 100644 --- a/drivers/usb/host/ehci.h +++ b/drivers/usb/host/ehci.h @@ -128,6 +128,7 @@ struct ehci_hcd { /* one per controller */ unsigned has_fsl_port_bug:1; /* FreeScale */ unsigned big_endian_mmio:1; unsigned big_endian_desc:1; + unsigned big_endian_capbase:1; unsigned has_amcc_usb23:1; unsigned need_io_watchdog:1; unsigned broken_periodic:1; @@ -605,12 +606,18 @@ ehci_port_speed(struct ehci_hcd *ehci, unsigned int portsc) * This attempts to support either format at compile time without a * runtime penalty, or both formats with the additional overhead * of checking a flag bit. + * + * ehci_big_endian_capbase is a special quirk for controllers that + * implement the HC capability registers as separate registers and not + * as fields of a 32-bit register. */ #ifdef CONFIG_USB_EHCI_BIG_ENDIAN_MMIO #define ehci_big_endian_mmio(e) ((e)->big_endian_mmio) +#define ehci_big_endian_capbase(e) ((e)->big_endian_capbase) #else #define ehci_big_endian_mmio(e) 0 +#define ehci_big_endian_capbase(e) 0 #endif /* diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h index 78799432008e..7cc95ee3606b 100644 --- a/include/linux/usb/ehci_def.h +++ b/include/linux/usb/ehci_def.h @@ -25,10 +25,15 @@ struct ehci_caps { /* these fields are specified as 8 and 16 bit registers, * but some hosts can't perform 8 or 16 bit PCI accesses. + * some hosts treat caplength and hciversion as parts of a 32-bit + * register, others treat them as two separate registers, this + * affects the memory map for big endian controllers. */ u32 hc_capbase; -#define HC_LENGTH(p) (((p)>>00)&0x00ff) /* bits 7:0 */ -#define HC_VERSION(p) (((p)>>16)&0xffff) /* bits 31:16 */ +#define HC_LENGTH(ehci, p) (0x00ff&((p) >> /* bits 7:0 / offset 00h */ \ + (ehci_big_endian_capbase(ehci) ? 24 : 0))) +#define HC_VERSION(ehci, p) (0xffff&((p) >> /* bits 31:16 / offset 02h */ \ + (ehci_big_endian_capbase(ehci) ? 0 : 16))) u32 hcs_params; /* HCSPARAMS - offset 0x4 */ #define HCS_DEBUG_PORT(p) (((p)>>20)&0xf) /* bits 23:20, debug port? 
*/ #define HCS_INDICATOR(p) ((p)&(1 << 16)) /* true: has port indicators */ -- cgit v1.2.3 From 57a503c61db077b923e23f36050c02166a4a1db2 Mon Sep 17 00:00:00 2001 From: Viresh KUMAR Date: Mon, 2 May 2011 18:36:45 +0000 Subject: net/stmmac: Move "#include " to linux/stmmac.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stmmac.h uses struct platform_device and doesn't include . Whereas drivers/net/stmmac/stmmac.h includes it, but doesn't directly use it. And so we get following compilation warning while using this file: warning: ‘struct platform_device’ declared inside parameter list This patch includes in linux/stmmac.h and removes it from drivers/net/stmmac/stmmac.h Signed-off-by: Viresh Kumar Acked-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/stmmac/stmmac.h | 1 - include/linux/stmmac.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/stmmac/stmmac.h b/drivers/net/stmmac/stmmac.h index 5f06c4706abe..2b076b313622 100644 --- a/drivers/net/stmmac/stmmac.h +++ b/drivers/net/stmmac/stmmac.h @@ -21,7 +21,6 @@ *******************************************************************************/ #define DRV_MODULE_VERSION "Nov_2010" -#include #include #include "common.h" diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index f29197a4b227..9529e49b0385 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -26,6 +26,8 @@ #ifndef __STMMAC_PLATFORM_DATA #define __STMMAC_PLATFORM_DATA +#include + /* platform data for platform device structure's platform_data field */ /* Private data for the STM on-board ethernet driver */ -- cgit v1.2.3 From 99b38b4acc0d7dbbab443273577cff60080fcfad Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 6 Dec 2010 12:43:33 +1000 Subject: platform/x86: add MXM WMI driver. MXM is a laptop graphics card form-factor + interface specification, this adds an initial stub driver to talk to the MXM WMI interface. The only method used is the MUX switching method needed to do switchable graphics on the nvidia chipsets. Signed-off-by: Dave Airlie Acked-by: Matthew Garrett --- drivers/platform/x86/Kconfig | 7 ++++ drivers/platform/x86/Makefile | 1 + drivers/platform/x86/mxm-wmi.c | 85 ++++++++++++++++++++++++++++++++++++++++++ include/linux/mxm-wmi.h | 32 ++++++++++++++++ 4 files changed, 125 insertions(+) create mode 100644 drivers/platform/x86/mxm-wmi.c create mode 100644 include/linux/mxm-wmi.h (limited to 'include/linux') diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 0485e394712a..94914572dd7f 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -753,4 +753,11 @@ config SAMSUNG_LAPTOP To compile this driver as a module, choose M here: the module will be called samsung-laptop. 
+config MXM_WMI + tristate "WMI support for MXM Laptop Graphics" + depends on WMI + ---help--- + MXM is a standard for laptop graphics cards, the WMI interface + is required for switchable nvidia graphics machines + endif # X86_PLATFORM_DEVICES diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index 029e8861d086..a7ab3bc7b3a1 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -42,3 +42,4 @@ obj-$(CONFIG_XO15_EBOOK) += xo15-ebook.o obj-$(CONFIG_IBM_RTL) += ibm_rtl.o obj-$(CONFIG_SAMSUNG_LAPTOP) += samsung-laptop.o obj-$(CONFIG_INTEL_MFLD_THERMAL) += intel_mid_thermal.o +obj-$(CONFIG_MXM_WMI) += mxm-wmi.o diff --git a/drivers/platform/x86/mxm-wmi.c b/drivers/platform/x86/mxm-wmi.c new file mode 100644 index 000000000000..12b6f341e72b --- /dev/null +++ b/drivers/platform/x86/mxm-wmi.c @@ -0,0 +1,85 @@ +/* + * MXM WMI driver + * + * Copyright(C) 2010 Red Hat. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Dave Airlie"); +MODULE_DESCRIPTION("MXM WMI Driver"); +MODULE_LICENSE("GPL"); + +#define MXM_WMMX_GUID "F6CB5C3C-9CAE-4EBD-B577-931EA32A2CC0" + +MODULE_ALIAS("wmi:"MXM_WMMX_GUID); + +#define MXM_WMMX_FUNC_MXDS 0x5344584D /* "MXDS" */ + +struct mxds_args { + u32 func; + u32 args; + u32 xarg; +}; + +int mxm_wmi_call_mxds(int adapter) +{ + struct mxds_args args = { + .func = MXM_WMMX_FUNC_MXDS, + .args = 0, + .xarg = 1, + }; + struct acpi_buffer input = { (acpi_size)sizeof(args), &args }; + struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; + acpi_status status; + + printk("calling mux switch %d\n", adapter); + + status = wmi_evaluate_method(MXM_WMMX_GUID, 0x1, adapter, &input, + &output); + + if (ACPI_FAILURE(status)) + return status; + + printk("mux switched %d\n", status); + return 0; + +} +EXPORT_SYMBOL_GPL(mxm_wmi_call_mxds); + +bool mxm_wmi_supported(void) +{ + bool guid_valid; + guid_valid = wmi_has_guid(MXM_WMMX_GUID); + return guid_valid; +} +EXPORT_SYMBOL_GPL(mxm_wmi_supported); + +static int __init mxm_wmi_init(void) +{ + return 0; +} + +static void __exit mxm_wmi_exit(void) +{ +} + +module_init(mxm_wmi_init); +module_exit(mxm_wmi_exit); diff --git a/include/linux/mxm-wmi.h b/include/linux/mxm-wmi.h new file mode 100644 index 000000000000..51359c0718bf --- /dev/null +++ b/include/linux/mxm-wmi.h @@ -0,0 +1,32 @@ +/* + * MXM WMI driver + * + * Copyright(C) 2010 Red Hat. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef MXM_WMI_H +#define MXM_WMI_H + +/* discrete adapters */ +#define MXM_MXDS_ADAPTER_0 0x0 +#define MXM_MXDS_ADAPTER_1 0x0 +/* integrated adapter */ +#define MXM_MXDS_ADAPTER_IGD 0x10 +int mxm_wmi_call_mxds(int adapter); +bool mxm_wmi_supported(void); + +#endif -- cgit v1.2.3 From 3448a19da479b6bd1e28e2a2be9fa16c6a6feb39 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 1 Jun 2010 15:32:24 +1000 Subject: vgaarb: use bridges to control VGA routing where possible. So in a lot of modern systems, a GPU will always be below a parent bridge that won't share with any other GPUs. This means VGA arbitration on those GPUs can be controlled by using the bridge routing instead of io/mem decodes. The problem is locating which GPUs share which upstream bridges. This patch attempts to identify all the GPUs which can be controlled via bridges, and ones that can't. This patch endeavours to work out the bridge sharing semantics. When disabling GPUs via a bridge, it doesn't do irq callbacks or touch the io/mem decodes for the gpu. Signed-off-by: Dave Airlie --- drivers/gpu/vga/vgaarb.c | 113 +++++++++++++++++++++++++++++++++++++++++------ drivers/pci/pci.c | 25 ++++++----- include/linux/pci.h | 7 ++- 3 files changed, 118 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c index be8d4cb5861c..8a1021f2e319 100644 --- a/drivers/gpu/vga/vgaarb.c +++ b/drivers/gpu/vga/vgaarb.c @@ -61,7 +61,7 @@ struct vga_device { unsigned int mem_lock_cnt; /* legacy MEM lock count */ unsigned int io_norm_cnt; /* normal IO count */ unsigned int mem_norm_cnt; /* normal MEM count */ - + bool bridge_has_one_vga; /* allow IRQ enable/disable hook */ void *cookie; void (*irq_set_state)(void *cookie, bool enable); @@ -165,6 +165,8 @@ static struct vga_device *__vga_tryget(struct vga_device *vgadev, unsigned int wants, legacy_wants, match; struct vga_device *conflict; unsigned int pci_bits; + u32 flags = 0; + /* Account for "normal" resources to lock. 
If we decode the legacy, * counterpart, we need to request it as well */ @@ -237,16 +239,23 @@ static struct vga_device *__vga_tryget(struct vga_device *vgadev, /* looks like he doesn't have a lock, we can steal * them from him */ - vga_irq_set_state(conflict, false); + flags = 0; pci_bits = 0; - if (lwants & (VGA_RSRC_LEGACY_MEM|VGA_RSRC_NORMAL_MEM)) - pci_bits |= PCI_COMMAND_MEMORY; - if (lwants & (VGA_RSRC_LEGACY_IO|VGA_RSRC_NORMAL_IO)) - pci_bits |= PCI_COMMAND_IO; - pci_set_vga_state(conflict->pdev, false, pci_bits, - change_bridge); + if (!conflict->bridge_has_one_vga) { + vga_irq_set_state(conflict, false); + flags |= PCI_VGA_STATE_CHANGE_DECODES; + if (lwants & (VGA_RSRC_LEGACY_MEM|VGA_RSRC_NORMAL_MEM)) + pci_bits |= PCI_COMMAND_MEMORY; + if (lwants & (VGA_RSRC_LEGACY_IO|VGA_RSRC_NORMAL_IO)) + pci_bits |= PCI_COMMAND_IO; + } + + if (change_bridge) + flags |= PCI_VGA_STATE_CHANGE_BRIDGE; + + pci_set_vga_state(conflict->pdev, false, pci_bits, flags); conflict->owns &= ~lwants; /* If he also owned non-legacy, that is no longer the case */ if (lwants & VGA_RSRC_LEGACY_MEM) @@ -261,14 +270,24 @@ enable_them: * also have in "decodes". We can lock resources we don't decode but * not own them. */ + flags = 0; pci_bits = 0; - if (wants & (VGA_RSRC_LEGACY_MEM|VGA_RSRC_NORMAL_MEM)) - pci_bits |= PCI_COMMAND_MEMORY; - if (wants & (VGA_RSRC_LEGACY_IO|VGA_RSRC_NORMAL_IO)) - pci_bits |= PCI_COMMAND_IO; - pci_set_vga_state(vgadev->pdev, true, pci_bits, !!(wants & VGA_RSRC_LEGACY_MASK)); - vga_irq_set_state(vgadev, true); + if (!vgadev->bridge_has_one_vga) { + flags |= PCI_VGA_STATE_CHANGE_DECODES; + if (wants & (VGA_RSRC_LEGACY_MEM|VGA_RSRC_NORMAL_MEM)) + pci_bits |= PCI_COMMAND_MEMORY; + if (wants & (VGA_RSRC_LEGACY_IO|VGA_RSRC_NORMAL_IO)) + pci_bits |= PCI_COMMAND_IO; + } + if (!!(wants & VGA_RSRC_LEGACY_MASK)) + flags |= PCI_VGA_STATE_CHANGE_BRIDGE; + + pci_set_vga_state(vgadev->pdev, true, pci_bits, flags); + + if (!vgadev->bridge_has_one_vga) { + vga_irq_set_state(vgadev, true); + } vgadev->owns |= (wants & vgadev->decodes); lock_them: vgadev->locks |= (rsrc & VGA_RSRC_LEGACY_MASK); @@ -421,6 +440,62 @@ bail: } EXPORT_SYMBOL(vga_put); +/* Rules for using a bridge to control a VGA descendant decoding: + if a bridge has only one VGA descendant then it can be used + to control the VGA routing for that device. + It should always use the bridge closest to the device to control it. + If a bridge has a direct VGA descendant, but also have a sub-bridge + VGA descendant then we cannot use that bridge to control the direct VGA descendant. + So for every device we register, we need to iterate all its parent bridges + so we can invalidate any devices using them properly. 
+*/ +static void vga_arbiter_check_bridge_sharing(struct vga_device *vgadev) +{ + struct vga_device *same_bridge_vgadev; + struct pci_bus *new_bus, *bus; + struct pci_dev *new_bridge, *bridge; + + vgadev->bridge_has_one_vga = true; + + if (list_empty(&vga_list)) + return; + + /* okay iterate the new devices bridge hierarachy */ + new_bus = vgadev->pdev->bus; + while (new_bus) { + new_bridge = new_bus->self; + + if (new_bridge) { + /* go through list of devices already registered */ + list_for_each_entry(same_bridge_vgadev, &vga_list, list) { + bus = same_bridge_vgadev->pdev->bus; + bridge = bus->self; + + /* see if the share a bridge with this device */ + if (new_bridge == bridge) { + /* if their direct parent bridge is the same + as any bridge of this device then it can't be used + for that device */ + same_bridge_vgadev->bridge_has_one_vga = false; + } + + /* now iterate the previous devices bridge hierarchy */ + /* if the new devices parent bridge is in the other devices + hierarchy then we can't use it to control this device */ + while (bus) { + bridge = bus->self; + if (bridge) { + if (bridge == vgadev->pdev->bus->self) + vgadev->bridge_has_one_vga = false; + } + bus = bus->parent; + } + } + } + new_bus = new_bus->parent; + } +} + /* * Currently, we assume that the "initial" setup of the system is * not sane, that is we come up with conflicting devices and let @@ -500,6 +575,8 @@ static bool vga_arbiter_add_pci_device(struct pci_dev *pdev) vga_default = pci_dev_get(pdev); #endif + vga_arbiter_check_bridge_sharing(vgadev); + /* Add to the list */ list_add(&vgadev->list, &vga_list); vga_count++; @@ -1222,6 +1299,7 @@ static int __init vga_arb_device_init(void) { int rc; struct pci_dev *pdev; + struct vga_device *vgadev; rc = misc_register(&vga_arb_device); if (rc < 0) @@ -1238,6 +1316,13 @@ static int __init vga_arb_device_init(void) vga_arbiter_add_pci_device(pdev); pr_info("vgaarb: loaded\n"); + + list_for_each_entry(vgadev, &vga_list, list) { + if (vgadev->bridge_has_one_vga) + pr_info("vgaarb: bridge control possible %s\n", pci_name(vgadev->pdev)); + else + pr_info("vgaarb: no bridge control possible %s\n", pci_name(vgadev->pdev)); + } return rc; } subsys_initcall(vga_arb_device_init); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 2472e7177b4b..a339237f4f96 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2875,31 +2875,34 @@ static int pci_set_vga_state_arch(struct pci_dev *dev, bool decode, * @dev: the PCI device * @decode: true = enable decoding, false = disable decoding * @command_bits: PCI_COMMAND_IO and/or PCI_COMMAND_MEMORY - * @change_bridge: traverse ancestors and change bridges + * @change_bridge_flags: traverse ancestors and change bridges + * CHANGE_BRIDGE_ONLY / CHANGE_BRIDGE */ int pci_set_vga_state(struct pci_dev *dev, bool decode, - unsigned int command_bits, bool change_bridge) + unsigned int command_bits, u32 flags) { struct pci_bus *bus; struct pci_dev *bridge; u16 cmd; int rc; - WARN_ON(command_bits & ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY)); + WARN_ON((flags & PCI_VGA_STATE_CHANGE_DECODES) & (command_bits & ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY))); /* ARCH specific VGA enables */ - rc = pci_set_vga_state_arch(dev, decode, command_bits, change_bridge); + rc = pci_set_vga_state_arch(dev, decode, command_bits, flags); if (rc) return rc; - pci_read_config_word(dev, PCI_COMMAND, &cmd); - if (decode == true) - cmd |= command_bits; - else - cmd &= ~command_bits; - pci_write_config_word(dev, PCI_COMMAND, cmd); + if (flags & PCI_VGA_STATE_CHANGE_DECODES) { 
+ pci_read_config_word(dev, PCI_COMMAND, &cmd); + if (decode == true) + cmd |= command_bits; + else + cmd &= ~command_bits; + pci_write_config_word(dev, PCI_COMMAND, cmd); + } - if (change_bridge == false) + if (!(flags & PCI_VGA_STATE_CHANGE_BRIDGE)) return 0; bus = dev->bus; diff --git a/include/linux/pci.h b/include/linux/pci.h index 96f70d7e058d..f2e57b2e6a81 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -915,8 +915,11 @@ int pci_cfg_space_size_ext(struct pci_dev *dev); int pci_cfg_space_size(struct pci_dev *dev); unsigned char pci_bus_max_busnr(struct pci_bus *bus); +#define PCI_VGA_STATE_CHANGE_BRIDGE (1 << 0) +#define PCI_VGA_STATE_CHANGE_DECODES (1 << 1) + int pci_set_vga_state(struct pci_dev *pdev, bool decode, - unsigned int command_bits, bool change_bridge); + unsigned int command_bits, u32 flags); /* kmem_cache style wrapper around pci_alloc_consistent() */ #include @@ -1061,7 +1064,7 @@ static inline int pci_proc_domain(struct pci_bus *bus) /* some architectures require additional setup to direct VGA traffic */ typedef int (*arch_set_vga_state_t)(struct pci_dev *pdev, bool decode, - unsigned int command_bits, bool change_bridge); + unsigned int command_bits, u32 flags); extern void pci_register_set_vga_state(arch_set_vga_state_t func); #else /* CONFIG_PCI is not enabled */ -- cgit v1.2.3 From e7e7ee2eab2080248084d71fe0a115ab745eb2aa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 4 May 2011 08:42:29 +0200 Subject: perf events: Clean up definitions and initializers, update copyrights Fix a few inconsistent style bits that were added over the past few months. Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-yv4hwf9yhnzoada8pcpb3a97@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 96 +++++++++++++++++++++------------------------- kernel/events/core.c | 40 +++++++++---------- 2 files changed, 64 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9eec53d97370..207c16976a17 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -2,8 +2,8 @@ * Performance events: * * Copyright (C) 2008-2009, Thomas Gleixner - * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar - * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar + * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra * * Data type definitions, declarations, prototypes. * @@ -468,9 +468,9 @@ enum perf_callchain_context { PERF_CONTEXT_MAX = (__u64)-4095, }; -#define PERF_FLAG_FD_NO_GROUP (1U << 0) -#define PERF_FLAG_FD_OUTPUT (1U << 1) -#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */ +#define PERF_FLAG_FD_NO_GROUP (1U << 0) +#define PERF_FLAG_FD_OUTPUT (1U << 1) +#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */ #ifdef __KERNEL__ /* @@ -484,9 +484,9 @@ enum perf_callchain_context { #endif struct perf_guest_info_callbacks { - int (*is_in_guest) (void); - int (*is_user_mode) (void); - unsigned long (*get_guest_ip) (void); + int (*is_in_guest)(void); + int (*is_user_mode)(void); + unsigned long (*get_guest_ip)(void); }; #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -652,19 +652,19 @@ struct pmu { * Start the transaction, after this ->add() doesn't need to * do schedulability tests. 
*/ - void (*start_txn) (struct pmu *pmu); /* optional */ + void (*start_txn) (struct pmu *pmu); /* optional */ /* * If ->start_txn() disabled the ->add() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. */ - int (*commit_txn) (struct pmu *pmu); /* optional */ + int (*commit_txn) (struct pmu *pmu); /* optional */ /* * Will cancel the transaction, assumes ->del() is called * for each successful ->add() during the transaction. */ - void (*cancel_txn) (struct pmu *pmu); /* optional */ + void (*cancel_txn) (struct pmu *pmu); /* optional */ }; /** @@ -712,15 +712,15 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, int, struct pt_regs *regs); enum perf_group_flag { - PERF_GROUP_SOFTWARE = 0x1, + PERF_GROUP_SOFTWARE = 0x1, }; -#define SWEVENT_HLIST_BITS 8 -#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) +#define SWEVENT_HLIST_BITS 8 +#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) struct swevent_hlist { - struct hlist_head heads[SWEVENT_HLIST_SIZE]; - struct rcu_head rcu_head; + struct hlist_head heads[SWEVENT_HLIST_SIZE]; + struct rcu_head rcu_head; }; #define PERF_ATTACH_CONTEXT 0x01 @@ -733,13 +733,13 @@ struct swevent_hlist { * This is a per-cpu dynamically allocated data structure. */ struct perf_cgroup_info { - u64 time; - u64 timestamp; + u64 time; + u64 timestamp; }; struct perf_cgroup { - struct cgroup_subsys_state css; - struct perf_cgroup_info *info; /* timing info, one per cpu */ + struct cgroup_subsys_state css; + struct perf_cgroup_info *info; /* timing info, one per cpu */ }; #endif @@ -923,7 +923,7 @@ struct perf_event_context { /* * Number of contexts where an event can trigger: - * task, softirq, hardirq, nmi. + * task, softirq, hardirq, nmi. 
*/ #define PERF_NR_CONTEXTS 4 @@ -1001,8 +1001,7 @@ struct perf_sample_data { struct perf_raw_record *raw; }; -static inline -void perf_sample_data_init(struct perf_sample_data *data, u64 addr) +static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr) { data->addr = addr; data->raw = NULL; @@ -1039,8 +1038,7 @@ extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); #ifndef perf_arch_fetch_caller_regs -static inline void -perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { } +static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { } #endif /* @@ -1080,8 +1078,7 @@ static inline void perf_event_task_sched_in(struct task_struct *task) __perf_event_task_sched_in(task); } -static inline -void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) +static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) { perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); @@ -1099,14 +1096,10 @@ extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); -extern void perf_callchain_user(struct perf_callchain_entry *entry, - struct pt_regs *regs); -extern void perf_callchain_kernel(struct perf_callchain_entry *entry, - struct pt_regs *regs); - +extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs); +extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs); -static inline void -perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) +static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) { if (entry->nr < PERF_MAX_STACK_DEPTH) entry->ip[entry->nr++] = ip; @@ -1142,9 +1135,9 @@ extern void perf_tp_event(u64 addr, u64 count, void *record, extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags -#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ - PERF_RECORD_MISC_KERNEL) -#define perf_instruction_pointer(regs) instruction_pointer(regs) +# define perf_misc_flags(regs) \ + (user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL) +# define perf_instruction_pointer(regs) instruction_pointer(regs) #endif extern int perf_output_begin(struct perf_output_handle *handle, @@ -1179,9 +1172,9 @@ static inline void perf_bp_event(struct perf_event *event, void *data) { } static inline int perf_register_guest_info_callbacks -(struct perf_guest_info_callbacks *callbacks) { return 0; } +(struct perf_guest_info_callbacks *callbacks) { return 0; } static inline int perf_unregister_guest_info_callbacks -(struct perf_guest_info_callbacks *callbacks) { return 0; } +(struct perf_guest_info_callbacks *callbacks) { return 0; } static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } @@ -1194,23 +1187,22 @@ static inline void perf_event_disable(struct perf_event *event) { } static inline void perf_event_task_tick(void) { } #endif -#define perf_output_put(handle, x) \ - perf_output_copy((handle), &(x), sizeof(x)) +#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x)) /* * This has to have a higher priority than migration_notifier in sched.c. 
*/ -#define perf_cpu_notifier(fn) \ -do { \ - static struct notifier_block fn##_nb __cpuinitdata = \ - { .notifier_call = fn, .priority = CPU_PRI_PERF }; \ - fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE, \ - (void *)(unsigned long)smp_processor_id()); \ - fn(&fn##_nb, (unsigned long)CPU_STARTING, \ - (void *)(unsigned long)smp_processor_id()); \ - fn(&fn##_nb, (unsigned long)CPU_ONLINE, \ - (void *)(unsigned long)smp_processor_id()); \ - register_cpu_notifier(&fn##_nb); \ +#define perf_cpu_notifier(fn) \ +do { \ + static struct notifier_block fn##_nb __cpuinitdata = \ + { .notifier_call = fn, .priority = CPU_PRI_PERF }; \ + fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE, \ + (void *)(unsigned long)smp_processor_id()); \ + fn(&fn##_nb, (unsigned long)CPU_STARTING, \ + (void *)(unsigned long)smp_processor_id()); \ + fn(&fn##_nb, (unsigned long)CPU_ONLINE, \ + (void *)(unsigned long)smp_processor_id()); \ + register_cpu_notifier(&fn##_nb); \ } while (0) #endif /* __KERNEL__ */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 440bc485bbff..0fc34a370ba4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2,8 +2,8 @@ * Performance events core code: * * Copyright (C) 2008 Thomas Gleixner - * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra * Copyright © 2009 Paul Mackerras, IBM Corp. * * For licensing details see kernel-base/COPYING @@ -39,10 +39,10 @@ #include struct remote_function_call { - struct task_struct *p; - int (*func)(void *info); - void *info; - int ret; + struct task_struct *p; + int (*func)(void *info); + void *info; + int ret; }; static void remote_function(void *data) @@ -76,10 +76,10 @@ static int task_function_call(struct task_struct *p, int (*func) (void *info), void *info) { struct remote_function_call data = { - .p = p, - .func = func, - .info = info, - .ret = -ESRCH, /* No such (running) process */ + .p = p, + .func = func, + .info = info, + .ret = -ESRCH, /* No such (running) process */ }; if (task_curr(p)) @@ -100,10 +100,10 @@ task_function_call(struct task_struct *p, int (*func) (void *info), void *info) static int cpu_function_call(int cpu, int (*func) (void *info), void *info) { struct remote_function_call data = { - .p = NULL, - .func = func, - .info = info, - .ret = -ENXIO, /* No such CPU */ + .p = NULL, + .func = func, + .info = info, + .ret = -ENXIO, /* No such CPU */ }; smp_call_function_single(cpu, remote_function, &data, 1); @@ -7445,11 +7445,11 @@ static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, } struct cgroup_subsys perf_subsys = { - .name = "perf_event", - .subsys_id = perf_subsys_id, - .create = perf_cgroup_create, - .destroy = perf_cgroup_destroy, - .exit = perf_cgroup_exit, - .attach = perf_cgroup_attach, + .name = "perf_event", + .subsys_id = perf_subsys_id, + .create = perf_cgroup_create, + .destroy = perf_cgroup_destroy, + .exit = perf_cgroup_exit, + .attach = perf_cgroup_attach, }; #endif /* CONFIG_CGROUP_PERF */ -- cgit v1.2.3 From 2d06d8c49afdcc9bb35a85039fa50f0fe35bd40e Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 27 Mar 2011 15:04:46 +0200 Subject: [CPUFREQ] use dynamic debug instead of custom infrastructure With dynamic debug having gained the capability to report debug messages also during the boot process, it offers a far superior interface for debug messages than the custom cpufreq infrastructure. 
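To make the scope of the change concrete, here is a minimal before/after sketch of what the per-driver conversion described below amounts to (the "example-driver" name, the surrounding context and the message text are illustrative placeholders, not taken verbatim from any one file in this series):

    /* before: each driver wrapped the custom cpufreq debug core in a private macro */
    #define dprintk(msg...) \
            cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "example-driver", msg)

            dprintk("transition: %u --> %u kHz\n", freqs.old, freqs.new);

    /* after: the wrapper is gone and the call site is a plain pr_debug(), */
    /* which dynamic debug can switch on per module or per call site       */
            pr_debug("transition: %u --> %u kHz\n", freqs.old, freqs.new);

The commands for actually enabling that output, at runtime or at boot, are the dynamic debug queries quoted in the paragraphs that follow.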
As a first step, remove the old cpufreq_debug_printk() function and replace it with a call to the generic pr_debug() function. How can dynamic debug be used on cpufreq? You need a kernel which has CONFIG_DYNAMIC_DEBUG enabled. To enabled debugging during runtime, mount debugfs and $ echo -n 'module cpufreq +p' > /sys/kernel/debug/dynamic_debug/control for debugging the complete "cpufreq" module. To achieve the same goal during boot, append ddebug_query="module cpufreq +p" as a boot parameter to the kernel of your choice. For more detailled instructions, please see Documentation/dynamic-debug-howto.txt Signed-off-by: Dominik Brodowski Signed-off-by: Dave Jones --- arch/arm/mach-davinci/cpufreq.c | 4 +- arch/blackfin/mach-common/dpmc.c | 3 - arch/ia64/kernel/cpufreq/acpi-cpufreq.c | 44 +++--- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 45 +++--- arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c | 6 +- arch/x86/kernel/cpu/cpufreq/gx-suspmod.c | 21 ++- arch/x86/kernel/cpu/cpufreq/longhaul.c | 11 +- arch/x86/kernel/cpu/cpufreq/longrun.c | 17 +-- arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 10 +- arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c | 47 +++--- arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 33 ++--- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 100 ++++++------- arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 2 - arch/x86/kernel/cpu/cpufreq/sc520_freq.c | 6 +- arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 23 ++- arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 28 ++-- arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | 43 +++--- arch/x86/kernel/cpu/cpufreq/speedstep-smi.c | 41 +++--- drivers/acpi/processor_perflib.c | 6 +- drivers/cpufreq/Kconfig | 13 -- drivers/cpufreq/cpufreq.c | 180 +++++------------------ drivers/cpufreq/cpufreq_performance.c | 5 +- drivers/cpufreq/cpufreq_powersave.c | 5 +- drivers/cpufreq/cpufreq_userspace.c | 13 +- drivers/cpufreq/freq_table.c | 19 +-- include/linux/cpufreq.h | 19 --- 26 files changed, 270 insertions(+), 474 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-davinci/cpufreq.c b/arch/arm/mach-davinci/cpufreq.c index 0a95be1512bb..41669ecc1f91 100644 --- a/arch/arm/mach-davinci/cpufreq.c +++ b/arch/arm/mach-davinci/cpufreq.c @@ -94,9 +94,7 @@ static int davinci_target(struct cpufreq_policy *policy, if (freqs.old == freqs.new) return ret; - cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, - dev_driver_string(cpufreq.dev), - "transition: %u --> %u\n", freqs.old, freqs.new); + dev_dbg(&cpufreq.dev, "transition: %u --> %u\n", freqs.old, freqs.new); ret = cpufreq_frequency_table_target(policy, pdata->freq_table, freqs.new, relation, &idx); diff --git a/arch/blackfin/mach-common/dpmc.c b/arch/blackfin/mach-common/dpmc.c index 382099fd5561..5e4112e518a9 100644 --- a/arch/blackfin/mach-common/dpmc.c +++ b/arch/blackfin/mach-common/dpmc.c @@ -19,9 +19,6 @@ #define DRIVER_NAME "bfin dpmc" -#define dprintk(msg...) \ - cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, DRIVER_NAME, msg) - struct bfin_dpmc_platform_data *pdata; /** diff --git a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c index 22f61526a8e1..f09b174244d5 100644 --- a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c +++ b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c @@ -23,8 +23,6 @@ #include #include -#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg) - MODULE_AUTHOR("Venkatesh Pallipadi"); MODULE_DESCRIPTION("ACPI Processor P-States Driver"); MODULE_LICENSE("GPL"); @@ -47,12 +45,12 @@ processor_set_pstate ( { s64 retval; - dprintk("processor_set_pstate\n"); + pr_debug("processor_set_pstate\n"); retval = ia64_pal_set_pstate((u64)value); if (retval) { - dprintk("Failed to set freq to 0x%x, with error 0x%lx\n", + pr_debug("Failed to set freq to 0x%x, with error 0x%lx\n", value, retval); return -ENODEV; } @@ -67,14 +65,14 @@ processor_get_pstate ( u64 pstate_index = 0; s64 retval; - dprintk("processor_get_pstate\n"); + pr_debug("processor_get_pstate\n"); retval = ia64_pal_get_pstate(&pstate_index, PAL_GET_PSTATE_TYPE_INSTANT); *value = (u32) pstate_index; if (retval) - dprintk("Failed to get current freq with " + pr_debug("Failed to get current freq with " "error 0x%lx, idx 0x%x\n", retval, *value); return (int)retval; @@ -90,7 +88,7 @@ extract_clock ( { unsigned long i; - dprintk("extract_clock\n"); + pr_debug("extract_clock\n"); for (i = 0; i < data->acpi_data.state_count; i++) { if (value == data->acpi_data.states[i].status) @@ -110,7 +108,7 @@ processor_get_freq ( cpumask_t saved_mask; unsigned long clock_freq; - dprintk("processor_get_freq\n"); + pr_debug("processor_get_freq\n"); saved_mask = current->cpus_allowed; set_cpus_allowed_ptr(current, cpumask_of(cpu)); @@ -148,7 +146,7 @@ processor_set_freq ( cpumask_t saved_mask; int retval; - dprintk("processor_set_freq\n"); + pr_debug("processor_set_freq\n"); saved_mask = current->cpus_allowed; set_cpus_allowed_ptr(current, cpumask_of(cpu)); @@ -159,16 +157,16 @@ processor_set_freq ( if (state == data->acpi_data.state) { if (unlikely(data->resume)) { - dprintk("Called after resume, resetting to P%d\n", state); + pr_debug("Called after resume, resetting to P%d\n", state); data->resume = 0; } else { - dprintk("Already at target state (P%d)\n", state); + pr_debug("Already at target state (P%d)\n", state); retval = 0; goto migrate_end; } } - dprintk("Transitioning from P%d to P%d\n", + pr_debug("Transitioning from P%d to P%d\n", data->acpi_data.state, state); /* cpufreq frequency struct */ @@ -186,7 +184,7 @@ processor_set_freq ( value = (u32) data->acpi_data.states[state].control; - dprintk("Transitioning to state: 0x%08x\n", value); + pr_debug("Transitioning to state: 0x%08x\n", value); ret = processor_set_pstate(value); if (ret) { @@ -219,7 +217,7 @@ acpi_cpufreq_get ( { struct cpufreq_acpi_io *data = acpi_io_data[cpu]; - dprintk("acpi_cpufreq_get\n"); + pr_debug("acpi_cpufreq_get\n"); return processor_get_freq(data, cpu); } @@ -235,7 +233,7 @@ acpi_cpufreq_target ( unsigned int next_state = 0; unsigned int result = 0; - dprintk("acpi_cpufreq_setpolicy\n"); + pr_debug("acpi_cpufreq_setpolicy\n"); result = cpufreq_frequency_table_target(policy, data->freq_table, target_freq, relation, &next_state); @@ -255,7 +253,7 @@ acpi_cpufreq_verify ( unsigned int result = 0; struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; - dprintk("acpi_cpufreq_verify\n"); + pr_debug("acpi_cpufreq_verify\n"); result = cpufreq_frequency_table_verify(policy, data->freq_table); @@ -273,7 +271,7 @@ acpi_cpufreq_cpu_init ( struct cpufreq_acpi_io *data; unsigned int result = 0; - dprintk("acpi_cpufreq_cpu_init\n"); + pr_debug("acpi_cpufreq_cpu_init\n"); data = kzalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL); if (!data) @@ -288,7 +286,7 @@ acpi_cpufreq_cpu_init ( /* capability check */ if (data->acpi_data.state_count <= 1) { - dprintk("No 
P-States\n"); + pr_debug("No P-States\n"); result = -ENODEV; goto err_unreg; } @@ -297,7 +295,7 @@ acpi_cpufreq_cpu_init ( ACPI_ADR_SPACE_FIXED_HARDWARE) || (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { - dprintk("Unsupported address space [%d, %d]\n", + pr_debug("Unsupported address space [%d, %d]\n", (u32) (data->acpi_data.control_register.space_id), (u32) (data->acpi_data.status_register.space_id)); result = -ENODEV; @@ -348,7 +346,7 @@ acpi_cpufreq_cpu_init ( "activated.\n", cpu); for (i = 0; i < data->acpi_data.state_count; i++) - dprintk(" %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n", + pr_debug(" %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n", (i == data->acpi_data.state?'*':' '), i, (u32) data->acpi_data.states[i].core_frequency, (u32) data->acpi_data.states[i].power, @@ -383,7 +381,7 @@ acpi_cpufreq_cpu_exit ( { struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; - dprintk("acpi_cpufreq_cpu_exit\n"); + pr_debug("acpi_cpufreq_cpu_exit\n"); if (data) { cpufreq_frequency_table_put_attr(policy->cpu); @@ -418,7 +416,7 @@ static struct cpufreq_driver acpi_cpufreq_driver = { static int __init acpi_cpufreq_init (void) { - dprintk("acpi_cpufreq_init\n"); + pr_debug("acpi_cpufreq_init\n"); return cpufreq_register_driver(&acpi_cpufreq_driver); } @@ -427,7 +425,7 @@ acpi_cpufreq_init (void) static void __exit acpi_cpufreq_exit (void) { - dprintk("acpi_cpufreq_exit\n"); + pr_debug("acpi_cpufreq_exit\n"); cpufreq_unregister_driver(&acpi_cpufreq_driver); return; diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index a2baafb2fe6d..4e04e1274388 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -47,9 +47,6 @@ #include #include "mperf.h" -#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "acpi-cpufreq", msg) - MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); MODULE_DESCRIPTION("ACPI Processor P-States Driver"); MODULE_LICENSE("GPL"); @@ -233,7 +230,7 @@ static u32 get_cur_val(const struct cpumask *mask) cmd.mask = mask; drv_read(&cmd); - dprintk("get_cur_val = %u\n", cmd.val); + pr_debug("get_cur_val = %u\n", cmd.val); return cmd.val; } @@ -244,7 +241,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) unsigned int freq; unsigned int cached_freq; - dprintk("get_cur_freq_on_cpu (%d)\n", cpu); + pr_debug("get_cur_freq_on_cpu (%d)\n", cpu); if (unlikely(data == NULL || data->acpi_data == NULL || data->freq_table == NULL)) { @@ -261,7 +258,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) data->resume = 1; } - dprintk("cur freq = %u\n", freq); + pr_debug("cur freq = %u\n", freq); return freq; } @@ -293,7 +290,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, unsigned int i; int result = 0; - dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); + pr_debug("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); if (unlikely(data == NULL || data->acpi_data == NULL || data->freq_table == NULL)) { @@ -313,11 +310,11 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, next_perf_state = data->freq_table[next_state].index; if (perf->state == next_perf_state) { if (unlikely(data->resume)) { - dprintk("Called after resume, resetting to P%d\n", + pr_debug("Called after resume, resetting to P%d\n", next_perf_state); data->resume = 0; } else { - dprintk("Already at target state (P%d)\n", + pr_debug("Already at target state (P%d)\n", next_perf_state); goto out; } @@ -357,7 +354,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, if (acpi_pstate_strict) { if (!check_freqs(cmd.mask, freqs.new, data)) { - dprintk("acpi_cpufreq_target failed (%d)\n", + pr_debug("acpi_cpufreq_target failed (%d)\n", policy->cpu); result = -EAGAIN; goto out; @@ -378,7 +375,7 @@ static int acpi_cpufreq_verify(struct cpufreq_policy *policy) { struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); - dprintk("acpi_cpufreq_verify\n"); + pr_debug("acpi_cpufreq_verify\n"); return cpufreq_frequency_table_verify(policy, data->freq_table); } @@ -433,11 +430,11 @@ static void free_acpi_perf_data(void) static int __init acpi_cpufreq_early_init(void) { unsigned int i; - dprintk("acpi_cpufreq_early_init\n"); + pr_debug("acpi_cpufreq_early_init\n"); acpi_perf_data = alloc_percpu(struct acpi_processor_performance); if (!acpi_perf_data) { - dprintk("Memory allocation error for acpi_perf_data.\n"); + pr_debug("Memory allocation error for acpi_perf_data.\n"); return -ENOMEM; } for_each_possible_cpu(i) { @@ -519,7 +516,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) static int blacklisted; #endif - dprintk("acpi_cpufreq_cpu_init\n"); + pr_debug("acpi_cpufreq_cpu_init\n"); #ifdef CONFIG_SMP if (blacklisted) @@ -566,7 +563,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) /* capability check */ if (perf->state_count <= 1) { - dprintk("No P-States\n"); + pr_debug("No P-States\n"); result = -ENODEV; goto err_unreg; } @@ -578,11 +575,11 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) switch (perf->control_register.space_id) { case ACPI_ADR_SPACE_SYSTEM_IO: - dprintk("SYSTEM IO addr space\n"); + pr_debug("SYSTEM IO addr space\n"); data->cpu_feature = SYSTEM_IO_CAPABLE; break; case ACPI_ADR_SPACE_FIXED_HARDWARE: - dprintk("HARDWARE 
addr space\n"); + pr_debug("HARDWARE addr space\n"); if (!check_est_cpu(cpu)) { result = -ENODEV; goto err_unreg; @@ -590,7 +587,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; break; default: - dprintk("Unknown addr space %d\n", + pr_debug("Unknown addr space %d\n", (u32) (perf->control_register.space_id)); result = -ENODEV; goto err_unreg; @@ -661,9 +658,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (cpu_has(c, X86_FEATURE_APERFMPERF)) acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf; - dprintk("CPU%u - ACPI performance management activated.\n", cpu); + pr_debug("CPU%u - ACPI performance management activated.\n", cpu); for (i = 0; i < perf->state_count; i++) - dprintk(" %cP%d: %d MHz, %d mW, %d uS\n", + pr_debug(" %cP%d: %d MHz, %d mW, %d uS\n", (i == perf->state ? '*' : ' '), i, (u32) perf->states[i].core_frequency, (u32) perf->states[i].power, @@ -694,7 +691,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) { struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); - dprintk("acpi_cpufreq_cpu_exit\n"); + pr_debug("acpi_cpufreq_cpu_exit\n"); if (data) { cpufreq_frequency_table_put_attr(policy->cpu); @@ -712,7 +709,7 @@ static int acpi_cpufreq_resume(struct cpufreq_policy *policy) { struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); - dprintk("acpi_cpufreq_resume\n"); + pr_debug("acpi_cpufreq_resume\n"); data->resume = 1; @@ -743,7 +740,7 @@ static int __init acpi_cpufreq_init(void) if (acpi_disabled) return 0; - dprintk("acpi_cpufreq_init\n"); + pr_debug("acpi_cpufreq_init\n"); ret = acpi_cpufreq_early_init(); if (ret) @@ -758,7 +755,7 @@ static int __init acpi_cpufreq_init(void) static void __exit acpi_cpufreq_exit(void) { - dprintk("acpi_cpufreq_exit\n"); + pr_debug("acpi_cpufreq_exit\n"); cpufreq_unregister_driver(&acpi_cpufreq_driver); diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c index 141abebc4516..7bac808804f3 100644 --- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -57,8 +57,6 @@ MODULE_PARM_DESC(min_fsb, "Minimum FSB to use, if not defined: current FSB - 50"); #define PFX "cpufreq-nforce2: " -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "cpufreq-nforce2", msg) /** * nforce2_calc_fsb - calculate FSB @@ -270,7 +268,7 @@ static int nforce2_target(struct cpufreq_policy *policy, if (freqs.old == freqs.new) return 0; - dprintk("Old CPU frequency %d kHz, new %d kHz\n", + pr_debug("Old CPU frequency %d kHz, new %d kHz\n", freqs.old, freqs.new); cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); @@ -282,7 +280,7 @@ static int nforce2_target(struct cpufreq_policy *policy, printk(KERN_ERR PFX "Changing FSB to %d failed\n", target_fsb); else - dprintk("Changed FSB successfully to %d\n", + pr_debug("Changed FSB successfully to %d\n", target_fsb); /* Enable IRQs */ diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c index 32974cf84232..ffe1f2c92ed3 100644 --- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c @@ -142,9 +142,6 @@ module_param(max_duration, int, 0444); #define POLICY_MIN_DIV 20 -#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "gx-suspmod", msg) - /** * we can detect a core multipiler from dir0_lsb * from GX1 datasheet p.56, @@ -191,7 +188,7 @@ static __init struct pci_dev *gx_detect_chipset(void) /* check if CPU is a MediaGX or a Geode. */ if ((boot_cpu_data.x86_vendor != X86_VENDOR_NSC) && (boot_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) { - dprintk("error: no MediaGX/Geode processor found!\n"); + pr_debug("error: no MediaGX/Geode processor found!\n"); return NULL; } @@ -201,7 +198,7 @@ static __init struct pci_dev *gx_detect_chipset(void) return gx_pci; } - dprintk("error: no supported chipset found!\n"); + pr_debug("error: no supported chipset found!\n"); return NULL; } @@ -305,14 +302,14 @@ static void gx_set_cpuspeed(unsigned int khz) break; default: local_irq_restore(flags); - dprintk("fatal: try to set unknown chipset.\n"); + pr_debug("fatal: try to set unknown chipset.\n"); return; } } else { suscfg = gx_params->pci_suscfg & ~(SUSMOD); gx_params->off_duration = 0; gx_params->on_duration = 0; - dprintk("suspend modulation disabled: cpu runs 100%% speed.\n"); + pr_debug("suspend modulation disabled: cpu runs 100%% speed.\n"); } gx_write_byte(PCI_MODOFF, gx_params->off_duration); @@ -327,9 +324,9 @@ static void gx_set_cpuspeed(unsigned int khz) cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n", + pr_debug("suspend modulation w/ duration of ON:%d us, OFF:%d us\n", gx_params->on_duration * 32, gx_params->off_duration * 32); - dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new); + pr_debug("suspend modulation w/ clock speed: %d kHz.\n", freqs.new); } /**************************************************************** @@ -428,8 +425,8 @@ static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy) stock_freq = maxfreq; curfreq = gx_get_cpuspeed(0); - dprintk("cpu max frequency is %d.\n", maxfreq); - dprintk("cpu current frequency is %dkHz.\n", curfreq); + pr_debug("cpu max frequency is %d.\n", maxfreq); + pr_debug("cpu current frequency is %dkHz.\n", curfreq); /* setup basic struct for cpufreq API */ policy->cpu = 0; @@ -475,7 +472,7 @@ static int __init cpufreq_gx_init(void) if (max_duration > 0xff) max_duration = 0xff; - dprintk("geode suspend modulation available.\n"); + pr_debug("geode suspend modulation available.\n"); params = kzalloc(sizeof(struct gxfreq_params), GFP_KERNEL); if (params == NULL) diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index cf48cdd6907d..f47d26e2a135 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -77,9 +77,6 @@ static int scale_voltage; static int disable_acpi_c3; static int revid_errata; -#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "longhaul", msg) - /* Clock ratios multiplied by 10 */ static int mults[32]; @@ -87,7 +84,6 @@ static int eblcr[32]; static int longhaul_version; static struct cpufreq_frequency_table *longhaul_table; -#ifdef CONFIG_CPU_FREQ_DEBUG static char speedbuffer[8]; static char *print_speed(int speed) @@ -106,7 +102,6 @@ static char *print_speed(int speed) return speedbuffer; } -#endif static unsigned int calc_speed(int mult) @@ -275,7 +270,7 @@ static void longhaul_setstate(unsigned int table_index) cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - dprintk("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", + pr_debug("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", fsb, mult/10, mult%10, print_speed(speed/1000)); retry_loop: preempt_disable(); @@ -460,12 +455,12 @@ static int __cpuinit longhaul_get_ranges(void) break; } - dprintk("MinMult:%d.%dx MaxMult:%d.%dx\n", + pr_debug("MinMult:%d.%dx MaxMult:%d.%dx\n", minmult/10, minmult%10, maxmult/10, maxmult%10); highest_speed = calc_speed(maxmult); lowest_speed = calc_speed(minmult); - dprintk("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, + pr_debug("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, print_speed(lowest_speed/1000), print_speed(highest_speed/1000)); diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c index d9f51367666b..34ea359b370e 100644 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ b/arch/x86/kernel/cpu/cpufreq/longrun.c @@ -15,9 +15,6 @@ #include #include -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "longrun", msg) - static struct cpufreq_driver longrun_driver; /** @@ -40,14 +37,14 @@ static void __cpuinit longrun_get_policy(struct cpufreq_policy *policy) u32 msr_lo, msr_hi; rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); - dprintk("longrun flags are %x - %x\n", msr_lo, msr_hi); + pr_debug("longrun flags are %x - %x\n", msr_lo, msr_hi); if (msr_lo & 0x01) policy->policy = CPUFREQ_POLICY_PERFORMANCE; else policy->policy = CPUFREQ_POLICY_POWERSAVE; rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); - dprintk("longrun ctrl is %x - %x\n", msr_lo, msr_hi); + pr_debug("longrun ctrl is %x - %x\n", msr_lo, msr_hi); msr_lo &= 0x0000007F; msr_hi &= 0x0000007F; @@ -150,7 +147,7 @@ static unsigned int longrun_get(unsigned int cpu) return 0; cpuid(0x80860007, &eax, &ebx, &ecx, &edx); - dprintk("cpuid eax is %u\n", eax); + pr_debug("cpuid eax is %u\n", eax); return eax * 1000; } @@ -196,7 +193,7 @@ static int __cpuinit longrun_determine_freqs(unsigned int *low_freq, rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); *high_freq = msr_lo * 1000; /* to kHz */ - dprintk("longrun table interface told %u - %u kHz\n", + pr_debug("longrun table interface told %u - %u kHz\n", *low_freq, *high_freq); if (*low_freq > *high_freq) @@ -207,7 +204,7 @@ static int __cpuinit longrun_determine_freqs(unsigned int *low_freq, /* set the upper border to the value determined during TSC init */ *high_freq = (cpu_khz / 1000); *high_freq = *high_freq * 1000; - dprintk("high frequency is %u kHz\n", *high_freq); + pr_debug("high frequency is %u kHz\n", *high_freq); /* get current borders */ rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); @@ -233,7 +230,7 @@ static int __cpuinit longrun_determine_freqs(unsigned int *low_freq, /* restore values */ wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi); } - dprintk("percentage is %u %%, freq is %u MHz\n", ecx, eax); + pr_debug("percentage is %u %%, freq is %u MHz\n", ecx, eax); /* performance_pctg = (current_freq - 
low_freq)/(high_freq - low_freq) * eqals @@ -249,7 +246,7 @@ static int __cpuinit longrun_determine_freqs(unsigned int *low_freq, edx = ((eax - ebx) * 100) / (100 - ecx); *low_freq = edx * 1000; /* back to kHz */ - dprintk("low frequency is %u kHz\n", *low_freq); + pr_debug("low frequency is %u kHz\n", *low_freq); if (*low_freq > *high_freq) *low_freq = *high_freq; diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 52c93648e492..6be3e0760c26 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -35,8 +35,6 @@ #include "speedstep-lib.h" #define PFX "p4-clockmod: " -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "p4-clockmod", msg) /* * Duty Cycle (3bits), note DC_DISABLE is not specified in @@ -66,7 +64,7 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate) rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h); if (l & 0x01) - dprintk("CPU#%d currently thermal throttled\n", cpu); + pr_debug("CPU#%d currently thermal throttled\n", cpu); if (has_N44_O17_errata[cpu] && (newstate == DC_25PT || newstate == DC_DFLT)) @@ -74,10 +72,10 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate) rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h); if (newstate == DC_DISABLE) { - dprintk("CPU#%d disabling modulation\n", cpu); + pr_debug("CPU#%d disabling modulation\n", cpu); wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l & ~(1<<4), h); } else { - dprintk("CPU#%d setting duty cycle to %d%%\n", + pr_debug("CPU#%d setting duty cycle to %d%%\n", cpu, ((125 * newstate) / 10)); /* bits 63 - 5 : reserved * bit 4 : enable/disable @@ -217,7 +215,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) case 0x0f11: case 0x0f12: has_N44_O17_errata[policy->cpu] = 1; - dprintk("has errata -- disabling low frequencies\n"); + pr_debug("has errata -- disabling low frequencies\n"); } if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4D && diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c index 907c8e637ef5..7b0603eb0129 100644 --- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c @@ -48,9 +48,6 @@ #define BUF_SZ 4 -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "pcc-cpufreq", msg) - struct pcc_register_resource { u8 descriptor; u16 length; @@ -152,7 +149,7 @@ static unsigned int pcc_get_freq(unsigned int cpu) spin_lock(&pcc_lock); - dprintk("get: get_freq for CPU %d\n", cpu); + pr_debug("get: get_freq for CPU %d\n", cpu); pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); input_buffer = 0x1; @@ -170,7 +167,7 @@ static unsigned int pcc_get_freq(unsigned int cpu) status = ioread16(&pcch_hdr->status); if (status != CMD_COMPLETE) { - dprintk("get: FAILED: for CPU %d, status is %d\n", + pr_debug("get: FAILED: for CPU %d, status is %d\n", cpu, status); goto cmd_incomplete; } @@ -178,14 +175,14 @@ static unsigned int pcc_get_freq(unsigned int cpu) curr_freq = (((ioread32(&pcch_hdr->nominal) * (output_buffer & 0xff)) / 100) * 1000); - dprintk("get: SUCCESS: (virtual) output_offset for cpu %d is " - "0x%x, contains a value of: 0x%x. Speed is: %d MHz\n", + pr_debug("get: SUCCESS: (virtual) output_offset for cpu %d is " + "0x%p, contains a value of: 0x%x. 
Speed is: %d MHz\n", cpu, (pcch_virt_addr + pcc_cpu_data->output_offset), output_buffer, curr_freq); freq_limit = (output_buffer >> 8) & 0xff; if (freq_limit != 0xff) { - dprintk("get: frequency for cpu %d is being temporarily" + pr_debug("get: frequency for cpu %d is being temporarily" " capped at %d\n", cpu, curr_freq); } @@ -212,8 +209,8 @@ static int pcc_cpufreq_target(struct cpufreq_policy *policy, cpu = policy->cpu; pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); - dprintk("target: CPU %d should go to target freq: %d " - "(virtual) input_offset is 0x%x\n", + pr_debug("target: CPU %d should go to target freq: %d " + "(virtual) input_offset is 0x%p\n", cpu, target_freq, (pcch_virt_addr + pcc_cpu_data->input_offset)); @@ -234,14 +231,14 @@ static int pcc_cpufreq_target(struct cpufreq_policy *policy, status = ioread16(&pcch_hdr->status); if (status != CMD_COMPLETE) { - dprintk("target: FAILED for cpu %d, with status: 0x%x\n", + pr_debug("target: FAILED for cpu %d, with status: 0x%x\n", cpu, status); goto cmd_incomplete; } iowrite16(0, &pcch_hdr->status); cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - dprintk("target: was SUCCESSFUL for cpu %d\n", cpu); + pr_debug("target: was SUCCESSFUL for cpu %d\n", cpu); spin_unlock(&pcc_lock); return 0; @@ -293,7 +290,7 @@ static int pcc_get_offset(int cpu) memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ); memset_io((pcch_virt_addr + pcc_cpu_data->output_offset), 0, BUF_SZ); - dprintk("pcc_get_offset: for CPU %d: pcc_cpu_data " + pr_debug("pcc_get_offset: for CPU %d: pcc_cpu_data " "input_offset: 0x%x, pcc_cpu_data output_offset: 0x%x\n", cpu, pcc_cpu_data->input_offset, pcc_cpu_data->output_offset); out_free: @@ -410,7 +407,7 @@ static int __init pcc_cpufreq_probe(void) if (ACPI_SUCCESS(status)) { ret = pcc_cpufreq_do_osc(&osc_handle); if (ret) - dprintk("probe: _OSC evaluation did not succeed\n"); + pr_debug("probe: _OSC evaluation did not succeed\n"); /* Firmware's use of _OSC is optional */ ret = 0; } @@ -433,7 +430,7 @@ static int __init pcc_cpufreq_probe(void) mem_resource = (struct pcc_memory_resource *)member->buffer.pointer; - dprintk("probe: mem_resource descriptor: 0x%x," + pr_debug("probe: mem_resource descriptor: 0x%x," " length: %d, space_id: %d, resource_usage: %d," " type_specific: %d, granularity: 0x%llx," " minimum: 0x%llx, maximum: 0x%llx," @@ -453,13 +450,13 @@ static int __init pcc_cpufreq_probe(void) pcch_virt_addr = ioremap_nocache(mem_resource->minimum, mem_resource->address_length); if (pcch_virt_addr == NULL) { - dprintk("probe: could not map shared mem region\n"); + pr_debug("probe: could not map shared mem region\n"); goto out_free; } pcch_hdr = pcch_virt_addr; - dprintk("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr); - dprintk("probe: PCCH header is at physical address: 0x%llx," + pr_debug("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr); + pr_debug("probe: PCCH header is at physical address: 0x%llx," " signature: 0x%x, length: %d bytes, major: %d, minor: %d," " supported features: 0x%x, command field: 0x%x," " status field: 0x%x, nominal latency: %d us\n", @@ -469,7 +466,7 @@ static int __init pcc_cpufreq_probe(void) ioread16(&pcch_hdr->command), ioread16(&pcch_hdr->status), ioread32(&pcch_hdr->latency)); - dprintk("probe: min time between commands: %d us," + pr_debug("probe: min time between commands: %d us," " max time between commands: %d us," " nominal CPU frequency: %d MHz," " minimum CPU frequency: %d MHz," @@ -494,7 +491,7 @@ static int __init pcc_cpufreq_probe(void) 
doorbell.access_width = 64; doorbell.address = reg_resource->address; - dprintk("probe: doorbell: space_id is %d, bit_width is %d, " + pr_debug("probe: doorbell: space_id is %d, bit_width is %d, " "bit_offset is %d, access_width is %d, address is 0x%llx\n", doorbell.space_id, doorbell.bit_width, doorbell.bit_offset, doorbell.access_width, reg_resource->address); @@ -515,7 +512,7 @@ static int __init pcc_cpufreq_probe(void) doorbell_write = member->integer.value; - dprintk("probe: doorbell_preserve: 0x%llx," + pr_debug("probe: doorbell_preserve: 0x%llx," " doorbell_write: 0x%llx\n", doorbell_preserve, doorbell_write); @@ -550,7 +547,7 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) result = pcc_get_offset(cpu); if (result) { - dprintk("init: PCCP evaluation failed\n"); + pr_debug("init: PCCP evaluation failed\n"); goto out; } @@ -561,12 +558,12 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->cur = pcc_get_freq(cpu); if (!policy->cur) { - dprintk("init: Unable to get current CPU frequency\n"); + pr_debug("init: Unable to get current CPU frequency\n"); result = -EINVAL; goto out; } - dprintk("init: policy->max is %d, policy->min is %d\n", + pr_debug("init: policy->max is %d, policy->min is %d\n", policy->max, policy->min); out: return result; @@ -597,7 +594,7 @@ static int __init pcc_cpufreq_init(void) ret = pcc_cpufreq_probe(); if (ret) { - dprintk("pcc_cpufreq_init: PCCH evaluation failed\n"); + pr_debug("pcc_cpufreq_init: PCCH evaluation failed\n"); return ret; } diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 4a45fd6e41ba..d71d9f372359 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -68,7 +68,6 @@ union powernow_acpi_control_t { }; #endif -#ifdef CONFIG_CPU_FREQ_DEBUG /* divide by 1000 to get VCore voltage in V. */ static const int mobile_vid_table[32] = { 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650, @@ -76,7 +75,6 @@ static const int mobile_vid_table[32] = { 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100, 1075, 1050, 1025, 1000, 975, 950, 925, 0, }; -#endif /* divide by 10 to get FID. */ static const int fid_codes[32] = { @@ -103,9 +101,6 @@ static unsigned int fsb; static unsigned int latency; static char have_a0; -#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "powernow-k7", msg) - static int check_fsb(unsigned int fsbspeed) { int delta; @@ -209,7 +204,7 @@ static int get_ranges(unsigned char *pst) vid = *pst++; powernow_table[j].index |= (vid << 8); /* upper 8 bits */ - dprintk(" FID: 0x%x (%d.%dx [%dMHz]) " + pr_debug(" FID: 0x%x (%d.%dx [%dMHz]) " "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, fid_codes[fid] % 10, speed/1000, vid, mobile_vid_table[vid]/1000, @@ -367,7 +362,7 @@ static int powernow_acpi_init(void) unsigned int speed, speed_mhz; pc.val = (unsigned long) state->control; - dprintk("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", + pr_debug("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", i, (u32) state->core_frequency, (u32) state->power, @@ -401,7 +396,7 @@ static int powernow_acpi_init(void) invalidate_entry(i); } - dprintk(" FID: 0x%x (%d.%dx [%dMHz]) " + pr_debug(" FID: 0x%x (%d.%dx [%dMHz]) " "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, fid_codes[fid] % 10, speed_mhz, vid, mobile_vid_table[vid]/1000, @@ -409,7 +404,7 @@ static int powernow_acpi_init(void) if (state->core_frequency != speed_mhz) { state->core_frequency = speed_mhz; - dprintk(" Corrected ACPI frequency to %d\n", + pr_debug(" Corrected ACPI frequency to %d\n", speed_mhz); } @@ -453,8 +448,8 @@ static int powernow_acpi_init(void) static void print_pst_entry(struct pst_s *pst, unsigned int j) { - dprintk("PST:%d (@%p)\n", j, pst); - dprintk(" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", + pr_debug("PST:%d (@%p)\n", j, pst); + pr_debug(" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); } @@ -474,20 +469,20 @@ static int powernow_decode_bios(int maxfid, int startvid) p = phys_to_virt(i); if (memcmp(p, "AMDK7PNOW!", 10) == 0) { - dprintk("Found PSB header at %p\n", p); + pr_debug("Found PSB header at %p\n", p); psb = (struct psb_s *) p; - dprintk("Table version: 0x%x\n", psb->tableversion); + pr_debug("Table version: 0x%x\n", psb->tableversion); if (psb->tableversion != 0x12) { printk(KERN_INFO PFX "Sorry, only v1.2 tables" " supported right now\n"); return -ENODEV; } - dprintk("Flags: 0x%x\n", psb->flags); + pr_debug("Flags: 0x%x\n", psb->flags); if ((psb->flags & 1) == 0) - dprintk("Mobile voltage regulator\n"); + pr_debug("Mobile voltage regulator\n"); else - dprintk("Desktop voltage regulator\n"); + pr_debug("Desktop voltage regulator\n"); latency = psb->settlingtime; if (latency < 100) { @@ -497,9 +492,9 @@ static int powernow_decode_bios(int maxfid, int startvid) "Correcting.\n", latency); latency = 100; } - dprintk("Settling Time: %d microseconds.\n", + pr_debug("Settling Time: %d microseconds.\n", psb->settlingtime); - dprintk("Has %d PST tables. (Only dumping ones " + pr_debug("Has %d PST tables. (Only dumping ones " "relevant to this CPU).\n", psb->numpst); @@ -650,7 +645,7 @@ static int __cpuinit powernow_cpu_init(struct cpufreq_policy *policy) printk(KERN_WARNING PFX "can not determine bus frequency\n"); return -EINVAL; } - dprintk("FSB: %3dMHz\n", fsb/1000); + pr_debug("FSB: %3dMHz\n", fsb/1000); if (dmi_check_system(powernow_dmi_table) || acpi_force) { printk(KERN_INFO PFX "PSB/PST known to be broken. 
" diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 2368e38327b3..83479b6fb9a1 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -139,7 +139,7 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data) } do { if (i++ > 10000) { - dprintk("detected change pending stuck\n"); + pr_debug("detected change pending stuck\n"); return 1; } rdmsr(MSR_FIDVID_STATUS, lo, hi); @@ -176,7 +176,7 @@ static void fidvid_msr_init(void) fid = lo & MSR_S_LO_CURRENT_FID; lo = fid | (vid << MSR_C_LO_VID_SHIFT); hi = MSR_C_HI_STP_GNT_BENIGN; - dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi); + pr_debug("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi); wrmsr(MSR_FIDVID_CTL, lo, hi); } @@ -196,7 +196,7 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid) lo |= (data->currvid << MSR_C_LO_VID_SHIFT); lo |= MSR_C_LO_INIT_FID_VID; - dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n", + pr_debug("writing fid 0x%x, lo 0x%x, hi 0x%x\n", fid, lo, data->plllock * PLL_LOCK_CONVERSION); do { @@ -244,7 +244,7 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid) lo |= (vid << MSR_C_LO_VID_SHIFT); lo |= MSR_C_LO_INIT_FID_VID; - dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n", + pr_debug("writing vid 0x%x, lo 0x%x, hi 0x%x\n", vid, lo, STOP_GRANT_5NS); do { @@ -325,7 +325,7 @@ static int transition_fid_vid(struct powernow_k8_data *data, return 1; } - dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n", + pr_debug("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n", smp_processor_id(), data->currfid, data->currvid); return 0; @@ -339,7 +339,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 savefid = data->currfid; u32 maxvid, lo, rvomult = 1; - dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, " + pr_debug("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, " "reqvid 0x%x, rvo 0x%x\n", smp_processor_id(), data->currfid, data->currvid, reqvid, data->rvo); @@ -349,12 +349,12 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, rvosteps *= rvomult; rdmsr(MSR_FIDVID_STATUS, lo, maxvid); maxvid = 0x1f & (maxvid >> 16); - dprintk("ph1 maxvid=0x%x\n", maxvid); + pr_debug("ph1 maxvid=0x%x\n", maxvid); if (reqvid < maxvid) /* lower numbers are higher voltages */ reqvid = maxvid; while (data->currvid > reqvid) { - dprintk("ph1: curr 0x%x, req vid 0x%x\n", + pr_debug("ph1: curr 0x%x, req vid 0x%x\n", data->currvid, reqvid); if (decrease_vid_code_by_step(data, reqvid, data->vidmvs)) return 1; @@ -365,7 +365,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, if (data->currvid == maxvid) { rvosteps = 0; } else { - dprintk("ph1: changing vid for rvo, req 0x%x\n", + pr_debug("ph1: changing vid for rvo, req 0x%x\n", data->currvid - 1); if (decrease_vid_code_by_step(data, data->currvid-1, 1)) return 1; @@ -382,7 +382,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, return 1; } - dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n", + pr_debug("ph1 complete, currfid 0x%x, currvid 0x%x\n", data->currfid, data->currvid); return 0; @@ -400,7 +400,7 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) return 0; } - dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, " + pr_debug("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, " "reqfid 0x%x\n", smp_processor_id(), data->currfid, data->currvid, reqfid); @@ -457,7 
+457,7 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) return 1; } - dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n", + pr_debug("ph2 complete, currfid 0x%x, currvid 0x%x\n", data->currfid, data->currvid); return 0; @@ -470,7 +470,7 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 savefid = data->currfid; u32 savereqvid = reqvid; - dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n", + pr_debug("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n", smp_processor_id(), data->currfid, data->currvid); @@ -498,17 +498,17 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, return 1; if (savereqvid != data->currvid) { - dprintk("ph3 failed, currvid 0x%x\n", data->currvid); + pr_debug("ph3 failed, currvid 0x%x\n", data->currvid); return 1; } if (savefid != data->currfid) { - dprintk("ph3 failed, currfid changed 0x%x\n", + pr_debug("ph3 failed, currfid changed 0x%x\n", data->currfid); return 1; } - dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n", + pr_debug("ph3 complete, currfid 0x%x, currvid 0x%x\n", data->currfid, data->currvid); return 0; @@ -707,7 +707,7 @@ static int fill_powernow_table(struct powernow_k8_data *data, return -EIO; } - dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); + pr_debug("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); data->powernow_table = powernow_table; if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) print_basics(data); @@ -717,7 +717,7 @@ static int fill_powernow_table(struct powernow_k8_data *data, (pst[j].vid == data->currvid)) return 0; - dprintk("currfid/vid do not match PST, ignoring\n"); + pr_debug("currfid/vid do not match PST, ignoring\n"); return 0; } @@ -739,36 +739,36 @@ static int find_psb_table(struct powernow_k8_data *data) if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0) continue; - dprintk("found PSB header at 0x%p\n", psb); + pr_debug("found PSB header at 0x%p\n", psb); - dprintk("table vers: 0x%x\n", psb->tableversion); + pr_debug("table vers: 0x%x\n", psb->tableversion); if (psb->tableversion != PSB_VERSION_1_4) { printk(KERN_ERR FW_BUG PFX "PSB table is not v1.4\n"); return -ENODEV; } - dprintk("flags: 0x%x\n", psb->flags1); + pr_debug("flags: 0x%x\n", psb->flags1); if (psb->flags1) { printk(KERN_ERR FW_BUG PFX "unknown flags\n"); return -ENODEV; } data->vstable = psb->vstable; - dprintk("voltage stabilization time: %d(*20us)\n", + pr_debug("voltage stabilization time: %d(*20us)\n", data->vstable); - dprintk("flags2: 0x%x\n", psb->flags2); + pr_debug("flags2: 0x%x\n", psb->flags2); data->rvo = psb->flags2 & 3; data->irt = ((psb->flags2) >> 2) & 3; mvs = ((psb->flags2) >> 4) & 3; data->vidmvs = 1 << mvs; data->batps = ((psb->flags2) >> 6) & 3; - dprintk("ramp voltage offset: %d\n", data->rvo); - dprintk("isochronous relief time: %d\n", data->irt); - dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs); + pr_debug("ramp voltage offset: %d\n", data->rvo); + pr_debug("isochronous relief time: %d\n", data->irt); + pr_debug("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs); - dprintk("numpst: 0x%x\n", psb->num_tables); + pr_debug("numpst: 0x%x\n", psb->num_tables); cpst = psb->num_tables; if ((psb->cpuid == 0x00000fc0) || (psb->cpuid == 0x00000fe0)) { @@ -783,13 +783,13 @@ static int find_psb_table(struct powernow_k8_data *data) } data->plllock = psb->plllocktime; - dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime); - dprintk("maxfid: 0x%x\n", psb->maxfid); - dprintk("maxvid: 
0x%x\n", psb->maxvid); + pr_debug("plllocktime: 0x%x (units 1us)\n", psb->plllocktime); + pr_debug("maxfid: 0x%x\n", psb->maxfid); + pr_debug("maxvid: 0x%x\n", psb->maxvid); maxvid = psb->maxvid; data->numps = psb->numps; - dprintk("numpstates: 0x%x\n", data->numps); + pr_debug("numpstates: 0x%x\n", data->numps); return fill_powernow_table(data, (struct pst_s *)(psb+1), maxvid); } @@ -834,13 +834,13 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) u64 control, status; if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { - dprintk("register performance failed: bad ACPI data\n"); + pr_debug("register performance failed: bad ACPI data\n"); return -EIO; } /* verify the data contained in the ACPI structures */ if (data->acpi_data.state_count <= 1) { - dprintk("No ACPI P-States\n"); + pr_debug("No ACPI P-States\n"); goto err_out; } @@ -849,7 +849,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) || (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) { - dprintk("Invalid control/status registers (%x - %x)\n", + pr_debug("Invalid control/status registers (%llx - %llx)\n", control, status); goto err_out; } @@ -858,7 +858,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) * (data->acpi_data.state_count + 1)), GFP_KERNEL); if (!powernow_table) { - dprintk("powernow_table memory alloc failure\n"); + pr_debug("powernow_table memory alloc failure\n"); goto err_out; } @@ -928,7 +928,7 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, } rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); if (!(hi & HW_PSTATE_VALID_MASK)) { - dprintk("invalid pstate %d, ignoring\n", index); + pr_debug("invalid pstate %d, ignoring\n", index); invalidate_entry(powernow_table, i); continue; } @@ -968,7 +968,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, vid = (control >> VID_SHIFT) & VID_MASK; } - dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); + pr_debug(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); index = fid | (vid<<8); powernow_table[i].index = index; @@ -978,7 +978,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, /* verify frequency is OK */ if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) { - dprintk("invalid freq %u kHz, ignoring\n", freq); + pr_debug("invalid freq %u kHz, ignoring\n", freq); invalidate_entry(powernow_table, i); continue; } @@ -986,7 +986,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, /* verify voltage is OK - * BIOSs are using "off" to indicate invalid */ if (vid == VID_OFF) { - dprintk("invalid vid %u, ignoring\n", vid); + pr_debug("invalid vid %u, ignoring\n", vid); invalidate_entry(powernow_table, i); continue; } @@ -1047,7 +1047,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, int res, i; struct cpufreq_freqs freqs; - dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); + pr_debug("cpu %d transition to index %u\n", smp_processor_id(), index); /* fid/vid correctness check for k8 */ /* fid are the lower 8 bits of the index we stored into @@ -1057,18 +1057,18 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, fid = data->powernow_table[index].index & 0xFF; vid = (data->powernow_table[index].index & 0xFF00) >> 8; - dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid); + pr_debug("table matched fid 0x%x, giving vid 0x%x\n", fid, vid); 
if (query_current_values_with_pending_wait(data)) return 1; if ((data->currvid == vid) && (data->currfid == fid)) { - dprintk("target matches current values (fid 0x%x, vid 0x%x)\n", + pr_debug("target matches current values (fid 0x%x, vid 0x%x)\n", fid, vid); return 0; } - dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n", + pr_debug("cpu %d, changing to fid 0x%x, vid 0x%x\n", smp_processor_id(), fid, vid); freqs.old = find_khz_freq_from_fid(data->currfid); freqs.new = find_khz_freq_from_fid(fid); @@ -1096,7 +1096,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, int res, i; struct cpufreq_freqs freqs; - dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); + pr_debug("cpu %d transition to index %u\n", smp_processor_id(), index); /* get MSR index for hardware pstate transition */ pstate = index & HW_PSTATE_MASK; @@ -1156,14 +1156,14 @@ static int powernowk8_target(struct cpufreq_policy *pol, goto err_out; } - dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n", + pr_debug("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n", pol->cpu, targfreq, pol->min, pol->max, relation); if (query_current_values_with_pending_wait(data)) goto err_out; if (cpu_family != CPU_HW_PSTATE) { - dprintk("targ: curr fid 0x%x, vid 0x%x\n", + pr_debug("targ: curr fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); if ((checkvid != data->currvid) || @@ -1319,7 +1319,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) data->currpstate); else pol->cur = find_khz_freq_from_fid(data->currfid); - dprintk("policy current frequency %d kHz\n", pol->cur); + pr_debug("policy current frequency %d kHz\n", pol->cur); /* min/max the cpu is capable of */ if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) { @@ -1337,10 +1337,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); if (cpu_family == CPU_HW_PSTATE) - dprintk("cpu_init done, current pstate 0x%x\n", + pr_debug("cpu_init done, current pstate 0x%x\n", data->currpstate); else - dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", + pr_debug("cpu_init done, current fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); per_cpu(powernow_data, pol->cpu) = data; @@ -1586,7 +1586,7 @@ static int __cpuinit powernowk8_init(void) /* driver entry point for term */ static void __exit powernowk8_exit(void) { - dprintk("exit\n"); + pr_debug("exit\n"); if (boot_cpu_has(X86_FEATURE_CPB)) { msrs_free(msrs); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index df3529b1c02d..3744d26cdc2b 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -211,8 +211,6 @@ struct pst_s { u8 vid; }; -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg) - static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid, u32 regfid); static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid); diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c index 435a996a613a..1e205e6b1727 100644 --- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c +++ b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c @@ -29,8 +29,6 @@ static __u8 __iomem *cpuctl; -#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "sc520_freq", msg) #define PFX "sc520_freq: " static struct cpufreq_frequency_table sc520_freq_table[] = { @@ -66,7 +64,7 @@ static void sc520_freq_set_cpu_state(unsigned int state) cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - dprintk("attempting to set frequency to %i kHz\n", + pr_debug("attempting to set frequency to %i kHz\n", sc520_freq_table[state].frequency); local_irq_disable(); @@ -161,7 +159,7 @@ static int __init sc520_freq_init(void) /* Test if we have the right hardware */ if (c->x86_vendor != X86_VENDOR_AMD || c->x86 != 4 || c->x86_model != 9) { - dprintk("no Elan SC520 processor found!\n"); + pr_debug("no Elan SC520 processor found!\n"); return -ENODEV; } cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 9b1ff37de46a..6ea3455def21 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -29,9 +29,6 @@ #define PFX "speedstep-centrino: " #define MAINTAINER "cpufreq@vger.kernel.org" -#define dprintk(msg...) \ - cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) - #define INTEL_MSR_RANGE (0xffff) struct cpu_id @@ -244,7 +241,7 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy) if (model->cpu_id == NULL) { /* No match at all */ - dprintk("no support for CPU model \"%s\": " + pr_debug("no support for CPU model \"%s\": " "send /proc/cpuinfo to " MAINTAINER "\n", cpu->x86_model_id); return -ENOENT; @@ -252,15 +249,15 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy) if (model->op_points == NULL) { /* Matched a non-match */ - dprintk("no table support for CPU model \"%s\"\n", + pr_debug("no table support for CPU model \"%s\"\n", cpu->x86_model_id); - dprintk("try using the acpi-cpufreq driver\n"); + pr_debug("try using the acpi-cpufreq driver\n"); return -ENOENT; } per_cpu(centrino_model, policy->cpu) = model; - dprintk("found \"%s\": max frequency: %dkHz\n", + pr_debug("found \"%s\": max frequency: %dkHz\n", model->model_name, model->max_freq); return 0; @@ -369,7 +366,7 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i]; if (!per_cpu(centrino_cpu, policy->cpu)) { - dprintk("found unsupported CPU with " + pr_debug("found unsupported CPU with " "Enhanced SpeedStep: send /proc/cpuinfo to " MAINTAINER "\n"); return -ENODEV; @@ -385,7 +382,7 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP; - dprintk("trying to enable Enhanced SpeedStep (%x)\n", l); + pr_debug("trying to enable Enhanced SpeedStep (%x)\n", l); wrmsr(MSR_IA32_MISC_ENABLE, l, h); /* check to see if it stuck */ @@ -402,7 +399,7 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) /* 10uS transition latency */ policy->cur = freq; - dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); + pr_debug("centrino_cpu_init: cur=%dkHz\n", policy->cur); ret = cpufreq_frequency_table_cpuinfo(policy, per_cpu(centrino_model, policy->cpu)->op_points); @@ -498,7 +495,7 @@ static int centrino_target (struct cpufreq_policy *policy, good_cpu = j; if (good_cpu >= nr_cpu_ids) { - dprintk("couldn't limit to CPUs in this domain\n"); + pr_debug("couldn't limit to CPUs in this domain\n"); retval = -EAGAIN; if (first_cpu) { /* We haven't started the transition yet. 
*/ @@ -512,7 +509,7 @@ static int centrino_target (struct cpufreq_policy *policy, if (first_cpu) { rdmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, &oldmsr, &h); if (msr == (oldmsr & 0xffff)) { - dprintk("no change needed - msr was and needs " + pr_debug("no change needed - msr was and needs " "to be %x\n", oldmsr); retval = 0; goto out; @@ -521,7 +518,7 @@ static int centrino_target (struct cpufreq_policy *policy, freqs.old = extract_clock(oldmsr, cpu, 0); freqs.new = extract_clock(msr, cpu, 0); - dprintk("target=%dkHz old=%d new=%d msr=%04x\n", + pr_debug("target=%dkHz old=%d new=%d msr=%04x\n", target_freq, freqs.old, freqs.new, msr); for_each_cpu(k, policy->cpus) { diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 561758e95180..a748ce782fee 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -53,10 +53,6 @@ static struct cpufreq_frequency_table speedstep_freqs[] = { }; -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "speedstep-ich", msg) - - /** * speedstep_find_register - read the PMBASE address * @@ -80,7 +76,7 @@ static int speedstep_find_register(void) return -ENODEV; } - dprintk("pmbase is 0x%x\n", pmbase); + pr_debug("pmbase is 0x%x\n", pmbase); return 0; } @@ -106,13 +102,13 @@ static void speedstep_set_state(unsigned int state) /* read state */ value = inb(pmbase + 0x50); - dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); + pr_debug("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); /* write new state */ value &= 0xFE; value |= state; - dprintk("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase); + pr_debug("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase); /* Disable bus master arbitration */ pm2_blk = inb(pmbase + 0x20); @@ -132,10 +128,10 @@ static void speedstep_set_state(unsigned int state) /* Enable IRQs */ local_irq_restore(flags); - dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); + pr_debug("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); if (state == (value & 0x1)) - dprintk("change to %u MHz succeeded\n", + pr_debug("change to %u MHz succeeded\n", speedstep_get_frequency(speedstep_processor) / 1000); else printk(KERN_ERR "cpufreq: change failed - I/O error\n"); @@ -165,7 +161,7 @@ static int speedstep_activate(void) pci_read_config_word(speedstep_chipset_dev, 0x00A0, &value); if (!(value & 0x08)) { value |= 0x08; - dprintk("activating SpeedStep (TM) registers\n"); + pr_debug("activating SpeedStep (TM) registers\n"); pci_write_config_word(speedstep_chipset_dev, 0x00A0, value); } @@ -218,7 +214,7 @@ static unsigned int speedstep_detect_chipset(void) return 2; /* 2-M */ if (hostbridge->revision < 5) { - dprintk("hostbridge does not support speedstep\n"); + pr_debug("hostbridge does not support speedstep\n"); speedstep_chipset_dev = NULL; pci_dev_put(hostbridge); return 0; @@ -246,7 +242,7 @@ static unsigned int speedstep_get(unsigned int cpu) if (smp_call_function_single(cpu, get_freq_data, &speed, 1) != 0) BUG(); - dprintk("detected %u kHz as current frequency\n", speed); + pr_debug("detected %u kHz as current frequency\n", speed); return speed; } @@ -276,7 +272,7 @@ static int speedstep_target(struct cpufreq_policy *policy, freqs.new = speedstep_freqs[newstate].frequency; freqs.cpu = policy->cpu; - dprintk("transiting from %u to %u kHz\n", freqs.old, freqs.new); + pr_debug("transiting from %u to %u kHz\n", freqs.old, freqs.new); /* no transition necessary */ if 
(freqs.old == freqs.new) @@ -351,7 +347,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) if (!speed) return -EIO; - dprintk("currently at %s speed setting - %i MHz\n", + pr_debug("currently at %s speed setting - %i MHz\n", (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high", (speed / 1000)); @@ -405,14 +401,14 @@ static int __init speedstep_init(void) /* detect processor */ speedstep_processor = speedstep_detect_processor(); if (!speedstep_processor) { - dprintk("Intel(R) SpeedStep(TM) capable processor " + pr_debug("Intel(R) SpeedStep(TM) capable processor " "not found\n"); return -ENODEV; } /* detect chipset */ if (!speedstep_detect_chipset()) { - dprintk("Intel(R) SpeedStep(TM) for this chipset not " + pr_debug("Intel(R) SpeedStep(TM) for this chipset not " "(yet) available.\n"); return -ENODEV; } diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index a94ec6be69fa..8af2d2fd9d51 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -18,9 +18,6 @@ #include #include "speedstep-lib.h" -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "speedstep-lib", msg) - #define PFX "speedstep-lib: " #ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK @@ -75,7 +72,7 @@ static unsigned int pentium3_get_frequency(enum speedstep_processor processor) /* read MSR 0x2a - we only need the low 32 bits */ rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); - dprintk("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); + pr_debug("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); msr_tmp = msr_lo; /* decode the FSB */ @@ -89,7 +86,7 @@ static unsigned int pentium3_get_frequency(enum speedstep_processor processor) /* decode the multiplier */ if (processor == SPEEDSTEP_CPU_PIII_C_EARLY) { - dprintk("workaround for early PIIIs\n"); + pr_debug("workaround for early PIIIs\n"); msr_lo &= 0x03c00000; } else msr_lo &= 0x0bc00000; @@ -100,7 +97,7 @@ static unsigned int pentium3_get_frequency(enum speedstep_processor processor) j++; } - dprintk("speed is %u\n", + pr_debug("speed is %u\n", (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100)); return msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100; @@ -112,7 +109,7 @@ static unsigned int pentiumM_get_frequency(void) u32 msr_lo, msr_tmp; rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); - dprintk("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); + pr_debug("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); /* see table B-2 of 24547212.pdf */ if (msr_lo & 0x00040000) { @@ -122,7 +119,7 @@ static unsigned int pentiumM_get_frequency(void) } msr_tmp = (msr_lo >> 22) & 0x1f; - dprintk("bits 22-26 are 0x%x, speed is %u\n", + pr_debug("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * 100 * 1000)); return msr_tmp * 100 * 1000; @@ -160,11 +157,11 @@ static unsigned int pentium_core_get_frequency(void) } rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); - dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", + pr_debug("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); msr_tmp = (msr_lo >> 22) & 0x1f; - dprintk("bits 22-26 are 0x%x, speed is %u\n", + pr_debug("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * fsb)); ret = (msr_tmp * fsb); @@ -190,7 +187,7 @@ static unsigned int pentium4_get_frequency(void) rdmsr(0x2c, msr_lo, msr_hi); - dprintk("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi); + pr_debug("P4 - MSR_EBC_FREQUENCY_ID: 
0x%x 0x%x\n", msr_lo, msr_hi); /* decode the FSB: see IA-32 Intel (C) Architecture Software * Developer's Manual, Volume 3: System Prgramming Guide, @@ -217,7 +214,7 @@ static unsigned int pentium4_get_frequency(void) /* Multiplier. */ mult = msr_lo >> 24; - dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", + pr_debug("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult)); ret = (fsb * mult); @@ -257,7 +254,7 @@ unsigned int speedstep_detect_processor(void) struct cpuinfo_x86 *c = &cpu_data(0); u32 ebx, msr_lo, msr_hi; - dprintk("x86: %x, model: %x\n", c->x86, c->x86_model); + pr_debug("x86: %x, model: %x\n", c->x86, c->x86_model); if ((c->x86_vendor != X86_VENDOR_INTEL) || ((c->x86 != 6) && (c->x86 != 0xF))) @@ -272,7 +269,7 @@ unsigned int speedstep_detect_processor(void) ebx = cpuid_ebx(0x00000001); ebx &= 0x000000FF; - dprintk("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask); + pr_debug("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask); switch (c->x86_mask) { case 4: @@ -327,7 +324,7 @@ unsigned int speedstep_detect_processor(void) /* cpuid_ebx(1) is 0x04 for desktop PIII, * 0x06 for mobile PIII-M */ ebx = cpuid_ebx(0x00000001); - dprintk("ebx is %x\n", ebx); + pr_debug("ebx is %x\n", ebx); ebx &= 0x000000FF; @@ -344,7 +341,7 @@ unsigned int speedstep_detect_processor(void) /* all mobile PIII Coppermines have FSB 100 MHz * ==> sort out a few desktop PIIIs. */ rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi); - dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", + pr_debug("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", msr_lo, msr_hi); msr_lo &= 0x00c0000; if (msr_lo != 0x0080000) @@ -357,12 +354,12 @@ unsigned int speedstep_detect_processor(void) * bit 56 or 57 is set */ rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi); - dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", + pr_debug("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", msr_lo, msr_hi); if ((msr_hi & (1<<18)) && (relaxed_check ? 
1 : (msr_hi & (3<<24)))) { if (c->x86_mask == 0x01) { - dprintk("early PIII version\n"); + pr_debug("early PIII version\n"); return SPEEDSTEP_CPU_PIII_C_EARLY; } else return SPEEDSTEP_CPU_PIII_C; @@ -393,14 +390,14 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor, if ((!processor) || (!low_speed) || (!high_speed) || (!set_state)) return -EINVAL; - dprintk("trying to determine both speeds\n"); + pr_debug("trying to determine both speeds\n"); /* get current speed */ prev_speed = speedstep_get_frequency(processor); if (!prev_speed) return -EIO; - dprintk("previous speed is %u\n", prev_speed); + pr_debug("previous speed is %u\n", prev_speed); local_irq_save(flags); @@ -412,7 +409,7 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor, goto out; } - dprintk("low speed is %u\n", *low_speed); + pr_debug("low speed is %u\n", *low_speed); /* start latency measurement */ if (transition_latency) @@ -431,7 +428,7 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor, goto out; } - dprintk("high speed is %u\n", *high_speed); + pr_debug("high speed is %u\n", *high_speed); if (*low_speed == *high_speed) { ret = -ENODEV; @@ -445,7 +442,7 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor, if (transition_latency) { *transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC + tv2.tv_usec - tv1.tv_usec; - dprintk("transition latency is %u uSec\n", *transition_latency); + pr_debug("transition latency is %u uSec\n", *transition_latency); /* convert uSec to nSec and add 20% for safety reasons */ *transition_latency *= 1200; diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c index 91bc25b67bc1..c76ead3490bf 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c @@ -55,9 +55,6 @@ static struct cpufreq_frequency_table speedstep_freqs[] = { * of DMA activity going on? */ #define SMI_TRIES 5 -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ - "speedstep-smi", msg) - /** * speedstep_smi_ownership */ @@ -70,7 +67,7 @@ static int speedstep_smi_ownership(void) command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); magic = virt_to_phys(magic_data); - dprintk("trying to obtain ownership with command %x at port %x\n", + pr_debug("trying to obtain ownership with command %x at port %x\n", command, smi_port); __asm__ __volatile__( @@ -85,7 +82,7 @@ static int speedstep_smi_ownership(void) : "memory" ); - dprintk("result is %x\n", result); + pr_debug("result is %x\n", result); return result; } @@ -106,13 +103,13 @@ static int speedstep_smi_get_freqs(unsigned int *low, unsigned int *high) u32 function = GET_SPEEDSTEP_FREQS; if (!(ist_info.event & 0xFFFF)) { - dprintk("bug #1422 -- can't read freqs from BIOS\n"); + pr_debug("bug #1422 -- can't read freqs from BIOS\n"); return -ENODEV; } command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - dprintk("trying to determine frequencies with command %x at port %x\n", + pr_debug("trying to determine frequencies with command %x at port %x\n", command, smi_port); __asm__ __volatile__( @@ -129,7 +126,7 @@ static int speedstep_smi_get_freqs(unsigned int *low, unsigned int *high) "d" (smi_port), "S" (0), "D" (0) ); - dprintk("result %x, low_freq %u, high_freq %u\n", + pr_debug("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz); /* abort if results are obviously incorrect... 
*/ @@ -154,7 +151,7 @@ static int speedstep_get_state(void) command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - dprintk("trying to determine current setting with command %x " + pr_debug("trying to determine current setting with command %x " "at port %x\n", command, smi_port); __asm__ __volatile__( @@ -168,7 +165,7 @@ static int speedstep_get_state(void) "d" (smi_port), "S" (0), "D" (0) ); - dprintk("state is %x, result is %x\n", state, result); + pr_debug("state is %x, result is %x\n", state, result); return state & 1; } @@ -194,13 +191,13 @@ static void speedstep_set_state(unsigned int state) command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - dprintk("trying to set frequency to state %u " + pr_debug("trying to set frequency to state %u " "with command %x at port %x\n", state, command, smi_port); do { if (retry) { - dprintk("retry %u, previous result %u, waiting...\n", + pr_debug("retry %u, previous result %u, waiting...\n", retry, result); mdelay(retry * 50); } @@ -221,7 +218,7 @@ static void speedstep_set_state(unsigned int state) local_irq_restore(flags); if (new_state == state) - dprintk("change to %u MHz succeeded after %u tries " + pr_debug("change to %u MHz succeeded after %u tries " "with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result); @@ -292,7 +289,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) result = speedstep_smi_ownership(); if (result) { - dprintk("fails in acquiring ownership of a SMI interface.\n"); + pr_debug("fails in acquiring ownership of a SMI interface.\n"); return -EINVAL; } @@ -304,7 +301,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) if (result) { /* fall back to speedstep_lib.c dection mechanism: * try both states out */ - dprintk("could not detect low and high frequencies " + pr_debug("could not detect low and high frequencies " "by SMI call.\n"); result = speedstep_get_freqs(speedstep_processor, low, high, @@ -312,18 +309,18 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) &speedstep_set_state); if (result) { - dprintk("could not detect two different speeds" + pr_debug("could not detect two different speeds" " -- aborting.\n"); return result; } else - dprintk("workaround worked.\n"); + pr_debug("workaround worked.\n"); } /* get current speed setting */ state = speedstep_get_state(); speed = speedstep_freqs[state].frequency; - dprintk("currently at %s speed setting - %i MHz\n", + pr_debug("currently at %s speed setting - %i MHz\n", (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? 
"low" : "high", (speed / 1000)); @@ -360,7 +357,7 @@ static int speedstep_resume(struct cpufreq_policy *policy) int result = speedstep_smi_ownership(); if (result) - dprintk("fails in re-acquiring ownership of a SMI interface.\n"); + pr_debug("fails in re-acquiring ownership of a SMI interface.\n"); return result; } @@ -403,12 +400,12 @@ static int __init speedstep_init(void) } if (!speedstep_processor) { - dprintk("No supported Intel CPU detected.\n"); + pr_debug("No supported Intel CPU detected.\n"); return -ENODEV; } - dprintk("signature:0x%.8lx, command:0x%.8lx, " - "event:0x%.8lx, perf_level:0x%.8lx.\n", + pr_debug("signature:0x%.8ulx, command:0x%.8ulx, " + "event:0x%.8ulx, perf_level:0x%.8ulx.\n", ist_info.signature, ist_info.command, ist_info.event, ist_info.perf_level); diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 3a73a93596e8..85b32376dad7 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -49,10 +49,6 @@ ACPI_MODULE_NAME("processor_perflib"); static DEFINE_MUTEX(performance_mutex); -/* Use cpufreq debug layer for _PPC changes. */ -#define cpufreq_printk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \ - "cpufreq-core", msg) - /* * _PPC support is implemented as a CPUfreq policy notifier: * This means each time a CPUfreq driver registered also with @@ -145,7 +141,7 @@ static int acpi_processor_get_platform_limit(struct acpi_processor *pr) return -ENODEV; } - cpufreq_printk("CPU %d: _PPC is %d - frequency %s limited\n", pr->id, + pr_debug("CPU %d: _PPC is %d - frequency %s limited\n", pr->id, (int)ppc, ppc ? "" : "not"); pr->performance_platform_limit = (int)ppc; diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index ca8ee8093d6c..b78baa547ef5 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -18,19 +18,6 @@ if CPU_FREQ config CPU_FREQ_TABLE tristate -config CPU_FREQ_DEBUG - bool "Enable CPUfreq debugging" - help - Say Y here to enable CPUfreq subsystem (including drivers) - debugging. You will need to activate it via the kernel - command line by passing - cpufreq.debug= - - To get , add - 1 to activate CPUfreq core debugging, - 2 to activate CPUfreq drivers debugging, and - 4 to activate CPUfreq governor debugging - config CPU_FREQ_STAT tristate "CPU frequency translation statistics" select CPU_FREQ_TABLE diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 7c10f96c5ae9..1e08af43ae72 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -32,9 +32,6 @@ #include -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \ - "cpufreq-core", msg) - /** * The "cpufreq driver" - the arch- or hardware-dependent low * level driver of CPUFreq support, and its spinlock. This lock @@ -180,93 +177,6 @@ void cpufreq_cpu_put(struct cpufreq_policy *data) EXPORT_SYMBOL_GPL(cpufreq_cpu_put); -/********************************************************************* - * UNIFIED DEBUG HELPERS * - *********************************************************************/ -#ifdef CONFIG_CPU_FREQ_DEBUG - -/* what part(s) of the CPUfreq subsystem are debugged? */ -static unsigned int debug; - -/* is the debug output ratelimit'ed using printk_ratelimit? User can - * set or modify this value. - */ -static unsigned int debug_ratelimit = 1; - -/* is the printk_ratelimit'ing enabled? 
It's enabled after a successful - * loading of a cpufreq driver, temporarily disabled when a new policy - * is set, and disabled upon cpufreq driver removal - */ -static unsigned int disable_ratelimit = 1; -static DEFINE_SPINLOCK(disable_ratelimit_lock); - -static void cpufreq_debug_enable_ratelimit(void) -{ - unsigned long flags; - - spin_lock_irqsave(&disable_ratelimit_lock, flags); - if (disable_ratelimit) - disable_ratelimit--; - spin_unlock_irqrestore(&disable_ratelimit_lock, flags); -} - -static void cpufreq_debug_disable_ratelimit(void) -{ - unsigned long flags; - - spin_lock_irqsave(&disable_ratelimit_lock, flags); - disable_ratelimit++; - spin_unlock_irqrestore(&disable_ratelimit_lock, flags); -} - -void cpufreq_debug_printk(unsigned int type, const char *prefix, - const char *fmt, ...) -{ - char s[256]; - va_list args; - unsigned int len; - unsigned long flags; - - WARN_ON(!prefix); - if (type & debug) { - spin_lock_irqsave(&disable_ratelimit_lock, flags); - if (!disable_ratelimit && debug_ratelimit - && !printk_ratelimit()) { - spin_unlock_irqrestore(&disable_ratelimit_lock, flags); - return; - } - spin_unlock_irqrestore(&disable_ratelimit_lock, flags); - - len = snprintf(s, 256, KERN_DEBUG "%s: ", prefix); - - va_start(args, fmt); - len += vsnprintf(&s[len], (256 - len), fmt, args); - va_end(args); - - printk(s); - - WARN_ON(len < 5); - } -} -EXPORT_SYMBOL(cpufreq_debug_printk); - - -module_param(debug, uint, 0644); -MODULE_PARM_DESC(debug, "CPUfreq debugging: add 1 to debug core," - " 2 to debug drivers, and 4 to debug governors."); - -module_param(debug_ratelimit, uint, 0644); -MODULE_PARM_DESC(debug_ratelimit, "CPUfreq debugging:" - " set to 0 to disable ratelimiting."); - -#else /* !CONFIG_CPU_FREQ_DEBUG */ - -static inline void cpufreq_debug_enable_ratelimit(void) { return; } -static inline void cpufreq_debug_disable_ratelimit(void) { return; } - -#endif /* CONFIG_CPU_FREQ_DEBUG */ - - /********************************************************************* * EXTERNALLY AFFECTING FREQUENCY CHANGES * *********************************************************************/ @@ -291,7 +201,7 @@ static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) if (!l_p_j_ref_freq) { l_p_j_ref = loops_per_jiffy; l_p_j_ref_freq = ci->old; - dprintk("saving %lu as reference value for loops_per_jiffy; " + pr_debug("saving %lu as reference value for loops_per_jiffy; " "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq); } if ((val == CPUFREQ_PRECHANGE && ci->old < ci->new) || @@ -299,7 +209,7 @@ static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) { loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq, ci->new); - dprintk("scaling loops_per_jiffy to %lu " + pr_debug("scaling loops_per_jiffy to %lu " "for frequency %u kHz\n", loops_per_jiffy, ci->new); } } @@ -326,7 +236,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) BUG_ON(irqs_disabled()); freqs->flags = cpufreq_driver->flags; - dprintk("notification %u of frequency transition to %u kHz\n", + pr_debug("notification %u of frequency transition to %u kHz\n", state, freqs->new); policy = per_cpu(cpufreq_cpu_data, freqs->cpu); @@ -340,7 +250,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { if ((policy) && (policy->cpu == freqs->cpu) && (policy->cur) && (policy->cur != freqs->old)) { - dprintk("Warning: CPU frequency is" + 
pr_debug("Warning: CPU frequency is" " %u, cpufreq assumed %u kHz.\n", freqs->old, policy->cur); freqs->old = policy->cur; @@ -353,7 +263,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) case CPUFREQ_POSTCHANGE: adjust_jiffies(CPUFREQ_POSTCHANGE, freqs); - dprintk("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new, + pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new, (unsigned long)freqs->cpu); trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu); trace_cpu_frequency(freqs->new, freqs->cpu); @@ -753,7 +663,7 @@ no_policy: static void cpufreq_sysfs_release(struct kobject *kobj) { struct cpufreq_policy *policy = to_policy(kobj); - dprintk("last reference is dropped\n"); + pr_debug("last reference is dropped\n"); complete(&policy->kobj_unregister); } @@ -788,7 +698,7 @@ static int cpufreq_add_dev_policy(unsigned int cpu, gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu)); if (gov) { policy->governor = gov; - dprintk("Restoring governor %s for cpu %d\n", + pr_debug("Restoring governor %s for cpu %d\n", policy->governor->name, cpu); } #endif @@ -824,7 +734,7 @@ static int cpufreq_add_dev_policy(unsigned int cpu, per_cpu(cpufreq_cpu_data, cpu) = managed_policy; spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - dprintk("CPU already managed, adding link\n"); + pr_debug("CPU already managed, adding link\n"); ret = sysfs_create_link(&sys_dev->kobj, &managed_policy->kobj, "cpufreq"); @@ -865,7 +775,7 @@ static int cpufreq_add_dev_symlink(unsigned int cpu, if (!cpu_online(j)) continue; - dprintk("CPU %u already managed, adding link\n", j); + pr_debug("CPU %u already managed, adding link\n", j); managed_policy = cpufreq_cpu_get(cpu); cpu_sys_dev = get_cpu_sysdev(j); ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj, @@ -941,7 +851,7 @@ static int cpufreq_add_dev_interface(unsigned int cpu, policy->user_policy.governor = policy->governor; if (ret) { - dprintk("setting policy failed\n"); + pr_debug("setting policy failed\n"); if (cpufreq_driver->exit) cpufreq_driver->exit(policy); } @@ -977,8 +887,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) if (cpu_is_offline(cpu)) return 0; - cpufreq_debug_disable_ratelimit(); - dprintk("adding CPU %u\n", cpu); + pr_debug("adding CPU %u\n", cpu); #ifdef CONFIG_SMP /* check whether a different CPU already registered this @@ -986,7 +895,6 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) policy = cpufreq_cpu_get(cpu); if (unlikely(policy)) { cpufreq_cpu_put(policy); - cpufreq_debug_enable_ratelimit(); return 0; } #endif @@ -1037,7 +945,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) */ ret = cpufreq_driver->init(policy); if (ret) { - dprintk("initialization failed\n"); + pr_debug("initialization failed\n"); goto err_unlock_policy; } policy->user_policy.min = policy->min; @@ -1063,8 +971,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) kobject_uevent(&policy->kobj, KOBJ_ADD); module_put(cpufreq_driver->owner); - dprintk("initialization complete\n"); - cpufreq_debug_enable_ratelimit(); + pr_debug("initialization complete\n"); return 0; @@ -1088,7 +995,6 @@ err_free_policy: nomem_out: module_put(cpufreq_driver->owner); module_out: - cpufreq_debug_enable_ratelimit(); return ret; } @@ -1112,15 +1018,13 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) unsigned int j; #endif - cpufreq_debug_disable_ratelimit(); - dprintk("unregistering CPU %u\n", cpu); + pr_debug("unregistering CPU %u\n", cpu); spin_lock_irqsave(&cpufreq_driver_lock, flags); data = 
per_cpu(cpufreq_cpu_data, cpu); if (!data) { spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - cpufreq_debug_enable_ratelimit(); unlock_policy_rwsem_write(cpu); return -EINVAL; } @@ -1132,12 +1036,11 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) * only need to unlink, put and exit */ if (unlikely(cpu != data->cpu)) { - dprintk("removing link\n"); + pr_debug("removing link\n"); cpumask_clear_cpu(cpu, data->cpus); spin_unlock_irqrestore(&cpufreq_driver_lock, flags); kobj = &sys_dev->kobj; cpufreq_cpu_put(data); - cpufreq_debug_enable_ratelimit(); unlock_policy_rwsem_write(cpu); sysfs_remove_link(kobj, "cpufreq"); return 0; @@ -1170,7 +1073,7 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) for_each_cpu(j, data->cpus) { if (j == cpu) continue; - dprintk("removing link for cpu %u\n", j); + pr_debug("removing link for cpu %u\n", j); #ifdef CONFIG_HOTPLUG_CPU strncpy(per_cpu(cpufreq_cpu_governor, j), data->governor->name, CPUFREQ_NAME_LEN); @@ -1199,17 +1102,15 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) * not referenced anymore by anybody before we proceed with * unloading. */ - dprintk("waiting for dropping of refcount\n"); + pr_debug("waiting for dropping of refcount\n"); wait_for_completion(cmp); - dprintk("wait complete\n"); + pr_debug("wait complete\n"); lock_policy_rwsem_write(cpu); if (cpufreq_driver->exit) cpufreq_driver->exit(data); unlock_policy_rwsem_write(cpu); - cpufreq_debug_enable_ratelimit(); - #ifdef CONFIG_HOTPLUG_CPU /* when the CPU which is the parent of the kobj is hotplugged * offline, check for siblings, and create cpufreq sysfs interface @@ -1255,7 +1156,7 @@ static void handle_update(struct work_struct *work) struct cpufreq_policy *policy = container_of(work, struct cpufreq_policy, update); unsigned int cpu = policy->cpu; - dprintk("handle_update for cpu %u called\n", cpu); + pr_debug("handle_update for cpu %u called\n", cpu); cpufreq_update_policy(cpu); } @@ -1273,7 +1174,7 @@ static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, { struct cpufreq_freqs freqs; - dprintk("Warning: CPU frequency out of sync: cpufreq and timing " + pr_debug("Warning: CPU frequency out of sync: cpufreq and timing " "core thinks of %u, is %u kHz.\n", old_freq, new_freq); freqs.cpu = cpu; @@ -1376,7 +1277,7 @@ static int cpufreq_bp_suspend(void) int cpu = smp_processor_id(); struct cpufreq_policy *cpu_policy; - dprintk("suspending cpu %u\n", cpu); + pr_debug("suspending cpu %u\n", cpu); /* If there's no policy for the boot CPU, we have nothing to do. */ cpu_policy = cpufreq_cpu_get(cpu); @@ -1414,7 +1315,7 @@ static void cpufreq_bp_resume(void) int cpu = smp_processor_id(); struct cpufreq_policy *cpu_policy; - dprintk("resuming cpu %u\n", cpu); + pr_debug("resuming cpu %u\n", cpu); /* If there's no policy for the boot CPU, we have nothing to do. 
*/ cpu_policy = cpufreq_cpu_get(cpu); @@ -1526,7 +1427,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, { int retval = -EINVAL; - dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu, + pr_debug("target for CPU %u: %u kHz, relation %u\n", policy->cpu, target_freq, relation); if (cpu_online(policy->cpu) && cpufreq_driver->target) retval = cpufreq_driver->target(policy, target_freq, relation); @@ -1612,7 +1513,7 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, if (!try_module_get(policy->governor->owner)) return -EINVAL; - dprintk("__cpufreq_governor for CPU %u, event %u\n", + pr_debug("__cpufreq_governor for CPU %u, event %u\n", policy->cpu, event); ret = policy->governor->governor(policy, event); @@ -1713,8 +1614,7 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data, { int ret = 0; - cpufreq_debug_disable_ratelimit(); - dprintk("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu, + pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu, policy->min, policy->max); memcpy(&policy->cpuinfo, &data->cpuinfo, @@ -1751,19 +1651,19 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data, data->min = policy->min; data->max = policy->max; - dprintk("new min and max freqs are %u - %u kHz\n", + pr_debug("new min and max freqs are %u - %u kHz\n", data->min, data->max); if (cpufreq_driver->setpolicy) { data->policy = policy->policy; - dprintk("setting range\n"); + pr_debug("setting range\n"); ret = cpufreq_driver->setpolicy(policy); } else { if (policy->governor != data->governor) { /* save old, working values */ struct cpufreq_governor *old_gov = data->governor; - dprintk("governor switch\n"); + pr_debug("governor switch\n"); /* end old governor */ if (data->governor) @@ -1773,7 +1673,7 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data, data->governor = policy->governor; if (__cpufreq_governor(data, CPUFREQ_GOV_START)) { /* new governor failed, so re-start old one */ - dprintk("starting governor %s failed\n", + pr_debug("starting governor %s failed\n", data->governor->name); if (old_gov) { data->governor = old_gov; @@ -1785,12 +1685,11 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data, } /* might be a policy change, too, so fall through */ } - dprintk("governor: change or update limits\n"); + pr_debug("governor: change or update limits\n"); __cpufreq_governor(data, CPUFREQ_GOV_LIMITS); } error_out: - cpufreq_debug_enable_ratelimit(); return ret; } @@ -1817,7 +1716,7 @@ int cpufreq_update_policy(unsigned int cpu) goto fail; } - dprintk("updating policy for CPU %u\n", cpu); + pr_debug("updating policy for CPU %u\n", cpu); memcpy(&policy, data, sizeof(struct cpufreq_policy)); policy.min = data->user_policy.min; policy.max = data->user_policy.max; @@ -1829,7 +1728,7 @@ int cpufreq_update_policy(unsigned int cpu) if (cpufreq_driver->get) { policy.cur = cpufreq_driver->get(cpu); if (!data->cur) { - dprintk("Driver did not initialize current freq"); + pr_debug("Driver did not initialize current freq"); data->cur = policy.cur; } else { if (data->cur != policy.cur) @@ -1905,7 +1804,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) ((!driver_data->setpolicy) && (!driver_data->target))) return -EINVAL; - dprintk("trying to register driver %s\n", driver_data->name); + pr_debug("trying to register driver %s\n", driver_data->name); if (driver_data->setpolicy) driver_data->flags |= CPUFREQ_CONST_LOOPS; @@ -1936,15 +1835,14 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) /* 
if all ->init() calls failed, unregister */ if (ret) { - dprintk("no CPU initialized for driver %s\n", + pr_debug("no CPU initialized for driver %s\n", driver_data->name); goto err_sysdev_unreg; } } register_hotcpu_notifier(&cpufreq_cpu_notifier); - dprintk("driver %s up and running\n", driver_data->name); - cpufreq_debug_enable_ratelimit(); + pr_debug("driver %s up and running\n", driver_data->name); return 0; err_sysdev_unreg: @@ -1971,14 +1869,10 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver) { unsigned long flags; - cpufreq_debug_disable_ratelimit(); - - if (!cpufreq_driver || (driver != cpufreq_driver)) { - cpufreq_debug_enable_ratelimit(); + if (!cpufreq_driver || (driver != cpufreq_driver)) return -EINVAL; - } - dprintk("unregistering driver %s\n", driver->name); + pr_debug("unregistering driver %s\n", driver->name); sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver); unregister_hotcpu_notifier(&cpufreq_cpu_notifier); diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index 7e2e515087f8..f13a8a9af6a1 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c @@ -15,9 +15,6 @@ #include #include -#define dprintk(msg...) \ - cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "performance", msg) - static int cpufreq_governor_performance(struct cpufreq_policy *policy, unsigned int event) @@ -25,7 +22,7 @@ static int cpufreq_governor_performance(struct cpufreq_policy *policy, switch (event) { case CPUFREQ_GOV_START: case CPUFREQ_GOV_LIMITS: - dprintk("setting to %u kHz because of event %u\n", + pr_debug("setting to %u kHz because of event %u\n", policy->max, event); __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index e6db5faf3eb1..4c2eb512f2bc 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -15,16 +15,13 @@ #include #include -#define dprintk(msg...) \ - cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "powersave", msg) - static int cpufreq_governor_powersave(struct cpufreq_policy *policy, unsigned int event) { switch (event) { case CPUFREQ_GOV_START: case CPUFREQ_GOV_LIMITS: - dprintk("setting to %u kHz because of event %u\n", + pr_debug("setting to %u kHz because of event %u\n", policy->min, event); __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index 66d2d1d6c80f..f231015904c0 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -37,9 +37,6 @@ static DEFINE_PER_CPU(unsigned int, cpu_is_managed); static DEFINE_MUTEX(userspace_mutex); static int cpus_using_userspace_governor; -#define dprintk(msg...) 
\ - cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "userspace", msg) - /* keep track of frequency transitions */ static int userspace_cpufreq_notifier(struct notifier_block *nb, unsigned long val, @@ -50,7 +47,7 @@ userspace_cpufreq_notifier(struct notifier_block *nb, unsigned long val, if (!per_cpu(cpu_is_managed, freq->cpu)) return 0; - dprintk("saving cpu_cur_freq of cpu %u to be %u kHz\n", + pr_debug("saving cpu_cur_freq of cpu %u to be %u kHz\n", freq->cpu, freq->new); per_cpu(cpu_cur_freq, freq->cpu) = freq->new; @@ -73,7 +70,7 @@ static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq) { int ret = -EINVAL; - dprintk("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq); + pr_debug("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq); mutex_lock(&userspace_mutex); if (!per_cpu(cpu_is_managed, policy->cpu)) @@ -134,7 +131,7 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy, per_cpu(cpu_max_freq, cpu) = policy->max; per_cpu(cpu_cur_freq, cpu) = policy->cur; per_cpu(cpu_set_freq, cpu) = policy->cur; - dprintk("managing cpu %u started " + pr_debug("managing cpu %u started " "(%u - %u kHz, currently %u kHz)\n", cpu, per_cpu(cpu_min_freq, cpu), @@ -156,12 +153,12 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy, per_cpu(cpu_min_freq, cpu) = 0; per_cpu(cpu_max_freq, cpu) = 0; per_cpu(cpu_set_freq, cpu) = 0; - dprintk("managing cpu %u stopped\n", cpu); + pr_debug("managing cpu %u stopped\n", cpu); mutex_unlock(&userspace_mutex); break; case CPUFREQ_GOV_LIMITS: mutex_lock(&userspace_mutex); - dprintk("limit event for cpu %u: %u - %u kHz, " + pr_debug("limit event for cpu %u: %u - %u kHz, " "currently %u kHz, last set to %u kHz\n", cpu, policy->min, policy->max, per_cpu(cpu_cur_freq, cpu), diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 05432216e224..90431cb92804 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -14,9 +14,6 @@ #include #include -#define dprintk(msg...) 
\ - cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, "freq-table", msg) - /********************************************************************* * FREQUENCY TABLE HELPERS * *********************************************************************/ @@ -31,11 +28,11 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) { unsigned int freq = table[i].frequency; if (freq == CPUFREQ_ENTRY_INVALID) { - dprintk("table entry %u is invalid, skipping\n", i); + pr_debug("table entry %u is invalid, skipping\n", i); continue; } - dprintk("table entry %u: %u kHz, %u index\n", + pr_debug("table entry %u: %u kHz, %u index\n", i, freq, table[i].index); if (freq < min_freq) min_freq = freq; @@ -61,7 +58,7 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, unsigned int i; unsigned int count = 0; - dprintk("request for verification of policy (%u - %u kHz) for cpu %u\n", + pr_debug("request for verification of policy (%u - %u kHz) for cpu %u\n", policy->min, policy->max, policy->cpu); if (!cpu_online(policy->cpu)) @@ -86,7 +83,7 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, policy->cpuinfo.max_freq); - dprintk("verification lead to (%u - %u kHz) for cpu %u\n", + pr_debug("verification lead to (%u - %u kHz) for cpu %u\n", policy->min, policy->max, policy->cpu); return 0; @@ -110,7 +107,7 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, }; unsigned int i; - dprintk("request for target %u kHz (relation: %u) for cpu %u\n", + pr_debug("request for target %u kHz (relation: %u) for cpu %u\n", target_freq, relation, policy->cpu); switch (relation) { @@ -167,7 +164,7 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, } else *index = optimal.index; - dprintk("target is %u (%u kHz, %u)\n", *index, table[*index].frequency, + pr_debug("target is %u (%u kHz, %u)\n", *index, table[*index].frequency, table[*index].index); return 0; @@ -216,14 +213,14 @@ EXPORT_SYMBOL_GPL(cpufreq_freq_attr_scaling_available_freqs); void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table, unsigned int cpu) { - dprintk("setting show_table for cpu %u to %p\n", cpu, table); + pr_debug("setting show_table for cpu %u to %p\n", cpu, table); per_cpu(cpufreq_show_table, cpu) = table; } EXPORT_SYMBOL_GPL(cpufreq_frequency_table_get_attr); void cpufreq_frequency_table_put_attr(unsigned int cpu) { - dprintk("clearing show_table for cpu %u\n", cpu); + pr_debug("clearing show_table for cpu %u\n", cpu); per_cpu(cpufreq_show_table, cpu) = NULL; } EXPORT_SYMBOL_GPL(cpufreq_frequency_table_put_attr); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 9343dd3de858..2845f6e67221 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -397,23 +397,4 @@ void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table, void cpufreq_frequency_table_put_attr(unsigned int cpu); -/********************************************************************* - * UNIFIED DEBUG HELPERS * - *********************************************************************/ - -#define CPUFREQ_DEBUG_CORE 1 -#define CPUFREQ_DEBUG_DRIVER 2 -#define CPUFREQ_DEBUG_GOVERNOR 4 - -#ifdef CONFIG_CPU_FREQ_DEBUG - -extern void cpufreq_debug_printk(unsigned int type, const char *prefix, - const char *fmt, ...); - -#else - -#define cpufreq_debug_printk(msg...) 
do { } while(0) - -#endif /* CONFIG_CPU_FREQ_DEBUG */ - #endif /* _LINUX_CPUFREQ_H */ -- cgit v1.2.3 From 335dc3335ff692173bc766b248f7a97f3a23b30a Mon Sep 17 00:00:00 2001 From: Thiago Farina Date: Thu, 28 Apr 2011 20:42:53 -0300 Subject: [CPUFREQ] cpufreq.h: Fix some checkpatch.pl coding style issues. Before: $ scripts/checkpatch.pl --file --terse include/linux/cpufreq.h total: 14 errors, 11 warnings, 419 lines checked After: $ scripts/checkpatch.pl --file --terse include/linux/cpufreq.h total: 2 errors, 4 warnings, 422 lines checked Signed-off-by: Thiago Farina Signed-off-by: Dave Jones --- include/linux/cpufreq.h | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 2845f6e67221..11be48e0d168 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -3,7 +3,7 @@ * * Copyright (C) 2001 Russell King * (C) 2002 - 2003 Dominik Brodowski - * + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -56,9 +56,9 @@ static inline int cpufreq_unregister_notifier(struct notifier_block *nb, #define CPUFREQ_POLICY_POWERSAVE (1) #define CPUFREQ_POLICY_PERFORMANCE (2) -/* Frequency values here are CPU kHz so that hardware which doesn't run - * with some frequencies can complain without having to guess what per - * cent / per mille means. +/* Frequency values here are CPU kHz so that hardware which doesn't run + * with some frequencies can complain without having to guess what per + * cent / per mille means. * Maximum transition latency is in nanoseconds - if it's unknown, * CPUFREQ_ETERNAL shall be used. */ @@ -72,13 +72,15 @@ extern struct kobject *cpufreq_global_kobject; struct cpufreq_cpuinfo { unsigned int max_freq; unsigned int min_freq; - unsigned int transition_latency; /* in 10^(-9) s = nanoseconds */ + + /* in 10^(-9) s = nanoseconds */ + unsigned int transition_latency; }; struct cpufreq_real_policy { unsigned int min; /* in kHz */ unsigned int max; /* in kHz */ - unsigned int policy; /* see above */ + unsigned int policy; /* see above */ struct cpufreq_governor *governor; /* see below */ }; @@ -94,7 +96,7 @@ struct cpufreq_policy { unsigned int max; /* in kHz */ unsigned int cur; /* in kHz, only needed if cpufreq * governors are used */ - unsigned int policy; /* see above */ + unsigned int policy; /* see above */ struct cpufreq_governor *governor; /* see below */ struct work_struct update; /* if update_policy() needs to be @@ -167,11 +169,11 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, u_int mu struct cpufreq_governor { char name[CPUFREQ_NAME_LEN]; - int (*governor) (struct cpufreq_policy *policy, + int (*governor) (struct cpufreq_policy *policy, unsigned int event); ssize_t (*show_setspeed) (struct cpufreq_policy *policy, char *buf); - int (*store_setspeed) (struct cpufreq_policy *policy, + int (*store_setspeed) (struct cpufreq_policy *policy, unsigned int freq); unsigned int max_transition_latency; /* HW must be able to switch to next freq faster than this value in nano secs or we @@ -180,7 +182,8 @@ struct cpufreq_governor { struct module *owner; }; -/* pass a target to the cpufreq driver +/* + * Pass a target to the cpufreq driver. 
*/ extern int cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int target_freq, @@ -237,9 +240,9 @@ struct cpufreq_driver { /* flags */ -#define CPUFREQ_STICKY 0x01 /* the driver isn't removed even if +#define CPUFREQ_STICKY 0x01 /* the driver isn't removed even if * all ->init() calls failed */ -#define CPUFREQ_CONST_LOOPS 0x02 /* loops_per_jiffy or other kernel +#define CPUFREQ_CONST_LOOPS 0x02 /* loops_per_jiffy or other kernel * "constants" aren't affected by * frequency transitions */ #define CPUFREQ_PM_NO_WARN 0x04 /* don't warn on suspend/resume speed @@ -252,7 +255,7 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state); -static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy, unsigned int min, unsigned int max) +static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy, unsigned int min, unsigned int max) { if (policy->min < min) policy->min = min; @@ -386,12 +389,12 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, /* the following 3 funtions are for cpufreq core use only */ struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu); struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu); -void cpufreq_cpu_put (struct cpufreq_policy *data); +void cpufreq_cpu_put(struct cpufreq_policy *data); /* the following are really really optional */ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; -void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table, +void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table, unsigned int cpu); void cpufreq_frequency_table_put_attr(unsigned int cpu); -- cgit v1.2.3 From 9a1b9496cd2b013f74885218947fa7120d53e74c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 4 May 2011 12:18:54 -0700 Subject: ipv4: Pass explicit saddr/daddr args to ipmr_get_route(). This eliminates the need to use rt->rt_{src,dst}. Signed-off-by: David S. 
Miller --- include/linux/mroute.h | 1 + net/ipv4/ipmr.c | 16 ++++++++-------- net/ipv4/route.c | 4 +++- 3 files changed, 12 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index b21d567692b2..46caaf44339d 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -244,6 +244,7 @@ struct mfc_cache { #ifdef __KERNEL__ struct rtmsg; extern int ipmr_get_route(struct net *net, struct sk_buff *skb, + __be32 saddr, __be32 daddr, struct rtmsg *rtm, int nowait); #endif diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 86033b7a05ba..30a7763c400e 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2041,20 +2041,20 @@ rtattr_failure: return -EMSGSIZE; } -int ipmr_get_route(struct net *net, - struct sk_buff *skb, struct rtmsg *rtm, int nowait) +int ipmr_get_route(struct net *net, struct sk_buff *skb, + __be32 saddr, __be32 daddr, + struct rtmsg *rtm, int nowait) { - int err; - struct mr_table *mrt; struct mfc_cache *cache; - struct rtable *rt = skb_rtable(skb); + struct mr_table *mrt; + int err; mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); if (mrt == NULL) return -ENOENT; rcu_read_lock(); - cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst); + cache = ipmr_cache_find(mrt, saddr, daddr); if (cache == NULL) { struct sk_buff *skb2; @@ -2087,8 +2087,8 @@ int ipmr_get_route(struct net *net, skb_reset_network_header(skb2); iph = ip_hdr(skb2); iph->ihl = sizeof(struct iphdr) >> 2; - iph->saddr = rt->rt_src; - iph->daddr = rt->rt_dst; + iph->saddr = saddr; + iph->daddr = daddr; iph->version = 0; err = ipmr_cache_unresolved(mrt, vif, skb2); read_unlock(&mrt_lock); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3bc685454b5b..6a83840b16af 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2857,7 +2857,9 @@ static int rt_fill_info(struct net *net, if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { - int err = ipmr_get_route(net, skb, r, nowait); + int err = ipmr_get_route(net, skb, + rt->rt_src, rt->rt_dst, + r, nowait); if (err <= 0) { if (!nowait) { if (err == 0) -- cgit v1.2.3 From 1650629d1800bf05ad775f974e931ca2fa03b0ff Mon Sep 17 00:00:00 2001 From: Kurt Van Dijck Date: Tue, 3 May 2011 18:40:57 +0000 Subject: can: make struct can_proto const commit 53914b67993c724cec585863755c9ebc8446e83b had the same message. That commit did put everything in place but did not make can_proto const itself. Signed-off-by: Kurt Van Dijck Acked-by: Oliver Hartkopp Signed-off-by: David S. 
Miller --- include/linux/can/core.h | 4 ++-- net/can/af_can.c | 12 ++++++------ net/can/bcm.c | 2 +- net/can/raw.c | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/core.h b/include/linux/can/core.h index 6f70a6d3a16e..5ce6b5d62ecc 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -44,8 +44,8 @@ struct can_proto { /* function prototypes for the CAN networklayer core (af_can.c) */ -extern int can_proto_register(struct can_proto *cp); -extern void can_proto_unregister(struct can_proto *cp); +extern int can_proto_register(const struct can_proto *cp); +extern void can_proto_unregister(const struct can_proto *cp); extern int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, diff --git a/net/can/af_can.c b/net/can/af_can.c index a8dcaa49675a..5b52762b9f20 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -84,7 +84,7 @@ static DEFINE_SPINLOCK(can_rcvlists_lock); static struct kmem_cache *rcv_cache __read_mostly; /* table of registered CAN protocols */ -static struct can_proto *proto_tab[CAN_NPROTO] __read_mostly; +static const struct can_proto *proto_tab[CAN_NPROTO] __read_mostly; static DEFINE_MUTEX(proto_tab_lock); struct timer_list can_stattimer; /* timer for statistics update */ @@ -115,9 +115,9 @@ static void can_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); } -static struct can_proto *can_try_module_get(int protocol) +static const struct can_proto *can_try_module_get(int protocol) { - struct can_proto *cp; + const struct can_proto *cp; rcu_read_lock(); cp = rcu_dereference(proto_tab[protocol]); @@ -132,7 +132,7 @@ static int can_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; - struct can_proto *cp; + const struct can_proto *cp; int err = 0; sock->state = SS_UNCONNECTED; @@ -691,7 +691,7 @@ drop: * -EBUSY protocol already in use * -ENOBUF if proto_register() fails */ -int can_proto_register(struct can_proto *cp) +int can_proto_register(const struct can_proto *cp) { int proto = cp->protocol; int err = 0; @@ -728,7 +728,7 @@ EXPORT_SYMBOL(can_proto_register); * can_proto_unregister - unregister CAN transport protocol * @cp: pointer to CAN protocol structure */ -void can_proto_unregister(struct can_proto *cp) +void can_proto_unregister(const struct can_proto *cp) { int proto = cp->protocol; diff --git a/net/can/bcm.c b/net/can/bcm.c index 8a6a05e7c3c8..cced806098a9 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1601,7 +1601,7 @@ static struct proto bcm_proto __read_mostly = { .init = bcm_init, }; -static struct can_proto bcm_can_proto __read_mostly = { +static const struct can_proto bcm_can_proto = { .type = SOCK_DGRAM, .protocol = CAN_BCM, .ops = &bcm_ops, diff --git a/net/can/raw.c b/net/can/raw.c index 0eb39a7fdf64..dea99a6e596c 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -774,7 +774,7 @@ static struct proto raw_proto __read_mostly = { .init = raw_init, }; -static struct can_proto raw_can_proto __read_mostly = { +static const struct can_proto raw_can_proto = { .type = SOCK_RAW, .protocol = CAN_RAW, .ops = &raw_ops, -- cgit v1.2.3 From 30106b8ce2cc2243514116d6f29086e6deecc754 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 May 2011 15:38:19 +0200 Subject: slub: Fix the lockless code on 32-bit platforms with no 64-bit cmpxchg The SLUB allocator use of the cmpxchg_double logic was wrong: it actually needs the irq-safe one. 
That happens automatically when we use the native unlocked 'cmpxchg8b' instruction, but when compiling the kernel for older x86 CPUs that do not support that instruction, we fall back to the generic emulation code. And if you don't specify that you want the irq-safe version, the generic code ends up just open-coding the cmpxchg8b equivalent without any protection against interrupts or preemption. Which definitely doesn't work for SLUB. This was reported by Werner Landgraf , who saw instability with his distro-kernel that was compiled to support pretty much everything under the sun. Most big Linux distributions tend to compile for PPro and later, and would never have noticed this problem. This also fixes the prototypes for the irqsafe cmpxchg_double functions to use 'bool' like they should. [ Btw, that whole "generic code defaults to no protection" design just sounds stupid - if the code needs no protection, there is no reason to use "cmpxchg_double" to begin with. So we should probably just remove the unprotected version entirely as pointless. - Linus ] Signed-off-by: Thomas Gleixner Reported-and-tested-by: werner Acked-and-tested-by: Ingo Molnar Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: Jens Axboe Cc: Tejun Heo Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1105041539050.3005@ionos Signed-off-by: Ingo Molnar Signed-off-by: Linus Torvalds --- include/linux/percpu.h | 2 +- mm/slub.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 3a5c4449fd36..8b97308e65df 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -948,7 +948,7 @@ do { \ irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) # endif # define irqsafe_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __pcpu_double_call_return_int(irqsafe_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) + __pcpu_double_call_return_bool(irqsafe_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) #endif #endif /* __LINUX_PERCPU_H */ diff --git a/mm/slub.c b/mm/slub.c index 94d2a33a866e..9d2e5e46bf09 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1940,7 +1940,7 @@ redo: * Since this is without lock semantics the protection is only against * code executing on this cpu *not* from access by other cpus. */ - if (unlikely(!this_cpu_cmpxchg_double( + if (unlikely(!irqsafe_cpu_cmpxchg_double( s->cpu_slab->freelist, s->cpu_slab->tid, object, tid, get_freepointer(s, object), next_tid(tid)))) { @@ -2145,7 +2145,7 @@ redo: set_freepointer(s, object, c->freelist); #ifdef CONFIG_CMPXCHG_LOCAL - if (unlikely(!this_cpu_cmpxchg_double( + if (unlikely(!irqsafe_cpu_cmpxchg_double( s->cpu_slab->freelist, s->cpu_slab->tid, c->freelist, tid, object, next_tid(tid)))) { -- cgit v1.2.3 From 228e548e602061b08ee8e8966f567c12aa079682 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 2 May 2011 20:21:35 +0000 Subject: net: Add sendmmsg socket system call This patch adds a multiple message send syscall and is the send version of the existing recvmmsg syscall. This is heavily based on the patch by Arnaldo that added recvmmsg. I wrote a microbenchmark to test the performance gains of using this new syscall: http://ozlabs.org/~anton/junkcode/sendmmsg_test.c The test was run on a ppc64 box with a 10 Gbit network card. The benchmark can send both UDP and RAW ethernet packets. 
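The calling pattern the new syscall enables is sketched below for the UDP case. This is an illustrative sketch only -- it is not taken from this patch or from the benchmark linked above -- and it assumes a socket already connect()ed to the receiver plus a libc new enough to provide a sendmmsg() wrapper (otherwise syscall(__NR_sendmmsg, ...) has to be used instead):

    /*
     * Illustrative sketch only -- not part of this patch and not the
     * benchmark code.  Assumes fd is a UDP socket already connect()ed
     * to the receiver and a libc that exposes a sendmmsg() wrapper;
     * older userspace needs syscall(__NR_sendmmsg, ...) instead.
     */
    #define _GNU_SOURCE
    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <string.h>

    #define BATCH 32

    static int send_batch(int fd, void *payload, size_t len)
    {
            struct mmsghdr msgs[BATCH];
            struct iovec iovs[BATCH];
            int i;

            memset(msgs, 0, sizeof(msgs));
            for (i = 0; i < BATCH; i++) {
                    iovs[i].iov_base = payload;
                    iovs[i].iov_len  = len;
                    msgs[i].msg_hdr.msg_iov    = &iovs[i];
                    msgs[i].msg_hdr.msg_iovlen = 1;
            }

            /* one system call queues up to BATCH datagrams */
            return sendmmsg(fd, msgs, BATCH, 0);
    }

Measured throughput at various batch sizes: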
64B UDP

batch   pkts/sec
1       804570
2       872800 (+ 8 %)
4       916556 (+14 %)
8       939712 (+17 %)
16      952688 (+18 %)
32      956448 (+19 %)
64      964800 (+20 %)

64B raw socket

batch   pkts/sec
1       1201449
2       1350028 (+12 %)
4       1461416 (+22 %)
8       1513080 (+26 %)
16      1541216 (+28 %)
32      1553440 (+29 %)
64      1557888 (+30 %)

We see a 20% improvement in throughput on UDP send and 30% on raw socket send.
[ Add sparc syscall entries. -DaveM ] Signed-off-by: Anton Blanchard Signed-off-by: David S. Miller --- arch/powerpc/include/asm/systbl.h | 1 + arch/powerpc/include/asm/unistd.h | 3 +- arch/sparc/include/asm/unistd.h | 3 +- arch/sparc/kernel/systbls_32.S | 2 +- arch/sparc/kernel/systbls_64.S | 4 +- arch/x86/ia32/ia32entry.S | 1 + arch/x86/include/asm/unistd_32.h | 3 +- arch/x86/include/asm/unistd_64.h | 2 + arch/x86/kernel/syscall_table_32.S | 1 + include/linux/net.h | 1 + include/linux/socket.h | 2 + include/linux/syscalls.h | 2 + include/net/compat.h | 2 + kernel/sys_ni.c | 2 + net/compat.c | 16 ++- net/socket.c | 199 +++++++++++++++++++++++++++++-------- 16 files changed, 192 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 60f64b132bd4..8489d372077f 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -352,3 +352,4 @@ SYSCALL_SPU(name_to_handle_at) COMPAT_SYS_SPU(open_by_handle_at) COMPAT_SYS_SPU(clock_adjtime) SYSCALL_SPU(syncfs) +COMPAT_SYS_SPU(sendmmsg) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 3c215648ce6d..6d23c8193caa 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -371,10 +371,11 @@ #define __NR_open_by_handle_at 346 #define __NR_clock_adjtime 347 #define __NR_syncfs 348 +#define __NR_sendmmsg 349 #ifdef __KERNEL__ -#define __NR_syscalls 349 +#define __NR_syscalls 350 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index 9d897b6db983..c5387ed0add8 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h @@ -404,8 +404,9 @@ #define __NR_open_by_handle_at 333 #define __NR_clock_adjtime 334 #define __NR_syncfs 335 +#define __NR_sendmmsg 336 -#define NR_syscalls 336 +#define NR_syscalls 337 #ifdef __32bit_syscall_numbers__ /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants, diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 47ac73c32e88..332c83ff7701 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -84,4 +84,4 @@ sys_call_table: /*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv /*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init /*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime -/*335*/ .long sys_syncfs +/*335*/ .long sys_syncfs, sys_sendmmsg diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 4f3170c1ef47..43887ca0be0e 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -85,7 +85,7 @@ sys_call_table32: /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init /*330*/ .word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at,
compat_sys_clock_adjtime - .word sys_syncfs + .word sys_syncfs, compat_sys_sendmmsg #endif /* CONFIG_COMPAT */ @@ -162,4 +162,4 @@ sys_call_table: /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init /*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime - .word sys_syncfs + .word sys_syncfs, sys_sendmmsg diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 849a9d23c71d..95f5826be458 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -848,4 +848,5 @@ ia32_sys_call_table: .quad compat_sys_open_by_handle_at .quad compat_sys_clock_adjtime .quad sys_syncfs + .quad compat_sys_sendmmsg /* 345 */ ia32_syscall_end: diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index a755ef5e5977..fb6a625c99bf 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -350,10 +350,11 @@ #define __NR_open_by_handle_at 342 #define __NR_clock_adjtime 343 #define __NR_syncfs 344 +#define __NR_sendmmsg 345 #ifdef __KERNEL__ -#define NR_syscalls 345 +#define NR_syscalls 346 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 160fa76bd578..79f90eb15aad 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -677,6 +677,8 @@ __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) __SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) #define __NR_syncfs 306 __SYSCALL(__NR_syncfs, sys_syncfs) +#define __NR_sendmmsg 307 +__SYSCALL(__NR_sendmmsg, sys_sendmmsg) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index abce34d5c79d..32cbffb0c494 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -344,3 +344,4 @@ ENTRY(sys_call_table) .long sys_open_by_handle_at .long sys_clock_adjtime .long sys_syncfs + .long sys_sendmmsg /* 345 */ diff --git a/include/linux/net.h b/include/linux/net.h index 94de83c0f877..1da55e9b6f01 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -42,6 +42,7 @@ #define SYS_RECVMSG 17 /* sys_recvmsg(2) */ #define SYS_ACCEPT4 18 /* sys_accept4(2) */ #define SYS_RECVMMSG 19 /* sys_recvmmsg(2) */ +#define SYS_SENDMMSG 20 /* sys_sendmmsg(2) */ typedef enum { SS_FREE = 0, /* not allocated */ diff --git a/include/linux/socket.h b/include/linux/socket.h index d2b5e982f079..4ef98e422fde 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -333,5 +333,7 @@ struct timespec; extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags, struct timespec *timeout); +extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, + unsigned int vlen, unsigned int flags); #endif /* not kernel and not glibc */ #endif /* _LINUX_SOCKET_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 83ecc1749ef6..ab71447d0c5a 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -610,6 +610,8 @@ asmlinkage long sys_send(int, void __user *, size_t, unsigned); asmlinkage long sys_sendto(int, void __user *, size_t, unsigned, struct sockaddr __user *, int); asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags); +asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg, + 
unsigned int vlen, unsigned flags); asmlinkage long sys_recv(int, void __user *, size_t, unsigned); asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned, struct sockaddr __user *, int __user *); diff --git a/include/net/compat.h b/include/net/compat.h index 28d5428ec6a2..9ee75edcc295 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -43,6 +43,8 @@ extern int compat_sock_get_timestampns(struct sock *, struct timespec __user *); extern int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *); extern int verify_compat_iovec(struct msghdr *, struct iovec *, struct sockaddr *, int); extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsigned); +extern asmlinkage long compat_sys_sendmmsg(int, struct compat_mmsghdr __user *, + unsigned, unsigned); extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned); extern asmlinkage long compat_sys_recvmmsg(int, struct compat_mmsghdr __user *, unsigned, unsigned, diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 25cc41cd8f33..97e966f171c6 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -46,7 +46,9 @@ cond_syscall(sys_getsockopt); cond_syscall(compat_sys_getsockopt); cond_syscall(sys_shutdown); cond_syscall(sys_sendmsg); +cond_syscall(sys_sendmmsg); cond_syscall(compat_sys_sendmsg); +cond_syscall(compat_sys_sendmmsg); cond_syscall(sys_recvmsg); cond_syscall(sys_recvmmsg); cond_syscall(compat_sys_recvmsg); diff --git a/net/compat.c b/net/compat.c index 3649d5895361..c578d9382e19 100644 --- a/net/compat.c +++ b/net/compat.c @@ -722,11 +722,11 @@ EXPORT_SYMBOL(compat_mc_getsockopt); /* Argument list sizes for compat_sys_socketcall */ #define AL(x) ((x) * sizeof(u32)) -static unsigned char nas[20] = { +static unsigned char nas[21] = { AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), - AL(4), AL(5) + AL(4), AL(5), AL(4) }; #undef AL @@ -735,6 +735,13 @@ asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, uns return sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } +asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, + unsigned vlen, unsigned int flags) +{ + return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, + flags | MSG_CMSG_COMPAT); +} + asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) { return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); @@ -780,7 +787,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) u32 a[6]; u32 a0, a1; - if (call < SYS_SOCKET || call > SYS_RECVMMSG) + if (call < SYS_SOCKET || call > SYS_SENDMMSG) return -EINVAL; if (copy_from_user(a, args, nas[call])) return -EFAULT; @@ -839,6 +846,9 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) case SYS_SENDMSG: ret = compat_sys_sendmsg(a0, compat_ptr(a1), a[2]); break; + case SYS_SENDMMSG: + ret = compat_sys_sendmmsg(a0, compat_ptr(a1), a[2], a[3]); + break; case SYS_RECVMSG: ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); break; diff --git a/net/socket.c b/net/socket.c index d25f5a9d6fa2..ed50255143d5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -551,11 +551,10 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) } EXPORT_SYMBOL(sock_tx_timestamp); -static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) +static inline int __sock_sendmsg_nosec(struct kiocb 
*iocb, struct socket *sock, + struct msghdr *msg, size_t size) { struct sock_iocb *si = kiocb_to_siocb(iocb); - int err; sock_update_classid(sock->sk); @@ -564,13 +563,17 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, si->msg = msg; si->size = size; - err = security_socket_sendmsg(sock, msg, size); - if (err) - return err; - return sock->ops->sendmsg(iocb, sock, msg, size); } +static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size) +{ + int err = security_socket_sendmsg(sock, msg, size); + + return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size); +} + int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct kiocb iocb; @@ -586,6 +589,20 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) } EXPORT_SYMBOL(sock_sendmsg); +int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size) +{ + struct kiocb iocb; + struct sock_iocb siocb; + int ret; + + init_sync_kiocb(&iocb, NULL); + iocb.private = &siocb; + ret = __sock_sendmsg_nosec(&iocb, sock, msg, size); + if (-EIOCBQUEUED == ret) + ret = wait_on_sync_kiocb(&iocb); + return ret; +} + int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) { @@ -1863,57 +1880,47 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how) #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) -/* - * BSD sendmsg interface - */ - -SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) +static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, + struct msghdr *msg_sys, unsigned flags, int nosec) { struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; - struct socket *sock; struct sockaddr_storage address; struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; unsigned char ctl[sizeof(struct cmsghdr) + 20] __attribute__ ((aligned(sizeof(__kernel_size_t)))); /* 20 is size of ipv6_pktinfo */ unsigned char *ctl_buf = ctl; - struct msghdr msg_sys; int err, ctl_len, iov_size, total_len; - int fput_needed; err = -EFAULT; if (MSG_CMSG_COMPAT & flags) { - if (get_compat_msghdr(&msg_sys, msg_compat)) + if (get_compat_msghdr(msg_sys, msg_compat)) return -EFAULT; - } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) + } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) return -EFAULT; - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) - goto out; - /* do not move before msg_sys is valid */ err = -EMSGSIZE; - if (msg_sys.msg_iovlen > UIO_MAXIOV) - goto out_put; + if (msg_sys->msg_iovlen > UIO_MAXIOV) + goto out; /* Check whether to allocate the iovec area */ err = -ENOMEM; - iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); - if (msg_sys.msg_iovlen > UIO_FASTIOV) { + iov_size = msg_sys->msg_iovlen * sizeof(struct iovec); + if (msg_sys->msg_iovlen > UIO_FASTIOV) { iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); if (!iov) - goto out_put; + goto out; } /* This will also move the address data into kernel space */ if (MSG_CMSG_COMPAT & flags) { - err = verify_compat_iovec(&msg_sys, iov, + err = verify_compat_iovec(msg_sys, iov, (struct sockaddr *)&address, VERIFY_READ); } else - err = verify_iovec(&msg_sys, iov, + err = verify_iovec(msg_sys, iov, (struct sockaddr *)&address, VERIFY_READ); if (err < 0) @@ -1922,17 +1929,17 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) err = -ENOBUFS; - if 
(msg_sys.msg_controllen > INT_MAX) + if (msg_sys->msg_controllen > INT_MAX) goto out_freeiov; - ctl_len = msg_sys.msg_controllen; + ctl_len = msg_sys->msg_controllen; if ((MSG_CMSG_COMPAT & flags) && ctl_len) { err = - cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, + cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl, sizeof(ctl)); if (err) goto out_freeiov; - ctl_buf = msg_sys.msg_control; - ctl_len = msg_sys.msg_controllen; + ctl_buf = msg_sys->msg_control; + ctl_len = msg_sys->msg_controllen; } else if (ctl_len) { if (ctl_len > sizeof(ctl)) { ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); @@ -1941,21 +1948,22 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) } err = -EFAULT; /* - * Careful! Before this, msg_sys.msg_control contains a user pointer. + * Careful! Before this, msg_sys->msg_control contains a user pointer. * Afterwards, it will be a kernel pointer. Thus the compiler-assisted * checking falls down on this. */ if (copy_from_user(ctl_buf, - (void __user __force *)msg_sys.msg_control, + (void __user __force *)msg_sys->msg_control, ctl_len)) goto out_freectl; - msg_sys.msg_control = ctl_buf; + msg_sys->msg_control = ctl_buf; } - msg_sys.msg_flags = flags; + msg_sys->msg_flags = flags; if (sock->file->f_flags & O_NONBLOCK) - msg_sys.msg_flags |= MSG_DONTWAIT; - err = sock_sendmsg(sock, &msg_sys, total_len); + msg_sys->msg_flags |= MSG_DONTWAIT; + err = (nosec ? sock_sendmsg_nosec : sock_sendmsg)(sock, msg_sys, + total_len); out_freectl: if (ctl_buf != ctl) @@ -1963,12 +1971,114 @@ out_freectl: out_freeiov: if (iov != iovstack) sock_kfree_s(sock->sk, iov, iov_size); -out_put: +out: + return err; +} + +/* + * BSD sendmsg interface + */ + +SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) +{ + int fput_needed, err; + struct msghdr msg_sys; + struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed); + + if (!sock) + goto out; + + err = __sys_sendmsg(sock, msg, &msg_sys, flags, 0); + fput_light(sock->file, fput_needed); out: return err; } +/* + * Linux sendmmsg interface + */ + +int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, + unsigned int flags) +{ + int fput_needed, err, datagrams; + struct socket *sock; + struct mmsghdr __user *entry; + struct compat_mmsghdr __user *compat_entry; + struct msghdr msg_sys; + + datagrams = 0; + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (!sock) + return err; + + err = sock_error(sock->sk); + if (err) + goto out_put; + + entry = mmsg; + compat_entry = (struct compat_mmsghdr __user *)mmsg; + + while (datagrams < vlen) { + /* + * No need to ask LSM for more than the first datagram. + */ + if (MSG_CMSG_COMPAT & flags) { + err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry, + &msg_sys, flags, datagrams); + if (err < 0) + break; + err = __put_user(err, &compat_entry->msg_len); + ++compat_entry; + } else { + err = __sys_sendmsg(sock, (struct msghdr __user *)entry, + &msg_sys, flags, datagrams); + if (err < 0) + break; + err = put_user(err, &entry->msg_len); + ++entry; + } + + if (err) + break; + ++datagrams; + } + +out_put: + fput_light(sock->file, fput_needed); + + if (err == 0) + return datagrams; + + if (datagrams != 0) { + /* + * We may send less entries than requested (vlen) if the + * sock is non blocking... + */ + if (err != -EAGAIN) { + /* + * ... 
or if sendmsg returns an error after we + * send some datagrams, where we record the + * error to return on the next call or if the + * app asks about it using getsockopt(SO_ERROR). + */ + sock->sk->sk_err = -err; + } + + return datagrams; + } + + return err; +} + +SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg, + unsigned int, vlen, unsigned int, flags) +{ + return __sys_sendmmsg(fd, mmsg, vlen, flags); +} + static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, struct msghdr *msg_sys, unsigned flags, int nosec) { @@ -2214,11 +2324,11 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, #ifdef __ARCH_WANT_SYS_SOCKETCALL /* Argument list sizes for sys_socketcall */ #define AL(x) ((x) * sizeof(unsigned long)) -static const unsigned char nargs[20] = { +static const unsigned char nargs[21] = { AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), - AL(4), AL(5) + AL(4), AL(5), AL(4) }; #undef AL @@ -2238,7 +2348,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) int err; unsigned int len; - if (call < 1 || call > SYS_RECVMMSG) + if (call < 1 || call > SYS_SENDMMSG) return -EINVAL; len = nargs[call]; @@ -2313,6 +2423,9 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) case SYS_SENDMSG: err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); break; + case SYS_SENDMMSG: + err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]); + break; case SYS_RECVMSG: err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); break; -- cgit v1.2.3 From ff1b6e69ad4f31fb3c9c6da2665655f2e798dd70 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 4 May 2011 15:37:28 +0200 Subject: nl80211/cfg80211: WoWLAN support This is based on (but now quite far from) the original work from Luis and Eliad. Add support for configuring WoWLAN triggers, and getting the configuration out again. Changes from the original patchset are too numerous to list, but one important change needs highlighting: the suspend() callback is passed NULL for the trigger configuration if userspace has not configured WoWLAN at all. Signed-off-by: Luis R. Rodriguez Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 99 ++++++++++++++++++++ include/net/cfg80211.h | 74 ++++++++++++++- net/mac80211/cfg.c | 3 +- net/wireless/core.c | 8 ++ net/wireless/core.h | 14 +++ net/wireless/nl80211.c | 243 ++++++++++++++++++++++++++++++++++++++++++++++++ net/wireless/sysfs.c | 2 +- 7 files changed, 439 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index be8df57b789d..a75dea9c416e 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -420,6 +420,14 @@ * new station with the AUTHENTICATED flag unset and maybe change it later * depending on the authentication result. * + * @NL80211_CMD_GET_WOWLAN: get Wake-on-Wireless-LAN (WoWLAN) settings. + * @NL80211_CMD_SET_WOWLAN: set Wake-on-Wireless-LAN (WoWLAN) settings. + * Since wireless is more complex than wired ethernet, it supports + * various triggers. These triggers can be configured through this + * command with the %NL80211_ATTR_WOWLAN_TRIGGERS attribute. For + * more background information, see + * http://wireless.kernel.org/en/users/Documentation/WoWLAN. 
+ * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -534,6 +542,9 @@ enum nl80211_commands { NL80211_CMD_NEW_PEER_CANDIDATE, + NL80211_CMD_GET_WOWLAN, + NL80211_CMD_SET_WOWLAN, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -903,6 +914,13 @@ enum nl80211_commands { * allows auth frames in a mesh to be passed to userspace for processing via * the @NL80211_MESH_SETUP_USERSPACE_AUTH flag. * + * @NL80211_ATTR_WOWLAN_SUPPORTED: indicates, as part of the wiphy capabilities, + * the supported WoWLAN triggers + * @NL80211_ATTR_WOWLAN_TRIGGERS: used by %NL80211_CMD_SET_WOWLAN to + * indicate which WoW triggers should be enabled. This is also + * used by %NL80211_CMD_GET_WOWLAN to get the currently enabled WoWLAN + * triggers. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1092,6 +1110,9 @@ enum nl80211_attrs { NL80211_ATTR_SUPPORT_MESH_AUTH, + NL80211_ATTR_WOWLAN_TRIGGERS, + NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -2061,4 +2082,82 @@ enum nl80211_tx_power_setting { NL80211_TX_POWER_FIXED, }; +/** + * enum nl80211_wowlan_packet_pattern_attr - WoWLAN packet pattern attribute + * @__NL80211_WOWLAN_PKTPAT_INVALID: invalid number for nested attribute + * @NL80211_WOWLAN_PKTPAT_PATTERN: the pattern, values where the mask has + * a zero bit are ignored + * @NL80211_WOWLAN_PKTPAT_MASK: pattern mask, must be long enough to have + * a bit for each byte in the pattern. The lowest-order bit corresponds + * to the first byte of the pattern, but the bytes of the pattern are + * in a little-endian-like format, i.e. the 9th byte of the pattern + * corresponds to the lowest-order bit in the second byte of the mask. + * For example: The match 00:xx:00:00:xx:00:00:00:00:xx:xx:xx (where + * xx indicates "don't care") would be represented by a pattern of + * twelve zero bytes, and a mask of "0xed,0x07". + * Note that the pattern matching is done as though frames were not + * 802.11 frames but 802.3 frames, i.e. the frame is fully unpacked + * first (including SNAP header unpacking) and then matched. + * @NUM_NL80211_WOWLAN_PKTPAT: number of attributes + * @MAX_NL80211_WOWLAN_PKTPAT: max attribute number + */ +enum nl80211_wowlan_packet_pattern_attr { + __NL80211_WOWLAN_PKTPAT_INVALID, + NL80211_WOWLAN_PKTPAT_MASK, + NL80211_WOWLAN_PKTPAT_PATTERN, + + NUM_NL80211_WOWLAN_PKTPAT, + MAX_NL80211_WOWLAN_PKTPAT = NUM_NL80211_WOWLAN_PKTPAT - 1, +}; + +/** + * struct nl80211_wowlan_pattern_support - pattern support information + * @max_patterns: maximum number of patterns supported + * @min_pattern_len: minimum length of each pattern + * @max_pattern_len: maximum length of each pattern + * + * This struct is carried in %NL80211_WOWLAN_TRIG_PKT_PATTERN when + * that is part of %NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED in the + * capability information given by the kernel to userspace. 
+ */ +struct nl80211_wowlan_pattern_support { + __u32 max_patterns; + __u32 min_pattern_len; + __u32 max_pattern_len; +} __attribute__((packed)); + +/** + * enum nl80211_wowlan_triggers - WoWLAN trigger definitions + * @__NL80211_WOWLAN_TRIG_INVALID: invalid number for nested attributes + * @NL80211_WOWLAN_TRIG_ANY: wake up on any activity, do not really put + * the chip into a special state -- works best with chips that have + * support for low-power operation already (flag) + * @NL80211_WOWLAN_TRIG_DISCONNECT: wake up on disconnect, the way disconnect + * is detected is implementation-specific (flag) + * @NL80211_WOWLAN_TRIG_MAGIC_PKT: wake up on magic packet (6x 0xff, followed + * by 16 repetitions of MAC addr, anywhere in payload) (flag) + * @NL80211_WOWLAN_TRIG_PKT_PATTERN: wake up on the specified packet patterns + * which are passed in an array of nested attributes, each nested attribute + * defining a with attributes from &struct nl80211_wowlan_trig_pkt_pattern. + * Each pattern defines a wakeup packet. The matching is done on the MSDU, + * i.e. as though the packet was an 802.3 packet, so the pattern matching + * is done after the packet is converted to the MSDU. + * + * In %NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED, it is a binary attribute + * carrying a &struct nl80211_wowlan_pattern_support. + * @NUM_NL80211_WOWLAN_TRIG: number of wake on wireless triggers + * @MAX_NL80211_WOWLAN_TRIG: highest wowlan trigger attribute number + */ +enum nl80211_wowlan_triggers { + __NL80211_WOWLAN_TRIG_INVALID, + NL80211_WOWLAN_TRIG_ANY, + NL80211_WOWLAN_TRIG_DISCONNECT, + NL80211_WOWLAN_TRIG_MAGIC_PKT, + NL80211_WOWLAN_TRIG_PKT_PATTERN, + + /* keep last */ + NUM_NL80211_WOWLAN_TRIG, + MAX_NL80211_WOWLAN_TRIG = NUM_NL80211_WOWLAN_TRIG - 1 +}; + #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4932dfcb72b4..0920daf36807 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1087,6 +1087,38 @@ struct cfg80211_pmksa { u8 *pmkid; }; +/** + * struct cfg80211_wowlan_trig_pkt_pattern - packet pattern + * @mask: bitmask where to match pattern and where to ignore bytes, + * one bit per byte, in same format as nl80211 + * @pattern: bytes to match where bitmask is 1 + * @pattern_len: length of pattern (in bytes) + * + * Internal note: @mask and @pattern are allocated in one chunk of + * memory, free @mask only! + */ +struct cfg80211_wowlan_trig_pkt_pattern { + u8 *mask, *pattern; + int pattern_len; +}; + +/** + * struct cfg80211_wowlan - Wake on Wireless-LAN support info + * + * This structure defines the enabled WoWLAN triggers for the device. + * @any: wake up on any activity -- special trigger if device continues + * operating as normal during suspend + * @disconnect: wake up if getting disconnected + * @magic_pkt: wake up on receiving magic packet + * @patterns: wake up on receiving packet matching a pattern + * @n_patterns: number of patterns + */ +struct cfg80211_wowlan { + bool any, disconnect, magic_pkt; + struct cfg80211_wowlan_trig_pkt_pattern *patterns; + int n_patterns; +}; + /** * struct cfg80211_ops - backend description for wireless configuration * @@ -1100,7 +1132,9 @@ struct cfg80211_pmksa { * wireless extensions but this is subject to reevaluation as soon as this * code is used more widely and we have a first user without wext. * - * @suspend: wiphy device needs to be suspended + * @suspend: wiphy device needs to be suspended. 
The variable @wow will + * be %NULL or contain the enabled Wake-on-Wireless triggers that are + * configured for the device. * @resume: wiphy device needs to be resumed * * @add_virtual_intf: create a new virtual interface with the given name, @@ -1244,7 +1278,7 @@ struct cfg80211_pmksa { * @get_ringparam: Get tx and rx ring current and maximum sizes. */ struct cfg80211_ops { - int (*suspend)(struct wiphy *wiphy); + int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); int (*resume)(struct wiphy *wiphy); struct net_device * (*add_virtual_intf)(struct wiphy *wiphy, @@ -1479,6 +1513,38 @@ struct ieee80211_txrx_stypes { u16 tx, rx; }; +/** + * enum wiphy_wowlan_support_flags - WoWLAN support flags + * @WIPHY_WOWLAN_ANY: supports wakeup for the special "any" + * trigger that keeps the device operating as-is and + * wakes up the host on any activity, for example a + * received packet that passed filtering; note that the + * packet should be preserved in that case + * @WIPHY_WOWLAN_MAGIC_PKT: supports wakeup on magic packet + * (see nl80211.h) + * @WIPHY_WOWLAN_DISCONNECT: supports wakeup on disconnect + */ +enum wiphy_wowlan_support_flags { + WIPHY_WOWLAN_ANY = BIT(0), + WIPHY_WOWLAN_MAGIC_PKT = BIT(1), + WIPHY_WOWLAN_DISCONNECT = BIT(2), +}; + +/** + * struct wiphy_wowlan_support - WoWLAN support data + * @flags: see &enum wiphy_wowlan_support_flags + * @n_patterns: number of supported wakeup patterns + * (see nl80211.h for the pattern definition) + * @pattern_max_len: maximum length of each pattern + * @pattern_min_len: minimum length of each pattern + */ +struct wiphy_wowlan_support { + u32 flags; + int n_patterns; + int pattern_max_len; + int pattern_min_len; +}; + /** * struct wiphy - wireless hardware description * @reg_notifier: the driver's regulatory notification callback, @@ -1546,6 +1612,8 @@ struct ieee80211_txrx_stypes { * * @max_remain_on_channel_duration: Maximum time a remain-on-channel operation * may request, if implemented. 
+ * + * @wowlan: WoWLAN support information */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -1583,6 +1651,8 @@ struct wiphy { char fw_version[ETHTOOL_BUSINFO_LEN]; u32 hw_version; + struct wiphy_wowlan_support wowlan; + u16 max_remain_on_channel_duration; u8 max_num_pmkids; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 12d52cec9515..321d598eb8cb 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1297,7 +1297,8 @@ static int ieee80211_set_channel(struct wiphy *wiphy, } #ifdef CONFIG_PM -static int ieee80211_suspend(struct wiphy *wiphy) +static int ieee80211_suspend(struct wiphy *wiphy, + struct cfg80211_wowlan *wowlan) { return __ieee80211_suspend(wiphy_priv(wiphy)); } diff --git a/net/wireless/core.c b/net/wireless/core.c index bbf1fa11107a..bea0d80710c8 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -493,6 +493,13 @@ int wiphy_register(struct wiphy *wiphy) return -EINVAL; } + if (rdev->wiphy.wowlan.n_patterns) { + if (WARN_ON(!rdev->wiphy.wowlan.pattern_min_len || + rdev->wiphy.wowlan.pattern_min_len > + rdev->wiphy.wowlan.pattern_max_len)) + return -EINVAL; + } + /* check and set up bitrates */ ieee80211_set_bitrate_flags(wiphy); @@ -631,6 +638,7 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev) mutex_destroy(&rdev->devlist_mtx); list_for_each_entry_safe(scan, tmp, &rdev->bss_list, list) cfg80211_put_bss(&scan->pub); + cfg80211_rdev_free_wowlan(rdev); kfree(rdev); } diff --git a/net/wireless/core.h b/net/wireless/core.h index 26a0a084e16b..7a18c10a7fb6 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -70,6 +70,8 @@ struct cfg80211_registered_device { struct work_struct conn_work; struct work_struct event_work; + struct cfg80211_wowlan *wowlan; + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN))); @@ -89,6 +91,18 @@ bool wiphy_idx_valid(int wiphy_idx) return wiphy_idx >= 0; } +static inline void +cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) +{ + int i; + + if (!rdev->wowlan) + return; + for (i = 0; i < rdev->wowlan->n_patterns; i++) + kfree(rdev->wowlan->patterns[i].mask); + kfree(rdev->wowlan->patterns); + kfree(rdev->wowlan); +} extern struct workqueue_struct *cfg80211_wq; extern struct mutex cfg80211_mutex; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ab77f943dc04..0a199a1ca9b6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -173,6 +173,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_MCAST_RATE] = { .type = NLA_U32 }, [NL80211_ATTR_OFFCHANNEL_TX_OK] = { .type = NLA_FLAG }, [NL80211_ATTR_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED }, + [NL80211_ATTR_WOWLAN_TRIGGERS] = { .type = NLA_NESTED }, }; /* policy for the key attributes */ @@ -194,6 +195,15 @@ nl80211_key_default_policy[NUM_NL80211_KEY_DEFAULT_TYPES] = { [NL80211_KEY_DEFAULT_TYPE_MULTICAST] = { .type = NLA_FLAG }, }; +/* policy for WoWLAN attributes */ +static const struct nla_policy +nl80211_wowlan_policy[NUM_NL80211_WOWLAN_TRIG] = { + [NL80211_WOWLAN_TRIG_ANY] = { .type = NLA_FLAG }, + [NL80211_WOWLAN_TRIG_DISCONNECT] = { .type = NLA_FLAG }, + [NL80211_WOWLAN_TRIG_MAGIC_PKT] = { .type = NLA_FLAG }, + [NL80211_WOWLAN_TRIG_PKT_PATTERN] = { .type = NLA_NESTED }, +}; + /* ifidx get helper */ static int nl80211_get_ifidx(struct netlink_callback *cb) { @@ -821,6 +831,35 @@ static int nl80211_send_wiphy(struct sk_buff 
*msg, u32 pid, u32 seq, int flags, nla_nest_end(msg, nl_ifs); } + if (dev->wiphy.wowlan.flags || dev->wiphy.wowlan.n_patterns) { + struct nlattr *nl_wowlan; + + nl_wowlan = nla_nest_start(msg, + NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); + if (!nl_wowlan) + goto nla_put_failure; + + if (dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY) + NLA_PUT_FLAG(msg, NL80211_WOWLAN_TRIG_ANY); + if (dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT) + NLA_PUT_FLAG(msg, NL80211_WOWLAN_TRIG_DISCONNECT); + if (dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT) + NLA_PUT_FLAG(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT); + if (dev->wiphy.wowlan.n_patterns) { + struct nl80211_wowlan_pattern_support pat = { + .max_patterns = dev->wiphy.wowlan.n_patterns, + .min_pattern_len = + dev->wiphy.wowlan.pattern_min_len, + .max_pattern_len = + dev->wiphy.wowlan.pattern_max_len, + }; + NLA_PUT(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, + sizeof(pat), &pat); + } + + nla_nest_end(msg, nl_wowlan); + } + return genlmsg_end(msg, hdr); nla_put_failure: @@ -4808,6 +4847,194 @@ static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info) return cfg80211_leave_mesh(rdev, dev); } +static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct sk_buff *msg; + void *hdr; + + if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns) + return -EOPNOTSUPP; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + NL80211_CMD_GET_WOWLAN); + if (!hdr) + goto nla_put_failure; + + if (rdev->wowlan) { + struct nlattr *nl_wowlan; + + nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS); + if (!nl_wowlan) + goto nla_put_failure; + + if (rdev->wowlan->any) + NLA_PUT_FLAG(msg, NL80211_WOWLAN_TRIG_ANY); + if (rdev->wowlan->disconnect) + NLA_PUT_FLAG(msg, NL80211_WOWLAN_TRIG_DISCONNECT); + if (rdev->wowlan->magic_pkt) + NLA_PUT_FLAG(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT); + if (rdev->wowlan->n_patterns) { + struct nlattr *nl_pats, *nl_pat; + int i, pat_len; + + nl_pats = nla_nest_start(msg, + NL80211_WOWLAN_TRIG_PKT_PATTERN); + if (!nl_pats) + goto nla_put_failure; + + for (i = 0; i < rdev->wowlan->n_patterns; i++) { + nl_pat = nla_nest_start(msg, i + 1); + if (!nl_pat) + goto nla_put_failure; + pat_len = rdev->wowlan->patterns[i].pattern_len; + NLA_PUT(msg, NL80211_WOWLAN_PKTPAT_MASK, + DIV_ROUND_UP(pat_len, 8), + rdev->wowlan->patterns[i].mask); + NLA_PUT(msg, NL80211_WOWLAN_PKTPAT_PATTERN, + pat_len, + rdev->wowlan->patterns[i].pattern); + nla_nest_end(msg, nl_pat); + } + nla_nest_end(msg, nl_pats); + } + + nla_nest_end(msg, nl_wowlan); + } + + genlmsg_end(msg, hdr); + return genlmsg_reply(msg, info); + +nla_put_failure: + nlmsg_free(msg); + return -ENOBUFS; +} + +static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct nlattr *tb[NUM_NL80211_WOWLAN_TRIG]; + struct cfg80211_wowlan no_triggers = {}; + struct cfg80211_wowlan new_triggers = {}; + struct wiphy_wowlan_support *wowlan = &rdev->wiphy.wowlan; + int err, i; + + if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]) + goto no_triggers; + + err = nla_parse(tb, MAX_NL80211_WOWLAN_TRIG, + nla_data(info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]), + nla_len(info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]), + nl80211_wowlan_policy); + if (err) + 
return err; + + if (tb[NL80211_WOWLAN_TRIG_ANY]) { + if (!(wowlan->flags & WIPHY_WOWLAN_ANY)) + return -EINVAL; + new_triggers.any = true; + } + + if (tb[NL80211_WOWLAN_TRIG_DISCONNECT]) { + if (!(wowlan->flags & WIPHY_WOWLAN_DISCONNECT)) + return -EINVAL; + new_triggers.disconnect = true; + } + + if (tb[NL80211_WOWLAN_TRIG_MAGIC_PKT]) { + if (!(wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT)) + return -EINVAL; + new_triggers.magic_pkt = true; + } + + if (tb[NL80211_WOWLAN_TRIG_PKT_PATTERN]) { + struct nlattr *pat; + int n_patterns = 0; + int rem, pat_len, mask_len; + struct nlattr *pat_tb[NUM_NL80211_WOWLAN_PKTPAT]; + + nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN], + rem) + n_patterns++; + if (n_patterns > wowlan->n_patterns) + return -EINVAL; + + new_triggers.patterns = kcalloc(n_patterns, + sizeof(new_triggers.patterns[0]), + GFP_KERNEL); + if (!new_triggers.patterns) + return -ENOMEM; + + new_triggers.n_patterns = n_patterns; + i = 0; + + nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN], + rem) { + nla_parse(pat_tb, MAX_NL80211_WOWLAN_PKTPAT, + nla_data(pat), nla_len(pat), NULL); + err = -EINVAL; + if (!pat_tb[NL80211_WOWLAN_PKTPAT_MASK] || + !pat_tb[NL80211_WOWLAN_PKTPAT_PATTERN]) + goto error; + pat_len = nla_len(pat_tb[NL80211_WOWLAN_PKTPAT_PATTERN]); + mask_len = DIV_ROUND_UP(pat_len, 8); + if (nla_len(pat_tb[NL80211_WOWLAN_PKTPAT_MASK]) != + mask_len) + goto error; + if (pat_len > wowlan->pattern_max_len || + pat_len < wowlan->pattern_min_len) + goto error; + + new_triggers.patterns[i].mask = + kmalloc(mask_len + pat_len, GFP_KERNEL); + if (!new_triggers.patterns[i].mask) { + err = -ENOMEM; + goto error; + } + new_triggers.patterns[i].pattern = + new_triggers.patterns[i].mask + mask_len; + memcpy(new_triggers.patterns[i].mask, + nla_data(pat_tb[NL80211_WOWLAN_PKTPAT_MASK]), + mask_len); + new_triggers.patterns[i].pattern_len = pat_len; + memcpy(new_triggers.patterns[i].pattern, + nla_data(pat_tb[NL80211_WOWLAN_PKTPAT_PATTERN]), + pat_len); + i++; + } + } + + if (memcmp(&new_triggers, &no_triggers, sizeof(new_triggers))) { + struct cfg80211_wowlan *ntrig; + ntrig = kmemdup(&new_triggers, sizeof(new_triggers), + GFP_KERNEL); + if (!ntrig) { + err = -ENOMEM; + goto error; + } + cfg80211_rdev_free_wowlan(rdev); + rdev->wowlan = ntrig; + } else { + no_triggers: + cfg80211_rdev_free_wowlan(rdev); + rdev->wowlan = NULL; + } + + return 0; + error: + for (i = 0; i < new_triggers.n_patterns; i++) + kfree(new_triggers.patterns[i].mask); + kfree(new_triggers.patterns); + return err; +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -5306,6 +5533,22 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_GET_WOWLAN, + .doit = nl80211_get_wowlan, + .policy = nl80211_policy, + /* can be retrieved by unprivileged users */ + .internal_flags = NL80211_FLAG_NEED_WIPHY | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_SET_WOWLAN, + .doit = nl80211_set_wowlan, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WIPHY | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 4294fa22bb2d..c6e4ca6a7d2e 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -93,7 +93,7 @@ static int wiphy_suspend(struct device *dev, pm_message_t state) if (rdev->ops->suspend) { rtnl_lock(); - 
ret = rdev->ops->suspend(&rdev->wiphy); + ret = rdev->ops->suspend(&rdev->wiphy, rdev->wowlan); rtnl_unlock(); } -- cgit v1.2.3 From a26ac2455ffcf3be5c6ef92bc6df7182700f2114 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 12 Jan 2011 14:10:23 -0800 Subject: rcu: move TREE_RCU from softirq to kthread If RCU priority boosting is to be meaningful, callback invocation must be boosted in addition to preempted RCU readers. Otherwise, in presence of CPU real-time threads, the grace period ends, but the callbacks don't get invoked. If the callbacks don't get invoked, the associated memory doesn't get freed, so the system is still subject to OOM. But it is not reasonable to priority-boost RCU_SOFTIRQ, so this commit moves the callback invocations to a kthread, which can be boosted easily. Also add comments and properly synchronized all accesses to rcu_cpu_kthread_task, as suggested by Lai Jiangshan. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- Documentation/filesystems/proc.txt | 1 - include/linux/interrupt.h | 1 - include/trace/events/irq.h | 3 +- kernel/rcutree.c | 340 +++++++++++++++++++++++++++++++++++- kernel/rcutree.h | 8 + kernel/rcutree_plugin.h | 4 +- kernel/softirq.c | 2 +- tools/perf/util/trace-event-parse.c | 1 - 8 files changed, 348 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index b0b814d75ca1..60740e8ecb37 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -836,7 +836,6 @@ Provides counts of softirq handlers serviced since boot time, for each cpu. TASKLET: 0 0 0 290 SCHED: 27035 26983 26971 26746 HRTIMER: 0 0 0 0 - RCU: 1678 1769 2178 2250 1.3 IDE devices in /proc/ide diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index bea0ac750712..6c12989839d9 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -414,7 +414,6 @@ enum TASKLET_SOFTIRQ, SCHED_SOFTIRQ, HRTIMER_SOFTIRQ, - RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ NR_SOFTIRQS }; diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 1c09820df585..ae045ca7d356 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -20,8 +20,7 @@ struct softirq_action; softirq_name(BLOCK_IOPOLL), \ softirq_name(TASKLET), \ softirq_name(SCHED), \ - softirq_name(HRTIMER), \ - softirq_name(RCU)) + softirq_name(HRTIMER)) /** * irq_handler_entry - called immediately before the irq action handler diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 0ac1cc03f935..18e33313873e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -47,6 +47,8 @@ #include #include #include +#include +#include #include "rcutree.h" @@ -82,6 +84,20 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); int rcu_scheduler_active __read_mostly; EXPORT_SYMBOL_GPL(rcu_scheduler_active); +/* + * Control variables for per-CPU and per-rcu_node kthreads. These + * handle all flavors of RCU. + */ +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); +static DEFINE_PER_CPU(wait_queue_head_t, rcu_cpu_wq); +static DEFINE_PER_CPU(char, rcu_cpu_has_work); +static char rcu_kthreads_spawnable; + +static void rcu_node_kthread_setaffinity(struct rcu_node *rnp); +static void invoke_rcu_kthread(void); + +#define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ + /* * Return true if an RCU grace period is in progress. 
The ACCESS_ONCE()s * permit this function to be invoked without holding the root rcu_node @@ -1009,6 +1025,8 @@ static void rcu_send_cbs_to_online(struct rcu_state *rsp) /* * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy * and move all callbacks from the outgoing CPU to the current one. + * There can only be one CPU hotplug operation at a time, so no other + * CPU can be attempting to update rcu_cpu_kthread_task. */ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) { @@ -1017,6 +1035,14 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) int need_report = 0; struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp; + struct task_struct *t; + + /* Stop the CPU's kthread. */ + t = per_cpu(rcu_cpu_kthread_task, cpu); + if (t != NULL) { + per_cpu(rcu_cpu_kthread_task, cpu) = NULL; + kthread_stop(t); + } /* Exclude any attempts to start a new grace period. */ raw_spin_lock_irqsave(&rsp->onofflock, flags); @@ -1054,6 +1080,19 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) raw_spin_unlock_irqrestore(&rnp->lock, flags); if (need_report & RCU_OFL_TASKS_EXP_GP) rcu_report_exp_rnp(rsp, rnp); + + /* + * If there are no more online CPUs for this rcu_node structure, + * kill the rcu_node structure's kthread. Otherwise, adjust its + * affinity. + */ + t = rnp->node_kthread_task; + if (t != NULL && + rnp->qsmaskinit == 0) { + kthread_stop(t); + rnp->node_kthread_task = NULL; + } else + rcu_node_kthread_setaffinity(rnp); } /* @@ -1151,7 +1190,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) /* Re-raise the RCU softirq if there are callbacks remaining. */ if (cpu_has_callbacks_ready_to_invoke(rdp)) - raise_softirq(RCU_SOFTIRQ); + invoke_rcu_kthread(); } /* @@ -1197,7 +1236,7 @@ void rcu_check_callbacks(int cpu, int user) } rcu_preempt_check_callbacks(cpu); if (rcu_pending(cpu)) - raise_softirq(RCU_SOFTIRQ); + invoke_rcu_kthread(); } #ifdef CONFIG_SMP @@ -1361,7 +1400,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) /* * Do softirq processing for the current CPU. */ -static void rcu_process_callbacks(struct softirq_action *unused) +static void rcu_process_callbacks(void) { __rcu_process_callbacks(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); @@ -1372,6 +1411,281 @@ static void rcu_process_callbacks(struct softirq_action *unused) rcu_needs_cpu_flush(); } +/* + * Wake up the current CPU's kthread. This replaces raise_softirq() + * in earlier versions of RCU. Note that because we are running on + * the current CPU with interrupts disabled, the rcu_cpu_kthread_task + * cannot disappear out from under us. + */ +static void invoke_rcu_kthread(void) +{ + unsigned long flags; + wait_queue_head_t *q; + int cpu; + + local_irq_save(flags); + cpu = smp_processor_id(); + per_cpu(rcu_cpu_has_work, cpu) = 1; + if (per_cpu(rcu_cpu_kthread_task, cpu) == NULL) { + local_irq_restore(flags); + return; + } + q = &per_cpu(rcu_cpu_wq, cpu); + wake_up(q); + local_irq_restore(flags); +} + +/* + * Timer handler to initiate the waking up of per-CPU kthreads that + * have yielded the CPU due to excess numbers of RCU callbacks. 
+ */ +static void rcu_cpu_kthread_timer(unsigned long arg) +{ + unsigned long flags; + struct rcu_data *rdp = (struct rcu_data *)arg; + struct rcu_node *rnp = rdp->mynode; + struct task_struct *t; + + raw_spin_lock_irqsave(&rnp->lock, flags); + rnp->wakemask |= rdp->grpmask; + t = rnp->node_kthread_task; + if (t == NULL) { + raw_spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + wake_up_process(t); + raw_spin_unlock_irqrestore(&rnp->lock, flags); +} + +/* + * Drop to non-real-time priority and yield, but only after posting a + * timer that will cause us to regain our real-time priority if we + * remain preempted. Either way, we restore our real-time priority + * before returning. + */ +static void rcu_yield(int cpu) +{ + struct rcu_data *rdp = per_cpu_ptr(rcu_sched_state.rda, cpu); + struct sched_param sp; + struct timer_list yield_timer; + + setup_timer_on_stack(&yield_timer, rcu_cpu_kthread_timer, (unsigned long)rdp); + mod_timer(&yield_timer, jiffies + 2); + sp.sched_priority = 0; + sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); + schedule(); + sp.sched_priority = RCU_KTHREAD_PRIO; + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); + del_timer(&yield_timer); +} + +/* + * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU. + * This can happen while the corresponding CPU is either coming online + * or going offline. We cannot wait until the CPU is fully online + * before starting the kthread, because the various notifier functions + * can wait for RCU grace periods. So we park rcu_cpu_kthread() until + * the corresponding CPU is online. + * + * Return 1 if the kthread needs to stop, 0 otherwise. + * + * Caller must disable bh. This function can momentarily enable it. + */ +static int rcu_cpu_kthread_should_stop(int cpu) +{ + while (cpu_is_offline(cpu) || + !cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu)) || + smp_processor_id() != cpu) { + if (kthread_should_stop()) + return 1; + local_bh_enable(); + schedule_timeout_uninterruptible(1); + if (!cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu))) + set_cpus_allowed_ptr(current, cpumask_of(cpu)); + local_bh_disable(); + } + return 0; +} + +/* + * Per-CPU kernel thread that invokes RCU callbacks. This replaces the + * earlier RCU softirq. + */ +static int rcu_cpu_kthread(void *arg) +{ + int cpu = (int)(long)arg; + unsigned long flags; + int spincnt = 0; + wait_queue_head_t *wqp = &per_cpu(rcu_cpu_wq, cpu); + char work; + char *workp = &per_cpu(rcu_cpu_has_work, cpu); + + for (;;) { + wait_event_interruptible(*wqp, + *workp != 0 || kthread_should_stop()); + local_bh_disable(); + if (rcu_cpu_kthread_should_stop(cpu)) { + local_bh_enable(); + break; + } + local_irq_save(flags); + work = *workp; + *workp = 0; + local_irq_restore(flags); + if (work) + rcu_process_callbacks(); + local_bh_enable(); + if (*workp != 0) + spincnt++; + else + spincnt = 0; + if (spincnt > 10) { + rcu_yield(cpu); + spincnt = 0; + } + } + return 0; +} + +/* + * Spawn a per-CPU kthread, setting up affinity and priority. + * Because the CPU hotplug lock is held, no other CPU will be attempting + * to manipulate rcu_cpu_kthread_task. There might be another CPU + * attempting to access it during boot, but the locking in kthread_bind() + * will enforce sufficient ordering. 
+ */ +static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) +{ + struct sched_param sp; + struct task_struct *t; + + if (!rcu_kthreads_spawnable || + per_cpu(rcu_cpu_kthread_task, cpu) != NULL) + return 0; + t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu); + if (IS_ERR(t)) + return PTR_ERR(t); + kthread_bind(t, cpu); + WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); + per_cpu(rcu_cpu_kthread_task, cpu) = t; + wake_up_process(t); + sp.sched_priority = RCU_KTHREAD_PRIO; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + return 0; +} + +/* + * Per-rcu_node kthread, which is in charge of waking up the per-CPU + * kthreads when needed. We ignore requests to wake up kthreads + * for offline CPUs, which is OK because force_quiescent_state() + * takes care of this case. + */ +static int rcu_node_kthread(void *arg) +{ + int cpu; + unsigned long flags; + unsigned long mask; + struct rcu_node *rnp = (struct rcu_node *)arg; + struct sched_param sp; + struct task_struct *t; + + for (;;) { + wait_event_interruptible(rnp->node_wq, rnp->wakemask != 0 || + kthread_should_stop()); + if (kthread_should_stop()) + break; + raw_spin_lock_irqsave(&rnp->lock, flags); + mask = rnp->wakemask; + rnp->wakemask = 0; + raw_spin_unlock_irqrestore(&rnp->lock, flags); + for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { + if ((mask & 0x1) == 0) + continue; + preempt_disable(); + t = per_cpu(rcu_cpu_kthread_task, cpu); + if (!cpu_online(cpu) || t == NULL) { + preempt_enable(); + continue; + } + per_cpu(rcu_cpu_has_work, cpu) = 1; + sp.sched_priority = RCU_KTHREAD_PRIO; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + preempt_enable(); + } + } + return 0; +} + +/* + * Set the per-rcu_node kthread's affinity to cover all CPUs that are + * served by the rcu_node in question. + */ +static void rcu_node_kthread_setaffinity(struct rcu_node *rnp) +{ + cpumask_var_t cm; + int cpu; + unsigned long mask = rnp->qsmaskinit; + + if (rnp->node_kthread_task == NULL || + rnp->qsmaskinit == 0) + return; + if (!alloc_cpumask_var(&cm, GFP_KERNEL)) + return; + cpumask_clear(cm); + for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) + if (mask & 0x1) + cpumask_set_cpu(cpu, cm); + set_cpus_allowed_ptr(rnp->node_kthread_task, cm); + free_cpumask_var(cm); +} + +/* + * Spawn a per-rcu_node kthread, setting priority and affinity. + */ +static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, + struct rcu_node *rnp) +{ + int rnp_index = rnp - &rsp->node[0]; + struct sched_param sp; + struct task_struct *t; + + if (!rcu_kthreads_spawnable || + rnp->qsmaskinit == 0 || + rnp->node_kthread_task != NULL) + return 0; + t = kthread_create(rcu_node_kthread, (void *)rnp, "rcun%d", rnp_index); + if (IS_ERR(t)) + return PTR_ERR(t); + rnp->node_kthread_task = t; + wake_up_process(t); + sp.sched_priority = 99; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + return 0; +} + +/* + * Spawn all kthreads -- called as soon as the scheduler is running. 
+ */ +static int __init rcu_spawn_kthreads(void) +{ + int cpu; + struct rcu_node *rnp; + + rcu_kthreads_spawnable = 1; + for_each_possible_cpu(cpu) { + init_waitqueue_head(&per_cpu(rcu_cpu_wq, cpu)); + per_cpu(rcu_cpu_has_work, cpu) = 0; + if (cpu_online(cpu)) + (void)rcu_spawn_one_cpu_kthread(cpu); + } + rcu_for_each_leaf_node(&rcu_sched_state, rnp) { + init_waitqueue_head(&rnp->node_wq); + (void)rcu_spawn_one_node_kthread(&rcu_sched_state, rnp); + } + return 0; +} +early_initcall(rcu_spawn_kthreads); + static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_state *rsp) @@ -1771,6 +2085,19 @@ static void __cpuinit rcu_online_cpu(int cpu) rcu_preempt_init_percpu_data(cpu); } +static void __cpuinit rcu_online_kthreads(int cpu) +{ + struct rcu_data *rdp = per_cpu_ptr(rcu_sched_state.rda, cpu); + struct rcu_node *rnp = rdp->mynode; + + /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ + if (rcu_kthreads_spawnable) { + (void)rcu_spawn_one_cpu_kthread(cpu); + if (rnp->node_kthread_task == NULL) + (void)rcu_spawn_one_node_kthread(&rcu_sched_state, rnp); + } +} + /* * Handle CPU online/offline notification events. */ @@ -1778,11 +2105,17 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { long cpu = (long)hcpu; + struct rcu_data *rdp = per_cpu_ptr(rcu_sched_state.rda, cpu); + struct rcu_node *rnp = rdp->mynode; switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: rcu_online_cpu(cpu); + rcu_online_kthreads(cpu); + break; + case CPU_ONLINE: + rcu_node_kthread_setaffinity(rnp); break; case CPU_DYING: case CPU_DYING_FROZEN: @@ -1923,7 +2256,6 @@ void __init rcu_init(void) rcu_init_one(&rcu_sched_state, &rcu_sched_data); rcu_init_one(&rcu_bh_state, &rcu_bh_data); __rcu_init_preempt(); - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); /* * We don't need protection against CPU-hotplug here because diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 5a439c180e69..c0213802d164 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -111,6 +111,7 @@ struct rcu_node { /* elements that need to drain to allow the */ /* current expedited grace period to */ /* complete (only for TREE_PREEMPT_RCU). */ + unsigned long wakemask; /* CPUs whose kthread needs to be awakened. */ unsigned long qsmaskinit; /* Per-GP initial value for qsmask & expmask. */ unsigned long grpmask; /* Mask to apply to parent qsmask. */ @@ -134,6 +135,13 @@ struct rcu_node { /* if there is no such task. If there */ /* is no current expedited grace period, */ /* then there can cannot be any such task. */ + struct task_struct *node_kthread_task; + /* kthread that takes care of this rcu_node */ + /* structure, for example, awakening the */ + /* per-CPU kthreads as needed. */ + wait_queue_head_t node_wq; + /* Wait queue on which to park the per-node */ + /* kthread. */ } ____cacheline_internodealigned_in_smp; /* diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 774f010a4619..b9bd69a5a4fe 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1206,7 +1206,7 @@ static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); * * Because it is not legal to invoke rcu_process_callbacks() with irqs * disabled, we do one pass of force_quiescent_state(), then do a - * raise_softirq() to cause rcu_process_callbacks() to be invoked later. + * invoke_rcu_kthread() to cause rcu_process_callbacks() to be invoked later. * The per-cpu rcu_dyntick_drain variable controls the sequencing. 
*/ int rcu_needs_cpu(int cpu) @@ -1257,7 +1257,7 @@ int rcu_needs_cpu(int cpu) /* If RCU callbacks are still pending, RCU still needs this CPU. */ if (c) - raise_softirq(RCU_SOFTIRQ); + invoke_rcu_kthread(); return c; } diff --git a/kernel/softirq.c b/kernel/softirq.c index 174f976c2874..13960170cad4 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -58,7 +58,7 @@ DEFINE_PER_CPU(struct task_struct *, ksoftirqd); char *softirq_to_name[NR_SOFTIRQS] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", - "TASKLET", "SCHED", "HRTIMER", "RCU" + "TASKLET", "SCHED", "HRTIMER" }; /* diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 0a7ed5b5e281..1e88485c16a0 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -2187,7 +2187,6 @@ static const struct flag flags[] = { { "TASKLET_SOFTIRQ", 6 }, { "SCHED_SOFTIRQ", 7 }, { "HRTIMER_SOFTIRQ", 8 }, - { "RCU_SOFTIRQ", 9 }, { "HRTIMER_NORESTART", 0 }, { "HRTIMER_RESTART", 1 }, -- cgit v1.2.3 From 4a29865689dbb87a02e3b0fff4a4ae5041273173 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 3 Apr 2011 21:33:51 -0700 Subject: rcu: make rcutorture version numbers available through debugfs It is not possible to accurately correlate rcutorture output with that of debugfs. This patch therefore adds a debugfs file that prints out the rcutorture version number, permitting easy correlation. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 13 ++++++++++++- include/linux/rcutree.h | 3 +++ kernel/rcutorture.c | 8 +++++--- kernel/rcutree.c | 37 +++++++++++++++++++++++++++++++++++++ kernel/rcutree_trace.c | 28 ++++++++++++++++++++++++++++ 5 files changed, 85 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ff422d2b7f90..9e169c2ba91f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -47,6 +47,18 @@ extern int rcutorture_runnable; /* for sysctl */ #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ +#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) +extern void rcutorture_record_test_transition(void); +extern void rcutorture_record_progress(unsigned long vernum); +#else +static inline void rcutorture_record_test_transition(void) +{ +} +static inline void rcutorture_record_progress(unsigned long vernum) +{ +} +#endif + #define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b)) #define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b)) #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) @@ -68,7 +80,6 @@ extern void call_rcu_sched(struct rcu_head *head, extern void synchronize_sched(void); extern void rcu_barrier_bh(void); extern void rcu_barrier_sched(void); -extern int sched_expedited_torture_stats(char *page); static inline void __rcu_read_lock_bh(void) { diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 3a933482734a..284dad10c55b 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -58,9 +58,12 @@ static inline void synchronize_rcu_bh_expedited(void) extern void rcu_barrier(void); +extern unsigned long rcutorture_testseq; +extern unsigned long rcutorture_vernum; extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); extern long rcu_batches_completed_sched(void); + extern void rcu_force_quiescent_state(void); extern void rcu_bh_force_quiescent_state(void); extern void rcu_sched_force_quiescent_state(void); diff --git 
a/kernel/rcutorture.c b/kernel/rcutorture.c index 22b0e74e7d99..c2f58ec24751 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -131,7 +131,7 @@ struct rcu_torture { static LIST_HEAD(rcu_torture_freelist); static struct rcu_torture __rcu *rcu_torture_current; -static long rcu_torture_current_version; +static unsigned long rcu_torture_current_version; static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; static DEFINE_SPINLOCK(rcu_torture_lock); static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) = @@ -884,7 +884,7 @@ rcu_torture_writer(void *arg) old_rp->rtort_pipe_count++; cur_ops->deferred_free(old_rp); } - rcu_torture_current_version++; + rcutorture_record_progress(++rcu_torture_current_version); oldbatch = cur_ops->completed(); rcu_stutter_wait("rcu_torture_writer"); } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); @@ -1064,7 +1064,7 @@ rcu_torture_printk(char *page) } cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG); cnt += sprintf(&page[cnt], - "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d " + "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d " "rtmbe: %d rtbke: %ld rtbre: %ld " "rtbf: %ld rtb: %ld nt: %ld", rcu_torture_current, @@ -1325,6 +1325,7 @@ rcu_torture_cleanup(void) int i; mutex_lock(&fullstop_mutex); + rcutorture_record_test_transition(); if (fullstop == FULLSTOP_SHUTDOWN) { printk(KERN_WARNING /* but going down anyway, so... */ "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); @@ -1616,6 +1617,7 @@ rcu_torture_init(void) } } register_reboot_notifier(&rcutorture_shutdown_nb); + rcutorture_record_test_transition(); mutex_unlock(&fullstop_mutex); return 0; diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d8917401cbbc..bb84deca3319 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -101,6 +101,18 @@ static void invoke_rcu_cpu_kthread(void); #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ +/* + * Track the rcutorture test sequence number and the update version + * number within a given test. The rcutorture_testseq is incremented + * on every rcutorture module load and unload, so has an odd value + * when a test is running. The rcutorture_vernum is set to zero + * when rcutorture starts and is incremented on each rcutorture update. + * These variables enable correlating rcutorture output with the + * RCU tracing information. + */ +unsigned long rcutorture_testseq; +unsigned long rcutorture_vernum; + /* * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s * permit this function to be invoked without holding the root rcu_node @@ -192,6 +204,31 @@ void rcu_bh_force_quiescent_state(void) } EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); +/* + * Record the number of times rcutorture tests have been initiated and + * terminated. This information allows the debugfs tracing stats to be + * correlated to the rcutorture messages, even when the rcutorture module + * is being repeatedly loaded and unloaded. In other words, we cannot + * store this state in rcutorture itself. + */ +void rcutorture_record_test_transition(void) +{ + rcutorture_testseq++; + rcutorture_vernum = 0; +} +EXPORT_SYMBOL_GPL(rcutorture_record_test_transition); + +/* + * Record the number of writer passes through the current rcutorture test. + * This is also used to correlate debugfs tracing stats with the rcutorture + * messages. 
+ */ +void rcutorture_record_progress(unsigned long vernum) +{ + rcutorture_vernum++; +} +EXPORT_SYMBOL_GPL(rcutorture_record_progress); + /* * Force a quiescent state for RCU-sched. */ diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index fc40e89a028c..3baa235786b5 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -394,6 +394,29 @@ static const struct file_operations rcu_pending_fops = { .release = single_release, }; +static int show_rcutorture(struct seq_file *m, void *unused) +{ + seq_printf(m, "rcutorture test sequence: %lu %s\n", + rcutorture_testseq >> 1, + (rcutorture_testseq & 0x1) ? "(test in progress)" : ""); + seq_printf(m, "rcutorture update version number: %lu\n", + rcutorture_vernum); + return 0; +} + +static int rcutorture_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_rcutorture, NULL); +} + +static const struct file_operations rcutorture_fops = { + .owner = THIS_MODULE, + .open = rcutorture_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static struct dentry *rcudir; static int __init rcutree_trace_init(void) @@ -430,6 +453,11 @@ static int __init rcutree_trace_init(void) NULL, &rcu_pending_fops); if (!retval) goto free_out; + + retval = debugfs_create_file("rcutorture", 0444, rcudir, + NULL, &rcutorture_fops); + if (!retval) + goto free_out; return 0; free_out: debugfs_remove_recursive(rcudir); -- cgit v1.2.3 From b0c9d7ff2793502650ad987c3f237d5fe5587a1e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 Mar 2011 12:56:56 -0700 Subject: rcu: add DEBUG_OBJECTS_RCU_HEAD check for alignment Verify that rcu_head structures are aligned to a four-byte boundary. This check is enabled by CONFIG_DEBUG_OBJECTS_RCU_HEAD. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9e169c2ba91f..c7aeacf7fc98 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -785,6 +785,7 @@ extern struct debug_obj_descr rcuhead_debug_descr; static inline void debug_rcu_head_queue(struct rcu_head *head) { + WARN_ON_ONCE((unsigned long)head & 0x3); debug_object_activate(head, &rcuhead_debug_descr); debug_object_active_state(head, &rcuhead_debug_descr, STATE_RCU_HEAD_READY, -- cgit v1.2.3 From 9ab1544eb4196ca8d05c433b2eb56f74496b1ee3 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 18 Mar 2011 11:15:47 +0800 Subject: rcu: introduce kfree_rcu() Many rcu callbacks functions just call kfree() on the base structure. These functions are trivial, but their size adds up, and furthermore when they are used in a kernel module, that module must invoke the high-latency rcu_barrier() function at module-unload time. The kfree_rcu() function introduced by this commit addresses this issue. Rather than encoding a function address in the embedded rcu_head structure, kfree_rcu() instead encodes the offset of the rcu_head structure within the base structure. Because the functions are not allowed in the low-order 4096 bytes of kernel virtual memory, offsets up to 4095 bytes can be accommodated. If the offset is larger than 4095 bytes, a compile-time error will be generated in __kfree_rcu(). If this error is triggered, you can either fall back to use of call_rcu() or rearrange the structure to position the rcu_head structure into the first 4096 bytes. 
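As a purely illustrative sketch (struct foo, struct bar, payload and p below are hypothetical and not part of this patch), the offset constraint and the calling convention of the macro added below look like this:

	/* Usable with kfree_rcu(): offsetof(struct foo, rcu) < 4096. */
	struct foo {
		struct rcu_head rcu;
		char payload[8192];
	};

	/* Not usable: offsetof(struct bar, rcu) is 8192, so the
	 * BUILD_BUG_ON() in __kfree_rcu() fires at compile time;
	 * either keep using call_rcu() or move rcu earlier. */
	struct bar {
		char payload[8192];
		struct rcu_head rcu;
	};

	kfree_rcu(p, rcu);	/* p is a struct foo *; "rcu" names the rcu_head field */

Note that, given the macro definition added below, the first argument is a pointer to the enclosing structure rather than a pointer to its rcu_head field.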
Note that the allowable offset might decrease in the future, for example, to allow something like kmem_cache_free_rcu(). The new kfree_rcu() function can replace code as follows: call_rcu(&p->rcu, simple_kfree_callback); where "simple_kfree_callback()" might be defined as follows: void simple_kfree_callback(struct rcu_head *p) { struct foo *q = container_of(p, struct foo, rcu); kfree(q); } with the following: kfree_rcu(&p->rcu, rcu); Note that the "rcu" is the name of a field in the structure being freed. The reason for using this rather than passing in a pointer to the base structure is that the above approach allows better type checking. This commit is based on earlier work by Lai Jiangshan and Manfred Spraul: Lai's V1 patch: http://lkml.org/lkml/2008/9/18/1 Manfred's patch: http://lkml.org/lkml/2009/1/2/115 Signed-off-by: Lai Jiangshan Signed-off-by: Manfred Spraul Signed-off-by: Paul E. McKenney Reviewed-by: David Howells Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ kernel/rcutiny.c | 2 +- kernel/rcutree.c | 2 +- 3 files changed, 58 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index c7aeacf7fc98..99f9aa7c2804 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -809,4 +809,60 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head) } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ +static __always_inline bool __is_kfree_rcu_offset(unsigned long offset) +{ + return offset < 4096; +} + +static __always_inline +void __kfree_rcu(struct rcu_head *head, unsigned long offset) +{ + typedef void (*rcu_callback)(struct rcu_head *); + + BUILD_BUG_ON(!__builtin_constant_p(offset)); + + /* See the kfree_rcu() header comment. */ + BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); + + call_rcu(head, (rcu_callback)offset); +} + +extern void kfree(const void *); + +static inline void __rcu_reclaim(struct rcu_head *head) +{ + unsigned long offset = (unsigned long)head->func; + + if (__is_kfree_rcu_offset(offset)) + kfree((void *)head - offset); + else + head->func(head); +} + +/** + * kfree_rcu() - kfree an object after a grace period. + * @ptr: pointer to kfree + * @rcu_head: the name of the struct rcu_head within the type of @ptr. + * + * Many rcu callbacks functions just call kfree() on the base structure. + * These functions are trivial, but their size adds up, and furthermore + * when they are used in a kernel module, that module must invoke the + * high-latency rcu_barrier() function at module-unload time. + * + * The kfree_rcu() function handles this issue. Rather than encoding a + * function address in the embedded rcu_head structure, kfree_rcu() instead + * encodes the offset of the rcu_head structure within the base structure. + * Because the functions are not allowed in the low-order 4096 bytes of + * kernel virtual memory, offsets up to 4095 bytes can be accommodated. + * If the offset is larger than 4095 bytes, a compile-time error will + * be generated in __kfree_rcu(). If this error is triggered, you can + * either fall back to use of call_rcu() or rearrange the structure to + * position the rcu_head structure into the first 4096 bytes. + * + * Note that the allowable offset might decrease in the future, for example, + * to allow something like kmem_cache_free_rcu(). 
+ */ +#define kfree_rcu(ptr, rcu_head) \ + __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) + #endif /* __LINUX_RCUPDATE_H */ diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 0c343b9a46d5..4d60fbc9c64c 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -167,7 +167,7 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) prefetch(next); debug_rcu_head_unqueue(list); local_bh_disable(); - list->func(list); + __rcu_reclaim(list); local_bh_enable(); list = next; RCU_TRACE(cb_count++); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index b579e4f97210..2c07adb97088 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1206,7 +1206,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) next = list->next; prefetch(next); debug_rcu_head_unqueue(list); - list->func(list); + __rcu_reclaim(list); list = next; if (++count >= rdp->blimit) break; -- cgit v1.2.3 From 29ce831000081dd757d3116bf774aafffc4b6b20 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 4 May 2011 16:31:03 +0300 Subject: rcu: provide rcu_virt_note_context_switch() function. Provide rcu_virt_note_context_switch() for virtualization use to note a quiescent state during guest entry. Signed-off-by: Gleb Natapov Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 8 ++++++++ include/linux/rcutree.h | 10 ++++++++++ kernel/rcutree.c | 1 + 3 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 30ebd7c8d874..52b3e0281fd0 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -99,6 +99,14 @@ static inline void rcu_note_context_switch(int cpu) rcu_preempt_note_context_switch(); } +/* + * Take advantage of the fact that there is only one CPU, which + * allows us to ignore virtualization-based context switches. + */ +static inline void rcu_virt_note_context_switch(int cpu) +{ +} + /* * Return the number of grace periods. */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 284dad10c55b..e65d06634dd8 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -35,6 +35,16 @@ extern void rcu_note_context_switch(int cpu); extern int rcu_needs_cpu(int cpu); extern void rcu_cpu_stall_reset(void); +/* + * Note a virtualization-based context switch. This is simply a + * wrapper around rcu_note_context_switch(), which allows TINY_RCU + * to save a few bytes. + */ +static inline void rcu_virt_note_context_switch(int cpu) +{ + rcu_note_context_switch(cpu); +} + #ifdef CONFIG_TREE_PREEMPT_RCU extern void exit_rcu(void); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index b2fe2a273df2..54ff7eb92819 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -157,6 +157,7 @@ void rcu_note_context_switch(int cpu) rcu_sched_qs(cpu); rcu_preempt_note_context_switch(cpu); } +EXPORT_SYMBOL_GPL(rcu_note_context_switch); #ifdef CONFIG_NO_HZ DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { -- cgit v1.2.3 From f3876930952390a31c3a7fd68dd621464a36eb80 Mon Sep 17 00:00:00 2001 From: "shaohua.li@intel.com" Date: Fri, 6 May 2011 11:34:32 -0600 Subject: block: add a non-queueable flush flag A flush request isn't queueable in some drives. Add a flag to let the driver notify the block layer about this. We can optimize flush performance with this knowledge.
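A minimal, hypothetical sketch of how a low-level driver might use the new hook (my_setup_queue() and can_queue_flush are illustrative names; blk_queue_flush() and the blk_queue_flush_queueable() helper added by this patch are the real interfaces):

	static void my_setup_queue(struct request_queue *q, bool can_queue_flush)
	{
		/* advertise cache-flush support as usual */
		blk_queue_flush(q, REQ_FLUSH);
		/* tell the block layer whether flush requests can be queued
		 * alongside other requests on this drive */
		blk_queue_flush_queueable(q, can_queue_flush);
	}

A drive that cannot service other requests while a flush is outstanding would pass false, which sets q->flush_not_queueable and lets the block layer optimize its dispatch behavior.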
Stable: 2.6.39 only Cc: stable@kernel.org Signed-off-by: Shaohua Li Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-settings.c | 6 ++++++ include/linux/blkdev.h | 7 +++++++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/block/blk-settings.c b/block/blk-settings.c index 1fa769293597..cd3c428e194f 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -790,6 +790,12 @@ void blk_queue_flush(struct request_queue *q, unsigned int flush) } EXPORT_SYMBOL_GPL(blk_queue_flush); +void blk_queue_flush_queueable(struct request_queue *q, bool queueable) +{ + q->flush_not_queueable = !queueable; +} +EXPORT_SYMBOL_GPL(blk_queue_flush_queueable); + static int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cbbfd98ad4a3..8bd2a271b2d8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -364,6 +364,7 @@ struct request_queue * for flush operations */ unsigned int flush_flags; + unsigned int flush_not_queueable:1; unsigned int flush_pending_idx:1; unsigned int flush_running_idx:1; unsigned long flush_pending_since; @@ -843,6 +844,7 @@ extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); +extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); @@ -1111,6 +1113,11 @@ static inline unsigned int block_size(struct block_device *bdev) return bdev->bd_block_size; } +static inline bool queue_flush_queueable(struct request_queue *q) +{ + return !q->flush_not_queueable; +} + typedef struct {struct page *v;} Sector; unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); -- cgit v1.2.3 From 3ac0cc4508709d42ec9aa351086c7d38bfc0660c Mon Sep 17 00:00:00 2001 From: "shaohua.li@intel.com" Date: Fri, 6 May 2011 11:34:41 -0600 Subject: block: hold queue if flush is running for non-queueable flush drive In some drives, flush requests are non-queueable. When flush request is running, normal read/write requests can't run. If block layer dispatches such request, driver can't handle it and requeue it. Tejun suggested we can hold the queue when flush is running. This can avoid unnecessary requeue. Also this can improve performance. For example, we have request flush1, write1, flush 2. flush1 is dispatched, then queue is hold, write1 isn't inserted to queue. After flush1 is finished, flush2 will be dispatched. Since disk cache is already clean, flush2 will be finished very soon, so looks like flush2 is folded to flush1. In my test, the queue holding completely solves a regression introduced by commit 53d63e6b0dfb95882ec0219ba6bbd50cde423794: block: make the flush insertion use the tail of the dispatch list It's not a preempt type request, in fact we have to insert it behind requests that do specify INSERT_FRONT. which causes about 20% regression running a sysbench fileio workload. 
Stable: 2.6.39 only Cc: stable@kernel.org Signed-off-by: Shaohua Li Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-flush.c | 16 +++++++++++----- block/blk.h | 21 ++++++++++++++++++++- include/linux/blkdev.h | 1 + 3 files changed, 32 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/block/blk-flush.c b/block/blk-flush.c index 6c9b5e189e62..bb21e4c36f70 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -212,13 +212,19 @@ static void flush_end_io(struct request *flush_rq, int error) } /* - * Moving a request silently to empty queue_head may stall the - * queue. Kick the queue in those cases. This function is called - * from request completion path and calling directly into - * request_fn may confuse the driver. Always use kblockd. + * Kick the queue to avoid stall for two cases: + * 1. Moving a request silently to empty queue_head may stall the + * queue. + * 2. When flush request is running in non-queueable queue, the + * queue is hold. Restart the queue after flush request is finished + * to avoid stall. + * This function is called from request completion path and calling + * directly into request_fn may confuse the driver. Always use + * kblockd. */ - if (queued) + if (queued || q->flush_queue_delayed) blk_run_queue_async(q); + q->flush_queue_delayed = 0; } /** diff --git a/block/blk.h b/block/blk.h index c9df8fc3c999..83e4bff36201 100644 --- a/block/blk.h +++ b/block/blk.h @@ -62,7 +62,26 @@ static inline struct request *__elv_next_request(struct request_queue *q) rq = list_entry_rq(q->queue_head.next); return rq; } - + /* + * Flush request is running and flush request isn't queueable + * in the drive, we can hold the queue till flush request is + * finished. Even we don't do this, driver can't dispatch next + * requests and will requeue them. And this can improve + * throughput too. For example, we have request flush1, write1, + * flush 2. flush1 is dispatched, then queue is hold, write1 + * isn't inserted to queue. After flush1 is finished, flush2 + * will be dispatched. Since disk cache is already clean, + * flush2 will be finished very soon, so looks like flush2 is + * folded to flush1. + * Since the queue is hold, a flag is set to indicate the queue + * should be restarted later. Please see flush_end_io() for + * details. + */ + if (q->flush_pending_idx != q->flush_running_idx && + !queue_flush_queueable(q)) { + q->flush_queue_delayed = 1; + return NULL; + } if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) return NULL; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8bd2a271b2d8..9f921bf4bf8c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -365,6 +365,7 @@ struct request_queue */ unsigned int flush_flags; unsigned int flush_not_queueable:1; + unsigned int flush_queue_delayed:1; unsigned int flush_pending_idx:1; unsigned int flush_running_idx:1; unsigned long flush_pending_since; -- cgit v1.2.3 From a3a4a5acd3bd2f6f1e102e1f1b9d2e2bb320a7fd Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 5 May 2011 23:55:18 -0400 Subject: Regression: partial revert "tracing: Remove lock_depth from event entry" This partially reverts commit e6e1e2593592a8f6f6380496655d8c6f67431266. That commit changed the structure layout of the trace structure, which in turn broke PowerTOP (1.9x generation) quite badly. 
I appreciate not wanting to expose the variable in question, and PowerTOP was not using it, so I've replaced the variable with just a padding field - that way if in the future a new field is needed it can just use this padding field. Signed-off-by: Arjan van de Ven Signed-off-by: Linus Torvalds --- include/linux/ftrace_event.h | 1 + kernel/trace/trace.c | 1 + kernel/trace/trace_events.c | 1 + 3 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 22b32af1b5ec..b5a550a39a70 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -37,6 +37,7 @@ struct trace_entry { unsigned char flags; unsigned char preempt_count; int pid; + int padding; }; #define FTRACE_MAX_EVENT \ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d38c16a06a6f..1cb49be7c7fb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1110,6 +1110,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, entry->preempt_count = pc & 0xff; entry->pid = (tsk) ? tsk->pid : 0; + entry->padding = 0; entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index e88f74fe1d4c..2fe110341359 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -116,6 +116,7 @@ static int trace_define_common_fields(void) __common_field(unsigned char, flags); __common_field(unsigned char, preempt_count); __common_field(int, pid); + __common_field(int, padding); return ret; } -- cgit v1.2.3 From 880ffb5c6c5c8c8c6efd9efe9355317322b4603b Mon Sep 17 00:00:00 2001 From: Wanlong Gao Date: Thu, 5 May 2011 07:55:36 +0800 Subject: driver core: Add the device driver-model structures to kerneldoc Add the comments to the structure bus_type, device_driver, device, class to device.h for generating the driver-model kerneldoc. With another patch these all removed from the files in Documentation/driver-model/ since they are out of date. That will keep things up to date and provide a better way to document this stuff. Signed-off-by: Wanlong Gao Acked-by: Harry Wei Signed-off-by: Greg Kroah-Hartman --- Documentation/DocBook/device-drivers.tmpl | 6 +- include/linux/device.h | 154 +++++++++++++++++++++++++++++- 2 files changed, 154 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl index 36f63d4a0a06..b638e50cf8f6 100644 --- a/Documentation/DocBook/device-drivers.tmpl +++ b/Documentation/DocBook/device-drivers.tmpl @@ -96,10 +96,10 @@ X!Iinclude/linux/kobject.h Device drivers infrastructure + The Basic Device Driver-Model Structures +!Iinclude/linux/device.h + Device Drivers Base - !Edrivers/base/driver.c !Edrivers/base/core.c !Edrivers/base/class.c diff --git a/include/linux/device.h b/include/linux/device.h index 2215d013ca96..42365067a836 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -47,6 +47,38 @@ extern int __must_check bus_create_file(struct bus_type *, struct bus_attribute *); extern void bus_remove_file(struct bus_type *, struct bus_attribute *); +/** + * struct bus_type - The bus type of the device + * + * @name: The name of the bus. + * @bus_attrs: Default attributes of the bus. + * @dev_attrs: Default attributes of the devices on the bus. + * @drv_attrs: Default attributes of the device drivers on the bus. 
+ * @match: Called, perhaps multiple times, whenever a new device or driver + * is added for this bus. It should return a nonzero value if the + * given device can be handled by the given driver. + * @uevent: Called when a device is added, removed, or a few other things + * that generate uevents to add the environment variables. + * @probe: Called when a new device or driver add to this bus, and callback + * the specific driver's probe to initial the matched device. + * @remove: Called when a device removed from this bus. + * @shutdown: Called at shut-down time to quiesce the device. + * @suspend: Called when a device on this bus wants to go to sleep mode. + * @resume: Called to bring a device on this bus out of sleep mode. + * @pm: Power management operations of this bus, callback the specific + * device driver's pm-ops. + * @p: The private data of the driver core, only the driver core can + * touch this. + * + * A bus is a channel between the processor and one or more devices. For the + * purposes of the device model, all devices are connected via a bus, even if + * it is an internal, virtual, "platform" bus. Buses can plug into each other. + * A USB controller is usually a PCI device, for example. The device model + * represents the actual connections between buses and the devices they control. + * A bus is represented by the bus_type structure. It contains the name, the + * default attributes, the bus' methods, PM operations, and the driver core's + * private data. + */ struct bus_type { const char *name; struct bus_attribute *bus_attrs; @@ -119,6 +151,37 @@ extern int bus_unregister_notifier(struct bus_type *bus, extern struct kset *bus_get_kset(struct bus_type *bus); extern struct klist *bus_get_device_klist(struct bus_type *bus); +/** + * struct device_driver - The basic device driver structure + * @name: Name of the device driver. + * @bus: The bus which the device of this driver belongs to. + * @owner: The module owner. + * @mod_name: Used for built-in modules. + * @suppress_bind_attrs: Disables bind/unbind via sysfs. + * @of_match_table: The open firmware table. + * @probe: Called to query the existence of a specific device, + * whether this driver can work with it, and bind the driver + * to a specific device. + * @remove: Called when the device is removed from the system to + * unbind a device from this driver. + * @shutdown: Called at shut-down time to quiesce the device. + * @suspend: Called to put the device to sleep mode. Usually to a + * low power state. + * @resume: Called to bring a device from sleep mode. + * @groups: Default attributes that get created by the driver core + * automatically. + * @pm: Power management operations of the device which matched + * this driver. + * @p: Driver core's private data, no one other than the driver + * core can touch this. + * + * The device driver-model tracks all of the drivers known to the system. + * The main reason for this tracking is to enable the driver core to match + * up drivers with new devices. Once drivers are known objects within the + * system, however, a number of other things become possible. Device drivers + * can export information and configuration variables that are independent + * of any specific device. 
+ */ struct device_driver { const char *name; struct bus_type *bus; @@ -185,8 +248,34 @@ struct device *driver_find_device(struct device_driver *drv, struct device *start, void *data, int (*match)(struct device *dev, void *data)); -/* - * device classes +/** + * struct class - device classes + * @name: Name of the class. + * @owner: The module owner. + * @class_attrs: Default attributes of this class. + * @dev_attrs: Default attributes of the devices belong to the class. + * @dev_bin_attrs: Default binary attributes of the devices belong to the class. + * @dev_kobj: The kobject that represents this class and links it into the hierarchy. + * @dev_uevent: Called when a device is added, removed from this class, or a + * few other things that generate uevents to add the environment + * variables. + * @devnode: Callback to provide the devtmpfs. + * @class_release: Called to release this class. + * @dev_release: Called to release the device. + * @suspend: Used to put the device to sleep mode, usually to a low power + * state. + * @resume: Used to bring the device from the sleep mode. + * @ns_type: Callbacks so sysfs can detemine namespaces. + * @namespace: Namespace of the device belongs to this class. + * @pm: The default device power management operations of this class. + * @p: The private data of the driver core, no one other than the + * driver core can touch this. + * + * A class is a higher-level view of a device that abstracts out low-level + * implementation details. Drivers may see a SCSI disk or an ATA disk, but, + * at the class level, they are all simply disks. Classes allow user space + * to work with devices based on what they do, rather than how they are + * connected or how they work. */ struct class { const char *name; @@ -401,6 +490,65 @@ struct device_dma_parameters { unsigned long segment_boundary_mask; }; +/** + * struct device - The basic device structure + * @parent: The device's "parent" device, the device to which it is attached. + * In most cases, a parent device is some sort of bus or host + * controller. If parent is NULL, the device, is a top-level device, + * which is not usually what you want. + * @p: Holds the private data of the driver core portions of the device. + * See the comment of the struct device_private for detail. + * @kobj: A top-level, abstract class from which other classes are derived. + * @init_name: Initial name of the device. + * @type: The type of device. + * This identifies the device type and carries type-specific + * information. + * @mutex: Mutex to synchronize calls to its driver. + * @bus: Type of bus device is on. + * @driver: Which driver has allocated this + * @platform_data: Platform data specific to the device. + * Example: For devices on custom boards, as typical of embedded + * and SOC based hardware, Linux often uses platform_data to point + * to board-specific structures describing devices and how they + * are wired. That can include what ports are available, chip + * variants, which GPIO pins act in what additional roles, and so + * on. This shrinks the "Board Support Packages" (BSPs) and + * minimizes board-specific #ifdefs in drivers. + * @power: For device power management. + * See Documentation/power/devices.txt for details. + * @pwr_domain: Provide callbacks that are executed during system suspend, + * hibernation, system resume and during runtime PM transitions + * along with subsystem-level and driver-level callbacks. + * @numa_node: NUMA node this device is close to. + * @dma_mask: Dma mask (if dma'ble device). 
+ * @coherent_dma_mask: Like dma_mask, but for alloc_coherent mapping as not all + * hardware supports 64-bit addresses for consistent allocations + * such descriptors. + * @dma_parms: A low level driver may set these to teach IOMMU code about + * segment limitations. + * @dma_pools: Dma pools (if dma'ble device). + * @dma_mem: Internal for coherent mem override. + * @archdata: For arch-specific additions. + * @of_node: Associated device tree node. + * @of_match: Matching of_device_id from driver. + * @devt: For creating the sysfs "dev". + * @devres_lock: Spinlock to protect the resource of the device. + * @devres_head: The resources list of the device. + * @knode_class: The node used to add the device to the class list. + * @class: The class of the device. + * @groups: Optional attribute groups. + * @release: Callback to free the device after all references have + * gone away. This should be set by the allocator of the + * device (i.e. the bus driver that discovered the device). + * + * At the lowest level, every device in a Linux system is represented by an + * instance of struct device. The device structure contains the information + * that the device model core needs to model the system. Most subsystems, + * however, track additional information about the devices they host. As a + * result, it is rare for devices to be represented by bare device structures; + * instead, that structure, like kobject structures, is usually embedded within + * a higher-level representation of the device. + */ struct device { struct device *parent; @@ -611,7 +759,7 @@ extern int (*platform_notify)(struct device *dev); extern int (*platform_notify_remove)(struct device *dev); -/** +/* * get_device - atomically increment the reference count for the device. * */ -- cgit v1.2.3 From 3df004532582d0cc721da0df28311bcedd639724 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Thu, 5 May 2011 12:11:21 +0200 Subject: usb: fix building musb drivers Commit 3dacdf11 "usb: factor out state_string() on otg drivers" broke building musb drivers since there is already another otg_state_string() function in musb drivers, but with different prototype. Fix musb drivers to use common otg_state_string(), too. Also provide a nop for otg_state_string() if CONFIG_USB_OTG_UTILS is not defined. Signed-off-by: Anatolij Gustschin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/am35x.c | 11 ++++--- drivers/usb/musb/blackfin.c | 7 +++-- drivers/usb/musb/da8xx.c | 11 ++++--- drivers/usb/musb/davinci.c | 5 ++-- drivers/usb/musb/musb_core.c | 65 ++++++++++++++++------------------------- drivers/usb/musb/musb_debug.h | 2 -- drivers/usb/musb/musb_gadget.c | 9 +++--- drivers/usb/musb/musb_host.c | 2 +- drivers/usb/musb/musb_virthub.c | 5 ++-- drivers/usb/musb/omap2430.c | 7 +++-- drivers/usb/musb/tusb6010.c | 16 +++++----- include/linux/usb/otg.h | 7 ++++- 12 files changed, 74 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/musb/am35x.c b/drivers/usb/musb/am35x.c index d5a3da37c90c..9e6209f87d3b 100644 --- a/drivers/usb/musb/am35x.c +++ b/drivers/usb/musb/am35x.c @@ -151,7 +151,8 @@ static void otg_timer(unsigned long _musb) * status change events (from the transceiver) otherwise. 
*/ devctl = musb_readb(mregs, MUSB_DEVCTL); - DBG(7, "Poll devctl %02x (%s)\n", devctl, otg_state_string(musb)); + DBG(7, "Poll devctl %02x (%s)\n", devctl, + otg_state_string(musb->xceiv->state)); spin_lock_irqsave(&musb->lock, flags); switch (musb->xceiv->state) { @@ -202,7 +203,8 @@ static void am35x_musb_try_idle(struct musb *musb, unsigned long timeout) /* Never idle if active, or when VBUS timeout is not set as host */ if (musb->is_active || (musb->a_wait_bcon == 0 && musb->xceiv->state == OTG_STATE_A_WAIT_BCON)) { - DBG(4, "%s active, deleting timer\n", otg_state_string(musb)); + DBG(4, "%s active, deleting timer\n", + otg_state_string(musb->xceiv->state)); del_timer(&otg_workaround); last_timer = jiffies; return; @@ -215,7 +217,8 @@ static void am35x_musb_try_idle(struct musb *musb, unsigned long timeout) last_timer = timeout; DBG(4, "%s inactive, starting idle timer for %u ms\n", - otg_state_string(musb), jiffies_to_msecs(timeout - jiffies)); + otg_state_string(musb->xceiv->state), + jiffies_to_msecs(timeout - jiffies)); mod_timer(&otg_workaround, timeout); } @@ -304,7 +307,7 @@ static irqreturn_t am35x_musb_interrupt(int irq, void *hci) /* NOTE: this must complete power-on within 100 ms. */ DBG(2, "VBUS %s (%s)%s, devctl %02x\n", drvvbus ? "on" : "off", - otg_state_string(musb), + otg_state_string(musb->xceiv->state), err ? " ERROR" : "", devctl); ret = IRQ_HANDLED; diff --git a/drivers/usb/musb/blackfin.c b/drivers/usb/musb/blackfin.c index 8e2a1ff8a35a..e141d89656e1 100644 --- a/drivers/usb/musb/blackfin.c +++ b/drivers/usb/musb/blackfin.c @@ -279,12 +279,13 @@ static void musb_conn_timer_handler(unsigned long _musb) } break; default: - DBG(1, "%s state not handled\n", otg_state_string(musb)); + DBG(1, "%s state not handled\n", + otg_state_string(musb->xceiv->state)); break; } spin_unlock_irqrestore(&musb->lock, flags); - DBG(4, "state is %s\n", otg_state_string(musb)); + DBG(4, "state is %s\n", otg_state_string(musb->xceiv->state)); } static void bfin_musb_enable(struct musb *musb) @@ -308,7 +309,7 @@ static void bfin_musb_set_vbus(struct musb *musb, int is_on) DBG(1, "VBUS %s, devctl %02x " /* otg %3x conf %08x prcm %08x */ "\n", - otg_state_string(musb), + otg_state_string(musb->xceiv->state), musb_readb(musb->mregs, MUSB_DEVCTL)); } diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c index 69a0da3c8f09..0d8984993a38 100644 --- a/drivers/usb/musb/da8xx.c +++ b/drivers/usb/musb/da8xx.c @@ -199,7 +199,8 @@ static void otg_timer(unsigned long _musb) * status change events (from the transceiver) otherwise. 
*/ devctl = musb_readb(mregs, MUSB_DEVCTL); - DBG(7, "Poll devctl %02x (%s)\n", devctl, otg_state_string(musb)); + DBG(7, "Poll devctl %02x (%s)\n", devctl, + otg_state_string(musb->xceiv->state)); spin_lock_irqsave(&musb->lock, flags); switch (musb->xceiv->state) { @@ -273,7 +274,8 @@ static void da8xx_musb_try_idle(struct musb *musb, unsigned long timeout) /* Never idle if active, or when VBUS timeout is not set as host */ if (musb->is_active || (musb->a_wait_bcon == 0 && musb->xceiv->state == OTG_STATE_A_WAIT_BCON)) { - DBG(4, "%s active, deleting timer\n", otg_state_string(musb)); + DBG(4, "%s active, deleting timer\n", + otg_state_string(musb->xceiv->state)); del_timer(&otg_workaround); last_timer = jiffies; return; @@ -286,7 +288,8 @@ static void da8xx_musb_try_idle(struct musb *musb, unsigned long timeout) last_timer = timeout; DBG(4, "%s inactive, starting idle timer for %u ms\n", - otg_state_string(musb), jiffies_to_msecs(timeout - jiffies)); + otg_state_string(musb->xceiv->state), + jiffies_to_msecs(timeout - jiffies)); mod_timer(&otg_workaround, timeout); } @@ -365,7 +368,7 @@ static irqreturn_t da8xx_musb_interrupt(int irq, void *hci) DBG(2, "VBUS %s (%s)%s, devctl %02x\n", drvvbus ? "on" : "off", - otg_state_string(musb), + otg_state_string(musb->xceiv->state), err ? " ERROR" : "", devctl); ret = IRQ_HANDLED; diff --git a/drivers/usb/musb/davinci.c b/drivers/usb/musb/davinci.c index e6de097fb7e8..3661b81a9571 100644 --- a/drivers/usb/musb/davinci.c +++ b/drivers/usb/musb/davinci.c @@ -220,7 +220,8 @@ static void otg_timer(unsigned long _musb) * status change events (from the transceiver) otherwise. */ devctl = musb_readb(mregs, MUSB_DEVCTL); - DBG(7, "poll devctl %02x (%s)\n", devctl, otg_state_string(musb)); + DBG(7, "poll devctl %02x (%s)\n", devctl, + otg_state_string(musb->xceiv->state)); spin_lock_irqsave(&musb->lock, flags); switch (musb->xceiv->state) { @@ -356,7 +357,7 @@ static irqreturn_t davinci_musb_interrupt(int irq, void *__hci) davinci_musb_source_power(musb, drvvbus, 0); DBG(2, "VBUS %s (%s)%s, devctl %02x\n", drvvbus ? "on" : "off", - otg_state_string(musb), + otg_state_string(musb->xceiv->state), err ? 
" ERROR" : "", devctl); retval = IRQ_HANDLED; diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index f10ff00ca09e..9a280872c2b4 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -333,26 +333,6 @@ void musb_load_testpacket(struct musb *musb) /*-------------------------------------------------------------------------*/ -const char *otg_state_string(struct musb *musb) -{ - switch (musb->xceiv->state) { - case OTG_STATE_A_IDLE: return "a_idle"; - case OTG_STATE_A_WAIT_VRISE: return "a_wait_vrise"; - case OTG_STATE_A_WAIT_BCON: return "a_wait_bcon"; - case OTG_STATE_A_HOST: return "a_host"; - case OTG_STATE_A_SUSPEND: return "a_suspend"; - case OTG_STATE_A_PERIPHERAL: return "a_peripheral"; - case OTG_STATE_A_WAIT_VFALL: return "a_wait_vfall"; - case OTG_STATE_A_VBUS_ERR: return "a_vbus_err"; - case OTG_STATE_B_IDLE: return "b_idle"; - case OTG_STATE_B_SRP_INIT: return "b_srp_init"; - case OTG_STATE_B_PERIPHERAL: return "b_peripheral"; - case OTG_STATE_B_WAIT_ACON: return "b_wait_acon"; - case OTG_STATE_B_HOST: return "b_host"; - default: return "UNDEFINED"; - } -} - #ifdef CONFIG_USB_MUSB_OTG /* @@ -373,12 +353,14 @@ void musb_otg_timer_func(unsigned long data) break; case OTG_STATE_A_SUSPEND: case OTG_STATE_A_WAIT_BCON: - DBG(1, "HNP: %s timeout\n", otg_state_string(musb)); + DBG(1, "HNP: %s timeout\n", + otg_state_string(musb->xceiv->state)); musb_platform_set_vbus(musb, 0); musb->xceiv->state = OTG_STATE_A_WAIT_VFALL; break; default: - DBG(1, "HNP: Unhandled mode %s\n", otg_state_string(musb)); + DBG(1, "HNP: Unhandled mode %s\n", + otg_state_string(musb->xceiv->state)); } musb->ignore_disconnect = 0; spin_unlock_irqrestore(&musb->lock, flags); @@ -393,12 +375,13 @@ void musb_hnp_stop(struct musb *musb) void __iomem *mbase = musb->mregs; u8 reg; - DBG(1, "HNP: stop from %s\n", otg_state_string(musb)); + DBG(1, "HNP: stop from %s\n", otg_state_string(musb->xceiv->state)); switch (musb->xceiv->state) { case OTG_STATE_A_PERIPHERAL: musb_g_disconnect(musb); - DBG(1, "HNP: back to %s\n", otg_state_string(musb)); + DBG(1, "HNP: back to %s\n", + otg_state_string(musb->xceiv->state)); break; case OTG_STATE_B_HOST: DBG(1, "HNP: Disabling HR\n"); @@ -412,7 +395,7 @@ void musb_hnp_stop(struct musb *musb) break; default: DBG(1, "HNP: Stopping in unknown state %s\n", - otg_state_string(musb)); + otg_state_string(musb->xceiv->state)); } /* @@ -451,7 +434,7 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, */ if (int_usb & MUSB_INTR_RESUME) { handled = IRQ_HANDLED; - DBG(3, "RESUME (%s)\n", otg_state_string(musb)); + DBG(3, "RESUME (%s)\n", otg_state_string(musb->xceiv->state)); if (devctl & MUSB_DEVCTL_HM) { #ifdef CONFIG_USB_MUSB_HDRC_HCD @@ -492,7 +475,7 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, default: WARNING("bogus %s RESUME (%s)\n", "host", - otg_state_string(musb)); + otg_state_string(musb->xceiv->state)); } #endif } else { @@ -526,7 +509,7 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, default: WARNING("bogus %s RESUME (%s)\n", "peripheral", - otg_state_string(musb)); + otg_state_string(musb->xceiv->state)); } } } @@ -542,7 +525,8 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, return IRQ_HANDLED; } - DBG(1, "SESSION_REQUEST (%s)\n", otg_state_string(musb)); + DBG(1, "SESSION_REQUEST (%s)\n", + otg_state_string(musb->xceiv->state)); /* IRQ arrives from ID pin sense or (later, if VBUS power * is removed) SRP. 
responses are time critical: @@ -607,7 +591,7 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, } DBG(1, "VBUS_ERROR in %s (%02x, %s), retry #%d, port1 %08x\n", - otg_state_string(musb), + otg_state_string(musb->xceiv->state), devctl, ({ char *s; switch (devctl & MUSB_DEVCTL_VBUS) { @@ -633,7 +617,7 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, #endif if (int_usb & MUSB_INTR_SUSPEND) { DBG(1, "SUSPEND (%s) devctl %02x power %02x\n", - otg_state_string(musb), devctl, power); + otg_state_string(musb->xceiv->state), devctl, power); handled = IRQ_HANDLED; switch (musb->xceiv->state) { @@ -758,13 +742,13 @@ b_host: usb_hcd_resume_root_hub(hcd); DBG(1, "CONNECT (%s) devctl %02x\n", - otg_state_string(musb), devctl); + otg_state_string(musb->xceiv->state), devctl); } #endif /* CONFIG_USB_MUSB_HDRC_HCD */ if ((int_usb & MUSB_INTR_DISCONNECT) && !musb->ignore_disconnect) { DBG(1, "DISCONNECT (%s) as %s, devctl %02x\n", - otg_state_string(musb), + otg_state_string(musb->xceiv->state), MUSB_MODE(musb), devctl); handled = IRQ_HANDLED; @@ -807,7 +791,7 @@ b_host: #endif /* GADGET */ default: WARNING("unhandled DISCONNECT transition (%s)\n", - otg_state_string(musb)); + otg_state_string(musb->xceiv->state)); break; } } @@ -832,7 +816,8 @@ b_host: musb_writeb(musb->mregs, MUSB_DEVCTL, 0); } } else if (is_peripheral_capable()) { - DBG(1, "BUS RESET as %s\n", otg_state_string(musb)); + DBG(1, "BUS RESET as %s\n", + otg_state_string(musb->xceiv->state)); switch (musb->xceiv->state) { #ifdef CONFIG_USB_OTG case OTG_STATE_A_SUSPEND: @@ -846,8 +831,8 @@ b_host: case OTG_STATE_A_WAIT_BCON: /* OPT TD.4.7-900ms */ /* never use invalid T(a_wait_bcon) */ DBG(1, "HNP: in %s, %d msec timeout\n", - otg_state_string(musb), - TA_WAIT_BCON(musb)); + otg_state_string(musb->xceiv->state), + TA_WAIT_BCON(musb)); mod_timer(&musb->otg_timer, jiffies + msecs_to_jiffies(TA_WAIT_BCON(musb))); break; @@ -858,7 +843,7 @@ b_host: break; case OTG_STATE_B_WAIT_ACON: DBG(1, "HNP: RESET (%s), to b_peripheral\n", - otg_state_string(musb)); + otg_state_string(musb->xceiv->state)); musb->xceiv->state = OTG_STATE_B_PERIPHERAL; musb_g_reset(musb); break; @@ -871,7 +856,7 @@ b_host: break; default: DBG(1, "Unhandled BUS RESET as %s\n", - otg_state_string(musb)); + otg_state_string(musb->xceiv->state)); } } } @@ -1713,7 +1698,7 @@ musb_mode_show(struct device *dev, struct device_attribute *attr, char *buf) int ret = -EINVAL; spin_lock_irqsave(&musb->lock, flags); - ret = sprintf(buf, "%s\n", otg_state_string(musb)); + ret = sprintf(buf, "%s\n", otg_state_string(musb->xceiv->state)); spin_unlock_irqrestore(&musb->lock, flags); return ret; diff --git a/drivers/usb/musb/musb_debug.h b/drivers/usb/musb/musb_debug.h index 94f6973cf8f7..f958a49a0124 100644 --- a/drivers/usb/musb/musb_debug.h +++ b/drivers/usb/musb/musb_debug.h @@ -54,8 +54,6 @@ static inline int _dbg_level(unsigned l) return musb_debug >= l; } -extern const char *otg_state_string(struct musb *); - #ifdef CONFIG_DEBUG_FS extern int musb_init_debugfs(struct musb *musb); extern void musb_exit_debugfs(struct musb *musb); diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 6dfbf9ffd7a6..3d799195c8b4 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1556,7 +1556,8 @@ static int musb_gadget_wakeup(struct usb_gadget *gadget) status = 0; goto done; default: - DBG(2, "Unhandled wake: %s\n", otg_state_string(musb)); + DBG(2, "Unhandled wake: %s\n", + 
otg_state_string(musb->xceiv->state)); goto done; } @@ -2039,7 +2040,7 @@ void musb_g_resume(struct musb *musb) break; default: WARNING("unhandled RESUME transition (%s)\n", - otg_state_string(musb)); + otg_state_string(musb->xceiv->state)); } } @@ -2069,7 +2070,7 @@ void musb_g_suspend(struct musb *musb) * A_PERIPHERAL may need care too */ WARNING("unhandled SUSPEND transition (%s)\n", - otg_state_string(musb)); + otg_state_string(musb->xceiv->state)); } } @@ -2104,7 +2105,7 @@ void musb_g_disconnect(struct musb *musb) default: #ifdef CONFIG_USB_MUSB_OTG DBG(2, "Unhandled disconnect %s, setting a_idle\n", - otg_state_string(musb)); + otg_state_string(musb->xceiv->state)); musb->xceiv->state = OTG_STATE_A_IDLE; MUSB_HST_MODE(musb); break; diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 5eef4a8847db..07b106c7de82 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -2304,7 +2304,7 @@ static int musb_bus_suspend(struct usb_hcd *hcd) if (musb->is_active) { WARNING("trying to suspend as %s while active\n", - otg_state_string(musb)); + otg_state_string(musb->xceiv->state)); return -EBUSY; } else return 0; diff --git a/drivers/usb/musb/musb_virthub.c b/drivers/usb/musb/musb_virthub.c index 489104a5ae14..bb1247d5fd04 100644 --- a/drivers/usb/musb/musb_virthub.c +++ b/drivers/usb/musb/musb_virthub.c @@ -98,7 +98,7 @@ static void musb_port_suspend(struct musb *musb, bool do_suspend) #endif default: DBG(1, "bogus rh suspend? %s\n", - otg_state_string(musb)); + otg_state_string(musb->xceiv->state)); } } else if (power & MUSB_POWER_SUSPENDM) { power &= ~MUSB_POWER_SUSPENDM; @@ -208,7 +208,8 @@ void musb_root_disconnect(struct musb *musb) musb->xceiv->state = OTG_STATE_B_IDLE; break; default: - DBG(1, "host disconnect (%s)\n", otg_state_string(musb)); + DBG(1, "host disconnect (%s)\n", + otg_state_string(musb->xceiv->state)); } } diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 57a27fa954b4..d51b15adc0b0 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -114,7 +114,8 @@ static void omap2430_musb_try_idle(struct musb *musb, unsigned long timeout) /* Never idle if active, or when VBUS timeout is not set as host */ if (musb->is_active || ((musb->a_wait_bcon == 0) && (musb->xceiv->state == OTG_STATE_A_WAIT_BCON))) { - DBG(4, "%s active, deleting timer\n", otg_state_string(musb)); + DBG(4, "%s active, deleting timer\n", + otg_state_string(musb->xceiv->state)); del_timer(&musb_idle_timer); last_timer = jiffies; return; @@ -131,7 +132,7 @@ static void omap2430_musb_try_idle(struct musb *musb, unsigned long timeout) last_timer = timeout; DBG(4, "%s inactive, for idle timer for %lu ms\n", - otg_state_string(musb), + otg_state_string(musb->xceiv->state), (unsigned long)jiffies_to_msecs(timeout - jiffies)); mod_timer(&musb_idle_timer, timeout); } @@ -195,7 +196,7 @@ static void omap2430_musb_set_vbus(struct musb *musb, int is_on) DBG(1, "VBUS %s, devctl %02x " /* otg %3x conf %08x prcm %08x */ "\n", - otg_state_string(musb), + otg_state_string(musb->xceiv->state), musb_readb(musb->mregs, MUSB_DEVCTL)); } diff --git a/drivers/usb/musb/tusb6010.c b/drivers/usb/musb/tusb6010.c index c47aac4a1f98..221feaaded72 100644 --- a/drivers/usb/musb/tusb6010.c +++ b/drivers/usb/musb/tusb6010.c @@ -422,7 +422,7 @@ static void musb_do_idle(unsigned long _musb) && (musb->idle_timeout == 0 || time_after(jiffies, musb->idle_timeout))) { DBG(4, "Nothing connected %s, turning off VBUS\n", - otg_state_string(musb)); + 
otg_state_string(musb->xceiv->state)); } /* FALLTHROUGH */ case OTG_STATE_A_IDLE: @@ -481,7 +481,8 @@ static void tusb_musb_try_idle(struct musb *musb, unsigned long timeout) /* Never idle if active, or when VBUS timeout is not set as host */ if (musb->is_active || ((musb->a_wait_bcon == 0) && (musb->xceiv->state == OTG_STATE_A_WAIT_BCON))) { - DBG(4, "%s active, deleting timer\n", otg_state_string(musb)); + DBG(4, "%s active, deleting timer\n", + otg_state_string(musb->xceiv->state)); del_timer(&musb_idle_timer); last_timer = jiffies; return; @@ -498,7 +499,7 @@ static void tusb_musb_try_idle(struct musb *musb, unsigned long timeout) last_timer = timeout; DBG(4, "%s inactive, for idle timer for %lu ms\n", - otg_state_string(musb), + otg_state_string(musb->xceiv->state), (unsigned long)jiffies_to_msecs(timeout - jiffies)); mod_timer(&musb_idle_timer, timeout); } @@ -573,7 +574,7 @@ static void tusb_musb_set_vbus(struct musb *musb, int is_on) musb_writeb(musb->mregs, MUSB_DEVCTL, devctl); DBG(1, "VBUS %s, devctl %02x otg %3x conf %08x prcm %08x\n", - otg_state_string(musb), + otg_state_string(musb->xceiv->state), musb_readb(musb->mregs, MUSB_DEVCTL), musb_readl(tbase, TUSB_DEV_OTG_STAT), conf, prcm); @@ -702,13 +703,13 @@ tusb_otg_ints(struct musb *musb, u32 int_src, void __iomem *tbase) musb->is_active = 0; } DBG(2, "vbus change, %s, otg %03x\n", - otg_state_string(musb), otg_stat); + otg_state_string(musb->xceiv->state), otg_stat); idle_timeout = jiffies + (1 * HZ); schedule_work(&musb->irq_work); } else /* A-dev state machine */ { DBG(2, "vbus change, %s, otg %03x\n", - otg_state_string(musb), otg_stat); + otg_state_string(musb->xceiv->state), otg_stat); switch (musb->xceiv->state) { case OTG_STATE_A_IDLE: @@ -756,7 +757,8 @@ tusb_otg_ints(struct musb *musb, u32 int_src, void __iomem *tbase) if (int_src & TUSB_INT_SRC_OTG_TIMEOUT) { u8 devctl; - DBG(4, "%s timer, %03x\n", otg_state_string(musb), otg_stat); + DBG(4, "%s timer, %03x\n", + otg_state_string(musb->xceiv->state), otg_stat); switch (musb->xceiv->state) { case OTG_STATE_A_WAIT_VRISE: diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h index bc84858b3a4d..d87f44f5b04e 100644 --- a/include/linux/usb/otg.h +++ b/include/linux/usb/otg.h @@ -168,6 +168,7 @@ otg_shutdown(struct otg_transceiver *otg) #ifdef CONFIG_USB_OTG_UTILS extern struct otg_transceiver *otg_get_transceiver(void); extern void otg_put_transceiver(struct otg_transceiver *); +extern const char *otg_state_string(enum usb_otg_state state); #else static inline struct otg_transceiver *otg_get_transceiver(void) { @@ -177,6 +178,11 @@ static inline struct otg_transceiver *otg_get_transceiver(void) static inline void otg_put_transceiver(struct otg_transceiver *x) { } + +static inline const char *otg_state_string(enum usb_otg_state state) +{ + return NULL; +} #endif /* Context: can sleep */ @@ -246,6 +252,5 @@ otg_unregister_notifier(struct otg_transceiver *otg, struct notifier_block *nb) /* for OTG controller drivers (and maybe other stuff) */ extern int usb_bus_start_enum(struct usb_bus *bus, unsigned port_num); -extern const char *otg_state_string(enum usb_otg_state state); #endif /* __LINUX_USB_OTG_H */ -- cgit v1.2.3 From 0f73cac8e41723d600c91a0f5b481dc3202f4f82 Mon Sep 17 00:00:00 2001 From: Anji jonnala Date: Wed, 4 May 2011 10:19:46 +0530 Subject: USB: OTG: msm: vote for dayatona fabric clock HSUSB core clock is derived from daytona fabric clock and for HSUSB operational require minimum core clock at 55MHz. 
Since, HSUSB cannot tolerate daytona fabric clock change in the middle of HSUSB operational, vote for maximum Daytona fabric clock while usb is operational Signed-off-by: Anji jonnala Signed-off-by: Pavankumar Kondeti Signed-off-by: Greg Kroah-Hartman --- drivers/usb/otg/msm_otg.c | 36 +++++++++++++++++++++++++++++++++++- include/linux/usb/msm_hsusb.h | 7 +++++-- 2 files changed, 40 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/otg/msm_otg.c b/drivers/usb/otg/msm_otg.c index f58b7dab75aa..7792fef0be5e 100644 --- a/drivers/usb/otg/msm_otg.c +++ b/drivers/usb/otg/msm_otg.c @@ -324,6 +324,9 @@ static int msm_otg_suspend(struct msm_otg *motg) if (motg->core_clk) clk_disable(motg->core_clk); + if (!IS_ERR(motg->pclk_src)) + clk_disable(motg->pclk_src); + if (device_may_wakeup(otg->dev)) enable_irq_wake(motg->irq); if (bus) @@ -347,6 +350,9 @@ static int msm_otg_resume(struct msm_otg *motg) if (!atomic_read(&motg->in_lpm)) return 0; + if (!IS_ERR(motg->pclk_src)) + clk_enable(motg->pclk_src); + clk_enable(motg->pclk); clk_enable(motg->clk); if (motg->core_clk) @@ -862,12 +868,31 @@ static int __init msm_otg_probe(struct platform_device *pdev) ret = PTR_ERR(motg->clk); goto put_phy_reset_clk; } + clk_set_rate(motg->clk, 60000000); + + /* + * If USB Core is running its protocol engine based on CORE CLK, + * CORE CLK must be running at >55Mhz for correct HSUSB + * operation and USB core cannot tolerate frequency changes on + * CORE CLK. For such USB cores, vote for maximum clk frequency + * on pclk source + */ + if (motg->pdata->pclk_src_name) { + motg->pclk_src = clk_get(&pdev->dev, + motg->pdata->pclk_src_name); + if (IS_ERR(motg->pclk_src)) + goto put_clk; + clk_set_rate(motg->pclk_src, INT_MAX); + clk_enable(motg->pclk_src); + } else + motg->pclk_src = ERR_PTR(-ENOENT); + motg->pclk = clk_get(&pdev->dev, "usb_hs_pclk"); if (IS_ERR(motg->pclk)) { dev_err(&pdev->dev, "failed to get usb_hs_pclk\n"); ret = PTR_ERR(motg->pclk); - goto put_clk; + goto put_pclk_src; } /* @@ -955,6 +980,11 @@ put_core_clk: if (motg->core_clk) clk_put(motg->core_clk); clk_put(motg->pclk); +put_pclk_src: + if (!IS_ERR(motg->pclk_src)) { + clk_disable(motg->pclk_src); + clk_put(motg->pclk_src); + } put_clk: clk_put(motg->clk); put_phy_reset_clk: @@ -1004,6 +1034,10 @@ static int __devexit msm_otg_remove(struct platform_device *pdev) clk_disable(motg->clk); if (motg->core_clk) clk_disable(motg->core_clk); + if (!IS_ERR(motg->pclk_src)) { + clk_disable(motg->pclk_src); + clk_put(motg->pclk_src); + } iounmap(motg->regs); pm_runtime_set_suspended(&pdev->dev); diff --git a/include/linux/usb/msm_hsusb.h b/include/linux/usb/msm_hsusb.h index 3657403eac18..31ef1853f93c 100644 --- a/include/linux/usb/msm_hsusb.h +++ b/include/linux/usb/msm_hsusb.h @@ -64,7 +64,8 @@ enum otg_control_type { * @otg_control: OTG switch controlled by user/Id pin * @default_mode: Default operational mode. Applicable only if * OTG switch is controller by user. - * + * @pclk_src_name: pclk is derived from ebi1_usb_clk in case of 7x27 and 8k + * dfab_usb_hs_clk in case of 8660 and 8960. */ struct msm_otg_platform_data { int *phy_init_seq; @@ -74,6 +75,7 @@ struct msm_otg_platform_data { enum otg_control_type otg_control; enum usb_mode_type default_mode; void (*setup_gpio)(enum usb_otg_state state); + char *pclk_src_name; }; /** @@ -83,6 +85,7 @@ struct msm_otg_platform_data { * @irq: IRQ number assigned for HSUSB controller. * @clk: clock struct of usb_hs_clk. * @pclk: clock struct of usb_hs_pclk. 
+ * @pclk_src: pclk source for voting. * @phy_reset_clk: clock struct of usb_phy_clk. * @core_clk: clock struct of usb_hs_core_clk. * @regs: ioremapped register base address. @@ -90,7 +93,6 @@ struct msm_otg_platform_data { * @sm_work: OTG state machine work. * @in_lpm: indicates low power mode (LPM) state. * @async_int: Async interrupt arrived. - * */ struct msm_otg { struct otg_transceiver otg; @@ -98,6 +100,7 @@ struct msm_otg { int irq; struct clk *clk; struct clk *pclk; + struct clk *pclk_src; struct clk *phy_reset_clk; struct clk *core_clk; void __iomem *regs; -- cgit v1.2.3 From d860852e087eed7eadbea64f1a8db9a231c5e9b3 Mon Sep 17 00:00:00 2001 From: Pavankumar Kondeti Date: Wed, 4 May 2011 10:19:47 +0530 Subject: USB: OTG: msm: Implement charger detection Implement good battery algorithm defined in the battery charging V1.2 spec for detecting different charging ports. USB hardware is put into low power mode when connected to a dedicated charging port. vbus_draw and set_power methods are implemented for determining the allowed current from Host in different states (un-configured/suspend/configured). The charger block is implemented using vendor specific registers and the PHY used in MSM8960(28nm PHY) different from older targets like MSM8x60 and MSM7x30(45nm PHY). The PHY vendor and product id registers are not implemented in the above chipsets. Hence PHY type is passed via platform data. Signed-off-by: Pavankumar Kondeti Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/ci13xxx_udc.c | 10 ++ drivers/usb/otg/msm_otg.c | 380 ++++++++++++++++++++++++++++++++++++++- include/linux/usb/msm_hsusb.h | 72 +++++++- 3 files changed, 457 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/gadget/ci13xxx_udc.c b/drivers/usb/gadget/ci13xxx_udc.c index 68123ee01392..baaf87ed7685 100644 --- a/drivers/usb/gadget/ci13xxx_udc.c +++ b/drivers/usb/gadget/ci13xxx_udc.c @@ -2506,6 +2506,15 @@ out: return ret; } +static int ci13xxx_vbus_draw(struct usb_gadget *_gadget, unsigned mA) +{ + struct ci13xxx *udc = container_of(_gadget, struct ci13xxx, gadget); + + if (udc->transceiver) + return otg_set_power(udc->transceiver, mA); + return -ENOTSUPP; +} + /** * Device operations part of the API to the USB controller hardware, * which don't involve endpoints (or i/o) @@ -2514,6 +2523,7 @@ out: static const struct usb_gadget_ops usb_gadget_ops = { .vbus_session = ci13xxx_vbus_session, .wakeup = ci13xxx_wakeup, + .vbus_draw = ci13xxx_vbus_draw, }; /** diff --git a/drivers/usb/otg/msm_otg.c b/drivers/usb/otg/msm_otg.c index 7792fef0be5e..854b7e3413dd 100644 --- a/drivers/usb/otg/msm_otg.c +++ b/drivers/usb/otg/msm_otg.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-2010, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2009-2011, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -409,6 +409,33 @@ skip_phy_resume: } #endif +static void msm_otg_notify_charger(struct msm_otg *motg, unsigned mA) +{ + if (motg->cur_power == mA) + return; + + /* TODO: Notify PMIC about available current */ + dev_info(motg->otg.dev, "Avail curr from USB = %u\n", mA); + motg->cur_power = mA; +} + +static int msm_otg_set_power(struct otg_transceiver *otg, unsigned mA) +{ + struct msm_otg *motg = container_of(otg, struct msm_otg, otg); + + /* + * Gadget driver uses set_power method to notify about the + * available current based on suspend/configured states. 
+ * + * IDEV_CHG can be drawn irrespective of suspend/un-configured + * states when CDP/ACA is connected. + */ + if (motg->chg_type == USB_SDP_CHARGER) + msm_otg_notify_charger(motg, mA); + + return 0; +} + static void msm_otg_start_host(struct otg_transceiver *otg, int on) { struct msm_otg *motg = container_of(otg, struct msm_otg, otg); @@ -563,6 +590,306 @@ static int msm_otg_set_peripheral(struct otg_transceiver *otg, return 0; } +static bool msm_chg_check_secondary_det(struct msm_otg *motg) +{ + struct otg_transceiver *otg = &motg->otg; + u32 chg_det; + bool ret = false; + + switch (motg->pdata->phy_type) { + case CI_45NM_INTEGRATED_PHY: + chg_det = ulpi_read(otg, 0x34); + ret = chg_det & (1 << 4); + break; + case SNPS_28NM_INTEGRATED_PHY: + chg_det = ulpi_read(otg, 0x87); + ret = chg_det & 1; + break; + default: + break; + } + return ret; +} + +static void msm_chg_enable_secondary_det(struct msm_otg *motg) +{ + struct otg_transceiver *otg = &motg->otg; + u32 chg_det; + + switch (motg->pdata->phy_type) { + case CI_45NM_INTEGRATED_PHY: + chg_det = ulpi_read(otg, 0x34); + /* Turn off charger block */ + chg_det |= ~(1 << 1); + ulpi_write(otg, chg_det, 0x34); + udelay(20); + /* control chg block via ULPI */ + chg_det &= ~(1 << 3); + ulpi_write(otg, chg_det, 0x34); + /* put it in host mode for enabling D- source */ + chg_det &= ~(1 << 2); + ulpi_write(otg, chg_det, 0x34); + /* Turn on chg detect block */ + chg_det &= ~(1 << 1); + ulpi_write(otg, chg_det, 0x34); + udelay(20); + /* enable chg detection */ + chg_det &= ~(1 << 0); + ulpi_write(otg, chg_det, 0x34); + break; + case SNPS_28NM_INTEGRATED_PHY: + /* + * Configure DM as current source, DP as current sink + * and enable battery charging comparators. + */ + ulpi_write(otg, 0x8, 0x85); + ulpi_write(otg, 0x2, 0x85); + ulpi_write(otg, 0x1, 0x85); + break; + default: + break; + } +} + +static bool msm_chg_check_primary_det(struct msm_otg *motg) +{ + struct otg_transceiver *otg = &motg->otg; + u32 chg_det; + bool ret = false; + + switch (motg->pdata->phy_type) { + case CI_45NM_INTEGRATED_PHY: + chg_det = ulpi_read(otg, 0x34); + ret = chg_det & (1 << 4); + break; + case SNPS_28NM_INTEGRATED_PHY: + chg_det = ulpi_read(otg, 0x87); + ret = chg_det & 1; + break; + default: + break; + } + return ret; +} + +static void msm_chg_enable_primary_det(struct msm_otg *motg) +{ + struct otg_transceiver *otg = &motg->otg; + u32 chg_det; + + switch (motg->pdata->phy_type) { + case CI_45NM_INTEGRATED_PHY: + chg_det = ulpi_read(otg, 0x34); + /* enable chg detection */ + chg_det &= ~(1 << 0); + ulpi_write(otg, chg_det, 0x34); + break; + case SNPS_28NM_INTEGRATED_PHY: + /* + * Configure DP as current source, DM as current sink + * and enable battery charging comparators. 
+ */ + ulpi_write(otg, 0x2, 0x85); + ulpi_write(otg, 0x1, 0x85); + break; + default: + break; + } +} + +static bool msm_chg_check_dcd(struct msm_otg *motg) +{ + struct otg_transceiver *otg = &motg->otg; + u32 line_state; + bool ret = false; + + switch (motg->pdata->phy_type) { + case CI_45NM_INTEGRATED_PHY: + line_state = ulpi_read(otg, 0x15); + ret = !(line_state & 1); + break; + case SNPS_28NM_INTEGRATED_PHY: + line_state = ulpi_read(otg, 0x87); + ret = line_state & 2; + break; + default: + break; + } + return ret; +} + +static void msm_chg_disable_dcd(struct msm_otg *motg) +{ + struct otg_transceiver *otg = &motg->otg; + u32 chg_det; + + switch (motg->pdata->phy_type) { + case CI_45NM_INTEGRATED_PHY: + chg_det = ulpi_read(otg, 0x34); + chg_det &= ~(1 << 5); + ulpi_write(otg, chg_det, 0x34); + break; + case SNPS_28NM_INTEGRATED_PHY: + ulpi_write(otg, 0x10, 0x86); + break; + default: + break; + } +} + +static void msm_chg_enable_dcd(struct msm_otg *motg) +{ + struct otg_transceiver *otg = &motg->otg; + u32 chg_det; + + switch (motg->pdata->phy_type) { + case CI_45NM_INTEGRATED_PHY: + chg_det = ulpi_read(otg, 0x34); + /* Turn on D+ current source */ + chg_det |= (1 << 5); + ulpi_write(otg, chg_det, 0x34); + break; + case SNPS_28NM_INTEGRATED_PHY: + /* Data contact detection enable */ + ulpi_write(otg, 0x10, 0x85); + break; + default: + break; + } +} + +static void msm_chg_block_on(struct msm_otg *motg) +{ + struct otg_transceiver *otg = &motg->otg; + u32 func_ctrl, chg_det; + + /* put the controller in non-driving mode */ + func_ctrl = ulpi_read(otg, ULPI_FUNC_CTRL); + func_ctrl &= ~ULPI_FUNC_CTRL_OPMODE_MASK; + func_ctrl |= ULPI_FUNC_CTRL_OPMODE_NONDRIVING; + ulpi_write(otg, func_ctrl, ULPI_FUNC_CTRL); + + switch (motg->pdata->phy_type) { + case CI_45NM_INTEGRATED_PHY: + chg_det = ulpi_read(otg, 0x34); + /* control chg block via ULPI */ + chg_det &= ~(1 << 3); + ulpi_write(otg, chg_det, 0x34); + /* Turn on chg detect block */ + chg_det &= ~(1 << 1); + ulpi_write(otg, chg_det, 0x34); + udelay(20); + break; + case SNPS_28NM_INTEGRATED_PHY: + /* Clear charger detecting control bits */ + ulpi_write(otg, 0x3F, 0x86); + /* Clear alt interrupt latch and enable bits */ + ulpi_write(otg, 0x1F, 0x92); + ulpi_write(otg, 0x1F, 0x95); + udelay(100); + break; + default: + break; + } +} + +static void msm_chg_block_off(struct msm_otg *motg) +{ + struct otg_transceiver *otg = &motg->otg; + u32 func_ctrl, chg_det; + + switch (motg->pdata->phy_type) { + case CI_45NM_INTEGRATED_PHY: + chg_det = ulpi_read(otg, 0x34); + /* Turn off charger block */ + chg_det |= ~(1 << 1); + ulpi_write(otg, chg_det, 0x34); + break; + case SNPS_28NM_INTEGRATED_PHY: + /* Clear charger detecting control bits */ + ulpi_write(otg, 0x3F, 0x86); + /* Clear alt interrupt latch and enable bits */ + ulpi_write(otg, 0x1F, 0x92); + ulpi_write(otg, 0x1F, 0x95); + break; + default: + break; + } + + /* put the controller in normal mode */ + func_ctrl = ulpi_read(otg, ULPI_FUNC_CTRL); + func_ctrl &= ~ULPI_FUNC_CTRL_OPMODE_MASK; + func_ctrl |= ULPI_FUNC_CTRL_OPMODE_NORMAL; + ulpi_write(otg, func_ctrl, ULPI_FUNC_CTRL); +} + +#define MSM_CHG_DCD_POLL_TIME (100 * HZ/1000) /* 100 msec */ +#define MSM_CHG_DCD_MAX_RETRIES 6 /* Tdcd_tmout = 6 * 100 msec */ +#define MSM_CHG_PRIMARY_DET_TIME (40 * HZ/1000) /* TVDPSRC_ON */ +#define MSM_CHG_SECONDARY_DET_TIME (40 * HZ/1000) /* TVDMSRC_ON */ +static void msm_chg_detect_work(struct work_struct *w) +{ + struct msm_otg *motg = container_of(w, struct msm_otg, chg_work.work); + struct otg_transceiver *otg = 
&motg->otg; + bool is_dcd, tmout, vout; + unsigned long delay; + + dev_dbg(otg->dev, "chg detection work\n"); + switch (motg->chg_state) { + case USB_CHG_STATE_UNDEFINED: + pm_runtime_get_sync(otg->dev); + msm_chg_block_on(motg); + msm_chg_enable_dcd(motg); + motg->chg_state = USB_CHG_STATE_WAIT_FOR_DCD; + motg->dcd_retries = 0; + delay = MSM_CHG_DCD_POLL_TIME; + break; + case USB_CHG_STATE_WAIT_FOR_DCD: + is_dcd = msm_chg_check_dcd(motg); + tmout = ++motg->dcd_retries == MSM_CHG_DCD_MAX_RETRIES; + if (is_dcd || tmout) { + msm_chg_disable_dcd(motg); + msm_chg_enable_primary_det(motg); + delay = MSM_CHG_PRIMARY_DET_TIME; + motg->chg_state = USB_CHG_STATE_DCD_DONE; + } else { + delay = MSM_CHG_DCD_POLL_TIME; + } + break; + case USB_CHG_STATE_DCD_DONE: + vout = msm_chg_check_primary_det(motg); + if (vout) { + msm_chg_enable_secondary_det(motg); + delay = MSM_CHG_SECONDARY_DET_TIME; + motg->chg_state = USB_CHG_STATE_PRIMARY_DONE; + } else { + motg->chg_type = USB_SDP_CHARGER; + motg->chg_state = USB_CHG_STATE_DETECTED; + delay = 0; + } + break; + case USB_CHG_STATE_PRIMARY_DONE: + vout = msm_chg_check_secondary_det(motg); + if (vout) + motg->chg_type = USB_DCP_CHARGER; + else + motg->chg_type = USB_CDP_CHARGER; + motg->chg_state = USB_CHG_STATE_SECONDARY_DONE; + /* fall through */ + case USB_CHG_STATE_SECONDARY_DONE: + motg->chg_state = USB_CHG_STATE_DETECTED; + case USB_CHG_STATE_DETECTED: + msm_chg_block_off(motg); + dev_dbg(otg->dev, "charger = %d\n", motg->chg_type); + schedule_work(&motg->sm_work); + return; + default: + return; + } + + schedule_delayed_work(&motg->chg_work, delay); +} + /* * We support OTG, Peripheral only and Host only configurations. In case * of OTG, mode switch (host-->peripheral/peripheral-->host) can happen @@ -633,9 +960,48 @@ static void msm_otg_sm_work(struct work_struct *w) writel(readl(USB_OTGSC) & ~OTGSC_BSVIE, USB_OTGSC); msm_otg_start_host(otg, 1); otg->state = OTG_STATE_A_HOST; - } else if (test_bit(B_SESS_VLD, &motg->inputs) && otg->gadget) { - msm_otg_start_peripheral(otg, 1); - otg->state = OTG_STATE_B_PERIPHERAL; + } else if (test_bit(B_SESS_VLD, &motg->inputs)) { + switch (motg->chg_state) { + case USB_CHG_STATE_UNDEFINED: + msm_chg_detect_work(&motg->chg_work.work); + break; + case USB_CHG_STATE_DETECTED: + switch (motg->chg_type) { + case USB_DCP_CHARGER: + msm_otg_notify_charger(motg, + IDEV_CHG_MAX); + break; + case USB_CDP_CHARGER: + msm_otg_notify_charger(motg, + IDEV_CHG_MAX); + msm_otg_start_peripheral(otg, 1); + otg->state = OTG_STATE_B_PERIPHERAL; + break; + case USB_SDP_CHARGER: + msm_otg_notify_charger(motg, IUNIT); + msm_otg_start_peripheral(otg, 1); + otg->state = OTG_STATE_B_PERIPHERAL; + break; + default: + break; + } + break; + default: + break; + } + } else { + /* + * If charger detection work is pending, decrement + * the pm usage counter to balance with the one that + * is incremented in charger detection work. 
+ */ + if (cancel_delayed_work_sync(&motg->chg_work)) { + pm_runtime_put_sync(otg->dev); + msm_otg_reset(otg); + } + msm_otg_notify_charger(motg, 0); + motg->chg_state = USB_CHG_STATE_UNDEFINED; + motg->chg_type = USB_INVALID_CHARGER; } pm_runtime_put_sync(otg->dev); break; @@ -643,7 +1009,10 @@ static void msm_otg_sm_work(struct work_struct *w) dev_dbg(otg->dev, "OTG_STATE_B_PERIPHERAL state\n"); if (!test_bit(B_SESS_VLD, &motg->inputs) || !test_bit(ID, &motg->inputs)) { + msm_otg_notify_charger(motg, 0); msm_otg_start_peripheral(otg, 0); + motg->chg_state = USB_CHG_STATE_UNDEFINED; + motg->chg_type = USB_INVALID_CHARGER; otg->state = OTG_STATE_B_IDLE; msm_otg_reset(otg); schedule_work(w); @@ -935,6 +1304,7 @@ static int __init msm_otg_probe(struct platform_device *pdev) writel(0, USB_OTGSC); INIT_WORK(&motg->sm_work, msm_otg_sm_work); + INIT_DELAYED_WORK(&motg->chg_work, msm_chg_detect_work); ret = request_irq(motg->irq, msm_otg_irq, IRQF_SHARED, "msm_otg", motg); if (ret) { @@ -945,6 +1315,7 @@ static int __init msm_otg_probe(struct platform_device *pdev) otg->init = msm_otg_reset; otg->set_host = msm_otg_set_host; otg->set_peripheral = msm_otg_set_peripheral; + otg->set_power = msm_otg_set_power; otg->io_ops = &msm_otg_io_ops; @@ -1004,6 +1375,7 @@ static int __devexit msm_otg_remove(struct platform_device *pdev) return -EBUSY; msm_otg_debugfs_cleanup(); + cancel_delayed_work_sync(&motg->chg_work); cancel_work_sync(&motg->sm_work); pm_runtime_resume(&pdev->dev); diff --git a/include/linux/usb/msm_hsusb.h b/include/linux/usb/msm_hsusb.h index 31ef1853f93c..00311fe9d0df 100644 --- a/include/linux/usb/msm_hsusb.h +++ b/include/linux/usb/msm_hsusb.h @@ -2,7 +2,7 @@ * * Copyright (C) 2008 Google, Inc. * Author: Brian Swetland - * Copyright (c) 2009-2010, Code Aurora Forum. All rights reserved. + * Copyright (c) 2009-2011, Code Aurora Forum. All rights reserved. * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -53,6 +53,64 @@ enum otg_control_type { OTG_USER_CONTROL, }; +/** + * PHY used in + * + * INVALID_PHY Unsupported PHY + * CI_45NM_INTEGRATED_PHY Chipidea 45nm integrated PHY + * SNPS_28NM_INTEGRATED_PHY Synopsis 28nm integrated PHY + * + */ +enum msm_usb_phy_type { + INVALID_PHY = 0, + CI_45NM_INTEGRATED_PHY, + SNPS_28NM_INTEGRATED_PHY, +}; + +#define IDEV_CHG_MAX 1500 +#define IUNIT 100 + +/** + * Different states involved in USB charger detection. + * + * USB_CHG_STATE_UNDEFINED USB charger is not connected or detection + * process is not yet started. + * USB_CHG_STATE_WAIT_FOR_DCD Waiting for Data pins contact. + * USB_CHG_STATE_DCD_DONE Data pin contact is detected. + * USB_CHG_STATE_PRIMARY_DONE Primary detection is completed (Detects + * between SDP and DCP/CDP). + * USB_CHG_STATE_SECONDARY_DONE Secondary detection is completed (Detects + * between DCP and CDP). + * USB_CHG_STATE_DETECTED USB charger type is determined. + * + */ +enum usb_chg_state { + USB_CHG_STATE_UNDEFINED = 0, + USB_CHG_STATE_WAIT_FOR_DCD, + USB_CHG_STATE_DCD_DONE, + USB_CHG_STATE_PRIMARY_DONE, + USB_CHG_STATE_SECONDARY_DONE, + USB_CHG_STATE_DETECTED, +}; + +/** + * USB charger types + * + * USB_INVALID_CHARGER Invalid USB charger. + * USB_SDP_CHARGER Standard downstream port. Refers to a downstream port + * on USB2.0 compliant host/hub. + * USB_DCP_CHARGER Dedicated charger port (AC charger/ Wall charger). + * USB_CDP_CHARGER Charging downstream port. 
Enumeration can happen and + * IDEV_CHG_MAX can be drawn irrespective of USB state. + * + */ +enum usb_chg_type { + USB_INVALID_CHARGER = 0, + USB_SDP_CHARGER, + USB_DCP_CHARGER, + USB_CDP_CHARGER, +}; + /** * struct msm_otg_platform_data - platform device data * for msm_otg driver. @@ -74,6 +132,7 @@ struct msm_otg_platform_data { enum usb_mode_type mode; enum otg_control_type otg_control; enum usb_mode_type default_mode; + enum msm_usb_phy_type phy_type; void (*setup_gpio)(enum usb_otg_state state); char *pclk_src_name; }; @@ -93,6 +152,12 @@ struct msm_otg_platform_data { * @sm_work: OTG state machine work. * @in_lpm: indicates low power mode (LPM) state. * @async_int: Async interrupt arrived. + * @cur_power: The amount of mA available from downstream port. + * @chg_work: Charger detection work. + * @chg_state: The state of charger detection process. + * @chg_type: The type of charger attached. + * @dcd_retires: The retry count used to track Data contact + * detection process. */ struct msm_otg { struct otg_transceiver otg; @@ -110,6 +175,11 @@ struct msm_otg { struct work_struct sm_work; atomic_t in_lpm; int async_int; + unsigned cur_power; + struct delayed_work chg_work; + enum usb_chg_state chg_state; + enum usb_chg_type chg_type; + u8 dcd_retries; }; #endif -- cgit v1.2.3 From 04aebcbb1b6dccadc8862b2765265f65a946db57 Mon Sep 17 00:00:00 2001 From: Pavankumar Kondeti Date: Wed, 4 May 2011 10:19:49 +0530 Subject: USB: OTG: msm: Add PHY suspend support for MSM8960 Signed-off-by: Pavankumar Kondeti Signed-off-by: Greg Kroah-Hartman --- drivers/usb/otg/msm_otg.c | 64 +++++++++++++++++++++++++++++++++------- include/linux/usb/msm_hsusb_hw.h | 2 ++ 2 files changed, 56 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/otg/msm_otg.c b/drivers/usb/otg/msm_otg.c index 628ba7d943da..1cdda6c05238 100644 --- a/drivers/usb/otg/msm_otg.c +++ b/drivers/usb/otg/msm_otg.c @@ -163,6 +163,32 @@ put_3p3: return rc; } +#ifdef CONFIG_PM_SLEEP +#define USB_PHY_SUSP_DIG_VOL 500000 +static int msm_hsusb_config_vddcx(int high) +{ + int max_vol = USB_PHY_VDD_DIG_VOL_MAX; + int min_vol; + int ret; + + if (high) + min_vol = USB_PHY_VDD_DIG_VOL_MIN; + else + min_vol = USB_PHY_SUSP_DIG_VOL; + + ret = regulator_set_voltage(hsusb_vddcx, min_vol, max_vol); + if (ret) { + pr_err("%s: unable to set the voltage for regulator " + "HSUSB_VDDCX\n", __func__); + return ret; + } + + pr_debug("%s: min_vol:%d max_vol:%d\n", __func__, min_vol, max_vol); + + return ret; +} +#endif + static int msm_hsusb_ldo_set_mode(int on) { int ret = 0; @@ -434,27 +460,28 @@ static int msm_otg_suspend(struct msm_otg *motg) disable_irq(motg->irq); /* + * Chipidea 45-nm PHY suspend sequence: + * * Interrupt Latch Register auto-clear feature is not present * in all PHY versions. Latch register is clear on read type. * Clear latch register to avoid spurious wakeup from * low power mode (LPM). - */ - ulpi_read(otg, 0x14); - - /* + * * PHY comparators are disabled when PHY enters into low power * mode (LPM). Keep PHY comparators ON in LPM only when we expect * VBUS/Id notifications from USB PHY. Otherwise turn off USB * PHY comparators. This save significant amount of power. - */ - if (pdata->otg_control == OTG_PHY_CONTROL) - ulpi_write(otg, 0x01, 0x30); - - /* + * * PLL is not turned off when PHY enters into low power mode (LPM). * Disable PLL for maximum power savings. 
*/ - ulpi_write(otg, 0x08, 0x09); + + if (motg->pdata->phy_type == CI_45NM_INTEGRATED_PHY) { + ulpi_read(otg, 0x14); + if (pdata->otg_control == OTG_PHY_CONTROL) + ulpi_write(otg, 0x01, 0x30); + ulpi_write(otg, 0x08, 0x09); + } /* * PHY may take some time or even fail to enter into low power @@ -485,6 +512,10 @@ static int msm_otg_suspend(struct msm_otg *motg) */ writel(readl(USB_USBCMD) | ASYNC_INTR_CTRL | ULPI_STP_CTRL, USB_USBCMD); + if (motg->pdata->phy_type == SNPS_28NM_INTEGRATED_PHY && + motg->pdata->otg_control == OTG_PMIC_CONTROL) + writel(readl(USB_PHY_CTRL) | PHY_RETEN, USB_PHY_CTRL); + clk_disable(motg->pclk); clk_disable(motg->clk); if (motg->core_clk) @@ -493,6 +524,12 @@ static int msm_otg_suspend(struct msm_otg *motg) if (!IS_ERR(motg->pclk_src)) clk_disable(motg->pclk_src); + if (motg->pdata->phy_type == SNPS_28NM_INTEGRATED_PHY && + motg->pdata->otg_control == OTG_PMIC_CONTROL) { + msm_hsusb_ldo_set_mode(0); + msm_hsusb_config_vddcx(0); + } + if (device_may_wakeup(otg->dev)) enable_irq_wake(motg->irq); if (bus) @@ -524,6 +561,13 @@ static int msm_otg_resume(struct msm_otg *motg) if (motg->core_clk) clk_enable(motg->core_clk); + if (motg->pdata->phy_type == SNPS_28NM_INTEGRATED_PHY && + motg->pdata->otg_control == OTG_PMIC_CONTROL) { + msm_hsusb_ldo_set_mode(1); + msm_hsusb_config_vddcx(1); + writel(readl(USB_PHY_CTRL) & ~PHY_RETEN, USB_PHY_CTRL); + } + temp = readl(USB_USBCMD); temp &= ~ASYNC_INTR_CTRL; temp &= ~ULPI_STP_CTRL; diff --git a/include/linux/usb/msm_hsusb_hw.h b/include/linux/usb/msm_hsusb_hw.h index 7d1babbff071..6e97a2d3d39f 100644 --- a/include/linux/usb/msm_hsusb_hw.h +++ b/include/linux/usb/msm_hsusb_hw.h @@ -24,6 +24,7 @@ #define USB_PORTSC (MSM_USB_BASE + 0x0184) #define USB_OTGSC (MSM_USB_BASE + 0x01A4) #define USB_USBMODE (MSM_USB_BASE + 0x01A8) +#define USB_PHY_CTRL (MSM_USB_BASE + 0x0240) #define USBCMD_RESET 2 #define USB_USBINTR (MSM_USB_BASE + 0x0148) @@ -42,6 +43,7 @@ #define ASYNC_INTR_CTRL (1 << 29) /* Enable async interrupt */ #define ULPI_STP_CTRL (1 << 30) /* Block communication with PHY */ +#define PHY_RETEN (1 << 1) /* PHY retention enable/disable */ /* OTG definitions */ #define OTGSC_INTSTS_MASK (0x7f << 16) -- cgit v1.2.3 From 23ceb5b7719e9276d4fa72a3ecf94dd396755276 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Fri, 6 May 2011 19:30:02 -0600 Subject: block: Remove extra discard_alignment from hd_struct. Currently, hd_struct.discard_alignment is only used when we show /sys/block/sdx/sdx/discard_alignment. So remove it and calculate when it is asked to show. 
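(For reference, the value being dropped from hd_struct is cheap to recompute: a partition's discard alignment is just the distance from its first sector to the device's next discard-granularity boundary, so it can be derived from the queue limits plus p->start_sect whenever sysfs asks. The user-space sketch below only illustrates that arithmetic; the helper name and standalone form are made up for the example and are not the kernel's queue_limit_discard_alignment() implementation.)

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: sectors from the partition start to the next
 * discard-granularity boundary, shifted by the device-level discard
 * alignment offset.  Granularity 0 means no discard support. */
static unsigned int part_discard_alignment(unsigned int granularity,
					   unsigned int dev_alignment,
					   uint64_t start_sect)
{
	if (!granularity)
		return 0;
	return (granularity + dev_alignment -
		(unsigned int)(start_sect % granularity)) % granularity;
}

int main(void)
{
	/* 2048-sector (1 MiB) granularity, partition starting at sector 63 */
	printf("%u\n", part_discard_alignment(2048, 0, 63)); /* prints 1985 */
	return 0;
}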
Signed-off-by: Tao Ma Signed-off-by: Jens Axboe --- fs/partitions/check.c | 9 ++++++--- include/linux/genhd.h | 1 - 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/partitions/check.c b/fs/partitions/check.c index d545e97d99c3..b7e16bccd5e5 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -255,7 +255,12 @@ ssize_t part_discard_alignment_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%u\n", p->discard_alignment); + struct gendisk *disk = dev_to_disk(dev); + + return sprintf(buf, "%u\n", + (unsigned long long)queue_limit_discard_alignment( + &disk->queue->limits, + p->start_sect)); } ssize_t part_stat_show(struct device *dev, @@ -449,8 +454,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, p->start_sect = start; p->alignment_offset = queue_limit_alignment_offset(&disk->queue->limits, start); - p->discard_alignment = - queue_limit_discard_alignment(&disk->queue->limits, start); p->nr_sects = len; p->partno = partno; p->policy = get_disk_ro(disk); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 300d7582006e..b78956b3c2e7 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -100,7 +100,6 @@ struct hd_struct { sector_t start_sect; sector_t nr_sects; sector_t alignment_offset; - unsigned int discard_alignment; struct device __dev; struct kobject *holder_dir; int policy, partno; -- cgit v1.2.3 From 1759415e630e5db0dd2390df9f94892cbfb9a8a2 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 5 May 2011 15:23:54 -0500 Subject: slub: Remove CONFIG_CMPXCHG_LOCAL ifdeffery Remove the #ifdefs. This means that the irqsafe_cpu_cmpxchg_double() is used everywhere. There may be performance implications since: A. We now have to manage a transaction ID for all arches B. The interrupt holdoff for arches not supporting CONFIG_CMPXCHG_LOCAL is reduced to a very short irqoff section. There are no multiple irqoff/irqon sequences as a result of this change. Even in the fallback case we only have to do one disable and enable like before. 
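(To illustrate the tid scheme this patch makes unconditional: the fast path samples the per-cpu freelist together with a transaction id, and the cmpxchg-double succeeds only if neither changed in between, so preemption or migration forces a retry; tids are striped by CPU number and therefore never collide across CPUs. The user-space analogue below is only a sketch of that retry loop, packing an object index and a tid into one 64-bit word instead of doing a real double-word cmpxchg on a pointer/tid pair as slub does.)

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4

/* One 64-bit word standing in for the (freelist pointer, tid) pair. */
static _Atomic uint64_t slot;

static uint64_t pack(uint32_t head, uint32_t tid) { return ((uint64_t)tid << 32) | head; }
static uint32_t head_of(uint64_t v) { return (uint32_t)v; }
static uint32_t tid_of(uint64_t v) { return (uint32_t)(v >> 32); }

/* tids are striped by cpu, so two cpus never generate the same value. */
static uint32_t init_tid(int cpu) { return (uint32_t)cpu; }
static uint32_t next_tid(uint32_t tid) { return tid + NR_CPUS; }

/* Fast path: retry if another operation slipped in between load and cmpxchg. */
static uint32_t alloc_fastpath(void)
{
	uint64_t old, new;

	do {
		old = atomic_load(&slot);
		new = pack(head_of(old) + 1, next_tid(tid_of(old)));
	} while (!atomic_compare_exchange_weak(&slot, &old, new));

	return head_of(old);
}

int main(void)
{
	atomic_store(&slot, pack(0, init_tid(0)));
	printf("got object %u, tid now %u\n", alloc_fastpath(), tid_of(atomic_load(&slot)));
	printf("got object %u, tid now %u\n", alloc_fastpath(), tid_of(atomic_load(&slot)));
	return 0;
}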
Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 2 -- mm/slub.c | 56 ------------------------------------------------ 2 files changed, 58 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 45ca123e8002..ca0c076b2374 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -37,9 +37,7 @@ enum stat_item { struct kmem_cache_cpu { void **freelist; /* Pointer to next available object */ -#ifdef CONFIG_CMPXCHG_LOCAL unsigned long tid; /* Globally unique transaction id */ -#endif struct page *page; /* The slab from which we are allocating */ int node; /* The node of the page (or -1 for debug) */ #ifdef CONFIG_SLUB_STATS diff --git a/mm/slub.c b/mm/slub.c index c952fac112e8..461199f019d6 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1551,7 +1551,6 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) } } -#ifdef CONFIG_CMPXCHG_LOCAL #ifdef CONFIG_PREEMPT /* * Calculate the next globally unique transaction for disambiguiation @@ -1611,17 +1610,12 @@ static inline void note_cmpxchg_failure(const char *n, stat(s, CMPXCHG_DOUBLE_CPU_FAIL); } -#endif - void init_kmem_cache_cpus(struct kmem_cache *s) { -#ifdef CONFIG_CMPXCHG_LOCAL int cpu; for_each_possible_cpu(cpu) per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu); -#endif - } /* * Remove the cpu slab @@ -1654,9 +1648,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) page->inuse--; } c->page = NULL; -#ifdef CONFIG_CMPXCHG_LOCAL c->tid = next_tid(c->tid); -#endif unfreeze_slab(s, page, tail); } @@ -1791,7 +1783,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, { void **object; struct page *page; -#ifdef CONFIG_CMPXCHG_LOCAL unsigned long flags; local_irq_save(flags); @@ -1802,7 +1793,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, * pointer. */ c = this_cpu_ptr(s->cpu_slab); -#endif #endif /* We handle __GFP_ZERO in the caller */ @@ -1831,10 +1821,8 @@ load_freelist: unlock_out: slab_unlock(page); -#ifdef CONFIG_CMPXCHG_LOCAL c->tid = next_tid(c->tid); local_irq_restore(flags); -#endif stat(s, ALLOC_SLOWPATH); return object; @@ -1873,9 +1861,7 @@ load_from_page: } if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) slab_out_of_memory(s, gfpflags, node); -#ifdef CONFIG_CMPXCHG_LOCAL local_irq_restore(flags); -#endif return NULL; debug: if (!alloc_debug_processing(s, page, object, addr)) @@ -1902,20 +1888,12 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, { void **object; struct kmem_cache_cpu *c; -#ifdef CONFIG_CMPXCHG_LOCAL unsigned long tid; -#else - unsigned long flags; -#endif if (slab_pre_alloc_hook(s, gfpflags)) return NULL; -#ifndef CONFIG_CMPXCHG_LOCAL - local_irq_save(flags); -#else redo: -#endif /* * Must read kmem_cache cpu data via this cpu ptr. Preemption is @@ -1925,7 +1903,6 @@ redo: */ c = __this_cpu_ptr(s->cpu_slab); -#ifdef CONFIG_CMPXCHG_LOCAL /* * The transaction ids are globally unique per cpu and per operation on * a per cpu queue. Thus they can be guarantee that the cmpxchg_double @@ -1934,7 +1911,6 @@ redo: */ tid = c->tid; barrier(); -#endif object = c->freelist; if (unlikely(!object || !node_match(c, node))) @@ -1942,7 +1918,6 @@ redo: object = __slab_alloc(s, gfpflags, node, addr, c); else { -#ifdef CONFIG_CMPXCHG_LOCAL /* * The cmpxchg will only match if there was no additonal * operation and if we are on the right processor. 
@@ -1963,16 +1938,9 @@ redo: note_cmpxchg_failure("slab_alloc", s, tid); goto redo; } -#else - c->freelist = get_freepointer(s, object); -#endif stat(s, ALLOC_FASTPATH); } -#ifndef CONFIG_CMPXCHG_LOCAL - local_irq_restore(flags); -#endif - if (unlikely(gfpflags & __GFP_ZERO) && object) memset(object, 0, s->objsize); @@ -2049,11 +2017,9 @@ static void __slab_free(struct kmem_cache *s, struct page *page, { void *prior; void **object = (void *)x; -#ifdef CONFIG_CMPXCHG_LOCAL unsigned long flags; local_irq_save(flags); -#endif slab_lock(page); stat(s, FREE_SLOWPATH); @@ -2084,9 +2050,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, out_unlock: slab_unlock(page); -#ifdef CONFIG_CMPXCHG_LOCAL local_irq_restore(flags); -#endif return; slab_empty: @@ -2098,9 +2062,7 @@ slab_empty: stat(s, FREE_REMOVE_PARTIAL); } slab_unlock(page); -#ifdef CONFIG_CMPXCHG_LOCAL local_irq_restore(flags); -#endif stat(s, FREE_SLAB); discard_slab(s, page); } @@ -2121,20 +2083,11 @@ static __always_inline void slab_free(struct kmem_cache *s, { void **object = (void *)x; struct kmem_cache_cpu *c; -#ifdef CONFIG_CMPXCHG_LOCAL unsigned long tid; -#else - unsigned long flags; -#endif slab_free_hook(s, x); -#ifndef CONFIG_CMPXCHG_LOCAL - local_irq_save(flags); - -#else redo: -#endif /* * Determine the currently cpus per cpu slab. @@ -2144,15 +2097,12 @@ redo: */ c = __this_cpu_ptr(s->cpu_slab); -#ifdef CONFIG_CMPXCHG_LOCAL tid = c->tid; barrier(); -#endif if (likely(page == c->page && c->node != NUMA_NO_NODE)) { set_freepointer(s, object, c->freelist); -#ifdef CONFIG_CMPXCHG_LOCAL if (unlikely(!this_cpu_cmpxchg_double( s->cpu_slab->freelist, s->cpu_slab->tid, c->freelist, tid, @@ -2161,16 +2111,10 @@ redo: note_cmpxchg_failure("slab_free", s, tid); goto redo; } -#else - c->freelist = object; -#endif stat(s, FREE_FASTPATH); } else __slab_free(s, page, x, addr); -#ifndef CONFIG_CMPXCHG_LOCAL - local_irq_restore(flags); -#endif } void kmem_cache_free(struct kmem_cache *s, void *x) -- cgit v1.2.3 From 1ab7b6ac2709d0eb05a7144cd0e14faa3a7ea162 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 14 Apr 2011 23:46:06 -0700 Subject: ethtool: remove phys_id from ethtool_ops After that all the upstream kernel drivers now use phys_id, and the old ethtool_ops interface (phys_id) can be removed. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/ethtool.h | 7 ------- net/core/ethtool.c | 6 +----- 2 files changed, 1 insertion(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 4194a2067a14..d659fdc77eb3 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -814,12 +814,6 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); * the indicator accordingly. Finally, it is called with the argument * %ETHTOOL_ID_INACTIVE and must deactivate the indicator. Returns a * negative error code or zero. - * @phys_id: Deprecated in favour of @set_phys_id. - * Identify the physical device, e.g. by flashing an LED - * attached to it until interrupted by a signal or the given time - * (in seconds) elapses. If the given time is zero, use a default - * time limit. Returns a negative error code or zero. Being - * interrupted by a signal is not an error. * @get_ethtool_stats: Return extended statistics about the device. * This is only useful if the device maintains statistics not * included in &struct rtnl_link_stats64. 
@@ -908,7 +902,6 @@ struct ethtool_ops { void (*self_test)(struct net_device *, struct ethtool_test *, u64 *); void (*get_strings)(struct net_device *, u32 stringset, u8 *); int (*set_phys_id)(struct net_device *, enum ethtool_phys_id_state); - int (*phys_id)(struct net_device *, u32); void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, u64 *); int (*begin)(struct net_device *); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index d8b1a8d85a96..927819d92248 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1655,7 +1655,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) static bool busy; int rc; - if (!dev->ethtool_ops->set_phys_id && !dev->ethtool_ops->phys_id) + if (!dev->ethtool_ops->set_phys_id) return -EOPNOTSUPP; if (busy) @@ -1664,10 +1664,6 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) if (copy_from_user(&id, useraddr, sizeof(id))) return -EFAULT; - if (!dev->ethtool_ops->set_phys_id) - /* Do it the old way */ - return dev->ethtool_ops->phys_id(dev, id.data); - rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE); if (rc < 0) return rc; -- cgit v1.2.3 From 57cc71bc3c0cf18bfd1b7bc8cd0eb6c303da24c5 Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Mon, 2 May 2011 21:30:08 +0000 Subject: ethtool: Add 20G bit definitions Add 20G supported and advertising bit definitions. 20G will be supported with the 57840 chips. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein ------ include/linux/ethtool.h | 4 ++++ 1 files changed, 4 insertions(+), 0 deletions(-) Signed-off-by: David S. Miller --- include/linux/ethtool.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index d659fdc77eb3..bd0b50b85f06 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1025,6 +1025,8 @@ struct ethtool_ops { #define SUPPORTED_10000baseKX4_Full (1 << 18) #define SUPPORTED_10000baseKR_Full (1 << 19) #define SUPPORTED_10000baseR_FEC (1 << 20) +#define SUPPORTED_20000baseMLD2_Full (1 << 21) +#define SUPPORTED_20000baseKR2_Full (1 << 22) /* Indicates what features are advertised by the interface. */ #define ADVERTISED_10baseT_Half (1 << 0) @@ -1048,6 +1050,8 @@ struct ethtool_ops { #define ADVERTISED_10000baseKX4_Full (1 << 18) #define ADVERTISED_10000baseKR_Full (1 << 19) #define ADVERTISED_10000baseR_FEC (1 << 20) +#define ADVERTISED_20000baseMLD2_Full (1 << 21) +#define ADVERTISED_20000baseKR2_Full (1 << 22) /* The following are all involved in forcing a particular link * mode for the device for setting things. When getting the -- cgit v1.2.3 From eed2a12f1ed9aabf0676f4d0db34aad51976c5c6 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Wed, 4 May 2011 15:30:11 +0000 Subject: net: Allow ethtool to set interface in loopback mode. This patch enables ethtool to set the loopback mode on a given interface. By configuring the interface in loopback mode in conjunction with a policy route / rule, a userland application can stress the egress / ingress path exposing the flows of the change in progress and potentially help developer(s) understand the impact of those changes without even sending a packet out on the network. 
Following set of commands illustrates one such example - a) ip -4 addr add 192.168.1.1/24 dev eth1 b) ip -4 rule add from all iif eth1 lookup 250 c) ip -4 route add local 0/0 dev lo proto kernel scope host table 250 d) arp -Ds 192.168.1.100 eth1 e) arp -Ds 192.168.1.200 eth1 f) sysctl -w net.ipv4.ip_nonlocal_bind=1 g) sysctl -w net.ipv4.conf.all.accept_local=1 # Assuming that the machine has 8 cores h) taskset 000f netserver -L 192.168.1.200 i) taskset 00f0 netperf -t TCP_CRR -L 192.168.1.100 -H 192.168.1.200 -l 30 Signed-off-by: Mahesh Bandewar Acked-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/loopback.c | 3 ++- include/linux/netdevice.h | 3 ++- net/core/ethtool.c | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index d70fb76edb77..4ce9e5f2c069 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -174,7 +174,8 @@ static void loopback_setup(struct net_device *dev) | NETIF_F_HIGHDMA | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL - | NETIF_F_VLAN_CHALLENGED; + | NETIF_F_VLAN_CHALLENGED + | NETIF_F_LOOPBACK; dev->ethtool_ops = &loopback_ethtool_ops; dev->header_ops = &eth_header_ops; dev->netdev_ops = &loopback_ops; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d5de66af46f9..e7244ed1f9a8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1067,6 +1067,7 @@ struct net_device { #define NETIF_F_RXHASH (1 << 28) /* Receive hashing offload */ #define NETIF_F_RXCSUM (1 << 29) /* Receive checksumming offload */ #define NETIF_F_NOCACHE_COPY (1 << 30) /* Use no-cache copyfromuser */ +#define NETIF_F_LOOPBACK (1 << 31) /* Enable loopback */ /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 @@ -1082,7 +1083,7 @@ struct net_device { /* = all defined minus driver/device-class-related */ #define NETIF_F_NEVER_CHANGE (NETIF_F_VLAN_CHALLENGED | \ NETIF_F_LLTX | NETIF_F_NETNS_LOCAL) -#define NETIF_F_ETHTOOL_BITS (0x7f3fffff & ~NETIF_F_NEVER_CHANGE) +#define NETIF_F_ETHTOOL_BITS (0xff3fffff & ~NETIF_F_NEVER_CHANGE) /* List of features with software fallbacks. */ #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 927819d92248..b6f405888538 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -362,7 +362,7 @@ static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GS /* NETIF_F_RXHASH */ "rx-hashing", /* NETIF_F_RXCSUM */ "rx-checksum", /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy" - "", + /* NETIF_F_LOOPBACK */ "loopback", }; static int __ethtool_get_sset_count(struct net_device *dev, int sset) -- cgit v1.2.3 From 245e37182b7dead255bdc9e333e0bcc128360675 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 8 May 2011 16:41:45 -0700 Subject: ide: Use linux/mutex.h The IDE code is still including asm/mutex.h instead of linux/mutex.h Signed-off-by: Anton Blanchard Signed-off-by: David S.
Miller --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 072fe8c93e6f..42557851b12e 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -18,13 +18,13 @@ #include #include #include +#include #ifdef CONFIG_BLK_DEV_IDEACPI #include #endif #include #include #include -#include /* for request_sense */ #include -- cgit v1.2.3 From 000703f44c77b152cd966eaf06f4ab043274ff46 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 9 May 2011 11:40:25 +1000 Subject: mxm/wmi: add MXMX interface entry point. The MXMX method appears to be a mutex of some sort. Signed-off-by: Dave Airlie --- drivers/gpu/drm/nouveau/nouveau_acpi.c | 1 + drivers/platform/x86/mxm-wmi.c | 28 +++++++++++++++++++++++++++- include/linux/mxm-wmi.h | 1 + 3 files changed, 29 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c index e0a885b72e0e..f0d459bb46e4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c @@ -150,6 +150,7 @@ static int nouveau_dsm(acpi_handle handle, int func, int arg, uint32_t *result) static int nouveau_dsm_switch_mux(acpi_handle handle, int mux_id) { + mxm_wmi_call_mxmx(mux_id == NOUVEAU_DSM_LED_STAMINA ? MXM_MXDS_ADAPTER_IGD : MXM_MXDS_ADAPTER_0); mxm_wmi_call_mxds(mux_id == NOUVEAU_DSM_LED_STAMINA ? MXM_MXDS_ADAPTER_IGD : MXM_MXDS_ADAPTER_0); return nouveau_dsm(handle, NOUVEAU_DSM_LED, mux_id, NULL); } diff --git a/drivers/platform/x86/mxm-wmi.c b/drivers/platform/x86/mxm-wmi.c index 12b6f341e72b..0aea63b3729a 100644 --- a/drivers/platform/x86/mxm-wmi.c +++ b/drivers/platform/x86/mxm-wmi.c @@ -32,6 +32,7 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("wmi:"MXM_WMMX_GUID); #define MXM_WMMX_FUNC_MXDS 0x5344584D /* "MXDS" */ +#define MXM_WMMX_FUNC_MXMX 0x53445344 /* "MXMX" */ struct mxds_args { u32 func; @@ -51,7 +52,7 @@ int mxm_wmi_call_mxds(int adapter) acpi_status status; printk("calling mux switch %d\n", adapter); - + status = wmi_evaluate_method(MXM_WMMX_GUID, 0x1, adapter, &input, &output); @@ -64,6 +65,31 @@ int mxm_wmi_call_mxds(int adapter) } EXPORT_SYMBOL_GPL(mxm_wmi_call_mxds); +int mxm_wmi_call_mxmx(int adapter) +{ + struct mxds_args args = { + .func = MXM_WMMX_FUNC_MXMX, + .args = 0, + .xarg = 1, + }; + struct acpi_buffer input = { (acpi_size)sizeof(args), &args }; + struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; + acpi_status status; + + printk("calling mux switch %d\n", adapter); + + status = wmi_evaluate_method(MXM_WMMX_GUID, 0x1, adapter, &input, + &output); + + if (ACPI_FAILURE(status)) + return status; + + printk("mux mutex set switched %d\n", status); + return 0; + +} +EXPORT_SYMBOL_GPL(mxm_wmi_call_mxmx); + bool mxm_wmi_supported(void) { bool guid_valid; diff --git a/include/linux/mxm-wmi.h b/include/linux/mxm-wmi.h index 51359c0718bf..617a2950523c 100644 --- a/include/linux/mxm-wmi.h +++ b/include/linux/mxm-wmi.h @@ -27,6 +27,7 @@ /* integrated adapter */ #define MXM_MXDS_ADAPTER_IGD 0x10 int mxm_wmi_call_mxds(int adapter); +int mxm_wmi_call_mxmx(int adapter); bool mxm_wmi_supported(void); #endif -- cgit v1.2.3 From 48752e1b1802231ef2a076f34d861918b7d571c3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 May 2011 04:40:44 +0000 Subject: vlan: remove one synchronize_net() call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At VLAN dismantle phase, unregister_vlan_dev() 
makes one synchronize_net() call after vlan_group_set_device(grp, vlan_id, NULL). This call can be safely removed because we are calling unregister_netdevice_queue() to queue device for deletion, and this process needs at least one rcu grace period to complete. Signed-off-by: Eric Dumazet Cc: Ben Greear Cc: Patrick McHardy Cc: Paul E. McKenney Cc: Jesse Gross Cc: Michał Mirosław Acked-by: Jesse Gross Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 1 - net/8021q/vlan.c | 10 ++++------ 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 546d9d35fbd4..290bd8ac94cf 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -86,7 +86,6 @@ struct vlan_group { * the vlan is attached to. */ unsigned int nr_vlans; - int killall; struct hlist_node hlist; /* linked list */ struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS]; struct rcu_head rcu; diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 969e7004cf86..718d635d3379 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -120,9 +120,10 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) grp->nr_vlans--; vlan_group_set_device(grp, vlan_id, NULL); - if (!grp->killall) - synchronize_net(); - + /* Because unregister_netdevice_queue() makes sure at least one rcu + * grace period is respected before device freeing, + * we dont need to call synchronize_net() here. + */ unregister_netdevice_queue(dev, head); /* If the group is now empty, kill off the group. */ @@ -478,9 +479,6 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, if (dev->reg_state != NETREG_UNREGISTERING) break; - /* Delete all VLANs for this dev. */ - grp->killall = 1; - for (i = 0; i < VLAN_N_VID; i++) { vlandev = vlan_group_get_device(grp, i); if (!vlandev) -- cgit v1.2.3 From 4940fc889e1e63667a15243028ddcd84d471cd8e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 7 May 2011 23:00:07 +0000 Subject: net: add mac_pton() for parsing MAC address mac_pton() parses MAC address in form XX:XX:XX:XX:XX:XX and only in that form. mac_pton() doesn't dirty result until it's sure string representation is valid. mac_pton() doesn't care about characters _after_ last octet, it's up to caller to deal with it. mac_pton() diverges from 0/-E return value convention. Target usage: if (!mac_pton(str, whatever->mac)) return -EINVAL; /* ->mac being u8 [ETH_ALEN] is filled at this point. */ /* optionally check str[3 * ETH_ALEN - 1] for termination */ Use mac_pton() in pktgen and netconsole for start. Signed-off-by: Alexey Dobriyan Signed-off-by: David S.
Miller --- drivers/net/netconsole.c | 20 ++++-------------- include/linux/if_ether.h | 1 + net/core/netpoll.c | 26 +----------------------- net/core/pktgen.c | 53 ++++++++---------------------------------------- net/core/utils.c | 24 ++++++++++++++++++++++ 5 files changed, 38 insertions(+), 86 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 62fdbaa1fb60..a83e101440fd 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -429,8 +429,6 @@ static ssize_t store_remote_mac(struct netconsole_target *nt, size_t count) { u8 remote_mac[ETH_ALEN]; - char *p = (char *) buf; - int i; if (nt->enabled) { printk(KERN_ERR "netconsole: target (%s) is enabled, " @@ -439,23 +437,13 @@ static ssize_t store_remote_mac(struct netconsole_target *nt, return -EINVAL; } - for (i = 0; i < ETH_ALEN - 1; i++) { - remote_mac[i] = simple_strtoul(p, &p, 16); - if (*p != ':') - goto invalid; - p++; - } - remote_mac[ETH_ALEN - 1] = simple_strtoul(p, &p, 16); - if (*p && (*p != '\n')) - goto invalid; - + if (!mac_pton(buf, remote_mac)) + return -EINVAL; + if (buf[3 * ETH_ALEN - 1] && buf[3 * ETH_ALEN - 1] != '\n') + return -EINVAL; memcpy(nt->np.remote_mac, remote_mac, ETH_ALEN); return strnlen(buf, count); - -invalid: - printk(KERN_ERR "netconsole: invalid input\n"); - return -EINVAL; } /* diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index be69043d2896..0f1325d98295 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -136,6 +136,7 @@ int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr); extern struct ctl_table ether_table[]; #endif +int mac_pton(const char *s, u8 *mac); extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len); #endif diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 46d9c3a4de2f..2d7d6d473781 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -698,32 +698,8 @@ int netpoll_parse_options(struct netpoll *np, char *opt) if (*cur != 0) { /* MAC address */ - if ((delim = strchr(cur, ':')) == NULL) + if (!mac_pton(cur, np->remote_mac)) goto parse_failed; - *delim = 0; - np->remote_mac[0] = simple_strtol(cur, NULL, 16); - cur = delim + 1; - if ((delim = strchr(cur, ':')) == NULL) - goto parse_failed; - *delim = 0; - np->remote_mac[1] = simple_strtol(cur, NULL, 16); - cur = delim + 1; - if ((delim = strchr(cur, ':')) == NULL) - goto parse_failed; - *delim = 0; - np->remote_mac[2] = simple_strtol(cur, NULL, 16); - cur = delim + 1; - if ((delim = strchr(cur, ':')) == NULL) - goto parse_failed; - *delim = 0; - np->remote_mac[3] = simple_strtol(cur, NULL, 16); - cur = delim + 1; - if ((delim = strchr(cur, ':')) == NULL) - goto parse_failed; - *delim = 0; - np->remote_mac[4] = simple_strtol(cur, NULL, 16); - cur = delim + 1; - np->remote_mac[5] = simple_strtol(cur, NULL, 16); } netpoll_print_options(np); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index d41d88b53e18..379270f14771 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1420,11 +1420,6 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "dst_mac")) { - char *v = valstr; - unsigned char old_dmac[ETH_ALEN]; - unsigned char *m = pkt_dev->dst_mac; - memcpy(old_dmac, pkt_dev->dst_mac, ETH_ALEN); - len = strn_len(&user_buffer[i], sizeof(valstr) - 1); if (len < 0) return len; @@ -1432,35 +1427,16 @@ static ssize_t pktgen_if_write(struct file *file, memset(valstr, 0, sizeof(valstr)); if (copy_from_user(valstr, &user_buffer[i], len)) return 
-EFAULT; - i += len; - - for (*m = 0; *v && m < pkt_dev->dst_mac + 6; v++) { - int value; - - value = hex_to_bin(*v); - if (value >= 0) - *m = *m * 16 + value; - - if (*v == ':') { - m++; - *m = 0; - } - } + if (!mac_pton(valstr, pkt_dev->dst_mac)) + return -EINVAL; /* Set up Dest MAC */ - if (compare_ether_addr(old_dmac, pkt_dev->dst_mac)) - memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, ETH_ALEN); + memcpy(&pkt_dev->hh[0], pkt_dev->dst_mac, ETH_ALEN); - sprintf(pg_result, "OK: dstmac"); + sprintf(pg_result, "OK: dstmac %pM", pkt_dev->dst_mac); return count; } if (!strcmp(name, "src_mac")) { - char *v = valstr; - unsigned char old_smac[ETH_ALEN]; - unsigned char *m = pkt_dev->src_mac; - - memcpy(old_smac, pkt_dev->src_mac, ETH_ALEN); - len = strn_len(&user_buffer[i], sizeof(valstr) - 1); if (len < 0) return len; @@ -1468,26 +1444,13 @@ static ssize_t pktgen_if_write(struct file *file, memset(valstr, 0, sizeof(valstr)); if (copy_from_user(valstr, &user_buffer[i], len)) return -EFAULT; - i += len; - - for (*m = 0; *v && m < pkt_dev->src_mac + 6; v++) { - int value; - - value = hex_to_bin(*v); - if (value >= 0) - *m = *m * 16 + value; - - if (*v == ':') { - m++; - *m = 0; - } - } + if (!mac_pton(valstr, pkt_dev->src_mac)) + return -EINVAL; /* Set up Src MAC */ - if (compare_ether_addr(old_smac, pkt_dev->src_mac)) - memcpy(&(pkt_dev->hh[6]), pkt_dev->src_mac, ETH_ALEN); + memcpy(&pkt_dev->hh[6], pkt_dev->src_mac, ETH_ALEN); - sprintf(pg_result, "OK: srcmac"); + sprintf(pg_result, "OK: srcmac %pM", pkt_dev->src_mac); return count; } diff --git a/net/core/utils.c b/net/core/utils.c index 5fea0ab21902..2012bc797f9c 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -296,3 +296,27 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, csum_unfold(*sum))); } EXPORT_SYMBOL(inet_proto_csum_replace4); + +int mac_pton(const char *s, u8 *mac) +{ + int i; + + /* XX:XX:XX:XX:XX:XX */ + if (strlen(s) < 3 * ETH_ALEN - 1) + return 0; + + /* Don't dirty result unless string is valid MAC. */ + for (i = 0; i < ETH_ALEN; i++) { + if (!strchr("0123456789abcdefABCDEF", s[i * 3])) + return 0; + if (!strchr("0123456789abcdefABCDEF", s[i * 3 + 1])) + return 0; + if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':') + return 0; + } + for (i = 0; i < ETH_ALEN; i++) { + mac[i] = (hex_to_bin(s[i * 3]) << 4) | hex_to_bin(s[i * 3 + 1]); + } + return 1; +} +EXPORT_SYMBOL(mac_pton); -- cgit v1.2.3 From a09a79f66874c905af35d5bb5e5f2fdc7b6b894d Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 9 May 2011 13:01:09 +0200 Subject: Don't lock guardpage if the stack is growing up Linux kernel excludes guard page when performing mlock on a VMA with down-growing stack. However, some architectures have up-growing stack and locking the guard page should be excluded in this case too. This patch fixes lvm2 on PA-RISC (and possibly other architectures with up-growing stack). lvm2 calculates number of used pages when locking and when unlocking and reports an internal error if the numbers mismatch. [ Patch changed fairly extensively to also fix /proc//maps for the grows-up case, and to move things around a bit to clean it all up and share the infrstructure with the /proc bits. 
Tested on ia64 that has both grow-up and grow-down segments - Linus ] Signed-off-by: Mikulas Patocka Tested-by: Tony Luck Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 12 +++++++----- include/linux/mm.h | 24 +++++++++++++++++++++++- mm/memory.c | 16 +++++++--------- 3 files changed, 37 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 2e7addfd9803..318d8654989b 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -214,7 +214,7 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) int flags = vma->vm_flags; unsigned long ino = 0; unsigned long long pgoff = 0; - unsigned long start; + unsigned long start, end; dev_t dev = 0; int len; @@ -227,13 +227,15 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) /* We don't show the stack guard page in /proc/maps */ start = vma->vm_start; - if (vma->vm_flags & VM_GROWSDOWN) - if (!vma_stack_continue(vma->vm_prev, vma->vm_start)) - start += PAGE_SIZE; + if (stack_guard_page_start(vma, start)) + start += PAGE_SIZE; + end = vma->vm_end; + if (stack_guard_page_end(vma, end)) + end -= PAGE_SIZE; seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", start, - vma->vm_end, + end, flags & VM_READ ? 'r' : '-', flags & VM_WRITE ? 'w' : '-', flags & VM_EXEC ? 'x' : '-', diff --git a/include/linux/mm.h b/include/linux/mm.h index 2348db26bc3d..6507dde38b16 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1011,11 +1011,33 @@ int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); /* Is the vma a continuation of the stack vma above it? */ -static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr) +static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr) { return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); } +static inline int stack_guard_page_start(struct vm_area_struct *vma, + unsigned long addr) +{ + return (vma->vm_flags & VM_GROWSDOWN) && + (vma->vm_start == addr) && + !vma_growsdown(vma->vm_prev, addr); +} + +/* Is the vma a continuation of the stack vma below it? */ +static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr) +{ + return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP); +} + +static inline int stack_guard_page_end(struct vm_area_struct *vma, + unsigned long addr) +{ + return (vma->vm_flags & VM_GROWSUP) && + (vma->vm_end == addr) && + !vma_growsup(vma->vm_next, addr); +} + extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len); diff --git a/mm/memory.c b/mm/memory.c index 27f425378112..61e66f026563 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1412,9 +1412,8 @@ no_page_table: static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr) { - return (vma->vm_flags & VM_GROWSDOWN) && - (vma->vm_start == addr) && - !vma_stack_continue(vma->vm_prev, addr); + return stack_guard_page_start(vma, addr) || + stack_guard_page_end(vma, addr+PAGE_SIZE); } /** @@ -1551,12 +1550,6 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, continue; } - /* - * For mlock, just skip the stack guard page. 
- */ - if ((gup_flags & FOLL_MLOCK) && stack_guard_page(vma, start)) - goto next_page; - do { struct page *page; unsigned int foll_flags = gup_flags; @@ -1573,6 +1566,11 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, int ret; unsigned int fault_flags = 0; + /* For mlock, just skip the stack guard page. */ + if (foll_flags & FOLL_MLOCK) { + if (stack_guard_page(vma, start)) + goto next_page; + } if (foll_flags & FOLL_WRITE) fault_flags |= FAULT_FLAG_WRITE; if (nonblocking) -- cgit v1.2.3 From 70f23fd66bc821a0e99647f70a809e277cc93c4c Mon Sep 17 00:00:00 2001 From: "Justin P. Mattock" Date: Tue, 10 May 2011 10:16:21 +0200 Subject: treewide: fix a few typos in comments - kenrel -> kernel - whetehr -> whether - ttt -> tt - sss -> ss Signed-off-by: Justin P. Mattock Signed-off-by: Jiri Kosina --- Documentation/devicetree/bindings/powerpc/nintendo/wii.txt | 2 +- Documentation/feature-removal-schedule.txt | 2 +- arch/arm/mach-msm/include/mach/msm_iomap.h | 2 +- arch/arm/mach-omap2/control.h | 2 +- arch/ia64/hp/common/sba_iommu.c | 2 +- arch/powerpc/include/asm/pte-hash64-64k.h | 2 +- arch/powerpc/kernel/kgdb.c | 2 +- arch/x86/xen/pci-swiotlb-xen.c | 2 +- drivers/acpi/acpica/utresrc.c | 2 +- drivers/acpi/video.c | 2 +- drivers/firmware/efivars.c | 2 +- drivers/macintosh/therm_pm72.c | 4 ++-- drivers/media/dvb/dvb-usb/dw2102.c | 2 +- drivers/media/video/msp3400-driver.c | 2 +- drivers/media/video/saa7164/saa7164-encoder.c | 2 +- drivers/media/video/saa7164/saa7164-vbi.c | 2 +- drivers/message/i2o/README.ioctl | 2 +- drivers/net/can/pch_can.c | 2 +- drivers/net/irda/ali-ircc.c | 2 +- drivers/net/s2io.c | 2 +- drivers/net/ucc_geth_ethtool.c | 2 +- drivers/net/usb/usbnet.c | 2 +- drivers/net/wan/pc300_drv.c | 2 +- drivers/net/wireless/ath/ath9k/ar9003_eeprom.c | 8 ++++---- drivers/net/wireless/ipw2x00/ipw2200.c | 2 +- drivers/scsi/aic7xxx/aicasm/aicasm_symbol.c | 2 +- drivers/scsi/aic7xxx/aicasm/aicasm_symbol.h | 2 +- drivers/scsi/constants.c | 2 +- drivers/scsi/esp_scsi.c | 2 +- drivers/scsi/lpfc/lpfc_attr.c | 2 +- drivers/scsi/lpfc/lpfc_hw.h | 2 +- drivers/scsi/lpfc/lpfc_sli.c | 2 +- drivers/scsi/nsp32_debug.c | 2 +- drivers/scsi/pcmcia/nsp_debug.c | 2 +- drivers/scsi/qla4xxx/ql4_mbx.c | 2 +- drivers/tty/n_gsm.c | 2 +- drivers/usb/host/imx21-dbg.c | 2 +- drivers/usb/misc/usbtest.c | 2 +- fs/btrfs/relocation.c | 2 +- fs/freevxfs/vxfs_inode.c | 2 +- fs/nfsd/stats.c | 2 +- fs/squashfs/Kconfig | 4 ++-- fs/squashfs/cache.c | 2 +- fs/xfs/xfs_inode.c | 2 +- include/linux/posix-clock.h | 2 +- include/net/genetlink.h | 2 +- include/net/netlink.h | 2 +- net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +- 48 files changed, 53 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/powerpc/nintendo/wii.txt b/Documentation/devicetree/bindings/powerpc/nintendo/wii.txt index a7e155a023b8..36afa322b04b 100644 --- a/Documentation/devicetree/bindings/powerpc/nintendo/wii.txt +++ b/Documentation/devicetree/bindings/powerpc/nintendo/wii.txt @@ -127,7 +127,7 @@ Nintendo Wii device tree - reg : should contain the SDHCI registers location and length - interrupts : should contain the SDHCI interrupt -1.j) The Inter-Processsor Communication (IPC) node +1.j) The Inter-Processor Communication (IPC) node Represent the Inter-Processor Communication interface. This interface enables communications between the Broadway and the Starlet processors. 
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 492e81df2968..9536652eadbf 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -226,7 +226,7 @@ Who: Zhang Rui What: CONFIG_ACPI_PROCFS_POWER When: 2.6.39 Why: sysfs I/F for ACPI power devices, including AC and Battery, - has been working in upstream kenrel since 2.6.24, Sep 2007. + has been working in upstream kernel since 2.6.24, Sep 2007. In 2.6.37, we make the sysfs I/F always built in and this option disabled by default. Remove this option and the ACPI power procfs interface in 2.6.39. diff --git a/arch/arm/mach-msm/include/mach/msm_iomap.h b/arch/arm/mach-msm/include/mach/msm_iomap.h index c98c7591f3b8..2f494b6a9d0a 100644 --- a/arch/arm/mach-msm/include/mach/msm_iomap.h +++ b/arch/arm/mach-msm/include/mach/msm_iomap.h @@ -55,7 +55,7 @@ #include "msm_iomap-8960.h" -/* Virtual addressses shared across all MSM targets. */ +/* Virtual addresses shared across all MSM targets. */ #define MSM_CSR_BASE IOMEM(0xE0001000) #define MSM_QGIC_DIST_BASE IOMEM(0xF0000000) #define MSM_QGIC_CPU_BASE IOMEM(0xF0001000) diff --git a/arch/arm/mach-omap2/control.h b/arch/arm/mach-omap2/control.h index c2804c1c4efd..a016c8b59e00 100644 --- a/arch/arm/mach-omap2/control.h +++ b/arch/arm/mach-omap2/control.h @@ -236,7 +236,7 @@ #define OMAP343X_CONTROL_WKUP_DEBOBS3 (OMAP343X_CONTROL_GENERAL_WKUP + 0x014) #define OMAP343X_CONTROL_WKUP_DEBOBS4 (OMAP343X_CONTROL_GENERAL_WKUP + 0x018) -/* 36xx-only RTA - Retention till Accesss control registers and bits */ +/* 36xx-only RTA - Retention till Access control registers and bits */ #define OMAP36XX_CONTROL_MEM_RTA_CTRL 0x40C #define OMAP36XX_RTA_DISABLE 0x0 diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 4ce8d1358fee..ab3ccab132ce 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -1063,7 +1063,7 @@ static void sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size, /* ** Address does not fall w/in IOVA, must be bypassing */ - DBG_BYPASS("sba_unmap_single_atttrs() bypass addr: 0x%lx\n", + DBG_BYPASS("sba_unmap_single_attrs() bypass addr: 0x%lx\n", iova); #ifdef ENABLE_MARK_CLEAN diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h index c4490f9c67c4..59247e816ac5 100644 --- a/arch/powerpc/include/asm/pte-hash64-64k.h +++ b/arch/powerpc/include/asm/pte-hash64-64k.h @@ -22,7 +22,7 @@ #define _PAGE_HASHPTE _PAGE_HPTE_SUB /* Note the full page bits must be in the same location as for normal - * 4k pages as the same asssembly will be used to insert 64K pages + * 4k pages as the same assembly will be used to insert 64K pages * wether the kernel has CONFIG_PPC_64K_PAGES or not */ #define _PAGE_F_SECOND 0x00008000 /* full page: hidx bits */ diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 42850ee00ada..9411747bbcaf 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -142,7 +142,7 @@ static int kgdb_singlestep(struct pt_regs *regs) return 0; /* - * On Book E and perhaps other processsors, singlestep is handled on + * On Book E and perhaps other processors, singlestep is handled on * the critical exception stack. This causes current_thread_info() * to fail, since it it locates the thread_info by masking off * the low bits of the current stack pointer. 
We work around diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index bfd0632fe65e..b480d4207a4c 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -36,7 +36,7 @@ int __init pci_xen_swiotlb_detect(void) /* If running as PV guest, either iommu=soft, or swiotlb=force will * activate this IOMMU. If running as PV privileged, activate it - * irregardlesss. + * irregardless. */ if ((xen_initial_domain() || swiotlb || swiotlb_force) && (xen_pv_domain())) diff --git a/drivers/acpi/acpica/utresrc.c b/drivers/acpi/acpica/utresrc.c index 84e051844247..6ffd3a8bdaa5 100644 --- a/drivers/acpi/acpica/utresrc.c +++ b/drivers/acpi/acpica/utresrc.c @@ -50,7 +50,7 @@ ACPI_MODULE_NAME("utresrc") #if defined(ACPI_DISASSEMBLER) || defined (ACPI_DEBUGGER) /* * Strings used to decode resource descriptors. - * Used by both the disasssembler and the debugger resource dump routines + * Used by both the disassembler and the debugger resource dump routines */ const char *acpi_gbl_bm_decode[] = { "NotBusMaster", diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index ec574fc8fbc6..db39e9e607d8 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -1521,7 +1521,7 @@ static void acpi_video_device_notify(acpi_handle handle, u32 event, void *data) acpi_bus_generate_proc_event(device, event, 0); keycode = KEY_BRIGHTNESSDOWN; break; - case ACPI_VIDEO_NOTIFY_ZERO_BRIGHTNESS: /* zero brightnesss */ + case ACPI_VIDEO_NOTIFY_ZERO_BRIGHTNESS: /* zero brightness */ if (brightness_switch_enabled) acpi_video_switch_brightness(video_device, event); acpi_bus_generate_proc_event(device, event, 0); diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index ff0c373e3bbf..5629a0c56f61 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -321,7 +321,7 @@ efivar_show_raw(struct efivar_entry *entry, char *buf) /* * Generic read/write functions that call the specific functions of - * the atttributes... + * the attributes... */ static ssize_t efivar_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c index bb8b722a9783..0ff92c208005 100644 --- a/drivers/macintosh/therm_pm72.c +++ b/drivers/macintosh/therm_pm72.c @@ -44,11 +44,11 @@ * TODO: - Check MPU structure version/signature * - Add things like /sbin/overtemp for non-critical * overtemp conditions so userland can take some policy - * decisions, like slewing down CPUs + * decisions, like slowing down CPUs * - Deal with fan and i2c failures in a better way * - Maybe do a generic PID based on params used for * U3 and Drives ? Definitely need to factor code a bit - * bettter... also make sensor detection more robust using + * better... 
also make sensor detection more robust using * the device-tree to probe for them * - Figure out how to get the slots consumption and set the * slots fan accordingly diff --git a/drivers/media/dvb/dvb-usb/dw2102.c b/drivers/media/dvb/dvb-usb/dw2102.c index f5b9da18f611..d312323504a4 100644 --- a/drivers/media/dvb/dvb-usb/dw2102.c +++ b/drivers/media/dvb/dvb-usb/dw2102.c @@ -1377,7 +1377,7 @@ static struct rc_map_table rc_map_su3000_table[] = { { 0x0f, KEY_BLUE }, /* bottom yellow button */ { 0x14, KEY_AUDIO }, /* Snapshot */ { 0x38, KEY_TV }, /* TV/Radio */ - { 0x0c, KEY_ESC } /* upper Red buttton */ + { 0x0c, KEY_ESC } /* upper Red button */ }; static struct rc_map_dvb_usb_table_table keys_tables[] = { diff --git a/drivers/media/video/msp3400-driver.c b/drivers/media/video/msp3400-driver.c index 8126622fb4f5..de5d481b0328 100644 --- a/drivers/media/video/msp3400-driver.c +++ b/drivers/media/video/msp3400-driver.c @@ -96,7 +96,7 @@ MODULE_PARM_DESC(debug, "Enable debug messages [0-3]"); MODULE_PARM_DESC(stereo_threshold, "Sets signal threshold to activate stereo"); MODULE_PARM_DESC(standard, "Specify audio standard: 32 = NTSC, 64 = radio, Default: Autodetect"); MODULE_PARM_DESC(amsound, "Hardwire AM sound at 6.5Hz (France), FM can autoscan"); -MODULE_PARM_DESC(dolby, "Activates Dolby processsing"); +MODULE_PARM_DESC(dolby, "Activates Dolby processing"); /* ---------------------------------------------------------------------- */ diff --git a/drivers/media/video/saa7164/saa7164-encoder.c b/drivers/media/video/saa7164/saa7164-encoder.c index f9d594698832..400364569c8d 100644 --- a/drivers/media/video/saa7164/saa7164-encoder.c +++ b/drivers/media/video/saa7164/saa7164-encoder.c @@ -177,7 +177,7 @@ static int saa7164_encoder_buffers_alloc(struct saa7164_port *port) } } - /* Allocate some kenrel kernel buffers for copying + /* Allocate some kernel buffers for copying * to userpsace. */ len = params->numberoflines * params->pitch; diff --git a/drivers/media/video/saa7164/saa7164-vbi.c b/drivers/media/video/saa7164/saa7164-vbi.c index 9e5b01c29cf5..bc1fcedba874 100644 --- a/drivers/media/video/saa7164/saa7164-vbi.c +++ b/drivers/media/video/saa7164/saa7164-vbi.c @@ -148,7 +148,7 @@ static int saa7164_vbi_buffers_alloc(struct saa7164_port *port) } } - /* Allocate some kenrel kernel buffers for copying + /* Allocate some kernel buffers for copying * to userpsace. */ len = params->numberoflines * params->pitch; diff --git a/drivers/message/i2o/README.ioctl b/drivers/message/i2o/README.ioctl index 65c0c47aeb79..5fb195af43e2 100644 --- a/drivers/message/i2o/README.ioctl +++ b/drivers/message/i2o/README.ioctl @@ -110,7 +110,7 @@ V. Getting Logical Configuration Table ENOBUFS Buffer not large enough. If this occurs, the required buffer length is written into *(lct->reslen) -VI. Settting Parameters +VI. 
Setting Parameters SYNOPSIS diff --git a/drivers/net/can/pch_can.c b/drivers/net/can/pch_can.c index e54712b22c27..d11fbb2b95ff 100644 --- a/drivers/net/can/pch_can.c +++ b/drivers/net/can/pch_can.c @@ -653,7 +653,7 @@ static int pch_can_rx_normal(struct net_device *ndev, u32 obj_num, int quota) u16 data_reg; do { - /* Reading the messsage object from the Message RAM */ + /* Reading the message object from the Message RAM */ iowrite32(PCH_CMASK_RX_TX_GET, &priv->regs->ifregs[0].cmask); pch_can_rw_msg_obj(&priv->regs->ifregs[0].creq, obj_num); diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c index 872183f29ec4..d532dde5120f 100644 --- a/drivers/net/irda/ali-ircc.c +++ b/drivers/net/irda/ali-ircc.c @@ -1800,7 +1800,7 @@ static int ali_ircc_dma_receive_complete(struct ali_ircc_cb *self) MessageCount = inb(iobase+ FIR_LSR)&0x07; if (MessageCount > 0) - IRDA_DEBUG(0, "%s(), Messsage count = %d,\n", __func__ , MessageCount); + IRDA_DEBUG(0, "%s(), Message count = %d,\n", __func__ , MessageCount); for (i=0; i<=MessageCount; i++) { diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 337bdcd5abc9..665c3d832034 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -7231,7 +7231,7 @@ static void do_s2io_card_down(struct s2io_nic *sp, int do_io) /* As per the HW requirement we need to replenish the * receive buffer to avoid the ring bump. Since there is * no intention of processing the Rx frame at this pointwe are - * just settting the ownership bit of rxd in Each Rx + * just setting the ownership bit of rxd in Each Rx * ring to HW and set the appropriate buffer size * based on the ring mode */ diff --git a/drivers/net/ucc_geth_ethtool.c b/drivers/net/ucc_geth_ethtool.c index 6f92e48f02d3..cfd8881452ac 100644 --- a/drivers/net/ucc_geth_ethtool.c +++ b/drivers/net/ucc_geth_ethtool.c @@ -6,7 +6,7 @@ * Author: Li Yang * * Limitation: - * Can only get/set setttings of the first queue. + * Can only get/set settings of the first queue. * Need to re-open the interface manually after changing some parameters. * * This program is free software; you can redistribute it and/or modify it diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 069c1cf0fdf7..fef9ecc7b7aa 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -109,7 +109,7 @@ int usbnet_get_endpoints(struct usbnet *dev, struct usb_interface *intf) /* take the first altsetting with in-bulk + out-bulk; * remember any status endpoint, just in case; - * ignore other endpoints and altsetttings. + * ignore other endpoints and altsettings. */ for (ep = 0; ep < alt->desc.bNumEndpoints; ep++) { struct usb_host_endpoint *e; diff --git a/drivers/net/wan/pc300_drv.c b/drivers/net/wan/pc300_drv.c index f875cfae3093..737b59f1a8dc 100644 --- a/drivers/net/wan/pc300_drv.c +++ b/drivers/net/wan/pc300_drv.c @@ -1445,7 +1445,7 @@ static void falc_update_stats(pc300_t * card, int ch) * Description: In the remote loopback mode the clock and data recovered * from the line inputs RL1/2 or RDIP/RDIN are routed back * to the line outputs XL1/2 or XDOP/XDON via the analog - * transmitter. As in normal mode they are processsed by + * transmitter. As in normal mode they are processed by * the synchronizer and then sent to the system interface. 
*---------------------------------------------------------------------------- */ diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c index 6eadf975ae48..aa1ba5dbfebb 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c @@ -3329,26 +3329,26 @@ static int ar9300_eeprom_restore_internal(struct ath_hw *ah, else cptr = AR9300_BASE_ADDR; ath_dbg(common, ATH_DBG_EEPROM, - "Trying EEPROM accesss at Address 0x%04x\n", cptr); + "Trying EEPROM access at Address 0x%04x\n", cptr); if (ar9300_check_eeprom_header(ah, read, cptr)) goto found; cptr = AR9300_BASE_ADDR_512; ath_dbg(common, ATH_DBG_EEPROM, - "Trying EEPROM accesss at Address 0x%04x\n", cptr); + "Trying EEPROM access at Address 0x%04x\n", cptr); if (ar9300_check_eeprom_header(ah, read, cptr)) goto found; read = ar9300_read_otp; cptr = AR9300_BASE_ADDR; ath_dbg(common, ATH_DBG_EEPROM, - "Trying OTP accesss at Address 0x%04x\n", cptr); + "Trying OTP access at Address 0x%04x\n", cptr); if (ar9300_check_eeprom_header(ah, read, cptr)) goto found; cptr = AR9300_BASE_ADDR_512; ath_dbg(common, ATH_DBG_EEPROM, - "Trying OTP accesss at Address 0x%04x\n", cptr); + "Trying OTP access at Address 0x%04x\n", cptr); if (ar9300_check_eeprom_header(ah, read, cptr)) goto found; diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c index 42c3fe37af64..87813c33bdc2 100644 --- a/drivers/net/wireless/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/ipw2x00/ipw2200.c @@ -7430,7 +7430,7 @@ static int ipw_associate_network(struct ipw_priv *priv, priv->assoc_request.capability &= ~cpu_to_le16(WLAN_CAPABILITY_SHORT_SLOT_TIME); - IPW_DEBUG_ASSOC("%sssocation attempt: '%s', channel %d, " + IPW_DEBUG_ASSOC("%ssociation attempt: '%s', channel %d, " "802.11%c [%d], %s[:%s], enc=%s%s%s%c%c\n", roaming ? "Rea" : "A", print_ssid(ssid, priv->essid, priv->essid_len), diff --git a/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.c b/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.c index 078ed600f47a..232aff1fe784 100644 --- a/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.c +++ b/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.c @@ -1,5 +1,5 @@ /* - * Aic7xxx SCSI host adapter firmware asssembler symbol table implementation + * Aic7xxx SCSI host adapter firmware assembler symbol table implementation * * Copyright (c) 1997 Justin T. Gibbs. * Copyright (c) 2002 Adaptec Inc. diff --git a/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.h b/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.h index 2ba73ae7c777..34bbcad7f83f 100644 --- a/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.h +++ b/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.h @@ -1,5 +1,5 @@ /* - * Aic7xxx SCSI host adapter firmware asssembler symbol table definitions + * Aic7xxx SCSI host adapter firmware assembler symbol table definitions * * Copyright (c) 1997 Justin T. Gibbs. * Copyright (c) 2002 Adaptec Inc. 
diff --git a/drivers/scsi/constants.c b/drivers/scsi/constants.c index d0c82340f0e2..d0cdde57d7d2 100644 --- a/drivers/scsi/constants.c +++ b/drivers/scsi/constants.c @@ -34,7 +34,7 @@ static const char * cdb_byte0_names[] = { /* 00-03 */ "Test Unit Ready", "Rezero Unit/Rewind", NULL, "Request Sense", /* 04-07 */ "Format Unit/Medium", "Read Block Limits", NULL, - "Reasssign Blocks", + "Reassign Blocks", /* 08-0d */ "Read(6)", NULL, "Write(6)", "Seek(6)", NULL, NULL, /* 0e-12 */ NULL, "Read Reverse", "Write Filemarks", "Space", "Inquiry", /* 13-16 */ "Verify(6)", "Recover Buffered Data", "Mode Select(6)", diff --git a/drivers/scsi/esp_scsi.c b/drivers/scsi/esp_scsi.c index 57558523c1b8..182831e1e0b2 100644 --- a/drivers/scsi/esp_scsi.c +++ b/drivers/scsi/esp_scsi.c @@ -1059,7 +1059,7 @@ static struct esp_cmd_entry *esp_reconnect_with_tag(struct esp *esp, esp->ops->send_dma_cmd(esp, esp->command_block_dma, 2, 2, 1, ESP_CMD_DMA | ESP_CMD_TI); - /* ACK the msssage. */ + /* ACK the message. */ scsi_esp_cmd(esp, ESP_CMD_MOK); for (i = 0; i < ESP_RESELECT_TAG_LIMIT; i++) { diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 17d789325f40..8dcbf8fff673 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -4532,7 +4532,7 @@ lpfc_set_vport_symbolic_name(struct fc_vport *fc_vport) * * This function is called by the lpfc_get_cfgparam() routine to set the * module lpfc_log_verbose into the @phba cfg_log_verbose for use with - * log messsage according to the module's lpfc_log_verbose parameter setting + * log message according to the module's lpfc_log_verbose parameter setting * before hba port or vport created. **/ static void diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index 95f11ed79463..86b6f7e6686a 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -1002,7 +1002,7 @@ typedef struct _ELS_PKT { /* Structure is in Big Endian format */ #define SLI_MGMT_GRPL 0x102 /* Get registered Port list */ #define SLI_MGMT_GPAT 0x110 /* Get Port attributes */ #define SLI_MGMT_RHBA 0x200 /* Register HBA */ -#define SLI_MGMT_RHAT 0x201 /* Register HBA atttributes */ +#define SLI_MGMT_RHAT 0x201 /* Register HBA attributes */ #define SLI_MGMT_RPRT 0x210 /* Register Port */ #define SLI_MGMT_RPA 0x211 /* Register Port attributes */ #define SLI_MGMT_DHBA 0x300 /* De-register HBA */ diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index dacabbe0a586..63c5d627a5a3 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -3040,7 +3040,7 @@ lpfc_sli_sp_handle_rspiocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, list_add_tail(&rspiocbp->list, &(pring->iocb_continueq)); pring->iocb_continueq_cnt++; - /* Now, determine whetehr the list is completed for processing */ + /* Now, determine whether the list is completed for processing */ irsp = &rspiocbp->iocb; if (irsp->ulpLe) { /* diff --git a/drivers/scsi/nsp32_debug.c b/drivers/scsi/nsp32_debug.c index 2fb3fb58858d..58806f432a16 100644 --- a/drivers/scsi/nsp32_debug.c +++ b/drivers/scsi/nsp32_debug.c @@ -13,7 +13,7 @@ static const char unknown[] = "UNKNOWN"; static const char * group_0_commands[] = { /* 00-03 */ "Test Unit Ready", "Rezero Unit", unknown, "Request Sense", -/* 04-07 */ "Format Unit", "Read Block Limits", unknown, "Reasssign Blocks", +/* 04-07 */ "Format Unit", "Read Block Limits", unknown, "Reassign Blocks", /* 08-0d */ "Read (6)", unknown, "Write (6)", "Seek (6)", unknown, unknown, /* 0e-12 */ unknown, 
"Read Reverse", "Write Filemarks", "Space", "Inquiry", /* 13-16 */ unknown, "Recover Buffered Data", "Mode Select", "Reserve", diff --git a/drivers/scsi/pcmcia/nsp_debug.c b/drivers/scsi/pcmcia/nsp_debug.c index 3c6ef64fcbff..6aa7d269d3b3 100644 --- a/drivers/scsi/pcmcia/nsp_debug.c +++ b/drivers/scsi/pcmcia/nsp_debug.c @@ -15,7 +15,7 @@ static const char unknown[] = "UNKNOWN"; static const char * group_0_commands[] = { /* 00-03 */ "Test Unit Ready", "Rezero Unit", unknown, "Request Sense", -/* 04-07 */ "Format Unit", "Read Block Limits", unknown, "Reasssign Blocks", +/* 04-07 */ "Format Unit", "Read Block Limits", unknown, "Reassign Blocks", /* 08-0d */ "Read (6)", unknown, "Write (6)", "Seek (6)", unknown, unknown, /* 0e-12 */ unknown, "Read Reverse", "Write Filemarks", "Space", "Inquiry", /* 13-16 */ unknown, "Recover Buffered Data", "Mode Select", "Reserve", diff --git a/drivers/scsi/qla4xxx/ql4_mbx.c b/drivers/scsi/qla4xxx/ql4_mbx.c index f9d81c8372c3..d78b58dc5011 100644 --- a/drivers/scsi/qla4xxx/ql4_mbx.c +++ b/drivers/scsi/qla4xxx/ql4_mbx.c @@ -19,7 +19,7 @@ * @mbx_cmd: data pointer for mailbox in registers. * @mbx_sts: data pointer for mailbox out registers. * - * This routine isssue mailbox commands and waits for completion. + * This routine issue mailbox commands and waits for completion. * If outCount is 0, this routine completes successfully WITHOUT waiting * for the mailbox command to complete. **/ diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 74273e638c0d..77623b936538 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2128,7 +2128,7 @@ static int gsmld_attach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) /** * gsmld_detach_gsm - stop doing 0710 mux - * @tty: tty atttached to the mux + * @tty: tty attached to the mux * @gsm: mux * * Shutdown and then clean up the resources used by the line discipline diff --git a/drivers/usb/host/imx21-dbg.c b/drivers/usb/host/imx21-dbg.c index 512f647448ca..6d7533427163 100644 --- a/drivers/usb/host/imx21-dbg.c +++ b/drivers/usb/host/imx21-dbg.c @@ -384,7 +384,7 @@ static void debug_isoc_show_one(struct seq_file *s, seq_printf(s, "%s %d:\n" "cc=0X%02X\n" "scheduled frame %d (%d)\n" - "submittted frame %d (%d)\n" + "submitted frame %d (%d)\n" "completed frame %d (%d)\n" "requested length=%d\n" "completed length=%d\n\n", diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index 388cc128072a..ff9a01f8d405 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -104,7 +104,7 @@ get_endpoints(struct usbtest_dev *dev, struct usb_interface *intf) alt = intf->altsetting + tmp; /* take the first altsetting with in-bulk + out-bulk; - * ignore other endpoints and altsetttings. + * ignore other endpoints and altsettings. 
*/ for (ep = 0; ep < alt->desc.bNumEndpoints; ep++) { struct usb_host_endpoint *e; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 199a80134312..f340f7c99d09 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -709,7 +709,7 @@ again: WARN_ON(cur->checked); if (!list_empty(&cur->upper)) { /* - * the backref was added previously when processsing + * the backref was added previously when processing * backref of type BTRFS_TREE_BLOCK_REF_KEY */ BUG_ON(!list_is_singular(&cur->upper)); diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index 2ba6719ac612..1a4311437a8b 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -272,7 +272,7 @@ vxfs_get_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip) * *ip: VFS inode * * Description: - * vxfs_put_fake_inode frees all data asssociated with @ip. + * vxfs_put_fake_inode frees all data associated with @ip. */ void vxfs_put_fake_inode(struct inode *ip) diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index 5232d3e8fb2f..a2e2402b2afb 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -8,7 +8,7 @@ * Statistsics for the reply cache * fh * statistics for filehandle lookup - * io + * io * statistics for IO throughput * th <10%-20%> <20%-30%> ... <90%-100%> <100%> * time (seconds) when nfsd thread usage above thresholds diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index efc309fa3035..7797218d0b30 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -42,7 +42,7 @@ config SQUASHFS_LZO select LZO_DECOMPRESS help Saying Y here includes support for reading Squashfs file systems - compressed with LZO compresssion. LZO compression is mainly + compressed with LZO compression. LZO compression is mainly aimed at embedded systems with slower CPUs where the overheads of zlib are too high. @@ -57,7 +57,7 @@ config SQUASHFS_XZ select XZ_DEC help Saying Y here includes support for reading Squashfs file systems - compressed with XZ compresssion. XZ gives better compression than + compressed with XZ compression. XZ gives better compression than the default zlib compression, at the expense of greater CPU and memory overhead. diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index c37b520132ff..4b5a3fbb1f1f 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -29,7 +29,7 @@ * plus functions layered ontop of the generic cache implementation to * access the metadata and fragment caches. * - * To avoid out of memory and fragmentation isssues with vmalloc the cache + * To avoid out of memory and fragmentation issues with vmalloc the cache * uses sequences of kmalloced PAGE_CACHE_SIZE buffers. * * It should be noted that the cache is not used for file datablocks, these diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index a37480a6e023..d11ce613d692 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1470,7 +1470,7 @@ xfs_itruncate_finish( * file but the log buffers containing the free and reallocation * don't, then we'd end up with garbage in the blocks being freed. * As long as we make the new_size permanent before actually - * freeing any blocks it doesn't matter if they get writtten to. + * freeing any blocks it doesn't matter if they get written to. * * The callers must signal into us whether or not the size * setting here must be synchronous. 
There are a few cases diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 7f1183dcd119..34c4498b800f 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -45,7 +45,7 @@ struct posix_clock; * @timer_create: Create a new timer * @timer_delete: Remove a previously created timer * @timer_gettime: Get remaining time and interval of a timer - * @timer_setttime: Set a timer's initial expiration and interval + * @timer_settime: Set a timer's initial expiration and interval * @fasync: Optional character device fasync method * @mmap: Optional character device mmap method * @open: Optional character device open method diff --git a/include/net/genetlink.h b/include/net/genetlink.h index b4c7c1cbcf40..d420f28b6d60 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -260,7 +260,7 @@ static inline int genlmsg_reply(struct sk_buff *skb, struct genl_info *info) /** * gennlmsg_data - head of message payload - * @gnlh: genetlink messsage header + * @gnlh: genetlink message header */ static inline void *genlmsg_data(const struct genlmsghdr *gnlh) { diff --git a/include/net/netlink.h b/include/net/netlink.h index 8a3906a08f5f..02740a94f108 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -290,7 +290,7 @@ static inline int nlmsg_padlen(int payload) /** * nlmsg_data - head of message payload - * @nlh: netlink messsage header + * @nlh: netlink message header */ static inline void *nlmsg_data(const struct nlmsghdr *nlh) { diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 1a10dcd999ea..6c014dd3a20b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -333,7 +333,7 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt) } /* - * Processs a completion context + * Process a completion context */ static void process_context(struct svcxprt_rdma *xprt, struct svc_rdma_op_ctxt *ctxt) -- cgit v1.2.3 From 6c6de1aa65189c37cc3c9af78da756c06a99899b Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sat, 30 Apr 2011 18:56:12 +0900 Subject: nilfs2: super root size should change depending on inode size The size of the super root structure depends on the inode size, so the NILFS_SR_BYTES macro should be a function of the inode size; this patch makes it one. A different size value will be written for a possible future filesystem with an extended inode size, but fortunately this does not break disk format compatibility.
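For reference, a sketch of the super root layout implied by the offset macros in nilfs2_fs.h (this assumes NILFS_SR_MDT_OFFSET(isz, i) expands to the size of the fixed super root header plus i times the on-disk inode size, which is how the existing NILFS_SR_*_OFFSET macros use it; isz stands for the on-disk inode size):

	/* Super root layout for on-disk inode size isz (sketch):          */
	/* fixed header (sr_sum, sr_bytes, ...) starts at offset 0         */
	/* DAT inode    starts at NILFS_SR_MDT_OFFSET(isz, 0)              */
	/* cpfile inode starts at NILFS_SR_MDT_OFFSET(isz, 1)              */
	/* sufile inode starts at NILFS_SR_MDT_OFFSET(isz, 2)              */
	/* total size:  NILFS_SR_BYTES(isz) == NILFS_SR_MDT_OFFSET(isz, 3) */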
Signed-off-by: Ryusuke Konishi --- fs/nilfs2/segbuf.c | 5 ++++- fs/nilfs2/segment.c | 2 +- include/linux/nilfs2_fs.h | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 410ec2b1af4f..850a7c0228fb 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -239,12 +239,15 @@ nilfs_segbuf_fill_in_super_root_crc(struct nilfs_segment_buffer *segbuf, u32 seed) { struct nilfs_super_root *raw_sr; + struct the_nilfs *nilfs = segbuf->sb_super->s_fs_info; + unsigned srsize; u32 crc; raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data; + srsize = NILFS_SR_BYTES(nilfs->ns_inode_size); crc = crc32_le(seed, (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum), - NILFS_SR_BYTES - sizeof(raw_sr->sr_sum)); + srsize - sizeof(raw_sr->sr_sum)); raw_sr->sr_sum = cpu_to_le32(crc); } diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index abbfab974700..8006d0cd4440 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -894,7 +894,7 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root; raw_sr = (struct nilfs_super_root *)bh_sr->b_data; - raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES); + raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES(isz)); raw_sr->sr_nongc_ctime = cpu_to_le64(nilfs_doing_gc() ? nilfs->ns_nongc_ctime : sci->sc_seg_ctime); diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index 8768c469e93e..bd8678f8a421 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -107,7 +107,7 @@ struct nilfs_super_root { #define NILFS_SR_DAT_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 0) #define NILFS_SR_CPFILE_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 1) #define NILFS_SR_SUFILE_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 2) -#define NILFS_SR_BYTES (sizeof(struct nilfs_super_root)) +#define NILFS_SR_BYTES(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 3) /* * Maximal mount counts -- cgit v1.2.3 From 619205da5b567504310daf829dede1187fa29bbc Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 5 May 2011 01:23:57 +0900 Subject: nilfs2: add ioctl which limits range of segment to be allocated This adds a new ioctl command which limits the range of segments to be allocated. This is intended to gather data within a range of the partition before shrinking the filesystem, or to control the location of new logs for some purpose. If a range is specified by the ioctl, the segment allocator of nilfs tries to allocate new segments from that range unless no free segments are available there.
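As an illustration, a minimal userspace sketch of the intended call sequence (the mount point path and the 8 GiB end offset are made up for the example; the two __u64 values are byte offsets within the partition, which the kernel converts to whole segment numbers, and the caller needs CAP_SYS_ADMIN):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/nilfs2_fs.h>	/* NILFS_IOCTL_SET_ALLOC_RANGE */

	int main(void)
	{
		/* a file descriptor on the nilfs2 volume, e.g. the mount point */
		int fd = open("/mnt/nilfs2", O_RDONLY);
		/* restrict new segment allocation to the first 8 GiB */
		__u64 range[2] = { 0, 8ULL << 30 };

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (ioctl(fd, NILFS_IOCTL_SET_ALLOC_RANGE, range) < 0) {
			perror("NILFS_IOCTL_SET_ALLOC_RANGE");
			return 1;
		}
		return 0;
	}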
Signed-off-by: Ryusuke Konishi --- fs/nilfs2/ioctl.c | 34 ++++++++++++++++++++++ fs/nilfs2/sufile.c | 73 ++++++++++++++++++++++++++++++++++++++++------- fs/nilfs2/sufile.h | 1 + include/linux/nilfs2_fs.h | 2 ++ 4 files changed, 100 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index f2469ba6246b..6f617773a7f7 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -698,6 +698,38 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, return 0; } +static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp) +{ + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + __u64 range[2]; + __u64 minseg, maxseg; + unsigned long segbytes; + int ret = -EPERM; + + if (!capable(CAP_SYS_ADMIN)) + goto out; + + ret = -EFAULT; + if (copy_from_user(range, argp, sizeof(__u64[2]))) + goto out; + + ret = -ERANGE; + if (range[1] > i_size_read(inode->i_sb->s_bdev->bd_inode)) + goto out; + + segbytes = nilfs->ns_blocks_per_segment * nilfs->ns_blocksize; + + minseg = range[0] + segbytes - 1; + do_div(minseg, segbytes); + maxseg = NILFS_SB2_OFFSET_BYTES(range[1]); + do_div(maxseg, segbytes); + maxseg--; + + ret = nilfs_sufile_set_alloc_range(nilfs->ns_sufile, minseg, maxseg); +out: + return ret; +} + static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp, size_t membsz, @@ -763,6 +795,8 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return nilfs_ioctl_clean_segments(inode, filp, cmd, argp); case NILFS_IOCTL_SYNC: return nilfs_ioctl_sync(inode, filp, cmd, argp); + case NILFS_IOCTL_SET_ALLOC_RANGE: + return nilfs_ioctl_set_alloc_range(inode, argp); default: return -ENOTTY; } diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 1d6f488ccae8..f4374df00ad5 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -33,7 +33,9 @@ struct nilfs_sufile_info { struct nilfs_mdt_info mi; - unsigned long ncleansegs; + unsigned long ncleansegs;/* number of clean segments */ + __u64 allocmin; /* lower limit of allocatable segment range */ + __u64 allocmax; /* upper limit of allocatable segment range */ }; static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile) @@ -247,6 +249,35 @@ int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create, return ret; } +/** + * nilfs_sufile_set_alloc_range - limit range of segment to be allocated + * @sufile: inode of segment usage file + * @start: minimum segment number of allocatable region (inclusive) + * @end: maximum segment number of allocatable region (inclusive) + * + * Return Value: On success, 0 is returned. On error, one of the + * following negative error codes is returned. 
+ * + * %-ERANGE - invalid segment region + */ +int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end) +{ + struct nilfs_sufile_info *sui = NILFS_SUI(sufile); + __u64 nsegs; + int ret = -ERANGE; + + down_write(&NILFS_MDT(sufile)->mi_sem); + nsegs = nilfs_sufile_get_nsegments(sufile); + + if (start <= end && end < nsegs) { + sui->allocmin = start; + sui->allocmax = end; + ret = 0; + } + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; +} + /** * nilfs_sufile_alloc - allocate a segment * @sufile: inode of segment usage file @@ -269,11 +300,12 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) struct buffer_head *header_bh, *su_bh; struct nilfs_sufile_header *header; struct nilfs_segment_usage *su; + struct nilfs_sufile_info *sui = NILFS_SUI(sufile); size_t susz = NILFS_MDT(sufile)->mi_entry_size; __u64 segnum, maxsegnum, last_alloc; void *kaddr; - unsigned long nsegments, ncleansegs, nsus; - int ret, i, j; + unsigned long nsegments, ncleansegs, nsus, cnt; + int ret, j; down_write(&NILFS_MDT(sufile)->mi_sem); @@ -287,13 +319,31 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) kunmap_atomic(kaddr, KM_USER0); nsegments = nilfs_sufile_get_nsegments(sufile); + maxsegnum = sui->allocmax; segnum = last_alloc + 1; - maxsegnum = nsegments - 1; - for (i = 0; i < nsegments; i += nsus) { - if (segnum >= nsegments) { - /* wrap around */ - segnum = 0; - maxsegnum = last_alloc; + if (segnum < sui->allocmin || segnum > sui->allocmax) + segnum = sui->allocmin; + + for (cnt = 0; cnt < nsegments; cnt += nsus) { + if (segnum > maxsegnum) { + if (cnt < sui->allocmax - sui->allocmin + 1) { + /* + * wrap around in the limited region. + * if allocation started from + * sui->allocmin, this never happens. + */ + segnum = sui->allocmin; + maxsegnum = last_alloc; + } else if (segnum > sui->allocmin && + sui->allocmax + 1 < nsegments) { + segnum = sui->allocmax + 1; + maxsegnum = nsegments - 1; + } else if (sui->allocmin > 0) { + segnum = 0; + maxsegnum = sui->allocmin - 1; + } else { + break; /* never happens */ + } } ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, &su_bh); @@ -319,7 +369,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) header->sh_last_alloc = cpu_to_le64(segnum); kunmap_atomic(kaddr, KM_USER0); - NILFS_SUI(sufile)->ncleansegs--; + sui->ncleansegs--; nilfs_mdt_mark_buffer_dirty(header_bh); nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); @@ -679,6 +729,9 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize, kunmap_atomic(kaddr, KM_USER0); brelse(header_bh); + sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1; + sui->allocmin = 0; + unlock_new_inode(sufile); out: *inodep = sufile; diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index a943fbacb45b..57bfee9cd02d 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -36,6 +36,7 @@ static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile); +int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end); int nilfs_sufile_alloc(struct inode *, __u64 *); int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum); int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index bd8678f8a421..7454ad7451b4 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -845,5 +845,7 @@ struct nilfs_bdesc { _IOR(NILFS_IOCTL_IDENT, 0x8A, __u64) 
#define NILFS_IOCTL_RESIZE \ _IOW(NILFS_IOCTL_IDENT, 0x8B, __u64) +#define NILFS_IOCTL_SET_ALLOC_RANGE \ _IOW(NILFS_IOCTL_IDENT, 0x8C, __u64[2]) #endif /* _LINUX_NILFS_FS_H */ -- cgit v1.2.3 From 8369ae33b705222aa05ab53c7d6b4458f4ed161b Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Mon, 9 May 2011 18:56:46 +0200 Subject: bcma: add Broadcom specific AMBA bus driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Broadcom has released cards based on a new AMBA-based bus type. From a programming point of view, this new bus type differs from AMBA and does not use AMBA common registers. It also differs enough from SSB, so we decided that a new bus driver is needed to keep the code clean. In its current form, the driver detects devices present on the bus and registers them in the system. It allows registering BCMA drivers for specified bus devices and provides them with basic operations. The bus driver itself includes two important bus-managing drivers: the ChipCommon core driver and the PCI(c) core driver. They are used early to allow correct initialization. Currently the code is limited to supporting buses on PCI(e) devices; however, the driver is designed to be usable on other hosts as well. The host abstraction layer is implemented and already used for PCI(e). Support for PCI(e) hosts is working and seems to be stable (access to the 80211 core was tested successfully on a few devices). We can still optimize it by using some fixed windows, but this can be done later without affecting any external code. Windows are just ranges in MMIO used for accessing cores on the bus. Cc: Greg KH Cc: Michael Büsch Cc: Larry Finger Cc: George Kashperko Cc: Arend van Spriel Cc: linux-arm-kernel@lists.infradead.org Cc: Russell King Cc: Arnd Bergmann Cc: Andy Botting Cc: linuxdriverproject Cc: linux-kernel@vger.kernel.org Signed-off-by: Rafał Miłecki Signed-off-by: John W.
Linville --- Documentation/ABI/testing/sysfs-bus-bcma | 31 +++ MAINTAINERS | 7 + drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/bcma/Kconfig | 33 +++ drivers/bcma/Makefile | 7 + drivers/bcma/README | 19 ++ drivers/bcma/TODO | 3 + drivers/bcma/bcma_private.h | 28 +++ drivers/bcma/core.c | 51 ++++ drivers/bcma/driver_chipcommon.c | 87 +++++++ drivers/bcma/driver_chipcommon_pmu.c | 134 +++++++++++ drivers/bcma/driver_pci.c | 163 +++++++++++++ drivers/bcma/host_pci.c | 196 +++++++++++++++ drivers/bcma/main.c | 247 +++++++++++++++++++ drivers/bcma/scan.c | 360 ++++++++++++++++++++++++++++ drivers/bcma/scan.h | 56 +++++ include/linux/bcma/bcma.h | 224 +++++++++++++++++ include/linux/bcma/bcma_driver_chipcommon.h | 297 +++++++++++++++++++++++ include/linux/bcma/bcma_driver_pci.h | 89 +++++++ include/linux/bcma/bcma_regs.h | 34 +++ include/linux/mod_devicetable.h | 17 ++ scripts/mod/file2alias.c | 22 ++ 23 files changed, 2108 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-bus-bcma create mode 100644 drivers/bcma/Kconfig create mode 100644 drivers/bcma/Makefile create mode 100644 drivers/bcma/README create mode 100644 drivers/bcma/TODO create mode 100644 drivers/bcma/bcma_private.h create mode 100644 drivers/bcma/core.c create mode 100644 drivers/bcma/driver_chipcommon.c create mode 100644 drivers/bcma/driver_chipcommon_pmu.c create mode 100644 drivers/bcma/driver_pci.c create mode 100644 drivers/bcma/host_pci.c create mode 100644 drivers/bcma/main.c create mode 100644 drivers/bcma/scan.c create mode 100644 drivers/bcma/scan.h create mode 100644 include/linux/bcma/bcma.h create mode 100644 include/linux/bcma/bcma_driver_chipcommon.h create mode 100644 include/linux/bcma/bcma_driver_pci.h create mode 100644 include/linux/bcma/bcma_regs.h (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-bus-bcma b/Documentation/ABI/testing/sysfs-bus-bcma new file mode 100644 index 000000000000..06b62badddd1 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-bcma @@ -0,0 +1,31 @@ +What: /sys/bus/bcma/devices/.../manuf +Date: May 2011 +KernelVersion: 2.6.40 +Contact: RafaÅ‚ MiÅ‚ecki +Description: + Each BCMA core has it's manufacturer id. See + include/linux/bcma/bcma.h for possible values. + +What: /sys/bus/bcma/devices/.../id +Date: May 2011 +KernelVersion: 2.6.40 +Contact: RafaÅ‚ MiÅ‚ecki +Description: + There are a few types of BCMA cores, they can be identified by + id field. + +What: /sys/bus/bcma/devices/.../rev +Date: May 2011 +KernelVersion: 2.6.40 +Contact: RafaÅ‚ MiÅ‚ecki +Description: + BCMA cores of the same type can still slightly differ depending + on their revision. Use it for detailed programming. + +What: /sys/bus/bcma/devices/.../class +Date: May 2011 +KernelVersion: 2.6.40 +Contact: RafaÅ‚ MiÅ‚ecki +Description: + Each BCMA core is identified by few fields, including class it + belongs to. See include/linux/bcma/bcma.h for possible values. 
diff --git a/MAINTAINERS b/MAINTAINERS index 9f9104987a73..df5585819a62 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5810,6 +5810,13 @@ S: Maintained F: drivers/ssb/ F: include/linux/ssb/ +BROADCOM SPECIFIC AMBA DRIVER (BCMA) +M: RafaÅ‚ MiÅ‚ecki +L: linux-wireless@vger.kernel.org +S: Maintained +F: drivers/bcma/ +F: include/linux/bcma/ + SONY VAIO CONTROL DEVICE DRIVER M: Mattia Dongili L: platform-driver-x86@vger.kernel.org diff --git a/drivers/Kconfig b/drivers/Kconfig index 177c7d156933..aca706751469 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -68,6 +68,8 @@ source "drivers/watchdog/Kconfig" source "drivers/ssb/Kconfig" +source "drivers/bcma/Kconfig" + source "drivers/mfd/Kconfig" source "drivers/regulator/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 3f135b6fb014..a29527f4ded6 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -110,6 +110,7 @@ obj-$(CONFIG_HID) += hid/ obj-$(CONFIG_PPC_PS3) += ps3/ obj-$(CONFIG_OF) += of/ obj-$(CONFIG_SSB) += ssb/ +obj-$(CONFIG_BCMA) += bcma/ obj-$(CONFIG_VHOST_NET) += vhost/ obj-$(CONFIG_VLYNQ) += vlynq/ obj-$(CONFIG_STAGING) += staging/ diff --git a/drivers/bcma/Kconfig b/drivers/bcma/Kconfig new file mode 100644 index 000000000000..353781b5b78b --- /dev/null +++ b/drivers/bcma/Kconfig @@ -0,0 +1,33 @@ +config BCMA_POSSIBLE + bool + depends on HAS_IOMEM && HAS_DMA + default y + +menu "Broadcom specific AMBA" + depends on BCMA_POSSIBLE + +config BCMA + tristate "BCMA support" + depends on BCMA_POSSIBLE + help + Bus driver for Broadcom specific Advanced Microcontroller Bus + Architecture. + +config BCMA_HOST_PCI_POSSIBLE + bool + depends on BCMA && PCI = y + default y + +config BCMA_HOST_PCI + bool "Support for BCMA on PCI-host bus" + depends on BCMA_HOST_PCI_POSSIBLE + +config BCMA_DEBUG + bool "BCMA debugging" + depends on BCMA + help + This turns on additional debugging messages. + + If unsure, say N + +endmenu diff --git a/drivers/bcma/Makefile b/drivers/bcma/Makefile new file mode 100644 index 000000000000..0d56245bcb79 --- /dev/null +++ b/drivers/bcma/Makefile @@ -0,0 +1,7 @@ +bcma-y += main.o scan.o core.o +bcma-y += driver_chipcommon.o driver_chipcommon_pmu.o +bcma-y += driver_pci.o +bcma-$(CONFIG_BCMA_HOST_PCI) += host_pci.o +obj-$(CONFIG_BCMA) += bcma.o + +ccflags-$(CONFIG_BCMA_DEBUG) := -DDEBUG diff --git a/drivers/bcma/README b/drivers/bcma/README new file mode 100644 index 000000000000..f7e7ce46c603 --- /dev/null +++ b/drivers/bcma/README @@ -0,0 +1,19 @@ +Broadcom introduced new bus as replacement for older SSB. It is based on AMBA, +however from programming point of view there is nothing AMBA specific we use. + +Standard AMBA drivers are platform specific, have hardcoded addresses and use +AMBA standard fields like CID and PID. + +In case of Broadcom's cards every device consists of: +1) Broadcom specific AMBA device. It is put on AMBA bus, but can not be treated + as standard AMBA device. Reading it's CID or PID can cause machine lockup. +2) AMBA standard devices called ports or wrappers. They have CIDs (AMBA_CID) + and PIDs (0x103BB369), but we do not use that info for anything. One of that + devices is used for managing Broadcom specific core. + +Addresses of AMBA devices are not hardcoded in driver and have to be read from +EPROM. + +In this situation we decided to introduce separated bus. It can contain up to +16 devices identified by Broadcom specific fields: manufacturer, id, revision +and class. 
diff --git a/drivers/bcma/TODO b/drivers/bcma/TODO new file mode 100644 index 000000000000..da7aa99fe81c --- /dev/null +++ b/drivers/bcma/TODO @@ -0,0 +1,3 @@ +- Interrupts +- Defines for PCI core driver +- Create kernel Documentation (use info from README) diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h new file mode 100644 index 000000000000..2f72e9c585fd --- /dev/null +++ b/drivers/bcma/bcma_private.h @@ -0,0 +1,28 @@ +#ifndef LINUX_BCMA_PRIVATE_H_ +#define LINUX_BCMA_PRIVATE_H_ + +#ifndef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#endif + +#include +#include + +#define BCMA_CORE_SIZE 0x1000 + +struct bcma_bus; + +/* main.c */ +extern int bcma_bus_register(struct bcma_bus *bus); +extern void bcma_bus_unregister(struct bcma_bus *bus); + +/* scan.c */ +int bcma_bus_scan(struct bcma_bus *bus); + +#ifdef CONFIG_BCMA_HOST_PCI +/* host_pci.c */ +extern int __init bcma_host_pci_init(void); +extern void __exit bcma_host_pci_exit(void); +#endif /* CONFIG_BCMA_HOST_PCI */ + +#endif diff --git a/drivers/bcma/core.c b/drivers/bcma/core.c new file mode 100644 index 000000000000..ced379f7b371 --- /dev/null +++ b/drivers/bcma/core.c @@ -0,0 +1,51 @@ +/* + * Broadcom specific AMBA + * Core ops + * + * Licensed under the GNU/GPL. See COPYING for details. + */ + +#include "bcma_private.h" +#include + +bool bcma_core_is_enabled(struct bcma_device *core) +{ + if ((bcma_aread32(core, BCMA_IOCTL) & (BCMA_IOCTL_CLK | BCMA_IOCTL_FGC)) + != BCMA_IOCTL_CLK) + return false; + if (bcma_aread32(core, BCMA_RESET_CTL) & BCMA_RESET_CTL_RESET) + return false; + return true; +} +EXPORT_SYMBOL_GPL(bcma_core_is_enabled); + +static void bcma_core_disable(struct bcma_device *core, u32 flags) +{ + if (bcma_aread32(core, BCMA_RESET_CTL) & BCMA_RESET_CTL_RESET) + return; + + bcma_awrite32(core, BCMA_IOCTL, flags); + bcma_aread32(core, BCMA_IOCTL); + udelay(10); + + bcma_awrite32(core, BCMA_RESET_CTL, BCMA_RESET_CTL_RESET); + udelay(1); +} + +int bcma_core_enable(struct bcma_device *core, u32 flags) +{ + bcma_core_disable(core, flags); + + bcma_awrite32(core, BCMA_IOCTL, (BCMA_IOCTL_CLK | BCMA_IOCTL_FGC | flags)); + bcma_aread32(core, BCMA_IOCTL); + + bcma_awrite32(core, BCMA_RESET_CTL, 0); + udelay(1); + + bcma_awrite32(core, BCMA_IOCTL, (BCMA_IOCTL_CLK | flags)); + bcma_aread32(core, BCMA_IOCTL); + udelay(1); + + return 0; +} +EXPORT_SYMBOL_GPL(bcma_core_enable); diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c new file mode 100644 index 000000000000..caf596091d4d --- /dev/null +++ b/drivers/bcma/driver_chipcommon.c @@ -0,0 +1,87 @@ +/* + * Broadcom specific AMBA + * ChipCommon core driver + * + * Copyright 2005, Broadcom Corporation + * Copyright 2006, 2007, Michael Buesch + * + * Licensed under the GNU/GPL. See COPYING for details. 
+ */ + +#include "bcma_private.h" +#include + +static inline u32 bcma_cc_write32_masked(struct bcma_drv_cc *cc, u16 offset, + u32 mask, u32 value) +{ + value &= mask; + value |= bcma_cc_read32(cc, offset) & ~mask; + bcma_cc_write32(cc, offset, value); + + return value; +} + +void bcma_core_chipcommon_init(struct bcma_drv_cc *cc) +{ + if (cc->core->id.rev >= 11) + cc->status = bcma_cc_read32(cc, BCMA_CC_CHIPSTAT); + cc->capabilities = bcma_cc_read32(cc, BCMA_CC_CAP); + if (cc->core->id.rev >= 35) + cc->capabilities_ext = bcma_cc_read32(cc, BCMA_CC_CAP_EXT); + + bcma_cc_write32(cc, 0x58, 0); + bcma_cc_write32(cc, 0x5C, 0); + + if (cc->capabilities & BCMA_CC_CAP_PMU) + bcma_pmu_init(cc); + if (cc->capabilities & BCMA_CC_CAP_PCTL) + pr_err("Power control not implemented!\n"); +} + +/* Set chip watchdog reset timer to fire in 'ticks' backplane cycles */ +void bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, u32 ticks) +{ + /* instant NMI */ + bcma_cc_write32(cc, BCMA_CC_WATCHDOG, ticks); +} + +void bcma_chipco_irq_mask(struct bcma_drv_cc *cc, u32 mask, u32 value) +{ + bcma_cc_write32_masked(cc, BCMA_CC_IRQMASK, mask, value); +} + +u32 bcma_chipco_irq_status(struct bcma_drv_cc *cc, u32 mask) +{ + return bcma_cc_read32(cc, BCMA_CC_IRQSTAT) & mask; +} + +u32 bcma_chipco_gpio_in(struct bcma_drv_cc *cc, u32 mask) +{ + return bcma_cc_read32(cc, BCMA_CC_GPIOIN) & mask; +} + +u32 bcma_chipco_gpio_out(struct bcma_drv_cc *cc, u32 mask, u32 value) +{ + return bcma_cc_write32_masked(cc, BCMA_CC_GPIOOUT, mask, value); +} + +u32 bcma_chipco_gpio_outen(struct bcma_drv_cc *cc, u32 mask, u32 value) +{ + return bcma_cc_write32_masked(cc, BCMA_CC_GPIOOUTEN, mask, value); +} + +u32 bcma_chipco_gpio_control(struct bcma_drv_cc *cc, u32 mask, u32 value) +{ + return bcma_cc_write32_masked(cc, BCMA_CC_GPIOCTL, mask, value); +} +EXPORT_SYMBOL_GPL(bcma_chipco_gpio_control); + +u32 bcma_chipco_gpio_intmask(struct bcma_drv_cc *cc, u32 mask, u32 value) +{ + return bcma_cc_write32_masked(cc, BCMA_CC_GPIOIRQ, mask, value); +} + +u32 bcma_chipco_gpio_polarity(struct bcma_drv_cc *cc, u32 mask, u32 value) +{ + return bcma_cc_write32_masked(cc, BCMA_CC_GPIOPOL, mask, value); +} diff --git a/drivers/bcma/driver_chipcommon_pmu.c b/drivers/bcma/driver_chipcommon_pmu.c new file mode 100644 index 000000000000..f44177a644c7 --- /dev/null +++ b/drivers/bcma/driver_chipcommon_pmu.c @@ -0,0 +1,134 @@ +/* + * Broadcom specific AMBA + * ChipCommon Power Management Unit driver + * + * Copyright 2009, Michael Buesch + * Copyright 2007, Broadcom Corporation + * + * Licensed under the GNU/GPL. See COPYING for details. 
+ */ + +#include "bcma_private.h" +#include + +static void bcma_chipco_chipctl_maskset(struct bcma_drv_cc *cc, + u32 offset, u32 mask, u32 set) +{ + u32 value; + + bcma_cc_read32(cc, BCMA_CC_CHIPCTL_ADDR); + bcma_cc_write32(cc, BCMA_CC_CHIPCTL_ADDR, offset); + bcma_cc_read32(cc, BCMA_CC_CHIPCTL_ADDR); + value = bcma_cc_read32(cc, BCMA_CC_CHIPCTL_DATA); + value &= mask; + value |= set; + bcma_cc_write32(cc, BCMA_CC_CHIPCTL_DATA, value); + bcma_cc_read32(cc, BCMA_CC_CHIPCTL_DATA); +} + +static void bcma_pmu_pll_init(struct bcma_drv_cc *cc) +{ + struct bcma_bus *bus = cc->core->bus; + + switch (bus->chipinfo.id) { + case 0x4313: + case 0x4331: + case 43224: + case 43225: + break; + default: + pr_err("PLL init unknown for device 0x%04X\n", + bus->chipinfo.id); + } +} + +static void bcma_pmu_resources_init(struct bcma_drv_cc *cc) +{ + struct bcma_bus *bus = cc->core->bus; + u32 min_msk = 0, max_msk = 0; + + switch (bus->chipinfo.id) { + case 0x4313: + min_msk = 0x200D; + max_msk = 0xFFFF; + break; + case 43224: + break; + default: + pr_err("PMU resource config unknown for device 0x%04X\n", + bus->chipinfo.id); + } + + /* Set the resource masks. */ + if (min_msk) + bcma_cc_write32(cc, BCMA_CC_PMU_MINRES_MSK, min_msk); + if (max_msk) + bcma_cc_write32(cc, BCMA_CC_PMU_MAXRES_MSK, max_msk); +} + +void bcma_pmu_swreg_init(struct bcma_drv_cc *cc) +{ + struct bcma_bus *bus = cc->core->bus; + + switch (bus->chipinfo.id) { + case 0x4313: + case 0x4331: + case 43224: + break; + default: + pr_err("PMU switch/regulators init unknown for device " + "0x%04X\n", bus->chipinfo.id); + } +} + +void bcma_pmu_workarounds(struct bcma_drv_cc *cc) +{ + struct bcma_bus *bus = cc->core->bus; + + switch (bus->chipinfo.id) { + case 0x4313: + bcma_chipco_chipctl_maskset(cc, 0, ~0, 0x7); + break; + case 0x4331: + pr_err("Enabling Ext PA lines not implemented\n"); + break; + case 43224: + if (bus->chipinfo.rev == 0) { + pr_err("Workarounds for 43224 rev 0 not fully " + "implemented\n"); + bcma_chipco_chipctl_maskset(cc, 0, ~0, 0xF0); + } else { + bcma_chipco_chipctl_maskset(cc, 0, ~0, 0xF0); + } + break; + default: + pr_err("Workarounds unknown for device 0x%04X\n", + bus->chipinfo.id); + } +} + +void bcma_pmu_init(struct bcma_drv_cc *cc) +{ + u32 pmucap; + + pmucap = bcma_cc_read32(cc, BCMA_CC_PMU_CAP); + cc->pmu.rev = (pmucap & BCMA_CC_PMU_CAP_REVISION); + + pr_debug("Found rev %u PMU (capabilities 0x%08X)\n", cc->pmu.rev, + pmucap); + + if (cc->pmu.rev == 1) + bcma_cc_mask32(cc, BCMA_CC_PMU_CTL, + ~BCMA_CC_PMU_CTL_NOILPONW); + else + bcma_cc_set32(cc, BCMA_CC_PMU_CTL, + BCMA_CC_PMU_CTL_NOILPONW); + + if (cc->core->id.id == 0x4329 && cc->core->id.rev == 2) + pr_err("Fix for 4329b0 bad LPOM state not implemented!\n"); + + bcma_pmu_pll_init(cc); + bcma_pmu_resources_init(cc); + bcma_pmu_swreg_init(cc); + bcma_pmu_workarounds(cc); +} diff --git a/drivers/bcma/driver_pci.c b/drivers/bcma/driver_pci.c new file mode 100644 index 000000000000..b98b8359bef5 --- /dev/null +++ b/drivers/bcma/driver_pci.c @@ -0,0 +1,163 @@ +/* + * Broadcom specific AMBA + * PCI Core + * + * Copyright 2005, Broadcom Corporation + * Copyright 2006, 2007, Michael Buesch + * + * Licensed under the GNU/GPL. See COPYING for details. + */ + +#include "bcma_private.h" +#include + +/************************************************** + * R/W ops. 
+ **************************************************/ + +static u32 bcma_pcie_read(struct bcma_drv_pci *pc, u32 address) +{ + pcicore_write32(pc, 0x130, address); + pcicore_read32(pc, 0x130); + return pcicore_read32(pc, 0x134); +} + +#if 0 +static void bcma_pcie_write(struct bcma_drv_pci *pc, u32 address, u32 data) +{ + pcicore_write32(pc, 0x130, address); + pcicore_read32(pc, 0x130); + pcicore_write32(pc, 0x134, data); +} +#endif + +static void bcma_pcie_mdio_set_phy(struct bcma_drv_pci *pc, u8 phy) +{ + const u16 mdio_control = 0x128; + const u16 mdio_data = 0x12C; + u32 v; + int i; + + v = (1 << 30); /* Start of Transaction */ + v |= (1 << 28); /* Write Transaction */ + v |= (1 << 17); /* Turnaround */ + v |= (0x1F << 18); + v |= (phy << 4); + pcicore_write32(pc, mdio_data, v); + + udelay(10); + for (i = 0; i < 200; i++) { + v = pcicore_read32(pc, mdio_control); + if (v & 0x100 /* Trans complete */) + break; + msleep(1); + } +} + +static u16 bcma_pcie_mdio_read(struct bcma_drv_pci *pc, u8 device, u8 address) +{ + const u16 mdio_control = 0x128; + const u16 mdio_data = 0x12C; + int max_retries = 10; + u16 ret = 0; + u32 v; + int i; + + v = 0x80; /* Enable Preamble Sequence */ + v |= 0x2; /* MDIO Clock Divisor */ + pcicore_write32(pc, mdio_control, v); + + if (pc->core->id.rev >= 10) { + max_retries = 200; + bcma_pcie_mdio_set_phy(pc, device); + } + + v = (1 << 30); /* Start of Transaction */ + v |= (1 << 29); /* Read Transaction */ + v |= (1 << 17); /* Turnaround */ + if (pc->core->id.rev < 10) + v |= (u32)device << 22; + v |= (u32)address << 18; + pcicore_write32(pc, mdio_data, v); + /* Wait for the device to complete the transaction */ + udelay(10); + for (i = 0; i < 200; i++) { + v = pcicore_read32(pc, mdio_control); + if (v & 0x100 /* Trans complete */) { + udelay(10); + ret = pcicore_read32(pc, mdio_data); + break; + } + msleep(1); + } + pcicore_write32(pc, mdio_control, 0); + return ret; +} + +static void bcma_pcie_mdio_write(struct bcma_drv_pci *pc, u8 device, + u8 address, u16 data) +{ + const u16 mdio_control = 0x128; + const u16 mdio_data = 0x12C; + int max_retries = 10; + u32 v; + int i; + + v = 0x80; /* Enable Preamble Sequence */ + v |= 0x2; /* MDIO Clock Divisor */ + pcicore_write32(pc, mdio_control, v); + + if (pc->core->id.rev >= 10) { + max_retries = 200; + bcma_pcie_mdio_set_phy(pc, device); + } + + v = (1 << 30); /* Start of Transaction */ + v |= (1 << 28); /* Write Transaction */ + v |= (1 << 17); /* Turnaround */ + if (pc->core->id.rev < 10) + v |= (u32)device << 22; + v |= (u32)address << 18; + v |= data; + pcicore_write32(pc, mdio_data, v); + /* Wait for the device to complete the transaction */ + udelay(10); + for (i = 0; i < max_retries; i++) { + v = pcicore_read32(pc, mdio_control); + if (v & 0x100 /* Trans complete */) + break; + msleep(1); + } + pcicore_write32(pc, mdio_control, 0); +} + +/************************************************** + * Workarounds. + **************************************************/ + +static u8 bcma_pcicore_polarity_workaround(struct bcma_drv_pci *pc) +{ + return (bcma_pcie_read(pc, 0x204) & 0x10) ? 
0xC0 : 0x80; +} + +static void bcma_pcicore_serdes_workaround(struct bcma_drv_pci *pc) +{ + const u8 serdes_pll_device = 0x1D; + const u8 serdes_rx_device = 0x1F; + u16 tmp; + + bcma_pcie_mdio_write(pc, serdes_rx_device, 1 /* Control */, + bcma_pcicore_polarity_workaround(pc)); + tmp = bcma_pcie_mdio_read(pc, serdes_pll_device, 1 /* Control */); + if (tmp & 0x4000) + bcma_pcie_mdio_write(pc, serdes_pll_device, 1, tmp & ~0x4000); +} + +/************************************************** + * Init. + **************************************************/ + +void bcma_core_pci_init(struct bcma_drv_pci *pc) +{ + bcma_pcicore_serdes_workaround(pc); +} diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c new file mode 100644 index 000000000000..99dd36e8500b --- /dev/null +++ b/drivers/bcma/host_pci.c @@ -0,0 +1,196 @@ +/* + * Broadcom specific AMBA + * PCI Host + * + * Licensed under the GNU/GPL. See COPYING for details. + */ + +#include "bcma_private.h" +#include +#include + +static void bcma_host_pci_switch_core(struct bcma_device *core) +{ + pci_write_config_dword(core->bus->host_pci, BCMA_PCI_BAR0_WIN, + core->addr); + pci_write_config_dword(core->bus->host_pci, BCMA_PCI_BAR0_WIN2, + core->wrap); + core->bus->mapped_core = core; + pr_debug("Switched to core: 0x%X\n", core->id.id); +} + +static u8 bcma_host_pci_read8(struct bcma_device *core, u16 offset) +{ + if (core->bus->mapped_core != core) + bcma_host_pci_switch_core(core); + return ioread8(core->bus->mmio + offset); +} + +static u16 bcma_host_pci_read16(struct bcma_device *core, u16 offset) +{ + if (core->bus->mapped_core != core) + bcma_host_pci_switch_core(core); + return ioread16(core->bus->mmio + offset); +} + +static u32 bcma_host_pci_read32(struct bcma_device *core, u16 offset) +{ + if (core->bus->mapped_core != core) + bcma_host_pci_switch_core(core); + return ioread32(core->bus->mmio + offset); +} + +static void bcma_host_pci_write8(struct bcma_device *core, u16 offset, + u8 value) +{ + if (core->bus->mapped_core != core) + bcma_host_pci_switch_core(core); + iowrite8(value, core->bus->mmio + offset); +} + +static void bcma_host_pci_write16(struct bcma_device *core, u16 offset, + u16 value) +{ + if (core->bus->mapped_core != core) + bcma_host_pci_switch_core(core); + iowrite16(value, core->bus->mmio + offset); +} + +static void bcma_host_pci_write32(struct bcma_device *core, u16 offset, + u32 value) +{ + if (core->bus->mapped_core != core) + bcma_host_pci_switch_core(core); + iowrite32(value, core->bus->mmio + offset); +} + +static u32 bcma_host_pci_aread32(struct bcma_device *core, u16 offset) +{ + if (core->bus->mapped_core != core) + bcma_host_pci_switch_core(core); + return ioread32(core->bus->mmio + (1 * BCMA_CORE_SIZE) + offset); +} + +static void bcma_host_pci_awrite32(struct bcma_device *core, u16 offset, + u32 value) +{ + if (core->bus->mapped_core != core) + bcma_host_pci_switch_core(core); + iowrite32(value, core->bus->mmio + (1 * BCMA_CORE_SIZE) + offset); +} + +const struct bcma_host_ops bcma_host_pci_ops = { + .read8 = bcma_host_pci_read8, + .read16 = bcma_host_pci_read16, + .read32 = bcma_host_pci_read32, + .write8 = bcma_host_pci_write8, + .write16 = bcma_host_pci_write16, + .write32 = bcma_host_pci_write32, + .aread32 = bcma_host_pci_aread32, + .awrite32 = bcma_host_pci_awrite32, +}; + +static int bcma_host_pci_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + struct bcma_bus *bus; + int err = -ENOMEM; + const char *name; + u32 val; + + /* Alloc */ + bus = kzalloc(sizeof(*bus), GFP_KERNEL); 
+ if (!bus) + goto out; + + /* Basic PCI configuration */ + err = pci_enable_device(dev); + if (err) + goto err_kfree_bus; + + name = dev_name(&dev->dev); + if (dev->driver && dev->driver->name) + name = dev->driver->name; + err = pci_request_regions(dev, name); + if (err) + goto err_pci_disable; + pci_set_master(dev); + + /* Disable the RETRY_TIMEOUT register (0x41) to keep + * PCI Tx retries from interfering with C3 CPU state */ + pci_read_config_dword(dev, 0x40, &val); + if ((val & 0x0000ff00) != 0) + pci_write_config_dword(dev, 0x40, val & 0xffff00ff); + + /* SSB needed additional powering up, do we have any AMBA PCI cards? */ + if (!pci_is_pcie(dev)) + pr_err("PCI card detected, report problems.\n"); + + /* Map MMIO */ + err = -ENOMEM; + bus->mmio = pci_iomap(dev, 0, ~0UL); + if (!bus->mmio) + goto err_pci_release_regions; + + /* Host specific */ + bus->host_pci = dev; + bus->hosttype = BCMA_HOSTTYPE_PCI; + bus->ops = &bcma_host_pci_ops; + + /* Register */ + err = bcma_bus_register(bus); + if (err) + goto err_pci_unmap_mmio; + + pci_set_drvdata(dev, bus); + +out: + return err; + +err_pci_unmap_mmio: + pci_iounmap(dev, bus->mmio); +err_pci_release_regions: + pci_release_regions(dev); +err_pci_disable: + pci_disable_device(dev); +err_kfree_bus: + kfree(bus); + return err; +} + +static void bcma_host_pci_remove(struct pci_dev *dev) +{ + struct bcma_bus *bus = pci_get_drvdata(dev); + + bcma_bus_unregister(bus); + pci_iounmap(dev, bus->mmio); + pci_release_regions(dev); + pci_disable_device(dev); + kfree(bus); + pci_set_drvdata(dev, NULL); +} + +static DEFINE_PCI_DEVICE_TABLE(bcma_pci_bridge_tbl) = { + { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4331) }, + { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4353) }, + { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4727) }, + { 0, }, +}; +MODULE_DEVICE_TABLE(pci, bcma_pci_bridge_tbl); + +static struct pci_driver bcma_pci_bridge_driver = { + .name = "bcma-pci-bridge", + .id_table = bcma_pci_bridge_tbl, + .probe = bcma_host_pci_probe, + .remove = bcma_host_pci_remove, +}; + +int __init bcma_host_pci_init(void) +{ + return pci_register_driver(&bcma_pci_bridge_driver); +} + +void __exit bcma_host_pci_exit(void) +{ + pci_unregister_driver(&bcma_pci_bridge_driver); +} diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c new file mode 100644 index 000000000000..be52344ed19d --- /dev/null +++ b/drivers/bcma/main.c @@ -0,0 +1,247 @@ +/* + * Broadcom specific AMBA + * Bus subsystem + * + * Licensed under the GNU/GPL. See COPYING for details. 
+ */ + +#include "bcma_private.h" +#include + +MODULE_DESCRIPTION("Broadcom's specific AMBA driver"); +MODULE_LICENSE("GPL"); + +static int bcma_bus_match(struct device *dev, struct device_driver *drv); +static int bcma_device_probe(struct device *dev); +static int bcma_device_remove(struct device *dev); + +static ssize_t manuf_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct bcma_device *core = container_of(dev, struct bcma_device, dev); + return sprintf(buf, "0x%03X\n", core->id.manuf); +} +static ssize_t id_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct bcma_device *core = container_of(dev, struct bcma_device, dev); + return sprintf(buf, "0x%03X\n", core->id.id); +} +static ssize_t rev_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct bcma_device *core = container_of(dev, struct bcma_device, dev); + return sprintf(buf, "0x%02X\n", core->id.rev); +} +static ssize_t class_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct bcma_device *core = container_of(dev, struct bcma_device, dev); + return sprintf(buf, "0x%X\n", core->id.class); +} +static struct device_attribute bcma_device_attrs[] = { + __ATTR_RO(manuf), + __ATTR_RO(id), + __ATTR_RO(rev), + __ATTR_RO(class), + __ATTR_NULL, +}; + +static struct bus_type bcma_bus_type = { + .name = "bcma", + .match = bcma_bus_match, + .probe = bcma_device_probe, + .remove = bcma_device_remove, + .dev_attrs = bcma_device_attrs, +}; + +static struct bcma_device *bcma_find_core(struct bcma_bus *bus, u16 coreid) +{ + struct bcma_device *core; + + list_for_each_entry(core, &bus->cores, list) { + if (core->id.id == coreid) + return core; + } + return NULL; +} + +static void bcma_release_core_dev(struct device *dev) +{ + struct bcma_device *core = container_of(dev, struct bcma_device, dev); + kfree(core); +} + +static int bcma_register_cores(struct bcma_bus *bus) +{ + struct bcma_device *core; + int err, dev_id = 0; + + list_for_each_entry(core, &bus->cores, list) { + /* We support that cores ourself */ + switch (core->id.id) { + case BCMA_CORE_CHIPCOMMON: + case BCMA_CORE_PCI: + case BCMA_CORE_PCIE: + continue; + } + + core->dev.release = bcma_release_core_dev; + core->dev.bus = &bcma_bus_type; + dev_set_name(&core->dev, "bcma%d:%d", 0/*bus->num*/, dev_id); + + switch (bus->hosttype) { + case BCMA_HOSTTYPE_PCI: + core->dev.parent = &bus->host_pci->dev; + break; + case BCMA_HOSTTYPE_NONE: + case BCMA_HOSTTYPE_SDIO: + break; + } + + err = device_register(&core->dev); + if (err) { + pr_err("Could not register dev for core 0x%03X\n", + core->id.id); + continue; + } + core->dev_registered = true; + dev_id++; + } + + return 0; +} + +static void bcma_unregister_cores(struct bcma_bus *bus) +{ + struct bcma_device *core; + + list_for_each_entry(core, &bus->cores, list) { + if (core->dev_registered) + device_unregister(&core->dev); + } +} + +int bcma_bus_register(struct bcma_bus *bus) +{ + int err; + struct bcma_device *core; + + /* Scan for devices (cores) */ + err = bcma_bus_scan(bus); + if (err) { + pr_err("Failed to scan: %d\n", err); + return -1; + } + + /* Init CC core */ + core = bcma_find_core(bus, BCMA_CORE_CHIPCOMMON); + if (core) { + bus->drv_cc.core = core; + bcma_core_chipcommon_init(&bus->drv_cc); + } + + /* Init PCIE core */ + core = bcma_find_core(bus, BCMA_CORE_PCIE); + if (core) { + bus->drv_pci.core = core; + bcma_core_pci_init(&bus->drv_pci); + } + + /* Register found cores */ + bcma_register_cores(bus); + + pr_info("Bus 
registered\n"); + + return 0; +} +EXPORT_SYMBOL_GPL(bcma_bus_register); + +void bcma_bus_unregister(struct bcma_bus *bus) +{ + bcma_unregister_cores(bus); +} +EXPORT_SYMBOL_GPL(bcma_bus_unregister); + +int __bcma_driver_register(struct bcma_driver *drv, struct module *owner) +{ + drv->drv.name = drv->name; + drv->drv.bus = &bcma_bus_type; + drv->drv.owner = owner; + + return driver_register(&drv->drv); +} +EXPORT_SYMBOL_GPL(__bcma_driver_register); + +void bcma_driver_unregister(struct bcma_driver *drv) +{ + driver_unregister(&drv->drv); +} +EXPORT_SYMBOL_GPL(bcma_driver_unregister); + +static int bcma_bus_match(struct device *dev, struct device_driver *drv) +{ + struct bcma_device *core = container_of(dev, struct bcma_device, dev); + struct bcma_driver *adrv = container_of(drv, struct bcma_driver, drv); + const struct bcma_device_id *cid = &core->id; + const struct bcma_device_id *did; + + for (did = adrv->id_table; did->manuf || did->id || did->rev; did++) { + if ((did->manuf == cid->manuf || did->manuf == BCMA_ANY_MANUF) && + (did->id == cid->id || did->id == BCMA_ANY_ID) && + (did->rev == cid->rev || did->rev == BCMA_ANY_REV) && + (did->class == cid->class || did->class == BCMA_ANY_CLASS)) + return 1; + } + return 0; +} + +static int bcma_device_probe(struct device *dev) +{ + struct bcma_device *core = container_of(dev, struct bcma_device, dev); + struct bcma_driver *adrv = container_of(dev->driver, struct bcma_driver, + drv); + int err = 0; + + if (adrv->probe) + err = adrv->probe(core); + + return err; +} + +static int bcma_device_remove(struct device *dev) +{ + struct bcma_device *core = container_of(dev, struct bcma_device, dev); + struct bcma_driver *adrv = container_of(dev->driver, struct bcma_driver, + drv); + + if (adrv->remove) + adrv->remove(core); + + return 0; +} + +static int __init bcma_modinit(void) +{ + int err; + + err = bus_register(&bcma_bus_type); + if (err) + return err; + +#ifdef CONFIG_BCMA_HOST_PCI + err = bcma_host_pci_init(); + if (err) { + pr_err("PCI host initialization failed\n"); + err = 0; + } +#endif + + return err; +} +fs_initcall(bcma_modinit); + +static void __exit bcma_modexit(void) +{ +#ifdef CONFIG_BCMA_HOST_PCI + bcma_host_pci_exit(); +#endif + bus_unregister(&bcma_bus_type); +} +module_exit(bcma_modexit) diff --git a/drivers/bcma/scan.c b/drivers/bcma/scan.c new file mode 100644 index 000000000000..40d7dcce8933 --- /dev/null +++ b/drivers/bcma/scan.c @@ -0,0 +1,360 @@ +/* + * Broadcom specific AMBA + * Bus scanning + * + * Licensed under the GNU/GPL. See COPYING for details. 
+ */ + +#include "scan.h" +#include "bcma_private.h" + +#include +#include +#include +#include +#include +#include + +struct bcma_device_id_name { + u16 id; + const char *name; +}; +struct bcma_device_id_name bcma_device_names[] = { + { BCMA_CORE_OOB_ROUTER, "OOB Router" }, + { BCMA_CORE_INVALID, "Invalid" }, + { BCMA_CORE_CHIPCOMMON, "ChipCommon" }, + { BCMA_CORE_ILINE20, "ILine 20" }, + { BCMA_CORE_SRAM, "SRAM" }, + { BCMA_CORE_SDRAM, "SDRAM" }, + { BCMA_CORE_PCI, "PCI" }, + { BCMA_CORE_MIPS, "MIPS" }, + { BCMA_CORE_ETHERNET, "Fast Ethernet" }, + { BCMA_CORE_V90, "V90" }, + { BCMA_CORE_USB11_HOSTDEV, "USB 1.1 Hostdev" }, + { BCMA_CORE_ADSL, "ADSL" }, + { BCMA_CORE_ILINE100, "ILine 100" }, + { BCMA_CORE_IPSEC, "IPSEC" }, + { BCMA_CORE_UTOPIA, "UTOPIA" }, + { BCMA_CORE_PCMCIA, "PCMCIA" }, + { BCMA_CORE_INTERNAL_MEM, "Internal Memory" }, + { BCMA_CORE_MEMC_SDRAM, "MEMC SDRAM" }, + { BCMA_CORE_OFDM, "OFDM" }, + { BCMA_CORE_EXTIF, "EXTIF" }, + { BCMA_CORE_80211, "IEEE 802.11" }, + { BCMA_CORE_PHY_A, "PHY A" }, + { BCMA_CORE_PHY_B, "PHY B" }, + { BCMA_CORE_PHY_G, "PHY G" }, + { BCMA_CORE_MIPS_3302, "MIPS 3302" }, + { BCMA_CORE_USB11_HOST, "USB 1.1 Host" }, + { BCMA_CORE_USB11_DEV, "USB 1.1 Device" }, + { BCMA_CORE_USB20_HOST, "USB 2.0 Host" }, + { BCMA_CORE_USB20_DEV, "USB 2.0 Device" }, + { BCMA_CORE_SDIO_HOST, "SDIO Host" }, + { BCMA_CORE_ROBOSWITCH, "Roboswitch" }, + { BCMA_CORE_PARA_ATA, "PATA" }, + { BCMA_CORE_SATA_XORDMA, "SATA XOR-DMA" }, + { BCMA_CORE_ETHERNET_GBIT, "GBit Ethernet" }, + { BCMA_CORE_PCIE, "PCIe" }, + { BCMA_CORE_PHY_N, "PHY N" }, + { BCMA_CORE_SRAM_CTL, "SRAM Controller" }, + { BCMA_CORE_MINI_MACPHY, "Mini MACPHY" }, + { BCMA_CORE_ARM_1176, "ARM 1176" }, + { BCMA_CORE_ARM_7TDMI, "ARM 7TDMI" }, + { BCMA_CORE_PHY_LP, "PHY LP" }, + { BCMA_CORE_PMU, "PMU" }, + { BCMA_CORE_PHY_SSN, "PHY SSN" }, + { BCMA_CORE_SDIO_DEV, "SDIO Device" }, + { BCMA_CORE_ARM_CM3, "ARM CM3" }, + { BCMA_CORE_PHY_HT, "PHY HT" }, + { BCMA_CORE_MIPS_74K, "MIPS 74K" }, + { BCMA_CORE_MAC_GBIT, "GBit MAC" }, + { BCMA_CORE_DDR12_MEM_CTL, "DDR1/DDR2 Memory Controller" }, + { BCMA_CORE_PCIE_RC, "PCIe Root Complex" }, + { BCMA_CORE_OCP_OCP_BRIDGE, "OCP to OCP Bridge" }, + { BCMA_CORE_SHARED_COMMON, "Common Shared" }, + { BCMA_CORE_OCP_AHB_BRIDGE, "OCP to AHB Bridge" }, + { BCMA_CORE_SPI_HOST, "SPI Host" }, + { BCMA_CORE_I2S, "I2S" }, + { BCMA_CORE_SDR_DDR1_MEM_CTL, "SDR/DDR1 Memory Controller" }, + { BCMA_CORE_SHIM, "SHIM" }, + { BCMA_CORE_DEFAULT, "Default" }, +}; +const char *bcma_device_name(struct bcma_device_id *id) +{ + int i; + + if (id->manuf == BCMA_MANUF_BCM) { + for (i = 0; i < ARRAY_SIZE(bcma_device_names); i++) { + if (bcma_device_names[i].id == id->id) + return bcma_device_names[i].name; + } + } + return "UNKNOWN"; +} + +static u32 bcma_scan_read32(struct bcma_bus *bus, u8 current_coreidx, + u16 offset) +{ + return readl(bus->mmio + offset); +} + +static void bcma_scan_switch_core(struct bcma_bus *bus, u32 addr) +{ + if (bus->hosttype == BCMA_HOSTTYPE_PCI) + pci_write_config_dword(bus->host_pci, BCMA_PCI_BAR0_WIN, + addr); +} + +static u32 bcma_erom_get_ent(struct bcma_bus *bus, u32 **eromptr) +{ + u32 ent = readl(*eromptr); + (*eromptr)++; + return ent; +} + +static void bcma_erom_push_ent(u32 **eromptr) +{ + (*eromptr)--; +} + +static s32 bcma_erom_get_ci(struct bcma_bus *bus, u32 **eromptr) +{ + u32 ent = bcma_erom_get_ent(bus, eromptr); + if (!(ent & SCAN_ER_VALID)) + return -ENOENT; + if ((ent & SCAN_ER_TAG) != SCAN_ER_TAG_CI) + return -ENOENT; + return ent; +} + +static bool 
bcma_erom_is_end(struct bcma_bus *bus, u32 **eromptr) +{ + u32 ent = bcma_erom_get_ent(bus, eromptr); + bcma_erom_push_ent(eromptr); + return (ent == (SCAN_ER_TAG_END | SCAN_ER_VALID)); +} + +static bool bcma_erom_is_bridge(struct bcma_bus *bus, u32 **eromptr) +{ + u32 ent = bcma_erom_get_ent(bus, eromptr); + bcma_erom_push_ent(eromptr); + return (((ent & SCAN_ER_VALID)) && + ((ent & SCAN_ER_TAGX) == SCAN_ER_TAG_ADDR) && + ((ent & SCAN_ADDR_TYPE) == SCAN_ADDR_TYPE_BRIDGE)); +} + +static void bcma_erom_skip_component(struct bcma_bus *bus, u32 **eromptr) +{ + u32 ent; + while (1) { + ent = bcma_erom_get_ent(bus, eromptr); + if ((ent & SCAN_ER_VALID) && + ((ent & SCAN_ER_TAG) == SCAN_ER_TAG_CI)) + break; + if (ent == (SCAN_ER_TAG_END | SCAN_ER_VALID)) + break; + } + bcma_erom_push_ent(eromptr); +} + +static s32 bcma_erom_get_mst_port(struct bcma_bus *bus, u32 **eromptr) +{ + u32 ent = bcma_erom_get_ent(bus, eromptr); + if (!(ent & SCAN_ER_VALID)) + return -ENOENT; + if ((ent & SCAN_ER_TAG) != SCAN_ER_TAG_MP) + return -ENOENT; + return ent; +} + +static s32 bcma_erom_get_addr_desc(struct bcma_bus *bus, u32 **eromptr, + u32 type, u8 port) +{ + u32 addrl, addrh, sizel, sizeh = 0; + u32 size; + + u32 ent = bcma_erom_get_ent(bus, eromptr); + if ((!(ent & SCAN_ER_VALID)) || + ((ent & SCAN_ER_TAGX) != SCAN_ER_TAG_ADDR) || + ((ent & SCAN_ADDR_TYPE) != type) || + (((ent & SCAN_ADDR_PORT) >> SCAN_ADDR_PORT_SHIFT) != port)) { + bcma_erom_push_ent(eromptr); + return -EINVAL; + } + + addrl = ent & SCAN_ADDR_ADDR; + if (ent & SCAN_ADDR_AG32) + addrh = bcma_erom_get_ent(bus, eromptr); + else + addrh = 0; + + if ((ent & SCAN_ADDR_SZ) == SCAN_ADDR_SZ_SZD) { + size = bcma_erom_get_ent(bus, eromptr); + sizel = size & SCAN_SIZE_SZ; + if (size & SCAN_SIZE_SG32) + sizeh = bcma_erom_get_ent(bus, eromptr); + } else + sizel = SCAN_ADDR_SZ_BASE << + ((ent & SCAN_ADDR_SZ) >> SCAN_ADDR_SZ_SHIFT); + + return addrl; +} + +int bcma_bus_scan(struct bcma_bus *bus) +{ + u32 erombase; + u32 __iomem *eromptr, *eromend; + + s32 cia, cib; + u8 ports[2], wrappers[2]; + + s32 tmp; + u8 i, j; + + int err; + + INIT_LIST_HEAD(&bus->cores); + bus->nr_cores = 0; + + bcma_scan_switch_core(bus, BCMA_ADDR_BASE); + + tmp = bcma_scan_read32(bus, 0, BCMA_CC_ID); + bus->chipinfo.id = (tmp & BCMA_CC_ID_ID) >> BCMA_CC_ID_ID_SHIFT; + bus->chipinfo.rev = (tmp & BCMA_CC_ID_REV) >> BCMA_CC_ID_REV_SHIFT; + bus->chipinfo.pkg = (tmp & BCMA_CC_ID_PKG) >> BCMA_CC_ID_PKG_SHIFT; + + erombase = bcma_scan_read32(bus, 0, BCMA_CC_EROM); + eromptr = bus->mmio; + eromend = eromptr + BCMA_CORE_SIZE / sizeof(u32); + + bcma_scan_switch_core(bus, erombase); + + while (eromptr < eromend) { + struct bcma_device *core = kzalloc(sizeof(*core), GFP_KERNEL); + if (!core) + return -ENOMEM; + INIT_LIST_HEAD(&core->list); + core->bus = bus; + + /* get CIs */ + cia = bcma_erom_get_ci(bus, &eromptr); + if (cia < 0) { + bcma_erom_push_ent(&eromptr); + if (bcma_erom_is_end(bus, &eromptr)) + break; + err= -EILSEQ; + goto out; + } + cib = bcma_erom_get_ci(bus, &eromptr); + if (cib < 0) { + err= -EILSEQ; + goto out; + } + + /* parse CIs */ + core->id.class = (cia & SCAN_CIA_CLASS) >> SCAN_CIA_CLASS_SHIFT; + core->id.id = (cia & SCAN_CIA_ID) >> SCAN_CIA_ID_SHIFT; + core->id.manuf = (cia & SCAN_CIA_MANUF) >> SCAN_CIA_MANUF_SHIFT; + ports[0] = (cib & SCAN_CIB_NMP) >> SCAN_CIB_NMP_SHIFT; + ports[1] = (cib & SCAN_CIB_NSP) >> SCAN_CIB_NSP_SHIFT; + wrappers[0] = (cib & SCAN_CIB_NMW) >> SCAN_CIB_NMW_SHIFT; + wrappers[1] = (cib & SCAN_CIB_NSW) >> SCAN_CIB_NSW_SHIFT; + core->id.rev = (cib & 
SCAN_CIB_REV) >> SCAN_CIB_REV_SHIFT; + + if (((core->id.manuf == BCMA_MANUF_ARM) && + (core->id.id == 0xFFF)) || + (ports[1] == 0)) { + bcma_erom_skip_component(bus, &eromptr); + continue; + } + + /* check if component is a core at all */ + if (wrappers[0] + wrappers[1] == 0) { + /* we could save addrl of the router + if (cid == BCMA_CORE_OOB_ROUTER) + */ + bcma_erom_skip_component(bus, &eromptr); + continue; + } + + if (bcma_erom_is_bridge(bus, &eromptr)) { + bcma_erom_skip_component(bus, &eromptr); + continue; + } + + /* get & parse master ports */ + for (i = 0; i < ports[0]; i++) { + u32 mst_port_d = bcma_erom_get_mst_port(bus, &eromptr); + if (mst_port_d < 0) { + err= -EILSEQ; + goto out; + } + } + + /* get & parse slave ports */ + for (i = 0; i < ports[1]; i++) { + for (j = 0; ; j++) { + tmp = bcma_erom_get_addr_desc(bus, &eromptr, + SCAN_ADDR_TYPE_SLAVE, i); + if (tmp < 0) { + /* no more entries for port _i_ */ + /* pr_debug("erom: slave port %d " + * "has %d descriptors\n", i, j); */ + break; + } else { + if (i == 0 && j == 0) + core->addr = tmp; + } + } + } + + /* get & parse master wrappers */ + for (i = 0; i < wrappers[0]; i++) { + for (j = 0; ; j++) { + tmp = bcma_erom_get_addr_desc(bus, &eromptr, + SCAN_ADDR_TYPE_MWRAP, i); + if (tmp < 0) { + /* no more entries for port _i_ */ + /* pr_debug("erom: master wrapper %d " + * "has %d descriptors\n", i, j); */ + break; + } else { + if (i == 0 && j == 0) + core->wrap = tmp; + } + } + } + + /* get & parse slave wrappers */ + for (i = 0; i < wrappers[1]; i++) { + u8 hack = (ports[1] == 1) ? 0 : 1; + for (j = 0; ; j++) { + tmp = bcma_erom_get_addr_desc(bus, &eromptr, + SCAN_ADDR_TYPE_SWRAP, i + hack); + if (tmp < 0) { + /* no more entries for port _i_ */ + /* pr_debug("erom: master wrapper %d " + * has %d descriptors\n", i, j); */ + break; + } else { + if (wrappers[0] == 0 && !i && !j) + core->wrap = tmp; + } + } + } + + pr_info("Core %d found: %s " + "(manuf 0x%03X, id 0x%03X, rev 0x%02X, class 0x%X)\n", + bus->nr_cores, bcma_device_name(&core->id), + core->id.manuf, core->id.id, core->id.rev, + core->id.class); + + core->core_index = bus->nr_cores++; + list_add(&core->list, &bus->cores); + continue; +out: + return err; + } + + return 0; +} diff --git a/drivers/bcma/scan.h b/drivers/bcma/scan.h new file mode 100644 index 000000000000..113e6a66884c --- /dev/null +++ b/drivers/bcma/scan.h @@ -0,0 +1,56 @@ +#ifndef BCMA_SCAN_H_ +#define BCMA_SCAN_H_ + +#define BCMA_ADDR_BASE 0x18000000 +#define BCMA_WRAP_BASE 0x18100000 + +#define SCAN_ER_VALID 0x00000001 +#define SCAN_ER_TAGX 0x00000006 /* we have to ignore 0x8 bit when checking tag for SCAN_ER_TAG_ADDR */ +#define SCAN_ER_TAG 0x0000000E +#define SCAN_ER_TAG_CI 0x00000000 +#define SCAN_ER_TAG_MP 0x00000002 +#define SCAN_ER_TAG_ADDR 0x00000004 +#define SCAN_ER_TAG_END 0x0000000E +#define SCAN_ER_BAD 0xFFFFFFFF + +#define SCAN_CIA_CLASS 0x000000F0 +#define SCAN_CIA_CLASS_SHIFT 4 +#define SCAN_CIA_ID 0x000FFF00 +#define SCAN_CIA_ID_SHIFT 8 +#define SCAN_CIA_MANUF 0xFFF00000 +#define SCAN_CIA_MANUF_SHIFT 20 + +#define SCAN_CIB_NMP 0x000001F0 +#define SCAN_CIB_NMP_SHIFT 4 +#define SCAN_CIB_NSP 0x00003E00 +#define SCAN_CIB_NSP_SHIFT 9 +#define SCAN_CIB_NMW 0x0007C000 +#define SCAN_CIB_NMW_SHIFT 14 +#define SCAN_CIB_NSW 0x00F80000 +#define SCAN_CIB_NSW_SHIFT 17 +#define SCAN_CIB_REV 0xFF000000 +#define SCAN_CIB_REV_SHIFT 24 + +#define SCAN_ADDR_AG32 0x00000008 +#define SCAN_ADDR_SZ 0x00000030 +#define SCAN_ADDR_SZ_SHIFT 4 +#define SCAN_ADDR_SZ_4K 0x00000000 +#define SCAN_ADDR_SZ_8K 0x00000010 
+#define SCAN_ADDR_SZ_16K 0x00000020 +#define SCAN_ADDR_SZ_SZD 0x00000030 +#define SCAN_ADDR_TYPE 0x000000C0 +#define SCAN_ADDR_TYPE_SLAVE 0x00000000 +#define SCAN_ADDR_TYPE_BRIDGE 0x00000040 +#define SCAN_ADDR_TYPE_SWRAP 0x00000080 +#define SCAN_ADDR_TYPE_MWRAP 0x000000C0 +#define SCAN_ADDR_PORT 0x00000F00 +#define SCAN_ADDR_PORT_SHIFT 8 +#define SCAN_ADDR_ADDR 0xFFFFF000 + +#define SCAN_ADDR_SZ_BASE 0x00001000 /* 4KB */ + +#define SCAN_SIZE_SZ_ALIGN 0x00000FFF +#define SCAN_SIZE_SZ 0xFFFFF000 +#define SCAN_SIZE_SG32 0x00000008 + +#endif /* BCMA_SCAN_H_ */ diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h new file mode 100644 index 000000000000..08763e4e848f --- /dev/null +++ b/include/linux/bcma/bcma.h @@ -0,0 +1,224 @@ +#ifndef LINUX_BCMA_H_ +#define LINUX_BCMA_H_ + +#include +#include + +#include +#include + +#include "bcma_regs.h" + +struct bcma_device; +struct bcma_bus; + +enum bcma_hosttype { + BCMA_HOSTTYPE_NONE, + BCMA_HOSTTYPE_PCI, + BCMA_HOSTTYPE_SDIO, +}; + +struct bcma_chipinfo { + u16 id; + u8 rev; + u8 pkg; +}; + +struct bcma_host_ops { + u8 (*read8)(struct bcma_device *core, u16 offset); + u16 (*read16)(struct bcma_device *core, u16 offset); + u32 (*read32)(struct bcma_device *core, u16 offset); + void (*write8)(struct bcma_device *core, u16 offset, u8 value); + void (*write16)(struct bcma_device *core, u16 offset, u16 value); + void (*write32)(struct bcma_device *core, u16 offset, u32 value); + /* Agent ops */ + u32 (*aread32)(struct bcma_device *core, u16 offset); + void (*awrite32)(struct bcma_device *core, u16 offset, u32 value); +}; + +/* Core manufacturers */ +#define BCMA_MANUF_ARM 0x43B +#define BCMA_MANUF_MIPS 0x4A7 +#define BCMA_MANUF_BCM 0x4BF + +/* Core class values. */ +#define BCMA_CL_SIM 0x0 +#define BCMA_CL_EROM 0x1 +#define BCMA_CL_CORESIGHT 0x9 +#define BCMA_CL_VERIF 0xB +#define BCMA_CL_OPTIMO 0xD +#define BCMA_CL_GEN 0xE +#define BCMA_CL_PRIMECELL 0xF + +/* Core-ID values. 
*/ +#define BCMA_CORE_OOB_ROUTER 0x367 /* Out of band */ +#define BCMA_CORE_INVALID 0x700 +#define BCMA_CORE_CHIPCOMMON 0x800 +#define BCMA_CORE_ILINE20 0x801 +#define BCMA_CORE_SRAM 0x802 +#define BCMA_CORE_SDRAM 0x803 +#define BCMA_CORE_PCI 0x804 +#define BCMA_CORE_MIPS 0x805 +#define BCMA_CORE_ETHERNET 0x806 +#define BCMA_CORE_V90 0x807 +#define BCMA_CORE_USB11_HOSTDEV 0x808 +#define BCMA_CORE_ADSL 0x809 +#define BCMA_CORE_ILINE100 0x80A +#define BCMA_CORE_IPSEC 0x80B +#define BCMA_CORE_UTOPIA 0x80C +#define BCMA_CORE_PCMCIA 0x80D +#define BCMA_CORE_INTERNAL_MEM 0x80E +#define BCMA_CORE_MEMC_SDRAM 0x80F +#define BCMA_CORE_OFDM 0x810 +#define BCMA_CORE_EXTIF 0x811 +#define BCMA_CORE_80211 0x812 +#define BCMA_CORE_PHY_A 0x813 +#define BCMA_CORE_PHY_B 0x814 +#define BCMA_CORE_PHY_G 0x815 +#define BCMA_CORE_MIPS_3302 0x816 +#define BCMA_CORE_USB11_HOST 0x817 +#define BCMA_CORE_USB11_DEV 0x818 +#define BCMA_CORE_USB20_HOST 0x819 +#define BCMA_CORE_USB20_DEV 0x81A +#define BCMA_CORE_SDIO_HOST 0x81B +#define BCMA_CORE_ROBOSWITCH 0x81C +#define BCMA_CORE_PARA_ATA 0x81D +#define BCMA_CORE_SATA_XORDMA 0x81E +#define BCMA_CORE_ETHERNET_GBIT 0x81F +#define BCMA_CORE_PCIE 0x820 +#define BCMA_CORE_PHY_N 0x821 +#define BCMA_CORE_SRAM_CTL 0x822 +#define BCMA_CORE_MINI_MACPHY 0x823 +#define BCMA_CORE_ARM_1176 0x824 +#define BCMA_CORE_ARM_7TDMI 0x825 +#define BCMA_CORE_PHY_LP 0x826 +#define BCMA_CORE_PMU 0x827 +#define BCMA_CORE_PHY_SSN 0x828 +#define BCMA_CORE_SDIO_DEV 0x829 +#define BCMA_CORE_ARM_CM3 0x82A +#define BCMA_CORE_PHY_HT 0x82B +#define BCMA_CORE_MIPS_74K 0x82C +#define BCMA_CORE_MAC_GBIT 0x82D +#define BCMA_CORE_DDR12_MEM_CTL 0x82E +#define BCMA_CORE_PCIE_RC 0x82F /* PCIe Root Complex */ +#define BCMA_CORE_OCP_OCP_BRIDGE 0x830 +#define BCMA_CORE_SHARED_COMMON 0x831 +#define BCMA_CORE_OCP_AHB_BRIDGE 0x832 +#define BCMA_CORE_SPI_HOST 0x833 +#define BCMA_CORE_I2S 0x834 +#define BCMA_CORE_SDR_DDR1_MEM_CTL 0x835 /* SDR/DDR1 memory controller core */ +#define BCMA_CORE_SHIM 0x837 /* SHIM component in ubus/6362 */ +#define BCMA_CORE_DEFAULT 0xFFF + +#define BCMA_MAX_NR_CORES 16 + +struct bcma_device { + struct bcma_bus *bus; + struct bcma_device_id id; + + struct device dev; + bool dev_registered; + + u8 core_index; + + u32 addr; + u32 wrap; + + void *drvdata; + struct list_head list; +}; + +static inline void *bcma_get_drvdata(struct bcma_device *core) +{ + return core->drvdata; +} +static inline void bcma_set_drvdata(struct bcma_device *core, void *drvdata) +{ + core->drvdata = drvdata; +} + +struct bcma_driver { + const char *name; + const struct bcma_device_id *id_table; + + int (*probe)(struct bcma_device *dev); + void (*remove)(struct bcma_device *dev); + int (*suspend)(struct bcma_device *dev, pm_message_t state); + int (*resume)(struct bcma_device *dev); + void (*shutdown)(struct bcma_device *dev); + + struct device_driver drv; +}; +extern +int __bcma_driver_register(struct bcma_driver *drv, struct module *owner); +static inline int bcma_driver_register(struct bcma_driver *drv) +{ + return __bcma_driver_register(drv, THIS_MODULE); +} +extern void bcma_driver_unregister(struct bcma_driver *drv); + +struct bcma_bus { + /* The MMIO area. 
*/ + void __iomem *mmio; + + const struct bcma_host_ops *ops; + + enum bcma_hosttype hosttype; + union { + /* Pointer to the PCI bus (only for BCMA_HOSTTYPE_PCI) */ + struct pci_dev *host_pci; + /* Pointer to the SDIO device (only for BCMA_HOSTTYPE_SDIO) */ + struct sdio_func *host_sdio; + }; + + struct bcma_chipinfo chipinfo; + + struct bcma_device *mapped_core; + struct list_head cores; + u8 nr_cores; + + struct bcma_drv_cc drv_cc; + struct bcma_drv_pci drv_pci; +}; + +extern inline u32 bcma_read8(struct bcma_device *core, u16 offset) +{ + return core->bus->ops->read8(core, offset); +} +extern inline u32 bcma_read16(struct bcma_device *core, u16 offset) +{ + return core->bus->ops->read16(core, offset); +} +extern inline u32 bcma_read32(struct bcma_device *core, u16 offset) +{ + return core->bus->ops->read32(core, offset); +} +extern inline +void bcma_write8(struct bcma_device *core, u16 offset, u32 value) +{ + core->bus->ops->write8(core, offset, value); +} +extern inline +void bcma_write16(struct bcma_device *core, u16 offset, u32 value) +{ + core->bus->ops->write16(core, offset, value); +} +extern inline +void bcma_write32(struct bcma_device *core, u16 offset, u32 value) +{ + core->bus->ops->write32(core, offset, value); +} +extern inline u32 bcma_aread32(struct bcma_device *core, u16 offset) +{ + return core->bus->ops->aread32(core, offset); +} +extern inline +void bcma_awrite32(struct bcma_device *core, u16 offset, u32 value) +{ + core->bus->ops->awrite32(core, offset, value); +} + +extern bool bcma_core_is_enabled(struct bcma_device *core); +extern int bcma_core_enable(struct bcma_device *core, u32 flags); + +#endif /* LINUX_BCMA_H_ */ diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h new file mode 100644 index 000000000000..4f8fd6a4c1e6 --- /dev/null +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -0,0 +1,297 @@ +#ifndef LINUX_BCMA_DRIVER_CC_H_ +#define LINUX_BCMA_DRIVER_CC_H_ + +/** ChipCommon core registers. 
**/ +#define BCMA_CC_ID 0x0000 +#define BCMA_CC_ID_ID 0x0000FFFF +#define BCMA_CC_ID_ID_SHIFT 0 +#define BCMA_CC_ID_REV 0x000F0000 +#define BCMA_CC_ID_REV_SHIFT 16 +#define BCMA_CC_ID_PKG 0x00F00000 +#define BCMA_CC_ID_PKG_SHIFT 20 +#define BCMA_CC_ID_NRCORES 0x0F000000 +#define BCMA_CC_ID_NRCORES_SHIFT 24 +#define BCMA_CC_ID_TYPE 0xF0000000 +#define BCMA_CC_ID_TYPE_SHIFT 28 +#define BCMA_CC_CAP 0x0004 /* Capabilities */ +#define BCMA_CC_CAP_NRUART 0x00000003 /* # of UARTs */ +#define BCMA_CC_CAP_MIPSEB 0x00000004 /* MIPS in BigEndian Mode */ +#define BCMA_CC_CAP_UARTCLK 0x00000018 /* UART clock select */ +#define BCMA_CC_CAP_UARTCLK_INT 0x00000008 /* UARTs are driven by internal divided clock */ +#define BCMA_CC_CAP_UARTGPIO 0x00000020 /* UARTs on GPIO 15-12 */ +#define BCMA_CC_CAP_EXTBUS 0x000000C0 /* External buses present */ +#define BCMA_CC_CAP_FLASHT 0x00000700 /* Flash Type */ +#define BCMA_CC_FLASHT_NONE 0x00000000 /* No flash */ +#define BCMA_CC_FLASHT_STSER 0x00000100 /* ST serial flash */ +#define BCMA_CC_FLASHT_ATSER 0x00000200 /* Atmel serial flash */ +#define BCMA_CC_FLASHT_PARA 0x00000700 /* Parallel flash */ +#define BCMA_CC_CAP_PLLT 0x00038000 /* PLL Type */ +#define BCMA_PLLTYPE_NONE 0x00000000 +#define BCMA_PLLTYPE_1 0x00010000 /* 48Mhz base, 3 dividers */ +#define BCMA_PLLTYPE_2 0x00020000 /* 48Mhz, 4 dividers */ +#define BCMA_PLLTYPE_3 0x00030000 /* 25Mhz, 2 dividers */ +#define BCMA_PLLTYPE_4 0x00008000 /* 48Mhz, 4 dividers */ +#define BCMA_PLLTYPE_5 0x00018000 /* 25Mhz, 4 dividers */ +#define BCMA_PLLTYPE_6 0x00028000 /* 100/200 or 120/240 only */ +#define BCMA_PLLTYPE_7 0x00038000 /* 25Mhz, 4 dividers */ +#define BCMA_CC_CAP_PCTL 0x00040000 /* Power Control */ +#define BCMA_CC_CAP_OTPS 0x00380000 /* OTP size */ +#define BCMA_CC_CAP_OTPS_SHIFT 19 +#define BCMA_CC_CAP_OTPS_BASE 5 +#define BCMA_CC_CAP_JTAGM 0x00400000 /* JTAG master present */ +#define BCMA_CC_CAP_BROM 0x00800000 /* Internal boot ROM active */ +#define BCMA_CC_CAP_64BIT 0x08000000 /* 64-bit Backplane */ +#define BCMA_CC_CAP_PMU 0x10000000 /* PMU available (rev >= 20) */ +#define BCMA_CC_CAP_ECI 0x20000000 /* ECI available (rev >= 20) */ +#define BCMA_CC_CAP_SPROM 0x40000000 /* SPROM present */ +#define BCMA_CC_CORECTL 0x0008 +#define BCMA_CC_CORECTL_UARTCLK0 0x00000001 /* Drive UART with internal clock */ +#define BCMA_CC_CORECTL_SE 0x00000002 /* sync clk out enable (corerev >= 3) */ +#define BCMA_CC_CORECTL_UARTCLKEN 0x00000008 /* UART clock enable (rev >= 21) */ +#define BCMA_CC_BIST 0x000C +#define BCMA_CC_OTPS 0x0010 /* OTP status */ +#define BCMA_CC_OTPS_PROGFAIL 0x80000000 +#define BCMA_CC_OTPS_PROTECT 0x00000007 +#define BCMA_CC_OTPS_HW_PROTECT 0x00000001 +#define BCMA_CC_OTPS_SW_PROTECT 0x00000002 +#define BCMA_CC_OTPS_CID_PROTECT 0x00000004 +#define BCMA_CC_OTPC 0x0014 /* OTP control */ +#define BCMA_CC_OTPC_RECWAIT 0xFF000000 +#define BCMA_CC_OTPC_PROGWAIT 0x00FFFF00 +#define BCMA_CC_OTPC_PRW_SHIFT 8 +#define BCMA_CC_OTPC_MAXFAIL 0x00000038 +#define BCMA_CC_OTPC_VSEL 0x00000006 +#define BCMA_CC_OTPC_SELVL 0x00000001 +#define BCMA_CC_OTPP 0x0018 /* OTP prog */ +#define BCMA_CC_OTPP_COL 0x000000FF +#define BCMA_CC_OTPP_ROW 0x0000FF00 +#define BCMA_CC_OTPP_ROW_SHIFT 8 +#define BCMA_CC_OTPP_READERR 0x10000000 +#define BCMA_CC_OTPP_VALUE 0x20000000 +#define BCMA_CC_OTPP_READ 0x40000000 +#define BCMA_CC_OTPP_START 0x80000000 +#define BCMA_CC_OTPP_BUSY 0x80000000 +#define BCMA_CC_IRQSTAT 0x0020 +#define BCMA_CC_IRQMASK 0x0024 +#define BCMA_CC_IRQ_GPIO 0x00000001 /* gpio intr */ +#define 
BCMA_CC_IRQ_EXT 0x00000002 /* ro: ext intr pin (corerev >= 3) */ +#define BCMA_CC_IRQ_WDRESET 0x80000000 /* watchdog reset occurred */ +#define BCMA_CC_CHIPCTL 0x0028 /* Rev >= 11 only */ +#define BCMA_CC_CHIPSTAT 0x002C /* Rev >= 11 only */ +#define BCMA_CC_JCMD 0x0030 /* Rev >= 10 only */ +#define BCMA_CC_JCMD_START 0x80000000 +#define BCMA_CC_JCMD_BUSY 0x80000000 +#define BCMA_CC_JCMD_PAUSE 0x40000000 +#define BCMA_CC_JCMD0_ACC_MASK 0x0000F000 +#define BCMA_CC_JCMD0_ACC_IRDR 0x00000000 +#define BCMA_CC_JCMD0_ACC_DR 0x00001000 +#define BCMA_CC_JCMD0_ACC_IR 0x00002000 +#define BCMA_CC_JCMD0_ACC_RESET 0x00003000 +#define BCMA_CC_JCMD0_ACC_IRPDR 0x00004000 +#define BCMA_CC_JCMD0_ACC_PDR 0x00005000 +#define BCMA_CC_JCMD0_IRW_MASK 0x00000F00 +#define BCMA_CC_JCMD_ACC_MASK 0x000F0000 /* Changes for corerev 11 */ +#define BCMA_CC_JCMD_ACC_IRDR 0x00000000 +#define BCMA_CC_JCMD_ACC_DR 0x00010000 +#define BCMA_CC_JCMD_ACC_IR 0x00020000 +#define BCMA_CC_JCMD_ACC_RESET 0x00030000 +#define BCMA_CC_JCMD_ACC_IRPDR 0x00040000 +#define BCMA_CC_JCMD_ACC_PDR 0x00050000 +#define BCMA_CC_JCMD_IRW_MASK 0x00001F00 +#define BCMA_CC_JCMD_IRW_SHIFT 8 +#define BCMA_CC_JCMD_DRW_MASK 0x0000003F +#define BCMA_CC_JIR 0x0034 /* Rev >= 10 only */ +#define BCMA_CC_JDR 0x0038 /* Rev >= 10 only */ +#define BCMA_CC_JCTL 0x003C /* Rev >= 10 only */ +#define BCMA_CC_JCTL_FORCE_CLK 4 /* Force clock */ +#define BCMA_CC_JCTL_EXT_EN 2 /* Enable external targets */ +#define BCMA_CC_JCTL_EN 1 /* Enable Jtag master */ +#define BCMA_CC_FLASHCTL 0x0040 +#define BCMA_CC_FLASHCTL_START 0x80000000 +#define BCMA_CC_FLASHCTL_BUSY BCMA_CC_FLASHCTL_START +#define BCMA_CC_FLASHADDR 0x0044 +#define BCMA_CC_FLASHDATA 0x0048 +#define BCMA_CC_BCAST_ADDR 0x0050 +#define BCMA_CC_BCAST_DATA 0x0054 +#define BCMA_CC_GPIOIN 0x0060 +#define BCMA_CC_GPIOOUT 0x0064 +#define BCMA_CC_GPIOOUTEN 0x0068 +#define BCMA_CC_GPIOCTL 0x006C +#define BCMA_CC_GPIOPOL 0x0070 +#define BCMA_CC_GPIOIRQ 0x0074 +#define BCMA_CC_WATCHDOG 0x0080 +#define BCMA_CC_GPIOTIMER 0x0088 /* LED powersave (corerev >= 16) */ +#define BCMA_CC_GPIOTIMER_ONTIME_SHIFT 16 +#define BCMA_CC_GPIOTOUTM 0x008C /* LED powersave (corerev >= 16) */ +#define BCMA_CC_CLOCK_N 0x0090 +#define BCMA_CC_CLOCK_SB 0x0094 +#define BCMA_CC_CLOCK_PCI 0x0098 +#define BCMA_CC_CLOCK_M2 0x009C +#define BCMA_CC_CLOCK_MIPS 0x00A0 +#define BCMA_CC_CLKDIV 0x00A4 /* Rev >= 3 only */ +#define BCMA_CC_CLKDIV_SFLASH 0x0F000000 +#define BCMA_CC_CLKDIV_SFLASH_SHIFT 24 +#define BCMA_CC_CLKDIV_OTP 0x000F0000 +#define BCMA_CC_CLKDIV_OTP_SHIFT 16 +#define BCMA_CC_CLKDIV_JTAG 0x00000F00 +#define BCMA_CC_CLKDIV_JTAG_SHIFT 8 +#define BCMA_CC_CLKDIV_UART 0x000000FF +#define BCMA_CC_CAP_EXT 0x00AC /* Capabilities */ +#define BCMA_CC_PLLONDELAY 0x00B0 /* Rev >= 4 only */ +#define BCMA_CC_FREFSELDELAY 0x00B4 /* Rev >= 4 only */ +#define BCMA_CC_SLOWCLKCTL 0x00B8 /* 6 <= Rev <= 9 only */ +#define BCMA_CC_SLOWCLKCTL_SRC 0x00000007 /* slow clock source mask */ +#define BCMA_CC_SLOWCLKCTL_SRC_LPO 0x00000000 /* source of slow clock is LPO */ +#define BCMA_CC_SLOWCLKCTL_SRC_XTAL 0x00000001 /* source of slow clock is crystal */ +#define BCMA_CC_SLOECLKCTL_SRC_PCI 0x00000002 /* source of slow clock is PCI */ +#define BCMA_CC_SLOWCLKCTL_LPOFREQ 0x00000200 /* LPOFreqSel, 1: 160Khz, 0: 32KHz */ +#define BCMA_CC_SLOWCLKCTL_LPOPD 0x00000400 /* LPOPowerDown, 1: LPO is disabled, 0: LPO is enabled */ +#define BCMA_CC_SLOWCLKCTL_FSLOW 0x00000800 /* ForceSlowClk, 1: sb/cores running on slow clock, 0: power logic control */ +#define 
BCMA_CC_SLOWCLKCTL_IPLL 0x00001000 /* IgnorePllOffReq, 1/0: power logic ignores/honors PLL clock disable requests from core */ +#define BCMA_CC_SLOWCLKCTL_ENXTAL 0x00002000 /* XtalControlEn, 1/0: power logic does/doesn't disable crystal when appropriate */ +#define BCMA_CC_SLOWCLKCTL_XTALPU 0x00004000 /* XtalPU (RO), 1/0: crystal running/disabled */ +#define BCMA_CC_SLOWCLKCTL_CLKDIV 0xFFFF0000 /* ClockDivider (SlowClk = 1/(4+divisor)) */ +#define BCMA_CC_SLOWCLKCTL_CLKDIV_SHIFT 16 +#define BCMA_CC_SYSCLKCTL 0x00C0 /* Rev >= 3 only */ +#define BCMA_CC_SYSCLKCTL_IDLPEN 0x00000001 /* ILPen: Enable Idle Low Power */ +#define BCMA_CC_SYSCLKCTL_ALPEN 0x00000002 /* ALPen: Enable Active Low Power */ +#define BCMA_CC_SYSCLKCTL_PLLEN 0x00000004 /* ForcePLLOn */ +#define BCMA_CC_SYSCLKCTL_FORCEALP 0x00000008 /* Force ALP (or HT if ALPen is not set */ +#define BCMA_CC_SYSCLKCTL_FORCEHT 0x00000010 /* Force HT */ +#define BCMA_CC_SYSCLKCTL_CLKDIV 0xFFFF0000 /* ClkDiv (ILP = 1/(4+divisor)) */ +#define BCMA_CC_SYSCLKCTL_CLKDIV_SHIFT 16 +#define BCMA_CC_CLKSTSTR 0x00C4 /* Rev >= 3 only */ +#define BCMA_CC_EROM 0x00FC +#define BCMA_CC_PCMCIA_CFG 0x0100 +#define BCMA_CC_PCMCIA_MEMWAIT 0x0104 +#define BCMA_CC_PCMCIA_ATTRWAIT 0x0108 +#define BCMA_CC_PCMCIA_IOWAIT 0x010C +#define BCMA_CC_IDE_CFG 0x0110 +#define BCMA_CC_IDE_MEMWAIT 0x0114 +#define BCMA_CC_IDE_ATTRWAIT 0x0118 +#define BCMA_CC_IDE_IOWAIT 0x011C +#define BCMA_CC_PROG_CFG 0x0120 +#define BCMA_CC_PROG_WAITCNT 0x0124 +#define BCMA_CC_FLASH_CFG 0x0128 +#define BCMA_CC_FLASH_WAITCNT 0x012C +#define BCMA_CC_CLKCTLST 0x01E0 /* Clock control and status (rev >= 20) */ +#define BCMA_CC_CLKCTLST_FORCEALP 0x00000001 /* Force ALP request */ +#define BCMA_CC_CLKCTLST_FORCEHT 0x00000002 /* Force HT request */ +#define BCMA_CC_CLKCTLST_FORCEILP 0x00000004 /* Force ILP request */ +#define BCMA_CC_CLKCTLST_HAVEALPREQ 0x00000008 /* ALP available request */ +#define BCMA_CC_CLKCTLST_HAVEHTREQ 0x00000010 /* HT available request */ +#define BCMA_CC_CLKCTLST_HWCROFF 0x00000020 /* Force HW clock request off */ +#define BCMA_CC_CLKCTLST_HAVEHT 0x00010000 /* HT available */ +#define BCMA_CC_CLKCTLST_HAVEALP 0x00020000 /* APL available */ +#define BCMA_CC_HW_WORKAROUND 0x01E4 /* Hardware workaround (rev >= 20) */ +#define BCMA_CC_UART0_DATA 0x0300 +#define BCMA_CC_UART0_IMR 0x0304 +#define BCMA_CC_UART0_FCR 0x0308 +#define BCMA_CC_UART0_LCR 0x030C +#define BCMA_CC_UART0_MCR 0x0310 +#define BCMA_CC_UART0_LSR 0x0314 +#define BCMA_CC_UART0_MSR 0x0318 +#define BCMA_CC_UART0_SCRATCH 0x031C +#define BCMA_CC_UART1_DATA 0x0400 +#define BCMA_CC_UART1_IMR 0x0404 +#define BCMA_CC_UART1_FCR 0x0408 +#define BCMA_CC_UART1_LCR 0x040C +#define BCMA_CC_UART1_MCR 0x0410 +#define BCMA_CC_UART1_LSR 0x0414 +#define BCMA_CC_UART1_MSR 0x0418 +#define BCMA_CC_UART1_SCRATCH 0x041C +/* PMU registers (rev >= 20) */ +#define BCMA_CC_PMU_CTL 0x0600 /* PMU control */ +#define BCMA_CC_PMU_CTL_ILP_DIV 0xFFFF0000 /* ILP div mask */ +#define BCMA_CC_PMU_CTL_ILP_DIV_SHIFT 16 +#define BCMA_CC_PMU_CTL_NOILPONW 0x00000200 /* No ILP on wait */ +#define BCMA_CC_PMU_CTL_HTREQEN 0x00000100 /* HT req enable */ +#define BCMA_CC_PMU_CTL_ALPREQEN 0x00000080 /* ALP req enable */ +#define BCMA_CC_PMU_CTL_XTALFREQ 0x0000007C /* Crystal freq */ +#define BCMA_CC_PMU_CTL_XTALFREQ_SHIFT 2 +#define BCMA_CC_PMU_CTL_ILPDIVEN 0x00000002 /* ILP div enable */ +#define BCMA_CC_PMU_CTL_LPOSEL 0x00000001 /* LPO sel */ +#define BCMA_CC_PMU_CAP 0x0604 /* PMU capabilities */ +#define BCMA_CC_PMU_CAP_REVISION 0x000000FF /* Revision mask 
*/ +#define BCMA_CC_PMU_STAT 0x0608 /* PMU status */ +#define BCMA_CC_PMU_STAT_INTPEND 0x00000040 /* Interrupt pending */ +#define BCMA_CC_PMU_STAT_SBCLKST 0x00000030 /* Backplane clock status? */ +#define BCMA_CC_PMU_STAT_HAVEALP 0x00000008 /* ALP available */ +#define BCMA_CC_PMU_STAT_HAVEHT 0x00000004 /* HT available */ +#define BCMA_CC_PMU_STAT_RESINIT 0x00000003 /* Res init */ +#define BCMA_CC_PMU_RES_STAT 0x060C /* PMU res status */ +#define BCMA_CC_PMU_RES_PEND 0x0610 /* PMU res pending */ +#define BCMA_CC_PMU_TIMER 0x0614 /* PMU timer */ +#define BCMA_CC_PMU_MINRES_MSK 0x0618 /* PMU min res mask */ +#define BCMA_CC_PMU_MAXRES_MSK 0x061C /* PMU max res mask */ +#define BCMA_CC_PMU_RES_TABSEL 0x0620 /* PMU res table sel */ +#define BCMA_CC_PMU_RES_DEPMSK 0x0624 /* PMU res dep mask */ +#define BCMA_CC_PMU_RES_UPDNTM 0x0628 /* PMU res updown timer */ +#define BCMA_CC_PMU_RES_TIMER 0x062C /* PMU res timer */ +#define BCMA_CC_PMU_CLKSTRETCH 0x0630 /* PMU clockstretch */ +#define BCMA_CC_PMU_WATCHDOG 0x0634 /* PMU watchdog */ +#define BCMA_CC_PMU_RES_REQTS 0x0640 /* PMU res req timer sel */ +#define BCMA_CC_PMU_RES_REQT 0x0644 /* PMU res req timer */ +#define BCMA_CC_PMU_RES_REQM 0x0648 /* PMU res req mask */ +#define BCMA_CC_CHIPCTL_ADDR 0x0650 +#define BCMA_CC_CHIPCTL_DATA 0x0654 +#define BCMA_CC_REGCTL_ADDR 0x0658 +#define BCMA_CC_REGCTL_DATA 0x065C +#define BCMA_CC_PLLCTL_ADDR 0x0660 +#define BCMA_CC_PLLCTL_DATA 0x0664 + +/* Data for the PMU, if available. + * Check availability with ((struct bcma_chipcommon)->capabilities & BCMA_CC_CAP_PMU) + */ +struct bcma_chipcommon_pmu { + u8 rev; /* PMU revision */ + u32 crystalfreq; /* The active crystal frequency (in kHz) */ +}; + +struct bcma_drv_cc { + struct bcma_device *core; + u32 status; + u32 capabilities; + u32 capabilities_ext; + /* Fast Powerup Delay constant */ + u16 fast_pwrup_delay; + struct bcma_chipcommon_pmu pmu; +}; + +/* Register access */ +#define bcma_cc_read32(cc, offset) \ + bcma_read32((cc)->core, offset) +#define bcma_cc_write32(cc, offset, val) \ + bcma_write32((cc)->core, offset, val) + +#define bcma_cc_mask32(cc, offset, mask) \ + bcma_cc_write32(cc, offset, bcma_cc_read32(cc, offset) & (mask)) +#define bcma_cc_set32(cc, offset, set) \ + bcma_cc_write32(cc, offset, bcma_cc_read32(cc, offset) | (set)) +#define bcma_cc_maskset32(cc, offset, mask, set) \ + bcma_cc_write32(cc, offset, (bcma_cc_read32(cc, offset) & (mask)) | (set)) + +extern void bcma_core_chipcommon_init(struct bcma_drv_cc *cc); + +extern void bcma_chipco_suspend(struct bcma_drv_cc *cc); +extern void bcma_chipco_resume(struct bcma_drv_cc *cc); + +extern void bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, + u32 ticks); + +void bcma_chipco_irq_mask(struct bcma_drv_cc *cc, u32 mask, u32 value); + +u32 bcma_chipco_irq_status(struct bcma_drv_cc *cc, u32 mask); + +/* Chipcommon GPIO pin access. 
*/ +u32 bcma_chipco_gpio_in(struct bcma_drv_cc *cc, u32 mask); +u32 bcma_chipco_gpio_out(struct bcma_drv_cc *cc, u32 mask, u32 value); +u32 bcma_chipco_gpio_outen(struct bcma_drv_cc *cc, u32 mask, u32 value); +u32 bcma_chipco_gpio_control(struct bcma_drv_cc *cc, u32 mask, u32 value); +u32 bcma_chipco_gpio_intmask(struct bcma_drv_cc *cc, u32 mask, u32 value); +u32 bcma_chipco_gpio_polarity(struct bcma_drv_cc *cc, u32 mask, u32 value); + +/* PMU support */ +extern void bcma_pmu_init(struct bcma_drv_cc *cc); + +#endif /* LINUX_BCMA_DRIVER_CC_H_ */ diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h new file mode 100644 index 000000000000..b7e191cf00ec --- /dev/null +++ b/include/linux/bcma/bcma_driver_pci.h @@ -0,0 +1,89 @@ +#ifndef LINUX_BCMA_DRIVER_PCI_H_ +#define LINUX_BCMA_DRIVER_PCI_H_ + +#include + +struct pci_dev; + +/** PCI core registers. **/ +#define BCMA_CORE_PCI_CTL 0x0000 /* PCI Control */ +#define BCMA_CORE_PCI_CTL_RST_OE 0x00000001 /* PCI_RESET Output Enable */ +#define BCMA_CORE_PCI_CTL_RST 0x00000002 /* PCI_RESET driven out to pin */ +#define BCMA_CORE_PCI_CTL_CLK_OE 0x00000004 /* Clock gate Output Enable */ +#define BCMA_CORE_PCI_CTL_CLK 0x00000008 /* Gate for clock driven out to pin */ +#define BCMA_CORE_PCI_ARBCTL 0x0010 /* PCI Arbiter Control */ +#define BCMA_CORE_PCI_ARBCTL_INTERN 0x00000001 /* Use internal arbiter */ +#define BCMA_CORE_PCI_ARBCTL_EXTERN 0x00000002 /* Use external arbiter */ +#define BCMA_CORE_PCI_ARBCTL_PARKID 0x00000006 /* Mask, selects which agent is parked on an idle bus */ +#define BCMA_CORE_PCI_ARBCTL_PARKID_LAST 0x00000000 /* Last requestor */ +#define BCMA_CORE_PCI_ARBCTL_PARKID_4710 0x00000002 /* 4710 */ +#define BCMA_CORE_PCI_ARBCTL_PARKID_EXT0 0x00000004 /* External requestor 0 */ +#define BCMA_CORE_PCI_ARBCTL_PARKID_EXT1 0x00000006 /* External requestor 1 */ +#define BCMA_CORE_PCI_ISTAT 0x0020 /* Interrupt status */ +#define BCMA_CORE_PCI_ISTAT_INTA 0x00000001 /* PCI INTA# */ +#define BCMA_CORE_PCI_ISTAT_INTB 0x00000002 /* PCI INTB# */ +#define BCMA_CORE_PCI_ISTAT_SERR 0x00000004 /* PCI SERR# (write to clear) */ +#define BCMA_CORE_PCI_ISTAT_PERR 0x00000008 /* PCI PERR# (write to clear) */ +#define BCMA_CORE_PCI_ISTAT_PME 0x00000010 /* PCI PME# */ +#define BCMA_CORE_PCI_IMASK 0x0024 /* Interrupt mask */ +#define BCMA_CORE_PCI_IMASK_INTA 0x00000001 /* PCI INTA# */ +#define BCMA_CORE_PCI_IMASK_INTB 0x00000002 /* PCI INTB# */ +#define BCMA_CORE_PCI_IMASK_SERR 0x00000004 /* PCI SERR# */ +#define BCMA_CORE_PCI_IMASK_PERR 0x00000008 /* PCI PERR# */ +#define BCMA_CORE_PCI_IMASK_PME 0x00000010 /* PCI PME# */ +#define BCMA_CORE_PCI_MBOX 0x0028 /* Backplane to PCI Mailbox */ +#define BCMA_CORE_PCI_MBOX_F0_0 0x00000100 /* PCI function 0, INT 0 */ +#define BCMA_CORE_PCI_MBOX_F0_1 0x00000200 /* PCI function 0, INT 1 */ +#define BCMA_CORE_PCI_MBOX_F1_0 0x00000400 /* PCI function 1, INT 0 */ +#define BCMA_CORE_PCI_MBOX_F1_1 0x00000800 /* PCI function 1, INT 1 */ +#define BCMA_CORE_PCI_MBOX_F2_0 0x00001000 /* PCI function 2, INT 0 */ +#define BCMA_CORE_PCI_MBOX_F2_1 0x00002000 /* PCI function 2, INT 1 */ +#define BCMA_CORE_PCI_MBOX_F3_0 0x00004000 /* PCI function 3, INT 0 */ +#define BCMA_CORE_PCI_MBOX_F3_1 0x00008000 /* PCI function 3, INT 1 */ +#define BCMA_CORE_PCI_BCAST_ADDR 0x0050 /* Backplane Broadcast Address */ +#define BCMA_CORE_PCI_BCAST_ADDR_MASK 0x000000FF +#define BCMA_CORE_PCI_BCAST_DATA 0x0054 /* Backplane Broadcast Data */ +#define BCMA_CORE_PCI_GPIO_IN 0x0060 /* rev >= 2 only */ +#define 
BCMA_CORE_PCI_GPIO_OUT 0x0064 /* rev >= 2 only */ +#define BCMA_CORE_PCI_GPIO_ENABLE 0x0068 /* rev >= 2 only */ +#define BCMA_CORE_PCI_GPIO_CTL 0x006C /* rev >= 2 only */ +#define BCMA_CORE_PCI_SBTOPCI0 0x0100 /* Backplane to PCI translation 0 (sbtopci0) */ +#define BCMA_CORE_PCI_SBTOPCI0_MASK 0xFC000000 +#define BCMA_CORE_PCI_SBTOPCI1 0x0104 /* Backplane to PCI translation 1 (sbtopci1) */ +#define BCMA_CORE_PCI_SBTOPCI1_MASK 0xFC000000 +#define BCMA_CORE_PCI_SBTOPCI2 0x0108 /* Backplane to PCI translation 2 (sbtopci2) */ +#define BCMA_CORE_PCI_SBTOPCI2_MASK 0xC0000000 +#define BCMA_CORE_PCI_PCICFG0 0x0400 /* PCI config space 0 (rev >= 8) */ +#define BCMA_CORE_PCI_PCICFG1 0x0500 /* PCI config space 1 (rev >= 8) */ +#define BCMA_CORE_PCI_PCICFG2 0x0600 /* PCI config space 2 (rev >= 8) */ +#define BCMA_CORE_PCI_PCICFG3 0x0700 /* PCI config space 3 (rev >= 8) */ +#define BCMA_CORE_PCI_SPROM(wordoffset) (0x0800 + ((wordoffset) * 2)) /* SPROM shadow area (72 bytes) */ + +/* SBtoPCIx */ +#define BCMA_CORE_PCI_SBTOPCI_MEM 0x00000000 +#define BCMA_CORE_PCI_SBTOPCI_IO 0x00000001 +#define BCMA_CORE_PCI_SBTOPCI_CFG0 0x00000002 +#define BCMA_CORE_PCI_SBTOPCI_CFG1 0x00000003 +#define BCMA_CORE_PCI_SBTOPCI_PREF 0x00000004 /* Prefetch enable */ +#define BCMA_CORE_PCI_SBTOPCI_BURST 0x00000008 /* Burst enable */ +#define BCMA_CORE_PCI_SBTOPCI_MRM 0x00000020 /* Memory Read Multiple */ +#define BCMA_CORE_PCI_SBTOPCI_RC 0x00000030 /* Read Command mask (rev >= 11) */ +#define BCMA_CORE_PCI_SBTOPCI_RC_READ 0x00000000 /* Memory read */ +#define BCMA_CORE_PCI_SBTOPCI_RC_READL 0x00000010 /* Memory read line */ +#define BCMA_CORE_PCI_SBTOPCI_RC_READM 0x00000020 /* Memory read multiple */ + +/* PCIcore specific boardflags */ +#define BCMA_CORE_PCI_BFL_NOPCI 0x00000400 /* Board leaves PCI floating */ + +struct bcma_drv_pci { + struct bcma_device *core; + u8 setup_done:1; +}; + +/* Register access */ +#define pcicore_read32(pc, offset) bcma_read32((pc)->core, offset) +#define pcicore_write32(pc, offset, val) bcma_write32((pc)->core, offset, val) + +extern void bcma_core_pci_init(struct bcma_drv_pci *pc); + +#endif /* LINUX_BCMA_DRIVER_PCI_H_ */ diff --git a/include/linux/bcma/bcma_regs.h b/include/linux/bcma/bcma_regs.h new file mode 100644 index 000000000000..f82d88a960ce --- /dev/null +++ b/include/linux/bcma/bcma_regs.h @@ -0,0 +1,34 @@ +#ifndef LINUX_BCMA_REGS_H_ +#define LINUX_BCMA_REGS_H_ + +/* Agent registers (common for every core) */ +#define BCMA_IOCTL 0x0408 +#define BCMA_IOCTL_CLK 0x0001 +#define BCMA_IOCTL_FGC 0x0002 +#define BCMA_IOCTL_CORE_BITS 0x3FFC +#define BCMA_IOCTL_PME_EN 0x4000 +#define BCMA_IOCTL_BIST_EN 0x8000 +#define BCMA_RESET_CTL 0x0800 +#define BCMA_RESET_CTL_RESET 0x0001 + +/* BCMA PCI config space registers. 
*/ +#define BCMA_PCI_PMCSR 0x44 +#define BCMA_PCI_PE 0x100 +#define BCMA_PCI_BAR0_WIN 0x80 /* Backplane address space 0 */ +#define BCMA_PCI_BAR1_WIN 0x84 /* Backplane address space 1 */ +#define BCMA_PCI_SPROMCTL 0x88 /* SPROM control */ +#define BCMA_PCI_SPROMCTL_WE 0x10 /* SPROM write enable */ +#define BCMA_PCI_BAR1_CONTROL 0x8c /* Address space 1 burst control */ +#define BCMA_PCI_IRQS 0x90 /* PCI interrupts */ +#define BCMA_PCI_IRQMASK 0x94 /* PCI IRQ control and mask (pcirev >= 6 only) */ +#define BCMA_PCI_BACKPLANE_IRQS 0x98 /* Backplane Interrupts */ +#define BCMA_PCI_BAR0_WIN2 0xAC +#define BCMA_PCI_GPIO_IN 0xB0 /* GPIO Input (pcirev >= 3 only) */ +#define BCMA_PCI_GPIO_OUT 0xB4 /* GPIO Output (pcirev >= 3 only) */ +#define BCMA_PCI_GPIO_OUT_ENABLE 0xB8 /* GPIO Output Enable/Disable (pcirev >= 3 only) */ +#define BCMA_PCI_GPIO_SCS 0x10 /* PCI config space bit 4 for 4306c0 slow clock source */ +#define BCMA_PCI_GPIO_HWRAD 0x20 /* PCI config space GPIO 13 for hw radio disable */ +#define BCMA_PCI_GPIO_XTAL 0x40 /* PCI config space GPIO 14 for Xtal powerup */ +#define BCMA_PCI_GPIO_PLL 0x80 /* PCI config space GPIO 15 for PLL powerdown */ + +#endif /* LINUX_BCMA_REGS_H_ */ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 48c007dae476..ae28e93fd072 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -382,6 +382,23 @@ struct ssb_device_id { #define SSB_ANY_ID 0xFFFF #define SSB_ANY_REV 0xFF +/* Broadcom's specific AMBA core, see drivers/bcma/ */ +struct bcma_device_id { + __u16 manuf; + __u16 id; + __u8 rev; + __u8 class; +}; +#define BCMA_CORE(_manuf, _id, _rev, _class) \ + { .manuf = _manuf, .id = _id, .rev = _rev, .class = _class, } +#define BCMA_CORETABLE_END \ + { 0, }, + +#define BCMA_ANY_MANUF 0xFFFF +#define BCMA_ANY_ID 0xFFFF +#define BCMA_ANY_REV 0xFF +#define BCMA_ANY_CLASS 0xFF + struct virtio_device_id { __u32 device; __u32 vendor; diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 88f3f07205f8..e26e2fb462d4 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -702,6 +702,24 @@ static int do_ssb_entry(const char *filename, return 1; } +/* Looks like: bcma:mNidNrevNclN. 
*/ +static int do_bcma_entry(const char *filename, + struct bcma_device_id *id, char *alias) +{ + id->manuf = TO_NATIVE(id->manuf); + id->id = TO_NATIVE(id->id); + id->rev = TO_NATIVE(id->rev); + id->class = TO_NATIVE(id->class); + + strcpy(alias, "bcma:"); + ADD(alias, "m", id->manuf != BCMA_ANY_MANUF, id->manuf); + ADD(alias, "id", id->id != BCMA_ANY_ID, id->id); + ADD(alias, "rev", id->rev != BCMA_ANY_REV, id->rev); + ADD(alias, "cl", id->class != BCMA_ANY_CLASS, id->class); + add_wildcard(alias); + return 1; +} + /* Looks like: virtio:dNvN */ static int do_virtio_entry(const char *filename, struct virtio_device_id *id, char *alias) @@ -968,6 +986,10 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, do_table(symval, sym->st_size, sizeof(struct ssb_device_id), "ssb", do_ssb_entry, mod); + else if (sym_is(symname, "__mod_bcma_device_table")) + do_table(symval, sym->st_size, + sizeof(struct bcma_device_id), "bcma", + do_bcma_entry, mod); else if (sym_is(symname, "__mod_virtio_device_table")) do_table(symval, sym->st_size, sizeof(struct virtio_device_id), "virtio", -- cgit v1.2.3 From c29c3f70c9eb6f18090da5af9dbe9dcb4adece8c Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 20 Apr 2010 17:58:24 -0400 Subject: tipc: Abort excessive send requests as early as possible Adds checks to TIPC's socket send routines to promptly detect and abort attempts to send more than 66,000 bytes in a single TIPC message or more than 2**31-1 bytes in a single TIPC byte stream request. In addition, this ensures that the number of iovecs in a send request does not exceed the limits of a standard integer variable. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- include/linux/tipc.h | 2 +- net/tipc/socket.c | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tipc.h b/include/linux/tipc.h index a5b994a204d2..f2d90091cc20 100644 --- a/include/linux/tipc.h +++ b/include/linux/tipc.h @@ -101,7 +101,7 @@ static inline unsigned int tipc_node(__u32 addr) * Limiting values for messages */ -#define TIPC_MAX_USER_MSG_SIZE 66000 +#define TIPC_MAX_USER_MSG_SIZE 66000U /* * Message importance levels diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 29d94d53198d..e1c791798ba1 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -535,6 +535,9 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, if (unlikely((m->msg_namelen < sizeof(*dest)) || (dest->family != AF_TIPC))) return -EINVAL; + if ((total_len > TIPC_MAX_USER_MSG_SIZE) || + (m->msg_iovlen > (unsigned)INT_MAX)) + return -EMSGSIZE; if (iocb) lock_sock(sk); @@ -640,6 +643,10 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, if (unlikely(dest)) return send_msg(iocb, sock, m, total_len); + if ((total_len > TIPC_MAX_USER_MSG_SIZE) || + (m->msg_iovlen > (unsigned)INT_MAX)) + return -EMSGSIZE; + if (iocb) lock_sock(sk); @@ -723,6 +730,12 @@ static int send_stream(struct kiocb *iocb, struct socket *sock, goto exit; } + if ((total_len > (unsigned)INT_MAX) || + (m->msg_iovlen > (unsigned)INT_MAX)) { + res = -EMSGSIZE; + goto exit; + } + /* * Send each iovec entry using one or more messages * -- cgit v1.2.3 From f30e6d3e419bfb5540fa82ba7eca01d578556e6b Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Fri, 22 Apr 2011 15:13:54 +0200 Subject: firewire: octlet AT payloads can be stack-allocated We do not need slab allocations anymore in order to satisfy streaming DMA mapping constraints, thanks to commit da28947e7e36 "firewire: 
ohci: avoid separate DMA mapping for small AT payloads". (Besides, the slab-allocated buffers that firewire-core, firewire-sbp2, and firedtv used to provide for 8-byte write and lock requests were still not fully portable since they crossed cacheline boundaries or shared a cacheline with unrelated CPU-accessed data. snd-firewire-lib got this aspect right by using an extra kmalloc/ kfree just for the 8-byte transaction buffer.) This change replaces kmalloc'ed lock transaction scratch buffers in firewire-core, firedtv, and snd-firewire-lib by local stack allocations. Perhaps the most notable result of the change is simpler locking because there is no need to serialize usages of preallocated per-device buffers anymore. Also, allocations and deallocations are simpler. Signed-off-by: Stefan Richter Acked-by: Clemens Ladisch --- drivers/firewire/core-card.c | 16 ++++++++-------- drivers/firewire/core-cdev.c | 4 +--- drivers/firewire/core-iso.c | 21 +++++++++++---------- drivers/firewire/core-transaction.c | 7 ++++--- drivers/media/dvb/firewire/firedtv-avc.c | 15 +-------------- include/linux/firewire.h | 3 +-- sound/firewire/cmp.c | 3 +-- sound/firewire/iso-resources.c | 12 +++--------- sound/firewire/iso-resources.h | 1 - 9 files changed, 30 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 3c44fbc81acb..e119f1e6ba47 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -258,8 +258,7 @@ static void allocate_broadcast_channel(struct fw_card *card, int generation) if (!card->broadcast_channel_allocated) { fw_iso_resource_manage(card, generation, 1ULL << 31, - &channel, &bandwidth, true, - card->bm_transaction_data); + &channel, &bandwidth, true); if (channel != 31) { fw_notify("failed to allocate broadcast channel\n"); return; @@ -294,6 +293,7 @@ static void bm_work(struct work_struct *work) bool root_device_is_cmc; bool irm_is_1394_1995_only; bool keep_this_irm; + __be32 transaction_data[2]; spin_lock_irq(&card->lock); @@ -355,21 +355,21 @@ static void bm_work(struct work_struct *work) goto pick_me; } - card->bm_transaction_data[0] = cpu_to_be32(0x3f); - card->bm_transaction_data[1] = cpu_to_be32(local_id); + transaction_data[0] = cpu_to_be32(0x3f); + transaction_data[1] = cpu_to_be32(local_id); spin_unlock_irq(&card->lock); rcode = fw_run_transaction(card, TCODE_LOCK_COMPARE_SWAP, irm_id, generation, SCODE_100, CSR_REGISTER_BASE + CSR_BUS_MANAGER_ID, - card->bm_transaction_data, 8); + transaction_data, 8); if (rcode == RCODE_GENERATION) /* Another bus reset, BM work has been rescheduled. */ goto out; - bm_id = be32_to_cpu(card->bm_transaction_data[0]); + bm_id = be32_to_cpu(transaction_data[0]); spin_lock_irq(&card->lock); if (rcode == RCODE_COMPLETE && generation == card->generation) @@ -490,11 +490,11 @@ static void bm_work(struct work_struct *work) /* * Make sure that the cycle master sends cycle start packets. 
*/ - card->bm_transaction_data[0] = cpu_to_be32(CSR_STATE_BIT_CMSTR); + transaction_data[0] = cpu_to_be32(CSR_STATE_BIT_CMSTR); rcode = fw_run_transaction(card, TCODE_WRITE_QUADLET_REQUEST, root_id, generation, SCODE_100, CSR_REGISTER_BASE + CSR_STATE_SET, - card->bm_transaction_data, 4); + transaction_data, 4); if (rcode == RCODE_GENERATION) goto out; } diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 62ac111af243..2a3f1c4d6906 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -141,7 +141,6 @@ struct iso_resource { int generation; u64 channels; s32 bandwidth; - __be32 transaction_data[2]; struct iso_resource_event *e_alloc, *e_dealloc; }; @@ -1229,8 +1228,7 @@ static void iso_resource_work(struct work_struct *work) r->channels, &channel, &bandwidth, todo == ISO_RES_ALLOC || todo == ISO_RES_REALLOC || - todo == ISO_RES_ALLOC_ONCE, - r->transaction_data); + todo == ISO_RES_ALLOC_ONCE); /* * Is this generation outdated already? As long as this resource sticks * in the idr, it will be scheduled again for a newer generation or at diff --git a/drivers/firewire/core-iso.c b/drivers/firewire/core-iso.c index 481056df9268..f872ede5af37 100644 --- a/drivers/firewire/core-iso.c +++ b/drivers/firewire/core-iso.c @@ -196,9 +196,10 @@ EXPORT_SYMBOL(fw_iso_context_stop); */ static int manage_bandwidth(struct fw_card *card, int irm_id, int generation, - int bandwidth, bool allocate, __be32 data[2]) + int bandwidth, bool allocate) { int try, new, old = allocate ? BANDWIDTH_AVAILABLE_INITIAL : 0; + __be32 data[2]; /* * On a 1394a IRM with low contention, try < 1 is enough. @@ -233,9 +234,10 @@ static int manage_bandwidth(struct fw_card *card, int irm_id, int generation, } static int manage_channel(struct fw_card *card, int irm_id, int generation, - u32 channels_mask, u64 offset, bool allocate, __be32 data[2]) + u32 channels_mask, u64 offset, bool allocate) { __be32 bit, all, old; + __be32 data[2]; int channel, ret = -EIO, retry = 5; old = all = allocate ? cpu_to_be32(~0) : 0; @@ -284,7 +286,7 @@ static int manage_channel(struct fw_card *card, int irm_id, int generation, } static void deallocate_channel(struct fw_card *card, int irm_id, - int generation, int channel, __be32 buffer[2]) + int generation, int channel) { u32 mask; u64 offset; @@ -293,7 +295,7 @@ static void deallocate_channel(struct fw_card *card, int irm_id, offset = channel < 32 ? 
CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_HI : CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_LO; - manage_channel(card, irm_id, generation, mask, offset, false, buffer); + manage_channel(card, irm_id, generation, mask, offset, false); } /** @@ -322,7 +324,7 @@ static void deallocate_channel(struct fw_card *card, int irm_id, */ void fw_iso_resource_manage(struct fw_card *card, int generation, u64 channels_mask, int *channel, int *bandwidth, - bool allocate, __be32 buffer[2]) + bool allocate) { u32 channels_hi = channels_mask; /* channels 31...0 */ u32 channels_lo = channels_mask >> 32; /* channels 63...32 */ @@ -335,11 +337,11 @@ void fw_iso_resource_manage(struct fw_card *card, int generation, if (channels_hi) c = manage_channel(card, irm_id, generation, channels_hi, CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_HI, - allocate, buffer); + allocate); if (channels_lo && c < 0) { c = manage_channel(card, irm_id, generation, channels_lo, CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_LO, - allocate, buffer); + allocate); if (c >= 0) c += 32; } @@ -351,14 +353,13 @@ void fw_iso_resource_manage(struct fw_card *card, int generation, if (*bandwidth == 0) return; - ret = manage_bandwidth(card, irm_id, generation, *bandwidth, - allocate, buffer); + ret = manage_bandwidth(card, irm_id, generation, *bandwidth, allocate); if (ret < 0) *bandwidth = 0; if (allocate && ret < 0) { if (c >= 0) - deallocate_channel(card, irm_id, generation, c, buffer); + deallocate_channel(card, irm_id, generation, c); *channel = ret; } } diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index d00f8ce902cc..77275fdf6c1f 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -326,8 +326,8 @@ static int allocate_tlabel(struct fw_card *card) * It will contain tag, channel, and sy data instead of a node ID then. * * The payload buffer at @data is going to be DMA-mapped except in case of - * quadlet-sized payload or of local (loopback) requests. Hence make sure that - * the buffer complies with the restrictions for DMA-mapped memory. The + * @length <= 8 or of local (loopback) requests. Hence make sure that the + * buffer complies with the restrictions of the streaming DMA mapping API. * @payload must not be freed before the @callback is called. * * In case of request types without payload, @data is NULL and @length is 0. @@ -411,7 +411,8 @@ static void transaction_callback(struct fw_card *card, int rcode, * * Returns the RCODE. See fw_send_request() for parameter documentation. * Unlike fw_send_request(), @data points to the payload of the request or/and - * to the payload of the response. + * to the payload of the response. DMA mapping restrictions apply to outbound + * request payloads of >= 8 bytes but not to inbound response payloads. 
*/ int fw_run_transaction(struct fw_card *card, int tcode, int destination_id, int generation, int speed, unsigned long long offset, diff --git a/drivers/media/dvb/firewire/firedtv-avc.c b/drivers/media/dvb/firewire/firedtv-avc.c index fc5ccd8c923a..21c52e3b522e 100644 --- a/drivers/media/dvb/firewire/firedtv-avc.c +++ b/drivers/media/dvb/firewire/firedtv-avc.c @@ -1320,14 +1320,10 @@ static int cmp_read(struct firedtv *fdtv, u64 addr, __be32 *data) { int ret; - mutex_lock(&fdtv->avc_mutex); - ret = fdtv_read(fdtv, addr, data); if (ret < 0) dev_err(fdtv->device, "CMP: read I/O error\n"); - mutex_unlock(&fdtv->avc_mutex); - return ret; } @@ -1335,18 +1331,9 @@ static int cmp_lock(struct firedtv *fdtv, u64 addr, __be32 data[]) { int ret; - mutex_lock(&fdtv->avc_mutex); - - /* data[] is stack-allocated and should not be DMA-mapped. */ - memcpy(fdtv->avc_data, data, 8); - - ret = fdtv_lock(fdtv, addr, fdtv->avc_data); + ret = fdtv_lock(fdtv, addr, data); if (ret < 0) dev_err(fdtv->device, "CMP: lock I/O error\n"); - else - memcpy(data, fdtv->avc_data, 8); - - mutex_unlock(&fdtv->avc_mutex); return ret; } diff --git a/include/linux/firewire.h b/include/linux/firewire.h index c64f3680d4f1..de90e1ff8488 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -125,7 +125,6 @@ struct fw_card { struct delayed_work bm_work; /* bus manager job */ int bm_retries; int bm_generation; - __be32 bm_transaction_data[2]; int bm_node_id; bool bm_abdicate; @@ -447,6 +446,6 @@ int fw_iso_context_stop(struct fw_iso_context *ctx); void fw_iso_context_destroy(struct fw_iso_context *ctx); void fw_iso_resource_manage(struct fw_card *card, int generation, u64 channels_mask, int *channel, int *bandwidth, - bool allocate, __be32 buffer[2]); + bool allocate); #endif /* _LINUX_FIREWIRE_H */ diff --git a/sound/firewire/cmp.c b/sound/firewire/cmp.c index 4a37f3a6fab9..14cacbc655dd 100644 --- a/sound/firewire/cmp.c +++ b/sound/firewire/cmp.c @@ -49,10 +49,9 @@ static int pcr_modify(struct cmp_connection *c, enum bus_reset_handling bus_reset_handling) { struct fw_device *device = fw_parent_device(c->resources.unit); - __be32 *buffer = c->resources.buffer; int generation = c->resources.generation; int rcode, errors = 0; - __be32 old_arg; + __be32 old_arg, buffer[2]; int err; buffer[0] = c->last_pcr_value; diff --git a/sound/firewire/iso-resources.c b/sound/firewire/iso-resources.c index 775dbd5f3445..bb9c0c1fb529 100644 --- a/sound/firewire/iso-resources.c +++ b/sound/firewire/iso-resources.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include "iso-resources.h" @@ -25,10 +24,6 @@ */ int fw_iso_resources_init(struct fw_iso_resources *r, struct fw_unit *unit) { - r->buffer = kmalloc(2 * 4, GFP_KERNEL); - if (!r->buffer) - return -ENOMEM; - r->channels_mask = ~0uLL; r->unit = fw_unit_get(unit); mutex_init(&r->mutex); @@ -44,7 +39,6 @@ int fw_iso_resources_init(struct fw_iso_resources *r, struct fw_unit *unit) void fw_iso_resources_destroy(struct fw_iso_resources *r) { WARN_ON(r->allocated); - kfree(r->buffer); mutex_destroy(&r->mutex); fw_unit_put(r->unit); } @@ -131,7 +125,7 @@ retry_after_bus_reset: bandwidth = r->bandwidth + r->bandwidth_overhead; fw_iso_resource_manage(card, r->generation, r->channels_mask, - &channel, &bandwidth, true, r->buffer); + &channel, &bandwidth, true); if (channel == -EAGAIN) { mutex_unlock(&r->mutex); goto retry_after_bus_reset; @@ -184,7 +178,7 @@ int fw_iso_resources_update(struct fw_iso_resources *r) bandwidth = r->bandwidth + r->bandwidth_overhead; 
fw_iso_resource_manage(card, r->generation, 1uLL << r->channel, - &channel, &bandwidth, true, r->buffer); + &channel, &bandwidth, true); /* * When another bus reset happens, pretend that the allocation * succeeded; we will try again for the new generation later. @@ -220,7 +214,7 @@ void fw_iso_resources_free(struct fw_iso_resources *r) if (r->allocated) { bandwidth = r->bandwidth + r->bandwidth_overhead; fw_iso_resource_manage(card, r->generation, 1uLL << r->channel, - &channel, &bandwidth, false, r->buffer); + &channel, &bandwidth, false); if (channel < 0) dev_err(&r->unit->device, "isochronous resource deallocation failed\n"); diff --git a/sound/firewire/iso-resources.h b/sound/firewire/iso-resources.h index 3f0730e4d841..5a9af7c61657 100644 --- a/sound/firewire/iso-resources.h +++ b/sound/firewire/iso-resources.h @@ -24,7 +24,6 @@ struct fw_iso_resources { unsigned int bandwidth_overhead; int generation; /* in which allocation is valid */ bool allocated; - __be32 *buffer; }; int fw_iso_resources_init(struct fw_iso_resources *r, -- cgit v1.2.3 From 13882a82ee1646336c3996c93b4a560a55d2a419 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 2 May 2011 09:33:56 +0200 Subject: firewire: optimize iso queueing by setting wake only after the last packet When queueing iso packets, the run time is dominated by the two MMIO accesses that set the DMA context's wake bit. Because most drivers submit packets in batches, we can save much time by removing all but the last wakeup. The internal kernel API is changed to require a call to fw_iso_context_queue_flush() after a batch of queued packets. The user space API does not change, so one call to FW_CDEV_IOC_QUEUE_ISO must specify multiple packets to take advantage of this optimization. In my measurements, this patch reduces the time needed to queue fifty skip packets from userspace to one sixth on a 2.5 GHz CPU, or to one third at 800 MHz. 
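As a rough illustration of the intended calling pattern (the stream structure "s" and its fields below are hypothetical placeholders, not part of this patch), a driver now queues its whole batch first and wakes the DMA context once at the end:

	for (i = 0; i < packets; i++) {
		err = fw_iso_context_queue(s->context, &s->packet[i],
					   &s->buffer, s->payload_offsets[i]);
		if (err < 0)
			break;
	}
	/* one MMIO wake for the whole batch instead of one per packet */
	fw_iso_context_queue_flush(s->context);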
Signed-off-by: Clemens Ladisch Signed-off-by: Stefan Richter --- drivers/firewire/core-card.c | 5 +++++ drivers/firewire/core-cdev.c | 1 + drivers/firewire/core-iso.c | 6 ++++++ drivers/firewire/core.h | 2 ++ drivers/firewire/net.c | 4 +++- drivers/firewire/ohci.c | 19 +++++++++++++++---- drivers/media/dvb/firewire/firedtv-fw.c | 1 + include/linux/firewire.h | 1 + sound/firewire/amdtp.c | 1 + 9 files changed, 35 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index e119f1e6ba47..f05fc7bfceeb 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -630,6 +630,10 @@ static int dummy_queue_iso(struct fw_iso_context *ctx, struct fw_iso_packet *p, return -ENODEV; } +static void dummy_flush_queue_iso(struct fw_iso_context *ctx) +{ +} + static const struct fw_card_driver dummy_driver_template = { .read_phy_reg = dummy_read_phy_reg, .update_phy_reg = dummy_update_phy_reg, @@ -641,6 +645,7 @@ static const struct fw_card_driver dummy_driver_template = { .start_iso = dummy_start_iso, .set_iso_channels = dummy_set_iso_channels, .queue_iso = dummy_queue_iso, + .flush_queue_iso = dummy_flush_queue_iso, }; void fw_card_release(struct kref *kref) diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 2a3f1c4d6906..64768c2194f1 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -1107,6 +1107,7 @@ static int ioctl_queue_iso(struct client *client, union ioctl_arg *arg) payload += u.packet.payload_length; count++; } + fw_iso_context_queue_flush(ctx); a->size -= uptr_to_u64(p) - a->packets; a->packets = uptr_to_u64(p); diff --git a/drivers/firewire/core-iso.c b/drivers/firewire/core-iso.c index f872ede5af37..57c3973093ad 100644 --- a/drivers/firewire/core-iso.c +++ b/drivers/firewire/core-iso.c @@ -185,6 +185,12 @@ int fw_iso_context_queue(struct fw_iso_context *ctx, } EXPORT_SYMBOL(fw_iso_context_queue); +void fw_iso_context_queue_flush(struct fw_iso_context *ctx) +{ + ctx->card->driver->flush_queue_iso(ctx); +} +EXPORT_SYMBOL(fw_iso_context_queue_flush); + int fw_iso_context_stop(struct fw_iso_context *ctx) { return ctx->card->driver->stop_iso(ctx); diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h index 25e729cde2f7..0fe4e4e6eda7 100644 --- a/drivers/firewire/core.h +++ b/drivers/firewire/core.h @@ -97,6 +97,8 @@ struct fw_card_driver { struct fw_iso_buffer *buffer, unsigned long payload); + void (*flush_queue_iso)(struct fw_iso_context *ctx); + int (*stop_iso)(struct fw_iso_context *ctx); }; diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index 3f04dd3681cf..b9762d07198d 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -881,7 +881,9 @@ static void fwnet_receive_broadcast(struct fw_iso_context *context, spin_unlock_irqrestore(&dev->lock, flags); - if (retval < 0) + if (retval >= 0) + fw_iso_context_queue_flush(dev->broadcast_rcv_context); + else fw_error("requeue failed\n"); } diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index f9f55703375e..438e6c831170 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -1192,9 +1192,6 @@ static void context_append(struct context *ctx, wmb(); /* finish init of new descriptors before branch_address update */ ctx->prev->branch_address = cpu_to_le32(d_bus | z); ctx->prev = find_branch_descriptor(d, z); - - reg_write(ctx->ohci, CONTROL_SET(ctx->regs), CONTEXT_WAKE); - flush_writes(ctx->ohci); } static void context_stop(struct context *ctx) 
@@ -1348,8 +1345,12 @@ static int at_context_queue_packet(struct context *ctx, context_append(ctx, d, z, 4 - z); - if (!ctx->running) + if (ctx->running) { + reg_write(ohci, CONTROL_SET(ctx->regs), CONTEXT_WAKE); + flush_writes(ohci); + } else { context_run(ctx, 0); + } return 0; } @@ -3121,6 +3122,15 @@ static int ohci_queue_iso(struct fw_iso_context *base, return ret; } +static void ohci_flush_queue_iso(struct fw_iso_context *base) +{ + struct context *ctx = + &container_of(base, struct iso_context, base)->context; + + reg_write(ctx->ohci, CONTROL_SET(ctx->regs), CONTEXT_WAKE); + flush_writes(ctx->ohci); +} + static const struct fw_card_driver ohci_driver = { .enable = ohci_enable, .read_phy_reg = ohci_read_phy_reg, @@ -3137,6 +3147,7 @@ static const struct fw_card_driver ohci_driver = { .free_iso_context = ohci_free_iso_context, .set_iso_channels = ohci_set_iso_channels, .queue_iso = ohci_queue_iso, + .flush_queue_iso = ohci_flush_queue_iso, .start_iso = ohci_start_iso, .stop_iso = ohci_stop_iso, }; diff --git a/drivers/media/dvb/firewire/firedtv-fw.c b/drivers/media/dvb/firewire/firedtv-fw.c index 8022b743af91..864b6274c729 100644 --- a/drivers/media/dvb/firewire/firedtv-fw.c +++ b/drivers/media/dvb/firewire/firedtv-fw.c @@ -125,6 +125,7 @@ static void handle_iso(struct fw_iso_context *context, u32 cycle, i = (i + 1) & (N_PACKETS - 1); } + fw_iso_context_queue_flush(ctx->context); ctx->current_packet = i; } diff --git a/include/linux/firewire.h b/include/linux/firewire.h index de90e1ff8488..c0fb405bb435 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -440,6 +440,7 @@ int fw_iso_context_queue(struct fw_iso_context *ctx, struct fw_iso_packet *packet, struct fw_iso_buffer *buffer, unsigned long payload); +void fw_iso_context_queue_flush(struct fw_iso_context *ctx); int fw_iso_context_start(struct fw_iso_context *ctx, int cycle, int sync, int tags); int fw_iso_context_stop(struct fw_iso_context *ctx); diff --git a/sound/firewire/amdtp.c b/sound/firewire/amdtp.c index b18140ff2b93..87657dd7714c 100644 --- a/sound/firewire/amdtp.c +++ b/sound/firewire/amdtp.c @@ -396,6 +396,7 @@ static void out_packet_callback(struct fw_iso_context *context, u32 cycle, for (i = 0; i < packets; ++i) queue_out_packet(s, ++cycle); + fw_iso_context_queue_flush(s->context); } static int queue_initial_skip_packets(struct amdtp_out_stream *s) -- cgit v1.2.3 From 105e53f863c04e1d9e5bb34bf753c9fdbce6a60c Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 1 May 2011 20:50:31 +0200 Subject: firewire: sbp2: parallelize login, reconnect, logout The struct sbp2_logical_unit.work items can all be executed in parallel but are not reentrant. Furthermore, reconnect or re-login work must be executed in a WQ_MEM_RECLAIM workqueue. Hence replace the old single-threaded firewire-sbp2 workqueue by a concurrency-managed but non-reentrant workqueue with rescuer. firewire-core already maintains one, hence use this one. In earlier versions of this change, I observed occasional failures of parallel INQUIRY to an Initio INIC-2430 FireWire 800 to dual IDE bridge. More testing indicates that parallel INQUIRY is not actually a problem, but too quick successions of logout and login + INQUIRY, e.g. a quick sequence of cable plugout and plugin, can result in failed INQUIRY. This does not seem to be something that should or could be addressed by serialization. 
Another dual-LU device to which I currently have access to, an OXUF924DSB FireWire 800 to dual SATA bridge with firmware from MacPower, has been successfully tested with this too. This change is beneficial to environments with two or more FireWire storage devices, especially if they are located on the same bus. Management tasks that should be performed as soon and as quickly as possible, especially reconnect, are no longer held up by tasks on other devices that may take a long time, especially login with INQUIRY and sd or sr driver probe. Signed-off-by: Stefan Richter --- drivers/firewire/core-card.c | 4 ++-- drivers/firewire/core-cdev.c | 2 +- drivers/firewire/core-device.c | 5 +++-- drivers/firewire/core-transaction.c | 12 ++++++------ drivers/firewire/core.h | 2 -- drivers/firewire/sbp2.c | 9 +-------- include/linux/firewire.h | 2 ++ 7 files changed, 15 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index bb8c4d22b03e..29d2423fae6d 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -228,7 +228,7 @@ void fw_schedule_bus_reset(struct fw_card *card, bool delayed, bool short_reset) /* Use an arbitrary short delay to combine multiple reset requests. */ fw_card_get(card); - if (!queue_delayed_work(fw_wq, &card->br_work, + if (!queue_delayed_work(fw_workqueue, &card->br_work, delayed ? DIV_ROUND_UP(HZ, 100) : 0)) fw_card_put(card); } @@ -241,7 +241,7 @@ static void br_work(struct work_struct *work) /* Delay for 2s after last reset per IEEE 1394 clause 8.2.1. */ if (card->reset_jiffies != 0 && time_before64(get_jiffies_64(), card->reset_jiffies + 2 * HZ)) { - if (!queue_delayed_work(fw_wq, &card->br_work, 2 * HZ)) + if (!queue_delayed_work(fw_workqueue, &card->br_work, 2 * HZ)) fw_card_put(card); return; } diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index aa1131d26e30..b1c11775839c 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -149,7 +149,7 @@ static void release_iso_resource(struct client *, struct client_resource *); static void schedule_iso_resource(struct iso_resource *r, unsigned long delay) { client_get(r->client); - if (!queue_delayed_work(fw_wq, &r->work, delay)) + if (!queue_delayed_work(fw_workqueue, &r->work, delay)) client_put(r->client); } diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index ef900d923f15..95a471401892 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -725,12 +725,13 @@ struct fw_device *fw_device_get_by_devt(dev_t devt) return device; } -struct workqueue_struct *fw_wq; +struct workqueue_struct *fw_workqueue; +EXPORT_SYMBOL(fw_workqueue); static void fw_schedule_device_work(struct fw_device *device, unsigned long delay) { - queue_delayed_work(fw_wq, &device->work, delay); + queue_delayed_work(fw_workqueue, &device->work, delay); } /* diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index d4c28a217b2c..334b82a3542c 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -1214,21 +1214,21 @@ static int __init fw_core_init(void) { int ret; - fw_wq = alloc_workqueue(KBUILD_MODNAME, - WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); - if (!fw_wq) + fw_workqueue = alloc_workqueue("firewire", + WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); + if (!fw_workqueue) return -ENOMEM; ret = bus_register(&fw_bus_type); if (ret < 0) { - destroy_workqueue(fw_wq); + 
destroy_workqueue(fw_workqueue); return ret; } fw_cdev_major = register_chrdev(0, "firewire", &fw_device_ops); if (fw_cdev_major < 0) { bus_unregister(&fw_bus_type); - destroy_workqueue(fw_wq); + destroy_workqueue(fw_workqueue); return fw_cdev_major; } @@ -1244,7 +1244,7 @@ static void __exit fw_core_cleanup(void) { unregister_chrdev(fw_cdev_major, "firewire"); bus_unregister(&fw_bus_type); - destroy_workqueue(fw_wq); + destroy_workqueue(fw_workqueue); idr_destroy(&fw_device_idr); } diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h index 00ea7730c6a7..0fe4e4e6eda7 100644 --- a/drivers/firewire/core.h +++ b/drivers/firewire/core.h @@ -140,8 +140,6 @@ void fw_cdev_handle_phy_packet(struct fw_card *card, struct fw_packet *p); extern struct rw_semaphore fw_device_rwsem; extern struct idr fw_device_idr; extern int fw_cdev_major; -struct workqueue_struct; -extern struct workqueue_struct *fw_wq; struct fw_device *fw_device_get_by_devt(dev_t devt); int fw_device_set_broadcast_channel(struct device *dev, void *gen); diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index 2aafc614ae14..41841a3e3f99 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -826,8 +826,6 @@ static void sbp2_target_put(struct sbp2_target *tgt) kref_put(&tgt->kref, sbp2_release_target); } -static struct workqueue_struct *sbp2_wq; - /* * Always get the target's kref when scheduling work on one its units. * Each workqueue job is responsible to call sbp2_target_put() upon return. @@ -835,7 +833,7 @@ static struct workqueue_struct *sbp2_wq; static void sbp2_queue_work(struct sbp2_logical_unit *lu, unsigned long delay) { sbp2_target_get(lu->tgt); - if (!queue_delayed_work(sbp2_wq, &lu->work, delay)) + if (!queue_delayed_work(fw_workqueue, &lu->work, delay)) sbp2_target_put(lu->tgt); } @@ -1645,17 +1643,12 @@ MODULE_ALIAS("sbp2"); static int __init sbp2_init(void) { - sbp2_wq = create_singlethread_workqueue(KBUILD_MODNAME); - if (!sbp2_wq) - return -ENOMEM; - return driver_register(&sbp2_driver.driver); } static void __exit sbp2_cleanup(void) { driver_unregister(&sbp2_driver.driver); - destroy_workqueue(sbp2_wq); } module_init(sbp2_init); diff --git a/include/linux/firewire.h b/include/linux/firewire.h index c0fb405bb435..5e6f42789afe 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -449,4 +449,6 @@ void fw_iso_resource_manage(struct fw_card *card, int generation, u64 channels_mask, int *channel, int *bandwidth, bool allocate); +extern struct workqueue_struct *fw_workqueue; + #endif /* _LINUX_FIREWIRE_H */ -- cgit v1.2.3 From 1b9ba000177ee47bcc5b44c7c34e48e735f5f9b1 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 9 May 2011 13:08:06 +0300 Subject: usb: gadget: composite: Allow function drivers to pause control transfers Some USB function drivers (e.g. f_mass_storage.c) need to delay or defer the data/status stages of standard control requests like SET_CONFIGURATION or SET_INTERFACE till they are done with their bookkeeping and are actually ready for accepting new commands to their interface. They can now achieve this functionality by returning USB_GADGET_DELAYED_STATUS in their setup handlers (e.g. set_alt()). The composite framework will then defer completion of the control transfer by not completing the data/status stages. This ensures that the host does not send new packets to the interface till the function driver is ready to take them. 
When the function driver that requested for USB_GADGET_DELAYED_STATUS is done with its bookkeeping, it should signal the composite framework to continue with the data/status stages of the control transfer. It can do so by invoking the new API usb_composite_setup_continue(). This is where the control transfer's data/status stages are completed and host can initiate new transfers. The DELAYED_STATUS mechanism is currently only supported if the expected data phase is 0 bytes (i.e. w_length == 0). Since SET_CONFIGURATION and SET_INTERFACE are the only cases that will use this mechanism, this is not a limitation. Signed-off-by: Roger Quadros Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 62 +++++++++++++++++++++++++++++++++++++++++- include/linux/usb/composite.h | 16 ++++++++++- 2 files changed, 76 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 82314ed22506..5cbb1a41c223 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -461,12 +461,23 @@ static int set_config(struct usb_composite_dev *cdev, reset_config(cdev); goto done; } + + if (result == USB_GADGET_DELAYED_STATUS) { + DBG(cdev, + "%s: interface %d (%s) requested delayed status\n", + __func__, tmp, f->name); + cdev->delayed_status++; + DBG(cdev, "delayed_status count %d\n", + cdev->delayed_status); + } } /* when we return, be sure our power usage is valid */ power = c->bMaxPower ? (2 * c->bMaxPower) : CONFIG_USB_GADGET_VBUS_DRAW; done: usb_gadget_vbus_draw(gadget, power); + if (result >= 0 && cdev->delayed_status) + result = USB_GADGET_DELAYED_STATUS; return result; } @@ -895,6 +906,14 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) if (w_value && !f->set_alt) break; value = f->set_alt(f, w_index, w_value); + if (value == USB_GADGET_DELAYED_STATUS) { + DBG(cdev, + "%s: interface %d (%s) requested delayed status\n", + __func__, intf, f->name); + cdev->delayed_status++; + DBG(cdev, "delayed_status count %d\n", + cdev->delayed_status); + } break; case USB_REQ_GET_INTERFACE: if (ctrl->bRequestType != (USB_DIR_IN|USB_RECIP_INTERFACE)) @@ -958,7 +977,7 @@ unknown: } /* respond with data transfer before status phase? */ - if (value >= 0) { + if (value >= 0 && value != USB_GADGET_DELAYED_STATUS) { req->length = value; req->zero = value < w_length; value = usb_ep_queue(gadget->ep0, req, GFP_ATOMIC); @@ -967,6 +986,10 @@ unknown: req->status = 0; composite_setup_complete(gadget->ep0, req); } + } else if (value == USB_GADGET_DELAYED_STATUS && w_length != 0) { + WARN(cdev, + "%s: Delayed status not supported for w_length != 0", + __func__); } done: @@ -1289,3 +1312,40 @@ void usb_composite_unregister(struct usb_composite_driver *driver) return; usb_gadget_unregister_driver(&composite_driver); } + +/** + * usb_composite_setup_continue() - Continue with the control transfer + * @cdev: the composite device who's control transfer was kept waiting + * + * This function must be called by the USB function driver to continue + * with the control transfer's data/status stage in case it had requested to + * delay the data/status stages. A USB function's setup handler (e.g. set_alt()) + * can request the composite framework to delay the setup request's data/status + * stages by returning USB_GADGET_DELAYED_STATUS. 
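As a sketch of the intended usage (the f_example structures and helpers below are made-up placeholders, not part of this patch), a function driver defers the status stage from its set_alt() handler and releases it once its own bookkeeping has finished:

	static int f_example_set_alt(struct usb_function *f,
				     unsigned intf, unsigned alt)
	{
		struct f_example *ex = func_to_example(f);	/* hypothetical */

		/* kick off the driver's own reconfiguration asynchronously */
		schedule_work(&ex->reconfig_work);

		/* ask composite.c to hold back the 0-byte status stage */
		return USB_GADGET_DELAYED_STATUS;
	}

	static void f_example_reconfig_work(struct work_struct *work)
	{
		struct f_example *ex = container_of(work, struct f_example,
						    reconfig_work);

		/* ... interface is now ready to accept new commands ... */
		usb_composite_setup_continue(ex->func.config->cdev);
	}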
+ */ +void usb_composite_setup_continue(struct usb_composite_dev *cdev) +{ + int value; + struct usb_request *req = cdev->req; + unsigned long flags; + + DBG(cdev, "%s\n", __func__); + spin_lock_irqsave(&cdev->lock, flags); + + if (cdev->delayed_status == 0) { + WARN(cdev, "%s: Unexpected call\n", __func__); + + } else if (--cdev->delayed_status == 0) { + DBG(cdev, "%s: Completing delayed status\n", __func__); + req->length = 0; + value = usb_ep_queue(cdev->gadget->ep0, req, GFP_ATOMIC); + if (value < 0) { + DBG(cdev, "ep_queue --> %d\n", value); + req->status = 0; + composite_setup_complete(cdev->gadget->ep0, req); + } + } + + spin_unlock_irqrestore(&cdev->lock, flags); +} + diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 882a084a8411..b78cba466d3d 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -37,6 +37,14 @@ #include #include +/* + * USB function drivers should return USB_GADGET_DELAYED_STATUS if they + * wish to delay the data/status stages of the control transfer till they + * are ready. The control transfer will then be kept from completing till + * all the function drivers that requested for USB_GADGET_DELAYED_STAUS + * invoke usb_composite_setup_continue(). + */ +#define USB_GADGET_DELAYED_STATUS 0x7fff /* Impossibly large value */ struct usb_configuration; @@ -285,6 +293,7 @@ struct usb_composite_driver { extern int usb_composite_probe(struct usb_composite_driver *driver, int (*bind)(struct usb_composite_dev *cdev)); extern void usb_composite_unregister(struct usb_composite_driver *driver); +extern void usb_composite_setup_continue(struct usb_composite_dev *cdev); /** @@ -342,7 +351,12 @@ struct usb_composite_dev { */ unsigned deactivations; - /* protects at least deactivation count */ + /* the composite driver won't complete the control transfer's + * data/status stages till delayed_status is zero. + */ + int delayed_status; + + /* protects deactivations and delayed_status counts*/ spinlock_t lock; }; -- cgit v1.2.3 From 6b4e306aa3dc94a0545eb9279475b1ab6209a31f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 7 Mar 2010 16:41:34 -0800 Subject: ns: proc files for namespace naming policy. Create files under /proc//ns/ to allow controlling the namespaces of a process. This addresses three specific problems that can make namespaces hard to work with. - Namespaces require a dedicated process to pin them in memory. - It is not possible to use a namespace unless you are the child of the original creator. - Namespaces don't have names that userspace can use to talk about them. The namespace files under /proc//ns/ can be opened and the file descriptor can be used to talk about a specific namespace, and to keep the specified namespace alive. A namespace can be kept alive by either holding the file descriptor open or bind mounting the file someplace else. aka: mount --bind /proc/self/ns/net /some/filesystem/path mount --bind /proc/self/fd/ /some/filesystem/path This allows namespaces to be named with userspace policy. It requires additional support to make use of these filedescriptors and that will be comming in the following patches. Acked-by: Daniel Lezcano Signed-off-by: Eric W. 
Biederman --- fs/proc/Makefile | 1 + fs/proc/base.c | 20 +++--- fs/proc/inode.c | 7 ++ fs/proc/internal.h | 18 +++++ fs/proc/namespaces.c | 188 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/proc_fs.h | 18 +++++ 6 files changed, 241 insertions(+), 11 deletions(-) create mode 100644 fs/proc/namespaces.c (limited to 'include/linux') diff --git a/fs/proc/Makefile b/fs/proc/Makefile index df434c5f28fb..c1c729335924 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -20,6 +20,7 @@ proc-y += stat.o proc-y += uptime.o proc-y += version.o proc-y += softirqs.o +proc-y += namespaces.o proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o proc-$(CONFIG_NET) += proc_net.o proc-$(CONFIG_PROC_KCORE) += kcore.o diff --git a/fs/proc/base.c b/fs/proc/base.c index dfa532730e55..dc8bca72b002 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -600,7 +600,7 @@ static int proc_fd_access_allowed(struct inode *inode) return allowed; } -static int proc_setattr(struct dentry *dentry, struct iattr *attr) +int proc_setattr(struct dentry *dentry, struct iattr *attr) { int error; struct inode *inode = dentry->d_inode; @@ -1736,8 +1736,7 @@ static int task_dumpable(struct task_struct *task) return 0; } - -static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) +struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) { struct inode * inode; struct proc_inode *ei; @@ -1779,7 +1778,7 @@ out_unlock: return NULL; } -static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; struct task_struct *task; @@ -1820,7 +1819,7 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat * made this apply to all per process world readable and executable * directories. */ -static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) +int pid_revalidate(struct dentry *dentry, struct nameidata *nd) { struct inode *inode; struct task_struct *task; @@ -1862,7 +1861,7 @@ static int pid_delete_dentry(const struct dentry * dentry) return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; } -static const struct dentry_operations pid_dentry_operations = +const struct dentry_operations pid_dentry_operations = { .d_revalidate = pid_revalidate, .d_delete = pid_delete_dentry, @@ -1870,9 +1869,6 @@ static const struct dentry_operations pid_dentry_operations = /* Lookups */ -typedef struct dentry *instantiate_t(struct inode *, struct dentry *, - struct task_struct *, const void *); - /* * Fill a directory entry. * @@ -1885,8 +1881,8 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *, * reported by readdir in sync with the inode numbers reported * by stat. 
*/ -static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, - char *name, int len, +int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, + const char *name, int len, instantiate_t instantiate, struct task_struct *task, const void *ptr) { struct dentry *child, *dir = filp->f_path.dentry; @@ -2820,6 +2816,7 @@ static const struct pid_entry tgid_base_stuff[] = { DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), + DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), #ifdef CONFIG_NET DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), #endif @@ -3168,6 +3165,7 @@ out_no_task: static const struct pid_entry tid_base_stuff[] = { DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), + DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), REG("environ", S_IRUSR, proc_environ_operations), INF("auxv", S_IRUSR, proc_pid_auxv), ONE("status", S_IRUGO, proc_pid_status), diff --git a/fs/proc/inode.c b/fs/proc/inode.c index d15aa1b1cc8f..74b48cfa1bb2 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -28,6 +28,7 @@ static void proc_evict_inode(struct inode *inode) { struct proc_dir_entry *de; struct ctl_table_header *head; + const struct proc_ns_operations *ns_ops; truncate_inode_pages(&inode->i_data, 0); end_writeback(inode); @@ -44,6 +45,10 @@ static void proc_evict_inode(struct inode *inode) rcu_assign_pointer(PROC_I(inode)->sysctl, NULL); sysctl_head_put(head); } + /* Release any associated namespace */ + ns_ops = PROC_I(inode)->ns_ops; + if (ns_ops && ns_ops->put) + ns_ops->put(PROC_I(inode)->ns); } static struct kmem_cache * proc_inode_cachep; @@ -62,6 +67,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb) ei->pde = NULL; ei->sysctl = NULL; ei->sysctl_entry = NULL; + ei->ns = NULL; + ei->ns_ops = NULL; inode = &ei->vfs_inode; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; return inode; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index c03e8d3a3a5b..96245a1b1a7c 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -119,3 +119,21 @@ struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); */ int proc_readdir(struct file *, void *, filldir_t); struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); + + + +/* Lookups */ +typedef struct dentry *instantiate_t(struct inode *, struct dentry *, + struct task_struct *, const void *); +int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, + const char *name, int len, + instantiate_t instantiate, struct task_struct *task, const void *ptr); +int pid_revalidate(struct dentry *dentry, struct nameidata *nd); +struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task); +extern const struct dentry_operations pid_dentry_operations; +int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); +int proc_setattr(struct dentry *dentry, struct iattr *attr); + +extern const struct inode_operations proc_ns_dir_inode_operations; +extern const struct file_operations proc_ns_dir_operations; + diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c new file mode 100644 index 000000000000..6ae9f07d59ee --- 
/dev/null +++ b/fs/proc/namespaces.c @@ -0,0 +1,188 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + + +static const struct proc_ns_operations *ns_entries[] = { +}; + +static const struct file_operations ns_file_operations = { + .llseek = no_llseek, +}; + +static struct dentry *proc_ns_instantiate(struct inode *dir, + struct dentry *dentry, struct task_struct *task, const void *ptr) +{ + const struct proc_ns_operations *ns_ops = ptr; + struct inode *inode; + struct proc_inode *ei; + struct dentry *error = ERR_PTR(-ENOENT); + + inode = proc_pid_make_inode(dir->i_sb, task); + if (!inode) + goto out; + + ei = PROC_I(inode); + inode->i_mode = S_IFREG|S_IRUSR; + inode->i_fop = &ns_file_operations; + ei->ns_ops = ns_ops; + ei->ns = ns_ops->get(task); + if (!ei->ns) + goto out_iput; + + dentry->d_op = &pid_dentry_operations; + d_add(dentry, inode); + /* Close the race of the process dying before we return the dentry */ + if (pid_revalidate(dentry, NULL)) + error = NULL; +out: + return error; +out_iput: + iput(inode); + goto out; +} + +static int proc_ns_fill_cache(struct file *filp, void *dirent, + filldir_t filldir, struct task_struct *task, + const struct proc_ns_operations *ops) +{ + return proc_fill_cache(filp, dirent, filldir, + ops->name, strlen(ops->name), + proc_ns_instantiate, task, ops); +} + +static int proc_ns_dir_readdir(struct file *filp, void *dirent, + filldir_t filldir) +{ + int i; + struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = dentry->d_inode; + struct task_struct *task = get_proc_task(inode); + const struct proc_ns_operations **entry, **last; + ino_t ino; + int ret; + + ret = -ENOENT; + if (!task) + goto out_no_task; + + ret = -EPERM; + if (!ptrace_may_access(task, PTRACE_MODE_READ)) + goto out; + + ret = 0; + i = filp->f_pos; + switch (i) { + case 0: + ino = inode->i_ino; + if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) + goto out; + i++; + filp->f_pos++; + /* fall through */ + case 1: + ino = parent_ino(dentry); + if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) + goto out; + i++; + filp->f_pos++; + /* fall through */ + default: + i -= 2; + if (i >= ARRAY_SIZE(ns_entries)) { + ret = 1; + goto out; + } + entry = ns_entries + i; + last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; + while (entry <= last) { + if (proc_ns_fill_cache(filp, dirent, filldir, + task, *entry) < 0) + goto out; + filp->f_pos++; + entry++; + } + } + + ret = 1; +out: + put_task_struct(task); +out_no_task: + return ret; +} + +const struct file_operations proc_ns_dir_operations = { + .read = generic_read_dir, + .readdir = proc_ns_dir_readdir, +}; + +static struct dentry *proc_ns_dir_lookup(struct inode *dir, + struct dentry *dentry, struct nameidata *nd) +{ + struct dentry *error; + struct task_struct *task = get_proc_task(dir); + const struct proc_ns_operations **entry, **last; + unsigned int len = dentry->d_name.len; + + error = ERR_PTR(-ENOENT); + + if (!task) + goto out_no_task; + + error = ERR_PTR(-EPERM); + if (!ptrace_may_access(task, PTRACE_MODE_READ)) + goto out; + + last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; + for (entry = ns_entries; entry <= last; entry++) { + if (strlen((*entry)->name) != len) + continue; + if (!memcmp(dentry->d_name.name, (*entry)->name, len)) + break; + } + if (entry > last) + goto out; + + error = proc_ns_instantiate(dir, dentry, task, *entry); +out: + put_task_struct(task); +out_no_task: + return error; +} + +const struct 
inode_operations proc_ns_dir_inode_operations = { + .lookup = proc_ns_dir_lookup, + .getattr = pid_getattr, + .setattr = proc_setattr, +}; + +struct file *proc_ns_fget(int fd) +{ + struct file *file; + + file = fget(fd); + if (!file) + return ERR_PTR(-EBADF); + + if (file->f_op != &ns_file_operations) + goto out_invalid; + + return file; + +out_invalid: + fput(file); + return ERR_PTR(-EINVAL); +} + diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 838c1149251a..a6d2c6da5e5a 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -179,6 +179,8 @@ extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); extern struct file *get_mm_exe_file(struct mm_struct *mm); extern void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm); +extern struct file *proc_ns_fget(int fd); + #else #define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; }) @@ -239,6 +241,11 @@ static inline void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm) {} +static inline struct file *proc_ns_fget(int fd) +{ + return ERR_PTR(-EINVAL); +} + #endif /* CONFIG_PROC_FS */ #if !defined(CONFIG_PROC_KCORE) @@ -250,6 +257,15 @@ kclist_add(struct kcore_list *new, void *addr, size_t size, int type) extern void kclist_add(struct kcore_list *, void *, size_t, int type); #endif +struct nsproxy; +struct proc_ns_operations { + const char *name; + int type; + void *(*get)(struct task_struct *task); + void (*put)(void *ns); + int (*install)(struct nsproxy *nsproxy, void *ns); +}; + union proc_op { int (*proc_get_link)(struct inode *, struct path *); int (*proc_read)(struct task_struct *task, char *page); @@ -268,6 +284,8 @@ struct proc_inode { struct proc_dir_entry *pde; struct ctl_table_header *sysctl; struct ctl_table *sysctl_entry; + void *ns; + const struct proc_ns_operations *ns_ops; struct inode vfs_inode; }; -- cgit v1.2.3 From 13b6f57623bc485e116344fe91fbcb29f149242b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 7 Mar 2010 18:14:23 -0800 Subject: ns proc: Add support for the network namespace. Implementing file descriptors for the network namespace is simple and straight forward. Acked-by: David S. Miller Acked-by: Daniel Lezcano Signed-off-by: Eric W. 
Biederman --- fs/proc/namespaces.c | 3 +++ include/linux/proc_fs.h | 1 + net/core/net_namespace.c | 31 +++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+) (limited to 'include/linux') diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 6ae9f07d59ee..dcbd483e9915 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -16,6 +16,9 @@ static const struct proc_ns_operations *ns_entries[] = { +#ifdef CONFIG_NET_NS + &netns_operations, +#endif }; static const struct file_operations ns_file_operations = { diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index a6d2c6da5e5a..62126ec6ede9 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -265,6 +265,7 @@ struct proc_ns_operations { void (*put)(void *ns); int (*install)(struct nsproxy *nsproxy, void *ns); }; +extern const struct proc_ns_operations netns_operations; union proc_op { int (*proc_get_link)(struct inode *, struct path *); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 3f860261c5ee..bf7707e09a80 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -573,3 +573,34 @@ void unregister_pernet_device(struct pernet_operations *ops) mutex_unlock(&net_mutex); } EXPORT_SYMBOL_GPL(unregister_pernet_device); + +#ifdef CONFIG_NET_NS +static void *netns_get(struct task_struct *task) +{ + struct net *net; + rcu_read_lock(); + net = get_net(task->nsproxy->net_ns); + rcu_read_unlock(); + return net; +} + +static void netns_put(void *ns) +{ + put_net(ns); +} + +static int netns_install(struct nsproxy *nsproxy, void *ns) +{ + put_net(nsproxy->net_ns); + nsproxy->net_ns = get_net(ns); + return 0; +} + +const struct proc_ns_operations netns_operations = { + .name = "net", + .type = CLONE_NEWNET, + .get = netns_get, + .put = netns_put, + .install = netns_install, +}; +#endif -- cgit v1.2.3 From 34482e89a5218f0f9317abf1cfba3bb38b5c29dd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 7 Mar 2010 18:43:27 -0800 Subject: ns proc: Add support for the uts namespace Acked-by: Daniel Lezcano Signed-off-by: Eric W. 
Biederman --- fs/proc/namespaces.c | 3 +++ include/linux/proc_fs.h | 1 + kernel/utsname.c | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+) (limited to 'include/linux') diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index dcbd483e9915..b017181f1273 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -19,6 +19,9 @@ static const struct proc_ns_operations *ns_entries[] = { #ifdef CONFIG_NET_NS &netns_operations, #endif +#ifdef CONFIG_UTS_NS + &utsns_operations, +#endif }; static const struct file_operations ns_file_operations = { diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 62126ec6ede9..52aa89df8a6d 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -266,6 +266,7 @@ struct proc_ns_operations { int (*install)(struct nsproxy *nsproxy, void *ns); }; extern const struct proc_ns_operations netns_operations; +extern const struct proc_ns_operations utsns_operations; union proc_op { int (*proc_get_link)(struct inode *, struct path *); diff --git a/kernel/utsname.c b/kernel/utsname.c index 44646179eaba..bff131b9510a 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -15,6 +15,7 @@ #include #include #include +#include static struct uts_namespace *create_uts_ns(void) { @@ -79,3 +80,41 @@ void free_uts_ns(struct kref *kref) put_user_ns(ns->user_ns); kfree(ns); } + +static void *utsns_get(struct task_struct *task) +{ + struct uts_namespace *ns = NULL; + struct nsproxy *nsproxy; + + rcu_read_lock(); + nsproxy = task_nsproxy(task); + if (nsproxy) { + ns = nsproxy->uts_ns; + get_uts_ns(ns); + } + rcu_read_unlock(); + + return ns; +} + +static void utsns_put(void *ns) +{ + put_uts_ns(ns); +} + +static int utsns_install(struct nsproxy *nsproxy, void *ns) +{ + get_uts_ns(ns); + put_uts_ns(nsproxy->uts_ns); + nsproxy->uts_ns = ns; + return 0; +} + +const struct proc_ns_operations utsns_operations = { + .name = "uts", + .type = CLONE_NEWUTS, + .get = utsns_get, + .put = utsns_put, + .install = utsns_install, +}; + -- cgit v1.2.3 From a00eaf11a223c63fbb212369d6db69ce4c55a2d1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 7 Mar 2010 18:48:39 -0800 Subject: ns proc: Add support for the ipc namespace Acked-by: Daniel Lezcano Signed-off-by: Eric W. 
Biederman --- fs/proc/namespaces.c | 3 +++ include/linux/proc_fs.h | 1 + ipc/namespace.c | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+) (limited to 'include/linux') diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index b017181f1273..f18d6d58bf79 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -22,6 +22,9 @@ static const struct proc_ns_operations *ns_entries[] = { #ifdef CONFIG_UTS_NS &utsns_operations, #endif +#ifdef CONFIG_IPC_NS + &ipcns_operations, +#endif }; static const struct file_operations ns_file_operations = { diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 52aa89df8a6d..a23f0b72a023 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -267,6 +267,7 @@ struct proc_ns_operations { }; extern const struct proc_ns_operations netns_operations; extern const struct proc_ns_operations utsns_operations; +extern const struct proc_ns_operations ipcns_operations; union proc_op { int (*proc_get_link)(struct inode *, struct path *); diff --git a/ipc/namespace.c b/ipc/namespace.c index 8054c8e5faf1..ce0a647869b1 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "util.h" @@ -140,3 +141,39 @@ void put_ipc_ns(struct ipc_namespace *ns) free_ipc_ns(ns); } } + +static void *ipcns_get(struct task_struct *task) +{ + struct ipc_namespace *ns = NULL; + struct nsproxy *nsproxy; + + rcu_read_lock(); + nsproxy = task_nsproxy(task); + if (nsproxy) + ns = get_ipc_ns(nsproxy->ipc_ns); + rcu_read_unlock(); + + return ns; +} + +static void ipcns_put(void *ns) +{ + return put_ipc_ns(ns); +} + +static int ipcns_install(struct nsproxy *nsproxy, void *ns) +{ + /* Ditch state from the old ipc namespace */ + exit_sem(current); + put_ipc_ns(nsproxy->ipc_ns); + nsproxy->ipc_ns = get_ipc_ns(ns); + return 0; +} + +const struct proc_ns_operations ipcns_operations = { + .name = "ipc", + .type = CLONE_NEWIPC, + .get = ipcns_get, + .put = ipcns_put, + .install = ipcns_install, +}; -- cgit v1.2.3 From f063052947f770845a6252f7fa24f6f624592a24 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 4 May 2011 17:51:50 -0700 Subject: net: Allow setting the network namespace by fd Take advantage of the new abstraction and allow network devices to be placed in any network namespace that we have a fd to talk about. Acked-by: David S. Miller Acked-by: Daniel Lezcano Signed-off-by: Eric W. 
Biederman --- include/linux/if_link.h | 1 + include/net/net_namespace.h | 1 + net/core/net_namespace.c | 33 +++++++++++++++++++++++++++++++-- net/core/rtnetlink.c | 5 ++++- 4 files changed, 37 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index f4a2e6b1b864..0ee969a5593d 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -136,6 +136,7 @@ enum { IFLA_PORT_SELF, IFLA_AF_SPEC, IFLA_GROUP, /* Group the device belongs to */ + IFLA_NET_NS_FD, __IFLA_MAX }; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 3ae491932bc8..dcc8f5749d3f 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -119,6 +119,7 @@ static inline struct net *copy_net_ns(unsigned long flags, struct net *net_ns) extern struct list_head net_namespace_list; extern struct net *get_net_ns_by_pid(pid_t pid); +extern struct net *get_net_ns_by_fd(int pid); #ifdef CONFIG_NET_NS extern void __put_net(struct net *net); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index bf7707e09a80..b7403ff4d6c6 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include @@ -343,6 +345,28 @@ struct net *get_net_ns_by_pid(pid_t pid) } EXPORT_SYMBOL_GPL(get_net_ns_by_pid); +struct net *get_net_ns_by_fd(int fd) +{ + struct proc_inode *ei; + struct file *file; + struct net *net; + + net = ERR_PTR(-EINVAL); + file = proc_ns_fget(fd); + if (!file) + goto out; + + ei = PROC_I(file->f_dentry->d_inode); + if (ei->ns_ops != &netns_operations) + goto out; + + net = get_net(ei->ns); +out: + if (file) + fput(file); + return net; +} + static int __init net_ns_init(void) { struct net_generic *ng; @@ -577,10 +601,15 @@ EXPORT_SYMBOL_GPL(unregister_pernet_device); #ifdef CONFIG_NET_NS static void *netns_get(struct task_struct *task) { - struct net *net; + struct net *net = NULL; + struct nsproxy *nsproxy; + rcu_read_lock(); - net = get_net(task->nsproxy->net_ns); + nsproxy = task_nsproxy(task); + if (nsproxy) + net = get_net(nsproxy->net_ns); rcu_read_unlock(); + return net; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d7c4bb4b1820..dca9602c62e4 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1043,6 +1043,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINKMODE] = { .type = NLA_U8 }, [IFLA_LINKINFO] = { .type = NLA_NESTED }, [IFLA_NET_NS_PID] = { .type = NLA_U32 }, + [IFLA_NET_NS_FD] = { .type = NLA_U32 }, [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, [IFLA_VFINFO_LIST] = {. 
type = NLA_NESTED }, [IFLA_VF_PORTS] = { .type = NLA_NESTED }, @@ -1091,6 +1092,8 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) */ if (tb[IFLA_NET_NS_PID]) net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); + else if (tb[IFLA_NET_NS_FD]) + net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD])); else net = get_net(src_net); return net; @@ -1221,7 +1224,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, int send_addr_notify = 0; int err; - if (tb[IFLA_NET_NS_PID]) { + if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) { struct net *net = rtnl_link_get_net(dev_net(dev), tb); if (IS_ERR(net)) { err = PTR_ERR(net); -- cgit v1.2.3 From c0a86a9bea55d505574120f3e9775e3844276505 Mon Sep 17 00:00:00 2001 From: Seth Heasley Date: Tue, 19 Apr 2011 16:35:15 -0700 Subject: x86/PCI: irq and pci_ids patch for Intel Panther Point DeviceIDs This patch adds the LPC Controller DeviceIDs for the Intel Panther Point PCH. Acked-by: Jean Delvare Signed-off-by: Seth Heasley Signed-off-by: Jesse Barnes --- arch/x86/pci/irq.c | 4 +++- include/linux/pci_ids.h | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 8201165bae28..372e9b8989b3 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -602,7 +602,9 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route || (device >= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN && device <= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX) || (device >= PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MIN && - device <= PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MAX)) { + device <= PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MAX) + || (device >= PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MIN && + device <= PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MAX)) { r->name = "PIIX/ICH"; r->get = pirq_piix_get; r->set = pirq_piix_set; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 4e2c9150a785..52f4ed4de490 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2480,6 +2480,9 @@ #define PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS 0x1c22 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN 0x1c41 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX 0x1c5f +#define PCI_DEVICE_ID_INTEL_PANTHERPOINT_SMBUS 0x1e22 +#define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MIN 0x1e40 +#define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MAX 0x1e5f #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS 0x1d22 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0 0x1d40 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1 0x1d41 -- cgit v1.2.3 From 81f8115305f821335cf9e16110bf0806f7b93283 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 28 Apr 2011 13:25:36 +0900 Subject: i2c: i2c-sh_mobile bus speed platform data V2 Add support to the i2c-sh_mobile driver for setting the I2C bus speed using platform data. 
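For instance, board setup code could select a 400 kHz bus roughly like this (the device id and the omitted resources are placeholders; only the bus_speed field comes from this patch):

	static struct i2c_sh_mobile_platform_data i2c0_pdata = {
		.bus_speed	= 400000,	/* in Hz, instead of the default NORMAL_SPEED */
	};

	static struct platform_device i2c0_device = {
		.name	= "i2c-sh_mobile",
		.id	= 0,
		.dev	= {
			.platform_data	= &i2c0_pdata,
		},
		/* .resource/.num_resources left out of this sketch */
	};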
Signed-off-by: Magnus Damm Signed-off-by: Ben Dooks --- drivers/i2c/busses/i2c-sh_mobile.c | 13 +++++++++++-- include/linux/i2c/i2c-sh_mobile.h | 10 ++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 include/linux/i2c/i2c-sh_mobile.h (limited to 'include/linux') diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c index 5a1d37d0b62f..d917dd1cc091 100644 --- a/drivers/i2c/busses/i2c-sh_mobile.c +++ b/drivers/i2c/busses/i2c-sh_mobile.c @@ -32,6 +32,7 @@ #include #include #include +#include /* Transmit operation: */ /* */ @@ -117,7 +118,7 @@ struct sh_mobile_i2c_data { struct device *dev; void __iomem *reg; struct i2c_adapter adap; - + unsigned long bus_speed; struct clk *clk; u_int8_t icic; u_int8_t iccl; @@ -205,7 +206,7 @@ static void activate_ch(struct sh_mobile_i2c_data *pd) * We also round off the result. */ num = i2c_clk * 5; - denom = NORMAL_SPEED * 9; + denom = pd->bus_speed * 9; tmp = num * 10 / denom; if (tmp % 10 >= 5) pd->iccl = (u_int8_t)((num/denom) + 1); @@ -574,6 +575,7 @@ static int sh_mobile_i2c_hook_irqs(struct platform_device *dev, int hook) static int sh_mobile_i2c_probe(struct platform_device *dev) { + struct i2c_sh_mobile_platform_data *pdata = dev->dev.platform_data; struct sh_mobile_i2c_data *pd; struct i2c_adapter *adap; struct resource *res; @@ -618,6 +620,11 @@ static int sh_mobile_i2c_probe(struct platform_device *dev) goto err_irq; } + /* Use platformd data bus speed or NORMAL_SPEED */ + pd->bus_speed = NORMAL_SPEED; + if (pdata && pdata->bus_speed) + pd->bus_speed = pdata->bus_speed; + /* The IIC blocks on SH-Mobile ARM processors * come with two new bits in ICIC. */ @@ -658,6 +665,8 @@ static int sh_mobile_i2c_probe(struct platform_device *dev) goto err_all; } + dev_info(&dev->dev, "I2C adapter %d with bus speed %lu Hz\n", + adap->nr, pd->bus_speed); return 0; err_all: diff --git a/include/linux/i2c/i2c-sh_mobile.h b/include/linux/i2c/i2c-sh_mobile.h new file mode 100644 index 000000000000..beda7081aead --- /dev/null +++ b/include/linux/i2c/i2c-sh_mobile.h @@ -0,0 +1,10 @@ +#ifndef __I2C_SH_MOBILE_H__ +#define __I2C_SH_MOBILE_H__ + +#include + +struct i2c_sh_mobile_platform_data { + unsigned long bus_speed; +}; + +#endif /* __I2C_SH_MOBILE_H__ */ -- cgit v1.2.3 From 0ee8dcb87e403397e575674d0e79272b06dea12e Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Wed, 9 Mar 2011 15:41:59 +0800 Subject: KVM: cleanup memslot_id function We can get memslot id from memslot->id directly Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 6 +++++- virt/kvm/kvm_main.c | 17 ----------------- 2 files changed, 5 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ab428552af8e..57d7092d7717 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -365,7 +365,6 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, bool *writable); pfn_t gfn_to_pfn_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn); -int memslot_id(struct kvm *kvm, gfn_t gfn); void kvm_release_pfn_dirty(pfn_t); void kvm_release_pfn_clean(pfn_t pfn); void kvm_set_pfn_dirty(pfn_t pfn); @@ -597,6 +596,11 @@ static inline void kvm_guest_exit(void) current->flags &= ~PF_VCPU; } +static inline int memslot_id(struct kvm *kvm, gfn_t gfn) +{ + return gfn_to_memslot(kvm, gfn)->id; +} + static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) { diff --git 
a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6330653480e4..58146457bf97 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -996,23 +996,6 @@ out: return size; } -int memslot_id(struct kvm *kvm, gfn_t gfn) -{ - int i; - struct kvm_memslots *slots = kvm_memslots(kvm); - struct kvm_memory_slot *memslot = NULL; - - for (i = 0; i < slots->nmemslots; ++i) { - memslot = &slots->memslots[i]; - - if (gfn >= memslot->base_gfn - && gfn < memslot->base_gfn + memslot->npages) - break; - } - - return memslot - slots->memslots; -} - static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, gfn_t *nr_pages) { -- cgit v1.2.3 From c761e5868e6737abe0464636ebd7fcbb6814c626 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 1 Apr 2011 11:25:03 -0300 Subject: Revert "KVM: Fix race between nmi injection and enabling nmi window" This reverts commit f86368493ec038218e8663cc1b6e5393cd8e008a. Simpler fix to follow. Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 4 +--- include/linux/kvm_host.h | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a38fb9bb342b..b9402d5fa0e9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -361,8 +361,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) void kvm_inject_nmi(struct kvm_vcpu *vcpu) { - kvm_make_request(KVM_REQ_NMI, vcpu); kvm_make_request(KVM_REQ_EVENT, vcpu); + vcpu->arch.nmi_pending = 1; } EXPORT_SYMBOL_GPL(kvm_inject_nmi); @@ -5208,8 +5208,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 1; goto out; } - if (kvm_check_request(KVM_REQ_NMI, vcpu)) - vcpu->arch.nmi_pending = true; } r = kvm_mmu_reload(vcpu); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 57d7092d7717..7ca831e55186 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -43,7 +43,6 @@ #define KVM_REQ_DEACTIVATE_FPU 10 #define KVM_REQ_EVENT 11 #define KVM_REQ_APF_HALT 12 -#define KVM_REQ_NMI 13 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 -- cgit v1.2.3 From cef4dea07f6720b36cc93e18a2e68be4bdb71a92 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Jan 2010 12:01:20 +0200 Subject: KVM: 16-byte mmio support Since sse instructions can issue 16-byte mmios, we need to support them. We can't increase the kvm_run mmio buffer size to 16 bytes without breaking compatibility, so instead we break the large mmios into two smaller 8-byte ones. Since the bus is 64-bit we aren't breaking any atomicity guarantees. 
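To illustrate the guest-visible split (the handler name below is only an example): a 16-byte access at guest physical address gpa is reported to the VMM as two back-to-back KVM_EXIT_MMIO exits, first for gpa with len 8, then for gpa + 8 with len 8, so a run loop that already copes with run->mmio.len <= 8 keeps working unchanged:

	case KVM_EXIT_MMIO:
		/* called once per 8-byte chunk: gpa, then gpa + 8 */
		handle_mmio(run->mmio.phys_addr, run->mmio.data,
			    run->mmio.len, run->mmio.is_write);
		break;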
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 34 +++++++++++++++++++++++++--------- include/linux/kvm_host.h | 7 ++++++- 3 files changed, 32 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 35f81b110260..e820c6339b8b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -30,6 +30,7 @@ #define KVM_MEMORY_SLOTS 32 /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 +#define KVM_MMIO_SIZE 16 #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bb6b9d3f5e93..11d692c7018d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3833,8 +3833,10 @@ mmio: vcpu->mmio_needed = 1; vcpu->run->exit_reason = KVM_EXIT_MMIO; vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; - vcpu->run->mmio.len = vcpu->mmio_size = bytes; + vcpu->mmio_size = bytes; + vcpu->run->mmio.len = min(vcpu->mmio_size, 8); vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; + vcpu->mmio_index = 0; return X86EMUL_IO_NEEDED; } @@ -3886,11 +3888,14 @@ mmio: val += handled; vcpu->mmio_needed = 1; + memcpy(vcpu->mmio_data, val, bytes); vcpu->run->exit_reason = KVM_EXIT_MMIO; vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; - vcpu->run->mmio.len = vcpu->mmio_size = bytes; + vcpu->mmio_size = bytes; + vcpu->run->mmio.len = min(vcpu->mmio_size, 8); vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; - memcpy(vcpu->run->mmio.data, val, bytes); + memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8); + vcpu->mmio_index = 0; return X86EMUL_CONTINUE; } @@ -4498,11 +4503,9 @@ restart: if (!vcpu->arch.pio.in) vcpu->arch.pio.count = 0; r = EMULATE_DO_MMIO; - } else if (vcpu->mmio_needed) { - if (vcpu->mmio_is_write) - vcpu->mmio_needed = 0; + } else if (vcpu->mmio_needed) r = EMULATE_DO_MMIO; - } else if (r == EMULATION_RESTART) + else if (r == EMULATION_RESTART) goto restart; else r = EMULATE_DONE; @@ -5450,9 +5453,22 @@ static int complete_mmio(struct kvm_vcpu *vcpu) return 1; if (vcpu->mmio_needed) { - memcpy(vcpu->mmio_data, run->mmio.data, 8); - vcpu->mmio_read_completed = 1; vcpu->mmio_needed = 0; + if (!vcpu->mmio_is_write) + memcpy(vcpu->mmio_data, run->mmio.data, 8); + vcpu->mmio_index += 8; + if (vcpu->mmio_index < vcpu->mmio_size) { + run->exit_reason = KVM_EXIT_MMIO; + run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index; + memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8); + run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8); + run->mmio.is_write = vcpu->mmio_is_write; + vcpu->mmio_needed = 1; + return 0; + } + if (vcpu->mmio_is_write) + return 1; + vcpu->mmio_read_completed = 1; } vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7ca831e55186..d1f507567068 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -27,6 +27,10 @@ #include +#ifndef KVM_MMIO_SIZE +#define KVM_MMIO_SIZE 8 +#endif + /* * vcpu->requests bit members */ @@ -132,7 +136,8 @@ struct kvm_vcpu { int mmio_read_completed; int mmio_is_write; int mmio_size; - unsigned char mmio_data[8]; + int mmio_index; + unsigned char mmio_data[KVM_MMIO_SIZE]; gpa_t mmio_phys_addr; #endif -- cgit v1.2.3 From 92a1f12d2598f429bd8639e21d89305e787115c5 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Mar 2011 09:44:51 +0100 
Subject: KVM: X86: Implement userspace interface to set virtual_tsc_khz This patch implements two new vm-ioctls to get and set the virtual_tsc_khz if the machine supports tsc-scaling. Setting the tsc-frequency is only possible before userspace creates any vcpu. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- Documentation/kvm/api.txt | 23 +++++++++++++++++++++++ arch/x86/include/asm/kvm_host.h | 7 +++++++ arch/x86/kvm/svm.c | 20 ++++++++++++++++++++ arch/x86/kvm/x86.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/kvm.h | 5 +++++ 5 files changed, 90 insertions(+) (limited to 'include/linux') diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 9bef4e4cec50..1b9eaa7e8856 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -1263,6 +1263,29 @@ struct kvm_assigned_msix_entry { __u16 padding[3]; }; +4.54 KVM_SET_TSC_KHZ + +Capability: KVM_CAP_TSC_CONTROL +Architectures: x86 +Type: vcpu ioctl +Parameters: virtual tsc_khz +Returns: 0 on success, -1 on error + +Specifies the tsc frequency for the virtual machine. The unit of the +frequency is KHz. + +4.55 KVM_GET_TSC_KHZ + +Capability: KVM_CAP_GET_TSC_KHZ +Architectures: x86 +Type: vcpu ioctl +Parameters: none +Returns: virtual tsc-khz on success, negative value on error + +Returns the tsc frequency of the guest. The unit of the return value is +KHz. If the host has unstable tsc this ioctl returns -EIO instead as an +error. + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index da0a8ce3a139..bd57639fd5db 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -655,6 +655,13 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); extern bool tdp_enabled; +/* control of guest tsc rate supported? */ +extern bool kvm_has_tsc_control; +/* minimum supported tsc_khz for guests */ +extern u32 kvm_min_guest_tsc_khz; +/* maximum supported tsc_khz for guests */ +extern u32 kvm_max_guest_tsc_khz; + enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8c4549bef4ed..a98873762433 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -64,6 +64,8 @@ MODULE_LICENSE("GPL"); #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) #define TSC_RATIO_RSVD 0xffffff0000000000ULL +#define TSC_RATIO_MIN 0x0000000000000001ULL +#define TSC_RATIO_MAX 0x000000ffffffffffULL static bool erratum_383_found __read_mostly; @@ -189,6 +191,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm); static int nested_svm_vmexit(struct vcpu_svm *svm); static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); +static u64 __scale_tsc(u64 ratio, u64 tsc); enum { VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, @@ -798,6 +801,23 @@ static __init int svm_hardware_setup(void) if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) kvm_enable_efer_bits(EFER_FFXSR); + if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { + u64 max; + + kvm_has_tsc_control = true; + + /* + * Make sure the user can only configure tsc_khz values that + * fit into a signed integer. + * A min value is not calculated needed because it will always + * be 1 on all machines and a value of 0 is used to disable + * tsc-scaling for the vcpu. 
+ */ + max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX)); + + kvm_max_guest_tsc_khz = max; + } + if (nested) { printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 579ce34e7904..1d5a7f418795 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -87,6 +87,11 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); int ignore_msrs = 0; module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); +bool kvm_has_tsc_control; +EXPORT_SYMBOL_GPL(kvm_has_tsc_control); +u32 kvm_max_guest_tsc_khz; +EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); + #define KVM_NR_SHARED_MSRS 16 struct kvm_shared_msrs_global { @@ -1986,6 +1991,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_X86_ROBUST_SINGLESTEP: case KVM_CAP_XSAVE: case KVM_CAP_ASYNC_PF: + case KVM_CAP_GET_TSC_KHZ: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -2012,6 +2018,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_XCRS: r = cpu_has_xsave; break; + case KVM_CAP_TSC_CONTROL: + r = kvm_has_tsc_control; + break; default: r = 0; break; @@ -3045,6 +3054,32 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); break; } + case KVM_SET_TSC_KHZ: { + u32 user_tsc_khz; + + r = -EINVAL; + if (!kvm_has_tsc_control) + break; + + user_tsc_khz = (u32)arg; + + if (user_tsc_khz >= kvm_max_guest_tsc_khz) + goto out; + + kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz); + + r = 0; + goto out; + } + case KVM_GET_TSC_KHZ: { + r = -EIO; + if (check_tsc_unstable()) + goto out; + + r = vcpu_tsc_khz(vcpu); + + goto out; + } default: r = -EINVAL; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index ea2dc1a2e13d..2f63ebeac639 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -541,6 +541,8 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_PPC_GET_PVINFO 57 #define KVM_CAP_PPC_IRQ_LEVEL 58 #define KVM_CAP_ASYNC_PF 59 +#define KVM_CAP_TSC_CONTROL 60 +#define KVM_CAP_GET_TSC_KHZ 61 #ifdef KVM_CAP_IRQ_ROUTING @@ -677,6 +679,9 @@ struct kvm_clock_data { #define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) /* Available with KVM_CAP_PPC_GET_PVINFO */ #define KVM_PPC_GET_PVINFO _IOW(KVMIO, 0xa1, struct kvm_ppc_pvinfo) +/* Available with KVM_CAP_TSC_CONTROL */ +#define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2) +#define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3) /* * ioctls for vcpu fds -- cgit v1.2.3 From b42fc3cbc3d6e284463e93896679379443e19d56 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 12 Apr 2011 21:30:17 -0400 Subject: KVM: Fix off by one in kvm_for_each_vcpu iteration This patch avoids gcc issuing the following warning when KVM_MAX_VCPUS=1: warning: array subscript is above array bounds kvm_for_each_vcpu currently checks to see if the index for the vcpu is valid /after/ loading it. We don't run into problems because the address is still inside the enclosing struct kvm and we never dereference or write to it, so this isn't a security issue. The warning occurs when KVM_MAX_VCPUS=1 because the increment portion of the loop will *always* cause the loop to load an invalid location since ++idx will always be > 0. This patch moves the load so that the check occurs before the load and we don't run into the compiler warning.
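A minimal, self-contained C sketch of the two loop shapes; the names slot, online and visit() are invented stand-ins rather than the kernel identifiers, and the old shape is shown only in a comment because it is exactly the out-of-bounds subscript the patch removes:

    #include <stdio.h>

    #define MAX_SLOTS 1                     /* mirrors KVM_MAX_VCPUS=1 */

    static int value0 = 42;
    static int *slot[MAX_SLOTS] = { &value0 };

    static void visit(int *p) { printf("%d\n", *p); }

    int main(void)
    {
            int online = 1;                 /* number of valid entries */
            int *p;
            int idx;

            /*
             * Old shape: the element is loaded before the index is checked,
             * so the ++idx step evaluates slot[1] even though only slot[0]
             * exists; this is what makes gcc warn that the array subscript
             * is above array bounds:
             *
             * for (idx = 0, p = slot[idx]; idx < online && p; p = slot[++idx])
             *         visit(p);
             */

            /* New shape, as in the patch: check the index first, then load,
             * so no out-of-range subscript is ever formed. */
            for (idx = 0; idx < online && (p = slot[idx]) != NULL; idx++)
                    visit(p);

            return 0;
    }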
Signed-off-by: Neil Brown Signed-off-by: Jeff Mahoney Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d1f507567068..0bc3d372e3cb 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -296,9 +296,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) } #define kvm_for_each_vcpu(idx, vcpup, kvm) \ - for (idx = 0, vcpup = kvm_get_vcpu(kvm, idx); \ - idx < atomic_read(&kvm->online_vcpus) && vcpup; \ - vcpup = kvm_get_vcpu(kvm, ++idx)) + for (idx = 0; \ + idx < atomic_read(&kvm->online_vcpus) && \ + (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \ + idx++) int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From b130e5cec958bae3867cf6ab09a9b24ba8fada01 Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Tue, 3 May 2011 16:57:07 -0700 Subject: nl80211: Introduce NL80211_MESH_SETUP_USERSPACE_AMPE Introduce a new configuration option to support AMPE from userspace. Prior to this series we only supported authentication in userspace: an authentication daemon would authenticate peer candidates in userspace and hand them over to the kernel. From that point the mesh stack would take over and establish a peer link (Mesh Peering Management). These patches introduce support for Authenticated Mesh Peering Exchange in userspace. The userspace daemon implements the AMPE protocol and on successful completion creates mesh peers and installs encryption keys. Signed-off-by: Javier Cardona Signed-off-by: John W. Linville --- include/linux/nl80211.h | 10 ++++++++++ include/net/cfg80211.h | 4 +++- net/mac80211/cfg.c | 6 +++++- net/mac80211/ieee80211_i.h | 6 +++++- net/mac80211/mesh.c | 2 +- net/mac80211/mesh_plink.c | 5 +++-- net/wireless/nl80211.c | 4 +++- 7 files changed, 30 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index a75dea9c416e..c53b916036c5 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1769,6 +1769,15 @@ enum nl80211_meshconf_params { * @NL80211_MESH_SETUP_USERSPACE_AUTH: Enable this option if an authentication * daemon will be authenticating mesh candidates. * + * @NL80211_MESH_SETUP_USERSPACE_AMPE: Enable this option if an authentication * daemon will be securing peer link frames. AMPE is a secured version of Mesh * Peering Management (MPM) and is implemented with the assistance of a * userspace daemon. When this flag is set, the kernel will send peer * management frames to a userspace daemon that will implement AMPE * functionality (security capabilities selection, key confirmation, and key * management). When the flag is unset (default), the kernel can autonomously * complete (unsecured) mesh peering without the need of a userspace daemon.
+ * * @NL80211_MESH_SETUP_ATTR_MAX: highest possible mesh setup attribute number * @__NL80211_MESH_SETUP_ATTR_AFTER_LAST: Internal use */ @@ -1778,6 +1787,7 @@ enum nl80211_mesh_setup_params { NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC, NL80211_MESH_SETUP_IE, NL80211_MESH_SETUP_USERSPACE_AUTH, + NL80211_MESH_SETUP_USERSPACE_AMPE, /* keep last */ __NL80211_MESH_SETUP_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0920daf36807..10c17d68059f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -695,7 +695,8 @@ struct mesh_config { * @path_metric: which metric to use * @ie: vendor information elements (optional) * @ie_len: length of vendor information elements - * @is_secure: or not + * @is_authenticated: this mesh requires authentication + * @is_secure: this mesh uses security * * These parameters are fixed when the mesh is created. */ @@ -706,6 +707,7 @@ struct mesh_setup { u8 path_metric; const u8 *ie; u8 ie_len; + bool is_authenticated; bool is_secure; }; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 1ebc13383ae7..18c2555e04e6 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1064,7 +1064,11 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, memcpy(ifmsh->mesh_id, setup->mesh_id, ifmsh->mesh_id_len); ifmsh->mesh_pp_id = setup->path_sel_proto; ifmsh->mesh_pm_id = setup->path_metric; - ifmsh->is_secure = setup->is_secure; + ifmsh->security = IEEE80211_MESH_SEC_NONE; + if (setup->is_authenticated) + ifmsh->security |= IEEE80211_MESH_SEC_AUTHED; + if (setup->is_secure) + ifmsh->security |= IEEE80211_MESH_SEC_SECURED; return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e89bc27f8dc3..7f4d0dc1d534 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -490,7 +490,11 @@ struct ieee80211_if_mesh { bool accepting_plinks; const u8 *ie; u8 ie_len; - bool is_secure; + enum { + IEEE80211_MESH_SEC_NONE = 0x0, + IEEE80211_MESH_SEC_AUTHED = 0x1, + IEEE80211_MESH_SEC_SECURED = 0x2, + } security; }; #ifdef CONFIG_MAC80211_MESH diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index c1299e249541..2a59eb345131 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -574,7 +574,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, &elems); /* ignore beacons from secure mesh peers if our security is off */ - if (elems.rsn_len && !sdata->u.mesh.is_secure) + if (elems.rsn_len && sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) return; if (elems.ds_params && elems.ds_params_len == 1) diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 84e5b056af02..87abf8deb369 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -251,7 +251,7 @@ void mesh_neighbour_update(u8 *hw_addr, u32 rates, rcu_read_unlock(); /* Userspace handles peer allocation when security is enabled * */ - if (sdata->u.mesh.is_secure) + if (sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED) cfg80211_notify_new_peer_candidate(sdata->dev, hw_addr, elems->ie_start, elems->total_len, GFP_KERNEL); @@ -460,7 +460,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m mpl_dbg("Mesh plink: missing necessary peer link ie\n"); return; } - if (elems.rsn_len && !sdata->u.mesh.is_secure) { + if (elems.rsn_len && + sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) { mpl_dbg("Mesh plink: can't establish link with secure peer\n"); return; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 
0a199a1ca9b6..64efc2d7a7ad 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2871,6 +2871,7 @@ static const struct nla_policy [NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG }, [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, + [NL80211_MESH_SETUP_USERSPACE_AMPE] = { .type = NLA_FLAG }, }; static int nl80211_parse_mesh_config(struct genl_info *info, @@ -2980,7 +2981,8 @@ static int nl80211_parse_mesh_setup(struct genl_info *info, setup->ie = nla_data(ieattr); setup->ie_len = nla_len(ieattr); } - setup->is_secure = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AUTH]); + setup->is_authenticated = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AUTH]); + setup->is_secure = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AMPE]); return 0; } -- cgit v1.2.3 From d3aaec8ab76c2d604c2ba7332e1338674607597b Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Tue, 3 May 2011 16:57:09 -0700 Subject: mac80211: Drop MESH_PLINK category and use new ANA-approved MESH_ACTION Note: This breaks compatibility with previous mesh protocol instances. Signed-off-by: Javier Cardona Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 5 ++--- net/mac80211/mesh.c | 2 +- net/mac80211/mesh_plink.c | 2 +- net/mac80211/rx.c | 7 +++++-- 4 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 79690b710665..ee1c96a866cc 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1261,9 +1261,8 @@ enum ieee80211_category { WLAN_CATEGORY_MULTIHOP_ACTION = 14, WLAN_CATEGORY_SELF_PROTECTED = 15, WLAN_CATEGORY_WMM = 17, - /* TODO: remove MESH_PLINK and MESH_PATH_SEL after */ - /* mesh is updated to current 802.11s draft */ - WLAN_CATEGORY_MESH_PLINK = 30, + /* TODO: remove MESH_PATH_SEL after mesh is updated + * to current 802.11s draft */ WLAN_CATEGORY_MESH_PATH_SEL = 32, WLAN_CATEGORY_VENDOR_SPECIFIC_PROTECTED = 126, WLAN_CATEGORY_VENDOR_SPECIFIC = 127, diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 2a59eb345131..75378e852f00 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -600,7 +600,7 @@ static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_status *rx_status) { switch (mgmt->u.action.category) { - case WLAN_CATEGORY_MESH_PLINK: + case WLAN_CATEGORY_MESH_ACTION: mesh_rx_plink_frame(sdata, mgmt, len, rx_status); break; case WLAN_CATEGORY_MESH_PATH_SEL: diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 87abf8deb369..0120e9e36286 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -182,7 +182,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); /* BSSID is left zeroed, wildcard value */ - mgmt->u.action.category = WLAN_CATEGORY_MESH_PLINK; + mgmt->u.action.category = WLAN_CATEGORY_MESH_ACTION; mgmt->u.action.u.plink_action.action_code = action; if (action == PLINK_CLOSE) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 81241e18f3a4..634f3d97a279 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -495,8 +495,11 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) return RX_DROP_MONITOR; if (ieee80211_is_action(hdr->frame_control)) { + u8 category; mgmt = (struct ieee80211_mgmt *)hdr; - if (mgmt->u.action.category != WLAN_CATEGORY_MESH_PLINK) + category = mgmt->u.action.category; + if (category != WLAN_CATEGORY_MESH_ACTION && + 
category != WLAN_CATEGORY_SELF_PROTECTED) return RX_DROP_MONITOR; return RX_CONTINUE; } @@ -2205,7 +2208,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto handled; } break; - case WLAN_CATEGORY_MESH_PLINK: + case WLAN_CATEGORY_MESH_ACTION: if (!ieee80211_vif_is_mesh(&sdata->vif)) break; goto queue; -- cgit v1.2.3 From 0a35d36d6f019bde6c98812456798275b02e5aee Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Wed, 4 May 2011 10:24:56 -0700 Subject: cfg80211: Use capability info to detect mesh beacons. Mesh beacons no longer use all-zeroes BSSID. Beacon frames for MBSS, infrastructure BSS, or IBSS are differentiated by the Capability Information field in the Beacon frame. A mesh STA sets the ESS and IBSS subfields to 0 in transmitted Beacon or Probe Response management frames. Signed-off-by: Javier Cardona Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 5 +++++ net/wireless/scan.c | 14 +++++++------- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index ee1c96a866cc..d527fb7bd67a 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1002,6 +1002,11 @@ struct ieee80211_ht_info { #define WLAN_CAPABILITY_ESS (1<<0) #define WLAN_CAPABILITY_IBSS (1<<1) + +/* A mesh STA sets the ESS and IBSS capability bits to zero */ +#define WLAN_CAPABILITY_IS_MBSS(cap) \ + (!((cap) & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS))) + #define WLAN_CAPABILITY_CF_POLLABLE (1<<2) #define WLAN_CAPABILITY_CF_POLL_REQUEST (1<<3) #define WLAN_CAPABILITY_PRIVACY (1<<4) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index fbf6f33ae4d0..62e542a2b192 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -210,7 +210,7 @@ static bool is_mesh(struct cfg80211_bss *a, { const u8 *ie; - if (!is_zero_ether_addr(a->bssid)) + if (!WLAN_CAPABILITY_IS_MBSS(a->capability)) return false; ie = cfg80211_find_ie(WLAN_EID_MESH_ID, @@ -248,11 +248,7 @@ static int cmp_bss(struct cfg80211_bss *a, if (a->channel != b->channel) return b->channel->center_freq - a->channel->center_freq; - r = memcmp(a->bssid, b->bssid, ETH_ALEN); - if (r) - return r; - - if (is_zero_ether_addr(a->bssid)) { + if (WLAN_CAPABILITY_IS_MBSS(a->capability | b->capability)) { r = cmp_ies(WLAN_EID_MESH_ID, a->information_elements, a->len_information_elements, @@ -267,6 +263,10 @@ static int cmp_bss(struct cfg80211_bss *a, b->len_information_elements); } + r = memcmp(a->bssid, b->bssid, ETH_ALEN); + if (r) + return r; + return cmp_ies(WLAN_EID_SSID, a->information_elements, a->len_information_elements, @@ -407,7 +407,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, res->ts = jiffies; - if (is_zero_ether_addr(res->pub.bssid)) { + if (WLAN_CAPABILITY_IS_MBSS(res->pub.capability)) { /* must be mesh, verify */ meshid = cfg80211_find_ie(WLAN_EID_MESH_ID, res->pub.information_elements, -- cgit v1.2.3 From 9c3990aaec0ad9f686ef6480f6861f2df89b2a7a Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Tue, 3 May 2011 16:57:11 -0700 Subject: nl80211: Let userspace drive the peer link management states. Signed-off-by: Javier Cardona Signed-off-by: John W. 
Linville --- include/linux/nl80211.h | 4 ++++ include/net/cfg80211.h | 29 +++++++++++++++++++++++++++++ net/mac80211/cfg.c | 30 +++++++++++++++++++++--------- net/mac80211/sta_info.h | 23 ----------------------- net/wireless/nl80211.c | 6 ++++++ 5 files changed, 60 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index c53b916036c5..de96783954a1 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -913,6 +913,9 @@ enum nl80211_commands { * @NL80211_ATTR_SUPPORT_MESH_AUTH: Currently, this means the underlying driver * allows auth frames in a mesh to be passed to userspace for processing via * the @NL80211_MESH_SETUP_USERSPACE_AUTH flag. + * @NL80211_ATTR_STA_PLINK_STATE: The state of a mesh peer link. Used when + * userspace is driving the peer link management state machine. + * @NL80211_MESH_SETUP_USERSPACE_AMPE must be enabled. * * @NL80211_ATTR_WOWLAN_SUPPORTED: indicates, as part of the wiphy capabilities, * the supported WoWLAN triggers @@ -1109,6 +1112,7 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX, NL80211_ATTR_SUPPORT_MESH_AUTH, + NL80211_ATTR_STA_PLINK_STATE, NL80211_ATTR_WOWLAN_TRIGGERS, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 10c17d68059f..4b0d035be64f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -371,6 +371,33 @@ enum plink_actions { PLINK_ACTION_BLOCK, }; +/** + * enum plink_states - state of a mesh peer link finite state machine + * + * @PLINK_LISTEN: initial state, considered the implicit state of non + * existant mesh peer links + * @PLINK_OPN_SNT: mesh plink open frame has been sent to this mesh + * peer @PLINK_OPN_RCVD: mesh plink open frame has been received from + * this mesh peer + * @PLINK_CNF_RCVD: mesh plink confirm frame has been received from + * this mesh peer + * @PLINK_ESTAB: mesh peer link is established + * @PLINK_HOLDING: mesh peer link is being closed or cancelled + * @PLINK_BLOCKED: all frames transmitted from this mesh plink are + * discarded + * @PLINK_INVALID: reserved + */ +enum plink_state { + PLINK_LISTEN, + PLINK_OPN_SNT, + PLINK_OPN_RCVD, + PLINK_CNF_RCVD, + PLINK_ESTAB, + PLINK_HOLDING, + PLINK_BLOCKED, + PLINK_INVALID, +}; + /** * struct station_parameters - station parameters * @@ -387,6 +414,7 @@ enum plink_actions { * @listen_interval: listen interval or -1 for no change * @aid: AID or zero for no change * @plink_action: plink action to take + * @plink_state: set the peer link state for a station * @ht_capa: HT capabilities of station */ struct station_parameters { @@ -397,6 +425,7 @@ struct station_parameters { u16 aid; u8 supported_rates_len; u8 plink_action; + u8 plink_state; struct ieee80211_ht_cap *ht_capa; }; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 18c2555e04e6..51f775772d9e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -734,15 +734,27 @@ static void sta_apply_parameters(struct ieee80211_local *local, params->ht_capa, &sta->sta.ht_cap); - if (ieee80211_vif_is_mesh(&sdata->vif) && params->plink_action) { - switch (params->plink_action) { - case PLINK_ACTION_OPEN: - mesh_plink_open(sta); - break; - case PLINK_ACTION_BLOCK: - mesh_plink_block(sta); - break; - } + if (ieee80211_vif_is_mesh(&sdata->vif)) { + if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) + switch (params->plink_state) { + case PLINK_LISTEN: + case PLINK_ESTAB: + case PLINK_BLOCKED: + sta->plink_state = params->plink_state; + break; + default: 
+ /* nothing */ + break; + } + else + switch (params->plink_action) { + case PLINK_ACTION_OPEN: + mesh_plink_open(sta); + break; + case PLINK_ACTION_BLOCK: + mesh_plink_block(sta); + break; + } } } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index af1a7f8c8675..f00b4dcb49d7 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -173,29 +173,6 @@ struct sta_ampdu_mlme { }; -/** - * enum plink_state - state of a mesh peer link finite state machine - * - * @PLINK_LISTEN: initial state, considered the implicit state of non existant - * mesh peer links - * @PLINK_OPN_SNT: mesh plink open frame has been sent to this mesh peer - * @PLINK_OPN_RCVD: mesh plink open frame has been received from this mesh peer - * @PLINK_CNF_RCVD: mesh plink confirm frame has been received from this mesh - * peer - * @PLINK_ESTAB: mesh peer link is established - * @PLINK_HOLDING: mesh peer link is being closed or cancelled - * @PLINK_BLOCKED: all frames transmitted from this mesh plink are discarded - */ -enum plink_state { - PLINK_LISTEN, - PLINK_OPN_SNT, - PLINK_OPN_RCVD, - PLINK_CNF_RCVD, - PLINK_ESTAB, - PLINK_HOLDING, - PLINK_BLOCKED -}; - /** * struct sta_info - STA information * diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 64efc2d7a7ad..f698c1d116e4 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -174,6 +174,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_OFFCHANNEL_TX_OK] = { .type = NLA_FLAG }, [NL80211_ATTR_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED }, [NL80211_ATTR_WOWLAN_TRIGGERS] = { .type = NLA_NESTED }, + [NL80211_ATTR_STA_PLINK_STATE] = { .type = NLA_U8 }, }; /* policy for the key attributes */ @@ -2247,6 +2248,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) memset(¶ms, 0, sizeof(params)); params.listen_interval = -1; + params.plink_state = PLINK_INVALID; if (info->attrs[NL80211_ATTR_STA_AID]) return -EINVAL; @@ -2278,6 +2280,10 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) params.plink_action = nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); + if (info->attrs[NL80211_ATTR_STA_PLINK_STATE]) + params.plink_state = + nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]); + err = get_vlan(info, rdev, ¶ms.vlan); if (err) goto out; -- cgit v1.2.3 From 8f9cb77d6d213c153b0571f494df0c24456aaf47 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Tue, 3 May 2011 16:57:14 -0700 Subject: mac80211: Self-protected management frames are not robust They may contain encrypted information elements (as AMPE frames do) but they are not encrypted. Signed-off-by: Thomas Pedersen Signed-off-by: Javier Cardona Signed-off-by: John W. 
Linville --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index d527fb7bd67a..b2eee5879883 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1520,6 +1520,7 @@ static inline bool ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) category = ((u8 *) hdr) + 24; return *category != WLAN_CATEGORY_PUBLIC && *category != WLAN_CATEGORY_HT && + *category != WLAN_CATEGORY_SELF_PROTECTED && *category != WLAN_CATEGORY_VENDOR_SPECIFIC; } -- cgit v1.2.3 From 1073e4ee595265086a592a056d903bf4fcc8885a Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Wed, 11 May 2011 02:08:09 +0200 Subject: bcma: add missing GPIO defines, use PULL register only when available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A similar patch was committed to ssb. Signed-off-by: Rafał Miłecki Signed-off-by: John W. Linville --- drivers/bcma/driver_chipcommon.c | 6 ++++-- include/linux/bcma/bcma_driver_chipcommon.h | 5 +++++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c index caf596091d4d..606102256b44 100644 --- a/drivers/bcma/driver_chipcommon.c +++ b/drivers/bcma/driver_chipcommon.c @@ -29,8 +29,10 @@ void bcma_core_chipcommon_init(struct bcma_drv_cc *cc) if (cc->core->id.rev >= 35) cc->capabilities_ext = bcma_cc_read32(cc, BCMA_CC_CAP_EXT); - bcma_cc_write32(cc, 0x58, 0); - bcma_cc_write32(cc, 0x5C, 0); + if (cc->core->id.rev >= 20) { + bcma_cc_write32(cc, BCMA_CC_GPIOPULLUP, 0); + bcma_cc_write32(cc, BCMA_CC_GPIOPULLDOWN, 0); + } if (cc->capabilities & BCMA_CC_CAP_PMU) bcma_pmu_init(cc); diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 4f8fd6a4c1e6..083c3b6cd5ce 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -113,6 +113,8 @@ #define BCMA_CC_FLASHDATA 0x0048 #define BCMA_CC_BCAST_ADDR 0x0050 #define BCMA_CC_BCAST_DATA 0x0054 +#define BCMA_CC_GPIOPULLUP 0x0058 /* Rev >= 20 only */ +#define BCMA_CC_GPIOPULLDOWN 0x005C /* Rev >= 20 only */ #define BCMA_CC_GPIOIN 0x0060 #define BCMA_CC_GPIOOUT 0x0064 #define BCMA_CC_GPIOOUTEN 0x0068 @@ -121,6 +123,9 @@ #define BCMA_CC_GPIOIRQ 0x0074 #define BCMA_CC_WATCHDOG 0x0080 #define BCMA_CC_GPIOTIMER 0x0088 /* LED powersave (corerev >= 16) */ +#define BCMA_CC_GPIOTIMER_OFFTIME 0x0000FFFF +#define BCMA_CC_GPIOTIMER_OFFTIME_SHIFT 0 +#define BCMA_CC_GPIOTIMER_ONTIME 0xFFFF0000 #define BCMA_CC_GPIOTIMER_ONTIME_SHIFT 16 #define BCMA_CC_GPIOTOUTM 0x008C /* LED powersave (corerev >= 16) */ #define BCMA_CC_CLOCK_N 0x0090 -- cgit v1.2.3 From 8576f815d5c8beb8b10f96abe31831b90af3d352 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Wed, 11 May 2011 02:10:58 +0200 Subject: ssb: move ssb_commit_settings and export it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Committing settings is possible on devices without PCI core (but with CC core). Export it for usage in drivers supporting other cores. Signed-off-by: Rafał Miłecki Signed-off-by: John W. 
Linville --- drivers/ssb/driver_pcicore.c | 26 -------------------------- drivers/ssb/main.c | 25 +++++++++++++++++++++++++ include/linux/ssb/ssb.h | 1 + 3 files changed, 26 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/ssb/driver_pcicore.c b/drivers/ssb/driver_pcicore.c index 8fde1220bc89..82feb348c8bb 100644 --- a/drivers/ssb/driver_pcicore.c +++ b/drivers/ssb/driver_pcicore.c @@ -21,8 +21,6 @@ static u16 ssb_pcie_mdio_read(struct ssb_pcicore *pc, u8 device, u8 address); static void ssb_pcie_mdio_write(struct ssb_pcicore *pc, u8 device, u8 address, u16 data); -static void ssb_commit_settings(struct ssb_bus *bus); - static inline u32 pcicore_read32(struct ssb_pcicore *pc, u16 offset) { @@ -659,30 +657,6 @@ static void ssb_pcie_mdio_write(struct ssb_pcicore *pc, u8 device, pcicore_write32(pc, mdio_control, 0); } -static void ssb_broadcast_value(struct ssb_device *dev, - u32 address, u32 data) -{ - /* This is used for both, PCI and ChipCommon core, so be careful. */ - BUILD_BUG_ON(SSB_PCICORE_BCAST_ADDR != SSB_CHIPCO_BCAST_ADDR); - BUILD_BUG_ON(SSB_PCICORE_BCAST_DATA != SSB_CHIPCO_BCAST_DATA); - - ssb_write32(dev, SSB_PCICORE_BCAST_ADDR, address); - ssb_read32(dev, SSB_PCICORE_BCAST_ADDR); /* flush */ - ssb_write32(dev, SSB_PCICORE_BCAST_DATA, data); - ssb_read32(dev, SSB_PCICORE_BCAST_DATA); /* flush */ -} - -static void ssb_commit_settings(struct ssb_bus *bus) -{ - struct ssb_device *dev; - - dev = bus->chipco.dev ? bus->chipco.dev : bus->pcicore.dev; - if (WARN_ON(!dev)) - return; - /* This forces an update of the cached registers. */ - ssb_broadcast_value(dev, 0xFD8, 0); -} - int ssb_pcicore_dev_irqvecs_enable(struct ssb_pcicore *pc, struct ssb_device *dev) { diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c index ad3da93a428c..ee2937c41424 100644 --- a/drivers/ssb/main.c +++ b/drivers/ssb/main.c @@ -1329,6 +1329,31 @@ error: } EXPORT_SYMBOL(ssb_bus_powerup); +static void ssb_broadcast_value(struct ssb_device *dev, + u32 address, u32 data) +{ + /* This is used for both, PCI and ChipCommon core, so be careful. */ + BUILD_BUG_ON(SSB_PCICORE_BCAST_ADDR != SSB_CHIPCO_BCAST_ADDR); + BUILD_BUG_ON(SSB_PCICORE_BCAST_DATA != SSB_CHIPCO_BCAST_DATA); + + ssb_write32(dev, SSB_PCICORE_BCAST_ADDR, address); + ssb_read32(dev, SSB_PCICORE_BCAST_ADDR); /* flush */ + ssb_write32(dev, SSB_PCICORE_BCAST_DATA, data); + ssb_read32(dev, SSB_PCICORE_BCAST_DATA); /* flush */ +} + +void ssb_commit_settings(struct ssb_bus *bus) +{ + struct ssb_device *dev; + + dev = bus->chipco.dev ? bus->chipco.dev : bus->pcicore.dev; + if (WARN_ON(!dev)) + return; + /* This forces an update of the cached registers. */ + ssb_broadcast_value(dev, 0xFD8, 0); +} +EXPORT_SYMBOL(ssb_commit_settings); + u32 ssb_admatch_base(u32 adm) { u32 base = 0; diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 7e99b348834c..f017b8900f78 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -518,6 +518,7 @@ extern int ssb_bus_may_powerdown(struct ssb_bus *bus); * Otherwise static always-on powercontrol will be used. */ extern int ssb_bus_powerup(struct ssb_bus *bus, bool dynamic_pctl); +extern void ssb_commit_settings(struct ssb_bus *bus); /* Various helper functions */ extern u32 ssb_admatch_base(u32 adm); -- cgit v1.2.3 From 807f8a8c300435d5483e8d78df9dcdbc27333166 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Wed, 11 May 2011 17:09:35 +0300 Subject: cfg80211/nl80211: add support for scheduled scans Implement new functionality for scheduled scan offload. 
With this feature we can scan automatically at certain intervals. The idea is that the hardware can perform scan automatically and filter on desired results without waking up the host unnecessarily. Add NL80211_CMD_START_SCHED_SCAN and NL80211_CMD_STOP_SCHED_SCAN commands to the nl80211 interface. When results are available they are reported by NL80211_CMD_SCHED_SCAN_RESULTS events. The userspace is informed when the scheduled scan has stopped with a NL80211_CMD_SCHED_SCAN_STOPPED event, which can be triggered either by the driver or by a call to NL80211_CMD_STOP_SCHED_SCAN. Signed-off-by: Luciano Coelho Signed-off-by: John W. Linville --- include/linux/nl80211.h | 25 +++++ include/net/cfg80211.h | 57 +++++++++++ net/wireless/core.c | 12 ++- net/wireless/core.h | 7 ++ net/wireless/nl80211.c | 250 ++++++++++++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.h | 4 + net/wireless/scan.c | 70 ++++++++++++++ 7 files changed, 424 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index de96783954a1..f8b5595ba4af 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -203,6 +203,26 @@ * @NL80211_CMD_SCAN_ABORTED: scan was aborted, for unspecified reasons, * partial scan results may be available * + * @NL80211_CMD_START_SCHED_SCAN: start a scheduled scan. Like with normal + * scans, if SSIDs (%NL80211_ATTR_SCAN_SSIDS) are passed, they are used + * in the probe requests. For broadcast, a broadcast SSID must be + * passed (ie. an empty string). If no SSID is passed, no probe + * requests are sent and a passive scan is performed. + * %NL80211_ATTR_SCAN_FREQUENCIES, if passed, define which channels + * should be scanned; if not passed, all channels allowed for the + * current regulatory domain are used. Extra IEs can also be passed + * from the userspace by using the %NL80211_ATTR_IE attribute. + * @NL80211_CMD_STOP_SCHED_SCAN: stop a scheduled scan + * @NL80211_CMD_SCHED_SCAN_RESULTS: indicates that there are scheduled scan + * results available. + * @NL80211_CMD_SCHED_SCAN_STOPPED: indicates that the scheduled scan has + * stopped. The driver may issue this event at any time during a + * scheduled scan. One reason for stopping the scan is if the hardware + * does not support starting an association or a normal scan while running + * a scheduled scan. This event is also sent when the + * %NL80211_CMD_STOP_SCHED_SCAN command is received or when the interface + * is brought down while a scheduled scan was running. + * * @NL80211_CMD_GET_SURVEY: get survey resuls, e.g. 
channel occupation * or noise level * @NL80211_CMD_NEW_SURVEY_RESULTS: survey data notification (as a reply to @@ -545,6 +565,11 @@ enum nl80211_commands { NL80211_CMD_GET_WOWLAN, NL80211_CMD_SET_WOWLAN, + NL80211_CMD_START_SCHED_SCAN, + NL80211_CMD_STOP_SCHED_SCAN, + NL80211_CMD_SCHED_SCAN_RESULTS, + NL80211_CMD_SCHED_SCAN_STOPPED, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4b0d035be64f..e214c85b74d2 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -823,6 +823,33 @@ struct cfg80211_scan_request { struct ieee80211_channel *channels[0]; }; +/** + * struct cfg80211_sched_scan_request - scheduled scan request description + * + * @ssids: SSIDs to scan for (passed in the probe_reqs in active scans) + * @n_ssids: number of SSIDs + * @n_channels: total number of channels to scan + * @ie: optional information element(s) to add into Probe Request or %NULL + * @ie_len: length of ie in octets + * @wiphy: the wiphy this was for + * @dev: the interface + * @channels: channels to scan + */ +struct cfg80211_sched_scan_request { + struct cfg80211_ssid *ssids; + int n_ssids; + u32 n_channels; + const u8 *ie; + size_t ie_len; + + /* internal */ + struct wiphy *wiphy; + struct net_device *dev; + + /* keep last */ + struct ieee80211_channel *channels[0]; +}; + /** * enum cfg80211_signal_type - signal type * @@ -1292,6 +1319,10 @@ struct cfg80211_wowlan { * @set_power_mgmt: Configure WLAN power management. A timeout value of -1 * allows the driver to adjust the dynamic ps timeout value. * @set_cqm_rssi_config: Configure connection quality monitor RSSI threshold. + * @sched_scan_start: Tell the driver to start a scheduled scan. + * @sched_scan_stop: Tell the driver to stop an ongoing scheduled + * scan. The driver_initiated flag specifies whether the driver + * itself has informed that the scan has stopped. * * @mgmt_frame_register: Notify driver that a management frame type was * registered. Note that this callback may not sleep, and cannot run @@ -1478,6 +1509,12 @@ struct cfg80211_ops { int (*set_ringparam)(struct wiphy *wiphy, u32 tx, u32 rx); void (*get_ringparam)(struct wiphy *wiphy, u32 *tx, u32 *tx_max, u32 *rx, u32 *rx_max); + + int (*sched_scan_start)(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_sched_scan_request *request); + int (*sched_scan_stop)(struct wiphy *wiphy, struct net_device *dev, + bool driver_initiated); }; /* @@ -1522,6 +1559,7 @@ struct cfg80211_ops { * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN. * @WIPHY_FLAG_MESH_AUTH: The device supports mesh authentication by routing * auth frames to userspace. See @NL80211_MESH_SETUP_USERSPACE_AUTH. + * @WIPHY_FLAG_SCHED_SCAN: The device supports scheduled scans. 
*/ enum wiphy_flags { WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), @@ -1534,6 +1572,7 @@ enum wiphy_flags { WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), WIPHY_FLAG_IBSS_RSN = BIT(8), WIPHY_FLAG_MESH_AUTH = BIT(10), + WIPHY_FLAG_SUPPORTS_SCHED_SCAN = BIT(11), }; struct mac_address { @@ -2354,6 +2393,24 @@ int cfg80211_wext_siwpmksa(struct net_device *dev, */ void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted); +/** + * cfg80211_sched_scan_results - notify that new scan results are available + * + * @wiphy: the wiphy which got scheduled scan results + */ +void cfg80211_sched_scan_results(struct wiphy *wiphy); + +/** + * cfg80211_sched_scan_stopped - notify that the scheduled scan has stopped + * + * @wiphy: the wiphy on which the scheduled scan stopped + * + * The driver can call this function to inform cfg80211 that the + * scheduled scan had to be stopped, for whatever reason. The driver + * is then called back via the sched_scan_stop operation when done. + */ +void cfg80211_sched_scan_stopped(struct wiphy *wiphy); + /** * cfg80211_inform_bss_frame - inform cfg80211 of a received BSS frame * diff --git a/net/wireless/core.c b/net/wireless/core.c index bea0d80710c8..f924a49b2425 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -370,7 +370,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) spin_lock_init(&rdev->bss_lock); INIT_LIST_HEAD(&rdev->bss_list); INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done); - + INIT_WORK(&rdev->sched_scan_results_wk, __cfg80211_sched_scan_results); + INIT_WORK(&rdev->sched_scan_stopped_wk, __cfg80211_sched_scan_stopped); #ifdef CONFIG_CFG80211_WEXT rdev->wiphy.wext = &cfg80211_wext_handler; #endif @@ -672,6 +673,11 @@ static void wdev_cleanup_work(struct work_struct *work) ___cfg80211_scan_done(rdev, true); } + if (WARN_ON(rdev->sched_scan_req && + rdev->sched_scan_req->dev == wdev->netdev)) { + __cfg80211_stop_sched_scan(rdev, false); + } + cfg80211_unlock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); @@ -759,6 +765,10 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: + cfg80211_lock_rdev(rdev); + __cfg80211_stop_sched_scan(rdev, false); + cfg80211_unlock_rdev(rdev); + wdev_lock(wdev); #ifdef CONFIG_CFG80211_WEXT kfree(wdev->wext.ie); diff --git a/net/wireless/core.h b/net/wireless/core.h index 7a18c10a7fb6..e3f7b1d995cd 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -60,8 +60,11 @@ struct cfg80211_registered_device { struct rb_root bss_tree; u32 bss_generation; struct cfg80211_scan_request *scan_req; /* protected by RTNL */ + struct cfg80211_sched_scan_request *sched_scan_req; unsigned long suspend_at; struct work_struct scan_done_wk; + struct work_struct sched_scan_results_wk; + struct work_struct sched_scan_stopped_wk; #ifdef CONFIG_NL80211_TESTMODE struct genl_info *testmode_info; @@ -411,6 +414,10 @@ void cfg80211_sme_rx_auth(struct net_device *dev, const u8 *buf, size_t len); void cfg80211_sme_disassoc(struct net_device *dev, int idx); void __cfg80211_scan_done(struct work_struct *wk); void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak); +void __cfg80211_sched_scan_results(struct work_struct *wk); +int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, + bool driver_initiated); +void __cfg80211_sched_scan_stopped(struct work_struct *wk); void cfg80211_upload_connect_keys(struct wireless_dev *wdev); int cfg80211_change_iface(struct 
cfg80211_registered_device *rdev, struct net_device *dev, enum nl80211_iftype ntype, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 95dd5832e719..4fac370284c0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -761,6 +761,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, } CMD(set_channel, SET_CHANNEL); CMD(set_wds_peer, SET_WDS_PEER); + if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) + CMD(sched_scan_start, START_SCHED_SCAN); #undef CMD @@ -3357,6 +3359,179 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) return err; } +static int nl80211_start_sched_scan(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_sched_scan_request *request; + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct cfg80211_ssid *ssid; + struct ieee80211_channel *channel; + struct nlattr *attr; + struct wiphy *wiphy; + int err, tmp, n_ssids = 0, n_channels, i; + enum ieee80211_band band; + size_t ie_len; + + if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) || + !rdev->ops->sched_scan_start) + return -EOPNOTSUPP; + + if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) + return -EINVAL; + + if (rdev->sched_scan_req) + return -EINPROGRESS; + + wiphy = &rdev->wiphy; + + if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { + n_channels = validate_scan_freqs( + info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]); + if (!n_channels) + return -EINVAL; + } else { + n_channels = 0; + + for (band = 0; band < IEEE80211_NUM_BANDS; band++) + if (wiphy->bands[band]) + n_channels += wiphy->bands[band]->n_channels; + } + + if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) + nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], + tmp) + n_ssids++; + + if (n_ssids > wiphy->max_scan_ssids) + return -EINVAL; + + if (info->attrs[NL80211_ATTR_IE]) + ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + else + ie_len = 0; + + if (ie_len > wiphy->max_scan_ie_len) + return -EINVAL; + + request = kzalloc(sizeof(*request) + + sizeof(*ssid) * n_ssids + + sizeof(channel) * n_channels + + ie_len, GFP_KERNEL); + if (!request) + return -ENOMEM; + + if (n_ssids) + request->ssids = (void *)&request->channels[n_channels]; + request->n_ssids = n_ssids; + if (ie_len) { + if (request->ssids) + request->ie = (void *)(request->ssids + n_ssids); + else + request->ie = (void *)(request->channels + n_channels); + } + + i = 0; + if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { + /* user specified, bail out if channel not found */ + nla_for_each_nested(attr, + info->attrs[NL80211_ATTR_SCAN_FREQUENCIES], + tmp) { + struct ieee80211_channel *chan; + + chan = ieee80211_get_channel(wiphy, nla_get_u32(attr)); + + if (!chan) { + err = -EINVAL; + goto out_free; + } + + /* ignore disabled channels */ + if (chan->flags & IEEE80211_CHAN_DISABLED) + continue; + + request->channels[i] = chan; + i++; + } + } else { + /* all channels */ + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + int j; + if (!wiphy->bands[band]) + continue; + for (j = 0; j < wiphy->bands[band]->n_channels; j++) { + struct ieee80211_channel *chan; + + chan = &wiphy->bands[band]->channels[j]; + + if (chan->flags & IEEE80211_CHAN_DISABLED) + continue; + + request->channels[i] = chan; + i++; + } + } + } + + if (!i) { + err = -EINVAL; + goto out_free; + } + + request->n_channels = i; + + i = 0; + if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) { + nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], + tmp) { + 
if (request->ssids[i].ssid_len > + IEEE80211_MAX_SSID_LEN) { + err = -EINVAL; + goto out_free; + } + memcpy(request->ssids[i].ssid, nla_data(attr), + nla_len(attr)); + request->ssids[i].ssid_len = nla_len(attr); + i++; + } + } + + if (info->attrs[NL80211_ATTR_IE]) { + request->ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + memcpy((void *)request->ie, + nla_data(info->attrs[NL80211_ATTR_IE]), + request->ie_len); + } + + request->dev = dev; + request->wiphy = &rdev->wiphy; + + err = rdev->ops->sched_scan_start(&rdev->wiphy, dev, request); + if (!err) { + rdev->sched_scan_req = request; + nl80211_send_sched_scan(rdev, dev, + NL80211_CMD_START_SCHED_SCAN); + goto out; + } + +out_free: + kfree(request); +out: + return err; +} + +static int nl80211_stop_sched_scan(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + + if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) || + !rdev->ops->sched_scan_stop) + return -EOPNOTSUPP; + + return __cfg80211_stop_sched_scan(rdev, false); +} + static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, @@ -5326,6 +5501,22 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .dumpit = nl80211_dump_scan, }, + { + .cmd = NL80211_CMD_START_SCHED_SCAN, + .doit = nl80211_start_sched_scan, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_STOP_SCHED_SCAN, + .doit = nl80211_stop_sched_scan, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, { .cmd = NL80211_CMD_AUTHENTICATE, .doit = nl80211_authenticate, @@ -5652,6 +5843,28 @@ static int nl80211_send_scan_msg(struct sk_buff *msg, return -EMSGSIZE; } +static int +nl80211_send_sched_scan_msg(struct sk_buff *msg, + struct cfg80211_registered_device *rdev, + struct net_device *netdev, + u32 pid, u32 seq, int flags, u32 cmd) +{ + void *hdr; + + hdr = nl80211hdr_put(msg, pid, seq, flags, cmd); + if (!hdr) + return -1; + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + + return genlmsg_end(msg, hdr); + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, struct net_device *netdev) { @@ -5709,6 +5922,43 @@ void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, nl80211_scan_mcgrp.id, GFP_KERNEL); } +void nl80211_send_sched_scan_results(struct cfg80211_registered_device *rdev, + struct net_device *netdev) +{ + struct sk_buff *msg; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + if (nl80211_send_sched_scan_msg(msg, rdev, netdev, 0, 0, 0, + NL80211_CMD_SCHED_SCAN_RESULTS) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_scan_mcgrp.id, GFP_KERNEL); +} + +void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u32 cmd) +{ + struct sk_buff *msg; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return; + + if (nl80211_send_sched_scan_msg(msg, rdev, netdev, 0, 0, 0, cmd) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_scan_mcgrp.id, GFP_KERNEL); +} + /* * This can happen on global 
regulatory changes or device specific settings * based on custom world regulatory domains. diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index f2af6955a665..2f1bfb87a651 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -12,6 +12,10 @@ void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, struct net_device *netdev); void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, struct net_device *netdev); +void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u32 cmd); +void nl80211_send_sched_scan_results(struct cfg80211_registered_device *rdev, + struct net_device *netdev); void nl80211_send_reg_change_event(struct regulatory_request *request); void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev, struct net_device *netdev, diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 62e542a2b192..65dfae3b9d41 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -93,6 +93,76 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) } EXPORT_SYMBOL(cfg80211_scan_done); +void __cfg80211_sched_scan_results(struct work_struct *wk) +{ + struct cfg80211_registered_device *rdev; + + rdev = container_of(wk, struct cfg80211_registered_device, + sched_scan_results_wk); + + cfg80211_lock_rdev(rdev); + + /* we don't have sched_scan_req anymore if the scan is stopping */ + if (rdev->sched_scan_req) + nl80211_send_sched_scan_results(rdev, + rdev->sched_scan_req->dev); + + cfg80211_unlock_rdev(rdev); +} + +void cfg80211_sched_scan_results(struct wiphy *wiphy) +{ + /* ignore if we're not scanning */ + if (wiphy_to_dev(wiphy)->sched_scan_req) + queue_work(cfg80211_wq, + &wiphy_to_dev(wiphy)->sched_scan_results_wk); +} +EXPORT_SYMBOL(cfg80211_sched_scan_results); + +void __cfg80211_sched_scan_stopped(struct work_struct *wk) +{ + struct cfg80211_registered_device *rdev; + + rdev = container_of(wk, struct cfg80211_registered_device, + sched_scan_stopped_wk); + + cfg80211_lock_rdev(rdev); + __cfg80211_stop_sched_scan(rdev, true); + cfg80211_unlock_rdev(rdev); +} + +void cfg80211_sched_scan_stopped(struct wiphy *wiphy) +{ + queue_work(cfg80211_wq, &wiphy_to_dev(wiphy)->sched_scan_stopped_wk); +} +EXPORT_SYMBOL(cfg80211_sched_scan_stopped); + +int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, + bool driver_initiated) +{ + int err; + struct net_device *dev; + + ASSERT_RDEV_LOCK(rdev); + + if (!rdev->sched_scan_req) + return 0; + + dev = rdev->sched_scan_req->dev; + + err = rdev->ops->sched_scan_stop(&rdev->wiphy, dev, + driver_initiated); + if (err) + return err; + + nl80211_send_sched_scan(rdev, dev, NL80211_CMD_SCHED_SCAN_STOPPED); + + kfree(rdev->sched_scan_req); + rdev->sched_scan_req = NULL; + + return err; +} + static void bss_release(struct kref *ref) { struct cfg80211_internal_bss *bss; -- cgit v1.2.3 From bbe6ad6dcb1eb26bd12ec85320f402721c3383ae Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Wed, 11 May 2011 17:09:37 +0300 Subject: cfg80211/nl80211: add interval attribute for scheduled scans Introduce NL80211_ATTR_SCHED_SCAN_INTERVAL as a required attribute for NL80211_CMD_START_SCHED_SCAN. This value informs the driver at which intervals the scheduled scan cycles should be executed. Signed-off-by: Luciano Coelho Signed-off-by: John W. 
Linville --- include/linux/nl80211.h | 25 ++++++++++++++++--------- include/net/cfg80211.h | 2 ++ net/wireless/nl80211.c | 10 ++++++++++ 3 files changed, 28 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index f8b5595ba4af..281a2bb6a6ec 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -203,15 +203,17 @@ * @NL80211_CMD_SCAN_ABORTED: scan was aborted, for unspecified reasons, * partial scan results may be available * - * @NL80211_CMD_START_SCHED_SCAN: start a scheduled scan. Like with normal - * scans, if SSIDs (%NL80211_ATTR_SCAN_SSIDS) are passed, they are used - * in the probe requests. For broadcast, a broadcast SSID must be - * passed (ie. an empty string). If no SSID is passed, no probe - * requests are sent and a passive scan is performed. - * %NL80211_ATTR_SCAN_FREQUENCIES, if passed, define which channels - * should be scanned; if not passed, all channels allowed for the - * current regulatory domain are used. Extra IEs can also be passed - * from the userspace by using the %NL80211_ATTR_IE attribute. + * @NL80211_CMD_START_SCHED_SCAN: start a scheduled scan at certain + * intervals, as specified by %NL80211_ATTR_SCHED_SCAN_INTERVAL. + * Like with normal scans, if SSIDs (%NL80211_ATTR_SCAN_SSIDS) + * are passed, they are used in the probe requests. For + * broadcast, a broadcast SSID must be passed (ie. an empty + * string). If no SSID is passed, no probe requests are sent and + * a passive scan is performed. %NL80211_ATTR_SCAN_FREQUENCIES, + * if passed, define which channels should be scanned; if not + * passed, all channels allowed for the current regulatory domain + * are used. Extra IEs can also be passed from the userspace by + * using the %NL80211_ATTR_IE attribute. * @NL80211_CMD_STOP_SCHED_SCAN: stop a scheduled scan * @NL80211_CMD_SCHED_SCAN_RESULTS: indicates that there are scheduled scan * results available. @@ -948,6 +950,9 @@ enum nl80211_commands { * indicate which WoW triggers should be enabled. This is also * used by %NL80211_CMD_GET_WOWLAN to get the currently enabled WoWLAN * triggers. + + * @NL80211_ATTR_SCHED_SCAN_INTERVAL: Interval between scheduled scan + * cycles, in msecs. 
* * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -1142,6 +1147,8 @@ enum nl80211_attrs { NL80211_ATTR_WOWLAN_TRIGGERS, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED, + NL80211_ATTR_SCHED_SCAN_INTERVAL, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e214c85b74d2..1f1e221b6ce3 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -829,6 +829,7 @@ struct cfg80211_scan_request { * @ssids: SSIDs to scan for (passed in the probe_reqs in active scans) * @n_ssids: number of SSIDs * @n_channels: total number of channels to scan + * @interval: interval between each scheduled scan cycle * @ie: optional information element(s) to add into Probe Request or %NULL * @ie_len: length of ie in octets * @wiphy: the wiphy this was for @@ -839,6 +840,7 @@ struct cfg80211_sched_scan_request { struct cfg80211_ssid *ssids; int n_ssids; u32 n_channels; + u32 interval; const u8 *ie; size_t ie_len; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4fac370284c0..b5b050b62f2a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -175,6 +175,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED }, [NL80211_ATTR_WOWLAN_TRIGGERS] = { .type = NLA_NESTED }, [NL80211_ATTR_STA_PLINK_STATE] = { .type = NLA_U8 }, + [NL80211_ATTR_SCHED_SCAN_INTERVAL] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -3370,6 +3371,7 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, struct nlattr *attr; struct wiphy *wiphy; int err, tmp, n_ssids = 0, n_channels, i; + u32 interval; enum ieee80211_band band; size_t ie_len; @@ -3383,6 +3385,13 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (rdev->sched_scan_req) return -EINPROGRESS; + if (!info->attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL]) + return -EINVAL; + + interval = nla_get_u32(info->attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL]); + if (interval == 0) + return -EINVAL; + wiphy = &rdev->wiphy; if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { @@ -3505,6 +3514,7 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, request->dev = dev; request->wiphy = &rdev->wiphy; + request->interval = interval; err = rdev->ops->sched_scan_start(&rdev->wiphy, dev, request); if (!err) { -- cgit v1.2.3 From 2e711c04dbbf7a7732a3f7073b1fc285d12b369d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 26 Apr 2011 19:15:07 +0200 Subject: PM: Remove sysdev suspend, resume and shutdown operations Since suspend, resume and shutdown operations in struct sysdev_class and struct sysdev_driver are not used any more, remove them. Also drop sysdev_suspend(), sysdev_resume() and sysdev_shutdown() used for executing those operations and modify all of their users accordingly. This reduces kernel code size quite a bit and reduces its complexity. Signed-off-by: Rafael J. 
Wysocki Acked-by: Greg Kroah-Hartman --- arch/sh/Kconfig | 1 - arch/x86/Kconfig | 1 - arch/x86/kernel/apm_32.c | 4 - drivers/base/Kconfig | 7 -- drivers/base/base.h | 2 - drivers/base/sys.c | 202 +---------------------------------------------- drivers/xen/manage.c | 8 +- include/linux/device.h | 7 -- include/linux/pm.h | 8 -- include/linux/sysdev.h | 11 --- kernel/kexec.c | 9 +-- kernel/power/hibernate.c | 18 +---- kernel/power/suspend.c | 8 +- kernel/sys.c | 3 - 14 files changed, 7 insertions(+), 282 deletions(-) (limited to 'include/linux') diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 4b89da248d17..bc439de48cd1 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -24,7 +24,6 @@ config SUPERH select RTC_LIB select GENERIC_ATOMIC64 select GENERIC_IRQ_SHOW - select ARCH_NO_SYSDEV_OPS help The SuperH is a RISC processor targeted for use in embedded systems and consumer electronics; it was also used in the Sega Dreamcast diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cc6c53a95bfd..140e254fe546 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -71,7 +71,6 @@ config X86 select GENERIC_IRQ_SHOW select IRQ_FORCED_THREADING select USE_GENERIC_SMP_HELPERS if SMP - select ARCH_NO_SYSDEV_OPS config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS) diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index adee12e0da1f..3bfa02235965 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1238,7 +1238,6 @@ static int suspend(int vetoable) dpm_suspend_noirq(PMSG_SUSPEND); local_irq_disable(); - sysdev_suspend(PMSG_SUSPEND); syscore_suspend(); local_irq_enable(); @@ -1258,7 +1257,6 @@ static int suspend(int vetoable) err = (err == APM_SUCCESS) ? 0 : -EIO; syscore_resume(); - sysdev_resume(); local_irq_enable(); dpm_resume_noirq(PMSG_RESUME); @@ -1282,7 +1280,6 @@ static void standby(void) dpm_suspend_noirq(PMSG_SUSPEND); local_irq_disable(); - sysdev_suspend(PMSG_SUSPEND); syscore_suspend(); local_irq_enable(); @@ -1292,7 +1289,6 @@ static void standby(void) local_irq_disable(); syscore_resume(); - sysdev_resume(); local_irq_enable(); dpm_resume_noirq(PMSG_RESUME); diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index e9e5238f3106..d57e8d0fb823 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -168,11 +168,4 @@ config SYS_HYPERVISOR bool default n -config ARCH_NO_SYSDEV_OPS - bool - ---help--- - To be selected by architectures that don't use sysdev class or - sysdev driver power management (suspend/resume) and shutdown - operations. - endmenu diff --git a/drivers/base/base.h b/drivers/base/base.h index 19f49e41ce5d..a34dca0ad041 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -111,8 +111,6 @@ static inline int driver_match_device(struct device_driver *drv, return drv->bus->match ? drv->bus->match(dev, drv) : 1; } -extern void sysdev_shutdown(void); - extern char *make_class_name(const char *name, struct kobject *kobj); extern int devres_release_all(struct device *dev); diff --git a/drivers/base/sys.c b/drivers/base/sys.c index acde9b5ee131..9dff77bfe1e3 100644 --- a/drivers/base/sys.c +++ b/drivers/base/sys.c @@ -328,203 +328,8 @@ void sysdev_unregister(struct sys_device *sysdev) kobject_put(&sysdev->kobj); } - -#ifndef CONFIG_ARCH_NO_SYSDEV_OPS -/** - * sysdev_shutdown - Shut down all system devices. - * - * Loop over each class of system devices, and the devices in each - * of those classes. 
For each device, we call the shutdown method for - * each driver registered for the device - the auxiliaries, - * and the class driver. - * - * Note: The list is iterated in reverse order, so that we shut down - * child devices before we shut down their parents. The list ordering - * is guaranteed by virtue of the fact that child devices are registered - * after their parents. - */ -void sysdev_shutdown(void) -{ - struct sysdev_class *cls; - - pr_debug("Shutting Down System Devices\n"); - - mutex_lock(&sysdev_drivers_lock); - list_for_each_entry_reverse(cls, &system_kset->list, kset.kobj.entry) { - struct sys_device *sysdev; - - pr_debug("Shutting down type '%s':\n", - kobject_name(&cls->kset.kobj)); - - list_for_each_entry(sysdev, &cls->kset.list, kobj.entry) { - struct sysdev_driver *drv; - pr_debug(" %s\n", kobject_name(&sysdev->kobj)); - - /* Call auxiliary drivers first */ - list_for_each_entry(drv, &cls->drivers, entry) { - if (drv->shutdown) - drv->shutdown(sysdev); - } - - /* Now call the generic one */ - if (cls->shutdown) - cls->shutdown(sysdev); - } - } - mutex_unlock(&sysdev_drivers_lock); -} - -static void __sysdev_resume(struct sys_device *dev) -{ - struct sysdev_class *cls = dev->cls; - struct sysdev_driver *drv; - - /* First, call the class-specific one */ - if (cls->resume) - cls->resume(dev); - WARN_ONCE(!irqs_disabled(), - "Interrupts enabled after %pF\n", cls->resume); - - /* Call auxiliary drivers next. */ - list_for_each_entry(drv, &cls->drivers, entry) { - if (drv->resume) - drv->resume(dev); - WARN_ONCE(!irqs_disabled(), - "Interrupts enabled after %pF\n", drv->resume); - } -} - -/** - * sysdev_suspend - Suspend all system devices. - * @state: Power state to enter. - * - * We perform an almost identical operation as sysdev_shutdown() - * above, though calling ->suspend() instead. Interrupts are disabled - * when this called. Devices are responsible for both saving state and - * quiescing or powering down the device. - * - * This is only called by the device PM core, so we let them handle - * all synchronization. 
- */ -int sysdev_suspend(pm_message_t state) -{ - struct sysdev_class *cls; - struct sys_device *sysdev, *err_dev; - struct sysdev_driver *drv, *err_drv; - int ret; - - pr_debug("Checking wake-up interrupts\n"); - - /* Return error code if there are any wake-up interrupts pending */ - ret = check_wakeup_irqs(); - if (ret) - return ret; - - WARN_ONCE(!irqs_disabled(), - "Interrupts enabled while suspending system devices\n"); - - pr_debug("Suspending System Devices\n"); - - list_for_each_entry_reverse(cls, &system_kset->list, kset.kobj.entry) { - pr_debug("Suspending type '%s':\n", - kobject_name(&cls->kset.kobj)); - - list_for_each_entry(sysdev, &cls->kset.list, kobj.entry) { - pr_debug(" %s\n", kobject_name(&sysdev->kobj)); - - /* Call auxiliary drivers first */ - list_for_each_entry(drv, &cls->drivers, entry) { - if (drv->suspend) { - ret = drv->suspend(sysdev, state); - if (ret) - goto aux_driver; - } - WARN_ONCE(!irqs_disabled(), - "Interrupts enabled after %pF\n", - drv->suspend); - } - - /* Now call the generic one */ - if (cls->suspend) { - ret = cls->suspend(sysdev, state); - if (ret) - goto cls_driver; - WARN_ONCE(!irqs_disabled(), - "Interrupts enabled after %pF\n", - cls->suspend); - } - } - } - return 0; - /* resume current sysdev */ -cls_driver: - drv = NULL; - printk(KERN_ERR "Class suspend failed for %s: %d\n", - kobject_name(&sysdev->kobj), ret); - -aux_driver: - if (drv) - printk(KERN_ERR "Class driver suspend failed for %s: %d\n", - kobject_name(&sysdev->kobj), ret); - list_for_each_entry(err_drv, &cls->drivers, entry) { - if (err_drv == drv) - break; - if (err_drv->resume) - err_drv->resume(sysdev); - } - - /* resume other sysdevs in current class */ - list_for_each_entry(err_dev, &cls->kset.list, kobj.entry) { - if (err_dev == sysdev) - break; - pr_debug(" %s\n", kobject_name(&err_dev->kobj)); - __sysdev_resume(err_dev); - } - - /* resume other classes */ - list_for_each_entry_continue(cls, &system_kset->list, kset.kobj.entry) { - list_for_each_entry(err_dev, &cls->kset.list, kobj.entry) { - pr_debug(" %s\n", kobject_name(&err_dev->kobj)); - __sysdev_resume(err_dev); - } - } - return ret; -} -EXPORT_SYMBOL_GPL(sysdev_suspend); - -/** - * sysdev_resume - Bring system devices back to life. - * - * Similar to sysdev_suspend(), but we iterate the list forwards - * to guarantee that parent devices are resumed before their children. - * - * Note: Interrupts are disabled when called. 
- */ -int sysdev_resume(void) -{ - struct sysdev_class *cls; - - WARN_ONCE(!irqs_disabled(), - "Interrupts enabled while resuming system devices\n"); - - pr_debug("Resuming System Devices\n"); - - list_for_each_entry(cls, &system_kset->list, kset.kobj.entry) { - struct sys_device *sysdev; - - pr_debug("Resuming type '%s':\n", - kobject_name(&cls->kset.kobj)); - - list_for_each_entry(sysdev, &cls->kset.list, kobj.entry) { - pr_debug(" %s\n", kobject_name(&sysdev->kobj)); - - __sysdev_resume(sysdev); - } - } - return 0; -} -EXPORT_SYMBOL_GPL(sysdev_resume); -#endif /* CONFIG_ARCH_NO_SYSDEV_OPS */ +EXPORT_SYMBOL_GPL(sysdev_register); +EXPORT_SYMBOL_GPL(sysdev_unregister); int __init system_bus_init(void) { @@ -534,9 +339,6 @@ int __init system_bus_init(void) return 0; } -EXPORT_SYMBOL_GPL(sysdev_register); -EXPORT_SYMBOL_GPL(sysdev_unregister); - #define to_ext_attr(x) container_of(x, struct sysdev_ext_attribute, attr) ssize_t sysdev_store_ulong(struct sys_device *sysdev, diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index a2eee574784e..0b5366b5be20 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -70,12 +70,7 @@ static int xen_suspend(void *data) BUG_ON(!irqs_disabled()); - err = sysdev_suspend(PMSG_FREEZE); - if (!err) { - err = syscore_suspend(); - if (err) - sysdev_resume(); - } + err = syscore_suspend(); if (err) { printk(KERN_ERR "xen_suspend: system core suspend failed: %d\n", err); @@ -102,7 +97,6 @@ static int xen_suspend(void *data) } syscore_resume(); - sysdev_resume(); return 0; } diff --git a/include/linux/device.h b/include/linux/device.h index ab8dfc095709..ea9db9b0f248 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -633,13 +633,6 @@ static inline int devtmpfs_mount(const char *mountpoint) { return 0; } /* drivers/base/power/shutdown.c */ extern void device_shutdown(void); -#ifndef CONFIG_ARCH_NO_SYSDEV_OPS -/* drivers/base/sys.c */ -extern void sysdev_shutdown(void); -#else -static inline void sysdev_shutdown(void) { } -#endif - /* debugging and troubleshooting/diagnostic helpers. 
*/ extern const char *dev_driver_string(const struct device *dev); diff --git a/include/linux/pm.h b/include/linux/pm.h index 512e09177e57..3c053e2beb84 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -529,14 +529,6 @@ struct dev_power_domain { */ #ifdef CONFIG_PM_SLEEP -#ifndef CONFIG_ARCH_NO_SYSDEV_OPS -extern int sysdev_suspend(pm_message_t state); -extern int sysdev_resume(void); -#else -static inline int sysdev_suspend(pm_message_t state) { return 0; } -static inline int sysdev_resume(void) { return 0; } -#endif - extern void device_pm_lock(void); extern void dpm_resume_noirq(pm_message_t state); extern void dpm_resume_end(pm_message_t state); diff --git a/include/linux/sysdev.h b/include/linux/sysdev.h index dfb078db8ebb..d35e783a598c 100644 --- a/include/linux/sysdev.h +++ b/include/linux/sysdev.h @@ -34,12 +34,6 @@ struct sysdev_class { struct list_head drivers; struct sysdev_class_attribute **attrs; struct kset kset; -#ifndef CONFIG_ARCH_NO_SYSDEV_OPS - /* Default operations for these types of devices */ - int (*shutdown)(struct sys_device *); - int (*suspend)(struct sys_device *, pm_message_t state); - int (*resume)(struct sys_device *); -#endif }; struct sysdev_class_attribute { @@ -77,11 +71,6 @@ struct sysdev_driver { struct list_head entry; int (*add)(struct sys_device *); int (*remove)(struct sys_device *); -#ifndef CONFIG_ARCH_NO_SYSDEV_OPS - int (*shutdown)(struct sys_device *); - int (*suspend)(struct sys_device *, pm_message_t state); - int (*resume)(struct sys_device *); -#endif }; diff --git a/kernel/kexec.c b/kernel/kexec.c index 87b77de03dd3..8d814cbc8109 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1531,13 +1531,7 @@ int kernel_kexec(void) if (error) goto Enable_cpus; local_irq_disable(); - /* Suspend system devices */ - error = sysdev_suspend(PMSG_FREEZE); - if (!error) { - error = syscore_suspend(); - if (error) - sysdev_resume(); - } + error = syscore_suspend(); if (error) goto Enable_irqs; } else @@ -1553,7 +1547,6 @@ int kernel_kexec(void) #ifdef CONFIG_KEXEC_JUMP if (kexec_image->preserve_context) { syscore_resume(); - sysdev_resume(); Enable_irqs: local_irq_enable(); Enable_cpus: diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 50aae660174d..554d3b049f35 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -272,12 +272,7 @@ static int create_image(int platform_mode) local_irq_disable(); - error = sysdev_suspend(PMSG_FREEZE); - if (!error) { - error = syscore_suspend(); - if (error) - sysdev_resume(); - } + error = syscore_suspend(); if (error) { printk(KERN_ERR "PM: Some system devices failed to power down, " "aborting hibernation\n"); @@ -302,7 +297,6 @@ static int create_image(int platform_mode) Power_up: syscore_resume(); - sysdev_resume(); /* NOTE: dpm_resume_noirq() is just a resume() for devices * that suspended with irqs off ... no overall powerup. 
*/ @@ -409,12 +403,7 @@ static int resume_target_kernel(bool platform_mode) local_irq_disable(); - error = sysdev_suspend(PMSG_QUIESCE); - if (!error) { - error = syscore_suspend(); - if (error) - sysdev_resume(); - } + error = syscore_suspend(); if (error) goto Enable_irqs; @@ -442,7 +431,6 @@ static int resume_target_kernel(bool platform_mode) touch_softlockup_watchdog(); syscore_resume(); - sysdev_resume(); Enable_irqs: local_irq_enable(); @@ -528,7 +516,6 @@ int hibernation_platform_enter(void) goto Platform_finish; local_irq_disable(); - sysdev_suspend(PMSG_HIBERNATE); syscore_suspend(); if (pm_wakeup_pending()) { error = -EAGAIN; @@ -541,7 +528,6 @@ int hibernation_platform_enter(void) Power_up: syscore_resume(); - sysdev_resume(); local_irq_enable(); enable_nonboot_cpus(); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 8935369d503a..732d77a957e7 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -163,19 +163,13 @@ static int suspend_enter(suspend_state_t state) arch_suspend_disable_irqs(); BUG_ON(!irqs_disabled()); - error = sysdev_suspend(PMSG_SUSPEND); - if (!error) { - error = syscore_suspend(); - if (error) - sysdev_resume(); - } + error = syscore_suspend(); if (!error) { if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) { error = suspend_ops->enter(state); events_check_enabled = false; } syscore_resume(); - sysdev_resume(); } arch_suspend_enable_irqs(); diff --git a/kernel/sys.c b/kernel/sys.c index af468edf096a..f0c10385f30c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -315,7 +315,6 @@ void kernel_restart_prepare(char *cmd) blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); system_state = SYSTEM_RESTART; device_shutdown(); - sysdev_shutdown(); syscore_shutdown(); } @@ -354,7 +353,6 @@ static void kernel_shutdown_prepare(enum system_states state) void kernel_halt(void) { kernel_shutdown_prepare(SYSTEM_HALT); - sysdev_shutdown(); syscore_shutdown(); printk(KERN_EMERG "System halted.\n"); kmsg_dump(KMSG_DUMP_HALT); @@ -374,7 +372,6 @@ void kernel_power_off(void) if (pm_power_off_prepare) pm_power_off_prepare(); disable_nonboot_cpus(); - sysdev_shutdown(); syscore_shutdown(); printk(KERN_EMERG "Power down.\n"); kmsg_dump(KMSG_DUMP_POWEROFF); -- cgit v1.2.3 From b48d4425b602f5f4978299474743dbea130d940d Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 19 Oct 2010 13:07:57 -0700 Subject: PCI: add ID-based ordering enable/disable support Add support to allow drivers to enable/disable ID-based ordering. Where supported, ID-based ordering can significantly improve the latency of individual requests by preventing them from queueing up behind unrelated traffic. Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 13 ++++++++++++ include/linux/pci_regs.h | 2 ++ 3 files changed, 68 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 44d1c7c3876b..d0182bed7acc 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1834,6 +1834,59 @@ void pci_enable_ari(struct pci_dev *dev) bridge->ari_enabled = 1; } +/** + * pci_enable_ido - enable ID-based ordering on a device + * @dev: the PCI device + * @type: which types of IDO to enable + * + * Enable ID-based ordering on @dev. @type can contain the bits + * %PCI_EXP_IDO_REQUEST and/or %PCI_EXP_IDO_COMPLETION to indicate + * which types of transactions are allowed to be re-ordered. 
+ */ +void pci_enable_ido(struct pci_dev *dev, unsigned long type) +{ + int pos; + u16 ctrl; + + pos = pci_pcie_cap(dev); + if (!pos) + return; + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + if (type & PCI_EXP_IDO_REQUEST) + ctrl |= PCI_EXP_IDO_REQ_EN; + if (type & PCI_EXP_IDO_COMPLETION) + ctrl |= PCI_EXP_IDO_CMP_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); +} +EXPORT_SYMBOL(pci_enable_ido); + +/** + * pci_disable_ido - disable ID-based ordering on a device + * @dev: the PCI device + * @type: which types of IDO to disable + */ +void pci_disable_ido(struct pci_dev *dev, unsigned long type) +{ + int pos; + u16 ctrl; + + if (!pci_is_pcie(dev)) + return; + + pos = pci_pcie_cap(dev); + if (!pos) + return; + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + if (type & PCI_EXP_IDO_REQUEST) + ctrl &= ~PCI_EXP_IDO_REQ_EN; + if (type & PCI_EXP_IDO_COMPLETION) + ctrl &= ~PCI_EXP_IDO_CMP_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); +} +EXPORT_SYMBOL(pci_disable_ido); + static int pci_acs_enable; /** diff --git a/include/linux/pci.h b/include/linux/pci.h index 96f70d7e058d..551ddcb5f940 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -828,6 +828,11 @@ static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state, return __pci_enable_wake(dev, state, false, enable); } +#define PCI_EXP_IDO_REQUEST (1<<0) +#define PCI_EXP_IDO_COMPLETION (1<<1) +void pci_enable_ido(struct pci_dev *dev, unsigned long type); +void pci_disable_ido(struct pci_dev *dev, unsigned long type); + /* For use by arch with custom probe code */ void set_pcie_port_type(struct pci_dev *pdev); void set_pcie_hotplug_bridge(struct pci_dev *pdev); @@ -1207,6 +1212,14 @@ static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state, return 0; } +static inline void pci_enable_ido(struct pci_dev *dev, unsigned long type) +{ +} + +static inline void pci_disable_ido(struct pci_dev *dev, unsigned long type) +{ +} + static inline int pci_request_regions(struct pci_dev *dev, const char *res_name) { return -EIO; diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index be01380f798a..d9acf9b99814 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -510,6 +510,8 @@ #define PCI_EXP_DEVCAP2_ARI 0x20 /* Alternative Routing-ID */ #define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ #define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */ +#define PCI_EXP_IDO_REQ_EN 0x100 /* ID-based ordering request enable */ +#define PCI_EXP_IDO_CMP_EN 0x200 /* ID-based ordering completion enable */ #define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ #define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ -- cgit v1.2.3 From 48a92a8179b3e677fac07db7bd109e68f020468c Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 10 Jan 2011 12:46:36 -0800 Subject: PCI: add OBFF enable/disable support OBFF (optimized buffer flush/fill), where supported, can help improve energy efficiency by giving devices information about when interrupts and other activity will have a reduced power impact. It requires support from both the device and system (i.e. not only does the device need to respond to OBFF messages, but the platform must be capable of generating and routing them to the end point). 
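[Illustrative note, not part of the patch: a minimal sketch of how a driver might opt in to OBFF with the interface added below. The foo_* names and the probe/remove context are invented for the example; only pci_enable_obff(), pci_disable_obff() and the PCI_EXP_OBFF_SIGNAL_* values come from this patch.]

#include <linux/pci.h>

/* Hypothetical driver hooks; error handling kept minimal for brevity. */
static int foo_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
        int ret;

        /*
         * Prefer the lower-power L0 signalling type; a nonzero return
         * (e.g. -ENOTSUPP) just means the device or platform cannot do
         * OBFF, which is not fatal for the driver.
         */
        ret = pci_enable_obff(pdev, PCI_EXP_OBFF_SIGNAL_L0);
        if (ret)
                dev_info(&pdev->dev, "OBFF not enabled: %d\n", ret);

        /* ... normal device setup ... */
        return 0;
}

static void foo_remove(struct pci_dev *pdev)
{
        pci_disable_obff(pdev);
}
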
Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 16 +++++++++ include/linux/pci_regs.h | 6 ++++ 3 files changed, 114 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d0182bed7acc..01e4cab2e5cb 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1887,6 +1887,98 @@ void pci_disable_ido(struct pci_dev *dev, unsigned long type) } EXPORT_SYMBOL(pci_disable_ido); +/** + * pci_enable_obff - enable optimized buffer flush/fill + * @dev: PCI device + * @type: type of signaling to use + * + * Try to enable @type OBFF signaling on @dev. It will try using WAKE# + * signaling if possible, falling back to message signaling only if + * WAKE# isn't supported. @type should indicate whether the PCIe link + * be brought out of L0s or L1 to send the message. It should be either + * %PCI_EXP_OBFF_SIGNAL_ALWAYS or %PCI_OBFF_SIGNAL_L0. + * + * If your device can benefit from receiving all messages, even at the + * power cost of bringing the link back up from a low power state, use + * %PCI_EXP_OBFF_SIGNAL_ALWAYS. Otherwise, use %PCI_OBFF_SIGNAL_L0 (the + * preferred type). + * + * RETURNS: + * Zero on success, appropriate error number on failure. + */ +int pci_enable_obff(struct pci_dev *dev, enum pci_obff_signal_type type) +{ + int pos; + u32 cap; + u16 ctrl; + int ret; + + if (!pci_is_pcie(dev)) + return -ENOTSUPP; + + pos = pci_pcie_cap(dev); + if (!pos) + return -ENOTSUPP; + + pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP2, &cap); + if (!(cap & PCI_EXP_OBFF_MASK)) + return -ENOTSUPP; /* no OBFF support at all */ + + /* Make sure the topology supports OBFF as well */ + if (dev->bus) { + ret = pci_enable_obff(dev->bus->self, type); + if (ret) + return ret; + } + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + if (cap & PCI_EXP_OBFF_WAKE) + ctrl |= PCI_EXP_OBFF_WAKE_EN; + else { + switch (type) { + case PCI_EXP_OBFF_SIGNAL_L0: + if (!(ctrl & PCI_EXP_OBFF_WAKE_EN)) + ctrl |= PCI_EXP_OBFF_MSGA_EN; + break; + case PCI_EXP_OBFF_SIGNAL_ALWAYS: + ctrl &= ~PCI_EXP_OBFF_WAKE_EN; + ctrl |= PCI_EXP_OBFF_MSGB_EN; + break; + default: + WARN(1, "bad OBFF signal type\n"); + return -ENOTSUPP; + } + } + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); + + return 0; +} +EXPORT_SYMBOL(pci_enable_obff); + +/** + * pci_disable_obff - disable optimized buffer flush/fill + * @dev: PCI device + * + * Disable OBFF on @dev. 
+ */ +void pci_disable_obff(struct pci_dev *dev) +{ + int pos; + u16 ctrl; + + if (!pci_is_pcie(dev)) + return; + + pos = pci_pcie_cap(dev); + if (!pos) + return; + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + ctrl &= ~PCI_EXP_OBFF_WAKE_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); +} +EXPORT_SYMBOL(pci_disable_obff); + static int pci_acs_enable; /** diff --git a/include/linux/pci.h b/include/linux/pci.h index 551ddcb5f940..45a035cccd93 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -833,6 +833,13 @@ static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state, void pci_enable_ido(struct pci_dev *dev, unsigned long type); void pci_disable_ido(struct pci_dev *dev, unsigned long type); +enum pci_obff_signal_type { + PCI_EXP_OBFF_SIGNAL_L0, + PCI_EXP_OBFF_SIGNAL_ALWAYS, +}; +int pci_enable_obff(struct pci_dev *dev, enum pci_obff_signal_type); +void pci_disable_obff(struct pci_dev *dev); + /* For use by arch with custom probe code */ void set_pcie_port_type(struct pci_dev *pdev); void set_pcie_hotplug_bridge(struct pci_dev *pdev); @@ -1220,6 +1227,15 @@ static inline void pci_disable_ido(struct pci_dev *dev, unsigned long type) { } +static inline int pci_enable_obff(struct pci_dev *dev, unsigned long type) +{ + return 0; +} + +static inline void pci_disable_obff(struct pci_dev *dev) +{ +} + static inline int pci_request_regions(struct pci_dev *dev, const char *res_name) { return -EIO; diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index d9acf9b99814..aa420261843d 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -508,10 +508,16 @@ #define PCI_EXP_RTSTA_PENDING 0x20000 /* PME pending */ #define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ #define PCI_EXP_DEVCAP2_ARI 0x20 /* Alternative Routing-ID */ +#define PCI_EXP_OBFF_MASK 0xc0000 /* OBFF support mechanism */ +#define PCI_EXP_OBFF_MSG 0x40000 /* New message signaling */ +#define PCI_EXP_OBFF_WAKE 0x80000 /* Re-use WAKE# for OBFF */ #define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ #define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */ #define PCI_EXP_IDO_REQ_EN 0x100 /* ID-based ordering request enable */ #define PCI_EXP_IDO_CMP_EN 0x200 /* ID-based ordering completion enable */ +#define PCI_EXP_OBFF_MSGA_EN 0x2000 /* OBFF enable with Message type A */ +#define PCI_EXP_OBFF_MSGB_EN 0x4000 /* OBFF enable with Message type B */ +#define PCI_EXP_OBFF_WAKE_EN 0x6000 /* OBFF using WAKE# signaling */ #define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ #define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ -- cgit v1.2.3 From 51c2e0a7e5bc7ed1384cc68cfb95e702571500c9 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 14 Jan 2011 08:53:04 -0800 Subject: PCI: add latency tolerance reporting enable/disable support Latency tolerance reporting allows devices to send messages to the root complex indicating their latency tolerance for snooped & unsnooped memory transactions. Add support for enabling & disabling this feature, along with a routine to set the max latencies a device should send upstream. 
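[Illustrative note, not part of the patch: a sketch of how the new LTR helpers might be used together from a driver's setup path. The foo_* name and the latency figures are invented for the example; pci_ltr_supported(), pci_enable_ltr() and pci_set_ltr() are the interfaces added here.]

#include <linux/pci.h>

/* Hypothetical helper; assumes pdev is function 0 of the device. */
static int foo_setup_ltr(struct pci_dev *pdev)
{
        int ret;

        if (!pci_ltr_supported(pdev))
                return 0;               /* LTR is optional */

        /* Enables LTR on upstream ports first, then on the device. */
        ret = pci_enable_ltr(pdev);
        if (ret)
                return ret;

        /*
         * Example tolerances only: ~100us for snooped and ~200us for
         * unsnooped traffic; pci_set_ltr() converts the nanosecond
         * arguments into the LTR value/scale encoding.
         */
        return pci_set_ltr(pdev, 100 * 1000, 200 * 1000);
}
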
Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 149 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 5 ++ include/linux/pci_regs.h | 9 +++ 3 files changed, 163 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 01e4cab2e5cb..53302cbdb94c 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1979,6 +1979,155 @@ void pci_disable_obff(struct pci_dev *dev) } EXPORT_SYMBOL(pci_disable_obff); +/** + * pci_ltr_supported - check whether a device supports LTR + * @dev: PCI device + * + * RETURNS: + * True if @dev supports latency tolerance reporting, false otherwise. + */ +bool pci_ltr_supported(struct pci_dev *dev) +{ + int pos; + u32 cap; + + if (!pci_is_pcie(dev)) + return false; + + pos = pci_pcie_cap(dev); + if (!pos) + return false; + + pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP2, &cap); + + return cap & PCI_EXP_DEVCAP2_LTR; +} +EXPORT_SYMBOL(pci_ltr_supported); + +/** + * pci_enable_ltr - enable latency tolerance reporting + * @dev: PCI device + * + * Enable LTR on @dev if possible, which means enabling it first on + * upstream ports. + * + * RETURNS: + * Zero on success, errno on failure. + */ +int pci_enable_ltr(struct pci_dev *dev) +{ + int pos; + u16 ctrl; + int ret; + + if (!pci_ltr_supported(dev)) + return -ENOTSUPP; + + pos = pci_pcie_cap(dev); + if (!pos) + return -ENOTSUPP; + + /* Only primary function can enable/disable LTR */ + if (PCI_FUNC(dev->devfn) != 0) + return -EINVAL; + + /* Enable upstream ports first */ + if (dev->bus) { + ret = pci_enable_ltr(dev->bus->self); + if (ret) + return ret; + } + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + ctrl |= PCI_EXP_LTR_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); + + return 0; +} +EXPORT_SYMBOL(pci_enable_ltr); + +/** + * pci_disable_ltr - disable latency tolerance reporting + * @dev: PCI device + */ +void pci_disable_ltr(struct pci_dev *dev) +{ + int pos; + u16 ctrl; + + if (!pci_ltr_supported(dev)) + return; + + pos = pci_pcie_cap(dev); + if (!pos) + return; + + /* Only primary function can enable/disable LTR */ + if (PCI_FUNC(dev->devfn) != 0) + return; + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + ctrl &= ~PCI_EXP_LTR_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); +} +EXPORT_SYMBOL(pci_disable_ltr); + +static int __pci_ltr_scale(int *val) +{ + int scale = 0; + + while (*val > 1023) { + *val = (*val + 31) / 32; + scale++; + } + return scale; +} + +/** + * pci_set_ltr - set LTR latency values + * @dev: PCI device + * @snoop_lat_ns: snoop latency in nanoseconds + * @nosnoop_lat_ns: nosnoop latency in nanoseconds + * + * Figure out the scale and set the LTR values accordingly. 
+ */ +int pci_set_ltr(struct pci_dev *dev, int snoop_lat_ns, int nosnoop_lat_ns) +{ + int pos, ret, snoop_scale, nosnoop_scale; + u16 val; + + if (!pci_ltr_supported(dev)) + return -ENOTSUPP; + + snoop_scale = __pci_ltr_scale(&snoop_lat_ns); + nosnoop_scale = __pci_ltr_scale(&nosnoop_lat_ns); + + if (snoop_lat_ns > PCI_LTR_VALUE_MASK || + nosnoop_lat_ns > PCI_LTR_VALUE_MASK) + return -EINVAL; + + if ((snoop_scale > (PCI_LTR_SCALE_MASK >> PCI_LTR_SCALE_SHIFT)) || + (nosnoop_scale > (PCI_LTR_SCALE_MASK >> PCI_LTR_SCALE_SHIFT))) + return -EINVAL; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_LTR); + if (!pos) + return -ENOTSUPP; + + val = (snoop_scale << PCI_LTR_SCALE_SHIFT) | snoop_lat_ns; + ret = pci_write_config_word(dev, pos + PCI_LTR_MAX_SNOOP_LAT, val); + if (ret != 4) + return -EIO; + + val = (nosnoop_scale << PCI_LTR_SCALE_SHIFT) | nosnoop_lat_ns; + ret = pci_write_config_word(dev, pos + PCI_LTR_MAX_NOSNOOP_LAT, val); + if (ret != 4) + return -EIO; + + return 0; +} +EXPORT_SYMBOL(pci_set_ltr); + static int pci_acs_enable; /** diff --git a/include/linux/pci.h b/include/linux/pci.h index 45a035cccd93..df4d69b82144 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -840,6 +840,11 @@ enum pci_obff_signal_type { int pci_enable_obff(struct pci_dev *dev, enum pci_obff_signal_type); void pci_disable_obff(struct pci_dev *dev); +bool pci_ltr_supported(struct pci_dev *dev); +int pci_enable_ltr(struct pci_dev *dev); +void pci_disable_ltr(struct pci_dev *dev); +int pci_set_ltr(struct pci_dev *dev, int snoop_lat_ns, int nosnoop_lat_ns); + /* For use by arch with custom probe code */ void set_pcie_port_type(struct pci_dev *pdev); void set_pcie_hotplug_bridge(struct pci_dev *pdev); diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index aa420261843d..e8840964aca1 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -508,6 +508,7 @@ #define PCI_EXP_RTSTA_PENDING 0x20000 /* PME pending */ #define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ #define PCI_EXP_DEVCAP2_ARI 0x20 /* Alternative Routing-ID */ +#define PCI_EXP_DEVCAP2_LTR 0x800 /* Latency tolerance reporting */ #define PCI_EXP_OBFF_MASK 0xc0000 /* OBFF support mechanism */ #define PCI_EXP_OBFF_MSG 0x40000 /* New message signaling */ #define PCI_EXP_OBFF_WAKE 0x80000 /* Re-use WAKE# for OBFF */ @@ -515,6 +516,7 @@ #define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */ #define PCI_EXP_IDO_REQ_EN 0x100 /* ID-based ordering request enable */ #define PCI_EXP_IDO_CMP_EN 0x200 /* ID-based ordering completion enable */ +#define PCI_EXP_LTR_EN 0x400 /* Latency tolerance reporting */ #define PCI_EXP_OBFF_MSGA_EN 0x2000 /* OBFF enable with Message type A */ #define PCI_EXP_OBFF_MSGB_EN 0x4000 /* OBFF enable with Message type B */ #define PCI_EXP_OBFF_WAKE_EN 0x6000 /* OBFF using WAKE# signaling */ @@ -535,6 +537,7 @@ #define PCI_EXT_CAP_ID_ARI 14 #define PCI_EXT_CAP_ID_ATS 15 #define PCI_EXT_CAP_ID_SRIOV 16 +#define PCI_EXT_CAP_ID_LTR 24 /* Advanced Error Reporting */ #define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ @@ -691,6 +694,12 @@ #define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */ #define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */ +#define PCI_LTR_MAX_SNOOP_LAT 0x4 +#define PCI_LTR_MAX_NOSNOOP_LAT 0x6 +#define PCI_LTR_VALUE_MASK 0x000003ff +#define PCI_LTR_SCALE_MASK 0x00001c00 +#define PCI_LTR_SCALE_SHIFT 10 + /* Access Control Service */ #define PCI_ACS_CAP 0x04 /* ACS Capability Register */ #define PCI_ACS_SV 0x01 /* Source Validation */ -- cgit v1.2.3 From 
8f389a99b652aab5b42297280bd94d95933ad12f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 11 May 2011 15:13:32 -0700 Subject: mm: use alloc_bootmem_node_nopanic() on really needed path Stefan found nobootmem does not work on his system that has only 8M of RAM. This causes an early panic: BIOS-provided physical RAM map: BIOS-88: 0000000000000000 - 000000000009f000 (usable) BIOS-88: 0000000000100000 - 0000000000840000 (usable) bootconsole [earlyser0] enabled Notice: NX (Execute Disable) protection missing in CPU or disabled in BIOS! DMI not present or invalid. last_pfn = 0x840 max_arch_pfn = 0x100000 init_memory_mapping: 0000000000000000-0000000000840000 8MB LOWMEM available. mapped low ram: 0 - 00840000 low ram: 0 - 00840000 Zone PFN ranges: DMA 0x00000001 -> 0x00001000 Normal empty Movable zone start PFN for each node early_node_map[2] active PFN ranges 0: 0x00000001 -> 0x0000009f 0: 0x00000100 -> 0x00000840 BUG: Int 6: CR2 (null) EDI c034663c ESI (null) EBP c0329f38 ESP c0329ef4 EBX c0346380 EDX 00000006 ECX ffffffff EAX fffffff4 err (null) EIP c0353191 CS c0320060 flg 00010082 Stack: (null) c030c533 000007cd (null) c030c533 00000001 (null) (null) 00000003 0000083f 00000018 00000002 00000002 c0329f6c c03534d6 (null) (null) 00000100 00000840 (null) c0329f64 00000001 00001000 (null) Pid: 0, comm: swapper Not tainted 2.6.36 #5 Call Trace: [] ? 0xc02e3707 [] 0xc035e6e5 [] ? 0xc0353191 [] 0xc03534d6 [] 0xc034f1cd [] 0xc034a824 [] ? 0xc03513cb [] 0xc0349432 [] 0xc0349066 It turns out that we should ignore the low limit of 16M. Use alloc_bootmem_node_nopanic() in this case. [akpm@linux-foundation.org: less mess] Signed-off-by: Yinghai LU Reported-by: Stefan Hellermann Tested-by: Stefan Hellermann Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: [2.6.34+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 2 ++ mm/page_alloc.c | 7 ++++--- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index b8613e806aa9..01eca1794e14 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -111,6 +111,8 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, __alloc_bootmem_nopanic(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_node_nopanic(pgdat, x) \ + __alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node_nopanic(pgdat, x) \ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9f8a97b9a350..454191a25173 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3564,7 +3564,7 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) if (!slab_is_available()) { zone->wait_table = (wait_queue_head_t *) - alloc_bootmem_node(pgdat, alloc_size); + alloc_bootmem_node_nopanic(pgdat, alloc_size); } else { /* * This case means that a zone whose size was 0 gets new memory @@ -4141,7 +4141,8 @@ static void __init setup_usemap(struct pglist_data *pgdat, unsigned long usemapsize = usemap_size(zonesize); zone->pageblock_flags = NULL; if (usemapsize) - zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize); + zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat, + usemapsize); } #else static inline void setup_usemap(struct pglist_data *pgdat, @@ -4307,7 
+4308,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat) size = (end - start) * sizeof(struct page); map = alloc_remap(pgdat->node_id, size); if (!map) - map = alloc_bootmem_node(pgdat, size); + map = alloc_bootmem_node_nopanic(pgdat, size); pgdat->node_mem_map = map + (pgdat->node_start_pfn - start); } #ifndef CONFIG_NEED_MULTIPLE_NODES -- cgit v1.2.3 From ee85c2e1454603ebb9f8d87223ac79dcdc87fa32 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 11 May 2011 15:13:34 -0700 Subject: mm: add alloc_pages_exact_nid() Add a alloc_pages_exact_nid() that allocates on a specific node. The naming is quite broken, but fixing that would need a larger renaming action. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: tweak comment] Signed-off-by: Andi Kleen Cc: Michal Hocko Cc: Balbir Singh Cc: KOSAKI Motohiro Cc: Dave Hansen Cc: David Rientjes Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 ++ mm/page_alloc.c | 49 +++++++++++++++++++++++++++++++++++++------------ 2 files changed, 39 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index bfb8f934521e..56d8fc87fbbc 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -353,6 +353,8 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask); void *alloc_pages_exact(size_t size, gfp_t gfp_mask); void free_pages_exact(void *virt, size_t size); +/* This is different from alloc_pages_exact_node !!! */ +void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask), 0) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 454191a25173..570d944daeb5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2317,6 +2317,21 @@ void free_pages(unsigned long addr, unsigned int order) EXPORT_SYMBOL(free_pages); +static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size) +{ + if (addr) { + unsigned long alloc_end = addr + (PAGE_SIZE << order); + unsigned long used = addr + PAGE_ALIGN(size); + + split_page(virt_to_page((void *)addr), order); + while (used < alloc_end) { + free_page(used); + used += PAGE_SIZE; + } + } + return (void *)addr; +} + /** * alloc_pages_exact - allocate an exact number physically-contiguous pages. * @size: the number of bytes to allocate @@ -2336,21 +2351,31 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask) unsigned long addr; addr = __get_free_pages(gfp_mask, order); - if (addr) { - unsigned long alloc_end = addr + (PAGE_SIZE << order); - unsigned long used = addr + PAGE_ALIGN(size); - - split_page(virt_to_page((void *)addr), order); - while (used < alloc_end) { - free_page(used); - used += PAGE_SIZE; - } - } - - return (void *)addr; + return make_alloc_exact(addr, order, size); } EXPORT_SYMBOL(alloc_pages_exact); +/** + * alloc_pages_exact_nid - allocate an exact number of physically-contiguous + * pages on a node. + * @size: the number of bytes to allocate + * @gfp_mask: GFP flags for the allocation + * + * Like alloc_pages_exact(), but try to allocate on node nid first before falling + * back. + * Note this is not alloc_pages_exact_node() which allocates on a specific node, + * but is not exact. 
+ */ +void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) +{ + unsigned order = get_order(size); + struct page *p = alloc_pages_node(nid, gfp_mask, order); + if (!p) + return NULL; + return make_alloc_exact((unsigned long)page_address(p), order, size); +} +EXPORT_SYMBOL(alloc_pages_exact_nid); + /** * free_pages_exact - release memory allocated via alloc_pages_exact() * @virt: the value returned by alloc_pages_exact. -- cgit v1.2.3 From a75b9df9d3bfc3cd1083974c045ae31ce5f3434f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 May 2011 18:00:51 -0400 Subject: NFSv4.1: Ensure that layoutget uses the correct gfp modes Currently, writebacks may end up recursing back into the filesystem due to GFP_KERNEL direct reclaims in the pnfs subsystem. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 25 ++++++++++++++----------- fs/nfs/nfs4filelayout.h | 2 +- fs/nfs/nfs4filelayoutdev.c | 34 +++++++++++++++++----------------- fs/nfs/pnfs.c | 33 +++++++++++++++++++-------------- fs/nfs/pnfs.h | 6 +++--- fs/nfs/read.c | 4 ++-- fs/nfs/write.c | 4 ++-- include/linux/nfs_xdr.h | 1 + 8 files changed, 59 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 7841ea603c91..be79dc9f386d 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -418,7 +418,8 @@ static int filelayout_check_layout(struct pnfs_layout_hdr *lo, struct nfs4_filelayout_segment *fl, struct nfs4_layoutget_res *lgr, - struct nfs4_deviceid *id) + struct nfs4_deviceid *id, + gfp_t gfp_flags) { struct nfs4_file_layout_dsaddr *dsaddr; int status = -EINVAL; @@ -441,7 +442,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, /* find and reference the deviceid */ dsaddr = nfs4_fl_find_get_deviceid(id); if (dsaddr == NULL) { - dsaddr = get_device_info(lo->plh_inode, id); + dsaddr = get_device_info(lo->plh_inode, id, gfp_flags); if (dsaddr == NULL) goto out; } @@ -502,7 +503,8 @@ static int filelayout_decode_layout(struct pnfs_layout_hdr *flo, struct nfs4_filelayout_segment *fl, struct nfs4_layoutget_res *lgr, - struct nfs4_deviceid *id) + struct nfs4_deviceid *id, + gfp_t gfp_flags) { struct xdr_stream stream; struct xdr_buf buf = { @@ -518,7 +520,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, dprintk("%s: set_layout_map Begin\n", __func__); - scratch = alloc_page(GFP_KERNEL); + scratch = alloc_page(gfp_flags); if (!scratch) return -ENOMEM; @@ -556,13 +558,13 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, goto out_err; fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), - GFP_KERNEL); + gfp_flags); if (!fl->fh_array) goto out_err; for (i = 0; i < fl->num_fh; i++) { /* Do we want to use a mempool here? 
*/ - fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); + fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags); if (!fl->fh_array[i]) goto out_err_free; @@ -607,19 +609,20 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg) static struct pnfs_layout_segment * filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, - struct nfs4_layoutget_res *lgr) + struct nfs4_layoutget_res *lgr, + gfp_t gfp_flags) { struct nfs4_filelayout_segment *fl; int rc; struct nfs4_deviceid id; dprintk("--> %s\n", __func__); - fl = kzalloc(sizeof(*fl), GFP_KERNEL); + fl = kzalloc(sizeof(*fl), gfp_flags); if (!fl) return NULL; - rc = filelayout_decode_layout(layoutid, fl, lgr, &id); - if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id)) { + rc = filelayout_decode_layout(layoutid, fl, lgr, &id, gfp_flags); + if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id, gfp_flags)) { _filelayout_free_lseg(fl); return NULL; } @@ -635,7 +638,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, int size = (fl->stripe_type == STRIPE_SPARSE) ? fl->dsaddr->ds_num : fl->dsaddr->stripe_count; - fl->commit_buckets = kcalloc(size, sizeof(struct list_head), GFP_KERNEL); + fl->commit_buckets = kcalloc(size, sizeof(struct list_head), gfp_flags); if (!fl->commit_buckets) { filelayout_free_lseg(&fl->generic_hdr); return NULL; diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 7c44579f5832..2b461d77b43a 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -104,6 +104,6 @@ extern struct nfs4_file_layout_dsaddr * nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id); extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); struct nfs4_file_layout_dsaddr * -get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id); +get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); #endif /* FS_NFS_NFS4FILELAYOUT_H */ diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index de5350f2b249..db07c7af1395 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -225,11 +225,11 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) } static struct nfs4_pnfs_ds * -nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port) +nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags) { struct nfs4_pnfs_ds *tmp_ds, *ds; - ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL); + ds = kzalloc(sizeof(*tmp_ds), gfp_flags); if (!ds) goto out; @@ -261,7 +261,7 @@ out: * Currently only support ipv4, and one multi-path address. 
*/ static struct nfs4_pnfs_ds * -decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode) +decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags) { struct nfs4_pnfs_ds *ds = NULL; char *buf; @@ -303,7 +303,7 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode) rlen); goto out_err; } - buf = kmalloc(rlen + 1, GFP_KERNEL); + buf = kmalloc(rlen + 1, gfp_flags); if (!buf) { dprintk("%s: Not enough memory\n", __func__); goto out_err; @@ -333,7 +333,7 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode) sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]); port = htons((tmp[0] << 8) | (tmp[1])); - ds = nfs4_pnfs_ds_add(inode, ip_addr, port); + ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags); dprintk("%s: Decoded address and port %s\n", __func__, buf); out_free: kfree(buf); @@ -343,7 +343,7 @@ out_err: /* Decode opaque device data and return the result */ static struct nfs4_file_layout_dsaddr* -decode_device(struct inode *ino, struct pnfs_device *pdev) +decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) { int i; u32 cnt, num; @@ -362,7 +362,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev) struct page *scratch; /* set up xdr stream */ - scratch = alloc_page(GFP_KERNEL); + scratch = alloc_page(gfp_flags); if (!scratch) goto out_err; @@ -384,7 +384,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev) } /* read stripe indices */ - stripe_indices = kcalloc(cnt, sizeof(u8), GFP_KERNEL); + stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags); if (!stripe_indices) goto out_err_free_scratch; @@ -423,7 +423,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev) dsaddr = kzalloc(sizeof(*dsaddr) + (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), - GFP_KERNEL); + gfp_flags); if (!dsaddr) goto out_err_free_stripe_indices; @@ -452,7 +452,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev) for (j = 0; j < mp_count; j++) { if (j == 0) { dsaddr->ds_list[i] = decode_and_add_ds(&stream, - ino); + ino, gfp_flags); if (dsaddr->ds_list[i] == NULL) goto out_err_free_deviceid; } else { @@ -503,12 +503,12 @@ out_err: * available devices. */ static struct nfs4_file_layout_dsaddr * -decode_and_add_device(struct inode *inode, struct pnfs_device *dev) +decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags) { struct nfs4_file_layout_dsaddr *d, *new; long hash; - new = decode_device(inode, dev); + new = decode_device(inode, dev, gfp_flags); if (!new) { printk(KERN_WARNING "%s: Could not decode or add device\n", __func__); @@ -537,7 +537,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev) * of available devices, and return it. 
*/ struct nfs4_file_layout_dsaddr * -get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) +get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags) { struct pnfs_device *pdev = NULL; u32 max_resp_sz; @@ -556,17 +556,17 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) dprintk("%s inode %p max_resp_sz %u max_pages %d\n", __func__, inode, max_resp_sz, max_pages); - pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL); + pdev = kzalloc(sizeof(struct pnfs_device), gfp_flags); if (pdev == NULL) return NULL; - pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); + pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); if (pages == NULL) { kfree(pdev); return NULL; } for (i = 0; i < max_pages; i++) { - pages[i] = alloc_page(GFP_KERNEL); + pages[i] = alloc_page(gfp_flags); if (!pages[i]) goto out_free; } @@ -587,7 +587,7 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) * Found new device, need to decode it and then add it to the * list of known devices for this mountpoint. */ - dsaddr = decode_and_add_device(inode, pdev); + dsaddr = decode_and_add_device(inode, pdev, gfp_flags); out_free: for (i = 0; i < max_pages; i++) __free_page(pages[i]); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 65455f58b109..f57f5281a520 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -467,7 +467,8 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, static struct pnfs_layout_segment * send_layoutget(struct pnfs_layout_hdr *lo, struct nfs_open_context *ctx, - u32 iomode) + u32 iomode, + gfp_t gfp_flags) { struct inode *ino = lo->plh_inode; struct nfs_server *server = NFS_SERVER(ino); @@ -480,7 +481,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); BUG_ON(ctx == NULL); - lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); + lgp = kzalloc(sizeof(*lgp), gfp_flags); if (lgp == NULL) return NULL; @@ -488,12 +489,12 @@ send_layoutget(struct pnfs_layout_hdr *lo, max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; max_pages = max_resp_sz >> PAGE_SHIFT; - pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); + pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); if (!pages) goto out_err_free; for (i = 0; i < max_pages; i++) { - pages[i] = alloc_page(GFP_KERNEL); + pages[i] = alloc_page(gfp_flags); if (!pages[i]) goto out_err_free; } @@ -509,6 +510,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, lgp->args.layout.pages = pages; lgp->args.layout.pglen = max_pages * PAGE_SIZE; lgp->lsegpp = &lseg; + lgp->gfp_flags = gfp_flags; /* Synchronously retrieve layout information from server and * store in lseg. 
@@ -666,11 +668,11 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, } static struct pnfs_layout_hdr * -alloc_init_layout_hdr(struct inode *ino) +alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags) { struct pnfs_layout_hdr *lo; - lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL); + lo = kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); if (!lo) return NULL; atomic_set(&lo->plh_refcount, 1); @@ -682,7 +684,7 @@ alloc_init_layout_hdr(struct inode *ino) } static struct pnfs_layout_hdr * -pnfs_find_alloc_layout(struct inode *ino) +pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags) { struct nfs_inode *nfsi = NFS_I(ino); struct pnfs_layout_hdr *new = NULL; @@ -697,7 +699,7 @@ pnfs_find_alloc_layout(struct inode *ino) return nfsi->layout; } spin_unlock(&ino->i_lock); - new = alloc_init_layout_hdr(ino); + new = alloc_init_layout_hdr(ino, gfp_flags); spin_lock(&ino->i_lock); if (likely(nfsi->layout == NULL)) /* Won the race? */ @@ -757,7 +759,8 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - enum pnfs_iomode iomode) + enum pnfs_iomode iomode, + gfp_t gfp_flags) { struct nfs_inode *nfsi = NFS_I(ino); struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; @@ -768,7 +771,7 @@ pnfs_update_layout(struct inode *ino, if (!pnfs_enabled_sb(NFS_SERVER(ino))) return NULL; spin_lock(&ino->i_lock); - lo = pnfs_find_alloc_layout(ino); + lo = pnfs_find_alloc_layout(ino, gfp_flags); if (lo == NULL) { dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); goto out_unlock; @@ -808,7 +811,7 @@ pnfs_update_layout(struct inode *ino, spin_unlock(&clp->cl_lock); } - lseg = send_layoutget(lo, ctx, iomode); + lseg = send_layoutget(lo, ctx, iomode, gfp_flags); if (!lseg && first) { spin_lock(&clp->cl_lock); list_del_init(&lo->plh_layouts); @@ -847,7 +850,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) goto out; } /* Inject layout blob into I/O device driver */ - lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res); + lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); if (!lseg || IS_ERR(lseg)) { if (!lseg) status = -ENOMEM; @@ -900,7 +903,8 @@ static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio, /* This is first coelesce call for a series of nfs_pages */ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, prev->wb_context, - IOMODE_READ); + IOMODE_READ, + GFP_KERNEL); } return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); } @@ -922,7 +926,8 @@ static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio, /* This is first coelesce call for a series of nfs_pages */ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, prev->wb_context, - IOMODE_RW); + IOMODE_RW, + GFP_NOFS); } return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index bc4827202e7a..0c015bad9e7a 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -70,7 +70,7 @@ struct pnfs_layoutdriver_type { const u32 id; const char *name; struct module *owner; - struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr); + struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); void (*free_lseg) (struct pnfs_layout_segment *lseg); /* test for nfs page cache coalescing */ @@ -126,7 +126,7 @@ void get_layout_hdr(struct pnfs_layout_hdr *lo); void put_lseg(struct pnfs_layout_segment *lseg); 
struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - enum pnfs_iomode access_type); + enum pnfs_iomode access_type, gfp_t gfp_flags); void set_pnfs_layoutdriver(struct nfs_server *, u32 id); void unset_pnfs_layoutdriver(struct nfs_server *); enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, @@ -245,7 +245,7 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg) static inline struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - enum pnfs_iomode access_type) + enum pnfs_iomode access_type, gfp_t gfp_flags) { return NULL; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 7cded2b12a05..2bcf0dc306a1 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -288,7 +288,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) atomic_set(&req->wb_complete, requests); BUG_ON(desc->pg_lseg != NULL); - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL); ClearPageError(page); offset = 0; nbytes = desc->pg_count; @@ -351,7 +351,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) } req = nfs_list_entry(data->pages.next); if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL); ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, 0, lseg); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3bd5d7e80f6c..49c715b4ac92 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -939,7 +939,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) atomic_set(&req->wb_complete, requests); BUG_ON(desc->pg_lseg); - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS); ClearPageError(page); offset = 0; nbytes = desc->pg_count; @@ -1013,7 +1013,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) } req = nfs_list_entry(data->pages.next); if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 890dce242639..7e371f7df9c4 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -233,6 +233,7 @@ struct nfs4_layoutget { struct nfs4_layoutget_args args; struct nfs4_layoutget_res res; struct pnfs_layout_segment **lsegpp; + gfp_t gfp_flags; }; struct nfs4_getdeviceinfo_args { -- cgit v1.2.3 From 3e51e3edfd81bfd9853ad7de91167e4ce33d0fe7 Mon Sep 17 00:00:00 2001 From: Samir Bellabes Date: Wed, 11 May 2011 18:18:05 +0200 Subject: sched: Remove unused parameters from sched_fork() and wake_up_new_task() sched_fork() and wake_up_new_task() are defined with a parameter 'unsigned long clone_flags', which is unused. This patch removes the parameters. 
Signed-off-by: Samir Bellabes Acked-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1305130685-1047-1-git-send-email-sam@synack.fr Signed-off-by: Ingo Molnar --- include/linux/sched.h | 5 ++--- kernel/fork.c | 4 ++-- kernel/sched.c | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6b4280b23ee6..12211e1666e2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2051,14 +2051,13 @@ extern void xtime_update(unsigned long ticks); extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); -extern void wake_up_new_task(struct task_struct *tsk, - unsigned long clone_flags); +extern void wake_up_new_task(struct task_struct *tsk); #ifdef CONFIG_SMP extern void kick_process(struct task_struct *tsk); #else static inline void kick_process(struct task_struct *tsk) { } #endif -extern void sched_fork(struct task_struct *p, int clone_flags); +extern void sched_fork(struct task_struct *p); extern void sched_dead(struct task_struct *p); extern void proc_caches_init(void); diff --git a/kernel/fork.c b/kernel/fork.c index aca62871a4f9..2b44d82b8237 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1152,7 +1152,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, #endif /* Perform scheduler related setup. Assign this task to a CPU. */ - sched_fork(p, clone_flags); + sched_fork(p); retval = perf_event_init_task(p); if (retval) @@ -1463,7 +1463,7 @@ long do_fork(unsigned long clone_flags, */ p->flags &= ~PF_STARTING; - wake_up_new_task(p, clone_flags); + wake_up_new_task(p); tracehook_report_clone_complete(trace, regs, clone_flags, nr, p); diff --git a/kernel/sched.c b/kernel/sched.c index da9338150484..f9778c0d91e2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2741,7 +2741,7 @@ static void __sched_fork(struct task_struct *p) /* * fork()/clone()-time setup: */ -void sched_fork(struct task_struct *p, int clone_flags) +void sched_fork(struct task_struct *p) { unsigned long flags; int cpu = get_cpu(); @@ -2823,7 +2823,7 @@ void sched_fork(struct task_struct *p, int clone_flags) * that must be done for every newly created context, then puts the task * on the runqueue and wakes it. */ -void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) +void wake_up_new_task(struct task_struct *p) { unsigned long flags; struct rq *rq; -- cgit v1.2.3 From 5db1256a5131d3b133946fa02ac9770a784e6eb2 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Thu, 12 May 2011 04:13:54 -0500 Subject: seqlock: Don't smp_rmb in seqlock reader spin loop Move the smp_rmb after cpu_relax loop in read_seqlock and add ACCESS_ONCE to make sure the test and return are consistent. A multi-threaded core in the lab didn't like the update from 2.6.35 to 2.6.36, to the point it would hang during boot when multiple threads were active. Bisection showed af5ab277ded04bd9bc6b048c5a2f0e7d70ef0867 (clockevents: Remove the per cpu tick skew) as the culprit and it is supported with stack traces showing xtime_lock waits including tick_do_update_jiffies64 and/or update_vsyscall. Experimentation showed the combination of cpu_relax and smp_rmb was significantly slowing the progress of other threads sharing the core, and this patch is effective in avoiding the hang. 
A theory is the rmb is affecting the whole core while the cpu_relax is causing a resource rebalance flush, together they cause an interference cadence that is unbroken when the seqlock reader has interrupts disabled. At first I was confused why the refactor in 3c22cd5709e8143444a6d08682a87f4c57902df3 (kernel: optimise seqlock) didn't affect this patch application, but after some study that affected seqcount not seqlock. The new seqcount was not factored back into the seqlock. I defer that to the future. While the removal of the timer interrupt offset created contention for the xtime lock while a cpu does the additional work to update the system clock, the seqlock implementation with the tight rmb spin loop goes back much further, and is just waiting for the right trigger. Cc: Signed-off-by: Milton Miller Cc: Cc: Linus Torvalds Cc: Andi Kleen Cc: Nick Piggin Cc: Benjamin Herrenschmidt Cc: Anton Blanchard Cc: Paul McKenney Acked-by: Eric Dumazet Link: http://lkml.kernel.org/r/%3Cseqlock-rmb%40mdm.bga.com%3E Signed-off-by: Thomas Gleixner --- include/linux/seqlock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index e98cd2e57194..06d69648fc86 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -88,12 +88,12 @@ static __always_inline unsigned read_seqbegin(const seqlock_t *sl) unsigned ret; repeat: - ret = sl->sequence; - smp_rmb(); + ret = ACCESS_ONCE(sl->sequence); if (unlikely(ret & 1)) { cpu_relax(); goto repeat; } + smp_rmb(); return ret; } -- cgit v1.2.3 From 698b368275c3fa98261159253cfc79653f9dffc6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 11 May 2011 14:49:36 -0700 Subject: fbcon: add lifetime refcount to opened frame buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This just adds the refcount and the new registration lock logic. It does not (for example) actually change the read/write/ioctl routines to actually use the frame buffer that was opened: those functions still end up always using whatever the current frame buffer is at the time of the call. Without this, if something holds the frame buffer open over a framebuffer switch, the close() operation after the switch will access a fb_info that has been free'd by the unregistering of the old frame buffer. (The read/write/ioctl operations will normally not cause problems, because they will - illogically - pick up the new fbcon instead. But a switch that happens just as one of those is going on might see problems too, the window is just much smaller: one individual op rather than the whole open-close sequence.) This use-after-free is apparently fairly easily triggered by the Ubuntu 11.04 boot sequence.
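To make the lifetime problem concrete, a minimal user-space reproducer along the lines described above might look like the sketch below; the device path and the trigger for the framebuffer switch are illustrative assumptions, not part of this patch:

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	/* Keep a framebuffer open across an unregister of the device
	 * (e.g. a native KMS driver replacing the boot framebuffer). */
	int fd = open("/dev/fb0", O_RDONLY);

	if (fd < 0)
		return 1;

	sleep(60);		/* framebuffer switch happens meanwhile */

	/* Before this patch, the release path here could touch an
	 * fb_info that unregister_framebuffer() had already freed. */
	return close(fd);
}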
Acked-by: Tim Gardner Tested-by: Daniel J Blueman Tested-by: Anca Emanuel Cc: Bruno Prémont Cc: Alan Cox Cc: Paul Mundt Cc: Dave Airlie Cc: Andy Whitcroft Signed-off-by: Linus Torvalds --- drivers/video/fbmem.c | 56 ++++++++++++++++++++++++++++++++++++++++++--------- include/linux/fb.h | 1 + 2 files changed, 47 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index e0c2284924b6..eec14d2ca1c7 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -42,9 +42,34 @@ #define FBPIXMAPSIZE (1024 * 8) +static DEFINE_MUTEX(registration_lock); struct fb_info *registered_fb[FB_MAX] __read_mostly; int num_registered_fb __read_mostly; +static struct fb_info *get_fb_info(unsigned int idx) +{ + struct fb_info *fb_info; + + if (idx >= FB_MAX) + return ERR_PTR(-ENODEV); + + mutex_lock(®istration_lock); + fb_info = registered_fb[idx]; + if (fb_info) + atomic_inc(&fb_info->count); + mutex_unlock(®istration_lock); + + return fb_info; +} + +static void put_fb_info(struct fb_info *fb_info) +{ + if (!atomic_dec_and_test(&fb_info->count)) + return; + if (fb_info->fbops->fb_destroy) + fb_info->fbops->fb_destroy(fb_info); +} + int lock_fb_info(struct fb_info *info) { mutex_lock(&info->lock); @@ -647,6 +672,7 @@ int fb_show_logo(struct fb_info *info, int rotate) { return 0; } static void *fb_seq_start(struct seq_file *m, loff_t *pos) { + mutex_lock(®istration_lock); return (*pos < FB_MAX) ? pos : NULL; } @@ -658,6 +684,7 @@ static void *fb_seq_next(struct seq_file *m, void *v, loff_t *pos) static void fb_seq_stop(struct seq_file *m, void *v) { + mutex_unlock(®istration_lock); } static int fb_seq_show(struct seq_file *m, void *v) @@ -1361,14 +1388,16 @@ __releases(&info->lock) struct fb_info *info; int res = 0; - if (fbidx >= FB_MAX) - return -ENODEV; - info = registered_fb[fbidx]; - if (!info) + info = get_fb_info(fbidx); + if (!info) { request_module("fb%d", fbidx); - info = registered_fb[fbidx]; - if (!info) - return -ENODEV; + info = get_fb_info(fbidx); + if (!info) + return -ENODEV; + } + if (IS_ERR(info)) + return PTR_ERR(info); + mutex_lock(&info->lock); if (!try_module_get(info->fbops->owner)) { res = -ENODEV; @@ -1386,6 +1415,8 @@ __releases(&info->lock) #endif out: mutex_unlock(&info->lock); + if (res) + put_fb_info(info); return res; } @@ -1401,6 +1432,7 @@ __releases(&info->lock) info->fbops->fb_release(info,1); module_put(info->fbops->owner); mutex_unlock(&info->lock); + put_fb_info(info); return 0; } @@ -1542,11 +1574,13 @@ register_framebuffer(struct fb_info *fb_info) remove_conflicting_framebuffers(fb_info->apertures, fb_info->fix.id, fb_is_primary_device(fb_info)); + mutex_lock(®istration_lock); num_registered_fb++; for (i = 0 ; i < FB_MAX; i++) if (!registered_fb[i]) break; fb_info->node = i; + atomic_set(&fb_info->count, 1); mutex_init(&fb_info->lock); mutex_init(&fb_info->mm_lock); @@ -1583,6 +1617,7 @@ register_framebuffer(struct fb_info *fb_info) fb_var_to_videomode(&mode, &fb_info->var); fb_add_videomode(&mode, &fb_info->modelist); registered_fb[i] = fb_info; + mutex_unlock(®istration_lock); event.info = fb_info; if (!lock_fb_info(fb_info)) @@ -1616,6 +1651,7 @@ unregister_framebuffer(struct fb_info *fb_info) struct fb_event event; int i, ret = 0; + mutex_lock(®istration_lock); i = fb_info->node; if (!registered_fb[i]) { ret = -EINVAL; @@ -1638,7 +1674,7 @@ unregister_framebuffer(struct fb_info *fb_info) (fb_info->pixmap.flags & FB_PIXMAP_DEFAULT)) kfree(fb_info->pixmap.addr); 
fb_destroy_modelist(&fb_info->modelist); - registered_fb[i]=NULL; + registered_fb[i] = NULL; num_registered_fb--; fb_cleanup_device(fb_info); device_destroy(fb_class, MKDEV(FB_MAJOR, i)); @@ -1646,9 +1682,9 @@ fb_notifier_call_chain(FB_EVENT_FB_UNREGISTERED, &event); /* this may free fb info */ - if (fb_info->fbops->fb_destroy) - fb_info->fbops->fb_destroy(fb_info); + put_fb_info(fb_info); done: + mutex_unlock(&registration_lock); return ret; } diff --git a/include/linux/fb.h b/include/linux/fb.h index df728c1c29ed..6a8274877171 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -832,6 +832,7 @@ struct fb_tile_ops { #define FBINFO_CAN_FORCE_OUTPUT 0x200000 struct fb_info { + atomic_t count; int node; int flags; struct mutex lock; /* Lock for open/release/ioctl funcs */ -- cgit v1.2.3 From be84cb43833ee40a42e08f5425d20310f16229c7 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 9 May 2011 13:12:30 -0400 Subject: compat: fixes to allow working with tile arch The existing mechanism doesn't really provide enough to create the 64-bit "compat" ABI properly in a generic way, since the compat ABI is a mix of things where you can re-use the 64-bit versions of syscalls and things where you need a compat wrapper. To provide this in the most direct way possible, I added two new macros to go along with the existing __SYSCALL and __SC_3264 macros: __SC_COMP and __SC_COMP_3264. These macros take an additional argument, typically a "compat_sys_xxx" function, which is passed to __SYSCALL if you define __SYSCALL_COMPAT when including the header, resulting in a pointer to the compat function being placed in the generated syscall table. The change also adds some missing definitions to <linux/compat.h> so that it actually has declarations for all the compat syscalls, since the "[nr] = ##call" approach requires proper C declarations for all the functions included in the syscall table. Finally, compat.c defines compat_sys_sigpending() and compat_sys_sigprocmask() even if the underlying architecture doesn't request it, which tries to pull in undefined compat_old_sigset_t defines. We need to guard those compat syscall definitions with appropriate __ARCH_WANT_SYS_xxx ifdefs. Acked-by: Arnd Bergmann Signed-off-by: Chris Metcalf --- arch/tile/kernel/compat.c | 2 +- include/asm-generic/unistd.h | 221 ++++++++++++++++++++++++------------- include/linux/compat.h | 187 ++++++++++++++++++++++++++++++++++++ kernel/compat.c | 8 ++ 4 files changed, 320 insertions(+), 98 deletions(-) (limited to 'include/linux') diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index dbc213adf5e1..e35a3975ca3b 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -135,7 +135,7 @@ long tile_compat_sys_msgrcv(int msqid, /* Provide the compat syscall number to call mapping. */ #undef __SYSCALL -#define __SYSCALL(nr, call) [nr] = (compat_##call), +#define __SYSCALL(nr, call) [nr] = (call), /* The generic versions of these don't work for Tile.
*/ #define compat_sys_msgrcv tile_compat_sys_msgrcv diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h index 07c40d5149de..33d524704883 100644 --- a/include/asm-generic/unistd.h +++ b/include/asm-generic/unistd.h @@ -24,16 +24,24 @@ #define __SC_3264(_nr, _32, _64) __SYSCALL(_nr, _64) #endif +#ifdef __SYSCALL_COMPAT +#define __SC_COMP(_nr, _sys, _comp) __SYSCALL(_nr, _comp) +#define __SC_COMP_3264(_nr, _32, _64, _comp) __SYSCALL(_nr, _comp) +#else +#define __SC_COMP(_nr, _sys, _comp) __SYSCALL(_nr, _sys) +#define __SC_COMP_3264(_nr, _32, _64, _comp) __SC_3264(_nr, _32, _64) +#endif + #define __NR_io_setup 0 -__SYSCALL(__NR_io_setup, sys_io_setup) +__SC_COMP(__NR_io_setup, sys_io_setup, compat_sys_io_setup) #define __NR_io_destroy 1 __SYSCALL(__NR_io_destroy, sys_io_destroy) #define __NR_io_submit 2 -__SYSCALL(__NR_io_submit, sys_io_submit) +__SC_COMP(__NR_io_submit, sys_io_submit, compat_sys_io_submit) #define __NR_io_cancel 3 __SYSCALL(__NR_io_cancel, sys_io_cancel) #define __NR_io_getevents 4 -__SYSCALL(__NR_io_getevents, sys_io_getevents) +__SC_COMP(__NR_io_getevents, sys_io_getevents, compat_sys_io_getevents) /* fs/xattr.c */ #define __NR_setxattr 5 @@ -67,7 +75,7 @@ __SYSCALL(__NR_getcwd, sys_getcwd) /* fs/cookies.c */ #define __NR_lookup_dcookie 18 -__SYSCALL(__NR_lookup_dcookie, sys_lookup_dcookie) +__SC_COMP(__NR_lookup_dcookie, sys_lookup_dcookie, compat_sys_lookup_dcookie) /* fs/eventfd.c */ #define __NR_eventfd2 19 @@ -79,7 +87,7 @@ __SYSCALL(__NR_epoll_create1, sys_epoll_create1) #define __NR_epoll_ctl 21 __SYSCALL(__NR_epoll_ctl, sys_epoll_ctl) #define __NR_epoll_pwait 22 -__SYSCALL(__NR_epoll_pwait, sys_epoll_pwait) +__SC_COMP(__NR_epoll_pwait, sys_epoll_pwait, compat_sys_epoll_pwait) /* fs/fcntl.c */ #define __NR_dup 23 @@ -87,7 +95,7 @@ __SYSCALL(__NR_dup, sys_dup) #define __NR_dup3 24 __SYSCALL(__NR_dup3, sys_dup3) #define __NR3264_fcntl 25 -__SC_3264(__NR3264_fcntl, sys_fcntl64, sys_fcntl) +__SC_COMP_3264(__NR3264_fcntl, sys_fcntl64, sys_fcntl, compat_sys_fcntl64) /* fs/inotify_user.c */ #define __NR_inotify_init1 26 @@ -99,7 +107,7 @@ __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) /* fs/ioctl.c */ #define __NR_ioctl 29 -__SYSCALL(__NR_ioctl, sys_ioctl) +__SC_COMP(__NR_ioctl, sys_ioctl, compat_sys_ioctl) /* fs/ioprio.c */ #define __NR_ioprio_set 30 @@ -129,26 +137,30 @@ __SYSCALL(__NR_renameat, sys_renameat) #define __NR_umount2 39 __SYSCALL(__NR_umount2, sys_umount) #define __NR_mount 40 -__SYSCALL(__NR_mount, sys_mount) +__SC_COMP(__NR_mount, sys_mount, compat_sys_mount) #define __NR_pivot_root 41 __SYSCALL(__NR_pivot_root, sys_pivot_root) /* fs/nfsctl.c */ #define __NR_nfsservctl 42 -__SYSCALL(__NR_nfsservctl, sys_nfsservctl) +__SC_COMP(__NR_nfsservctl, sys_nfsservctl, compat_sys_nfsservctl) /* fs/open.c */ #define __NR3264_statfs 43 -__SC_3264(__NR3264_statfs, sys_statfs64, sys_statfs) +__SC_COMP_3264(__NR3264_statfs, sys_statfs64, sys_statfs, \ + compat_sys_statfs64) #define __NR3264_fstatfs 44 -__SC_3264(__NR3264_fstatfs, sys_fstatfs64, sys_fstatfs) +__SC_COMP_3264(__NR3264_fstatfs, sys_fstatfs64, sys_fstatfs, \ + compat_sys_fstatfs64) #define __NR3264_truncate 45 -__SC_3264(__NR3264_truncate, sys_truncate64, sys_truncate) +__SC_COMP_3264(__NR3264_truncate, sys_truncate64, sys_truncate, \ + compat_sys_truncate64) #define __NR3264_ftruncate 46 -__SC_3264(__NR3264_ftruncate, sys_ftruncate64, sys_ftruncate) +__SC_COMP_3264(__NR3264_ftruncate, sys_ftruncate64, sys_ftruncate, \ + compat_sys_ftruncate64) #define __NR_fallocate 47 
-__SYSCALL(__NR_fallocate, sys_fallocate) +__SC_COMP(__NR_fallocate, sys_fallocate, compat_sys_fallocate) #define __NR_faccessat 48 __SYSCALL(__NR_faccessat, sys_faccessat) #define __NR_chdir 49 @@ -166,7 +178,7 @@ __SYSCALL(__NR_fchownat, sys_fchownat) #define __NR_fchown 55 __SYSCALL(__NR_fchown, sys_fchown) #define __NR_openat 56 -__SYSCALL(__NR_openat, sys_openat) +__SC_COMP(__NR_openat, sys_openat, compat_sys_openat) #define __NR_close 57 __SYSCALL(__NR_close, sys_close) #define __NR_vhangup 58 @@ -182,7 +194,7 @@ __SYSCALL(__NR_quotactl, sys_quotactl) /* fs/readdir.c */ #define __NR_getdents64 61 -__SYSCALL(__NR_getdents64, sys_getdents64) +__SC_COMP(__NR_getdents64, sys_getdents64, compat_sys_getdents64) /* fs/read_write.c */ #define __NR3264_lseek 62 @@ -192,17 +204,17 @@ __SYSCALL(__NR_read, sys_read) #define __NR_write 64 __SYSCALL(__NR_write, sys_write) #define __NR_readv 65 -__SYSCALL(__NR_readv, sys_readv) +__SC_COMP(__NR_readv, sys_readv, compat_sys_readv) #define __NR_writev 66 -__SYSCALL(__NR_writev, sys_writev) +__SC_COMP(__NR_writev, sys_writev, compat_sys_writev) #define __NR_pread64 67 -__SYSCALL(__NR_pread64, sys_pread64) +__SC_COMP(__NR_pread64, sys_pread64, compat_sys_pread64) #define __NR_pwrite64 68 -__SYSCALL(__NR_pwrite64, sys_pwrite64) +__SC_COMP(__NR_pwrite64, sys_pwrite64, compat_sys_pwrite64) #define __NR_preadv 69 -__SYSCALL(__NR_preadv, sys_preadv) +__SC_COMP(__NR_preadv, sys_preadv, compat_sys_preadv) #define __NR_pwritev 70 -__SYSCALL(__NR_pwritev, sys_pwritev) +__SC_COMP(__NR_pwritev, sys_pwritev, compat_sys_pwritev) /* fs/sendfile.c */ #define __NR3264_sendfile 71 @@ -210,17 +222,17 @@ __SC_3264(__NR3264_sendfile, sys_sendfile64, sys_sendfile) /* fs/select.c */ #define __NR_pselect6 72 -__SYSCALL(__NR_pselect6, sys_pselect6) +__SC_COMP(__NR_pselect6, sys_pselect6, compat_sys_pselect6) #define __NR_ppoll 73 -__SYSCALL(__NR_ppoll, sys_ppoll) +__SC_COMP(__NR_ppoll, sys_ppoll, compat_sys_ppoll) /* fs/signalfd.c */ #define __NR_signalfd4 74 -__SYSCALL(__NR_signalfd4, sys_signalfd4) +__SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4) /* fs/splice.c */ #define __NR_vmsplice 75 -__SYSCALL(__NR_vmsplice, sys_vmsplice) +__SC_COMP(__NR_vmsplice, sys_vmsplice, compat_sys_vmsplice) #define __NR_splice 76 __SYSCALL(__NR_splice, sys_splice) #define __NR_tee 77 @@ -243,23 +255,27 @@ __SYSCALL(__NR_fsync, sys_fsync) __SYSCALL(__NR_fdatasync, sys_fdatasync) #ifdef __ARCH_WANT_SYNC_FILE_RANGE2 #define __NR_sync_file_range2 84 -__SYSCALL(__NR_sync_file_range2, sys_sync_file_range2) +__SC_COMP(__NR_sync_file_range2, sys_sync_file_range2, \ + compat_sys_sync_file_range2) #else #define __NR_sync_file_range 84 -__SYSCALL(__NR_sync_file_range, sys_sync_file_range) +__SC_COMP(__NR_sync_file_range, sys_sync_file_range, \ + compat_sys_sync_file_range) #endif /* fs/timerfd.c */ #define __NR_timerfd_create 85 __SYSCALL(__NR_timerfd_create, sys_timerfd_create) #define __NR_timerfd_settime 86 -__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime) +__SC_COMP(__NR_timerfd_settime, sys_timerfd_settime, \ + compat_sys_timerfd_settime) #define __NR_timerfd_gettime 87 -__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime) +__SC_COMP(__NR_timerfd_gettime, sys_timerfd_gettime, \ + compat_sys_timerfd_gettime) /* fs/utimes.c */ #define __NR_utimensat 88 -__SYSCALL(__NR_utimensat, sys_utimensat) +__SC_COMP(__NR_utimensat, sys_utimensat, compat_sys_utimensat) /* kernel/acct.c */ #define __NR_acct 89 @@ -281,7 +297,7 @@ __SYSCALL(__NR_exit, sys_exit) #define __NR_exit_group 94 
__SYSCALL(__NR_exit_group, sys_exit_group) #define __NR_waitid 95 -__SYSCALL(__NR_waitid, sys_waitid) +__SC_COMP(__NR_waitid, sys_waitid, compat_sys_waitid) /* kernel/fork.c */ #define __NR_set_tid_address 96 @@ -291,25 +307,27 @@ __SYSCALL(__NR_unshare, sys_unshare) /* kernel/futex.c */ #define __NR_futex 98 -__SYSCALL(__NR_futex, sys_futex) +__SC_COMP(__NR_futex, sys_futex, compat_sys_futex) #define __NR_set_robust_list 99 -__SYSCALL(__NR_set_robust_list, sys_set_robust_list) +__SC_COMP(__NR_set_robust_list, sys_set_robust_list, \ + compat_sys_set_robust_list) #define __NR_get_robust_list 100 -__SYSCALL(__NR_get_robust_list, sys_get_robust_list) +__SC_COMP(__NR_get_robust_list, sys_get_robust_list, \ + compat_sys_get_robust_list) /* kernel/hrtimer.c */ #define __NR_nanosleep 101 -__SYSCALL(__NR_nanosleep, sys_nanosleep) +__SC_COMP(__NR_nanosleep, sys_nanosleep, compat_sys_nanosleep) /* kernel/itimer.c */ #define __NR_getitimer 102 -__SYSCALL(__NR_getitimer, sys_getitimer) +__SC_COMP(__NR_getitimer, sys_getitimer, compat_sys_getitimer) #define __NR_setitimer 103 -__SYSCALL(__NR_setitimer, sys_setitimer) +__SC_COMP(__NR_setitimer, sys_setitimer, compat_sys_setitimer) /* kernel/kexec.c */ #define __NR_kexec_load 104 -__SYSCALL(__NR_kexec_load, sys_kexec_load) +__SC_COMP(__NR_kexec_load, sys_kexec_load, compat_sys_kexec_load) /* kernel/module.c */ #define __NR_init_module 105 @@ -319,23 +337,24 @@ __SYSCALL(__NR_delete_module, sys_delete_module) /* kernel/posix-timers.c */ #define __NR_timer_create 107 -__SYSCALL(__NR_timer_create, sys_timer_create) +__SC_COMP(__NR_timer_create, sys_timer_create, compat_sys_timer_create) #define __NR_timer_gettime 108 -__SYSCALL(__NR_timer_gettime, sys_timer_gettime) +__SC_COMP(__NR_timer_gettime, sys_timer_gettime, compat_sys_timer_gettime) #define __NR_timer_getoverrun 109 __SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) #define __NR_timer_settime 110 -__SYSCALL(__NR_timer_settime, sys_timer_settime) +__SC_COMP(__NR_timer_settime, sys_timer_settime, compat_sys_timer_settime) #define __NR_timer_delete 111 __SYSCALL(__NR_timer_delete, sys_timer_delete) #define __NR_clock_settime 112 -__SYSCALL(__NR_clock_settime, sys_clock_settime) +__SC_COMP(__NR_clock_settime, sys_clock_settime, compat_sys_clock_settime) #define __NR_clock_gettime 113 -__SYSCALL(__NR_clock_gettime, sys_clock_gettime) +__SC_COMP(__NR_clock_gettime, sys_clock_gettime, compat_sys_clock_gettime) #define __NR_clock_getres 114 -__SYSCALL(__NR_clock_getres, sys_clock_getres) +__SC_COMP(__NR_clock_getres, sys_clock_getres, compat_sys_clock_getres) #define __NR_clock_nanosleep 115 -__SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep) +__SC_COMP(__NR_clock_nanosleep, sys_clock_nanosleep, \ + compat_sys_clock_nanosleep) /* kernel/printk.c */ #define __NR_syslog 116 @@ -355,9 +374,11 @@ __SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler) #define __NR_sched_getparam 121 __SYSCALL(__NR_sched_getparam, sys_sched_getparam) #define __NR_sched_setaffinity 122 -__SYSCALL(__NR_sched_setaffinity, sys_sched_setaffinity) +__SC_COMP(__NR_sched_setaffinity, sys_sched_setaffinity, \ + compat_sys_sched_setaffinity) #define __NR_sched_getaffinity 123 -__SYSCALL(__NR_sched_getaffinity, sys_sched_getaffinity) +__SC_COMP(__NR_sched_getaffinity, sys_sched_getaffinity, \ + compat_sys_sched_getaffinity) #define __NR_sched_yield 124 __SYSCALL(__NR_sched_yield, sys_sched_yield) #define __NR_sched_get_priority_max 125 @@ -365,7 +386,8 @@ __SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) #define 
__NR_sched_get_priority_min 126 __SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) #define __NR_sched_rr_get_interval 127 -__SYSCALL(__NR_sched_rr_get_interval, sys_sched_rr_get_interval) +__SC_COMP(__NR_sched_rr_get_interval, sys_sched_rr_get_interval, \ + compat_sys_sched_rr_get_interval) /* kernel/signal.c */ #define __NR_restart_syscall 128 @@ -377,21 +399,23 @@ __SYSCALL(__NR_tkill, sys_tkill) #define __NR_tgkill 131 __SYSCALL(__NR_tgkill, sys_tgkill) #define __NR_sigaltstack 132 -__SYSCALL(__NR_sigaltstack, sys_sigaltstack) +__SC_COMP(__NR_sigaltstack, sys_sigaltstack, compat_sys_sigaltstack) #define __NR_rt_sigsuspend 133 -__SYSCALL(__NR_rt_sigsuspend, sys_rt_sigsuspend) /* __ARCH_WANT_SYS_RT_SIGSUSPEND */ +__SC_COMP(__NR_rt_sigsuspend, sys_rt_sigsuspend, compat_sys_rt_sigsuspend) #define __NR_rt_sigaction 134 -__SYSCALL(__NR_rt_sigaction, sys_rt_sigaction) /* __ARCH_WANT_SYS_RT_SIGACTION */ +__SC_COMP(__NR_rt_sigaction, sys_rt_sigaction, compat_sys_rt_sigaction) #define __NR_rt_sigprocmask 135 __SYSCALL(__NR_rt_sigprocmask, sys_rt_sigprocmask) #define __NR_rt_sigpending 136 __SYSCALL(__NR_rt_sigpending, sys_rt_sigpending) #define __NR_rt_sigtimedwait 137 -__SYSCALL(__NR_rt_sigtimedwait, sys_rt_sigtimedwait) +__SC_COMP(__NR_rt_sigtimedwait, sys_rt_sigtimedwait, \ + compat_sys_rt_sigtimedwait) #define __NR_rt_sigqueueinfo 138 -__SYSCALL(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo) +__SC_COMP(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo, \ + compat_sys_rt_sigqueueinfo) #define __NR_rt_sigreturn 139 -__SYSCALL(__NR_rt_sigreturn, sys_rt_sigreturn) /* sys_rt_sigreturn_wrapper, */ +__SC_COMP(__NR_rt_sigreturn, sys_rt_sigreturn, compat_sys_rt_sigreturn) /* kernel/sys.c */ #define __NR_setpriority 140 @@ -421,7 +445,7 @@ __SYSCALL(__NR_setfsuid, sys_setfsuid) #define __NR_setfsgid 152 __SYSCALL(__NR_setfsgid, sys_setfsgid) #define __NR_times 153 -__SYSCALL(__NR_times, sys_times) +__SC_COMP(__NR_times, sys_times, compat_sys_times) #define __NR_setpgid 154 __SYSCALL(__NR_setpgid, sys_setpgid) #define __NR_getpgid 155 @@ -441,11 +465,11 @@ __SYSCALL(__NR_sethostname, sys_sethostname) #define __NR_setdomainname 162 __SYSCALL(__NR_setdomainname, sys_setdomainname) #define __NR_getrlimit 163 -__SYSCALL(__NR_getrlimit, sys_getrlimit) +__SC_COMP(__NR_getrlimit, sys_getrlimit, compat_sys_getrlimit) #define __NR_setrlimit 164 -__SYSCALL(__NR_setrlimit, sys_setrlimit) +__SC_COMP(__NR_setrlimit, sys_setrlimit, compat_sys_setrlimit) #define __NR_getrusage 165 -__SYSCALL(__NR_getrusage, sys_getrusage) +__SC_COMP(__NR_getrusage, sys_getrusage, compat_sys_getrusage) #define __NR_umask 166 __SYSCALL(__NR_umask, sys_umask) #define __NR_prctl 167 @@ -455,11 +479,11 @@ __SYSCALL(__NR_getcpu, sys_getcpu) /* kernel/time.c */ #define __NR_gettimeofday 169 -__SYSCALL(__NR_gettimeofday, sys_gettimeofday) +__SC_COMP(__NR_gettimeofday, sys_gettimeofday, compat_sys_gettimeofday) #define __NR_settimeofday 170 -__SYSCALL(__NR_settimeofday, sys_settimeofday) +__SC_COMP(__NR_settimeofday, sys_settimeofday, compat_sys_settimeofday) #define __NR_adjtimex 171 -__SYSCALL(__NR_adjtimex, sys_adjtimex) +__SC_COMP(__NR_adjtimex, sys_adjtimex, compat_sys_adjtimex) /* kernel/timer.c */ #define __NR_getpid 172 @@ -477,39 +501,40 @@ __SYSCALL(__NR_getegid, sys_getegid) #define __NR_gettid 178 __SYSCALL(__NR_gettid, sys_gettid) #define __NR_sysinfo 179 -__SYSCALL(__NR_sysinfo, sys_sysinfo) +__SC_COMP(__NR_sysinfo, sys_sysinfo, compat_sys_sysinfo) /* ipc/mqueue.c */ #define __NR_mq_open 180 -__SYSCALL(__NR_mq_open, 
sys_mq_open) +__SC_COMP(__NR_mq_open, sys_mq_open, compat_sys_mq_open) #define __NR_mq_unlink 181 __SYSCALL(__NR_mq_unlink, sys_mq_unlink) #define __NR_mq_timedsend 182 -__SYSCALL(__NR_mq_timedsend, sys_mq_timedsend) +__SC_COMP(__NR_mq_timedsend, sys_mq_timedsend, compat_sys_mq_timedsend) #define __NR_mq_timedreceive 183 -__SYSCALL(__NR_mq_timedreceive, sys_mq_timedreceive) +__SC_COMP(__NR_mq_timedreceive, sys_mq_timedreceive, \ + compat_sys_mq_timedreceive) #define __NR_mq_notify 184 -__SYSCALL(__NR_mq_notify, sys_mq_notify) +__SC_COMP(__NR_mq_notify, sys_mq_notify, compat_sys_mq_notify) #define __NR_mq_getsetattr 185 -__SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr) +__SC_COMP(__NR_mq_getsetattr, sys_mq_getsetattr, compat_sys_mq_getsetattr) /* ipc/msg.c */ #define __NR_msgget 186 __SYSCALL(__NR_msgget, sys_msgget) #define __NR_msgctl 187 -__SYSCALL(__NR_msgctl, sys_msgctl) +__SC_COMP(__NR_msgctl, sys_msgctl, compat_sys_msgctl) #define __NR_msgrcv 188 -__SYSCALL(__NR_msgrcv, sys_msgrcv) +__SC_COMP(__NR_msgrcv, sys_msgrcv, compat_sys_msgrcv) #define __NR_msgsnd 189 -__SYSCALL(__NR_msgsnd, sys_msgsnd) +__SC_COMP(__NR_msgsnd, sys_msgsnd, compat_sys_msgsnd) /* ipc/sem.c */ #define __NR_semget 190 __SYSCALL(__NR_semget, sys_semget) #define __NR_semctl 191 -__SYSCALL(__NR_semctl, sys_semctl) +__SC_COMP(__NR_semctl, sys_semctl, compat_sys_semctl) #define __NR_semtimedop 192 -__SYSCALL(__NR_semtimedop, sys_semtimedop) +__SC_COMP(__NR_semtimedop, sys_semtimedop, compat_sys_semtimedop) #define __NR_semop 193 __SYSCALL(__NR_semop, sys_semop) @@ -517,9 +542,9 @@ __SYSCALL(__NR_semop, sys_semop) #define __NR_shmget 194 __SYSCALL(__NR_shmget, sys_shmget) #define __NR_shmctl 195 -__SYSCALL(__NR_shmctl, sys_shmctl) +__SC_COMP(__NR_shmctl, sys_shmctl, compat_sys_shmctl) #define __NR_shmat 196 -__SYSCALL(__NR_shmat, sys_shmat) +__SC_COMP(__NR_shmat, sys_shmat, compat_sys_shmat) #define __NR_shmdt 197 __SYSCALL(__NR_shmdt, sys_shmdt) @@ -543,21 +568,21 @@ __SYSCALL(__NR_getpeername, sys_getpeername) #define __NR_sendto 206 __SYSCALL(__NR_sendto, sys_sendto) #define __NR_recvfrom 207 -__SYSCALL(__NR_recvfrom, sys_recvfrom) +__SC_COMP(__NR_recvfrom, sys_recvfrom, compat_sys_recvfrom) #define __NR_setsockopt 208 -__SYSCALL(__NR_setsockopt, sys_setsockopt) +__SC_COMP(__NR_setsockopt, sys_setsockopt, compat_sys_setsockopt) #define __NR_getsockopt 209 -__SYSCALL(__NR_getsockopt, sys_getsockopt) +__SC_COMP(__NR_getsockopt, sys_getsockopt, compat_sys_getsockopt) #define __NR_shutdown 210 __SYSCALL(__NR_shutdown, sys_shutdown) #define __NR_sendmsg 211 -__SYSCALL(__NR_sendmsg, sys_sendmsg) +__SC_COMP(__NR_sendmsg, sys_sendmsg, compat_sys_sendmsg) #define __NR_recvmsg 212 -__SYSCALL(__NR_recvmsg, sys_recvmsg) +__SC_COMP(__NR_recvmsg, sys_recvmsg, compat_sys_recvmsg) /* mm/filemap.c */ #define __NR_readahead 213 -__SYSCALL(__NR_readahead, sys_readahead) +__SC_COMP(__NR_readahead, sys_readahead, compat_sys_readahead) /* mm/nommu.c, also with MMU */ #define __NR_brk 214 @@ -573,19 +598,19 @@ __SYSCALL(__NR_add_key, sys_add_key) #define __NR_request_key 218 __SYSCALL(__NR_request_key, sys_request_key) #define __NR_keyctl 219 -__SYSCALL(__NR_keyctl, sys_keyctl) +__SC_COMP(__NR_keyctl, sys_keyctl, compat_sys_keyctl) /* arch/example/kernel/sys_example.c */ #define __NR_clone 220 -__SYSCALL(__NR_clone, sys_clone) /* .long sys_clone_wrapper */ +__SYSCALL(__NR_clone, sys_clone) #define __NR_execve 221 -__SYSCALL(__NR_execve, sys_execve) /* .long sys_execve_wrapper */ +__SC_COMP(__NR_execve, sys_execve, compat_sys_execve) 
#define __NR3264_mmap 222 __SC_3264(__NR3264_mmap, sys_mmap2, sys_mmap) /* mm/fadvise.c */ #define __NR3264_fadvise64 223 -__SYSCALL(__NR3264_fadvise64, sys_fadvise64_64) +__SC_COMP(__NR3264_fadvise64, sys_fadvise64_64, compat_sys_fadvise64_64) /* mm/, CONFIG_MMU only */ #ifndef __ARCH_NOMMU @@ -612,25 +637,26 @@ __SYSCALL(__NR_madvise, sys_madvise) #define __NR_remap_file_pages 234 __SYSCALL(__NR_remap_file_pages, sys_remap_file_pages) #define __NR_mbind 235 -__SYSCALL(__NR_mbind, sys_mbind) +__SC_COMP(__NR_mbind, sys_mbind, compat_sys_mbind) #define __NR_get_mempolicy 236 -__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy) +__SC_COMP(__NR_get_mempolicy, sys_get_mempolicy, compat_sys_get_mempolicy) #define __NR_set_mempolicy 237 -__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy) +__SC_COMP(__NR_set_mempolicy, sys_set_mempolicy, compat_sys_set_mempolicy) #define __NR_migrate_pages 238 -__SYSCALL(__NR_migrate_pages, sys_migrate_pages) +__SC_COMP(__NR_migrate_pages, sys_migrate_pages, compat_sys_migrate_pages) #define __NR_move_pages 239 -__SYSCALL(__NR_move_pages, sys_move_pages) +__SC_COMP(__NR_move_pages, sys_move_pages, compat_sys_move_pages) #endif #define __NR_rt_tgsigqueueinfo 240 -__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) +__SC_COMP(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo, \ + compat_sys_rt_tgsigqueueinfo) #define __NR_perf_event_open 241 __SYSCALL(__NR_perf_event_open, sys_perf_event_open) #define __NR_accept4 242 __SYSCALL(__NR_accept4, sys_accept4) #define __NR_recvmmsg 243 -__SYSCALL(__NR_recvmmsg, sys_recvmmsg) +__SC_COMP(__NR_recvmmsg, sys_recvmmsg, compat_sys_recvmmsg) /* * Architectures may provide up to 16 syscalls of their own @@ -639,19 +665,20 @@ __SYSCALL(__NR_recvmmsg, sys_recvmmsg) #define __NR_arch_specific_syscall 244 #define __NR_wait4 260 -__SYSCALL(__NR_wait4, sys_wait4) +__SC_COMP(__NR_wait4, sys_wait4, compat_sys_wait4) #define __NR_prlimit64 261 __SYSCALL(__NR_prlimit64, sys_prlimit64) #define __NR_fanotify_init 262 __SYSCALL(__NR_fanotify_init, sys_fanotify_init) #define __NR_fanotify_mark 263 __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) -#define __NR_name_to_handle_at 264 +#define __NR_name_to_handle_at 264 __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) -#define __NR_open_by_handle_at 265 -__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) +#define __NR_open_by_handle_at 265 +__SC_COMP(__NR_open_by_handle_at, sys_open_by_handle_at, \ + compat_sys_open_by_handle_at) #define __NR_clock_adjtime 266 -__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) +__SC_COMP(__NR_clock_adjtime, sys_clock_adjtime, compat_sys_clock_adjtime) #define __NR_syncfs 267 __SYSCALL(__NR_syncfs, sys_syncfs) diff --git a/include/linux/compat.h b/include/linux/compat.h index 5778b559d59c..e94184834b71 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -209,6 +210,18 @@ struct compat_robust_list_head { compat_uptr_t list_op_pending; }; +struct compat_statfs; +struct compat_statfs64; +struct compat_old_linux_dirent; +struct compat_linux_dirent; +struct linux_dirent64; +struct compat_msghdr; +struct compat_mmsghdr; +struct compat_sysinfo; +struct compat_sysctl_args; +struct compat_kexec_segment; +struct compat_mq_attr; + extern void compat_exit_robust_list(struct task_struct *curr); asmlinkage long @@ -331,9 +344,13 @@ asmlinkage long compat_sys_epoll_pwait(int epfd, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); +asmlinkage long 
compat_sys_utime(const char __user *filename, + struct compat_utimbuf __user *t); asmlinkage long compat_sys_utimensat(unsigned int dfd, const char __user *filename, struct compat_timespec __user *t, int flags); +asmlinkage long compat_sys_time(compat_time_t __user *tloc); +asmlinkage long compat_sys_stime(compat_time_t __user *tptr); asmlinkage long compat_sys_signalfd(int ufd, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); @@ -350,11 +367,181 @@ asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_page, int flags); asmlinkage long compat_sys_futimesat(unsigned int dfd, const char __user *filename, struct compat_timeval __user *t); +asmlinkage long compat_sys_utimes(const char __user *filename, + struct compat_timeval __user *t); +asmlinkage long compat_sys_newstat(const char __user * filename, + struct compat_stat __user *statbuf); +asmlinkage long compat_sys_newlstat(const char __user * filename, + struct compat_stat __user *statbuf); asmlinkage long compat_sys_newfstatat(unsigned int dfd, const char __user * filename, struct compat_stat __user *statbuf, int flag); +asmlinkage long compat_sys_newfstat(unsigned int fd, + struct compat_stat __user * statbuf); +asmlinkage long compat_sys_statfs(const char __user *pathname, + struct compat_statfs __user *buf); +asmlinkage long compat_sys_fstatfs(unsigned int fd, + struct compat_statfs __user *buf); +asmlinkage long compat_sys_statfs64(const char __user *pathname, + compat_size_t sz, + struct compat_statfs64 __user *buf); +asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, + struct compat_statfs64 __user *buf); +asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, + unsigned long arg); +asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd, + unsigned long arg); +asmlinkage long compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p); +asmlinkage long compat_sys_io_getevents(aio_context_t ctx_id, + unsigned long min_nr, + unsigned long nr, + struct io_event __user *events, + struct compat_timespec __user *timeout); +asmlinkage long compat_sys_io_submit(aio_context_t ctx_id, int nr, + u32 __user *iocb); +asmlinkage long compat_sys_mount(const char __user * dev_name, + const char __user * dir_name, + const char __user * type, unsigned long flags, + const void __user * data); +asmlinkage long compat_sys_old_readdir(unsigned int fd, + struct compat_old_linux_dirent __user *, + unsigned int count); +asmlinkage long compat_sys_getdents(unsigned int fd, + struct compat_linux_dirent __user *dirent, + unsigned int count); +asmlinkage long compat_sys_getdents64(unsigned int fd, + struct linux_dirent64 __user * dirent, + unsigned int count); +asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *, + unsigned int nr_segs, unsigned int flags); +asmlinkage long compat_sys_open(const char __user *filename, int flags, + int mode); asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, int mode); +asmlinkage long compat_sys_open_by_handle_at(int mountdirfd, + struct file_handle __user *handle, + int flags); +asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp, + compat_ulong_t __user *outp, + compat_ulong_t __user *exp, + struct compat_timespec __user *tsp, + void __user *sig); +asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, + unsigned int nfds, + struct compat_timespec __user *tsp, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize); +#if (defined(CONFIG_NFSD) || 
defined(CONFIG_NFSD_MODULE)) && !defined(CONFIG_NFSD_DEPRECATED) +union compat_nfsctl_res; +struct compat_nfsctl_arg; +asmlinkage long compat_sys_nfsservctl(int cmd, + struct compat_nfsctl_arg __user *arg, + union compat_nfsctl_res __user *res); +#else +long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2); +#endif +asmlinkage long compat_sys_signalfd4(int ufd, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize, int flags); +asmlinkage long compat_sys_get_mempolicy(int __user *policy, + compat_ulong_t __user *nmask, + compat_ulong_t maxnode, + compat_ulong_t addr, + compat_ulong_t flags); +asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask, + compat_ulong_t maxnode); +asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, + compat_ulong_t mode, + compat_ulong_t __user *nmask, + compat_ulong_t maxnode, compat_ulong_t flags); + +asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, + char __user *optval, unsigned int optlen); +asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, + unsigned flags); +asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, + unsigned int flags); +asmlinkage long compat_sys_recv(int fd, void __user *buf, size_t len, + unsigned flags); +asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, size_t len, + unsigned flags, struct sockaddr __user *addr, + int __user *addrlen); +asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, + unsigned vlen, unsigned int flags, + struct compat_timespec __user *timeout); +asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp, + struct compat_timespec __user *rmtp); +asmlinkage long compat_sys_getitimer(int which, + struct compat_itimerval __user *it); +asmlinkage long compat_sys_setitimer(int which, + struct compat_itimerval __user *in, + struct compat_itimerval __user *out); +asmlinkage long compat_sys_times(struct compat_tms __user *tbuf); +asmlinkage long compat_sys_setrlimit(unsigned int resource, + struct compat_rlimit __user *rlim); +asmlinkage long compat_sys_getrlimit (unsigned int resource, + struct compat_rlimit __user *rlim); +asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru); +asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid, + unsigned int len, + compat_ulong_t __user *user_mask_ptr); +asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, + unsigned int len, + compat_ulong_t __user *user_mask_ptr); +asmlinkage long compat_sys_timer_create(clockid_t which_clock, + struct compat_sigevent __user *timer_event_spec, + timer_t __user *created_timer_id); +asmlinkage long compat_sys_timer_settime(timer_t timer_id, int flags, + struct compat_itimerspec __user *new, + struct compat_itimerspec __user *old); +asmlinkage long compat_sys_timer_gettime(timer_t timer_id, + struct compat_itimerspec __user *setting); +asmlinkage long compat_sys_clock_settime(clockid_t which_clock, + struct compat_timespec __user *tp); +asmlinkage long compat_sys_clock_gettime(clockid_t which_clock, + struct compat_timespec __user *tp); +asmlinkage long compat_sys_clock_adjtime(clockid_t which_clock, + struct compat_timex __user *tp); +asmlinkage long compat_sys_clock_getres(clockid_t which_clock, + struct compat_timespec __user *tp); +asmlinkage long compat_sys_clock_nanosleep(clockid_t which_clock, int flags, + struct compat_timespec __user *rqtp, + struct compat_timespec __user *rmtp); +asmlinkage 
long compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, + struct compat_siginfo __user *uinfo, + struct compat_timespec __user *uts, compat_size_t sigsetsize); +asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, + compat_size_t sigsetsize); +asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info); +asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, + unsigned long arg); +asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, + struct compat_timespec __user *utime, u32 __user *uaddr2, + u32 val3); +asmlinkage long compat_sys_getsockopt(int fd, int level, int optname, + char __user *optval, int __user *optlen); +asmlinkage long compat_sys_kexec_load(unsigned long entry, + unsigned long nr_segments, + struct compat_kexec_segment __user *, + unsigned long flags); +asmlinkage long compat_sys_mq_getsetattr(mqd_t mqdes, + const struct compat_mq_attr __user *u_mqstat, + struct compat_mq_attr __user *u_omqstat); +asmlinkage long compat_sys_mq_notify(mqd_t mqdes, + const struct compat_sigevent __user *u_notification); +asmlinkage long compat_sys_mq_open(const char __user *u_name, + int oflag, compat_mode_t mode, + struct compat_mq_attr __user *u_attr); +asmlinkage long compat_sys_mq_timedsend(mqd_t mqdes, + const char __user *u_msg_ptr, + size_t msg_len, unsigned int msg_prio, + const struct compat_timespec __user *u_abs_timeout); +asmlinkage ssize_t compat_sys_mq_timedreceive(mqd_t mqdes, + char __user *u_msg_ptr, + size_t msg_len, unsigned int __user *u_msg_prio, + const struct compat_timespec __user *u_abs_timeout); +asmlinkage long compat_sys_socketcall(int call, u32 __user *args); +asmlinkage long compat_sys_sysctl(struct compat_sysctl_args __user *args); extern ssize_t compat_rw_copy_check_uvector(int type, const struct compat_iovec __user *uvector, unsigned long nr_segs, diff --git a/kernel/compat.c b/kernel/compat.c index 38b1d2c1cbe8..80c28562f57b 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -293,6 +293,8 @@ asmlinkage long compat_sys_times(struct compat_tms __user *tbuf) return compat_jiffies_to_clock_t(jiffies); } +#ifdef __ARCH_WANT_SYS_SIGPENDING + /* * Assumption: old_sigset_t and compat_old_sigset_t are both * types that can be passed to put_user()/get_user(). @@ -312,6 +314,10 @@ asmlinkage long compat_sys_sigpending(compat_old_sigset_t __user *set) return ret; } +#endif + +#ifdef __ARCH_WANT_SYS_SIGPROCMASK + asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set, compat_old_sigset_t __user *oset) { @@ -333,6 +339,8 @@ asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set, return ret; } +#endif + asmlinkage long compat_sys_setrlimit(unsigned int resource, struct compat_rlimit __user *rlim) { -- cgit v1.2.3 From 7f267051bd7a280265b1b5ead58e9c6e4e1ac3a4 Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Mon, 9 May 2011 11:53:27 +0000 Subject: net: group FCoE related feature flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MichaÅ‚ MirosÅ‚aw's patch (http://patchwork.ozlabs.org/patch/94421/) fixes the issue (http://patchwork.ozlabs.org/patch/94188/) about not populating FCoE related flags correctly on vlan devices. However, only NETIF_F_FCOE_CRC is part of the NETIF_F_ALL_TX_OFFLOADS right now, where weed NETIF_F_FCOE_MTU and NETIF_F_FSO as well. Therefore, add NETIF_F_ALL_FCOE to indicate feature flags used by FCoE TX offloads. 
These include NETIF_F_FCOE_CRC, NETIF_F_FCOE_MTU, and NETIF_F_FSO and add them to be part of NETIF_F_ALL_TX_OFFLOADS. This would eventually make sure all FCoE needed flags are populated properly to vlan devices. Signed-off-by: Yi Zou Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e7244ed1f9a8..00d650c74800 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1097,10 +1097,14 @@ struct net_device { #define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) +#define NETIF_F_ALL_FCOE (NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU | \ + NETIF_F_FSO) + #define NETIF_F_ALL_TX_OFFLOADS (NETIF_F_ALL_CSUM | NETIF_F_SG | \ NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ NETIF_F_HIGHDMA | \ - NETIF_F_SCTP_CSUM | NETIF_F_FCOE_CRC) + NETIF_F_SCTP_CSUM | \ + NETIF_F_ALL_FCOE) /* * If one device supports one of these features, then enable them -- cgit v1.2.3 From afe12cc86b0ba545a01ad8716539ab07ab6e9e89 Mon Sep 17 00:00:00 2001 From: MichaÅ‚ MirosÅ‚aw Date: Sat, 7 May 2011 03:22:17 +0000 Subject: net: introduce netdev_change_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It will be needed by bonding and other drivers changing vlan_features after ndo_init callback. As a bonus, this includes kernel-doc for netdev_update_features(). Signed-off-by: MichaÅ‚ MirosÅ‚aw Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 00d650c74800..1d9696a9ee4d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2565,6 +2565,7 @@ u32 netdev_increment_features(u32 all, u32 one, u32 mask); u32 netdev_fix_features(struct net_device *dev, u32 features); int __netdev_update_features(struct net_device *dev); void netdev_update_features(struct net_device *dev); +void netdev_change_features(struct net_device *dev); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index 75898a32c038..ea23353e6251 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5289,6 +5289,14 @@ int __netdev_update_features(struct net_device *dev) return 1; } +/** + * netdev_update_features - recalculate device features + * @dev: the device to check + * + * Recalculate dev->features set and send notifications if it + * has changed. Should be called after driver or hardware dependent + * conditions might have changed that influence the features. + */ void netdev_update_features(struct net_device *dev) { if (__netdev_update_features(dev)) @@ -5296,6 +5304,23 @@ void netdev_update_features(struct net_device *dev) } EXPORT_SYMBOL(netdev_update_features); +/** + * netdev_change_features - recalculate device features + * @dev: the device to check + * + * Recalculate dev->features set and send notifications even + * if they have not changed. Should be called instead of + * netdev_update_features() if also dev->vlan_features might + * have changed to allow the changes to be propagated to stacked + * VLAN devices. 
+ */ +void netdev_change_features(struct net_device *dev) +{ + __netdev_update_features(dev); + netdev_features_change(dev); +} +EXPORT_SYMBOL(netdev_change_features); + /** * netif_stacked_transfer_operstate - transfer operstate * @rootdev: the root or lower level device to transfer state from -- cgit v1.2.3 From bdc220da3209d50b8c295490dec94845c88670a2 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 9 May 2011 17:42:46 +0000 Subject: netdevice.h: Align struct net_device members Save a bit of space. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1d9696a9ee4d..a134d809125b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1020,9 +1020,6 @@ struct net_device { * part of the usual set specified in Space.c. */ - unsigned char if_port; /* Selectable AUI, TP,..*/ - unsigned char dma; /* DMA channel */ - unsigned long state; struct list_head dev_list; @@ -1146,13 +1143,16 @@ struct net_device { const struct header_ops *header_ops; unsigned int flags; /* interface flags (a la BSD) */ + unsigned int priv_flags; /* Like 'flags' but invisible to userspace. */ unsigned short gflags; - unsigned int priv_flags; /* Like 'flags' but invisible to userspace. */ unsigned short padded; /* How much padding added by alloc_netdev() */ unsigned char operstate; /* RFC2863 operstate */ unsigned char link_mode; /* mapping policy to operstate */ + unsigned char if_port; /* Selectable AUI, TP,..*/ + unsigned char dma; /* DMA channel */ + unsigned int mtu; /* interface MTU value */ unsigned short type; /* interface hardware type */ unsigned short hard_header_len; /* hardware hdr length */ -- cgit v1.2.3 From 29dd54b72ba8c5ad0dd6dd33584449b5953f700b Mon Sep 17 00:00:00 2001 From: Anirban Chakraborty Date: Thu, 12 May 2011 12:48:32 +0000 Subject: ethtool: Added support for FW dump Added code to take FW dump via ethtool. Dump level can be controlled via setting the dump flag. A get function is provided to query the current setting of the dump flag. Dump data is obtained from the driver via a separate get function. Changes from v3: Fixed buffer length issue in ethtool_get_dump_data function. Updated kernel doc for ethtool_dump struct and get_dump_flag function. Changes from v2: Provided separate commands for get flag and data. Check for minimum of the two buffer length obtained via ethtool and driver and use that for dump buffer Pass up the driver return error codes up to the caller. Added kernel doc comments. Signed-off-by: Anirban Chakraborty Reviewed-by: Ben Hutchings Signed-off-by: David S. 
Miller --- include/linux/ethtool.h | 31 +++++++++++++++++ net/core/ethtool.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index bd0b50b85f06..c6a850ab2ec5 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -601,6 +601,26 @@ struct ethtool_flash { char data[ETHTOOL_FLASH_MAX_FILENAME]; }; +/** + * struct ethtool_dump - used for retrieving, setting device dump + * @cmd: Command number - %ETHTOOL_GET_DUMP_FLAG, %ETHTOOL_GET_DUMP_DATA, or + * %ETHTOOL_SET_DUMP + * @version: FW version of the dump, filled in by driver + * @flag: driver dependent flag for dump setting, filled in by driver during + * get and filled in by ethtool for set operation + * @len: length of dump data, used as the length of the user buffer on entry to + * %ETHTOOL_GET_DUMP_DATA and this is returned as dump length by driver + * for %ETHTOOL_GET_DUMP_FLAG command + * @data: data collected for get dump data operation + */ +struct ethtool_dump { + __u32 cmd; + __u32 version; + __u32 flag; + __u32 len; + __u8 data[0]; +}; + /* for returning and changing feature sets */ /** @@ -853,6 +873,10 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); * @get_channels: Get number of channels. * @set_channels: Set number of channels. Returns a negative error code or * zero. + * @get_dump_flag: Get dump flag indicating current dump length, version, + * and flag of the device. + * @get_dump_data: Get dump data. + * @set_dump: Set dump specific flags to the device. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. Callers must @@ -927,6 +951,10 @@ struct ethtool_ops { const struct ethtool_rxfh_indir *); void (*get_channels)(struct net_device *, struct ethtool_channels *); int (*set_channels)(struct net_device *, struct ethtool_channels *); + int (*get_dump_flag)(struct net_device *, struct ethtool_dump *); + int (*get_dump_data)(struct net_device *, + struct ethtool_dump *, void *); + int (*set_dump)(struct net_device *, struct ethtool_dump *); }; #endif /* __KERNEL__ */ @@ -998,6 +1026,9 @@ struct ethtool_ops { #define ETHTOOL_SFEATURES 0x0000003b /* Change device offload settings */ #define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */ #define ETHTOOL_SCHANNELS 0x0000003d /* Set no of channels */ +#define ETHTOOL_SET_DUMP 0x0000003e /* Set dump settings */ +#define ETHTOOL_GET_DUMP_FLAG 0x0000003f /* Get dump settings */ +#define ETHTOOL_GET_DUMP_DATA 0x00000040 /* Get dump data */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET diff --git a/net/core/ethtool.c b/net/core/ethtool.c index b8c2b10f397a..b8c2bcfee6af 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1823,6 +1823,87 @@ static noinline_for_stack int ethtool_flash_device(struct net_device *dev, return dev->ethtool_ops->flash_device(dev, &efl); } +static int ethtool_set_dump(struct net_device *dev, + void __user *useraddr) +{ + struct ethtool_dump dump; + + if (!dev->ethtool_ops->set_dump) + return -EOPNOTSUPP; + + if (copy_from_user(&dump, useraddr, sizeof(dump))) + return -EFAULT; + + return dev->ethtool_ops->set_dump(dev, &dump); +} + +static int ethtool_get_dump_flag(struct net_device *dev, + void __user *useraddr) +{ + int ret; + struct ethtool_dump dump; + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (!dev->ethtool_ops->get_dump_flag) + return -EOPNOTSUPP; + + if 
(copy_from_user(&dump, useraddr, sizeof(dump))) + return -EFAULT; + + ret = ops->get_dump_flag(dev, &dump); + if (ret) + return ret; + + if (copy_to_user(useraddr, &dump, sizeof(dump))) + return -EFAULT; + return 0; +} + +static int ethtool_get_dump_data(struct net_device *dev, + void __user *useraddr) +{ + int ret; + __u32 len; + struct ethtool_dump dump, tmp; + const struct ethtool_ops *ops = dev->ethtool_ops; + void *data = NULL; + + if (!dev->ethtool_ops->get_dump_data || + !dev->ethtool_ops->get_dump_flag) + return -EOPNOTSUPP; + + if (copy_from_user(&dump, useraddr, sizeof(dump))) + return -EFAULT; + + memset(&tmp, 0, sizeof(tmp)); + tmp.cmd = ETHTOOL_GET_DUMP_FLAG; + ret = ops->get_dump_flag(dev, &tmp); + if (ret) + return ret; + + len = (tmp.len > dump.len) ? dump.len : tmp.len; + if (!len) + return -EFAULT; + + data = vzalloc(tmp.len); + if (!data) + return -ENOMEM; + ret = ops->get_dump_data(dev, &dump, data); + if (ret) + goto out; + + if (copy_to_user(useraddr, &dump, sizeof(dump))) { + ret = -EFAULT; + goto out; + } + useraddr += offsetof(struct ethtool_dump, data); + if (copy_to_user(useraddr, data, len)) + ret = -EFAULT; +out: + vfree(data); + return ret; +} + /* The main entry point in this file. Called from net/core/dev.c */ int dev_ethtool(struct net *net, struct ifreq *ifr) @@ -2039,6 +2120,15 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SCHANNELS: rc = ethtool_set_channels(dev, useraddr); break; + case ETHTOOL_SET_DUMP: + rc = ethtool_set_dump(dev, useraddr); + break; + case ETHTOOL_GET_DUMP_FLAG: + rc = ethtool_get_dump_flag(dev, useraddr); + break; + case ETHTOOL_GET_DUMP_DATA: + rc = ethtool_get_dump_data(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.2.3 From 47a150edc2ae734c0f4bf50aa19499e23b9a46f8 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Fri, 13 May 2011 04:27:54 +0100 Subject: Cache user_ns in struct cred If !CONFIG_USERNS, have current_user_ns() defined to (&init_user_ns). Get rid of _current_user_ns. This requires nsown_capable() to be defined in capability.c rather than as static inline in capability.h, so do that. Request_key needs init_user_ns defined at current_user_ns if !CONFIG_USERNS, so forward-declare that in cred.h if !CONFIG_USERNS at current_user_ns() define. Compile-tested with and without CONFIG_USERNS. Signed-off-by: Serge E. Hallyn [ This makes a huge performance difference for acl_permission_check(), up to 30%. And that is one of the hottest kernel functions for loads that are pathname-lookup heavy. ] Signed-off-by: Linus Torvalds --- include/linux/capability.h | 13 +------------ include/linux/cred.h | 10 ++++++++-- kernel/capability.c | 12 ++++++++++++ kernel/cred.c | 12 ++++++------ 4 files changed, 27 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 16ee8b49a200..d4675af963fa 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -546,18 +546,7 @@ extern bool has_capability_noaudit(struct task_struct *t, int cap); extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool task_ns_capable(struct task_struct *t, int cap); - -/** - * nsown_capable - Check superior capability to one's own user_ns - * @cap: The capability in question - * - * Return true if the current task has the given superior capability - * targeted at its own user namespace. 
- */ -static inline bool nsown_capable(int cap) -{ - return ns_capable(current_user_ns(), cap); -} +extern bool nsown_capable(int cap); /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); diff --git a/include/linux/cred.h b/include/linux/cred.h index 9aeeb0ba2003..be16b61283cc 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -146,6 +146,7 @@ struct cred { void *security; /* subjective LSM security */ #endif struct user_struct *user; /* real user ID subscription */ + struct user_namespace *user_ns; /* cached user->user_ns */ struct group_info *group_info; /* supplementary groups for euid/fsgid */ struct rcu_head rcu; /* RCU deletion hook */ }; @@ -354,10 +355,15 @@ static inline void put_cred(const struct cred *_cred) #define current_fsgid() (current_cred_xxx(fsgid)) #define current_cap() (current_cred_xxx(cap_effective)) #define current_user() (current_cred_xxx(user)) -#define _current_user_ns() (current_cred_xxx(user)->user_ns) #define current_security() (current_cred_xxx(security)) -extern struct user_namespace *current_user_ns(void); +#ifdef CONFIG_USER_NS +#define current_user_ns() (current_cred_xxx(user_ns)) +#else +extern struct user_namespace init_user_ns; +#define current_user_ns() (&init_user_ns) +#endif + #define current_uid_gid(_uid, _gid) \ do { \ diff --git a/kernel/capability.c b/kernel/capability.c index bf0c734d0c12..32a80e08ff4b 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -399,3 +399,15 @@ bool task_ns_capable(struct task_struct *t, int cap) return ns_capable(task_cred_xxx(t, user)->user_ns, cap); } EXPORT_SYMBOL(task_ns_capable); + +/** + * nsown_capable - Check superior capability to one's own user_ns + * @cap: The capability in question + * + * Return true if the current task has the given superior capability + * targeted at its own user namespace. + */ +bool nsown_capable(int cap) +{ + return ns_capable(current_user_ns(), cap); +} diff --git a/kernel/cred.c b/kernel/cred.c index 5557b55048df..8093c16b84b1 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -54,6 +54,7 @@ struct cred init_cred = { .cap_effective = CAP_INIT_EFF_SET, .cap_bset = CAP_INIT_BSET, .user = INIT_USER, + .user_ns = &init_user_ns, .group_info = &init_groups, #ifdef CONFIG_KEYS .tgcred = &init_tgcred, @@ -410,6 +411,11 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) goto error_put; } + /* cache user_ns in cred. Doesn't need a refcount because it will + * stay pinned by cred->user + */ + new->user_ns = new->user->user_ns; + #ifdef CONFIG_KEYS /* new threads get their own thread keyrings if their parent already * had one */ @@ -741,12 +747,6 @@ int set_create_files_as(struct cred *new, struct inode *inode) } EXPORT_SYMBOL(set_create_files_as); -struct user_namespace *current_user_ns(void) -{ - return _current_user_ns(); -} -EXPORT_SYMBOL(current_user_ns); - #ifdef CONFIG_DEBUG_CREDENTIALS bool creds_are_invalid(const struct cred *cred) -- cgit v1.2.3 From 82a3242e11d9e63c8195be46c954efaefee35e22 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 12 May 2011 16:01:02 -0700 Subject: sysfs: remove "last sysfs file:" line from the oops messages On some arches (x86, sh, arm, unicore, powerpc) the oops message would print out the last sysfs file accessed. 
This was very useful in finding a number of sysfs and driver core bugs in the 2.5 and early 2.6 development days, but it has been a number of years since this file has actually helped in debugging anything that couldn't also be trivially determined from the stack traceback. So it's time to delete the line. This is good as we need all the space we can get for oops messages at times on consoles. Acked-by: Phil Carmody Acked-by: Ingo Molnar Cc: Andrew Morton Cc: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/traps.c | 1 - arch/powerpc/kernel/traps.c | 1 - arch/sh/kernel/traps_32.c | 1 - arch/unicore32/kernel/traps.c | 1 - arch/x86/kernel/dumpstack.c | 1 - fs/sysfs/file.c | 12 ------------ include/linux/sysfs.h | 5 ----- 7 files changed, 22 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 3b54ad19d489..d52eec268b47 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -234,7 +234,6 @@ static int __die(const char *str, int err, struct thread_info *thread, struct pt printk(KERN_EMERG "Internal error: %s: %x [#%d]" S_PREEMPT S_SMP "\n", str, err, ++die_counter); - sysfs_printk_last_file(); /* trap and error numbers are mostly meaningless on ARM */ ret = notify_die(DIE_OOPS, str, regs, err, tsk->thread.trap_no, SIGSEGV); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5ddb801bc154..d782cd71c07c 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -143,7 +143,6 @@ int die(const char *str, struct pt_regs *regs, long err) #endif printk("%s\n", ppc_md.name ? ppc_md.name : ""); - sysfs_printk_last_file(); if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP) return 1; diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index 3484c2f65aba..b51a17104b5f 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -87,7 +87,6 @@ void die(const char * str, struct pt_regs * regs, long err) bust_spinlocks(1); printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); - sysfs_printk_last_file(); print_modules(); show_regs(regs); diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index 254e36fa9513..b9a26465e728 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -192,7 +192,6 @@ static int __die(const char *str, int err, struct thread_info *thread, printk(KERN_EMERG "Internal error: %s: %x [#%d]\n", str, err, ++die_counter); - sysfs_printk_last_file(); /* trap and error numbers are mostly meaningless on UniCore */ ret = notify_die(DIE_OOPS, str, regs, err, tsk->thread.trap_no, \ diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index e2a3f0606da4..f72e7193acc5 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -279,7 +279,6 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) printk("DEBUG_PAGEALLOC"); #endif printk("\n"); - sysfs_printk_last_file(); if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) return 1; diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index da3fefe91a8f..1ad8c93c1b85 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -24,13 +24,6 @@ #include "sysfs.h" -/* used in crash dumps to help with debugging */ -static char last_sysfs_file[PATH_MAX]; -void sysfs_printk_last_file(void) -{ - printk(KERN_EMERG "last sysfs file: %s\n", last_sysfs_file); -} - /* * There's one sysfs_buffer for each open file and one * sysfs_open_dirent for 
each sysfs_dirent with one or more open @@ -337,11 +330,6 @@ static int sysfs_open_file(struct inode *inode, struct file *file) struct sysfs_buffer *buffer; const struct sysfs_ops *ops; int error = -EACCES; - char *p; - - p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file)); - if (!IS_ERR(p)) - memmove(last_sysfs_file, p, strlen(p) + 1); /* need attr_sd for attr and ops, its parent for kobj */ if (!sysfs_get_active(attr_sd)) diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 30b881555fa5..c3acda60eee0 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -176,7 +176,6 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, const unsigned char *name); struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd); void sysfs_put(struct sysfs_dirent *sd); -void sysfs_printk_last_file(void); /* Called to clear a ns tag when it is no longer valid */ void sysfs_exit_ns(enum kobj_ns_type type, const void *tag); @@ -348,10 +347,6 @@ static inline int __must_check sysfs_init(void) return 0; } -static inline void sysfs_printk_last_file(void) -{ -} - #endif /* CONFIG_SYSFS */ #endif /* _SYSFS_H_ */ -- cgit v1.2.3 From 0b61d2acb1ea48d8eba798ed92759b7f1b0f4209 Mon Sep 17 00:00:00 2001 From: J Freyensee Date: Fri, 6 May 2011 16:56:49 -0700 Subject: Intel PTI implementaiton of MIPI 1149.7. The PTI (Parallel Trace Interface) driver directs trace data routed from various parts in the system out through an Intel Penwell PTI port and out of the mobile device for analysis with a debugging tool (Lauterbach or Fido). Though n_tracesink and n_tracerouter line discipline drivers are used to extract modem tracing data to the PTI driver and other parts of an Intel mobile solution, the PTI driver can be used independent of n_tracesink and n_tracerouter. You should select this driver if the target kernel is meant for an Intel Atom (non-netbook) mobile device containing a MIPI P1149.7 standard implementation. Signed-off-by: J Freyensee Signed-off-by: Greg Kroah-Hartman --- drivers/misc/Kconfig | 13 + drivers/misc/Makefile | 1 + drivers/misc/pti.c | 980 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pti.h | 42 +++ 4 files changed, 1036 insertions(+) create mode 100644 drivers/misc/pti.c create mode 100644 include/linux/pti.h (limited to 'include/linux') diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 915c31ef84cd..ec33d939548c 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -144,6 +144,19 @@ config PHANTOM If you choose to build module, its name will be phantom. If unsure, say N here. +config INTEL_MID_PTI + tristate "Parallel Trace Interface for MIPI P1149.7 cJTAG standard" + default n + help + The PTI (Parallel Trace Interface) driver directs + trace data routed from various parts in the system out + through an Intel Penwell PTI port and out of the mobile + device for analysis with a debugging tool (Lauterbach or Fido). + + You should select this driver if the target kernel is meant for + an Intel Atom (non-netbook) mobile device containing a MIPI + P1149.7 standard implementation. 
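A hedged illustration, not part of the patch: besides the tty and char interfaces, pti.c below also exports a small in-kernel API -- pti_request_masterchannel(), pti_writedata() and pti_release_masterchannel(). A kernel client might use it roughly as follows; the my_driver_* names are hypothetical.

/* Hypothetical client sketch; only the pti_* calls come from this patch. */
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/pti.h>

static struct pti_masterchannel *trace_mc;

static int my_driver_trace_init(void)
{
	/* type 1 requests an OS master/channel aperture */
	trace_mc = pti_request_masterchannel(1);
	return trace_mc ? 0 : -ENXIO;
}

static void my_driver_trace(u8 *buf, int len)
{
	/* pti_writedata() ignores NULL mc/buf and len <= 0 */
	pti_writedata(trace_mc, buf, len);
}

static void my_driver_trace_exit(void)
{
	/* releases the channel bit and frees trace_mc */
	pti_release_masterchannel(trace_mc);
	trace_mc = NULL;
}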
+ config SGI_IOC4 tristate "SGI IOC4 Base IO support" depends on PCI diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index f5468602961f..662aa3c71d05 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_IBM_ASM) += ibmasm/ obj-$(CONFIG_AD525X_DPOT) += ad525x_dpot.o obj-$(CONFIG_AD525X_DPOT_I2C) += ad525x_dpot-i2c.o obj-$(CONFIG_AD525X_DPOT_SPI) += ad525x_dpot-spi.o +0bj-$(CONFIG_INTEL_MID_PTI) += pti.o obj-$(CONFIG_ATMEL_PWM) += atmel_pwm.o obj-$(CONFIG_ATMEL_SSC) += atmel-ssc.o obj-$(CONFIG_ATMEL_TCLIB) += atmel_tclib.o diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c new file mode 100644 index 000000000000..bb6f9255c17c --- /dev/null +++ b/drivers/misc/pti.c @@ -0,0 +1,980 @@ +/* + * pti.c - PTI driver for cJTAG data extration + * + * Copyright (C) Intel 2010 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * The PTI (Parallel Trace Interface) driver directs trace data routed from + * various parts in the system out through the Intel Penwell PTI port and + * out of the mobile device for analysis with a debugging tool + * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7, + * compact JTAG, standard. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRIVERNAME "pti" +#define PCINAME "pciPTI" +#define TTYNAME "ttyPTI" +#define CHARNAME "pti" +#define PTITTY_MINOR_START 0 +#define PTITTY_MINOR_NUM 2 +#define MAX_APP_IDS 16 /* 128 channel ids / u8 bit size */ +#define MAX_OS_IDS 16 /* 128 channel ids / u8 bit size */ +#define MAX_MODEM_IDS 16 /* 128 channel ids / u8 bit size */ +#define MODEM_BASE_ID 71 /* modem master ID address */ +#define CONTROL_ID 72 /* control master ID address */ +#define CONSOLE_ID 73 /* console master ID address */ +#define OS_BASE_ID 74 /* base OS master ID address */ +#define APP_BASE_ID 80 /* base App master ID address */ +#define CONTROL_FRAME_LEN 32 /* PTI control frame maximum size */ +#define USER_COPY_SIZE 8192 /* 8Kb buffer for user space copy */ +#define APERTURE_14 0x3800000 /* offset to first OS write addr */ +#define APERTURE_LEN 0x400000 /* address length */ + +struct pti_tty { + struct pti_masterchannel *mc; +}; + +struct pti_dev { + struct tty_port port; + unsigned long pti_addr; + unsigned long aperture_base; + void __iomem *pti_ioaddr; + u8 ia_app[MAX_APP_IDS]; + u8 ia_os[MAX_OS_IDS]; + u8 ia_modem[MAX_MODEM_IDS]; +}; + +/* + * This protects access to ia_app, ia_os, and ia_modem, + * which keeps track of channels allocated in + * an aperture write id. + */ +static DEFINE_MUTEX(alloclock); + +static struct pci_device_id pci_ids[] __devinitconst = { + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x82B)}, + {0} +}; + +static struct tty_driver *pti_tty_driver; +static struct pti_dev *drv_data; + +static unsigned int pti_console_channel; +static unsigned int pti_control_channel; + +/** + * pti_write_to_aperture()- The private write function to PTI HW. + * + * @mc: The 'aperture'. 
It's part of a write address that holds + * a master and channel ID. + * @buf: Data being written to the HW that will ultimately be seen + * in a debugging tool (Fido, Lauterbach). + * @len: Size of buffer. + * + * Since each aperture is specified by a unique + * master/channel ID, no two processes will be writing + * to the same aperture at the same time so no lock is required. The + * PTI-Output agent will send these out in the order that they arrived, and + * thus, it will intermix these messages. The debug tool can then later + * regroup the appropriate message segments together reconstituting each + * message. + */ +static void pti_write_to_aperture(struct pti_masterchannel *mc, + u8 *buf, + int len) +{ + int dwordcnt; + int final; + int i; + u32 ptiword; + u32 __iomem *aperture; + u8 *p = buf; + + /* + * calculate the aperture offset from the base using the master and + * channel id's. + */ + aperture = drv_data->pti_ioaddr + (mc->master << 15) + + (mc->channel << 8); + + dwordcnt = len >> 2; + final = len - (dwordcnt << 2); /* final = trailing bytes */ + if (final == 0 && dwordcnt != 0) { /* always need a final dword */ + final += 4; + dwordcnt--; + } + + for (i = 0; i < dwordcnt; i++) { + ptiword = be32_to_cpu(*(u32 *)p); + p += 4; + iowrite32(ptiword, aperture); + } + + aperture += PTI_LASTDWORD_DTS; /* adding DTS signals that is EOM */ + + ptiword = 0; + for (i = 0; i < final; i++) + ptiword |= *p++ << (24-(8*i)); + + iowrite32(ptiword, aperture); + return; +} + +/** + * pti_control_frame_built_and_sent()- control frame build and send function. + * + * @mc: The master / channel structure on which the function + * built a control frame. + * + * To be able to post process the PTI contents on host side, a control frame + * is added before sending any PTI content. So the host side knows on + * each PTI frame the name of the thread using a dedicated master / channel. + * The thread name is retrieved from the 'current' global variable. + * This function builds this frame and sends it to a master ID CONTROL_ID. + * The overhead is only 32 bytes since the driver only writes to HW + * in 32 byte chunks. + */ + +static void pti_control_frame_built_and_sent(struct pti_masterchannel *mc) +{ + struct pti_masterchannel mccontrol = {.master = CONTROL_ID, + .channel = 0}; + const char *control_format = "%3d %3d %s"; + u8 control_frame[CONTROL_FRAME_LEN]; + + /* + * Since we access the comm member in current's task_struct, + * we only need to be as large as what 'comm' in that + * structure is. + */ + char comm[TASK_COMM_LEN]; + + if (!in_interrupt()) + get_task_comm(comm, current); + else + strncpy(comm, "Interrupt", TASK_COMM_LEN); + + /* Absolutely ensure our buffer is zero terminated. */ + comm[TASK_COMM_LEN-1] = 0; + + mccontrol.channel = pti_control_channel; + pti_control_channel = (pti_control_channel + 1) & 0x7f; + + snprintf(control_frame, CONTROL_FRAME_LEN, control_format, mc->master, + mc->channel, comm); + pti_write_to_aperture(&mccontrol, control_frame, strlen(control_frame)); +} + +/** + * pti_write_full_frame_to_aperture()- high level function to + * write to PTI. + * + * @mc: The 'aperture'. It's part of a write address that holds + * a master and channel ID. + * @buf: Data being written to the HW that will ultimately be seen + * in a debugging tool (Fido, Lauterbach). + * @len: Size of buffer. + * + * All threads sending data (either console, user space application, ...) 
+ * are calling the high level function to write to PTI meaning that it is + * possible to add a control frame before sending the content. + */ +static void pti_write_full_frame_to_aperture(struct pti_masterchannel *mc, + const unsigned char *buf, + int len) +{ + pti_control_frame_built_and_sent(mc); + pti_write_to_aperture(mc, (u8 *)buf, len); +} + +/** + * get_id()- Allocate a master and channel ID. + * + * @id_array: an array of bits representing what channel + * id's are allocated for writing. + * @max_ids: The max amount of available write IDs to use. + * @base_id: The starting SW channel ID, based on the Intel + * PTI arch. + * + * Returns: + * pti_masterchannel struct with master, channel ID address + * 0 for error + * + * Each bit in the arrays ia_app and ia_os correspond to a master and + * channel id. The bit is one if the id is taken and 0 if free. For + * every master there are 128 channel id's. + */ +static struct pti_masterchannel *get_id(u8 *id_array, int max_ids, int base_id) +{ + struct pti_masterchannel *mc; + int i, j, mask; + + mc = kmalloc(sizeof(struct pti_masterchannel), GFP_KERNEL); + if (mc == NULL) + return NULL; + + /* look for a byte with a free bit */ + for (i = 0; i < max_ids; i++) + if (id_array[i] != 0xff) + break; + if (i == max_ids) { + kfree(mc); + return NULL; + } + /* find the bit in the 128 possible channel opportunities */ + mask = 0x80; + for (j = 0; j < 8; j++) { + if ((id_array[i] & mask) == 0) + break; + mask >>= 1; + } + + /* grab it */ + id_array[i] |= mask; + mc->master = base_id; + mc->channel = ((i & 0xf)<<3) + j; + /* write new master Id / channel Id allocation to channel control */ + pti_control_frame_built_and_sent(mc); + return mc; +} + +/* + * The following three functions: + * pti_request_mastercahannel(), mipi_release_masterchannel() + * and pti_writedata() are an API for other kernel drivers to + * access PTI. + */ + +/** + * pti_request_masterchannel()- Kernel API function used to allocate + * a master, channel ID address + * to write to PTI HW. + * + * @type: 0- request Application master, channel aperture ID write address. + * 1- request OS master, channel aperture ID write + * address. + * 2- request Modem master, channel aperture ID + * write address. + * Other values, error. + * + * Returns: + * pti_masterchannel struct + * 0 for error + */ +struct pti_masterchannel *pti_request_masterchannel(u8 type) +{ + struct pti_masterchannel *mc; + + mutex_lock(&alloclock); + + switch (type) { + + case 0: + mc = get_id(drv_data->ia_app, MAX_APP_IDS, APP_BASE_ID); + break; + + case 1: + mc = get_id(drv_data->ia_os, MAX_OS_IDS, OS_BASE_ID); + break; + + case 2: + mc = get_id(drv_data->ia_modem, MAX_MODEM_IDS, MODEM_BASE_ID); + break; + default: + mc = NULL; + } + + mutex_unlock(&alloclock); + return mc; +} +EXPORT_SYMBOL_GPL(pti_request_masterchannel); + +/** + * pti_release_masterchannel()- Kernel API function used to release + * a master, channel ID address + * used to write to PTI HW. + * + * @mc: master, channel apeture ID address to be released. 
+ */ +void pti_release_masterchannel(struct pti_masterchannel *mc) +{ + u8 master, channel, i; + + mutex_lock(&alloclock); + + if (mc) { + master = mc->master; + channel = mc->channel; + + if (master == APP_BASE_ID) { + i = channel >> 3; + drv_data->ia_app[i] &= ~(0x80>>(channel & 0x7)); + } else if (master == OS_BASE_ID) { + i = channel >> 3; + drv_data->ia_os[i] &= ~(0x80>>(channel & 0x7)); + } else { + i = channel >> 3; + drv_data->ia_modem[i] &= ~(0x80>>(channel & 0x7)); + } + + kfree(mc); + } + + mutex_unlock(&alloclock); +} +EXPORT_SYMBOL_GPL(pti_release_masterchannel); + +/** + * pti_writedata()- Kernel API function used to write trace + * debugging data to PTI HW. + * + * @mc: Master, channel aperture ID address to write to. + * Null value will return with no write occurring. + * @buf: Trace debuging data to write to the PTI HW. + * Null value will return with no write occurring. + * @count: Size of buf. Value of 0 or a negative number will + * return with no write occuring. + */ +void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count) +{ + /* + * since this function is exported, this is treated like an + * API function, thus, all parameters should + * be checked for validity. + */ + if ((mc != NULL) && (buf != NULL) && (count > 0)) + pti_write_to_aperture(mc, buf, count); + return; +} +EXPORT_SYMBOL_GPL(pti_writedata); + +/** + * pti_pci_remove()- Driver exit method to remove PTI from + * PCI bus. + * @pdev: variable containing pci info of PTI. + */ +static void __devexit pti_pci_remove(struct pci_dev *pdev) +{ + struct pti_dev *drv_data; + + drv_data = pci_get_drvdata(pdev); + if (drv_data != NULL) { + pci_iounmap(pdev, drv_data->pti_ioaddr); + pci_set_drvdata(pdev, NULL); + kfree(drv_data); + pci_release_region(pdev, 1); + pci_disable_device(pdev); + } +} + +/* + * for the tty_driver_*() basic function descriptions, see tty_driver.h. + * Specific header comments made for PTI-related specifics. + */ + +/** + * pti_tty_driver_open()- Open an Application master, channel aperture + * ID to the PTI device via tty device. + * + * @tty: tty interface. + * @filp: filp interface pased to tty_port_open() call. + * + * Returns: + * int, 0 for success + * otherwise, fail value + * + * The main purpose of using the tty device interface is for + * each tty port to have a unique PTI write aperture. In an + * example use case, ttyPTI0 gets syslogd and an APP aperture + * ID and ttyPTI1 is where the n_tracesink ldisc hooks to route + * modem messages into PTI. Modem trace data does not have to + * go to ttyPTI1, but ttyPTI0 and ttyPTI1 do need to be distinct + * master IDs. These messages go through the PTI HW and out of + * the handheld platform and to the Fido/Lauterbach device. + */ +static int pti_tty_driver_open(struct tty_struct *tty, struct file *filp) +{ + /* + * we actually want to allocate a new channel per open, per + * system arch. HW gives more than plenty channels for a single + * system task to have its own channel to write trace data. This + * also removes a locking requirement for the actual write + * procedure. + */ + return tty_port_open(&drv_data->port, tty, filp); +} + +/** + * pti_tty_driver_close()- close tty device and release Application + * master, channel aperture ID to the PTI device via tty device. + * + * @tty: tty interface. + * @filp: filp interface pased to tty_port_close() call. 
+ * + * The main purpose of using the tty device interface is to route + * syslog daemon messages to the PTI HW and out of the handheld platform + * and to the Fido/Lauterbach device. + */ +static void pti_tty_driver_close(struct tty_struct *tty, struct file *filp) +{ + tty_port_close(&drv_data->port, tty, filp); +} + +/** + * pti_tty_intstall()- Used to set up specific master-channels + * to tty ports for organizational purposes when + * tracing viewed from debuging tools. + * + * @driver: tty driver information. + * @tty: tty struct containing pti information. + * + * Returns: + * 0 for success + * otherwise, error + */ +static int pti_tty_install(struct tty_driver *driver, struct tty_struct *tty) +{ + int idx = tty->index; + struct pti_tty *pti_tty_data; + int ret = tty_init_termios(tty); + + if (ret == 0) { + tty_driver_kref_get(driver); + tty->count++; + driver->ttys[idx] = tty; + + pti_tty_data = kmalloc(sizeof(struct pti_tty), GFP_KERNEL); + if (pti_tty_data == NULL) + return -ENOMEM; + + if (idx == PTITTY_MINOR_START) + pti_tty_data->mc = pti_request_masterchannel(0); + else + pti_tty_data->mc = pti_request_masterchannel(2); + + if (pti_tty_data->mc == NULL) + return -ENXIO; + tty->driver_data = pti_tty_data; + } + + return ret; +} + +/** + * pti_tty_cleanup()- Used to de-allocate master-channel resources + * tied to tty's of this driver. + * + * @tty: tty struct containing pti information. + */ +static void pti_tty_cleanup(struct tty_struct *tty) +{ + struct pti_tty *pti_tty_data = tty->driver_data; + if (pti_tty_data == NULL) + return; + pti_release_masterchannel(pti_tty_data->mc); + kfree(tty->driver_data); + tty->driver_data = NULL; +} + +/** + * pti_tty_driver_write()- Write trace debugging data through the char + * interface to the PTI HW. Part of the misc device implementation. + * + * @filp: Contains private data which is used to obtain + * master, channel write ID. + * @data: trace data to be written. + * @len: # of byte to write. + * + * Returns: + * int, # of bytes written + * otherwise, error + */ +static int pti_tty_driver_write(struct tty_struct *tty, + const unsigned char *buf, int len) +{ + struct pti_tty *pti_tty_data = tty->driver_data; + if ((pti_tty_data != NULL) && (pti_tty_data->mc != NULL)) { + pti_write_to_aperture(pti_tty_data->mc, (u8 *)buf, len); + return len; + } + /* + * we can't write to the pti hardware if the private driver_data + * and the mc address is not there. + */ + else + return -EFAULT; +} + +/** + * pti_tty_write_room()- Always returns 2048. + * + * @tty: contains tty info of the pti driver. + */ +static int pti_tty_write_room(struct tty_struct *tty) +{ + return 2048; +} + +/** + * pti_char_open()- Open an Application master, channel aperture + * ID to the PTI device. Part of the misc device implementation. + * + * @inode: not used. + * @filp: Output- will have a masterchannel struct set containing + * the allocated application PTI aperture write address. + * + * Returns: + * int, 0 for success + * otherwise, a fail value + */ +static int pti_char_open(struct inode *inode, struct file *filp) +{ + struct pti_masterchannel *mc; + + /* + * We really do want to fail immediately if + * pti_request_masterchannel() fails, + * before assigning the value to filp->private_data. + * Slightly easier to debug if this driver needs debugging. + */ + mc = pti_request_masterchannel(0); + if (mc == NULL) + return -ENOMEM; + filp->private_data = mc; + return 0; +} + +/** + * pti_char_release()- Close a char channel to the PTI device. 
Part + * of the misc device implementation. + * + * @inode: Not used in this implementaiton. + * @filp: Contains private_data that contains the master, channel + * ID to be released by the PTI device. + * + * Returns: + * always 0 + */ +static int pti_char_release(struct inode *inode, struct file *filp) +{ + pti_release_masterchannel(filp->private_data); + kfree(filp->private_data); + return 0; +} + +/** + * pti_char_write()- Write trace debugging data through the char + * interface to the PTI HW. Part of the misc device implementation. + * + * @filp: Contains private data which is used to obtain + * master, channel write ID. + * @data: trace data to be written. + * @len: # of byte to write. + * @ppose: Not used in this function implementation. + * + * Returns: + * int, # of bytes written + * otherwise, error value + * + * Notes: From side discussions with Alan Cox and experimenting + * with PTI debug HW like Nokia's Fido box and Lauterbach + * devices, 8192 byte write buffer used by USER_COPY_SIZE was + * deemed an appropriate size for this type of usage with + * debugging HW. + */ +static ssize_t pti_char_write(struct file *filp, const char __user *data, + size_t len, loff_t *ppose) +{ + struct pti_masterchannel *mc; + void *kbuf; + const char __user *tmp; + size_t size = USER_COPY_SIZE; + size_t n = 0; + + tmp = data; + mc = filp->private_data; + + kbuf = kmalloc(size, GFP_KERNEL); + if (kbuf == NULL) { + pr_err("%s(%d): buf allocation failed\n", + __func__, __LINE__); + return -ENOMEM; + } + + do { + if (len - n > USER_COPY_SIZE) + size = USER_COPY_SIZE; + else + size = len - n; + + if (copy_from_user(kbuf, tmp, size)) { + kfree(kbuf); + return n ? n : -EFAULT; + } + + pti_write_to_aperture(mc, kbuf, size); + n += size; + tmp += size; + + } while (len > n); + + kfree(kbuf); + return len; +} + +static const struct tty_operations pti_tty_driver_ops = { + .open = pti_tty_driver_open, + .close = pti_tty_driver_close, + .write = pti_tty_driver_write, + .write_room = pti_tty_write_room, + .install = pti_tty_install, + .cleanup = pti_tty_cleanup +}; + +static const struct file_operations pti_char_driver_ops = { + .owner = THIS_MODULE, + .write = pti_char_write, + .open = pti_char_open, + .release = pti_char_release, +}; + +static struct miscdevice pti_char_driver = { + .minor = MISC_DYNAMIC_MINOR, + .name = CHARNAME, + .fops = &pti_char_driver_ops +}; + +/** + * pti_console_write()- Write to the console that has been acquired. + * + * @c: Not used in this implementaiton. + * @buf: Data to be written. + * @len: Length of buf. + */ +static void pti_console_write(struct console *c, const char *buf, unsigned len) +{ + static struct pti_masterchannel mc = {.master = CONSOLE_ID, + .channel = 0}; + + mc.channel = pti_console_channel; + pti_console_channel = (pti_console_channel + 1) & 0x7f; + + pti_write_full_frame_to_aperture(&mc, buf, len); +} + +/** + * pti_console_device()- Return the driver tty structure and set the + * associated index implementation. + * + * @c: Console device of the driver. + * @index: index associated with c. + * + * Returns: + * always value of pti_tty_driver structure when this function + * is called. + */ +static struct tty_driver *pti_console_device(struct console *c, int *index) +{ + *index = c->index; + return pti_tty_driver; +} + +/** + * pti_console_setup()- Initialize console variables used by the driver. + * + * @c: Not used. + * @opts: Not used. + * + * Returns: + * always 0. 
+ */ +static int pti_console_setup(struct console *c, char *opts) +{ + pti_console_channel = 0; + pti_control_channel = 0; + return 0; +} + +/* + * pti_console struct, used to capture OS printk()'s and shift + * out to the PTI device for debugging. This cannot be + * enabled upon boot because of the possibility of eating + * any serial console printk's (race condition discovered). + * The console should be enabled upon when the tty port is + * used for the first time. Since the primary purpose for + * the tty port is to hook up syslog to it, the tty port + * will be open for a really long time. + */ +static struct console pti_console = { + .name = TTYNAME, + .write = pti_console_write, + .device = pti_console_device, + .setup = pti_console_setup, + .flags = CON_PRINTBUFFER, + .index = 0, +}; + +/** + * pti_port_activate()- Used to start/initialize any items upon + * first opening of tty_port(). + * + * @port- The tty port number of the PTI device. + * @tty- The tty struct associated with this device. + * + * Returns: + * always returns 0 + * + * Notes: The primary purpose of the PTI tty port 0 is to hook + * the syslog daemon to it; thus this port will be open for a + * very long time. + */ +static int pti_port_activate(struct tty_port *port, struct tty_struct *tty) +{ + if (port->tty->index == PTITTY_MINOR_START) + console_start(&pti_console); + return 0; +} + +/** + * pti_port_shutdown()- Used to stop/shutdown any items upon the + * last tty port close. + * + * @port- The tty port number of the PTI device. + * + * Notes: The primary purpose of the PTI tty port 0 is to hook + * the syslog daemon to it; thus this port will be open for a + * very long time. + */ +static void pti_port_shutdown(struct tty_port *port) +{ + if (port->tty->index == PTITTY_MINOR_START) + console_stop(&pti_console); +} + +static const struct tty_port_operations tty_port_ops = { + .activate = pti_port_activate, + .shutdown = pti_port_shutdown, +}; + +/* + * Note the _probe() call sets everything up and ties the char and tty + * to successfully detecting the PTI device on the pci bus. + */ + +/** + * pti_pci_probe()- Used to detect pti on the pci bus and set + * things up in the driver. + * + * @pdev- pci_dev struct values for pti. + * @ent- pci_device_id struct for pti driver. 
+ * + * Returns: + * 0 for success + * otherwise, error + */ +static int __devinit pti_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int retval = -EINVAL; + int pci_bar = 1; + + dev_dbg(&pdev->dev, "%s %s(%d): PTI PCI ID %04x:%04x\n", __FILE__, + __func__, __LINE__, pdev->vendor, pdev->device); + + retval = misc_register(&pti_char_driver); + if (retval) { + pr_err("%s(%d): CHAR registration failed of pti driver\n", + __func__, __LINE__); + pr_err("%s(%d): Error value returned: %d\n", + __func__, __LINE__, retval); + return retval; + } + + retval = pci_enable_device(pdev); + if (retval != 0) { + dev_err(&pdev->dev, + "%s: pci_enable_device() returned error %d\n", + __func__, retval); + return retval; + } + + drv_data = kzalloc(sizeof(*drv_data), GFP_KERNEL); + + if (drv_data == NULL) { + retval = -ENOMEM; + dev_err(&pdev->dev, + "%s(%d): kmalloc() returned NULL memory.\n", + __func__, __LINE__); + return retval; + } + drv_data->pti_addr = pci_resource_start(pdev, pci_bar); + + retval = pci_request_region(pdev, pci_bar, dev_name(&pdev->dev)); + if (retval != 0) { + dev_err(&pdev->dev, + "%s(%d): pci_request_region() returned error %d\n", + __func__, __LINE__, retval); + kfree(drv_data); + return retval; + } + drv_data->aperture_base = drv_data->pti_addr+APERTURE_14; + drv_data->pti_ioaddr = + ioremap_nocache((u32)drv_data->aperture_base, + APERTURE_LEN); + if (!drv_data->pti_ioaddr) { + pci_release_region(pdev, pci_bar); + retval = -ENOMEM; + kfree(drv_data); + return retval; + } + + pci_set_drvdata(pdev, drv_data); + + tty_port_init(&drv_data->port); + drv_data->port.ops = &tty_port_ops; + + tty_register_device(pti_tty_driver, 0, &pdev->dev); + tty_register_device(pti_tty_driver, 1, &pdev->dev); + + register_console(&pti_console); + + return retval; +} + +static struct pci_driver pti_pci_driver = { + .name = PCINAME, + .id_table = pci_ids, + .probe = pti_pci_probe, + .remove = pti_pci_remove, +}; + +/** + * + * pti_init()- Overall entry/init call to the pti driver. + * It starts the registration process with the kernel. 
+ * + * Returns: + * int __init, 0 for success + * otherwise value is an error + * + */ +static int __init pti_init(void) +{ + int retval = -EINVAL; + + /* First register module as tty device */ + + pti_tty_driver = alloc_tty_driver(1); + if (pti_tty_driver == NULL) { + pr_err("%s(%d): Memory allocation failed for ptiTTY driver\n", + __func__, __LINE__); + return -ENOMEM; + } + + pti_tty_driver->owner = THIS_MODULE; + pti_tty_driver->magic = TTY_DRIVER_MAGIC; + pti_tty_driver->driver_name = DRIVERNAME; + pti_tty_driver->name = TTYNAME; + pti_tty_driver->major = 0; + pti_tty_driver->minor_start = PTITTY_MINOR_START; + pti_tty_driver->minor_num = PTITTY_MINOR_NUM; + pti_tty_driver->num = PTITTY_MINOR_NUM; + pti_tty_driver->type = TTY_DRIVER_TYPE_SYSTEM; + pti_tty_driver->subtype = SYSTEM_TYPE_SYSCONS; + pti_tty_driver->flags = TTY_DRIVER_REAL_RAW | + TTY_DRIVER_DYNAMIC_DEV; + pti_tty_driver->init_termios = tty_std_termios; + + tty_set_operations(pti_tty_driver, &pti_tty_driver_ops); + + retval = tty_register_driver(pti_tty_driver); + if (retval) { + pr_err("%s(%d): TTY registration failed of pti driver\n", + __func__, __LINE__); + pr_err("%s(%d): Error value returned: %d\n", + __func__, __LINE__, retval); + + pti_tty_driver = NULL; + return retval; + } + + retval = pci_register_driver(&pti_pci_driver); + + if (retval) { + pr_err("%s(%d): PCI registration failed of pti driver\n", + __func__, __LINE__); + pr_err("%s(%d): Error value returned: %d\n", + __func__, __LINE__, retval); + + tty_unregister_driver(pti_tty_driver); + pr_err("%s(%d): Unregistering TTY part of pti driver\n", + __func__, __LINE__); + pti_tty_driver = NULL; + return retval; + } + + return retval; +} + +/** + * pti_exit()- Unregisters this module as a tty and pci driver. + */ +static void __exit pti_exit(void) +{ + int retval; + + tty_unregister_device(pti_tty_driver, 0); + tty_unregister_device(pti_tty_driver, 1); + + retval = tty_unregister_driver(pti_tty_driver); + if (retval) { + pr_err("%s(%d): TTY unregistration failed of pti driver\n", + __func__, __LINE__); + pr_err("%s(%d): Error value returned: %d\n", + __func__, __LINE__, retval); + } + + pci_unregister_driver(&pti_pci_driver); + + retval = misc_deregister(&pti_char_driver); + if (retval) { + pr_err("%s(%d): CHAR unregistration failed of pti driver\n", + __func__, __LINE__); + pr_err("%s(%d): Error value returned: %d\n", + __func__, __LINE__, retval); + } + + unregister_console(&pti_console); + return; +} + +module_init(pti_init); +module_exit(pti_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ken Mills, Jay Freyensee"); +MODULE_DESCRIPTION("PTI Driver"); + diff --git a/include/linux/pti.h b/include/linux/pti.h new file mode 100644 index 000000000000..81af667bb2d5 --- /dev/null +++ b/include/linux/pti.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) Intel 2011 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * The PTI (Parallel Trace Interface) driver directs trace data routed from + * various parts in the system out through the Intel Penwell PTI port and + * out of the mobile device for analysis with a debugging tool + * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7, + * compact JTAG, standard. + * + * This header file will allow other parts of the OS to use the + * interface to write out it's contents for debugging a mobile system. + */ + +#ifndef PTI_H_ +#define PTI_H_ + +/* offset for last dword of any PTI message. Part of MIPI P1149.7 */ +#define PTI_LASTDWORD_DTS 0x30 + +/* basic structure used as a write address to the PTI HW */ +struct pti_masterchannel { + u8 master; + u8 channel; +}; + +/* the following functions are defined in misc/pti.c */ +void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count); +struct pti_masterchannel *pti_request_masterchannel(u8 type); +void pti_release_masterchannel(struct pti_masterchannel *mc); + +#endif /*PTI_H_*/ -- cgit v1.2.3 From ee4f6b4b89665b92ead67deaa2e5d2ffa1af2b5f Mon Sep 17 00:00:00 2001 From: J Freyensee Date: Fri, 6 May 2011 16:56:50 -0700 Subject: n_tracerouter and n_tracesink ldisc additions. The n_tracerouter and n_tracesink line discpline drivers use the Linux tty line discpline framework to route trace data coming from a tty port (say UART for example) to the trace sink line discipline driver and to another tty port(say USB). Those these two line discipline drivers can be used together, independently from pti.c, they are part of the original implementation solution of the MIPI P1149.7, compact JTAG, PTI solution for Intel mobile platforms starting with the Medfield platform. Signed-off-by: J Freyensee Signed-off-by: Greg Kroah-Hartman --- drivers/tty/Kconfig | 31 ++++++ drivers/tty/Makefile | 2 + drivers/tty/n_tracerouter.c | 243 ++++++++++++++++++++++++++++++++++++++++++++ drivers/tty/n_tracesink.c | 238 +++++++++++++++++++++++++++++++++++++++++++ drivers/tty/n_tracesink.h | 36 +++++++ include/linux/tty.h | 2 + 6 files changed, 552 insertions(+) create mode 100644 drivers/tty/n_tracerouter.c create mode 100644 drivers/tty/n_tracesink.c create mode 100644 drivers/tty/n_tracesink.h (limited to 'include/linux') diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig index 3fd7199301b6..bd7cc0527999 100644 --- a/drivers/tty/Kconfig +++ b/drivers/tty/Kconfig @@ -319,3 +319,34 @@ config N_GSM This line discipline provides support for the GSM MUX protocol and presents the mux as a set of 61 individual tty devices. +config TRACE_ROUTER + tristate "Trace data router for MIPI P1149.7 cJTAG standard" + depends on TRACE_SINK + default n + help + The trace router uses the Linux tty line discipline framework to + route trace data coming from a tty port (say UART for example) to + the trace sink line discipline driver and to another tty port (say + USB). This is part of a solution for the MIPI P1149.7, compact JTAG, + standard, which is for debugging mobile devices. The PTI driver in + drivers/misc/pti.c defines the majority of this MIPI solution. + + You should select this driver if the target kernel is meant for + a mobile device containing a modem. Then you will need to select + "Trace data sink for MIPI P1149.7 cJTAG standard" line discipline + driver. 
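A hedged sketch, not part of the patch: the help text does not show how these line disciplines get attached. A userspace helper would typically bind them to the two tty ports with the TIOCSETD ioctl, using the N_TRACEROUTER/N_TRACESINK numbers this patch adds to include/linux/tty.h; the device paths below are hypothetical.

/* Hypothetical userspace sketch: route a modem UART into the trace sink. */
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

#define N_TRACESINK	23	/* values added to include/linux/tty.h by this patch */
#define N_TRACEROUTER	24

int main(void)
{
	int sink = N_TRACESINK, router = N_TRACEROUTER;
	int sink_fd = open("/dev/ttyPTI1", O_RDWR);	/* hypothetical sink port */
	int src_fd = open("/dev/ttyS1", O_RDONLY);	/* hypothetical modem port */

	if (sink_fd < 0 || src_fd < 0)
		return 1;
	if (ioctl(sink_fd, TIOCSETD, &sink) < 0 ||
	    ioctl(src_fd, TIOCSETD, &router) < 0)
		return 1;
	pause();	/* keep both ttys, and thus the ldiscs, attached */
	return 0;
}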
+ +config TRACE_SINK + tristate "Trace data sink for MIPI P1149.7 cJTAG standard" + default n + help + The trace sink uses the Linux line discipline framework to receive + trace data coming from the trace router line discipline driver + to a user-defined tty port target, like USB. + This is to provide a way to extract modem trace data on + devices that do not have a PTI HW module, or just need modem + trace data to come out of a different HW output port. + This is part of a solution for the P1149.7, compact JTAG, standard. + + If you select this option, you need to select + "Trace data router for MIPI P1149.7 cJTAG standard". diff --git a/drivers/tty/Makefile b/drivers/tty/Makefile index 690522fcb338..ea89b0bd15fe 100644 --- a/drivers/tty/Makefile +++ b/drivers/tty/Makefile @@ -6,6 +6,8 @@ obj-$(CONFIG_AUDIT) += tty_audit.o obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o obj-$(CONFIG_N_HDLC) += n_hdlc.o obj-$(CONFIG_N_GSM) += n_gsm.o +obj-$(CONFIG_TRACE_ROUTER) += n_tracerouter.o +obj-$(CONFIG_TRACE_SINK) += n_tracesink.o obj-$(CONFIG_R3964) += n_r3964.o obj-y += vt/ diff --git a/drivers/tty/n_tracerouter.c b/drivers/tty/n_tracerouter.c new file mode 100644 index 000000000000..1f063d3aa32f --- /dev/null +++ b/drivers/tty/n_tracerouter.c @@ -0,0 +1,243 @@ +/* + * n_tracerouter.c - Trace data router through tty space + * + * Copyright (C) Intel 2011 + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This trace router uses the Linux line discipline framework to route + * trace data coming from a HW Modem to a PTI (Parallel Trace Module) port. + * The solution is not specific to a HW modem and this line disciple can + * be used to route any stream of data in kernel space. + * This is part of a solution for the P1149.7, compact JTAG, standard. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "n_tracesink.h" + +/* + * Other ldisc drivers use 65536 which basically means, + * 'I can always accept 64k' and flow control is off. + * This number is deemed appropriate for this driver. + */ +#define RECEIVE_ROOM 65536 +#define DRIVERNAME "n_tracerouter" + +/* + * struct to hold private configuration data for this ldisc. + * opencalled is used to hold if this ldisc has been opened. + * kref_tty holds the tty reference the ldisc sits on top of. + */ +struct tracerouter_data { + u8 opencalled; + struct tty_struct *kref_tty; +}; +static struct tracerouter_data *tr_data; + +/* lock for when tty reference is being used */ +static DEFINE_MUTEX(routelock); + +/** + * n_tracerouter_open() - Called when a tty is opened by a SW entity. + * @tty: terminal device to the ldisc. + * + * Return: + * 0 for success. + * + * Caveats: This should only be opened one time per SW entity. 
+ */ +static int n_tracerouter_open(struct tty_struct *tty) +{ + int retval = -EEXIST; + + mutex_lock(&routelock); + if (tr_data->opencalled == 0) { + + tr_data->kref_tty = tty_kref_get(tty); + if (tr_data->kref_tty == NULL) { + retval = -EFAULT; + } else { + tr_data->opencalled = 1; + tty->disc_data = tr_data; + tty->receive_room = RECEIVE_ROOM; + tty_driver_flush_buffer(tty); + retval = 0; + } + } + mutex_unlock(&routelock); + return retval; +} + +/** + * n_tracerouter_close() - close connection + * @tty: terminal device to the ldisc. + * + * Called when a software entity wants to close a connection. + */ +static void n_tracerouter_close(struct tty_struct *tty) +{ + struct tracerouter_data *tptr = tty->disc_data; + + mutex_lock(&routelock); + WARN_ON(tptr->kref_tty != tr_data->kref_tty); + tty_driver_flush_buffer(tty); + tty_kref_put(tr_data->kref_tty); + tr_data->kref_tty = NULL; + tr_data->opencalled = 0; + tty->disc_data = NULL; + mutex_unlock(&routelock); +} + +/** + * n_tracerouter_read() - read request from user space + * @tty: terminal device passed into the ldisc. + * @file: pointer to open file object. + * @buf: pointer to the data buffer that gets eventually returned. + * @nr: number of bytes of the data buffer that is returned. + * + * function that allows read() functionality in userspace. By default if this + * is not implemented it returns -EIO. This module is functioning like a + * router via n_tracerouter_receivebuf(), and there is no real requirement + * to implement this function. However, an error return value other than + * -EIO should be used just to show that there was an intent not to have + * this function implemented. Return value based on read() man pages. + * + * Return: + * -EINVAL + */ +static ssize_t n_tracerouter_read(struct tty_struct *tty, struct file *file, + unsigned char __user *buf, size_t nr) { + return -EINVAL; +} + +/** + * n_tracerouter_write() - Function that allows write() in userspace. + * @tty: terminal device passed into the ldisc. + * @file: pointer to open file object. + * @buf: pointer to the data buffer that gets eventually returned. + * @nr: number of bytes of the data buffer that is returned. + * + * By default if this is not implemented, it returns -EIO. + * This should not be implemented, ever, because + * 1. this driver is functioning like a router via + * n_tracerouter_receivebuf() + * 2. No writes to HW will ever go through this line discpline driver. + * However, an error return value other than -EIO should be used + * just to show that there was an intent not to have this function + * implemented. Return value based on write() man pages. + * + * Return: + * -EINVAL + */ +static ssize_t n_tracerouter_write(struct tty_struct *tty, struct file *file, + const unsigned char *buf, size_t nr) { + return -EINVAL; +} + +/** + * n_tracerouter_receivebuf() - Routing function for driver. + * @tty: terminal device passed into the ldisc. It's assumed + * tty will never be NULL. + * @cp: buffer, block of characters to be eventually read by + * someone, somewhere (user read() call or some kernel function). + * @fp: flag buffer. + * @count: number of characters (aka, bytes) in cp. + * + * This function takes the input buffer, cp, and passes it to + * an external API function for processing. 
+ */ +static void n_tracerouter_receivebuf(struct tty_struct *tty, + const unsigned char *cp, + char *fp, int count) +{ + mutex_lock(&routelock); + n_tracesink_datadrain((u8 *) cp, count); + mutex_unlock(&routelock); +} + +/* + * Flush buffer is not impelemented as the ldisc has no internal buffering + * so the tty_driver_flush_buffer() is sufficient for this driver's needs. + */ + +static struct tty_ldisc_ops tty_ptirouter_ldisc = { + .owner = THIS_MODULE, + .magic = TTY_LDISC_MAGIC, + .name = DRIVERNAME, + .open = n_tracerouter_open, + .close = n_tracerouter_close, + .read = n_tracerouter_read, + .write = n_tracerouter_write, + .receive_buf = n_tracerouter_receivebuf +}; + +/** + * n_tracerouter_init - module initialisation + * + * Registers this module as a line discipline driver. + * + * Return: + * 0 for success, any other value error. + */ +static int __init n_tracerouter_init(void) +{ + int retval; + + tr_data = kzalloc(sizeof(struct tracerouter_data), GFP_KERNEL); + if (tr_data == NULL) + return -ENOMEM; + + + /* Note N_TRACEROUTER is defined in linux/tty.h */ + retval = tty_register_ldisc(N_TRACEROUTER, &tty_ptirouter_ldisc); + if (retval < 0) { + pr_err("%s: Registration failed: %d\n", __func__, retval); + kfree(tr_data); + } + return retval; +} + +/** + * n_tracerouter_exit - module unload + * + * Removes this module as a line discipline driver. + */ +static void __exit n_tracerouter_exit(void) +{ + int retval = tty_unregister_ldisc(N_TRACEROUTER); + + if (retval < 0) + pr_err("%s: Unregistration failed: %d\n", __func__, retval); + else + kfree(tr_data); +} + +module_init(n_tracerouter_init); +module_exit(n_tracerouter_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jay Freyensee"); +MODULE_ALIAS_LDISC(N_TRACEROUTER); +MODULE_DESCRIPTION("Trace router ldisc driver"); diff --git a/drivers/tty/n_tracesink.c b/drivers/tty/n_tracesink.c new file mode 100644 index 000000000000..ddce58b973d2 --- /dev/null +++ b/drivers/tty/n_tracesink.c @@ -0,0 +1,238 @@ +/* + * n_tracesink.c - Trace data router and sink path through tty space. + * + * Copyright (C) Intel 2011 + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * The trace sink uses the Linux line discipline framework to receive + * trace data coming from the PTI source line discipline driver + * to a user-desired tty port, like USB. + * This is to provide a way to extract modem trace data on + * devices that do not have a PTI HW module, or just need modem + * trace data to come out of a different HW output port. + * This is part of a solution for the P1149.7, compact JTAG, standard. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "n_tracesink.h" + +/* + * Other ldisc drivers use 65536 which basically means, + * 'I can always accept 64k' and flow control is off. + * This number is deemed appropriate for this driver. 
+ */ +#define RECEIVE_ROOM 65536 +#define DRIVERNAME "n_tracesink" + +/* + * there is a quirk with this ldisc is he can write data + * to a tty from anyone calling his kernel API, which + * meets customer requirements in the drivers/misc/pti.c + * project. So he needs to know when he can and cannot write when + * the API is called. In theory, the API can be called + * after an init() but before a successful open() which + * would crash the system if tty is not checked. + */ +static struct tty_struct *this_tty; +static DEFINE_MUTEX(writelock); + +/** + * n_tracesink_open() - Called when a tty is opened by a SW entity. + * @tty: terminal device to the ldisc. + * + * Return: + * 0 for success, + * -EFAULT = couldn't get a tty kref n_tracesink will sit + * on top of + * -EEXIST = open() called successfully once and it cannot + * be called again. + * + * Caveats: open() should only be successful the first time a + * SW entity calls it. + */ +static int n_tracesink_open(struct tty_struct *tty) +{ + int retval = -EEXIST; + + mutex_lock(&writelock); + if (this_tty == NULL) { + this_tty = tty_kref_get(tty); + if (this_tty == NULL) { + retval = -EFAULT; + } else { + tty->disc_data = this_tty; + tty_driver_flush_buffer(tty); + retval = 0; + } + } + mutex_unlock(&writelock); + + return retval; +} + +/** + * n_tracesink_close() - close connection + * @tty: terminal device to the ldisc. + * + * Called when a software entity wants to close a connection. + */ +static void n_tracesink_close(struct tty_struct *tty) +{ + mutex_lock(&writelock); + tty_driver_flush_buffer(tty); + tty_kref_put(this_tty); + this_tty = NULL; + tty->disc_data = NULL; + mutex_unlock(&writelock); +} + +/** + * n_tracesink_read() - read request from user space + * @tty: terminal device passed into the ldisc. + * @file: pointer to open file object. + * @buf: pointer to the data buffer that gets eventually returned. + * @nr: number of bytes of the data buffer that is returned. + * + * function that allows read() functionality in userspace. By default if this + * is not implemented it returns -EIO. This module is functioning like a + * router via n_tracesink_receivebuf(), and there is no real requirement + * to implement this function. However, an error return value other than + * -EIO should be used just to show that there was an intent not to have + * this function implemented. Return value based on read() man pages. + * + * Return: + * -EINVAL + */ +static ssize_t n_tracesink_read(struct tty_struct *tty, struct file *file, + unsigned char __user *buf, size_t nr) { + return -EINVAL; +} + +/** + * n_tracesink_write() - Function that allows write() in userspace. + * @tty: terminal device passed into the ldisc. + * @file: pointer to open file object. + * @buf: pointer to the data buffer that gets eventually returned. + * @nr: number of bytes of the data buffer that is returned. + * + * By default if this is not implemented, it returns -EIO. + * This should not be implemented, ever, because + * 1. this driver is functioning like a router via + * n_tracesink_receivebuf() + * 2. No writes to HW will ever go through this line discpline driver. + * However, an error return value other than -EIO should be used + * just to show that there was an intent not to have this function + * implemented. Return value based on write() man pages. 
+ * + * Return: + * -EINVAL + */ +static ssize_t n_tracesink_write(struct tty_struct *tty, struct file *file, + const unsigned char *buf, size_t nr) { + return -EINVAL; +} + +/** + * n_tracesink_datadrain() - Kernel API function used to route + * trace debugging data to user-defined + * port like USB. + * + * @buf: Trace debuging data buffer to write to tty target + * port. Null value will return with no write occurring. + * @count: Size of buf. Value of 0 or a negative number will + * return with no write occuring. + * + * Caveat: If this line discipline does not set the tty it sits + * on top of via an open() call, this API function will not + * call the tty's write() call because it will have no pointer + * to call the write(). + */ +void n_tracesink_datadrain(u8 *buf, int count) +{ + mutex_lock(&writelock); + + if ((buf != NULL) && (count > 0) && (this_tty != NULL)) + this_tty->ops->write(this_tty, buf, count); + + mutex_unlock(&writelock); +} +EXPORT_SYMBOL_GPL(n_tracesink_datadrain); + +/* + * Flush buffer is not impelemented as the ldisc has no internal buffering + * so the tty_driver_flush_buffer() is sufficient for this driver's needs. + */ + +/* + * tty_ldisc function operations for this driver. + */ +static struct tty_ldisc_ops tty_n_tracesink = { + .owner = THIS_MODULE, + .magic = TTY_LDISC_MAGIC, + .name = DRIVERNAME, + .open = n_tracesink_open, + .close = n_tracesink_close, + .read = n_tracesink_read, + .write = n_tracesink_write +}; + +/** + * n_tracesink_init- module initialisation + * + * Registers this module as a line discipline driver. + * + * Return: + * 0 for success, any other value error. + */ +static int __init n_tracesink_init(void) +{ + /* Note N_TRACESINK is defined in linux/tty.h */ + int retval = tty_register_ldisc(N_TRACESINK, &tty_n_tracesink); + + if (retval < 0) + pr_err("%s: Registration failed: %d\n", __func__, retval); + + return retval; +} + +/** + * n_tracesink_exit - module unload + * + * Removes this module as a line discipline driver. + */ +static void __exit n_tracesink_exit(void) +{ + int retval = tty_unregister_ldisc(N_TRACESINK); + + if (retval < 0) + pr_err("%s: Unregistration failed: %d\n", __func__, retval); +} + +module_init(n_tracesink_init); +module_exit(n_tracesink_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jay Freyensee"); +MODULE_ALIAS_LDISC(N_TRACESINK); +MODULE_DESCRIPTION("Trace sink ldisc driver"); diff --git a/drivers/tty/n_tracesink.h b/drivers/tty/n_tracesink.h new file mode 100644 index 000000000000..a68bb44f1ef5 --- /dev/null +++ b/drivers/tty/n_tracesink.h @@ -0,0 +1,36 @@ +/* + * n_tracesink.h - Kernel driver API to route trace data in kernel space. + * + * Copyright (C) Intel 2011 + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * The PTI (Parallel Trace Interface) driver directs trace data routed from + * various parts in the system out through the Intel Penwell PTI port and + * out of the mobile device for analysis with a debugging tool + * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7, + * compact JTAG, standard. + * + * This header file is used by n_tracerouter to be able to send the + * data of it's tty port to the tty port this module sits. This + * mechanism can also be used independent of the PTI module. + * + */ + +#ifndef N_TRACESINK_H_ +#define N_TRACESINK_H_ + +void n_tracesink_datadrain(u8 *buf, int count); + +#endif diff --git a/include/linux/tty.h b/include/linux/tty.h index 4db4ca79f895..d6f05292e456 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -50,6 +50,8 @@ #define N_CAIF 20 /* CAIF protocol for talking to modems */ #define N_GSM0710 21 /* GSM 0710 Mux */ #define N_TI_WL 22 /* for TI's WL BT, FM, GPS combo chips */ +#define N_TRACESINK 23 /* Trace data routing for MIPI P1149.7 */ +#define N_TRACEROUTER 24 /* Trace data routing for MIPI P1149.7 */ /* * This character is the same as _POSIX_VDISABLE: it cannot be used as -- cgit v1.2.3 From 8c414ff3f4dcdde228c6a668385218290d73a265 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 8 May 2011 18:50:20 +0100 Subject: clocksource: convert footbridge to generic i8253 clocksource Convert the footbridge isa-timer code to use generic i8253 clocksource. Acked-by: John Stultz Acked-by: Thomas Gleixner Signed-off-by: Russell King --- arch/arm/include/asm/i8253.h | 15 ++++++++++++ arch/arm/mach-footbridge/Kconfig | 2 ++ arch/arm/mach-footbridge/isa-timer.c | 45 ++++-------------------------------- include/linux/clocksource.h | 2 ++ 4 files changed, 23 insertions(+), 41 deletions(-) create mode 100644 arch/arm/include/asm/i8253.h (limited to 'include/linux') diff --git a/arch/arm/include/asm/i8253.h b/arch/arm/include/asm/i8253.h new file mode 100644 index 000000000000..70656b69d5ce --- /dev/null +++ b/arch/arm/include/asm/i8253.h @@ -0,0 +1,15 @@ +#ifndef __ASMARM_I8253_H +#define __ASMARM_I8253_H + +/* i8253A PIT registers */ +#define PIT_MODE 0x43 +#define PIT_CH0 0x40 + +#define PIT_LATCH ((PIT_TICK_RATE + HZ / 2) / HZ) + +extern raw_spinlock_t i8253_lock; + +#define outb_pit outb_p +#define inb_pit inb_p + +#endif diff --git a/arch/arm/mach-footbridge/Kconfig b/arch/arm/mach-footbridge/Kconfig index bdd257921cfb..46adca068f2c 100644 --- a/arch/arm/mach-footbridge/Kconfig +++ b/arch/arm/mach-footbridge/Kconfig @@ -4,6 +4,7 @@ menu "Footbridge Implementations" config ARCH_CATS bool "CATS" + select CLKSRC_I8253 select FOOTBRIDGE_HOST select ISA select ISA_DMA @@ -59,6 +60,7 @@ config ARCH_EBSA285_HOST config ARCH_NETWINDER bool "NetWinder" + select CLKSRC_I8253 select FOOTBRIDGE_HOST select ISA select ISA_DMA diff --git a/arch/arm/mach-footbridge/isa-timer.c b/arch/arm/mach-footbridge/isa-timer.c index 441c6ce0d555..7020f1a3feca 100644 --- a/arch/arm/mach-footbridge/isa-timer.c +++ b/arch/arm/mach-footbridge/isa-timer.c @@ -10,53 +10,16 @@ #include #include #include +#include #include #include - +#include #include #include "common.h" -#define PIT_MODE 0x43 -#define PIT_CH0 0x40 - -#define PIT_LATCH ((PIT_TICK_RATE + HZ / 2) / HZ) - -static cycle_t pit_read(struct clocksource *cs) -{ - unsigned long flags; - static int old_count; - static u32 old_jifs; - int count; - u32 jifs; - - raw_local_irq_save(flags); - - jifs = jiffies; - 
outb_p(0x00, PIT_MODE); /* latch the count */ - count = inb_p(PIT_CH0); /* read the latched count */ - count |= inb_p(PIT_CH0) << 8; - - if (count > old_count && jifs == old_jifs) - count = old_count; - - old_count = count; - old_jifs = jifs; - - raw_local_irq_restore(flags); - - count = (PIT_LATCH - 1) - count; - - return (cycle_t)(jifs * PIT_LATCH) + count; -} - -static struct clocksource pit_cs = { - .name = "pit", - .rating = 110, - .read = pit_read, - .mask = CLOCKSOURCE_MASK(32), -}; +DEFINE_RAW_SPINLOCK(i8253_lock); static void pit_set_mode(enum clock_event_mode mode, struct clock_event_device *evt) @@ -121,7 +84,7 @@ static void __init isa_timer_init(void) pit_ce.max_delta_ns = clockevent_delta2ns(0x7fff, &pit_ce); pit_ce.min_delta_ns = clockevent_delta2ns(0x000f, &pit_ce); - clocksource_register_hz(&pit_cs, PIT_TICK_RATE); + clocksource_i8253_init(); setup_irq(pit_ce.irq, &pit_timer_irq); clockevents_register_device(&pit_ce); diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index c37b21ad5a3b..f13469b3df86 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -341,4 +341,6 @@ static inline void update_vsyscall_tz(void) extern void timekeeping_notify(struct clocksource *clock); +extern int clocksource_i8253_init(void); + #endif /* _LINUX_CLOCKSOURCE_H */ -- cgit v1.2.3 From 9281b16caac1276817b77033c5b8a1f5ca30102c Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 24 Apr 2011 14:30:14 -0500 Subject: pata_cm64x: fix boot crash on parisc The old IDE cmd64x checks the status of the CNTRL register to see if the ports are enabled before probing them. pata_cmd64x doesn't do this, which causes a HPMC on parisc when it tries to poke at the secondary port because apparently the BAR isn't wired up (and a non-responding piece of memory causes a HPMC). Fix this by porting the CNTRL register port detection logic from IDE cmd64x. In addition, following converns from Alan Cox, add a check to see if a mobility electronics bridge is the immediate parent and forgo the check if it is (prevents problems on hotplug controllers). 
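As an illustration of the detection logic described above (a condensed sketch only, not the driver change itself: the real code lives in cmd64x_init_one() in the diff below, and the helper name here is invented; CNTRL, CNTRL_CH0/CH1 and PCI_VENDOR_ID_MOBILITY_ELECTRONICS are the constants the patch adds):

#include <linux/pci.h>

static bool cmd64x_port_enabled(struct pci_dev *pdev, int port,
				bool cntrl_ch0_ok)
{
	struct pci_dev *bridge = pdev->bus->self;
	u8 reg;

	/* Mobility Electronics bridges misreport the enable bits,
	 * so skip the whole check when sitting behind one. */
	if (bridge && bridge->vendor == PCI_VENDOR_ID_MOBILITY_ELECTRONICS)
		return true;

	pci_read_config_byte(pdev, CNTRL, &reg);

	if (port == 0)
		/* 643 and early 646 revisions have no CNTRL_CH0 bit */
		return !cntrl_ch0_ok || (reg & CNTRL_CH0);

	return reg & CNTRL_CH1;
}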
Signed-off-by: James Bottomley Signed-off-by: Jeff Garzik --- drivers/ata/pata_cmd64x.c | 42 ++++++++++++++++++++++++++++++++++++++---- include/linux/pci_ids.h | 2 ++ 2 files changed, 40 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/pata_cmd64x.c b/drivers/ata/pata_cmd64x.c index 905ff76d3cbb..7bafc16cf5e0 100644 --- a/drivers/ata/pata_cmd64x.c +++ b/drivers/ata/pata_cmd64x.c @@ -41,6 +41,9 @@ enum { CFR = 0x50, CFR_INTR_CH0 = 0x04, + CNTRL = 0x51, + CNTRL_CH0 = 0x04, + CNTRL_CH1 = 0x08, CMDTIM = 0x52, ARTTIM0 = 0x53, DRWTIM0 = 0x54, @@ -328,9 +331,19 @@ static int cmd64x_init_one(struct pci_dev *pdev, const struct pci_device_id *id) .port_ops = &cmd648_port_ops } }; - const struct ata_port_info *ppi[] = { &cmd_info[id->driver_data], NULL }; - u8 mrdmode; + const struct ata_port_info *ppi[] = { + &cmd_info[id->driver_data], + &cmd_info[id->driver_data], + NULL + }; + u8 mrdmode, reg; int rc; + struct pci_dev *bridge = pdev->bus->self; + /* mobility split bridges don't report enabled ports correctly */ + int port_ok = !(bridge && bridge->vendor == + PCI_VENDOR_ID_MOBILITY_ELECTRONICS); + /* all (with exceptions below) apart from 643 have CNTRL_CH0 bit */ + int cntrl_ch0_ok = (id->driver_data != 0); rc = pcim_enable_device(pdev); if (rc) @@ -341,11 +354,18 @@ static int cmd64x_init_one(struct pci_dev *pdev, const struct pci_device_id *id) if (pdev->device == PCI_DEVICE_ID_CMD_646) { /* Does UDMA work ? */ - if (pdev->revision > 4) + if (pdev->revision > 4) { ppi[0] = &cmd_info[2]; + ppi[1] = &cmd_info[2]; + } /* Early rev with other problems ? */ - else if (pdev->revision == 1) + else if (pdev->revision == 1) { ppi[0] = &cmd_info[3]; + ppi[1] = &cmd_info[3]; + } + /* revs 1,2 have no CNTRL_CH0 */ + if (pdev->revision < 3) + cntrl_ch0_ok = 0; } pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 64); @@ -354,6 +374,20 @@ static int cmd64x_init_one(struct pci_dev *pdev, const struct pci_device_id *id) mrdmode |= 0x02; /* Memory read line enable */ pci_write_config_byte(pdev, MRDMODE, mrdmode); + /* check for enabled ports */ + pci_read_config_byte(pdev, CNTRL, ®); + if (!port_ok) + dev_printk(KERN_NOTICE, &pdev->dev, "Mobility Bridge detected, ignoring CNTRL port enable/disable\n"); + if (port_ok && cntrl_ch0_ok && !(reg & CNTRL_CH0)) { + dev_printk(KERN_NOTICE, &pdev->dev, "Primary port is disabled\n"); + ppi[0] = &ata_dummy_port_info; + + } + if (port_ok && !(reg & CNTRL_CH1)) { + dev_printk(KERN_NOTICE, &pdev->dev, "Secondary port is disabled\n"); + ppi[1] = &ata_dummy_port_info; + } + /* Force PIO 0 here.. */ /* PPC specific fixup copied from old driver */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 8abe8d78c4bf..8652a4fa3fe2 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -608,6 +608,8 @@ #define PCI_DEVICE_ID_MATROX_G550 0x2527 #define PCI_DEVICE_ID_MATROX_VIA 0x4536 +#define PCI_VENDOR_ID_MOBILITY_ELECTRONICS 0x14f2 + #define PCI_VENDOR_ID_CT 0x102c #define PCI_DEVICE_ID_CT_69000 0x00c0 #define PCI_DEVICE_ID_CT_65545 0x00d8 -- cgit v1.2.3 From e1e8fb6a1ff3f9487e03a4cbf85b81d1316068ce Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 15 Apr 2011 03:02:49 +0000 Subject: fs: remove FS_COW_FL FS_COW_FL and FS_NOCOW_FL were newly introduced to control per file COW in btrfs, but FS_NOCOW_FL is sufficient. The fact is we don't have corresponding BTRFS_INODE_COW flag. COW is default, and FS_NOCOW_FL can be used to switch off COW for a single file. 
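To illustrate the effect from user space (a hedged sketch, not part of the patch; the helper below is made up and simply drives the standard flags ioctls):

#include <sys/ioctl.h>
#include <linux/fs.h>

/* Toggle COW on an open btrfs file: COW is the default, so turning it
 * back on just means clearing FS_NOCOW_FL again. */
static int set_cow(int fd, int enable)
{
	int flags;

	if (ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0)
		return -1;
	if (enable)
		flags &= ~FS_NOCOW_FL;
	else
		flags |= FS_NOCOW_FL;
	return ioctl(fd, FS_IOC_SETFLAGS, &flags);
}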
If we mount btrfs with nodatacow, a newly created file will be set with the FS_NOCOW_FL flag. So to turn on COW for it, we can just clear the FS_NOCOW_FL flag. Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 15 ++++++--------- include/linux/fs.h | 1 - 2 files changed, 6 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f580a3a5d2fc..3240dd90da42 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -144,16 +144,13 @@ static int check_flags(unsigned int flags) if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ FS_NOATIME_FL | FS_NODUMP_FL | \ FS_SYNC_FL | FS_DIRSYNC_FL | \ - FS_NOCOMP_FL | FS_COMPR_FL | \ - FS_NOCOW_FL | FS_COW_FL)) + FS_NOCOMP_FL | FS_COMPR_FL | + FS_NOCOW_FL)) return -EOPNOTSUPP; if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL)) return -EINVAL; - if ((flags & FS_NOCOW_FL) && (flags & FS_COW_FL)) - return -EINVAL; - return 0; } @@ -218,6 +215,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) ip->flags |= BTRFS_INODE_DIRSYNC; else ip->flags &= ~BTRFS_INODE_DIRSYNC; + if (flags & FS_NOCOW_FL) + ip->flags |= BTRFS_INODE_NODATACOW; + else + ip->flags &= ~BTRFS_INODE_NODATACOW; /* * The COMPRESS flag can only be changed by users, while the NOCOMPRESS @@ -231,10 +232,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) ip->flags |= BTRFS_INODE_COMPRESS; ip->flags &= ~BTRFS_INODE_NOCOMPRESS; } - if (flags & FS_NOCOW_FL) - ip->flags |= BTRFS_INODE_NODATACOW; - else if (flags & FS_COW_FL) - ip->flags &= ~BTRFS_INODE_NODATACOW; trans = btrfs_join_transaction(root, 1); BUG_ON(IS_ERR(trans)); diff --git a/include/linux/fs.h b/include/linux/fs.h index de9dd8119b71..56a41412903d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -365,7 +365,6 @@ struct inodes_stat_t { #define FS_EXTENT_FL 0x00080000 /* Extents */ #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ #define FS_NOCOW_FL 0x00800000 /* Do not cow file */ -#define FS_COW_FL 0x02000000 /* Cow file */ #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ #define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ -- cgit v1.2.3 From 4c3b512c6b054e8c0bf14b1d61b20d4569de0a21 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 18 Apr 2011 14:48:22 +0100 Subject: ARM: 6882/1: ELF: Define new core note type for VFP registers The VFP registers are not currently included in coredumps, and there's no existing note type where they can sensibly be included, so this patch defines a dedicated note type for them. 
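For context, a new core note type like this is normally surfaced through the regset interface so the ELF core dumper emits it. The sketch below only illustrates that wiring; the actual ARM VFP regset is added by a separate patch, and the REGSET_VFP index and the vfp_get/vfp_set helper names are assumptions:

#include <linux/regset.h>
#include <linux/elf.h>
#include <asm/user.h>

/* Sketch only: a regset entry tagged with the new note type. */
static const struct user_regset arm_regsets[] = {
	[REGSET_VFP] = {
		.core_note_type = NT_ARM_VFP,	/* defined by this patch */
		.n = sizeof(struct user_vfp) / sizeof(u32),
		.size = sizeof(u32),
		.align = sizeof(u32),
		.get = vfp_get,			/* hypothetical copy-out helper */
		.set = vfp_set,			/* hypothetical copy-in helper */
	},
};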
Signed-off-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Russell King --- include/linux/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/elf.h b/include/linux/elf.h index 4d608014753a..110821cb6ea5 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -395,6 +395,7 @@ typedef struct elf64_shdr { #define NT_S390_CTRS 0x304 /* s390 control registers */ #define NT_S390_PREFIX 0x305 /* s390 prefix register */ #define NT_S390_LAST_BREAK 0x306 /* s390 breaking event address */ +#define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */ /* Note header in a PT_NOTE section */ -- cgit v1.2.3 From 86f315bbb2374f1f077500ad131dd9b71856e697 Mon Sep 17 00:00:00 2001 From: Chris Ball Date: Mon, 16 May 2011 11:32:26 -0400 Subject: Revert "mmc: fix a race between card-detect rescan and clock-gate work instances" This reverts commit 26fc8775b51484d8c0a671198639c6d5ae60533e, which has been reported to cause boot/resume-time crashes for some users: https://bbs.archlinux.org/viewtopic.php?id=118751. Signed-off-by: Chris Ball Cc: --- drivers/mmc/core/host.c | 9 +++++---- include/linux/mmc/host.h | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 2b200c1cfbba..461e6a17fb90 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -94,7 +94,7 @@ static void mmc_host_clk_gate_delayed(struct mmc_host *host) spin_unlock_irqrestore(&host->clk_lock, flags); return; } - mmc_claim_host(host); + mutex_lock(&host->clk_gate_mutex); spin_lock_irqsave(&host->clk_lock, flags); if (!host->clk_requests) { spin_unlock_irqrestore(&host->clk_lock, flags); @@ -104,7 +104,7 @@ static void mmc_host_clk_gate_delayed(struct mmc_host *host) pr_debug("%s: gated MCI clock\n", mmc_hostname(host)); } spin_unlock_irqrestore(&host->clk_lock, flags); - mmc_release_host(host); + mutex_unlock(&host->clk_gate_mutex); } /* @@ -130,7 +130,7 @@ void mmc_host_clk_ungate(struct mmc_host *host) { unsigned long flags; - mmc_claim_host(host); + mutex_lock(&host->clk_gate_mutex); spin_lock_irqsave(&host->clk_lock, flags); if (host->clk_gated) { spin_unlock_irqrestore(&host->clk_lock, flags); @@ -140,7 +140,7 @@ void mmc_host_clk_ungate(struct mmc_host *host) } host->clk_requests++; spin_unlock_irqrestore(&host->clk_lock, flags); - mmc_release_host(host); + mutex_unlock(&host->clk_gate_mutex); } /** @@ -215,6 +215,7 @@ static inline void mmc_host_clk_init(struct mmc_host *host) host->clk_gated = false; INIT_WORK(&host->clk_gate_work, mmc_host_clk_gate_work); spin_lock_init(&host->clk_lock); + mutex_init(&host->clk_gate_mutex); } /** diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index eb792cb6d745..bcb793ec7374 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -183,6 +183,7 @@ struct mmc_host { struct work_struct clk_gate_work; /* delayed clock gate */ unsigned int clk_old; /* old clock value cache */ spinlock_t clk_lock; /* lock for clk fields */ + struct mutex clk_gate_mutex; /* mutex for clock gating */ #endif /* host specific block data */ -- cgit v1.2.3 From 8d38d74b648513dd8ed8bd2b67d899208ef4e09e Mon Sep 17 00:00:00 2001 From: Chen Gong Date: Mon, 16 May 2011 10:58:57 -0700 Subject: pstore: fix one type of return value in pstore the return type of function _read_ in pstore is size_t, but in the callback function of _read_, the logic doesn't consider it too much, which means if negative value (assuming error here) is returned, it will be 
converted to positive because of type casting. ssize_t is enough for this function. Signed-off-by: Chen Gong Signed-off-by: Tony Luck --- drivers/acpi/apei/erst.c | 4 ++-- fs/pstore/platform.c | 4 ++-- include/linux/pstore.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index d6cb0ff6988e..d5a89d067f98 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -929,7 +929,7 @@ static int erst_check_table(struct acpi_table_erst *erst_tab) return 0; } -static size_t erst_reader(u64 *id, enum pstore_type_id *type, +static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, struct timespec *time); static u64 erst_writer(enum pstore_type_id type, size_t size); @@ -957,7 +957,7 @@ struct cper_pstore_record { char data[]; } __packed; -static size_t erst_reader(u64 *id, enum pstore_type_id *type, +static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, struct timespec *time) { int rc; diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index f835a25625ff..912403c2a93d 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -152,7 +152,7 @@ EXPORT_SYMBOL_GPL(pstore_register); void pstore_get_records(void) { struct pstore_info *psi = psinfo; - size_t size; + ssize_t size; u64 id; enum pstore_type_id type; struct timespec time; @@ -163,7 +163,7 @@ void pstore_get_records(void) mutex_lock(&psinfo->buf_mutex); while ((size = psi->read(&id, &type, &time)) > 0) { - if (pstore_mkfile(type, psi->name, id, psi->buf, size, + if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size, time, psi->erase)) failed++; } diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 41977737bb7d..14ce2f5d08af 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -35,7 +35,7 @@ struct pstore_info { struct mutex buf_mutex; /* serialize access to 'buf' */ char *buf; size_t bufsize; - size_t (*read)(u64 *id, enum pstore_type_id *type, + ssize_t (*read)(u64 *id, enum pstore_type_id *type, struct timespec *time); u64 (*write)(enum pstore_type_id type, size_t size); int (*erase)(u64 id); -- cgit v1.2.3 From 06cf91b4b4aafa50ee0a94c81d2c6922a18af242 Mon Sep 17 00:00:00 2001 From: Chen Gong Date: Mon, 16 May 2011 11:00:27 -0700 Subject: pstore: fix pstore filesystem mount/remount issue Currently after mount/remount operation on pstore filesystem, the content on pstore will be lost. It is because current ERST implementation doesn't support multi-user usage, which moves internal pointer to the end after accessing it. Adding multi-user support for pstore usage. 
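A minimal sketch of how a backend uses the new hooks (illustration only, not ERST code; the demo_* names and the in-memory record array are made up):

#include <linux/pstore.h>
#include <linux/string.h>
#include <linux/time.h>

struct demo_rec { u64 id; struct timespec time; size_t size; char data[64]; };
static struct demo_rec demo_records[4];
static int demo_nr_records, demo_pos;
static struct pstore_info demo_info;	/* .buf set up at registration */

static int demo_open(struct pstore_info *psi)
{
	demo_pos = 0;		/* rewind, so every mount re-reads all records */
	return 0;
}

static int demo_close(struct pstore_info *psi)
{
	return 0;
}

static ssize_t demo_read(u64 *id, enum pstore_type_id *type,
			 struct timespec *time)
{
	struct demo_rec *r;

	if (demo_pos >= demo_nr_records)
		return 0;	/* no more records; not an error */
	r = &demo_records[demo_pos++];
	*id = r->id;
	*type = PSTORE_TYPE_DMESG;
	*time = r->time;
	memcpy(demo_info.buf, r->data, r->size);
	return r->size;
}

With the per-reader position reset in ->open(), a second mount starts from the first record again instead of finding the backend's cursor already at the end, which is the mount/remount problem this patch fixes for ERST.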
Signed-off-by: Chen Gong Signed-off-by: Tony Luck --- drivers/acpi/apei/erst.c | 48 +++++++++++++++++++++++++++++++++++------------- fs/pstore/platform.c | 8 +++++++- include/linux/pstore.h | 2 ++ 3 files changed, 44 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index d5a89d067f98..ddb68c4f8d3e 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -929,6 +929,8 @@ static int erst_check_table(struct acpi_table_erst *erst_tab) return 0; } +static int erst_open_pstore(struct pstore_info *psi); +static int erst_close_pstore(struct pstore_info *psi); static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, struct timespec *time); static u64 erst_writer(enum pstore_type_id type, size_t size); @@ -936,6 +938,8 @@ static u64 erst_writer(enum pstore_type_id type, size_t size); static struct pstore_info erst_info = { .owner = THIS_MODULE, .name = "erst", + .open = erst_open_pstore, + .close = erst_close_pstore, .read = erst_reader, .write = erst_writer, .erase = erst_clear @@ -957,12 +961,32 @@ struct cper_pstore_record { char data[]; } __packed; +static int reader_pos; + +static int erst_open_pstore(struct pstore_info *psi) +{ + int rc; + + if (erst_disable) + return -ENODEV; + + rc = erst_get_record_id_begin(&reader_pos); + + return rc; +} + +static int erst_close_pstore(struct pstore_info *psi) +{ + erst_get_record_id_end(); + + return 0; +} + static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, struct timespec *time) { int rc; - ssize_t len; - unsigned long flags; + ssize_t len = 0; u64 record_id; struct cper_pstore_record *rcd = (struct cper_pstore_record *) (erst_info.buf - sizeof(*rcd)); @@ -970,24 +994,21 @@ static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, if (erst_disable) return -ENODEV; - raw_spin_lock_irqsave(&erst_lock, flags); skip: - rc = __erst_get_next_record_id(&record_id); - if (rc) { - raw_spin_unlock_irqrestore(&erst_lock, flags); - return rc; - } + rc = erst_get_record_id_next(&reader_pos, &record_id); + if (rc) + goto out; + /* no more record */ if (record_id == APEI_ERST_INVALID_RECORD_ID) { - raw_spin_unlock_irqrestore(&erst_lock, flags); - return 0; + rc = -1; + goto out; } - len = __erst_read(record_id, &rcd->hdr, sizeof(*rcd) + + len = erst_read(record_id, &rcd->hdr, sizeof(*rcd) + erst_erange.size); if (uuid_le_cmp(rcd->hdr.creator_id, CPER_CREATOR_PSTORE) != 0) goto skip; - raw_spin_unlock_irqrestore(&erst_lock, flags); *id = record_id; if (uuid_le_cmp(rcd->sec_hdr.section_type, @@ -1005,7 +1026,8 @@ skip: time->tv_sec = 0; time->tv_nsec = 0; - return len - sizeof(*rcd); +out: + return (rc < 0) ? 
rc : (len - sizeof(*rcd)); } static u64 erst_writer(enum pstore_type_id type, size_t size) diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 912403c2a93d..f2c3ff20ea68 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -156,17 +156,23 @@ void pstore_get_records(void) u64 id; enum pstore_type_id type; struct timespec time; - int failed = 0; + int failed = 0, rc; if (!psi) return; mutex_lock(&psinfo->buf_mutex); + rc = psi->open(psi); + if (rc) + goto out; + while ((size = psi->read(&id, &type, &time)) > 0) { if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size, time, psi->erase)) failed++; } + psi->close(psi); +out: mutex_unlock(&psinfo->buf_mutex); if (failed) diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 14ce2f5d08af..2455ef2683f0 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -35,6 +35,8 @@ struct pstore_info { struct mutex buf_mutex; /* serialize access to 'buf' */ char *buf; size_t bufsize; + int (*open)(struct pstore_info *psi); + int (*close)(struct pstore_info *psi); ssize_t (*read)(u64 *id, enum pstore_type_id *type, struct timespec *time); u64 (*write)(enum pstore_type_id type, size_t size); -- cgit v1.2.3 From 7527a782e187d1214a5b3dc2897ce441033bb4ef Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 13 May 2011 10:58:57 +0200 Subject: cfg80211: advertise possible interface combinations Add the ability to advertise interface combinations in nl80211. This allows the driver to indicate what the combinations are that it supports. "Combinations" of just a single interface are implicit, as previously. Note that cfg80211 will enforce that the restrictions are met, but not for all drivers yet (once all drivers are updated, we can remove the flag and enforce for all). When no combinations are actually supported, an empty list will be exported so that userspace can know if the kernel exported this info or not (although it isn't clear to me what tools using the info should do if the kernel didn't export it). Since some interface types are purely virtual/software and don't fit the restrictions, those are exposed in a new list of pure SW types, not subject to restrictions. This mainly exists to handle AP-VLAN and monitor interfaces in mac80211. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 122 +++++++++++++++++++++++++++++++++++++++++++++++- include/net/cfg80211.h | 88 ++++++++++++++++++++++++++++++++++ net/mac80211/main.c | 16 +++++-- net/wireless/core.c | 69 +++++++++++++++++++++++++++ net/wireless/core.h | 11 +++++ net/wireless/nl80211.c | 105 +++++++++++++++++++++++++++++++++++------ net/wireless/util.c | 80 +++++++++++++++++++++++++++++++ 7 files changed, 472 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 281a2bb6a6ec..0ea497cb607c 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -76,6 +76,39 @@ * below. */ +/** + * DOC: Virtual interface / concurrency capabilities + * + * Some devices are able to operate with virtual MACs, they can have + * more than one virtual interface. The capability handling for this + * is a bit complex though, as there may be a number of restrictions + * on the types of concurrency that are supported. + * + * To start with, each device supports the interface types listed in + * the %NL80211_ATTR_SUPPORTED_IFTYPES attribute, but by listing the + * types there no concurrency is implied. 
+ * + * Once concurrency is desired, more attributes must be observed: + * To start with, since some interface types are purely managed in + * software, like the AP-VLAN type in mac80211 for example, there's + * an additional list of these, they can be added at any time and + * are only restricted by some semantic restrictions (e.g. AP-VLAN + * cannot be added without a corresponding AP interface). This list + * is exported in the %NL80211_ATTR_SOFTWARE_IFTYPES attribute. + * + * Further, the list of supported combinations is exported. This is + * in the %NL80211_ATTR_INTERFACE_COMBINATIONS attribute. Basically, + * it exports a list of "groups", and at any point in time the + * interfaces that are currently active must fall into any one of + * the advertised groups. Within each group, there are restrictions + * on the number of interfaces of different types that are supported + * and also the number of different channels, along with potentially + * some other restrictions. See &enum nl80211_if_combination_attrs. + * + * All together, these attributes define the concurrency of virtual + * interfaces that a given device supports. + */ + /** * enum nl80211_commands - supported nl80211 commands * @@ -954,6 +987,14 @@ enum nl80211_commands { * @NL80211_ATTR_SCHED_SCAN_INTERVAL: Interval between scheduled scan * cycles, in msecs. * + * @NL80211_ATTR_INTERFACE_COMBINATIONS: Nested attribute listing the supported + * interface combinations. In each nested item, it contains attributes + * defined in &enum nl80211_if_combination_attrs. + * @NL80211_ATTR_SOFTWARE_IFTYPES: Nested attribute (just like + * %NL80211_ATTR_SUPPORTED_IFTYPES) containing the interface types that + * are managed in software: interfaces of these types aren't subject to + * any restrictions in their number or combinations. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1149,6 +1190,9 @@ enum nl80211_attrs { NL80211_ATTR_SCHED_SCAN_INTERVAL, + NL80211_ATTR_INTERFACE_COMBINATIONS, + NL80211_ATTR_SOFTWARE_IFTYPES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1201,7 +1245,9 @@ enum nl80211_attrs { * @NL80211_IFTYPE_ADHOC: independent BSS member * @NL80211_IFTYPE_STATION: managed BSS member * @NL80211_IFTYPE_AP: access point - * @NL80211_IFTYPE_AP_VLAN: VLAN interface for access points + * @NL80211_IFTYPE_AP_VLAN: VLAN interface for access points; VLAN interfaces + * are a bit special in that they must always be tied to a pre-existing + * AP type interface. 
* @NL80211_IFTYPE_WDS: wireless distribution interface * @NL80211_IFTYPE_MONITOR: monitor interface receiving all frames * @NL80211_IFTYPE_MESH_POINT: mesh point @@ -2206,4 +2252,78 @@ enum nl80211_wowlan_triggers { MAX_NL80211_WOWLAN_TRIG = NUM_NL80211_WOWLAN_TRIG - 1 }; +/** + * enum nl80211_iface_limit_attrs - limit attributes + * @NL80211_IFACE_LIMIT_UNSPEC: (reserved) + * @NL80211_IFACE_LIMIT_MAX: maximum number of interfaces that + * can be chosen from this set of interface types (u32) + * @NL80211_IFACE_LIMIT_TYPES: nested attribute containing a + * flag attribute for each interface type in this set + * @NUM_NL80211_IFACE_LIMIT: number of attributes + * @MAX_NL80211_IFACE_LIMIT: highest attribute number + */ +enum nl80211_iface_limit_attrs { + NL80211_IFACE_LIMIT_UNSPEC, + NL80211_IFACE_LIMIT_MAX, + NL80211_IFACE_LIMIT_TYPES, + + /* keep last */ + NUM_NL80211_IFACE_LIMIT, + MAX_NL80211_IFACE_LIMIT = NUM_NL80211_IFACE_LIMIT - 1 +}; + +/** + * enum nl80211_if_combination_attrs -- interface combination attributes + * + * @NL80211_IFACE_COMB_UNSPEC: (reserved) + * @NL80211_IFACE_COMB_LIMITS: Nested attributes containing the limits + * for given interface types, see &enum nl80211_iface_limit_attrs. + * @NL80211_IFACE_COMB_MAXNUM: u32 attribute giving the total number of + * interfaces that can be created in this group. This number doesn't + * apply to interfaces purely managed in software, which are listed + * in a separate attribute %NL80211_ATTR_INTERFACES_SOFTWARE. + * @NL80211_IFACE_COMB_STA_AP_BI_MATCH: flag attribute specifying that + * beacon intervals within this group must be all the same even for + * infrastructure and AP/GO combinations, i.e. the GO(s) must adopt + * the infrastructure network's beacon interval. + * @NL80211_IFACE_COMB_NUM_CHANNELS: u32 attribute specifying how many + * different channels may be used within this group. + * @NUM_NL80211_IFACE_COMB: number of attributes + * @MAX_NL80211_IFACE_COMB: highest attribute number + * + * Examples: + * limits = [ #{STA} <= 1, #{AP} <= 1 ], matching BI, channels = 1, max = 2 + * => allows an AP and a STA that must match BIs + * + * numbers = [ #{AP, P2P-GO} <= 8 ], channels = 1, max = 8 + * => allows 8 of AP/GO + * + * numbers = [ #{STA} <= 2 ], channels = 2, max = 2 + * => allows two STAs on different channels + * + * numbers = [ #{STA} <= 1, #{P2P-client,P2P-GO} <= 3 ], max = 4 + * => allows a STA plus three P2P interfaces + * + * The list of these four possiblities could completely be contained + * within the %NL80211_ATTR_INTERFACE_COMBINATIONS attribute to indicate + * that any of these groups must match. + * + * "Combinations" of just a single interface will not be listed here, + * a single interface of any valid interface type is assumed to always + * be possible by itself. 
This means that implicitly, for each valid + * interface type, the following group always exists: + * numbers = [ #{} <= 1 ], channels = 1, max = 1 + */ +enum nl80211_if_combination_attrs { + NL80211_IFACE_COMB_UNSPEC, + NL80211_IFACE_COMB_LIMITS, + NL80211_IFACE_COMB_MAXNUM, + NL80211_IFACE_COMB_STA_AP_BI_MATCH, + NL80211_IFACE_COMB_NUM_CHANNELS, + + /* keep last */ + NUM_NL80211_IFACE_COMB, + MAX_NL80211_IFACE_COMB = NUM_NL80211_IFACE_COMB - 1 +}; + #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e1f1b41f7b13..04afcfb9eaf4 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1547,6 +1547,10 @@ struct cfg80211_ops { * hints read the documenation for regulatory_hint_found_beacon() * @WIPHY_FLAG_NETNS_OK: if not set, do not allow changing the netns of this * wiphy at all + * @WIPHY_FLAG_ENFORCE_COMBINATIONS: Set this flag to enforce interface + * combinations for this device. This flag is used for backward + * compatibility only until all drivers advertise combinations and + * they will always be enforced. * @WIPHY_FLAG_PS_ON_BY_DEFAULT: if set to true, powersave will be enabled * by default -- this flag will be set depending on the kernel's default * on wiphy_new(), but can be changed by the driver if it has a good @@ -1574,6 +1578,81 @@ enum wiphy_flags { WIPHY_FLAG_IBSS_RSN = BIT(8), WIPHY_FLAG_MESH_AUTH = BIT(10), WIPHY_FLAG_SUPPORTS_SCHED_SCAN = BIT(11), + WIPHY_FLAG_ENFORCE_COMBINATIONS = BIT(12), +}; + +/** + * struct ieee80211_iface_limit - limit on certain interface types + * @max: maximum number of interfaces of these types + * @types: interface types (bits) + */ +struct ieee80211_iface_limit { + u16 max; + u16 types; +}; + +/** + * struct ieee80211_iface_combination - possible interface combination + * @limits: limits for the given interface types + * @n_limits: number of limitations + * @num_different_channels: can use up to this many different channels + * @max_interfaces: maximum number of interfaces in total allowed in this + * group + * @beacon_int_infra_match: In this combination, the beacon intervals + * between infrastructure and AP types must match. This is required + * only in special cases. + * + * These examples can be expressed as follows: + * + * Allow #STA <= 1, #AP <= 1, matching BI, channels = 1, 2 total: + * + * struct ieee80211_iface_limit limits1[] = { + * { .max = 1, .types = BIT(NL80211_IFTYPE_STATION), }, + * { .max = 1, .types = BIT(NL80211_IFTYPE_AP}, }, + * }; + * struct ieee80211_iface_combination combination1 = { + * .limits = limits1, + * .n_limits = ARRAY_SIZE(limits1), + * .max_interfaces = 2, + * .beacon_int_infra_match = true, + * }; + * + * + * Allow #{AP, P2P-GO} <= 8, channels = 1, 8 total: + * + * struct ieee80211_iface_limit limits2[] = { + * { .max = 8, .types = BIT(NL80211_IFTYPE_AP) | + * BIT(NL80211_IFTYPE_P2P_GO), }, + * }; + * struct ieee80211_iface_combination combination2 = { + * .limits = limits2, + * .n_limits = ARRAY_SIZE(limits2), + * .max_interfaces = 8, + * .num_different_channels = 1, + * }; + * + * + * Allow #STA <= 1, #{P2P-client,P2P-GO} <= 3 on two channels, 4 total. + * This allows for an infrastructure connection and three P2P connections. 
+ * + * struct ieee80211_iface_limit limits3[] = { + * { .max = 1, .types = BIT(NL80211_IFTYPE_STATION), }, + * { .max = 3, .types = BIT(NL80211_IFTYPE_P2P_GO) | + * BIT(NL80211_IFTYPE_P2P_CLIENT), }, + * }; + * struct ieee80211_iface_combination combination3 = { + * .limits = limits3, + * .n_limits = ARRAY_SIZE(limits3), + * .max_interfaces = 4, + * .num_different_channels = 2, + * }; + */ +struct ieee80211_iface_combination { + const struct ieee80211_iface_limit *limits; + u32 num_different_channels; + u16 max_interfaces; + u8 n_limits; + bool beacon_int_infra_match; }; struct mac_address { @@ -1653,6 +1732,11 @@ struct wiphy_wowlan_support { * @priv: driver private data (sized according to wiphy_new() parameter) * @interface_modes: bitmask of interfaces types valid for this wiphy, * must be set by driver + * @iface_combinations: Valid interface combinations array, should not + * list single interface types. + * @n_iface_combinations: number of entries in @iface_combinations array. + * @software_iftypes: bitmask of software interface types, these are not + * subject to any restrictions since they are purely managed in SW. * @flags: wiphy flags, see &enum wiphy_flags * @bss_priv_size: each BSS struct has private data allocated with it, * this variable determines its size @@ -1697,6 +1781,10 @@ struct wiphy { const struct ieee80211_txrx_stypes *mgmt_stypes; + const struct ieee80211_iface_combination *iface_combinations; + int n_iface_combinations; + u16 software_iftypes; + u16 n_addresses; /* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 7f89011fa22d..79a2281678bf 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -685,7 +685,7 @@ EXPORT_SYMBOL(ieee80211_alloc_hw); int ieee80211_register_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); - int result; + int result, i; enum ieee80211_band band; int channels, max_bitrates; bool supp_ht; @@ -743,11 +743,19 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) return -ENOMEM; /* if low-level driver supports AP, we also support VLAN */ - if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP)) - local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN); + if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP)) { + hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN); + hw->wiphy->software_iftypes |= BIT(NL80211_IFTYPE_AP_VLAN); + } /* mac80211 always supports monitor */ - local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR); + hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR); + hw->wiphy->software_iftypes |= BIT(NL80211_IFTYPE_MONITOR); + + /* mac80211 doesn't support more than 1 channel */ + for (i = 0; i < hw->wiphy->n_iface_combinations; i++) + if (hw->wiphy->iface_combinations[i].num_different_channels > 1) + return -EINVAL; #ifndef CONFIG_MAC80211_MESH /* mesh depends on Kconfig, but drivers should set it if they want */ diff --git a/net/wireless/core.c b/net/wireless/core.c index 18b002f16860..c22ef3492ee6 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -416,6 +416,67 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) } EXPORT_SYMBOL(wiphy_new); +static int wiphy_verify_combinations(struct wiphy *wiphy) +{ + const struct ieee80211_iface_combination *c; + int i, j; + + /* If we have combinations enforce them */ + if (wiphy->n_iface_combinations) + wiphy->flags |= WIPHY_FLAG_ENFORCE_COMBINATIONS; + + for (i = 0; i < 
wiphy->n_iface_combinations; i++) { + u32 cnt = 0; + u16 all_iftypes = 0; + + c = &wiphy->iface_combinations[i]; + + /* Combinations with just one interface aren't real */ + if (WARN_ON(c->max_interfaces < 2)) + return -EINVAL; + + /* Need at least one channel */ + if (WARN_ON(!c->num_different_channels)) + return -EINVAL; + + if (WARN_ON(!c->n_limits)) + return -EINVAL; + + for (j = 0; j < c->n_limits; j++) { + u16 types = c->limits[j].types; + + /* + * interface types shouldn't overlap, this is + * used in cfg80211_can_change_interface() + */ + if (WARN_ON(types & all_iftypes)) + return -EINVAL; + all_iftypes |= types; + + if (WARN_ON(!c->limits[j].max)) + return -EINVAL; + + /* Shouldn't list software iftypes in combinations! */ + if (WARN_ON(wiphy->software_iftypes & types)) + return -EINVAL; + + cnt += c->limits[j].max; + /* + * Don't advertise an unsupported type + * in a combination. + */ + if (WARN_ON((wiphy->interface_modes & types) != types)) + return -EINVAL; + } + + /* You can't even choose that many! */ + if (WARN_ON(cnt < c->max_interfaces)) + return -EINVAL; + } + + return 0; +} + int wiphy_register(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); @@ -444,6 +505,10 @@ int wiphy_register(struct wiphy *wiphy) if (WARN_ON(ifmodes != wiphy->interface_modes)) wiphy->interface_modes = ifmodes; + res = wiphy_verify_combinations(wiphy); + if (res) + return res; + /* sanity check supported bands/channels */ for (band = 0; band < IEEE80211_NUM_BANDS; band++) { sband = wiphy->bands[band]; @@ -698,6 +763,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, struct net_device *dev = ndev; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev; + int ret; if (!wdev) return NOTIFY_DONE; @@ -893,6 +959,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, return notifier_from_errno(-EOPNOTSUPP); if (rfkill_blocked(rdev->rfkill)) return notifier_from_errno(-ERFKILL); + ret = cfg80211_can_add_interface(rdev, wdev->iftype); + if (ret) + return notifier_from_errno(ret); break; } diff --git a/net/wireless/core.h b/net/wireless/core.h index d4b8f4c0bbbb..bf0fb40e3c8b 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -422,6 +422,17 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, u32 *flags, struct vif_params *params); void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); +int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + enum nl80211_iftype iftype); + +static inline int +cfg80211_can_add_interface(struct cfg80211_registered_device *rdev, + enum nl80211_iftype iftype) +{ + return cfg80211_can_change_interface(rdev, NULL, iftype); +} + struct ieee80211_channel * rdev_freq_to_chan(struct cfg80211_registered_device *rdev, int freq, enum nl80211_channel_type channel_type); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9ef8e287d61b..beac296b1fde 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -564,6 +564,88 @@ static int nl80211_key_allowed(struct wireless_dev *wdev) return 0; } +static int nl80211_put_iftypes(struct sk_buff *msg, u32 attr, u16 ifmodes) +{ + struct nlattr *nl_modes = nla_nest_start(msg, attr); + int i; + + if (!nl_modes) + goto nla_put_failure; + + i = 0; + while (ifmodes) { + if (ifmodes & 1) + NLA_PUT_FLAG(msg, i); + ifmodes >>= 1; + i++; + } + + nla_nest_end(msg, nl_modes); + return 0; + +nla_put_failure: + return -ENOBUFS; 
+} + +static int nl80211_put_iface_combinations(struct wiphy *wiphy, + struct sk_buff *msg) +{ + struct nlattr *nl_combis; + int i, j; + + nl_combis = nla_nest_start(msg, + NL80211_ATTR_INTERFACE_COMBINATIONS); + if (!nl_combis) + goto nla_put_failure; + + for (i = 0; i < wiphy->n_iface_combinations; i++) { + const struct ieee80211_iface_combination *c; + struct nlattr *nl_combi, *nl_limits; + + c = &wiphy->iface_combinations[i]; + + nl_combi = nla_nest_start(msg, i + 1); + if (!nl_combi) + goto nla_put_failure; + + nl_limits = nla_nest_start(msg, NL80211_IFACE_COMB_LIMITS); + if (!nl_limits) + goto nla_put_failure; + + for (j = 0; j < c->n_limits; j++) { + struct nlattr *nl_limit; + + nl_limit = nla_nest_start(msg, j + 1); + if (!nl_limit) + goto nla_put_failure; + NLA_PUT_U32(msg, NL80211_IFACE_LIMIT_MAX, + c->limits[j].max); + if (nl80211_put_iftypes(msg, NL80211_IFACE_LIMIT_TYPES, + c->limits[j].types)) + goto nla_put_failure; + nla_nest_end(msg, nl_limit); + } + + nla_nest_end(msg, nl_limits); + + if (c->beacon_int_infra_match) + NLA_PUT_FLAG(msg, + NL80211_IFACE_COMB_STA_AP_BI_MATCH); + NLA_PUT_U32(msg, NL80211_IFACE_COMB_NUM_CHANNELS, + c->num_different_channels); + NLA_PUT_U32(msg, NL80211_IFACE_COMB_MAXNUM, + c->max_interfaces); + + nla_nest_end(msg, nl_combi); + } + + nla_nest_end(msg, nl_combis); + + return 0; +nla_put_failure: + return -ENOBUFS; +} + static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct cfg80211_registered_device *dev) { @@ -571,13 +653,11 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct nlattr *nl_bands, *nl_band; struct nlattr *nl_freqs, *nl_freq; struct nlattr *nl_rates, *nl_rate; - struct nlattr *nl_modes; struct nlattr *nl_cmds; enum ieee80211_band band; struct ieee80211_channel *chan; struct ieee80211_rate *rate; int i; - u16 ifmodes = dev->wiphy.interface_modes; const struct ieee80211_txrx_stypes *mgmt_stypes = dev->wiphy.mgmt_stypes; @@ -637,20 +717,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, } } - nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); - if (!nl_modes) + if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES, + dev->wiphy.interface_modes)) goto nla_put_failure; - i = 0; - while (ifmodes) { - if (ifmodes & 1) - NLA_PUT_FLAG(msg, i); - ifmodes >>= 1; - i++; - } - - nla_nest_end(msg, nl_modes); - nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS); if (!nl_bands) goto nla_put_failure; @@ -865,6 +935,13 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, nla_nest_end(msg, nl_wowlan); } + if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, + dev->wiphy.software_iftypes)) + goto nla_put_failure; + + if (nl80211_put_iface_combinations(&dev->wiphy, msg)) + goto nla_put_failure; + return genlmsg_end(msg, hdr); nla_put_failure: diff --git a/net/wireless/util.c b/net/wireless/util.c index 414c9f604df6..95e4e254da0a 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -803,6 +803,11 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return -EBUSY; if (ntype != otype) { + err = cfg80211_can_change_interface(rdev, dev->ieee80211_ptr, + ntype); + if (err) + return err; + dev->ieee80211_ptr->use_4addr = false; dev->ieee80211_ptr->mesh_id_up_len = 0; @@ -921,3 +926,78 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, return res; } + +int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, + struct wireless_dev 
*wdev, + enum nl80211_iftype iftype) +{ + struct wireless_dev *wdev_iter; + int num[NUM_NL80211_IFTYPES]; + int total = 1; + int i, j; + + ASSERT_RTNL(); + + /* Always allow software iftypes */ + if (rdev->wiphy.software_iftypes & BIT(iftype)) + return 0; + + /* + * Drivers will gradually all set this flag, until all + * have it we only enforce for those that set it. + */ + if (!(rdev->wiphy.flags & WIPHY_FLAG_ENFORCE_COMBINATIONS)) + return 0; + + memset(num, 0, sizeof(num)); + + num[iftype] = 1; + + mutex_lock(&rdev->devlist_mtx); + list_for_each_entry(wdev_iter, &rdev->netdev_list, list) { + if (wdev_iter == wdev) + continue; + if (!netif_running(wdev_iter->netdev)) + continue; + + if (rdev->wiphy.software_iftypes & BIT(wdev_iter->iftype)) + continue; + + num[wdev_iter->iftype]++; + total++; + } + mutex_unlock(&rdev->devlist_mtx); + + for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) { + const struct ieee80211_iface_combination *c; + struct ieee80211_iface_limit *limits; + + c = &rdev->wiphy.iface_combinations[i]; + + limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits, + GFP_KERNEL); + if (!limits) + return -ENOMEM; + if (total > c->max_interfaces) + goto cont; + + for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { + if (rdev->wiphy.software_iftypes & BIT(iftype)) + continue; + for (j = 0; j < c->n_limits; j++) { + if (!(limits[j].types & iftype)) + continue; + if (limits[j].max < num[iftype]) + goto cont; + limits[j].max -= num[iftype]; + } + } + /* yay, it fits */ + kfree(limits); + return 0; + cont: + kfree(limits); + } + + return -EBUSY; +} -- cgit v1.2.3 From 57cf8043a64b56a10b9f194572548a3dfb62e596 Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Fri, 13 May 2011 10:45:43 -0700 Subject: nl80211: Move peer link state definition to nl80211 These definitions need to be exposed now that we can set the peer link states via NL80211_ATTR_STA_PLINK_STATE. They were already being (opaquely) reported by NL80211_STA_INFO_PLINK_STATE. Signed-off-by: Javier Cardona Signed-off-by: John W. Linville --- include/linux/nl80211.h | 41 +++++++++++++++++++++++++-- include/net/cfg80211.h | 27 ------------------ net/mac80211/cfg.c | 6 ++-- net/mac80211/mesh_plink.c | 72 +++++++++++++++++++++++------------------------ net/mac80211/rx.c | 2 +- net/mac80211/sta_info.c | 2 +- net/mac80211/sta_info.h | 6 ++-- net/wireless/nl80211.c | 2 +- 8 files changed, 83 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 0ea497cb607c..fc3a94821b1e 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -973,9 +973,10 @@ enum nl80211_commands { * @NL80211_ATTR_SUPPORT_MESH_AUTH: Currently, this means the underlying driver * allows auth frames in a mesh to be passed to userspace for processing via * the @NL80211_MESH_SETUP_USERSPACE_AUTH flag. - * @NL80211_ATTR_STA_PLINK_STATE: The state of a mesh peer link. Used when - * userspace is driving the peer link management state machine. - * @NL80211_MESH_SETUP_USERSPACE_AMPE must be enabled. + * @NL80211_ATTR_STA_PLINK_STATE: The state of a mesh peer link as + * defined in &enum nl80211_plink_state. Used when userspace is + * driving the peer link management state machine. + * @NL80211_MESH_SETUP_USERSPACE_AMPE must be enabled. 
* * @NL80211_ATTR_WOWLAN_SUPPORTED: indicates, as part of the wiphy capabilities, * the supported WoWLAN triggers @@ -1396,6 +1397,7 @@ enum nl80211_sta_bss_param { * @NL80211_STA_INFO_LLID: the station's mesh LLID * @NL80211_STA_INFO_PLID: the station's mesh PLID * @NL80211_STA_INFO_PLINK_STATE: peer link state for the station + * (see %enum nl80211_plink_state) * @NL80211_STA_INFO_RX_BITRATE: last unicast data frame rx rate, nested * attribute, like NL80211_STA_INFO_TX_BITRATE. * @NL80211_STA_INFO_BSS_PARAM: current station's view of BSS, nested attribute @@ -2326,4 +2328,37 @@ enum nl80211_if_combination_attrs { MAX_NL80211_IFACE_COMB = NUM_NL80211_IFACE_COMB - 1 }; + +/** + * enum nl80211_plink_state - state of a mesh peer link finite state machine + * + * @NL80211_PLINK_LISTEN: initial state, considered the implicit + * state of non existant mesh peer links + * @NL80211_PLINK_OPN_SNT: mesh plink open frame has been sent to + * this mesh peer + * @NL80211_PLINK_OPN_RCVD: mesh plink open frame has been received + * from this mesh peer + * @NL80211_PLINK_CNF_RCVD: mesh plink confirm frame has been + * received from this mesh peer + * @NL80211_PLINK_ESTAB: mesh peer link is established + * @NL80211_PLINK_HOLDING: mesh peer link is being closed or cancelled + * @NL80211_PLINK_BLOCKED: all frames transmitted from this mesh + * plink are discarded + * @NUM_NL80211_PLINK_STATES: number of peer link states + * @MAX_NL80211_PLINK_STATES: highest numerical value of plink states + */ +enum nl80211_plink_state { + NL80211_PLINK_LISTEN, + NL80211_PLINK_OPN_SNT, + NL80211_PLINK_OPN_RCVD, + NL80211_PLINK_CNF_RCVD, + NL80211_PLINK_ESTAB, + NL80211_PLINK_HOLDING, + NL80211_PLINK_BLOCKED, + + /* keep last */ + NUM_NL80211_PLINK_STATES, + MAX_NL80211_PLINK_STATES = NUM_NL80211_PLINK_STATES - 1 +}; + #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 04afcfb9eaf4..0a2d795d35de 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -371,33 +371,6 @@ enum plink_actions { PLINK_ACTION_BLOCK, }; -/** - * enum plink_states - state of a mesh peer link finite state machine - * - * @PLINK_LISTEN: initial state, considered the implicit state of non - * existant mesh peer links - * @PLINK_OPN_SNT: mesh plink open frame has been sent to this mesh - * peer @PLINK_OPN_RCVD: mesh plink open frame has been received from - * this mesh peer - * @PLINK_CNF_RCVD: mesh plink confirm frame has been received from - * this mesh peer - * @PLINK_ESTAB: mesh peer link is established - * @PLINK_HOLDING: mesh peer link is being closed or cancelled - * @PLINK_BLOCKED: all frames transmitted from this mesh plink are - * discarded - * @PLINK_INVALID: reserved - */ -enum plink_state { - PLINK_LISTEN, - PLINK_OPN_SNT, - PLINK_OPN_RCVD, - PLINK_CNF_RCVD, - PLINK_ESTAB, - PLINK_HOLDING, - PLINK_BLOCKED, - PLINK_INVALID, -}; - /** * struct station_parameters - station parameters * diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 6ecd5862735d..be70c70d3f5b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -730,9 +730,9 @@ static void sta_apply_parameters(struct ieee80211_local *local, #ifdef CONFIG_MAC80211_MESH if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) switch (params->plink_state) { - case PLINK_LISTEN: - case PLINK_ESTAB: - case PLINK_BLOCKED: + case NL80211_PLINK_LISTEN: + case NL80211_PLINK_ESTAB: + case NL80211_PLINK_BLOCKED: sta->plink_state = params->plink_state; break; default: diff --git a/net/mac80211/mesh_plink.c 
b/net/mac80211/mesh_plink.c index 2c37bee3095a..f4adc0917888 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -83,7 +83,7 @@ void mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) */ static inline void mesh_plink_fsm_restart(struct sta_info *sta) { - sta->plink_state = PLINK_LISTEN; + sta->plink_state = NL80211_PLINK_LISTEN; sta->llid = sta->plid = sta->reason = 0; sta->plink_retries = 0; } @@ -126,11 +126,11 @@ static bool __mesh_plink_deactivate(struct sta_info *sta) struct ieee80211_sub_if_data *sdata = sta->sdata; bool deactivated = false; - if (sta->plink_state == PLINK_ESTAB) { + if (sta->plink_state == NL80211_PLINK_ESTAB) { mesh_plink_dec_estab_count(sdata); deactivated = true; } - sta->plink_state = PLINK_BLOCKED; + sta->plink_state = NL80211_PLINK_BLOCKED; mesh_path_flush_by_nexthop(sta); return deactivated; @@ -268,7 +268,7 @@ void mesh_neighbour_update(u8 *hw_addr, u32 rates, sta->last_rx = jiffies; sta->sta.supp_rates[local->hw.conf.channel->band] = rates; if (mesh_peer_accepts_plinks(elems) && - sta->plink_state == PLINK_LISTEN && + sta->plink_state == NL80211_PLINK_LISTEN && sdata->u.mesh.accepting_plinks && sdata->u.mesh.mshcfg.auto_open_plinks) mesh_plink_open(sta); @@ -308,8 +308,8 @@ static void mesh_plink_timer(unsigned long data) sdata = sta->sdata; switch (sta->plink_state) { - case PLINK_OPN_RCVD: - case PLINK_OPN_SNT: + case NL80211_PLINK_OPN_RCVD: + case NL80211_PLINK_OPN_SNT: /* retry timer */ if (sta->plink_retries < dot11MeshMaxRetries(sdata)) { u32 rand; @@ -328,17 +328,17 @@ static void mesh_plink_timer(unsigned long data) } reason = cpu_to_le16(MESH_MAX_RETRIES); /* fall through on else */ - case PLINK_CNF_RCVD: + case NL80211_PLINK_CNF_RCVD: /* confirm timer */ if (!reason) reason = cpu_to_le16(MESH_CONFIRM_TIMEOUT); - sta->plink_state = PLINK_HOLDING; + sta->plink_state = NL80211_PLINK_HOLDING; mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)); spin_unlock_bh(&sta->lock); mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, reason); break; - case PLINK_HOLDING: + case NL80211_PLINK_HOLDING: /* holding timer */ del_timer(&sta->plink_timer); mesh_plink_fsm_restart(sta); @@ -386,11 +386,11 @@ int mesh_plink_open(struct sta_info *sta) spin_lock_bh(&sta->lock); get_random_bytes(&llid, 2); sta->llid = llid; - if (sta->plink_state != PLINK_LISTEN) { + if (sta->plink_state != NL80211_PLINK_LISTEN) { spin_unlock_bh(&sta->lock); return -EBUSY; } - sta->plink_state = PLINK_OPN_SNT; + sta->plink_state = NL80211_PLINK_OPN_SNT; mesh_plink_timer_set(sta, dot11MeshRetryTimeout(sdata)); spin_unlock_bh(&sta->lock); mpl_dbg("Mesh plink: starting establishment with %pM\n", @@ -407,7 +407,7 @@ void mesh_plink_block(struct sta_info *sta) spin_lock_bh(&sta->lock); deactivated = __mesh_plink_deactivate(sta); - sta->plink_state = PLINK_BLOCKED; + sta->plink_state = NL80211_PLINK_BLOCKED; spin_unlock_bh(&sta->lock); if (deactivated) @@ -430,13 +430,13 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m __le16 plid, llid, reason; #ifdef CONFIG_MAC80211_VERBOSE_MPL_DEBUG static const char *mplstates[] = { - [PLINK_LISTEN] = "LISTEN", - [PLINK_OPN_SNT] = "OPN-SNT", - [PLINK_OPN_RCVD] = "OPN-RCVD", - [PLINK_CNF_RCVD] = "CNF_RCVD", - [PLINK_ESTAB] = "ESTAB", - [PLINK_HOLDING] = "HOLDING", - [PLINK_BLOCKED] = "BLOCKED" + [NL80211_PLINK_LISTEN] = "LISTEN", + [NL80211_PLINK_OPN_SNT] = "OPN-SNT", + [NL80211_PLINK_OPN_RCVD] = "OPN-RCVD", + [NL80211_PLINK_CNF_RCVD] = "CNF_RCVD", + [NL80211_PLINK_ESTAB] = 
"ESTAB", + [NL80211_PLINK_HOLDING] = "HOLDING", + [NL80211_PLINK_BLOCKED] = "BLOCKED" }; #endif @@ -502,7 +502,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m return; } - if (sta && sta->plink_state == PLINK_BLOCKED) { + if (sta && sta->plink_state == NL80211_PLINK_BLOCKED) { rcu_read_unlock(); return; } @@ -572,7 +572,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m event = CNF_ACPT; break; case PLINK_CLOSE: - if (sta->plink_state == PLINK_ESTAB) + if (sta->plink_state == NL80211_PLINK_ESTAB) /* Do not check for llid or plid. This does not * follow the standard but since multiple plinks * per sta are not supported, it is necessary in @@ -607,14 +607,14 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m reason = 0; switch (sta->plink_state) { /* spin_unlock as soon as state is updated at each case */ - case PLINK_LISTEN: + case NL80211_PLINK_LISTEN: switch (event) { case CLS_ACPT: mesh_plink_fsm_restart(sta); spin_unlock_bh(&sta->lock); break; case OPN_ACPT: - sta->plink_state = PLINK_OPN_RCVD; + sta->plink_state = NL80211_PLINK_OPN_RCVD; sta->plid = plid; get_random_bytes(&llid, 2); sta->llid = llid; @@ -631,7 +631,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } break; - case PLINK_OPN_SNT: + case NL80211_PLINK_OPN_SNT: switch (event) { case OPN_RJCT: case CNF_RJCT: @@ -640,7 +640,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m if (!reason) reason = cpu_to_le16(MESH_CLOSE_RCVD); sta->reason = reason; - sta->plink_state = PLINK_HOLDING; + sta->plink_state = NL80211_PLINK_HOLDING; if (!mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata))) sta->ignore_plink_timer = true; @@ -652,7 +652,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m break; case OPN_ACPT: /* retry timer is left untouched */ - sta->plink_state = PLINK_OPN_RCVD; + sta->plink_state = NL80211_PLINK_OPN_RCVD; sta->plid = plid; llid = sta->llid; spin_unlock_bh(&sta->lock); @@ -660,7 +660,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m plid, 0); break; case CNF_ACPT: - sta->plink_state = PLINK_CNF_RCVD; + sta->plink_state = NL80211_PLINK_CNF_RCVD; if (!mod_plink_timer(sta, dot11MeshConfirmTimeout(sdata))) sta->ignore_plink_timer = true; @@ -673,7 +673,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } break; - case PLINK_OPN_RCVD: + case NL80211_PLINK_OPN_RCVD: switch (event) { case OPN_RJCT: case CNF_RJCT: @@ -682,7 +682,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m if (!reason) reason = cpu_to_le16(MESH_CLOSE_RCVD); sta->reason = reason; - sta->plink_state = PLINK_HOLDING; + sta->plink_state = NL80211_PLINK_HOLDING; if (!mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata))) sta->ignore_plink_timer = true; @@ -700,7 +700,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m break; case CNF_ACPT: del_timer(&sta->plink_timer); - sta->plink_state = PLINK_ESTAB; + sta->plink_state = NL80211_PLINK_ESTAB; spin_unlock_bh(&sta->lock); mesh_plink_inc_estab_count(sdata); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); @@ -713,7 +713,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } break; - case PLINK_CNF_RCVD: + case NL80211_PLINK_CNF_RCVD: switch (event) { case OPN_RJCT: case CNF_RJCT: @@ -722,7 +722,7 @@ void 
mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m if (!reason) reason = cpu_to_le16(MESH_CLOSE_RCVD); sta->reason = reason; - sta->plink_state = PLINK_HOLDING; + sta->plink_state = NL80211_PLINK_HOLDING; if (!mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata))) sta->ignore_plink_timer = true; @@ -734,7 +734,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m break; case OPN_ACPT: del_timer(&sta->plink_timer); - sta->plink_state = PLINK_ESTAB; + sta->plink_state = NL80211_PLINK_ESTAB; spin_unlock_bh(&sta->lock); mesh_plink_inc_estab_count(sdata); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); @@ -749,13 +749,13 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } break; - case PLINK_ESTAB: + case NL80211_PLINK_ESTAB: switch (event) { case CLS_ACPT: reason = cpu_to_le16(MESH_CLOSE_RCVD); sta->reason = reason; deactivated = __mesh_plink_deactivate(sta); - sta->plink_state = PLINK_HOLDING; + sta->plink_state = NL80211_PLINK_HOLDING; llid = sta->llid; mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)); spin_unlock_bh(&sta->lock); @@ -775,7 +775,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m break; } break; - case PLINK_HOLDING: + case NL80211_PLINK_HOLDING: switch (event) { case CLS_ACPT: if (del_timer(&sta->plink_timer)) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 1b9413fb3839..3a9515cb7ce1 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -490,7 +490,7 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) * establisment frame, beacon or probe, drop the frame. */ - if (!rx->sta || sta_plink_state(rx->sta) != PLINK_ESTAB) { + if (!rx->sta || sta_plink_state(rx->sta) != NL80211_PLINK_ESTAB) { struct ieee80211_mgmt *mgmt; if (!ieee80211_is_mgmt(hdr->frame_control)) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 82ab6b4643fc..4a15f9603562 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -277,7 +277,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ #ifdef CONFIG_MAC80211_MESH - sta->plink_state = PLINK_LISTEN; + sta->plink_state = NL80211_PLINK_LISTEN; init_timer(&sta->plink_timer); #endif diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index d6b566076f05..c6ae8718bd57 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -318,7 +318,7 @@ struct sta_info { u8 plink_retries; bool ignore_plink_timer; bool plink_timer_was_running; - enum plink_state plink_state; + enum nl80211_plink_state plink_state; u32 plink_timeout; struct timer_list plink_timer; #endif @@ -336,12 +336,12 @@ struct sta_info { struct ieee80211_sta sta; }; -static inline enum plink_state sta_plink_state(struct sta_info *sta) +static inline enum nl80211_plink_state sta_plink_state(struct sta_info *sta) { #ifdef CONFIG_MAC80211_MESH return sta->plink_state; #endif - return PLINK_LISTEN; + return NL80211_PLINK_LISTEN; } static inline void set_sta_flags(struct sta_info *sta, const u32 flags) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index beac296b1fde..2222ce08ee91 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2335,7 +2335,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) memset(¶ms, 0, sizeof(params)); params.listen_interval = -1; - params.plink_state = PLINK_INVALID; + params.plink_state = -1; if (info->attrs[NL80211_ATTR_STA_AID]) return -EINVAL; -- cgit 
v1.2.3 From 2064af917b3ba7589070064ca4ed12cecd99a63c Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Fri, 29 Apr 2011 00:37:26 +0200 Subject: PM: Revert "driver core: platform_bus: allow runtime override of dev_pm_ops" The platform_bus_set_pm_ops() operation is deprecated in favor of the new device power domain infrastructre implemented in commit 7538e3db6e015e890825fbd9f8659952896ddd5b (PM: add support for device power domains) Signed-off-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/platform.c | 35 ----------------------------------- include/linux/platform_device.h | 3 --- 2 files changed, 38 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 079c18a5e471..48425f183029 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -916,41 +916,6 @@ struct bus_type platform_bus_type = { }; EXPORT_SYMBOL_GPL(platform_bus_type); -/** - * platform_bus_get_pm_ops() - return pointer to busses dev_pm_ops - * - * This function can be used by platform code to get the current - * set of dev_pm_ops functions used by the platform_bus_type. - */ -const struct dev_pm_ops * __init platform_bus_get_pm_ops(void) -{ - return platform_bus_type.pm; -} - -/** - * platform_bus_set_pm_ops() - update dev_pm_ops for the platform_bus_type - * - * @pm: pointer to new dev_pm_ops struct to be used for platform_bus_type - * - * Platform code can override the dev_pm_ops methods of - * platform_bus_type by using this function. It is expected that - * platform code will first do a platform_bus_get_pm_ops(), then - * kmemdup it, then customize selected methods and pass a pointer to - * the new struct dev_pm_ops to this function. - * - * Since platform-specific code is customizing methods for *all* - * devices (not just platform-specific devices) it is expected that - * any custom overrides of these functions will keep existing behavior - * and simply extend it. For example, any customization of the - * runtime PM methods should continue to call the pm_generic_* - * functions as the default ones do in addition to the - * platform-specific behavior. - */ -void __init platform_bus_set_pm_ops(const struct dev_pm_ops *pm) -{ - platform_bus_type.pm = pm; -} - int __init platform_bus_init(void) { int error; diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index e0093e061b08..ede1a80e3358 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -150,9 +150,6 @@ extern struct platform_device *platform_create_bundle(struct platform_driver *dr struct resource *res, unsigned int n_res, const void *data, size_t size); -extern const struct dev_pm_ops * platform_bus_get_pm_ops(void); -extern void platform_bus_set_pm_ops(const struct dev_pm_ops *pm); - /* early platform driver interface */ struct early_platform_driver { const char *class_str; -- cgit v1.2.3 From f0201738b61b1adcf6b2c4719c5c415745014c1c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 12 Apr 2011 19:06:39 -0400 Subject: ftrace: Avoid recording mcount on .init sections directly The init and exit sections should not be traced and adding a call to mcount to them is a waste of text and instruction cache. Have the macro section attributes include notrace to ignore these functions for tracing from the build. 
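To illustrate the effect, here is a freestanding sketch (not part of the patch; the macro definitions are simplified stand-ins and example_probe() is hypothetical). In the kernel, notrace expands to roughly __attribute__((no_instrument_function)), which stops a -pg build from emitting the mcount call that ftrace would otherwise record:

#define __section(s)	__attribute__((__section__(#s)))
#define __cold		__attribute__((__cold__))
#define notrace		__attribute__((no_instrument_function))

/* After this patch the hotplug-init marker carries notrace as well. */
#define __devinit	__section(.devinit.text) __cold notrace

static int __devinit example_probe(void)
{
	/* Compiled with -pg, no mcount call is emitted for this function,
	 * so ftrace never lists it as a traceable site. */
	return 0;
}

int main(void)
{
	return example_probe();
}
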
Link: http://lkml.kernel.org/r/20110421023738.953028219@goodmis.org Signed-off-by: Steven Rostedt --- include/linux/init.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 577671c55153..9146f39cdddf 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -79,29 +79,29 @@ #define __exitused __used #endif -#define __exit __section(.exit.text) __exitused __cold +#define __exit __section(.exit.text) __exitused __cold notrace /* Used for HOTPLUG */ -#define __devinit __section(.devinit.text) __cold +#define __devinit __section(.devinit.text) __cold notrace #define __devinitdata __section(.devinit.data) #define __devinitconst __section(.devinit.rodata) -#define __devexit __section(.devexit.text) __exitused __cold +#define __devexit __section(.devexit.text) __exitused __cold notrace #define __devexitdata __section(.devexit.data) #define __devexitconst __section(.devexit.rodata) /* Used for HOTPLUG_CPU */ -#define __cpuinit __section(.cpuinit.text) __cold +#define __cpuinit __section(.cpuinit.text) __cold notrace #define __cpuinitdata __section(.cpuinit.data) #define __cpuinitconst __section(.cpuinit.rodata) -#define __cpuexit __section(.cpuexit.text) __exitused __cold +#define __cpuexit __section(.cpuexit.text) __exitused __cold notrace #define __cpuexitdata __section(.cpuexit.data) #define __cpuexitconst __section(.cpuexit.rodata) /* Used for MEMORY_HOTPLUG */ -#define __meminit __section(.meminit.text) __cold +#define __meminit __section(.meminit.text) __cold notrace #define __meminitdata __section(.meminit.data) #define __meminitconst __section(.meminit.rodata) -#define __memexit __section(.memexit.text) __exitused __cold +#define __memexit __section(.memexit.text) __exitused __cold notrace #define __memexitdata __section(.memexit.data) #define __memexitconst __section(.memexit.rodata) -- cgit v1.2.3 From a144c6a6c924aa1da04dd77fb84b89927354fdff Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 6 May 2011 20:09:42 +0200 Subject: PM: Print a warning if firmware is requested when tasks are frozen Some drivers erroneously use request_firmware() from their ->resume() (or ->thaw(), or ->restore()) callbacks, which is not going to work unless the firmware has been built in. This causes system resume to stall until the firmware-loading timeout expires, which makes users think that the resume has failed and reboot their machines unnecessarily. For this reason, make _request_firmware() print a warning and return immediately with error code if it has been called when tasks are frozen and it's impossible to start any new usermode helpers. Signed-off-by: Rafael J. 
Wysocki Acked-by: Greg Kroah-Hartman Reviewed-by: Valdis Kletnieks --- drivers/base/firmware_class.c | 5 +++++ include/linux/kmod.h | 5 +++++ kernel/kmod.c | 9 +++++++++ 3 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 8c798ef7f13f..bbb03e6f7255 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -521,6 +521,11 @@ static int _request_firmware(const struct firmware **firmware_p, if (!firmware_p) return -EINVAL; + if (WARN_ON(usermodehelper_is_disabled())) { + dev_err(device, "firmware: %s will not be loaded\n", name); + return -EBUSY; + } + *firmware_p = firmware = kzalloc(sizeof(*firmware), GFP_KERNEL); if (!firmware) { dev_err(device, "%s: kmalloc(struct firmware) failed\n", diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 6efd7a78de6a..7f3dbcb78116 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -111,7 +111,12 @@ call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait) extern void usermodehelper_init(void); +#ifdef CONFIG_PM_SLEEP extern int usermodehelper_disable(void); extern void usermodehelper_enable(void); +extern bool usermodehelper_is_disabled(void); +#else +static inline bool usermodehelper_is_disabled(void) { return false; } +#endif #endif /* __LINUX_KMOD_H__ */ diff --git a/kernel/kmod.c b/kernel/kmod.c index 9cd0591c96a2..9ab513bd0c3c 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -301,6 +301,15 @@ void usermodehelper_enable(void) usermodehelper_disabled = 0; } +/** + * usermodehelper_is_disabled - check if new helpers are allowed to be started + */ +bool usermodehelper_is_disabled(void) +{ + return usermodehelper_disabled; +} +EXPORT_SYMBOL_GPL(usermodehelper_is_disabled); + static void helper_lock(void) { atomic_inc(&running_helpers); -- cgit v1.2.3 From 13d53f8775c6a00b070a3eef6833795412eb7fcd Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Tue, 10 May 2011 21:27:34 +0200 Subject: kmod: always provide usermodehelper_disable() We need to prevent kernel-forked processes during system poweroff. Such processes try to access the filesystem whose disks we are trying to shutdown at the same time. This causes delays and exceptions in the storage drivers. A follow-up patch will add these calls and need usermodehelper_disable() also on systems without suspend support. Signed-off-by: Kay Sievers Signed-off-by: Rafael J. 
Wysocki --- include/linux/kmod.h | 4 ---- kernel/kmod.c | 7 ------- 2 files changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 7f3dbcb78116..310231823852 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -111,12 +111,8 @@ call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait) extern void usermodehelper_init(void); -#ifdef CONFIG_PM_SLEEP extern int usermodehelper_disable(void); extern void usermodehelper_enable(void); extern bool usermodehelper_is_disabled(void); -#else -static inline bool usermodehelper_is_disabled(void) { return false; } -#endif #endif /* __LINUX_KMOD_H__ */ diff --git a/kernel/kmod.c b/kernel/kmod.c index 9ab513bd0c3c..5ae0ff38425f 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -245,7 +245,6 @@ static void __call_usermodehelper(struct work_struct *work) } } -#ifdef CONFIG_PM_SLEEP /* * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY * (used for preventing user land processes from being created after the user @@ -321,12 +320,6 @@ static void helper_unlock(void) if (atomic_dec_and_test(&running_helpers)) wake_up(&running_helpers_waitq); } -#else /* CONFIG_PM_SLEEP */ -#define usermodehelper_disabled 0 - -static inline void helper_lock(void) {} -static inline void helper_unlock(void) {} -#endif /* CONFIG_PM_SLEEP */ /** * call_usermodehelper_setup - prepare to call a usermode helper -- cgit v1.2.3 From 91e7c75ba93c48a82670d630b9daac92ff70095d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 17 May 2011 23:26:00 +0200 Subject: PM: Allow drivers to allocate memory from .prepare() callbacks safely If device drivers allocate substantial amounts of memory (above 1 MB) in their hibernate .freeze() callbacks (or in their legacy suspend callbcks during hibernation), the subsequent creation of hibernate image may fail due to the lack of memory. This is the case, because the drivers' .freeze() callbacks are executed after the hibernate memory preallocation has been carried out and the preallocated amount of memory may be too small to cover the new driver allocations. Unfortunately, the drivers' .prepare() callbacks also are executed after the hibernate memory preallocation has completed, so they are not suitable for allocating additional memory either. Thus the only way a driver can safely allocate memory during hibernation is to use a hibernate/suspend notifier. However, the notifiers are called before the freezing of user space and the drivers wanting to use them for allocating additional memory may not know how much memory needs to be allocated at that point. To let device drivers overcome this difficulty rework the hibernation sequence so that the memory preallocation is carried out after the drivers' .prepare() callbacks have been executed, so that the .prepare() callbacks can be used for allocating additional memory to be used by the drivers' .freeze() callbacks. Update documentation to match the new behavior of the code. Signed-off-by: Rafael J. 
Wysocki --- Documentation/power/devices.txt | 14 +++++++---- Documentation/power/notifiers.txt | 51 ++++++++++++++++++--------------------- drivers/base/power/main.c | 18 +++++++++----- include/linux/pm.h | 4 +++ kernel/power/hibernate.c | 17 ++++++++++--- 5 files changed, 61 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index 1971bcf48a60..88880839ece4 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt @@ -279,11 +279,15 @@ When the system goes into the standby or memory sleep state, the phases are: time.) Unlike the other suspend-related phases, during the prepare phase the device tree is traversed top-down. - The prepare phase uses only a bus callback. After the callback method - returns, no new children may be registered below the device. The method - may also prepare the device or driver in some way for the upcoming - system power transition, but it should not put the device into a - low-power state. + In addition to that, if device drivers need to allocate additional + memory to be able to hadle device suspend correctly, that should be + done in the prepare phase. + + After the prepare callback method returns, no new children may be + registered below the device. The method may also prepare the device or + driver in some way for the upcoming system power transition (for + example, by allocating additional memory required for this purpose), but + it should not put the device into a low-power state. 2. The suspend methods should quiesce the device to stop it from performing I/O. They also may save the device registers and put it into the diff --git a/Documentation/power/notifiers.txt b/Documentation/power/notifiers.txt index cf980709122a..c2a4a346c0d9 100644 --- a/Documentation/power/notifiers.txt +++ b/Documentation/power/notifiers.txt @@ -1,46 +1,41 @@ Suspend notifiers - (C) 2007 Rafael J. Wysocki , GPL - -There are some operations that device drivers may want to carry out in their -.suspend() routines, but shouldn't, because they can cause the hibernation or -suspend to fail. For example, a driver may want to allocate a substantial amount -of memory (like 50 MB) in .suspend(), but that shouldn't be done after the -swsusp's memory shrinker has run. - -Also, there may be some operations, that subsystems want to carry out before a -hibernation/suspend or after a restore/resume, requiring the system to be fully -functional, so the drivers' .suspend() and .resume() routines are not suitable -for this purpose. For example, device drivers may want to upload firmware to -their devices after a restore from a hibernation image, but they cannot do it by -calling request_firmware() from their .resume() routines (user land processes -are frozen at this point). The solution may be to load the firmware into -memory before processes are frozen and upload it from there in the .resume() -routine. Of course, a hibernation notifier may be used for this purpose. - -The subsystems that have such needs can register suspend notifiers that will be -called upon the following events by the suspend core: + (C) 2007-2011 Rafael J. Wysocki , GPL + +There are some operations that subsystems or drivers may want to carry out +before hibernation/suspend or after restore/resume, but they require the system +to be fully functional, so the drivers' and subsystems' .suspend() and .resume() +or even .prepare() and .complete() callbacks are not suitable for this purpose. 
+For example, device drivers may want to upload firmware to their devices after +resume/restore, but they cannot do it by calling request_firmware() from their +.resume() or .complete() routines (user land processes are frozen at these +points). The solution may be to load the firmware into memory before processes +are frozen and upload it from there in the .resume() routine. +A suspend/hibernation notifier may be used for this purpose. + +The subsystems or drivers having such needs can register suspend notifiers that +will be called upon the following events by the PM core: PM_HIBERNATION_PREPARE The system is going to hibernate or suspend, tasks will be frozen immediately. PM_POST_HIBERNATION The system memory state has been restored from a - hibernation image or an error occurred during the - hibernation. Device drivers' .resume() callbacks have + hibernation image or an error occurred during + hibernation. Device drivers' restore callbacks have been executed and tasks have been thawed. PM_RESTORE_PREPARE The system is going to restore a hibernation image. - If all goes well the restored kernel will issue a + If all goes well, the restored kernel will issue a PM_POST_HIBERNATION notification. -PM_POST_RESTORE An error occurred during the hibernation restore. - Device drivers' .resume() callbacks have been executed +PM_POST_RESTORE An error occurred during restore from hibernation. + Device drivers' restore callbacks have been executed and tasks have been thawed. -PM_SUSPEND_PREPARE The system is preparing for a suspend. +PM_SUSPEND_PREPARE The system is preparing for suspend. PM_POST_SUSPEND The system has just resumed or an error occurred during - the suspend. Device drivers' .resume() callbacks have - been executed and tasks have been thawed. + suspend. Device drivers' resume callbacks have been + executed and tasks have been thawed. It is generally assumed that whatever the notifiers do for PM_HIBERNATION_PREPARE, should be undone for PM_POST_HIBERNATION. Analogously, diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 3b354560f306..aa6320207745 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -579,11 +579,13 @@ static bool is_async(struct device *dev) * Execute the appropriate "resume" callback for all devices whose status * indicates that they are suspended. */ -static void dpm_resume(pm_message_t state) +void dpm_resume(pm_message_t state) { struct device *dev; ktime_t starttime = ktime_get(); + might_sleep(); + mutex_lock(&dpm_list_mtx); pm_transition = state; async_error = 0; @@ -656,10 +658,12 @@ static void device_complete(struct device *dev, pm_message_t state) * Execute the ->complete() callbacks for all devices whose PM status is not * DPM_ON (this allows new devices to be registered). */ -static void dpm_complete(pm_message_t state) +void dpm_complete(pm_message_t state) { struct list_head list; + might_sleep(); + INIT_LIST_HEAD(&list); mutex_lock(&dpm_list_mtx); while (!list_empty(&dpm_prepared_list)) { @@ -688,7 +692,6 @@ static void dpm_complete(pm_message_t state) */ void dpm_resume_end(pm_message_t state) { - might_sleep(); dpm_resume(state); dpm_complete(state); } @@ -912,11 +915,13 @@ static int device_suspend(struct device *dev) * dpm_suspend - Execute "suspend" callbacks for all non-sysdev devices. * @state: PM transition of the system being carried out. 
*/ -static int dpm_suspend(pm_message_t state) +int dpm_suspend(pm_message_t state) { ktime_t starttime = ktime_get(); int error = 0; + might_sleep(); + mutex_lock(&dpm_list_mtx); pm_transition = state; async_error = 0; @@ -1003,10 +1008,12 @@ static int device_prepare(struct device *dev, pm_message_t state) * * Execute the ->prepare() callback(s) for all devices. */ -static int dpm_prepare(pm_message_t state) +int dpm_prepare(pm_message_t state) { int error = 0; + might_sleep(); + mutex_lock(&dpm_list_mtx); while (!list_empty(&dpm_list)) { struct device *dev = to_device(dpm_list.next); @@ -1055,7 +1062,6 @@ int dpm_suspend_start(pm_message_t state) { int error; - might_sleep(); error = dpm_prepare(state); if (!error) error = dpm_suspend(state); diff --git a/include/linux/pm.h b/include/linux/pm.h index 3cc3e7e589f0..dce7c7148771 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -533,10 +533,14 @@ struct dev_power_domain { extern void device_pm_lock(void); extern void dpm_resume_noirq(pm_message_t state); extern void dpm_resume_end(pm_message_t state); +extern void dpm_resume(pm_message_t state); +extern void dpm_complete(pm_message_t state); extern void device_pm_unlock(void); extern int dpm_suspend_noirq(pm_message_t state); extern int dpm_suspend_start(pm_message_t state); +extern int dpm_suspend(pm_message_t state); +extern int dpm_prepare(pm_message_t state); extern void __suspend_report_result(const char *function, void *fn, int ret); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 95a2ac40f48c..f9bec56d8825 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -327,20 +327,25 @@ static int create_image(int platform_mode) int hibernation_snapshot(int platform_mode) { + pm_message_t msg = PMSG_RECOVER; int error; error = platform_begin(platform_mode); if (error) goto Close; + error = dpm_prepare(PMSG_FREEZE); + if (error) + goto Complete_devices; + /* Preallocate image memory before shutting down devices. */ error = hibernate_preallocate_memory(); if (error) - goto Close; + goto Complete_devices; suspend_console(); pm_restrict_gfp_mask(); - error = dpm_suspend_start(PMSG_FREEZE); + error = dpm_suspend(PMSG_FREEZE); if (error) goto Recover_platform; @@ -358,13 +363,17 @@ int hibernation_snapshot(int platform_mode) if (error || !in_suspend) swsusp_free(); - dpm_resume_end(in_suspend ? - (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); + msg = in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE; + dpm_resume(msg); if (error || !in_suspend) pm_restore_gfp_mask(); resume_console(); + + Complete_devices: + dpm_complete(msg); + Close: platform_end(platform_mode); return error; -- cgit v1.2.3 From 6538df80194e305f1b78cafb556f4bb442f808b3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 17 May 2011 23:26:21 +0200 Subject: PM: Introduce generic prepare and complete callbacks for subsystems Introduce generic .prepare() and .complete() power management callbacks, currently missing, that can be used by subsystems and power domains and export them. Provide NULL definitions of all the generic system sleep callbacks for CONFIG_PM_SLEEP unset. Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/generic_ops.c | 39 +++++++++++++++++++++++++++++++++++++++ include/linux/pm.h | 26 +++++++++++++++++++------- 2 files changed, 58 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c index 42f97f925629..cb3bb368681c 100644 --- a/drivers/base/power/generic_ops.c +++ b/drivers/base/power/generic_ops.c @@ -73,6 +73,23 @@ EXPORT_SYMBOL_GPL(pm_generic_runtime_resume); #endif /* CONFIG_PM_RUNTIME */ #ifdef CONFIG_PM_SLEEP +/** + * pm_generic_prepare - Generic routine preparing a device for power transition. + * @dev: Device to prepare. + * + * Prepare a device for a system-wide power transition. + */ +int pm_generic_prepare(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (drv && drv->pm && drv->pm->prepare) + ret = drv->pm->prepare(dev); + + return ret; +} + /** * __pm_generic_call - Generic suspend/freeze/poweroff/thaw subsystem callback. * @dev: Device to handle. @@ -213,16 +230,38 @@ int pm_generic_restore(struct device *dev) return __pm_generic_resume(dev, PM_EVENT_RESTORE); } EXPORT_SYMBOL_GPL(pm_generic_restore); + +/** + * pm_generic_complete - Generic routine competing a device power transition. + * @dev: Device to handle. + * + * Complete a device power transition during a system-wide power transition. + */ +void pm_generic_complete(struct device *dev) +{ + struct device_driver *drv = dev->driver; + + if (drv && drv->pm && drv->pm->complete) + drv->pm->complete(dev); + + /* + * Let runtime PM try to suspend devices that haven't been in use before + * going into the system-wide sleep state we're resuming from. + */ + pm_runtime_idle(dev); +} #endif /* CONFIG_PM_SLEEP */ struct dev_pm_ops generic_subsys_pm_ops = { #ifdef CONFIG_PM_SLEEP + .prepare = pm_generic_prepare, .suspend = pm_generic_suspend, .resume = pm_generic_resume, .freeze = pm_generic_freeze, .thaw = pm_generic_thaw, .poweroff = pm_generic_poweroff, .restore = pm_generic_restore, + .complete = pm_generic_complete, #endif #ifdef CONFIG_PM_RUNTIME .runtime_suspend = pm_generic_runtime_suspend, diff --git a/include/linux/pm.h b/include/linux/pm.h index dce7c7148771..3160648ccdda 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -550,6 +550,16 @@ extern void __suspend_report_result(const char *function, void *fn, int ret); } while (0) extern int device_pm_wait_for_dev(struct device *sub, struct device *dev); + +extern int pm_generic_prepare(struct device *dev); +extern int pm_generic_suspend(struct device *dev); +extern int pm_generic_resume(struct device *dev); +extern int pm_generic_freeze(struct device *dev); +extern int pm_generic_thaw(struct device *dev); +extern int pm_generic_restore(struct device *dev); +extern int pm_generic_poweroff(struct device *dev); +extern void pm_generic_complete(struct device *dev); + #else /* !CONFIG_PM_SLEEP */ #define device_pm_lock() do {} while (0) @@ -566,6 +576,15 @@ static inline int device_pm_wait_for_dev(struct device *a, struct device *b) { return 0; } + +#define pm_generic_prepare NULL +#define pm_generic_suspend NULL +#define pm_generic_resume NULL +#define pm_generic_freeze NULL +#define pm_generic_thaw NULL +#define pm_generic_restore NULL +#define pm_generic_poweroff NULL +#define pm_generic_complete NULL #endif /* !CONFIG_PM_SLEEP */ /* How to reorder dpm_list after device_move() */ @@ -576,11 +595,4 @@ enum dpm_order { DPM_ORDER_DEV_LAST, }; -extern int pm_generic_suspend(struct device *dev); -extern int 
pm_generic_resume(struct device *dev); -extern int pm_generic_freeze(struct device *dev); -extern int pm_generic_thaw(struct device *dev); -extern int pm_generic_restore(struct device *dev); -extern int pm_generic_poweroff(struct device *dev); - #endif /* _LINUX_PM_H */ -- cgit v1.2.3 From a934a00a69e940b126b9bdbf83e630ef5fe43523 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 18 May 2011 10:37:35 +0200 Subject: block: Fix discard topology stacking and reporting In some cases we would end up stacking discard_zeroes_data incorrectly. Fix this by enabling the feature by default for stacking drivers and clearing it for low-level drivers. Incorporating a device that does not support dzd will then cause the feature to be disabled in the stacking driver. Also ensure that the maximum discard value does not overflow when exported in sysfs and return 0 in the alignment and dzd fields for devices that don't support discard. Reported-by: Lukas Czerner Signed-off-by: Martin K. Petersen Acked-by: Mike Snitzer Cc: stable@kernel.org Signed-off-by: Jens Axboe --- block/blk-settings.c | 3 ++- block/blk-sysfs.c | 3 ++- include/linux/blkdev.h | 7 +++++-- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/block/blk-settings.c b/block/blk-settings.c index cd3c428e194f..fa1eb0449a05 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -120,7 +120,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->discard_granularity = 0; lim->discard_alignment = 0; lim->discard_misaligned = 0; - lim->discard_zeroes_data = -1; + lim->discard_zeroes_data = 1; lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); lim->alignment_offset = 0; @@ -166,6 +166,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) blk_set_default_limits(&q->limits); blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS); + q->limits.discard_zeroes_data = 0; /* * by default assume old behaviour and bounce for any highmem page diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 6d735122bc59..53bd0c77bfda 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -152,7 +152,8 @@ static ssize_t queue_discard_granularity_show(struct request_queue *q, char *pag static ssize_t queue_discard_max_show(struct request_queue *q, char *page) { - return queue_var_show(q->limits.max_discard_sectors << 9, page); + return sprintf(page, "%llu\n", + (unsigned long long)q->limits.max_discard_sectors << 9); } static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9f921bf4bf8c..520d8618ed76 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -257,7 +257,7 @@ struct queue_limits { unsigned char misaligned; unsigned char discard_misaligned; unsigned char cluster; - signed char discard_zeroes_data; + unsigned char discard_zeroes_data; }; struct request_queue @@ -1069,13 +1069,16 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector { unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1); + if (!lim->max_discard_sectors) + return 0; + return (lim->discard_granularity + lim->discard_alignment - alignment) & (lim->discard_granularity - 1); } static inline unsigned int queue_discard_zeroes_data(struct request_queue *q) { - if (q->limits.discard_zeroes_data == 1) + if (q->limits.max_discard_sectors && q->limits.discard_zeroes_data == 1) return 
1; return 0; -- cgit v1.2.3 From f12a20fc9bfba4218ecbc4e40c8e08dc2a85dc99 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 17 May 2011 15:44:12 -0700 Subject: procfs: add stub for proc_mkdir_mode() Provide a stub for proc_mkdir_mode() when CONFIG_PROC_FS is not enabled, just like the stub for proc_mkdir(). Fixes this linux-next build error: drivers/net/wireless/airo.c:4504: error: implicit declaration of function 'proc_mkdir_mode' Signed-off-by: Randy Dunlap Cc: Stephen Rothwell Cc: Alexey Dobriyan Cc: "John W. Linville" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/proc_fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 838c1149251a..eaf4350c0f90 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -208,6 +208,8 @@ static inline struct proc_dir_entry *proc_symlink(const char *name, struct proc_dir_entry *parent,const char *dest) {return NULL;} static inline struct proc_dir_entry *proc_mkdir(const char *name, struct proc_dir_entry *parent) {return NULL;} +static inline struct proc_dir_entry *proc_mkdir_mode(const char *name, + mode_t mode, struct proc_dir_entry *parent) { return NULL; } static inline struct proc_dir_entry *create_proc_read_entry(const char *name, mode_t mode, struct proc_dir_entry *base, -- cgit v1.2.3 From fe12bc2c996d3e492b2920e32ac79f7bbae3e15d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 May 2011 12:48:00 +0200 Subject: genirq: Uninline and sanity check generic_handle_irq() generic_handle_irq() is missing a NULL pointer check for the result of irq_to_desc. This was a not a big problem, but we want to expose it to drivers, so we better have sanity checks in place. Add a return value as well, which indicates that the irq number was valid and the handler was invoked. Based on the pure code move from Jonathan Cameron. 
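As a hedged sketch (not taken from the patch) of how a driver might consume the new return value, consider a hypothetical interrupt demultiplexer; example_demux, example_read_pending() and EXAMPLE_NR_CHILD_IRQS are invented for illustration:

#include <linux/bitops.h>
#include <linux/irqdesc.h>
#include <linux/printk.h>

#define EXAMPLE_NR_CHILD_IRQS	8		/* hypothetical */

struct example_demux {				/* hypothetical driver state */
	unsigned int irq_base;			/* first child Linux irq number */
};

/* Stand-in for reading the controller's pending-interrupt register. */
static unsigned long example_read_pending(struct example_demux *d)
{
	(void)d;
	return 0x5;	/* pretend children 0 and 2 are pending */
}

static void example_demux_children(struct example_demux *d)
{
	unsigned long pending = example_read_pending(d);
	unsigned int bit;

	for_each_set_bit(bit, &pending, EXAMPLE_NR_CHILD_IRQS) {
		/*
		 * generic_handle_irq() now validates the irq number and
		 * returns -EINVAL when no descriptor exists, instead of
		 * dereferencing a NULL irq_desc.
		 */
		if (generic_handle_irq(d->irq_base + bit) < 0)
			pr_warn("no descriptor for child irq %u\n",
				d->irq_base + bit);
	}
}
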
Signed-off-by: Thomas Gleixner Cc: Jonathan Cameron --- include/linux/irqdesc.h | 5 +---- kernel/irq/irqdesc.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index c70b1aa4b93a..2d921b35212c 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -111,10 +111,7 @@ static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *de desc->handle_irq(irq, desc); } -static inline void generic_handle_irq(unsigned int irq) -{ - generic_handle_irq_desc(irq, irq_to_desc(irq)); -} +int generic_handle_irq(unsigned int irq); /* Test to see if a driver has successfully requested an irq */ static inline int irq_has_action(unsigned int irq) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index e07b975fdc5a..9f65b0225d6a 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -290,6 +290,21 @@ static int irq_expand_nr_irqs(unsigned int nr) #endif /* !CONFIG_SPARSE_IRQ */ +/** + * generic_handle_irq - Invoke the handler for a particular irq + * @irq: The irq number to handle + * + */ +int generic_handle_irq(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc) + return -EINVAL; + generic_handle_irq_desc(irq, desc); + return 0; +} + /* Dynamic interrupt handling */ /** -- cgit v1.2.3 From b2b07e4fdbc51383cfc0ba5618c2ddf5c9d038f2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 18 May 2011 15:08:03 +0200 Subject: signal: trivial, fix the "timespec declared inside parameter list" warning Fix the compile warning, do_sigtimedwait(struct timespec *) in signal.h needs the forward declaration of timespec. Reported-and-acked-by: Mike Frysinger Signed-off-by: Oleg Nesterov --- include/linux/signal.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index 7e2526374fd7..a44e7f062238 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -234,6 +234,9 @@ static inline int valid_signal(unsigned long sig) return sig <= _NSIG ? 1 : 0; } +struct timespec; +struct pt_regs; + extern int next_signal(struct sigpending *pending, sigset_t *mask); extern int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p, bool group); @@ -248,7 +251,6 @@ extern int sigprocmask(int, sigset_t *, sigset_t *); extern void set_current_blocked(const sigset_t *); extern int show_unhandled_signals; -struct pt_regs; extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie); extern void exit_signals(struct task_struct *tsk); -- cgit v1.2.3 From 01294d82622d6d9d64bde8e4530c7e2c6dbb6ee6 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Wed, 18 May 2011 10:27:39 -0500 Subject: of: fix race when matching drivers If two drivers are probing devices at the same time, both will write their match table result to the dev->of_match cache at the same time. Only write the result if the device matches. In a thread titled "SBus devices sometimes detected, sometimes not", Meelis reported his SBus hme was not detected about 50% of the time. From the debug suggested by Grant it was obvious another driver matched some devices between the call to match the hme and the hme discovery failling. 
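A hedged sketch of the probe-time pattern this fix (and the follow-up revert below) steers drivers toward: look the entry up with of_match_device() in probe() rather than relying on a cached dev->of_match that another concurrently matching driver may have overwritten. The compatible string, example_cfg data and example_probe() are hypothetical:

#include <linux/of_device.h>
#include <linux/platform_device.h>

struct example_cfg {				/* hypothetical per-variant data */
	int quirks;
};

static struct example_cfg example_v1_cfg = { .quirks = 1 };

static const struct of_device_id example_match[] = {
	{ .compatible = "acme,example-v1", .data = &example_v1_cfg },
	{ /* sentinel */ }
};

static int example_probe(struct platform_device *pdev)
{
	const struct of_device_id *match;
	const struct example_cfg *cfg;

	/* Re-match here instead of trusting a pointer cached at match time,
	 * which a concurrent probe of another driver could have clobbered. */
	match = of_match_device(example_match, &pdev->dev);
	if (!match)
		return -EINVAL;

	cfg = match->data;
	dev_info(&pdev->dev, "probed with quirks %d\n", cfg->quirks);
	return 0;
}
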
Reported-by: Meelis Roos Signed-off-by: Milton Miller [grant.likely: modified to only call of_match_device() once] Signed-off-by: Grant Likely --- include/linux/of_device.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 8bfe6c1d4365..b33d68814a73 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -21,8 +21,12 @@ extern void of_device_make_bus_id(struct device *dev); static inline int of_driver_match_device(struct device *dev, const struct device_driver *drv) { - dev->of_match = of_match_device(drv->of_match_table, dev); - return dev->of_match != NULL; + const struct of_device_id *match; + + match = of_match_device(drv->of_match_table, dev); + if (match) + dev->of_match = match; + return match != NULL; } extern struct platform_device *of_dev_get(struct platform_device *dev); -- cgit v1.2.3 From b1608d69cb804e414d0887140ba08a9398e4e638 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 18 May 2011 11:19:24 -0600 Subject: drivercore: revert addition of of_match to struct device Commit b826291c, "drivercore/dt: add a match table pointer to struct device" added an of_match pointer to struct device to cache the of_match_table entry discovered at driver match time. This was unsafe because matching is not an atomic operation with probing a driver. If two or more drivers are attempted to be matched to a driver at the same time, then the cached matching entry pointer could get overwritten. This patch reverts the of_match cache pointer and reworks all users to call of_match_device() directly instead. Signed-off-by: Grant Likely --- arch/powerpc/platforms/83xx/suspend.c | 7 +++++-- arch/powerpc/sysdev/fsl_msi.c | 7 +++++-- arch/sparc/kernel/pci_sabre.c | 5 ++++- arch/sparc/kernel/pci_schizo.c | 8 ++++++-- drivers/atm/fore200e.c | 7 +++++-- drivers/char/hw_random/n2-drv.c | 7 +++++-- drivers/char/ipmi/ipmi_si_intf.c | 7 +++++-- drivers/char/xilinx_hwicap/xilinx_hwicap.c | 14 +++++++++----- drivers/edac/ppc4xx_edac.c | 2 +- drivers/i2c/busses/i2c-mpc.c | 9 ++++++--- drivers/mmc/host/sdhci-of-core.c | 7 +++++-- drivers/mtd/maps/physmap_of.c | 7 +++++-- drivers/net/can/mscan/mpc5xxx_can.c | 7 +++++-- drivers/net/fs_enet/fs_enet-main.c | 9 ++++++--- drivers/net/fs_enet/mii-fec.c | 7 +++++-- drivers/net/sunhme.c | 7 +++++-- drivers/scsi/qlogicpti.c | 7 +++++-- drivers/tty/serial/of_serial.c | 7 +++++-- drivers/usb/gadget/fsl_qe_udc.c | 7 +++++-- drivers/watchdog/mpc8xxx_wdt.c | 7 +++++-- include/linux/device.h | 1 - include/linux/of_device.h | 12 ++++++------ 22 files changed, 108 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 188272934cfb..104faa8aa23c 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -318,17 +318,20 @@ static const struct platform_suspend_ops mpc83xx_suspend_ops = { .end = mpc83xx_suspend_end, }; +static struct of_device_id pmc_match[]; static int pmc_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct device_node *np = ofdev->dev.of_node; struct resource res; struct pmc_type *type; int ret = 0; - if (!ofdev->dev.of_match) + match = of_match_device(pmc_match, &ofdev->dev); + if (!match) return -EINVAL; - type = ofdev->dev.of_match->data; + type = match->data; if (!of_device_is_available(np)) return -ENODEV; diff --git a/arch/powerpc/sysdev/fsl_msi.c 
b/arch/powerpc/sysdev/fsl_msi.c index d5679dc1e20f..01cd2f089512 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -304,8 +304,10 @@ static int __devinit fsl_msi_setup_hwirq(struct fsl_msi *msi, return 0; } +static const struct of_device_id fsl_of_msi_ids[]; static int __devinit fsl_of_msi_probe(struct platform_device *dev) { + const struct of_device_id *match; struct fsl_msi *msi; struct resource res; int err, i, j, irq_index, count; @@ -316,9 +318,10 @@ static int __devinit fsl_of_msi_probe(struct platform_device *dev) u32 offset; static const u32 all_avail[] = { 0, NR_MSI_IRQS }; - if (!dev->dev.of_match) + match = of_match_device(fsl_of_msi_ids, &dev->dev); + if (!match) return -EINVAL; - features = dev->dev.of_match->data; + features = match->data; printk(KERN_DEBUG "Setting up Freescale MSI support\n"); diff --git a/arch/sparc/kernel/pci_sabre.c b/arch/sparc/kernel/pci_sabre.c index 948068a083fc..d1840dbdaa2f 100644 --- a/arch/sparc/kernel/pci_sabre.c +++ b/arch/sparc/kernel/pci_sabre.c @@ -452,8 +452,10 @@ static void __devinit sabre_pbm_init(struct pci_pbm_info *pbm, sabre_scan_bus(pbm, &op->dev); } +static const struct of_device_id sabre_match[]; static int __devinit sabre_probe(struct platform_device *op) { + const struct of_device_id *match; const struct linux_prom64_registers *pr_regs; struct device_node *dp = op->dev.of_node; struct pci_pbm_info *pbm; @@ -463,7 +465,8 @@ static int __devinit sabre_probe(struct platform_device *op) const u32 *vdma; u64 clear_irq; - hummingbird_p = op->dev.of_match && (op->dev.of_match->data != NULL); + match = of_match_device(sabre_match, &op->dev); + hummingbird_p = match && (match->data != NULL); if (!hummingbird_p) { struct device_node *cpu_dp; diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c index fecfcb2063c8..283fbc329a43 100644 --- a/arch/sparc/kernel/pci_schizo.c +++ b/arch/sparc/kernel/pci_schizo.c @@ -1458,11 +1458,15 @@ out_err: return err; } +static const struct of_device_id schizo_match[]; static int __devinit schizo_probe(struct platform_device *op) { - if (!op->dev.of_match) + const struct of_device_id *match; + + match = of_match_device(schizo_match, &op->dev); + if (!match) return -EINVAL; - return __schizo_init(op, (unsigned long) op->dev.of_match->data); + return __schizo_init(op, (unsigned long)match->data); } /* The ordering of this table is very important. 
Some Tomatillo diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index bdd2719f3f68..bc9e702186dd 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -2643,16 +2643,19 @@ fore200e_init(struct fore200e* fore200e, struct device *parent) } #ifdef CONFIG_SBUS +static const struct of_device_id fore200e_sba_match[]; static int __devinit fore200e_sba_probe(struct platform_device *op) { + const struct of_device_id *match; const struct fore200e_bus *bus; struct fore200e *fore200e; static int index = 0; int err; - if (!op->dev.of_match) + match = of_match_device(fore200e_sba_match, &op->dev); + if (!match) return -EINVAL; - bus = op->dev.of_match->data; + bus = match->data; fore200e = kzalloc(sizeof(struct fore200e), GFP_KERNEL); if (!fore200e) diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c index 43ac61978d8b..ac6739e085e3 100644 --- a/drivers/char/hw_random/n2-drv.c +++ b/drivers/char/hw_random/n2-drv.c @@ -619,15 +619,18 @@ static void __devinit n2rng_driver_version(void) pr_info("%s", version); } +static const struct of_device_id n2rng_match[]; static int __devinit n2rng_probe(struct platform_device *op) { + const struct of_device_id *match; int victoria_falls; int err = -ENOMEM; struct n2rng *np; - if (!op->dev.of_match) + match = of_match_device(n2rng_match, &op->dev); + if (!match) return -EINVAL; - victoria_falls = (op->dev.of_match->data != NULL); + victoria_falls = (match->data != NULL); n2rng_driver_version(); np = kzalloc(sizeof(*np), GFP_KERNEL); diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index cc6c9b2546a3..64c6b8530615 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2554,9 +2554,11 @@ static struct pci_driver ipmi_pci_driver = { }; #endif /* CONFIG_PCI */ +static struct of_device_id ipmi_match[]; static int __devinit ipmi_probe(struct platform_device *dev) { #ifdef CONFIG_OF + const struct of_device_id *match; struct smi_info *info; struct resource resource; const __be32 *regsize, *regspacing, *regshift; @@ -2566,7 +2568,8 @@ static int __devinit ipmi_probe(struct platform_device *dev) dev_info(&dev->dev, "probing via device tree\n"); - if (!dev->dev.of_match) + match = of_match_device(ipmi_match, &dev->dev); + if (!match) return -EINVAL; ret = of_address_to_resource(np, 0, &resource); @@ -2601,7 +2604,7 @@ static int __devinit ipmi_probe(struct platform_device *dev) return -ENOMEM; } - info->si_type = (enum si_type) dev->dev.of_match->data; + info->si_type = (enum si_type) match->data; info->addr_source = SI_DEVICETREE; info->irq_setup = std_irq_setup; diff --git a/drivers/char/xilinx_hwicap/xilinx_hwicap.c b/drivers/char/xilinx_hwicap/xilinx_hwicap.c index d6412c16385f..39ccdeada791 100644 --- a/drivers/char/xilinx_hwicap/xilinx_hwicap.c +++ b/drivers/char/xilinx_hwicap/xilinx_hwicap.c @@ -715,13 +715,13 @@ static int __devexit hwicap_remove(struct device *dev) } #ifdef CONFIG_OF -static int __devinit hwicap_of_probe(struct platform_device *op) +static int __devinit hwicap_of_probe(struct platform_device *op, + const struct hwicap_driver_config *config) { struct resource res; const unsigned int *id; const char *family; int rc; - const struct hwicap_driver_config *config = op->dev.of_match->data; const struct config_registers *regs; @@ -751,20 +751,24 @@ static int __devinit hwicap_of_probe(struct platform_device *op) regs); } #else -static inline int hwicap_of_probe(struct platform_device *op) +static inline int hwicap_of_probe(struct 
platform_device *op, + const struct hwicap_driver_config *config) { return -EINVAL; } #endif /* CONFIG_OF */ +static const struct of_device_id __devinitconst hwicap_of_match[]; static int __devinit hwicap_drv_probe(struct platform_device *pdev) { + const struct of_device_id *match; struct resource *res; const struct config_registers *regs; const char *family; - if (pdev->dev.of_match) - return hwicap_of_probe(pdev); + match = of_match_device(hwicap_of_match, &pdev->dev); + if (match) + return hwicap_of_probe(pdev, match->data); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c index c1f0045ceb8e..af8e7b1aa290 100644 --- a/drivers/edac/ppc4xx_edac.c +++ b/drivers/edac/ppc4xx_edac.c @@ -1019,7 +1019,7 @@ ppc4xx_edac_mc_init(struct mem_ctl_info *mci, struct ppc4xx_edac_pdata *pdata = NULL; const struct device_node *np = op->dev.of_node; - if (op->dev.of_match == NULL) + if (of_match_device(ppc4xx_edac_match, &op->dev) == NULL) return -EINVAL; /* Initial driver pointers and private data */ diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index 75b984c519ac..107397a606b4 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -560,15 +560,18 @@ static struct i2c_adapter mpc_ops = { .timeout = HZ, }; +static const struct of_device_id mpc_i2c_of_match[]; static int __devinit fsl_i2c_probe(struct platform_device *op) { + const struct of_device_id *match; struct mpc_i2c *i2c; const u32 *prop; u32 clock = MPC_I2C_CLOCK_LEGACY; int result = 0; int plen; - if (!op->dev.of_match) + match = of_match_device(mpc_i2c_of_match, &op->dev); + if (!match) return -EINVAL; i2c = kzalloc(sizeof(*i2c), GFP_KERNEL); @@ -605,8 +608,8 @@ static int __devinit fsl_i2c_probe(struct platform_device *op) clock = *prop; } - if (op->dev.of_match->data) { - struct mpc_i2c_data *data = op->dev.of_match->data; + if (match->data) { + struct mpc_i2c_data *data = match->data; data->setup(op->dev.of_node, i2c, clock, data->prescaler); } else { /* Backwards compatibility */ diff --git a/drivers/mmc/host/sdhci-of-core.c b/drivers/mmc/host/sdhci-of-core.c index f9b611fc773e..60e4186a4345 100644 --- a/drivers/mmc/host/sdhci-of-core.c +++ b/drivers/mmc/host/sdhci-of-core.c @@ -124,8 +124,10 @@ static bool __devinit sdhci_of_wp_inverted(struct device_node *np) #endif } +static const struct of_device_id sdhci_of_match[]; static int __devinit sdhci_of_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct device_node *np = ofdev->dev.of_node; struct sdhci_of_data *sdhci_of_data; struct sdhci_host *host; @@ -134,9 +136,10 @@ static int __devinit sdhci_of_probe(struct platform_device *ofdev) int size; int ret; - if (!ofdev->dev.of_match) + match = of_match_device(sdhci_of_match, &ofdev->dev); + if (!match) return -EINVAL; - sdhci_of_data = ofdev->dev.of_match->data; + sdhci_of_data = match->data; if (!of_device_is_available(np)) return -ENODEV; diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c index bd483f0c57e1..c1d33464aee8 100644 --- a/drivers/mtd/maps/physmap_of.c +++ b/drivers/mtd/maps/physmap_of.c @@ -214,11 +214,13 @@ static void __devinit of_free_probes(const char **probes) } #endif +static struct of_device_id of_flash_match[]; static int __devinit of_flash_probe(struct platform_device *dev) { #ifdef CONFIG_MTD_PARTITIONS const char **part_probe_types; #endif + const struct of_device_id *match; struct device_node *dp = dev->dev.of_node; struct resource 
res; struct of_flash *info; @@ -232,9 +234,10 @@ static int __devinit of_flash_probe(struct platform_device *dev) struct mtd_info **mtd_list = NULL; resource_size_t res_size; - if (!dev->dev.of_match) + match = of_match_device(of_flash_match, &dev->dev); + if (!match) return -EINVAL; - probe_type = dev->dev.of_match->data; + probe_type = match->data; reg_tuple_size = (of_n_addr_cells(dp) + of_n_size_cells(dp)) * sizeof(u32); diff --git a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c index bd1d811c204f..5fedc3375562 100644 --- a/drivers/net/can/mscan/mpc5xxx_can.c +++ b/drivers/net/can/mscan/mpc5xxx_can.c @@ -247,8 +247,10 @@ static u32 __devinit mpc512x_can_get_clock(struct platform_device *ofdev, } #endif /* CONFIG_PPC_MPC512x */ +static struct of_device_id mpc5xxx_can_table[]; static int __devinit mpc5xxx_can_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct mpc5xxx_can_data *data; struct device_node *np = ofdev->dev.of_node; struct net_device *dev; @@ -258,9 +260,10 @@ static int __devinit mpc5xxx_can_probe(struct platform_device *ofdev) int irq, mscan_clksrc = 0; int err = -ENOMEM; - if (!ofdev->dev.of_match) + match = of_match_device(mpc5xxx_can_table, &ofdev->dev); + if (!match) return -EINVAL; - data = (struct mpc5xxx_can_data *)ofdev->dev.of_match->data; + data = match->data; base = of_iomap(np, 0); if (!base) { diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c index 24cb953900dd..5131e61c358c 100644 --- a/drivers/net/fs_enet/fs_enet-main.c +++ b/drivers/net/fs_enet/fs_enet-main.c @@ -998,8 +998,10 @@ static const struct net_device_ops fs_enet_netdev_ops = { #endif }; +static struct of_device_id fs_enet_match[]; static int __devinit fs_enet_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct net_device *ndev; struct fs_enet_private *fep; struct fs_platform_info *fpi; @@ -1007,14 +1009,15 @@ static int __devinit fs_enet_probe(struct platform_device *ofdev) const u8 *mac_addr; int privsize, len, ret = -ENODEV; - if (!ofdev->dev.of_match) + match = of_match_device(fs_enet_match, &ofdev->dev); + if (!match) return -EINVAL; fpi = kzalloc(sizeof(*fpi), GFP_KERNEL); if (!fpi) return -ENOMEM; - if (!IS_FEC(ofdev->dev.of_match)) { + if (!IS_FEC(match)) { data = of_get_property(ofdev->dev.of_node, "fsl,cpm-command", &len); if (!data || len != 4) goto out_free_fpi; @@ -1049,7 +1052,7 @@ static int __devinit fs_enet_probe(struct platform_device *ofdev) fep->dev = &ofdev->dev; fep->ndev = ndev; fep->fpi = fpi; - fep->ops = ofdev->dev.of_match->data; + fep->ops = match->data; ret = fep->ops->setup_data(ndev); if (ret) diff --git a/drivers/net/fs_enet/mii-fec.c b/drivers/net/fs_enet/mii-fec.c index 7e840d373ab3..6a2e150e75bb 100644 --- a/drivers/net/fs_enet/mii-fec.c +++ b/drivers/net/fs_enet/mii-fec.c @@ -101,17 +101,20 @@ static int fs_enet_fec_mii_reset(struct mii_bus *bus) return 0; } +static struct of_device_id fs_enet_mdio_fec_match[]; static int __devinit fs_enet_mdio_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct resource res; struct mii_bus *new_bus; struct fec_info *fec; int (*get_bus_freq)(struct device_node *); int ret = -ENOMEM, clock, speed; - if (!ofdev->dev.of_match) + match = of_match_device(fs_enet_mdio_fec_match, &ofdev->dev); + if (!match) return -EINVAL; - get_bus_freq = ofdev->dev.of_match->data; + get_bus_freq = match->data; new_bus = mdiobus_alloc(); if (!new_bus) diff --git a/drivers/net/sunhme.c 
b/drivers/net/sunhme.c index eb4f59fb01e9..bff2f7999ff0 100644 --- a/drivers/net/sunhme.c +++ b/drivers/net/sunhme.c @@ -3237,15 +3237,18 @@ static void happy_meal_pci_exit(void) #endif #ifdef CONFIG_SBUS +static const struct of_device_id hme_sbus_match[]; static int __devinit hme_sbus_probe(struct platform_device *op) { + const struct of_device_id *match; struct device_node *dp = op->dev.of_node; const char *model = of_get_property(dp, "model", NULL); int is_qfe; - if (!op->dev.of_match) + match = of_match_device(hme_sbus_match, &op->dev); + if (!match) return -EINVAL; - is_qfe = (op->dev.of_match->data != NULL); + is_qfe = (match->data != NULL); if (!is_qfe && model && !strcmp(model, "SUNW,sbus-qfe")) is_qfe = 1; diff --git a/drivers/scsi/qlogicpti.c b/drivers/scsi/qlogicpti.c index e2d45c91b8e8..9689d41c7888 100644 --- a/drivers/scsi/qlogicpti.c +++ b/drivers/scsi/qlogicpti.c @@ -1292,8 +1292,10 @@ static struct scsi_host_template qpti_template = { .use_clustering = ENABLE_CLUSTERING, }; +static const struct of_device_id qpti_match[]; static int __devinit qpti_sbus_probe(struct platform_device *op) { + const struct of_device_id *match; struct scsi_host_template *tpnt; struct device_node *dp = op->dev.of_node; struct Scsi_Host *host; @@ -1301,9 +1303,10 @@ static int __devinit qpti_sbus_probe(struct platform_device *op) static int nqptis; const char *fcode; - if (!op->dev.of_match) + match = of_match_device(qpti_match, &op->dev); + if (!match) return -EINVAL; - tpnt = op->dev.of_match->data; + tpnt = match->data; /* Sometimes Antares cards come up not completely * setup, and we get a report of a zero IRQ. diff --git a/drivers/tty/serial/of_serial.c b/drivers/tty/serial/of_serial.c index 0e8eec516df4..c911b2419abb 100644 --- a/drivers/tty/serial/of_serial.c +++ b/drivers/tty/serial/of_serial.c @@ -80,14 +80,17 @@ static int __devinit of_platform_serial_setup(struct platform_device *ofdev, /* * Try to register a serial port */ +static struct of_device_id of_platform_serial_table[]; static int __devinit of_platform_serial_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct of_serial_info *info; struct uart_port port; int port_type; int ret; - if (!ofdev->dev.of_match) + match = of_match_device(of_platform_serial_table, &ofdev->dev); + if (!match) return -EINVAL; if (of_find_property(ofdev->dev.of_node, "used-by-rtas", NULL)) @@ -97,7 +100,7 @@ static int __devinit of_platform_serial_probe(struct platform_device *ofdev) if (info == NULL) return -ENOMEM; - port_type = (unsigned long)ofdev->dev.of_match->data; + port_type = (unsigned long)match->data; ret = of_platform_serial_setup(ofdev, port_type, &port); if (ret) goto out; diff --git a/drivers/usb/gadget/fsl_qe_udc.c b/drivers/usb/gadget/fsl_qe_udc.c index 36613b37c504..3a68e09309f7 100644 --- a/drivers/usb/gadget/fsl_qe_udc.c +++ b/drivers/usb/gadget/fsl_qe_udc.c @@ -2539,15 +2539,18 @@ static void qe_udc_release(struct device *dev) } /* Driver probe functions */ +static const struct of_device_id qe_udc_match[]; static int __devinit qe_udc_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct device_node *np = ofdev->dev.of_node; struct qe_ep *ep; unsigned int ret = 0; unsigned int i; const void *prop; - if (!ofdev->dev.of_match) + match = of_match_device(qe_udc_match, &ofdev->dev); + if (!match) return -EINVAL; prop = of_get_property(np, "mode", NULL); @@ -2561,7 +2564,7 @@ static int __devinit qe_udc_probe(struct platform_device *ofdev) return -ENOMEM; } - udc_controller->soc_type 
= (unsigned long)ofdev->dev.of_match->data; + udc_controller->soc_type = (unsigned long)match->data; udc_controller->usb_regs = of_iomap(np, 0); if (!udc_controller->usb_regs) { ret = -ENOMEM; diff --git a/drivers/watchdog/mpc8xxx_wdt.c b/drivers/watchdog/mpc8xxx_wdt.c index 528bceb220fd..eed5436ffb51 100644 --- a/drivers/watchdog/mpc8xxx_wdt.c +++ b/drivers/watchdog/mpc8xxx_wdt.c @@ -185,17 +185,20 @@ static struct miscdevice mpc8xxx_wdt_miscdev = { .fops = &mpc8xxx_wdt_fops, }; +static const struct of_device_id mpc8xxx_wdt_match[]; static int __devinit mpc8xxx_wdt_probe(struct platform_device *ofdev) { int ret; + const struct of_device_id *match; struct device_node *np = ofdev->dev.of_node; struct mpc8xxx_wdt_type *wdt_type; u32 freq = fsl_get_sys_freq(); bool enabled; - if (!ofdev->dev.of_match) + match = of_match_device(mpc8xxx_wdt_match, &ofdev->dev); + if (!match) return -EINVAL; - wdt_type = ofdev->dev.of_match->data; + wdt_type = match->data; if (!freq || freq == -1) return -EINVAL; diff --git a/include/linux/device.h b/include/linux/device.h index ab8dfc095709..d08399db6e2c 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -442,7 +442,6 @@ struct device { struct dev_archdata archdata; struct device_node *of_node; /* associated device tree node */ - const struct of_device_id *of_match; /* matching of_device_id from driver */ dev_t devt; /* dev_t, creates the sysfs "dev" */ diff --git a/include/linux/of_device.h b/include/linux/of_device.h index b33d68814a73..ae5638480ef2 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -21,12 +21,7 @@ extern void of_device_make_bus_id(struct device *dev); static inline int of_driver_match_device(struct device *dev, const struct device_driver *drv) { - const struct of_device_id *match; - - match = of_match_device(drv->of_match_table, dev); - if (match) - dev->of_match = match; - return match != NULL; + return of_match_device(drv->of_match_table, dev) != NULL; } extern struct platform_device *of_dev_get(struct platform_device *dev); @@ -62,6 +57,11 @@ static inline int of_device_uevent(struct device *dev, static inline void of_device_node_put(struct device *dev) { } +static inline const struct of_device_id *of_match_device( + const struct of_device_id *matches, const struct device *dev) +{ + return NULL; +} #endif /* CONFIG_OF_DEVICE */ #endif /* _LINUX_OF_DEVICE_H */ -- cgit v1.2.3 From b448c4e3ae6d20108dba1d7833f2c0d3dbad87ce Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 29 Apr 2011 15:12:32 -0400 Subject: ftrace: Replace FTRACE_FL_NOTRACE flag with a hash of ignored functions To prepare for the accounting system that will allow multiple users of the function tracer, having the FTRACE_FL_NOTRACE as a flag in the dyn_trace record does not make sense. All ftrace_ops will soon have a hash of functions they should trace and not trace. By making a global hash of functions not to trace makes this easier for the transition. 
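The global hash added here takes over the bookkeeping behind the existing set_ftrace_notrace file, so its effect can be exercised from user space. A hedged sketch follows (it assumes debugfs is mounted at /sys/kernel/debug and picks schedule_timeout as an arbitrary function to exclude; error handling is minimal):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Write a string to a tracing control file, reporting failures. */
static int write_str(const char *path, const char *s)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0 || write(fd, s, strlen(s)) < 0) {
		perror(path);
		if (fd >= 0)
			close(fd);
		return -1;
	}
	close(fd);
	return 0;
}

int main(void)
{
	/* Adds an entry to the notrace hash; the function is now skipped. */
	if (write_str("/sys/kernel/debug/tracing/set_ftrace_notrace",
		      "schedule_timeout\n"))
		return 1;

	/* Enable the function tracer; the excluded function will not appear
	 * in /sys/kernel/debug/tracing/trace. */
	return write_str("/sys/kernel/debug/tracing/current_tracer",
			 "function\n") ? 1 : 0;
}
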
Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 1 - kernel/trace/ftrace.c | 176 +++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 150 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 32047449b309..fe0a90a09243 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -149,7 +149,6 @@ enum { FTRACE_FL_FREE = (1 << 0), FTRACE_FL_FILTER = (1 << 1), FTRACE_FL_ENABLED = (1 << 2), - FTRACE_FL_NOTRACE = (1 << 3), }; struct dyn_ftrace { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d3406346ced6..04c002a491fb 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -57,6 +57,7 @@ /* hash bits for specific function selection */ #define FTRACE_HASH_BITS 7 #define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS) +#define FTRACE_HASH_MAX_BITS 10 /* ftrace_enabled is a method to turn ftrace on or off */ int ftrace_enabled __read_mostly; @@ -865,6 +866,22 @@ enum { FTRACE_START_FUNC_RET = (1 << 3), FTRACE_STOP_FUNC_RET = (1 << 4), }; +struct ftrace_func_entry { + struct hlist_node hlist; + unsigned long ip; +}; + +struct ftrace_hash { + unsigned long size_bits; + struct hlist_head *buckets; + unsigned long count; +}; + +static struct hlist_head notrace_buckets[1 << FTRACE_HASH_MAX_BITS]; +static struct ftrace_hash notrace_hash = { + .size_bits = FTRACE_HASH_MAX_BITS, + .buckets = notrace_buckets, +}; static int ftrace_filtered; @@ -889,6 +906,79 @@ static struct ftrace_page *ftrace_pages; static struct dyn_ftrace *ftrace_free_records; +static struct ftrace_func_entry * +ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) +{ + unsigned long key; + struct ftrace_func_entry *entry; + struct hlist_head *hhd; + struct hlist_node *n; + + if (!hash->count) + return NULL; + + if (hash->size_bits > 0) + key = hash_long(ip, hash->size_bits); + else + key = 0; + + hhd = &hash->buckets[key]; + + hlist_for_each_entry_rcu(entry, n, hhd, hlist) { + if (entry->ip == ip) + return entry; + } + return NULL; +} + +static int add_hash_entry(struct ftrace_hash *hash, unsigned long ip) +{ + struct ftrace_func_entry *entry; + struct hlist_head *hhd; + unsigned long key; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + if (hash->size_bits) + key = hash_long(ip, hash->size_bits); + else + key = 0; + + entry->ip = ip; + hhd = &hash->buckets[key]; + hlist_add_head(&entry->hlist, hhd); + hash->count++; + + return 0; +} + +static void +remove_hash_entry(struct ftrace_hash *hash, + struct ftrace_func_entry *entry) +{ + hlist_del(&entry->hlist); + kfree(entry); + hash->count--; +} + +static void ftrace_hash_clear(struct ftrace_hash *hash) +{ + struct hlist_head *hhd; + struct hlist_node *tp, *tn; + struct ftrace_func_entry *entry; + int size = 1 << hash->size_bits; + int i; + + for (i = 0; i < size; i++) { + hhd = &hash->buckets[i]; + hlist_for_each_entry_safe(entry, tp, tn, hhd, hlist) + remove_hash_entry(hash, entry); + } + FTRACE_WARN_ON(hash->count); +} + /* * This is a double for. Do not use 'break' to break out of the loop, * you must use a goto. 
@@ -1032,7 +1122,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) * If we want to enable it and filtering is on, enable it only if * it's filtered */ - if (enable && !(rec->flags & FTRACE_FL_NOTRACE)) { + if (enable && !ftrace_lookup_ip(¬race_hash, rec->ip)) { if (!ftrace_filtered || (rec->flags & FTRACE_FL_FILTER)) flag = FTRACE_FL_ENABLED; } @@ -1465,7 +1555,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos) !(rec->flags & FTRACE_FL_FILTER)) || ((iter->flags & FTRACE_ITER_NOTRACE) && - !(rec->flags & FTRACE_FL_NOTRACE))) { + !ftrace_lookup_ip(¬race_hash, rec->ip))) { rec = NULL; goto retry; } @@ -1609,14 +1699,15 @@ static void ftrace_filter_reset(int enable) { struct ftrace_page *pg; struct dyn_ftrace *rec; - unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; mutex_lock(&ftrace_lock); - if (enable) + if (enable) { ftrace_filtered = 0; - do_for_each_ftrace_rec(pg, rec) { - rec->flags &= ~type; - } while_for_each_ftrace_rec(); + do_for_each_ftrace_rec(pg, rec) { + rec->flags &= ~FTRACE_FL_FILTER; + } while_for_each_ftrace_rec(); + } else + ftrace_hash_clear(¬race_hash); mutex_unlock(&ftrace_lock); } @@ -1716,13 +1807,36 @@ static int ftrace_match(char *str, char *regex, int len, int type) return matched; } -static void -update_record(struct dyn_ftrace *rec, unsigned long flag, int not) +static int +update_record(struct dyn_ftrace *rec, int enable, int not) { - if (not) - rec->flags &= ~flag; - else - rec->flags |= flag; + struct ftrace_func_entry *entry; + struct ftrace_hash *hash = ¬race_hash; + int ret = 0; + + if (enable) { + if (not) + rec->flags &= ~FTRACE_FL_FILTER; + else + rec->flags |= FTRACE_FL_FILTER; + } else { + if (not) { + /* Do nothing if it doesn't exist */ + entry = ftrace_lookup_ip(hash, rec->ip); + if (!entry) + return 0; + + remove_hash_entry(hash, entry); + } else { + /* Do nothing if it exists */ + entry = ftrace_lookup_ip(hash, rec->ip); + if (entry) + return 0; + + ret = add_hash_entry(hash, rec->ip); + } + } + return ret; } static int @@ -1754,16 +1868,14 @@ static int match_records(char *buff, int len, char *mod, int enable, int not) struct dyn_ftrace *rec; int type = MATCH_FULL; char *search = buff; - unsigned long flag; int found = 0; + int ret; if (len) { type = filter_parse_regex(buff, len, &search, ¬); search_len = strlen(search); } - flag = enable ? 
FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; - mutex_lock(&ftrace_lock); if (unlikely(ftrace_disabled)) @@ -1772,7 +1884,11 @@ static int match_records(char *buff, int len, char *mod, int enable, int not) do_for_each_ftrace_rec(pg, rec) { if (ftrace_match_record(rec, mod, search, search_len, type)) { - update_record(rec, flag, not); + ret = update_record(rec, enable, not); + if (ret < 0) { + found = ret; + goto out_unlock; + } found = 1; } /* @@ -1821,6 +1937,7 @@ static int ftrace_mod_callback(char *func, char *cmd, char *param, int enable) { char *mod; + int ret = -EINVAL; /* * cmd == 'mod' because we only registered this func @@ -1832,15 +1949,19 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable) /* we must have a module name */ if (!param) - return -EINVAL; + return ret; mod = strsep(¶m, ":"); if (!strlen(mod)) - return -EINVAL; + return ret; - if (ftrace_match_module_records(func, mod, enable)) - return 0; - return -EINVAL; + ret = ftrace_match_module_records(func, mod, enable); + if (!ret) + ret = -EINVAL; + if (ret < 0) + return ret; + + return 0; } static struct ftrace_func_command ftrace_mod_cmd = { @@ -2132,14 +2253,17 @@ static int ftrace_process_regex(char *buff, int len, int enable) { char *func, *command, *next = buff; struct ftrace_func_command *p; - int ret = -EINVAL; + int ret; func = strsep(&next, ":"); if (!next) { - if (ftrace_match_records(func, len, enable)) - return 0; - return ret; + ret = ftrace_match_records(func, len, enable); + if (!ret) + ret = -EINVAL; + if (ret < 0) + return ret; + return 0; } /* command found */ -- cgit v1.2.3 From 1cf41dd79993389b012e4542ab502ce36ae7343f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 29 Apr 2011 20:59:51 -0400 Subject: ftrace: Use hash instead for FTRACE_FL_FILTER When multiple users are allowed to have their own set of functions to trace, having the FTRACE_FL_FILTER flag will not be enough to handle the accounting of those users. Each user will need their own set of functions. Replace the FTRACE_FL_FILTER with a filter_hash instead. This is temporary until the rest of the function filtering accounting gets in. 
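In rough terms, the per-record test that decides whether a call site gets enabled then reduces to set membership on the two hashes. A hedged, self-contained sketch of that predicate follows; the types are simplified stand-ins, not the ftrace structures themselves.

#include <stdbool.h>
#include <stdio.h>

/* simplified stand-in for struct ftrace_hash: a tiny set of addresses */
struct simple_hash {
	unsigned long ips[8];
	unsigned int count;
};

static bool hash_contains(const struct simple_hash *h, unsigned long ip)
{
	for (unsigned int i = 0; i < h->count; i++)
		if (h->ips[i] == ip)
			return true;
	return false;
}

/*
 * Enable a call site only if it is not in the notrace set and either
 * no filter is installed (empty filter set) or the site is filtered in.
 */
static bool record_enabled(unsigned long ip,
			   const struct simple_hash *filter,
			   const struct simple_hash *notrace)
{
	if (hash_contains(notrace, ip))
		return false;
	return filter->count == 0 || hash_contains(filter, ip);
}

int main(void)
{
	struct simple_hash filter  = { .ips = { 0x1000 }, .count = 1 };
	struct simple_hash notrace = { .ips = { 0x2000 }, .count = 1 };

	printf("%d %d %d\n",
	       record_enabled(0x1000, &filter, &notrace),	/* 1: filtered in */
	       record_enabled(0x2000, &filter, &notrace),	/* 0: notrace wins */
	       record_enabled(0x3000, &filter, &notrace));	/* 0: not in filter */
	return 0;
}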
Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 3 +- kernel/trace/ftrace.c | 151 ++++++++++++++++++++++--------------------------- 2 files changed, 70 insertions(+), 84 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index fe0a90a09243..52fc5d4e6edd 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -147,8 +147,7 @@ extern int ftrace_text_reserved(void *start, void *end); enum { FTRACE_FL_FREE = (1 << 0), - FTRACE_FL_FILTER = (1 << 1), - FTRACE_FL_ENABLED = (1 << 2), + FTRACE_FL_ENABLED = (1 << 1), }; struct dyn_ftrace { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 04c002a491fb..222eca4c3022 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -883,7 +883,11 @@ static struct ftrace_hash notrace_hash = { .buckets = notrace_buckets, }; -static int ftrace_filtered; +static struct hlist_head filter_buckets[1 << FTRACE_HASH_MAX_BITS]; +static struct ftrace_hash filter_hash = { + .size_bits = FTRACE_HASH_MAX_BITS, + .buckets = filter_buckets, +}; static struct dyn_ftrace *ftrace_new_addrs; @@ -1123,7 +1127,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) * it's filtered */ if (enable && !ftrace_lookup_ip(¬race_hash, rec->ip)) { - if (!ftrace_filtered || (rec->flags & FTRACE_FL_FILTER)) + if (!filter_hash.count || ftrace_lookup_ip(&filter_hash, rec->ip)) flag = FTRACE_FL_ENABLED; } @@ -1430,6 +1434,7 @@ struct ftrace_iterator { struct dyn_ftrace *func; struct ftrace_func_probe *probe; struct trace_parser parser; + struct ftrace_hash *hash; int hidx; int idx; unsigned flags; @@ -1552,7 +1557,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos) if ((rec->flags & FTRACE_FL_FREE) || ((iter->flags & FTRACE_ITER_FILTER) && - !(rec->flags & FTRACE_FL_FILTER)) || + !(ftrace_lookup_ip(&filter_hash, rec->ip))) || ((iter->flags & FTRACE_ITER_NOTRACE) && !ftrace_lookup_ip(¬race_hash, rec->ip))) { @@ -1598,7 +1603,7 @@ static void *t_start(struct seq_file *m, loff_t *pos) * off, we can short cut and just print out that all * functions are enabled. */ - if (iter->flags & FTRACE_ITER_FILTER && !ftrace_filtered) { + if (iter->flags & FTRACE_ITER_FILTER && !filter_hash.count) { if (*pos > 0) return t_hash_start(m, pos); iter->flags |= FTRACE_ITER_PRINTALL; @@ -1695,24 +1700,16 @@ ftrace_avail_open(struct inode *inode, struct file *file) return ret; } -static void ftrace_filter_reset(int enable) +static void ftrace_filter_reset(struct ftrace_hash *hash) { - struct ftrace_page *pg; - struct dyn_ftrace *rec; - mutex_lock(&ftrace_lock); - if (enable) { - ftrace_filtered = 0; - do_for_each_ftrace_rec(pg, rec) { - rec->flags &= ~FTRACE_FL_FILTER; - } while_for_each_ftrace_rec(); - } else - ftrace_hash_clear(¬race_hash); + ftrace_hash_clear(hash); mutex_unlock(&ftrace_lock); } static int -ftrace_regex_open(struct inode *inode, struct file *file, int enable) +ftrace_regex_open(struct ftrace_hash *hash, int flag, + struct inode *inode, struct file *file) { struct ftrace_iterator *iter; int ret = 0; @@ -1729,15 +1726,16 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable) return -ENOMEM; } + iter->hash = hash; + mutex_lock(&ftrace_regex_lock); if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) - ftrace_filter_reset(enable); + ftrace_filter_reset(hash); if (file->f_mode & FMODE_READ) { iter->pg = ftrace_pages_start; - iter->flags = enable ? 
FTRACE_ITER_FILTER : - FTRACE_ITER_NOTRACE; + iter->flags = flag; ret = seq_open(file, &show_ftrace_seq_ops); if (!ret) { @@ -1757,13 +1755,15 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable) static int ftrace_filter_open(struct inode *inode, struct file *file) { - return ftrace_regex_open(inode, file, 1); + return ftrace_regex_open(&filter_hash, FTRACE_ITER_FILTER, + inode, file); } static int ftrace_notrace_open(struct inode *inode, struct file *file) { - return ftrace_regex_open(inode, file, 0); + return ftrace_regex_open(¬race_hash, FTRACE_ITER_NOTRACE, + inode, file); } static loff_t @@ -1808,33 +1808,24 @@ static int ftrace_match(char *str, char *regex, int len, int type) } static int -update_record(struct dyn_ftrace *rec, int enable, int not) +enter_record(struct ftrace_hash *hash, struct dyn_ftrace *rec, int not) { struct ftrace_func_entry *entry; - struct ftrace_hash *hash = ¬race_hash; int ret = 0; - if (enable) { - if (not) - rec->flags &= ~FTRACE_FL_FILTER; - else - rec->flags |= FTRACE_FL_FILTER; - } else { - if (not) { - /* Do nothing if it doesn't exist */ - entry = ftrace_lookup_ip(hash, rec->ip); - if (!entry) - return 0; + entry = ftrace_lookup_ip(hash, rec->ip); + if (not) { + /* Do nothing if it doesn't exist */ + if (!entry) + return 0; - remove_hash_entry(hash, entry); - } else { - /* Do nothing if it exists */ - entry = ftrace_lookup_ip(hash, rec->ip); - if (entry) - return 0; + remove_hash_entry(hash, entry); + } else { + /* Do nothing if it exists */ + if (entry) + return 0; - ret = add_hash_entry(hash, rec->ip); - } + ret = add_hash_entry(hash, rec->ip); } return ret; } @@ -1861,7 +1852,9 @@ ftrace_match_record(struct dyn_ftrace *rec, char *mod, return ftrace_match(str, regex, len, type); } -static int match_records(char *buff, int len, char *mod, int enable, int not) +static int +match_records(struct ftrace_hash *hash, char *buff, + int len, char *mod, int not) { unsigned search_len = 0; struct ftrace_page *pg; @@ -1884,20 +1877,13 @@ static int match_records(char *buff, int len, char *mod, int enable, int not) do_for_each_ftrace_rec(pg, rec) { if (ftrace_match_record(rec, mod, search, search_len, type)) { - ret = update_record(rec, enable, not); + ret = enter_record(hash, rec, not); if (ret < 0) { found = ret; goto out_unlock; } found = 1; } - /* - * Only enable filtering if we have a function that - * is filtered on. 
- */ - if (enable && (rec->flags & FTRACE_FL_FILTER)) - ftrace_filtered = 1; - } while_for_each_ftrace_rec(); out_unlock: mutex_unlock(&ftrace_lock); @@ -1906,12 +1892,13 @@ static int match_records(char *buff, int len, char *mod, int enable, int not) } static int -ftrace_match_records(char *buff, int len, int enable) +ftrace_match_records(struct ftrace_hash *hash, char *buff, int len) { - return match_records(buff, len, NULL, enable, 0); + return match_records(hash, buff, len, NULL, 0); } -static int ftrace_match_module_records(char *buff, char *mod, int enable) +static int +ftrace_match_module_records(struct ftrace_hash *hash, char *buff, char *mod) { int not = 0; @@ -1925,7 +1912,7 @@ static int ftrace_match_module_records(char *buff, char *mod, int enable) not = 1; } - return match_records(buff, strlen(buff), mod, enable, not); + return match_records(hash, buff, strlen(buff), mod, not); } /* @@ -1936,6 +1923,7 @@ static int ftrace_match_module_records(char *buff, char *mod, int enable) static int ftrace_mod_callback(char *func, char *cmd, char *param, int enable) { + struct ftrace_hash *hash; char *mod; int ret = -EINVAL; @@ -1955,7 +1943,12 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable) if (!strlen(mod)) return ret; - ret = ftrace_match_module_records(func, mod, enable); + if (enable) + hash = &filter_hash; + else + hash = ¬race_hash; + + ret = ftrace_match_module_records(hash, func, mod); if (!ret) ret = -EINVAL; if (ret < 0) @@ -2253,12 +2246,18 @@ static int ftrace_process_regex(char *buff, int len, int enable) { char *func, *command, *next = buff; struct ftrace_func_command *p; + struct ftrace_hash *hash; int ret; + if (enable) + hash = &filter_hash; + else + hash = ¬race_hash; + func = strsep(&next, ":"); if (!next) { - ret = ftrace_match_records(func, len, enable); + ret = ftrace_match_records(hash, func, len); if (!ret) ret = -EINVAL; if (ret < 0) @@ -2340,16 +2339,16 @@ ftrace_notrace_write(struct file *file, const char __user *ubuf, } static void -ftrace_set_regex(unsigned char *buf, int len, int reset, int enable) +ftrace_set_regex(struct ftrace_hash *hash, unsigned char *buf, int len, int reset) { if (unlikely(ftrace_disabled)) return; mutex_lock(&ftrace_regex_lock); if (reset) - ftrace_filter_reset(enable); + ftrace_filter_reset(hash); if (buf) - ftrace_match_records(buf, len, enable); + ftrace_match_records(hash, buf, len); mutex_unlock(&ftrace_regex_lock); } @@ -2364,7 +2363,7 @@ ftrace_set_regex(unsigned char *buf, int len, int reset, int enable) */ void ftrace_set_filter(unsigned char *buf, int len, int reset) { - ftrace_set_regex(buf, len, reset, 1); + ftrace_set_regex(&filter_hash, buf, len, reset); } /** @@ -2379,7 +2378,7 @@ void ftrace_set_filter(unsigned char *buf, int len, int reset) */ void ftrace_set_notrace(unsigned char *buf, int len, int reset) { - ftrace_set_regex(buf, len, reset, 0); + ftrace_set_regex(¬race_hash, buf, len, reset); } /* @@ -2431,22 +2430,22 @@ static void __init set_ftrace_early_graph(char *buf) } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -static void __init set_ftrace_early_filter(char *buf, int enable) +static void __init set_ftrace_early_filter(struct ftrace_hash *hash, char *buf) { char *func; while (buf) { func = strsep(&buf, ","); - ftrace_set_regex(func, strlen(func), 0, enable); + ftrace_set_regex(hash, func, strlen(func), 0); } } static void __init set_ftrace_early_filters(void) { if (ftrace_filter_buf[0]) - set_ftrace_early_filter(ftrace_filter_buf, 1); + set_ftrace_early_filter(&filter_hash, 
ftrace_filter_buf); if (ftrace_notrace_buf[0]) - set_ftrace_early_filter(ftrace_notrace_buf, 0); + set_ftrace_early_filter(¬race_hash, ftrace_notrace_buf); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (ftrace_graph_buf[0]) set_ftrace_early_graph(ftrace_graph_buf); @@ -2454,7 +2453,7 @@ static void __init set_ftrace_early_filters(void) } static int -ftrace_regex_release(struct inode *inode, struct file *file, int enable) +ftrace_regex_release(struct inode *inode, struct file *file) { struct seq_file *m = (struct seq_file *)file->private_data; struct ftrace_iterator *iter; @@ -2471,7 +2470,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) parser = &iter->parser; if (trace_parser_loaded(parser)) { parser->buffer[parser->idx] = 0; - ftrace_match_records(parser->buffer, parser->idx, enable); + ftrace_match_records(iter->hash, parser->buffer, parser->idx); } trace_parser_put(parser); @@ -2488,18 +2487,6 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) return 0; } -static int -ftrace_filter_release(struct inode *inode, struct file *file) -{ - return ftrace_regex_release(inode, file, 1); -} - -static int -ftrace_notrace_release(struct inode *inode, struct file *file) -{ - return ftrace_regex_release(inode, file, 0); -} - static const struct file_operations ftrace_avail_fops = { .open = ftrace_avail_open, .read = seq_read, @@ -2512,7 +2499,7 @@ static const struct file_operations ftrace_filter_fops = { .read = seq_read, .write = ftrace_filter_write, .llseek = ftrace_regex_lseek, - .release = ftrace_filter_release, + .release = ftrace_regex_release, }; static const struct file_operations ftrace_notrace_fops = { @@ -2520,7 +2507,7 @@ static const struct file_operations ftrace_notrace_fops = { .read = seq_read, .write = ftrace_notrace_write, .llseek = ftrace_regex_lseek, - .release = ftrace_notrace_release, + .release = ftrace_regex_release, }; #ifdef CONFIG_FUNCTION_GRAPH_TRACER -- cgit v1.2.3 From f45948e898e7bc76a73a468796d2ce80dd040058 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 May 2011 12:29:25 -0400 Subject: ftrace: Create a global_ops to hold the filter and notrace hashes Combine the filter and notrace hashes to be accessed by a single entity, the global_ops. The global_ops is a ftrace_ops structure that is passed to different functions that can read or modify the filtering of the function tracer. The ftrace_ops structure was modified to hold a filter and notrace hashes so that later patches may allow each ftrace_ops to have its own set of rules to what functions may be filtered. 
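A hedged structural sketch of the grouping (simplified shapes, not the kernel definitions): both hashes hang off one ops structure, and the shared file-handling code only has to pick which hash a given interface operates on.

/* sketch only: simplified stand-ins for the ftrace types */
struct simple_hash;			/* opaque set of instruction pointers */

struct simple_ops {
	void (*func)(unsigned long ip, unsigned long parent_ip);
	struct simple_hash *filter_hash;	/* trace only these (empty = all) */
	struct simple_hash *notrace_hash;	/* never trace these */
};

enum { ITER_FILTER = 1, ITER_NOTRACE = 2 };

/* one open/write/release path serves both set_ftrace_filter and
 * set_ftrace_notrace; it merely selects the hash to modify */
static struct simple_hash *hash_for(struct simple_ops *ops, int flag)
{
	return (flag & ITER_NOTRACE) ? ops->notrace_hash : ops->filter_hash;
}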
Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 10 ++++++-- kernel/trace/ftrace.c | 65 +++++++++++++++++++++++++++++++++++--------------- 2 files changed, 54 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 52fc5d4e6edd..6658a51390fe 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -29,9 +29,15 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); +struct ftrace_hash; + struct ftrace_ops { - ftrace_func_t func; - struct ftrace_ops *next; + ftrace_func_t func; + struct ftrace_ops *next; +#ifdef CONFIG_DYNAMIC_FTRACE + struct ftrace_hash *notrace_hash; + struct ftrace_hash *filter_hash; +#endif }; extern int function_trace_stop; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 222eca4c3022..a517a6c40645 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -889,6 +889,12 @@ static struct ftrace_hash filter_hash = { .buckets = filter_buckets, }; +struct ftrace_ops global_ops = { + .func = ftrace_stub, + .notrace_hash = ¬race_hash, + .filter_hash = &filter_hash, +}; + static struct dyn_ftrace *ftrace_new_addrs; static DEFINE_MUTEX(ftrace_regex_lock); @@ -1112,6 +1118,7 @@ int ftrace_text_reserved(void *start, void *end) static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable) { + struct ftrace_ops *ops = &global_ops; unsigned long ftrace_addr; unsigned long flag = 0UL; @@ -1126,8 +1133,9 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) * If we want to enable it and filtering is on, enable it only if * it's filtered */ - if (enable && !ftrace_lookup_ip(¬race_hash, rec->ip)) { - if (!filter_hash.count || ftrace_lookup_ip(&filter_hash, rec->ip)) + if (enable && !ftrace_lookup_ip(ops->notrace_hash, rec->ip)) { + if (!ops->filter_hash->count || + ftrace_lookup_ip(ops->filter_hash, rec->ip)) flag = FTRACE_FL_ENABLED; } @@ -1531,6 +1539,7 @@ static void * t_next(struct seq_file *m, void *v, loff_t *pos) { struct ftrace_iterator *iter = m->private; + struct ftrace_ops *ops = &global_ops; struct dyn_ftrace *rec = NULL; if (unlikely(ftrace_disabled)) @@ -1557,10 +1566,10 @@ t_next(struct seq_file *m, void *v, loff_t *pos) if ((rec->flags & FTRACE_FL_FREE) || ((iter->flags & FTRACE_ITER_FILTER) && - !(ftrace_lookup_ip(&filter_hash, rec->ip))) || + !(ftrace_lookup_ip(ops->filter_hash, rec->ip))) || ((iter->flags & FTRACE_ITER_NOTRACE) && - !ftrace_lookup_ip(¬race_hash, rec->ip))) { + !ftrace_lookup_ip(ops->notrace_hash, rec->ip))) { rec = NULL; goto retry; } @@ -1584,6 +1593,7 @@ static void reset_iter_read(struct ftrace_iterator *iter) static void *t_start(struct seq_file *m, loff_t *pos) { struct ftrace_iterator *iter = m->private; + struct ftrace_ops *ops = &global_ops; void *p = NULL; loff_t l; @@ -1603,7 +1613,7 @@ static void *t_start(struct seq_file *m, loff_t *pos) * off, we can short cut and just print out that all * functions are enabled. 
*/ - if (iter->flags & FTRACE_ITER_FILTER && !filter_hash.count) { + if (iter->flags & FTRACE_ITER_FILTER && !ops->filter_hash->count) { if (*pos > 0) return t_hash_start(m, pos); iter->flags |= FTRACE_ITER_PRINTALL; @@ -1708,10 +1718,11 @@ static void ftrace_filter_reset(struct ftrace_hash *hash) } static int -ftrace_regex_open(struct ftrace_hash *hash, int flag, +ftrace_regex_open(struct ftrace_ops *ops, int flag, struct inode *inode, struct file *file) { struct ftrace_iterator *iter; + struct ftrace_hash *hash; int ret = 0; if (unlikely(ftrace_disabled)) @@ -1726,6 +1737,11 @@ ftrace_regex_open(struct ftrace_hash *hash, int flag, return -ENOMEM; } + if (flag & FTRACE_ITER_NOTRACE) + hash = ops->notrace_hash; + else + hash = ops->filter_hash; + iter->hash = hash; mutex_lock(&ftrace_regex_lock); @@ -1755,14 +1771,14 @@ ftrace_regex_open(struct ftrace_hash *hash, int flag, static int ftrace_filter_open(struct inode *inode, struct file *file) { - return ftrace_regex_open(&filter_hash, FTRACE_ITER_FILTER, + return ftrace_regex_open(&global_ops, FTRACE_ITER_FILTER, inode, file); } static int ftrace_notrace_open(struct inode *inode, struct file *file) { - return ftrace_regex_open(¬race_hash, FTRACE_ITER_NOTRACE, + return ftrace_regex_open(&global_ops, FTRACE_ITER_NOTRACE, inode, file); } @@ -1923,6 +1939,7 @@ ftrace_match_module_records(struct ftrace_hash *hash, char *buff, char *mod) static int ftrace_mod_callback(char *func, char *cmd, char *param, int enable) { + struct ftrace_ops *ops = &global_ops; struct ftrace_hash *hash; char *mod; int ret = -EINVAL; @@ -1944,9 +1961,9 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable) return ret; if (enable) - hash = &filter_hash; + hash = ops->filter_hash; else - hash = ¬race_hash; + hash = ops->notrace_hash; ret = ftrace_match_module_records(hash, func, mod); if (!ret) @@ -2245,14 +2262,15 @@ int unregister_ftrace_command(struct ftrace_func_command *cmd) static int ftrace_process_regex(char *buff, int len, int enable) { char *func, *command, *next = buff; + struct ftrace_ops *ops = &global_ops; struct ftrace_func_command *p; struct ftrace_hash *hash; int ret; if (enable) - hash = &filter_hash; + hash = ops->filter_hash; else - hash = ¬race_hash; + hash = ops->notrace_hash; func = strsep(&next, ":"); @@ -2339,11 +2357,19 @@ ftrace_notrace_write(struct file *file, const char __user *ubuf, } static void -ftrace_set_regex(struct ftrace_hash *hash, unsigned char *buf, int len, int reset) +ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, + int reset, int enable) { + struct ftrace_hash *hash; + if (unlikely(ftrace_disabled)) return; + if (enable) + hash = ops->filter_hash; + else + hash = ops->notrace_hash; + mutex_lock(&ftrace_regex_lock); if (reset) ftrace_filter_reset(hash); @@ -2363,7 +2389,7 @@ ftrace_set_regex(struct ftrace_hash *hash, unsigned char *buf, int len, int rese */ void ftrace_set_filter(unsigned char *buf, int len, int reset) { - ftrace_set_regex(&filter_hash, buf, len, reset); + ftrace_set_regex(&global_ops, buf, len, reset, 1); } /** @@ -2378,7 +2404,7 @@ void ftrace_set_filter(unsigned char *buf, int len, int reset) */ void ftrace_set_notrace(unsigned char *buf, int len, int reset) { - ftrace_set_regex(¬race_hash, buf, len, reset); + ftrace_set_regex(&global_ops, buf, len, reset, 0); } /* @@ -2430,22 +2456,23 @@ static void __init set_ftrace_early_graph(char *buf) } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -static void __init set_ftrace_early_filter(struct ftrace_hash *hash, char *buf) +static 
void __init +set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable) { char *func; while (buf) { func = strsep(&buf, ","); - ftrace_set_regex(hash, func, strlen(func), 0); + ftrace_set_regex(ops, func, strlen(func), 0, enable); } } static void __init set_ftrace_early_filters(void) { if (ftrace_filter_buf[0]) - set_ftrace_early_filter(&filter_hash, ftrace_filter_buf); + set_ftrace_early_filter(&global_ops, ftrace_filter_buf, 1); if (ftrace_notrace_buf[0]) - set_ftrace_early_filter(¬race_hash, ftrace_notrace_buf); + set_ftrace_early_filter(&global_ops, ftrace_notrace_buf, 0); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (ftrace_graph_buf[0]) set_ftrace_early_graph(ftrace_graph_buf); -- cgit v1.2.3 From ed926f9b35cda0988234c356e16a7cb30f4e5338 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 3 May 2011 13:25:24 -0400 Subject: ftrace: Use counters to enable functions to trace Every function has its own record that stores the instruction pointer and flags for the function to be traced. There are only two flags: enabled and free. The enabled flag states that tracing for the function has been enabled (actively traced), and the free flag states that the record no longer points to a function and can be used by new functions (loaded modules). These flags are now moved to the MSB of the flags (actually just the top 32bits). The rest of the bits (30 bits) are now used as a ref counter. Everytime a tracer register functions to trace, those functions will have its counter incremented. When tracing is enabled, to determine if a function should be traced, the counter is examined, and if it is non-zero it is set to trace. When a ftrace_ops is registered to trace functions, its hashes are examined. If the ftrace_ops filter_hash count is zero, then all functions are set to be traced, otherwise only the functions in the hash are to be traced. The exception to this is if a function is also in the ftrace_ops notrace_hash. Then that function's counter is not incremented for this ftrace_ops. 
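A minimal sketch of the flag/counter split described above, as standalone userspace C; the bit positions mirror the patch, but the helper functions are invented purely for illustration.

#include <stdio.h>

/* two flag bits at the top of the word, a reference count below them */
#define FL_ENABLED	(1UL << 30)
#define FL_FREE		(1UL << 31)
#define FL_MASK		(3UL << 30)
#define REF_MAX		((1UL << 30) - 1)

static unsigned long ref_count(unsigned long flags)
{
	return flags & ~FL_MASK;
}

static int ref_inc(unsigned long *flags)
{
	if (ref_count(*flags) == REF_MAX)
		return -1;		/* would overflow into the flag bits */
	(*flags)++;
	return 0;
}

static int ref_dec(unsigned long *flags)
{
	if (ref_count(*flags) == 0)
		return -1;		/* underflow */
	(*flags)--;
	return 0;
}

int main(void)
{
	unsigned long flags = 0;

	ref_inc(&flags);		/* one tracer wants this function */
	printf("trace? %s\n", ref_count(flags) ? "yes" : "no");
	ref_dec(&flags);		/* last tracer went away */
	printf("trace? %s\n", ref_count(flags) ? "yes" : "no");
	return 0;
}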
Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 8 ++- kernel/trace/ftrace.c | 158 ++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 148 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 6658a51390fe..ab1c46e70bb6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -37,6 +37,7 @@ struct ftrace_ops { #ifdef CONFIG_DYNAMIC_FTRACE struct ftrace_hash *notrace_hash; struct ftrace_hash *filter_hash; + unsigned long flags; #endif }; @@ -152,10 +153,13 @@ extern void unregister_ftrace_function_probe_all(char *glob); extern int ftrace_text_reserved(void *start, void *end); enum { - FTRACE_FL_FREE = (1 << 0), - FTRACE_FL_ENABLED = (1 << 1), + FTRACE_FL_ENABLED = (1 << 30), + FTRACE_FL_FREE = (1 << 31), }; +#define FTRACE_FL_MASK (0x3UL << 30) +#define FTRACE_REF_MAX ((1 << 30) - 1) + struct dyn_ftrace { union { unsigned long ip; /* address of mcount call-site */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 46f08264980b..5dd332cc5aa8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -890,6 +890,10 @@ static const struct ftrace_hash empty_hash = { }; #define EMPTY_HASH ((struct ftrace_hash *)&empty_hash) +enum { + FTRACE_OPS_FL_ENABLED = 1, +}; + struct ftrace_ops global_ops = { .func = ftrace_stub, .notrace_hash = EMPTY_HASH, @@ -1161,6 +1165,105 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) } \ } +static void __ftrace_hash_rec_update(struct ftrace_ops *ops, + int filter_hash, + bool inc) +{ + struct ftrace_hash *hash; + struct ftrace_hash *other_hash; + struct ftrace_page *pg; + struct dyn_ftrace *rec; + int count = 0; + int all = 0; + + /* Only update if the ops has been registered */ + if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) + return; + + /* + * In the filter_hash case: + * If the count is zero, we update all records. + * Otherwise we just update the items in the hash. + * + * In the notrace_hash case: + * We enable the update in the hash. + * As disabling notrace means enabling the tracing, + * and enabling notrace means disabling, the inc variable + * gets inversed. + */ + if (filter_hash) { + hash = ops->filter_hash; + other_hash = ops->notrace_hash; + if (!hash->count) + all = 1; + } else { + inc = !inc; + hash = ops->notrace_hash; + other_hash = ops->filter_hash; + /* + * If the notrace hash has no items, + * then there's nothing to do. + */ + if (!hash->count) + return; + } + + do_for_each_ftrace_rec(pg, rec) { + int in_other_hash = 0; + int in_hash = 0; + int match = 0; + + if (all) { + /* + * Only the filter_hash affects all records. + * Update if the record is not in the notrace hash. + */ + if (!ftrace_lookup_ip(other_hash, rec->ip)) + match = 1; + } else { + in_hash = !!ftrace_lookup_ip(hash, rec->ip); + in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip); + + /* + * + */ + if (filter_hash && in_hash && !in_other_hash) + match = 1; + else if (!filter_hash && in_hash && + (in_other_hash || !other_hash->count)) + match = 1; + } + if (!match) + continue; + + if (inc) { + rec->flags++; + if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == FTRACE_REF_MAX)) + return; + } else { + if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == 0)) + return; + rec->flags--; + } + count++; + /* Shortcut, if we handled all records, we are done. 
*/ + if (!all && count == hash->count) + return; + } while_for_each_ftrace_rec(); +} + +static void ftrace_hash_rec_disable(struct ftrace_ops *ops, + int filter_hash) +{ + __ftrace_hash_rec_update(ops, filter_hash, 0); +} + +static void ftrace_hash_rec_enable(struct ftrace_ops *ops, + int filter_hash) +{ + __ftrace_hash_rec_update(ops, filter_hash, 1); +} + static void ftrace_free_rec(struct dyn_ftrace *rec) { rec->freelist = ftrace_free_records; @@ -1276,26 +1379,24 @@ int ftrace_text_reserved(void *start, void *end) static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable) { - struct ftrace_ops *ops = &global_ops; unsigned long ftrace_addr; unsigned long flag = 0UL; ftrace_addr = (unsigned long)FTRACE_ADDR; /* - * If this record is not to be traced or we want to disable it, - * then disable it. + * If we are enabling tracing: * - * If we want to enable it and filtering is off, then enable it. + * If the record has a ref count, then we need to enable it + * because someone is using it. * - * If we want to enable it and filtering is on, enable it only if - * it's filtered + * Otherwise we make sure its disabled. + * + * If we are disabling tracing, then disable all records that + * are enabled. */ - if (enable && !ftrace_lookup_ip(ops->notrace_hash, rec->ip)) { - if (!ops->filter_hash->count || - ftrace_lookup_ip(ops->filter_hash, rec->ip)) - flag = FTRACE_FL_ENABLED; - } + if (enable && (rec->flags & ~FTRACE_FL_MASK)) + flag = FTRACE_FL_ENABLED; /* If the state of this record hasn't changed, then do nothing */ if ((rec->flags & FTRACE_FL_ENABLED) == flag) @@ -1423,17 +1524,25 @@ static void ftrace_startup_enable(int command) static void ftrace_startup(int command) { + struct ftrace_ops *ops = &global_ops; + if (unlikely(ftrace_disabled)) return; ftrace_start_up++; command |= FTRACE_ENABLE_CALLS; + ops->flags |= FTRACE_OPS_FL_ENABLED; + if (ftrace_start_up == 1) + ftrace_hash_rec_enable(ops, 1); + ftrace_startup_enable(command); } static void ftrace_shutdown(int command) { + struct ftrace_ops *ops = &global_ops; + if (unlikely(ftrace_disabled)) return; @@ -1446,7 +1555,12 @@ static void ftrace_shutdown(int command) WARN_ON_ONCE(ftrace_start_up < 0); if (!ftrace_start_up) + ftrace_hash_rec_disable(ops, 1); + + if (!ftrace_start_up) { command |= FTRACE_DISABLE_CALLS; + ops->flags &= ~FTRACE_OPS_FL_ENABLED; + } if (saved_ftrace_func != ftrace_trace_function) { saved_ftrace_func = ftrace_trace_function; @@ -2668,6 +2782,7 @@ ftrace_regex_release(struct inode *inode, struct file *file) struct ftrace_iterator *iter; struct ftrace_hash **orig_hash; struct trace_parser *parser; + int filter_hash; int ret; mutex_lock(&ftrace_regex_lock); @@ -2687,15 +2802,26 @@ ftrace_regex_release(struct inode *inode, struct file *file) trace_parser_put(parser); if (file->f_mode & FMODE_WRITE) { - if (iter->flags & FTRACE_ITER_NOTRACE) - orig_hash = &iter->ops->notrace_hash; - else + filter_hash = !!(iter->flags & FTRACE_ITER_FILTER); + + if (filter_hash) orig_hash = &iter->ops->filter_hash; + else + orig_hash = &iter->ops->notrace_hash; mutex_lock(&ftrace_lock); + /* + * Remove the current set, update the hash and add + * them back. 
+ */ + ftrace_hash_rec_disable(iter->ops, filter_hash); ret = ftrace_hash_move(orig_hash, iter->hash); - if (!ret && ftrace_start_up && ftrace_enabled) - ftrace_run_update_code(FTRACE_ENABLE_CALLS); + if (!ret) { + ftrace_hash_rec_enable(iter->ops, filter_hash); + if (iter->ops->flags & FTRACE_OPS_FL_ENABLED + && ftrace_enabled) + ftrace_run_update_code(FTRACE_ENABLE_CALLS); + } mutex_unlock(&ftrace_lock); } free_ftrace_hash(iter->hash); -- cgit v1.2.3 From b848914ce39589d89ee0078a6d1ef452b464729e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 May 2011 09:27:52 -0400 Subject: ftrace: Implement separate user function filtering ftrace_ops that are registered to trace functions can now be agnostic to each other in respect to what functions they trace. Each ops has their own hash of the functions they want to trace and a hash to what they do not want to trace. A empty hash for the functions they want to trace denotes all functions should be traced that are not in the notrace hash. Cc: Paul E. McKenney Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 7 +- kernel/trace/ftrace.c | 193 ++++++++++++++++++++++++++++++-------- kernel/trace/trace_functions.c | 2 + kernel/trace/trace_irqsoff.c | 1 + kernel/trace/trace_sched_wakeup.c | 1 + kernel/trace/trace_stack.c | 1 + 6 files changed, 166 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ab1c46e70bb6..4609c0ece79a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -31,13 +31,18 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); struct ftrace_hash; +enum { + FTRACE_OPS_FL_ENABLED = 1 << 0, + FTRACE_OPS_FL_GLOBAL = 1 << 1, +}; + struct ftrace_ops { ftrace_func_t func; struct ftrace_ops *next; + unsigned long flags; #ifdef CONFIG_DYNAMIC_FTRACE struct ftrace_hash *notrace_hash; struct ftrace_hash *filter_hash; - unsigned long flags; #endif }; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 92b6fdf49ae5..6c7e1df39b57 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -87,24 +87,29 @@ static struct ftrace_ops ftrace_list_end __read_mostly = .func = ftrace_stub, }; -static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; +static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; +static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; static struct ftrace_ops global_ops; +static void +ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); + /* - * Traverse the ftrace_list, invoking all entries. The reason that we + * Traverse the ftrace_global_list, invoking all entries. The reason that we * can use rcu_dereference_raw() is that elements removed from this list * are simply leaked, so there is no need to interact with a grace-period * mechanism. The rcu_dereference_raw() calls are needed to handle - * concurrent insertions into the ftrace_list. + * concurrent insertions into the ftrace_global_list. * * Silly Alpha and silly pointer-speculation compiler optimizations! 
*/ -static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) +static void ftrace_global_list_func(unsigned long ip, + unsigned long parent_ip) { - struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/ + struct ftrace_ops *op = rcu_dereference_raw(ftrace_global_list); /*see above*/ while (op != &ftrace_list_end) { op->func(ip, parent_ip); @@ -163,11 +168,11 @@ static void update_global_ops(void) * function directly. Otherwise, we need to iterate over the * registered callers. */ - if (ftrace_list == &ftrace_list_end || - ftrace_list->next == &ftrace_list_end) - func = ftrace_list->func; + if (ftrace_global_list == &ftrace_list_end || + ftrace_global_list->next == &ftrace_list_end) + func = ftrace_global_list->func; else - func = ftrace_list_func; + func = ftrace_global_list_func; /* If we filter on pids, update to use the pid function */ if (!list_empty(&ftrace_pids)) { @@ -184,7 +189,11 @@ static void update_ftrace_function(void) update_global_ops(); - func = global_ops.func; + if (ftrace_ops_list == &ftrace_list_end || + ftrace_ops_list->next == &ftrace_list_end) + func = ftrace_ops_list->func; + else + func = ftrace_ops_list_func; #ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST ftrace_trace_function = func; @@ -198,10 +207,10 @@ static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) { ops->next = *list; /* - * We are entering ops into the ftrace_list but another + * We are entering ops into the list but another * CPU might be walking that list. We need to make sure * the ops->next pointer is valid before another CPU sees - * the ops pointer included into the ftrace_list. + * the ops pointer included into the list. */ rcu_assign_pointer(*list, ops); } @@ -238,7 +247,18 @@ static int __register_ftrace_function(struct ftrace_ops *ops) if (FTRACE_WARN_ON(ops == &global_ops)) return -EINVAL; - add_ftrace_ops(&ftrace_list, ops); + if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED)) + return -EBUSY; + + if (ops->flags & FTRACE_OPS_FL_GLOBAL) { + int first = ftrace_global_list == &ftrace_list_end; + add_ftrace_ops(&ftrace_global_list, ops); + ops->flags |= FTRACE_OPS_FL_ENABLED; + if (first) + add_ftrace_ops(&ftrace_ops_list, &global_ops); + } else + add_ftrace_ops(&ftrace_ops_list, ops); + if (ftrace_enabled) update_ftrace_function(); @@ -252,12 +272,24 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (ftrace_disabled) return -ENODEV; + if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED))) + return -EBUSY; + if (FTRACE_WARN_ON(ops == &global_ops)) return -EINVAL; - ret = remove_ftrace_ops(&ftrace_list, ops); + if (ops->flags & FTRACE_OPS_FL_GLOBAL) { + ret = remove_ftrace_ops(&ftrace_global_list, ops); + if (!ret && ftrace_global_list == &ftrace_list_end) + ret = remove_ftrace_ops(&ftrace_ops_list, &global_ops); + if (!ret) + ops->flags &= ~FTRACE_OPS_FL_ENABLED; + } else + ret = remove_ftrace_ops(&ftrace_ops_list, ops); + if (ret < 0) return ret; + if (ftrace_enabled) update_ftrace_function(); @@ -928,10 +960,6 @@ static const struct ftrace_hash empty_hash = { }; #define EMPTY_HASH ((struct ftrace_hash *)&empty_hash) -enum { - FTRACE_OPS_FL_ENABLED = 1, -}; - static struct ftrace_ops global_ops = { .func = ftrace_stub, .notrace_hash = EMPTY_HASH, @@ -1189,6 +1217,40 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) return 0; } +/* + * Test the hashes for this ops to see if we want to call + * the ops->func or not. 
+ * + * It's a match if the ip is in the ops->filter_hash or + * the filter_hash does not exist or is empty, + * AND + * the ip is not in the ops->notrace_hash. + */ +static int +ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) +{ + struct ftrace_hash *filter_hash; + struct ftrace_hash *notrace_hash; + int ret; + + /* The hashes are freed with call_rcu_sched() */ + preempt_disable_notrace(); + + filter_hash = rcu_dereference_raw(ops->filter_hash); + notrace_hash = rcu_dereference_raw(ops->notrace_hash); + + if ((!filter_hash || !filter_hash->count || + ftrace_lookup_ip(filter_hash, ip)) && + (!notrace_hash || !notrace_hash->count || + !ftrace_lookup_ip(notrace_hash, ip))) + ret = 1; + else + ret = 0; + preempt_enable_notrace(); + + return ret; +} + /* * This is a double for. Do not use 'break' to break out of the loop, * you must use a goto. @@ -1232,7 +1294,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, if (filter_hash) { hash = ops->filter_hash; other_hash = ops->notrace_hash; - if (!hash->count) + if (!hash || !hash->count) all = 1; } else { inc = !inc; @@ -1242,7 +1304,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, * If the notrace hash has no items, * then there's nothing to do. */ - if (!hash->count) + if (hash && !hash->count) return; } @@ -1256,11 +1318,11 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, * Only the filter_hash affects all records. * Update if the record is not in the notrace hash. */ - if (!ftrace_lookup_ip(other_hash, rec->ip)) + if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip)) match = 1; } else { - in_hash = !!ftrace_lookup_ip(hash, rec->ip); - in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip); + in_hash = hash && !!ftrace_lookup_ip(hash, rec->ip); + in_other_hash = other_hash && !!ftrace_lookup_ip(other_hash, rec->ip); /* * @@ -1546,6 +1608,7 @@ static void ftrace_run_update_code(int command) static ftrace_func_t saved_ftrace_func; static int ftrace_start_up; +static int global_start_up; static void ftrace_startup_enable(int command) { @@ -1562,14 +1625,25 @@ static void ftrace_startup_enable(int command) static void ftrace_startup(struct ftrace_ops *ops, int command) { + bool hash_enable = true; + if (unlikely(ftrace_disabled)) return; ftrace_start_up++; command |= FTRACE_ENABLE_CALLS; + /* ops marked global share the filter hashes */ + if (ops->flags & FTRACE_OPS_FL_GLOBAL) { + ops = &global_ops; + /* Don't update hash if global is already set */ + if (global_start_up) + hash_enable = false; + global_start_up++; + } + ops->flags |= FTRACE_OPS_FL_ENABLED; - if (ftrace_start_up == 1) + if (hash_enable) ftrace_hash_rec_enable(ops, 1); ftrace_startup_enable(command); @@ -1577,6 +1651,8 @@ static void ftrace_startup(struct ftrace_ops *ops, int command) static void ftrace_shutdown(struct ftrace_ops *ops, int command) { + bool hash_disable = true; + if (unlikely(ftrace_disabled)) return; @@ -1588,13 +1664,25 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command) */ WARN_ON_ONCE(ftrace_start_up < 0); - if (!ftrace_start_up) + if (ops->flags & FTRACE_OPS_FL_GLOBAL) { + ops = &global_ops; + global_start_up--; + WARN_ON_ONCE(global_start_up < 0); + /* Don't update hash if global still has users */ + if (global_start_up) { + WARN_ON_ONCE(!ftrace_start_up); + hash_disable = false; + } + } + + if (hash_disable) ftrace_hash_rec_disable(ops, 1); - if (!ftrace_start_up) { - command |= FTRACE_DISABLE_CALLS; + if (ops != &global_ops || !global_start_up) ops->flags &= 
~FTRACE_OPS_FL_ENABLED; - } + + if (!ftrace_start_up) + command |= FTRACE_DISABLE_CALLS; if (saved_ftrace_func != ftrace_trace_function) { saved_ftrace_func = ftrace_trace_function; @@ -2381,6 +2469,7 @@ static int ftrace_probe_registered; static void __enable_ftrace_function_probe(void) { + int ret; int i; if (ftrace_probe_registered) @@ -2395,13 +2484,16 @@ static void __enable_ftrace_function_probe(void) if (i == FTRACE_FUNC_HASHSIZE) return; - __register_ftrace_function(&trace_probe_ops); - ftrace_startup(&global_ops, 0); + ret = __register_ftrace_function(&trace_probe_ops); + if (!ret) + ftrace_startup(&trace_probe_ops, 0); + ftrace_probe_registered = 1; } static void __disable_ftrace_function_probe(void) { + int ret; int i; if (!ftrace_probe_registered) @@ -2414,8 +2506,10 @@ static void __disable_ftrace_function_probe(void) } /* no more funcs left */ - __unregister_ftrace_function(&trace_probe_ops); - ftrace_shutdown(&global_ops, 0); + ret = __unregister_ftrace_function(&trace_probe_ops); + if (!ret) + ftrace_shutdown(&trace_probe_ops, 0); + ftrace_probe_registered = 0; } @@ -3319,8 +3413,28 @@ static inline void ftrace_startup_enable(int command) { } # define ftrace_shutdown(ops, command) do { } while (0) # define ftrace_startup_sysctl() do { } while (0) # define ftrace_shutdown_sysctl() do { } while (0) + +static inline int +ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) +{ + return 1; +} + #endif /* CONFIG_DYNAMIC_FTRACE */ +static void +ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) +{ + /* see comment above ftrace_global_list_func */ + struct ftrace_ops *op = rcu_dereference_raw(ftrace_ops_list); + + while (op != &ftrace_list_end) { + if (ftrace_ops_test(op, ip)) + op->func(ip, parent_ip); + op = rcu_dereference_raw(op->next); + }; +} + static void clear_ftrace_swapper(void) { struct task_struct *p; @@ -3621,7 +3735,9 @@ int register_ftrace_function(struct ftrace_ops *ops) goto out_unlock; ret = __register_ftrace_function(ops); - ftrace_startup(&global_ops, 0); + if (!ret) + ftrace_startup(ops, 0); + out_unlock: mutex_unlock(&ftrace_lock); @@ -3640,7 +3756,8 @@ int unregister_ftrace_function(struct ftrace_ops *ops) mutex_lock(&ftrace_lock); ret = __unregister_ftrace_function(ops); - ftrace_shutdown(&global_ops, 0); + if (!ret) + ftrace_shutdown(ops, 0); mutex_unlock(&ftrace_lock); return ret; @@ -3670,11 +3787,11 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, ftrace_startup_sysctl(); /* we are starting ftrace again */ - if (ftrace_list != &ftrace_list_end) { - if (ftrace_list->next == &ftrace_list_end) - ftrace_trace_function = ftrace_list->func; + if (ftrace_ops_list != &ftrace_list_end) { + if (ftrace_ops_list->next == &ftrace_list_end) + ftrace_trace_function = ftrace_ops_list->func; else - ftrace_trace_function = ftrace_list_func; + ftrace_trace_function = ftrace_ops_list_func; } } else { diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 16aee4d44e8f..8d0e1cc4e974 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -149,11 +149,13 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __read_mostly = { .func = function_trace_call, + .flags = FTRACE_OPS_FL_GLOBAL, }; static struct ftrace_ops trace_stack_ops __read_mostly = { .func = function_stack_trace_call, + .flags = FTRACE_OPS_FL_GLOBAL, }; /* Our two options */ diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index a4969b47afc1..c77424be284d 
100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -153,6 +153,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __read_mostly = { .func = irqsoff_tracer_call, + .flags = FTRACE_OPS_FL_GLOBAL, }; #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 7319559ed59f..f029dd4fd2ca 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -129,6 +129,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __read_mostly = { .func = wakeup_tracer_call, + .flags = FTRACE_OPS_FL_GLOBAL, }; #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 4c5dead0c239..b0b53b8e4c25 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -133,6 +133,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __read_mostly = { .func = stack_trace_call, + .flags = FTRACE_OPS_FL_GLOBAL, }; static ssize_t -- cgit v1.2.3 From cdbe61bfe70440939e457fb4a8d0995eaaed17de Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 5 May 2011 21:14:55 -0400 Subject: ftrace: Allow dynamically allocated function tracers Now that functions may be selected individually, it only makes sense that we should allow dynamically allocated trace structures to be traced. This will allow perf to allocate a ftrace_ops structure at runtime and use it to pick and choose which functions that structure will trace. Note, a dynamically allocated ftrace_ops will always be called indirectly instead of being called directly from the mcount in entry.S. This is because there's no safe way to prevent mcount from being preempted before calling the function, unless we modify every entry.S to do so (not likely). Thus, dynamically allocated functions will now be called by the ftrace_ops_list_func() that loops through the ops that are allocated if there are more than one op allocated at a time. This loop is protected with a preempt_disable. To determine if an ftrace_ops structure is allocated or not, a new util function was added to the kernel/extable.c called core_kernel_data(), which returns 1 if the address is between _sdata and _edata. Cc: Paul E. 
McKenney Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 1 + include/linux/kernel.h | 1 + kernel/extable.c | 8 ++++++++ kernel/trace/ftrace.c | 37 ++++++++++++++++++++++++++++++------- 4 files changed, 40 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4609c0ece79a..caba694a62b6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -34,6 +34,7 @@ struct ftrace_hash; enum { FTRACE_OPS_FL_ENABLED = 1 << 0, FTRACE_OPS_FL_GLOBAL = 1 << 1, + FTRACE_OPS_FL_DYNAMIC = 1 << 2, }; struct ftrace_ops { diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 00cec4dc0ae2..f37ba716ef8b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -283,6 +283,7 @@ extern char *get_options(const char *str, int nints, int *ints); extern unsigned long long memparse(const char *ptr, char **retptr); extern int core_kernel_text(unsigned long addr); +extern int core_kernel_data(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); extern int func_ptr_is_kernel_text(void *ptr); diff --git a/kernel/extable.c b/kernel/extable.c index 7f8f263f8524..c2d625fcda77 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -72,6 +72,14 @@ int core_kernel_text(unsigned long addr) return 0; } +int core_kernel_data(unsigned long addr) +{ + if (addr >= (unsigned long)_sdata && + addr < (unsigned long)_edata) + return 1; + return 0; +} + int __kernel_text_address(unsigned long addr) { if (core_kernel_text(addr)) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6c7e1df39b57..5b3ee04e39d9 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -189,8 +189,14 @@ static void update_ftrace_function(void) update_global_ops(); + /* + * If we are at the end of the list and this ops is + * not dynamic, then have the mcount trampoline call + * the function directly + */ if (ftrace_ops_list == &ftrace_list_end || - ftrace_ops_list->next == &ftrace_list_end) + (ftrace_ops_list->next == &ftrace_list_end && + !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC))) func = ftrace_ops_list->func; else func = ftrace_ops_list_func; @@ -250,6 +256,9 @@ static int __register_ftrace_function(struct ftrace_ops *ops) if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED)) return -EBUSY; + if (!core_kernel_data((unsigned long)ops)) + ops->flags |= FTRACE_OPS_FL_DYNAMIC; + if (ops->flags & FTRACE_OPS_FL_GLOBAL) { int first = ftrace_global_list == &ftrace_list_end; add_ftrace_ops(&ftrace_global_list, ops); @@ -293,6 +302,13 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (ftrace_enabled) update_ftrace_function(); + /* + * Dynamic ops may be freed, we must make sure that all + * callers are done before leaving this function. + */ + if (ops->flags & FTRACE_OPS_FL_DYNAMIC) + synchronize_sched(); + return 0; } @@ -1225,6 +1241,9 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) * the filter_hash does not exist or is empty, * AND * the ip is not in the ops->notrace_hash. + * + * This needs to be called with preemption disabled as + * the hashes are freed with call_rcu_sched(). 
*/ static int ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) @@ -1233,9 +1252,6 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) struct ftrace_hash *notrace_hash; int ret; - /* The hashes are freed with call_rcu_sched() */ - preempt_disable_notrace(); - filter_hash = rcu_dereference_raw(ops->filter_hash); notrace_hash = rcu_dereference_raw(ops->notrace_hash); @@ -1246,7 +1262,6 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) ret = 1; else ret = 0; - preempt_enable_notrace(); return ret; } @@ -3425,14 +3440,20 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) { - /* see comment above ftrace_global_list_func */ - struct ftrace_ops *op = rcu_dereference_raw(ftrace_ops_list); + struct ftrace_ops *op; + /* + * Some of the ops may be dynamically allocated, + * they must be freed after a synchronize_sched(). + */ + preempt_disable_notrace(); + op = rcu_dereference_raw(ftrace_ops_list); while (op != &ftrace_list_end) { if (ftrace_ops_test(op, ip)) op->func(ip, parent_ip); op = rcu_dereference_raw(op->next); }; + preempt_enable_notrace(); } static void clear_ftrace_swapper(void) @@ -3743,6 +3764,7 @@ int register_ftrace_function(struct ftrace_ops *ops) mutex_unlock(&ftrace_lock); return ret; } +EXPORT_SYMBOL_GPL(register_ftrace_function); /** * unregister_ftrace_function - unregister a function for profiling. @@ -3762,6 +3784,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) return ret; } +EXPORT_SYMBOL_GPL(unregister_ftrace_function); int ftrace_enable_sysctl(struct ctl_table *table, int write, -- cgit v1.2.3 From 936e074b286ae779f134312178dbab139ee7ea52 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 5 May 2011 22:54:01 -0400 Subject: ftrace: Modify ftrace_set_filter/notrace to take ops Since users of the function tracer can now pick and choose which functions they want to trace agnostically from other users of the function tracer, we need to pass the ops struct to the ftrace_set_filter() functions. The functions ftrace_set_global_filter() and ftrace_set_global_notrace() is added to keep the old filter functions which are used to modify the generic function tracers. 
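A hedged usage sketch from a module's point of view, assuming the interface added here; my_callback, my_ops and my_tracer_init are hypothetical names, and the filter pattern is only an example.

#include <linux/ftrace.h>
#include <linux/init.h>

static void my_callback(unsigned long ip, unsigned long parent_ip)
{
	/* hypothetical per-user callback, run only for the filtered functions */
}

static struct ftrace_ops my_ops = {
	.func = my_callback,
};

static int __init my_tracer_init(void)
{
	unsigned char filter[] = "kmalloc*";

	/* the filter applies to my_ops only; other tracers are unaffected */
	ftrace_set_filter(&my_ops, filter, sizeof(filter) - 1, 1);
	return register_ftrace_function(&my_ops);
}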
Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 7 ++++++- kernel/trace/ftrace.c | 46 +++++++++++++++++++++++++++++++++++++++++-- kernel/trace/trace_selftest.c | 4 ++-- 3 files changed, 52 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index caba694a62b6..9d88e1cb5dbb 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -179,7 +179,12 @@ struct dyn_ftrace { }; int ftrace_force_update(void); -void ftrace_set_filter(unsigned char *buf, int len, int reset); +void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, + int len, int reset); +void ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, + int len, int reset); +void ftrace_set_global_filter(unsigned char *buf, int len, int reset); +void ftrace_set_global_notrace(unsigned char *buf, int len, int reset); int register_ftrace_command(struct ftrace_func_command *cmd); int unregister_ftrace_command(struct ftrace_func_command *cmd); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5b3ee04e39d9..d017c2c82c44 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2826,6 +2826,10 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, struct ftrace_hash *hash; int ret; + /* All global ops uses the global ops filters */ + if (ops->flags & FTRACE_OPS_FL_GLOBAL) + ops = &global_ops; + if (unlikely(ftrace_disabled)) return -ENODEV; @@ -2856,6 +2860,41 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, /** * ftrace_set_filter - set a function to filter on in ftrace + * @ops - the ops to set the filter with + * @buf - the string that holds the function filter text. + * @len - the length of the string. + * @reset - non zero to reset all filters before applying this filter. + * + * Filters denote which functions should be enabled when tracing is enabled. + * If @buf is NULL and reset is set, all functions will be enabled for tracing. + */ +void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, + int len, int reset) +{ + ftrace_set_regex(ops, buf, len, reset, 1); +} +EXPORT_SYMBOL_GPL(ftrace_set_filter); + +/** + * ftrace_set_notrace - set a function to not trace in ftrace + * @ops - the ops to set the notrace filter with + * @buf - the string that holds the function notrace text. + * @len - the length of the string. + * @reset - non zero to reset all filters before applying this filter. + * + * Notrace Filters denote which functions should not be enabled when tracing + * is enabled. If @buf is NULL and reset is set, all functions will be enabled + * for tracing. + */ +void ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, + int len, int reset) +{ + ftrace_set_regex(ops, buf, len, reset, 0); +} +EXPORT_SYMBOL_GPL(ftrace_set_notrace); +/** + * ftrace_set_filter - set a function to filter on in ftrace + * @ops - the ops to set the filter with * @buf - the string that holds the function filter text. * @len - the length of the string. * @reset - non zero to reset all filters before applying this filter. @@ -2863,13 +2902,15 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, * Filters denote which functions should be enabled when tracing is enabled. * If @buf is NULL and reset is set, all functions will be enabled for tracing. 
*/ -void ftrace_set_filter(unsigned char *buf, int len, int reset) +void ftrace_set_global_filter(unsigned char *buf, int len, int reset) { ftrace_set_regex(&global_ops, buf, len, reset, 1); } +EXPORT_SYMBOL_GPL(ftrace_set_global_filter); /** * ftrace_set_notrace - set a function to not trace in ftrace + * @ops - the ops to set the notrace filter with * @buf - the string that holds the function notrace text. * @len - the length of the string. * @reset - non zero to reset all filters before applying this filter. @@ -2878,10 +2919,11 @@ void ftrace_set_filter(unsigned char *buf, int len, int reset) * is enabled. If @buf is NULL and reset is set, all functions will be enabled * for tracing. */ -void ftrace_set_notrace(unsigned char *buf, int len, int reset) +void ftrace_set_global_notrace(unsigned char *buf, int len, int reset) { ftrace_set_regex(&global_ops, buf, len, reset, 0); } +EXPORT_SYMBOL_GPL(ftrace_set_global_notrace); /* * command line interface to allow users to set filters on boot up. diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 659732eba07c..0fa2db305b7c 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -131,7 +131,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, func_name = "*" __stringify(DYN_FTRACE_TEST_NAME); /* filter only on our function */ - ftrace_set_filter(func_name, strlen(func_name), 1); + ftrace_set_global_filter(func_name, strlen(func_name), 1); /* enable tracing */ ret = tracer_init(trace, tr); @@ -181,7 +181,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, tracer_enabled = save_tracer_enabled; /* Enable tracing on all functions again */ - ftrace_set_filter(NULL, 0, 1); + ftrace_set_global_filter(NULL, 0, 1); return ret; } -- cgit v1.2.3 From b4bc842802db3314f9a657094da0450a903ea619 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 7 Feb 2011 16:02:25 -0800 Subject: module: deal with alignment issues in built-in module versions On m68k, natural alignment is a 2-byte boundary, but we are trying to align structures in the __modver section on a sizeof(void *) boundary. This causes trouble when we try to access elements in this section in array-like fashion when creating "version" attributes for built-in modules. Moreover, as DaveM said, we can't reliably put structures into independent objects, put them into a special section, and then expect array access over them (via the section boundaries) after linking the objects together to just "work" due to variable alignment choices in different situations. The only solution that seems to work reliably is to make an array of plain pointers to the objects in question and put those pointers in the special section.
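The pointer-in-section trick is not specific to __modver. A userspace sketch (assuming a GNU toolchain, which synthesizes __start_/__stop_ symbols for any section whose name is a valid C identifier) shows why a walk over pointers is safe where a walk over the structures themselves is not: every entry is pointer-sized and pointer-aligned, regardless of how each object file chose to align the structures.

#include <stdio.h>

struct widget { const char *name; int version; };

/* The structures live wherever the compiler places them... */
static struct widget w1 = { "alpha", 1 };
static struct widget w2 = { "beta", 2 };

/* ...only plain pointers go into the special section. */
static struct widget *p1 __attribute__((used, section("widget_ptrs"))) = &w1;
static struct widget *p2 __attribute__((used, section("widget_ptrs"))) = &w2;

extern struct widget *__start_widget_ptrs[];
extern struct widget *__stop_widget_ptrs[];

int main(void)
{
        struct widget **p;

        for (p = __start_widget_ptrs; p < __stop_widget_ptrs; p++)
                printf("%s v%d\n", (*p)->name, (*p)->version);
        return 0;
}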
Reported-by: Geert Uytterhoeven Signed-off-by: Dmitry Torokhov Signed-off-by: Rusty Russell --- include/linux/module.h | 10 +++++----- kernel/params.c | 9 ++++++--- 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 5de42043dff0..4cfebc211695 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -174,10 +174,7 @@ extern struct module __this_module; #define MODULE_VERSION(_version) \ extern ssize_t __modver_version_show(struct module_attribute *, \ struct module *, char *); \ - static struct module_version_attribute __modver_version_attr \ - __used \ - __attribute__ ((__section__ ("__modver"),aligned(sizeof(void *)))) \ - = { \ + static struct module_version_attribute ___modver_attr = { \ .mattr = { \ .attr = { \ .name = "version", \ @@ -187,7 +184,10 @@ extern struct module __this_module; }, \ .module_name = KBUILD_MODNAME, \ .version = _version, \ - } + }; \ + static const struct module_version_attribute \ + __used __attribute__ ((__section__ ("__modver"))) \ + * __moduleparam_const __modver_attr = &___modver_attr #endif /* Optional firmware file (or files) needed by the module diff --git a/kernel/params.c b/kernel/params.c index 7ab388a48a2e..28c5d5c83f6b 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -821,15 +821,18 @@ ssize_t __modver_version_show(struct module_attribute *mattr, return sprintf(buf, "%s\n", vattr->version); } -extern struct module_version_attribute __start___modver[], __stop___modver[]; +extern const struct module_version_attribute *__start___modver[]; +extern const struct module_version_attribute *__stop___modver[]; static void __init version_sysfs_builtin(void) { - const struct module_version_attribute *vattr; + const struct module_version_attribute **p; struct module_kobject *mk; int err; - for (vattr = __start___modver; vattr < __stop___modver; vattr++) { + for (p = __start___modver; p < __stop___modver; p++) { + const struct module_version_attribute *vattr = *p; + mk = locate_module_kobject(vattr->module_name); if (mk) { err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr); -- cgit v1.2.3 From 9b73a5840c7d5f77e5766626716df13787cb258c Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 7 Feb 2011 16:02:27 -0800 Subject: module: do not hide __modver_version_show declaration behind ifdef Doing so prevents the following warning from sparse: CHECK kernel/params.c kernel/params.c:817:9: warning: symbol '__modver_version_show' was not declared. Should it be static? since kernel/params.c is never compiled with MODULE being set. 
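The warning in question is sparse's usual complaint about a global definition with no prototype in scope. A contrived two-file sketch (names invented) shows why hiding the declaration behind #ifdef MODULE triggers it when the translation unit is only ever built without MODULE:

/* demo.h */
#ifdef MODULE
extern int demo_show(char *buf);        /* declaration invisible without MODULE */
#endif

/* demo.c -- never built with MODULE set, includes demo.h */
int demo_show(char *buf)        /* sparse: symbol 'demo_show' was not declared.
                                   Should it be static? */
{
        return 0;
}

Moving the extern declaration out of the #ifdef, as done here, gives sparse (and the compiler) a prototype in both configurations.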
Signed-off-by: Dmitry Torokhov Signed-off-by: Rusty Russell --- include/linux/module.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 4cfebc211695..23996ad147e7 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -64,6 +64,9 @@ struct module_version_attribute { const char *version; } __attribute__ ((__aligned__(sizeof(void *)))); +extern ssize_t __modver_version_show(struct module_attribute *, + struct module *, char *); + struct module_kobject { struct kobject kobj; @@ -172,8 +175,6 @@ extern struct module __this_module; #define MODULE_VERSION(_version) MODULE_INFO(version, _version) #else #define MODULE_VERSION(_version) \ - extern ssize_t __modver_version_show(struct module_attribute *, \ - struct module *, char *); \ static struct module_version_attribute ___modver_attr = { \ .mattr = { \ .attr = { \ -- cgit v1.2.3 From a288bd651f4180c224cfddf837a0416157a36661 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Thu, 19 May 2011 16:55:25 -0600 Subject: module: remove 64 bit alignment padding from struct module with CONFIG_TRACE* Reorder struct module to remove 24 bytes of alignment padding on 64 bit builds when the CONFIG_TRACE options are selected. This allows the structure to fit into one fewer cache lines, and its size drops from 592 to 568 on x86_64. Signed-off-by: Richard Kennedy Signed-off-by: Rusty Russell --- include/linux/module.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 23996ad147e7..65cc6cc73ca8 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -368,34 +368,35 @@ struct module struct module_notes_attrs *notes_attrs; #endif + /* The command line arguments (may be mangled). People like + keeping pointers to this stuff */ + char *args; + #ifdef CONFIG_SMP /* Per-cpu data. */ void __percpu *percpu; unsigned int percpu_size; #endif - /* The command line arguments (may be mangled). People like - keeping pointers to this stuff */ - char *args; #ifdef CONFIG_TRACEPOINTS - struct tracepoint * const *tracepoints_ptrs; unsigned int num_tracepoints; + struct tracepoint * const *tracepoints_ptrs; #endif #ifdef HAVE_JUMP_LABEL struct jump_entry *jump_entries; unsigned int num_jump_entries; #endif #ifdef CONFIG_TRACING - const char **trace_bprintk_fmt_start; unsigned int num_trace_bprintk_fmt; + const char **trace_bprintk_fmt_start; #endif #ifdef CONFIG_EVENT_TRACING struct ftrace_event_call **trace_events; unsigned int num_trace_events; #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD - unsigned long *ftrace_callsites; unsigned int num_ftrace_callsites; + unsigned long *ftrace_callsites; #endif #ifdef CONFIG_MODULE_UNLOAD -- cgit v1.2.3 From c5be0b2eb1ca05e0cd747f9c0ba552c6ee8827a0 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Thu, 19 May 2011 16:55:25 -0600 Subject: module: reorder kparam_array to remove alignment padding on 64 bit builds Reorder structure kparam_array to remove 8 bytes of alignment padding on 64 bit builds, dropping its size from 40 to 32 bytes. Also update the macro module_param_array_named to initialise the structure using its member names to allow it to be changed without touching all its call sites. 'git grep' finds module_param_array in 1037 places so this patch will save a small amount of data space across many modules. 
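The saving is easy to reproduce in isolation. A standalone sketch, assuming an LP64 target (8-byte pointers, 4-byte unsigned int), mirrors the old and new member order of kparam_array:

#include <stdio.h>

struct kparam_array_old {       /* 4 + 4(pad) + 8 + 8 + 4 + 4(pad) + 8 = 40 bytes */
        unsigned int max;
        unsigned int *num;
        const void *ops;
        unsigned int elemsize;
        void *elem;
};

struct kparam_array_new {       /* 4 + 4 + 8 + 8 + 8 = 32 bytes, no padding */
        unsigned int max;
        unsigned int elemsize;
        unsigned int *num;
        const void *ops;
        void *elem;
};

int main(void)
{
        printf("old: %zu bytes, new: %zu bytes\n",
               sizeof(struct kparam_array_old),
               sizeof(struct kparam_array_new));
        return 0;
}

Switching module_param_array_named() to designated initializers is what makes the reordering safe: the macro no longer depends on the member order at all.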
Signed-off-by: Richard Kennedy Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 07b41951e3fa..ddaae98c53f9 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -67,9 +67,9 @@ struct kparam_string { struct kparam_array { unsigned int max; + unsigned int elemsize; unsigned int *num; const struct kernel_param_ops *ops; - unsigned int elemsize; void *elem; }; @@ -371,8 +371,9 @@ extern int param_get_invbool(char *buffer, const struct kernel_param *kp); */ #define module_param_array_named(name, array, type, nump, perm) \ static const struct kparam_array __param_arr_##name \ - = { ARRAY_SIZE(array), nump, ¶m_ops_##type, \ - sizeof(array[0]), array }; \ + = { .max = ARRAY_SIZE(array), .num = nump, \ + .ops = ¶m_ops_##type, \ + .elemsize = sizeof(array[0]), .elem = array }; \ __module_param_call(MODULE_PARAM_PREFIX, name, \ ¶m_array_ops, \ .arr = &__param_arr_##name, \ -- cgit v1.2.3 From de4d8d53465483168d6a627d409ee2d09d8e3308 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 19 Apr 2011 21:49:58 +0200 Subject: module: each_symbol_section instead of each_symbol Instead of having a callback function for each symbol in the kernel, have a callback for each array of symbols. This eases the logic when we move to sorted symbols and binary search. Signed-off-by: Rusty Russell Signed-off-by: Alessio Igor Bogani --- include/linux/module.h | 5 +++-- kernel/module.c | 42 +++++++++++++++++++++++++++--------------- 2 files changed, 30 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 65cc6cc73ca8..49f4ad0ddec2 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -477,8 +477,9 @@ const struct kernel_symbol *find_symbol(const char *name, bool warn); /* Walk the exported symbol table */ -bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner, - unsigned int symnum, void *data), void *data); +bool each_symbol_section(bool (*fn)(const struct symsearch *arr, + struct module *owner, + void *data), void *data); /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if symnum out of range. */ diff --git a/kernel/module.c b/kernel/module.c index 0e6f97f43c88..e8aa462301e7 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -240,23 +240,24 @@ static bool each_symbol_in_section(const struct symsearch *arr, struct module *owner, bool (*fn)(const struct symsearch *syms, struct module *owner, - unsigned int symnum, void *data), + void *data), void *data) { - unsigned int i, j; + unsigned int j; for (j = 0; j < arrsize; j++) { - for (i = 0; i < arr[j].stop - arr[j].start; i++) - if (fn(&arr[j], owner, i, data)) - return true; + if (fn(&arr[j], owner, data)) + return true; } return false; } /* Returns true as soon as fn returns true, otherwise false. 
*/ -bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner, - unsigned int symnum, void *data), void *data) +bool each_symbol_section(bool (*fn)(const struct symsearch *arr, + struct module *owner, + void *data), + void *data) { struct module *mod; static const struct symsearch arr[] = { @@ -309,7 +310,7 @@ bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner, } return false; } -EXPORT_SYMBOL_GPL(each_symbol); +EXPORT_SYMBOL_GPL(each_symbol_section); struct find_symbol_arg { /* Input */ @@ -323,15 +324,12 @@ struct find_symbol_arg { const struct kernel_symbol *sym; }; -static bool find_symbol_in_section(const struct symsearch *syms, - struct module *owner, - unsigned int symnum, void *data) +static bool check_symbol(const struct symsearch *syms, + struct module *owner, + unsigned int symnum, void *data) { struct find_symbol_arg *fsa = data; - if (strcmp(syms->start[symnum].name, fsa->name) != 0) - return false; - if (!fsa->gplok) { if (syms->licence == GPL_ONLY) return false; @@ -365,6 +363,20 @@ static bool find_symbol_in_section(const struct symsearch *syms, return true; } +static bool find_symbol_in_section(const struct symsearch *syms, + struct module *owner, + void *data) +{ + struct find_symbol_arg *fsa = data; + unsigned int i; + + for (i = 0; i < syms->stop - syms->start; i++) { + if (strcmp(syms->start[i].name, fsa->name) == 0) + return check_symbol(syms, owner, i, data); + } + return false; +} + /* Find a symbol and return it, along with, (optional) crc and * (optional) module which owns it. Needs preempt disabled or module_mutex. */ const struct kernel_symbol *find_symbol(const char *name, @@ -379,7 +391,7 @@ const struct kernel_symbol *find_symbol(const char *name, fsa.gplok = gplok; fsa.warn = warn; - if (each_symbol(find_symbol_in_section, &fsa)) { + if (each_symbol_section(find_symbol_in_section, &fsa)) { if (owner) *owner = fsa.owner; if (crc) -- cgit v1.2.3 From f02e8a6596b7dc9b2171f7ff5654039ef0950cdc Mon Sep 17 00:00:00 2001 From: Alessio Igor Bogani Date: Thu, 14 Apr 2011 14:59:39 +0200 Subject: module: Sort exported symbols This patch places every exported symbol in its own section (i.e. "___ksymtab+printk"). Thus the linker will use its SORT() directive to sort and finally merge all symbols into the right and final section (i.e. "__ksymtab"). The symbol-prefixed archs use an underscore as a prefix for symbols. To avoid collision we use a different character to create the temporary section names. This work was supported by a hardware donation from the CE Linux Forum.
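Concretely, with this change an export such as EXPORT_SYMBOL_GPL(kfree) expands to roughly the following (CRC handling, the symbol prefix and the extern redeclaration are omitted in this sketch); the SORT(___ksymtab_gpl+*) patterns added to the linker scripts below then emit these single-symbol input sections in name order into the final __ksymtab_gpl:

/* rough expansion sketch, simplified from the macro in the hunk below */
static const char __kstrtab_kfree[]
        __attribute__((section("__ksymtab_strings")))
        = "kfree";

static const struct kernel_symbol __ksymtab_kfree
        __used
        __attribute__((section("___ksymtab_gpl+kfree"), unused))
        = { (unsigned long)&kfree, __kstrtab_kfree };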
Signed-off-by: Alessio Igor Bogani Signed-off-by: Rusty Russell (folded in '+' fixup) Tested-by: Dirk Behme --- include/asm-generic/vmlinux.lds.h | 20 ++++++++++---------- include/linux/module.h | 4 ++-- scripts/module-common.lds | 11 +++++++++++ 3 files changed, 23 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index bd297a20ab98..b27445e00b6c 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -274,70 +274,70 @@ /* Kernel symbol table: Normal symbols */ \ __ksymtab : AT(ADDR(__ksymtab) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab) = .; \ - *(__ksymtab) \ + *(SORT(___ksymtab+*)) \ VMLINUX_SYMBOL(__stop___ksymtab) = .; \ } \ \ /* Kernel symbol table: GPL-only symbols */ \ __ksymtab_gpl : AT(ADDR(__ksymtab_gpl) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab_gpl) = .; \ - *(__ksymtab_gpl) \ + *(SORT(___ksymtab_gpl+*)) \ VMLINUX_SYMBOL(__stop___ksymtab_gpl) = .; \ } \ \ /* Kernel symbol table: Normal unused symbols */ \ __ksymtab_unused : AT(ADDR(__ksymtab_unused) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab_unused) = .; \ - *(__ksymtab_unused) \ + *(SORT(___ksymtab_unused+*)) \ VMLINUX_SYMBOL(__stop___ksymtab_unused) = .; \ } \ \ /* Kernel symbol table: GPL-only unused symbols */ \ __ksymtab_unused_gpl : AT(ADDR(__ksymtab_unused_gpl) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab_unused_gpl) = .; \ - *(__ksymtab_unused_gpl) \ + *(SORT(___ksymtab_unused_gpl+*)) \ VMLINUX_SYMBOL(__stop___ksymtab_unused_gpl) = .; \ } \ \ /* Kernel symbol table: GPL-future-only symbols */ \ __ksymtab_gpl_future : AT(ADDR(__ksymtab_gpl_future) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab_gpl_future) = .; \ - *(__ksymtab_gpl_future) \ + *(SORT(___ksymtab_gpl_future+*)) \ VMLINUX_SYMBOL(__stop___ksymtab_gpl_future) = .; \ } \ \ /* Kernel symbol table: Normal symbols */ \ __kcrctab : AT(ADDR(__kcrctab) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab) = .; \ - *(__kcrctab) \ + *(SORT(___kcrctab+*)) \ VMLINUX_SYMBOL(__stop___kcrctab) = .; \ } \ \ /* Kernel symbol table: GPL-only symbols */ \ __kcrctab_gpl : AT(ADDR(__kcrctab_gpl) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab_gpl) = .; \ - *(__kcrctab_gpl) \ + *(SORT(___kcrctab_gpl+*)) \ VMLINUX_SYMBOL(__stop___kcrctab_gpl) = .; \ } \ \ /* Kernel symbol table: Normal unused symbols */ \ __kcrctab_unused : AT(ADDR(__kcrctab_unused) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab_unused) = .; \ - *(__kcrctab_unused) \ + *(SORT(___kcrctab_unused+*)) \ VMLINUX_SYMBOL(__stop___kcrctab_unused) = .; \ } \ \ /* Kernel symbol table: GPL-only unused symbols */ \ __kcrctab_unused_gpl : AT(ADDR(__kcrctab_unused_gpl) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab_unused_gpl) = .; \ - *(__kcrctab_unused_gpl) \ + *(SORT(___kcrctab_unused_gpl+*)) \ VMLINUX_SYMBOL(__stop___kcrctab_unused_gpl) = .; \ } \ \ /* Kernel symbol table: GPL-future-only symbols */ \ __kcrctab_gpl_future : AT(ADDR(__kcrctab_gpl_future) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab_gpl_future) = .; \ - *(__kcrctab_gpl_future) \ + *(SORT(___kcrctab_gpl_future+*)) \ VMLINUX_SYMBOL(__stop___kcrctab_gpl_future) = .; \ } \ \ diff --git a/include/linux/module.h b/include/linux/module.h index 49f4ad0ddec2..d9ca2d5dc6d0 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -224,7 +224,7 @@ struct module_use { extern void *__crc_##sym __attribute__((weak)); \ static const unsigned long __kcrctab_##sym \ __used \ - 
__attribute__((section("__kcrctab" sec), unused)) \ + __attribute__((section("___kcrctab" sec "+" #sym), unused)) \ = (unsigned long) &__crc_##sym; #else #define __CRC_SYMBOL(sym, sec) @@ -239,7 +239,7 @@ struct module_use { = MODULE_SYMBOL_PREFIX #sym; \ static const struct kernel_symbol __ksymtab_##sym \ __used \ - __attribute__((section("__ksymtab" sec), unused)) \ + __attribute__((section("___ksymtab" sec "+" #sym), unused)) \ = { (unsigned long)&sym, __kstrtab_##sym } #define EXPORT_SYMBOL(sym) \ diff --git a/scripts/module-common.lds b/scripts/module-common.lds index 47a1f9ae0ede..0865b3e752be 100644 --- a/scripts/module-common.lds +++ b/scripts/module-common.lds @@ -5,4 +5,15 @@ */ SECTIONS { /DISCARD/ : { *(.discard) } + + __ksymtab : { *(SORT(___ksymtab+*)) } + __ksymtab_gpl : { *(SORT(___ksymtab_gpl+*)) } + __ksymtab_unused : { *(SORT(___ksymtab_unused+*)) } + __ksymtab_unused_gpl : { *(SORT(___ksymtab_unused_gpl+*)) } + __ksymtab_gpl_future : { *(SORT(___ksymtab_gpl_future+*)) } + __kcrctab : { *(SORT(___kcrctab+*)) } + __kcrctab_gpl : { *(SORT(___kcrctab_gpl+*)) } + __kcrctab_unused : { *(SORT(___kcrctab_unused+*)) } + __kcrctab_unused_gpl : { *(SORT(___kcrctab_unused_gpl+*)) } + __kcrctab_gpl_future : { *(SORT(___kcrctab_gpl_future+*)) } } -- cgit v1.2.3 From 1a94dc35bc5c166d89913dc01a49d27a3c21a455 Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Thu, 14 Apr 2011 20:00:19 +0200 Subject: lib: Add generic binary search function to the kernel. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There a large number hand-coded binary searches in the kernel (run "git grep search | grep binary" to find many of them). Since in my experience, hand-coding binary searches can be error-prone, it seems worth cleaning this up by providing a generic binary search function. This generic binary search implementation comes from Ksplice. It has the same basic API as the C library bsearch() function. Ksplice uses it in half a dozen places with 4 different comparison functions, and I think our code is substantially cleaner because of this. 
Signed-off-by: Tim Abbott Extra-bikeshedding-by: Alan Jenkins Extra-bikeshedding-by: André Goddard Rosa Extra-bikeshedding-by: Rusty Russell Signed-off-by: Rusty Russell Signed-off-by: Alessio Igor Bogani Signed-off-by: Rusty Russell --- include/linux/bsearch.h | 9 +++++++++ lib/Makefile | 3 ++- lib/bsearch.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 include/linux/bsearch.h create mode 100644 lib/bsearch.c (limited to 'include/linux') diff --git a/include/linux/bsearch.h b/include/linux/bsearch.h new file mode 100644 index 000000000000..90b1aa867224 --- /dev/null +++ b/include/linux/bsearch.h @@ -0,0 +1,9 @@ +#ifndef _LINUX_BSEARCH_H +#define _LINUX_BSEARCH_H + +#include + +void *bsearch(const void *key, const void *base, size_t num, size_t size, + int (*cmp)(const void *key, const void *elt)); + +#endif /* _LINUX_BSEARCH_H */ diff --git a/lib/Makefile b/lib/Makefile index ef0f28571156..4b49a249064b 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -21,7 +21,8 @@ lib-y += kobject.o kref.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ - string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o + string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o \ + bsearch.o obj-y += kstrtox.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o diff --git a/lib/bsearch.c b/lib/bsearch.c new file mode 100644 index 000000000000..5b54758e2afb --- /dev/null +++ b/lib/bsearch.c @@ -0,0 +1,53 @@ +/* + * A generic implementation of binary search for the Linux kernel + * + * Copyright (C) 2008-2009 Ksplice, Inc. + * Author: Tim Abbott + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2. + */ + +#include +#include + +/* + * bsearch - binary search an array of elements + * @key: pointer to item being searched for + * @base: pointer to first element to search + * @num: number of elements + * @size: size of each element + * @cmp: pointer to comparison function + * + * This function does a binary search on the given array. The + * contents of the array should already be in ascending sorted order + * under the provided comparison function. + * + * Note that the key need not have the same type as the elements in + * the array, e.g. key could be a string and the comparison function + * could compare the string with the struct's name field. However, if + * the key and elements in the array are of the same type, you can use + * the same comparison function for both sort() and bsearch(). 
+ */ +void *bsearch(const void *key, const void *base, size_t num, size_t size, + int (*cmp)(const void *key, const void *elt)) +{ + size_t start = 0, end = num; + int result; + + while (start < end) { + size_t mid = start + (end - start) / 2; + + result = cmp(key, base + mid * size); + if (result < 0) + end = mid; + else if (result > 0) + start = mid + 1; + else + return (void *)base + mid * size; + } + + return NULL; +} +EXPORT_SYMBOL(bsearch); -- cgit v1.2.3 From d0f1fed29e6e73d9d17f4c91a5896a4ce3938d45 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 19 Apr 2011 12:43:45 +0100 Subject: Add a strtobool function matching semantics of existing in kernel equivalents This is a rename of the usr_strtobool proposal, which was a renamed, relocated and fixed version of previous kstrtobool RFC Signed-off-by: Jonathan Cameron Signed-off-by: Rusty Russell --- include/linux/string.h | 1 + lib/string.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index a716ee2a8adb..a176db2f2c85 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -123,6 +123,7 @@ extern char **argv_split(gfp_t gfp, const char *str, int *argcp); extern void argv_free(char **argv); extern bool sysfs_streq(const char *s1, const char *s2); +extern int strtobool(const char *s, bool *res); #ifdef CONFIG_BINARY_PRINTF int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); diff --git a/lib/string.c b/lib/string.c index f71bead1be3e..01fad9b203e1 100644 --- a/lib/string.c +++ b/lib/string.c @@ -535,6 +535,35 @@ bool sysfs_streq(const char *s1, const char *s2) } EXPORT_SYMBOL(sysfs_streq); +/** + * strtobool - convert common user inputs into boolean values + * @s: input string + * @res: result + * + * This routine returns 0 iff the first character is one of 'Yy1Nn0'. + * Otherwise it will return -EINVAL. Value pointed to by res is + * updated upon finding a match. + */ +int strtobool(const char *s, bool *res) +{ + switch (s[0]) { + case 'y': + case 'Y': + case '1': + *res = true; + break; + case 'n': + case 'N': + case '0': + *res = false; + break; + default: + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL(strtobool); + #ifndef __HAVE_ARCH_MEMSET /** * memset - Fill a region of memory with the given value -- cgit v1.2.3 From b3ae52b6b0335eba547221aad2cb3c50902e3d2d Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Tue, 10 May 2011 23:31:30 +0200 Subject: SSB: Change fallback sprom to callback mechanism. Some embedded devices like the Netgear WNDR3300 have two SSB based cards without an own sprom on the pci bus. We have to provide two different fallback sproms for these and this was not possible with the old solution. In the bcm47xx architecture the sprom data is stored in the nvram in the main flash storage. The architecture code will be able to fill the sprom with the stored data based on the bus where the device was found. The bcm63xx code should do the same thing as before, just using the new API. 
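For the architecture side, registration might look roughly like this (the board data and the way it is obtained are hypothetical; the bcm63xx hunk in the diff below is the real in-tree user):

#include <linux/init.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/ssb/ssb.h>

/* Hypothetical board SPROM contents, e.g. parsed out of nvram at boot. */
static struct ssb_sprom board_sprom;

static int board_get_fallback_sprom(struct ssb_bus *bus, struct ssb_sprom *out)
{
        /* Only PCI-attached SSB devices take the fallback path. */
        if (bus->bustype != SSB_BUSTYPE_PCI)
                return -EINVAL;

        /* A real implementation could pick different data per bus. */
        memcpy(out, &board_sprom, sizeof(*out));
        return 0;
}

static int __init board_sprom_init(void)
{
        return ssb_arch_register_fallback_sprom(&board_get_fallback_sprom);
}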
Acked-by: Michael Buesch Cc: netdev@vger.kernel.org Cc: linux-wireless@vger.kernel.org Cc: Florian Fainelli Signed-off-by: Hauke Mehrtens Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/2362/ Signed-off-by: Ralf Baechle --- arch/mips/bcm63xx/boards/board_bcm963xx.c | 16 ++++++++++-- drivers/ssb/pci.c | 16 ++++++++---- drivers/ssb/sprom.c | 43 ++++++++++++++++++------------- drivers/ssb/ssb_private.h | 3 ++- include/linux/ssb/ssb.h | 4 ++- 5 files changed, 55 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/bcm63xx/boards/board_bcm963xx.c b/arch/mips/bcm63xx/boards/board_bcm963xx.c index 8dba8cfb752f..40b223b603be 100644 --- a/arch/mips/bcm63xx/boards/board_bcm963xx.c +++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c @@ -643,6 +643,17 @@ static struct ssb_sprom bcm63xx_sprom = { .boardflags_lo = 0x2848, .boardflags_hi = 0x0000, }; + +int bcm63xx_get_fallback_sprom(struct ssb_bus *bus, struct ssb_sprom *out) +{ + if (bus->bustype == SSB_BUSTYPE_PCI) { + memcpy(out, &bcm63xx_sprom, sizeof(struct ssb_sprom)); + return 0; + } else { + printk(KERN_ERR PFX "unable to fill SPROM for given bustype.\n"); + return -EINVAL; + } +} #endif /* @@ -793,8 +804,9 @@ void __init board_prom_init(void) if (!board_get_mac_address(bcm63xx_sprom.il0mac)) { memcpy(bcm63xx_sprom.et0mac, bcm63xx_sprom.il0mac, ETH_ALEN); memcpy(bcm63xx_sprom.et1mac, bcm63xx_sprom.il0mac, ETH_ALEN); - if (ssb_arch_set_fallback_sprom(&bcm63xx_sprom) < 0) - printk(KERN_ERR "failed to register fallback SPROM\n"); + if (ssb_arch_register_fallback_sprom( + &bcm63xx_get_fallback_sprom) < 0) + printk(KERN_ERR PFX "failed to register fallback SPROM\n"); } #endif } diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c index 6f34963b3c64..7ad48585c5e6 100644 --- a/drivers/ssb/pci.c +++ b/drivers/ssb/pci.c @@ -662,7 +662,6 @@ static int sprom_extract(struct ssb_bus *bus, struct ssb_sprom *out, static int ssb_pci_sprom_get(struct ssb_bus *bus, struct ssb_sprom *sprom) { - const struct ssb_sprom *fallback; int err; u16 *buf; @@ -707,10 +706,17 @@ static int ssb_pci_sprom_get(struct ssb_bus *bus, if (err) { /* All CRC attempts failed. * Maybe there is no SPROM on the device? - * If we have a fallback, use that. */ - fallback = ssb_get_fallback_sprom(); - if (fallback) { - memcpy(sprom, fallback, sizeof(*sprom)); + * Now we ask the arch code if there is some sprom + * available for this device in some other storage */ + err = ssb_fill_sprom_with_fallback(bus, sprom); + if (err) { + ssb_printk(KERN_WARNING PFX "WARNING: Using" + " fallback SPROM failed (err %d)\n", + err); + } else { + ssb_dprintk(KERN_DEBUG PFX "Using SPROM" + " revision %d provided by" + " platform.\n", sprom->revision); err = 0; goto out_free; } diff --git a/drivers/ssb/sprom.c b/drivers/ssb/sprom.c index 5f34d7a3e3a5..45ff0e3a3828 100644 --- a/drivers/ssb/sprom.c +++ b/drivers/ssb/sprom.c @@ -17,7 +17,7 @@ #include -static const struct ssb_sprom *fallback_sprom; +static int(*get_fallback_sprom)(struct ssb_bus *dev, struct ssb_sprom *out); static int sprom2hex(const u16 *sprom, char *buf, size_t buf_len, @@ -145,36 +145,43 @@ out: } /** - * ssb_arch_set_fallback_sprom - Set a fallback SPROM for use if no SPROM is found. + * ssb_arch_register_fallback_sprom - Registers a method providing a + * fallback SPROM if no SPROM is found. * - * @sprom: The SPROM data structure to register. + * @sprom_callback: The callback function. 
* - * With this function the architecture implementation may register a fallback - * SPROM data structure. The fallback is only used for PCI based SSB devices, - * where no valid SPROM can be found in the shadow registers. + * With this function the architecture implementation may register a + * callback handler which fills the SPROM data structure. The fallback is + * only used for PCI based SSB devices, where no valid SPROM can be found + * in the shadow registers. * - * This function is useful for weird architectures that have a half-assed SSB device - * hardwired to their PCI bus. + * This function is useful for weird architectures that have a half-assed + * SSB device hardwired to their PCI bus. * - * Note that it does only work with PCI attached SSB devices. PCMCIA devices currently - * don't use this fallback. - * Architectures must provide the SPROM for native SSB devices anyway, - * so the fallback also isn't used for native devices. + * Note that it does only work with PCI attached SSB devices. PCMCIA + * devices currently don't use this fallback. + * Architectures must provide the SPROM for native SSB devices anyway, so + * the fallback also isn't used for native devices. * - * This function is available for architecture code, only. So it is not exported. + * This function is available for architecture code, only. So it is not + * exported. */ -int ssb_arch_set_fallback_sprom(const struct ssb_sprom *sprom) +int ssb_arch_register_fallback_sprom(int (*sprom_callback)(struct ssb_bus *bus, + struct ssb_sprom *out)) { - if (fallback_sprom) + if (get_fallback_sprom) return -EEXIST; - fallback_sprom = sprom; + get_fallback_sprom = sprom_callback; return 0; } -const struct ssb_sprom *ssb_get_fallback_sprom(void) +int ssb_fill_sprom_with_fallback(struct ssb_bus *bus, struct ssb_sprom *out) { - return fallback_sprom; + if (!get_fallback_sprom) + return -ENOENT; + + return get_fallback_sprom(bus, out); } /* http://bcm-v4.sipsolutions.net/802.11/IsSpromAvailable */ diff --git a/drivers/ssb/ssb_private.h b/drivers/ssb/ssb_private.h index 0331139a726f..77653014db0b 100644 --- a/drivers/ssb/ssb_private.h +++ b/drivers/ssb/ssb_private.h @@ -171,7 +171,8 @@ ssize_t ssb_attr_sprom_store(struct ssb_bus *bus, const char *buf, size_t count, int (*sprom_check_crc)(const u16 *sprom, size_t size), int (*sprom_write)(struct ssb_bus *bus, const u16 *sprom)); -extern const struct ssb_sprom *ssb_get_fallback_sprom(void); +extern int ssb_fill_sprom_with_fallback(struct ssb_bus *bus, + struct ssb_sprom *out); /* core.c */ diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 9659eff52ca2..045f72ab5dfd 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -404,7 +404,9 @@ extern bool ssb_is_sprom_available(struct ssb_bus *bus); /* Set a fallback SPROM. * See kdoc at the function definition for complete documentation. */ -extern int ssb_arch_set_fallback_sprom(const struct ssb_sprom *sprom); +extern int ssb_arch_register_fallback_sprom( + int (*sprom_callback)(struct ssb_bus *bus, + struct ssb_sprom *out)); /* Suspend a SSB bus. * Call this from the parent bus suspend routine. */ -- cgit v1.2.3 From 369db4c9524b7487faf1ff89646eee396c1363e1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 May 2011 21:33:40 +0000 Subject: clocksource: Restructure clocksource struct members Group the hot path members of struct clocksource together so we have a better cache line footprint. Make it cacheline aligned. 
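The intent, in miniature (a userspace analog, assuming 64-byte cache lines): everything dereferenced on the clock read path sits in the first cache line, and the structure as a whole is aligned so that line is not shared with unrelated data. The field names only loosely mirror struct clocksource; the exact kernel layout is in the hunk below.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct clocksource_like {
        /* hot: touched on every clock read */
        uint64_t (*read)(void);
        uint64_t cycle_last;
        uint64_t mask;
        uint32_t mult;
        uint32_t shift;
        /* cold: registration and configuration data */
        const char *name;
        int rating;
        unsigned long flags;
} __attribute__((aligned(64)));

int main(void)
{
        printf("hot fields end at offset %zu, struct alignment %zu\n",
               offsetof(struct clocksource_like, name),
               (size_t)__alignof__(struct clocksource_like));
        return 0;
}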
Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Eric Dumazet Reviewed-by: Ingo Molnar Link: http://lkml.kernel.org/r/%3C20110518210136.003081882%40linutronix.de%3E --- include/linux/clocksource.h | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 0fb0b7e79394..c918fbd33ee5 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -159,42 +159,38 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, */ struct clocksource { /* - * First part of structure is read mostly + * Hotpath data, fits in a single cache line when the + * clocksource itself is cacheline aligned. */ - const char *name; - struct list_head list; - int rating; cycle_t (*read)(struct clocksource *cs); - int (*enable)(struct clocksource *cs); - void (*disable)(struct clocksource *cs); + cycle_t cycle_last; cycle_t mask; u32 mult; u32 shift; u64 max_idle_ns; - unsigned long flags; - cycle_t (*vread)(void); - void (*suspend)(struct clocksource *cs); - void (*resume)(struct clocksource *cs); + #ifdef CONFIG_IA64 void *fsys_mmio; /* used by fsyscall asm code */ #define CLKSRC_FSYS_MMIO_SET(mmio, addr) ((mmio) = (addr)) #else #define CLKSRC_FSYS_MMIO_SET(mmio, addr) do { } while (0) #endif - - /* - * Second part is written at each timer interrupt - * Keep it in a different cache line to dirty no - * more than one cache line. - */ - cycle_t cycle_last ____cacheline_aligned_in_smp; + const char *name; + struct list_head list; + int rating; + cycle_t (*vread)(void); + int (*enable)(struct clocksource *cs); + void (*disable)(struct clocksource *cs); + unsigned long flags; + void (*suspend)(struct clocksource *cs); + void (*resume)(struct clocksource *cs); #ifdef CONFIG_CLOCKSOURCE_WATCHDOG /* Watchdog related data, used by the framework */ struct list_head wd_list; cycle_t wd_last; #endif -}; +} ____cacheline_aligned; /* * Clock source flags bits:: -- cgit v1.2.3 From 847b2f42be203f3cff7f243fdd3ee50c1e06c882 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 May 2011 21:33:41 +0000 Subject: clockevents: Restructure clock_event_device members Group the hot path members of struct clock_event_device together so we have a better cache line footprint. Make it cacheline aligned. 
Signed-off-by: Thomas Gleixner Cc: John Stultz Reviewed-by: Ingo Molnar Link: http://lkml.kernel.org/r/%3C20110518210136.223607682%40linutronix.de%3E --- include/linux/clockchips.h | 45 +++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index fc53492b6ad7..9466eebc8e19 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -56,46 +56,47 @@ enum clock_event_nofitiers { /** * struct clock_event_device - clock event device descriptor - * @name: ptr to clock event name - * @features: features + * @event_handler: Assigned by the framework to be called by the low + * level handler of the event source + * @set_next_event: set next event function + * @next_event: local storage for the next event in oneshot mode * @max_delta_ns: maximum delta value in ns * @min_delta_ns: minimum delta value in ns * @mult: nanosecond to cycles multiplier * @shift: nanoseconds to cycles divisor (power of two) + * @mode: operating mode assigned by the management code + * @features: features + * @retries: number of forced programming retries + * @set_mode: set mode function + * @broadcast: function to broadcast events + * @name: ptr to clock event name * @rating: variable to rate clock event devices * @irq: IRQ number (only for non CPU local devices) * @cpumask: cpumask to indicate for which CPUs this device works - * @set_next_event: set next event function - * @set_mode: set mode function - * @event_handler: Assigned by the framework to be called by the low - * level handler of the event source - * @broadcast: function to broadcast events * @list: list head for the management code - * @mode: operating mode assigned by the management code - * @next_event: local storage for the next event in oneshot mode - * @retries: number of forced programming retries */ struct clock_event_device { - const char *name; - unsigned int features; + void (*event_handler)(struct clock_event_device *); + int (*set_next_event)(unsigned long evt, + struct clock_event_device *); + ktime_t next_event; u64 max_delta_ns; u64 min_delta_ns; u32 mult; u32 shift; + enum clock_event_mode mode; + unsigned int features; + unsigned long retries; + + void (*broadcast)(const struct cpumask *mask); + void (*set_mode)(enum clock_event_mode mode, + struct clock_event_device *); + const char *name; int rating; int irq; const struct cpumask *cpumask; - int (*set_next_event)(unsigned long evt, - struct clock_event_device *); - void (*set_mode)(enum clock_event_mode mode, - struct clock_event_device *); - void (*event_handler)(struct clock_event_device *); - void (*broadcast)(const struct cpumask *mask); struct list_head list; - enum clock_event_mode mode; - ktime_t next_event; - unsigned long retries; -}; +} ____cacheline_aligned; /* * Calculate a multiplication factor for scaled math, which is used to convert -- cgit v1.2.3 From 57f0fcbe1dea8a36c9d1673086326059991c5f81 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 May 2011 21:33:41 +0000 Subject: clockevents: Provide combined configure and register function All clockevent devices have the same open coded initialization functions. Provide an interface which does all necessary initialization in the core code. 
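From a driver's point of view the boilerplate collapses into one call. A hypothetical timer driver's init path (hardware programming stubbed out, frequency and delta limits made up) might look like:

#include <linux/clockchips.h>
#include <linux/cpumask.h>
#include <linux/init.h>

static int my_timer_set_next_event(unsigned long delta,
                                   struct clock_event_device *evt)
{
        /* program the hardware comparator 'delta' ticks ahead (stubbed) */
        return 0;
}

static void my_timer_set_mode(enum clock_event_mode mode,
                              struct clock_event_device *evt)
{
        /* switch the hardware between periodic/oneshot/shutdown (stubbed) */
}

static struct clock_event_device my_clkevt = {
        .name           = "my-timer",
        .features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
        .rating         = 300,
        .set_next_event = my_timer_set_next_event,
        .set_mode       = my_timer_set_mode,
};

static void __init my_timer_init(void)
{
        my_clkevt.cpumask = cpumask_of(0);
        /*
         * Replaces the open-coded clockevents_calc_mult_shift() /
         * clockevent_delta2ns() sequence: 32768 Hz clock, programmable
         * from 2 to 0xffffffff ticks.
         */
        clockevents_config_and_register(&my_clkevt, 32768, 2, 0xffffffff);
}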
Signed-off-by: Thomas Gleixner Cc: John Stultz Reviewed-by: Ingo Molnar Link: http://lkml.kernel.org/r/%3C20110518210136.331975870%40linutronix.de%3E --- include/linux/clockchips.h | 9 +++++++++ kernel/time/clockevents.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 9466eebc8e19..80acc79e0dc5 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -69,6 +69,8 @@ enum clock_event_nofitiers { * @retries: number of forced programming retries * @set_mode: set mode function * @broadcast: function to broadcast events + * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration + * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration * @name: ptr to clock event name * @rating: variable to rate clock event devices * @irq: IRQ number (only for non CPU local devices) @@ -91,6 +93,9 @@ struct clock_event_device { void (*broadcast)(const struct cpumask *mask); void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); + unsigned long min_delta_ticks; + unsigned long max_delta_ticks; + const char *name; int rating; int irq; @@ -123,6 +128,10 @@ extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt); extern void clockevents_register_device(struct clock_event_device *dev); +extern void clockevents_config_and_register(struct clock_event_device *dev, + u32 freq, unsigned long min_delta, + unsigned long max_delta); + extern void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new); extern void clockevents_set_mode(struct clock_event_device *dev, diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 0d74b9ba90c8..c69e88c94446 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -194,6 +194,50 @@ void clockevents_register_device(struct clock_event_device *dev) } EXPORT_SYMBOL_GPL(clockevents_register_device); +static void clockevents_config(struct clock_event_device *dev, + u32 freq) +{ + unsigned long sec; + + if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) + return; + + /* + * Calculate the maximum number of seconds we can sleep. Limit + * to 10 minutes for hardware which can program more than + * 32bit ticks so we still get reasonable conversion values. + */ + sec = dev->max_delta_ticks; + do_div(sec, freq); + if (!sec) + sec = 1; + else if (sec > 600 && dev->max_delta_ticks > UINT_MAX) + sec = 600; + + clockevents_calc_mult_shift(dev, freq, sec); + dev->min_delta_ns = clockevent_delta2ns(dev->min_delta_ticks, dev); + dev->max_delta_ns = clockevent_delta2ns(dev->max_delta_ticks, dev); +} + +/** + * clockevents_config_and_register - Configure and register a clock event device + * @dev: device to register + * @freq: The clock frequency + * @min_delta: The minimum clock ticks to program in oneshot mode + * @max_delta: The maximum clock ticks to program in oneshot mode + * + * min/max_delta can be 0 for devices which do not support oneshot mode. 
+ */ +void clockevents_config_and_register(struct clock_event_device *dev, + u32 freq, unsigned long min_delta, + unsigned long max_delta) +{ + dev->min_delta_ticks = min_delta; + dev->max_delta_ticks = max_delta; + clockevents_config(dev, freq); + clockevents_register_device(dev); +} + /* * Noop handler when we shut down an event device */ -- cgit v1.2.3 From 80b816b736cfa5b9582279127099b20a479ab7d9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 May 2011 21:33:42 +0000 Subject: clockevents: Provide interface to reconfigure an active clock event device Some ARM SoCs have clock event devices which have their frequency modified due to frequency scaling. Provide an interface which allows to reconfigure an active device. After reconfiguration reprogram the current pending event. Signed-off-by: Thomas Gleixner Cc: LAK Cc: John Stultz Acked-by: Linus Walleij Reviewed-by: Ingo Molnar Link: http://lkml.kernel.org/r/%3C20110518210136.437459958%40linutronix.de%3E --- include/linux/clockchips.h | 2 ++ kernel/time/clockevents.c | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 80acc79e0dc5..d6733e27af34 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -132,6 +132,8 @@ extern void clockevents_config_and_register(struct clock_event_device *dev, u32 freq, unsigned long min_delta, unsigned long max_delta); +extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq); + extern void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new); extern void clockevents_set_mode(struct clock_event_device *dev, diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index c69e88c94446..22a9da9a9c96 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -238,6 +238,26 @@ void clockevents_config_and_register(struct clock_event_device *dev, clockevents_register_device(dev); } +/** + * clockevents_update_freq - Update frequency and reprogram a clock event device. + * @dev: device to modify + * @freq: new device frequency + * + * Reconfigure and reprogram a clock event device in oneshot + * mode. Must be called on the cpu for which the device delivers per + * cpu timer events with interrupts disabled! Returns 0 on success, + * -ETIME when the event is in the past. + */ +int clockevents_update_freq(struct clock_event_device *dev, u32 freq) +{ + clockevents_config(dev, freq); + + if (dev->mode != CLOCK_EVT_MODE_ONESHOT) + return 0; + + return clockevents_program_event(dev, dev->next_event, ktime_get()); +} + /* * Noop handler when we shut down an event device */ -- cgit v1.2.3 From 99a0378de9f887fd4d501f1baa50aaf16d01a8e8 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 12 Apr 2011 15:34:36 -0400 Subject: hwmon: (sht15) general code clean-up * Add a documentation file for the device. * Respect a bit more the kernel-doc syntax. * Rename some variables for clarity. * Use bool type for flags. * Use an enum for states (actions being done). 
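The board-side wiring is untouched by this cleanup. For reference, a hypothetical board file hands the driver its two GPIOs and nominal supply voltage through the platform data whose fields are documented at the end of this patch (the GPIO numbers and voltage here are invented):

#include <linux/platform_device.h>
#include <linux/sht15.h>

static struct sht15_platform_data board_sht15_pdata = {
        .gpio_data      = 100,          /* hypothetical GPIO numbers */
        .gpio_sck       = 101,
        .supply_mv      = 3300,         /* superseded by a "vcc" regulator if present */
};

static struct platform_device board_sht15_device = {
        .name   = "sht15",              /* or sht10/sht11/sht71/sht75, per part */
        .id     = -1,
        .dev    = {
                .platform_data = &board_sht15_pdata,
        },
};

/* registered from board init code: platform_device_register(&board_sht15_device); */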
Signed-off-by: Vivien Didelot Acked-by: Jonathan Cameron Signed-off-by: Guenter Roeck --- Documentation/hwmon/sht15 | 42 ++++++++ drivers/hwmon/sht15.c | 268 ++++++++++++++++++++++++++-------------------- include/linux/sht15.h | 12 ++- 3 files changed, 203 insertions(+), 119 deletions(-) create mode 100644 Documentation/hwmon/sht15 (limited to 'include/linux') diff --git a/Documentation/hwmon/sht15 b/Documentation/hwmon/sht15 new file mode 100644 index 000000000000..2919c516fdbc --- /dev/null +++ b/Documentation/hwmon/sht15 @@ -0,0 +1,42 @@ +Kernel driver sht15 +=================== + +Authors: + * Wouter Horre + * Jonathan Cameron + +Supported chips: + * Sensirion SHT10 + Prefix: 'sht10' + + * Sensirion SHT11 + Prefix: 'sht11' + + * Sensirion SHT15 + Prefix: 'sht15' + + * Sensirion SHT71 + Prefix: 'sht71' + + * Sensirion SHT75 + Prefix: 'sht75' + +Datasheet: Publicly available at the Sensirion website +http://www.sensirion.ch/en/pdf/product_information/Datasheet-humidity-sensor-SHT1x.pdf + +Description +----------- + +The SHT10, SHT11, SHT15, SHT71, and SHT75 are humidity and temperature +sensors. + +The devices communicate using two GPIO lines and use the default +resolution settings of 14 bits for temperature and 12 bits for humidity. + +Note: The regulator supply name is set to "vcc". + +Sysfs interface +--------------- + +* temp1_input: temperature input +* humidity1_input: humidity input diff --git a/drivers/hwmon/sht15.c b/drivers/hwmon/sht15.c index f4e617adb220..080af75c517b 100644 --- a/drivers/hwmon/sht15.c +++ b/drivers/hwmon/sht15.c @@ -9,16 +9,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * - * Currently ignoring checksum on readings. - * Default resolution only (14bit temp, 12bit humidity) - * Ignoring battery status. - * Heater not enabled. - * Timings are all conservative. - * - * Data sheet available (1/2009) at - * http://www.sensirion.ch/en/pdf/product_information/Datasheet-humidity-sensor-SHT1x.pdf - * - * Regulator supply name = vcc + * For further information, see the Documentation/hwmon/sht15 file. */ #include @@ -39,17 +30,21 @@ #include #include -#define SHT15_MEASURE_TEMP 3 -#define SHT15_MEASURE_RH 5 +/* Commands */ +#define SHT15_MEASURE_TEMP 0x03 +#define SHT15_MEASURE_RH 0x05 -#define SHT15_READING_NOTHING 0 -#define SHT15_READING_TEMP 1 -#define SHT15_READING_HUMID 2 +/* Min timings */ +#define SHT15_TSCKL 100 /* (nsecs) clock low */ +#define SHT15_TSCKH 100 /* (nsecs) clock high */ +#define SHT15_TSU 150 /* (nsecs) data setup time */ -/* Min timings in nsecs */ -#define SHT15_TSCKL 100 /* clock low */ -#define SHT15_TSCKH 100 /* clock high */ -#define SHT15_TSU 150 /* data setup time */ +/* Actions the driver may be doing */ +enum sht15_state { + SHT15_READING_NOTHING, + SHT15_READING_TEMP, + SHT15_READING_HUMID +}; /** * struct sht15_temppair - elements of voltage dependent temp calc @@ -61,9 +56,7 @@ struct sht15_temppair { int d1; }; -/* Table 9 from data sheet - relates temperature calculation - * to supply voltage. 
- */ +/* Table 9 from datasheet - relates temperature calculation to supply voltage */ static const struct sht15_temppair temppoints[] = { { 2500000, -39400 }, { 3000000, -39600 }, @@ -74,27 +67,27 @@ static const struct sht15_temppair temppoints[] = { /** * struct sht15_data - device instance specific data - * @pdata: platform data (gpio's etc) - * @read_work: bh of interrupt handler - * @wait_queue: wait queue for getting values from device - * @val_temp: last temperature value read from device - * @val_humid: last humidity value read from device - * @flag: status flag used to identify what the last request was - * @valid: are the current stored values valid (start condition) - * @last_updat: time of last update - * @read_lock: mutex to ensure only one read in progress - * at a time. - * @dev: associate device structure - * @hwmon_dev: device associated with hwmon subsystem - * @reg: associated regulator (if specified) - * @nb: notifier block to handle notifications of voltage changes - * @supply_uV: local copy of supply voltage used to allow - * use of regulator consumer if available - * @supply_uV_valid: indicates that an updated value has not yet - * been obtained from the regulator and so any calculations - * based upon it will be invalid. - * @update_supply_work: work struct that is used to update the supply_uV - * @interrupt_handled: flag used to indicate a hander has been scheduled + * @pdata: platform data (gpio's etc). + * @read_work: bh of interrupt handler. + * @wait_queue: wait queue for getting values from device. + * @val_temp: last temperature value read from device. + * @val_humid: last humidity value read from device. + * @state: state identifying the action the driver is doing. + * @measurements_valid: are the current stored measures valid (start condition). + * @last_measurement: time of last measure. + * @read_lock: mutex to ensure only one read in progress at a time. + * @dev: associate device structure. + * @hwmon_dev: device associated with hwmon subsystem. + * @reg: associated regulator (if specified). + * @nb: notifier block to handle notifications of voltage + * changes. + * @supply_uV: local copy of supply voltage used to allow use of + * regulator consumer if available. + * @supply_uV_valid: indicates that an updated value has not yet been + * obtained from the regulator and so any calculations + * based upon it will be invalid. + * @update_supply_work: work struct that is used to update the supply_uV. + * @interrupt_handled: flag used to indicate a handler has been scheduled. 
*/ struct sht15_data { struct sht15_platform_data *pdata; @@ -102,16 +95,16 @@ struct sht15_data { wait_queue_head_t wait_queue; uint16_t val_temp; uint16_t val_humid; - u8 flag; - u8 valid; - unsigned long last_updat; + enum sht15_state state; + bool measurements_valid; + unsigned long last_measurement; struct mutex read_lock; struct device *dev; struct device *hwmon_dev; struct regulator *reg; struct notifier_block nb; int supply_uV; - int supply_uV_valid; + bool supply_uV_valid; struct work_struct update_supply_work; atomic_t interrupt_handled; }; @@ -125,6 +118,7 @@ struct sht15_data { static void sht15_connection_reset(struct sht15_data *data) { int i; + gpio_direction_output(data->pdata->gpio_data, 1); ndelay(SHT15_TSCKL); gpio_set_value(data->pdata->gpio_sck, 0); @@ -136,14 +130,14 @@ static void sht15_connection_reset(struct sht15_data *data) ndelay(SHT15_TSCKL); } } + /** * sht15_send_bit() - send an individual bit to the device * @data: device state data * @val: value of bit to be sent - **/ + */ static inline void sht15_send_bit(struct sht15_data *data, int val) { - gpio_set_value(data->pdata->gpio_data, val); ndelay(SHT15_TSU); gpio_set_value(data->pdata->gpio_sck, 1); @@ -154,12 +148,12 @@ static inline void sht15_send_bit(struct sht15_data *data, int val) /** * sht15_transmission_start() - specific sequence for new transmission - * * @data: device state data + * * Timings for this are not documented on the data sheet, so very * conservative ones used in implementation. This implements * figure 12 on the data sheet. - **/ + */ static void sht15_transmission_start(struct sht15_data *data) { /* ensure data is high and output */ @@ -180,23 +174,26 @@ static void sht15_transmission_start(struct sht15_data *data) gpio_set_value(data->pdata->gpio_sck, 0); ndelay(SHT15_TSCKL); } + /** * sht15_send_byte() - send a single byte to the device * @data: device state * @byte: value to be sent - **/ + */ static void sht15_send_byte(struct sht15_data *data, u8 byte) { int i; + for (i = 0; i < 8; i++) { sht15_send_bit(data, !!(byte & 0x80)); byte <<= 1; } } + /** * sht15_wait_for_response() - checks for ack from device * @data: device state - **/ + */ static int sht15_wait_for_response(struct sht15_data *data) { gpio_direction_input(data->pdata->gpio_data); @@ -220,27 +217,30 @@ static int sht15_wait_for_response(struct sht15_data *data) * * On entry, sck is output low, data is output pull high * and the interrupt disabled. - **/ + */ static int sht15_send_cmd(struct sht15_data *data, u8 cmd) { int ret = 0; + sht15_transmission_start(data); sht15_send_byte(data, cmd); ret = sht15_wait_for_response(data); return ret; } + /** - * sht15_update_single_val() - get a new value from device + * sht15_measurement() - get a new value from device * @data: device instance specific data * @command: command sent to request value * @timeout_msecs: timeout after which comms are assumed * to have failed are reset. 
- **/ -static inline int sht15_update_single_val(struct sht15_data *data, - int command, - int timeout_msecs) + */ +static int sht15_measurement(struct sht15_data *data, + int command, + int timeout_msecs) { int ret; + ret = sht15_send_cmd(data, command); if (ret) return ret; @@ -256,7 +256,7 @@ static inline int sht15_update_single_val(struct sht15_data *data, schedule_work(&data->read_work); } ret = wait_event_timeout(data->wait_queue, - (data->flag == SHT15_READING_NOTHING), + (data->state == SHT15_READING_NOTHING), msecs_to_jiffies(timeout_msecs)); if (ret == 0) {/* timeout occurred */ disable_irq_nosync(gpio_to_irq(data->pdata->gpio_data)); @@ -267,27 +267,27 @@ static inline int sht15_update_single_val(struct sht15_data *data, } /** - * sht15_update_vals() - get updated readings from device if too old + * sht15_update_measurements() - get updated measures from device if too old * @data: device state - **/ -static int sht15_update_vals(struct sht15_data *data) + */ +static int sht15_update_measurements(struct sht15_data *data) { int ret = 0; int timeout = HZ; mutex_lock(&data->read_lock); - if (time_after(jiffies, data->last_updat + timeout) - || !data->valid) { - data->flag = SHT15_READING_HUMID; - ret = sht15_update_single_val(data, SHT15_MEASURE_RH, 160); + if (time_after(jiffies, data->last_measurement + timeout) + || !data->measurements_valid) { + data->state = SHT15_READING_HUMID; + ret = sht15_measurement(data, SHT15_MEASURE_RH, 160); if (ret) goto error_ret; - data->flag = SHT15_READING_TEMP; - ret = sht15_update_single_val(data, SHT15_MEASURE_TEMP, 400); + data->state = SHT15_READING_TEMP; + ret = sht15_measurement(data, SHT15_MEASURE_TEMP, 400); if (ret) goto error_ret; - data->valid = 1; - data->last_updat = jiffies; + data->measurements_valid = true; + data->last_measurement = jiffies; } error_ret: mutex_unlock(&data->read_lock); @@ -300,7 +300,7 @@ error_ret: * @data: device state * * As per section 4.3 of the data sheet. - **/ + */ static inline int sht15_calc_temp(struct sht15_data *data) { int d1 = temppoints[0].d1; @@ -316,7 +316,7 @@ static inline int sht15_calc_temp(struct sht15_data *data) break; } - return data->val_temp*10 + d1; + return data->val_temp * 10 + d1; } /** @@ -325,23 +325,35 @@ static inline int sht15_calc_temp(struct sht15_data *data) * * This is the temperature compensated version as per section 4.2 of * the data sheet. - **/ + * + * The sensor is assumed to be V3, which is compatible with V4. + * Humidity conversion coefficients are shown in table 7 of the datasheet. + */ static inline int sht15_calc_humid(struct sht15_data *data) { - int RHlinear; /* milli percent */ + int rh_linear; /* milli percent */ int temp = sht15_calc_temp(data); const int c1 = -4; const int c2 = 40500; /* x 10 ^ -6 */ - const int c3 = -28; /* x 10 ^ -7 */ + const int c3 = -28; /* x 10 ^ -7 */ - RHlinear = c1*1000 - + c2 * data->val_humid/1000 + rh_linear = c1 * 1000 + + c2 * data->val_humid / 1000 + (data->val_humid * data->val_humid * c3) / 10000; return (temp - 25000) * (10000 + 80 * data->val_humid) - / 1000000 + RHlinear; + / 1000000 + rh_linear; } +/** + * sht15_show_temp() - show temperature measurement value in sysfs + * @dev: device. + * @attr: device attribute. + * @buf: sysfs buffer where measurement values are written to. + * + * Will be called on read access to temp1_input sysfs attribute. + * Returns number of bytes written into buffer, negative errno on error. 
+ */ static ssize_t sht15_show_temp(struct device *dev, struct device_attribute *attr, char *buf) @@ -350,12 +362,21 @@ static ssize_t sht15_show_temp(struct device *dev, struct sht15_data *data = dev_get_drvdata(dev); /* Technically no need to read humidity as well */ - ret = sht15_update_vals(data); + ret = sht15_update_measurements(data); return ret ? ret : sprintf(buf, "%d\n", sht15_calc_temp(data)); } +/** + * sht15_show_humidity() - show humidity measurement value in sysfs + * @dev: device. + * @attr: device attribute. + * @buf: sysfs buffer where measurement values are written to. + * + * Will be called on read access to humidity1_input sysfs attribute. + * Returns number of bytes written into buffer, negative errno on error. + */ static ssize_t sht15_show_humidity(struct device *dev, struct device_attribute *attr, char *buf) @@ -363,11 +384,12 @@ static ssize_t sht15_show_humidity(struct device *dev, int ret; struct sht15_data *data = dev_get_drvdata(dev); - ret = sht15_update_vals(data); + ret = sht15_update_measurements(data); return ret ? ret : sprintf(buf, "%d\n", sht15_calc_humid(data)); -}; +} + static ssize_t show_name(struct device *dev, struct device_attribute *attr, char *buf) @@ -376,12 +398,10 @@ static ssize_t show_name(struct device *dev, return sprintf(buf, "%s\n", pdev->name); } -static SENSOR_DEVICE_ATTR(temp1_input, - S_IRUGO, sht15_show_temp, - NULL, 0); -static SENSOR_DEVICE_ATTR(humidity1_input, - S_IRUGO, sht15_show_humidity, - NULL, 0); +static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, + sht15_show_temp, NULL, 0); +static SENSOR_DEVICE_ATTR(humidity1_input, S_IRUGO, + sht15_show_humidity, NULL, 0); static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); static struct attribute *sht15_attrs[] = { &sensor_dev_attr_temp1_input.dev_attr.attr, @@ -397,16 +417,20 @@ static const struct attribute_group sht15_attr_group = { static irqreturn_t sht15_interrupt_fired(int irq, void *d) { struct sht15_data *data = d; + /* First disable the interrupt */ disable_irq_nosync(irq); atomic_inc(&data->interrupt_handled); /* Then schedule a reading work struct */ - if (data->flag != SHT15_READING_NOTHING) + if (data->state != SHT15_READING_NOTHING) schedule_work(&data->read_work); return IRQ_HANDLED; } -/* Each byte of data is acknowledged by pulling the data line +/** + * sht15_ack() - Send an ack to the device + * + * Each byte of data is acknowledged by pulling the data line * low for one clock pulse. */ static void sht15_ack(struct sht15_data *data) @@ -421,12 +445,13 @@ static void sht15_ack(struct sht15_data *data) gpio_direction_input(data->pdata->gpio_data); } + /** * sht15_end_transmission() - notify device of end of transmission * @data: device state * * This is basically a NAK. (single clock pulse, data high) - **/ + */ static void sht15_end_transmission(struct sht15_data *data) { gpio_direction_output(data->pdata->gpio_data, 1); @@ -444,12 +469,13 @@ static void sht15_bh_read_data(struct work_struct *work_s) struct sht15_data *data = container_of(work_s, struct sht15_data, read_work); + /* Firstly, verify the line is low */ if (gpio_get_value(data->pdata->gpio_data)) { - /* If not, then start the interrupt again - care - here as could have gone low in meantime so verify - it hasn't! - */ + /* + * If not, then start the interrupt again - care here as could + * have gone low in meantime so verify it hasn't! 
+ */ atomic_set(&data->interrupt_handled, 0); enable_irq(gpio_to_irq(data->pdata->gpio_data)); /* If still not occurred or another handler has been scheduled */ @@ -457,6 +483,7 @@ static void sht15_bh_read_data(struct work_struct *work_s) || atomic_read(&data->interrupt_handled)) return; } + /* Read the data back from the device */ for (i = 0; i < 16; ++i) { val <<= 1; @@ -468,19 +495,22 @@ static void sht15_bh_read_data(struct work_struct *work_s) if (i == 7) sht15_ack(data); } + /* Tell the device we are done */ sht15_end_transmission(data); - switch (data->flag) { + switch (data->state) { case SHT15_READING_TEMP: data->val_temp = val; break; case SHT15_READING_HUMID: data->val_humid = val; break; + default: + break; } - data->flag = SHT15_READING_NOTHING; + data->state = SHT15_READING_NOTHING; wake_up(&data->wait_queue); } @@ -500,10 +530,10 @@ static void sht15_update_voltage(struct work_struct *work_s) * * Note that as the notification code holds the regulator lock, we have * to schedule an update of the supply voltage rather than getting it directly. - **/ + */ static int sht15_invalidate_voltage(struct notifier_block *nb, - unsigned long event, - void *ignored) + unsigned long event, + void *ignored) { struct sht15_data *data = container_of(nb, struct sht15_data, nb); @@ -521,7 +551,7 @@ static int __devinit sht15_probe(struct platform_device *pdev) if (!data) { ret = -ENOMEM; - dev_err(&pdev->dev, "kzalloc failed"); + dev_err(&pdev->dev, "kzalloc failed\n"); goto error_ret; } @@ -533,13 +563,16 @@ static int __devinit sht15_probe(struct platform_device *pdev) init_waitqueue_head(&data->wait_queue); if (pdev->dev.platform_data == NULL) { - dev_err(&pdev->dev, "no platform data supplied"); + dev_err(&pdev->dev, "no platform data supplied\n"); goto err_free_data; } data->pdata = pdev->dev.platform_data; - data->supply_uV = data->pdata->supply_mv*1000; + data->supply_uV = data->pdata->supply_mv * 1000; -/* If a regulator is available, query what the supply voltage actually is!*/ + /* + * If a regulator is available, + * query what the supply voltage actually is! 
+ */ data->reg = regulator_get(data->dev, "vcc"); if (!IS_ERR(data->reg)) { int voltage; @@ -549,21 +582,25 @@ static int __devinit sht15_probe(struct platform_device *pdev) data->supply_uV = voltage; regulator_enable(data->reg); - /* setup a notifier block to update this if another device - * causes the voltage to change */ + /* + * Setup a notifier block to update this if another device + * causes the voltage to change + */ data->nb.notifier_call = &sht15_invalidate_voltage; ret = regulator_register_notifier(data->reg, &data->nb); } -/* Try requesting the GPIOs */ + + /* Try requesting the GPIOs */ ret = gpio_request(data->pdata->gpio_sck, "SHT15 sck"); if (ret) { - dev_err(&pdev->dev, "gpio request failed"); + dev_err(&pdev->dev, "gpio request failed\n"); goto err_free_data; } gpio_direction_output(data->pdata->gpio_sck, 0); + ret = gpio_request(data->pdata->gpio_data, "SHT15 data"); if (ret) { - dev_err(&pdev->dev, "gpio request failed"); + dev_err(&pdev->dev, "gpio request failed\n"); goto err_release_gpio_sck; } ret = sysfs_create_group(&pdev->dev.kobj, &sht15_attr_group); @@ -578,7 +615,7 @@ static int __devinit sht15_probe(struct platform_device *pdev) "sht15 data", data); if (ret) { - dev_err(&pdev->dev, "failed to get irq for data line"); + dev_err(&pdev->dev, "failed to get irq for data line\n"); goto err_release_gpio_data; } disable_irq_nosync(gpio_to_irq(data->pdata->gpio_data)); @@ -590,6 +627,7 @@ static int __devinit sht15_probe(struct platform_device *pdev) ret = PTR_ERR(data->hwmon_dev); goto err_release_irq; } + return 0; err_release_irq: @@ -601,7 +639,6 @@ err_release_gpio_sck: err_free_data: kfree(data); error_ret: - return ret; } @@ -609,8 +646,10 @@ static int __devexit sht15_remove(struct platform_device *pdev) { struct sht15_data *data = platform_get_drvdata(pdev); - /* Make sure any reads from the device are done and - * prevent new ones from beginning */ + /* + * Make sure any reads from the device are done and + * prevent new ones beginning + */ mutex_lock(&data->read_lock); hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&pdev->dev.kobj, &sht15_attr_group); @@ -625,10 +664,10 @@ static int __devexit sht15_remove(struct platform_device *pdev) gpio_free(data->pdata->gpio_sck); mutex_unlock(&data->read_lock); kfree(data); + return 0; } - /* * sht_drivers simultaneously refers to __devinit and __devexit function * which causes spurious section mismatch warning. So use __refdata to @@ -673,7 +712,6 @@ static struct platform_driver __refdata sht_drivers[] = { }, }; - static int __init sht15_init(void) { int ret; diff --git a/include/linux/sht15.h b/include/linux/sht15.h index 046bce05ecab..1e302146bdc8 100644 --- a/include/linux/sht15.h +++ b/include/linux/sht15.h @@ -8,14 +8,18 @@ * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. + * + * For further information, see the Documentation/hwmon/sht15 file. */ /** * struct sht15_platform_data - sht15 connectivity info - * @gpio_data: no. of gpio to which bidirectional data line is connected - * @gpio_sck: no. of gpio to which the data clock is connected. - * @supply_mv: supply voltage in mv. Overridden by regulator if available. - **/ + * @gpio_data: no. of gpio to which bidirectional data line is + * connected. + * @gpio_sck: no. of gpio to which the data clock is connected. + * @supply_mv: supply voltage in mv. Overridden by regulator if + * available. 
+ */ struct sht15_platform_data { int gpio_data; int gpio_sck; -- cgit v1.2.3 From cc15c7ebb424e45ba2c5ceecbe52d025219ee970 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 12 Apr 2011 15:34:38 -0400 Subject: hwmon: (sht15) add support for the status register * Add support for: - Heater. - End of battery notice. - Ability not to reload from OTP. - Low resolution (12bit temp, 8bit humidity). * Add an utility function to read individual bytes from the device. Signed-off-by: Vivien Didelot Acked-by: Jonathan Cameron Signed-off-by: Guenter Roeck --- Documentation/hwmon/sht15 | 29 +++++- drivers/hwmon/sht15.c | 247 +++++++++++++++++++++++++++++++++++++++------- include/linux/sht15.h | 4 + 3 files changed, 245 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/Documentation/hwmon/sht15 b/Documentation/hwmon/sht15 index 2919c516fdbc..d1939b25eb16 100644 --- a/Documentation/hwmon/sht15 +++ b/Documentation/hwmon/sht15 @@ -4,6 +4,7 @@ Kernel driver sht15 Authors: * Wouter Horre * Jonathan Cameron + * Vivien Didelot Supported chips: * Sensirion SHT10 @@ -30,13 +31,37 @@ Description The SHT10, SHT11, SHT15, SHT71, and SHT75 are humidity and temperature sensors. -The devices communicate using two GPIO lines and use the default -resolution settings of 14 bits for temperature and 12 bits for humidity. +The devices communicate using two GPIO lines. + +Supported resolutions for the measurements are 14 bits for temperature and 12 +bits for humidity, or 12 bits for temperature and 8 bits for humidity. + +The humidity calibration coefficients are programmed into an OTP memory on the +chip. These coefficients are used to internally calibrate the signals from the +sensors. Disabling the reload of those coefficients allows saving 10ms for each +measurement and decrease power consumption, while loosing on precision. + +Some options may be set directly in the sht15_platform_data structure +or via sysfs attributes. Note: The regulator supply name is set to "vcc". +Platform data +------------- + +* no_otp_reload: + flag to indicate not to reload from OTP (default to false). +* low_resolution: + flag to indicate the temp/humidity resolution to use (default to false). + Sysfs interface --------------- * temp1_input: temperature input * humidity1_input: humidity input +* heater_enable: write 1 in this attribute to enable the on-chip heater, + 0 to disable it. Be careful not to enable the heater + for too long. +* temp1_fault: if 1, this means that the voltage is low (below 2.47V) and + measurement may be invalid. +* humidity1_fault: same as temp1_fault. diff --git a/drivers/hwmon/sht15.c b/drivers/hwmon/sht15.c index 3182b3f578e2..49da089b5de9 100644 --- a/drivers/hwmon/sht15.c +++ b/drivers/hwmon/sht15.c @@ -1,6 +1,9 @@ /* * sht15.c - support for the SHT15 Temperature and Humidity Sensor * + * Portions Copyright (c) 2010-2011 Savoir-faire Linux Inc. 
+ * Vivien Didelot + * * Copyright (c) 2009 Jonathan Cameron * * Copyright (c) 2007 Wouter Horre @@ -33,6 +36,8 @@ /* Commands */ #define SHT15_MEASURE_TEMP 0x03 #define SHT15_MEASURE_RH 0x05 +#define SHT15_WRITE_STATUS 0x06 +#define SHT15_READ_STATUS 0x07 #define SHT15_SOFT_RESET 0x1E /* Min timings */ @@ -41,6 +46,12 @@ #define SHT15_TSU 150 /* (nsecs) data setup time */ #define SHT15_TSRST 11 /* (msecs) soft reset time */ +/* Status Register Bits */ +#define SHT15_STATUS_LOW_RESOLUTION 0x01 +#define SHT15_STATUS_NO_OTP_RELOAD 0x02 +#define SHT15_STATUS_HEATER 0x04 +#define SHT15_STATUS_LOW_BATTERY 0x40 + /* Actions the driver may be doing */ enum sht15_state { SHT15_READING_NOTHING, @@ -74,9 +85,12 @@ static const struct sht15_temppair temppoints[] = { * @wait_queue: wait queue for getting values from device. * @val_temp: last temperature value read from device. * @val_humid: last humidity value read from device. + * @val_status: last status register value read from device. * @state: state identifying the action the driver is doing. * @measurements_valid: are the current stored measures valid (start condition). + * @status_valid: is the current stored status valid (start condition). * @last_measurement: time of last measure. + * @last_status: time of last status reading. * @read_lock: mutex to ensure only one read in progress at a time. * @dev: associate device structure. * @hwmon_dev: device associated with hwmon subsystem. @@ -97,9 +111,12 @@ struct sht15_data { wait_queue_head_t wait_queue; uint16_t val_temp; uint16_t val_humid; + u8 val_status; enum sht15_state state; bool measurements_valid; + bool status_valid; unsigned long last_measurement; + unsigned long last_status; struct mutex read_lock; struct device *dev; struct device *hwmon_dev; @@ -244,10 +261,105 @@ static int sht15_soft_reset(struct sht15_data *data) if (ret) return ret; msleep(SHT15_TSRST); + /* device resets default hardware status register value */ + data->val_status = 0; + + return ret; +} + +/** + * sht15_end_transmission() - notify device of end of transmission + * @data: device state. + * + * This is basically a NAK (single clock pulse, data high). + */ +static void sht15_end_transmission(struct sht15_data *data) +{ + gpio_direction_output(data->pdata->gpio_data, 1); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_sck, 1); + ndelay(SHT15_TSCKH); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSCKL); +} + +/** + * sht15_read_byte() - Read a byte back from the device + * @data: device state. + */ +static u8 sht15_read_byte(struct sht15_data *data) +{ + int i; + u8 byte = 0; + + for (i = 0; i < 8; ++i) { + byte <<= 1; + gpio_set_value(data->pdata->gpio_sck, 1); + ndelay(SHT15_TSCKH); + byte |= !!gpio_get_value(data->pdata->gpio_data); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSCKL); + } + return byte; +} + +/** + * sht15_send_status() - write the status register byte + * @data: sht15 specific data. + * @status: the byte to set the status register with. + * + * As described in figure 14 and table 5 of the datasheet. 
+ */ +static int sht15_send_status(struct sht15_data *data, u8 status) +{ + int ret; + + ret = sht15_send_cmd(data, SHT15_WRITE_STATUS); + if (ret) + return ret; + gpio_direction_output(data->pdata->gpio_data, 1); + ndelay(SHT15_TSU); + sht15_send_byte(data, status); + ret = sht15_wait_for_response(data); + if (ret) + return ret; + data->val_status = status; return 0; } +/** + * sht15_update_status() - get updated status register from device if too old + * @data: device instance specific data. + * + * As described in figure 15 and table 5 of the datasheet. + */ +static int sht15_update_status(struct sht15_data *data) +{ + int ret = 0; + u8 status; + int timeout = HZ; + + mutex_lock(&data->read_lock); + if (time_after(jiffies, data->last_status + timeout) + || !data->status_valid) { + ret = sht15_send_cmd(data, SHT15_READ_STATUS); + if (ret) + goto error_ret; + status = sht15_read_byte(data); + + sht15_end_transmission(data); + + data->val_status = status; + data->status_valid = true; + data->last_status = jiffies; + } +error_ret: + mutex_unlock(&data->read_lock); + + return ret; +} + /** * sht15_measurement() - get a new value from device * @data: device instance specific data @@ -324,6 +436,7 @@ error_ret: static inline int sht15_calc_temp(struct sht15_data *data) { int d1 = temppoints[0].d1; + int d2 = (data->val_status & SHT15_STATUS_LOW_RESOLUTION) ? 40 : 10; int i; for (i = ARRAY_SIZE(temppoints) - 1; i > 0; i--) @@ -336,7 +449,7 @@ static inline int sht15_calc_temp(struct sht15_data *data) break; } - return data->val_temp * 10 + d1; + return data->val_temp * d2 + d1; } /** @@ -353,18 +466,85 @@ static inline int sht15_calc_humid(struct sht15_data *data) { int rh_linear; /* milli percent */ int temp = sht15_calc_temp(data); - + int c2, c3; + int t2; const int c1 = -4; - const int c2 = 40500; /* x 10 ^ -6 */ - const int c3 = -28; /* x 10 ^ -7 */ + + if (data->val_status & SHT15_STATUS_LOW_RESOLUTION) { + c2 = 648000; /* x 10 ^ -6 */ + c3 = -7200; /* x 10 ^ -7 */ + t2 = 1280; + } else { + c2 = 40500; /* x 10 ^ -6 */ + c3 = -28; /* x 10 ^ -7 */ + t2 = 80; + } rh_linear = c1 * 1000 + c2 * data->val_humid / 1000 + (data->val_humid * data->val_humid * c3) / 10000; - return (temp - 25000) * (10000 + 80 * data->val_humid) + return (temp - 25000) * (10000 + t2 * data->val_humid) / 1000000 + rh_linear; } +/** + * sht15_show_status() - show status information in sysfs + * @dev: device. + * @attr: device attribute. + * @buf: sysfs buffer where information is written to. + * + * Will be called on read access to temp1_fault, humidity1_fault + * and heater_enable sysfs attributes. + * Returns number of bytes written into buffer, negative errno on error. + */ +static ssize_t sht15_show_status(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int ret; + struct sht15_data *data = dev_get_drvdata(dev); + u8 bit = to_sensor_dev_attr(attr)->index; + + ret = sht15_update_status(data); + + return ret ? ret : sprintf(buf, "%d\n", !!(data->val_status & bit)); +} + +/** + * sht15_store_heater() - change heater state via sysfs + * @dev: device. + * @attr: device attribute. + * @buf: sysfs buffer to read the new heater state from. + * @count: length of the data. + * + * Will be called on read access to heater_enable sysfs attribute. + * Returns number of bytes actually decoded, negative errno on error. 
+ */ +static ssize_t sht15_store_heater(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + struct sht15_data *data = dev_get_drvdata(dev); + long value; + u8 status; + + if (strict_strtol(buf, 10, &value)) + return -EINVAL; + + mutex_lock(&data->read_lock); + status = data->val_status & 0x07; + if (!!value) + status |= SHT15_STATUS_HEATER; + else + status &= ~SHT15_STATUS_HEATER; + + ret = sht15_send_status(data, status); + mutex_unlock(&data->read_lock); + + return ret ? ret : count; +} + /** * sht15_show_temp() - show temperature measurement value in sysfs * @dev: device. @@ -407,7 +587,6 @@ static ssize_t sht15_show_humidity(struct device *dev, ret = sht15_update_measurements(data); return ret ? ret : sprintf(buf, "%d\n", sht15_calc_humid(data)); - } static ssize_t show_name(struct device *dev, @@ -422,10 +601,19 @@ static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, sht15_show_temp, NULL, 0); static SENSOR_DEVICE_ATTR(humidity1_input, S_IRUGO, sht15_show_humidity, NULL, 0); +static SENSOR_DEVICE_ATTR(temp1_fault, S_IRUGO, sht15_show_status, NULL, + SHT15_STATUS_LOW_BATTERY); +static SENSOR_DEVICE_ATTR(humidity1_fault, S_IRUGO, sht15_show_status, NULL, + SHT15_STATUS_LOW_BATTERY); +static SENSOR_DEVICE_ATTR(heater_enable, S_IRUGO | S_IWUSR, sht15_show_status, + sht15_store_heater, SHT15_STATUS_HEATER); static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); static struct attribute *sht15_attrs[] = { &sensor_dev_attr_temp1_input.dev_attr.attr, &sensor_dev_attr_humidity1_input.dev_attr.attr, + &sensor_dev_attr_temp1_fault.dev_attr.attr, + &sensor_dev_attr_humidity1_fault.dev_attr.attr, + &sensor_dev_attr_heater_enable.dev_attr.attr, &dev_attr_name.attr, NULL, }; @@ -466,25 +654,8 @@ static void sht15_ack(struct sht15_data *data) gpio_direction_input(data->pdata->gpio_data); } -/** - * sht15_end_transmission() - notify device of end of transmission - * @data: device state - * - * This is basically a NAK. 
(single clock pulse, data high) - */ -static void sht15_end_transmission(struct sht15_data *data) -{ - gpio_direction_output(data->pdata->gpio_data, 1); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_sck, 1); - ndelay(SHT15_TSCKH); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSCKL); -} - static void sht15_bh_read_data(struct work_struct *work_s) { - int i; uint16_t val = 0; struct sht15_data *data = container_of(work_s, struct sht15_data, @@ -505,16 +676,10 @@ static void sht15_bh_read_data(struct work_struct *work_s) } /* Read the data back from the device */ - for (i = 0; i < 16; ++i) { - val <<= 1; - gpio_set_value(data->pdata->gpio_sck, 1); - ndelay(SHT15_TSCKH); - val |= !!gpio_get_value(data->pdata->gpio_data); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSCKL); - if (i == 7) - sht15_ack(data); - } + val = sht15_read_byte(data); + val <<= 8; + sht15_ack(data); + val |= sht15_read_byte(data); /* Tell the device we are done */ sht15_end_transmission(data); @@ -568,6 +733,7 @@ static int __devinit sht15_probe(struct platform_device *pdev) { int ret = 0; struct sht15_data *data = kzalloc(sizeof(*data), GFP_KERNEL); + u8 status = 0; if (!data) { ret = -ENOMEM; @@ -588,6 +754,10 @@ static int __devinit sht15_probe(struct platform_device *pdev) } data->pdata = pdev->dev.platform_data; data->supply_uV = data->pdata->supply_mv * 1000; + if (data->pdata->no_otp_reload) + status |= SHT15_STATUS_NO_OTP_RELOAD; + if (data->pdata->low_resolution) + status |= SHT15_STATUS_LOW_RESOLUTION; /* * If a regulator is available, @@ -646,6 +816,13 @@ static int __devinit sht15_probe(struct platform_device *pdev) if (ret) goto err_release_irq; + /* write status with platform data options */ + if (status) { + ret = sht15_send_status(data, status); + if (ret) + goto err_release_irq; + } + ret = sysfs_create_group(&pdev->dev.kobj, &sht15_attr_group); if (ret) { dev_err(&pdev->dev, "sysfs create failed\n"); @@ -689,6 +866,10 @@ static int __devexit sht15_remove(struct platform_device *pdev) * prevent new ones beginning */ mutex_lock(&data->read_lock); + if (sht15_soft_reset(data)) { + mutex_unlock(&data->read_lock); + return -EFAULT; + } hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&pdev->dev.kobj, &sht15_attr_group); if (!IS_ERR(data->reg)) { diff --git a/include/linux/sht15.h b/include/linux/sht15.h index 1e302146bdc8..d836ca617323 100644 --- a/include/linux/sht15.h +++ b/include/linux/sht15.h @@ -19,10 +19,14 @@ * @gpio_sck: no. of gpio to which the data clock is connected. * @supply_mv: supply voltage in mv. Overridden by regulator if * available. + * @no_otp_reload: flag to indicate no reload from OTP. + * @low_resolution: flag to indicate the temp/humidity resolution to use. */ struct sht15_platform_data { int gpio_data; int gpio_sck; int supply_mv; + bool no_otp_reload; + bool low_resolution; }; -- cgit v1.2.3 From 82c7465b4dd013d19858bfeac084849ae17682c0 Mon Sep 17 00:00:00 2001 From: Jerome Oufella Date: Tue, 12 Apr 2011 15:34:39 -0400 Subject: hwmon: (sht15) add support for CRC validation The sht15 sensor allows validating exchanges to and from the device using a crc8 function. An utility function to reverse a byte has also been added. 
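For reference, the validation can be pictured as the standalone sketch below (not part of the patch). For a measurement, the CRC is seeded with the bit-reversed low nibble of the status register, run over the command byte and the two data bytes, and compared against the bit-reversed checksum byte returned by the device; the bitwise CRC shown here is assumed to be equivalent to the lookup table the driver actually uses (Sensirion polynomial 0x31), and the helper names are invented for the example.

    #include <linux/types.h>

    #define SHT15_MEASURE_RH	0x05	/* command byte, as defined in the driver */

    /* Reverse the bit order in a byte (the device sends its CRC bit-reversed). */
    static u8 reverse_byte(u8 byte)
    {
    	u8 i, c;

    	for (c = 0, i = 0; i < 8; i++)
    		c |= (!!(byte & (1 << i))) << (7 - i);
    	return c;
    }

    /* Bitwise CRC-8 over buf, polynomial 0x31, starting from crc. */
    static u8 crc8_31(u8 crc, const u8 *buf, int len)
    {
    	int i;

    	while (len--) {
    		crc ^= *buf++;
    		for (i = 0; i < 8; i++)
    			crc = (crc & 0x80) ? (crc << 1) ^ 0x31 : crc << 1;
    	}
    	return crc;
    }

    /* Validate one humidity reading (msb/lsb) against the device checksum. */
    static bool rh_reading_ok(u8 status, u8 msb, u8 lsb, u8 dev_crc)
    {
    	u8 vals[] = { SHT15_MEASURE_RH, msb, lsb };

    	return crc8_31(reverse_byte(status & 0x0f), vals, 3) ==
    	       reverse_byte(dev_crc);
    }
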
Signed-off-by: Jerome Oufella Acked-by: Jonathan Cameron Signed-off-by: Guenter Roeck --- Documentation/hwmon/sht15 | 9 ++- drivers/hwmon/sht15.c | 192 +++++++++++++++++++++++++++++++++++++++++----- include/linux/sht15.h | 2 + 3 files changed, 183 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/Documentation/hwmon/sht15 b/Documentation/hwmon/sht15 index d1939b25eb16..02850bdfac18 100644 --- a/Documentation/hwmon/sht15 +++ b/Documentation/hwmon/sht15 @@ -5,6 +5,7 @@ Authors: * Wouter Horre * Jonathan Cameron * Vivien Didelot + * Jerome Oufella Supported chips: * Sensirion SHT10 @@ -44,11 +45,17 @@ measurement and decrease power consumption, while loosing on precision. Some options may be set directly in the sht15_platform_data structure or via sysfs attributes. -Note: The regulator supply name is set to "vcc". +Notes: + * The regulator supply name is set to "vcc". + * If a CRC validation fails, a soft reset command is sent, which resets + status register to its hardware default value, but the driver will try to + restore the previous device configuration. Platform data ------------- +* checksum: + set it to true to enable CRC validation of the readings (default to false). * no_otp_reload: flag to indicate not to reload from OTP (default to false). * low_resolution: diff --git a/drivers/hwmon/sht15.c b/drivers/hwmon/sht15.c index 49da089b5de9..cf4330b352ef 100644 --- a/drivers/hwmon/sht15.c +++ b/drivers/hwmon/sht15.c @@ -2,6 +2,7 @@ * sht15.c - support for the SHT15 Temperature and Humidity Sensor * * Portions Copyright (c) 2010-2011 Savoir-faire Linux Inc. + * Jerome Oufella * Vivien Didelot * * Copyright (c) 2009 Jonathan Cameron @@ -78,6 +79,42 @@ static const struct sht15_temppair temppoints[] = { { 5000000, -40100 }, }; +/* Table from CRC datasheet, section 2.4 */ +static const u8 sht15_crc8_table[] = { + 0, 49, 98, 83, 196, 245, 166, 151, + 185, 136, 219, 234, 125, 76, 31, 46, + 67, 114, 33, 16, 135, 182, 229, 212, + 250, 203, 152, 169, 62, 15, 92, 109, + 134, 183, 228, 213, 66, 115, 32, 17, + 63, 14, 93, 108, 251, 202, 153, 168, + 197, 244, 167, 150, 1, 48, 99, 82, + 124, 77, 30, 47, 184, 137, 218, 235, + 61, 12, 95, 110, 249, 200, 155, 170, + 132, 181, 230, 215, 64, 113, 34, 19, + 126, 79, 28, 45, 186, 139, 216, 233, + 199, 246, 165, 148, 3, 50, 97, 80, + 187, 138, 217, 232, 127, 78, 29, 44, + 2, 51, 96, 81, 198, 247, 164, 149, + 248, 201, 154, 171, 60, 13, 94, 111, + 65, 112, 35, 18, 133, 180, 231, 214, + 122, 75, 24, 41, 190, 143, 220, 237, + 195, 242, 161, 144, 7, 54, 101, 84, + 57, 8, 91, 106, 253, 204, 159, 174, + 128, 177, 226, 211, 68, 117, 38, 23, + 252, 205, 158, 175, 56, 9, 90, 107, + 69, 116, 39, 22, 129, 176, 227, 210, + 191, 142, 221, 236, 123, 74, 25, 40, + 6, 55, 100, 85, 194, 243, 160, 145, + 71, 118, 37, 20, 131, 178, 225, 208, + 254, 207, 156, 173, 58, 11, 88, 105, + 4, 53, 102, 87, 192, 241, 162, 147, + 189, 140, 223, 238, 121, 72, 27, 42, + 193, 240, 163, 146, 5, 52, 103, 86, + 120, 73, 26, 43, 188, 141, 222, 239, + 130, 179, 224, 209, 70, 119, 36, 21, + 59, 10, 89, 104, 255, 206, 157, 172 +}; + /** * struct sht15_data - device instance specific data * @pdata: platform data (gpio's etc). @@ -86,6 +123,8 @@ static const struct sht15_temppair temppoints[] = { * @val_temp: last temperature value read from device. * @val_humid: last humidity value read from device. * @val_status: last status register value read from device. + * @checksum_ok: last value read from the device passed CRC validation. 
+ * @checksumming: flag used to enable the data validation with CRC. * @state: state identifying the action the driver is doing. * @measurements_valid: are the current stored measures valid (start condition). * @status_valid: is the current stored status valid (start condition). @@ -112,6 +151,8 @@ struct sht15_data { uint16_t val_temp; uint16_t val_humid; u8 val_status; + bool checksum_ok; + bool checksumming; enum sht15_state state; bool measurements_valid; bool status_valid; @@ -128,6 +169,40 @@ struct sht15_data { atomic_t interrupt_handled; }; +/** + * sht15_reverse() - reverse a byte + * @byte: byte to reverse. + */ +static u8 sht15_reverse(u8 byte) +{ + u8 i, c; + + for (c = 0, i = 0; i < 8; i++) + c |= (!!(byte & (1 << i))) << (7 - i); + return c; +} + +/** + * sht15_crc8() - compute crc8 + * @data: sht15 specific data. + * @value: sht15 retrieved data. + * + * This implements section 2 of the CRC datasheet. + */ +static u8 sht15_crc8(struct sht15_data *data, + const u8 *value, + int len) +{ + u8 crc = sht15_reverse(data->val_status & 0x0F); + + while (len--) { + crc = sht15_crc8_table[*value ^ crc]; + value++; + } + + return crc; +} + /** * sht15_connection_reset() - reset the comms interface * @data: sht15 specific data @@ -267,6 +342,26 @@ static int sht15_soft_reset(struct sht15_data *data) return ret; } +/** + * sht15_ack() - send a ack + * @data: sht15 specific data. + * + * Each byte of data is acknowledged by pulling the data line + * low for one clock pulse. + */ +static void sht15_ack(struct sht15_data *data) +{ + gpio_direction_output(data->pdata->gpio_data, 0); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_sck, 1); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_data, 1); + + gpio_direction_input(data->pdata->gpio_data); +} + /** * sht15_end_transmission() - notify device of end of transmission * @data: device state. @@ -338,6 +433,9 @@ static int sht15_update_status(struct sht15_data *data) { int ret = 0; u8 status; + u8 previous_config; + u8 dev_checksum = 0; + u8 checksum_vals[2]; int timeout = HZ; mutex_lock(&data->read_lock); @@ -348,8 +446,40 @@ static int sht15_update_status(struct sht15_data *data) goto error_ret; status = sht15_read_byte(data); + if (data->checksumming) { + sht15_ack(data); + dev_checksum = sht15_reverse(sht15_read_byte(data)); + checksum_vals[0] = SHT15_READ_STATUS; + checksum_vals[1] = status; + data->checksum_ok = (sht15_crc8(data, checksum_vals, 2) + == dev_checksum); + } + sht15_end_transmission(data); + /* + * Perform checksum validation on the received data. + * Specification mentions that in case a checksum verification + * fails, a soft reset command must be sent to the device. 
+ */ + if (data->checksumming && !data->checksum_ok) { + previous_config = data->val_status & 0x07; + ret = sht15_soft_reset(data); + if (ret) + goto error_ret; + if (previous_config) { + ret = sht15_send_status(data, previous_config); + if (ret) { + dev_err(data->dev, + "CRC validation failed, unable " + "to restore device settings\n"); + goto error_ret; + } + } + ret = -EAGAIN; + goto error_ret; + } + data->val_status = status; data->status_valid = true; data->last_status = jiffies; @@ -372,6 +502,7 @@ static int sht15_measurement(struct sht15_data *data, int timeout_msecs) { int ret; + u8 previous_config; ret = sht15_send_cmd(data, command); if (ret) @@ -395,6 +526,29 @@ static int sht15_measurement(struct sht15_data *data, sht15_connection_reset(data); return -ETIME; } + + /* + * Perform checksum validation on the received data. + * Specification mentions that in case a checksum verification fails, + * a soft reset command must be sent to the device. + */ + if (data->checksumming && !data->checksum_ok) { + previous_config = data->val_status & 0x07; + ret = sht15_soft_reset(data); + if (ret) + return ret; + if (previous_config) { + ret = sht15_send_status(data, previous_config); + if (ret) { + dev_err(data->dev, + "CRC validation failed, unable " + "to restore device settings\n"); + return ret; + } + } + return -EAGAIN; + } + return 0; } @@ -635,28 +789,11 @@ static irqreturn_t sht15_interrupt_fired(int irq, void *d) return IRQ_HANDLED; } -/** - * sht15_ack() - Send an ack to the device - * - * Each byte of data is acknowledged by pulling the data line - * low for one clock pulse. - */ -static void sht15_ack(struct sht15_data *data) -{ - gpio_direction_output(data->pdata->gpio_data, 0); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_sck, 1); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_data, 1); - - gpio_direction_input(data->pdata->gpio_data); -} - static void sht15_bh_read_data(struct work_struct *work_s) { uint16_t val = 0; + u8 dev_checksum = 0; + u8 checksum_vals[3]; struct sht15_data *data = container_of(work_s, struct sht15_data, read_work); @@ -681,6 +818,21 @@ static void sht15_bh_read_data(struct work_struct *work_s) sht15_ack(data); val |= sht15_read_byte(data); + if (data->checksumming) { + /* + * Ask the device for a checksum and read it back. + * Note: the device sends the checksum byte reversed. + */ + sht15_ack(data); + dev_checksum = sht15_reverse(sht15_read_byte(data)); + checksum_vals[0] = (data->state == SHT15_READING_TEMP) ? + SHT15_MEASURE_TEMP : SHT15_MEASURE_RH; + checksum_vals[1] = (u8) (val >> 8); + checksum_vals[2] = (u8) val; + data->checksum_ok + = (sht15_crc8(data, checksum_vals, 3) == dev_checksum); + } + /* Tell the device we are done */ sht15_end_transmission(data); @@ -754,6 +906,8 @@ static int __devinit sht15_probe(struct platform_device *pdev) } data->pdata = pdev->dev.platform_data; data->supply_uV = data->pdata->supply_mv * 1000; + if (data->pdata->checksum) + data->checksumming = true; if (data->pdata->no_otp_reload) status |= SHT15_STATUS_NO_OTP_RELOAD; if (data->pdata->low_resolution) diff --git a/include/linux/sht15.h b/include/linux/sht15.h index d836ca617323..f85c7c523da0 100644 --- a/include/linux/sht15.h +++ b/include/linux/sht15.h @@ -19,6 +19,7 @@ * @gpio_sck: no. of gpio to which the data clock is connected. * @supply_mv: supply voltage in mv. Overridden by regulator if * available. + * @checksum: flag to indicate the checksum should be validated. 
* @no_otp_reload: flag to indicate no reload from OTP. * @low_resolution: flag to indicate the temp/humidity resolution to use. */ @@ -26,6 +27,7 @@ struct sht15_platform_data { int gpio_data; int gpio_sck; int supply_mv; + bool checksum; bool no_otp_reload; bool low_resolution; }; -- cgit v1.2.3 From 7176ba23f8b589b1df3229574ff46fb904ce9ec5 Mon Sep 17 00:00:00 2001 From: Rhyland Klein Date: Mon, 16 May 2011 14:41:48 -0700 Subject: net: rfkill: add generic gpio rfkill driver This adds a new generic gpio rfkill driver to support rfkill switches which are controlled by gpios. The driver also supports passing in data about the clock for the radio, so that when rfkill is blocking, it can disable the clock. This driver assumes platform data is passed from the board files to configure it for specific devices. Original-patch-by: Anantha Idapalapati Signed-off-by: Rhyland Klein Signed-off-by: John W. Linville --- include/linux/rfkill-gpio.h | 43 +++++++++ net/rfkill/Kconfig | 9 ++ net/rfkill/Makefile | 1 + net/rfkill/rfkill-gpio.c | 227 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 280 insertions(+) create mode 100644 include/linux/rfkill-gpio.h create mode 100644 net/rfkill/rfkill-gpio.c (limited to 'include/linux') diff --git a/include/linux/rfkill-gpio.h b/include/linux/rfkill-gpio.h new file mode 100644 index 000000000000..a175d0598033 --- /dev/null +++ b/include/linux/rfkill-gpio.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2011, NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + + +#ifndef __RFKILL_GPIO_H +#define __RFKILL_GPIO_H + +#include +#include + +/** + * struct rfkill_gpio_platform_data - platform data for rfkill gpio device. + * for unused gpio's, the expected value is -1. + * @name: name for the gpio rf kill instance + * @reset_gpio: GPIO which is used for reseting rfkill switch + * @shutdown_gpio: GPIO which is used for shutdown of rfkill switch + * @power_clk_name: [optional] name of clk to turn off while blocked + */ + +struct rfkill_gpio_platform_data { + char *name; + int reset_gpio; + int shutdown_gpio; + const char *power_clk_name; + enum rfkill_type type; +}; + +#endif /* __RFKILL_GPIO_H */ diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig index 48464ca13b24..78efe895b663 100644 --- a/net/rfkill/Kconfig +++ b/net/rfkill/Kconfig @@ -33,3 +33,12 @@ config RFKILL_REGULATOR To compile this driver as a module, choose M here: the module will be called rfkill-regulator. + +config RFKILL_GPIO + tristate "GPIO RFKILL driver" + depends on RFKILL && GPIOLIB && HAVE_CLK + default n + help + If you say yes here you get support of a generic gpio RFKILL + driver. The platform should fill in the appropriate fields in the + rfkill_gpio_platform_data structure and pass that to the driver. 
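
For illustration, a board file would typically describe such a switch roughly as follows. This is only a sketch under assumptions: the GPIO numbers, the clock name and the "wifi" naming are invented for the example and are not taken from this patch.

    #include <linux/platform_device.h>
    #include <linux/rfkill.h>
    #include <linux/rfkill-gpio.h>

    /* All values here are hypothetical, board-specific settings. */
    static struct rfkill_gpio_platform_data wifi_rfkill_pdata = {
    	.name		= "wifi_rfkill",
    	.reset_gpio	= 25,
    	.shutdown_gpio	= -1,		/* unused GPIOs are passed as -1 */
    	.power_clk_name	= "wifi_32k_clk",
    	.type		= RFKILL_TYPE_WLAN,
    };

    static struct platform_device wifi_rfkill_device = {
    	.name	= "rfkill_gpio",	/* must match the platform driver name */
    	.id	= -1,
    	.dev	= {
    		.platform_data = &wifi_rfkill_pdata,
    	},
    };

The board init code would then register wifi_rfkill_device with platform_device_register(); once probed, blocking or unblocking the rfkill switch drives the configured GPIOs and, if a clock name was supplied, disables that clock while blocked.
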
diff --git a/net/rfkill/Makefile b/net/rfkill/Makefile index d9a5a58ffd8c..311768783f4a 100644 --- a/net/rfkill/Makefile +++ b/net/rfkill/Makefile @@ -6,3 +6,4 @@ rfkill-y += core.o rfkill-$(CONFIG_RFKILL_INPUT) += input.o obj-$(CONFIG_RFKILL) += rfkill.o obj-$(CONFIG_RFKILL_REGULATOR) += rfkill-regulator.o +obj-$(CONFIG_RFKILL_GPIO) += rfkill-gpio.o diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c new file mode 100644 index 000000000000..256c5ddd2d72 --- /dev/null +++ b/net/rfkill/rfkill-gpio.c @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2011, NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +enum rfkill_gpio_clk_state { + UNSPECIFIED = 0, + PWR_ENABLED, + PWR_DISABLED +}; + +#define PWR_CLK_SET(_RF, _EN) \ + ((_RF)->pwr_clk_enabled = (!(_EN) ? PWR_ENABLED : PWR_DISABLED)) +#define PWR_CLK_ENABLED(_RF) ((_RF)->pwr_clk_enabled == PWR_ENABLED) +#define PWR_CLK_DISABLED(_RF) ((_RF)->pwr_clk_enabled != PWR_ENABLED) + +struct rfkill_gpio_data { + struct rfkill_gpio_platform_data *pdata; + struct rfkill *rfkill_dev; + char *reset_name; + char *shutdown_name; + enum rfkill_gpio_clk_state pwr_clk_enabled; + struct clk *pwr_clk; +}; + +static int rfkill_gpio_set_power(void *data, bool blocked) +{ + struct rfkill_gpio_data *rfkill = data; + + if (blocked) { + if (gpio_is_valid(rfkill->pdata->shutdown_gpio)) + gpio_direction_output(rfkill->pdata->shutdown_gpio, 0); + if (gpio_is_valid(rfkill->pdata->reset_gpio)) + gpio_direction_output(rfkill->pdata->reset_gpio, 0); + if (rfkill->pwr_clk && PWR_CLK_ENABLED(rfkill)) + clk_disable(rfkill->pwr_clk); + } else { + if (rfkill->pwr_clk && PWR_CLK_DISABLED(rfkill)) + clk_enable(rfkill->pwr_clk); + if (gpio_is_valid(rfkill->pdata->reset_gpio)) + gpio_direction_output(rfkill->pdata->reset_gpio, 1); + if (gpio_is_valid(rfkill->pdata->shutdown_gpio)) + gpio_direction_output(rfkill->pdata->shutdown_gpio, 1); + } + + if (rfkill->pwr_clk) + PWR_CLK_SET(rfkill, blocked); + + return 0; +} + +static const struct rfkill_ops rfkill_gpio_ops = { + .set_block = rfkill_gpio_set_power, +}; + +static int rfkill_gpio_probe(struct platform_device *pdev) +{ + struct rfkill_gpio_data *rfkill; + struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data; + int ret = 0; + int len = 0; + + if (!pdata) { + pr_warn("%s: No platform data specified\n", __func__); + return -EINVAL; + } + + /* make sure at-least one of the GPIO is defined and that + * a name is specified for this instance */ + if (!pdata->name || (!gpio_is_valid(pdata->reset_gpio) && + !gpio_is_valid(pdata->shutdown_gpio))) { + pr_warn("%s: invalid platform data\n", __func__); + return -EINVAL; + } + + rfkill = kzalloc(sizeof(*rfkill), GFP_KERNEL); + if (!rfkill) + return -ENOMEM; + + rfkill->pdata = pdata; + + 
len = strlen(pdata->name); + rfkill->reset_name = kzalloc(len + 7, GFP_KERNEL); + if (!rfkill->reset_name) { + ret = -ENOMEM; + goto fail_alloc; + } + + rfkill->shutdown_name = kzalloc(len + 10, GFP_KERNEL); + if (!rfkill->shutdown_name) { + ret = -ENOMEM; + goto fail_reset_name; + } + + snprintf(rfkill->reset_name, len + 6 , "%s_reset", pdata->name); + snprintf(rfkill->shutdown_name, len + 9, "%s_shutdown", pdata->name); + + if (pdata->power_clk_name) { + rfkill->pwr_clk = clk_get(&pdev->dev, pdata->power_clk_name); + if (IS_ERR(rfkill->pwr_clk)) { + pr_warn("%s: can't find pwr_clk.\n", __func__); + goto fail_shutdown_name; + } + } + + if (gpio_is_valid(pdata->reset_gpio)) { + ret = gpio_request(pdata->reset_gpio, rfkill->reset_name); + if (ret) { + pr_warn("%s: failed to get reset gpio.\n", __func__); + goto fail_clock; + } + } + + if (gpio_is_valid(pdata->shutdown_gpio)) { + ret = gpio_request(pdata->shutdown_gpio, rfkill->shutdown_name); + if (ret) { + pr_warn("%s: failed to get shutdown gpio.\n", __func__); + goto fail_reset; + } + } + + rfkill->rfkill_dev = rfkill_alloc(pdata->name, &pdev->dev, pdata->type, + &rfkill_gpio_ops, rfkill); + if (!rfkill->rfkill_dev) + goto fail_shutdown; + + ret = rfkill_register(rfkill->rfkill_dev); + if (ret < 0) + goto fail_rfkill; + + platform_set_drvdata(pdev, rfkill); + + dev_info(&pdev->dev, "%s device registered.\n", pdata->name); + + return 0; + +fail_rfkill: + rfkill_destroy(rfkill->rfkill_dev); +fail_shutdown: + if (gpio_is_valid(pdata->shutdown_gpio)) + gpio_free(pdata->shutdown_gpio); +fail_reset: + if (gpio_is_valid(pdata->reset_gpio)) + gpio_free(pdata->reset_gpio); +fail_clock: + if (rfkill->pwr_clk) + clk_put(rfkill->pwr_clk); +fail_shutdown_name: + kfree(rfkill->shutdown_name); +fail_reset_name: + kfree(rfkill->reset_name); +fail_alloc: + kfree(rfkill); + + return ret; +} + +static int rfkill_gpio_remove(struct platform_device *pdev) +{ + struct rfkill_gpio_data *rfkill = platform_get_drvdata(pdev); + + rfkill_unregister(rfkill->rfkill_dev); + rfkill_destroy(rfkill->rfkill_dev); + if (gpio_is_valid(rfkill->pdata->shutdown_gpio)) + gpio_free(rfkill->pdata->shutdown_gpio); + if (gpio_is_valid(rfkill->pdata->reset_gpio)) + gpio_free(rfkill->pdata->reset_gpio); + if (rfkill->pwr_clk && PWR_CLK_ENABLED(rfkill)) + clk_disable(rfkill->pwr_clk); + if (rfkill->pwr_clk) + clk_put(rfkill->pwr_clk); + kfree(rfkill->shutdown_name); + kfree(rfkill->reset_name); + kfree(rfkill); + + return 0; +} + +static struct platform_driver rfkill_gpio_driver = { + .probe = rfkill_gpio_probe, + .remove = __devexit_p(rfkill_gpio_remove), + .driver = { + .name = "rfkill_gpio", + .owner = THIS_MODULE, + }, +}; + +static int __init rfkill_gpio_init(void) +{ + return platform_driver_register(&rfkill_gpio_driver); +} + +static void __exit rfkill_gpio_exit(void) +{ + platform_driver_unregister(&rfkill_gpio_driver); +} + +module_init(rfkill_gpio_init); +module_exit(rfkill_gpio_exit); + +MODULE_DESCRIPTION("gpio rfkill"); +MODULE_AUTHOR("NVIDIA"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 75d65a425c0163d3ec476ddc12b51087217a070c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 19 May 2011 13:50:07 -0700 Subject: hlist: remove software prefetching in hlist iterators They not only increase the code footprint, they actually make things slower rather than faster. On internationally acclaimed benchmarks ("make -j16" on an already fully built kernel source tree) the hlist prefetching slows down the build by up to 1%. 
(Almost all of it comes from hlist_for_each_entry_rcu() as used by avc_has_perm_noaudit(), which is very hot due to all the pathname lookups to see if there is anything to do). The cause seems to be two-fold: - on at least some Intel cores, prefetch(NULL) ends up with some microarchitectural stall due to the TLB miss that it incurs. The hlist case triggers this very commonly, since the NULL pointer is the last entry in the list. - the prefetch appears to cause more D$ activity, probably because it prefetches hash list entries that are never actually used (because we ended the search early due to a hit). Regardless, the numbers clearly say that the implicit prefetching is simply a bad idea. If some _particular_ user of the hlist iterators wants to prefetch the next list entry, they can do so themselves explicitly, rather than depend on all list iterators doing so implicitly. Acked-by: Ingo Molnar Acked-by: David S. Miller Cc: linux-arch@vger.kernel.org Signed-off-by: Linus Torvalds --- include/linux/list.h | 9 ++++----- include/linux/rculist.h | 10 +++++----- 2 files changed, 9 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 3a54266a1e85..9ac11148e037 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -664,8 +664,7 @@ static inline void hlist_move_list(struct hlist_head *old, #define hlist_entry(ptr, type, member) container_of(ptr,type,member) #define hlist_for_each(pos, head) \ - for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \ - pos = pos->next) + for (pos = (head)->first; pos ; pos = pos->next) #define hlist_for_each_safe(pos, n, head) \ for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ @@ -680,7 +679,7 @@ static inline void hlist_move_list(struct hlist_head *old, */ #define hlist_for_each_entry(tpos, pos, head, member) \ for (pos = (head)->first; \ - pos && ({ prefetch(pos->next); 1;}) && \ + pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ pos = pos->next) @@ -692,7 +691,7 @@ static inline void hlist_move_list(struct hlist_head *old, */ #define hlist_for_each_entry_continue(tpos, pos, member) \ for (pos = (pos)->next; \ - pos && ({ prefetch(pos->next); 1;}) && \ + pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ pos = pos->next) @@ -703,7 +702,7 @@ static inline void hlist_move_list(struct hlist_head *old, * @member: the name of the hlist_node within the struct. 
*/ #define hlist_for_each_entry_from(tpos, pos, member) \ - for (; pos && ({ prefetch(pos->next); 1;}) && \ + for (; pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ pos = pos->next) diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 2dea94fc4402..900a97a44769 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -427,7 +427,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, #define __hlist_for_each_rcu(pos, head) \ for (pos = rcu_dereference(hlist_first_rcu(head)); \ - pos && ({ prefetch(pos->next); 1; }); \ + pos; \ pos = rcu_dereference(hlist_next_rcu(pos))) /** @@ -443,7 +443,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, */ #define hlist_for_each_entry_rcu(tpos, pos, head, member) \ for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \ - pos && ({ prefetch(pos->next); 1; }) && \ + pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference_raw(hlist_next_rcu(pos))) @@ -460,7 +460,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, */ #define hlist_for_each_entry_rcu_bh(tpos, pos, head, member) \ for (pos = rcu_dereference_bh((head)->first); \ - pos && ({ prefetch(pos->next); 1; }) && \ + pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference_bh(pos->next)) @@ -472,7 +472,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, */ #define hlist_for_each_entry_continue_rcu(tpos, pos, member) \ for (pos = rcu_dereference((pos)->next); \ - pos && ({ prefetch(pos->next); 1; }) && \ + pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference(pos->next)) @@ -484,7 +484,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, */ #define hlist_for_each_entry_continue_rcu_bh(tpos, pos, member) \ for (pos = rcu_dereference_bh((pos)->next); \ - pos && ({ prefetch(pos->next); 1; }) && \ + pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference_bh(pos->next)) -- cgit v1.2.3 From e66eed651fd18a961f11cda62f3b5286c8cc4f9f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 19 May 2011 14:15:29 -0700 Subject: list: remove prefetching from regular list iterators This is removes the use of software prefetching from the regular list iterators. We don't want it. If you do want to prefetch in some iterator of yours, go right ahead. Just don't expect the iterator to do it, since normally the downsides are bigger than the upsides. It also replaces with , because the use of LIST_POISON ends up needing it. is sadly not self-contained, and including prefetch.h just happened to hide that. Suggested by David Miller (networking has a lot of regular lists that are often empty or a single entry, and prefetching is not going to do anything but add useless instructions). Acked-by: Ingo Molnar Acked-by: David S. Miller Cc: linux-arch@vger.kernel.org Signed-off-by: Linus Torvalds --- include/linux/list.h | 26 +++++++++++--------------- include/linux/rculist.h | 6 +++--- 2 files changed, 14 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 9ac11148e037..cc6d2aa6b415 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include /* * Simple doubly linked list implementation. @@ -367,18 +367,15 @@ static inline void list_splice_tail_init(struct list_head *list, * @head: the head for your list. 
*/ #define list_for_each(pos, head) \ - for (pos = (head)->next; prefetch(pos->next), pos != (head); \ - pos = pos->next) + for (pos = (head)->next; pos != (head); pos = pos->next) /** * __list_for_each - iterate over a list * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. * - * This variant differs from list_for_each() in that it's the - * simplest possible list iteration code, no prefetching is done. - * Use this for code that knows the list to be very short (empty - * or 1 entry) most of the time. + * This variant doesn't differ from list_for_each() any more. + * We don't do prefetching in either case. */ #define __list_for_each(pos, head) \ for (pos = (head)->next; pos != (head); pos = pos->next) @@ -389,8 +386,7 @@ static inline void list_splice_tail_init(struct list_head *list, * @head: the head for your list. */ #define list_for_each_prev(pos, head) \ - for (pos = (head)->prev; prefetch(pos->prev), pos != (head); \ - pos = pos->prev) + for (pos = (head)->prev; pos != (head); pos = pos->prev) /** * list_for_each_safe - iterate over a list safe against removal of list entry @@ -410,7 +406,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_prev_safe(pos, n, head) \ for (pos = (head)->prev, n = pos->prev; \ - prefetch(pos->prev), pos != (head); \ + pos != (head); \ pos = n, n = pos->prev) /** @@ -421,7 +417,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry(pos, head, member) \ for (pos = list_entry((head)->next, typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ + &pos->member != (head); \ pos = list_entry(pos->member.next, typeof(*pos), member)) /** @@ -432,7 +428,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry_reverse(pos, head, member) \ for (pos = list_entry((head)->prev, typeof(*pos), member); \ - prefetch(pos->member.prev), &pos->member != (head); \ + &pos->member != (head); \ pos = list_entry(pos->member.prev, typeof(*pos), member)) /** @@ -457,7 +453,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry_continue(pos, head, member) \ for (pos = list_entry(pos->member.next, typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ + &pos->member != (head); \ pos = list_entry(pos->member.next, typeof(*pos), member)) /** @@ -471,7 +467,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry_continue_reverse(pos, head, member) \ for (pos = list_entry(pos->member.prev, typeof(*pos), member); \ - prefetch(pos->member.prev), &pos->member != (head); \ + &pos->member != (head); \ pos = list_entry(pos->member.prev, typeof(*pos), member)) /** @@ -483,7 +479,7 @@ static inline void list_splice_tail_init(struct list_head *list, * Iterate over list of given type, continuing from current position. 
*/ #define list_for_each_entry_from(pos, head, member) \ - for (; prefetch(pos->member.next), &pos->member != (head); \ + for (; &pos->member != (head); \ pos = list_entry(pos->member.next, typeof(*pos), member)) /** diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 900a97a44769..e3beb315517a 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -253,7 +253,7 @@ static inline void list_splice_init_rcu(struct list_head *list, */ #define list_for_each_entry_rcu(pos, head, member) \ for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ + &pos->member != (head); \ pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) @@ -270,7 +270,7 @@ static inline void list_splice_init_rcu(struct list_head *list, */ #define list_for_each_continue_rcu(pos, head) \ for ((pos) = rcu_dereference_raw(list_next_rcu(pos)); \ - prefetch((pos)->next), (pos) != (head); \ + (pos) != (head); \ (pos) = rcu_dereference_raw(list_next_rcu(pos))) /** @@ -284,7 +284,7 @@ static inline void list_splice_init_rcu(struct list_head *list, */ #define list_for_each_entry_continue_rcu(pos, head, member) \ for (pos = list_entry_rcu(pos->member.next, typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ + &pos->member != (head); \ pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) /** -- cgit v1.2.3 From d410fa4ef99112386de5f218dd7df7b4fca910b4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 19 May 2011 15:59:38 -0700 Subject: Create Documentation/security/, move LSM-, credentials-, and keys-related files from Documentation/ to Documentation/security/, add Documentation/security/00-INDEX, and update all occurrences of Documentation/ to Documentation/security/. 
--- Documentation/00-INDEX | 6 +- Documentation/SELinux.txt | 27 - Documentation/Smack.txt | 541 --------- Documentation/apparmor.txt | 39 - Documentation/credentials.txt | 581 ---------- Documentation/filesystems/nfs/idmapper.txt | 4 +- Documentation/keys-request-key.txt | 202 ---- Documentation/keys-trusted-encrypted.txt | 145 --- Documentation/keys.txt | 1290 --------------------- Documentation/networking/dns_resolver.txt | 4 +- Documentation/security/00-INDEX | 18 + Documentation/security/SELinux.txt | 27 + Documentation/security/Smack.txt | 541 +++++++++ Documentation/security/apparmor.txt | 39 + Documentation/security/credentials.txt | 581 ++++++++++ Documentation/security/keys-request-key.txt | 202 ++++ Documentation/security/keys-trusted-encrypted.txt | 145 +++ Documentation/security/keys.txt | 1290 +++++++++++++++++++++ Documentation/security/tomoyo.txt | 55 + Documentation/tomoyo.txt | 55 - MAINTAINERS | 6 +- include/linux/cred.h | 2 +- include/linux/key.h | 2 +- kernel/cred.c | 2 +- scripts/selinux/README | 2 +- security/apparmor/match.c | 2 +- security/apparmor/policy_unpack.c | 4 +- security/keys/encrypted.c | 2 +- security/keys/request_key.c | 2 +- security/keys/request_key_auth.c | 2 +- security/keys/trusted.c | 2 +- 31 files changed, 2918 insertions(+), 2902 deletions(-) delete mode 100644 Documentation/SELinux.txt delete mode 100644 Documentation/Smack.txt delete mode 100644 Documentation/apparmor.txt delete mode 100644 Documentation/credentials.txt delete mode 100644 Documentation/keys-request-key.txt delete mode 100644 Documentation/keys-trusted-encrypted.txt delete mode 100644 Documentation/keys.txt create mode 100644 Documentation/security/00-INDEX create mode 100644 Documentation/security/SELinux.txt create mode 100644 Documentation/security/Smack.txt create mode 100644 Documentation/security/apparmor.txt create mode 100644 Documentation/security/credentials.txt create mode 100644 Documentation/security/keys-request-key.txt create mode 100644 Documentation/security/keys-trusted-encrypted.txt create mode 100644 Documentation/security/keys.txt create mode 100644 Documentation/security/tomoyo.txt delete mode 100644 Documentation/tomoyo.txt (limited to 'include/linux') diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index c17cd4bb2290..c8c1cf631b37 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -192,10 +192,6 @@ kernel-docs.txt - listing of various WWW + books that document kernel internals. kernel-parameters.txt - summary listing of command line / boot prompt args for the kernel. -keys-request-key.txt - - description of the kernel key request service. -keys.txt - - description of the kernel key retention service. kobject.txt - info of the kobject infrastructure of the Linux kernel. kprobes.txt @@ -294,6 +290,8 @@ scheduler/ - directory with info on the scheduler. scsi/ - directory with info on Linux scsi support. +security/ + - directory that contains security-related info serial/ - directory with info on the low level serial API. serial-console.txt diff --git a/Documentation/SELinux.txt b/Documentation/SELinux.txt deleted file mode 100644 index 07eae00f3314..000000000000 --- a/Documentation/SELinux.txt +++ /dev/null @@ -1,27 +0,0 @@ -If you want to use SELinux, chances are you will want -to use the distro-provided policies, or install the -latest reference policy release from - http://oss.tresys.com/projects/refpolicy - -However, if you want to install a dummy policy for -testing, you can do using 'mdp' provided under -scripts/selinux. 
Note that this requires the selinux -userspace to be installed - in particular you will -need checkpolicy to compile a kernel, and setfiles and -fixfiles to label the filesystem. - - 1. Compile the kernel with selinux enabled. - 2. Type 'make' to compile mdp. - 3. Make sure that you are not running with - SELinux enabled and a real policy. If - you are, reboot with selinux disabled - before continuing. - 4. Run install_policy.sh: - cd scripts/selinux - sh install_policy.sh - -Step 4 will create a new dummy policy valid for your -kernel, with a single selinux user, role, and type. -It will compile the policy, will set your SELINUXTYPE to -dummy in /etc/selinux/config, install the compiled policy -as 'dummy', and relabel your filesystem. diff --git a/Documentation/Smack.txt b/Documentation/Smack.txt deleted file mode 100644 index e9dab41c0fe0..000000000000 --- a/Documentation/Smack.txt +++ /dev/null @@ -1,541 +0,0 @@ - - - "Good for you, you've decided to clean the elevator!" - - The Elevator, from Dark Star - -Smack is the the Simplified Mandatory Access Control Kernel. -Smack is a kernel based implementation of mandatory access -control that includes simplicity in its primary design goals. - -Smack is not the only Mandatory Access Control scheme -available for Linux. Those new to Mandatory Access Control -are encouraged to compare Smack with the other mechanisms -available to determine which is best suited to the problem -at hand. - -Smack consists of three major components: - - The kernel - - A start-up script and a few modified applications - - Configuration data - -The kernel component of Smack is implemented as a Linux -Security Modules (LSM) module. It requires netlabel and -works best with file systems that support extended attributes, -although xattr support is not strictly required. -It is safe to run a Smack kernel under a "vanilla" distribution. -Smack kernels use the CIPSO IP option. Some network -configurations are intolerant of IP options and can impede -access to systems that use them as Smack does. - -The startup script etc-init.d-smack should be installed -in /etc/init.d/smack and should be invoked early in the -start-up process. On Fedora rc5.d/S02smack is recommended. -This script ensures that certain devices have the correct -Smack attributes and loads the Smack configuration if -any is defined. This script invokes two programs that -ensure configuration data is properly formatted. These -programs are /usr/sbin/smackload and /usr/sin/smackcipso. -The system will run just fine without these programs, -but it will be difficult to set access rules properly. - -A version of "ls" that provides a "-M" option to display -Smack labels on long listing is available. - -A hacked version of sshd that allows network logins by users -with specific Smack labels is available. This version does -not work for scp. You must set the /etc/ssh/sshd_config -line: - UsePrivilegeSeparation no - -The format of /etc/smack/usr is: - - username smack - -In keeping with the intent of Smack, configuration data is -minimal and not strictly required. The most important -configuration step is mounting the smackfs pseudo filesystem. - -Add this line to /etc/fstab: - - smackfs /smack smackfs smackfsdef=* 0 0 - -and create the /smack directory for mounting. - -Smack uses extended attributes (xattrs) to store file labels. -The command to set a Smack label on a file is: - - # attr -S -s SMACK64 -V "value" path - -NOTE: Smack labels are limited to 23 characters. 
The attr command - does not enforce this restriction and can be used to set - invalid Smack labels on files. - -If you don't do anything special all users will get the floor ("_") -label when they log in. If you do want to log in via the hacked ssh -at other labels use the attr command to set the smack value on the -home directory and its contents. - -You can add access rules in /etc/smack/accesses. They take the form: - - subjectlabel objectlabel access - -access is a combination of the letters rwxa which specify the -kind of access permitted a subject with subjectlabel on an -object with objectlabel. If there is no rule no access is allowed. - -A process can see the smack label it is running with by -reading /proc/self/attr/current. A privileged process can -set the process smack by writing there. - -Look for additional programs on http://schaufler-ca.com - -From the Smack Whitepaper: - -The Simplified Mandatory Access Control Kernel - -Casey Schaufler -casey@schaufler-ca.com - -Mandatory Access Control - -Computer systems employ a variety of schemes to constrain how information is -shared among the people and services using the machine. Some of these schemes -allow the program or user to decide what other programs or users are allowed -access to pieces of data. These schemes are called discretionary access -control mechanisms because the access control is specified at the discretion -of the user. Other schemes do not leave the decision regarding what a user or -program can access up to users or programs. These schemes are called mandatory -access control mechanisms because you don't have a choice regarding the users -or programs that have access to pieces of data. - -Bell & LaPadula - -From the middle of the 1980's until the turn of the century Mandatory Access -Control (MAC) was very closely associated with the Bell & LaPadula security -model, a mathematical description of the United States Department of Defense -policy for marking paper documents. MAC in this form enjoyed a following -within the Capital Beltway and Scandinavian supercomputer centers but was -often sited as failing to address general needs. - -Domain Type Enforcement - -Around the turn of the century Domain Type Enforcement (DTE) became popular. -This scheme organizes users, programs, and data into domains that are -protected from each other. This scheme has been widely deployed as a component -of popular Linux distributions. The administrative overhead required to -maintain this scheme and the detailed understanding of the whole system -necessary to provide a secure domain mapping leads to the scheme being -disabled or used in limited ways in the majority of cases. - -Smack - -Smack is a Mandatory Access Control mechanism designed to provide useful MAC -while avoiding the pitfalls of its predecessors. The limitations of Bell & -LaPadula are addressed by providing a scheme whereby access can be controlled -according to the requirements of the system and its purpose rather than those -imposed by an arcane government policy. The complexity of Domain Type -Enforcement and avoided by defining access controls in terms of the access -modes already in use. - -Smack Terminology - -The jargon used to talk about Smack will be familiar to those who have dealt -with other MAC systems and shouldn't be too difficult for the uninitiated to -pick up. There are four terms that are used in a specific way and that are -especially important: - - Subject: A subject is an active entity on the computer system. 
- On Smack a subject is a task, which is in turn the basic unit - of execution. - - Object: An object is a passive entity on the computer system. - On Smack files of all types, IPC, and tasks can be objects. - - Access: Any attempt by a subject to put information into or get - information from an object is an access. - - Label: Data that identifies the Mandatory Access Control - characteristics of a subject or an object. - -These definitions are consistent with the traditional use in the security -community. There are also some terms from Linux that are likely to crop up: - - Capability: A task that possesses a capability has permission to - violate an aspect of the system security policy, as identified by - the specific capability. A task that possesses one or more - capabilities is a privileged task, whereas a task with no - capabilities is an unprivileged task. - - Privilege: A task that is allowed to violate the system security - policy is said to have privilege. As of this writing a task can - have privilege either by possessing capabilities or by having an - effective user of root. - -Smack Basics - -Smack is an extension to a Linux system. It enforces additional restrictions -on what subjects can access which objects, based on the labels attached to -each of the subject and the object. - -Labels - -Smack labels are ASCII character strings, one to twenty-three characters in -length. Single character labels using special characters, that being anything -other than a letter or digit, are reserved for use by the Smack development -team. Smack labels are unstructured, case sensitive, and the only operation -ever performed on them is comparison for equality. Smack labels cannot -contain unprintable characters, the "/" (slash), the "\" (backslash), the "'" -(quote) and '"' (double-quote) characters. -Smack labels cannot begin with a '-', which is reserved for special options. - -There are some predefined labels: - - _ Pronounced "floor", a single underscore character. - ^ Pronounced "hat", a single circumflex character. - * Pronounced "star", a single asterisk character. - ? Pronounced "huh", a single question mark character. - @ Pronounced "Internet", a single at sign character. - -Every task on a Smack system is assigned a label. System tasks, such as -init(8) and systems daemons, are run with the floor ("_") label. User tasks -are assigned labels according to the specification found in the -/etc/smack/user configuration file. - -Access Rules - -Smack uses the traditional access modes of Linux. These modes are read, -execute, write, and occasionally append. There are a few cases where the -access mode may not be obvious. These include: - - Signals: A signal is a write operation from the subject task to - the object task. - Internet Domain IPC: Transmission of a packet is considered a - write operation from the source task to the destination task. - -Smack restricts access based on the label attached to a subject and the label -attached to the object it is trying to access. The rules enforced are, in -order: - - 1. Any access requested by a task labeled "*" is denied. - 2. A read or execute access requested by a task labeled "^" - is permitted. - 3. A read or execute access requested on an object labeled "_" - is permitted. - 4. Any access requested on an object labeled "*" is permitted. - 5. Any access requested by a task on an object with the same - label is permitted. - 6. Any access requested that is explicitly defined in the loaded - rule set is permitted. - 7. 
Any other access is denied. - -Smack Access Rules - -With the isolation provided by Smack access separation is simple. There are -many interesting cases where limited access by subjects to objects with -different labels is desired. One example is the familiar spy model of -sensitivity, where a scientist working on a highly classified project would be -able to read documents of lower classifications and anything she writes will -be "born" highly classified. To accommodate such schemes Smack includes a -mechanism for specifying rules allowing access between labels. - -Access Rule Format - -The format of an access rule is: - - subject-label object-label access - -Where subject-label is the Smack label of the task, object-label is the Smack -label of the thing being accessed, and access is a string specifying the sort -of access allowed. The Smack labels are limited to 23 characters. The access -specification is searched for letters that describe access modes: - - a: indicates that append access should be granted. - r: indicates that read access should be granted. - w: indicates that write access should be granted. - x: indicates that execute access should be granted. - -Uppercase values for the specification letters are allowed as well. -Access mode specifications can be in any order. Examples of acceptable rules -are: - - TopSecret Secret rx - Secret Unclass R - Manager Game x - User HR w - New Old rRrRr - Closed Off - - -Examples of unacceptable rules are: - - Top Secret Secret rx - Ace Ace r - Odd spells waxbeans - -Spaces are not allowed in labels. Since a subject always has access to files -with the same label specifying a rule for that case is pointless. Only -valid letters (rwxaRWXA) and the dash ('-') character are allowed in -access specifications. The dash is a placeholder, so "a-r" is the same -as "ar". A lone dash is used to specify that no access should be allowed. - -Applying Access Rules - -The developers of Linux rarely define new sorts of things, usually importing -schemes and concepts from other systems. Most often, the other systems are -variants of Unix. Unix has many endearing properties, but consistency of -access control models is not one of them. Smack strives to treat accesses as -uniformly as is sensible while keeping with the spirit of the underlying -mechanism. - -File system objects including files, directories, named pipes, symbolic links, -and devices require access permissions that closely match those used by mode -bit access. To open a file for reading read access is required on the file. To -search a directory requires execute access. Creating a file with write access -requires both read and write access on the containing directory. Deleting a -file requires read and write access to the file and to the containing -directory. It is possible that a user may be able to see that a file exists -but not any of its attributes by the circumstance of having read access to the -containing directory but not to the differently labeled file. This is an -artifact of the file name being data in the directory, not a part of the file. - -IPC objects, message queues, semaphore sets, and memory segments exist in flat -namespaces and access requests are only required to match the object in -question. - -Process objects reflect tasks on the system and the Smack label used to access -them is the same Smack label that the task would use for its own access -attempts. Sending a signal via the kill() system call is a write operation -from the signaler to the recipient. 
Debugging a process requires both reading -and writing. Creating a new task is an internal operation that results in two -tasks with identical Smack labels and requires no access checks. - -Sockets are data structures attached to processes and sending a packet from -one process to another requires that the sender have write access to the -receiver. The receiver is not required to have read access to the sender. - -Setting Access Rules - -The configuration file /etc/smack/accesses contains the rules to be set at -system startup. The contents are written to the special file /smack/load. -Rules can be written to /smack/load at any time and take effect immediately. -For any pair of subject and object labels there can be only one rule, with the -most recently specified overriding any earlier specification. - -The program smackload is provided to ensure data is formatted -properly when written to /smack/load. This program reads lines -of the form - - subjectlabel objectlabel mode. - -Task Attribute - -The Smack label of a process can be read from /proc//attr/current. A -process can read its own Smack label from /proc/self/attr/current. A -privileged process can change its own Smack label by writing to -/proc/self/attr/current but not the label of another process. - -File Attribute - -The Smack label of a filesystem object is stored as an extended attribute -named SMACK64 on the file. This attribute is in the security namespace. It can -only be changed by a process with privilege. - -Privilege - -A process with CAP_MAC_OVERRIDE is privileged. - -Smack Networking - -As mentioned before, Smack enforces access control on network protocol -transmissions. Every packet sent by a Smack process is tagged with its Smack -label. This is done by adding a CIPSO tag to the header of the IP packet. Each -packet received is expected to have a CIPSO tag that identifies the label and -if it lacks such a tag the network ambient label is assumed. Before the packet -is delivered a check is made to determine that a subject with the label on the -packet has write access to the receiving process and if that is not the case -the packet is dropped. - -CIPSO Configuration - -It is normally unnecessary to specify the CIPSO configuration. The default -values used by the system handle all internal cases. Smack will compose CIPSO -label values to match the Smack labels being used without administrative -intervention. Unlabeled packets that come into the system will be given the -ambient label. - -Smack requires configuration in the case where packets from a system that is -not smack that speaks CIPSO may be encountered. Usually this will be a Trusted -Solaris system, but there are other, less widely deployed systems out there. -CIPSO provides 3 important values, a Domain Of Interpretation (DOI), a level, -and a category set with each packet. The DOI is intended to identify a group -of systems that use compatible labeling schemes, and the DOI specified on the -smack system must match that of the remote system or packets will be -discarded. The DOI is 3 by default. The value can be read from /smack/doi and -can be changed by writing to /smack/doi. - -The label and category set are mapped to a Smack label as defined in -/etc/smack/cipso. - -A Smack/CIPSO mapping has the form: - - smack level [category [category]*] - -Smack does not expect the level or category sets to be related in any -particular way and does not assume or assign accesses based on them. 
Some -examples of mappings: - - TopSecret 7 - TS:A,B 7 1 2 - SecBDE 5 2 4 6 - RAFTERS 7 12 26 - -The ":" and "," characters are permitted in a Smack label but have no special -meaning. - -The mapping of Smack labels to CIPSO values is defined by writing to -/smack/cipso. Again, the format of data written to this special file -is highly restrictive, so the program smackcipso is provided to -ensure the writes are done properly. This program takes mappings -on the standard input and sends them to /smack/cipso properly. - -In addition to explicit mappings Smack supports direct CIPSO mappings. One -CIPSO level is used to indicate that the category set passed in the packet is -in fact an encoding of the Smack label. The level used is 250 by default. The -value can be read from /smack/direct and changed by writing to /smack/direct. - -Socket Attributes - -There are two attributes that are associated with sockets. These attributes -can only be set by privileged tasks, but any task can read them for their own -sockets. - - SMACK64IPIN: The Smack label of the task object. A privileged - program that will enforce policy may set this to the star label. - - SMACK64IPOUT: The Smack label transmitted with outgoing packets. - A privileged program may set this to match the label of another - task with which it hopes to communicate. - -Smack Netlabel Exceptions - -You will often find that your labeled application has to talk to the outside, -unlabeled world. To do this there's a special file /smack/netlabel where you can -add some exceptions in the form of : -@IP1 LABEL1 or -@IP2/MASK LABEL2 - -It means that your application will have unlabeled access to @IP1 if it has -write access on LABEL1, and access to the subnet @IP2/MASK if it has write -access on LABEL2. - -Entries in the /smack/netlabel file are matched by longest mask first, like in -classless IPv4 routing. - -A special label '@' and an option '-CIPSO' can be used there : -@ means Internet, any application with any label has access to it --CIPSO means standard CIPSO networking - -If you don't know what CIPSO is and don't plan to use it, you can just do : -echo 127.0.0.1 -CIPSO > /smack/netlabel -echo 0.0.0.0/0 @ > /smack/netlabel - -If you use CIPSO on your 192.168.0.0/16 local network and need also unlabeled -Internet access, you can have : -echo 127.0.0.1 -CIPSO > /smack/netlabel -echo 192.168.0.0/16 -CIPSO > /smack/netlabel -echo 0.0.0.0/0 @ > /smack/netlabel - - -Writing Applications for Smack - -There are three sorts of applications that will run on a Smack system. How an -application interacts with Smack will determine what it will have to do to -work properly under Smack. - -Smack Ignorant Applications - -By far the majority of applications have no reason whatever to care about the -unique properties of Smack. Since invoking a program has no impact on the -Smack label associated with the process the only concern likely to arise is -whether the process has execute access to the program. - -Smack Relevant Applications - -Some programs can be improved by teaching them about Smack, but do not make -any security decisions themselves. The utility ls(1) is one example of such a -program. - -Smack Enforcing Applications - -These are special programs that not only know about Smack, but participate in -the enforcement of system policy. In most cases these are the programs that -set up user sessions. There are also network services that provide information -to processes running with various labels. 
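A minimal sketch of such an enforcing application, assuming the privilege described earlier; it only uses the /proc/self/attr/current interface shown above, and the label name and shell path it is given are illustrative:

    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>

    /*
     * Illustrative only: give the current task the requested label by
     * writing /proc/self/attr/current, then start a shell.  The writing
     * task must be privileged as described above.
     */
    static int start_session(const char *label, const char *shell)
    {
            int fd = open("/proc/self/attr/current", O_WRONLY);

            if (fd < 0)
                    return -1;
            if (write(fd, label, strlen(label)) < 0) {
                    close(fd);
                    return -1;
            }
            close(fd);

            /* Invoking a program does not change the Smack label. */
            return execl(shell, shell, (char *)NULL);
    }

Because invoking a program has no impact on the label, the shell and everything it starts keep running at the label chosen here.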
- -File System Interfaces - -Smack maintains labels on file system objects using extended attributes. The -Smack label of a file, directory, or other file system object can be obtained -using getxattr(2). - - len = getxattr("/", "security.SMACK64", value, sizeof (value)); - -will put the Smack label of the root directory into value. A privileged -process can set the Smack label of a file system object with setxattr(2). - - len = strlen("Rubble"); - rc = setxattr("/foo", "security.SMACK64", "Rubble", len, 0); - -will set the Smack label of /foo to "Rubble" if the program has appropriate -privilege. - -Socket Interfaces - -The socket attributes can be read using fgetxattr(2). - -A privileged process can set the Smack label of outgoing packets with -fsetxattr(2). - - len = strlen("Rubble"); - rc = fsetxattr(fd, "security.SMACK64IPOUT", "Rubble", len, 0); - -will set the Smack label "Rubble" on packets going out from the socket if the -program has appropriate privilege. - - rc = fsetxattr(fd, "security.SMACK64IPIN", "*", strlen("*"), 0); - -will set the Smack label "*" as the object label against which incoming -packets will be checked if the program has appropriate privilege. - -Administration - -Smack supports some mount options: - - smackfsdef=label: specifies the label to give files that lack - the Smack label extended attribute. - - smackfsroot=label: specifies the label to assign the root of the - file system if it lacks the Smack extended attribute. - - smackfshat=label: specifies a label that must have read access to - all labels set on the filesystem. Not yet enforced. - - smackfsfloor=label: specifies a label to which all labels set on the - filesystem must have read access. Not yet enforced. - -These mount options apply to all file system types. - -Smack auditing - -If you want Smack auditing of security events, you need to set CONFIG_AUDIT -in your kernel configuration. -By default, all denied events will be audited. You can change this behavior by -writing a single character to the /smack/logging file: -0 : no logging -1 : log denied (default) -2 : log accepted -3 : log denied & accepted - -Events are logged as 'key=value' pairs; for each event you will at least get -the subject, the object, the rights requested, the action, the kernel function -that triggered the event, plus other pairs depending on the type of event -audited. diff --git a/Documentation/apparmor.txt b/Documentation/apparmor.txt deleted file mode 100644 index 93c1fd7d0635..000000000000 --- a/Documentation/apparmor.txt +++ /dev/null @@ -1,39 +0,0 @@ ---- What is AppArmor? --- - -AppArmor is a MAC style security extension for the Linux kernel. It implements -a task centered policy, with task "profiles" being created and loaded -from user space. Tasks on the system that do not have a profile defined for -them run in an unconfined state which is equivalent to standard Linux DAC -permissions. - ---- How to enable/disable --- - -set CONFIG_SECURITY_APPARMOR=y - -If AppArmor should be selected as the default security module then - set CONFIG_DEFAULT_SECURITY="apparmor" - and CONFIG_SECURITY_APPARMOR_BOOTPARAM_VALUE=1 - -Build the kernel - -If AppArmor is not the default security module it can be enabled by passing -security=apparmor on the kernel's command line.
- -If AppArmor is the default security module it can be disabled by passing -apparmor=0, security=XXXX (where XXX is valid security module), on the -kernel's command line - -For AppArmor to enforce any restrictions beyond standard Linux DAC permissions -policy must be loaded into the kernel from user space (see the Documentation -and tools links). - ---- Documentation --- - -Documentation can be found on the wiki. - ---- Links --- - -Mailing List - apparmor@lists.ubuntu.com -Wiki - http://apparmor.wiki.kernel.org/ -User space tools - https://launchpad.net/apparmor -Kernel module - git://git.kernel.org/pub/scm/linux/kernel/git/jj/apparmor-dev.git diff --git a/Documentation/credentials.txt b/Documentation/credentials.txt deleted file mode 100644 index 995baf379c07..000000000000 --- a/Documentation/credentials.txt +++ /dev/null @@ -1,581 +0,0 @@ - ==================== - CREDENTIALS IN LINUX - ==================== - -By: David Howells - -Contents: - - (*) Overview. - - (*) Types of credentials. - - (*) File markings. - - (*) Task credentials. - - - Immutable credentials. - - Accessing task credentials. - - Accessing another task's credentials. - - Altering credentials. - - Managing credentials. - - (*) Open file credentials. - - (*) Overriding the VFS's use of credentials. - - -======== -OVERVIEW -======== - -There are several parts to the security check performed by Linux when one -object acts upon another: - - (1) Objects. - - Objects are things in the system that may be acted upon directly by - userspace programs. Linux has a variety of actionable objects, including: - - - Tasks - - Files/inodes - - Sockets - - Message queues - - Shared memory segments - - Semaphores - - Keys - - As a part of the description of all these objects there is a set of - credentials. What's in the set depends on the type of object. - - (2) Object ownership. - - Amongst the credentials of most objects, there will be a subset that - indicates the ownership of that object. This is used for resource - accounting and limitation (disk quotas and task rlimits for example). - - In a standard UNIX filesystem, for instance, this will be defined by the - UID marked on the inode. - - (3) The objective context. - - Also amongst the credentials of those objects, there will be a subset that - indicates the 'objective context' of that object. This may or may not be - the same set as in (2) - in standard UNIX files, for instance, this is the - defined by the UID and the GID marked on the inode. - - The objective context is used as part of the security calculation that is - carried out when an object is acted upon. - - (4) Subjects. - - A subject is an object that is acting upon another object. - - Most of the objects in the system are inactive: they don't act on other - objects within the system. Processes/tasks are the obvious exception: - they do stuff; they access and manipulate things. - - Objects other than tasks may under some circumstances also be subjects. - For instance an open file may send SIGIO to a task using the UID and EUID - given to it by a task that called fcntl(F_SETOWN) upon it. In this case, - the file struct will have a subjective context too. - - (5) The subjective context. - - A subject has an additional interpretation of its credentials. A subset - of its credentials forms the 'subjective context'. The subjective context - is used as part of the security calculation that is carried out when a - subject acts. 
- - A Linux task, for example, has the FSUID, FSGID and the supplementary - group list for when it is acting upon a file - which are quite separate - from the real UID and GID that normally form the objective context of the - task. - - (6) Actions. - - Linux has a number of actions available that a subject may perform upon an - object. The set of actions available depends on the nature of the subject - and the object. - - Actions include reading, writing, creating and deleting files; forking or - signalling and tracing tasks. - - (7) Rules, access control lists and security calculations. - - When a subject acts upon an object, a security calculation is made. This - involves taking the subjective context, the objective context and the - action, and searching one or more sets of rules to see whether the subject - is granted or denied permission to act in the desired manner on the - object, given those contexts. - - There are two main sources of rules: - - (a) Discretionary access control (DAC): - - Sometimes the object will include sets of rules as part of its - description. This is an 'Access Control List' or 'ACL'. A Linux - file may supply more than one ACL. - - A traditional UNIX file, for example, includes a permissions mask that - is an abbreviated ACL with three fixed classes of subject ('user', - 'group' and 'other'), each of which may be granted certain privileges - ('read', 'write' and 'execute' - whatever those map to for the object - in question). UNIX file permissions do not allow the arbitrary - specification of subjects, however, and so are of limited use. - - A Linux file might also sport a POSIX ACL. This is a list of rules - that grants various permissions to arbitrary subjects. - - (b) Mandatory access control (MAC): - - The system as a whole may have one or more sets of rules that get - applied to all subjects and objects, regardless of their source. - SELinux and Smack are examples of this. - - In the case of SELinux and Smack, each object is given a label as part - of its credentials. When an action is requested, they take the - subject label, the object label and the action and look for a rule - that says that this action is either granted or denied. - - -==================== -TYPES OF CREDENTIALS -==================== - -The Linux kernel supports the following types of credentials: - - (1) Traditional UNIX credentials. - - Real User ID - Real Group ID - - The UID and GID are carried by most, if not all, Linux objects, even if in - some cases it has to be invented (FAT or CIFS files for example, which are - derived from Windows). These (mostly) define the objective context of - that object, with tasks being slightly different in some cases. - - Effective, Saved and FS User ID - Effective, Saved and FS Group ID - Supplementary groups - - These are additional credentials used by tasks only. Usually, an - EUID/EGID/GROUPS will be used as the subjective context, and real UID/GID - will be used as the objective. For tasks, it should be noted that this is - not always true. - - (2) Capabilities. - - Set of permitted capabilities - Set of inheritable capabilities - Set of effective capabilities - Capability bounding set - - These are only carried by tasks. They indicate superior capabilities - granted piecemeal to a task that an ordinary task wouldn't otherwise have. - These are manipulated implicitly by changes to the traditional UNIX - credentials, but can also be manipulated directly by the capset() system - call. 
- - The permitted capabilities are those caps that the process might grant - itself to its effective or permitted sets through capset(). This - inheritable set might also be so constrained. - - The effective capabilities are the ones that a task is actually allowed to - make use of itself. - - The inheritable capabilities are the ones that may get passed across - execve(). - - The bounding set limits the capabilities that may be inherited across - execve(), especially when a binary is executed that will execute as UID 0. - - (3) Secure management flags (securebits). - - These are only carried by tasks. These govern the way the above - credentials are manipulated and inherited over certain operations such as - execve(). They aren't used directly as objective or subjective - credentials. - - (4) Keys and keyrings. - - These are only carried by tasks. They carry and cache security tokens - that don't fit into the other standard UNIX credentials. They are for - making such things as network filesystem keys available to the file - accesses performed by processes, without the necessity of ordinary - programs having to know about security details involved. - - Keyrings are a special type of key. They carry sets of other keys and can - be searched for the desired key. Each process may subscribe to a number - of keyrings: - - Per-thread keying - Per-process keyring - Per-session keyring - - When a process accesses a key, if not already present, it will normally be - cached on one of these keyrings for future accesses to find. - - For more information on using keys, see Documentation/keys.txt. - - (5) LSM - - The Linux Security Module allows extra controls to be placed over the - operations that a task may do. Currently Linux supports two main - alternate LSM options: SELinux and Smack. - - Both work by labelling the objects in a system and then applying sets of - rules (policies) that say what operations a task with one label may do to - an object with another label. - - (6) AF_KEY - - This is a socket-based approach to credential management for networking - stacks [RFC 2367]. It isn't discussed by this document as it doesn't - interact directly with task and file credentials; rather it keeps system - level credentials. - - -When a file is opened, part of the opening task's subjective context is -recorded in the file struct created. This allows operations using that file -struct to use those credentials instead of the subjective context of the task -that issued the operation. An example of this would be a file opened on a -network filesystem where the credentials of the opened file should be presented -to the server, regardless of who is actually doing a read or a write upon it. - - -============= -FILE MARKINGS -============= - -Files on disk or obtained over the network may have annotations that form the -objective security context of that file. Depending on the type of filesystem, -this may include one or more of the following: - - (*) UNIX UID, GID, mode; - - (*) Windows user ID; - - (*) Access control list; - - (*) LSM security label; - - (*) UNIX exec privilege escalation bits (SUID/SGID); - - (*) File capabilities exec privilege escalation bits. - -These are compared to the task's subjective security context, and certain -operations allowed or disallowed as a result. In the case of execve(), the -privilege escalation bits come into play, and may allow the resulting process -extra privileges, based on the annotations on the executable file. 
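A small userspace sketch that reads back a few of these markings for a given file; it assumes the LSM label is exposed as an extended attribute such as "security.SMACK64" (other LSMs store their labels under other names):

    #include <stdio.h>
    #include <sys/stat.h>
    #include <sys/xattr.h>

    /*
     * Illustrative only: print a few of the markings listed above for one
     * file.  "security.SMACK64" is just one possible LSM label attribute.
     */
    static void show_markings(const char *path)
    {
            struct stat st;
            char label[256];
            ssize_t len;

            if (stat(path, &st) == 0)
                    printf("uid=%u gid=%u mode=%o suid=%s\n",
                           (unsigned)st.st_uid, (unsigned)st.st_gid,
                           (unsigned)(st.st_mode & 07777),
                           (st.st_mode & S_ISUID) ? "yes" : "no");

            len = getxattr(path, "security.SMACK64", label, sizeof(label) - 1);
            if (len >= 0) {
                    label[len] = '\0';
                    printf("label=%s\n", label);
            }
    }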
- - -================ -TASK CREDENTIALS -================ - -In Linux, all of a task's credentials are held in (uid, gid) or through -(groups, keys, LSM security) a refcounted structure of type 'struct cred'. -Each task points to its credentials by a pointer called 'cred' in its -task_struct. - -Once a set of credentials has been prepared and committed, it may not be -changed, barring the following exceptions: - - (1) its reference count may be changed; - - (2) the reference count on the group_info struct it points to may be changed; - - (3) the reference count on the security data it points to may be changed; - - (4) the reference count on any keyrings it points to may be changed; - - (5) any keyrings it points to may be revoked, expired or have their security - attributes changed; and - - (6) the contents of any keyrings to which it points may be changed (the whole - point of keyrings being a shared set of credentials, modifiable by anyone - with appropriate access). - -To alter anything in the cred struct, the copy-and-replace principle must be -adhered to. First take a copy, then alter the copy and then use RCU to change -the task pointer to make it point to the new copy. There are wrappers to aid -with this (see below). - -A task may only alter its _own_ credentials; it is no longer permitted for a -task to alter another's credentials. This means the capset() system call is no -longer permitted to take any PID other than the one of the current process. -Also keyctl_instantiate() and keyctl_negate() functions no longer permit -attachment to process-specific keyrings in the requesting process as the -instantiating process may need to create them. - - -IMMUTABLE CREDENTIALS ---------------------- - -Once a set of credentials has been made public (by calling commit_creds() for -example), it must be considered immutable, barring two exceptions: - - (1) The reference count may be altered. - - (2) Whilst the keyring subscriptions of a set of credentials may not be - changed, the keyrings subscribed to may have their contents altered. - -To catch accidental credential alteration at compile time, struct task_struct -has _const_ pointers to its credential sets, as does struct file. Furthermore, -certain functions such as get_cred() and put_cred() operate on const pointers, -thus rendering casts unnecessary, but require to temporarily ditch the const -qualification to be able to alter the reference count. - - -ACCESSING TASK CREDENTIALS --------------------------- - -A task being able to alter only its own credentials permits the current process -to read or replace its own credentials without the need for any form of locking -- which simplifies things greatly. It can just call: - - const struct cred *current_cred() - -to get a pointer to its credentials structure, and it doesn't have to release -it afterwards. 
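For example, a minimal in-kernel sketch of this pattern; foo_may_act() is a made-up helper and the check it performs is purely illustrative:

    #include <linux/cred.h>
    #include <linux/types.h>

    /*
     * Illustrative only: look at the current task's own credentials with
     * no locking, as described above.
     */
    static bool foo_may_act(void)
    {
            const struct cred *cred = current_cred();

            /* No get_cred()/put_cred() needed for current's own creds. */
            return cred->euid == 0 || cred->fsuid == cred->uid;
    }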
- -There are convenience wrappers for retrieving specific aspects of a task's -credentials (the value is simply returned in each case): - - uid_t current_uid(void) Current's real UID - gid_t current_gid(void) Current's real GID - uid_t current_euid(void) Current's effective UID - gid_t current_egid(void) Current's effective GID - uid_t current_fsuid(void) Current's file access UID - gid_t current_fsgid(void) Current's file access GID - kernel_cap_t current_cap(void) Current's effective capabilities - void *current_security(void) Current's LSM security pointer - struct user_struct *current_user(void) Current's user account - -There are also convenience wrappers for retrieving specific associated pairs of -a task's credentials: - - void current_uid_gid(uid_t *, gid_t *); - void current_euid_egid(uid_t *, gid_t *); - void current_fsuid_fsgid(uid_t *, gid_t *); - -which return these pairs of values through their arguments after retrieving -them from the current task's credentials. - - -In addition, there is a function for obtaining a reference on the current -process's current set of credentials: - - const struct cred *get_current_cred(void); - -and functions for getting references to one of the credentials that don't -actually live in struct cred: - - struct user_struct *get_current_user(void); - struct group_info *get_current_groups(void); - -which get references to the current process's user accounting structure and -supplementary groups list respectively. - -Once a reference has been obtained, it must be released with put_cred(), -free_uid() or put_group_info() as appropriate. - - -ACCESSING ANOTHER TASK'S CREDENTIALS ------------------------------------- - -Whilst a task may access its own credentials without the need for locking, the -same is not true of a task wanting to access another task's credentials. It -must use the RCU read lock and rcu_dereference(). - -The rcu_dereference() is wrapped by: - - const struct cred *__task_cred(struct task_struct *task); - -This should be used inside the RCU read lock, as in the following example: - - void foo(struct task_struct *t, struct foo_data *f) - { - const struct cred *tcred; - ... - rcu_read_lock(); - tcred = __task_cred(t); - f->uid = tcred->uid; - f->gid = tcred->gid; - f->groups = get_group_info(tcred->groups); - rcu_read_unlock(); - ... - } - -Should it be necessary to hold another task's credentials for a long period of -time, and possibly to sleep whilst doing so, then the caller should get a -reference on them using: - - const struct cred *get_task_cred(struct task_struct *task); - -This does all the RCU magic inside of it. The caller must call put_cred() on -the credentials so obtained when they're finished with. - - [*] Note: The result of __task_cred() should not be passed directly to - get_cred() as this may race with commit_cred(). - -There are a couple of convenience functions to access bits of another task's -credentials, hiding the RCU magic from the caller: - - uid_t task_uid(task) Task's real UID - uid_t task_euid(task) Task's effective UID - -If the caller is holding the RCU read lock at the time anyway, then: - - __task_cred(task)->uid - __task_cred(task)->euid - -should be used instead. Similarly, if multiple aspects of a task's credentials -need to be accessed, RCU read lock should be used, __task_cred() called, the -result stored in a temporary pointer and then the credential aspects called -from that before dropping the lock. This prevents the potentially expensive -RCU magic from being invoked multiple times. 
- -Should some other single aspect of another task's credentials need to be -accessed, then this can be used: - - task_cred_xxx(task, member) - -where 'member' is a non-pointer member of the cred struct. For instance: - - uid_t task_cred_xxx(task, suid); - -will retrieve 'struct cred::suid' from the task, doing the appropriate RCU -magic. This may not be used for pointer members as what they point to may -disappear the moment the RCU read lock is dropped. - - -ALTERING CREDENTIALS --------------------- - -As previously mentioned, a task may only alter its own credentials, and may not -alter those of another task. This means that it doesn't need to use any -locking to alter its own credentials. - -To alter the current process's credentials, a function should first prepare a -new set of credentials by calling: - - struct cred *prepare_creds(void); - -this locks current->cred_replace_mutex and then allocates and constructs a -duplicate of the current process's credentials, returning with the mutex still -held if successful. It returns NULL if not successful (out of memory). - -The mutex prevents ptrace() from altering the ptrace state of a process whilst -security checks on credentials construction and changing is taking place as -the ptrace state may alter the outcome, particularly in the case of execve(). - -The new credentials set should be altered appropriately, and any security -checks and hooks done. Both the current and the proposed sets of credentials -are available for this purpose as current_cred() will return the current set -still at this point. - - -When the credential set is ready, it should be committed to the current process -by calling: - - int commit_creds(struct cred *new); - -This will alter various aspects of the credentials and the process, giving the -LSM a chance to do likewise, then it will use rcu_assign_pointer() to actually -commit the new credentials to current->cred, it will release -current->cred_replace_mutex to allow ptrace() to take place, and it will notify -the scheduler and others of the changes. - -This function is guaranteed to return 0, so that it can be tail-called at the -end of such functions as sys_setresuid(). - -Note that this function consumes the caller's reference to the new credentials. -The caller should _not_ call put_cred() on the new credentials afterwards. - -Furthermore, once this function has been called on a new set of credentials, -those credentials may _not_ be changed further. - - -Should the security checks fail or some other error occur after prepare_creds() -has been called, then the following function should be invoked: - - void abort_creds(struct cred *new); - -This releases the lock on current->cred_replace_mutex that prepare_creds() got -and then releases the new credentials. - - -A typical credentials alteration function would look something like this: - - int alter_suid(uid_t suid) - { - struct cred *new; - int ret; - - new = prepare_creds(); - if (!new) - return -ENOMEM; - - new->suid = suid; - ret = security_alter_suid(new); - if (ret < 0) { - abort_creds(new); - return ret; - } - - return commit_creds(new); - } - - -MANAGING CREDENTIALS --------------------- - -There are some functions to help manage credentials: - - (*) void put_cred(const struct cred *cred); - - This releases a reference to the given set of credentials. If the - reference count reaches zero, the credentials will be scheduled for - destruction by the RCU system. 
- - (*) const struct cred *get_cred(const struct cred *cred); - - This gets a reference on a live set of credentials, returning a pointer to - that set of credentials. - - (*) struct cred *get_new_cred(struct cred *cred); - - This gets a reference on a set of credentials that is under construction - and is thus still mutable, returning a pointer to that set of credentials. - - -===================== -OPEN FILE CREDENTIALS -===================== - -When a new file is opened, a reference is obtained on the opening task's -credentials and this is attached to the file struct as 'f_cred' in place of -'f_uid' and 'f_gid'. Code that used to access file->f_uid and file->f_gid -should now access file->f_cred->fsuid and file->f_cred->fsgid. - -It is safe to access f_cred without the use of RCU or locking because the -pointer will not change over the lifetime of the file struct, and nor will the -contents of the cred struct pointed to, barring the exceptions listed above -(see the Task Credentials section). - - -======================================= -OVERRIDING THE VFS'S USE OF CREDENTIALS -======================================= - -Under some circumstances it is desirable to override the credentials used by -the VFS, and that can be done by calling into such as vfs_mkdir() with a -different set of credentials. This is done in the following places: - - (*) sys_faccessat(). - - (*) do_coredump(). - - (*) nfs4recover.c. diff --git a/Documentation/filesystems/nfs/idmapper.txt b/Documentation/filesystems/nfs/idmapper.txt index b9b4192ea8b5..9c8fd6148656 100644 --- a/Documentation/filesystems/nfs/idmapper.txt +++ b/Documentation/filesystems/nfs/idmapper.txt @@ -47,8 +47,8 @@ request-key will find the first matching line and corresponding program. In this case, /some/other/program will handle all uid lookups and /usr/sbin/nfs.idmap will handle gid, user, and group lookups. -See for more information about the -request-key function. +See for more information +about the request-key function. ========= diff --git a/Documentation/keys-request-key.txt b/Documentation/keys-request-key.txt deleted file mode 100644 index 69686ad12c66..000000000000 --- a/Documentation/keys-request-key.txt +++ /dev/null @@ -1,202 +0,0 @@ - =================== - KEY REQUEST SERVICE - =================== - -The key request service is part of the key retention service (refer to -Documentation/keys.txt). This document explains more fully how the requesting -algorithm works. - -The process starts by either the kernel requesting a service by calling -request_key*(): - - struct key *request_key(const struct key_type *type, - const char *description, - const char *callout_info); - -or: - - struct key *request_key_with_auxdata(const struct key_type *type, - const char *description, - const char *callout_info, - size_t callout_len, - void *aux); - -or: - - struct key *request_key_async(const struct key_type *type, - const char *description, - const char *callout_info, - size_t callout_len); - -or: - - struct key *request_key_async_with_auxdata(const struct key_type *type, - const char *description, - const char *callout_info, - size_t callout_len, - void *aux); - -Or by userspace invoking the request_key system call: - - key_serial_t request_key(const char *type, - const char *description, - const char *callout_info, - key_serial_t dest_keyring); - -The main difference between the access points is that the in-kernel interface -does not need to link the key to a keyring to prevent it from being immediately -destroyed. 
The kernel interface returns a pointer directly to the key, and -it's up to the caller to destroy the key. - -The request_key*_with_auxdata() calls are like the in-kernel request_key*() -calls, except that they permit auxiliary data to be passed to the upcaller (the -default is NULL). This is only useful for those key types that define their -own upcall mechanism rather than using /sbin/request-key. - -The two async in-kernel calls may return keys that are still in the process of -being constructed. The two non-async ones will wait for construction to -complete first. - -The userspace interface links the key to a keyring associated with the process -to prevent the key from going away, and returns the serial number of the key to -the caller. - - -The following example assumes that the key types involved don't define their -own upcall mechanisms. If they do, then those should be substituted for the -forking and execution of /sbin/request-key. - - -=========== -THE PROCESS -=========== - -A request proceeds in the following manner: - - (1) Process A calls request_key() [the userspace syscall calls the kernel - interface]. - - (2) request_key() searches the process's subscribed keyrings to see if there's - a suitable key there. If there is, it returns the key. If there isn't, - and callout_info is not set, an error is returned. Otherwise the process - proceeds to the next step. - - (3) request_key() sees that A doesn't have the desired key yet, so it creates - two things: - - (a) An uninstantiated key U of requested type and description. - - (b) An authorisation key V that refers to key U and notes that process A - is the context in which key U should be instantiated and secured, and - from which associated key requests may be satisfied. - - (4) request_key() then forks and executes /sbin/request-key with a new session - keyring that contains a link to auth key V. - - (5) /sbin/request-key assumes the authority associated with key U. - - (6) /sbin/request-key execs an appropriate program to perform the actual - instantiation. - - (7) The program may want to access another key from A's context (say a - Kerberos TGT key). It just requests the appropriate key, and the keyring - search notes that the session keyring has auth key V in its bottom level. - - This will permit it to then search the keyrings of process A with the - UID, GID, groups and security info of process A as if it was process A, - and come up with key W. - - (8) The program then does what it must to get the data with which to - instantiate key U, using key W as a reference (perhaps it contacts a - Kerberos server using the TGT) and then instantiates key U. - - (9) Upon instantiating key U, auth key V is automatically revoked so that it - may not be used again. - -(10) The program then exits 0 and request_key() deletes key V and returns key - U to the caller. - -This also extends further. If key W (step 7 above) didn't exist, key W would -be created uninstantiated, another auth key (X) would be created (as per step -3) and another copy of /sbin/request-key spawned (as per step 4); but the -context specified by auth key X will still be process A, as it was in auth key -V. - -This is because process A's keyrings can't simply be attached to -/sbin/request-key at the appropriate places because (a) execve will discard two -of them, and (b) it requires the same UID/GID/Groups all the way through. 
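A sketch of step (1) from a kernel service's point of view, assuming the standard "user" key type; the key description and callout string are placeholders:

    #include <linux/err.h>
    #include <linux/key.h>
    #include <keys/user-type.h>

    /*
     * Illustrative only: a kernel service requesting a key, which will be
     * satisfied from the caller's keyrings or constructed via upcall.
     */
    static int fetch_example_key(void)
    {
            struct key *key;

            key = request_key(&key_type_user, "example:token",
                              "opaque info for /sbin/request-key");
            if (IS_ERR(key))
                    return PTR_ERR(key);

            /* ... use the key's payload under key->sem or RCU ... */

            key_put(key);
            return 0;
    }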
- - -==================================== -NEGATIVE INSTANTIATION AND REJECTION -==================================== - -Rather than instantiating a key, it is possible for the possessor of an -authorisation key to negatively instantiate a key that's under construction. -This is a short duration placeholder that causes any attempt at re-requesting -the key whilst it exists to fail with error ENOKEY if negated or the specified -error if rejected. - -This is provided to prevent excessive repeated spawning of /sbin/request-key -processes for a key that will never be obtainable. - -Should the /sbin/request-key process exit anything other than 0 or die on a -signal, the key under construction will be automatically negatively -instantiated for a short amount of time. - - -==================== -THE SEARCH ALGORITHM -==================== - -A search of any particular keyring proceeds in the following fashion: - - (1) When the key management code searches for a key (keyring_search_aux) it - firstly calls key_permission(SEARCH) on the keyring it's starting with, - if this denies permission, it doesn't search further. - - (2) It considers all the non-keyring keys within that keyring and, if any key - matches the criteria specified, calls key_permission(SEARCH) on it to see - if the key is allowed to be found. If it is, that key is returned; if - not, the search continues, and the error code is retained if of higher - priority than the one currently set. - - (3) It then considers all the keyring-type keys in the keyring it's currently - searching. It calls key_permission(SEARCH) on each keyring, and if this - grants permission, it recurses, executing steps (2) and (3) on that - keyring. - -The process stops immediately a valid key is found with permission granted to -use it. Any error from a previous match attempt is discarded and the key is -returned. - -When search_process_keyrings() is invoked, it performs the following searches -until one succeeds: - - (1) If extant, the process's thread keyring is searched. - - (2) If extant, the process's process keyring is searched. - - (3) The process's session keyring is searched. - - (4) If the process has assumed the authority associated with a request_key() - authorisation key then: - - (a) If extant, the calling process's thread keyring is searched. - - (b) If extant, the calling process's process keyring is searched. - - (c) The calling process's session keyring is searched. - -The moment one succeeds, all pending errors are discarded and the found key is -returned. - -Only if all these fail does the whole thing fail with the highest priority -error. Note that several errors may have come from LSM. - -The error priority is: - - EKEYREVOKED > EKEYEXPIRED > ENOKEY - -EACCES/EPERM are only returned on a direct search of a specific keyring where -the basal keyring does not grant Search permission. diff --git a/Documentation/keys-trusted-encrypted.txt b/Documentation/keys-trusted-encrypted.txt deleted file mode 100644 index 8fb79bc1ac4b..000000000000 --- a/Documentation/keys-trusted-encrypted.txt +++ /dev/null @@ -1,145 +0,0 @@ - Trusted and Encrypted Keys - -Trusted and Encrypted Keys are two new key types added to the existing kernel -key ring service. Both of these new types are variable length symmetic keys, -and in both cases all keys are created in the kernel, and user space sees, -stores, and loads only encrypted blobs. 
Trusted Keys require the availability -of a Trusted Platform Module (TPM) chip for greater security, while Encrypted -Keys can be used on any system. All user level blobs are displayed and loaded -in hex ascii for convenience, and are integrity verified. - -Trusted Keys use a TPM both to generate and to seal the keys. Keys are sealed -under a 2048 bit RSA key in the TPM, and optionally sealed to specified PCR -(integrity measurement) values, and only unsealed by the TPM, if PCRs and blob -integrity verifications match. A loaded Trusted Key can be updated with new -(future) PCR values, so keys are easily migrated to new PCR values, such as -when the kernel and initramfs are updated. The same key can have many saved -blobs under different PCR values, so multiple boots are easily supported. - -By default, trusted keys are sealed under the SRK, which has the default -authorization value (20 zeros). This can be set at takeownership time with the -TrouSerS utility: "tpm_takeownership -u -z". - -Usage: - keyctl add trusted name "new keylen [options]" ring - keyctl add trusted name "load hex_blob [pcrlock=pcrnum]" ring - keyctl update key "update [options]" - keyctl print keyid - - options: - keyhandle= ascii hex value of sealing key default 0x40000000 (SRK) - keyauth= ascii hex auth for sealing key default 0x00... - (40 ascii zeros) - blobauth= ascii hex auth for sealed data default 0x00... - (40 ascii zeros) - pcrinfo= ascii hex of PCR_INFO or PCR_INFO_LONG (no default) - pcrlock= pcr number to be extended to "lock" blob - migratable= 0|1 indicating permission to reseal to new PCR values, - default 1 (resealing allowed) - -"keyctl print" returns an ascii hex copy of the sealed key, which is in standard -TPM_STORED_DATA format. The key length for new keys is always in bytes. -Trusted Keys can be 32 - 128 bytes (256 - 1024 bits); the upper limit is to fit -within the 2048 bit SRK (RSA) keylength, with all necessary structure/padding. - -Encrypted keys do not depend on a TPM, and are faster, as they use AES for -encryption/decryption. New keys are created from kernel generated random -numbers, and are encrypted/decrypted using a specified 'master' key. The -'master' key can either be a trusted-key or user-key type. The main -disadvantage of encrypted keys is that if they are not rooted in a trusted key, -they are only as secure as the user key encrypting them. The master user key -should therefore be loaded in as secure a way as possible, preferably early in -boot. - -Usage: - keyctl add encrypted name "new key-type:master-key-name keylen" ring - keyctl add encrypted name "load hex_blob" ring - keyctl update keyid "update key-type:master-key-name" - -where 'key-type' is either 'trusted' or 'user'.
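The keyctl commands above can also be issued programmatically; the following is a minimal userspace sketch, assuming libkeyutils (<keyutils.h>, link with -lkeyutils), that performs the same "new 32" operation as the first example below:

    #include <keyutils.h>
    #include <stdio.h>
    #include <string.h>

    /*
     * Illustrative only: create a new 32 byte trusted key named "kmk" in
     * the user keyring through add_key(2).
     */
    int main(void)
    {
            key_serial_t key;

            key = add_key("trusted", "kmk", "new 32", strlen("new 32"),
                          KEY_SPEC_USER_KEYRING);
            if (key < 0) {
                    perror("add_key");
                    return 1;
            }
            printf("%d\n", key);
            return 0;
    }

Loading a previously saved blob works the same way, with a "load hex_blob" payload instead of "new 32".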
- -Examples of trusted and encrypted key usage: - -Create and save a trusted key named "kmk" of length 32 bytes: - - $ keyctl add trusted kmk "new 32" @u - 440502848 - - $ keyctl show - Session Keyring - -3 --alswrv 500 500 keyring: _ses - 97833714 --alswrv 500 -1 \_ keyring: _uid.500 - 440502848 --alswrv 500 500 \_ trusted: kmk - - $ keyctl print 440502848 - 0101000000000000000001005d01b7e3f4a6be5709930f3b70a743cbb42e0cc95e18e915 - 3f60da455bbf1144ad12e4f92b452f966929f6105fd29ca28e4d4d5a031d068478bacb0b - 27351119f822911b0a11ba3d3498ba6a32e50dac7f32894dd890eb9ad578e4e292c83722 - a52e56a097e6a68b3f56f7a52ece0cdccba1eb62cad7d817f6dc58898b3ac15f36026fec - d568bd4a706cb60bb37be6d8f1240661199d640b66fb0fe3b079f97f450b9ef9c22c6d5d - dd379f0facd1cd020281dfa3c70ba21a3fa6fc2471dc6d13ecf8298b946f65345faa5ef0 - f1f8fff03ad0acb083725535636addb08d73dedb9832da198081e5deae84bfaf0409c22b - e4a8aea2b607ec96931e6f4d4fe563ba - - $ keyctl pipe 440502848 > kmk.blob - -Load a trusted key from the saved blob: - - $ keyctl add trusted kmk "load `cat kmk.blob`" @u - 268728824 - - $ keyctl print 268728824 - 0101000000000000000001005d01b7e3f4a6be5709930f3b70a743cbb42e0cc95e18e915 - 3f60da455bbf1144ad12e4f92b452f966929f6105fd29ca28e4d4d5a031d068478bacb0b - 27351119f822911b0a11ba3d3498ba6a32e50dac7f32894dd890eb9ad578e4e292c83722 - a52e56a097e6a68b3f56f7a52ece0cdccba1eb62cad7d817f6dc58898b3ac15f36026fec - d568bd4a706cb60bb37be6d8f1240661199d640b66fb0fe3b079f97f450b9ef9c22c6d5d - dd379f0facd1cd020281dfa3c70ba21a3fa6fc2471dc6d13ecf8298b946f65345faa5ef0 - f1f8fff03ad0acb083725535636addb08d73dedb9832da198081e5deae84bfaf0409c22b - e4a8aea2b607ec96931e6f4d4fe563ba - -Reseal a trusted key under new pcr values: - - $ keyctl update 268728824 "update pcrinfo=`cat pcr.blob`" - $ keyctl print 268728824 - 010100000000002c0002800093c35a09b70fff26e7a98ae786c641e678ec6ffb6b46d805 - 77c8a6377aed9d3219c6dfec4b23ffe3000001005d37d472ac8a44023fbb3d18583a4f73 - d3a076c0858f6f1dcaa39ea0f119911ff03f5406df4f7f27f41da8d7194f45c9f4e00f2e - df449f266253aa3f52e55c53de147773e00f0f9aca86c64d94c95382265968c354c5eab4 - 9638c5ae99c89de1e0997242edfb0b501744e11ff9762dfd951cffd93227cc513384e7e6 - e782c29435c7ec2edafaa2f4c1fe6e7a781b59549ff5296371b42133777dcc5b8b971610 - 94bc67ede19e43ddb9dc2baacad374a36feaf0314d700af0a65c164b7082401740e489c9 - 7ef6a24defe4846104209bf0c3eced7fa1a672ed5b125fc9d8cd88b476a658a4434644ef - df8ae9a178e9f83ba9f08d10fa47e4226b98b0702f06b3b8 - -Create and save an encrypted key "evm" using the above trusted key "kmk": - - $ keyctl add encrypted evm "new trusted:kmk 32" @u - 159771175 - - $ keyctl print 159771175 - trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b382dbbc55 - be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e024717c64 - 5972dcb82ab2dde83376d82b2e3c09ffc - - $ keyctl pipe 159771175 > evm.blob - -Load an encrypted key "evm" from saved blob: - - $ keyctl add encrypted evm "load `cat evm.blob`" @u - 831684262 - - $ keyctl print 831684262 - trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b382dbbc55 - be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e024717c64 - 5972dcb82ab2dde83376d82b2e3c09ffc - - -The initial consumer of trusted keys is EVM, which at boot time needs a high -quality symmetric key for HMAC protection of file metadata. The use of a -trusted key provides strong guarantees that the EVM key has not been -compromised by a user level problem, and when sealed to specific boot PCR -values, protects against boot and offline attacks. 
Other uses for trusted and -encrypted keys, such as for disk and file encryption are anticipated. diff --git a/Documentation/keys.txt b/Documentation/keys.txt deleted file mode 100644 index 6523a9e6f293..000000000000 --- a/Documentation/keys.txt +++ /dev/null @@ -1,1290 +0,0 @@ - ============================ - KERNEL KEY RETENTION SERVICE - ============================ - -This service allows cryptographic keys, authentication tokens, cross-domain -user mappings, and similar to be cached in the kernel for the use of -filesystems and other kernel services. - -Keyrings are permitted; these are a special type of key that can hold links to -other keys. Processes each have three standard keyring subscriptions that a -kernel service can search for relevant keys. - -The key service can be configured on by enabling: - - "Security options"/"Enable access key retention support" (CONFIG_KEYS) - -This document has the following sections: - - - Key overview - - Key service overview - - Key access permissions - - SELinux support - - New procfs files - - Userspace system call interface - - Kernel services - - Notes on accessing payload contents - - Defining a key type - - Request-key callback service - - Garbage collection - - -============ -KEY OVERVIEW -============ - -In this context, keys represent units of cryptographic data, authentication -tokens, keyrings, etc.. These are represented in the kernel by struct key. - -Each key has a number of attributes: - - - A serial number. - - A type. - - A description (for matching a key in a search). - - Access control information. - - An expiry time. - - A payload. - - State. - - - (*) Each key is issued a serial number of type key_serial_t that is unique for - the lifetime of that key. All serial numbers are positive non-zero 32-bit - integers. - - Userspace programs can use a key's serial numbers as a way to gain access - to it, subject to permission checking. - - (*) Each key is of a defined "type". Types must be registered inside the - kernel by a kernel service (such as a filesystem) before keys of that type - can be added or used. Userspace programs cannot define new types directly. - - Key types are represented in the kernel by struct key_type. This defines a - number of operations that can be performed on a key of that type. - - Should a type be removed from the system, all the keys of that type will - be invalidated. - - (*) Each key has a description. This should be a printable string. The key - type provides an operation to perform a match between the description on a - key and a criterion string. - - (*) Each key has an owner user ID, a group ID and a permissions mask. These - are used to control what a process may do to a key from userspace, and - whether a kernel service will be able to find the key. - - (*) Each key can be set to expire at a specific time by the key type's - instantiation function. Keys can also be immortal. - - (*) Each key can have a payload. This is a quantity of data that represent the - actual "key". In the case of a keyring, this is a list of keys to which - the keyring links; in the case of a user-defined key, it's an arbitrary - blob of data. - - Having a payload is not required; and the payload can, in fact, just be a - value stored in the struct key itself. - - When a key is instantiated, the key type's instantiation function is - called with a blob of data, and that then creates the key's payload in - some way. 
- - Similarly, when userspace wants to read back the contents of the key, if - permitted, another key type operation will be called to convert the key's - attached payload back into a blob of data. - - (*) Each key can be in one of a number of basic states: - - (*) Uninstantiated. The key exists, but does not have any data attached. - Keys being requested from userspace will be in this state. - - (*) Instantiated. This is the normal state. The key is fully formed, and - has data attached. - - (*) Negative. This is a relatively short-lived state. The key acts as a - note saying that a previous call out to userspace failed, and acts as - a throttle on key lookups. A negative key can be updated to a normal - state. - - (*) Expired. Keys can have lifetimes set. If their lifetime is exceeded, - they traverse to this state. An expired key can be updated back to a - normal state. - - (*) Revoked. A key is put in this state by userspace action. It can't be - found or operated upon (apart from by unlinking it). - - (*) Dead. The key's type was unregistered, and so the key is now useless. - -Keys in the last three states are subject to garbage collection. See the -section on "Garbage collection". - - -==================== -KEY SERVICE OVERVIEW -==================== - -The key service provides a number of features besides keys: - - (*) The key service defines two special key types: - - (+) "keyring" - - Keyrings are special keys that contain a list of other keys. Keyring - lists can be modified using various system calls. Keyrings should not - be given a payload when created. - - (+) "user" - - A key of this type has a description and a payload that are arbitrary - blobs of data. These can be created, updated and read by userspace, - and aren't intended for use by kernel services. - - (*) Each process subscribes to three keyrings: a thread-specific keyring, a - process-specific keyring, and a session-specific keyring. - - The thread-specific keyring is discarded from the child when any sort of - clone, fork, vfork or execve occurs. A new keyring is created only when - required. - - The process-specific keyring is replaced with an empty one in the child on - clone, fork, vfork unless CLONE_THREAD is supplied, in which case it is - shared. execve also discards the process's process keyring and creates a - new one. - - The session-specific keyring is persistent across clone, fork, vfork and - execve, even when the latter executes a set-UID or set-GID binary. A - process can, however, replace its current session keyring with a new one - by using PR_JOIN_SESSION_KEYRING. It is permitted to request an anonymous - new one, or to attempt to create or join one of a specific name. - - The ownership of the thread keyring changes when the real UID and GID of - the thread changes. - - (*) Each user ID resident in the system holds two special keyrings: a user - specific keyring and a default user session keyring. The default session - keyring is initialised with a link to the user-specific keyring. - - When a process changes its real UID, if it used to have no session key, it - will be subscribed to the default session key for the new UID. - - If a process attempts to access its session key when it doesn't have one, - it will be subscribed to the default for its current UID. - - (*) Each user has two quotas against which the keys they own are tracked. One - limits the total number of keys and keyrings, the other limits the total - amount of description and payload space that can be consumed. 
- - The user can view information on this and other statistics through procfs - files. The root user may also alter the quota limits through sysctl files - (see the section "New procfs files"). - - Process-specific and thread-specific keyrings are not counted towards a - user's quota. - - If a system call that modifies a key or keyring in some way would put the - user over quota, the operation is refused and error EDQUOT is returned. - - (*) There's a system call interface by which userspace programs can create and - manipulate keys and keyrings. - - (*) There's a kernel interface by which services can register types and search - for keys. - - (*) There's a way for the a search done from the kernel to call back to - userspace to request a key that can't be found in a process's keyrings. - - (*) An optional filesystem is available through which the key database can be - viewed and manipulated. - - -====================== -KEY ACCESS PERMISSIONS -====================== - -Keys have an owner user ID, a group access ID, and a permissions mask. The mask -has up to eight bits each for possessor, user, group and other access. Only -six of each set of eight bits are defined. These permissions granted are: - - (*) View - - This permits a key or keyring's attributes to be viewed - including key - type and description. - - (*) Read - - This permits a key's payload to be viewed or a keyring's list of linked - keys. - - (*) Write - - This permits a key's payload to be instantiated or updated, or it allows a - link to be added to or removed from a keyring. - - (*) Search - - This permits keyrings to be searched and keys to be found. Searches can - only recurse into nested keyrings that have search permission set. - - (*) Link - - This permits a key or keyring to be linked to. To create a link from a - keyring to a key, a process must have Write permission on the keyring and - Link permission on the key. - - (*) Set Attribute - - This permits a key's UID, GID and permissions mask to be changed. - -For changing the ownership, group ID or permissions mask, being the owner of -the key or having the sysadmin capability is sufficient. - - -=============== -SELINUX SUPPORT -=============== - -The security class "key" has been added to SELinux so that mandatory access -controls can be applied to keys created within various contexts. This support -is preliminary, and is likely to change quite significantly in the near future. -Currently, all of the basic permissions explained above are provided in SELinux -as well; SELinux is simply invoked after all basic permission checks have been -performed. - -The value of the file /proc/self/attr/keycreate influences the labeling of -newly-created keys. If the contents of that file correspond to an SELinux -security context, then the key will be assigned that context. Otherwise, the -key will be assigned the current context of the task that invoked the key -creation request. Tasks must be granted explicit permission to assign a -particular context to newly-created keys, using the "create" permission in the -key security class. - -The default keyrings associated with users will be labeled with the default -context of the user if and only if the login programs have been instrumented to -properly initialize keycreate during the login process. Otherwise, they will -be labeled with the context of the login program itself. 
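The keycreate interface described above can also be exercised directly. Below is
a small, hedged sketch of how a login-style program might label the keys it is
about to create; the helper name and the example context are illustrative and
not taken from this document.

    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>

    /* Ask SELinux to apply the given context to keys this task creates next.
     * The context string is purely illustrative; per the text above, the task
     * must also hold the "create" permission in the key class for it.
     */
    static int set_keycreate_context(const char *ctx)
    {
            int fd = open("/proc/self/attr/keycreate", O_WRONLY);
            ssize_t n;

            if (fd < 0)
                    return -1;
            n = write(fd, ctx, strlen(ctx));
            close(fd);
            return n < 0 ? -1 : 0;
    }

    /* e.g. set_keycreate_context("user_u:object_r:example_key_t:s0"); */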
- -Note, however, that the default keyrings associated with the root user are -labeled with the default kernel context, since they are created early in the -boot process, before root has a chance to log in. - -The keyrings associated with new threads are each labeled with the context of -their associated thread, and both session and process keyrings are handled -similarly. - - -================ -NEW PROCFS FILES -================ - -Two files have been added to procfs by which an administrator can find out -about the status of the key service: - - (*) /proc/keys - - This lists the keys that are currently viewable by the task reading the - file, giving information about their type, description and permissions. - It is not possible to view the payload of the key this way, though some - information about it may be given. - - The only keys included in the list are those that grant View permission to - the reading process whether or not it possesses them. Note that LSM - security checks are still performed, and may further filter out keys that - the current process is not authorised to view. - - The contents of the file look like this: - - SERIAL FLAGS USAGE EXPY PERM UID GID TYPE DESCRIPTION: SUMMARY - 00000001 I----- 39 perm 1f3f0000 0 0 keyring _uid_ses.0: 1/4 - 00000002 I----- 2 perm 1f3f0000 0 0 keyring _uid.0: empty - 00000007 I----- 1 perm 1f3f0000 0 0 keyring _pid.1: empty - 0000018d I----- 1 perm 1f3f0000 0 0 keyring _pid.412: empty - 000004d2 I--Q-- 1 perm 1f3f0000 32 -1 keyring _uid.32: 1/4 - 000004d3 I--Q-- 3 perm 1f3f0000 32 -1 keyring _uid_ses.32: empty - 00000892 I--QU- 1 perm 1f000000 0 0 user metal:copper: 0 - 00000893 I--Q-N 1 35s 1f3f0000 0 0 user metal:silver: 0 - 00000894 I--Q-- 1 10h 003f0000 0 0 user metal:gold: 0 - - The flags are: - - I Instantiated - R Revoked - D Dead - Q Contributes to user's quota - U Under construction by callback to userspace - N Negative key - - This file must be enabled at kernel configuration time as it allows anyone - to list the keys database. - - (*) /proc/key-users - - This file lists the tracking data for each user that has at least one key - on the system. Such data includes quota information and statistics: - - [root@andromeda root]# cat /proc/key-users - 0: 46 45/45 1/100 13/10000 - 29: 2 2/2 2/100 40/10000 - 32: 2 2/2 2/100 40/10000 - 38: 2 2/2 2/100 40/10000 - - The format of each line is - : User ID to which this applies - Structure refcount - / Total number of keys and number instantiated - / Key count quota - / Key size quota - - -Four new sysctl files have been added also for the purpose of controlling the -quota limits on keys: - - (*) /proc/sys/kernel/keys/root_maxkeys - /proc/sys/kernel/keys/root_maxbytes - - These files hold the maximum number of keys that root may have and the - maximum total number of bytes of data that root may have stored in those - keys. - - (*) /proc/sys/kernel/keys/maxkeys - /proc/sys/kernel/keys/maxbytes - - These files hold the maximum number of keys that each non-root user may - have and the maximum total number of bytes of data that each of those - users may have stored in their keys. - -Root may alter these by writing each new limit as a decimal number string to -the appropriate file. - - -=============================== -USERSPACE SYSTEM CALL INTERFACE -=============================== - -Userspace can manipulate keys directly through three new syscalls: add_key, -request_key and keyctl. The latter provides a number of functions for -manipulating keys. 
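These three system calls are typically reached through the userspace keyutils
library rather than being declared by the C library. Where that is not
available, they can be invoked with syscall(2); the following is a hedged
sketch, and the my_* names are just local helpers, not part of any API.

    #include <stdint.h>
    #include <linux/keyctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    typedef int32_t key_serial_t;

    /* Thin wrappers over the raw syscalls; libkeyutils provides more
     * complete equivalents.
     */
    static key_serial_t my_add_key(const char *type, const char *desc,
                                   const void *payload, size_t plen,
                                   key_serial_t keyring)
    {
            return syscall(__NR_add_key, type, desc, payload, plen, keyring);
    }

    static key_serial_t my_request_key(const char *type, const char *desc,
                                       const char *callout_info,
                                       key_serial_t dest_keyring)
    {
            return syscall(__NR_request_key, type, desc, callout_info,
                           dest_keyring);
    }

    static long my_keyctl_revoke(key_serial_t key)
    {
            return syscall(__NR_keyctl, KEYCTL_REVOKE, key);
    }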
- -When referring to a key directly, userspace programs should use the key's -serial number (a positive 32-bit integer). However, there are some special -values available for referring to special keys and keyrings that relate to the -process making the call: - - CONSTANT VALUE KEY REFERENCED - ============================== ====== =========================== - KEY_SPEC_THREAD_KEYRING -1 thread-specific keyring - KEY_SPEC_PROCESS_KEYRING -2 process-specific keyring - KEY_SPEC_SESSION_KEYRING -3 session-specific keyring - KEY_SPEC_USER_KEYRING -4 UID-specific keyring - KEY_SPEC_USER_SESSION_KEYRING -5 UID-session keyring - KEY_SPEC_GROUP_KEYRING -6 GID-specific keyring - KEY_SPEC_REQKEY_AUTH_KEY -7 assumed request_key() - authorisation key - - -The main syscalls are: - - (*) Create a new key of given type, description and payload and add it to the - nominated keyring: - - key_serial_t add_key(const char *type, const char *desc, - const void *payload, size_t plen, - key_serial_t keyring); - - If a key of the same type and description as that proposed already exists - in the keyring, this will try to update it with the given payload, or it - will return error EEXIST if that function is not supported by the key - type. The process must also have permission to write to the key to be able - to update it. The new key will have all user permissions granted and no - group or third party permissions. - - Otherwise, this will attempt to create a new key of the specified type and - description, and to instantiate it with the supplied payload and attach it - to the keyring. In this case, an error will be generated if the process - does not have permission to write to the keyring. - - The payload is optional, and the pointer can be NULL if not required by - the type. The payload is plen in size, and plen can be zero for an empty - payload. - - A new keyring can be generated by setting type "keyring", the keyring name - as the description (or NULL) and setting the payload to NULL. - - User defined keys can be created by specifying type "user". It is - recommended that a user defined key's description by prefixed with a type - ID and a colon, such as "krb5tgt:" for a Kerberos 5 ticket granting - ticket. - - Any other type must have been registered with the kernel in advance by a - kernel service such as a filesystem. - - The ID of the new or updated key is returned if successful. - - - (*) Search the process's keyrings for a key, potentially calling out to - userspace to create it. - - key_serial_t request_key(const char *type, const char *description, - const char *callout_info, - key_serial_t dest_keyring); - - This function searches all the process's keyrings in the order thread, - process, session for a matching key. This works very much like - KEYCTL_SEARCH, including the optional attachment of the discovered key to - a keyring. - - If a key cannot be found, and if callout_info is not NULL, then - /sbin/request-key will be invoked in an attempt to obtain a key. The - callout_info string will be passed as an argument to the program. - - See also Documentation/keys-request-key.txt. - - -The keyctl syscall functions are: - - (*) Map a special key ID to a real key ID for this process: - - key_serial_t keyctl(KEYCTL_GET_KEYRING_ID, key_serial_t id, - int create); - - The special key specified by "id" is looked up (with the key being created - if necessary) and the ID of the key or keyring thus found is returned if - it exists. 
- - If the key does not yet exist, the key will be created if "create" is - non-zero; and the error ENOKEY will be returned if "create" is zero. - - - (*) Replace the session keyring this process subscribes to with a new one: - - key_serial_t keyctl(KEYCTL_JOIN_SESSION_KEYRING, const char *name); - - If name is NULL, an anonymous keyring is created attached to the process - as its session keyring, displacing the old session keyring. - - If name is not NULL, if a keyring of that name exists, the process - attempts to attach it as the session keyring, returning an error if that - is not permitted; otherwise a new keyring of that name is created and - attached as the session keyring. - - To attach to a named keyring, the keyring must have search permission for - the process's ownership. - - The ID of the new session keyring is returned if successful. - - - (*) Update the specified key: - - long keyctl(KEYCTL_UPDATE, key_serial_t key, const void *payload, - size_t plen); - - This will try to update the specified key with the given payload, or it - will return error EOPNOTSUPP if that function is not supported by the key - type. The process must also have permission to write to the key to be able - to update it. - - The payload is of length plen, and may be absent or empty as for - add_key(). - - - (*) Revoke a key: - - long keyctl(KEYCTL_REVOKE, key_serial_t key); - - This makes a key unavailable for further operations. Further attempts to - use the key will be met with error EKEYREVOKED, and the key will no longer - be findable. - - - (*) Change the ownership of a key: - - long keyctl(KEYCTL_CHOWN, key_serial_t key, uid_t uid, gid_t gid); - - This function permits a key's owner and group ID to be changed. Either one - of uid or gid can be set to -1 to suppress that change. - - Only the superuser can change a key's owner to something other than the - key's current owner. Similarly, only the superuser can change a key's - group ID to something other than the calling process's group ID or one of - its group list members. - - - (*) Change the permissions mask on a key: - - long keyctl(KEYCTL_SETPERM, key_serial_t key, key_perm_t perm); - - This function permits the owner of a key or the superuser to change the - permissions mask on a key. - - Only bits the available bits are permitted; if any other bits are set, - error EINVAL will be returned. - - - (*) Describe a key: - - long keyctl(KEYCTL_DESCRIBE, key_serial_t key, char *buffer, - size_t buflen); - - This function returns a summary of the key's attributes (but not its - payload data) as a string in the buffer provided. - - Unless there's an error, it always returns the amount of data it could - produce, even if that's too big for the buffer, but it won't copy more - than requested to userspace. If the buffer pointer is NULL then no copy - will take place. - - A process must have view permission on the key for this function to be - successful. - - If successful, a string is placed in the buffer in the following format: - - ;;;; - - Where type and description are strings, uid and gid are decimal, and perm - is hexadecimal. A NUL character is included at the end of the string if - the buffer is sufficiently big. - - This can be parsed with - - sscanf(buffer, "%[^;];%d;%d;%o;%s", type, &uid, &gid, &mode, desc); - - - (*) Clear out a keyring: - - long keyctl(KEYCTL_CLEAR, key_serial_t keyring); - - This function clears the list of keys attached to a keyring. 
The calling - process must have write permission on the keyring, and it must be a - keyring (or else error ENOTDIR will result). - - - (*) Link a key into a keyring: - - long keyctl(KEYCTL_LINK, key_serial_t keyring, key_serial_t key); - - This function creates a link from the keyring to the key. The process must - have write permission on the keyring and must have link permission on the - key. - - Should the keyring not be a keyring, error ENOTDIR will result; and if the - keyring is full, error ENFILE will result. - - The link procedure checks the nesting of the keyrings, returning ELOOP if - it appears too deep or EDEADLK if the link would introduce a cycle. - - Any links within the keyring to keys that match the new key in terms of - type and description will be discarded from the keyring as the new one is - added. - - - (*) Unlink a key or keyring from another keyring: - - long keyctl(KEYCTL_UNLINK, key_serial_t keyring, key_serial_t key); - - This function looks through the keyring for the first link to the - specified key, and removes it if found. Subsequent links to that key are - ignored. The process must have write permission on the keyring. - - If the keyring is not a keyring, error ENOTDIR will result; and if the key - is not present, error ENOENT will be the result. - - - (*) Search a keyring tree for a key: - - key_serial_t keyctl(KEYCTL_SEARCH, key_serial_t keyring, - const char *type, const char *description, - key_serial_t dest_keyring); - - This searches the keyring tree headed by the specified keyring until a key - is found that matches the type and description criteria. Each keyring is - checked for keys before recursion into its children occurs. - - The process must have search permission on the top level keyring, or else - error EACCES will result. Only keyrings that the process has search - permission on will be recursed into, and only keys and keyrings for which - a process has search permission can be matched. If the specified keyring - is not a keyring, ENOTDIR will result. - - If the search succeeds, the function will attempt to link the found key - into the destination keyring if one is supplied (non-zero ID). All the - constraints applicable to KEYCTL_LINK apply in this case too. - - Error ENOKEY, EKEYREVOKED or EKEYEXPIRED will be returned if the search - fails. On success, the resulting key ID will be returned. - - - (*) Read the payload data from a key: - - long keyctl(KEYCTL_READ, key_serial_t keyring, char *buffer, - size_t buflen); - - This function attempts to read the payload data from the specified key - into the buffer. The process must have read permission on the key to - succeed. - - The returned data will be processed for presentation by the key type. For - instance, a keyring will return an array of key_serial_t entries - representing the IDs of all the keys to which it is subscribed. The user - defined key type will return its data as is. If a key type does not - implement this function, error EOPNOTSUPP will result. - - As much of the data as can be fitted into the buffer will be copied to - userspace if the buffer pointer is not NULL. - - On a successful return, the function will always return the amount of data - available rather than the amount copied. - - - (*) Instantiate a partially constructed key. 
- - long keyctl(KEYCTL_INSTANTIATE, key_serial_t key, - const void *payload, size_t plen, - key_serial_t keyring); - long keyctl(KEYCTL_INSTANTIATE_IOV, key_serial_t key, - const struct iovec *payload_iov, unsigned ioc, - key_serial_t keyring); - - If the kernel calls back to userspace to complete the instantiation of a - key, userspace should use this call to supply data for the key before the - invoked process returns, or else the key will be marked negative - automatically. - - The process must have write access on the key to be able to instantiate - it, and the key must be uninstantiated. - - If a keyring is specified (non-zero), the key will also be linked into - that keyring, however all the constraints applying in KEYCTL_LINK apply in - this case too. - - The payload and plen arguments describe the payload data as for add_key(). - - The payload_iov and ioc arguments describe the payload data in an iovec - array instead of a single buffer. - - - (*) Negatively instantiate a partially constructed key. - - long keyctl(KEYCTL_NEGATE, key_serial_t key, - unsigned timeout, key_serial_t keyring); - long keyctl(KEYCTL_REJECT, key_serial_t key, - unsigned timeout, unsigned error, key_serial_t keyring); - - If the kernel calls back to userspace to complete the instantiation of a - key, userspace should use this call mark the key as negative before the - invoked process returns if it is unable to fulfil the request. - - The process must have write access on the key to be able to instantiate - it, and the key must be uninstantiated. - - If a keyring is specified (non-zero), the key will also be linked into - that keyring, however all the constraints applying in KEYCTL_LINK apply in - this case too. - - If the key is rejected, future searches for it will return the specified - error code until the rejected key expires. Negating the key is the same - as rejecting the key with ENOKEY as the error code. - - - (*) Set the default request-key destination keyring. - - long keyctl(KEYCTL_SET_REQKEY_KEYRING, int reqkey_defl); - - This sets the default keyring to which implicitly requested keys will be - attached for this thread. reqkey_defl should be one of these constants: - - CONSTANT VALUE NEW DEFAULT KEYRING - ====================================== ====== ======================= - KEY_REQKEY_DEFL_NO_CHANGE -1 No change - KEY_REQKEY_DEFL_DEFAULT 0 Default[1] - KEY_REQKEY_DEFL_THREAD_KEYRING 1 Thread keyring - KEY_REQKEY_DEFL_PROCESS_KEYRING 2 Process keyring - KEY_REQKEY_DEFL_SESSION_KEYRING 3 Session keyring - KEY_REQKEY_DEFL_USER_KEYRING 4 User keyring - KEY_REQKEY_DEFL_USER_SESSION_KEYRING 5 User session keyring - KEY_REQKEY_DEFL_GROUP_KEYRING 6 Group keyring - - The old default will be returned if successful and error EINVAL will be - returned if reqkey_defl is not one of the above values. - - The default keyring can be overridden by the keyring indicated to the - request_key() system call. - - Note that this setting is inherited across fork/exec. - - [1] The default is: the thread keyring if there is one, otherwise - the process keyring if there is one, otherwise the session keyring if - there is one, otherwise the user default session keyring. - - - (*) Set the timeout on a key. - - long keyctl(KEYCTL_SET_TIMEOUT, key_serial_t key, unsigned timeout); - - This sets or clears the timeout on a key. The timeout can be 0 to clear - the timeout or a number of seconds to set the expiry time that far into - the future. 
- - The process must have attribute modification access on a key to set its - timeout. Timeouts may not be set with this function on negative, revoked - or expired keys. - - - (*) Assume the authority granted to instantiate a key - - long keyctl(KEYCTL_ASSUME_AUTHORITY, key_serial_t key); - - This assumes or divests the authority required to instantiate the - specified key. Authority can only be assumed if the thread has the - authorisation key associated with the specified key in its keyrings - somewhere. - - Once authority is assumed, searches for keys will also search the - requester's keyrings using the requester's security label, UID, GID and - groups. - - If the requested authority is unavailable, error EPERM will be returned, - likewise if the authority has been revoked because the target key is - already instantiated. - - If the specified key is 0, then any assumed authority will be divested. - - The assumed authoritative key is inherited across fork and exec. - - - (*) Get the LSM security context attached to a key. - - long keyctl(KEYCTL_GET_SECURITY, key_serial_t key, char *buffer, - size_t buflen) - - This function returns a string that represents the LSM security context - attached to a key in the buffer provided. - - Unless there's an error, it always returns the amount of data it could - produce, even if that's too big for the buffer, but it won't copy more - than requested to userspace. If the buffer pointer is NULL then no copy - will take place. - - A NUL character is included at the end of the string if the buffer is - sufficiently big. This is included in the returned count. If no LSM is - in force then an empty string will be returned. - - A process must have view permission on the key for this function to be - successful. - - - (*) Install the calling process's session keyring on its parent. - - long keyctl(KEYCTL_SESSION_TO_PARENT); - - This functions attempts to install the calling process's session keyring - on to the calling process's parent, replacing the parent's current session - keyring. - - The calling process must have the same ownership as its parent, the - keyring must have the same ownership as the calling process, the calling - process must have LINK permission on the keyring and the active LSM module - mustn't deny permission, otherwise error EPERM will be returned. - - Error ENOMEM will be returned if there was insufficient memory to complete - the operation, otherwise 0 will be returned to indicate success. - - The keyring will be replaced next time the parent process leaves the - kernel and resumes executing userspace. - - -=============== -KERNEL SERVICES -=============== - -The kernel services for key management are fairly simple to deal with. They can -be broken down into two areas: keys and key types. - -Dealing with keys is fairly straightforward. Firstly, the kernel service -registers its type, then it searches for a key of that type. It should retain -the key as long as it has need of it, and then it should release it. For a -filesystem or device file, a search would probably be performed during the open -call, and the key released upon close. How to deal with conflicting keys due to -two different users opening the same file is left to the filesystem author to -solve. - -To access the key manager, the following header must be #included: - - - -Specific key types should have a header file under include/keys/ that should be -used to access that type. 
For keys of type "user", for example, that would be: - - - -Note that there are two different types of pointers to keys that may be -encountered: - - (*) struct key * - - This simply points to the key structure itself. Key structures will be at - least four-byte aligned. - - (*) key_ref_t - - This is equivalent to a struct key *, but the least significant bit is set - if the caller "possesses" the key. By "possession" it is meant that the - calling processes has a searchable link to the key from one of its - keyrings. There are three functions for dealing with these: - - key_ref_t make_key_ref(const struct key *key, - unsigned long possession); - - struct key *key_ref_to_ptr(const key_ref_t key_ref); - - unsigned long is_key_possessed(const key_ref_t key_ref); - - The first function constructs a key reference from a key pointer and - possession information (which must be 0 or 1 and not any other value). - - The second function retrieves the key pointer from a reference and the - third retrieves the possession flag. - -When accessing a key's payload contents, certain precautions must be taken to -prevent access vs modification races. See the section "Notes on accessing -payload contents" for more information. - -(*) To search for a key, call: - - struct key *request_key(const struct key_type *type, - const char *description, - const char *callout_info); - - This is used to request a key or keyring with a description that matches - the description specified according to the key type's match function. This - permits approximate matching to occur. If callout_string is not NULL, then - /sbin/request-key will be invoked in an attempt to obtain the key from - userspace. In that case, callout_string will be passed as an argument to - the program. - - Should the function fail error ENOKEY, EKEYEXPIRED or EKEYREVOKED will be - returned. - - If successful, the key will have been attached to the default keyring for - implicitly obtained request-key keys, as set by KEYCTL_SET_REQKEY_KEYRING. - - See also Documentation/keys-request-key.txt. - - -(*) To search for a key, passing auxiliary data to the upcaller, call: - - struct key *request_key_with_auxdata(const struct key_type *type, - const char *description, - const void *callout_info, - size_t callout_len, - void *aux); - - This is identical to request_key(), except that the auxiliary data is - passed to the key_type->request_key() op if it exists, and the callout_info - is a blob of length callout_len, if given (the length may be 0). - - -(*) A key can be requested asynchronously by calling one of: - - struct key *request_key_async(const struct key_type *type, - const char *description, - const void *callout_info, - size_t callout_len); - - or: - - struct key *request_key_async_with_auxdata(const struct key_type *type, - const char *description, - const char *callout_info, - size_t callout_len, - void *aux); - - which are asynchronous equivalents of request_key() and - request_key_with_auxdata() respectively. - - These two functions return with the key potentially still under - construction. To wait for construction completion, the following should be - called: - - int wait_for_key_construction(struct key *key, bool intr); - - The function will wait for the key to finish being constructed and then - invokes key_validate() to return an appropriate value to indicate the state - of the key (0 indicates the key is usable). - - If intr is true, then the wait can be interrupted by a signal, in which - case error ERESTARTSYS will be returned. 
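To make the asynchronous interface concrete, here is a hedged, kernel-side
sketch (not taken from any in-tree user; the description string is made up)
that requests a "user" key without blocking on the upcall and then waits for
construction to finish:

    #include <linux/err.h>
    #include <linux/key.h>
    #include <keys/user-type.h>

    static int example_get_user_key(void)
    {
            struct key *key;
            int ret;

            /* start the request; the upcall may still be running on return */
            key = request_key_async(&key_type_user, "example:desc", NULL, 0);
            if (IS_ERR(key))
                    return PTR_ERR(key);

            /* wait interruptibly, then check that the key is usable */
            ret = wait_for_key_construction(key, true);
            if (ret < 0) {
                    key_put(key);
                    return ret;
            }

            /* ... access the payload under RCU or the key semaphore ... */
            key_put(key);
            return 0;
    }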
- - -(*) When it is no longer required, the key should be released using: - - void key_put(struct key *key); - - Or: - - void key_ref_put(key_ref_t key_ref); - - These can be called from interrupt context. If CONFIG_KEYS is not set then - the argument will not be parsed. - - -(*) Extra references can be made to a key by calling the following function: - - struct key *key_get(struct key *key); - - These need to be disposed of by calling key_put() when they've been - finished with. The key pointer passed in will be returned. If the pointer - is NULL or CONFIG_KEYS is not set then the key will not be dereferenced and - no increment will take place. - - -(*) A key's serial number can be obtained by calling: - - key_serial_t key_serial(struct key *key); - - If key is NULL or if CONFIG_KEYS is not set then 0 will be returned (in the - latter case without parsing the argument). - - -(*) If a keyring was found in the search, this can be further searched by: - - key_ref_t keyring_search(key_ref_t keyring_ref, - const struct key_type *type, - const char *description) - - This searches the keyring tree specified for a matching key. Error ENOKEY - is returned upon failure (use IS_ERR/PTR_ERR to determine). If successful, - the returned key will need to be released. - - The possession attribute from the keyring reference is used to control - access through the permissions mask and is propagated to the returned key - reference pointer if successful. - - -(*) To check the validity of a key, this function can be called: - - int validate_key(struct key *key); - - This checks that the key in question hasn't expired or and hasn't been - revoked. Should the key be invalid, error EKEYEXPIRED or EKEYREVOKED will - be returned. If the key is NULL or if CONFIG_KEYS is not set then 0 will be - returned (in the latter case without parsing the argument). - - -(*) To register a key type, the following function should be called: - - int register_key_type(struct key_type *type); - - This will return error EEXIST if a type of the same name is already - present. - - -(*) To unregister a key type, call: - - void unregister_key_type(struct key_type *type); - - -Under some circumstances, it may be desirable to deal with a bundle of keys. -The facility provides access to the keyring type for managing such a bundle: - - struct key_type key_type_keyring; - -This can be used with a function such as request_key() to find a specific -keyring in a process's keyrings. A keyring thus found can then be searched -with keyring_search(). Note that it is not possible to use request_key() to -search a specific keyring, so using keyrings in this way is of limited utility. - - -=================================== -NOTES ON ACCESSING PAYLOAD CONTENTS -=================================== - -The simplest payload is just a number in key->payload.value. In this case, -there's no need to indulge in RCU or locking when accessing the payload. - -More complex payload contents must be allocated and a pointer to them set in -key->payload.data. One of the following ways must be selected to access the -data: - - (1) Unmodifiable key type. - - If the key type does not have a modify method, then the key's payload can - be accessed without any form of locking, provided that it's known to be - instantiated (uninstantiated keys cannot be "found"). - - (2) The key's semaphore. - - The semaphore could be used to govern access to the payload and to control - the payload pointer. 
It must be write-locked for modifications and would - have to be read-locked for general access. The disadvantage of doing this - is that the accessor may be required to sleep. - - (3) RCU. - - RCU must be used when the semaphore isn't already held; if the semaphore - is held then the contents can't change under you unexpectedly as the - semaphore must still be used to serialise modifications to the key. The - key management code takes care of this for the key type. - - However, this means using: - - rcu_read_lock() ... rcu_dereference() ... rcu_read_unlock() - - to read the pointer, and: - - rcu_dereference() ... rcu_assign_pointer() ... call_rcu() - - to set the pointer and dispose of the old contents after a grace period. - Note that only the key type should ever modify a key's payload. - - Furthermore, an RCU controlled payload must hold a struct rcu_head for the - use of call_rcu() and, if the payload is of variable size, the length of - the payload. key->datalen cannot be relied upon to be consistent with the - payload just dereferenced if the key's semaphore is not held. - - -=================== -DEFINING A KEY TYPE -=================== - -A kernel service may want to define its own key type. For instance, an AFS -filesystem might want to define a Kerberos 5 ticket key type. To do this, it -author fills in a key_type struct and registers it with the system. - -Source files that implement key types should include the following header file: - - - -The structure has a number of fields, some of which are mandatory: - - (*) const char *name - - The name of the key type. This is used to translate a key type name - supplied by userspace into a pointer to the structure. - - - (*) size_t def_datalen - - This is optional - it supplies the default payload data length as - contributed to the quota. If the key type's payload is always or almost - always the same size, then this is a more efficient way to do things. - - The data length (and quota) on a particular key can always be changed - during instantiation or update by calling: - - int key_payload_reserve(struct key *key, size_t datalen); - - With the revised data length. Error EDQUOT will be returned if this is not - viable. - - - (*) int (*vet_description)(const char *description); - - This optional method is called to vet a key description. If the key type - doesn't approve of the key description, it may return an error, otherwise - it should return 0. - - - (*) int (*instantiate)(struct key *key, const void *data, size_t datalen); - - This method is called to attach a payload to a key during construction. - The payload attached need not bear any relation to the data passed to this - function. - - If the amount of data attached to the key differs from the size in - keytype->def_datalen, then key_payload_reserve() should be called. - - This method does not have to lock the key in order to attach a payload. - The fact that KEY_FLAG_INSTANTIATED is not set in key->flags prevents - anything else from gaining access to the key. - - It is safe to sleep in this method. - - - (*) int (*update)(struct key *key, const void *data, size_t datalen); - - If this type of key can be updated, then this method should be provided. - It is called to update a key's payload from the blob of data provided. - - key_payload_reserve() should be called if the data length might change - before any changes are actually made. 
Note that if this succeeds, the type - is committed to changing the key because it's already been altered, so all - memory allocation must be done first. - - The key will have its semaphore write-locked before this method is called, - but this only deters other writers; any changes to the key's payload must - be made under RCU conditions, and call_rcu() must be used to dispose of - the old payload. - - key_payload_reserve() should be called before the changes are made, but - after all allocations and other potentially failing function calls are - made. - - It is safe to sleep in this method. - - - (*) int (*match)(const struct key *key, const void *desc); - - This method is called to match a key against a description. It should - return non-zero if the two match, zero if they don't. - - This method should not need to lock the key in any way. The type and - description can be considered invariant, and the payload should not be - accessed (the key may not yet be instantiated). - - It is not safe to sleep in this method; the caller may hold spinlocks. - - - (*) void (*revoke)(struct key *key); - - This method is optional. It is called to discard part of the payload - data upon a key being revoked. The caller will have the key semaphore - write-locked. - - It is safe to sleep in this method, though care should be taken to avoid - a deadlock against the key semaphore. - - - (*) void (*destroy)(struct key *key); - - This method is optional. It is called to discard the payload data on a key - when it is being destroyed. - - This method does not need to lock the key to access the payload; it can - consider the key as being inaccessible at this time. Note that the key's - type may have been changed before this function is called. - - It is not safe to sleep in this method; the caller may hold spinlocks. - - - (*) void (*describe)(const struct key *key, struct seq_file *p); - - This method is optional. It is called during /proc/keys reading to - summarise a key's description and payload in text form. - - This method will be called with the RCU read lock held. rcu_dereference() - should be used to read the payload pointer if the payload is to be - accessed. key->datalen cannot be trusted to stay consistent with the - contents of the payload. - - The description will not change, though the key's state may. - - It is not safe to sleep in this method; the RCU read lock is held by the - caller. - - - (*) long (*read)(const struct key *key, char __user *buffer, size_t buflen); - - This method is optional. It is called by KEYCTL_READ to translate the - key's payload into something a blob of data for userspace to deal with. - Ideally, the blob should be in the same format as that passed in to the - instantiate and update methods. - - If successful, the blob size that could be produced should be returned - rather than the size copied. - - This method will be called with the key's semaphore read-locked. This will - prevent the key's payload changing. It is not necessary to use RCU locking - when accessing the key's payload. It is safe to sleep in this method, such - as might happen when the userspace buffer is accessed. - - - (*) int (*request_key)(struct key_construction *cons, const char *op, - void *aux); - - This method is optional. If provided, request_key() and friends will - invoke this function rather than upcalling to /sbin/request-key to operate - upon a key of this type. - - The aux parameter is as passed to request_key_async_with_auxdata() and - similar or is NULL otherwise. 
Also passed are the construction record for
-the key to be operated upon and the operation type (currently only
-"create").
-
-This method is permitted to return before the upcall is complete, but the
-following function must be called under all circumstances to complete the
-instantiation process, whether or not it succeeds, whether or not there's
-an error:
-
- void complete_request_key(struct key_construction *cons, int error);
-
-The error parameter should be 0 on success, -ve on error. The
-construction record is destroyed by this action and the authorisation key
-will be revoked. If an error is indicated, the key under construction
-will be negatively instantiated if it wasn't already instantiated.
-
-If this method returns an error, that error will be returned to the
-caller of request_key*(). complete_request_key() must be called prior to
-returning.
-
-The key under construction and the authorisation key can be found in the
-key_construction struct pointed to by cons:
-
- (*) struct key *key;
-
- The key under construction.
-
- (*) struct key *authkey;
-
- The authorisation key.
-
-
-============================
-REQUEST-KEY CALLBACK SERVICE
-============================
-
-To create a new key, the kernel will attempt to execute the following command
-line:
-
- /sbin/request-key create <key> <uid> <gid> \
- <threadring> <processring> <sessionring>
-
-<key> is the key being constructed, and the three keyrings are the process
-keyrings from the process that caused the search to be issued. These are
-included for two reasons:
-
- (1) There may be an authentication token in one of the keyrings that is
- required to obtain the key, eg: a Kerberos Ticket-Granting Ticket.
-
- (2) The new key should probably be cached in one of these rings.
-
-This program should set its UID and GID to those specified before attempting to
-access any more keys. It may then look around for a user specific process to
-hand the request off to (perhaps a path held in another key by, for
-example, the KDE desktop manager).
-
-The program (or whatever it calls) should finish construction of the key by
-calling KEYCTL_INSTANTIATE or KEYCTL_INSTANTIATE_IOV, which also permits it to
-cache the key in one of the keyrings (probably the session ring) before
-returning. Alternatively, the key can be marked as negative with KEYCTL_NEGATE
-or KEYCTL_REJECT; this also permits the key to be cached in one of the
-keyrings.
-
-If it returns with the key remaining in the unconstructed state, the key will
-be marked as being negative, it will be added to the session keyring, and an
-error will be returned to the key requestor.
-
-Supplementary information may be provided from whoever or whatever invoked this
-service. This will be passed as the <callout_info> parameter. If no such
-information was made available, then "-" will be passed as this parameter
-instead.
-
-
-Similarly, the kernel may attempt to update an expired or a soon to expire key
-by executing:
-
- /sbin/request-key update <key> <uid> <gid> \
- <threadring> <processring> <sessionring>
-
-In this case, the program isn't required to actually attach the key to a ring;
-the rings are provided for reference.
-
-
-==================
-GARBAGE COLLECTION
-==================
-
-Dead keys (for which the type has been removed) will be automatically unlinked
-from those keyrings that point to them and deleted as soon as possible by a
-background garbage collector.
-
-Similarly, revoked and expired keys will be garbage collected, but only after a
-certain amount of time has passed.
This time is set as a number of seconds in: - - /proc/sys/kernel/keys/gc_delay diff --git a/Documentation/networking/dns_resolver.txt b/Documentation/networking/dns_resolver.txt index 04ca06325b08..7f531ad83285 100644 --- a/Documentation/networking/dns_resolver.txt +++ b/Documentation/networking/dns_resolver.txt @@ -139,8 +139,8 @@ the key will be discarded and recreated when the data it holds has expired. dns_query() returns a copy of the value attached to the key, or an error if that is indicated instead. -See for further information about -request-key function. +See for further +information about request-key function. ========= diff --git a/Documentation/security/00-INDEX b/Documentation/security/00-INDEX new file mode 100644 index 000000000000..19bc49439cac --- /dev/null +++ b/Documentation/security/00-INDEX @@ -0,0 +1,18 @@ +00-INDEX + - this file. +SELinux.txt + - how to get started with the SELinux security enhancement. +Smack.txt + - documentation on the Smack Linux Security Module. +apparmor.txt + - documentation on the AppArmor security extension. +credentials.txt + - documentation about credentials in Linux. +keys-request-key.txt + - description of the kernel key request service. +keys-trusted-encrypted.txt + - info on the Trusted and Encrypted keys in the kernel key ring service. +keys.txt + - description of the kernel key retention service. +tomoyo.txt + - documentation on the TOMOYO Linux Security Module. diff --git a/Documentation/security/SELinux.txt b/Documentation/security/SELinux.txt new file mode 100644 index 000000000000..07eae00f3314 --- /dev/null +++ b/Documentation/security/SELinux.txt @@ -0,0 +1,27 @@ +If you want to use SELinux, chances are you will want +to use the distro-provided policies, or install the +latest reference policy release from + http://oss.tresys.com/projects/refpolicy + +However, if you want to install a dummy policy for +testing, you can do using 'mdp' provided under +scripts/selinux. Note that this requires the selinux +userspace to be installed - in particular you will +need checkpolicy to compile a kernel, and setfiles and +fixfiles to label the filesystem. + + 1. Compile the kernel with selinux enabled. + 2. Type 'make' to compile mdp. + 3. Make sure that you are not running with + SELinux enabled and a real policy. If + you are, reboot with selinux disabled + before continuing. + 4. Run install_policy.sh: + cd scripts/selinux + sh install_policy.sh + +Step 4 will create a new dummy policy valid for your +kernel, with a single selinux user, role, and type. +It will compile the policy, will set your SELINUXTYPE to +dummy in /etc/selinux/config, install the compiled policy +as 'dummy', and relabel your filesystem. diff --git a/Documentation/security/Smack.txt b/Documentation/security/Smack.txt new file mode 100644 index 000000000000..e9dab41c0fe0 --- /dev/null +++ b/Documentation/security/Smack.txt @@ -0,0 +1,541 @@ + + + "Good for you, you've decided to clean the elevator!" + - The Elevator, from Dark Star + +Smack is the the Simplified Mandatory Access Control Kernel. +Smack is a kernel based implementation of mandatory access +control that includes simplicity in its primary design goals. + +Smack is not the only Mandatory Access Control scheme +available for Linux. Those new to Mandatory Access Control +are encouraged to compare Smack with the other mechanisms +available to determine which is best suited to the problem +at hand. 
+ +Smack consists of three major components: + - The kernel + - A start-up script and a few modified applications + - Configuration data + +The kernel component of Smack is implemented as a Linux +Security Modules (LSM) module. It requires netlabel and +works best with file systems that support extended attributes, +although xattr support is not strictly required. +It is safe to run a Smack kernel under a "vanilla" distribution. +Smack kernels use the CIPSO IP option. Some network +configurations are intolerant of IP options and can impede +access to systems that use them as Smack does. + +The startup script etc-init.d-smack should be installed +in /etc/init.d/smack and should be invoked early in the +start-up process. On Fedora rc5.d/S02smack is recommended. +This script ensures that certain devices have the correct +Smack attributes and loads the Smack configuration if +any is defined. This script invokes two programs that +ensure configuration data is properly formatted. These +programs are /usr/sbin/smackload and /usr/sin/smackcipso. +The system will run just fine without these programs, +but it will be difficult to set access rules properly. + +A version of "ls" that provides a "-M" option to display +Smack labels on long listing is available. + +A hacked version of sshd that allows network logins by users +with specific Smack labels is available. This version does +not work for scp. You must set the /etc/ssh/sshd_config +line: + UsePrivilegeSeparation no + +The format of /etc/smack/usr is: + + username smack + +In keeping with the intent of Smack, configuration data is +minimal and not strictly required. The most important +configuration step is mounting the smackfs pseudo filesystem. + +Add this line to /etc/fstab: + + smackfs /smack smackfs smackfsdef=* 0 0 + +and create the /smack directory for mounting. + +Smack uses extended attributes (xattrs) to store file labels. +The command to set a Smack label on a file is: + + # attr -S -s SMACK64 -V "value" path + +NOTE: Smack labels are limited to 23 characters. The attr command + does not enforce this restriction and can be used to set + invalid Smack labels on files. + +If you don't do anything special all users will get the floor ("_") +label when they log in. If you do want to log in via the hacked ssh +at other labels use the attr command to set the smack value on the +home directory and its contents. + +You can add access rules in /etc/smack/accesses. They take the form: + + subjectlabel objectlabel access + +access is a combination of the letters rwxa which specify the +kind of access permitted a subject with subjectlabel on an +object with objectlabel. If there is no rule no access is allowed. + +A process can see the smack label it is running with by +reading /proc/self/attr/current. A privileged process can +set the process smack by writing there. + +Look for additional programs on http://schaufler-ca.com + +From the Smack Whitepaper: + +The Simplified Mandatory Access Control Kernel + +Casey Schaufler +casey@schaufler-ca.com + +Mandatory Access Control + +Computer systems employ a variety of schemes to constrain how information is +shared among the people and services using the machine. Some of these schemes +allow the program or user to decide what other programs or users are allowed +access to pieces of data. These schemes are called discretionary access +control mechanisms because the access control is specified at the discretion +of the user. 
Other schemes do not leave the decision regarding what a user or +program can access up to users or programs. These schemes are called mandatory +access control mechanisms because you don't have a choice regarding the users +or programs that have access to pieces of data. + +Bell & LaPadula + +From the middle of the 1980's until the turn of the century Mandatory Access +Control (MAC) was very closely associated with the Bell & LaPadula security +model, a mathematical description of the United States Department of Defense +policy for marking paper documents. MAC in this form enjoyed a following +within the Capital Beltway and Scandinavian supercomputer centers but was +often sited as failing to address general needs. + +Domain Type Enforcement + +Around the turn of the century Domain Type Enforcement (DTE) became popular. +This scheme organizes users, programs, and data into domains that are +protected from each other. This scheme has been widely deployed as a component +of popular Linux distributions. The administrative overhead required to +maintain this scheme and the detailed understanding of the whole system +necessary to provide a secure domain mapping leads to the scheme being +disabled or used in limited ways in the majority of cases. + +Smack + +Smack is a Mandatory Access Control mechanism designed to provide useful MAC +while avoiding the pitfalls of its predecessors. The limitations of Bell & +LaPadula are addressed by providing a scheme whereby access can be controlled +according to the requirements of the system and its purpose rather than those +imposed by an arcane government policy. The complexity of Domain Type +Enforcement and avoided by defining access controls in terms of the access +modes already in use. + +Smack Terminology + +The jargon used to talk about Smack will be familiar to those who have dealt +with other MAC systems and shouldn't be too difficult for the uninitiated to +pick up. There are four terms that are used in a specific way and that are +especially important: + + Subject: A subject is an active entity on the computer system. + On Smack a subject is a task, which is in turn the basic unit + of execution. + + Object: An object is a passive entity on the computer system. + On Smack files of all types, IPC, and tasks can be objects. + + Access: Any attempt by a subject to put information into or get + information from an object is an access. + + Label: Data that identifies the Mandatory Access Control + characteristics of a subject or an object. + +These definitions are consistent with the traditional use in the security +community. There are also some terms from Linux that are likely to crop up: + + Capability: A task that possesses a capability has permission to + violate an aspect of the system security policy, as identified by + the specific capability. A task that possesses one or more + capabilities is a privileged task, whereas a task with no + capabilities is an unprivileged task. + + Privilege: A task that is allowed to violate the system security + policy is said to have privilege. As of this writing a task can + have privilege either by possessing capabilities or by having an + effective user of root. + +Smack Basics + +Smack is an extension to a Linux system. It enforces additional restrictions +on what subjects can access which objects, based on the labels attached to +each of the subject and the object. + +Labels + +Smack labels are ASCII character strings, one to twenty-three characters in +length. 
Single character labels using special characters, that being anything +other than a letter or digit, are reserved for use by the Smack development +team. Smack labels are unstructured, case sensitive, and the only operation +ever performed on them is comparison for equality. Smack labels cannot +contain unprintable characters, the "/" (slash), the "\" (backslash), the "'" +(quote) and '"' (double-quote) characters. +Smack labels cannot begin with a '-', which is reserved for special options. + +There are some predefined labels: + + _ Pronounced "floor", a single underscore character. + ^ Pronounced "hat", a single circumflex character. + * Pronounced "star", a single asterisk character. + ? Pronounced "huh", a single question mark character. + @ Pronounced "Internet", a single at sign character. + +Every task on a Smack system is assigned a label. System tasks, such as +init(8) and systems daemons, are run with the floor ("_") label. User tasks +are assigned labels according to the specification found in the +/etc/smack/user configuration file. + +Access Rules + +Smack uses the traditional access modes of Linux. These modes are read, +execute, write, and occasionally append. There are a few cases where the +access mode may not be obvious. These include: + + Signals: A signal is a write operation from the subject task to + the object task. + Internet Domain IPC: Transmission of a packet is considered a + write operation from the source task to the destination task. + +Smack restricts access based on the label attached to a subject and the label +attached to the object it is trying to access. The rules enforced are, in +order: + + 1. Any access requested by a task labeled "*" is denied. + 2. A read or execute access requested by a task labeled "^" + is permitted. + 3. A read or execute access requested on an object labeled "_" + is permitted. + 4. Any access requested on an object labeled "*" is permitted. + 5. Any access requested by a task on an object with the same + label is permitted. + 6. Any access requested that is explicitly defined in the loaded + rule set is permitted. + 7. Any other access is denied. + +Smack Access Rules + +With the isolation provided by Smack access separation is simple. There are +many interesting cases where limited access by subjects to objects with +different labels is desired. One example is the familiar spy model of +sensitivity, where a scientist working on a highly classified project would be +able to read documents of lower classifications and anything she writes will +be "born" highly classified. To accommodate such schemes Smack includes a +mechanism for specifying rules allowing access between labels. + +Access Rule Format + +The format of an access rule is: + + subject-label object-label access + +Where subject-label is the Smack label of the task, object-label is the Smack +label of the thing being accessed, and access is a string specifying the sort +of access allowed. The Smack labels are limited to 23 characters. The access +specification is searched for letters that describe access modes: + + a: indicates that append access should be granted. + r: indicates that read access should be granted. + w: indicates that write access should be granted. + x: indicates that execute access should be granted. + +Uppercase values for the specification letters are allowed as well. +Access mode specifications can be in any order. 
+Examples of acceptable rules are:
+
+    TopSecret Secret    rx
+    Secret    Unclass   R
+    Manager   Game      x
+    User      HR        w
+    New       Old       rRrRr
+    Closed    Off       -
+
+Examples of unacceptable rules are:
+
+    Top Secret Secret   rx
+    Ace        Ace      r
+    Odd        spells   waxbeans
+
+Spaces are not allowed in labels. Since a subject always has access to files
+with the same label, specifying a rule for that case is pointless. Only
+valid letters (rwxaRWXA) and the dash ('-') character are allowed in
+access specifications. The dash is a placeholder, so "a-r" is the same
+as "ar". A lone dash is used to specify that no access should be allowed.
+
+Applying Access Rules
+
+The developers of Linux rarely define new sorts of things, usually importing
+schemes and concepts from other systems. Most often, the other systems are
+variants of Unix. Unix has many endearing properties, but consistency of
+access control models is not one of them. Smack strives to treat accesses as
+uniformly as is sensible while keeping with the spirit of the underlying
+mechanism.
+
+File system objects including files, directories, named pipes, symbolic links,
+and devices require access permissions that closely match those used by mode
+bit access. To open a file for reading, read access is required on the file. To
+search a directory requires execute access. Creating a file with write access
+requires both read and write access on the containing directory. Deleting a
+file requires read and write access to the file and to the containing
+directory. It is possible that a user may be able to see that a file exists
+but not any of its attributes by the circumstance of having read access to the
+containing directory but not to the differently labeled file. This is an
+artifact of the file name being data in the directory, not a part of the file.
+
+IPC objects, message queues, semaphore sets, and memory segments exist in flat
+namespaces and access requests are only required to match the object in
+question.
+
+Process objects reflect tasks on the system and the Smack label used to access
+them is the same Smack label that the task would use for its own access
+attempts. Sending a signal via the kill() system call is a write operation
+from the signaler to the recipient. Debugging a process requires both reading
+and writing. Creating a new task is an internal operation that results in two
+tasks with identical Smack labels and requires no access checks.
+
+Sockets are data structures attached to processes and sending a packet from
+one process to another requires that the sender have write access to the
+receiver. The receiver is not required to have read access to the sender.
+
+Setting Access Rules
+
+The configuration file /etc/smack/accesses contains the rules to be set at
+system startup. The contents are written to the special file /smack/load.
+Rules can be written to /smack/load at any time and take effect immediately.
+For any pair of subject and object labels there can be only one rule, with the
+most recently specified overriding any earlier specification.
+
+The program smackload is provided to ensure data is formatted
+properly when written to /smack/load. This program reads lines
+of the form
+
+    subjectlabel objectlabel mode
+
+Task Attribute
+
+The Smack label of a process can be read from /proc/<pid>/attr/current. A
+process can read its own Smack label from /proc/self/attr/current. A
+privileged process can change its own Smack label by writing to
+/proc/self/attr/current but not the label of another process.
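+
+For example (an illustrative sketch only, using the hypothetical label
+"Rubble"), any shell can read its own label, and a sufficiently privileged
+shell can change it:
+
+    $ cat /proc/self/attr/current
+    _
+    # echo Rubble > /proc/self/attr/current
+    # cat /proc/self/attr/current
+    Rubble
+
+Note that the label changes only for the writing process (the shell above)
+and for whatever tasks it subsequently creates; other processes are
+unaffected.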
+ +File Attribute + +The Smack label of a filesystem object is stored as an extended attribute +named SMACK64 on the file. This attribute is in the security namespace. It can +only be changed by a process with privilege. + +Privilege + +A process with CAP_MAC_OVERRIDE is privileged. + +Smack Networking + +As mentioned before, Smack enforces access control on network protocol +transmissions. Every packet sent by a Smack process is tagged with its Smack +label. This is done by adding a CIPSO tag to the header of the IP packet. Each +packet received is expected to have a CIPSO tag that identifies the label and +if it lacks such a tag the network ambient label is assumed. Before the packet +is delivered a check is made to determine that a subject with the label on the +packet has write access to the receiving process and if that is not the case +the packet is dropped. + +CIPSO Configuration + +It is normally unnecessary to specify the CIPSO configuration. The default +values used by the system handle all internal cases. Smack will compose CIPSO +label values to match the Smack labels being used without administrative +intervention. Unlabeled packets that come into the system will be given the +ambient label. + +Smack requires configuration in the case where packets from a system that is +not smack that speaks CIPSO may be encountered. Usually this will be a Trusted +Solaris system, but there are other, less widely deployed systems out there. +CIPSO provides 3 important values, a Domain Of Interpretation (DOI), a level, +and a category set with each packet. The DOI is intended to identify a group +of systems that use compatible labeling schemes, and the DOI specified on the +smack system must match that of the remote system or packets will be +discarded. The DOI is 3 by default. The value can be read from /smack/doi and +can be changed by writing to /smack/doi. + +The label and category set are mapped to a Smack label as defined in +/etc/smack/cipso. + +A Smack/CIPSO mapping has the form: + + smack level [category [category]*] + +Smack does not expect the level or category sets to be related in any +particular way and does not assume or assign accesses based on them. Some +examples of mappings: + + TopSecret 7 + TS:A,B 7 1 2 + SecBDE 5 2 4 6 + RAFTERS 7 12 26 + +The ":" and "," characters are permitted in a Smack label but have no special +meaning. + +The mapping of Smack labels to CIPSO values is defined by writing to +/smack/cipso. Again, the format of data written to this special file +is highly restrictive, so the program smackcipso is provided to +ensure the writes are done properly. This program takes mappings +on the standard input and sends them to /smack/cipso properly. + +In addition to explicit mappings Smack supports direct CIPSO mappings. One +CIPSO level is used to indicate that the category set passed in the packet is +in fact an encoding of the Smack label. The level used is 250 by default. The +value can be read from /smack/direct and changed by writing to /smack/direct. + +Socket Attributes + +There are two attributes that are associated with sockets. These attributes +can only be set by privileged tasks, but any task can read them for their own +sockets. + + SMACK64IPIN: The Smack label of the task object. A privileged + program that will enforce policy may set this to the star label. + + SMACK64IPOUT: The Smack label transmitted with outgoing packets. + A privileged program may set this to match the label of another + task with which it hopes to communicate. 
+
+Smack Netlabel Exceptions
+
+You will often find that your labeled application has to talk to the outside,
+unlabeled world. To do this there's a special file /smack/netlabel where you can
+add some exceptions in the form:
+@IP1       LABEL1 or
+@IP2/MASK  LABEL2
+
+It means that your application will have unlabeled access to @IP1 if it has
+write access on LABEL1, and access to the subnet @IP2/MASK if it has write
+access on LABEL2.
+
+Entries in the /smack/netlabel file are matched by longest mask first, like in
+classless IPv4 routing.
+
+A special label '@' and an option '-CIPSO' can be used there:
+@      means Internet; any application with any label has access to it
+-CIPSO means standard CIPSO networking
+
+If you don't know what CIPSO is and don't plan to use it, you can just do:
+echo 127.0.0.1 -CIPSO > /smack/netlabel
+echo 0.0.0.0/0 @ > /smack/netlabel
+
+If you use CIPSO on your 192.168.0.0/16 local network and also need unlabeled
+Internet access, you can have:
+echo 127.0.0.1 -CIPSO > /smack/netlabel
+echo 192.168.0.0/16 -CIPSO > /smack/netlabel
+echo 0.0.0.0/0 @ > /smack/netlabel
+
+
+Writing Applications for Smack
+
+There are three sorts of applications that will run on a Smack system. How an
+application interacts with Smack will determine what it will have to do to
+work properly under Smack.
+
+Smack Ignorant Applications
+
+By far the majority of applications have no reason whatever to care about the
+unique properties of Smack. Since invoking a program has no impact on the
+Smack label associated with the process the only concern likely to arise is
+whether the process has execute access to the program.
+
+Smack Relevant Applications
+
+Some programs can be improved by teaching them about Smack, but do not make
+any security decisions themselves. The utility ls(1) is one example of such a
+program.
+
+Smack Enforcing Applications
+
+These are special programs that not only know about Smack, but participate in
+the enforcement of system policy. In most cases these are the programs that
+set up user sessions. There are also network services that provide information
+to processes running with various labels.
+
+File System Interfaces
+
+Smack maintains labels on file system objects using extended attributes. The
+Smack label of a file, directory, or other file system object can be obtained
+using getxattr(2).
+
+    len = getxattr("/", "security.SMACK64", value, sizeof (value));
+
+will put the Smack label of the root directory into value. A privileged
+process can set the Smack label of a file system object with setxattr(2).
+
+    len = strlen("Rubble");
+    rc = setxattr("/foo", "security.SMACK64", "Rubble", len, 0);
+
+will set the Smack label of /foo to "Rubble" if the program has appropriate
+privilege.
+
+Socket Interfaces
+
+The socket attributes can be read using fgetxattr(2).
+
+A privileged process can set the Smack label of outgoing packets with
+fsetxattr(2).
+
+    len = strlen("Rubble");
+    rc = fsetxattr(fd, "security.SMACK64IPOUT", "Rubble", len, 0);
+
+will set the Smack label "Rubble" on packets going out from the socket if the
+program has appropriate privilege.
+
+    rc = fsetxattr(fd, "security.SMACK64IPIN", "*", strlen("*"), 0);
+
+will set the Smack label "*" as the object label against which incoming
+packets will be checked if the program has appropriate privilege.
+
+Administration
+
+Smack supports some mount options:
+
+    smackfsdef=label: specifies the label to give files that lack
+    the Smack label extended attribute.
+
+    smackfsroot=label: specifies the label to assign the root of the
+    file system if it lacks the Smack extended attribute.
+
+    smackfshat=label: specifies a label that must have read access to
+    all labels set on the filesystem. Not yet enforced.
+
+    smackfsfloor=label: specifies a label to which all labels set on the
+    filesystem must have read access. Not yet enforced.
+
+These mount options apply to all file system types.
+
+Smack auditing
+
+If you want Smack auditing of security events, you need to set CONFIG_AUDIT
+in your kernel configuration.
+By default, all denied events will be audited. You can change this behavior by
+writing a single character to the /smack/logging file:
+0: no logging
+1: log denied (default)
+2: log accepted
+3: log denied & accepted
+
+Events are logged as 'key=value' pairs; for each event you will at least get
+the subject, the object, the rights requested, the action, the kernel function
+that triggered the event, plus other pairs depending on the type of event
+audited.
diff --git a/Documentation/security/apparmor.txt b/Documentation/security/apparmor.txt
new file mode 100644
index 000000000000..93c1fd7d0635
--- /dev/null
+++ b/Documentation/security/apparmor.txt
@@ -0,0 +1,39 @@
+--- What is AppArmor? ---
+
+AppArmor is a MAC style security extension for the Linux kernel. It implements
+a task centered policy, with task "profiles" being created and loaded
+from user space. Tasks on the system that do not have a profile defined for
+them run in an unconfined state which is equivalent to standard Linux DAC
+permissions.
+
+--- How to enable/disable ---
+
+set CONFIG_SECURITY_APPARMOR=y
+
+If AppArmor should be selected as the default security module then
+    set CONFIG_DEFAULT_SECURITY="apparmor"
+    and CONFIG_SECURITY_APPARMOR_BOOTPARAM_VALUE=1
+
+Build the kernel.
+
+If AppArmor is not the default security module it can be enabled by passing
+security=apparmor on the kernel's command line.
+
+If AppArmor is the default security module it can be disabled by passing
+apparmor=0, security=XXXX (where XXXX is a valid security module) on the
+kernel's command line.
+
+For AppArmor to enforce any restrictions beyond standard Linux DAC permissions,
+policy must be loaded into the kernel from user space (see the Documentation
+and tools links).
+
+--- Documentation ---
+
+Documentation can be found on the wiki.
+
+--- Links ---
+
+Mailing List - apparmor@lists.ubuntu.com
+Wiki - http://apparmor.wiki.kernel.org/
+User space tools - https://launchpad.net/apparmor
+Kernel module - git://git.kernel.org/pub/scm/linux/kernel/git/jj/apparmor-dev.git
diff --git a/Documentation/security/credentials.txt b/Documentation/security/credentials.txt
new file mode 100644
index 000000000000..fc0366cbd7ce
--- /dev/null
+++ b/Documentation/security/credentials.txt
@@ -0,0 +1,581 @@
+                    ====================
+                    CREDENTIALS IN LINUX
+                    ====================
+
+By: David Howells
+
+Contents:
+
+ (*) Overview.
+
+ (*) Types of credentials.
+
+ (*) File markings.
+
+ (*) Task credentials.
+
+     - Immutable credentials.
+     - Accessing task credentials.
+     - Accessing another task's credentials.
+     - Altering credentials.
+     - Managing credentials.
+
+ (*) Open file credentials.
+
+ (*) Overriding the VFS's use of credentials.
+
+
+========
+OVERVIEW
+========
+
+There are several parts to the security check performed by Linux when one
+object acts upon another:
+
+ (1) Objects.
+
+     Objects are things in the system that may be acted upon directly by
+     userspace programs.
Linux has a variety of actionable objects, including: + + - Tasks + - Files/inodes + - Sockets + - Message queues + - Shared memory segments + - Semaphores + - Keys + + As a part of the description of all these objects there is a set of + credentials. What's in the set depends on the type of object. + + (2) Object ownership. + + Amongst the credentials of most objects, there will be a subset that + indicates the ownership of that object. This is used for resource + accounting and limitation (disk quotas and task rlimits for example). + + In a standard UNIX filesystem, for instance, this will be defined by the + UID marked on the inode. + + (3) The objective context. + + Also amongst the credentials of those objects, there will be a subset that + indicates the 'objective context' of that object. This may or may not be + the same set as in (2) - in standard UNIX files, for instance, this is the + defined by the UID and the GID marked on the inode. + + The objective context is used as part of the security calculation that is + carried out when an object is acted upon. + + (4) Subjects. + + A subject is an object that is acting upon another object. + + Most of the objects in the system are inactive: they don't act on other + objects within the system. Processes/tasks are the obvious exception: + they do stuff; they access and manipulate things. + + Objects other than tasks may under some circumstances also be subjects. + For instance an open file may send SIGIO to a task using the UID and EUID + given to it by a task that called fcntl(F_SETOWN) upon it. In this case, + the file struct will have a subjective context too. + + (5) The subjective context. + + A subject has an additional interpretation of its credentials. A subset + of its credentials forms the 'subjective context'. The subjective context + is used as part of the security calculation that is carried out when a + subject acts. + + A Linux task, for example, has the FSUID, FSGID and the supplementary + group list for when it is acting upon a file - which are quite separate + from the real UID and GID that normally form the objective context of the + task. + + (6) Actions. + + Linux has a number of actions available that a subject may perform upon an + object. The set of actions available depends on the nature of the subject + and the object. + + Actions include reading, writing, creating and deleting files; forking or + signalling and tracing tasks. + + (7) Rules, access control lists and security calculations. + + When a subject acts upon an object, a security calculation is made. This + involves taking the subjective context, the objective context and the + action, and searching one or more sets of rules to see whether the subject + is granted or denied permission to act in the desired manner on the + object, given those contexts. + + There are two main sources of rules: + + (a) Discretionary access control (DAC): + + Sometimes the object will include sets of rules as part of its + description. This is an 'Access Control List' or 'ACL'. A Linux + file may supply more than one ACL. + + A traditional UNIX file, for example, includes a permissions mask that + is an abbreviated ACL with three fixed classes of subject ('user', + 'group' and 'other'), each of which may be granted certain privileges + ('read', 'write' and 'execute' - whatever those map to for the object + in question). UNIX file permissions do not allow the arbitrary + specification of subjects, however, and so are of limited use. 
+ + A Linux file might also sport a POSIX ACL. This is a list of rules + that grants various permissions to arbitrary subjects. + + (b) Mandatory access control (MAC): + + The system as a whole may have one or more sets of rules that get + applied to all subjects and objects, regardless of their source. + SELinux and Smack are examples of this. + + In the case of SELinux and Smack, each object is given a label as part + of its credentials. When an action is requested, they take the + subject label, the object label and the action and look for a rule + that says that this action is either granted or denied. + + +==================== +TYPES OF CREDENTIALS +==================== + +The Linux kernel supports the following types of credentials: + + (1) Traditional UNIX credentials. + + Real User ID + Real Group ID + + The UID and GID are carried by most, if not all, Linux objects, even if in + some cases it has to be invented (FAT or CIFS files for example, which are + derived from Windows). These (mostly) define the objective context of + that object, with tasks being slightly different in some cases. + + Effective, Saved and FS User ID + Effective, Saved and FS Group ID + Supplementary groups + + These are additional credentials used by tasks only. Usually, an + EUID/EGID/GROUPS will be used as the subjective context, and real UID/GID + will be used as the objective. For tasks, it should be noted that this is + not always true. + + (2) Capabilities. + + Set of permitted capabilities + Set of inheritable capabilities + Set of effective capabilities + Capability bounding set + + These are only carried by tasks. They indicate superior capabilities + granted piecemeal to a task that an ordinary task wouldn't otherwise have. + These are manipulated implicitly by changes to the traditional UNIX + credentials, but can also be manipulated directly by the capset() system + call. + + The permitted capabilities are those caps that the process might grant + itself to its effective or permitted sets through capset(). This + inheritable set might also be so constrained. + + The effective capabilities are the ones that a task is actually allowed to + make use of itself. + + The inheritable capabilities are the ones that may get passed across + execve(). + + The bounding set limits the capabilities that may be inherited across + execve(), especially when a binary is executed that will execute as UID 0. + + (3) Secure management flags (securebits). + + These are only carried by tasks. These govern the way the above + credentials are manipulated and inherited over certain operations such as + execve(). They aren't used directly as objective or subjective + credentials. + + (4) Keys and keyrings. + + These are only carried by tasks. They carry and cache security tokens + that don't fit into the other standard UNIX credentials. They are for + making such things as network filesystem keys available to the file + accesses performed by processes, without the necessity of ordinary + programs having to know about security details involved. + + Keyrings are a special type of key. They carry sets of other keys and can + be searched for the desired key. Each process may subscribe to a number + of keyrings: + + Per-thread keying + Per-process keyring + Per-session keyring + + When a process accesses a key, if not already present, it will normally be + cached on one of these keyrings for future accesses to find. + + For more information on using keys, see Documentation/security/keys.txt. 
+ + (5) LSM + + The Linux Security Module allows extra controls to be placed over the + operations that a task may do. Currently Linux supports two main + alternate LSM options: SELinux and Smack. + + Both work by labelling the objects in a system and then applying sets of + rules (policies) that say what operations a task with one label may do to + an object with another label. + + (6) AF_KEY + + This is a socket-based approach to credential management for networking + stacks [RFC 2367]. It isn't discussed by this document as it doesn't + interact directly with task and file credentials; rather it keeps system + level credentials. + + +When a file is opened, part of the opening task's subjective context is +recorded in the file struct created. This allows operations using that file +struct to use those credentials instead of the subjective context of the task +that issued the operation. An example of this would be a file opened on a +network filesystem where the credentials of the opened file should be presented +to the server, regardless of who is actually doing a read or a write upon it. + + +============= +FILE MARKINGS +============= + +Files on disk or obtained over the network may have annotations that form the +objective security context of that file. Depending on the type of filesystem, +this may include one or more of the following: + + (*) UNIX UID, GID, mode; + + (*) Windows user ID; + + (*) Access control list; + + (*) LSM security label; + + (*) UNIX exec privilege escalation bits (SUID/SGID); + + (*) File capabilities exec privilege escalation bits. + +These are compared to the task's subjective security context, and certain +operations allowed or disallowed as a result. In the case of execve(), the +privilege escalation bits come into play, and may allow the resulting process +extra privileges, based on the annotations on the executable file. + + +================ +TASK CREDENTIALS +================ + +In Linux, all of a task's credentials are held in (uid, gid) or through +(groups, keys, LSM security) a refcounted structure of type 'struct cred'. +Each task points to its credentials by a pointer called 'cred' in its +task_struct. + +Once a set of credentials has been prepared and committed, it may not be +changed, barring the following exceptions: + + (1) its reference count may be changed; + + (2) the reference count on the group_info struct it points to may be changed; + + (3) the reference count on the security data it points to may be changed; + + (4) the reference count on any keyrings it points to may be changed; + + (5) any keyrings it points to may be revoked, expired or have their security + attributes changed; and + + (6) the contents of any keyrings to which it points may be changed (the whole + point of keyrings being a shared set of credentials, modifiable by anyone + with appropriate access). + +To alter anything in the cred struct, the copy-and-replace principle must be +adhered to. First take a copy, then alter the copy and then use RCU to change +the task pointer to make it point to the new copy. There are wrappers to aid +with this (see below). + +A task may only alter its _own_ credentials; it is no longer permitted for a +task to alter another's credentials. This means the capset() system call is no +longer permitted to take any PID other than the one of the current process. 
+Also keyctl_instantiate() and keyctl_negate() functions no longer permit +attachment to process-specific keyrings in the requesting process as the +instantiating process may need to create them. + + +IMMUTABLE CREDENTIALS +--------------------- + +Once a set of credentials has been made public (by calling commit_creds() for +example), it must be considered immutable, barring two exceptions: + + (1) The reference count may be altered. + + (2) Whilst the keyring subscriptions of a set of credentials may not be + changed, the keyrings subscribed to may have their contents altered. + +To catch accidental credential alteration at compile time, struct task_struct +has _const_ pointers to its credential sets, as does struct file. Furthermore, +certain functions such as get_cred() and put_cred() operate on const pointers, +thus rendering casts unnecessary, but require to temporarily ditch the const +qualification to be able to alter the reference count. + + +ACCESSING TASK CREDENTIALS +-------------------------- + +A task being able to alter only its own credentials permits the current process +to read or replace its own credentials without the need for any form of locking +- which simplifies things greatly. It can just call: + + const struct cred *current_cred() + +to get a pointer to its credentials structure, and it doesn't have to release +it afterwards. + +There are convenience wrappers for retrieving specific aspects of a task's +credentials (the value is simply returned in each case): + + uid_t current_uid(void) Current's real UID + gid_t current_gid(void) Current's real GID + uid_t current_euid(void) Current's effective UID + gid_t current_egid(void) Current's effective GID + uid_t current_fsuid(void) Current's file access UID + gid_t current_fsgid(void) Current's file access GID + kernel_cap_t current_cap(void) Current's effective capabilities + void *current_security(void) Current's LSM security pointer + struct user_struct *current_user(void) Current's user account + +There are also convenience wrappers for retrieving specific associated pairs of +a task's credentials: + + void current_uid_gid(uid_t *, gid_t *); + void current_euid_egid(uid_t *, gid_t *); + void current_fsuid_fsgid(uid_t *, gid_t *); + +which return these pairs of values through their arguments after retrieving +them from the current task's credentials. + + +In addition, there is a function for obtaining a reference on the current +process's current set of credentials: + + const struct cred *get_current_cred(void); + +and functions for getting references to one of the credentials that don't +actually live in struct cred: + + struct user_struct *get_current_user(void); + struct group_info *get_current_groups(void); + +which get references to the current process's user accounting structure and +supplementary groups list respectively. + +Once a reference has been obtained, it must be released with put_cred(), +free_uid() or put_group_info() as appropriate. + + +ACCESSING ANOTHER TASK'S CREDENTIALS +------------------------------------ + +Whilst a task may access its own credentials without the need for locking, the +same is not true of a task wanting to access another task's credentials. It +must use the RCU read lock and rcu_dereference(). + +The rcu_dereference() is wrapped by: + + const struct cred *__task_cred(struct task_struct *task); + +This should be used inside the RCU read lock, as in the following example: + + void foo(struct task_struct *t, struct foo_data *f) + { + const struct cred *tcred; + ... 
+ rcu_read_lock(); + tcred = __task_cred(t); + f->uid = tcred->uid; + f->gid = tcred->gid; + f->groups = get_group_info(tcred->groups); + rcu_read_unlock(); + ... + } + +Should it be necessary to hold another task's credentials for a long period of +time, and possibly to sleep whilst doing so, then the caller should get a +reference on them using: + + const struct cred *get_task_cred(struct task_struct *task); + +This does all the RCU magic inside of it. The caller must call put_cred() on +the credentials so obtained when they're finished with. + + [*] Note: The result of __task_cred() should not be passed directly to + get_cred() as this may race with commit_cred(). + +There are a couple of convenience functions to access bits of another task's +credentials, hiding the RCU magic from the caller: + + uid_t task_uid(task) Task's real UID + uid_t task_euid(task) Task's effective UID + +If the caller is holding the RCU read lock at the time anyway, then: + + __task_cred(task)->uid + __task_cred(task)->euid + +should be used instead. Similarly, if multiple aspects of a task's credentials +need to be accessed, RCU read lock should be used, __task_cred() called, the +result stored in a temporary pointer and then the credential aspects called +from that before dropping the lock. This prevents the potentially expensive +RCU magic from being invoked multiple times. + +Should some other single aspect of another task's credentials need to be +accessed, then this can be used: + + task_cred_xxx(task, member) + +where 'member' is a non-pointer member of the cred struct. For instance: + + uid_t task_cred_xxx(task, suid); + +will retrieve 'struct cred::suid' from the task, doing the appropriate RCU +magic. This may not be used for pointer members as what they point to may +disappear the moment the RCU read lock is dropped. + + +ALTERING CREDENTIALS +-------------------- + +As previously mentioned, a task may only alter its own credentials, and may not +alter those of another task. This means that it doesn't need to use any +locking to alter its own credentials. + +To alter the current process's credentials, a function should first prepare a +new set of credentials by calling: + + struct cred *prepare_creds(void); + +this locks current->cred_replace_mutex and then allocates and constructs a +duplicate of the current process's credentials, returning with the mutex still +held if successful. It returns NULL if not successful (out of memory). + +The mutex prevents ptrace() from altering the ptrace state of a process whilst +security checks on credentials construction and changing is taking place as +the ptrace state may alter the outcome, particularly in the case of execve(). + +The new credentials set should be altered appropriately, and any security +checks and hooks done. Both the current and the proposed sets of credentials +are available for this purpose as current_cred() will return the current set +still at this point. + + +When the credential set is ready, it should be committed to the current process +by calling: + + int commit_creds(struct cred *new); + +This will alter various aspects of the credentials and the process, giving the +LSM a chance to do likewise, then it will use rcu_assign_pointer() to actually +commit the new credentials to current->cred, it will release +current->cred_replace_mutex to allow ptrace() to take place, and it will notify +the scheduler and others of the changes. 
+ +This function is guaranteed to return 0, so that it can be tail-called at the +end of such functions as sys_setresuid(). + +Note that this function consumes the caller's reference to the new credentials. +The caller should _not_ call put_cred() on the new credentials afterwards. + +Furthermore, once this function has been called on a new set of credentials, +those credentials may _not_ be changed further. + + +Should the security checks fail or some other error occur after prepare_creds() +has been called, then the following function should be invoked: + + void abort_creds(struct cred *new); + +This releases the lock on current->cred_replace_mutex that prepare_creds() got +and then releases the new credentials. + + +A typical credentials alteration function would look something like this: + + int alter_suid(uid_t suid) + { + struct cred *new; + int ret; + + new = prepare_creds(); + if (!new) + return -ENOMEM; + + new->suid = suid; + ret = security_alter_suid(new); + if (ret < 0) { + abort_creds(new); + return ret; + } + + return commit_creds(new); + } + + +MANAGING CREDENTIALS +-------------------- + +There are some functions to help manage credentials: + + (*) void put_cred(const struct cred *cred); + + This releases a reference to the given set of credentials. If the + reference count reaches zero, the credentials will be scheduled for + destruction by the RCU system. + + (*) const struct cred *get_cred(const struct cred *cred); + + This gets a reference on a live set of credentials, returning a pointer to + that set of credentials. + + (*) struct cred *get_new_cred(struct cred *cred); + + This gets a reference on a set of credentials that is under construction + and is thus still mutable, returning a pointer to that set of credentials. + + +===================== +OPEN FILE CREDENTIALS +===================== + +When a new file is opened, a reference is obtained on the opening task's +credentials and this is attached to the file struct as 'f_cred' in place of +'f_uid' and 'f_gid'. Code that used to access file->f_uid and file->f_gid +should now access file->f_cred->fsuid and file->f_cred->fsgid. + +It is safe to access f_cred without the use of RCU or locking because the +pointer will not change over the lifetime of the file struct, and nor will the +contents of the cred struct pointed to, barring the exceptions listed above +(see the Task Credentials section). + + +======================================= +OVERRIDING THE VFS'S USE OF CREDENTIALS +======================================= + +Under some circumstances it is desirable to override the credentials used by +the VFS, and that can be done by calling into such as vfs_mkdir() with a +different set of credentials. This is done in the following places: + + (*) sys_faccessat(). + + (*) do_coredump(). + + (*) nfs4recover.c. diff --git a/Documentation/security/keys-request-key.txt b/Documentation/security/keys-request-key.txt new file mode 100644 index 000000000000..51987bfecfed --- /dev/null +++ b/Documentation/security/keys-request-key.txt @@ -0,0 +1,202 @@ + =================== + KEY REQUEST SERVICE + =================== + +The key request service is part of the key retention service (refer to +Documentation/security/keys.txt). This document explains more fully how +the requesting algorithm works. 
+ +The process starts by either the kernel requesting a service by calling +request_key*(): + + struct key *request_key(const struct key_type *type, + const char *description, + const char *callout_info); + +or: + + struct key *request_key_with_auxdata(const struct key_type *type, + const char *description, + const char *callout_info, + size_t callout_len, + void *aux); + +or: + + struct key *request_key_async(const struct key_type *type, + const char *description, + const char *callout_info, + size_t callout_len); + +or: + + struct key *request_key_async_with_auxdata(const struct key_type *type, + const char *description, + const char *callout_info, + size_t callout_len, + void *aux); + +Or by userspace invoking the request_key system call: + + key_serial_t request_key(const char *type, + const char *description, + const char *callout_info, + key_serial_t dest_keyring); + +The main difference between the access points is that the in-kernel interface +does not need to link the key to a keyring to prevent it from being immediately +destroyed. The kernel interface returns a pointer directly to the key, and +it's up to the caller to destroy the key. + +The request_key*_with_auxdata() calls are like the in-kernel request_key*() +calls, except that they permit auxiliary data to be passed to the upcaller (the +default is NULL). This is only useful for those key types that define their +own upcall mechanism rather than using /sbin/request-key. + +The two async in-kernel calls may return keys that are still in the process of +being constructed. The two non-async ones will wait for construction to +complete first. + +The userspace interface links the key to a keyring associated with the process +to prevent the key from going away, and returns the serial number of the key to +the caller. + + +The following example assumes that the key types involved don't define their +own upcall mechanisms. If they do, then those should be substituted for the +forking and execution of /sbin/request-key. + + +=========== +THE PROCESS +=========== + +A request proceeds in the following manner: + + (1) Process A calls request_key() [the userspace syscall calls the kernel + interface]. + + (2) request_key() searches the process's subscribed keyrings to see if there's + a suitable key there. If there is, it returns the key. If there isn't, + and callout_info is not set, an error is returned. Otherwise the process + proceeds to the next step. + + (3) request_key() sees that A doesn't have the desired key yet, so it creates + two things: + + (a) An uninstantiated key U of requested type and description. + + (b) An authorisation key V that refers to key U and notes that process A + is the context in which key U should be instantiated and secured, and + from which associated key requests may be satisfied. + + (4) request_key() then forks and executes /sbin/request-key with a new session + keyring that contains a link to auth key V. + + (5) /sbin/request-key assumes the authority associated with key U. + + (6) /sbin/request-key execs an appropriate program to perform the actual + instantiation. + + (7) The program may want to access another key from A's context (say a + Kerberos TGT key). It just requests the appropriate key, and the keyring + search notes that the session keyring has auth key V in its bottom level. + + This will permit it to then search the keyrings of process A with the + UID, GID, groups and security info of process A as if it was process A, + and come up with key W. 
+ + (8) The program then does what it must to get the data with which to + instantiate key U, using key W as a reference (perhaps it contacts a + Kerberos server using the TGT) and then instantiates key U. + + (9) Upon instantiating key U, auth key V is automatically revoked so that it + may not be used again. + +(10) The program then exits 0 and request_key() deletes key V and returns key + U to the caller. + +This also extends further. If key W (step 7 above) didn't exist, key W would +be created uninstantiated, another auth key (X) would be created (as per step +3) and another copy of /sbin/request-key spawned (as per step 4); but the +context specified by auth key X will still be process A, as it was in auth key +V. + +This is because process A's keyrings can't simply be attached to +/sbin/request-key at the appropriate places because (a) execve will discard two +of them, and (b) it requires the same UID/GID/Groups all the way through. + + +==================================== +NEGATIVE INSTANTIATION AND REJECTION +==================================== + +Rather than instantiating a key, it is possible for the possessor of an +authorisation key to negatively instantiate a key that's under construction. +This is a short duration placeholder that causes any attempt at re-requesting +the key whilst it exists to fail with error ENOKEY if negated or the specified +error if rejected. + +This is provided to prevent excessive repeated spawning of /sbin/request-key +processes for a key that will never be obtainable. + +Should the /sbin/request-key process exit anything other than 0 or die on a +signal, the key under construction will be automatically negatively +instantiated for a short amount of time. + + +==================== +THE SEARCH ALGORITHM +==================== + +A search of any particular keyring proceeds in the following fashion: + + (1) When the key management code searches for a key (keyring_search_aux) it + firstly calls key_permission(SEARCH) on the keyring it's starting with, + if this denies permission, it doesn't search further. + + (2) It considers all the non-keyring keys within that keyring and, if any key + matches the criteria specified, calls key_permission(SEARCH) on it to see + if the key is allowed to be found. If it is, that key is returned; if + not, the search continues, and the error code is retained if of higher + priority than the one currently set. + + (3) It then considers all the keyring-type keys in the keyring it's currently + searching. It calls key_permission(SEARCH) on each keyring, and if this + grants permission, it recurses, executing steps (2) and (3) on that + keyring. + +The process stops immediately a valid key is found with permission granted to +use it. Any error from a previous match attempt is discarded and the key is +returned. + +When search_process_keyrings() is invoked, it performs the following searches +until one succeeds: + + (1) If extant, the process's thread keyring is searched. + + (2) If extant, the process's process keyring is searched. + + (3) The process's session keyring is searched. + + (4) If the process has assumed the authority associated with a request_key() + authorisation key then: + + (a) If extant, the calling process's thread keyring is searched. + + (b) If extant, the calling process's process keyring is searched. + + (c) The calling process's session keyring is searched. + +The moment one succeeds, all pending errors are discarded and the found key is +returned. 
+
+Only if all these fail does the whole thing fail with the highest priority
+error. Note that several errors may have come from LSM.
+
+The error priority is:
+
+    EKEYREVOKED > EKEYEXPIRED > ENOKEY
+
+EACCES/EPERM are only returned on a direct search of a specific keyring where
+the basal keyring does not grant Search permission.
diff --git a/Documentation/security/keys-trusted-encrypted.txt b/Documentation/security/keys-trusted-encrypted.txt
new file mode 100644
index 000000000000..8fb79bc1ac4b
--- /dev/null
+++ b/Documentation/security/keys-trusted-encrypted.txt
@@ -0,0 +1,145 @@
+            Trusted and Encrypted Keys
+
+Trusted and Encrypted Keys are two new key types added to the existing kernel
+key ring service. Both of these new types are variable length symmetric keys,
+and in both cases all keys are created in the kernel, and user space sees,
+stores, and loads only encrypted blobs. Trusted Keys require the availability
+of a Trusted Platform Module (TPM) chip for greater security, while Encrypted
+Keys can be used on any system. All user level blobs are displayed and loaded
+in hex ascii for convenience, and are integrity verified.
+
+Trusted Keys use a TPM both to generate and to seal the keys. Keys are sealed
+under a 2048 bit RSA key in the TPM, and optionally sealed to specified PCR
+(integrity measurement) values, and only unsealed by the TPM, if PCRs and blob
+integrity verifications match. A loaded Trusted Key can be updated with new
+(future) PCR values, so keys are easily migrated to new PCR values, such as
+when the kernel and initramfs are updated. The same key can have many saved
+blobs under different PCR values, so multiple boots are easily supported.
+
+By default, trusted keys are sealed under the SRK, which has the default
+authorization value (20 zeros). This can be set at takeownership time with the
+TrouSerS utility: "tpm_takeownership -u -z".
+
+Usage:
+    keyctl add trusted name "new keylen [options]" ring
+    keyctl add trusted name "load hex_blob [pcrlock=pcrnum]" ring
+    keyctl update key "update [options]"
+    keyctl print keyid
+
+    options:
+       keyhandle=  ascii hex value of sealing key, default 0x40000000 (SRK)
+       keyauth=    ascii hex auth for sealing key, default 0x00...
+                   (40 ascii zeros)
+       blobauth=   ascii hex auth for sealed data, default 0x00...
+                   (40 ascii zeros)
+       pcrinfo=    ascii hex of PCR_INFO or PCR_INFO_LONG (no default)
+       pcrlock=    pcr number to be extended to "lock" blob
+       migratable= 0|1 indicating permission to reseal to new PCR values,
+                   default 1 (resealing allowed)
+
+"keyctl print" returns an ascii hex copy of the sealed key, which is in standard
+TPM_STORED_DATA format. The key length for new keys is always in bytes.
+Trusted Keys can be 32 - 128 bytes (256 - 1024 bits); the upper limit is to fit
+within the 2048 bit SRK (RSA) keylength, with all necessary structure/padding.
+
+Encrypted keys do not depend on a TPM, and are faster, as they use AES for
+encryption/decryption. New keys are created from kernel generated random
+numbers, and are encrypted/decrypted using a specified 'master' key. The
+'master' key can either be a trusted-key or user-key type. The main
+disadvantage of encrypted keys is that if they are not rooted in a trusted key,
+they are only as secure as the user key encrypting them. The master user key
+should therefore be loaded in as secure a way as possible, preferably early in
+boot.
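+
+As an illustration only (assuming the keyctl(1) tool from keyutils, and a
+hypothetical secret kept in /etc/keys/kmk-user.blob), such a master user key
+might be loaded from an early boot script with:
+
+    # keyctl add user kmk-user "`cat /etc/keys/kmk-user.blob`" @u
+
+Any encrypted key created against "user:kmk-user" can then only be loaded
+while that master key is available.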
+ +Usage: + keyctl add encrypted name "new key-type:master-key-name keylen" ring + keyctl add encrypted name "load hex_blob" ring + keyctl update keyid "update key-type:master-key-name" + +where 'key-type' is either 'trusted' or 'user'. + +Examples of trusted and encrypted key usage: + +Create and save a trusted key named "kmk" of length 32 bytes: + + $ keyctl add trusted kmk "new 32" @u + 440502848 + + $ keyctl show + Session Keyring + -3 --alswrv 500 500 keyring: _ses + 97833714 --alswrv 500 -1 \_ keyring: _uid.500 + 440502848 --alswrv 500 500 \_ trusted: kmk + + $ keyctl print 440502848 + 0101000000000000000001005d01b7e3f4a6be5709930f3b70a743cbb42e0cc95e18e915 + 3f60da455bbf1144ad12e4f92b452f966929f6105fd29ca28e4d4d5a031d068478bacb0b + 27351119f822911b0a11ba3d3498ba6a32e50dac7f32894dd890eb9ad578e4e292c83722 + a52e56a097e6a68b3f56f7a52ece0cdccba1eb62cad7d817f6dc58898b3ac15f36026fec + d568bd4a706cb60bb37be6d8f1240661199d640b66fb0fe3b079f97f450b9ef9c22c6d5d + dd379f0facd1cd020281dfa3c70ba21a3fa6fc2471dc6d13ecf8298b946f65345faa5ef0 + f1f8fff03ad0acb083725535636addb08d73dedb9832da198081e5deae84bfaf0409c22b + e4a8aea2b607ec96931e6f4d4fe563ba + + $ keyctl pipe 440502848 > kmk.blob + +Load a trusted key from the saved blob: + + $ keyctl add trusted kmk "load `cat kmk.blob`" @u + 268728824 + + $ keyctl print 268728824 + 0101000000000000000001005d01b7e3f4a6be5709930f3b70a743cbb42e0cc95e18e915 + 3f60da455bbf1144ad12e4f92b452f966929f6105fd29ca28e4d4d5a031d068478bacb0b + 27351119f822911b0a11ba3d3498ba6a32e50dac7f32894dd890eb9ad578e4e292c83722 + a52e56a097e6a68b3f56f7a52ece0cdccba1eb62cad7d817f6dc58898b3ac15f36026fec + d568bd4a706cb60bb37be6d8f1240661199d640b66fb0fe3b079f97f450b9ef9c22c6d5d + dd379f0facd1cd020281dfa3c70ba21a3fa6fc2471dc6d13ecf8298b946f65345faa5ef0 + f1f8fff03ad0acb083725535636addb08d73dedb9832da198081e5deae84bfaf0409c22b + e4a8aea2b607ec96931e6f4d4fe563ba + +Reseal a trusted key under new pcr values: + + $ keyctl update 268728824 "update pcrinfo=`cat pcr.blob`" + $ keyctl print 268728824 + 010100000000002c0002800093c35a09b70fff26e7a98ae786c641e678ec6ffb6b46d805 + 77c8a6377aed9d3219c6dfec4b23ffe3000001005d37d472ac8a44023fbb3d18583a4f73 + d3a076c0858f6f1dcaa39ea0f119911ff03f5406df4f7f27f41da8d7194f45c9f4e00f2e + df449f266253aa3f52e55c53de147773e00f0f9aca86c64d94c95382265968c354c5eab4 + 9638c5ae99c89de1e0997242edfb0b501744e11ff9762dfd951cffd93227cc513384e7e6 + e782c29435c7ec2edafaa2f4c1fe6e7a781b59549ff5296371b42133777dcc5b8b971610 + 94bc67ede19e43ddb9dc2baacad374a36feaf0314d700af0a65c164b7082401740e489c9 + 7ef6a24defe4846104209bf0c3eced7fa1a672ed5b125fc9d8cd88b476a658a4434644ef + df8ae9a178e9f83ba9f08d10fa47e4226b98b0702f06b3b8 + +Create and save an encrypted key "evm" using the above trusted key "kmk": + + $ keyctl add encrypted evm "new trusted:kmk 32" @u + 159771175 + + $ keyctl print 159771175 + trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b382dbbc55 + be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e024717c64 + 5972dcb82ab2dde83376d82b2e3c09ffc + + $ keyctl pipe 159771175 > evm.blob + +Load an encrypted key "evm" from saved blob: + + $ keyctl add encrypted evm "load `cat evm.blob`" @u + 831684262 + + $ keyctl print 831684262 + trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b382dbbc55 + be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e024717c64 + 5972dcb82ab2dde83376d82b2e3c09ffc + + +The initial consumer of trusted keys is EVM, which at boot time needs a high +quality symmetric key for HMAC protection of file metadata. 
The use of a +trusted key provides strong guarantees that the EVM key has not been +compromised by a user level problem, and when sealed to specific boot PCR +values, protects against boot and offline attacks. Other uses for trusted and +encrypted keys, such as for disk and file encryption are anticipated. diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt new file mode 100644 index 000000000000..4d75931d2d79 --- /dev/null +++ b/Documentation/security/keys.txt @@ -0,0 +1,1290 @@ + ============================ + KERNEL KEY RETENTION SERVICE + ============================ + +This service allows cryptographic keys, authentication tokens, cross-domain +user mappings, and similar to be cached in the kernel for the use of +filesystems and other kernel services. + +Keyrings are permitted; these are a special type of key that can hold links to +other keys. Processes each have three standard keyring subscriptions that a +kernel service can search for relevant keys. + +The key service can be configured on by enabling: + + "Security options"/"Enable access key retention support" (CONFIG_KEYS) + +This document has the following sections: + + - Key overview + - Key service overview + - Key access permissions + - SELinux support + - New procfs files + - Userspace system call interface + - Kernel services + - Notes on accessing payload contents + - Defining a key type + - Request-key callback service + - Garbage collection + + +============ +KEY OVERVIEW +============ + +In this context, keys represent units of cryptographic data, authentication +tokens, keyrings, etc.. These are represented in the kernel by struct key. + +Each key has a number of attributes: + + - A serial number. + - A type. + - A description (for matching a key in a search). + - Access control information. + - An expiry time. + - A payload. + - State. + + + (*) Each key is issued a serial number of type key_serial_t that is unique for + the lifetime of that key. All serial numbers are positive non-zero 32-bit + integers. + + Userspace programs can use a key's serial numbers as a way to gain access + to it, subject to permission checking. + + (*) Each key is of a defined "type". Types must be registered inside the + kernel by a kernel service (such as a filesystem) before keys of that type + can be added or used. Userspace programs cannot define new types directly. + + Key types are represented in the kernel by struct key_type. This defines a + number of operations that can be performed on a key of that type. + + Should a type be removed from the system, all the keys of that type will + be invalidated. + + (*) Each key has a description. This should be a printable string. The key + type provides an operation to perform a match between the description on a + key and a criterion string. + + (*) Each key has an owner user ID, a group ID and a permissions mask. These + are used to control what a process may do to a key from userspace, and + whether a kernel service will be able to find the key. + + (*) Each key can be set to expire at a specific time by the key type's + instantiation function. Keys can also be immortal. + + (*) Each key can have a payload. This is a quantity of data that represent the + actual "key". In the case of a keyring, this is a list of keys to which + the keyring links; in the case of a user-defined key, it's an arbitrary + blob of data. + + Having a payload is not required; and the payload can, in fact, just be a + value stored in the struct key itself. 
+ + When a key is instantiated, the key type's instantiation function is + called with a blob of data, and that then creates the key's payload in + some way. + + Similarly, when userspace wants to read back the contents of the key, if + permitted, another key type operation will be called to convert the key's + attached payload back into a blob of data. + + (*) Each key can be in one of a number of basic states: + + (*) Uninstantiated. The key exists, but does not have any data attached. + Keys being requested from userspace will be in this state. + + (*) Instantiated. This is the normal state. The key is fully formed, and + has data attached. + + (*) Negative. This is a relatively short-lived state. The key acts as a + note saying that a previous call out to userspace failed, and acts as + a throttle on key lookups. A negative key can be updated to a normal + state. + + (*) Expired. Keys can have lifetimes set. If their lifetime is exceeded, + they traverse to this state. An expired key can be updated back to a + normal state. + + (*) Revoked. A key is put in this state by userspace action. It can't be + found or operated upon (apart from by unlinking it). + + (*) Dead. The key's type was unregistered, and so the key is now useless. + +Keys in the last three states are subject to garbage collection. See the +section on "Garbage collection". + + +==================== +KEY SERVICE OVERVIEW +==================== + +The key service provides a number of features besides keys: + + (*) The key service defines two special key types: + + (+) "keyring" + + Keyrings are special keys that contain a list of other keys. Keyring + lists can be modified using various system calls. Keyrings should not + be given a payload when created. + + (+) "user" + + A key of this type has a description and a payload that are arbitrary + blobs of data. These can be created, updated and read by userspace, + and aren't intended for use by kernel services. + + (*) Each process subscribes to three keyrings: a thread-specific keyring, a + process-specific keyring, and a session-specific keyring. + + The thread-specific keyring is discarded from the child when any sort of + clone, fork, vfork or execve occurs. A new keyring is created only when + required. + + The process-specific keyring is replaced with an empty one in the child on + clone, fork, vfork unless CLONE_THREAD is supplied, in which case it is + shared. execve also discards the process's process keyring and creates a + new one. + + The session-specific keyring is persistent across clone, fork, vfork and + execve, even when the latter executes a set-UID or set-GID binary. A + process can, however, replace its current session keyring with a new one + by using PR_JOIN_SESSION_KEYRING. It is permitted to request an anonymous + new one, or to attempt to create or join one of a specific name. + + The ownership of the thread keyring changes when the real UID and GID of + the thread changes. + + (*) Each user ID resident in the system holds two special keyrings: a user + specific keyring and a default user session keyring. The default session + keyring is initialised with a link to the user-specific keyring. + + When a process changes its real UID, if it used to have no session key, it + will be subscribed to the default session key for the new UID. + + If a process attempts to access its session key when it doesn't have one, + it will be subscribed to the default for its current UID. + + (*) Each user has two quotas against which the keys they own are tracked. 
One + limits the total number of keys and keyrings, the other limits the total + amount of description and payload space that can be consumed. + + The user can view information on this and other statistics through procfs + files. The root user may also alter the quota limits through sysctl files + (see the section "New procfs files"). + + Process-specific and thread-specific keyrings are not counted towards a + user's quota. + + If a system call that modifies a key or keyring in some way would put the + user over quota, the operation is refused and error EDQUOT is returned. + + (*) There's a system call interface by which userspace programs can create and + manipulate keys and keyrings. + + (*) There's a kernel interface by which services can register types and search + for keys. + + (*) There's a way for the a search done from the kernel to call back to + userspace to request a key that can't be found in a process's keyrings. + + (*) An optional filesystem is available through which the key database can be + viewed and manipulated. + + +====================== +KEY ACCESS PERMISSIONS +====================== + +Keys have an owner user ID, a group access ID, and a permissions mask. The mask +has up to eight bits each for possessor, user, group and other access. Only +six of each set of eight bits are defined. These permissions granted are: + + (*) View + + This permits a key or keyring's attributes to be viewed - including key + type and description. + + (*) Read + + This permits a key's payload to be viewed or a keyring's list of linked + keys. + + (*) Write + + This permits a key's payload to be instantiated or updated, or it allows a + link to be added to or removed from a keyring. + + (*) Search + + This permits keyrings to be searched and keys to be found. Searches can + only recurse into nested keyrings that have search permission set. + + (*) Link + + This permits a key or keyring to be linked to. To create a link from a + keyring to a key, a process must have Write permission on the keyring and + Link permission on the key. + + (*) Set Attribute + + This permits a key's UID, GID and permissions mask to be changed. + +For changing the ownership, group ID or permissions mask, being the owner of +the key or having the sysadmin capability is sufficient. + + +=============== +SELINUX SUPPORT +=============== + +The security class "key" has been added to SELinux so that mandatory access +controls can be applied to keys created within various contexts. This support +is preliminary, and is likely to change quite significantly in the near future. +Currently, all of the basic permissions explained above are provided in SELinux +as well; SELinux is simply invoked after all basic permission checks have been +performed. + +The value of the file /proc/self/attr/keycreate influences the labeling of +newly-created keys. If the contents of that file correspond to an SELinux +security context, then the key will be assigned that context. Otherwise, the +key will be assigned the current context of the task that invoked the key +creation request. Tasks must be granted explicit permission to assign a +particular context to newly-created keys, using the "create" permission in the +key security class. + +The default keyrings associated with users will be labeled with the default +context of the user if and only if the login programs have been instrumented to +properly initialize keycreate during the login process. Otherwise, they will +be labeled with the context of the login program itself. 
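For illustration, a task that has been granted the "create" permission
described above can steer the labeling of the keys it is about to create by
writing a context string to /proc/self/attr/keycreate first. A minimal
userspace sketch, using a hypothetical context name and abbreviated error
handling:

        #include <fcntl.h>
        #include <string.h>
        #include <unistd.h>

        /*
         * Ask that keys subsequently created by this task be labeled with
         * the given SELinux context (see the description of keycreate
         * above).  The policy must grant this task the "create" permission
         * in the key security class for the context to be accepted.
         */
        static int set_key_creation_context(const char *context)
        {
                int fd = open("/proc/self/attr/keycreate", O_WRONLY);
                ssize_t n;

                if (fd < 0)
                        return -1;
                n = write(fd, context, strlen(context));
                close(fd);
                return n < 0 ? -1 : 0;
        }

        /* e.g.: set_key_creation_context("system_u:object_r:example_key_t:s0"); */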
+ +Note, however, that the default keyrings associated with the root user are +labeled with the default kernel context, since they are created early in the +boot process, before root has a chance to log in. + +The keyrings associated with new threads are each labeled with the context of +their associated thread, and both session and process keyrings are handled +similarly. + + +================ +NEW PROCFS FILES +================ + +Two files have been added to procfs by which an administrator can find out +about the status of the key service: + + (*) /proc/keys + + This lists the keys that are currently viewable by the task reading the + file, giving information about their type, description and permissions. + It is not possible to view the payload of the key this way, though some + information about it may be given. + + The only keys included in the list are those that grant View permission to + the reading process whether or not it possesses them. Note that LSM + security checks are still performed, and may further filter out keys that + the current process is not authorised to view. + + The contents of the file look like this: + + SERIAL FLAGS USAGE EXPY PERM UID GID TYPE DESCRIPTION: SUMMARY + 00000001 I----- 39 perm 1f3f0000 0 0 keyring _uid_ses.0: 1/4 + 00000002 I----- 2 perm 1f3f0000 0 0 keyring _uid.0: empty + 00000007 I----- 1 perm 1f3f0000 0 0 keyring _pid.1: empty + 0000018d I----- 1 perm 1f3f0000 0 0 keyring _pid.412: empty + 000004d2 I--Q-- 1 perm 1f3f0000 32 -1 keyring _uid.32: 1/4 + 000004d3 I--Q-- 3 perm 1f3f0000 32 -1 keyring _uid_ses.32: empty + 00000892 I--QU- 1 perm 1f000000 0 0 user metal:copper: 0 + 00000893 I--Q-N 1 35s 1f3f0000 0 0 user metal:silver: 0 + 00000894 I--Q-- 1 10h 003f0000 0 0 user metal:gold: 0 + + The flags are: + + I Instantiated + R Revoked + D Dead + Q Contributes to user's quota + U Under construction by callback to userspace + N Negative key + + This file must be enabled at kernel configuration time as it allows anyone + to list the keys database. + + (*) /proc/key-users + + This file lists the tracking data for each user that has at least one key + on the system. Such data includes quota information and statistics: + + [root@andromeda root]# cat /proc/key-users + 0: 46 45/45 1/100 13/10000 + 29: 2 2/2 2/100 40/10000 + 32: 2 2/2 2/100 40/10000 + 38: 2 2/2 2/100 40/10000 + + The format of each line is + : User ID to which this applies + Structure refcount + / Total number of keys and number instantiated + / Key count quota + / Key size quota + + +Four new sysctl files have been added also for the purpose of controlling the +quota limits on keys: + + (*) /proc/sys/kernel/keys/root_maxkeys + /proc/sys/kernel/keys/root_maxbytes + + These files hold the maximum number of keys that root may have and the + maximum total number of bytes of data that root may have stored in those + keys. + + (*) /proc/sys/kernel/keys/maxkeys + /proc/sys/kernel/keys/maxbytes + + These files hold the maximum number of keys that each non-root user may + have and the maximum total number of bytes of data that each of those + users may have stored in their keys. + +Root may alter these by writing each new limit as a decimal number string to +the appropriate file. + + +=============================== +USERSPACE SYSTEM CALL INTERFACE +=============================== + +Userspace can manipulate keys directly through three new syscalls: add_key, +request_key and keyctl. The latter provides a number of functions for +manipulating keys. 
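As a quick orientation before the detailed descriptions that follow, here is a
minimal sketch that adds a "user" key to the session keyring and reads its
payload back. It assumes the C library exposes SYS_add_key and SYS_keyctl for
these syscalls; the description string and payload are made up:

        #include <stdio.h>
        #include <unistd.h>
        #include <sys/syscall.h>
        #include <linux/keyctl.h>       /* KEYCTL_* commands, KEY_SPEC_* IDs */

        int main(void)
        {
                long id, n;
                char buf[64];

                /* add_key(type, description, payload, plen, keyring) */
                id = syscall(SYS_add_key, "user", "example:token",
                             "opensesame", 10, KEY_SPEC_SESSION_KEYRING);
                if (id < 0) {
                        perror("add_key");
                        return 1;
                }

                /* keyctl(KEYCTL_READ, key, buffer, buflen) */
                n = syscall(SYS_keyctl, KEYCTL_READ, id, buf, sizeof(buf));
                if (n < 0) {
                        perror("keyctl(KEYCTL_READ)");
                        return 1;
                }

                printf("key %ld holds %ld bytes of payload\n", id, n);
                return 0;
        }

In practice userspace usually goes through the keyutils library's wrapper
functions rather than raw syscalls, but the calls map directly onto the
interface described here.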
+ +When referring to a key directly, userspace programs should use the key's +serial number (a positive 32-bit integer). However, there are some special +values available for referring to special keys and keyrings that relate to the +process making the call: + + CONSTANT VALUE KEY REFERENCED + ============================== ====== =========================== + KEY_SPEC_THREAD_KEYRING -1 thread-specific keyring + KEY_SPEC_PROCESS_KEYRING -2 process-specific keyring + KEY_SPEC_SESSION_KEYRING -3 session-specific keyring + KEY_SPEC_USER_KEYRING -4 UID-specific keyring + KEY_SPEC_USER_SESSION_KEYRING -5 UID-session keyring + KEY_SPEC_GROUP_KEYRING -6 GID-specific keyring + KEY_SPEC_REQKEY_AUTH_KEY -7 assumed request_key() + authorisation key + + +The main syscalls are: + + (*) Create a new key of given type, description and payload and add it to the + nominated keyring: + + key_serial_t add_key(const char *type, const char *desc, + const void *payload, size_t plen, + key_serial_t keyring); + + If a key of the same type and description as that proposed already exists + in the keyring, this will try to update it with the given payload, or it + will return error EEXIST if that function is not supported by the key + type. The process must also have permission to write to the key to be able + to update it. The new key will have all user permissions granted and no + group or third party permissions. + + Otherwise, this will attempt to create a new key of the specified type and + description, and to instantiate it with the supplied payload and attach it + to the keyring. In this case, an error will be generated if the process + does not have permission to write to the keyring. + + The payload is optional, and the pointer can be NULL if not required by + the type. The payload is plen in size, and plen can be zero for an empty + payload. + + A new keyring can be generated by setting type "keyring", the keyring name + as the description (or NULL) and setting the payload to NULL. + + User defined keys can be created by specifying type "user". It is + recommended that a user defined key's description by prefixed with a type + ID and a colon, such as "krb5tgt:" for a Kerberos 5 ticket granting + ticket. + + Any other type must have been registered with the kernel in advance by a + kernel service such as a filesystem. + + The ID of the new or updated key is returned if successful. + + + (*) Search the process's keyrings for a key, potentially calling out to + userspace to create it. + + key_serial_t request_key(const char *type, const char *description, + const char *callout_info, + key_serial_t dest_keyring); + + This function searches all the process's keyrings in the order thread, + process, session for a matching key. This works very much like + KEYCTL_SEARCH, including the optional attachment of the discovered key to + a keyring. + + If a key cannot be found, and if callout_info is not NULL, then + /sbin/request-key will be invoked in an attempt to obtain a key. The + callout_info string will be passed as an argument to the program. + + See also Documentation/security/keys-request-key.txt. + + +The keyctl syscall functions are: + + (*) Map a special key ID to a real key ID for this process: + + key_serial_t keyctl(KEYCTL_GET_KEYRING_ID, key_serial_t id, + int create); + + The special key specified by "id" is looked up (with the key being created + if necessary) and the ID of the key or keyring thus found is returned if + it exists. 
+ + If the key does not yet exist, the key will be created if "create" is + non-zero; and the error ENOKEY will be returned if "create" is zero. + + + (*) Replace the session keyring this process subscribes to with a new one: + + key_serial_t keyctl(KEYCTL_JOIN_SESSION_KEYRING, const char *name); + + If name is NULL, an anonymous keyring is created attached to the process + as its session keyring, displacing the old session keyring. + + If name is not NULL, if a keyring of that name exists, the process + attempts to attach it as the session keyring, returning an error if that + is not permitted; otherwise a new keyring of that name is created and + attached as the session keyring. + + To attach to a named keyring, the keyring must have search permission for + the process's ownership. + + The ID of the new session keyring is returned if successful. + + + (*) Update the specified key: + + long keyctl(KEYCTL_UPDATE, key_serial_t key, const void *payload, + size_t plen); + + This will try to update the specified key with the given payload, or it + will return error EOPNOTSUPP if that function is not supported by the key + type. The process must also have permission to write to the key to be able + to update it. + + The payload is of length plen, and may be absent or empty as for + add_key(). + + + (*) Revoke a key: + + long keyctl(KEYCTL_REVOKE, key_serial_t key); + + This makes a key unavailable for further operations. Further attempts to + use the key will be met with error EKEYREVOKED, and the key will no longer + be findable. + + + (*) Change the ownership of a key: + + long keyctl(KEYCTL_CHOWN, key_serial_t key, uid_t uid, gid_t gid); + + This function permits a key's owner and group ID to be changed. Either one + of uid or gid can be set to -1 to suppress that change. + + Only the superuser can change a key's owner to something other than the + key's current owner. Similarly, only the superuser can change a key's + group ID to something other than the calling process's group ID or one of + its group list members. + + + (*) Change the permissions mask on a key: + + long keyctl(KEYCTL_SETPERM, key_serial_t key, key_perm_t perm); + + This function permits the owner of a key or the superuser to change the + permissions mask on a key. + + Only bits the available bits are permitted; if any other bits are set, + error EINVAL will be returned. + + + (*) Describe a key: + + long keyctl(KEYCTL_DESCRIBE, key_serial_t key, char *buffer, + size_t buflen); + + This function returns a summary of the key's attributes (but not its + payload data) as a string in the buffer provided. + + Unless there's an error, it always returns the amount of data it could + produce, even if that's too big for the buffer, but it won't copy more + than requested to userspace. If the buffer pointer is NULL then no copy + will take place. + + A process must have view permission on the key for this function to be + successful. + + If successful, a string is placed in the buffer in the following format: + + ;;;; + + Where type and description are strings, uid and gid are decimal, and perm + is hexadecimal. A NUL character is included at the end of the string if + the buffer is sufficiently big. + + This can be parsed with + + sscanf(buffer, "%[^;];%d;%d;%o;%s", type, &uid, &gid, &mode, desc); + + + (*) Clear out a keyring: + + long keyctl(KEYCTL_CLEAR, key_serial_t keyring); + + This function clears the list of keys attached to a keyring. 
The calling + process must have write permission on the keyring, and it must be a + keyring (or else error ENOTDIR will result). + + + (*) Link a key into a keyring: + + long keyctl(KEYCTL_LINK, key_serial_t keyring, key_serial_t key); + + This function creates a link from the keyring to the key. The process must + have write permission on the keyring and must have link permission on the + key. + + Should the keyring not be a keyring, error ENOTDIR will result; and if the + keyring is full, error ENFILE will result. + + The link procedure checks the nesting of the keyrings, returning ELOOP if + it appears too deep or EDEADLK if the link would introduce a cycle. + + Any links within the keyring to keys that match the new key in terms of + type and description will be discarded from the keyring as the new one is + added. + + + (*) Unlink a key or keyring from another keyring: + + long keyctl(KEYCTL_UNLINK, key_serial_t keyring, key_serial_t key); + + This function looks through the keyring for the first link to the + specified key, and removes it if found. Subsequent links to that key are + ignored. The process must have write permission on the keyring. + + If the keyring is not a keyring, error ENOTDIR will result; and if the key + is not present, error ENOENT will be the result. + + + (*) Search a keyring tree for a key: + + key_serial_t keyctl(KEYCTL_SEARCH, key_serial_t keyring, + const char *type, const char *description, + key_serial_t dest_keyring); + + This searches the keyring tree headed by the specified keyring until a key + is found that matches the type and description criteria. Each keyring is + checked for keys before recursion into its children occurs. + + The process must have search permission on the top level keyring, or else + error EACCES will result. Only keyrings that the process has search + permission on will be recursed into, and only keys and keyrings for which + a process has search permission can be matched. If the specified keyring + is not a keyring, ENOTDIR will result. + + If the search succeeds, the function will attempt to link the found key + into the destination keyring if one is supplied (non-zero ID). All the + constraints applicable to KEYCTL_LINK apply in this case too. + + Error ENOKEY, EKEYREVOKED or EKEYEXPIRED will be returned if the search + fails. On success, the resulting key ID will be returned. + + + (*) Read the payload data from a key: + + long keyctl(KEYCTL_READ, key_serial_t keyring, char *buffer, + size_t buflen); + + This function attempts to read the payload data from the specified key + into the buffer. The process must have read permission on the key to + succeed. + + The returned data will be processed for presentation by the key type. For + instance, a keyring will return an array of key_serial_t entries + representing the IDs of all the keys to which it is subscribed. The user + defined key type will return its data as is. If a key type does not + implement this function, error EOPNOTSUPP will result. + + As much of the data as can be fitted into the buffer will be copied to + userspace if the buffer pointer is not NULL. + + On a successful return, the function will always return the amount of data + available rather than the amount copied. + + + (*) Instantiate a partially constructed key. 
+ + long keyctl(KEYCTL_INSTANTIATE, key_serial_t key, + const void *payload, size_t plen, + key_serial_t keyring); + long keyctl(KEYCTL_INSTANTIATE_IOV, key_serial_t key, + const struct iovec *payload_iov, unsigned ioc, + key_serial_t keyring); + + If the kernel calls back to userspace to complete the instantiation of a + key, userspace should use this call to supply data for the key before the + invoked process returns, or else the key will be marked negative + automatically. + + The process must have write access on the key to be able to instantiate + it, and the key must be uninstantiated. + + If a keyring is specified (non-zero), the key will also be linked into + that keyring, however all the constraints applying in KEYCTL_LINK apply in + this case too. + + The payload and plen arguments describe the payload data as for add_key(). + + The payload_iov and ioc arguments describe the payload data in an iovec + array instead of a single buffer. + + + (*) Negatively instantiate a partially constructed key. + + long keyctl(KEYCTL_NEGATE, key_serial_t key, + unsigned timeout, key_serial_t keyring); + long keyctl(KEYCTL_REJECT, key_serial_t key, + unsigned timeout, unsigned error, key_serial_t keyring); + + If the kernel calls back to userspace to complete the instantiation of a + key, userspace should use this call mark the key as negative before the + invoked process returns if it is unable to fulfil the request. + + The process must have write access on the key to be able to instantiate + it, and the key must be uninstantiated. + + If a keyring is specified (non-zero), the key will also be linked into + that keyring, however all the constraints applying in KEYCTL_LINK apply in + this case too. + + If the key is rejected, future searches for it will return the specified + error code until the rejected key expires. Negating the key is the same + as rejecting the key with ENOKEY as the error code. + + + (*) Set the default request-key destination keyring. + + long keyctl(KEYCTL_SET_REQKEY_KEYRING, int reqkey_defl); + + This sets the default keyring to which implicitly requested keys will be + attached for this thread. reqkey_defl should be one of these constants: + + CONSTANT VALUE NEW DEFAULT KEYRING + ====================================== ====== ======================= + KEY_REQKEY_DEFL_NO_CHANGE -1 No change + KEY_REQKEY_DEFL_DEFAULT 0 Default[1] + KEY_REQKEY_DEFL_THREAD_KEYRING 1 Thread keyring + KEY_REQKEY_DEFL_PROCESS_KEYRING 2 Process keyring + KEY_REQKEY_DEFL_SESSION_KEYRING 3 Session keyring + KEY_REQKEY_DEFL_USER_KEYRING 4 User keyring + KEY_REQKEY_DEFL_USER_SESSION_KEYRING 5 User session keyring + KEY_REQKEY_DEFL_GROUP_KEYRING 6 Group keyring + + The old default will be returned if successful and error EINVAL will be + returned if reqkey_defl is not one of the above values. + + The default keyring can be overridden by the keyring indicated to the + request_key() system call. + + Note that this setting is inherited across fork/exec. + + [1] The default is: the thread keyring if there is one, otherwise + the process keyring if there is one, otherwise the session keyring if + there is one, otherwise the user default session keyring. + + + (*) Set the timeout on a key. + + long keyctl(KEYCTL_SET_TIMEOUT, key_serial_t key, unsigned timeout); + + This sets or clears the timeout on a key. The timeout can be 0 to clear + the timeout or a number of seconds to set the expiry time that far into + the future. 
+ + The process must have attribute modification access on a key to set its + timeout. Timeouts may not be set with this function on negative, revoked + or expired keys. + + + (*) Assume the authority granted to instantiate a key + + long keyctl(KEYCTL_ASSUME_AUTHORITY, key_serial_t key); + + This assumes or divests the authority required to instantiate the + specified key. Authority can only be assumed if the thread has the + authorisation key associated with the specified key in its keyrings + somewhere. + + Once authority is assumed, searches for keys will also search the + requester's keyrings using the requester's security label, UID, GID and + groups. + + If the requested authority is unavailable, error EPERM will be returned, + likewise if the authority has been revoked because the target key is + already instantiated. + + If the specified key is 0, then any assumed authority will be divested. + + The assumed authoritative key is inherited across fork and exec. + + + (*) Get the LSM security context attached to a key. + + long keyctl(KEYCTL_GET_SECURITY, key_serial_t key, char *buffer, + size_t buflen) + + This function returns a string that represents the LSM security context + attached to a key in the buffer provided. + + Unless there's an error, it always returns the amount of data it could + produce, even if that's too big for the buffer, but it won't copy more + than requested to userspace. If the buffer pointer is NULL then no copy + will take place. + + A NUL character is included at the end of the string if the buffer is + sufficiently big. This is included in the returned count. If no LSM is + in force then an empty string will be returned. + + A process must have view permission on the key for this function to be + successful. + + + (*) Install the calling process's session keyring on its parent. + + long keyctl(KEYCTL_SESSION_TO_PARENT); + + This functions attempts to install the calling process's session keyring + on to the calling process's parent, replacing the parent's current session + keyring. + + The calling process must have the same ownership as its parent, the + keyring must have the same ownership as the calling process, the calling + process must have LINK permission on the keyring and the active LSM module + mustn't deny permission, otherwise error EPERM will be returned. + + Error ENOMEM will be returned if there was insufficient memory to complete + the operation, otherwise 0 will be returned to indicate success. + + The keyring will be replaced next time the parent process leaves the + kernel and resumes executing userspace. + + +=============== +KERNEL SERVICES +=============== + +The kernel services for key management are fairly simple to deal with. They can +be broken down into two areas: keys and key types. + +Dealing with keys is fairly straightforward. Firstly, the kernel service +registers its type, then it searches for a key of that type. It should retain +the key as long as it has need of it, and then it should release it. For a +filesystem or device file, a search would probably be performed during the open +call, and the key released upon close. How to deal with conflicting keys due to +two different users opening the same file is left to the filesystem author to +solve. + +To access the key manager, the following header must be #included: + + + +Specific key types should have a header file under include/keys/ that should be +used to access that type. 
For keys of type "user", for example, that would be: + + + +Note that there are two different types of pointers to keys that may be +encountered: + + (*) struct key * + + This simply points to the key structure itself. Key structures will be at + least four-byte aligned. + + (*) key_ref_t + + This is equivalent to a struct key *, but the least significant bit is set + if the caller "possesses" the key. By "possession" it is meant that the + calling processes has a searchable link to the key from one of its + keyrings. There are three functions for dealing with these: + + key_ref_t make_key_ref(const struct key *key, + unsigned long possession); + + struct key *key_ref_to_ptr(const key_ref_t key_ref); + + unsigned long is_key_possessed(const key_ref_t key_ref); + + The first function constructs a key reference from a key pointer and + possession information (which must be 0 or 1 and not any other value). + + The second function retrieves the key pointer from a reference and the + third retrieves the possession flag. + +When accessing a key's payload contents, certain precautions must be taken to +prevent access vs modification races. See the section "Notes on accessing +payload contents" for more information. + +(*) To search for a key, call: + + struct key *request_key(const struct key_type *type, + const char *description, + const char *callout_info); + + This is used to request a key or keyring with a description that matches + the description specified according to the key type's match function. This + permits approximate matching to occur. If callout_string is not NULL, then + /sbin/request-key will be invoked in an attempt to obtain the key from + userspace. In that case, callout_string will be passed as an argument to + the program. + + Should the function fail error ENOKEY, EKEYEXPIRED or EKEYREVOKED will be + returned. + + If successful, the key will have been attached to the default keyring for + implicitly obtained request-key keys, as set by KEYCTL_SET_REQKEY_KEYRING. + + See also Documentation/security/keys-request-key.txt. + + +(*) To search for a key, passing auxiliary data to the upcaller, call: + + struct key *request_key_with_auxdata(const struct key_type *type, + const char *description, + const void *callout_info, + size_t callout_len, + void *aux); + + This is identical to request_key(), except that the auxiliary data is + passed to the key_type->request_key() op if it exists, and the callout_info + is a blob of length callout_len, if given (the length may be 0). + + +(*) A key can be requested asynchronously by calling one of: + + struct key *request_key_async(const struct key_type *type, + const char *description, + const void *callout_info, + size_t callout_len); + + or: + + struct key *request_key_async_with_auxdata(const struct key_type *type, + const char *description, + const char *callout_info, + size_t callout_len, + void *aux); + + which are asynchronous equivalents of request_key() and + request_key_with_auxdata() respectively. + + These two functions return with the key potentially still under + construction. To wait for construction completion, the following should be + called: + + int wait_for_key_construction(struct key *key, bool intr); + + The function will wait for the key to finish being constructed and then + invokes key_validate() to return an appropriate value to indicate the state + of the key (0 indicates the key is usable). + + If intr is true, then the wait can be interrupted by a signal, in which + case error ERESTARTSYS will be returned. 
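As a minimal sketch of the synchronous interface above, a kernel service might
obtain and later release a key of the "user" type along these lines; the
description string is made up, and key_put() is covered next:

        #include <linux/err.h>
        #include <linux/key.h>
        #include <keys/user-type.h>     /* key_type_user */

        static int example_fetch_token(void)
        {
                struct key *key;

                /*
                 * NULL callout data: only the requesting process's keyrings
                 * are searched; no upcall to /sbin/request-key is made.
                 */
                key = request_key(&key_type_user, "example:service-token", NULL);
                if (IS_ERR(key))
                        return PTR_ERR(key);    /* -ENOKEY, -EKEYEXPIRED, ... */

                /* ... use the payload as per "Notes on accessing payload
                 *     contents" below ... */

                key_put(key);
                return 0;
        }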
+ + +(*) When it is no longer required, the key should be released using: + + void key_put(struct key *key); + + Or: + + void key_ref_put(key_ref_t key_ref); + + These can be called from interrupt context. If CONFIG_KEYS is not set then + the argument will not be parsed. + + +(*) Extra references can be made to a key by calling the following function: + + struct key *key_get(struct key *key); + + These need to be disposed of by calling key_put() when they've been + finished with. The key pointer passed in will be returned. If the pointer + is NULL or CONFIG_KEYS is not set then the key will not be dereferenced and + no increment will take place. + + +(*) A key's serial number can be obtained by calling: + + key_serial_t key_serial(struct key *key); + + If key is NULL or if CONFIG_KEYS is not set then 0 will be returned (in the + latter case without parsing the argument). + + +(*) If a keyring was found in the search, this can be further searched by: + + key_ref_t keyring_search(key_ref_t keyring_ref, + const struct key_type *type, + const char *description) + + This searches the keyring tree specified for a matching key. Error ENOKEY + is returned upon failure (use IS_ERR/PTR_ERR to determine). If successful, + the returned key will need to be released. + + The possession attribute from the keyring reference is used to control + access through the permissions mask and is propagated to the returned key + reference pointer if successful. + + +(*) To check the validity of a key, this function can be called: + + int validate_key(struct key *key); + + This checks that the key in question hasn't expired or and hasn't been + revoked. Should the key be invalid, error EKEYEXPIRED or EKEYREVOKED will + be returned. If the key is NULL or if CONFIG_KEYS is not set then 0 will be + returned (in the latter case without parsing the argument). + + +(*) To register a key type, the following function should be called: + + int register_key_type(struct key_type *type); + + This will return error EEXIST if a type of the same name is already + present. + + +(*) To unregister a key type, call: + + void unregister_key_type(struct key_type *type); + + +Under some circumstances, it may be desirable to deal with a bundle of keys. +The facility provides access to the keyring type for managing such a bundle: + + struct key_type key_type_keyring; + +This can be used with a function such as request_key() to find a specific +keyring in a process's keyrings. A keyring thus found can then be searched +with keyring_search(). Note that it is not possible to use request_key() to +search a specific keyring, so using keyrings in this way is of limited utility. + + +=================================== +NOTES ON ACCESSING PAYLOAD CONTENTS +=================================== + +The simplest payload is just a number in key->payload.value. In this case, +there's no need to indulge in RCU or locking when accessing the payload. + +More complex payload contents must be allocated and a pointer to them set in +key->payload.data. One of the following ways must be selected to access the +data: + + (1) Unmodifiable key type. + + If the key type does not have a modify method, then the key's payload can + be accessed without any form of locking, provided that it's known to be + instantiated (uninstantiated keys cannot be "found"). + + (2) The key's semaphore. + + The semaphore could be used to govern access to the payload and to control + the payload pointer. 
It must be write-locked for modifications and would + have to be read-locked for general access. The disadvantage of doing this + is that the accessor may be required to sleep. + + (3) RCU. + + RCU must be used when the semaphore isn't already held; if the semaphore + is held then the contents can't change under you unexpectedly as the + semaphore must still be used to serialise modifications to the key. The + key management code takes care of this for the key type. + + However, this means using: + + rcu_read_lock() ... rcu_dereference() ... rcu_read_unlock() + + to read the pointer, and: + + rcu_dereference() ... rcu_assign_pointer() ... call_rcu() + + to set the pointer and dispose of the old contents after a grace period. + Note that only the key type should ever modify a key's payload. + + Furthermore, an RCU controlled payload must hold a struct rcu_head for the + use of call_rcu() and, if the payload is of variable size, the length of + the payload. key->datalen cannot be relied upon to be consistent with the + payload just dereferenced if the key's semaphore is not held. + + +=================== +DEFINING A KEY TYPE +=================== + +A kernel service may want to define its own key type. For instance, an AFS +filesystem might want to define a Kerberos 5 ticket key type. To do this, it +author fills in a key_type struct and registers it with the system. + +Source files that implement key types should include the following header file: + + + +The structure has a number of fields, some of which are mandatory: + + (*) const char *name + + The name of the key type. This is used to translate a key type name + supplied by userspace into a pointer to the structure. + + + (*) size_t def_datalen + + This is optional - it supplies the default payload data length as + contributed to the quota. If the key type's payload is always or almost + always the same size, then this is a more efficient way to do things. + + The data length (and quota) on a particular key can always be changed + during instantiation or update by calling: + + int key_payload_reserve(struct key *key, size_t datalen); + + With the revised data length. Error EDQUOT will be returned if this is not + viable. + + + (*) int (*vet_description)(const char *description); + + This optional method is called to vet a key description. If the key type + doesn't approve of the key description, it may return an error, otherwise + it should return 0. + + + (*) int (*instantiate)(struct key *key, const void *data, size_t datalen); + + This method is called to attach a payload to a key during construction. + The payload attached need not bear any relation to the data passed to this + function. + + If the amount of data attached to the key differs from the size in + keytype->def_datalen, then key_payload_reserve() should be called. + + This method does not have to lock the key in order to attach a payload. + The fact that KEY_FLAG_INSTANTIATED is not set in key->flags prevents + anything else from gaining access to the key. + + It is safe to sleep in this method. + + + (*) int (*update)(struct key *key, const void *data, size_t datalen); + + If this type of key can be updated, then this method should be provided. + It is called to update a key's payload from the blob of data provided. + + key_payload_reserve() should be called if the data length might change + before any changes are actually made. 
Note that if this succeeds, the type + is committed to changing the key because it's already been altered, so all + memory allocation must be done first. + + The key will have its semaphore write-locked before this method is called, + but this only deters other writers; any changes to the key's payload must + be made under RCU conditions, and call_rcu() must be used to dispose of + the old payload. + + key_payload_reserve() should be called before the changes are made, but + after all allocations and other potentially failing function calls are + made. + + It is safe to sleep in this method. + + + (*) int (*match)(const struct key *key, const void *desc); + + This method is called to match a key against a description. It should + return non-zero if the two match, zero if they don't. + + This method should not need to lock the key in any way. The type and + description can be considered invariant, and the payload should not be + accessed (the key may not yet be instantiated). + + It is not safe to sleep in this method; the caller may hold spinlocks. + + + (*) void (*revoke)(struct key *key); + + This method is optional. It is called to discard part of the payload + data upon a key being revoked. The caller will have the key semaphore + write-locked. + + It is safe to sleep in this method, though care should be taken to avoid + a deadlock against the key semaphore. + + + (*) void (*destroy)(struct key *key); + + This method is optional. It is called to discard the payload data on a key + when it is being destroyed. + + This method does not need to lock the key to access the payload; it can + consider the key as being inaccessible at this time. Note that the key's + type may have been changed before this function is called. + + It is not safe to sleep in this method; the caller may hold spinlocks. + + + (*) void (*describe)(const struct key *key, struct seq_file *p); + + This method is optional. It is called during /proc/keys reading to + summarise a key's description and payload in text form. + + This method will be called with the RCU read lock held. rcu_dereference() + should be used to read the payload pointer if the payload is to be + accessed. key->datalen cannot be trusted to stay consistent with the + contents of the payload. + + The description will not change, though the key's state may. + + It is not safe to sleep in this method; the RCU read lock is held by the + caller. + + + (*) long (*read)(const struct key *key, char __user *buffer, size_t buflen); + + This method is optional. It is called by KEYCTL_READ to translate the + key's payload into something a blob of data for userspace to deal with. + Ideally, the blob should be in the same format as that passed in to the + instantiate and update methods. + + If successful, the blob size that could be produced should be returned + rather than the size copied. + + This method will be called with the key's semaphore read-locked. This will + prevent the key's payload changing. It is not necessary to use RCU locking + when accessing the key's payload. It is safe to sleep in this method, such + as might happen when the userspace buffer is accessed. + + + (*) int (*request_key)(struct key_construction *cons, const char *op, + void *aux); + + This method is optional. If provided, request_key() and friends will + invoke this function rather than upcalling to /sbin/request-key to operate + upon a key of this type. + + The aux parameter is as passed to request_key_async_with_auxdata() and + similar or is NULL otherwise. 
Also passed are the construction record for + the key to be operated upon and the operation type (currently only + "create"). + + This method is permitted to return before the upcall is complete, but the + following function must be called under all circumstances to complete the + instantiation process, whether or not it succeeds, whether or not there's + an error: + + void complete_request_key(struct key_construction *cons, int error); + + The error parameter should be 0 on success, -ve on error. The + construction record is destroyed by this action and the authorisation key + will be revoked. If an error is indicated, the key under construction + will be negatively instantiated if it wasn't already instantiated. + + If this method returns an error, that error will be returned to the + caller of request_key*(). complete_request_key() must be called prior to + returning. + + The key under construction and the authorisation key can be found in the + key_construction struct pointed to by cons: + + (*) struct key *key; + + The key under construction. + + (*) struct key *authkey; + + The authorisation key. + + +============================ +REQUEST-KEY CALLBACK SERVICE +============================ + +To create a new key, the kernel will attempt to execute the following command +line: + + /sbin/request-key create \ + + + is the key being constructed, and the three keyrings are the process +keyrings from the process that caused the search to be issued. These are +included for two reasons: + + (1) There may be an authentication token in one of the keyrings that is + required to obtain the key, eg: a Kerberos Ticket-Granting Ticket. + + (2) The new key should probably be cached in one of these rings. + +This program should set it UID and GID to those specified before attempting to +access any more keys. It may then look around for a user specific process to +hand the request off to (perhaps a path held in placed in another key by, for +example, the KDE desktop manager). + +The program (or whatever it calls) should finish construction of the key by +calling KEYCTL_INSTANTIATE or KEYCTL_INSTANTIATE_IOV, which also permits it to +cache the key in one of the keyrings (probably the session ring) before +returning. Alternatively, the key can be marked as negative with KEYCTL_NEGATE +or KEYCTL_REJECT; this also permits the key to be cached in one of the +keyrings. + +If it returns with the key remaining in the unconstructed state, the key will +be marked as being negative, it will be added to the session keyring, and an +error will be returned to the key requestor. + +Supplementary information may be provided from whoever or whatever invoked this +service. This will be passed as the parameter. If no such +information was made available, then "-" will be passed as this parameter +instead. + + +Similarly, the kernel may attempt to update an expired or a soon to expire key +by executing: + + /sbin/request-key update \ + + +In this case, the program isn't required to actually attach the key to a ring; +the rings are provided for reference. + + +================== +GARBAGE COLLECTION +================== + +Dead keys (for which the type has been removed) will be automatically unlinked +from those keyrings that point to them and deleted as soon as possible by a +background garbage collector. + +Similarly, revoked and expired keys will be garbage collected, but only after a +certain amount of time has passed. 
This time is set as a number of seconds in: + + /proc/sys/kernel/keys/gc_delay diff --git a/Documentation/security/tomoyo.txt b/Documentation/security/tomoyo.txt new file mode 100644 index 000000000000..200a2d37cbc8 --- /dev/null +++ b/Documentation/security/tomoyo.txt @@ -0,0 +1,55 @@ +--- What is TOMOYO? --- + +TOMOYO is a name-based MAC extension (LSM module) for the Linux kernel. + +LiveCD-based tutorials are available at +http://tomoyo.sourceforge.jp/1.7/1st-step/ubuntu10.04-live/ +http://tomoyo.sourceforge.jp/1.7/1st-step/centos5-live/ . +Though these tutorials use non-LSM version of TOMOYO, they are useful for you +to know what TOMOYO is. + +--- How to enable TOMOYO? --- + +Build the kernel with CONFIG_SECURITY_TOMOYO=y and pass "security=tomoyo" on +kernel's command line. + +Please see http://tomoyo.sourceforge.jp/2.3/ for details. + +--- Where is documentation? --- + +User <-> Kernel interface documentation is available at +http://tomoyo.sourceforge.jp/2.3/policy-reference.html . + +Materials we prepared for seminars and symposiums are available at +http://sourceforge.jp/projects/tomoyo/docs/?category_id=532&language_id=1 . +Below lists are chosen from three aspects. + +What is TOMOYO? + TOMOYO Linux Overview + http://sourceforge.jp/projects/tomoyo/docs/lca2009-takeda.pdf + TOMOYO Linux: pragmatic and manageable security for Linux + http://sourceforge.jp/projects/tomoyo/docs/freedomhectaipei-tomoyo.pdf + TOMOYO Linux: A Practical Method to Understand and Protect Your Own Linux Box + http://sourceforge.jp/projects/tomoyo/docs/PacSec2007-en-no-demo.pdf + +What can TOMOYO do? + Deep inside TOMOYO Linux + http://sourceforge.jp/projects/tomoyo/docs/lca2009-kumaneko.pdf + The role of "pathname based access control" in security. + http://sourceforge.jp/projects/tomoyo/docs/lfj2008-bof.pdf + +History of TOMOYO? + Realities of Mainlining + http://sourceforge.jp/projects/tomoyo/docs/lfj2008.pdf + +--- What is future plan? --- + +We believe that inode based security and name based security are complementary +and both should be used together. But unfortunately, so far, we cannot enable +multiple LSM modules at the same time. We feel sorry that you have to give up +SELinux/SMACK/AppArmor etc. when you want to use TOMOYO. + +We hope that LSM becomes stackable in future. Meanwhile, you can use non-LSM +version of TOMOYO, available at http://tomoyo.sourceforge.jp/1.7/ . +LSM version of TOMOYO is a subset of non-LSM version of TOMOYO. We are planning +to port non-LSM version's functionalities to LSM versions. diff --git a/Documentation/tomoyo.txt b/Documentation/tomoyo.txt deleted file mode 100644 index 200a2d37cbc8..000000000000 --- a/Documentation/tomoyo.txt +++ /dev/null @@ -1,55 +0,0 @@ ---- What is TOMOYO? --- - -TOMOYO is a name-based MAC extension (LSM module) for the Linux kernel. - -LiveCD-based tutorials are available at -http://tomoyo.sourceforge.jp/1.7/1st-step/ubuntu10.04-live/ -http://tomoyo.sourceforge.jp/1.7/1st-step/centos5-live/ . -Though these tutorials use non-LSM version of TOMOYO, they are useful for you -to know what TOMOYO is. - ---- How to enable TOMOYO? --- - -Build the kernel with CONFIG_SECURITY_TOMOYO=y and pass "security=tomoyo" on -kernel's command line. - -Please see http://tomoyo.sourceforge.jp/2.3/ for details. - ---- Where is documentation? --- - -User <-> Kernel interface documentation is available at -http://tomoyo.sourceforge.jp/2.3/policy-reference.html . 
- -Materials we prepared for seminars and symposiums are available at -http://sourceforge.jp/projects/tomoyo/docs/?category_id=532&language_id=1 . -Below lists are chosen from three aspects. - -What is TOMOYO? - TOMOYO Linux Overview - http://sourceforge.jp/projects/tomoyo/docs/lca2009-takeda.pdf - TOMOYO Linux: pragmatic and manageable security for Linux - http://sourceforge.jp/projects/tomoyo/docs/freedomhectaipei-tomoyo.pdf - TOMOYO Linux: A Practical Method to Understand and Protect Your Own Linux Box - http://sourceforge.jp/projects/tomoyo/docs/PacSec2007-en-no-demo.pdf - -What can TOMOYO do? - Deep inside TOMOYO Linux - http://sourceforge.jp/projects/tomoyo/docs/lca2009-kumaneko.pdf - The role of "pathname based access control" in security. - http://sourceforge.jp/projects/tomoyo/docs/lfj2008-bof.pdf - -History of TOMOYO? - Realities of Mainlining - http://sourceforge.jp/projects/tomoyo/docs/lfj2008.pdf - ---- What is future plan? --- - -We believe that inode based security and name based security are complementary -and both should be used together. But unfortunately, so far, we cannot enable -multiple LSM modules at the same time. We feel sorry that you have to give up -SELinux/SMACK/AppArmor etc. when you want to use TOMOYO. - -We hope that LSM becomes stackable in future. Meanwhile, you can use non-LSM -version of TOMOYO, available at http://tomoyo.sourceforge.jp/1.7/ . -LSM version of TOMOYO is a subset of non-LSM version of TOMOYO. We are planning -to port non-LSM version's functionalities to LSM versions. diff --git a/MAINTAINERS b/MAINTAINERS index 69f19f10314a..3fa170ba5f98 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3705,7 +3705,7 @@ KEYS/KEYRINGS: M: David Howells L: keyrings@linux-nfs.org S: Maintained -F: Documentation/keys.txt +F: Documentation/security/keys.txt F: include/linux/key.h F: include/linux/key-type.h F: include/keys/ @@ -3717,7 +3717,7 @@ M: Mimi Zohar L: linux-security-module@vger.kernel.org L: keyrings@linux-nfs.org S: Supported -F: Documentation/keys-trusted-encrypted.txt +F: Documentation/security/keys-trusted-encrypted.txt F: include/keys/trusted-type.h F: security/keys/trusted.c F: security/keys/trusted.h @@ -3728,7 +3728,7 @@ M: David Safford L: linux-security-module@vger.kernel.org L: keyrings@linux-nfs.org S: Supported -F: Documentation/keys-trusted-encrypted.txt +F: Documentation/security/keys-trusted-encrypted.txt F: include/keys/encrypted-type.h F: security/keys/encrypted.c F: security/keys/encrypted.h diff --git a/include/linux/cred.h b/include/linux/cred.h index be16b61283cc..82607992f308 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -1,4 +1,4 @@ -/* Credentials management - see Documentation/credentials.txt +/* Credentials management - see Documentation/security/credentials.txt * * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) diff --git a/include/linux/key.h b/include/linux/key.h index b2bb01719561..303982a69933 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. * * - * See Documentation/keys.txt for information on keys/keyrings. + * See Documentation/security/keys.txt for information on keys/keyrings. 
*/ #ifndef _LINUX_KEY_H diff --git a/kernel/cred.c b/kernel/cred.c index 8093c16b84b1..004e3679624d 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -1,4 +1,4 @@ -/* Task credentials management - see Documentation/credentials.txt +/* Task credentials management - see Documentation/security/credentials.txt * * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) diff --git a/scripts/selinux/README b/scripts/selinux/README index a936315ba2c8..4d020ecb7524 100644 --- a/scripts/selinux/README +++ b/scripts/selinux/README @@ -1,2 +1,2 @@ -Please see Documentation/SELinux.txt for information on +Please see Documentation/security/SELinux.txt for information on installing a dummy SELinux policy. diff --git a/security/apparmor/match.c b/security/apparmor/match.c index 06d764ccbbe5..94de6b4907c8 100644 --- a/security/apparmor/match.c +++ b/security/apparmor/match.c @@ -194,7 +194,7 @@ void aa_dfa_free_kref(struct kref *kref) * @flags: flags controlling what type of accept tables are acceptable * * Unpack a dfa that has been serialized. To find information on the dfa - * format look in Documentation/apparmor.txt + * format look in Documentation/security/apparmor.txt * Assumes the dfa @blob stream has been aligned on a 8 byte boundary * * Returns: an unpacked dfa ready for matching or ERR_PTR on failure diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index e33aaf7e5744..d6d9a57b5652 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -12,8 +12,8 @@ * published by the Free Software Foundation, version 2 of the * License. * - * AppArmor uses a serialized binary format for loading policy. - * To find policy format documentation look in Documentation/apparmor.txt + * AppArmor uses a serialized binary format for loading policy. To find + * policy format documentation look in Documentation/security/apparmor.txt * All policy is validated before it is used. */ diff --git a/security/keys/encrypted.c b/security/keys/encrypted.c index 69907a58a683..b1cba5bf0a5e 100644 --- a/security/keys/encrypted.c +++ b/security/keys/encrypted.c @@ -8,7 +8,7 @@ * it under the terms of the GNU General Public License as published by * the Free Software Foundation, version 2 of the License. * - * See Documentation/keys-trusted-encrypted.txt + * See Documentation/security/keys-trusted-encrypted.txt */ #include diff --git a/security/keys/request_key.c b/security/keys/request_key.c index df3c0417ee40..d41cc153a313 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -8,7 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * See Documentation/keys-request-key.txt + * See Documentation/security/keys-request-key.txt */ #include diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 68164031a74e..3c0cfdec6e37 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -8,7 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. 
* - * See Documentation/keys-request-key.txt + * See Documentation/security/keys-request-key.txt */ #include diff --git a/security/keys/trusted.c b/security/keys/trusted.c index c99b9368368c..0c33e2ea1f3c 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -8,7 +8,7 @@ * it under the terms of the GNU General Public License as published by * the Free Software Foundation, version 2 of the License. * - * See Documentation/keys-trusted-encrypted.txt + * See Documentation/security/keys-trusted-encrypted.txt */ #include -- cgit v1.2.3 From 4539c24fe4f92c09ee668ef959d3e8180df619b9 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Tue, 17 May 2011 16:12:36 -0600 Subject: tty/serial: Add explicit PORT_TEGRA type Tegra's UART is currently auto-detected as PORT_XSCALE due to register bit UART_IER.UUE being writable. However, the Tegra documentation states that this register bit is reserved. Hence, we should not program it. Instead, the documentation specifies that the UART is 16550 compatible. However, Tegra does need register bit UART_IER.RTOIE set, which is not enabled by any 16550 port type. This was not noticed before, since PORT_XSCALE enables CAP_UUE, which conflates both UUE and RTOIE bit programming. This change defines PORT_TEGRA that doesn't set UART_CAP_UUE, but does set UART_CAP_RTOIE, which is a new capability indicating that the RTOIE bit needs to be enabled. Based-on-code-by: Laxman Dewangan Cc: Laxman Dewangan Signed-off-by: Stephen Warren Acked-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250.c | 14 ++++++++++++-- drivers/tty/serial/8250.h | 1 + include/linux/serial_core.h | 3 ++- include/linux/serial_reg.h | 1 + 4 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/serial/8250.c b/drivers/tty/serial/8250.c index 54482d724fee..a5e290de8c93 100644 --- a/drivers/tty/serial/8250.c +++ b/drivers/tty/serial/8250.c @@ -271,7 +271,7 @@ static const struct serial8250_config uart_config[] = { .fifo_size = 32, .tx_loadsz = 32, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - .flags = UART_CAP_FIFO | UART_CAP_UUE, + .flags = UART_CAP_FIFO | UART_CAP_UUE | UART_CAP_RTOIE, }, [PORT_RM9000] = { .name = "RM9000", @@ -301,6 +301,14 @@ static const struct serial8250_config uart_config[] = { .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO | UART_CAP_AFE, }, + [PORT_TEGRA] = { + .name = "Tegra", + .fifo_size = 32, + .tx_loadsz = 8, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | + UART_FCR_T_TRIG_01, + .flags = UART_CAP_FIFO | UART_CAP_RTOIE, + }, }; #if defined(CONFIG_MIPS_ALCHEMY) @@ -2403,7 +2411,9 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, UART_ENABLE_MS(&up->port, termios->c_cflag)) up->ier |= UART_IER_MSI; if (up->capabilities & UART_CAP_UUE) - up->ier |= UART_IER_UUE | UART_IER_RTOIE; + up->ier |= UART_IER_UUE; + if (up->capabilities & UART_CAP_RTOIE) + up->ier |= UART_IER_RTOIE; serial_out(up, UART_IER, up->ier); diff --git a/drivers/tty/serial/8250.h b/drivers/tty/serial/8250.h index d13b586c0f72..6edf4a6a22d4 100644 --- a/drivers/tty/serial/8250.h +++ b/drivers/tty/serial/8250.h @@ -42,6 +42,7 @@ struct serial8250_config { #define UART_CAP_SLEEP (1 << 10) /* UART has IER sleep */ #define UART_CAP_AFE (1 << 11) /* MCR-based hw flow control */ #define UART_CAP_UUE (1 << 12) /* UART needs IER bit 6 set (Xscale) */ +#define UART_CAP_RTOIE (1 << 13) /* UART needs IER bit 4 set (Xscale, Tegra) */ #define UART_BUG_QUOT (1 << 0) /* UART 
has buggy quot LSB */ #define UART_BUG_TXEN (1 << 1) /* UART has buggy TX IIR status */ diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 95d479ba514e..a5c31146a337 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -45,7 +45,8 @@ #define PORT_OCTEON 17 /* Cavium OCTEON internal UART */ #define PORT_AR7 18 /* Texas Instruments AR7 internal UART */ #define PORT_U6_16550A 19 /* ST-Ericsson U6xxx internal UART */ -#define PORT_MAX_8250 19 /* max port ID */ +#define PORT_TEGRA 20 /* NVIDIA Tegra internal UART */ +#define PORT_MAX_8250 20 /* max port ID */ /* * ARM specific type numbers. These are not currently guaranteed diff --git a/include/linux/serial_reg.h b/include/linux/serial_reg.h index 3ecb71a9e505..5f66e8499fb9 100644 --- a/include/linux/serial_reg.h +++ b/include/linux/serial_reg.h @@ -57,6 +57,7 @@ * ST16C654: 8 16 56 60 8 16 32 56 PORT_16654 * TI16C750: 1 16 32 56 xx xx xx xx PORT_16750 * TI16C752: 8 16 56 60 8 16 32 56 + * Tegra: 1 4 8 14 16 8 4 1 PORT_TEGRA */ #define UART_FCR_R_TRIG_00 0x00 #define UART_FCR_R_TRIG_01 0x40 -- cgit v1.2.3 From 5f873bae704cf8b7cbd64b5720912266286c9146 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Tue, 17 May 2011 16:12:37 -0600 Subject: tty/serial: Fix break handling for PORT_TEGRA When a break is received, Tegra's UART apparently fills the FIFO with 0 bytes. These must be drained so that they aren't interpreted as actual data received. This allows e.g. MAGIC_SYSRQ to work on Tegra's UARTs. v2: Added FIXME comment to clear_rx_fifo Originally-by: Laxman Dewangan Cc: Laxman Dewangan Signed-off-by: Stephen Warren Acked-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250.c | 28 ++++++++++++++++++++++++++++ include/linux/serial_reg.h | 1 + 2 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/drivers/tty/serial/8250.c b/drivers/tty/serial/8250.c index a5e290de8c93..b40f7b90c81d 100644 --- a/drivers/tty/serial/8250.c +++ b/drivers/tty/serial/8250.c @@ -1433,6 +1433,27 @@ static void serial8250_enable_ms(struct uart_port *port) serial_out(up, UART_IER, up->ier); } +/* + * Clear the Tegra rx fifo after a break + * + * FIXME: This needs to become a port specific callback once we have a + * framework for this + */ +static void clear_rx_fifo(struct uart_8250_port *up) +{ + unsigned int status, tmout = 10000; + do { + status = serial_in(up, UART_LSR); + if (status & (UART_LSR_FIFOE | UART_LSR_BRK_ERROR_BITS)) + status = serial_in(up, UART_RX); + else + break; + if (--tmout == 0) + break; + udelay(1); + } while (1); +} + static void receive_chars(struct uart_8250_port *up, unsigned int *status) { @@ -1467,6 +1488,13 @@ receive_chars(struct uart_8250_port *up, unsigned int *status) if (lsr & UART_LSR_BI) { lsr &= ~(UART_LSR_FE | UART_LSR_PE); up->port.icount.brk++; + /* + * If tegra port then clear the rx fifo to + * accept another break/character. 
+ */ + if (up->port.type == PORT_TEGRA) + clear_rx_fifo(up); + /* * We do the SysRQ and SAK checking * here because otherwise the break diff --git a/include/linux/serial_reg.h b/include/linux/serial_reg.h index 5f66e8499fb9..c75bda37c18e 100644 --- a/include/linux/serial_reg.h +++ b/include/linux/serial_reg.h @@ -119,6 +119,7 @@ #define UART_MCR_DTR 0x01 /* DTR complement */ #define UART_LSR 5 /* In: Line Status Register */ +#define UART_LSR_FIFOE 0x80 /* Fifo error */ #define UART_LSR_TEMT 0x40 /* Transmitter empty */ #define UART_LSR_THRE 0x20 /* Transmit-hold-register empty */ #define UART_LSR_BI 0x10 /* Break interrupt indicator */ -- cgit v1.2.3 From 8a745f1f39b7a20047a362b67ce9151c07d14440 Mon Sep 17 00:00:00 2001 From: Kristen Carlson Accardi Date: Fri, 4 Mar 2011 10:24:11 -0800 Subject: libata: Power off empty ports Give users the option of completely powering off unoccupied SATA ports using the existing min_power link_power_management_policy option. When the use selects this option on an empty port, we will power the port off by setting DET to off. For occupied ports, behavior is unchanged. Signed-off-by: Kristen Carlson Accardi Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 10 ++++++++-- drivers/ata/libata-eh.c | 2 +- include/linux/libata.h | 1 + 3 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 76c3c15cb1e6..736bee5dafeb 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3619,8 +3619,14 @@ int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, scontrol |= (0x2 << 8); break; case ATA_LPM_MIN_POWER: - /* no restrictions on LPM transitions */ - scontrol &= ~(0x3 << 8); + if (ata_link_nr_enabled(link) > 0) + /* no restrictions on LPM transitions */ + scontrol &= ~(0x3 << 8); + else { + /* empty port, power off */ + scontrol &= ~0xf; + scontrol |= (0x1 << 2); + } break; default: WARN_ON(1); diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index dad9fd660f37..dfb6e9d3d759 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -3423,7 +3423,7 @@ fail: return rc; } -static int ata_link_nr_enabled(struct ata_link *link) +int ata_link_nr_enabled(struct ata_link *link) { struct ata_device *dev; int cnt = 0; diff --git a/include/linux/libata.h b/include/linux/libata.h index 04f32a3eb26b..5a9926b34072 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1151,6 +1151,7 @@ extern void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, ata_reset_fn_t softreset, ata_reset_fn_t hardreset, ata_postreset_fn_t postreset); extern void ata_std_error_handler(struct ata_port *ap); +extern int ata_link_nr_enabled(struct ata_link *link); /* * Base operations to inherit from and initializers for sht -- cgit v1.2.3 From 1477fcc290b3d5c2614bde98bf3b1154c538860d Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 20 May 2011 11:11:53 +1000 Subject: signal.h need a definition of struct task_struct This fixes these build errors on powerpc: In file included from arch/powerpc/mm/fault.c:18: include/linux/signal.h:239: error: 'struct task_struct' declared inside parameter list include/linux/signal.h:239: error: its scope is only this definition or declaration, which is probably not what you want include/linux/signal.h:240: error: 'struct task_struct' declared inside parameter list .. 
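The errors above come from prototypes in signal.h that merely take a struct task_struct * argument. A plain forward declaration is all such prototypes need, as the following stand-alone C sketch (illustrative only, not kernel code) shows:

struct task_struct;			/* forward declaration: the type stays opaque here */

/* A prototype that only passes a pointer compiles fine against the opaque type. */
extern int wants_a_task(struct task_struct *tsk);

/*
 * Dereferencing the pointer or taking sizeof(struct task_struct) would still
 * require the full definition; a bare prototype does not, which is why the
 * one-line declaration added in the diff below is enough to fix the build.
 */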
Exposed by commit e66eed651fd1 ("list: remove prefetching from regular list iterators"), which removed the include of <linux/prefetch.h> from <linux/list.h>. Without that, linux/signal.h no longer accidentally got the declaration of 'struct task_struct'. Fix by properly declaring the struct, rather than introducing any new header file dependency. Signed-off-by: Stephen Rothwell Signed-off-by: Linus Torvalds --- include/linux/signal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index fcd2b14b1932..29a68ac7af83 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -7,6 +7,8 @@ #ifdef __KERNEL__ #include <linux/list.h> +struct task_struct; + /* for sysctl */ extern int print_fatal_signals; /* -- cgit v1.2.3 From 4800a5bb13c09a572f7c74662a77c9eca229eba1 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 17 May 2011 14:41:06 -0400 Subject: include/linux/compat.h: coding-style fixes I touched this file when adding support for the "tilegx" sub-architecture, and Andrew Morton observed "The file's a mismash of old-style, wrong-style and right-style. There's no point in doing mishmash preservation! May as well fix things up when we touch them." Accordingly, this change makes <linux/compat.h> as checkpatch-clean as possible. It makes no semantic changes whatsoever. Signed-off-by: Chris Metcalf Signed-off-by: Andrew Morton --- include/linux/compat.h | 74 ++++++++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index e94184834b71..644e1a4692b1 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -27,7 +27,7 @@ typedef __compat_gid32_t compat_gid_t; struct compat_sel_arg_struct; struct rusage; -struct compat_itimerspec { +struct compat_itimerspec { struct compat_timespec it_interval; struct compat_timespec it_value; }; @@ -71,9 +71,9 @@ struct compat_timex { compat_long_t stbcnt; compat_int_t tai; - compat_int_t :32; compat_int_t :32; compat_int_t :32; compat_int_t :32; - compat_int_t :32; compat_int_t :32; compat_int_t :32; compat_int_t :32; - compat_int_t :32; compat_int_t :32; compat_int_t :32; + compat_int_t:32; compat_int_t:32; compat_int_t:32; compat_int_t:32; + compat_int_t:32; compat_int_t:32; compat_int_t:32; compat_int_t:32; + compat_int_t:32; compat_int_t:32; compat_int_t:32; }; #define _COMPAT_NSIG_WORDS (_COMPAT_NSIG / _COMPAT_NSIG_BPW) @@ -82,8 +82,10 @@ typedef struct { compat_sigset_word sig[_COMPAT_NSIG_WORDS]; } compat_sigset_t; -extern int get_compat_timespec(struct timespec *, const struct compat_timespec __user *); -extern int put_compat_timespec(const struct timespec *, struct compat_timespec __user *); +extern int get_compat_timespec(struct timespec *, + const struct compat_timespec __user *); +extern int put_compat_timespec(const struct timespec *, + struct compat_timespec __user *); struct compat_iovec { compat_uptr_t iov_base; @@ -114,7 +116,8 @@ struct compat_rusage { compat_long_t ru_nivcsw; }; -extern int put_compat_rusage(const struct rusage *, struct compat_rusage __user *); +extern int put_compat_rusage(const struct rusage *, + struct compat_rusage __user *); struct compat_siginfo; @@ -167,8 +170,7 @@ struct compat_ifmap { unsigned char port; }; -struct compat_if_settings -{ +struct compat_if_settings { unsigned int type; /* Type of physical device or protocol */ unsigned int size; /* Size of the data allocated by the caller */ compat_uptr_t ifs_ifsu; /* union of pointers */ @@ -196,8 
+198,8 @@ struct compat_ifreq { }; struct compat_ifconf { - compat_int_t ifc_len; /* size of buffer */ - compat_caddr_t ifcbuf; + compat_int_t ifc_len; /* size of buffer */ + compat_caddr_t ifcbuf; }; struct compat_robust_list { @@ -256,8 +258,8 @@ asmlinkage ssize_t compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen, u32 pos_low, u32 pos_high); -int compat_do_execve(char * filename, compat_uptr_t __user *argv, - compat_uptr_t __user *envp, struct pt_regs * regs); +int compat_do_execve(char *filename, compat_uptr_t __user *argv, + compat_uptr_t __user *envp, struct pt_regs *regs); asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, @@ -346,14 +348,16 @@ asmlinkage long compat_sys_epoll_pwait(int epfd, asmlinkage long compat_sys_utime(const char __user *filename, struct compat_utimbuf __user *t); -asmlinkage long compat_sys_utimensat(unsigned int dfd, const char __user *filename, - struct compat_timespec __user *t, int flags); +asmlinkage long compat_sys_utimensat(unsigned int dfd, + const char __user *filename, + struct compat_timespec __user *t, + int flags); asmlinkage long compat_sys_time(compat_time_t __user *tloc); asmlinkage long compat_sys_stime(compat_time_t __user *tptr); asmlinkage long compat_sys_signalfd(int ufd, - const compat_sigset_t __user *sigmask, - compat_size_t sigsetsize); + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize); asmlinkage long compat_sys_timerfd_settime(int ufd, int flags, const struct compat_itimerspec __user *utmr, struct compat_itimerspec __user *otmr); @@ -365,19 +369,21 @@ asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_page, const int __user *nodes, int __user *status, int flags); -asmlinkage long compat_sys_futimesat(unsigned int dfd, const char __user *filename, +asmlinkage long compat_sys_futimesat(unsigned int dfd, + const char __user *filename, struct compat_timeval __user *t); asmlinkage long compat_sys_utimes(const char __user *filename, struct compat_timeval __user *t); -asmlinkage long compat_sys_newstat(const char __user * filename, +asmlinkage long compat_sys_newstat(const char __user *filename, struct compat_stat __user *statbuf); -asmlinkage long compat_sys_newlstat(const char __user * filename, +asmlinkage long compat_sys_newlstat(const char __user *filename, struct compat_stat __user *statbuf); -asmlinkage long compat_sys_newfstatat(unsigned int dfd, const char __user * filename, +asmlinkage long compat_sys_newfstatat(unsigned int dfd, + const char __user *filename, struct compat_stat __user *statbuf, int flag); asmlinkage long compat_sys_newfstat(unsigned int fd, - struct compat_stat __user * statbuf); + struct compat_stat __user *statbuf); asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf); asmlinkage long compat_sys_fstatfs(unsigned int fd, @@ -399,10 +405,10 @@ asmlinkage long compat_sys_io_getevents(aio_context_t ctx_id, struct compat_timespec __user *timeout); asmlinkage long compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb); -asmlinkage long compat_sys_mount(const char __user * dev_name, - const char __user * dir_name, - const char __user * type, unsigned long flags, - const void __user * data); +asmlinkage long compat_sys_mount(const char __user *dev_name, + const char __user *dir_name, + const char __user *type, unsigned long flags, + const void __user *data); asmlinkage long compat_sys_old_readdir(unsigned int 
fd, struct compat_old_linux_dirent __user *, unsigned int count); @@ -410,7 +416,7 @@ asmlinkage long compat_sys_getdents(unsigned int fd, struct compat_linux_dirent __user *dirent, unsigned int count); asmlinkage long compat_sys_getdents64(unsigned int fd, - struct linux_dirent64 __user * dirent, + struct linux_dirent64 __user *dirent, unsigned int count); asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *, unsigned int nr_segs, unsigned int flags); @@ -431,14 +437,15 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, struct compat_timespec __user *tsp, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); -#if (defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)) && !defined(CONFIG_NFSD_DEPRECATED) +#if (defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)) && \ + !defined(CONFIG_NFSD_DEPRECATED) union compat_nfsctl_res; struct compat_nfsctl_arg; asmlinkage long compat_sys_nfsservctl(int cmd, struct compat_nfsctl_arg __user *arg, union compat_nfsctl_res __user *res); #else -long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2); +asmlinkage long compat_sys_nfsservctl(int cmd, void *notused, void *notused2); #endif asmlinkage long compat_sys_signalfd4(int ufd, const compat_sigset_t __user *sigmask, @@ -479,8 +486,8 @@ asmlinkage long compat_sys_setitimer(int which, asmlinkage long compat_sys_times(struct compat_tms __user *tbuf); asmlinkage long compat_sys_setrlimit(unsigned int resource, struct compat_rlimit __user *rlim); -asmlinkage long compat_sys_getrlimit (unsigned int resource, - struct compat_rlimit __user *rlim); +asmlinkage long compat_sys_getrlimit(unsigned int resource, + struct compat_rlimit __user *rlim); asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru); asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid, unsigned int len, @@ -507,7 +514,7 @@ asmlinkage long compat_sys_clock_getres(clockid_t which_clock, asmlinkage long compat_sys_clock_nanosleep(clockid_t which_clock, int flags, struct compat_timespec __user *rqtp, struct compat_timespec __user *rmtp); -asmlinkage long compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, +asmlinkage long compat_sys_rt_sigtimedwait(compat_sigset_t __user *uthese, struct compat_siginfo __user *uinfo, struct compat_timespec __user *uts, compat_size_t sigsetsize); asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, @@ -544,7 +551,8 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args); asmlinkage long compat_sys_sysctl(struct compat_sysctl_args __user *args); extern ssize_t compat_rw_copy_check_uvector(int type, - const struct compat_iovec __user *uvector, unsigned long nr_segs, + const struct compat_iovec __user *uvector, + unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, struct iovec **ret_pointer); -- cgit v1.2.3 From 449f4544267e73d5db372971da63634707c32299 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 May 2011 12:24:16 +0000 Subject: macvlan: remove one synchronize_rcu() call When one macvlan device is dismantled, we can avoid one synchronize_rcu() call done after deletion from hash list, since caller will perform a synchronize_net() call after its ndo_stop() call. Add a new netdev->dismantle field to signal this dismantle intent. Reduces RTNL hold time. Signed-off-by: Eric Dumazet CC: Patrick McHardy CC: Ben Greear Signed-off-by: David S. 
Miller --- drivers/net/macvlan.c | 9 +++++---- include/linux/netdevice.h | 4 +++- net/core/dev.c | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index d7c0bc62da7f..07bcb8084d78 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -70,16 +70,17 @@ static void macvlan_hash_add(struct macvlan_dev *vlan) hlist_add_head_rcu(&vlan->hlist, &port->vlan_hash[addr[5]]); } -static void macvlan_hash_del(struct macvlan_dev *vlan) +static void macvlan_hash_del(struct macvlan_dev *vlan, bool sync) { hlist_del_rcu(&vlan->hlist); - synchronize_rcu(); + if (sync) + synchronize_rcu(); } static void macvlan_hash_change_addr(struct macvlan_dev *vlan, const unsigned char *addr) { - macvlan_hash_del(vlan); + macvlan_hash_del(vlan, true); /* Now that we are unhashed it is safe to change the device * address without confusing packet delivery. */ @@ -345,7 +346,7 @@ static int macvlan_stop(struct net_device *dev) dev_uc_del(lowerdev, dev->dev_addr); hash_del: - macvlan_hash_del(vlan); + macvlan_hash_del(vlan, !dev->dismantle); return 0; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a134d809125b..ca333e79e10f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1293,7 +1293,9 @@ struct net_device { NETREG_UNREGISTERED, /* completed unregister todo */ NETREG_RELEASED, /* called free_netdev */ NETREG_DUMMY, /* dummy device for NAPI poll */ - } reg_state:16; + } reg_state:8; + + bool dismantle; /* device is going do be freed */ enum { RTNL_LINK_INITIALIZED, diff --git a/net/core/dev.c b/net/core/dev.c index 155de2094e71..d94537914a71 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5126,7 +5126,7 @@ static void rollback_registered_many(struct list_head *head) list_del(&dev->unreg_list); continue; } - + dev->dismantle = true; BUG_ON(dev->reg_state != NETREG_REGISTERED); } -- cgit v1.2.3 From 924e7a9fc6da124588e27c611841d07047c157b4 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Fri, 20 May 2011 00:40:15 -0600 Subject: basic_mmio_gpio: allow overriding number of gpio Some platforms may have a number of GPIO that is less than the register width of the peripheral. 
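For illustration, a board file describing such a controller could pass the new field through platform data roughly as in the sketch below. The register address, size and the 20-line count are invented for the example; only the "dat" resource name, the "basic-mmio-gpio" device name and the bgpio_pdata fields come from the driver.

#include <linux/kernel.h>
#include <linux/ioport.h>
#include <linux/platform_device.h>
#include <linux/basic_mmio_gpio.h>

/* Hypothetical controller: one 32-bit data register, only 20 lines wired up. */
static struct resource my_gpio_resources[] = {
	{
		.name	= "dat",
		.start	= 0x10000000,	/* invented base address */
		.end	= 0x10000003,	/* 4 bytes, i.e. 32 register bits */
		.flags	= IORESOURCE_MEM,
	},
};

static struct bgpio_pdata my_gpio_pdata = {
	.base	= -1,	/* let gpiolib pick the GPIO numbers */
	.ngpio	= 20,	/* override the 32-bit register width */
};

static struct platform_device my_gpio_device = {
	.name		= "basic-mmio-gpio",
	.id		= 0,
	.resource	= my_gpio_resources,
	.num_resources	= ARRAY_SIZE(my_gpio_resources),
	.dev		= {
		.platform_data	= &my_gpio_pdata,
	},
};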
Signed-off-by: Jamie Iles Acked-by: Anton Vorontsov Signed-off-by: Grant Likely --- drivers/gpio/basic_mmio_gpio.c | 18 ++++++++++++------ include/linux/basic_mmio_gpio.h | 1 + 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/basic_mmio_gpio.c b/drivers/gpio/basic_mmio_gpio.c index 5db5de438324..2b2d38454463 100644 --- a/drivers/gpio/basic_mmio_gpio.c +++ b/drivers/gpio/basic_mmio_gpio.c @@ -239,6 +239,7 @@ static int __devinit bgpio_probe(struct platform_device *pdev) resource_size_t dat_sz; int bits; int ret; + int ngpio; res_dat = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dat"); if (!res_dat) @@ -249,6 +250,7 @@ static int __devinit bgpio_probe(struct platform_device *pdev) return -EINVAL; bits = dat_sz * 8; + ngpio = bits; if (bits > BITS_PER_LONG) return -EINVAL; @@ -277,13 +279,22 @@ static int __devinit bgpio_probe(struct platform_device *pdev) spin_lock_init(&bgc->lock); + if (pdata) { + bgc->gc.base = pdata->base; + if (pdata->ngpio > 0) + ngpio = pdata->ngpio; + } else { + bgc->gc.base = -1; + } + bgc->bits = bits; ret = bgpio_setup_accessors(pdev, bgc); if (ret) return ret; bgc->data = bgc->read_reg(bgc->reg_dat); - bgc->gc.ngpio = bits; + + bgc->gc.ngpio = ngpio; bgc->gc.direction_input = bgpio_dir_in; bgc->gc.direction_output = bgpio_dir_out; bgc->gc.get = bgpio_get; @@ -291,11 +302,6 @@ static int __devinit bgpio_probe(struct platform_device *pdev) bgc->gc.dev = dev; bgc->gc.label = dev_name(dev); - if (pdata) - bgc->gc.base = pdata->base; - else - bgc->gc.base = -1; - platform_set_drvdata(pdev, bgc); ret = gpiochip_add(&bgc->gc); diff --git a/include/linux/basic_mmio_gpio.h b/include/linux/basic_mmio_gpio.h index 198087a16fc4..f23ec73b944b 100644 --- a/include/linux/basic_mmio_gpio.h +++ b/include/linux/basic_mmio_gpio.h @@ -15,6 +15,7 @@ struct bgpio_pdata { int base; + int ngpio; }; #endif /* __BASIC_MMIO_GPIO_H */ -- cgit v1.2.3 From 280df6b3c3ad777a91f1011cd98d50df891bfef8 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Fri, 20 May 2011 00:40:19 -0600 Subject: basic_mmio_gpio: split into a gpio library and platform device Allow GPIO_BASIC_MMIO_CORE to be used to provide an accessor library for implementing GPIO drivers whilst abstracting the register access detail. Based on a patch from Anton Vorontsov[1] and adapted to allow bgpio_chip to be embedded in another structure. Changes since v1: - Register the gpio_chip in the platform device probe 1. https://lkml.org/lkml/2011/4/19/401 Signed-off-by: Anton Vorontsov Signed-off-by: Jamie Iles Signed-off-by: Grant Likely --- drivers/gpio/Kconfig | 6 + drivers/gpio/Makefile | 1 + drivers/gpio/basic_mmio_gpio.c | 317 +++++++++++++++++++++------------------- include/linux/basic_mmio_gpio.h | 55 +++++++ 4 files changed, 231 insertions(+), 148 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 44235bd4f9dd..b57ec09af891 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -70,8 +70,14 @@ config GPIO_MAX730X comment "Memory mapped GPIO drivers:" +config GPIO_BASIC_MMIO_CORE + tristate + help + Provides core functionality for basic memory-mapped GPIO controllers. + config GPIO_BASIC_MMIO tristate "Basic memory-mapped GPIO controllers support" + select GPIO_BASIC_MMIO_CORE help Say yes here to support basic memory-mapped GPIO controllers. 
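To show what the split buys, a hypothetical SoC GPIO driver could sit on top of the new core through bgpio_init() and bgpio_remove(), whose signatures appear in the diffs below. This is only a sketch under invented assumptions: the register offsets, the single memory resource and the probe flow are made up, while the bgpio_chip, bgpio_init() and gpiochip_add() usage follows the library as added by this patch.

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/io.h>
#include <linux/ioport.h>
#include <linux/platform_device.h>
#include <linux/gpio.h>
#include <linux/basic_mmio_gpio.h>

static int __devinit my_soc_gpio_probe(struct platform_device *pdev)
{
	struct bgpio_chip *bgc;
	struct resource *res;
	void __iomem *base;
	int err;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -EINVAL;

	base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
	if (!base)
		return -ENOMEM;

	bgc = devm_kzalloc(&pdev->dev, sizeof(*bgc), GFP_KERNEL);
	if (!bgc)
		return -ENOMEM;

	/*
	 * Invented layout: 32-bit data register at +0x0, set/clear registers
	 * at +0x4/+0x8, output-direction register at +0xc, little-endian bits.
	 */
	err = bgpio_init(bgc, &pdev->dev, 4,
			 base + 0x0, base + 0x4, base + 0x8,
			 base + 0xc, NULL, false);
	if (err)
		return err;

	platform_set_drvdata(pdev, bgc);
	return gpiochip_add(&bgc->gc);
}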
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 7b2bdceca080..d92ce3a62ae5 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_GPIOLIB) += gpiolib.o obj-$(CONFIG_GPIO_ADP5520) += adp5520-gpio.o obj-$(CONFIG_GPIO_ADP5588) += adp5588-gpio.o +obj-$(CONFIG_GPIO_BASIC_MMIO_CORE) += basic_mmio_gpio.o obj-$(CONFIG_GPIO_BASIC_MMIO) += basic_mmio_gpio.o obj-$(CONFIG_GPIO_LANGWELL) += langwell_gpio.o obj-$(CONFIG_GPIO_MAX730X) += max730x.o diff --git a/drivers/gpio/basic_mmio_gpio.c b/drivers/gpio/basic_mmio_gpio.c index b2ec45ffe447..8152e9f516b0 100644 --- a/drivers/gpio/basic_mmio_gpio.c +++ b/drivers/gpio/basic_mmio_gpio.c @@ -45,6 +45,7 @@ o ` ~~~~\___/~~~~ ` controller in FPGA is ,.` */ #include +#include #include #include #include @@ -61,44 +62,6 @@ o ` ~~~~\___/~~~~ ` controller in FPGA is ,.` #include #include -struct bgpio_chip { - struct gpio_chip gc; - - unsigned long (*read_reg)(void __iomem *reg); - void (*write_reg)(void __iomem *reg, unsigned long data); - - void __iomem *reg_dat; - void __iomem *reg_set; - void __iomem *reg_clr; - void __iomem *reg_dir; - - /* Number of bits (GPIOs): * 8. */ - int bits; - - /* - * Some GPIO controllers work with the big-endian bits notation, - * e.g. in a 8-bits register, GPIO7 is the least significant bit. - */ - unsigned long (*pin2mask)(struct bgpio_chip *bgc, unsigned int pin); - - /* - * Used to lock bgpio_chip->data. Also, this is needed to keep - * shadowed and real data registers writes together. - */ - spinlock_t lock; - - /* Shadowed data register to clear/set bits safely. */ - unsigned long data; - - /* Shadowed direction registers to clear/set direction safely. */ - unsigned long dir; -}; - -static struct bgpio_chip *to_bgpio_chip(struct gpio_chip *gc) -{ - return container_of(gc, struct bgpio_chip, gc); -} - static void bgpio_write8(void __iomem *reg, unsigned long data) { writeb(data, reg); @@ -284,20 +247,10 @@ static int bgpio_dir_out_inv(struct gpio_chip *gc, unsigned int gpio, int val) return 0; } -static void __iomem *bgpio_request_and_map(struct device *dev, - struct resource *res) -{ - if (!devm_request_mem_region(dev, res->start, resource_size(res), - res->name ?: "mmio_gpio")) - return NULL; - - return devm_ioremap(dev, res->start, resource_size(res)); -} - -static int bgpio_setup_accessors(struct platform_device *pdev, - struct bgpio_chip *bgc) +static int bgpio_setup_accessors(struct device *dev, + struct bgpio_chip *bgc, + bool be) { - const struct platform_device_id *platid = platform_get_device_id(pdev); switch (bgc->bits) { case 8: @@ -319,13 +272,11 @@ static int bgpio_setup_accessors(struct platform_device *pdev, break; #endif /* BITS_PER_LONG >= 64 */ default: - dev_err(&pdev->dev, "unsupported data width %u bits\n", - bgc->bits); + dev_err(dev, "unsupported data width %u bits\n", bgc->bits); return -EINVAL; } - bgc->pin2mask = strcmp(platid->name, "basic-mmio-gpio-be") ? - bgpio_pin2mask : bgpio_pin2mask_be; + bgc->pin2mask = be ? bgpio_pin2mask_be : bgpio_pin2mask; return 0; } @@ -352,51 +303,22 @@ static int bgpio_setup_accessors(struct platform_device *pdev, * - an input direction register (named "dirin") where a 1 bit indicates * the GPIO is an input. 
*/ -static int bgpio_setup_io(struct platform_device *pdev, - struct bgpio_chip *bgc) +static int bgpio_setup_io(struct bgpio_chip *bgc, + void __iomem *dat, + void __iomem *set, + void __iomem *clr) { - struct resource *res_set; - struct resource *res_clr; - struct resource *res_dat; - resource_size_t dat_sz; - res_dat = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dat"); - if (!res_dat) - return -EINVAL; - - dat_sz = resource_size(res_dat); - if (!is_power_of_2(dat_sz)) - return -EINVAL; - - bgc->bits = dat_sz * 8; - if (bgc->bits > BITS_PER_LONG) - return -EINVAL; - - bgc->reg_dat = bgpio_request_and_map(&pdev->dev, res_dat); + bgc->reg_dat = dat; if (!bgc->reg_dat) - return -ENOMEM; - - res_set = platform_get_resource_byname(pdev, IORESOURCE_MEM, "set"); - res_clr = platform_get_resource_byname(pdev, IORESOURCE_MEM, "clr"); - if (res_set && res_clr) { - if (resource_size(res_set) != resource_size(res_clr) || - resource_size(res_set) != resource_size(res_dat)) - return -EINVAL; - - bgc->reg_set = bgpio_request_and_map(&pdev->dev, res_set); - bgc->reg_clr = bgpio_request_and_map(&pdev->dev, res_clr); - if (!bgc->reg_set || !bgc->reg_clr) - return -ENOMEM; + return -EINVAL; + if (set && clr) { + bgc->reg_set = set; + bgc->reg_clr = clr; bgc->gc.set = bgpio_set_with_clear; - } else if (res_set && !res_clr) { - if (resource_size(res_set) != resource_size(res_dat)) - return -EINVAL; - - bgc->reg_set = bgpio_request_and_map(&pdev->dev, res_set); - if (!bgc->reg_set) - return -ENOMEM; - + } else if (set && !clr) { + bgc->reg_set = set; bgc->gc.set = bgpio_set_set; } else { bgc->gc.set = bgpio_set; @@ -407,27 +329,18 @@ static int bgpio_setup_io(struct platform_device *pdev, return 0; } -static int bgpio_setup_direction(struct platform_device *pdev, - struct bgpio_chip *bgc) +static int bgpio_setup_direction(struct bgpio_chip *bgc, + void __iomem *dirout, + void __iomem *dirin) { - struct resource *res_dirout; - struct resource *res_dirin; - - res_dirout = platform_get_resource_byname(pdev, IORESOURCE_MEM, - "dirout"); - res_dirin = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dirin"); - if (res_dirout && res_dirin) { + if (dirout && dirin) { return -EINVAL; - } else if (res_dirout) { - bgc->reg_dir = bgpio_request_and_map(&pdev->dev, res_dirout); - if (!bgc->reg_dir) - return -ENOMEM; + } else if (dirout) { + bgc->reg_dir = dirout; bgc->gc.direction_output = bgpio_dir_out; bgc->gc.direction_input = bgpio_dir_in; - } else if (res_dirin) { - bgc->reg_dir = bgpio_request_and_map(&pdev->dev, res_dirin); - if (!bgc->reg_dir) - return -ENOMEM; + } else if (dirin) { + bgc->reg_dir = dirin; bgc->gc.direction_output = bgpio_dir_out_inv; bgc->gc.direction_input = bgpio_dir_in_inv; } else { @@ -438,60 +351,166 @@ static int bgpio_setup_direction(struct platform_device *pdev, return 0; } -static int __devinit bgpio_probe(struct platform_device *pdev) +int __devexit bgpio_remove(struct bgpio_chip *bgc) +{ + int err = gpiochip_remove(&bgc->gc); + + kfree(bgc); + + return err; +} +EXPORT_SYMBOL_GPL(bgpio_remove); + +int __devinit bgpio_init(struct bgpio_chip *bgc, + struct device *dev, + unsigned long sz, + void __iomem *dat, + void __iomem *set, + void __iomem *clr, + void __iomem *dirout, + void __iomem *dirin, + bool big_endian) { - struct device *dev = &pdev->dev; - struct bgpio_pdata *pdata = dev_get_platdata(dev); - struct bgpio_chip *bgc; int ret; - int ngpio; - bgc = devm_kzalloc(dev, sizeof(*bgc), GFP_KERNEL); - if (!bgc) - return -ENOMEM; + if (!is_power_of_2(sz)) + return -EINVAL; - 
ret = bgpio_setup_io(pdev, bgc); + bgc->bits = sz * 8; + if (bgc->bits > BITS_PER_LONG) + return -EINVAL; + + spin_lock_init(&bgc->lock); + bgc->gc.dev = dev; + bgc->gc.label = dev_name(dev); + bgc->gc.base = -1; + bgc->gc.ngpio = bgc->bits; + + ret = bgpio_setup_io(bgc, dat, set, clr); if (ret) return ret; - ngpio = bgc->bits; - if (pdata) { - bgc->gc.base = pdata->base; - if (pdata->ngpio > 0) - ngpio = pdata->ngpio; - } else { - bgc->gc.base = -1; - } - - ret = bgpio_setup_accessors(pdev, bgc); + ret = bgpio_setup_accessors(dev, bgc, big_endian); if (ret) return ret; - spin_lock_init(&bgc->lock); - ret = bgpio_setup_direction(pdev, bgc); + ret = bgpio_setup_direction(bgc, dirout, dirin); if (ret) return ret; bgc->data = bgc->read_reg(bgc->reg_dat); - bgc->gc.ngpio = ngpio; - bgc->gc.dev = dev; - bgc->gc.label = dev_name(dev); + return ret; +} +EXPORT_SYMBOL_GPL(bgpio_init); - platform_set_drvdata(pdev, bgc); +#ifdef CONFIG_GPIO_BASIC_MMIO - ret = gpiochip_add(&bgc->gc); - if (ret) - dev_err(dev, "gpiochip_add() failed: %d\n", ret); +static void __iomem *bgpio_map(struct platform_device *pdev, + const char *name, + resource_size_t sane_sz, + int *err) +{ + struct device *dev = &pdev->dev; + struct resource *r; + resource_size_t start; + resource_size_t sz; + void __iomem *ret; + + *err = 0; + + r = platform_get_resource_byname(pdev, IORESOURCE_MEM, name); + if (!r) + return NULL; + + sz = resource_size(r); + if (sz != sane_sz) { + *err = -EINVAL; + return NULL; + } + + start = r->start; + if (!devm_request_mem_region(dev, start, sz, r->name)) { + *err = -EBUSY; + return NULL; + } + + ret = devm_ioremap(dev, start, sz); + if (!ret) { + *err = -ENOMEM; + return NULL; + } return ret; } -static int __devexit bgpio_remove(struct platform_device *pdev) +static int __devinit bgpio_pdev_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct resource *r; + void __iomem *dat; + void __iomem *set; + void __iomem *clr; + void __iomem *dirout; + void __iomem *dirin; + unsigned long sz; + bool be; + int err; + struct bgpio_chip *bgc; + struct bgpio_pdata *pdata = dev_get_platdata(dev); + + r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dat"); + if (!r) + return -EINVAL; + + sz = resource_size(r); + + dat = bgpio_map(pdev, "dat", sz, &err); + if (!dat) + return err ? 
err : -EINVAL; + + set = bgpio_map(pdev, "set", sz, &err); + if (err) + return err; + + clr = bgpio_map(pdev, "clr", sz, &err); + if (err) + return err; + + dirout = bgpio_map(pdev, "dirout", sz, &err); + if (err) + return err; + + dirin = bgpio_map(pdev, "dirin", sz, &err); + if (err) + return err; + + be = !strcmp(platform_get_device_id(pdev)->name, "basic-mmio-gpio-be"); + + bgc = devm_kzalloc(&pdev->dev, sizeof(*bgc), GFP_KERNEL); + if (!bgc) + return -ENOMEM; + + err = bgpio_init(bgc, dev, sz, dat, set, clr, dirout, dirin, be); + if (err) + return err; + + if (pdata) { + bgc->gc.base = pdata->base; + if (pdata->ngpio > 0) + bgc->gc.ngpio = pdata->ngpio; + } + + platform_set_drvdata(pdev, bgc); + + return gpiochip_add(&bgc->gc); +} + +static int __devexit bgpio_pdev_remove(struct platform_device *pdev) { struct bgpio_chip *bgc = platform_get_drvdata(pdev); - return gpiochip_remove(&bgc->gc); + return bgpio_remove(bgc); } static const struct platform_device_id bgpio_id_table[] = { @@ -506,21 +525,23 @@ static struct platform_driver bgpio_driver = { .name = "basic-mmio-gpio", }, .id_table = bgpio_id_table, - .probe = bgpio_probe, - .remove = __devexit_p(bgpio_remove), + .probe = bgpio_pdev_probe, + .remove = __devexit_p(bgpio_pdev_remove), }; -static int __init bgpio_init(void) +static int __init bgpio_platform_init(void) { return platform_driver_register(&bgpio_driver); } -module_init(bgpio_init); +module_init(bgpio_platform_init); -static void __exit bgpio_exit(void) +static void __exit bgpio_platform_exit(void) { platform_driver_unregister(&bgpio_driver); } -module_exit(bgpio_exit); +module_exit(bgpio_platform_exit); + +#endif /* CONFIG_GPIO_BASIC_MMIO */ MODULE_DESCRIPTION("Driver for basic memory-mapped GPIO controllers"); MODULE_AUTHOR("Anton Vorontsov "); diff --git a/include/linux/basic_mmio_gpio.h b/include/linux/basic_mmio_gpio.h index f23ec73b944b..1ae12710d732 100644 --- a/include/linux/basic_mmio_gpio.h +++ b/include/linux/basic_mmio_gpio.h @@ -13,9 +13,64 @@ #ifndef __BASIC_MMIO_GPIO_H #define __BASIC_MMIO_GPIO_H +#include +#include +#include + struct bgpio_pdata { int base; int ngpio; }; +struct device; + +struct bgpio_chip { + struct gpio_chip gc; + + unsigned long (*read_reg)(void __iomem *reg); + void (*write_reg)(void __iomem *reg, unsigned long data); + + void __iomem *reg_dat; + void __iomem *reg_set; + void __iomem *reg_clr; + void __iomem *reg_dir; + + /* Number of bits (GPIOs): * 8. */ + int bits; + + /* + * Some GPIO controllers work with the big-endian bits notation, + * e.g. in a 8-bits register, GPIO7 is the least significant bit. + */ + unsigned long (*pin2mask)(struct bgpio_chip *bgc, unsigned int pin); + + /* + * Used to lock bgpio_chip->data. Also, this is needed to keep + * shadowed and real data registers writes together. + */ + spinlock_t lock; + + /* Shadowed data register to clear/set bits safely. */ + unsigned long data; + + /* Shadowed direction registers to clear/set direction safely. 
*/ + unsigned long dir; +}; + +static inline struct bgpio_chip *to_bgpio_chip(struct gpio_chip *gc) +{ + return container_of(gc, struct bgpio_chip, gc); +} + +int __devexit bgpio_remove(struct bgpio_chip *bgc); +int __devinit bgpio_init(struct bgpio_chip *bgc, + struct device *dev, + unsigned long sz, + void __iomem *dat, + void __iomem *set, + void __iomem *clr, + void __iomem *dirout, + void __iomem *dirin, + bool big_endian); + #endif /* __BASIC_MMIO_GPIO_H */ -- cgit v1.2.3 From 0c4a1590189b426814748d2e03b95541852b3af6 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 11 May 2011 00:09:30 +0200 Subject: spi: Use void pointers for data in simple SPI I/O operations Currently the simple SPI I/O operations all take pointers to u8 * buffers to operate on. This creates needless type compatibility issues and the underlying spi_transfer structure uses void pointers anyway so convert the API over to take void pointers too. Signed-off-by: Mark Brown Signed-off-by: Grant Likely --- drivers/spi/spi.c | 4 ++-- include/linux/spi/spi.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 82b9a428c323..2e13a14bba3f 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1047,8 +1047,8 @@ static u8 *buf; * spi_{async,sync}() calls with dma-safe buffers. */ int spi_write_then_read(struct spi_device *spi, - const u8 *txbuf, unsigned n_tx, - u8 *rxbuf, unsigned n_rx) + const void *txbuf, unsigned n_tx, + void *rxbuf, unsigned n_rx) { static DEFINE_MUTEX(lock); diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index b4d7710bc38d..bb4f5fbbbd8e 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -581,7 +581,7 @@ extern int spi_bus_unlock(struct spi_master *master); * Callable only from contexts that can sleep. */ static inline int -spi_write(struct spi_device *spi, const u8 *buf, size_t len) +spi_write(struct spi_device *spi, const void *buf, size_t len) { struct spi_transfer t = { .tx_buf = buf, @@ -605,7 +605,7 @@ spi_write(struct spi_device *spi, const u8 *buf, size_t len) * Callable only from contexts that can sleep. */ static inline int -spi_read(struct spi_device *spi, u8 *buf, size_t len) +spi_read(struct spi_device *spi, void *buf, size_t len) { struct spi_transfer t = { .rx_buf = buf, @@ -620,8 +620,8 @@ spi_read(struct spi_device *spi, u8 *buf, size_t len) /* this copies txbuf and rxbuf data; for small transfers only! */ extern int spi_write_then_read(struct spi_device *spi, - const u8 *txbuf, unsigned n_tx, - u8 *rxbuf, unsigned n_rx); + const void *txbuf, unsigned n_tx, + void *rxbuf, unsigned n_rx); /** * spi_w8r8 - SPI synchronous 8 bit write followed by 8 bit read -- cgit v1.2.3 From 1399fa7807a1a5998bbf147e80668e9950661dfa Mon Sep 17 00:00:00 2001 From: Nikhil Rao Date: Wed, 18 May 2011 10:09:39 -0700 Subject: sched: Introduce SCHED_POWER_SCALE to scale cpu_power calculations SCHED_LOAD_SCALE is used to increase nice resolution and to scale cpu_power calculations in the scheduler. This patch introduces SCHED_POWER_SCALE and converts all uses of SCHED_LOAD_SCALE for scaling cpu_power to use SCHED_POWER_SCALE instead. This is a preparatory patch for increasing the resolution of SCHED_LOAD_SCALE, and there is no need to increase resolution for cpu_power calculations. Signed-off-by: Nikhil Rao Acked-by: Peter Zijlstra Cc: Nikunj A. 
Dadhania Cc: Srivatsa Vaddagiri Cc: Stephan Barwolf Cc: Mike Galbraith Link: http://lkml.kernel.org/r/1305738580-9924-3-git-send-email-ncrao@google.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 13 ++++++++----- kernel/sched.c | 4 ++-- kernel/sched_fair.c | 52 ++++++++++++++++++++++++++------------------------- 3 files changed, 37 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 12211e1666e2..f2f440221b70 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -787,18 +787,21 @@ enum cpu_idle_type { CPU_MAX_IDLE_TYPES }; -/* - * sched-domains (multiprocessor balancing) declarations: - */ - /* * Increase resolution of nice-level calculations: */ #define SCHED_LOAD_SHIFT 10 #define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT) -#define SCHED_LOAD_SCALE_FUZZ SCHED_LOAD_SCALE +/* + * Increase resolution of cpu_power calculations + */ +#define SCHED_POWER_SHIFT 10 +#define SCHED_POWER_SCALE (1L << SCHED_POWER_SHIFT) +/* + * sched-domains (multiprocessor balancing) declarations: + */ #ifdef CONFIG_SMP #define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */ #define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */ diff --git a/kernel/sched.c b/kernel/sched.c index d036048b59a4..375e9c677d58 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6530,7 +6530,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); printk(KERN_CONT " %s", str); - if (group->cpu_power != SCHED_LOAD_SCALE) { + if (group->cpu_power != SCHED_POWER_SCALE) { printk(KERN_CONT " (cpu_power = %d)", group->cpu_power); } @@ -7905,7 +7905,7 @@ void __init sched_init(void) #ifdef CONFIG_SMP rq->sd = NULL; rq->rd = NULL; - rq->cpu_power = SCHED_LOAD_SCALE; + rq->cpu_power = SCHED_POWER_SCALE; rq->post_schedule = 0; rq->active_balance = 0; rq->next_balance = jiffies; diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 37f22626225e..e32a9b70ee9c 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1584,7 +1584,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, } /* Adjust by relative CPU power of the group */ - avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power; + avg_load = (avg_load * SCHED_POWER_SCALE) / group->cpu_power; if (local_group) { this_load = avg_load; @@ -1722,7 +1722,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) nr_running += cpu_rq(i)->cfs.nr_running; } - capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE); + capacity = DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE); if (tmp->flags & SD_POWERSAVINGS_BALANCE) nr_running /= 2; @@ -2570,7 +2570,7 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds, unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu) { - return SCHED_LOAD_SCALE; + return SCHED_POWER_SCALE; } unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu) @@ -2607,10 +2607,10 @@ unsigned long scale_rt_power(int cpu) available = total - rq->rt_avg; } - if (unlikely((s64)total < SCHED_LOAD_SCALE)) - total = SCHED_LOAD_SCALE; + if (unlikely((s64)total < SCHED_POWER_SCALE)) + total = SCHED_POWER_SCALE; - total >>= SCHED_LOAD_SHIFT; + total >>= SCHED_POWER_SHIFT; return div_u64(available, total); } @@ -2618,7 +2618,7 @@ unsigned long scale_rt_power(int cpu) static void update_cpu_power(struct sched_domain *sd, int cpu) { unsigned long weight = sd->span_weight; - 
unsigned long power = SCHED_LOAD_SCALE; + unsigned long power = SCHED_POWER_SCALE; struct sched_group *sdg = sd->groups; if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) { @@ -2627,7 +2627,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) else power *= default_scale_smt_power(sd, cpu); - power >>= SCHED_LOAD_SHIFT; + power >>= SCHED_POWER_SHIFT; } sdg->cpu_power_orig = power; @@ -2637,10 +2637,10 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) else power *= default_scale_freq_power(sd, cpu); - power >>= SCHED_LOAD_SHIFT; + power >>= SCHED_POWER_SHIFT; power *= scale_rt_power(cpu); - power >>= SCHED_LOAD_SHIFT; + power >>= SCHED_POWER_SHIFT; if (!power) power = 1; @@ -2682,7 +2682,7 @@ static inline int fix_small_capacity(struct sched_domain *sd, struct sched_group *group) { /* - * Only siblings can have significantly less than SCHED_LOAD_SCALE + * Only siblings can have significantly less than SCHED_POWER_SCALE */ if (!(sd->flags & SD_SHARE_CPUPOWER)) return 0; @@ -2770,7 +2770,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, } /* Adjust by relative CPU power of the group */ - sgs->avg_load = (sgs->group_load * SCHED_LOAD_SCALE) / group->cpu_power; + sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->cpu_power; /* * Consider the group unbalanced when the imbalance is larger @@ -2787,7 +2787,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1) sgs->group_imb = 1; - sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); + sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, + SCHED_POWER_SCALE); if (!sgs->group_capacity) sgs->group_capacity = fix_small_capacity(sd, group); sgs->group_weight = group->group_weight; @@ -2961,7 +2962,7 @@ static int check_asym_packing(struct sched_domain *sd, return 0; *imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->cpu_power, - SCHED_LOAD_SCALE); + SCHED_POWER_SCALE); return 1; } @@ -2990,7 +2991,7 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds, cpu_avg_load_per_task(this_cpu); scaled_busy_load_per_task = sds->busiest_load_per_task - * SCHED_LOAD_SCALE; + * SCHED_POWER_SCALE; scaled_busy_load_per_task /= sds->busiest->cpu_power; if (sds->max_load - sds->this_load + scaled_busy_load_per_task >= @@ -3009,10 +3010,10 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds, min(sds->busiest_load_per_task, sds->max_load); pwr_now += sds->this->cpu_power * min(sds->this_load_per_task, sds->this_load); - pwr_now /= SCHED_LOAD_SCALE; + pwr_now /= SCHED_POWER_SCALE; /* Amount of load we'd subtract */ - tmp = (sds->busiest_load_per_task * SCHED_LOAD_SCALE) / + tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) / sds->busiest->cpu_power; if (sds->max_load > tmp) pwr_move += sds->busiest->cpu_power * @@ -3020,15 +3021,15 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds, /* Amount of load we'd add */ if (sds->max_load * sds->busiest->cpu_power < - sds->busiest_load_per_task * SCHED_LOAD_SCALE) + sds->busiest_load_per_task * SCHED_POWER_SCALE) tmp = (sds->max_load * sds->busiest->cpu_power) / sds->this->cpu_power; else - tmp = (sds->busiest_load_per_task * SCHED_LOAD_SCALE) / + tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) / sds->this->cpu_power; pwr_move += sds->this->cpu_power * min(sds->this_load_per_task, sds->this_load + tmp); - pwr_move /= SCHED_LOAD_SCALE; + pwr_move /= SCHED_POWER_SCALE; /* Move if we gain 
throughput */ if (pwr_move > pwr_now) @@ -3070,7 +3071,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu, load_above_capacity = (sds->busiest_nr_running - sds->busiest_group_capacity); - load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_LOAD_SCALE); + load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE); load_above_capacity /= sds->busiest->cpu_power; } @@ -3090,7 +3091,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu, /* How much load to actually move to equalise the imbalance */ *imbalance = min(max_pull * sds->busiest->cpu_power, (sds->avg_load - sds->this_load) * sds->this->cpu_power) - / SCHED_LOAD_SCALE; + / SCHED_POWER_SCALE; /* * if *imbalance is less than the average load per runnable task @@ -3159,7 +3160,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, if (!sds.busiest || sds.busiest_nr_running == 0) goto out_balanced; - sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr; + sds.avg_load = (SCHED_POWER_SCALE * sds.total_load) / sds.total_pwr; /* * If the busiest group is imbalanced the below checks don't @@ -3238,7 +3239,7 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group, for_each_cpu(i, sched_group_cpus(group)) { unsigned long power = power_of(i); - unsigned long capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE); + unsigned long capacity = DIV_ROUND_CLOSEST(power, + SCHED_POWER_SCALE); unsigned long wl; if (!capacity) @@ -3263,7 +3265,7 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group, * the load can be moved away from the cpu that is potentially * running at a lower capacity. */ - wl = (wl * SCHED_LOAD_SCALE) / power; + wl = (wl * SCHED_POWER_SCALE) / power; if (wl > max_load) { max_load = wl; -- cgit v1.2.3 From c8b281161dfa4bb5d5be63fb036ce19347b88c63 Mon Sep 17 00:00:00 2001 From: Nikhil Rao Date: Wed, 18 May 2011 14:37:48 -0700 Subject: sched: Increase SCHED_LOAD_SCALE resolution Introduce SCHED_LOAD_RESOLUTION, which is added to SCHED_LOAD_SHIFT and increases the resolution of SCHED_LOAD_SCALE. This patch sets the value of SCHED_LOAD_RESOLUTION to 10, scaling up the weights for all sched entities by a factor of 1024. With this extra resolution, we can handle deeper cgroup hierarchies and the scheduler can do better shares distribution and load balancing on larger systems (especially for low weight task groups). This does not change the existing user interface, the scaled weights are only used internally. We do not modify prio_to_weight values or inverses, but use the original weights when calculating the inverse which is used to scale execution time delta in calc_delta_mine(). This ensures we do not lose accuracy when accounting time to the sched entities. Thanks to Nikunj Dadhania for fixing a bug in c_d_m() that broke fairness. Below is some analysis of the performance costs/improvements of this patch. 1. Micro-arch performance costs: Experiment was to run Ingo's pipe_test_100k 200 times with the task pinned to one cpu. I measured instructions, cycles and stalled-cycles for the runs. See: http://thread.gmane.org/gmane.linux.kernel/1129232/focus=1129389 for more info. 
-tip (baseline): Performance counter stats for '/root/load-scale/pipe-test-100k' (200 runs): 964,991,769 instructions # 0.82 insns per cycle # 0.33 stalled cycles per insn # ( +- 0.05% ) 1,171,186,635 cycles # 0.000 GHz ( +- 0.08% ) 306,373,664 stalled-cycles-backend # 26.16% backend cycles idle ( +- 0.28% ) 314,933,621 stalled-cycles-frontend # 26.89% frontend cycles idle ( +- 0.34% ) 1.122405684 seconds time elapsed ( +- 0.05% ) -tip+patches: Performance counter stats for './load-scale/pipe-test-100k' (200 runs): 963,624,821 instructions # 0.82 insns per cycle # 0.33 stalled cycles per insn # ( +- 0.04% ) 1,175,215,649 cycles # 0.000 GHz ( +- 0.08% ) 315,321,126 stalled-cycles-backend # 26.83% backend cycles idle ( +- 0.28% ) 316,835,873 stalled-cycles-frontend # 26.96% frontend cycles idle ( +- 0.29% ) 1.122238659 seconds time elapsed ( +- 0.06% ) With this patch, instructions decrease by ~0.10% and cycles increase by 0.27%. This doesn't look statistically significant. The number of stalled cycles in the backend increased from 26.16% to 26.83%. This can be attributed to the shifts we do in c_d_m() and other places. The fraction of stalled cycles in the frontend remains about the same, at 26.96% compared to 26.89% in -tip. 2. Balancing low-weight task groups Test setup: run 50 tasks with random sleep/busy times (biased around 100ms) in a low weight container (with cpu.shares = 2). Measure %idle as reported by mpstat over a 10s window. -tip (baseline): 06:47:48 PM CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle intr/s 06:47:49 PM all 94.32 0.00 0.06 0.00 0.00 0.00 0.00 0.00 5.62 15888.00 06:47:50 PM all 94.57 0.00 0.62 0.00 0.00 0.00 0.00 0.00 4.81 16180.00 06:47:51 PM all 94.69 0.00 0.06 0.00 0.00 0.00 0.00 0.00 5.25 15966.00 06:47:52 PM all 95.81 0.00 0.00 0.00 0.00 0.00 0.00 0.00 4.19 16053.00 06:47:53 PM all 94.88 0.06 0.00 0.00 0.00 0.00 0.00 0.00 5.06 15984.00 06:47:54 PM all 93.31 0.00 0.00 0.00 0.00 0.00 0.00 0.00 6.69 15806.00 06:47:55 PM all 94.19 0.00 0.06 0.00 0.00 0.00 0.00 0.00 5.75 15896.00 06:47:56 PM all 92.87 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.13 15716.00 06:47:57 PM all 94.88 0.00 0.00 0.00 0.00 0.00 0.00 0.00 5.12 15982.00 06:47:58 PM all 95.44 0.00 0.00 0.00 0.00 0.00 0.00 0.00 4.56 16075.00 Average: all 94.49 0.01 0.08 0.00 0.00 0.00 0.00 0.00 5.42 15954.60 -tip+patches: 06:47:03 PM CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle intr/s 06:47:04 PM all 100.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 16630.00 06:47:05 PM all 99.69 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.31 16580.20 06:47:06 PM all 99.69 0.00 0.06 0.00 0.00 0.00 0.00 0.00 0.25 16596.00 06:47:07 PM all 99.20 0.00 0.74 0.00 0.00 0.06 0.00 0.00 0.00 17838.61 06:47:08 PM all 100.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 16540.00 06:47:09 PM all 100.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 16575.00 06:47:10 PM all 100.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 16614.00 06:47:11 PM all 99.94 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.06 16588.00 06:47:12 PM all 99.94 0.00 0.06 0.00 0.00 0.00 0.00 0.00 0.00 16593.00 06:47:13 PM all 99.94 0.00 0.06 0.00 0.00 0.00 0.00 0.00 0.00 16551.00 Average: all 99.84 0.00 0.09 0.00 0.00 0.01 0.00 0.00 0.06 16711.58 We see an improvement in idle% on the system (drops from 5.42% on -tip to 0.06% with the patches). We see an improvement in idle% on the system (drops from 5.42% on -tip to 0.06% with the patches). Signed-off-by: Nikhil Rao Acked-by: Peter Zijlstra Cc: Nikunj A. 
Dadhania Cc: Srivatsa Vaddagiri Cc: Stephan Barwolf Cc: Mike Galbraith Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1305754668-18792-1-git-send-email-ncrao@google.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 23 +++++++++++++++++++++-- kernel/sched.c | 28 ++++++++++++++++++++-------- 2 files changed, 41 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f2f440221b70..c34a718e20dd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -788,9 +788,28 @@ enum cpu_idle_type { }; /* - * Increase resolution of nice-level calculations: + * Increase resolution of nice-level calculations for 64-bit architectures. + * The extra resolution improves shares distribution and load balancing of + * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup + * hierarchies, especially on larger systems. This is not a user-visible change + * and does not change the user-interface for setting shares/weights. + * + * We increase resolution only if we have enough bits to allow this increased + * resolution (i.e. BITS_PER_LONG > 32). The costs for increasing resolution + * when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the + * increased costs. */ -#define SCHED_LOAD_SHIFT 10 +#if BITS_PER_LONG > 32 +# define SCHED_LOAD_RESOLUTION 10 +# define scale_load(w) ((w) << SCHED_LOAD_RESOLUTION) +# define scale_load_down(w) ((w) >> SCHED_LOAD_RESOLUTION) +#else +# define SCHED_LOAD_RESOLUTION 0 +# define scale_load(w) (w) +# define scale_load_down(w) (w) +#endif + +#define SCHED_LOAD_SHIFT (10 + SCHED_LOAD_RESOLUTION) #define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT) /* diff --git a/kernel/sched.c b/kernel/sched.c index 375e9c677d58..bb504e1839e5 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -293,7 +293,7 @@ static DEFINE_SPINLOCK(task_group_lock); * limitation from this.) */ #define MIN_SHARES 2 -#define MAX_SHARES (1UL << 18) +#define MAX_SHARES (1UL << (18 + SCHED_LOAD_RESOLUTION)) static int root_task_group_load = ROOT_TASK_GROUP_LOAD; #endif @@ -1330,13 +1330,25 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight, { u64 tmp; - tmp = (u64)delta_exec * weight; + /* + * weight can be less than 2^SCHED_LOAD_RESOLUTION for task group sched + * entities since MIN_SHARES = 2. Treat weight as 1 if less than + * 2^SCHED_LOAD_RESOLUTION. 
+ */ + if (likely(weight > (1UL << SCHED_LOAD_RESOLUTION))) + tmp = (u64)delta_exec * scale_load_down(weight); + else + tmp = (u64)delta_exec; if (!lw->inv_weight) { - if (BITS_PER_LONG > 32 && unlikely(lw->weight >= WMULT_CONST)) + unsigned long w = scale_load_down(lw->weight); + + if (BITS_PER_LONG > 32 && unlikely(w >= WMULT_CONST)) lw->inv_weight = 1; + else if (unlikely(!w)) + lw->inv_weight = WMULT_CONST; else - lw->inv_weight = WMULT_CONST / lw->weight; + lw->inv_weight = WMULT_CONST / w; } /* @@ -1785,12 +1797,12 @@ static void set_load_weight(struct task_struct *p) * SCHED_IDLE tasks get minimal weight: */ if (p->policy == SCHED_IDLE) { - load->weight = WEIGHT_IDLEPRIO; + load->weight = scale_load(WEIGHT_IDLEPRIO); load->inv_weight = WMULT_IDLEPRIO; return; } - load->weight = prio_to_weight[prio]; + load->weight = scale_load(prio_to_weight[prio]); load->inv_weight = prio_to_wmult[prio]; } @@ -8809,14 +8821,14 @@ cpu_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype, u64 shareval) { - return sched_group_set_shares(cgroup_tg(cgrp), shareval); + return sched_group_set_shares(cgroup_tg(cgrp), scale_load(shareval)); } static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft) { struct task_group *tg = cgroup_tg(cgrp); - return (u64) tg->shares; + return (u64) scale_load_down(tg->shares); } #endif /* CONFIG_FAIR_GROUP_SCHED */ -- cgit v1.2.3 From 8bb36c2139f7bcea32a78472272f1d0de3b00f7b Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Thu, 7 Apr 2011 12:45:51 -0300 Subject: [media] Add Y10B, a 10 bpp bit-packed greyscale format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a 10 bits per pixel greyscale format in a bit-packed array representation, naming it Y10B. Such pixel format is supplied for instance by the Kinect sensor device. Signed-off-by: Antonio Ospite Signed-off-by: Jean-François Moine Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media-entities.tmpl | 1 + Documentation/DocBook/v4l/pixfmt-y10b.xml | 43 +++++++++++++++++++++++++++++++ Documentation/DocBook/v4l/pixfmt.xml | 1 + Documentation/DocBook/v4l/videodev2.h.xml | 3 +++ include/linux/videodev2.h | 3 +++ 5 files changed, 51 insertions(+) create mode 100644 Documentation/DocBook/v4l/pixfmt-y10b.xml (limited to 'include/linux') diff --git a/Documentation/DocBook/media-entities.tmpl b/Documentation/DocBook/media-entities.tmpl index fea63b45471a..7a9570887d21 100644 --- a/Documentation/DocBook/media-entities.tmpl +++ b/Documentation/DocBook/media-entities.tmpl @@ -295,6 +295,7 @@ + diff --git a/Documentation/DocBook/v4l/pixfmt-y10b.xml b/Documentation/DocBook/v4l/pixfmt-y10b.xml new file mode 100644 index 000000000000..adb0ad808c93 --- /dev/null +++ b/Documentation/DocBook/v4l/pixfmt-y10b.xml @@ -0,0 +1,43 @@ + + + V4L2_PIX_FMT_Y10BPACK ('Y10B') + &manvol; + + + V4L2_PIX_FMT_Y10BPACK + Grey-scale image as a bit-packed array + + + Description + + This is a packed grey-scale image format with a depth of 10 bits per + pixel. Pixels are stored in a bit-packed array of 10bit bits per pixel, + with no padding between them and with the most significant bits coming + first from the left. + + + <constant>V4L2_PIX_FMT_Y10BPACK</constant> 4 pixel data stream taking 5 bytes + + + Bit-packed representation + pixels cross the byte boundary and have a ratio of 5 bytes for each 4 + pixels. 
+ + + + + + Y'00[9:2] + Y'00[1:0]Y'01[9:4] + Y'01[3:0]Y'02[9:6] + Y'02[5:0]Y'03[9:8] + Y'03[7:0] + + + + + + + + + diff --git a/Documentation/DocBook/v4l/pixfmt.xml b/Documentation/DocBook/v4l/pixfmt.xml index 40af4beb48b9..3486a068fe46 100644 --- a/Documentation/DocBook/v4l/pixfmt.xml +++ b/Documentation/DocBook/v4l/pixfmt.xml @@ -697,6 +697,7 @@ information. &sub-grey; &sub-y10; &sub-y12; + &sub-y10b; &sub-y16; &sub-yuyv; &sub-uyvy; diff --git a/Documentation/DocBook/v4l/videodev2.h.xml b/Documentation/DocBook/v4l/videodev2.h.xml index 2b796a2ee98a..937acf54da9c 100644 --- a/Documentation/DocBook/v4l/videodev2.h.xml +++ b/Documentation/DocBook/v4l/videodev2.h.xml @@ -311,6 +311,9 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_Y10 v4l2_fourcc('Y', '1', '0', ' ') /* 10 Greyscale */ #define V4L2_PIX_FMT_Y16 v4l2_fourcc('Y', '1', '6', ' ') /* 16 Greyscale */ +/* Grey bit-packed formats */ +#define V4L2_PIX_FMT_Y10BPACK v4l2_fourcc('Y', '1', '0', 'B') /* 10 Greyscale bit-packed */ + /* Palette formats */ #define V4L2_PIX_FMT_PAL8 v4l2_fourcc('P', 'A', 'L', '8') /* 8 8-bit palette */ diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index be82c8ead1af..a417270b337f 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -311,6 +311,9 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_Y12 v4l2_fourcc('Y', '1', '2', ' ') /* 12 Greyscale */ #define V4L2_PIX_FMT_Y16 v4l2_fourcc('Y', '1', '6', ' ') /* 16 Greyscale */ +/* Grey bit-packed formats */ +#define V4L2_PIX_FMT_Y10BPACK v4l2_fourcc('Y', '1', '0', 'B') /* 10 Greyscale bit-packed */ + /* Palette formats */ #define V4L2_PIX_FMT_PAL8 v4l2_fourcc('P', 'A', 'L', '8') /* 8 8-bit palette */ -- cgit v1.2.3 From 5f7088127e800df2cd5ff08140bdca087ab0fbac Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 21 Nov 2010 17:15:44 -0300 Subject: [media] uvcvideo: Make the API public Move the public API definitions to include/linux/uvcvideo.h and bump the version number to 1.1.0. Compatibility with the old API is kept, application can still be compiled against the private header and will not break. Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- Documentation/ioctl/ioctl-number.txt | 2 +- drivers/media/video/uvc/uvc_v4l2.c | 13 +++---- drivers/media/video/uvc/uvcvideo.h | 68 ++++++++++++++++------------------- include/linux/Kbuild | 1 + include/linux/uvcvideo.h | 69 ++++++++++++++++++++++++++++++++++++ 5 files changed, 108 insertions(+), 45 deletions(-) create mode 100644 include/linux/uvcvideo.h (limited to 'include/linux') diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index a0a5d82b6b0b..2d1ad12e2b3e 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -166,7 +166,6 @@ Code Seq#(hex) Include File Comments 'T' all arch/x86/include/asm/ioctls.h conflict! 'T' C0-DF linux/if_tun.h conflict! 'U' all sound/asound.h conflict! -'U' 00-0F drivers/media/video/uvc/uvcvideo.h conflict! 'U' 00-CF linux/uinput.h conflict! 'U' 00-EF linux/usbdevice_fs.h 'U' C0-CF drivers/bluetooth/hci_uart.h @@ -259,6 +258,7 @@ Code Seq#(hex) Include File Comments 't' 80-8F linux/isdn_ppp.h 't' 90 linux/toshiba.h 'u' 00-1F linux/smb_fs.h gone +'u' 20-3F linux/uvcvideo.h USB video class host driver 'v' 00-1F linux/ext2_fs.h conflict! 'v' 00-1F linux/fs.h conflict! 'v' 00-0F linux/sonypi.h conflict! 
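As a usage note for the now-public API, a userspace tool could issue an extension-unit query through the new header roughly as sketched below. This assumes the new <linux/uvcvideo.h> exports UVCIOC_CTRL_QUERY and struct uvc_xu_control_query as used in the driver diffs that follow; the device node, unit id, selector and 2-byte size are placeholders.

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/usb/video.h>	/* UVC_GET_CUR */
#include <linux/uvcvideo.h>	/* UVCIOC_CTRL_QUERY, struct uvc_xu_control_query */

int main(void)
{
	uint8_t value[2] = { 0, 0 };		/* must match the control's size */
	struct uvc_xu_control_query xqry = {
		.unit		= 4,		/* placeholder extension unit id */
		.selector	= 1,		/* placeholder control selector */
		.query		= UVC_GET_CUR,
		.size		= sizeof(value),
		.data		= value,
	};
	int fd = open("/dev/video0", O_RDWR);	/* placeholder device node */

	if (fd < 0)
		return 1;
	if (ioctl(fd, UVCIOC_CTRL_QUERY, &xqry) < 0)
		perror("UVCIOC_CTRL_QUERY");
	else
		printf("current value: 0x%02x%02x\n", value[1], value[0]);
	close(fd);
	return 0;
}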
diff --git a/drivers/media/video/uvc/uvc_v4l2.c b/drivers/media/video/uvc/uvc_v4l2.c index 2e2a556d1666..6e8aad44c4bd 100644 --- a/drivers/media/video/uvc/uvc_v4l2.c +++ b/drivers/media/video/uvc/uvc_v4l2.c @@ -1036,24 +1036,25 @@ static long uvc_v4l2_do_ioctl(struct file *file, unsigned int cmd, void *arg) * UVCIOC_CTRL_GET and UVCIOC_CTRL_SET are deprecated and scheduled for * removal in 2.6.42. */ - case UVCIOC_CTRL_ADD: + case __UVCIOC_CTRL_ADD: uvc_v4l2_ioctl_warn(); return -EEXIST; - case UVCIOC_CTRL_MAP_OLD: + case __UVCIOC_CTRL_MAP_OLD: uvc_v4l2_ioctl_warn(); + case __UVCIOC_CTRL_MAP: case UVCIOC_CTRL_MAP: return uvc_ioctl_ctrl_map(chain, arg, - cmd == UVCIOC_CTRL_MAP_OLD); + cmd == __UVCIOC_CTRL_MAP_OLD); - case UVCIOC_CTRL_GET: - case UVCIOC_CTRL_SET: + case __UVCIOC_CTRL_GET: + case __UVCIOC_CTRL_SET: { struct uvc_xu_control *xctrl = arg; struct uvc_xu_control_query xqry = { .unit = xctrl->unit, .selector = xctrl->selector, - .query = cmd == UVCIOC_CTRL_GET + .query = cmd == __UVCIOC_CTRL_GET ? UVC_GET_CUR : UVC_SET_CUR, .size = xctrl->size, .data = xctrl->data, diff --git a/drivers/media/video/uvc/uvcvideo.h b/drivers/media/video/uvc/uvcvideo.h index cd58ea81320b..38dd4e18a2ca 100644 --- a/drivers/media/video/uvc/uvcvideo.h +++ b/drivers/media/video/uvc/uvcvideo.h @@ -4,6 +4,14 @@ #include #include +#ifndef __KERNEL__ +/* + * This header provides binary compatibility with applications using the private + * uvcvideo API. This API is deprecated and will be removed in 2.6.42. + * Applications should be recompiled against the public linux/uvcvideo.h header. + */ +#warn "The uvcvideo.h header is deprecated, use linux/uvcvideo.h instead." + /* * Dynamic controls */ @@ -17,23 +25,6 @@ #define UVC_CTRL_DATA_TYPE_BITMASK 5 /* Control flags */ -#define UVC_CTRL_FLAG_SET_CUR (1 << 0) -#define UVC_CTRL_FLAG_GET_CUR (1 << 1) -#define UVC_CTRL_FLAG_GET_MIN (1 << 2) -#define UVC_CTRL_FLAG_GET_MAX (1 << 3) -#define UVC_CTRL_FLAG_GET_RES (1 << 4) -#define UVC_CTRL_FLAG_GET_DEF (1 << 5) -/* Control should be saved at suspend and restored at resume. */ -#define UVC_CTRL_FLAG_RESTORE (1 << 6) -/* Control can be updated by the camera. 
*/ -#define UVC_CTRL_FLAG_AUTO_UPDATE (1 << 7) - -#define UVC_CTRL_FLAG_GET_RANGE \ - (UVC_CTRL_FLAG_GET_CUR | UVC_CTRL_FLAG_GET_MIN | \ - UVC_CTRL_FLAG_GET_MAX | UVC_CTRL_FLAG_GET_RES | \ - UVC_CTRL_FLAG_GET_DEF) - -/* Old control flags, don't use */ #define UVC_CONTROL_SET_CUR (1 << 0) #define UVC_CONTROL_GET_CUR (1 << 1) #define UVC_CONTROL_GET_MIN (1 << 2) @@ -47,23 +38,11 @@ UVC_CONTROL_GET_MAX | UVC_CONTROL_GET_RES | \ UVC_CONTROL_GET_DEF) -struct uvc_xu_control_info { - __u8 entity[16]; - __u8 index; - __u8 selector; - __u16 size; - __u32 flags; -}; - struct uvc_menu_info { __u32 value; __u8 name[32]; }; -struct uvc_xu_control_mapping_old { - __u8 reserved[64]; -}; - struct uvc_xu_control_mapping { __u32 id; __u8 name[32]; @@ -81,32 +60,46 @@ struct uvc_xu_control_mapping { __u32 reserved[4]; }; -struct uvc_xu_control { - __u8 unit; +#endif + +struct uvc_xu_control_info { + __u8 entity[16]; + __u8 index; __u8 selector; __u16 size; - __u8 __user *data; + __u32 flags; }; -struct uvc_xu_control_query { +struct uvc_xu_control_mapping_old { + __u8 reserved[64]; +}; + +struct uvc_xu_control { __u8 unit; __u8 selector; - __u8 query; __u16 size; __u8 __user *data; }; +#ifndef __KERNEL__ #define UVCIOC_CTRL_ADD _IOW('U', 1, struct uvc_xu_control_info) #define UVCIOC_CTRL_MAP_OLD _IOWR('U', 2, struct uvc_xu_control_mapping_old) #define UVCIOC_CTRL_MAP _IOWR('U', 2, struct uvc_xu_control_mapping) #define UVCIOC_CTRL_GET _IOWR('U', 3, struct uvc_xu_control) #define UVCIOC_CTRL_SET _IOW('U', 4, struct uvc_xu_control) -#define UVCIOC_CTRL_QUERY _IOWR('U', 5, struct uvc_xu_control_query) +#else +#define __UVCIOC_CTRL_ADD _IOW('U', 1, struct uvc_xu_control_info) +#define __UVCIOC_CTRL_MAP_OLD _IOWR('U', 2, struct uvc_xu_control_mapping_old) +#define __UVCIOC_CTRL_MAP _IOWR('U', 2, struct uvc_xu_control_mapping) +#define __UVCIOC_CTRL_GET _IOWR('U', 3, struct uvc_xu_control) +#define __UVCIOC_CTRL_SET _IOW('U', 4, struct uvc_xu_control) +#endif #ifdef __KERNEL__ #include #include +#include /* -------------------------------------------------------------------------- * UVC constants @@ -184,8 +177,8 @@ struct uvc_xu_control_query { * Driver specific constants. */ -#define DRIVER_VERSION_NUMBER KERNEL_VERSION(1, 0, 0) -#define DRIVER_VERSION "v1.0.0" +#define DRIVER_VERSION_NUMBER KERNEL_VERSION(1, 1, 0) +#define DRIVER_VERSION "v1.1.0" /* Number of isochronous URBs. 
*/ #define UVC_URBS 5 @@ -682,4 +675,3 @@ void uvc_video_decode_isight(struct urb *urb, struct uvc_streaming *stream, #endif /* __KERNEL__ */ #endif - diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 75cf611641e6..cb1ded2bd545 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -372,6 +372,7 @@ header-y += unistd.h header-y += usbdevice_fs.h header-y += utime.h header-y += utsname.h +header-y += uvcvideo.h header-y += v4l2-mediabus.h header-y += v4l2-subdev.h header-y += veth.h diff --git a/include/linux/uvcvideo.h b/include/linux/uvcvideo.h new file mode 100644 index 000000000000..f46a53f060d7 --- /dev/null +++ b/include/linux/uvcvideo.h @@ -0,0 +1,69 @@ +#ifndef __LINUX_UVCVIDEO_H_ +#define __LINUX_UVCVIDEO_H_ + +#include +#include + +/* + * Dynamic controls + */ + +/* Data types for UVC control data */ +#define UVC_CTRL_DATA_TYPE_RAW 0 +#define UVC_CTRL_DATA_TYPE_SIGNED 1 +#define UVC_CTRL_DATA_TYPE_UNSIGNED 2 +#define UVC_CTRL_DATA_TYPE_BOOLEAN 3 +#define UVC_CTRL_DATA_TYPE_ENUM 4 +#define UVC_CTRL_DATA_TYPE_BITMASK 5 + +/* Control flags */ +#define UVC_CTRL_FLAG_SET_CUR (1 << 0) +#define UVC_CTRL_FLAG_GET_CUR (1 << 1) +#define UVC_CTRL_FLAG_GET_MIN (1 << 2) +#define UVC_CTRL_FLAG_GET_MAX (1 << 3) +#define UVC_CTRL_FLAG_GET_RES (1 << 4) +#define UVC_CTRL_FLAG_GET_DEF (1 << 5) +/* Control should be saved at suspend and restored at resume. */ +#define UVC_CTRL_FLAG_RESTORE (1 << 6) +/* Control can be updated by the camera. */ +#define UVC_CTRL_FLAG_AUTO_UPDATE (1 << 7) + +#define UVC_CTRL_FLAG_GET_RANGE \ + (UVC_CTRL_FLAG_GET_CUR | UVC_CTRL_FLAG_GET_MIN | \ + UVC_CTRL_FLAG_GET_MAX | UVC_CTRL_FLAG_GET_RES | \ + UVC_CTRL_FLAG_GET_DEF) + +struct uvc_menu_info { + __u32 value; + __u8 name[32]; +}; + +struct uvc_xu_control_mapping { + __u32 id; + __u8 name[32]; + __u8 entity[16]; + __u8 selector; + + __u8 size; + __u8 offset; + __u32 v4l2_type; + __u32 data_type; + + struct uvc_menu_info __user *menu_info; + __u32 menu_count; + + __u32 reserved[4]; +}; + +struct uvc_xu_control_query { + __u8 unit; + __u8 selector; + __u8 query; + __u16 size; + __u8 __user *data; +}; + +#define UVCIOC_CTRL_MAP _IOWR('u', 0x20, struct uvc_xu_control_mapping) +#define UVCIOC_CTRL_QUERY _IOWR('u', 0x21, struct uvc_xu_control_query) + +#endif -- cgit v1.2.3 From 16846524afc200e795aaae659356001780fa91db Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Fri, 25 Mar 2011 12:09:43 -0300 Subject: [media] v4l: Add V4L2_MBUS_FMT_JPEG_1X8 media bus format Add V4L2_MBUS_FMT_JPEG_1X8 format and the corresponding Docbook documentation. Signed-off-by: Sylwester Nawrocki Signed-off-by: Kyungmin Park Acked-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/v4l/subdev-formats.xml | 46 ++++++++++++++++++++++++++++ include/linux/v4l2-mediabus.h | 3 ++ 2 files changed, 49 insertions(+) (limited to 'include/linux') diff --git a/Documentation/DocBook/v4l/subdev-formats.xml b/Documentation/DocBook/v4l/subdev-formats.xml index d7ccd25edcc1..a26b10c07857 100644 --- a/Documentation/DocBook/v4l/subdev-formats.xml +++ b/Documentation/DocBook/v4l/subdev-formats.xml @@ -2522,5 +2522,51 @@ + +
+ JPEG Compressed Formats + + Those data formats consist of an ordered sequence of 8-bit bytes + obtained from JPEG compression process. Additionally to the + _JPEG prefix the format code is made of + the following information. + + The number of bus samples per entropy encoded byte. + The bus width. + + + For instance, for a JPEG baseline process and an 8-bit bus width + the format will be named V4L2_MBUS_FMT_JPEG_1X8. + + + + The following table lists existing JPEG compressed formats. + + + JPEG Formats + + + + + + + Identifier + Code + Remarks + + + + + V4L2_MBUS_FMT_JPEG_1X8 + 0x4001 + Besides of its usage for the parallel bus this format is + recommended for transmission of JPEG data over MIPI CSI bus + using the User Defined 8-bit Data types. + + + + +
+
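For illustration, a minimal kernel-side sketch (not part of the patch) of a bridge or sensor driver filling a media bus frame format with the new JPEG code; the function name and resolution are placeholders.

/*
 * Illustrative sketch only, not part of the patch. Width and height are
 * made-up placeholders.
 */
#include <linux/videodev2.h>
#include <linux/v4l2-mediabus.h>

static void example_fill_jpeg_fmt(struct v4l2_mbus_framefmt *fmt)
{
	fmt->width = 1280;
	fmt->height = 720;
	fmt->code = V4L2_MBUS_FMT_JPEG_1X8;
	fmt->field = V4L2_FIELD_NONE;
	fmt->colorspace = V4L2_COLORSPACE_JPEG;
}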
diff --git a/include/linux/v4l2-mediabus.h b/include/linux/v4l2-mediabus.h index de5c15921025..5ea7f753a348 100644 --- a/include/linux/v4l2-mediabus.h +++ b/include/linux/v4l2-mediabus.h @@ -89,6 +89,9 @@ enum v4l2_mbus_pixelcode { V4L2_MBUS_FMT_SGBRG12_1X12 = 0x3010, V4L2_MBUS_FMT_SGRBG12_1X12 = 0x3011, V4L2_MBUS_FMT_SRGGB12_1X12 = 0x3012, + + /* JPEG compressed formats - next is 0x4002 */ + V4L2_MBUS_FMT_JPEG_1X8 = 0x4001, }; /** -- cgit v1.2.3 From 0e59fd0592430cee4595c600427899a3404861e7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 11 May 2011 09:56:20 -0300 Subject: [media] v4l: Add M420 format definition M420 is a hybrid YUV 4:2:0 packet/planar format. Two Y lines are followed by an interleaved U/V line. Signed-off-by: Hans de Goede [laurent.pinchart@ideasonboard.com: split into v4l/uvcvideo patches] [laurent.pinchart@ideasonboard.com: add documentation] Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media-entities.tmpl | 1 + Documentation/DocBook/v4l/pixfmt-m420.xml | 147 ++++++++++++++++++++++++++++++ Documentation/DocBook/v4l/pixfmt.xml | 1 + Documentation/DocBook/v4l/videodev2.h.xml | 1 + include/linux/videodev2.h | 1 + 5 files changed, 151 insertions(+) create mode 100644 Documentation/DocBook/v4l/pixfmt-m420.xml (limited to 'include/linux') diff --git a/Documentation/DocBook/media-entities.tmpl b/Documentation/DocBook/media-entities.tmpl index 7a9570887d21..c8abb23ef1e7 100644 --- a/Documentation/DocBook/media-entities.tmpl +++ b/Documentation/DocBook/media-entities.tmpl @@ -270,6 +270,7 @@ + diff --git a/Documentation/DocBook/v4l/pixfmt-m420.xml b/Documentation/DocBook/v4l/pixfmt-m420.xml new file mode 100644 index 000000000000..ce4bc019e5c0 --- /dev/null +++ b/Documentation/DocBook/v4l/pixfmt-m420.xml @@ -0,0 +1,147 @@ + + + V4L2_PIX_FMT_M420 ('M420') + &manvol; + + + V4L2_PIX_FMT_M420 + Format with ½ horizontal and vertical chroma + resolution, also known as YUV 4:2:0. Hybrid plane line-interleaved + layout. + + + Description + + M420 is a YUV format with ½ horizontal and vertical chroma + subsampling (YUV 4:2:0). Pixels are organized as interleaved luma and + chroma planes. Two lines of luma data are followed by one line of chroma + data. + The luma plane has one byte per pixel. The chroma plane contains + interleaved CbCr pixels subsampled by ½ in the horizontal and + vertical directions. Each CbCr pair belongs to four pixels. For example, +Cb0/Cr0 belongs to +Y'00, Y'01, +Y'10, Y'11. + + All line lengths are identical: if the Y lines include pad bytes + so do the CbCr lines. + + + <constant>V4L2_PIX_FMT_M420</constant> 4 × 4 +pixel image + + + Byte Order. + Each cell is one byte. + + + + + + start + 0: + Y'00 + Y'01 + Y'02 + Y'03 + + + start + 4: + Y'10 + Y'11 + Y'12 + Y'13 + + + start + 8: + Cb00 + Cr00 + Cb01 + Cr01 + + + start + 16: + Y'20 + Y'21 + Y'22 + Y'23 + + + start + 20: + Y'30 + Y'31 + Y'32 + Y'33 + + + start + 24: + Cb10 + Cr10 + Cb11 + Cr11 + + + + + + + + + Color Sample Location. + + + + + + + 01 + 23 + + + 0 + YY + YY + + + + C + C + + + 1 + YY + YY + + + + + + 2 + YY + YY + + + + C + C + + + 3 + YY + YY + + + + + + + + + + + diff --git a/Documentation/DocBook/v4l/pixfmt.xml b/Documentation/DocBook/v4l/pixfmt.xml index 3486a068fe46..dbfe3b08435f 100644 --- a/Documentation/DocBook/v4l/pixfmt.xml +++ b/Documentation/DocBook/v4l/pixfmt.xml @@ -713,6 +713,7 @@ information. &sub-nv12m; &sub-nv12mt; &sub-nv16; + &sub-m420;
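For illustration, a minimal sketch (not part of the patch) of the buffer size that follows from the layout above: every two luma lines are followed by one CbCr line of the same length, so a frame occupies height * 3 / 2 lines (height assumed even); the helper name is made up.

/*
 * Illustrative sketch only, not part of the patch. For example, a
 * 640x480 M420 frame with bytesperline == 640 needs 460800 bytes.
 */
static unsigned int m420_sizeimage(unsigned int bytesperline,
				   unsigned int height)
{
	return bytesperline * height * 3 / 2;
}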
diff --git a/Documentation/DocBook/v4l/videodev2.h.xml b/Documentation/DocBook/v4l/videodev2.h.xml index 937acf54da9c..c50536a4f596 100644 --- a/Documentation/DocBook/v4l/videodev2.h.xml +++ b/Documentation/DocBook/v4l/videodev2.h.xml @@ -336,6 +336,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_YUV420 v4l2_fourcc('Y', 'U', '1', '2') /* 12 YUV 4:2:0 */ #define V4L2_PIX_FMT_HI240 v4l2_fourcc('H', 'I', '2', '4') /* 8 8-bit color */ #define V4L2_PIX_FMT_HM12 v4l2_fourcc('H', 'M', '1', '2') /* 8 YUV 4:2:0 16x16 macroblocks */ +#define V4L2_PIX_FMT_M420 v4l2_fourcc('M', '4', '2', '0') /* 12 YUV 4:2:0 2 lines y, 1 line uv interleaved */ /* two planes -- one Y, one Cr + Cb interleaved */ #define V4L2_PIX_FMT_NV12 v4l2_fourcc('N', 'V', '1', '2') /* 12 Y/CbCr 4:2:0 */ diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index a417270b337f..8a4c309d2344 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -336,6 +336,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_YUV420 v4l2_fourcc('Y', 'U', '1', '2') /* 12 YUV 4:2:0 */ #define V4L2_PIX_FMT_HI240 v4l2_fourcc('H', 'I', '2', '4') /* 8 8-bit color */ #define V4L2_PIX_FMT_HM12 v4l2_fourcc('H', 'M', '1', '2') /* 8 YUV 4:2:0 16x16 macroblocks */ +#define V4L2_PIX_FMT_M420 v4l2_fourcc('M', '4', '2', '0') /* 12 YUV 4:2:0 2 lines y, 1 line uv interleaved */ /* two planes -- one Y, one Cr + Cb interleaved */ #define V4L2_PIX_FMT_NV12 v4l2_fourcc('N', 'V', '1', '2') /* 12 Y/CbCr 4:2:0 */ -- cgit v1.2.3 From b2cbae2c248776d81cc265ff7d48405b6a4cc463 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 20 May 2011 11:46:11 -0700 Subject: RDMA: Add netlink infrastructure Add basic RDMA netlink infrastructure that allows for registration of RDMA clients for which data is to be exported and supplies message construction callbacks. Signed-off-by: Nir Muchtar [ Reorganize a few things, add CONFIG_NET dependency. - Roland ] Signed-off-by: Roland Dreier --- drivers/infiniband/Kconfig | 1 + drivers/infiniband/core/Makefile | 2 +- drivers/infiniband/core/device.c | 13 ++- drivers/infiniband/core/netlink.c | 190 ++++++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/qib/Kconfig | 2 +- include/linux/netlink.h | 1 + include/rdma/rdma_netlink.h | 64 +++++++++++++ 7 files changed, 270 insertions(+), 3 deletions(-) create mode 100644 drivers/infiniband/core/netlink.c create mode 100644 include/rdma/rdma_netlink.h (limited to 'include/linux') diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 6e35eccc9caa..0f9a84c1046a 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -2,6 +2,7 @@ menuconfig INFINIBAND tristate "InfiniBand support" depends on PCI || BROKEN depends on HAS_IOMEM + depends on NET ---help--- Core support for InfiniBand (IB). 
Make sure to also select any protocols you wish to use as well as drivers for your diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index cb1ab3ea4998..c8bbaef1becb 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ $(user_access-y) ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ - device.o fmr_pool.o cache.o + device.o fmr_pool.o cache.o netlink.o ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o ib_mad-y := mad.o smi.o agent.o mad_rmpp.o diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 46a7a3febc12..4007f721d25d 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "core_priv.h" @@ -730,14 +731,23 @@ static int __init ib_core_init(void) goto err; } + ret = ibnl_init(); + if (ret) { + printk(KERN_WARNING "Couldn't init IB netlink interface\n"); + goto err_sysfs; + } + ret = ib_cache_setup(); if (ret) { printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n"); - goto err_sysfs; + goto err_nl; } return 0; +err_nl: + ibnl_cleanup(); + err_sysfs: ib_sysfs_cleanup(); @@ -749,6 +759,7 @@ err: static void __exit ib_core_cleanup(void) { ib_cache_cleanup(); + ibnl_cleanup(); ib_sysfs_cleanup(); /* Make sure that any pending umem accounting work is done. */ destroy_workqueue(ib_wq); diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c new file mode 100644 index 000000000000..4a5abaf0a25c --- /dev/null +++ b/drivers/infiniband/core/netlink.c @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2010 Voltaire Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ + +#include +#include +#include +#include + +struct ibnl_client { + struct list_head list; + int index; + int nops; + const struct ibnl_client_cbs *cb_table; +}; + +static DEFINE_MUTEX(ibnl_mutex); +static struct sock *nls; +static LIST_HEAD(client_list); + +int ibnl_add_client(int index, int nops, + const struct ibnl_client_cbs cb_table[]) +{ + struct ibnl_client *cur; + struct ibnl_client *nl_client; + + nl_client = kmalloc(sizeof *nl_client, GFP_KERNEL); + if (!nl_client) + return -ENOMEM; + + nl_client->index = index; + nl_client->nops = nops; + nl_client->cb_table = cb_table; + + mutex_lock(&ibnl_mutex); + + list_for_each_entry(cur, &client_list, list) { + if (cur->index == index) { + pr_warn("Client for %d already exists\n", index); + mutex_unlock(&ibnl_mutex); + kfree(nl_client); + return -EINVAL; + } + } + + list_add_tail(&nl_client->list, &client_list); + + mutex_unlock(&ibnl_mutex); + + return 0; +} +EXPORT_SYMBOL(ibnl_add_client); + +int ibnl_remove_client(int index) +{ + struct ibnl_client *cur, *next; + + mutex_lock(&ibnl_mutex); + list_for_each_entry_safe(cur, next, &client_list, list) { + if (cur->index == index) { + list_del(&(cur->list)); + mutex_unlock(&ibnl_mutex); + kfree(cur); + return 0; + } + } + pr_warn("Can't remove callback for client idx %d. Not found\n", index); + mutex_unlock(&ibnl_mutex); + + return -EINVAL; +} +EXPORT_SYMBOL(ibnl_remove_client); + +void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq, + int len, int client, int op) +{ + unsigned char *prev_tail; + + prev_tail = skb_tail_pointer(skb); + *nlh = NLMSG_NEW(skb, 0, seq, RDMA_NL_GET_TYPE(client, op), + len, NLM_F_MULTI); + (*nlh)->nlmsg_len = skb_tail_pointer(skb) - prev_tail; + return NLMSG_DATA(*nlh); + +nlmsg_failure: + nlmsg_trim(skb, prev_tail); + return NULL; +} +EXPORT_SYMBOL(ibnl_put_msg); + +int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, + int len, void *data, int type) +{ + unsigned char *prev_tail; + + prev_tail = skb_tail_pointer(skb); + NLA_PUT(skb, type, len, data); + nlh->nlmsg_len += skb_tail_pointer(skb) - prev_tail; + return 0; + +nla_put_failure: + nlmsg_trim(skb, prev_tail - nlh->nlmsg_len); + return -EMSGSIZE; +} +EXPORT_SYMBOL(ibnl_put_attr); + +static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct ibnl_client *client; + int type = nlh->nlmsg_type; + int index = RDMA_NL_GET_CLIENT(type); + int op = RDMA_NL_GET_OP(type); + + list_for_each_entry(client, &client_list, list) { + if (client->index == index) { + if (op < 0 || op >= client->nops || + !client->cb_table[RDMA_NL_GET_OP(op)].dump) + return -EINVAL; + return netlink_dump_start(nls, skb, nlh, + client->cb_table[op].dump, + NULL); + } + } + + pr_info("Index %d wasn't found in client list\n", index); + return -EINVAL; +} + +static void ibnl_rcv(struct sk_buff *skb) +{ + mutex_lock(&ibnl_mutex); + netlink_rcv_skb(skb, &ibnl_rcv_msg); + mutex_unlock(&ibnl_mutex); +} + +int __init ibnl_init(void) +{ + nls = netlink_kernel_create(&init_net, NETLINK_RDMA, 0, ibnl_rcv, + NULL, THIS_MODULE); + if (!nls) { + pr_warn("Failed to create netlink socket\n"); + return -ENOMEM; + } + + return 0; +} + +void ibnl_cleanup(void) +{ + struct ibnl_client *cur, *next; + + mutex_lock(&ibnl_mutex); + list_for_each_entry_safe(cur, next, &client_list, list) { + list_del(&(cur->list)); + kfree(cur); + } + mutex_unlock(&ibnl_mutex); + + netlink_kernel_release(nls); +} diff --git a/drivers/infiniband/hw/qib/Kconfig 
b/drivers/infiniband/hw/qib/Kconfig index 7c03a70c55a2..8349f9c5064c 100644 --- a/drivers/infiniband/hw/qib/Kconfig +++ b/drivers/infiniband/hw/qib/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_QIB tristate "QLogic PCIe HCA support" - depends on 64BIT && NET + depends on 64BIT ---help--- This is a low-level driver for QLogic PCIe QLE InfiniBand host channel adapters. This driver does not support the QLogic diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 4c4ac3f3ce5a..a9dd89552f9c 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -24,6 +24,7 @@ /* leave room for NETLINK_DM (DM Events) */ #define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */ #define NETLINK_ECRYPTFS 19 +#define NETLINK_RDMA 20 #define MAX_LINKS 32 diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h new file mode 100644 index 000000000000..c983a193cecf --- /dev/null +++ b/include/rdma/rdma_netlink.h @@ -0,0 +1,64 @@ +#ifndef _RDMA_NETLINK_H +#define _RDMA_NETLINK_H + +#define RDMA_NL_GET_CLIENT(type) ((type & (((1 << 6) - 1) << 10)) >> 10) +#define RDMA_NL_GET_OP(type) (type & ((1 << 10) - 1)) +#define RDMA_NL_GET_TYPE(client, op) ((client << 10) + op) + +#ifdef __KERNEL__ + +#include + +struct ibnl_client_cbs { + int (*dump)(struct sk_buff *skb, struct netlink_callback *nlcb); +}; + +int ibnl_init(void); +void ibnl_cleanup(void); + +/** + * Add a a client to the list of IB netlink exporters. + * @index: Index of the added client + * @nops: Number of supported ops by the added client. + * @cb_table: A table for op->callback + * + * Returns 0 on success or a negative error code. + */ +int ibnl_add_client(int index, int nops, + const struct ibnl_client_cbs cb_table[]); + +/** + * Remove a client from IB netlink. + * @index: Index of the removed IB client. + * + * Returns 0 on success or a negative error code. + */ +int ibnl_remove_client(int index); + +/** + * Put a new message in a supplied skb. + * @skb: The netlink skb. + * @nlh: Pointer to put the header of the new netlink message. + * @seq: The message sequence number. + * @len: The requested message length to allocate. + * @client: Calling IB netlink client. + * @op: message content op. + * Returns the allocated buffer on success and NULL on failure. + */ +void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq, + int len, int client, int op); +/** + * Put a new attribute in a supplied skb. + * @skb: The netlink skb. + * @nlh: Header of the netlink message to append the attribute to. + * @len: The length of the attribute data. + * @data: The attribute data to put. + * @type: The attribute type. + * Returns the 0 and a negative error code on failure. 
+ */ +int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, + int len, void *data, int type); + +#endif /* __KERNEL__ */ + +#endif /* _RDMA_NETLINK_H */ -- cgit v1.2.3 From 771949d03b4f5295f648f09141325fd478f6c7ce Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 20 May 2011 20:52:16 +0200 Subject: block: get rid of on-stack plugging debug checks We don't need them anymore, so kill: - REQ_ON_PLUG checks in various places - !rq_mergeable() check in plug merging Signed-off-by: Jens Axboe --- block/blk-core.c | 27 --------------------------- block/elevator.c | 4 ---- include/linux/blk_types.h | 2 -- 3 files changed, 33 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 9e8e297374b9..7369eeeafe23 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -569,8 +569,6 @@ int blk_get_queue(struct request_queue *q) static inline void blk_free_request(struct request_queue *q, struct request *rq) { - BUG_ON(rq->cmd_flags & REQ_ON_PLUG); - if (rq->cmd_flags & REQ_ELVPRIV) elv_put_request(q, rq); mempool_free(rq, q->rq.rq_pool); @@ -1110,14 +1108,6 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req, { const int ff = bio->bi_rw & REQ_FAILFAST_MASK; - /* - * Debug stuff, kill later - */ - if (!rq_mergeable(req)) { - blk_dump_rq_flags(req, "back"); - return false; - } - if (!ll_back_merge_fn(q, req, bio)) return false; @@ -1141,14 +1131,6 @@ static bool bio_attempt_front_merge(struct request_queue *q, const int ff = bio->bi_rw & REQ_FAILFAST_MASK; sector_t sector; - /* - * Debug stuff, kill later - */ - if (!rq_mergeable(req)) { - blk_dump_rq_flags(req, "front"); - return false; - } - if (!ll_front_merge_fn(q, req, bio)) return false; @@ -1258,14 +1240,12 @@ static int __make_request(struct request_queue *q, struct bio *bio) el_ret = elv_merge(q, &req, bio); if (el_ret == ELEVATOR_BACK_MERGE) { - BUG_ON(req->cmd_flags & REQ_ON_PLUG); if (bio_attempt_back_merge(q, req, bio)) { if (!attempt_back_merge(q, req)) elv_merged_request(q, req, el_ret); goto out_unlock; } } else if (el_ret == ELEVATOR_FRONT_MERGE) { - BUG_ON(req->cmd_flags & REQ_ON_PLUG); if (bio_attempt_front_merge(q, req, bio)) { if (!attempt_front_merge(q, req)) elv_merged_request(q, req, el_ret); @@ -1320,10 +1300,6 @@ get_rq: if (__rq->q != q) plug->should_sort = 1; } - /* - * Debug flag, kill later - */ - req->cmd_flags |= REQ_ON_PLUG; list_add_tail(&req->queuelist, &plug->list); drive_stat_acct(req, 1); } else { @@ -2749,7 +2725,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) while (!list_empty(&list)) { rq = list_entry_rq(list.next); list_del_init(&rq->queuelist); - BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG)); BUG_ON(!rq->q); if (rq->q != q) { /* @@ -2761,8 +2736,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) depth = 0; spin_lock(q->queue_lock); } - rq->cmd_flags &= ~REQ_ON_PLUG; - /* * rq is already accounted, so use raw insert */ diff --git a/block/elevator.c b/block/elevator.c index 2a0b653c90fd..b0b38ce0dcb6 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -416,8 +416,6 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq) struct list_head *entry; int stop_flags; - BUG_ON(rq->cmd_flags & REQ_ON_PLUG); - if (q->last_merge == rq) q->last_merge = NULL; @@ -656,8 +654,6 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) rq->q = q; - BUG_ON(rq->cmd_flags & REQ_ON_PLUG); - if (rq->cmd_flags & REQ_SOFTBARRIER) { /* barriers are scheduling boundary, update 
end_sector */ if (rq->cmd_type == REQ_TYPE_FS || diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index be50d9e70a7d..2a7cea53ca0d 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -151,7 +151,6 @@ enum rq_flag_bits { __REQ_IO_STAT, /* account I/O stat */ __REQ_MIXED_MERGE, /* merge of different types, fail separately */ __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ - __REQ_ON_PLUG, /* on plug list */ __REQ_NR_BITS, /* stops here */ }; @@ -192,6 +191,5 @@ enum rq_flag_bits { #define REQ_IO_STAT (1 << __REQ_IO_STAT) #define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) #define REQ_SECURE (1 << __REQ_SECURE) -#define REQ_ON_PLUG (1 << __REQ_ON_PLUG) #endif /* __LINUX_BLK_TYPES_H */ -- cgit v1.2.3 From d974d905cbfc1039a73ba0c7eea3f4d4e13c0624 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 20 May 2011 15:48:17 +1000 Subject: spinlock_up.h: include asm/processor.h in for cpu_relax Commit e66eed651fd1 ("list: remove prefetching from regular list iterators") removed the include of prefetch.h from list.h and this was a path to including asm/processor.h. We need to include it excplicitly now. Fixes this build error on sparc32 (at least): In file included from include/linux/seqlock.h:29, from include/linux/time.h:8, from include/linux/timex.h:56, from include/linux/sched.h:57, from arch/sparc/kernel/asm-offsets.c:13: include/linux/spinlock.h: In function 'spin_unlock_wait': include/linux/spinlock.h:360: error: implicit declaration of function 'cpu_relax Signed-off-by: Stephen Rothwell Signed-off-by: Linus Torvalds --- include/linux/spinlock_up.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index b14f6a91e19f..a26e2fb604e6 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -5,6 +5,8 @@ # error "please don't include this file directly" #endif +#include /* for cpu_relax() */ + /* * include/linux/spinlock_up.h - UP-debug version of spinlocks. 
* -- cgit v1.2.3 From b7281ca2a4b00044c60c25059f467d05772cdbe3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 18 May 2011 10:51:48 +0100 Subject: ARM: 6904/1: MTD: Add integrator-flash feature to physmap In the process of moving platforms away from integrator-flash (aka armflash), add to physmap the few features that make armflash unique: - optionnal probing for the AFS partition type - init() and exit() methods, used by Integrator to control write access to the various onboard programmable components Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Acked-by: Artem Bityutskiy Acked-by: David Woodhouse Signed-off-by: Russell King --- drivers/mtd/maps/physmap.c | 16 +++++++++++++++- include/linux/mtd/physmap.h | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c index 7522df4f71f1..49676b7a53a4 100644 --- a/drivers/mtd/maps/physmap.c +++ b/drivers/mtd/maps/physmap.c @@ -67,6 +67,10 @@ static int physmap_flash_remove(struct platform_device *dev) if (info->mtd[i] != NULL) map_destroy(info->mtd[i]); } + + if (physmap_data->exit) + physmap_data->exit(dev); + return 0; } @@ -77,7 +81,11 @@ static const char *rom_probe_types[] = { "map_rom", NULL }; #ifdef CONFIG_MTD_PARTITIONS -static const char *part_probe_types[] = { "cmdlinepart", "RedBoot", NULL }; +static const char *part_probe_types[] = { "cmdlinepart", "RedBoot", +#ifdef CONFIG_MTD_AFS_PARTS + "afs", +#endif + NULL }; #endif static int physmap_flash_probe(struct platform_device *dev) @@ -100,6 +108,12 @@ static int physmap_flash_probe(struct platform_device *dev) goto err_out; } + if (physmap_data->init) { + err = physmap_data->init(dev); + if (err) + goto err_out; + } + platform_set_drvdata(dev, info); for (i = 0; i < dev->num_resources; i++) { diff --git a/include/linux/mtd/physmap.h b/include/linux/mtd/physmap.h index bcfd9f777454..d37cca05e62c 100644 --- a/include/linux/mtd/physmap.h +++ b/include/linux/mtd/physmap.h @@ -22,6 +22,8 @@ struct map_info; struct physmap_flash_data { unsigned int width; + int (*init)(struct platform_device *); + void (*exit)(struct platform_device *); void (*set_vpp)(struct map_info *, int); unsigned int nr_parts; unsigned int pfow_base; -- cgit v1.2.3 From 667f390bee987d45351402e42008c52cdfb77d76 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 18 May 2011 10:51:55 +0100 Subject: ARM: 6910/1: MTD: physmap: let set_vpp() pass a platform_device instead of a map_info The set_vpp() method provided by physmap passes a map_info back to the platform code, which has little relevance as far as the platform is concerned (this parameter is completely unused). Instead, pass the platform_device, which can be used in the pismo driver to retrieve some important information in a nicer way, instead of the hack that was in place. The empty set_vpp function in board-at572d940hf_ek.c is left untouched, as the board/SoC is scheduled for removal. 
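For illustration, a minimal board-file sketch (not part of the patch) of providing VPP control with the new platform_device-based set_vpp() prototype; the names and the GPIO number are placeholders.

/*
 * Illustrative sketch only, not part of the patch. The GPIO number and
 * the example_ names are made-up placeholders.
 */
#include <linux/gpio.h>
#include <linux/mtd/physmap.h>
#include <linux/platform_device.h>

#define EXAMPLE_GPIO_FLASH_VPP	42	/* placeholder */

static void example_flash_set_vpp(struct platform_device *pdev, int on)
{
	gpio_set_value(EXAMPLE_GPIO_FLASH_VPP, on);
}

static struct physmap_flash_data example_flash_data = {
	.width		= 2,
	.set_vpp	= example_flash_set_vpp,
};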
Cc: Andrew Victor Cc: Nicolas Ferre Acked-by: Jean-Christophe PLAGNIOL-VILLARD Cc: Philipp Zabel Cc: Eric Miao Cc: Ben Dooks Acked-by: Artem Bityutskiy Acked-by: David Woodhouse Signed-off-by: Marc Zyngier Signed-off-by: Russell King --- arch/arm/mach-integrator/integrator_ap.c | 2 +- arch/arm/mach-integrator/integrator_cp.c | 2 +- arch/arm/mach-omap1/flash.c | 2 +- arch/arm/mach-pxa/hx4700.c | 2 +- arch/arm/mach-pxa/magician.c | 2 +- arch/arm/mach-realview/core.c | 2 +- arch/arm/mach-s3c2410/nor-simtec.c | 2 +- arch/arm/mach-versatile/core.c | 2 +- arch/arm/mach-vexpress/v2m.c | 2 +- arch/arm/plat-omap/include/plat/flash.h | 2 +- drivers/mtd/maps/physmap.c | 20 ++++++++++++---- drivers/mtd/maps/pismo.c | 40 +++----------------------------- include/linux/mtd/physmap.h | 2 +- 13 files changed, 29 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c index 11bd49e8b663..2aa98ee41b8d 100644 --- a/arch/arm/mach-integrator/integrator_ap.c +++ b/arch/arm/mach-integrator/integrator_ap.c @@ -263,7 +263,7 @@ static void ap_flash_exit(struct platform_device *dev) } } -static void ap_flash_set_vpp(struct map_info *map, int on) +static void ap_flash_set_vpp(struct platform_device *pdev, int on) { void __iomem *reg = on ? SC_CTRLS : SC_CTRLC; diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c index ec9628fe7109..b676b41d70e2 100644 --- a/arch/arm/mach-integrator/integrator_cp.c +++ b/arch/arm/mach-integrator/integrator_cp.c @@ -259,7 +259,7 @@ static void intcp_flash_exit(struct platform_device *dev) writel(val, INTCP_VA_CTRL_BASE + INTCP_FLASHPROG); } -static void intcp_flash_set_vpp(struct map_info *map, int on) +static void intcp_flash_set_vpp(struct platform_device *pdev, int on) { u32 val; diff --git a/arch/arm/mach-omap1/flash.c b/arch/arm/mach-omap1/flash.c index acd161666408..1749cb37dda0 100644 --- a/arch/arm/mach-omap1/flash.c +++ b/arch/arm/mach-omap1/flash.c @@ -13,7 +13,7 @@ #include #include -void omap1_set_vpp(struct map_info *map, int enable) +void omap1_set_vpp(struct platform_device *pdev, int enable) { static int count; u32 l; diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c index 9cdcca597924..f941a495a4a8 100644 --- a/arch/arm/mach-pxa/hx4700.c +++ b/arch/arm/mach-pxa/hx4700.c @@ -735,7 +735,7 @@ static struct platform_device bq24022 = { * StrataFlash */ -static void hx4700_set_vpp(struct map_info *map, int vpp) +static void hx4700_set_vpp(struct platform_device *pdev, int vpp) { gpio_set_value(GPIO91_HX4700_FLASH_VPEN, vpp); } diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c index 9984ef70bd79..e1920572948a 100644 --- a/arch/arm/mach-pxa/magician.c +++ b/arch/arm/mach-pxa/magician.c @@ -662,7 +662,7 @@ static struct pxaohci_platform_data magician_ohci_info = { * StrataFlash */ -static void magician_set_vpp(struct map_info *map, int vpp) +static void magician_set_vpp(struct platform_device *pdev, int vpp) { gpio_set_value(EGPIO_MAGICIAN_FLASH_VPP, vpp); } diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index d3f1dde70fc9..c8ec08886633 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -77,7 +77,7 @@ void __init realview_adjust_zones(unsigned long *size, unsigned long *hole) #define REALVIEW_FLASHCTRL (__io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_FLASH_OFFSET) -static void realview_flash_set_vpp(struct map_info *map, int on) +static 
void realview_flash_set_vpp(struct platform_device *pdev, int on) { u32 val; diff --git a/arch/arm/mach-s3c2410/nor-simtec.c b/arch/arm/mach-s3c2410/nor-simtec.c index 598d130633dc..ad9f750f1e55 100644 --- a/arch/arm/mach-s3c2410/nor-simtec.c +++ b/arch/arm/mach-s3c2410/nor-simtec.c @@ -32,7 +32,7 @@ #include "nor-simtec.h" -static void simtec_nor_vpp(struct map_info *map, int vpp) +static void simtec_nor_vpp(struct platform_device *pdev, int vpp) { unsigned int val; unsigned long flags; diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index 06f406ad8edd..335d8250e364 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -190,7 +190,7 @@ void __init versatile_map_io(void) #define VERSATILE_FLASHCTRL (__io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_FLASH_OFFSET) -static void versatile_flash_set_vpp(struct map_info *map, int on) +static void versatile_flash_set_vpp(struct platform_device *pdev, int on) { u32 val; diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c index e3268152c834..f860314ef7f6 100644 --- a/arch/arm/mach-vexpress/v2m.c +++ b/arch/arm/mach-vexpress/v2m.c @@ -206,7 +206,7 @@ static struct platform_device v2m_usb_device = { .dev.platform_data = &v2m_usb_config, }; -static void v2m_flash_set_vpp(struct map_info *map, int on) +static void v2m_flash_set_vpp(struct platform_device *pdev, int on) { writel(on != 0, MMIO_P2V(V2M_SYS_FLASH)); } diff --git a/arch/arm/plat-omap/include/plat/flash.h b/arch/arm/plat-omap/include/plat/flash.h index 3e6327016b40..3083195123ea 100644 --- a/arch/arm/plat-omap/include/plat/flash.h +++ b/arch/arm/plat-omap/include/plat/flash.h @@ -11,6 +11,6 @@ #include -extern void omap1_set_vpp(struct map_info *map, int enable); +extern void omap1_set_vpp(struct platform_device *pdev, int enable); #endif diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c index 49676b7a53a4..1a9b94f0ee54 100644 --- a/drivers/mtd/maps/physmap.c +++ b/drivers/mtd/maps/physmap.c @@ -74,6 +74,18 @@ static int physmap_flash_remove(struct platform_device *dev) return 0; } +static void physmap_set_vpp(struct map_info *map, int state) +{ + struct platform_device *pdev; + struct physmap_flash_data *physmap_data; + + pdev = (struct platform_device *)map->map_priv_1; + physmap_data = pdev->dev.platform_data; + + if (physmap_data->set_vpp) + physmap_data->set_vpp(pdev, state); +} + static const char *rom_probe_types[] = { "cfi_probe", "jedec_probe", @@ -81,10 +93,7 @@ static const char *rom_probe_types[] = { "map_rom", NULL }; #ifdef CONFIG_MTD_PARTITIONS -static const char *part_probe_types[] = { "cmdlinepart", "RedBoot", -#ifdef CONFIG_MTD_AFS_PARTS - "afs", -#endif +static const char *part_probe_types[] = { "cmdlinepart", "RedBoot", "afs", NULL }; #endif @@ -134,8 +143,9 @@ static int physmap_flash_probe(struct platform_device *dev) info->map[i].phys = dev->resource[i].start; info->map[i].size = resource_size(&dev->resource[i]); info->map[i].bankwidth = physmap_data->width; - info->map[i].set_vpp = physmap_data->set_vpp; + info->map[i].set_vpp = physmap_set_vpp; info->map[i].pfow_base = physmap_data->pfow_base; + info->map[i].map_priv_1 = (unsigned long)dev; info->map[i].virt = devm_ioremap(&dev->dev, info->map[i].phys, info->map[i].size); diff --git a/drivers/mtd/maps/pismo.c b/drivers/mtd/maps/pismo.c index f4ce273e93fd..65bd1cd4d627 100644 --- a/drivers/mtd/maps/pismo.c +++ b/drivers/mtd/maps/pismo.c @@ -50,39 +50,13 @@ struct pismo_data { struct platform_device *dev[PISMO_NUM_CS]; }; 
-/* FIXME: set_vpp could do with a better calling convention */ -static struct pismo_data *vpp_pismo; -static DEFINE_MUTEX(pismo_mutex); - -static int pismo_setvpp_probe_fix(struct pismo_data *pismo) +static void pismo_set_vpp(struct platform_device *pdev, int on) { - mutex_lock(&pismo_mutex); - if (vpp_pismo) { - mutex_unlock(&pismo_mutex); - kfree(pismo); - return -EBUSY; - } - vpp_pismo = pismo; - mutex_unlock(&pismo_mutex); - return 0; -} - -static void pismo_setvpp_remove_fix(struct pismo_data *pismo) -{ - mutex_lock(&pismo_mutex); - if (vpp_pismo == pismo) - vpp_pismo = NULL; - mutex_unlock(&pismo_mutex); -} - -static void pismo_set_vpp(struct map_info *map, int on) -{ - struct pismo_data *pismo = vpp_pismo; + struct i2c_client *client = to_i2c_client(pdev->dev.parent); + struct pismo_data *pismo = i2c_get_clientdata(client); pismo->vpp(pismo->vpp_data, on); } -/* end of hack */ - static unsigned int __devinit pismo_width_to_bytes(unsigned int width) { @@ -231,9 +205,6 @@ static int __devexit pismo_remove(struct i2c_client *client) for (i = 0; i < ARRAY_SIZE(pismo->dev); i++) platform_device_unregister(pismo->dev[i]); - /* FIXME: set_vpp needs saner arguments */ - pismo_setvpp_remove_fix(pismo); - kfree(pismo); return 0; @@ -257,11 +228,6 @@ static int __devinit pismo_probe(struct i2c_client *client, if (!pismo) return -ENOMEM; - /* FIXME: set_vpp needs saner arguments */ - ret = pismo_setvpp_probe_fix(pismo); - if (ret) - return ret; - pismo->client = client; if (pdata) { pismo->vpp = pdata->set_vpp; diff --git a/include/linux/mtd/physmap.h b/include/linux/mtd/physmap.h index d37cca05e62c..49b959029417 100644 --- a/include/linux/mtd/physmap.h +++ b/include/linux/mtd/physmap.h @@ -24,7 +24,7 @@ struct physmap_flash_data { unsigned int width; int (*init)(struct platform_device *); void (*exit)(struct platform_device *); - void (*set_vpp)(struct map_info *, int); + void (*set_vpp)(struct platform_device *, int); unsigned int nr_parts; unsigned int pfow_base; char *probe_type; -- cgit v1.2.3 From 94d56ffa0a9bf11dfb602dae9223089e09a8e050 Mon Sep 17 00:00:00 2001 From: Andreas Oberritter Date: Thu, 12 May 2011 18:11:06 -0300 Subject: [media] DVB: Add basic API support for DVB-T2 and bump minor version [steve@stevekerrison.com: Remove private definitions from cxd2820r that existed before API was defined] Signed-off-by: Andreas Oberritter Signed-off-by: Steve Kerrison Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb/dvb-core/dvb_frontend.c | 13 +++++++++---- drivers/media/dvb/dvb-core/dvb_frontend.h | 3 +++ drivers/media/dvb/frontends/cxd2820r_priv.h | 12 ------------ include/linux/dvb/frontend.h | 20 ++++++++++++++++---- include/linux/dvb/version.h | 2 +- 5 files changed, 29 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c index 95c3fec6ae40..3639edce65e0 100644 --- a/drivers/media/dvb/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb/dvb-core/dvb_frontend.c @@ -1148,10 +1148,9 @@ static void dtv_property_adv_params_sync(struct dvb_frontend *fe) break; } - if(c->delivery_system == SYS_ISDBT) { - /* Fake out a generic DVB-T request so we pass validation in the ioctl */ - p->frequency = c->frequency; - p->inversion = c->inversion; + /* Fake out a generic DVB-T request so we pass validation in the ioctl */ + if ((c->delivery_system == SYS_ISDBT) || + (c->delivery_system == SYS_DVBT2)) { p->u.ofdm.constellation = QAM_AUTO; p->u.ofdm.code_rate_HP = FEC_AUTO; 
p->u.ofdm.code_rate_LP = FEC_AUTO; @@ -1324,6 +1323,9 @@ static int dtv_property_process_get(struct dvb_frontend *fe, case DTV_ISDBS_TS_ID: tvp->u.data = fe->dtv_property_cache.isdbs_ts_id; break; + case DTV_DVBT2_PLP_ID: + tvp->u.data = fe->dtv_property_cache.dvbt2_plp_id; + break; default: return -EINVAL; } @@ -1479,6 +1481,9 @@ static int dtv_property_process_set(struct dvb_frontend *fe, case DTV_ISDBS_TS_ID: fe->dtv_property_cache.isdbs_ts_id = tvp->u.data; break; + case DTV_DVBT2_PLP_ID: + fe->dtv_property_cache.dvbt2_plp_id = tvp->u.data; + break; default: return -EINVAL; } diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.h b/drivers/media/dvb/dvb-core/dvb_frontend.h index 3b860504bf04..5590eb6eb408 100644 --- a/drivers/media/dvb/dvb-core/dvb_frontend.h +++ b/drivers/media/dvb/dvb-core/dvb_frontend.h @@ -358,6 +358,9 @@ struct dtv_frontend_properties { /* ISDB-T specifics */ u32 isdbs_ts_id; + + /* DVB-T2 specifics */ + u32 dvbt2_plp_id; }; struct dvb_frontend { diff --git a/drivers/media/dvb/frontends/cxd2820r_priv.h b/drivers/media/dvb/frontends/cxd2820r_priv.h index d4e2e0b76c10..25adbeefa6d3 100644 --- a/drivers/media/dvb/frontends/cxd2820r_priv.h +++ b/drivers/media/dvb/frontends/cxd2820r_priv.h @@ -40,18 +40,6 @@ #undef warn #define warn(f, arg...) printk(KERN_WARNING LOG_PREFIX": " f "\n" , ## arg) -/* - * FIXME: These are totally wrong and must be added properly to the API. - * Only temporary solution in order to get driver compile. - */ -#define SYS_DVBT2 SYS_DAB -#define TRANSMISSION_MODE_1K 0 -#define TRANSMISSION_MODE_16K 0 -#define TRANSMISSION_MODE_32K 0 -#define GUARD_INTERVAL_1_128 0 -#define GUARD_INTERVAL_19_128 0 -#define GUARD_INTERVAL_19_256 0 - struct reg_val_mask { u32 reg; u8 val; diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h index 493a2bf85f62..36a3ed63f571 100644 --- a/include/linux/dvb/frontend.h +++ b/include/linux/dvb/frontend.h @@ -175,14 +175,20 @@ typedef enum fe_transmit_mode { TRANSMISSION_MODE_2K, TRANSMISSION_MODE_8K, TRANSMISSION_MODE_AUTO, - TRANSMISSION_MODE_4K + TRANSMISSION_MODE_4K, + TRANSMISSION_MODE_1K, + TRANSMISSION_MODE_16K, + TRANSMISSION_MODE_32K, } fe_transmit_mode_t; typedef enum fe_bandwidth { BANDWIDTH_8_MHZ, BANDWIDTH_7_MHZ, BANDWIDTH_6_MHZ, - BANDWIDTH_AUTO + BANDWIDTH_AUTO, + BANDWIDTH_5_MHZ, + BANDWIDTH_10_MHZ, + BANDWIDTH_1_712_MHZ, } fe_bandwidth_t; @@ -191,7 +197,10 @@ typedef enum fe_guard_interval { GUARD_INTERVAL_1_16, GUARD_INTERVAL_1_8, GUARD_INTERVAL_1_4, - GUARD_INTERVAL_AUTO + GUARD_INTERVAL_AUTO, + GUARD_INTERVAL_1_128, + GUARD_INTERVAL_19_128, + GUARD_INTERVAL_19_256, } fe_guard_interval_t; @@ -305,7 +314,9 @@ struct dvb_frontend_event { #define DTV_ISDBS_TS_ID 42 -#define DTV_MAX_COMMAND DTV_ISDBS_TS_ID +#define DTV_DVBT2_PLP_ID 43 + +#define DTV_MAX_COMMAND DTV_DVBT2_PLP_ID typedef enum fe_pilot { PILOT_ON, @@ -337,6 +348,7 @@ typedef enum fe_delivery_system { SYS_DMBTH, SYS_CMMB, SYS_DAB, + SYS_DVBT2, } fe_delivery_system_t; struct dtv_cmds_h { diff --git a/include/linux/dvb/version.h b/include/linux/dvb/version.h index 5a7546c12688..1421cc84afaa 100644 --- a/include/linux/dvb/version.h +++ b/include/linux/dvb/version.h @@ -24,6 +24,6 @@ #define _DVBVERSION_H_ #define DVB_API_VERSION 5 -#define DVB_API_VERSION_MINOR 2 +#define DVB_API_VERSION_MINOR 3 #endif /*_DVBVERSION_H_*/ -- cgit v1.2.3 From 3e0c2ab67e48f77c2da0a5c826aac397792a214e Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 20 May 2011 09:42:48 -0500 Subject: slub: Deal with hyperthetical case of PAGE_SIZE > 2M 
kmalloc_index() currently returns -1 if the PAGE_SIZE is larger than 2M which seems to cause some concern since the callers do not check for -1. Insert a BUG() and add a comment to the -1 explaining that the code cannot be reached. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index ca0c076b2374..c8668d161dd8 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -177,7 +177,8 @@ static __always_inline int kmalloc_index(size_t size) if (size <= 4 * 1024) return 12; /* * The following is only needed to support architectures with a larger page - * size than 4k. + * size than 4k. We need to support 2 * PAGE_SIZE here. So for a 64k page + * size we would have to go up to 128k. */ if (size <= 8 * 1024) return 13; if (size <= 16 * 1024) return 14; @@ -188,7 +189,8 @@ static __always_inline int kmalloc_index(size_t size) if (size <= 512 * 1024) return 19; if (size <= 1024 * 1024) return 20; if (size <= 2 * 1024 * 1024) return 21; - return -1; + BUG(); + return -1; /* Will never be reached */ /* * What we really wanted to do and cannot do because of compiler issues is: -- cgit v1.2.3 From 9f728f53dd70396f3183d2f0861022259471824b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 12 May 2011 17:11:47 -0700 Subject: PCI/e1000e: Add and use pci_disable_link_state_locked() Need to use it in _e1000e_disable_aspm. This routine is used for error recovery, where the pci_bus_sem is already held, and we don't want pci_disable_link_state to try to take it again. So add a locked variant for use in cases like this. Found lock up: [ 2374.654557] kworker/32:1 D ffff881027f6b0f0 0 6075 2 0x00000000 [ 2374.654816] ffff88503f099a68 0000000000000046 ffff88503f098000 0000000000004000 [ 2374.654837] 00000000001d1ec0 ffff88503f099fd8 00000000001d1ec0 ffff88503f099fd8 [ 2374.654860] 0000000000004000 00000000001d1ec0 ffff88503dcc8000 ffff88503f090000 [ 2374.654880] Call Trace: [ 2374.654898] [] ? __lock_acquired+0x3a/0x224 [ 2374.654914] [] ? _raw_spin_unlock_irq+0x30/0x36 [ 2374.654925] [] ? trace_hardirqs_on_caller+0x1f/0x178 [ 2374.654936] [] rwsem_down_failed_common+0xd3/0x103 [ 2374.654945] [] ? __lock_contended+0x3a/0x2a2 [ 2374.654955] [] rwsem_down_read_failed+0x12/0x14 [ 2374.654967] [] call_rwsem_down_read_failed+0x14/0x30 [ 2374.654981] [] ? pci_disable_link_state+0x5f/0xf5 [ 2374.654990] [] ? down_read+0x7e/0x91 [ 2374.654999] [] ? pci_disable_link_state+0x5f/0xf5 [ 2374.655008] [] pci_disable_link_state+0x5f/0xf5 [ 2374.655024] [] e1000e_disable_aspm+0x55/0x5a [ 2374.655037] [] e1000_io_slot_reset+0x59/0xea [ 2374.655048] [] ? report_mmio_enabled+0x5d/0x5d [ 2374.655057] [] report_slot_reset+0x2e/0x5d [ 2374.655072] [] pci_walk_bus+0x8a/0xb7 [ 2374.655081] [] ? report_mmio_enabled+0x5d/0x5d [ 2374.655091] [] broadcast_error_message+0xa4/0xb2 [ 2374.655101] [] ? pci_bus_read_config_dword+0x72/0x80 [ 2374.655110] [] do_recovery+0x9e/0xf9 [ 2374.655120] [] handle_error_source+0x4c/0x51 [ 2374.655129] [] aer_isr_one_error+0x1e9/0x21a [ 2374.655138] [] aer_isr+0xc7/0xcc [ 2374.655147] [] ? aer_isr_one_error+0x21a/0x21a [ 2374.655159] [] process_one_work+0x237/0x3ec [ 2374.655168] [] ? process_one_work+0x1a8/0x3ec [ 2374.655178] [] worker_thread+0x17c/0x240 [ 2374.655186] [] ? trace_hardirqs_on+0xd/0xf [ 2374.655196] [] ? 
manage_workers+0xab/0xab [ 2374.655209] [] kthread+0xa0/0xa8 [ 2374.655223] [] kernel_thread_helper+0x4/0x10 [ 2374.655232] [] ? retint_restore_args+0xe/0xe [ 2374.655243] [] ? __init_kthread_worker+0x5b/0x5b [ 2374.655252] [] ? gs_change+0xb/0xb when aer happens, pci_walk_bus already have down_read(&pci_bus_sem)... then report_slot_reset ==> e1000_io_slot_reset ==> e1000e_disable_aspm ==> pci_disable_link_state... We can not use pci_disable_link_state, and it will try to hold pci_bus_sem again. Try to have __pci_disable_link_state that will not need to hold pci_bus_sem. -v2: change name to pci_disable_link_state_locked() according to Jesse. [jbarnes: make sure new function is exported for modules] Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- drivers/net/e1000e/netdev.c | 2 +- drivers/pci/pcie/aspm.c | 19 ++++++++++++++++--- include/linux/pci-aspm.h | 1 + 3 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 506a0a0043b3..5fb43f098f17 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -5347,7 +5347,7 @@ static void e1000_complete_shutdown(struct pci_dev *pdev, bool sleep, #ifdef CONFIG_PCIEASPM static void __e1000e_disable_aspm(struct pci_dev *pdev, u16 state) { - pci_disable_link_state(pdev, state); + pci_disable_link_state_locked(pdev, state); } #else static void __e1000e_disable_aspm(struct pci_dev *pdev, u16 state) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 3eb667b24787..6892601fc76f 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -734,7 +734,7 @@ void pcie_aspm_powersave_config_link(struct pci_dev *pdev) * pci_disable_link_state - disable pci device's link state, so the link will * never enter specific states */ -void pci_disable_link_state(struct pci_dev *pdev, int state) +static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) { struct pci_dev *parent = pdev->bus->self; struct pcie_link_state *link; @@ -747,7 +747,8 @@ void pci_disable_link_state(struct pci_dev *pdev, int state) if (!parent || !parent->link_state) return; - down_read(&pci_bus_sem); + if (sem) + down_read(&pci_bus_sem); mutex_lock(&aspm_lock); link = parent->link_state; if (state & PCIE_LINK_STATE_L0S) @@ -761,7 +762,19 @@ void pci_disable_link_state(struct pci_dev *pdev, int state) pcie_set_clkpm(link, 0); } mutex_unlock(&aspm_lock); - up_read(&pci_bus_sem); + if (sem) + up_read(&pci_bus_sem); +} + +void pci_disable_link_state_locked(struct pci_dev *pdev, int state) +{ + __pci_disable_link_state(pdev, state, false); +} +EXPORT_SYMBOL(pci_disable_link_state_locked); + +void pci_disable_link_state(struct pci_dev *pdev, int state) +{ + __pci_disable_link_state(pdev, state, true); } EXPORT_SYMBOL(pci_disable_link_state); diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h index 67cb3ae38016..7cea7b6c1413 100644 --- a/include/linux/pci-aspm.h +++ b/include/linux/pci-aspm.h @@ -28,6 +28,7 @@ extern void pcie_aspm_exit_link_state(struct pci_dev *pdev); extern void pcie_aspm_pm_state_change(struct pci_dev *pdev); extern void pcie_aspm_powersave_config_link(struct pci_dev *pdev); extern void pci_disable_link_state(struct pci_dev *pdev, int state); +extern void pci_disable_link_state_locked(struct pci_dev *pdev, int state); extern void pcie_clear_aspm(void); extern void pcie_no_aspm(void); #else -- cgit v1.2.3 From 24a4742f0be6226eb0106fbb17caf4d711d1ad43 Mon Sep 17 00:00:00 2001 From: Alex Williamson 
Date: Tue, 10 May 2011 10:02:11 -0600 Subject: PCI: Track the size of each saved capability data area This will allow us to store and load it later. Signed-off-by: Alex Williamson Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 12 +++++++----- include/linux/pci.h | 11 ++++++++--- 2 files changed, 15 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 53302cbdb94c..d6e5b8ea9194 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -830,7 +830,7 @@ static int pci_save_pcie_state(struct pci_dev *dev) dev_err(&dev->dev, "buffer not found in %s\n", __func__); return -ENOMEM; } - cap = (u16 *)&save_state->data[0]; + cap = (u16 *)&save_state->cap.data[0]; pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &flags); @@ -863,7 +863,7 @@ static void pci_restore_pcie_state(struct pci_dev *dev) pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (!save_state || pos <= 0) return; - cap = (u16 *)&save_state->data[0]; + cap = (u16 *)&save_state->cap.data[0]; pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &flags); @@ -899,7 +899,8 @@ static int pci_save_pcix_state(struct pci_dev *dev) return -ENOMEM; } - pci_read_config_word(dev, pos + PCI_X_CMD, (u16 *)save_state->data); + pci_read_config_word(dev, pos + PCI_X_CMD, + (u16 *)save_state->cap.data); return 0; } @@ -914,7 +915,7 @@ static void pci_restore_pcix_state(struct pci_dev *dev) pos = pci_find_capability(dev, PCI_CAP_ID_PCIX); if (!save_state || pos <= 0) return; - cap = (u16 *)&save_state->data[0]; + cap = (u16 *)&save_state->cap.data[0]; pci_write_config_word(dev, pos + PCI_X_CMD, cap[i++]); } @@ -1771,7 +1772,8 @@ static int pci_add_cap_save_buffer( if (!save_state) return -ENOMEM; - save_state->cap_nr = cap; + save_state->cap.cap_nr = cap; + save_state->cap.size = size; pci_add_saved_cap(dev, save_state); return 0; diff --git a/include/linux/pci.h b/include/linux/pci.h index df4d69b82144..61ef8f2f9b19 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -214,12 +214,17 @@ enum pci_bus_speed { PCI_SPEED_UNKNOWN = 0xff, }; -struct pci_cap_saved_state { - struct hlist_node next; +struct pci_cap_saved_data { char cap_nr; + unsigned int size; u32 data[0]; }; +struct pci_cap_saved_state { + struct hlist_node next; + struct pci_cap_saved_data cap; +}; + struct pcie_link_state; struct pci_vpd; struct pci_sriov; @@ -366,7 +371,7 @@ static inline struct pci_cap_saved_state *pci_find_saved_cap( struct hlist_node *pos; hlist_for_each_entry(tmp, pos, &pci_dev->saved_cap_space, next) { - if (tmp->cap_nr == cap) + if (tmp->cap.cap_nr == cap) return tmp; } return NULL; -- cgit v1.2.3 From ffbdd3f7931fb7cb7e36d00d16303ec433be5145 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 10 May 2011 10:02:27 -0600 Subject: PCI: Add interfaces to store and load the device saved state For KVM device assignment, we'd like to save off the state of a device prior to passing it to the guest and restore it later. We also want to allow pci_reset_funciton() to be called while the device is owned by the guest. This however overwrites and invalidates the struct pci_dev buffers, so we can't just manually call save and restore. Add generic interfaces for the saved state to be stored and reloaded back into struct pci_dev at a later time. 
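For illustration, a minimal sketch (not part of the patch) of the intended call pattern: snapshot the saved state before handing the device out, reload it when the device comes back so an intervening reset cannot clobber it; the example_ names are placeholders.

/*
 * Illustrative sketch only, not part of the patch. The example_ names
 * are made-up placeholders.
 */
#include <linux/pci.h>

static struct pci_saved_state *example_saved;

static void example_claim(struct pci_dev *pdev)
{
	pci_save_state(pdev);
	example_saved = pci_store_saved_state(pdev);
	if (!example_saved)
		dev_warn(&pdev->dev, "no saved state to snapshot\n");
}

static void example_release(struct pci_dev *pdev)
{
	pci_reset_function(pdev);
	if (pci_load_and_free_saved_state(pdev, &example_saved) == 0)
		pci_restore_state(pdev);
}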
Signed-off-by: Alex Williamson Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 4 +++ 2 files changed, 102 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d6e5b8ea9194..22c9b27fdd8d 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -976,6 +976,104 @@ void pci_restore_state(struct pci_dev *dev) dev->state_saved = false; } +struct pci_saved_state { + u32 config_space[16]; + struct pci_cap_saved_data cap[0]; +}; + +/** + * pci_store_saved_state - Allocate and return an opaque struct containing + * the device saved state. + * @dev: PCI device that we're dealing with + * + * Rerturn NULL if no state or error. + */ +struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev) +{ + struct pci_saved_state *state; + struct pci_cap_saved_state *tmp; + struct pci_cap_saved_data *cap; + struct hlist_node *pos; + size_t size; + + if (!dev->state_saved) + return NULL; + + size = sizeof(*state) + sizeof(struct pci_cap_saved_data); + + hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next) + size += sizeof(struct pci_cap_saved_data) + tmp->cap.size; + + state = kzalloc(size, GFP_KERNEL); + if (!state) + return NULL; + + memcpy(state->config_space, dev->saved_config_space, + sizeof(state->config_space)); + + cap = state->cap; + hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next) { + size_t len = sizeof(struct pci_cap_saved_data) + tmp->cap.size; + memcpy(cap, &tmp->cap, len); + cap = (struct pci_cap_saved_data *)((u8 *)cap + len); + } + /* Empty cap_save terminates list */ + + return state; +} +EXPORT_SYMBOL_GPL(pci_store_saved_state); + +/** + * pci_load_saved_state - Reload the provided save state into struct pci_dev. + * @dev: PCI device that we're dealing with + * @state: Saved state returned from pci_store_saved_state() + */ +int pci_load_saved_state(struct pci_dev *dev, struct pci_saved_state *state) +{ + struct pci_cap_saved_data *cap; + + dev->state_saved = false; + + if (!state) + return 0; + + memcpy(dev->saved_config_space, state->config_space, + sizeof(state->config_space)); + + cap = state->cap; + while (cap->size) { + struct pci_cap_saved_state *tmp; + + tmp = pci_find_saved_cap(dev, cap->cap_nr); + if (!tmp || tmp->cap.size != cap->size) + return -EINVAL; + + memcpy(tmp->cap.data, cap->data, tmp->cap.size); + cap = (struct pci_cap_saved_data *)((u8 *)cap + + sizeof(struct pci_cap_saved_data) + cap->size); + } + + dev->state_saved = true; + return 0; +} +EXPORT_SYMBOL_GPL(pci_load_saved_state); + +/** + * pci_load_and_free_saved_state - Reload the save state pointed to by state, + * and free the memory allocated for it. 
+ * @dev: PCI device that we're dealing with + * @state: Pointer to saved state returned from pci_store_saved_state() + */ +int pci_load_and_free_saved_state(struct pci_dev *dev, + struct pci_saved_state **state) +{ + int ret = pci_load_saved_state(dev, *state); + kfree(*state); + *state = NULL; + return ret; +} +EXPORT_SYMBOL_GPL(pci_load_and_free_saved_state); + static int do_pci_enable_device(struct pci_dev *dev, int bars) { int err; diff --git a/include/linux/pci.h b/include/linux/pci.h index 61ef8f2f9b19..4604d1d5514d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -812,6 +812,10 @@ size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size); /* Power management related routines */ int pci_save_state(struct pci_dev *dev); void pci_restore_state(struct pci_dev *dev); +struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev); +int pci_load_saved_state(struct pci_dev *dev, struct pci_saved_state *state); +int pci_load_and_free_saved_state(struct pci_dev *dev, + struct pci_saved_state **state); int __pci_complete_power_transition(struct pci_dev *dev, pci_power_t state); int pci_set_power_state(struct pci_dev *dev, pci_power_t state); pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state); -- cgit v1.2.3 From f8fcfd775523347afe460dc3a0f45d0479e784a2 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 10 May 2011 10:02:39 -0600 Subject: KVM: Use pci_store/load_saved_state() around VM device usage Store the device saved state so that we can reload the device back to the original state when it's unassigned. This has the benefit that the state survives across pci_reset_function() calls via the PCI sysfs reset interface while the VM is using the device. Signed-off-by: Alex Williamson Acked-by: Avi Kivity Signed-off-by: Jesse Barnes --- include/linux/kvm_host.h | 1 + virt/kvm/assigned-dev.c | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ab428552af8e..9272db03a3e5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -513,6 +513,7 @@ struct kvm_assigned_dev_kernel { struct kvm *kvm; spinlock_t intx_lock; char irq_name[32]; + struct pci_saved_state *pci_saved_state; }; struct kvm_irq_mask_notifier { diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index ae72ae604c89..6cc4b97ec458 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -197,8 +197,13 @@ static void kvm_free_assigned_device(struct kvm *kvm, { kvm_free_assigned_irq(kvm, assigned_dev); - __pci_reset_function(assigned_dev->dev); - pci_restore_state(assigned_dev->dev); + pci_reset_function(assigned_dev->dev); + if (pci_load_and_free_saved_state(assigned_dev->dev, + &assigned_dev->pci_saved_state)) + printk(KERN_INFO "%s: Couldn't reload %s saved state\n", + __func__, dev_name(&assigned_dev->dev->dev)); + else + pci_restore_state(assigned_dev->dev); pci_release_regions(assigned_dev->dev); pci_disable_device(assigned_dev->dev); @@ -516,7 +521,10 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, pci_reset_function(dev); pci_save_state(dev); - + match->pci_saved_state = pci_store_saved_state(dev); + if (!match->pci_saved_state) + printk(KERN_DEBUG "%s: Couldn't store %s saved state\n", + __func__, dev_name(&dev->dev)); match->assigned_dev_id = assigned_dev->assigned_dev_id; match->host_segnr = assigned_dev->segnr; match->host_busnr = assigned_dev->busnr; @@ -546,7 +554,9 @@ out: 
mutex_unlock(&kvm->lock); return r; out_list_del: - pci_restore_state(dev); + if (pci_load_and_free_saved_state(dev, &match->pci_saved_state)) + printk(KERN_INFO "%s: Couldn't reload %s saved state\n", + __func__, dev_name(&dev->dev)); list_del(&match->list); pci_release_regions(dev); out_disable: -- cgit v1.2.3 From 5ce941ee4258b836cf818d2ac159d8cf3ebad648 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 27 Apr 2011 17:24:21 -0500 Subject: KVM: PPC: booke: add sregs support Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- Documentation/kvm/api.txt | 6 +- arch/powerpc/include/asm/kvm.h | 184 ++++++++++++++++++++++++++++++++++++ arch/powerpc/include/asm/kvm_44x.h | 1 - arch/powerpc/include/asm/kvm_e500.h | 1 + arch/powerpc/include/asm/kvm_host.h | 3 + arch/powerpc/include/asm/kvm_ppc.h | 9 ++ arch/powerpc/kvm/44x.c | 10 ++ arch/powerpc/kvm/booke.c | 154 +++++++++++++++++++++++++++++- arch/powerpc/kvm/e500.c | 75 +++++++++++++++ arch/powerpc/kvm/e500_emulate.c | 5 +- arch/powerpc/kvm/e500_tlb.c | 8 ++ arch/powerpc/kvm/emulate.c | 13 ++- arch/powerpc/kvm/powerpc.c | 4 + include/linux/kvm.h | 1 + 14 files changed, 461 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 1b9eaa7e8856..f64c41f8ba61 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -261,7 +261,7 @@ See KVM_GET_REGS for the data structure. 4.13 KVM_GET_SREGS Capability: basic -Architectures: x86 +Architectures: x86, ppc Type: vcpu ioctl Parameters: struct kvm_sregs (out) Returns: 0 on success, -1 on error @@ -279,6 +279,8 @@ struct kvm_sregs { __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; }; +/* ppc -- see arch/powerpc/include/asm/kvm.h */ + interrupt_bitmap is a bitmap of pending external interrupts. At most one bit may be set. This interrupt has been acknowledged by the APIC but not yet injected into the cpu core. @@ -286,7 +288,7 @@ but not yet injected into the cpu core. 4.14 KVM_SET_SREGS Capability: basic -Architectures: x86 +Architectures: x86, ppc Type: vcpu ioctl Parameters: struct kvm_sregs (in) Returns: 0 on success, -1 on error diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 18ea6963ad77..d2ca5ed3877b 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -45,6 +45,114 @@ struct kvm_regs { __u64 gpr[32]; }; +#define KVM_SREGS_E_IMPL_NONE 0 +#define KVM_SREGS_E_IMPL_FSL 1 + +#define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */ + +/* + * Feature bits indicate which sections of the sregs struct are valid, + * both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers + * corresponding to unset feature bits will not be modified. This allows + * restoring a checkpoint made without that feature, while keeping the + * default values of the new registers. + * + * KVM_SREGS_E_BASE contains: + * CSRR0/1 (refers to SRR2/3 on 40x) + * ESR + * DEAR + * MCSR + * TSR + * TCR + * DEC + * TB + * VRSAVE (USPRG0) + */ +#define KVM_SREGS_E_BASE (1 << 0) + +/* + * KVM_SREGS_E_ARCH206 contains: + * + * PIR + * MCSRR0/1 + * DECAR + * IVPR + */ +#define KVM_SREGS_E_ARCH206 (1 << 1) + +/* + * Contains EPCR, plus the upper half of 64-bit registers + * that are 32-bit on 32-bit implementations. + */ +#define KVM_SREGS_E_64 (1 << 2) + +#define KVM_SREGS_E_SPRG8 (1 << 3) +#define KVM_SREGS_E_MCIVPR (1 << 4) + +/* + * IVORs are used -- contains IVOR0-15, plus additional IVORs + * in combination with an appropriate feature bit. 
+ */ +#define KVM_SREGS_E_IVOR (1 << 5) + +/* + * Contains MAS0-4, MAS6-7, TLBnCFG, MMUCFG. + * Also TLBnPS if MMUCFG[MAVN] = 1. + */ +#define KVM_SREGS_E_ARCH206_MMU (1 << 6) + +/* DBSR, DBCR, IAC, DAC, DVC */ +#define KVM_SREGS_E_DEBUG (1 << 7) + +/* Enhanced debug -- DSRR0/1, SPRG9 */ +#define KVM_SREGS_E_ED (1 << 8) + +/* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_SPE (1 << 9) + +/* External Proxy (EXP) -- EPR */ +#define KVM_SREGS_EXP (1 << 10) + +/* External PID (E.PD) -- EPSC/EPLC */ +#define KVM_SREGS_E_PD (1 << 11) + +/* Processor Control (E.PC) -- IVOR36-37 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_PC (1 << 12) + +/* Page table (E.PT) -- EPTCFG */ +#define KVM_SREGS_E_PT (1 << 13) + +/* Embedded Performance Monitor (E.PM) -- IVOR35 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_PM (1 << 14) + +/* + * Special updates: + * + * Some registers may change even while a vcpu is not running. + * To avoid losing these changes, by default these registers are + * not updated by KVM_SET_SREGS. To force an update, set the bit + * in u.e.update_special corresponding to the register to be updated. + * + * The update_special field is zero on return from KVM_GET_SREGS. + * + * When restoring a checkpoint, the caller can set update_special + * to 0xffffffff to ensure that everything is restored, even new features + * that the caller doesn't know about. + */ +#define KVM_SREGS_E_UPDATE_MCSR (1 << 0) +#define KVM_SREGS_E_UPDATE_TSR (1 << 1) +#define KVM_SREGS_E_UPDATE_DEC (1 << 2) +#define KVM_SREGS_E_UPDATE_DBSR (1 << 3) + +/* + * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a + * previous KVM_GET_REGS. + * + * Unless otherwise indicated, setting any register with KVM_SET_SREGS + * directly sets its value. It does not trigger any special semantics such + * as write-one-to-clear. Calling KVM_SET_SREGS on an unmodified struct + * just received from KVM_GET_SREGS is always a no-op. + */ struct kvm_sregs { __u32 pvr; union { @@ -62,6 +170,82 @@ struct kvm_sregs { __u64 dbat[8]; } ppc32; } s; + struct { + union { + struct { /* KVM_SREGS_E_IMPL_FSL */ + __u32 features; /* KVM_SREGS_E_FSL_ */ + __u32 svr; + __u64 mcar; + __u32 hid0; + + /* KVM_SREGS_E_FSL_PIDn */ + __u32 pid1, pid2; + } fsl; + __u8 pad[256]; + } impl; + + __u32 features; /* KVM_SREGS_E_ */ + __u32 impl_id; /* KVM_SREGS_E_IMPL_ */ + __u32 update_special; /* KVM_SREGS_E_UPDATE_ */ + __u32 pir; /* read-only */ + __u64 sprg8; + __u64 sprg9; /* E.ED */ + __u64 csrr0; + __u64 dsrr0; /* E.ED */ + __u64 mcsrr0; + __u32 csrr1; + __u32 dsrr1; /* E.ED */ + __u32 mcsrr1; + __u32 esr; + __u64 dear; + __u64 ivpr; + __u64 mcivpr; + __u64 mcsr; /* KVM_SREGS_E_UPDATE_MCSR */ + + __u32 tsr; /* KVM_SREGS_E_UPDATE_TSR */ + __u32 tcr; + __u32 decar; + __u32 dec; /* KVM_SREGS_E_UPDATE_DEC */ + + /* + * Userspace can read TB directly, but the + * value reported here is consistent with "dec". + * + * Read-only. + */ + __u64 tb; + + __u32 dbsr; /* KVM_SREGS_E_UPDATE_DBSR */ + __u32 dbcr[3]; + __u32 iac[4]; + __u32 dac[2]; + __u32 dvc[2]; + __u8 num_iac; /* read-only */ + __u8 num_dac; /* read-only */ + __u8 num_dvc; /* read-only */ + __u8 pad; + + __u32 epr; /* EXP */ + __u32 vrsave; /* a.k.a. 
USPRG0 */ + __u32 epcr; /* KVM_SREGS_E_64 */ + + __u32 mas0; + __u32 mas1; + __u64 mas2; + __u64 mas7_3; + __u32 mas4; + __u32 mas6; + + __u32 ivor_low[16]; /* IVOR0-15 */ + __u32 ivor_high[18]; /* IVOR32+, plus room to expand */ + + __u32 mmucfg; /* read-only */ + __u32 eptcfg; /* E.PT, read-only */ + __u32 tlbcfg[4];/* read-only */ + __u32 tlbps[4]; /* read-only */ + + __u32 eplc, epsc; /* E.PD */ + } e; __u8 pad[1020]; } u; }; diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h index d22d39942a92..a0e57618ff33 100644 --- a/arch/powerpc/include/asm/kvm_44x.h +++ b/arch/powerpc/include/asm/kvm_44x.h @@ -61,7 +61,6 @@ static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu) return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu); } -void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid); void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu); void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h index bb2a0890600f..7a2a565f88c4 100644 --- a/arch/powerpc/include/asm/kvm_e500.h +++ b/arch/powerpc/include/asm/kvm_e500.h @@ -59,6 +59,7 @@ struct kvmppc_vcpu_e500 { u32 hid1; u32 tlb0cfg; u32 tlb1cfg; + u64 mcar; struct kvm_vcpu vcpu; }; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index a16804399165..186f150b9b89 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -233,6 +233,9 @@ struct kvm_vcpu_arch { ulong csrr1; ulong dsrr0; ulong dsrr1; + ulong mcsrr0; + ulong mcsrr1; + ulong mcsr; ulong esr; u32 dec; u32 decar; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index ecb3bc74c344..9345238edecf 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -61,6 +61,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); +extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); /* Core-specific hooks */ @@ -142,4 +143,12 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value) return r; } +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); + +void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); + +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); + #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 74d0e7421143..da3a1225c0ac 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -107,6 +107,16 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, return 0; } +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + kvmppc_get_sregs_ivor(vcpu, sregs); +} + +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + return kvmppc_set_sregs_ivor(vcpu, sregs); +} + struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvmppc_vcpu_44x *vcpu_44x; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index ef76acb455c3..8462b3a1c1c7 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -569,6 +569,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 
kvmppc_set_msr(vcpu, regs->msr); vcpu->arch.shared->srr0 = regs->srr0; vcpu->arch.shared->srr1 = regs->srr1; + kvmppc_set_pid(vcpu, regs->pid); vcpu->arch.shared->sprg0 = regs->sprg0; vcpu->arch.shared->sprg1 = regs->sprg1; vcpu->arch.shared->sprg2 = regs->sprg2; @@ -584,16 +585,165 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return 0; } +static void get_sregs_base(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + u64 tb = get_tb(); + + sregs->u.e.features |= KVM_SREGS_E_BASE; + + sregs->u.e.csrr0 = vcpu->arch.csrr0; + sregs->u.e.csrr1 = vcpu->arch.csrr1; + sregs->u.e.mcsr = vcpu->arch.mcsr; + sregs->u.e.esr = vcpu->arch.esr; + sregs->u.e.dear = vcpu->arch.shared->dar; + sregs->u.e.tsr = vcpu->arch.tsr; + sregs->u.e.tcr = vcpu->arch.tcr; + sregs->u.e.dec = kvmppc_get_dec(vcpu, tb); + sregs->u.e.tb = tb; + sregs->u.e.vrsave = vcpu->arch.vrsave; +} + +static int set_sregs_base(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_BASE)) + return 0; + + vcpu->arch.csrr0 = sregs->u.e.csrr0; + vcpu->arch.csrr1 = sregs->u.e.csrr1; + vcpu->arch.mcsr = sregs->u.e.mcsr; + vcpu->arch.esr = sregs->u.e.esr; + vcpu->arch.shared->dar = sregs->u.e.dear; + vcpu->arch.vrsave = sregs->u.e.vrsave; + vcpu->arch.tcr = sregs->u.e.tcr; + + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) + vcpu->arch.dec = sregs->u.e.dec; + + kvmppc_emulate_dec(vcpu); + + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { + /* + * FIXME: existing KVM timer handling is incomplete. + * TSR cannot be read by the guest, and its value in + * vcpu->arch is always zero. For now, just handle + * the case where the caller is trying to inject a + * decrementer interrupt. + */ + + if ((sregs->u.e.tsr & TSR_DIS) && + (vcpu->arch.tcr & TCR_DIE)) + kvmppc_core_queue_dec(vcpu); + } + + return 0; +} + +static void get_sregs_arch206(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + sregs->u.e.features |= KVM_SREGS_E_ARCH206; + + sregs->u.e.pir = 0; + sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0; + sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1; + sregs->u.e.decar = vcpu->arch.decar; + sregs->u.e.ivpr = vcpu->arch.ivpr; +} + +static int set_sregs_arch206(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206)) + return 0; + + if (sregs->u.e.pir != 0) + return -EINVAL; + + vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0; + vcpu->arch.mcsrr1 = sregs->u.e.mcsrr1; + vcpu->arch.decar = sregs->u.e.decar; + vcpu->arch.ivpr = sregs->u.e.ivpr; + + return 0; +} + +void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + sregs->u.e.features |= KVM_SREGS_E_IVOR; + + sregs->u.e.ivor_low[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; + sregs->u.e.ivor_low[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; + sregs->u.e.ivor_low[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; + sregs->u.e.ivor_low[3] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; + sregs->u.e.ivor_low[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; + sregs->u.e.ivor_low[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; + sregs->u.e.ivor_low[6] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; + sregs->u.e.ivor_low[7] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; + sregs->u.e.ivor_low[8] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; + sregs->u.e.ivor_low[9] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; + sregs->u.e.ivor_low[10] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; + sregs->u.e.ivor_low[11] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; + sregs->u.e.ivor_low[12] = 
vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; + sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; + sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; + sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; +} + +int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) + return 0; + + vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = sregs->u.e.ivor_low[0]; + vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = sregs->u.e.ivor_low[1]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = sregs->u.e.ivor_low[2]; + vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = sregs->u.e.ivor_low[3]; + vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = sregs->u.e.ivor_low[4]; + vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = sregs->u.e.ivor_low[5]; + vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = sregs->u.e.ivor_low[6]; + vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = sregs->u.e.ivor_low[7]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = sregs->u.e.ivor_low[8]; + vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = sregs->u.e.ivor_low[9]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = sregs->u.e.ivor_low[10]; + vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = sregs->u.e.ivor_low[11]; + vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = sregs->u.e.ivor_low[12]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = sregs->u.e.ivor_low[13]; + vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = sregs->u.e.ivor_low[14]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = sregs->u.e.ivor_low[15]; + + return 0; +} + int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - return -ENOTSUPP; + sregs->pvr = vcpu->arch.pvr; + + get_sregs_base(vcpu, sregs); + get_sregs_arch206(vcpu, sregs); + kvmppc_core_get_sregs(vcpu, sregs); + return 0; } int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - return -ENOTSUPP; + int ret; + + if (vcpu->arch.pvr != sregs->pvr) + return -EINVAL; + + ret = set_sregs_base(vcpu, sregs); + if (ret < 0) + return ret; + + ret = set_sregs_arch206(vcpu, sregs); + if (ret < 0) + return ret; + + return kvmppc_core_set_sregs(vcpu, sregs); } int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 0c1af1267843..318dbc61ba44 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -97,6 +97,81 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, return 0; } +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_SPE | + KVM_SREGS_E_PM; + sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL; + + sregs->u.e.impl.fsl.features = 0; + sregs->u.e.impl.fsl.svr = vcpu_e500->svr; + sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0; + sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar; + + sregs->u.e.mas0 = vcpu_e500->mas0; + sregs->u.e.mas1 = vcpu_e500->mas1; + sregs->u.e.mas2 = vcpu_e500->mas2; + sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3; + sregs->u.e.mas4 = vcpu_e500->mas4; + sregs->u.e.mas6 = vcpu_e500->mas6; + + sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG); + sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg; + sregs->u.e.tlbcfg[1] = vcpu_e500->tlb1cfg; + sregs->u.e.tlbcfg[2] = 0; + sregs->u.e.tlbcfg[3] = 0; + + sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; + sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; + sregs->u.e.ivor_high[2] = 
vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; + sregs->u.e.ivor_high[3] = + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; + + kvmppc_get_sregs_ivor(vcpu, sregs); +} + +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { + vcpu_e500->svr = sregs->u.e.impl.fsl.svr; + vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0; + vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar; + } + + if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) { + vcpu_e500->mas0 = sregs->u.e.mas0; + vcpu_e500->mas1 = sregs->u.e.mas1; + vcpu_e500->mas2 = sregs->u.e.mas2; + vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32; + vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3; + vcpu_e500->mas4 = sregs->u.e.mas4; + vcpu_e500->mas6 = sregs->u.e.mas6; + } + + if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) + return 0; + + if (sregs->u.e.features & KVM_SREGS_E_SPE) { + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = + sregs->u.e.ivor_high[0]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = + sregs->u.e.ivor_high[1]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = + sregs->u.e.ivor_high[2]; + } + + if (sregs->u.e.features & KVM_SREGS_E_PM) { + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = + sregs->u.e.ivor_high[3]; + } + + return kvmppc_set_sregs_ivor(vcpu, sregs); +} + struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvmppc_vcpu_e500 *vcpu_e500; diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index e2fb47f035a5..69cd665a0caf 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. 
* * Author: Yu Liu, * @@ -78,8 +78,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) switch (sprn) { case SPRN_PID: - vcpu_e500->pid[0] = vcpu->arch.shadow_pid = - vcpu->arch.pid = spr_val; + kvmppc_set_pid(vcpu, spr_val); break; case SPRN_PID1: vcpu_e500->pid[1] = spr_val; break; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 56ac4523857d..b18fe353397d 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -675,6 +675,14 @@ int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, return -1; } +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + vcpu_e500->pid[0] = vcpu->arch.shadow_pid = + vcpu->arch.pid = pid; +} + void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) { struct tlbe *tlbe; diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 8f7a3aa03c26..141dce3c6810 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -114,6 +114,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) } } +u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb) +{ + u64 jd = tb - vcpu->arch.dec_jiffies; + return vcpu->arch.dec - jd; +} + /* XXX to do: * lhax * lhaux @@ -279,11 +285,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case SPRN_DEC: { - u64 jd = get_tb() - vcpu->arch.dec_jiffies; - kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd); - pr_debug("mfDEC: %x - %llx = %lx\n", - vcpu->arch.dec, jd, - kvmppc_get_gpr(vcpu, rt)); + kvmppc_set_gpr(vcpu, rt, + kvmppc_get_dec(vcpu, get_tb())); break; } default: diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 9e6aa8bfd160..616dd516ca1f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -175,7 +175,11 @@ int kvm_dev_ioctl_check_extension(long ext) int r; switch (ext) { +#ifdef CONFIG_BOOKE + case KVM_CAP_PPC_BOOKE_SREGS: +#else case KVM_CAP_PPC_SEGSTATE: +#endif case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_UNSET_IRQ: case KVM_CAP_PPC_IRQ_LEVEL: diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 2f63ebeac639..55ef181521ff 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -543,6 +543,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_ASYNC_PF 59 #define KVM_CAP_TSC_CONTROL 60 #define KVM_CAP_GET_TSC_KHZ 61 +#define KVM_CAP_PPC_BOOKE_SREGS 62 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 8fa2206821953a50a3a02ea33fcfb3ced2fd9997 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 4 May 2011 16:31:04 +0300 Subject: KVM: make guest mode entry to be rcu quiescent state KVM does not hold any references to rcu protected data when it switches CPU into a guest mode. In fact switching to a guest mode is very similar to exiting to userspase from rcu point of view. In addition CPU may stay in a guest mode for quite a long time (up to one time slice). Lets treat guest mode as quiescent state, just like we do with user-mode execution. 
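As a rough sketch of the context this relies on (not from the patch; vcpu_should_run() is a hypothetical stand-in for the arch exit condition), kvm_guest_enter() is already called with preemption disabled right before dropping into the guest, which is why noting a quiescent state there is as safe as doing it on return to user mode:

/* Illustrative vcpu run loop; only the kvm_guest_enter/exit calls are real API. */
static void vcpu_run_sketch(struct kvm_vcpu *vcpu)
{
	while (vcpu_should_run(vcpu)) {		/* hypothetical exit condition */
		preempt_disable();
		kvm_guest_enter();		/* now also notes an RCU quiescent state */

		/* hardware guest entry and exit would happen here */

		kvm_guest_exit();
		preempt_enable();
		cond_resched();
	}
}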
Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0bc3d372e3cb..b9c3299c6a55 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -591,8 +591,17 @@ static inline int kvm_deassign_device(struct kvm *kvm, static inline void kvm_guest_enter(void) { + BUG_ON(preemptible()); account_system_vtime(current); current->flags |= PF_VCPU; + /* KVM does not hold any references to rcu protected data when it + * switches CPU into a guest mode. In fact switching to a guest mode + * is very similar to exiting to userspase from rcu point of view. In + * addition CPU may stay in a guest mode for quite a long time (up to + * one time slice). Lets treat guest mode as quiescent state, just like + * we do with user-mode execution. + */ + rcu_virt_note_context_switch(smp_processor_id()); } static inline void kvm_guest_exit(void) -- cgit v1.2.3 From 34ea646c9f8c18fd2e4332ff3b2b509f878c56f1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 22 May 2011 18:55:10 +0200 Subject: net: add missing prefetch.h include Fixes build errors on s390 and probably other archs as well: In file included from net/ipv4/ip_forward.c:32:0: include/net/udp.h: In function 'udp_csum_outgoing': include/net/udp.h:141:2: error: implicit declaration of function 'prefetch' Signed-off-by: Heiko Carstens Signed-off-by: Linus Torvalds --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 79aafbbf430a..827681540d6f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -28,6 +28,7 @@ #include #include #include +#include #include /* Don't change this without changing skb_csum_unnecessary! */ -- cgit v1.2.3 From 0fcbe742eaac14bd5032b369c09e9d94be9058ad Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 22 May 2011 20:35:29 -0400 Subject: net: Remove prefetches from SKB list handlers. Noticed by Linus. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 827681540d6f..09901fdd73ae 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1783,7 +1783,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) #define skb_queue_walk(queue, skb) \ for (skb = (queue)->next; \ - prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ + (skb != (struct sk_buff *)(queue)); \ skb = skb->next) #define skb_queue_walk_safe(queue, skb, tmp) \ @@ -1792,7 +1792,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) skb = tmp, tmp = skb->next) #define skb_queue_walk_from(queue, skb) \ - for (; prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ + for (; (skb != (struct sk_buff *)(queue)); \ skb = skb->next) #define skb_queue_walk_from_safe(queue, skb, tmp) \ @@ -1802,7 +1802,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) #define skb_queue_reverse_walk(queue, skb) \ for (skb = (queue)->prev; \ - prefetch(skb->prev), (skb != (struct sk_buff *)(queue)); \ + (skb != (struct sk_buff *)(queue)); \ skb = skb->prev) #define skb_queue_reverse_walk_safe(queue, skb, tmp) \ -- cgit v1.2.3 From 67f11f4deda0818640decb19a28c537dbe5d429e Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Sun, 22 May 2011 20:54:11 -0400 Subject: net: Remove linux/prefetch.h include from linux/skbuff.h No longer needed. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 09901fdd73ae..8cac356b77b2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -28,7 +28,6 @@ #include #include #include -#include #include /* Don't change this without changing skb_csum_unnecessary! */ -- cgit v1.2.3 From 8d8fc29d02a33e4bd5f4fa47823c1fd386346093 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Thu, 19 May 2011 21:39:10 +0000 Subject: netpoll: disable netpoll when enslave a device V3: rename NETDEV_ENSLAVE to NETDEV_JOIN Currently we do nothing when we enslave a net device which is running netconsole. Neil pointed out that we may get weird results in such case, so let's disable netpoll on the device being enslaved. I think it is too harsh to prevent the device being ensalved if it is running netconsole. By the way, this patch also removes the NETDEV_GOING_DOWN from netconsole netdev notifier, because netpoll will check if the device is running or not and we don't handle NETDEV_PRE_UP neither. This patch is based on net-next-2.6. Signed-off-by: WANG Cong Cc: Neil Horman Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 ++ drivers/net/netconsole.c | 26 +++++++++++++++++--------- include/linux/notifier.h | 1 + 3 files changed, 20 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 088fd845ffdf..f4960f516c39 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1640,6 +1640,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) } } + call_netdevice_notifiers(NETDEV_JOIN, slave_dev); + /* If this is the first slave, then we need to set the master's hardware * address to be the same as the slave's. */ if (is_zero_ether_addr(bond->dev->dev_addr)) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index a83e101440fd..4190786de403 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -621,11 +621,10 @@ static int netconsole_netdev_event(struct notifier_block *this, bool stopped = false; if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER || - event == NETDEV_BONDING_DESLAVE || event == NETDEV_GOING_DOWN)) + event == NETDEV_BONDING_DESLAVE || event == NETDEV_JOIN)) goto done; spin_lock_irqsave(&target_list_lock, flags); -restart: list_for_each_entry(nt, &target_list, list) { netconsole_target_get(nt); if (nt->np.dev == dev) { @@ -633,6 +632,8 @@ restart: case NETDEV_CHANGENAME: strlcpy(nt->np.dev_name, dev->name, IFNAMSIZ); break; + case NETDEV_BONDING_DESLAVE: + case NETDEV_JOIN: case NETDEV_UNREGISTER: /* * rtnl_lock already held @@ -647,11 +648,7 @@ restart: dev_put(nt->np.dev); nt->np.dev = NULL; netconsole_target_put(nt); - goto restart; } - /* Fall through */ - case NETDEV_GOING_DOWN: - case NETDEV_BONDING_DESLAVE: nt->enabled = 0; stopped = true; break; @@ -660,10 +657,21 @@ restart: netconsole_target_put(nt); } spin_unlock_irqrestore(&target_list_lock, flags); - if (stopped && (event == NETDEV_UNREGISTER || event == NETDEV_BONDING_DESLAVE)) + if (stopped) { printk(KERN_INFO "netconsole: network logging stopped on " - "interface %s as it %s\n", dev->name, - event == NETDEV_UNREGISTER ? 
"unregistered" : "released slaves"); + "interface %s as it ", dev->name); + switch (event) { + case NETDEV_UNREGISTER: + printk(KERN_CONT "unregistered\n"); + break; + case NETDEV_BONDING_DESLAVE: + printk(KERN_CONT "released slaves\n"); + break; + case NETDEV_JOIN: + printk(KERN_CONT "is joining a master device\n"); + break; + } + } done: return NOTIFY_DONE; diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 621dfa16acc0..a577762afbe7 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -211,6 +211,7 @@ static inline int notifier_to_errno(int ret) #define NETDEV_UNREGISTER_BATCH 0x0011 #define NETDEV_BONDING_DESLAVE 0x0012 #define NETDEV_NOTIFY_PEERS 0x0013 +#define NETDEV_JOIN 0x0014 #define SYS_DOWN 0x0001 /* Notify of system down */ #define SYS_RESTART SYS_DOWN -- cgit v1.2.3 From daf9209bb2c8b07ca025eac82e3d175534086c77 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Thu, 19 May 2011 21:39:12 +0000 Subject: net: rename NETDEV_BONDING_DESLAVE to NETDEV_RELEASE s/NETDEV_BONDING_DESLAVE/NETDEV_RELEASE/ as Andy suggested. Signed-off-by: WANG Cong Cc: Andy Gospodarek Cc: Neil Horman Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- drivers/net/netconsole.c | 6 +++--- include/linux/notifier.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index f4960f516c39..6dc428461541 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1974,7 +1974,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) } block_netpoll_tx(); - netdev_bonding_change(bond_dev, NETDEV_BONDING_DESLAVE); + netdev_bonding_change(bond_dev, NETDEV_RELEASE); write_lock_bh(&bond->lock); slave = bond_get_slave_by_dev(bond, slave_dev); diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 4190786de403..dfc82720065a 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -621,7 +621,7 @@ static int netconsole_netdev_event(struct notifier_block *this, bool stopped = false; if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER || - event == NETDEV_BONDING_DESLAVE || event == NETDEV_JOIN)) + event == NETDEV_RELEASE || event == NETDEV_JOIN)) goto done; spin_lock_irqsave(&target_list_lock, flags); @@ -632,7 +632,7 @@ static int netconsole_netdev_event(struct notifier_block *this, case NETDEV_CHANGENAME: strlcpy(nt->np.dev_name, dev->name, IFNAMSIZ); break; - case NETDEV_BONDING_DESLAVE: + case NETDEV_RELEASE: case NETDEV_JOIN: case NETDEV_UNREGISTER: /* @@ -664,7 +664,7 @@ static int netconsole_netdev_event(struct notifier_block *this, case NETDEV_UNREGISTER: printk(KERN_CONT "unregistered\n"); break; - case NETDEV_BONDING_DESLAVE: + case NETDEV_RELEASE: printk(KERN_CONT "released slaves\n"); break; case NETDEV_JOIN: diff --git a/include/linux/notifier.h b/include/linux/notifier.h index a577762afbe7..c0688b0168b3 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -209,7 +209,7 @@ static inline int notifier_to_errno(int ret) #define NETDEV_POST_TYPE_CHANGE 0x000F #define NETDEV_POST_INIT 0x0010 #define NETDEV_UNREGISTER_BATCH 0x0011 -#define NETDEV_BONDING_DESLAVE 0x0012 +#define NETDEV_RELEASE 0x0012 #define NETDEV_NOTIFY_PEERS 0x0013 #define NETDEV_JOIN 0x0014 -- cgit v1.2.3 From c4264f27e83968ddfe3f0cfe7a33adfb320e1e42 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sat, 21 May 2011 19:46:09 +0000 Subject: net: skb_trim 
explicitely check the linearity instead of data_len The purpose of the check on data_len is to check linearity, so use the inline helper for this. No overhead and more explicit. Signed-off-by: Emmanuel Grumbach Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8cac356b77b2..aeaad97e6815 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1442,7 +1442,7 @@ extern int ___pskb_trim(struct sk_buff *skb, unsigned int len); static inline void __skb_trim(struct sk_buff *skb, unsigned int len) { - if (unlikely(skb->data_len)) { + if (unlikely(skb_is_nonlinear(skb))) { WARN_ON(1); return; } -- cgit v1.2.3 From 792d4b5cb16b684958c2590f77688667ddec1f61 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 22 May 2011 07:08:11 +0000 Subject: net: filter: move forward declarations to avoid compile warnings Get rid of this compile warning: In file included from arch/s390/kernel/compat_linux.c:37:0: include/linux/filter.h:139:23: warning: 'struct sk_buff' declared inside parameter list Signed-off-by: Heiko Carstens Signed-off-by: David S. Miller --- include/linux/filter.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 4609b85e559d..9ee3f9fb0b4a 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -131,6 +131,10 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ #define SKF_LL_OFF (-0x200000) #ifdef __KERNEL__ + +struct sk_buff; +struct sock; + struct sk_filter { atomic_t refcnt; @@ -146,9 +150,6 @@ static inline unsigned int sk_filter_len(const struct sk_filter *fp) return fp->len * sizeof(struct sock_filter) + sizeof(*fp); } -struct sk_buff; -struct sock; - extern int sk_filter(struct sock *sk, struct sk_buff *skb); extern unsigned int sk_run_filter(const struct sk_buff *skb, const struct sock_filter *filter); -- cgit v1.2.3 From a1e4891fd48d298870b704c6eb48cba0da5ed6b1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 22 May 2011 16:51:43 -0700 Subject: Remove prefetch() from and "netlabel_addrlist.h" Commit e66eed651fd1 ("list: remove prefetching from regular list iterators") removed the include of prefetch.h from list.h. The skbuff list traversal still had them. Quoth David Miller: "Please just remove the prefetches. Those are modelled after list.h as I intend to eventually convert SKB list handling to "struct list_head" but we're not there yet. Therefore if we kill prefetches from list.h we should kill it from these things in skbuff.h too." Requested-by: David Miller Signed-off-by: Linus Torvalds --- include/linux/skbuff.h | 7 +++---- net/netlabel/netlabel_addrlist.h | 8 ++++---- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 827681540d6f..16c9c091555d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -28,7 +28,6 @@ #include #include #include -#include #include /* Don't change this without changing skb_csum_unnecessary! 
*/ @@ -1783,7 +1782,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) #define skb_queue_walk(queue, skb) \ for (skb = (queue)->next; \ - prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ + skb != (struct sk_buff *)(queue); \ skb = skb->next) #define skb_queue_walk_safe(queue, skb, tmp) \ @@ -1792,7 +1791,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) skb = tmp, tmp = skb->next) #define skb_queue_walk_from(queue, skb) \ - for (; prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ + for (; skb != (struct sk_buff *)(queue); \ skb = skb->next) #define skb_queue_walk_from_safe(queue, skb, tmp) \ @@ -1802,7 +1801,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) #define skb_queue_reverse_walk(queue, skb) \ for (skb = (queue)->prev; \ - prefetch(skb->prev), (skb != (struct sk_buff *)(queue)); \ + skb != (struct sk_buff *)(queue); \ skb = skb->prev) #define skb_queue_reverse_walk_safe(queue, skb, tmp) \ diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h index 1c1c093cf279..2b9644e19de0 100644 --- a/net/netlabel/netlabel_addrlist.h +++ b/net/netlabel/netlabel_addrlist.h @@ -96,12 +96,12 @@ static inline struct netlbl_af4list *__af4list_valid_rcu(struct list_head *s, #define netlbl_af4list_foreach(iter, head) \ for (iter = __af4list_valid((head)->next, head); \ - prefetch(iter->list.next), &iter->list != (head); \ + &iter->list != (head); \ iter = __af4list_valid(iter->list.next, head)) #define netlbl_af4list_foreach_rcu(iter, head) \ for (iter = __af4list_valid_rcu((head)->next, head); \ - prefetch(iter->list.next), &iter->list != (head); \ + &iter->list != (head); \ iter = __af4list_valid_rcu(iter->list.next, head)) #define netlbl_af4list_foreach_safe(iter, tmp, head) \ @@ -163,12 +163,12 @@ static inline struct netlbl_af6list *__af6list_valid_rcu(struct list_head *s, #define netlbl_af6list_foreach(iter, head) \ for (iter = __af6list_valid((head)->next, head); \ - prefetch(iter->list.next), &iter->list != (head); \ + &iter->list != (head); \ iter = __af6list_valid(iter->list.next, head)) #define netlbl_af6list_foreach_rcu(iter, head) \ for (iter = __af6list_valid_rcu((head)->next, head); \ - prefetch(iter->list.next), &iter->list != (head); \ + &iter->list != (head); \ iter = __af6list_valid_rcu(iter->list.next, head)) #define netlbl_af6list_foreach_safe(iter, tmp, head) \ -- cgit v1.2.3 From 2d42552d1c1659b014851cf449ad2fe458509128 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 23 May 2011 10:24:39 +0200 Subject: [S390] merge page_test_dirty and page_clear_dirty The page_clear_dirty primitive always sets the default storage key which resets the access control bits and the fetch protection bit. That will surprise a KVM guest that sets non-zero access control bits or the fetch protection bit. Merge page_test_dirty and page_clear_dirty back to a single function and only clear the dirty bit from the storage key. In addition move the function page_test_and_clear_dirty and page_test_and_clear_young to page.h where they belong. This requires to change the parameter from a struct page * to a page frame number. 
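To illustrate the caller-side conversion (simplified from the mm/rmap.c hunk in the diff below), the old two-step pattern becomes a single call, and only the changed bit is cleared so the access-control and fetch-protection bits in the storage key survive:

/* old pattern: test, then reset the whole storage key */
if (page_test_dirty(page)) {
	page_clear_dirty(page, 1);
	set_page_dirty(page);
}

/* new pattern: same effect, but only the changed (dirty) bit is cleared */
if (page_test_and_clear_dirty(page_to_pfn(page), 1))
	set_page_dirty(page);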
Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/page.h | 56 ++++++++++++++++++++++++++++++++++----- arch/s390/include/asm/pgtable.h | 58 +++++++---------------------------------- include/asm-generic/pgtable.h | 12 +++------ include/linux/page-flags.h | 2 +- mm/rmap.c | 11 +++----- 5 files changed, 68 insertions(+), 71 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 3c987e9ec8d6..81ee2776088d 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -107,8 +107,8 @@ typedef pte_t *pgtable_t; #define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) -static inline void -page_set_storage_key(unsigned long addr, unsigned int skey, int mapped) +static inline void page_set_storage_key(unsigned long addr, + unsigned char skey, int mapped) { if (!mapped) asm volatile(".insn rrf,0xb22b0000,%0,%1,8,0" @@ -117,15 +117,59 @@ page_set_storage_key(unsigned long addr, unsigned int skey, int mapped) asm volatile("sske %0,%1" : : "d" (skey), "a" (addr)); } -static inline unsigned int -page_get_storage_key(unsigned long addr) +static inline unsigned char page_get_storage_key(unsigned long addr) { - unsigned int skey; + unsigned char skey; - asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr), "0" (0)); + asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr)); return skey; } +static inline int page_reset_referenced(unsigned long addr) +{ + unsigned int ipm; + + asm volatile( + " rrbe 0,%1\n" + " ipm %0\n" + : "=d" (ipm) : "a" (addr) : "cc"); + return !!(ipm & 0x20000000); +} + +/* Bits int the storage key */ +#define _PAGE_CHANGED 0x02 /* HW changed bit */ +#define _PAGE_REFERENCED 0x04 /* HW referenced bit */ +#define _PAGE_FP_BIT 0x08 /* HW fetch protection bit */ +#define _PAGE_ACC_BITS 0xf0 /* HW access control bits */ + +/* + * Test and clear dirty bit in storage key. + * We can't clear the changed bit atomically. This is a potential + * race against modification of the referenced bit. This function + * should therefore only be called if it is not mapped in any + * address space. + */ +#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY +static inline int page_test_and_clear_dirty(unsigned long pfn, int mapped) +{ + unsigned char skey; + + skey = page_get_storage_key(pfn << PAGE_SHIFT); + if (!(skey & _PAGE_CHANGED)) + return 0; + page_set_storage_key(pfn << PAGE_SHIFT, skey & ~_PAGE_CHANGED, mapped); + return 1; +} + +/* + * Test and clear referenced bit in storage key. + */ +#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG +static inline int page_test_and_clear_young(unsigned long pfn) +{ + return page_reset_referenced(pfn << PAGE_SHIFT); +} + struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 763620ec7925..4ca4dd2b329a 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -373,10 +373,6 @@ extern unsigned long VMALLOC_START; #define _ASCE_USER_BITS (_ASCE_SPACE_SWITCH | _ASCE_PRIVATE_SPACE | \ _ASCE_ALT_EVENT) -/* Bits int the storage key */ -#define _PAGE_CHANGED 0x02 /* HW changed bit */ -#define _PAGE_REFERENCED 0x04 /* HW referenced bit */ - /* * Page protection definitions. 
*/ @@ -555,8 +551,6 @@ static inline void rcp_unlock(pte_t *ptep) #endif } -/* forward declaration for SetPageUptodate in page-flags.h*/ -static inline void page_clear_dirty(struct page *page, int mapped); #include static inline void ptep_rcp_copy(pte_t *ptep) @@ -566,7 +560,7 @@ static inline void ptep_rcp_copy(pte_t *ptep) unsigned int skey; unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - skey = page_get_storage_key(page_to_phys(page)); + skey = page_get_storage_key(pte_val(*ptep) >> PAGE_SHIFT); if (skey & _PAGE_CHANGED) { set_bit_simple(RCP_GC_BIT, pgste); set_bit_simple(KVM_UD_BIT, pgste); @@ -760,6 +754,7 @@ static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, { int dirty; unsigned long *pgste; + unsigned long pfn; struct page *page; unsigned int skey; @@ -767,8 +762,9 @@ static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, return -EINVAL; rcp_lock(ptep); pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - page = virt_to_page(pte_val(*ptep)); - skey = page_get_storage_key(page_to_phys(page)); + pfn = pte_val(*ptep) >> PAGE_SHIFT; + page = pfn_to_page(pfn); + skey = page_get_storage_key(pfn); if (skey & _PAGE_CHANGED) { set_bit_simple(RCP_GC_BIT, pgste); set_bit_simple(KVM_UD_BIT, pgste); @@ -779,7 +775,7 @@ static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, } dirty = test_and_clear_bit_simple(KVM_UD_BIT, pgste); if (skey & _PAGE_CHANGED) - page_clear_dirty(page, 1); + page_set_storage_key(pfn, skey & ~_PAGE_CHANGED, 1); rcp_unlock(ptep); return dirty; } @@ -790,16 +786,16 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { #ifdef CONFIG_PGSTE - unsigned long physpage; + unsigned long pfn; int young; unsigned long *pgste; if (!vma->vm_mm->context.has_pgste) return 0; - physpage = pte_val(*ptep) & PAGE_MASK; + pfn = pte_val(*ptep) >> PAGE_SHIFT; pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - young = ((page_get_storage_key(physpage) & _PAGE_REFERENCED) != 0); + young = ((page_get_storage_key(pfn) & _PAGE_REFERENCED) != 0); rcp_lock(ptep); if (young) set_bit_simple(RCP_GR_BIT, pgste); @@ -936,42 +932,6 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, __changed; \ }) -/* - * Test and clear dirty bit in storage key. - * We can't clear the changed bit atomically. This is a potential - * race against modification of the referenced bit. This function - * should therefore only be called if it is not mapped in any - * address space. - */ -#define __HAVE_ARCH_PAGE_TEST_DIRTY -static inline int page_test_dirty(struct page *page) -{ - return (page_get_storage_key(page_to_phys(page)) & _PAGE_CHANGED) != 0; -} - -#define __HAVE_ARCH_PAGE_CLEAR_DIRTY -static inline void page_clear_dirty(struct page *page, int mapped) -{ - page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY, mapped); -} - -/* - * Test and clear referenced bit in storage key. - */ -#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG -static inline int page_test_and_clear_young(struct page *page) -{ - unsigned long physpage = page_to_phys(page); - int ccode; - - asm volatile( - " rrbe 0,%1\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (ccode) : "a" (physpage) : "cc" ); - return ccode & 2; -} - /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. 
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index b4bfe338ea0e..e9b8e5926bef 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -184,22 +184,18 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif -#ifndef __HAVE_ARCH_PAGE_TEST_DIRTY -#define page_test_dirty(page) (0) +#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY +#define page_test_and_clear_dirty(pfn, mapped) (0) #endif -#ifndef __HAVE_ARCH_PAGE_CLEAR_DIRTY -#define page_clear_dirty(page, mapped) do { } while (0) -#endif - -#ifndef __HAVE_ARCH_PAGE_TEST_DIRTY +#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY #define pte_maybe_dirty(pte) pte_dirty(pte) #else #define pte_maybe_dirty(pte) (1) #endif #ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG -#define page_test_and_clear_young(page) (0) +#define page_test_and_clear_young(pfn) (0) #endif #ifndef __HAVE_ARCH_PGD_OFFSET_GATE diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 811183de1ef5..79a6700b7162 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -308,7 +308,7 @@ static inline void SetPageUptodate(struct page *page) { #ifdef CONFIG_S390 if (!test_and_set_bit(PG_uptodate, &page->flags)) - page_clear_dirty(page, 0); + page_set_storage_key(page_to_pfn(page), PAGE_DEFAULT_KEY, 0); #else /* * Memory barrier must be issued before setting the PG_uptodate bit, diff --git a/mm/rmap.c b/mm/rmap.c index 8da044a1db0f..522e4a93cadd 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -719,7 +719,7 @@ int page_referenced(struct page *page, unlock_page(page); } out: - if (page_test_and_clear_young(page)) + if (page_test_and_clear_young(page_to_pfn(page))) referenced++; return referenced; @@ -785,10 +785,8 @@ int page_mkclean(struct page *page) struct address_space *mapping = page_mapping(page); if (mapping) { ret = page_mkclean_file(mapping, page); - if (page_test_dirty(page)) { - page_clear_dirty(page, 1); + if (page_test_and_clear_dirty(page_to_pfn(page), 1)) ret = 1; - } } } @@ -981,10 +979,9 @@ void page_remove_rmap(struct page *page) * not if it's in swapcache - there might be another pte slot * containing the swap entry, but page not yet written to swap. */ - if ((!PageAnon(page) || PageSwapCache(page)) && page_test_dirty(page)) { - page_clear_dirty(page, 1); + if ((!PageAnon(page) || PageSwapCache(page)) && + page_test_and_clear_dirty(page_to_pfn(page), 1)) set_page_dirty(page); - } /* * Hugepages are not counted in NR_ANON_PAGES nor NR_FILE_MAPPED * and not charged by memcg for now. -- cgit v1.2.3 From 586692a5a5fc5740c8a46abc0f2365495c2d7c5f Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Sun, 22 May 2011 22:10:22 -0700 Subject: watchdog: Disable watchdog when thresh is zero This restores the previous behavior of softlock_thresh. Currently, setting watchdog_thresh to zero causes the watchdog kthreads to consume a lot of CPU. In addition, the logic of proc_dowatchdog_thresh and proc_dowatchdog_enabled has been factored into proc_dowatchdog. 
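The key behavioural point, sketched here rather than quoted from the patch, is that the unified handler re-evaluates both knobs after either sysctl is written, so writing 0 to watchdog_thresh now genuinely stops the watchdog threads:

/* Illustrative sketch of the combined decision made after a sysctl write. */
static void watchdog_update_sketch(void)
{
	if (watchdog_enabled && watchdog_thresh)
		watchdog_enable_all_cpus();
	else
		watchdog_disable_all_cpus();	/* thresh == 0 disables, as before the perf conversion */
}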
Signed-off-by: Mandeep Singh Baines Cc: Marcin Slusarz Cc: Don Zickus Cc: Peter Zijlstra Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1306127423-3347-3-git-send-email-msb@chromium.org Signed-off-by: Ingo Molnar LKML-Reference: <20110517071018.GE22305@elte.hu> --- include/linux/nmi.h | 5 +++-- include/linux/sched.h | 1 - kernel/sysctl.c | 12 ++++++++---- kernel/watchdog.c | 25 +++++++++---------------- 4 files changed, 20 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index c536f8545f74..5317b8b2198f 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -47,9 +47,10 @@ static inline bool trigger_all_cpu_backtrace(void) int hw_nmi_is_cpu_stuck(struct pt_regs *); u64 hw_nmi_get_sample_period(void); extern int watchdog_enabled; +extern int watchdog_thresh; struct ctl_table; -extern int proc_dowatchdog_enabled(struct ctl_table *, int , - void __user *, size_t *, loff_t *); +extern int proc_dowatchdog(struct ctl_table *, int , + void __user *, size_t *, loff_t *); #endif #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 12211e1666e2..d8b2d0bec0d8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -315,7 +315,6 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; -extern int softlockup_thresh; void lockup_detector_init(void); #else static inline void touch_softlockup_watchdog(void) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c0bb32414b17..3dd0c46fa3bb 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -730,14 +730,16 @@ static struct ctl_table kern_table[] = { .data = &watchdog_enabled, .maxlen = sizeof (int), .mode = 0644, - .proc_handler = proc_dowatchdog_enabled, + .proc_handler = proc_dowatchdog, + .extra1 = &zero, + .extra2 = &one, }, { .procname = "watchdog_thresh", - .data = &softlockup_thresh, + .data = &watchdog_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dowatchdog_thresh, + .proc_handler = proc_dowatchdog, .extra1 = &neg_one, .extra2 = &sixty, }, @@ -755,7 +757,9 @@ static struct ctl_table kern_table[] = { .data = &watchdog_enabled, .maxlen = sizeof (int), .mode = 0644, - .proc_handler = proc_dowatchdog_enabled, + .proc_handler = proc_dowatchdog, + .extra1 = &zero, + .extra2 = &one, }, #endif #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index cf0e09f452e7..60301916f62e 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -28,7 +28,7 @@ #include int watchdog_enabled = 1; -int __read_mostly softlockup_thresh = 60; +int __read_mostly watchdog_thresh = 60; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); @@ -105,12 +105,12 @@ static unsigned long get_timestamp(int this_cpu) static unsigned long get_sample_period(void) { /* - * convert softlockup_thresh from seconds to ns + * convert watchdog_thresh from seconds to ns * the divide by 5 is to give hrtimer 5 chances to * increment before the hardlockup detector generates * a warning */ - return softlockup_thresh * (NSEC_PER_SEC / 5); + return watchdog_thresh * (NSEC_PER_SEC / 5); } /* Commands for resetting the watchdog */ @@ -182,7 +182,7 @@ static int is_softlockup(unsigned long touch_ts) unsigned long now = get_timestamp(smp_processor_id()); /* Warn about unreasonable delays: */ - if (time_after(now, touch_ts + softlockup_thresh)) + if 
(time_after(now, touch_ts + watchdog_thresh)) return now - touch_ts; return 0; @@ -501,19 +501,19 @@ static void watchdog_disable_all_cpus(void) /* sysctl functions */ #ifdef CONFIG_SYSCTL /* - * proc handler for /proc/sys/kernel/nmi_watchdog + * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh */ -int proc_dowatchdog_enabled(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) +int proc_dowatchdog(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_dointvec(table, write, buffer, length, ppos); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret || !write) goto out; - if (watchdog_enabled) + if (watchdog_enabled && watchdog_thresh) watchdog_enable_all_cpus(); else watchdog_disable_all_cpus(); @@ -521,13 +521,6 @@ int proc_dowatchdog_enabled(struct ctl_table *table, int write, out: return ret; } - -int proc_dowatchdog_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) -{ - return proc_dointvec_minmax(table, write, buffer, lenp, ppos); -} #endif /* CONFIG_SYSCTL */ -- cgit v1.2.3 From 4eec42f392043063d0f019640b4ccc2a45570002 Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Sun, 22 May 2011 22:10:23 -0700 Subject: watchdog: Change the default timeout and configure nmi watchdog period based on watchdog_thresh Before the conversion of the NMI watchdog to perf event, the watchdog timeout was 5 seconds. Now it is 60 seconds. For my particular application, netbooks, 5 seconds was a better timeout. With a short timeout, we catch faults earlier and are able to send back a panic. With a 60 second timeout, the user is unlikely to wait and will instead hit the power button, causing us to lose the panic info. This change configures the NMI period to watchdog_thresh and sets the softlockup_thresh to watchdog_thresh * 2. In addition, watchdog_thresh was reduced to 10 seconds as suggested by Ingo Molnar. 
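To make the coupling concrete, here is a small stand-alone sketch of the arithmetic (the 2 GHz cpu_khz value is assumed purely for illustration; the expressions mirror hw_nmi_get_sample_period() and get_sample_period() below):

/* Illustrative arithmetic only -- mirrors the thresholds described above. */
#include <stdio.h>

int main(void)
{
	unsigned long cpu_khz = 2000000;	/* assumed 2 GHz CPU */
	int watchdog_thresh = 10;		/* new default, in seconds */

	/* hard lockup: the NMI perf event fires after watchdog_thresh seconds of cycles */
	unsigned long long nmi_period = (unsigned long long)cpu_khz * 1000 * watchdog_thresh;

	/* soft lockup: warn after twice the hard threshold */
	int softlockup_thresh = watchdog_thresh * 2;

	/* hrtimer sample period: 5 chances to touch the timestamp before a warning */
	unsigned long long sample_ns = softlockup_thresh * (1000000000ULL / 5);

	printf("nmi period: %llu cycles\n", nmi_period);
	printf("softlockup threshold: %d s, sampled every %llu ns\n",
	       softlockup_thresh, sample_ns);
	return 0;
}

With the 10 second default this gives a 4 second hrtimer sample period and a 20 second soft lockup threshold.
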
Signed-off-by: Mandeep Singh Baines Cc: Marcin Slusarz Cc: Don Zickus Cc: Peter Zijlstra Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1306127423-3347-4-git-send-email-msb@chromium.org Signed-off-by: Ingo Molnar LKML-Reference: <20110517071642.GF22305@elte.hu> --- arch/x86/kernel/apic/hw_nmi.c | 4 ++-- include/linux/nmi.h | 2 +- kernel/watchdog.c | 19 +++++++++++++++---- 3 files changed, 18 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 5260fe91bcb6..d5e57db0f7be 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -19,9 +19,9 @@ #include #ifdef CONFIG_HARDLOCKUP_DETECTOR -u64 hw_nmi_get_sample_period(void) +u64 hw_nmi_get_sample_period(int watchdog_thresh) { - return (u64)(cpu_khz) * 1000 * 60; + return (u64)(cpu_khz) * 1000 * watchdog_thresh; } #endif diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 5317b8b2198f..2d304efc89df 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -45,7 +45,7 @@ static inline bool trigger_all_cpu_backtrace(void) #ifdef CONFIG_LOCKUP_DETECTOR int hw_nmi_is_cpu_stuck(struct pt_regs *); -u64 hw_nmi_get_sample_period(void); +u64 hw_nmi_get_sample_period(int watchdog_thresh); extern int watchdog_enabled; extern int watchdog_thresh; struct ctl_table; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 60301916f62e..6e63097fa73a 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -28,7 +28,7 @@ #include int watchdog_enabled = 1; -int __read_mostly watchdog_thresh = 60; +int __read_mostly watchdog_thresh = 10; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); @@ -91,6 +91,17 @@ static int __init nosoftlockup_setup(char *str) __setup("nosoftlockup", nosoftlockup_setup); /* */ +/* + * Hard-lockup warnings should be triggered after just a few seconds. Soft- + * lockups can have false positives under extreme conditions. So we generally + * want a higher threshold for soft lockups than for hard lockups. So we couple + * the thresholds with a factor: we make the soft threshold twice the amount of + * time the hard threshold is. + */ +static int get_softlockup_thresh() +{ + return watchdog_thresh * 2; +} /* * Returns seconds, approximately. 
We don't need nanosecond @@ -110,7 +121,7 @@ static unsigned long get_sample_period(void) * increment before the hardlockup detector generates * a warning */ - return watchdog_thresh * (NSEC_PER_SEC / 5); + return get_softlockup_thresh() * (NSEC_PER_SEC / 5); } /* Commands for resetting the watchdog */ @@ -182,7 +193,7 @@ static int is_softlockup(unsigned long touch_ts) unsigned long now = get_timestamp(smp_processor_id()); /* Warn about unreasonable delays: */ - if (time_after(now, touch_ts + watchdog_thresh)) + if (time_after(now, touch_ts + get_softlockup_thresh())) return now - touch_ts; return 0; @@ -359,7 +370,7 @@ static int watchdog_nmi_enable(int cpu) /* Try to register using hardware perf events */ wd_attr = &wd_hw_attr; - wd_attr->sample_period = hw_nmi_get_sample_period(); + wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback); if (!IS_ERR(event)) { printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); -- cgit v1.2.3 From 9ec2690758a5467f24beb301cca5098078073bba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 May 2011 16:18:50 +0200 Subject: timerfd: Manage cancelable timers in timerfd Peter is concerned about the extra scan of CLOCK_REALTIME_COS in the timer interrupt. Yes, I did not think about it, because the solution was so elegant. I didn't like the extra list in timerfd when it was proposed some time ago, but with a rcu based list the list walk it's less horrible than the original global lock, which was held over the list iteration. Requested-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Reviewed-by: Peter Zijlstra --- fs/timerfd.c | 105 ++++++++++++++++++++++++++++++++++-------------- include/linux/hrtimer.h | 6 ++- include/linux/time.h | 6 --- include/linux/timerfd.h | 4 +- kernel/hrtimer.c | 94 +++++++++++++++---------------------------- 5 files changed, 113 insertions(+), 102 deletions(-) (limited to 'include/linux') diff --git a/fs/timerfd.c b/fs/timerfd.c index 7e14c9e7c4ee..f67acbdda5e8 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -22,6 +22,7 @@ #include #include #include +#include struct timerfd_ctx { struct hrtimer tmr; @@ -31,9 +32,14 @@ struct timerfd_ctx { u64 ticks; int expired; int clockid; + struct rcu_head rcu; + struct list_head clist; bool might_cancel; }; +static LIST_HEAD(cancel_list); +static DEFINE_SPINLOCK(cancel_lock); + /* * This gets called when the timer event triggers. We set the "expired" * flag, but we do not re-arm the timer (in case it's necessary, @@ -53,28 +59,69 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) return HRTIMER_NORESTART; } -static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) +/* + * Called when the clock was set to cancel the timers in the cancel + * list. + */ +void timerfd_clock_was_set(void) { - ktime_t remaining; + ktime_t moffs = ktime_get_monotonic_offset(); + struct timerfd_ctx *ctx; + unsigned long flags; - remaining = hrtimer_expires_remaining(&ctx->tmr); - return remaining.tv64 < 0 ? 
ktime_set(0, 0): remaining; + rcu_read_lock(); + list_for_each_entry_rcu(ctx, &cancel_list, clist) { + if (!ctx->might_cancel) + continue; + spin_lock_irqsave(&ctx->wqh.lock, flags); + if (ctx->moffs.tv64 != moffs.tv64) { + ctx->moffs.tv64 = KTIME_MAX; + wake_up_locked(&ctx->wqh); + } + spin_unlock_irqrestore(&ctx->wqh.lock, flags); + } + rcu_read_unlock(); } -static bool timerfd_canceled(struct timerfd_ctx *ctx) +static void timerfd_remove_cancel(struct timerfd_ctx *ctx) { - ktime_t moffs; + if (ctx->might_cancel) { + ctx->might_cancel = false; + spin_lock(&cancel_lock); + list_del_rcu(&ctx->clist); + spin_unlock(&cancel_lock); + } +} - if (!ctx->might_cancel) +static bool timerfd_canceled(struct timerfd_ctx *ctx) +{ + if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX) return false; + ctx->moffs = ktime_get_monotonic_offset(); + return true; +} - moffs = ktime_get_monotonic_offset(); +static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) +{ + if (ctx->clockid == CLOCK_REALTIME && (flags & TFD_TIMER_ABSTIME) && + (flags & TFD_TIMER_CANCEL_ON_SET)) { + if (!ctx->might_cancel) { + ctx->might_cancel = true; + spin_lock(&cancel_lock); + list_add_rcu(&ctx->clist, &cancel_list); + spin_unlock(&cancel_lock); + } + } else if (ctx->might_cancel) { + timerfd_remove_cancel(ctx); + } +} - if (moffs.tv64 == ctx->moffs.tv64) - return false; +static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) +{ + ktime_t remaining; - ctx->moffs = moffs; - return true; + remaining = hrtimer_expires_remaining(&ctx->tmr); + return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; } static int timerfd_setup(struct timerfd_ctx *ctx, int flags, @@ -87,13 +134,6 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags, htmode = (flags & TFD_TIMER_ABSTIME) ? HRTIMER_MODE_ABS: HRTIMER_MODE_REL; - ctx->might_cancel = false; - if (htmode == HRTIMER_MODE_ABS && ctx->clockid == CLOCK_REALTIME && - (flags & TFD_TIMER_CANCELON_SET)) { - clockid = CLOCK_REALTIME_COS; - ctx->might_cancel = true; - } - texp = timespec_to_ktime(ktmr->it_value); ctx->expired = 0; ctx->ticks = 0; @@ -113,8 +153,9 @@ static int timerfd_release(struct inode *inode, struct file *file) { struct timerfd_ctx *ctx = file->private_data; + timerfd_remove_cancel(ctx); hrtimer_cancel(&ctx->tmr); - kfree(ctx); + kfree_rcu(ctx, rcu); return 0; } @@ -149,20 +190,20 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, else res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); + /* + * If clock has changed, we do not care about the + * ticks and we do not rearm the timer. Userspace must + * reevaluate anyway. + */ + if (timerfd_canceled(ctx)) { + ctx->ticks = 0; + ctx->expired = 0; + res = -ECANCELED; + } + if (ctx->ticks) { ticks = ctx->ticks; - /* - * If clock has changed, we do not care about the - * ticks and we do not rearm the timer. Userspace must - * reevaluate anyway. - */ - if (timerfd_canceled(ctx)) { - ticks = 0; - ctx->expired = 0; - res = -ECANCELED; - } - if (ctx->expired && ctx->tintv.tv64) { /* * If tintv.tv64 != 0, this is a periodic timer that @@ -258,6 +299,8 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, return PTR_ERR(file); ctx = file->private_data; + timerfd_setup_cancel(ctx, flags); + /* * We need to stop the existing timer before reprogramming * it to the new values. 
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index eda4ccde0730..925c8c01db7b 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -155,7 +155,6 @@ enum hrtimer_base_type { HRTIMER_BASE_REALTIME, HRTIMER_BASE_MONOTONIC, HRTIMER_BASE_BOOTTIME, - HRTIMER_BASE_REALTIME_COS, HRTIMER_MAX_CLOCK_BASES, }; @@ -306,6 +305,11 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) #endif extern void clock_was_set(void); +#ifdef CONFIG_TIMERFD +extern void timerfd_clock_was_set(void); +#else +static inline void timerfd_clock_was_set(void) { } +#endif extern void hrtimers_resume(void); extern ktime_t ktime_get(void); diff --git a/include/linux/time.h b/include/linux/time.h index a9242773eb24..b3061782dec3 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -302,12 +302,6 @@ struct itimerval { * The IDs of various hardware clocks: */ #define CLOCK_SGI_CYCLE 10 - -#ifdef __KERNEL__ -/* This clock is not exposed to user space */ -#define CLOCK_REALTIME_COS 15 -#endif - #define MAX_CLOCKS 16 #define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC) #define CLOCKS_MONO CLOCK_MONOTONIC diff --git a/include/linux/timerfd.h b/include/linux/timerfd.h index e9571fc8f1a0..d3b57fa12225 100644 --- a/include/linux/timerfd.h +++ b/include/linux/timerfd.h @@ -19,7 +19,7 @@ * shared O_* flags. */ #define TFD_TIMER_ABSTIME (1 << 0) -#define TFD_TIMER_CANCELON_SET (1 << 1) +#define TFD_TIMER_CANCEL_ON_SET (1 << 1) #define TFD_CLOEXEC O_CLOEXEC #define TFD_NONBLOCK O_NONBLOCK @@ -27,6 +27,6 @@ /* Flags for timerfd_create. */ #define TFD_CREATE_FLAGS TFD_SHARED_FCNTL_FLAGS /* Flags for timerfd_settime. */ -#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCELON_SET) +#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET) #endif /* _LINUX_TIMERFD_H */ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index eabcbd781433..26dd32f9f6b2 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -78,11 +78,6 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .get_time = &ktime_get_boottime, .resolution = KTIME_LOW_RES, }, - { - .index = CLOCK_REALTIME_COS, - .get_time = &ktime_get_real, - .resolution = KTIME_LOW_RES, - }, } }; @@ -90,7 +85,6 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, - [CLOCK_REALTIME_COS] = HRTIMER_BASE_REALTIME_COS, }; static inline int hrtimer_clockid_to_base(clockid_t clock_id) @@ -116,7 +110,6 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim; base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono; base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot; - base->clock_base[HRTIMER_BASE_REALTIME_COS].softirq_time = xtim; } /* @@ -486,8 +479,6 @@ static inline void debug_deactivate(struct hrtimer *timer) trace_hrtimer_cancel(timer); } -static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base); - /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -663,7 +654,33 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, return 0; } -static void retrigger_next_event(void *arg); +/* + * Retrigger next event is called after clock was set + * + * Called with interrupts disabled via on_each_cpu() + */ +static void retrigger_next_event(void *arg) +{ + struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); + struct timespec 
realtime_offset, xtim, wtm, sleep; + + if (!hrtimer_hres_active()) + return; + + /* Optimized out for !HIGH_RES */ + get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); + set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); + + /* Adjust CLOCK_REALTIME offset */ + raw_spin_lock(&base->lock); + base->clock_base[HRTIMER_BASE_REALTIME].offset = + timespec_to_ktime(realtime_offset); + base->clock_base[HRTIMER_BASE_BOOTTIME].offset = + timespec_to_ktime(sleep); + + hrtimer_force_reprogram(base, 0); + raw_spin_unlock(&base->lock); +} /* * Switch to high resolution mode @@ -711,45 +728,10 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, return 0; } static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } +static inline void retrigger_next_event(void *arg) { } #endif /* CONFIG_HIGH_RES_TIMERS */ -/* - * Retrigger next event is called after clock was set - * - * Called with interrupts disabled via on_each_cpu() - */ -static void retrigger_next_event(void *arg) -{ - struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); - struct timespec realtime_offset, xtim, wtm, sleep; - - if (!hrtimer_hres_active()) { - raw_spin_lock(&base->lock); - hrtimer_expire_cancelable(base); - raw_spin_unlock(&base->lock); - return; - } - - /* Optimized out for !HIGH_RES */ - get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); - set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); - - /* Adjust CLOCK_REALTIME offset */ - raw_spin_lock(&base->lock); - base->clock_base[HRTIMER_BASE_REALTIME].offset = - timespec_to_ktime(realtime_offset); - base->clock_base[HRTIMER_BASE_BOOTTIME].offset = - timespec_to_ktime(sleep); - base->clock_base[HRTIMER_BASE_REALTIME_COS].offset = - timespec_to_ktime(realtime_offset); - - hrtimer_expire_cancelable(base); - - hrtimer_force_reprogram(base, 0); - raw_spin_unlock(&base->lock); -} - /* * Clock realtime was set * @@ -763,8 +745,11 @@ static void retrigger_next_event(void *arg) */ void clock_was_set(void) { +#ifdef CONFIG_HIGHRES_TIMERS /* Retrigger the CPU local events everywhere */ on_each_cpu(retrigger_next_event, NULL, 1); +#endif + timerfd_clock_was_set(); } /* @@ -777,6 +762,7 @@ void hrtimers_resume(void) KERN_INFO "hrtimers_resume() called with IRQs enabled!"); retrigger_next_event(NULL); + timerfd_clock_was_set(); } static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) @@ -1240,22 +1226,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) timer->state &= ~HRTIMER_STATE_CALLBACK; } -static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base) -{ - struct timerqueue_node *node; - struct hrtimer_clock_base *base; - ktime_t now = ktime_get_real(); - - base = &cpu_base->clock_base[HRTIMER_BASE_REALTIME_COS]; - - while ((node = timerqueue_getnext(&base->active))) { - struct hrtimer *timer; - - timer = container_of(node, struct hrtimer, node); - __run_hrtimer(timer, &now); - } -} - #ifdef CONFIG_HIGH_RES_TIMERS /* -- cgit v1.2.3 From f24444b01bf6c51c300fd3ffc73423383d747882 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 May 2011 13:02:58 +0200 Subject: hrtimers: Make struct hrtimer_cpu_base layout less stupid In the HIGHRES=y case we access the members at the end of struct hrtimer_cpu_base first and then the one at the beginning. Move the hrtimer data to front, so we have linear progressing access. 
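As a rough, self-contained illustration of the idea (stand-in types only, not the kernel structure): keeping the fields the HIGHRES interrupt path touches ahead of the clock_base array lets that path read the structure front to back.

/* Stand-in layout sketch: hot fields first, the big array last. */
#include <stdio.h>
#include <stddef.h>

struct clock_base_stub { char pad[64]; };	/* stands in for struct hrtimer_clock_base */

struct cpu_base_after {				/* simplified layout after the patch */
	int lock;				/* raw_spinlock_t stand-in */
	long long expires_next;			/* ktime_t stand-in, read first in the HIGHRES path */
	int hres_active;
	struct clock_base_stub clock_base[3];	/* moved to the tail */
};

int main(void)
{
	printf("expires_next at offset %zu, clock_base at offset %zu\n",
	       offsetof(struct cpu_base_after, expires_next),
	       offsetof(struct cpu_base_after, clock_base));
	return 0;
}
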
Signed-off-by: Thomas Gleixner Reviewed-by: Peter Zijlstra --- include/linux/hrtimer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 925c8c01db7b..cc5f5f51db10 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -174,7 +174,6 @@ enum hrtimer_base_type { */ struct hrtimer_cpu_base { raw_spinlock_t lock; - struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int hres_active; @@ -184,6 +183,7 @@ struct hrtimer_cpu_base { unsigned long nr_hangs; ktime_t max_hang_time; #endif + struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; }; static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) -- cgit v1.2.3 From ab8177bc53e8ae3a3ba6d200ce2c2dae263f7ee5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 May 2011 13:05:15 +0200 Subject: hrtimers: Avoid touching inactive timer bases Instead of iterating over all possible timer bases avoid it by marking the active bases in the cpu base. Signed-off-by: Thomas Gleixner Reviewed-by: Peter Zijlstra --- include/linux/hrtimer.h | 7 +++++-- include/linux/thread_info.h | 2 +- kernel/hrtimer.c | 29 ++++++++++++++++++----------- kernel/posix-cpu-timers.c | 4 ++-- kernel/posix-timers.c | 2 +- kernel/time/alarmtimer.c | 4 ++-- 6 files changed, 29 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index cc5f5f51db10..771c95802edc 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -143,7 +143,8 @@ struct hrtimer_sleeper { */ struct hrtimer_clock_base { struct hrtimer_cpu_base *cpu_base; - clockid_t index; + int index; + clockid_t clockid; struct timerqueue_head active; ktime_t resolution; ktime_t (*get_time)(void); @@ -162,7 +163,7 @@ enum hrtimer_base_type { * struct hrtimer_cpu_base - the per cpu clock bases * @lock: lock protecting the base and associated clock bases * and timers - * @clock_base: array of clock bases for this cpu + * @active_bases: Bitfield to mark bases with active timers * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @hres_active: State of high resolution mode @@ -171,9 +172,11 @@ enum hrtimer_base_type { * @nr_retries: Total number of hrtimer interrupt retries * @nr_hangs: Total number of hrtimer interrupt hangs * @max_hang_time: Maximum time spent in hrtimer_interrupt + * @clock_base: array of clock bases for this cpu */ struct hrtimer_cpu_base { raw_spinlock_t lock; + unsigned long active_bases; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int hres_active; diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 20fc303947d3..8d03f079688c 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -29,7 +29,7 @@ struct restart_block { } futex; /* For nanosleep */ struct { - clockid_t index; + clockid_t clockid; struct timespec __user *rmtp; #ifdef CONFIG_COMPAT struct compat_timespec __user *compat_rmtp; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 26dd32f9f6b2..1b08f6d67f12 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -64,17 +64,20 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .clock_base = { { - .index = CLOCK_REALTIME, + .index = HRTIMER_BASE_REALTIME, + .clockid = CLOCK_REALTIME, .get_time = &ktime_get_real, .resolution = KTIME_LOW_RES, }, { - .index = CLOCK_MONOTONIC, + .index = 
HRTIMER_BASE_MONOTONIC, + .clockid = CLOCK_MONOTONIC, .get_time = &ktime_get, .resolution = KTIME_LOW_RES, }, { - .index = CLOCK_BOOTTIME, + .index = HRTIMER_BASE_BOOTTIME, + .clockid = CLOCK_BOOTTIME, .get_time = &ktime_get_boottime, .resolution = KTIME_LOW_RES, }, @@ -196,7 +199,7 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, struct hrtimer_cpu_base *new_cpu_base; int this_cpu = smp_processor_id(); int cpu = hrtimer_get_target(this_cpu, pinned); - int basenum = hrtimer_clockid_to_base(base->index); + int basenum = base->index; again: new_cpu_base = &per_cpu(hrtimer_bases, cpu); @@ -857,6 +860,7 @@ static int enqueue_hrtimer(struct hrtimer *timer, debug_activate(timer); timerqueue_add(&base->active, &timer->node); + base->cpu_base->active_bases |= 1 << base->index; /* * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the @@ -898,6 +902,8 @@ static void __remove_hrtimer(struct hrtimer *timer, #endif } timerqueue_del(&base->active, &timer->node); + if (!timerqueue_getnext(&base->active)) + base->cpu_base->active_bases &= ~(1 << base->index); out: timer->state = newstate; } @@ -1235,7 +1241,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) void hrtimer_interrupt(struct clock_event_device *dev) { struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - struct hrtimer_clock_base *base; ktime_t expires_next, now, entry_time, delta; int i, retries = 0; @@ -1257,12 +1262,15 @@ retry: */ cpu_base->expires_next.tv64 = KTIME_MAX; - base = cpu_base->clock_base; - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { - ktime_t basenow; + struct hrtimer_clock_base *base; struct timerqueue_node *node; + ktime_t basenow; + + if (!(cpu_base->active_bases & (1 << i))) + continue; + base = cpu_base->clock_base + i; basenow = ktime_add(now, base->offset); while ((node = timerqueue_getnext(&base->active))) { @@ -1295,7 +1303,6 @@ retry: __run_hrtimer(timer, &basenow); } - base++; } /* @@ -1526,7 +1533,7 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) struct timespec __user *rmtp; int ret = 0; - hrtimer_init_on_stack(&t.timer, restart->nanosleep.index, + hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid, HRTIMER_MODE_ABS); hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); @@ -1578,7 +1585,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, restart = ¤t_thread_info()->restart_block; restart->fn = hrtimer_nanosleep_restart; - restart->nanosleep.index = t.timer.base->index; + restart->nanosleep.clockid = t.timer.base->clockid; restart->nanosleep.rmtp = rmtp; restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 0791b13df7bf..58f405b581e7 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1514,7 +1514,7 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags, return -EFAULT; restart_block->fn = posix_cpu_nsleep_restart; - restart_block->nanosleep.index = which_clock; + restart_block->nanosleep.clockid = which_clock; restart_block->nanosleep.rmtp = rmtp; restart_block->nanosleep.expires = timespec_to_ns(rqtp); } @@ -1523,7 +1523,7 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags, static long posix_cpu_nsleep_restart(struct restart_block *restart_block) { - clockid_t which_clock = restart_block->nanosleep.index; + clockid_t which_clock = restart_block->nanosleep.clockid; struct timespec t; struct itimerspec it; int error; diff 
--git a/kernel/posix-timers.c b/kernel/posix-timers.c index e5498d7405c3..a1b5edf1bf92 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -1056,7 +1056,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, */ long clock_nanosleep_restart(struct restart_block *restart_block) { - clockid_t which_clock = restart_block->nanosleep.index; + clockid_t which_clock = restart_block->nanosleep.clockid; struct k_clock *kc = clockid_to_kclock(which_clock); if (WARN_ON_ONCE(!kc || !kc->nsleep_restart)) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index c6027fe9a4e6..2d966244ea60 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -494,7 +494,7 @@ static int update_rmtp(ktime_t exp, enum alarmtimer_type type, */ static long __sched alarm_timer_nsleep_restart(struct restart_block *restart) { - enum alarmtimer_type type = restart->nanosleep.index; + enum alarmtimer_type type = restart->nanosleep.clockid; ktime_t exp; struct timespec __user *rmtp; struct alarm alarm; @@ -573,7 +573,7 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, restart = ¤t_thread_info()->restart_block; restart->fn = alarm_timer_nsleep_restart; - restart->nanosleep.index = type; + restart->nanosleep.clockid = type; restart->nanosleep.expires = exp.tv64; restart->nanosleep.rmtp = rmtp; ret = -ERESTART_RESTARTBLOCK; -- cgit v1.2.3 From 68fa61c026057a39d6ccb850aa8785043afbee02 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 May 2011 23:14:04 +0200 Subject: hrtimers: Reorder clock bases The ordering of the clock bases is historical due to the CLOCK_REALTIME and CLOCK_MONOTONIC constants. Now the hrtimer bases have their own enumeration due to the gap between CLOCK_MONOTONIC and CLOCK_BOOTTIME. So we can be more clever as most timers end up on the CLOCK_MONOTONIC base due to the virtue of POSIX declaring that relative CLOCK_REALTIME timers are not affected by time changes. In desktop environments this is slowly changing as applications switch to absolute timers, but I've observed empty CLOCK_REALTIME bases often enough. There is no performance penalty or overhead when CLOCK_REALTIME timers are active, but in case they are not we don't skip over a full cache line. 
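A self-contained sketch of how this ordering combines with the active_bases bitfield introduced two patches earlier (stand-in types; the real loop lives in hrtimer_interrupt()):

/* With CLOCK_MONOTONIC as base 0, a CPU with no realtime timers only ever
 * tests and touches index 0 and never pulls the empty bases' cache lines. */
#include <stdio.h>

enum hrtimer_base_type {
	HRTIMER_BASE_MONOTONIC,		/* most timers end up here */
	HRTIMER_BASE_REALTIME,
	HRTIMER_BASE_BOOTTIME,
	HRTIMER_MAX_CLOCK_BASES,
};

struct cpu_base_stub {
	unsigned long active_bases;	/* bit i set => clock_base[i] has timers queued */
};

static void expire_active_bases(struct cpu_base_stub *cpu_base)
{
	int i;

	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
		if (!(cpu_base->active_bases & (1UL << i)))
			continue;	/* empty base: skip it entirely */
		printf("expiring timers on base %d\n", i);
	}
}

int main(void)
{
	struct cpu_base_stub cb = { .active_bases = 1UL << HRTIMER_BASE_MONOTONIC };

	expire_active_bases(&cb);	/* only the monotonic base is visited */
	return 0;
}
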
Signed-off-by: Thomas Gleixner Reviewed-by: Peter Zijlstra --- include/linux/hrtimer.h | 2 +- kernel/hrtimer.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 771c95802edc..51932e5acf7c 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -153,8 +153,8 @@ struct hrtimer_clock_base { }; enum hrtimer_base_type { - HRTIMER_BASE_REALTIME, HRTIMER_BASE_MONOTONIC, + HRTIMER_BASE_REALTIME, HRTIMER_BASE_BOOTTIME, HRTIMER_MAX_CLOCK_BASES, }; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 1b08f6d67f12..c541ee527ecb 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -63,18 +63,18 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .clock_base = { - { - .index = HRTIMER_BASE_REALTIME, - .clockid = CLOCK_REALTIME, - .get_time = &ktime_get_real, - .resolution = KTIME_LOW_RES, - }, { .index = HRTIMER_BASE_MONOTONIC, .clockid = CLOCK_MONOTONIC, .get_time = &ktime_get, .resolution = KTIME_LOW_RES, }, + { + .index = HRTIMER_BASE_REALTIME, + .clockid = CLOCK_REALTIME, + .get_time = &ktime_get_real, + .resolution = KTIME_LOW_RES, + }, { .index = HRTIMER_BASE_BOOTTIME, .clockid = CLOCK_BOOTTIME, -- cgit v1.2.3 From 901025d2f3194b4868980c8ba80df4cc0aa1282c Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 2 Mar 2011 14:20:04 -0600 Subject: dlm: make plock operation killable Allow processes blocked on plock requests to be interrupted when they are killed. This leaves the problem of cleaning up the lock state in userspace. This has three parts: 1. Add a flag to unlock operations sent to userspace indicating the file is being closed. Userspace will then look for and clear any waiting plock operations that were abandoned by an interrupted process. 2. Queue an unlock-close operation (like in 1) to clean up userspace from an interrupted plock request. This is needed because the vfs will not send a cleanup-unlock if it sees no locks on the file, which it won't if the interrupted operation was the only one. 3. Do not use replies from userspace for unlock-close operations because they are unnecessary (they are just cleaning up for the process which did not make an unlock call). This also simplifies the new unlock-close generated from point 2. Signed-off-by: David Teigland --- fs/dlm/plock.c | 65 ++++++++++++++++++++++++++++++++++++++++++++--- include/linux/dlm_plock.h | 6 +++-- 2 files changed, 65 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index 30d8b85febbf..e2b878004364 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -71,6 +71,36 @@ static void send_op(struct plock_op *op) wake_up(&send_wq); } +/* If a process was killed while waiting for the only plock on a file, + locks_remove_posix will not see any lock on the file so it won't + send an unlock-close to us to pass on to userspace to clean up the + abandoned waiter. So, we have to insert the unlock-close when the + lock call is interrupted. 
*/ + +static void do_unlock_close(struct dlm_ls *ls, u64 number, + struct file *file, struct file_lock *fl) +{ + struct plock_op *op; + + op = kzalloc(sizeof(*op), GFP_NOFS); + if (!op) + return; + + op->info.optype = DLM_PLOCK_OP_UNLOCK; + op->info.pid = fl->fl_pid; + op->info.fsid = ls->ls_global_id; + op->info.number = number; + op->info.start = 0; + op->info.end = OFFSET_MAX; + if (fl->fl_lmops && fl->fl_lmops->fl_grant) + op->info.owner = (__u64) fl->fl_pid; + else + op->info.owner = (__u64)(long) fl->fl_owner; + + op->info.flags |= DLM_PLOCK_FL_CLOSE; + send_op(op); +} + int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, int cmd, struct file_lock *fl) { @@ -114,9 +144,19 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, send_op(op); - if (xop->callback == NULL) - wait_event(recv_wq, (op->done != 0)); - else { + if (xop->callback == NULL) { + rv = wait_event_killable(recv_wq, (op->done != 0)); + if (rv == -ERESTARTSYS) { + log_debug(ls, "dlm_posix_lock: wait killed %llx", + (unsigned long long)number); + spin_lock(&ops_lock); + list_del(&op->list); + spin_unlock(&ops_lock); + kfree(xop); + do_unlock_close(ls, number, file, fl); + goto out; + } + } else { rv = FILE_LOCK_DEFERRED; goto out; } @@ -233,6 +273,13 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, else op->info.owner = (__u64)(long) fl->fl_owner; + if (fl->fl_flags & FL_CLOSE) { + op->info.flags |= DLM_PLOCK_FL_CLOSE; + send_op(op); + rv = 0; + goto out; + } + send_op(op); wait_event(recv_wq, (op->done != 0)); @@ -334,7 +381,10 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count, spin_lock(&ops_lock); if (!list_empty(&send_list)) { op = list_entry(send_list.next, struct plock_op, list); - list_move(&op->list, &recv_list); + if (op->info.flags & DLM_PLOCK_FL_CLOSE) + list_del(&op->list); + else + list_move(&op->list, &recv_list); memcpy(&info, &op->info, sizeof(info)); } spin_unlock(&ops_lock); @@ -342,6 +392,13 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count, if (!op) return -EAGAIN; + /* there is no need to get a reply from userspace for unlocks + that were generated by the vfs cleaning up for a close + (the process did not make an unlock call). */ + + if (op->info.flags & DLM_PLOCK_FL_CLOSE) + kfree(op); + if (copy_to_user(u, &info, sizeof(info))) return -EFAULT; return sizeof(info); diff --git a/include/linux/dlm_plock.h b/include/linux/dlm_plock.h index 2dd21243104f..3b1cc1be419f 100644 --- a/include/linux/dlm_plock.h +++ b/include/linux/dlm_plock.h @@ -14,7 +14,7 @@ #define DLM_PLOCK_MISC_NAME "dlm_plock" #define DLM_PLOCK_VERSION_MAJOR 1 -#define DLM_PLOCK_VERSION_MINOR 1 +#define DLM_PLOCK_VERSION_MINOR 2 #define DLM_PLOCK_VERSION_PATCH 0 enum { @@ -23,12 +23,14 @@ enum { DLM_PLOCK_OP_GET, }; +#define DLM_PLOCK_FL_CLOSE 1 + struct dlm_plock_info { __u32 version[3]; __u8 optype; __u8 ex; __u8 wait; - __u8 pad; + __u8 flags; __u32 pid; __s32 nodeid; __s32 rv; -- cgit v1.2.3 From 442c8176d2efa468577738e3a99a6e051f6e8e55 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 8 May 2011 14:06:52 +0100 Subject: clocksource: add common mmio clocksource Add a generic mmio clocksource, covering both 32-bit and 16-bit register access sizes, for up or down counters. This can be used to easily create clocksources for simple counter-based implementations. Cc: Alessandro Rubini Cc: Colin Cross Cc: Eric Miao Cc: Erik Gilling Acked-by: "Hans J. 
Koch" Cc: Imre Kaloz Cc: Krzysztof Halasa Cc: Kukjin Kim Cc: Lennert Buytenhek Cc: Linus Walleij Cc: linux-omap@vger.kernel.org Acked-by: Nicolas Pitre Cc: Olof Johansson Tested-by: Sascha Hauer Reviewed-by: Thomas Gleixner Tested-by: Tony Lindgren Reviewed-by: Viresh Kumar Cc: Wan ZongShun Signed-off-by: Russell King --- drivers/clocksource/Kconfig | 3 ++ drivers/clocksource/Makefile | 1 + drivers/clocksource/mmio.c | 73 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/clocksource.h | 8 +++++ 4 files changed, 85 insertions(+) create mode 100644 drivers/clocksource/mmio.c (limited to 'include/linux') diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 110aeeb52f9a..96c921910469 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -1,2 +1,5 @@ config CLKSRC_I8253 bool + +config CLKSRC_MMIO + bool diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index cfb6383b543a..b995942a5060 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -7,3 +7,4 @@ obj-$(CONFIG_SH_TIMER_CMT) += sh_cmt.o obj-$(CONFIG_SH_TIMER_MTU2) += sh_mtu2.o obj-$(CONFIG_SH_TIMER_TMU) += sh_tmu.o obj-$(CONFIG_CLKSRC_I8253) += i8253.o +obj-$(CONFIG_CLKSRC_MMIO) += mmio.o diff --git a/drivers/clocksource/mmio.c b/drivers/clocksource/mmio.c new file mode 100644 index 000000000000..c0e25125a55e --- /dev/null +++ b/drivers/clocksource/mmio.c @@ -0,0 +1,73 @@ +/* + * Generic MMIO clocksource support + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include + +struct clocksource_mmio { + void __iomem *reg; + struct clocksource clksrc; +}; + +static inline struct clocksource_mmio *to_mmio_clksrc(struct clocksource *c) +{ + return container_of(c, struct clocksource_mmio, clksrc); +} + +cycle_t clocksource_mmio_readl_up(struct clocksource *c) +{ + return readl_relaxed(to_mmio_clksrc(c)->reg); +} + +cycle_t clocksource_mmio_readl_down(struct clocksource *c) +{ + return ~readl_relaxed(to_mmio_clksrc(c)->reg); +} + +cycle_t clocksource_mmio_readw_up(struct clocksource *c) +{ + return readw_relaxed(to_mmio_clksrc(c)->reg); +} + +cycle_t clocksource_mmio_readw_down(struct clocksource *c) +{ + return ~(unsigned)readw_relaxed(to_mmio_clksrc(c)->reg); +} + +/** + * clocksource_mmio_init - Initialize a simple mmio based clocksource + * @base: Virtual address of the clock readout register + * @name: Name of the clocksource + * @hz: Frequency of the clocksource in Hz + * @rating: Rating of the clocksource + * @bits: Number of valid bits + * @read: One of clocksource_mmio_read*() above + */ +int __init clocksource_mmio_init(void __iomem *base, const char *name, + unsigned long hz, int rating, unsigned bits, + cycle_t (*read)(struct clocksource *)) +{ + struct clocksource_mmio *cs; + + if (bits > 32 || bits < 16) + return -EINVAL; + + cs = kzalloc(sizeof(struct clocksource_mmio), GFP_KERNEL); + if (!cs) + return -ENOMEM; + + cs->reg = base; + cs->clksrc.name = name; + cs->clksrc.rating = rating; + cs->clksrc.read = read; + cs->clksrc.mask = CLOCKSOURCE_MASK(bits); + cs->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS; + + return clocksource_register_hz(&cs->clksrc, hz); +} diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index c918fbd33ee5..d4646b48dc4a 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -337,6 +337,14 @@ 
static inline void update_vsyscall_tz(void) extern void timekeeping_notify(struct clocksource *clock); +extern cycle_t clocksource_mmio_readl_up(struct clocksource *); +extern cycle_t clocksource_mmio_readl_down(struct clocksource *); +extern cycle_t clocksource_mmio_readw_up(struct clocksource *); +extern cycle_t clocksource_mmio_readw_down(struct clocksource *); + +extern int clocksource_mmio_init(void __iomem *, const char *, + unsigned long, int, unsigned, cycle_t (*)(struct clocksource *)); + extern int clocksource_i8253_init(void); #endif /* _LINUX_CLOCKSOURCE_H */ -- cgit v1.2.3 From d94ba80ebbea17f036cecb104398fbcd788aa742 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Fri, 22 Apr 2011 12:03:08 +0200 Subject: ptp: Added a brand new class driver for ptp clocks. This patch adds an infrastructure for hardware clocks that implement IEEE 1588, the Precision Time Protocol (PTP). A class driver offers a registration method to particular hardware clock drivers. Each clock is presented as a standard POSIX clock. The ancillary clock features are exposed in two different ways, via the sysfs and by a character device. Signed-off-by: Richard Cochran Acked-by: Arnd Bergmann Acked-by: David S. Miller Signed-off-by: John Stultz --- Documentation/ABI/testing/sysfs-ptp | 98 ++++++++++ Documentation/ptp/ptp.txt | 89 +++++++++ Documentation/ptp/testptp.c | 381 ++++++++++++++++++++++++++++++++++++ Documentation/ptp/testptp.mk | 33 ++++ drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/ptp/Kconfig | 30 +++ drivers/ptp/Makefile | 6 + drivers/ptp/ptp_chardev.c | 159 +++++++++++++++ drivers/ptp/ptp_clock.c | 343 ++++++++++++++++++++++++++++++++ drivers/ptp/ptp_private.h | 92 +++++++++ drivers/ptp/ptp_sysfs.c | 230 ++++++++++++++++++++++ include/linux/Kbuild | 1 + include/linux/ptp_classify.h | 7 + include/linux/ptp_clock.h | 84 ++++++++ include/linux/ptp_clock_kernel.h | 139 +++++++++++++ 16 files changed, 1695 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-ptp create mode 100644 Documentation/ptp/ptp.txt create mode 100644 Documentation/ptp/testptp.c create mode 100644 Documentation/ptp/testptp.mk create mode 100644 drivers/ptp/Kconfig create mode 100644 drivers/ptp/Makefile create mode 100644 drivers/ptp/ptp_chardev.c create mode 100644 drivers/ptp/ptp_clock.c create mode 100644 drivers/ptp/ptp_private.h create mode 100644 drivers/ptp/ptp_sysfs.c create mode 100644 include/linux/ptp_clock.h create mode 100644 include/linux/ptp_clock_kernel.h (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-ptp b/Documentation/ABI/testing/sysfs-ptp new file mode 100644 index 000000000000..d40d2b550502 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-ptp @@ -0,0 +1,98 @@ +What: /sys/class/ptp/ +Date: September 2010 +Contact: Richard Cochran +Description: + This directory contains files and directories + providing a standardized interface to the ancillary + features of PTP hardware clocks. + +What: /sys/class/ptp/ptpN/ +Date: September 2010 +Contact: Richard Cochran +Description: + This directory contains the attributes of the Nth PTP + hardware clock registered into the PTP class driver + subsystem. + +What: /sys/class/ptp/ptpN/clock_name +Date: September 2010 +Contact: Richard Cochran +Description: + This file contains the name of the PTP hardware clock + as a human readable string. 
+ +What: /sys/class/ptp/ptpN/max_adjustment +Date: September 2010 +Contact: Richard Cochran +Description: + This file contains the PTP hardware clock's maximum + frequency adjustment value (a positive integer) in + parts per billion. + +What: /sys/class/ptp/ptpN/n_alarms +Date: September 2010 +Contact: Richard Cochran +Description: + This file contains the number of periodic or one shot + alarms offer by the PTP hardware clock. + +What: /sys/class/ptp/ptpN/n_external_timestamps +Date: September 2010 +Contact: Richard Cochran +Description: + This file contains the number of external timestamp + channels offered by the PTP hardware clock. + +What: /sys/class/ptp/ptpN/n_periodic_outputs +Date: September 2010 +Contact: Richard Cochran +Description: + This file contains the number of programmable periodic + output channels offered by the PTP hardware clock. + +What: /sys/class/ptp/ptpN/pps_avaiable +Date: September 2010 +Contact: Richard Cochran +Description: + This file indicates whether the PTP hardware clock + supports a Pulse Per Second to the host CPU. Reading + "1" means that the PPS is supported, while "0" means + not supported. + +What: /sys/class/ptp/ptpN/extts_enable +Date: September 2010 +Contact: Richard Cochran +Description: + This write-only file enables or disables external + timestamps. To enable external timestamps, write the + channel index followed by a "1" into the file. + To disable external timestamps, write the channel + index followed by a "0" into the file. + +What: /sys/class/ptp/ptpN/fifo +Date: September 2010 +Contact: Richard Cochran +Description: + This file provides timestamps on external events, in + the form of three integers: channel index, seconds, + and nanoseconds. + +What: /sys/class/ptp/ptpN/period +Date: September 2010 +Contact: Richard Cochran +Description: + This write-only file enables or disables periodic + outputs. To enable a periodic output, write five + integers into the file: channel index, start time + seconds, start time nanoseconds, period seconds, and + period nanoseconds. To disable a periodic output, set + all the seconds and nanoseconds values to zero. + +What: /sys/class/ptp/ptpN/pps_enable +Date: September 2010 +Contact: Richard Cochran +Description: + This write-only file enables or disables delivery of + PPS events to the Linux PPS subsystem. To enable PPS + events, write a "1" into the file. To disable events, + write a "0" into the file. diff --git a/Documentation/ptp/ptp.txt b/Documentation/ptp/ptp.txt new file mode 100644 index 000000000000..ae8fef86b832 --- /dev/null +++ b/Documentation/ptp/ptp.txt @@ -0,0 +1,89 @@ + +* PTP hardware clock infrastructure for Linux + + This patch set introduces support for IEEE 1588 PTP clocks in + Linux. Together with the SO_TIMESTAMPING socket options, this + presents a standardized method for developing PTP user space + programs, synchronizing Linux with external clocks, and using the + ancillary features of PTP hardware clocks. + + A new class driver exports a kernel interface for specific clock + drivers and a user space interface. The infrastructure supports a + complete set of PTP hardware clock functionality. 
+ + + Basic clock operations + - Set time + - Get time + - Shift the clock by a given offset atomically + - Adjust clock frequency + + + Ancillary clock features + - One short or periodic alarms, with signal delivery to user program + - Time stamp external events + - Period output signals configurable from user space + - Synchronization of the Linux system time via the PPS subsystem + +** PTP hardware clock kernel API + + A PTP clock driver registers itself with the class driver. The + class driver handles all of the dealings with user space. The + author of a clock driver need only implement the details of + programming the clock hardware. The clock driver notifies the class + driver of asynchronous events (alarms and external time stamps) via + a simple message passing interface. + + The class driver supports multiple PTP clock drivers. In normal use + cases, only one PTP clock is needed. However, for testing and + development, it can be useful to have more than one clock in a + single system, in order to allow performance comparisons. + +** PTP hardware clock user space API + + The class driver also creates a character device for each + registered clock. User space can use an open file descriptor from + the character device as a POSIX clock id and may call + clock_gettime, clock_settime, and clock_adjtime. These calls + implement the basic clock operations. + + User space programs may control the clock using standardized + ioctls. A program may query, enable, configure, and disable the + ancillary clock features. User space can receive time stamped + events via blocking read() and poll(). One shot and periodic + signals may be configured via the POSIX timer_settime() system + call. + +** Writing clock drivers + + Clock drivers include include/linux/ptp_clock_kernel.h and register + themselves by presenting a 'struct ptp_clock_info' to the + registration method. Clock drivers must implement all of the + functions in the interface. If a clock does not offer a particular + ancillary feature, then the driver should just return -EOPNOTSUPP + from those functions. + + Drivers must ensure that all of the methods in interface are + reentrant. Since most hardware implementations treat the time value + as a 64 bit integer accessed as two 32 bit registers, drivers + should use spin_lock_irqsave/spin_unlock_irqrestore to protect + against concurrent access. This locking cannot be accomplished in + class driver, since the lock may also be needed by the clock + driver's interrupt service routine. + +** Supported hardware + + + Freescale eTSEC gianfar + - 2 Time stamp external triggers, programmable polarity (opt. 
interrupt) + - 2 Alarm registers (optional interrupt) + - 3 Periodic signals (optional interrupt) + + + National DP83640 + - 6 GPIOs programmable as inputs or outputs + - 6 GPIOs with dedicated functions (LED/JTAG/clock) can also be + used as general inputs or outputs + - GPIO inputs can time stamp external triggers + - GPIO outputs can produce periodic signals + - 1 interrupt pin + + + Intel IXP465 + - Auxiliary Slave/Master Mode Snapshot (optional interrupt) + - Target Time (optional interrupt) diff --git a/Documentation/ptp/testptp.c b/Documentation/ptp/testptp.c new file mode 100644 index 000000000000..f59ded066108 --- /dev/null +++ b/Documentation/ptp/testptp.c @@ -0,0 +1,381 @@ +/* + * PTP 1588 clock support - User space test program + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define DEVICE "/dev/ptp0" + +#ifndef ADJ_SETOFFSET +#define ADJ_SETOFFSET 0x0100 +#endif + +#ifndef CLOCK_INVALID +#define CLOCK_INVALID -1 +#endif + +/* When glibc offers the syscall, this will go away. */ +#include +static int clock_adjtime(clockid_t id, struct timex *tx) +{ + return syscall(__NR_clock_adjtime, id, tx); +} + +static clockid_t get_clockid(int fd) +{ +#define CLOCKFD 3 +#define FD_TO_CLOCKID(fd) ((~(clockid_t) (fd) << 3) | CLOCKFD) + + return FD_TO_CLOCKID(fd); +} + +static void handle_alarm(int s) +{ + printf("received signal %d\n", s); +} + +static int install_handler(int signum, void (*handler)(int)) +{ + struct sigaction action; + sigset_t mask; + + /* Unblock the signal. */ + sigemptyset(&mask); + sigaddset(&mask, signum); + sigprocmask(SIG_UNBLOCK, &mask, NULL); + + /* Install the signal handler. */ + action.sa_handler = handler; + action.sa_flags = 0; + sigemptyset(&action.sa_mask); + sigaction(signum, &action, NULL); + + return 0; +} + +static long ppb_to_scaled_ppm(int ppb) +{ + /* + * The 'freq' field in the 'struct timex' is in parts per + * million, but with a 16 bit binary fractional field. + * Instead of calculating either one of + * + * scaled_ppm = (ppb / 1000) << 16 [1] + * scaled_ppm = (ppb << 16) / 1000 [2] + * + * we simply use double precision math, in order to avoid the + * truncation in [1] and the possible overflow in [2]. 
+ */ + return (long) (ppb * 65.536); +} + +static void usage(char *progname) +{ + fprintf(stderr, + "usage: %s [options]\n" + " -a val request a one-shot alarm after 'val' seconds\n" + " -A val request a periodic alarm every 'val' seconds\n" + " -c query the ptp clock's capabilities\n" + " -d name device to open\n" + " -e val read 'val' external time stamp events\n" + " -f val adjust the ptp clock frequency by 'val' ppb\n" + " -g get the ptp clock time\n" + " -h prints this message\n" + " -p val enable output with a period of 'val' nanoseconds\n" + " -P val enable or disable (val=1|0) the system clock PPS\n" + " -s set the ptp clock time from the system time\n" + " -S set the system time from the ptp clock time\n" + " -t val shift the ptp clock time by 'val' seconds\n", + progname); +} + +int main(int argc, char *argv[]) +{ + struct ptp_clock_caps caps; + struct ptp_extts_event event; + struct ptp_extts_request extts_request; + struct ptp_perout_request perout_request; + struct timespec ts; + struct timex tx; + + static timer_t timerid; + struct itimerspec timeout; + struct sigevent sigevent; + + char *progname; + int c, cnt, fd; + + char *device = DEVICE; + clockid_t clkid; + int adjfreq = 0x7fffffff; + int adjtime = 0; + int capabilities = 0; + int extts = 0; + int gettime = 0; + int oneshot = 0; + int periodic = 0; + int perout = -1; + int pps = -1; + int settime = 0; + + progname = strrchr(argv[0], '/'); + progname = progname ? 1+progname : argv[0]; + while (EOF != (c = getopt(argc, argv, "a:A:cd:e:f:ghp:P:sSt:v"))) { + switch (c) { + case 'a': + oneshot = atoi(optarg); + break; + case 'A': + periodic = atoi(optarg); + break; + case 'c': + capabilities = 1; + break; + case 'd': + device = optarg; + break; + case 'e': + extts = atoi(optarg); + break; + case 'f': + adjfreq = atoi(optarg); + break; + case 'g': + gettime = 1; + break; + case 'p': + perout = atoi(optarg); + break; + case 'P': + pps = atoi(optarg); + break; + case 's': + settime = 1; + break; + case 'S': + settime = 2; + break; + case 't': + adjtime = atoi(optarg); + break; + case 'h': + usage(progname); + return 0; + case '?': + default: + usage(progname); + return -1; + } + } + + fd = open(device, O_RDWR); + if (fd < 0) { + fprintf(stderr, "opening %s: %s\n", device, strerror(errno)); + return -1; + } + + clkid = get_clockid(fd); + if (CLOCK_INVALID == clkid) { + fprintf(stderr, "failed to read clock id\n"); + return -1; + } + + if (capabilities) { + if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) { + perror("PTP_CLOCK_GETCAPS"); + } else { + printf("capabilities:\n" + " %d maximum frequency adjustment (ppb)\n" + " %d programmable alarms\n" + " %d external time stamp channels\n" + " %d programmable periodic signals\n" + " %d pulse per second\n", + caps.max_adj, + caps.n_alarm, + caps.n_ext_ts, + caps.n_per_out, + caps.pps); + } + } + + if (0x7fffffff != adjfreq) { + memset(&tx, 0, sizeof(tx)); + tx.modes = ADJ_FREQUENCY; + tx.freq = ppb_to_scaled_ppm(adjfreq); + if (clock_adjtime(clkid, &tx)) { + perror("clock_adjtime"); + } else { + puts("frequency adjustment okay"); + } + } + + if (adjtime) { + memset(&tx, 0, sizeof(tx)); + tx.modes = ADJ_SETOFFSET; + tx.time.tv_sec = adjtime; + tx.time.tv_usec = 0; + if (clock_adjtime(clkid, &tx) < 0) { + perror("clock_adjtime"); + } else { + puts("time shift okay"); + } + } + + if (gettime) { + if (clock_gettime(clkid, &ts)) { + perror("clock_gettime"); + } else { + printf("clock time: %ld.%09ld or %s", + ts.tv_sec, ts.tv_nsec, ctime(&ts.tv_sec)); + } + } + + if (settime == 1) { + 
clock_gettime(CLOCK_REALTIME, &ts); + if (clock_settime(clkid, &ts)) { + perror("clock_settime"); + } else { + puts("set time okay"); + } + } + + if (settime == 2) { + clock_gettime(clkid, &ts); + if (clock_settime(CLOCK_REALTIME, &ts)) { + perror("clock_settime"); + } else { + puts("set time okay"); + } + } + + if (extts) { + memset(&extts_request, 0, sizeof(extts_request)); + extts_request.index = 0; + extts_request.flags = PTP_ENABLE_FEATURE; + if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) { + perror("PTP_EXTTS_REQUEST"); + extts = 0; + } else { + puts("external time stamp request okay"); + } + for (; extts; extts--) { + cnt = read(fd, &event, sizeof(event)); + if (cnt != sizeof(event)) { + perror("read"); + break; + } + printf("event index %u at %lld.%09u\n", event.index, + event.t.sec, event.t.nsec); + fflush(stdout); + } + /* Disable the feature again. */ + extts_request.flags = 0; + if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) { + perror("PTP_EXTTS_REQUEST"); + } + } + + if (oneshot) { + install_handler(SIGALRM, handle_alarm); + /* Create a timer. */ + sigevent.sigev_notify = SIGEV_SIGNAL; + sigevent.sigev_signo = SIGALRM; + if (timer_create(clkid, &sigevent, &timerid)) { + perror("timer_create"); + return -1; + } + /* Start the timer. */ + memset(&timeout, 0, sizeof(timeout)); + timeout.it_value.tv_sec = oneshot; + if (timer_settime(timerid, 0, &timeout, NULL)) { + perror("timer_settime"); + return -1; + } + pause(); + timer_delete(timerid); + } + + if (periodic) { + install_handler(SIGALRM, handle_alarm); + /* Create a timer. */ + sigevent.sigev_notify = SIGEV_SIGNAL; + sigevent.sigev_signo = SIGALRM; + if (timer_create(clkid, &sigevent, &timerid)) { + perror("timer_create"); + return -1; + } + /* Start the timer. */ + memset(&timeout, 0, sizeof(timeout)); + timeout.it_interval.tv_sec = periodic; + timeout.it_value.tv_sec = periodic; + if (timer_settime(timerid, 0, &timeout, NULL)) { + perror("timer_settime"); + return -1; + } + while (1) { + pause(); + } + timer_delete(timerid); + } + + if (perout >= 0) { + if (clock_gettime(clkid, &ts)) { + perror("clock_gettime"); + return -1; + } + memset(&perout_request, 0, sizeof(perout_request)); + perout_request.index = 0; + perout_request.start.sec = ts.tv_sec + 2; + perout_request.start.nsec = 0; + perout_request.period.sec = 0; + perout_request.period.nsec = perout; + if (ioctl(fd, PTP_PEROUT_REQUEST, &perout_request)) { + perror("PTP_PEROUT_REQUEST"); + } else { + puts("periodic output request okay"); + } + } + + if (pps != -1) { + int enable = pps ? 1 : 0; + if (ioctl(fd, PTP_ENABLE_PPS, enable)) { + perror("PTP_ENABLE_PPS"); + } else { + puts("pps for system time request okay"); + } + } + + close(fd); + return 0; +} diff --git a/Documentation/ptp/testptp.mk b/Documentation/ptp/testptp.mk new file mode 100644 index 000000000000..4ef2d9755421 --- /dev/null +++ b/Documentation/ptp/testptp.mk @@ -0,0 +1,33 @@ +# PTP 1588 clock support - User space test program +# +# Copyright (C) 2010 OMICRON electronics GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +CC = $(CROSS_COMPILE)gcc +INC = -I$(KBUILD_OUTPUT)/usr/include +CFLAGS = -Wall $(INC) +LDLIBS = -lrt +PROGS = testptp + +all: $(PROGS) + +testptp: testptp.o + +clean: + rm -f testptp.o + +distclean: clean + rm -f $(PROGS) diff --git a/drivers/Kconfig b/drivers/Kconfig index 61631edfecc2..3bb154d8c8cc 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -54,6 +54,8 @@ source "drivers/spi/Kconfig" source "drivers/pps/Kconfig" +source "drivers/ptp/Kconfig" + source "drivers/gpio/Kconfig" source "drivers/w1/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index a29527f4ded6..3c2960128a7f 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -76,6 +76,7 @@ obj-$(CONFIG_I2O) += message/ obj-$(CONFIG_RTC_LIB) += rtc/ obj-y += i2c/ media/ obj-$(CONFIG_PPS) += pps/ +obj-$(CONFIG_PTP_1588_CLOCK) += ptp/ obj-$(CONFIG_W1) += w1/ obj-$(CONFIG_POWER_SUPPLY) += power/ obj-$(CONFIG_HWMON) += hwmon/ diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig new file mode 100644 index 000000000000..70d4bb1cbdab --- /dev/null +++ b/drivers/ptp/Kconfig @@ -0,0 +1,30 @@ +# +# PTP clock support configuration +# + +menu "PTP clock support" + +comment "Enable Device Drivers -> PPS to see the PTP clock options." + depends on PPS=n + +config PTP_1588_CLOCK + tristate "PTP clock support" + depends on EXPERIMENTAL + depends on PPS + help + The IEEE 1588 standard defines a method to precisely + synchronize distributed clocks over Ethernet networks. The + standard defines a Precision Time Protocol (PTP), which can + be used to achieve synchronization within a few dozen + microseconds. In addition, with the help of special hardware + time stamping units, it can be possible to achieve + synchronization to within a few hundred nanoseconds. + + This driver adds support for PTP clocks as character + devices. If you want to use a PTP clock, then you should + also enable at least one clock driver as well. + + To compile this driver as a module, choose M here: the module + will be called ptp. + +endmenu diff --git a/drivers/ptp/Makefile b/drivers/ptp/Makefile new file mode 100644 index 000000000000..480e2afdc999 --- /dev/null +++ b/drivers/ptp/Makefile @@ -0,0 +1,6 @@ +# +# Makefile for PTP 1588 clock support. +# + +ptp-y := ptp_clock.o ptp_chardev.o ptp_sysfs.o +obj-$(CONFIG_PTP_1588_CLOCK) += ptp.o diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c new file mode 100644 index 000000000000..a8d03aeb4051 --- /dev/null +++ b/drivers/ptp/ptp_chardev.c @@ -0,0 +1,159 @@ +/* + * PTP 1588 clock support - character device implementation. + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ +#include +#include +#include +#include + +#include "ptp_private.h" + +int ptp_open(struct posix_clock *pc, fmode_t fmode) +{ + return 0; +} + +long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) +{ + struct ptp_clock_caps caps; + struct ptp_clock_request req; + struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + struct ptp_clock_info *ops = ptp->info; + int enable, err = 0; + + switch (cmd) { + + case PTP_CLOCK_GETCAPS: + memset(&caps, 0, sizeof(caps)); + caps.max_adj = ptp->info->max_adj; + caps.n_alarm = ptp->info->n_alarm; + caps.n_ext_ts = ptp->info->n_ext_ts; + caps.n_per_out = ptp->info->n_per_out; + caps.pps = ptp->info->pps; + err = copy_to_user((void __user *)arg, &caps, sizeof(caps)); + break; + + case PTP_EXTTS_REQUEST: + if (copy_from_user(&req.extts, (void __user *)arg, + sizeof(req.extts))) { + err = -EFAULT; + break; + } + if (req.extts.index >= ops->n_ext_ts) { + err = -EINVAL; + break; + } + req.type = PTP_CLK_REQ_EXTTS; + enable = req.extts.flags & PTP_ENABLE_FEATURE ? 1 : 0; + err = ops->enable(ops, &req, enable); + break; + + case PTP_PEROUT_REQUEST: + if (copy_from_user(&req.perout, (void __user *)arg, + sizeof(req.perout))) { + err = -EFAULT; + break; + } + if (req.perout.index >= ops->n_per_out) { + err = -EINVAL; + break; + } + req.type = PTP_CLK_REQ_PEROUT; + enable = req.perout.period.sec || req.perout.period.nsec; + err = ops->enable(ops, &req, enable); + break; + + case PTP_ENABLE_PPS: + if (!capable(CAP_SYS_TIME)) + return -EPERM; + req.type = PTP_CLK_REQ_PPS; + enable = arg ? 1 : 0; + err = ops->enable(ops, &req, enable); + break; + + default: + err = -ENOTTY; + break; + } + return err; +} + +unsigned int ptp_poll(struct posix_clock *pc, struct file *fp, poll_table *wait) +{ + struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + + poll_wait(fp, &ptp->tsev_wq, wait); + + return queue_cnt(&ptp->tsevq) ? 
POLLIN : 0; +} + +ssize_t ptp_read(struct posix_clock *pc, + uint rdflags, char __user *buf, size_t cnt) +{ + struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + struct timestamp_event_queue *queue = &ptp->tsevq; + struct ptp_extts_event event[PTP_BUF_TIMESTAMPS]; + unsigned long flags; + size_t qcnt, i; + + if (cnt % sizeof(struct ptp_extts_event) != 0) + return -EINVAL; + + if (cnt > sizeof(event)) + cnt = sizeof(event); + + cnt = cnt / sizeof(struct ptp_extts_event); + + if (mutex_lock_interruptible(&ptp->tsevq_mux)) + return -ERESTARTSYS; + + if (wait_event_interruptible(ptp->tsev_wq, + ptp->defunct || queue_cnt(queue))) { + mutex_unlock(&ptp->tsevq_mux); + return -ERESTARTSYS; + } + + if (ptp->defunct) + return -ENODEV; + + spin_lock_irqsave(&queue->lock, flags); + + qcnt = queue_cnt(queue); + + if (cnt > qcnt) + cnt = qcnt; + + for (i = 0; i < cnt; i++) { + event[i] = queue->buf[queue->head]; + queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS; + } + + spin_unlock_irqrestore(&queue->lock, flags); + + cnt = cnt * sizeof(struct ptp_extts_event); + + mutex_unlock(&ptp->tsevq_mux); + + if (copy_to_user(buf, event, cnt)) { + mutex_unlock(&ptp->tsevq_mux); + return -EFAULT; + } + + return cnt; +} diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c new file mode 100644 index 000000000000..cf3f9997546d --- /dev/null +++ b/drivers/ptp/ptp_clock.c @@ -0,0 +1,343 @@ +/* + * PTP 1588 clock support + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ptp_private.h" + +#define PTP_MAX_ALARMS 4 +#define PTP_MAX_CLOCKS 8 +#define PTP_PPS_DEFAULTS (PPS_CAPTUREASSERT | PPS_OFFSETASSERT) +#define PTP_PPS_EVENT PPS_CAPTUREASSERT +#define PTP_PPS_MODE (PTP_PPS_DEFAULTS | PPS_CANWAIT | PPS_TSFMT_TSPEC) + +/* private globals */ + +static dev_t ptp_devt; +static struct class *ptp_class; + +static DECLARE_BITMAP(ptp_clocks_map, PTP_MAX_CLOCKS); +static DEFINE_MUTEX(ptp_clocks_mutex); /* protects 'ptp_clocks_map' */ + +/* time stamp event queue operations */ + +static inline int queue_free(struct timestamp_event_queue *q) +{ + return PTP_MAX_TIMESTAMPS - queue_cnt(q) - 1; +} + +static void enqueue_external_timestamp(struct timestamp_event_queue *queue, + struct ptp_clock_event *src) +{ + struct ptp_extts_event *dst; + unsigned long flags; + s64 seconds; + u32 remainder; + + seconds = div_u64_rem(src->timestamp, 1000000000, &remainder); + + spin_lock_irqsave(&queue->lock, flags); + + dst = &queue->buf[queue->tail]; + dst->index = src->index; + dst->t.sec = seconds; + dst->t.nsec = remainder; + + if (!queue_free(queue)) + queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS; + + queue->tail = (queue->tail + 1) % PTP_MAX_TIMESTAMPS; + + spin_unlock_irqrestore(&queue->lock, flags); +} + +static s32 scaled_ppm_to_ppb(long ppm) +{ + /* + * The 'freq' field in the 'struct timex' is in parts per + * million, but with a 16 bit binary fractional field. + * + * We want to calculate + * + * ppb = scaled_ppm * 1000 / 2^16 + * + * which simplifies to + * + * ppb = scaled_ppm * 125 / 2^13 + */ + s64 ppb = 1 + ppm; + ppb *= 125; + ppb >>= 13; + return (s32) ppb; +} + +/* posix clock implementation */ + +static int ptp_clock_getres(struct posix_clock *pc, struct timespec *tp) +{ + return 1; /* always round timer functions to one nanosecond */ +} + +static int ptp_clock_settime(struct posix_clock *pc, const struct timespec *tp) +{ + struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + return ptp->info->settime(ptp->info, tp); +} + +static int ptp_clock_gettime(struct posix_clock *pc, struct timespec *tp) +{ + struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + return ptp->info->gettime(ptp->info, tp); +} + +static int ptp_clock_adjtime(struct posix_clock *pc, struct timex *tx) +{ + struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + struct ptp_clock_info *ops; + int err = -EOPNOTSUPP; + + ops = ptp->info; + + if (tx->modes & ADJ_SETOFFSET) { + struct timespec ts; + ktime_t kt; + s64 delta; + + ts.tv_sec = tx->time.tv_sec; + ts.tv_nsec = tx->time.tv_usec; + + if (!(tx->modes & ADJ_NANO)) + ts.tv_nsec *= 1000; + + if ((unsigned long) ts.tv_nsec >= NSEC_PER_SEC) + return -EINVAL; + + kt = timespec_to_ktime(ts); + delta = ktime_to_ns(kt); + err = ops->adjtime(ops, delta); + + } else if (tx->modes & ADJ_FREQUENCY) { + + err = ops->adjfreq(ops, scaled_ppm_to_ppb(tx->freq)); + } + + return err; +} + +static struct posix_clock_operations ptp_clock_ops = { + .owner = THIS_MODULE, + .clock_adjtime = ptp_clock_adjtime, + .clock_gettime = ptp_clock_gettime, + .clock_getres = ptp_clock_getres, + .clock_settime = ptp_clock_settime, + .ioctl = ptp_ioctl, + .open = ptp_open, + .poll = ptp_poll, + .read = ptp_read, +}; + +static void delete_ptp_clock(struct posix_clock *pc) +{ + struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + + mutex_destroy(&ptp->tsevq_mux); + + /* Remove the clock from 
the bit map. */ + mutex_lock(&ptp_clocks_mutex); + clear_bit(ptp->index, ptp_clocks_map); + mutex_unlock(&ptp_clocks_mutex); + + kfree(ptp); +} + +/* public interface */ + +struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info) +{ + struct ptp_clock *ptp; + int err = 0, index, major = MAJOR(ptp_devt); + + if (info->n_alarm > PTP_MAX_ALARMS) + return ERR_PTR(-EINVAL); + + /* Find a free clock slot and reserve it. */ + err = -EBUSY; + mutex_lock(&ptp_clocks_mutex); + index = find_first_zero_bit(ptp_clocks_map, PTP_MAX_CLOCKS); + if (index < PTP_MAX_CLOCKS) + set_bit(index, ptp_clocks_map); + else + goto no_slot; + + /* Initialize a clock structure. */ + err = -ENOMEM; + ptp = kzalloc(sizeof(struct ptp_clock), GFP_KERNEL); + if (ptp == NULL) + goto no_memory; + + ptp->clock.ops = ptp_clock_ops; + ptp->clock.release = delete_ptp_clock; + ptp->info = info; + ptp->devid = MKDEV(major, index); + ptp->index = index; + spin_lock_init(&ptp->tsevq.lock); + mutex_init(&ptp->tsevq_mux); + init_waitqueue_head(&ptp->tsev_wq); + + /* Create a new device in our class. */ + ptp->dev = device_create(ptp_class, NULL, ptp->devid, ptp, + "ptp%d", ptp->index); + if (IS_ERR(ptp->dev)) + goto no_device; + + dev_set_drvdata(ptp->dev, ptp); + + err = ptp_populate_sysfs(ptp); + if (err) + goto no_sysfs; + + /* Register a new PPS source. */ + if (info->pps) { + struct pps_source_info pps; + memset(&pps, 0, sizeof(pps)); + snprintf(pps.name, PPS_MAX_NAME_LEN, "ptp%d", index); + pps.mode = PTP_PPS_MODE; + pps.owner = info->owner; + ptp->pps_source = pps_register_source(&pps, PTP_PPS_DEFAULTS); + if (!ptp->pps_source) { + pr_err("failed to register pps source\n"); + goto no_pps; + } + } + + /* Create a posix clock. */ + err = posix_clock_register(&ptp->clock, ptp->devid); + if (err) { + pr_err("failed to create posix clock\n"); + goto no_clock; + } + + mutex_unlock(&ptp_clocks_mutex); + return ptp; + +no_clock: + if (ptp->pps_source) + pps_unregister_source(ptp->pps_source); +no_pps: + ptp_cleanup_sysfs(ptp); +no_sysfs: + device_destroy(ptp_class, ptp->devid); +no_device: + mutex_destroy(&ptp->tsevq_mux); + kfree(ptp); +no_memory: + clear_bit(index, ptp_clocks_map); +no_slot: + mutex_unlock(&ptp_clocks_mutex); + return ERR_PTR(err); +} +EXPORT_SYMBOL(ptp_clock_register); + +int ptp_clock_unregister(struct ptp_clock *ptp) +{ + ptp->defunct = 1; + wake_up_interruptible(&ptp->tsev_wq); + + /* Release the clock's resources. 
*/ + if (ptp->pps_source) + pps_unregister_source(ptp->pps_source); + ptp_cleanup_sysfs(ptp); + device_destroy(ptp_class, ptp->devid); + + posix_clock_unregister(&ptp->clock); + return 0; +} +EXPORT_SYMBOL(ptp_clock_unregister); + +void ptp_clock_event(struct ptp_clock *ptp, struct ptp_clock_event *event) +{ + struct pps_event_time evt; + + switch (event->type) { + + case PTP_CLOCK_ALARM: + break; + + case PTP_CLOCK_EXTTS: + enqueue_external_timestamp(&ptp->tsevq, event); + wake_up_interruptible(&ptp->tsev_wq); + break; + + case PTP_CLOCK_PPS: + pps_get_ts(&evt); + pps_event(ptp->pps_source, &evt, PTP_PPS_EVENT, NULL); + break; + } +} +EXPORT_SYMBOL(ptp_clock_event); + +/* module operations */ + +static void __exit ptp_exit(void) +{ + class_destroy(ptp_class); + unregister_chrdev_region(ptp_devt, PTP_MAX_CLOCKS); +} + +static int __init ptp_init(void) +{ + int err; + + ptp_class = class_create(THIS_MODULE, "ptp"); + if (IS_ERR(ptp_class)) { + pr_err("ptp: failed to allocate class\n"); + return PTR_ERR(ptp_class); + } + + err = alloc_chrdev_region(&ptp_devt, 0, PTP_MAX_CLOCKS, "ptp"); + if (err < 0) { + pr_err("ptp: failed to allocate device region\n"); + goto no_region; + } + + ptp_class->dev_attrs = ptp_dev_attrs; + pr_info("PTP clock support registered\n"); + return 0; + +no_region: + class_destroy(ptp_class); + return err; +} + +subsys_initcall(ptp_init); +module_exit(ptp_exit); + +MODULE_AUTHOR("Richard Cochran "); +MODULE_DESCRIPTION("PTP clocks support"); +MODULE_LICENSE("GPL"); diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h new file mode 100644 index 000000000000..4d5b5082c3b1 --- /dev/null +++ b/drivers/ptp/ptp_private.h @@ -0,0 +1,92 @@ +/* + * PTP 1588 clock support - private declarations for the core module. + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#ifndef _PTP_PRIVATE_H_ +#define _PTP_PRIVATE_H_ + +#include +#include +#include +#include +#include +#include +#include + +#define PTP_MAX_TIMESTAMPS 128 +#define PTP_BUF_TIMESTAMPS 30 + +struct timestamp_event_queue { + struct ptp_extts_event buf[PTP_MAX_TIMESTAMPS]; + int head; + int tail; + spinlock_t lock; +}; + +struct ptp_clock { + struct posix_clock clock; + struct device *dev; + struct ptp_clock_info *info; + dev_t devid; + int index; /* index into clocks.map */ + struct pps_device *pps_source; + struct timestamp_event_queue tsevq; /* simple fifo for time stamps */ + struct mutex tsevq_mux; /* one process at a time reading the fifo */ + wait_queue_head_t tsev_wq; + int defunct; /* tells readers to go away when clock is being removed */ +}; + +/* + * The function queue_cnt() is safe for readers to call without + * holding q->lock. Readers use this function to verify that the queue + * is nonempty before proceeding with a dequeue operation. 
The fact + * that a writer might concurrently increment the tail does not + * matter, since the queue remains nonempty nonetheless. + */ +static inline int queue_cnt(struct timestamp_event_queue *q) +{ + int cnt = q->tail - q->head; + return cnt < 0 ? PTP_MAX_TIMESTAMPS + cnt : cnt; +} + +/* + * see ptp_chardev.c + */ + +long ptp_ioctl(struct posix_clock *pc, + unsigned int cmd, unsigned long arg); + +int ptp_open(struct posix_clock *pc, fmode_t fmode); + +ssize_t ptp_read(struct posix_clock *pc, + uint flags, char __user *buf, size_t cnt); + +uint ptp_poll(struct posix_clock *pc, + struct file *fp, poll_table *wait); + +/* + * see ptp_sysfs.c + */ + +extern struct device_attribute ptp_dev_attrs[]; + +int ptp_cleanup_sysfs(struct ptp_clock *ptp); + +int ptp_populate_sysfs(struct ptp_clock *ptp); + +#endif diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c new file mode 100644 index 000000000000..2f93926ac976 --- /dev/null +++ b/drivers/ptp/ptp_sysfs.c @@ -0,0 +1,230 @@ +/* + * PTP 1588 clock support - sysfs interface. + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include + +#include "ptp_private.h" + +static ssize_t clock_name_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct ptp_clock *ptp = dev_get_drvdata(dev); + return snprintf(page, PAGE_SIZE-1, "%s\n", ptp->info->name); +} + +#define PTP_SHOW_INT(name) \ +static ssize_t name##_show(struct device *dev, \ + struct device_attribute *attr, char *page) \ +{ \ + struct ptp_clock *ptp = dev_get_drvdata(dev); \ + return snprintf(page, PAGE_SIZE-1, "%d\n", ptp->info->name); \ +} + +PTP_SHOW_INT(max_adj); +PTP_SHOW_INT(n_alarm); +PTP_SHOW_INT(n_ext_ts); +PTP_SHOW_INT(n_per_out); +PTP_SHOW_INT(pps); + +#define PTP_RO_ATTR(_var, _name) { \ + .attr = { .name = __stringify(_name), .mode = 0444 }, \ + .show = _var##_show, \ +} + +struct device_attribute ptp_dev_attrs[] = { + PTP_RO_ATTR(clock_name, clock_name), + PTP_RO_ATTR(max_adj, max_adjustment), + PTP_RO_ATTR(n_alarm, n_alarms), + PTP_RO_ATTR(n_ext_ts, n_external_timestamps), + PTP_RO_ATTR(n_per_out, n_periodic_outputs), + PTP_RO_ATTR(pps, pps_available), + __ATTR_NULL, +}; + +static ssize_t extts_enable_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ptp_clock *ptp = dev_get_drvdata(dev); + struct ptp_clock_info *ops = ptp->info; + struct ptp_clock_request req = { .type = PTP_CLK_REQ_EXTTS }; + int cnt, enable; + int err = -EINVAL; + + cnt = sscanf(buf, "%u %d", &req.extts.index, &enable); + if (cnt != 2) + goto out; + if (req.extts.index >= ops->n_ext_ts) + goto out; + + err = ops->enable(ops, &req, enable ? 
1 : 0); + if (err) + goto out; + + return count; +out: + return err; +} + +static ssize_t extts_fifo_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct ptp_clock *ptp = dev_get_drvdata(dev); + struct timestamp_event_queue *queue = &ptp->tsevq; + struct ptp_extts_event event; + unsigned long flags; + size_t qcnt; + int cnt = 0; + + memset(&event, 0, sizeof(event)); + + if (mutex_lock_interruptible(&ptp->tsevq_mux)) + return -ERESTARTSYS; + + spin_lock_irqsave(&queue->lock, flags); + qcnt = queue_cnt(queue); + if (qcnt) { + event = queue->buf[queue->head]; + queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS; + } + spin_unlock_irqrestore(&queue->lock, flags); + + if (!qcnt) + goto out; + + cnt = snprintf(page, PAGE_SIZE, "%u %lld %u\n", + event.index, event.t.sec, event.t.nsec); +out: + mutex_unlock(&ptp->tsevq_mux); + return cnt; +} + +static ssize_t period_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ptp_clock *ptp = dev_get_drvdata(dev); + struct ptp_clock_info *ops = ptp->info; + struct ptp_clock_request req = { .type = PTP_CLK_REQ_PEROUT }; + int cnt, enable, err = -EINVAL; + + cnt = sscanf(buf, "%u %lld %u %lld %u", &req.perout.index, + &req.perout.start.sec, &req.perout.start.nsec, + &req.perout.period.sec, &req.perout.period.nsec); + if (cnt != 5) + goto out; + if (req.perout.index >= ops->n_per_out) + goto out; + + enable = req.perout.period.sec || req.perout.period.nsec; + err = ops->enable(ops, &req, enable); + if (err) + goto out; + + return count; +out: + return err; +} + +static ssize_t pps_enable_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ptp_clock *ptp = dev_get_drvdata(dev); + struct ptp_clock_info *ops = ptp->info; + struct ptp_clock_request req = { .type = PTP_CLK_REQ_PPS }; + int cnt, enable; + int err = -EINVAL; + + if (!capable(CAP_SYS_TIME)) + return -EPERM; + + cnt = sscanf(buf, "%d", &enable); + if (cnt != 1) + goto out; + + err = ops->enable(ops, &req, enable ? 
1 : 0); + if (err) + goto out; + + return count; +out: + return err; +} + +static DEVICE_ATTR(extts_enable, 0220, NULL, extts_enable_store); +static DEVICE_ATTR(fifo, 0444, extts_fifo_show, NULL); +static DEVICE_ATTR(period, 0220, NULL, period_store); +static DEVICE_ATTR(pps_enable, 0220, NULL, pps_enable_store); + +int ptp_cleanup_sysfs(struct ptp_clock *ptp) +{ + struct device *dev = ptp->dev; + struct ptp_clock_info *info = ptp->info; + + if (info->n_ext_ts) { + device_remove_file(dev, &dev_attr_extts_enable); + device_remove_file(dev, &dev_attr_fifo); + } + if (info->n_per_out) + device_remove_file(dev, &dev_attr_period); + + if (info->pps) + device_remove_file(dev, &dev_attr_pps_enable); + + return 0; +} + +int ptp_populate_sysfs(struct ptp_clock *ptp) +{ + struct device *dev = ptp->dev; + struct ptp_clock_info *info = ptp->info; + int err; + + if (info->n_ext_ts) { + err = device_create_file(dev, &dev_attr_extts_enable); + if (err) + goto out1; + err = device_create_file(dev, &dev_attr_fifo); + if (err) + goto out2; + } + if (info->n_per_out) { + err = device_create_file(dev, &dev_attr_period); + if (err) + goto out3; + } + if (info->pps) { + err = device_create_file(dev, &dev_attr_pps_enable); + if (err) + goto out4; + } + return 0; +out4: + if (info->n_per_out) + device_remove_file(dev, &dev_attr_period); +out3: + if (info->n_ext_ts) + device_remove_file(dev, &dev_attr_fifo); +out2: + if (info->n_ext_ts) + device_remove_file(dev, &dev_attr_extts_enable); +out1: + return err; +} diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 75cf611641e6..4585836ba664 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -302,6 +302,7 @@ header-y += ppp-comp.h header-y += ppp_defs.h header-y += pps.h header-y += prctl.h +header-y += ptp_clock.h header-y += ptrace.h header-y += qnx4_fs.h header-y += qnxtypes.h diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index 943a85ab0020..e07e2742a865 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -25,6 +25,7 @@ #include #include +#include #include #ifdef __KERNEL__ #include @@ -58,6 +59,12 @@ #define OFF_NEXT 6 #define OFF_UDP_DST 2 +#define OFF_PTP_SOURCE_UUID 22 /* PTPv1 only */ +#define OFF_PTP_SEQUENCE_ID 30 +#define OFF_PTP_CONTROL 32 /* PTPv1 only */ + +#define IPV4_HLEN(data) (((struct iphdr *)(data + OFF_IHL))->ihl << 2) + #define IP6_HLEN 40 #define UDP_HLEN 8 diff --git a/include/linux/ptp_clock.h b/include/linux/ptp_clock.h new file mode 100644 index 000000000000..94e981f810a2 --- /dev/null +++ b/include/linux/ptp_clock.h @@ -0,0 +1,84 @@ +/* + * PTP 1588 clock support - user space interface + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef _PTP_CLOCK_H_ +#define _PTP_CLOCK_H_ + +#include +#include + +/* PTP_xxx bits, for the flags field within the request structures. */ +#define PTP_ENABLE_FEATURE (1<<0) +#define PTP_RISING_EDGE (1<<1) +#define PTP_FALLING_EDGE (1<<2) + +/* + * struct ptp_clock_time - represents a time value + * + * The sign of the seconds field applies to the whole value. The + * nanoseconds field is always unsigned. The reserved field is + * included for sub-nanosecond resolution, should the demand for + * this ever appear. + * + */ +struct ptp_clock_time { + __s64 sec; /* seconds */ + __u32 nsec; /* nanoseconds */ + __u32 reserved; +}; + +struct ptp_clock_caps { + int max_adj; /* Maximum frequency adjustment in parts per billon. */ + int n_alarm; /* Number of programmable alarms. */ + int n_ext_ts; /* Number of external time stamp channels. */ + int n_per_out; /* Number of programmable periodic signals. */ + int pps; /* Whether the clock supports a PPS callback. */ + int rsv[15]; /* Reserved for future use. */ +}; + +struct ptp_extts_request { + unsigned int index; /* Which channel to configure. */ + unsigned int flags; /* Bit field for PTP_xxx flags. */ + unsigned int rsv[2]; /* Reserved for future use. */ +}; + +struct ptp_perout_request { + struct ptp_clock_time start; /* Absolute start time. */ + struct ptp_clock_time period; /* Desired period, zero means disable. */ + unsigned int index; /* Which channel to configure. */ + unsigned int flags; /* Reserved for future use. */ + unsigned int rsv[4]; /* Reserved for future use. */ +}; + +#define PTP_CLK_MAGIC '=' + +#define PTP_CLOCK_GETCAPS _IOR(PTP_CLK_MAGIC, 1, struct ptp_clock_caps) +#define PTP_EXTTS_REQUEST _IOW(PTP_CLK_MAGIC, 2, struct ptp_extts_request) +#define PTP_PEROUT_REQUEST _IOW(PTP_CLK_MAGIC, 3, struct ptp_perout_request) +#define PTP_ENABLE_PPS _IOW(PTP_CLK_MAGIC, 4, int) + +struct ptp_extts_event { + struct ptp_clock_time t; /* Time event occured. */ + unsigned int index; /* Which channel produced the event. */ + unsigned int flags; /* Reserved for future use. */ + unsigned int rsv[2]; /* Reserved for future use. */ +}; + +#endif diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h new file mode 100644 index 000000000000..dd2e44fba63e --- /dev/null +++ b/include/linux/ptp_clock_kernel.h @@ -0,0 +1,139 @@ +/* + * PTP 1588 clock support + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _PTP_CLOCK_KERNEL_H_ +#define _PTP_CLOCK_KERNEL_H_ + +#include + + +struct ptp_clock_request { + enum { + PTP_CLK_REQ_EXTTS, + PTP_CLK_REQ_PEROUT, + PTP_CLK_REQ_PPS, + } type; + union { + struct ptp_extts_request extts; + struct ptp_perout_request perout; + }; +}; + +/** + * struct ptp_clock_info - decribes a PTP hardware clock + * + * @owner: The clock driver should set to THIS_MODULE. 
+ * @name: A short name to identify the clock. + * @max_adj: The maximum possible frequency adjustment, in parts per billon. + * @n_alarm: The number of programmable alarms. + * @n_ext_ts: The number of external time stamp channels. + * @n_per_out: The number of programmable periodic signals. + * @pps: Indicates whether the clock supports a PPS callback. + * + * clock operations + * + * @adjfreq: Adjusts the frequency of the hardware clock. + * parameter delta: Desired period change in parts per billion. + * + * @adjtime: Shifts the time of the hardware clock. + * parameter delta: Desired change in nanoseconds. + * + * @gettime: Reads the current time from the hardware clock. + * parameter ts: Holds the result. + * + * @settime: Set the current time on the hardware clock. + * parameter ts: Time value to set. + * + * @enable: Request driver to enable or disable an ancillary feature. + * parameter request: Desired resource to enable or disable. + * parameter on: Caller passes one to enable or zero to disable. + * + * Drivers should embed their ptp_clock_info within a private + * structure, obtaining a reference to it using container_of(). + * + * The callbacks must all return zero on success, non-zero otherwise. + */ + +struct ptp_clock_info { + struct module *owner; + char name[16]; + s32 max_adj; + int n_alarm; + int n_ext_ts; + int n_per_out; + int pps; + int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta); + int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); + int (*gettime)(struct ptp_clock_info *ptp, struct timespec *ts); + int (*settime)(struct ptp_clock_info *ptp, const struct timespec *ts); + int (*enable)(struct ptp_clock_info *ptp, + struct ptp_clock_request *request, int on); +}; + +struct ptp_clock; + +/** + * ptp_clock_register() - register a PTP hardware clock driver + * + * @info: Structure describing the new clock. + */ + +extern struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info); + +/** + * ptp_clock_unregister() - unregister a PTP hardware clock driver + * + * @ptp: The clock to remove from service. + */ + +extern int ptp_clock_unregister(struct ptp_clock *ptp); + + +enum ptp_clock_events { + PTP_CLOCK_ALARM, + PTP_CLOCK_EXTTS, + PTP_CLOCK_PPS, +}; + +/** + * struct ptp_clock_event - decribes a PTP hardware clock event + * + * @type: One of the ptp_clock_events enumeration values. + * @index: Identifies the source of the event. + * @timestamp: When the event occured. + */ + +struct ptp_clock_event { + int type; + int index; + u64 timestamp; +}; + +/** + * ptp_clock_event() - notify the PTP layer about an event + * + * @ptp: The clock obtained from ptp_clock_register(). + * @event: Message structure describing the event. + */ + +extern void ptp_clock_event(struct ptp_clock *ptp, + struct ptp_clock_event *event); + +#endif -- cgit v1.2.3 From 600942e0fdb7ed1565d056d7305c46c7c0544a3e Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 27 Jan 2011 15:24:58 +0100 Subject: lru_cache.h: fix comments referring to ts_ instead of lc_ For some time we contemplated calling the "struct lru_cache" a "struct tracked_set", and some comments kept the ts_ prefix. Fix those to match the member field names. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/lru_cache.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 6a4fab7c6e09..7a71ffad037c 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -139,9 +139,9 @@ write intent log information, three of which are mentioned here. * .list is on one of three lists: * in_use: currently in use (refcnt > 0, lc_number != LC_FREE) * lru: unused but ready to be reused or recycled - * (ts_refcnt == 0, lc_number != LC_FREE), + * (lc_refcnt == 0, lc_number != LC_FREE), * free: unused but ready to be recycled - * (ts_refcnt == 0, lc_number == LC_FREE), + * (lc_refcnt == 0, lc_number == LC_FREE), * * an element is said to be "in the active set", * if either on "in_use" or "lru", i.e. lc_number != LC_FREE. @@ -160,8 +160,8 @@ struct lc_element { struct hlist_node colision; struct list_head list; /* LRU list or free list */ unsigned refcnt; - /* back "pointer" into ts_cache->element[index], - * for paranoia, and for "ts_element_to_index" */ + /* back "pointer" into lc_cache->element[index], + * for paranoia, and for "lc_element_to_index" */ unsigned lc_index; /* if we want to track a larger set of objects, * it needs to become arch independend u64 */ @@ -190,8 +190,8 @@ struct lru_cache { /* Arbitrary limit on maximum tracked objects. Practical limit is much * lower due to allocation failures, probably. For typical use cases, * nr_elements should be a few thousand at most. - * This also limits the maximum value of ts_element.ts_index, allowing the - * 8 high bits of .ts_index to be overloaded with flags in the future. */ + * This also limits the maximum value of lc_element.lc_index, allowing the + * 8 high bits of .lc_index to be overloaded with flags in the future. */ #define LC_MAX_ACTIVE (1<<24) /* statistics */ -- cgit v1.2.3 From 9a0d9d0389ef769e4b01abf50fcc11407706270b Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 2 May 2011 11:51:31 +0200 Subject: drbd: fix schedule in atomic An administrative detach used to request a state change directly to D_DISKLESS, first suspending IO to avoid the last put_ldev() occuring from an endio handler, potentially in irq context. This is not enough on the receiving side (typically secondary), we may miss some peer_req on the way to local disk, which then may do the last put_ldev() from their drbd_peer_request_endio(). This patch makes the detach always go through the intermediate D_FAILED state. We may consider to rename it D_DETACHING. Alternative approach would be to create yet an other work item to be scheduled on the worker, do the destructor work from there, and get the timing right. manually picked commit 564040f from the drbd 8.4 branch. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 ++++ drivers/block/drbd/drbd_nl.c | 14 +++++++++++--- include/linux/drbd.h | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8aa10391115b..a74d3ee04ba8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -2157,6 +2157,10 @@ static inline int get_net_conf(struct drbd_conf *mdev) static inline void put_ldev(struct drbd_conf *mdev) { int i = atomic_dec_return(&mdev->local_cnt); + + /* This may be called from some endio handler, + * so we must not sleep here. 
*/ + __release(local); D_ASSERT(i >= 0); if (i == 0) { diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 7c64ec042124..13569635b922 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1334,11 +1334,19 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { + enum drbd_ret_code retcode; + int ret; drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ - reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS)); - if (mdev->state.disk == D_DISKLESS) - wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); + retcode = drbd_request_state(mdev, NS(disk, D_FAILED)); + /* D_FAILED will transition to DISKLESS. */ + ret = wait_event_interruptible(mdev->misc_wait, + mdev->state.disk != D_FAILED); drbd_resume_io(mdev); + if (retcode == SS_IS_DISKLESS) + retcode = SS_NOTHING_TO_DO; + if (ret) + retcode = ERR_INTR; + reply->ret_code = retcode; return 0; } diff --git a/include/linux/drbd.h b/include/linux/drbd.h index cec467f5d676..c86283b4fbab 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,7 +53,7 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.10" +#define REL_VERSION "8.3.11" #define API_VERSION 88 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 96 -- cgit v1.2.3 From 24c4830c8ec3cbc904d84c213126a35f41a4e455 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sat, 21 May 2011 18:32:29 +0200 Subject: drbd: Fix spelling Found these with the help of ispell -l. Signed-off-by: Bart Van Assche Signed-off-by: Lars Ellenberg Signed-off-by: Philipp Reisner --- drivers/block/drbd/drbd_actlog.c | 2 +- drivers/block/drbd/drbd_bitmap.c | 6 +++--- drivers/block/drbd/drbd_int.h | 6 +++--- drivers/block/drbd/drbd_main.c | 4 ++-- drivers/block/drbd/drbd_nl.c | 6 +++--- drivers/block/drbd/drbd_receiver.c | 30 +++++++++++++++--------------- drivers/block/drbd/drbd_req.c | 18 +++++++++--------- drivers/block/drbd/drbd_req.h | 4 ++-- drivers/block/drbd/drbd_worker.c | 4 ++-- include/linux/drbd.h | 8 ++++---- include/linux/drbd_tag_magic.h | 2 +- 11 files changed, 45 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index c6828b68d77b..09ef9a878ef0 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -28,7 +28,7 @@ #include "drbd_int.h" #include "drbd_wrappers.h" -/* We maintain a trivial check sum in our on disk activity log. +/* We maintain a trivial checksum in our on disk activity log. * With that we can ensure correct operation even when the storage * device might do a partial (last) sector write while losing power. */ diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 76210ba401ac..f440a02dfdb1 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -74,7 +74,7 @@ * as we are "attached" to a local disk, which at 32 GiB for 1PiB storage * seems excessive. * - * We plan to reduce the amount of in-core bitmap pages by pageing them in + * We plan to reduce the amount of in-core bitmap pages by paging them in * and out against their on-disk location as necessary, but need to make * sure we don't cause too much meta data IO, and must not deadlock in * tight memory situations. This needs some more work. 
@@ -200,7 +200,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev) * we if bits have been cleared since last IO. */ #define BM_PAGE_LAZY_WRITEOUT 28 -/* store_page_idx uses non-atomic assingment. It is only used directly after +/* store_page_idx uses non-atomic assignment. It is only used directly after * allocating the page. All other bm_set_page_* and bm_clear_page_* need to * use atomic bit manipulation, as set_out_of_sync (and therefore bitmap * changes) may happen from various contexts, and wait_on_bit/wake_up_bit @@ -318,7 +318,7 @@ static void bm_unmap(unsigned long *p_addr) /* word offset from start of bitmap to word number _in_page_ * modulo longs per page #define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long)) - hm, well, Philipp thinks gcc might not optimze the % into & (... - 1) + hm, well, Philipp thinks gcc might not optimize the % into & (... - 1) so do it explicitly: */ #define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1)) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index a74d3ee04ba8..7952eb90d17f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -699,7 +699,7 @@ struct drbd_request { * see drbd_endio_pri(). */ struct bio *private_bio; - struct hlist_node colision; + struct hlist_node collision; sector_t sector; unsigned int size; unsigned int epoch; /* barrier_nr */ @@ -765,7 +765,7 @@ struct digest_info { struct drbd_epoch_entry { struct drbd_work w; - struct hlist_node colision; + struct hlist_node collision; struct drbd_epoch *epoch; /* for writes */ struct drbd_conf *mdev; struct page *pages; @@ -1520,7 +1520,7 @@ extern void drbd_resume_io(struct drbd_conf *mdev); extern char *ppsize(char *buf, unsigned long long size); extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int); enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 }; -extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); +extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); extern void resync_after_online_grow(struct drbd_conf *); extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev); extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index cfeb13b5a216..0358e55356c8 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2732,7 +2732,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) /* double check digest, sometimes buffers have been modified in flight. */ if (dgs > 0 && dgs <= 64) { - /* 64 byte, 512 bit, is the larges digest size + /* 64 byte, 512 bit, is the largest digest size * currently supported in kernel crypto. */ unsigned char digest[64]; drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest); @@ -3287,7 +3287,7 @@ static void drbd_delete_device(unsigned int minor) drbd_release_ee_lists(mdev); - /* should be free'd on disconnect? */ + /* should be freed on disconnect? */ kfree(mdev->ee_hash); /* mdev->ee_hash_s = 0; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 13569635b922..259ca0b20961 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -596,7 +596,7 @@ void drbd_resume_io(struct drbd_conf *mdev) * Returns 0 on success, negative return values indicate errors. * You should call drbd_md_sync() after calling this function. 
*/ -enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local) +enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local) { sector_t prev_first_sect, prev_size; /* previous meta location */ sector_t la_size; @@ -1205,7 +1205,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND)) set_bit(USE_DEGR_WFC_T, &mdev->flags); - dd = drbd_determin_dev_size(mdev, 0); + dd = drbd_determine_dev_size(mdev, 0); if (dd == dev_size_error) { retcode = ERR_NOMEM_BITMAP; goto force_diskless_dec; @@ -1719,7 +1719,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, mdev->ldev->dc.disk_size = (sector_t)rs.resize_size; ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0); - dd = drbd_determin_dev_size(mdev, ddsf); + dd = drbd_determine_dev_size(mdev, ddsf); drbd_md_sync(mdev); put_ldev(mdev); if (dd == dev_size_error) { diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6ea0a4b51ece..d9f2109fd9e6 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -333,7 +333,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, if (!page) goto fail; - INIT_HLIST_NODE(&e->colision); + INIT_HLIST_NODE(&e->collision); e->epoch = NULL; e->mdev = mdev; e->pages = page; @@ -356,7 +356,7 @@ void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int i kfree(e->digest); drbd_pp_free(mdev, e->pages, is_net); D_ASSERT(atomic_read(&e->pending_bios) == 0); - D_ASSERT(hlist_unhashed(&e->colision)); + D_ASSERT(hlist_unhashed(&e->collision)); mempool_free(e, drbd_ee_mempool); } @@ -1413,7 +1413,7 @@ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u sector_t sector = e->sector; int ok; - D_ASSERT(hlist_unhashed(&e->colision)); + D_ASSERT(hlist_unhashed(&e->collision)); if (likely((e->flags & EE_WAS_ERROR) == 0)) { drbd_set_in_sync(mdev, sector, e->size); @@ -1482,7 +1482,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi return false; } - /* hlist_del(&req->colision) is done in _req_may_be_done, to avoid + /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid * special casing it there for the various failure cases. * still no race with drbd_fail_pending_reads */ ok = recv_dless_read(mdev, req, sector, data_size); @@ -1553,11 +1553,11 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ if (mdev->net_conf->two_primaries) { spin_lock_irq(&mdev->req_lock); - D_ASSERT(!hlist_unhashed(&e->colision)); - hlist_del_init(&e->colision); + D_ASSERT(!hlist_unhashed(&e->collision)); + hlist_del_init(&e->collision); spin_unlock_irq(&mdev->req_lock); } else { - D_ASSERT(hlist_unhashed(&e->colision)); + D_ASSERT(hlist_unhashed(&e->collision)); } drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? 
EV_CLEANUP : 0)); @@ -1574,8 +1574,8 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u ok = drbd_send_ack(mdev, P_DISCARD_ACK, e); spin_lock_irq(&mdev->req_lock); - D_ASSERT(!hlist_unhashed(&e->colision)); - hlist_del_init(&e->colision); + D_ASSERT(!hlist_unhashed(&e->collision)); + hlist_del_init(&e->collision); spin_unlock_irq(&mdev->req_lock); dec_unacked(mdev); @@ -1750,7 +1750,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned spin_lock_irq(&mdev->req_lock); - hlist_add_head(&e->colision, ee_hash_slot(mdev, sector)); + hlist_add_head(&e->collision, ee_hash_slot(mdev, sector)); #define OVERLAPS overlaps(i->sector, i->size, sector, size) slot = tl_hash_slot(mdev, sector); @@ -1760,7 +1760,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int have_conflict = 0; prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE); - hlist_for_each_entry(i, n, slot, colision) { + hlist_for_each_entry(i, n, slot, collision) { if (OVERLAPS) { /* only ALERT on first iteration, * we may be woken up early... */ @@ -1799,7 +1799,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned } if (signal_pending(current)) { - hlist_del_init(&e->colision); + hlist_del_init(&e->collision); spin_unlock_irq(&mdev->req_lock); @@ -1857,7 +1857,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned dev_err(DEV, "submit failed, triggering re-connect\n"); spin_lock_irq(&mdev->req_lock); list_del(&e->w.list); - hlist_del_init(&e->colision); + hlist_del_init(&e->collision); spin_unlock_irq(&mdev->req_lock); if (e->flags & EE_CALL_AL_COMPLETE_IO) drbd_al_complete_io(mdev, e->sector); @@ -2988,7 +2988,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned ddsf = be16_to_cpu(p->dds_flags); if (get_ldev(mdev)) { - dd = drbd_determin_dev_size(mdev, ddsf); + dd = drbd_determine_dev_size(mdev, ddsf); put_ldev(mdev); if (dd == dev_size_error) return false; @@ -4261,7 +4261,7 @@ static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev, struct hlist_node *n; struct drbd_request *req; - hlist_for_each_entry(req, n, slot, colision) { + hlist_for_each_entry(req, n, slot, collision) { if ((unsigned long)req == (unsigned long)id) { if (req->sector != sector) { dev_err(DEV, "_ack_id_to_req: found req %p but it has " diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 14645bd40092..3424d675b769 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -163,7 +163,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, * they must have been failed on the spot */ #define OVERLAPS overlaps(sector, size, i->sector, i->size) slot = tl_hash_slot(mdev, sector); - hlist_for_each_entry(i, n, slot, colision) { + hlist_for_each_entry(i, n, slot, collision) { if (OVERLAPS) { dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; " "other: %p %llus +%u\n", @@ -187,7 +187,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, #undef OVERLAPS #define OVERLAPS overlaps(sector, size, e->sector, e->size) slot = ee_hash_slot(mdev, req->sector); - hlist_for_each_entry(e, n, slot, colision) { + hlist_for_each_entry(e, n, slot, collision) { if (OVERLAPS) { wake_up(&mdev->misc_wait); break; @@ -260,8 +260,8 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) /* remove the request from the conflict detection * respective block_id verification hash */ - if 
(!hlist_unhashed(&req->colision)) - hlist_del(&req->colision); + if (!hlist_unhashed(&req->collision)) + hlist_del(&req->collision); else D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); @@ -329,7 +329,7 @@ static int _req_conflicts(struct drbd_request *req) struct hlist_node *n; struct hlist_head *slot; - D_ASSERT(hlist_unhashed(&req->colision)); + D_ASSERT(hlist_unhashed(&req->collision)); if (!get_net_conf(mdev)) return 0; @@ -341,7 +341,7 @@ static int _req_conflicts(struct drbd_request *req) #define OVERLAPS overlaps(i->sector, i->size, sector, size) slot = tl_hash_slot(mdev, sector); - hlist_for_each_entry(i, n, slot, colision) { + hlist_for_each_entry(i, n, slot, collision) { if (OVERLAPS) { dev_alert(DEV, "%s[%u] Concurrent local write detected! " "[DISCARD L] new: %llus +%u; " @@ -359,7 +359,7 @@ static int _req_conflicts(struct drbd_request *req) #undef OVERLAPS #define OVERLAPS overlaps(e->sector, e->size, sector, size) slot = ee_hash_slot(mdev, sector); - hlist_for_each_entry(e, n, slot, colision) { + hlist_for_each_entry(e, n, slot, collision) { if (OVERLAPS) { dev_alert(DEV, "%s[%u] Concurrent remote write detected!" " [DISCARD L] new: %llus +%u; " @@ -491,7 +491,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* so we can verify the handle in the answer packet * corresponding hlist_del is in _req_may_be_done() */ - hlist_add_head(&req->colision, ar_hash_slot(mdev, req->sector)); + hlist_add_head(&req->collision, ar_hash_slot(mdev, req->sector)); set_bit(UNPLUG_REMOTE, &mdev->flags); @@ -507,7 +507,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* assert something? */ /* from drbd_make_request_common only */ - hlist_add_head(&req->colision, tl_hash_slot(mdev, req->sector)); + hlist_add_head(&req->collision, tl_hash_slot(mdev, req->sector)); /* corresponding hlist_del is in _req_may_be_done() */ /* NOTE diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 32e2c3e6a813..281342dca2c8 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -256,7 +256,7 @@ static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev, struct hlist_node *n; struct drbd_request *req; - hlist_for_each_entry(req, n, slot, colision) { + hlist_for_each_entry(req, n, slot, collision) { if ((unsigned long)req == (unsigned long)id) { D_ASSERT(req->sector == sector); return req; @@ -291,7 +291,7 @@ static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, req->epoch = 0; req->sector = bio_src->bi_sector; req->size = bio_src->bi_size; - INIT_HLIST_NODE(&req->colision); + INIT_HLIST_NODE(&req->collision); INIT_LIST_HEAD(&req->tl_requests); INIT_LIST_HEAD(&req->w.list); } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index b5e53695fd7e..4d76b06b6b20 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -126,7 +126,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo list_del(&e->w.list); /* has been on active_ee or sync_ee */ list_add_tail(&e->w.list, &mdev->done_ee); - /* No hlist_del_init(&e->colision) here, we did not send the Ack yet, + /* No hlist_del_init(&e->collision) here, we did not send the Ack yet, * neither did we wake possibly waiting conflicting requests. 
* done from "drbd_process_done_ee" within the appropriate w.cb * (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */ @@ -840,7 +840,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) const int ratio = (t == 0) ? 0 : (t < 100000) ? ((s*100)/t) : (s/(t/100)); - dev_info(DEV, "%u %% had equal check sums, eliminated: %luK; " + dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; " "transferred %luK total %luK\n", ratio, Bit2KB(mdev->rs_same_csum), diff --git a/include/linux/drbd.h b/include/linux/drbd.h index c86283b4fbab..9e5f5607eba3 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -38,7 +38,7 @@ /* Although the Linux source code makes a difference between generic endianness and the bitfields' endianness, there is no - architecture as of Linux-2.6.24-rc4 where the bitfileds' endianness + architecture as of Linux-2.6.24-rc4 where the bitfields' endianness does not match the generic endianness. */ #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -195,7 +195,7 @@ enum drbd_conns { C_WF_REPORT_PARAMS, /* we have a socket */ C_CONNECTED, /* we have introduced each other */ C_STARTING_SYNC_S, /* starting full sync by admin request. */ - C_STARTING_SYNC_T, /* stariing full sync by admin request. */ + C_STARTING_SYNC_T, /* starting full sync by admin request. */ C_WF_BITMAP_S, C_WF_BITMAP_T, C_WF_SYNC_UUID, @@ -236,7 +236,7 @@ union drbd_state { * pointed out by Maxim Uvarov q * even though we transmit as "cpu_to_be32(state)", * the offsets of the bitfields still need to be swapped - * on different endianess. + * on different endianness. */ struct { #if defined(__LITTLE_ENDIAN_BITFIELD) @@ -266,7 +266,7 @@ union drbd_state { unsigned peer:2 ; /* 3/4 primary/secondary/unknown */ unsigned role:2 ; /* 3/4 primary/secondary/unknown */ #else -# error "this endianess is not supported" +# error "this endianness is not supported" #endif }; unsigned int i; diff --git a/include/linux/drbd_tag_magic.h b/include/linux/drbd_tag_magic.h index f14a165e82dc..069543190516 100644 --- a/include/linux/drbd_tag_magic.h +++ b/include/linux/drbd_tag_magic.h @@ -30,7 +30,7 @@ enum packet_types { int tag_and_len ## member; #include "linux/drbd_nl.h" -/* declate tag-list-sizes */ +/* declare tag-list-sizes */ static const int tag_list_sizes[] = { #define NL_PACKET(name, number, fields) 2 fields , #define NL_INTEGER(pn, pr, member) + 4 + 4 -- cgit v1.2.3 From 8af088710d1eb3c980e0ef3779c8d47f3f217b48 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 May 2011 11:12:58 +0200 Subject: posix-timers: RCU conversion Ben Nagy reported a scalability problem with KVM/QEMU that hit very hard a single spinlock (idr_lock) in posix-timers code, on its 48 core machine. Even on a 16 cpu machine (2x4x2), a single test can show 98% of cpu time used in ticket_spin_lock, from lock_timer Ref: http://www.spinics.net/lists/kvm/msg51526.html Switching to RCU is quite easy, IDR being already RCU ready. idr_lock should be locked only for an insert/delete, not a lookup. Benchmark on a 2x4x2 machine, 16 processes calling timer_gettime(). Before : real 1m18.669s user 0m1.346s sys 1m17.180s After : real 0m3.296s user 0m1.366s sys 0m1.926s Reported-by: Ben Nagy Signed-off-by: Eric Dumazet Tested-by: Ben Nagy Cc: Oleg Nesterov Cc: Avi Kivity Cc: John Stultz Cc: Richard Cochran Cc: Paul E. 
McKenney Cc: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/posix-timers.h | 1 + kernel/posix-timers.c | 25 ++++++++++++++----------- 2 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 808227d40a64..959c14132f46 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -82,6 +82,7 @@ struct k_itimer { unsigned long expires; } mmtimer; struct alarm alarmtimer; + struct rcu_head rcu; } it; }; diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index a1b5edf1bf92..4556182527f3 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -491,6 +491,13 @@ static struct k_itimer * alloc_posix_timer(void) return tmr; } +static void k_itimer_rcu_free(struct rcu_head *head) +{ + struct k_itimer *tmr = container_of(head, struct k_itimer, it.rcu); + + kmem_cache_free(posix_timers_cache, tmr); +} + #define IT_ID_SET 1 #define IT_ID_NOT_SET 0 static void release_posix_timer(struct k_itimer *tmr, int it_id_set) @@ -503,7 +510,7 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set) } put_pid(tmr->it_pid); sigqueue_free(tmr->sigq); - kmem_cache_free(posix_timers_cache, tmr); + call_rcu(&tmr->it.rcu, k_itimer_rcu_free); } static struct k_clock *clockid_to_kclock(const clockid_t id) @@ -631,22 +638,18 @@ out: static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) { struct k_itimer *timr; - /* - * Watch out here. We do a irqsave on the idr_lock and pass the - * flags part over to the timer lock. Must not let interrupts in - * while we are moving the lock. - */ - spin_lock_irqsave(&idr_lock, *flags); + + rcu_read_lock(); timr = idr_find(&posix_timers_id, (int)timer_id); if (timr) { - spin_lock(&timr->it_lock); + spin_lock_irqsave(&timr->it_lock, *flags); if (timr->it_signal == current->signal) { - spin_unlock(&idr_lock); + rcu_read_unlock(); return timr; } - spin_unlock(&timr->it_lock); + spin_unlock_irqrestore(&timr->it_lock, *flags); } - spin_unlock_irqrestore(&idr_lock, *flags); + rcu_read_unlock(); return NULL; } -- cgit v1.2.3 From 1b4ac2a935aaf194241a2f4165d6407ba9650e1a Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 24 May 2011 00:18:05 +0200 Subject: x86: Get rid of asmregparm As UML does no longer need asmregparm we can remove it. 
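For context (not part of the patch, and using made-up names): on 32-bit x86 the removed asmregparm macro expanded to the GCC regparm(3) attribute, which passes the first three integer arguments in registers (%eax, %edx, %ecx) instead of on the stack. C functions called directly from assembly entry code therefore had to carry the annotation so their calling convention matched the caller. A minimal hedged sketch of what such an annotation looks like:

	/* Hedged illustration only; example_asmregparm is a hypothetical
	 * stand-in for the macro this patch removes. */
	#define example_asmregparm __attribute__((regparm(3)))

	/* With regparm(3), 'regs' arrives in %eax rather than on the
	 * stack, matching a 32-bit assembly caller that loads it there. */
	example_asmregparm long example_syscall_trace_enter(struct pt_regs *regs);
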
Signed-off-by: Richard Weinberger Cc: namhyung@gmail.com Cc: davem@davemloft.net Cc: fweisbec@gmail.com Cc: dhowells@redhat.com Link: http://lkml.kernel.org/r/%3C1306189085-29896-1-git-send-email-richard%40nod.at%3E Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/linkage.h | 5 ----- arch/x86/kernel/ptrace.c | 4 ++-- include/linux/linkage.h | 4 ---- 3 files changed, 2 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 12d55e773eb6..48142971b25d 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -8,11 +8,6 @@ #ifdef CONFIG_X86_32 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) -/* - * For 32-bit UML - mark functions implemented in assembly that use - * regparm input parameters: - */ -#define asmregparm __attribute__((regparm(3))) /* * Make sure the compiler doesn't do anything stupid with the diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index f65e5b521dbd..807c2a2b80f1 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1363,7 +1363,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, * We must return the syscall number to actually look up in the table. * This can be -1L to skip running any syscall at all. */ -asmregparm long syscall_trace_enter(struct pt_regs *regs) +long syscall_trace_enter(struct pt_regs *regs) { long ret = 0; @@ -1408,7 +1408,7 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) return ret ?: regs->orig_ax; } -asmregparm void syscall_trace_leave(struct pt_regs *regs) +void syscall_trace_leave(struct pt_regs *regs) { bool step; diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 7135ebc8428c..3f46aedea42f 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -14,10 +14,6 @@ #define asmlinkage CPP_ASMLINKAGE #endif -#ifndef asmregparm -# define asmregparm -#endif - #define __page_aligned_data __section(.data..page_aligned) __aligned(PAGE_SIZE) #define __page_aligned_bss __section(.bss..page_aligned) __aligned(PAGE_SIZE) -- cgit v1.2.3 From 45e9683e87b69328175df3a9e42039b9892ca47e Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 24 May 2011 13:28:54 +1000 Subject: compat: include aio_abi.h for aio_context_t fixes this build error on sparc64 (at least): In file included from arch/sparc/include/asm/siginfo.h:19, from include/linux/signal.h:5, from include/linux/sched.h:73, from arch/sparc/kernel/asm-offsets.c:13: include/linux/compat.h:401: error: expected ')' before 'ctx_id' include/linux/compat.h:406: error: expected ')' before 'ctx_id' Signed-off-by: Stephen Rothwell Signed-off-by: Chris Metcalf --- include/linux/compat.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 644e1a4692b1..ddcb7db38e67 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -13,6 +13,7 @@ #include #include #include +#include /* for aio_context_t */ #include #include -- cgit v1.2.3 From c4dbe54ed7296ac3249c415d512dd6d649f66f4b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 May 2011 14:08:08 +0200 Subject: seqlock: Get rid of SEQLOCK_UNLOCKED All static seqlock should be initialized with the lockdep friendly __SEQLOCK_UNLOCKED() macro. Remove legacy SEQLOCK_UNLOCKED() macro. 
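As a quick illustration of why the conversion is mechanical (a hedged sketch, not taken from the patch; the lock and payload names are made up): __SEQLOCK_UNLOCKED() takes the lock variable itself so lockdep has a per-lock key, while the reader retry loop and the writer side are unchanged:

	/* Hedged sketch with a hypothetical lock and payload. */
	static seqlock_t example_lock = __SEQLOCK_UNLOCKED(example_lock);
	static u64 example_value;

	static u64 example_read(void)
	{
		unsigned int seq;
		u64 v;

		do {
			seq = read_seqbegin(&example_lock);
			v = example_value;
		} while (read_seqretry(&example_lock, seq));

		return v;
	}

	static void example_write(u64 v)
	{
		write_seqlock(&example_lock);
		example_value = v;
		write_sequnlock(&example_lock);
	}
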
Signed-off-by: Eric Dumazet Cc: David Miller Link: http://lkml.kernel.org/r/%3C1306238888.3026.31.camel%40edumazet-laptop%3E Signed-off-by: Thomas Gleixner --- arch/ia64/kernel/time.c | 2 +- arch/x86/kernel/vsyscall_64.c | 2 +- include/linux/seqlock.h | 3 --- net/ipv4/inet_connection_sock.c | 2 +- 4 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 04440cc09b40..85118dfe9bb5 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -36,7 +36,7 @@ static cycle_t itc_get_cycles(struct clocksource *cs); struct fsyscall_gtod_data_t fsyscall_gtod_data = { - .lock = SEQLOCK_UNLOCKED, + .lock = __SEQLOCK_UNLOCKED(fsyscall_gtod_data.lock), }; struct itc_jitter_data_t itc_jitter_data; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index dcbb28c4b694..59be48d0d75c 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -59,7 +59,7 @@ int __vgetcpu_mode __section_vgetcpu_mode; struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data = { - .lock = SEQLOCK_UNLOCKED, + .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), .sysctl_enabled = 1, }; diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 06d69648fc86..e9811892844f 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -41,9 +41,6 @@ typedef struct { #define __SEQLOCK_UNLOCKED(lockname) \ { 0, __SPIN_LOCK_UNLOCKED(lockname) } -#define SEQLOCK_UNLOCKED \ - __SEQLOCK_UNLOCKED(old_style_seqlock_init) - #define seqlock_init(x) \ do { \ (x)->sequence = 0; \ diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 61fac4cabc78..c14d88ad348d 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -33,7 +33,7 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg); * This struct holds the first and last local port number. */ struct local_ports sysctl_local_ports __read_mostly = { - .lock = SEQLOCK_UNLOCKED, + .lock = __SEQLOCK_UNLOCKED(sysctl_local_ports.lock), .range = { 32768, 61000 }, }; -- cgit v1.2.3 From 81be12c8179c1c397d3f179cdd9b3f7146cf47f1 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 24 May 2011 11:52:40 -0400 Subject: jbd2: fix sending of data flush on journal commit In data=ordered mode, it's theoretically possible (however rare) that an inode is filed to transaction's t_inode_list and a flusher thread writes all the data and inode is reclaimed before the transaction starts to commit. In such a case, we could erroneously omit sending a flush to file system device when it is different from the journal device (because data can still be in disk cache only). Fix the problem by setting a flag in a transaction when some inode is added to it and then send disk flush in the commit code when the flag is set. 
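The net effect, in abridged form (the hunks below carry the real context; everything else is omitted here for clarity): jbd2_journal_file_inode() sets a sticky per-transaction flag when an ordered-data inode is attached, and the commit path issues the flush whenever that flag is set, even if no data had to be written during commit itself.

	/* jbd2_journal_file_inode(): an ordered-data inode was attached */
	if (!transaction->t_need_data_flush)
		transaction->t_need_data_flush = 1;

	/* journal commit, before the commit record is written */
	if (commit_transaction->t_need_data_flush &&
	    (journal->j_fs_dev != journal->j_dev) &&
	    (journal->j_flags & JBD2_BARRIER))
		blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);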
Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 3 +-- fs/jbd2/transaction.c | 7 +++++++ include/linux/jbd2.h | 4 +++- 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 78c299218681..2d5095ecc25f 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -219,7 +219,6 @@ static int journal_submit_data_buffers(journal_t *journal, ret = err; spin_lock(&journal->j_list_lock); J_ASSERT(jinode->i_transaction == commit_transaction); - commit_transaction->t_flushed_data_blocks = 1; clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); smp_mb__after_clear_bit(); wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); @@ -683,7 +682,7 @@ start_journal_io: * then we must flush the file system device before we issue * the commit record */ - if (commit_transaction->t_flushed_data_blocks && + if (commit_transaction->t_need_data_flush && (journal->j_fs_dev != journal->j_dev) && (journal->j_flags & JBD2_BARRIER)) blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 85a055ef93fe..20065c9f2479 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -2147,6 +2147,13 @@ int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode) jinode->i_next_transaction == transaction) goto done; + /* + * We only ever set this variable to 1 so the test is safe. Since + * t_need_data_flush is likely to be set, we do the test to save some + * cacheline bouncing + */ + if (!transaction->t_need_data_flush) + transaction->t_need_data_flush = 1; /* On some different transaction's list - should be * the committing one */ if (jinode->i_transaction) { diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index a32dcaec04e1..4d57955061f4 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -658,7 +658,9 @@ struct transaction_s * waiting for it to finish. */ unsigned int t_synchronous_commit:1; - unsigned int t_flushed_data_blocks:1; + + /* Disk flush needs to be sent to fs partition [no locking] */ + int t_need_data_flush; /* * For use by the filesystem to store fs-specific data -- cgit v1.2.3 From bbd2be36910728f485ac78ea36e0f4f5a38e691e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 24 May 2011 11:59:18 -0400 Subject: jbd2: Add function jbd2_trans_will_send_data_barrier() Provide a function which returns whether a transaction with given tid will send a flush to the filesystem device. The function will be used by ext4 to detect whether fsync needs to send a separate flush or not. 
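A rough sketch of the intended caller: an fsync path that skips issuing its own flush when the committing transaction is going to send one anyway. The variable names and surrounding code are illustrative; the actual ext4 change is a separate patch.

	bool needs_flush = false;

	if ((journal->j_flags & JBD2_BARRIER) &&
	    !jbd2_trans_will_send_data_barrier(journal, commit_tid))
		needs_flush = true;
	jbd2_log_start_commit(journal, commit_tid);
	ret = jbd2_log_wait_commit(journal, commit_tid);
	if (needs_flush)
		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);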
Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 10 +++++++++- fs/jbd2/journal.c | 41 +++++++++++++++++++++++++++++++++++++++++ include/linux/jbd2.h | 4 +++- 3 files changed, 53 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 2d5095ecc25f..5b506e53c70b 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -677,6 +677,10 @@ start_journal_io: err = 0; } + write_lock(&journal->j_state_lock); + J_ASSERT(commit_transaction->t_state == T_COMMIT); + commit_transaction->t_state = T_COMMIT_DFLUSH; + write_unlock(&journal->j_state_lock); /* * If the journal is not located on the file system device, * then we must flush the file system device before we issue @@ -804,6 +808,10 @@ wait_for_iobuf: jbd2_journal_abort(journal, err); jbd_debug(3, "JBD: commit phase 5\n"); + write_lock(&journal->j_state_lock); + J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH); + commit_transaction->t_state = T_COMMIT_JFLUSH; + write_unlock(&journal->j_state_lock); if (!JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { @@ -959,7 +967,7 @@ restart_loop: jbd_debug(3, "JBD: commit phase 7\n"); - J_ASSERT(commit_transaction->t_state == T_COMMIT); + J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH); commit_transaction->t_start = jiffies; stats.run.rs_logging = jbd2_time_diff(stats.run.rs_logging, diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index cd2d341f602e..9a7826990304 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -587,6 +587,47 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) return ret; } +/* + * Return 1 if a given transaction has not yet sent barrier request + * connected with a transaction commit. If 0 is returned, transaction + * may or may not have sent the barrier. Used to avoid sending barrier + * twice in common cases. + */ +int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid) +{ + int ret = 0; + transaction_t *commit_trans; + + if (!(journal->j_flags & JBD2_BARRIER)) + return 0; + read_lock(&journal->j_state_lock); + /* Transaction already committed? */ + if (tid_geq(journal->j_commit_sequence, tid)) + goto out; + commit_trans = journal->j_committing_transaction; + if (!commit_trans || commit_trans->t_tid != tid) { + ret = 1; + goto out; + } + /* + * Transaction is being committed and we already proceeded to + * submitting a flush to fs partition? + */ + if (journal->j_fs_dev != journal->j_dev) { + if (!commit_trans->t_need_data_flush || + commit_trans->t_state >= T_COMMIT_DFLUSH) + goto out; + } else { + if (commit_trans->t_state >= T_COMMIT_JFLUSH) + goto out; + } + ret = 1; +out: + read_unlock(&journal->j_state_lock); + return ret; +} +EXPORT_SYMBOL(jbd2_trans_will_send_data_barrier); + /* * Wait for a specified commit to complete. * The caller may not hold the journal lock. 
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 4d57955061f4..4ecb7b16b278 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -529,9 +529,10 @@ struct transaction_s enum { T_RUNNING, T_LOCKED, - T_RUNDOWN, T_FLUSH, T_COMMIT, + T_COMMIT_DFLUSH, + T_COMMIT_JFLUSH, T_FINISHED } t_state; @@ -1230,6 +1231,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *tid); int jbd2_journal_force_commit_nested(journal_t *journal); int jbd2_log_wait_commit(journal_t *journal, tid_t tid); int jbd2_log_do_checkpoint(journal_t *journal); +int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid); void __jbd2_log_wait_for_space(journal_t *journal); extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *); -- cgit v1.2.3 From 3c454cf21645bc96668e286f6352ac2c4c895fa2 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 6 Apr 2011 09:31:40 -0700 Subject: ceph: use LOOKUPINO to make unconnected nfs fh more reliable If we are unable to locate an inode by ino, ask the MDS using the new LOOKUPINO command. Signed-off-by: Sage Weil --- fs/ceph/export.c | 19 +++++++++++++++++-- include/linux/ceph/ceph_fs.h | 1 + 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/export.c b/fs/ceph/export.c index e41056174bf8..f1828af09912 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -86,6 +86,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, static struct dentry *__fh_to_dentry(struct super_block *sb, struct ceph_nfs_fh *fh) { + struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; struct inode *inode; struct dentry *dentry; struct ceph_vino vino; @@ -95,8 +96,22 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, vino.ino = fh->ino; vino.snap = CEPH_NOSNAP; inode = ceph_find_inode(sb, vino); - if (!inode) - return ERR_PTR(-ESTALE); + if (!inode) { + struct ceph_mds_request *req; + + req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO, + USE_ANY_MDS); + if (IS_ERR(req)) + return ERR_CAST(req); + + req->r_ino1 = vino; + req->r_num_caps = 1; + err = ceph_mdsc_do_request(mdsc, NULL, req); + ceph_mdsc_put_request(req); + inode = ceph_find_inode(sb, vino); + if (!inode) + return ERR_PTR(-ESTALE); + } dentry = d_obtain_alias(inode); if (IS_ERR(dentry)) { diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index b8e995fbd867..b8c60694b2b0 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -313,6 +313,7 @@ enum { CEPH_MDS_OP_GETATTR = 0x00101, CEPH_MDS_OP_LOOKUPHASH = 0x00102, CEPH_MDS_OP_LOOKUPPARENT = 0x00103, + CEPH_MDS_OP_LOOKUPINO = 0x00104, CEPH_MDS_OP_SETXATTR = 0x01105, CEPH_MDS_OP_RMXATTR = 0x01106, -- cgit v1.2.3 From 650c2a2145981696c414be1d540a32447d0e353e Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 15 May 2011 22:53:56 +0200 Subject: mach-ux500: move the DB8500 PRCMU driver to MFD We have decided that this function arbiter fits better in the MFD subsystem. 
Since we need to concatenate the split header files we move it basically like this: mv mach-ux500/prcmu-db8500.c drivers/mfd/db8500-prcmu.c mv mach-ux500/include/mach/prcmu-defs.h include/linux/mfd/db8500-prcmu.h mv mach-ux500/include/mach/prcmu-regs.h drivers/mfd/db8500-prcmu-regs.h mach-ux500/include/mach/prcmu.h >> include/linux/mfd/db8500-prcmu.h rm arch/arm/mach-ux500/include/mach/prcmu.h Then we update different #include statements and Makefile orders etc to make the PRCMU driver compile, link and boot in the new place without really changing any code. Acked-by: Samuel Ortiz Signed-off-by: Linus Walleij --- arch/arm/mach-ux500/Kconfig | 1 + arch/arm/mach-ux500/Makefile | 2 +- arch/arm/mach-ux500/cpu.c | 2 +- arch/arm/mach-ux500/cpufreq.c | 3 +- arch/arm/mach-ux500/include/mach/prcmu-defs.h | 30 -- arch/arm/mach-ux500/include/mach/prcmu-regs.h | 94 ------ arch/arm/mach-ux500/include/mach/prcmu.h | 28 -- arch/arm/mach-ux500/prcmu-db8500.c | 394 ------------------------- drivers/mfd/Kconfig | 12 +- drivers/mfd/Makefile | 4 +- drivers/mfd/ab8500-i2c.c | 3 +- drivers/mfd/db8500-prcmu-regs.h | 94 ++++++ drivers/mfd/db8500-prcmu.c | 395 ++++++++++++++++++++++++++ include/linux/mfd/db8500-prcmu.h | 58 ++++ 14 files changed, 566 insertions(+), 554 deletions(-) delete mode 100644 arch/arm/mach-ux500/include/mach/prcmu-defs.h delete mode 100644 arch/arm/mach-ux500/include/mach/prcmu-regs.h delete mode 100644 arch/arm/mach-ux500/include/mach/prcmu.h delete mode 100644 arch/arm/mach-ux500/prcmu-db8500.c create mode 100644 drivers/mfd/db8500-prcmu-regs.h create mode 100644 drivers/mfd/db8500-prcmu.c create mode 100644 include/linux/mfd/db8500-prcmu.h (limited to 'include/linux') diff --git a/arch/arm/mach-ux500/Kconfig b/arch/arm/mach-ux500/Kconfig index 58626013aa32..8071d2746f70 100644 --- a/arch/arm/mach-ux500/Kconfig +++ b/arch/arm/mach-ux500/Kconfig @@ -15,6 +15,7 @@ config UX500_SOC_DB5500 config UX500_SOC_DB8500 bool "DB8500" + select MFD_DB8500_PRCMU endmenu diff --git a/arch/arm/mach-ux500/Makefile b/arch/arm/mach-ux500/Makefile index 2a08a10e09da..7a1d43e04f97 100644 --- a/arch/arm/mach-ux500/Makefile +++ b/arch/arm/mach-ux500/Makefile @@ -5,7 +5,7 @@ obj-y := clock.o cpu.o devices.o devices-common.o \ id.o usb.o obj-$(CONFIG_UX500_SOC_DB5500) += cpu-db5500.o dma-db5500.o -obj-$(CONFIG_UX500_SOC_DB8500) += cpu-db8500.o devices-db8500.o prcmu-db8500.o +obj-$(CONFIG_UX500_SOC_DB8500) += cpu-db8500.o devices-db8500.o obj-$(CONFIG_MACH_U8500) += board-mop500.o board-mop500-sdi.o \ board-mop500-regulators.o \ board-mop500-uib.o board-mop500-stuib.o \ diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c index 0190e0e68b4d..11360f734cec 100644 --- a/arch/arm/mach-ux500/cpu.c +++ b/arch/arm/mach-ux500/cpu.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -19,7 +20,6 @@ #include #include #include -#include #include "clock.h" diff --git a/arch/arm/mach-ux500/cpufreq.c b/arch/arm/mach-ux500/cpufreq.c index 5c5b747f134d..d196939fcdb9 100644 --- a/arch/arm/mach-ux500/cpufreq.c +++ b/arch/arm/mach-ux500/cpufreq.c @@ -17,10 +17,9 @@ #include #include #include +#include #include -#include -#include #define DRIVER_NAME "cpufreq-u8500" #define CPUFREQ_NAME "u8500" diff --git a/arch/arm/mach-ux500/include/mach/prcmu-defs.h b/arch/arm/mach-ux500/include/mach/prcmu-defs.h deleted file mode 100644 index 848ba64b561f..000000000000 --- a/arch/arm/mach-ux500/include/mach/prcmu-defs.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) STMicroelectronics 2009 - * Copyright 
(C) ST-Ericsson SA 2010 - * - * Author: Sundar Iyer - * Author: Martin Persson - * - * License Terms: GNU General Public License v2 - * - * PRCM Unit definitions - */ - -#ifndef __MACH_PRCMU_DEFS_H -#define __MACH_PRCMU_DEFS_H - -enum prcmu_cpu_opp { - CPU_OPP_INIT = 0x00, - CPU_OPP_NO_CHANGE = 0x01, - CPU_OPP_100 = 0x02, - CPU_OPP_50 = 0x03, - CPU_OPP_MAX = 0x04, - CPU_OPP_EXT_CLK = 0x07 -}; -enum prcmu_ape_opp { - APE_OPP_NO_CHANGE = 0x00, - APE_OPP_100 = 0x02, - APE_OPP_50 = 0x03, -}; - -#endif /* __MACH_PRCMU_DEFS_H */ diff --git a/arch/arm/mach-ux500/include/mach/prcmu-regs.h b/arch/arm/mach-ux500/include/mach/prcmu-regs.h deleted file mode 100644 index c1226da19bfb..000000000000 --- a/arch/arm/mach-ux500/include/mach/prcmu-regs.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (C) STMicroelectronics 2009 - * Copyright (C) ST-Ericsson SA 2010 - * - * Author: Kumar Sanghvi - * Author: Sundar Iyer - * - * License Terms: GNU General Public License v2 - * - * PRCM Unit registers - */ - -#ifndef __MACH_PRCMU_REGS_H -#define __MACH_PRCMU_REGS_H - -#include - -#define PRCM_ARM_PLLDIVPS (_PRCMU_BASE + 0x118) -#define PRCM_ARM_CHGCLKREQ (_PRCMU_BASE + 0x114) -#define PRCM_PLLARM_ENABLE (_PRCMU_BASE + 0x98) -#define PRCM_ARMCLKFIX_MGT (_PRCMU_BASE + 0x0) -#define PRCM_A9_RESETN_CLR (_PRCMU_BASE + 0x1f4) -#define PRCM_A9_RESETN_SET (_PRCMU_BASE + 0x1f0) -#define PRCM_ARM_LS_CLAMP (_PRCMU_BASE + 0x30c) -#define PRCM_SRAM_A9 (_PRCMU_BASE + 0x308) - -/* ARM WFI Standby signal register */ -#define PRCM_ARM_WFI_STANDBY (_PRCMU_BASE + 0x130) -#define PRCMU_IOCR (_PRCMU_BASE + 0x310) - -/* CPU mailbox registers */ -#define PRCM_MBOX_CPU_VAL (_PRCMU_BASE + 0x0fc) -#define PRCM_MBOX_CPU_SET (_PRCMU_BASE + 0x100) -#define PRCM_MBOX_CPU_CLR (_PRCMU_BASE + 0x104) - -/* Dual A9 core interrupt management unit registers */ -#define PRCM_A9_MASK_REQ (_PRCMU_BASE + 0x328) -#define PRCM_A9_MASK_ACK (_PRCMU_BASE + 0x32c) -#define PRCM_ARMITMSK31TO0 (_PRCMU_BASE + 0x11c) -#define PRCM_ARMITMSK63TO32 (_PRCMU_BASE + 0x120) -#define PRCM_ARMITMSK95TO64 (_PRCMU_BASE + 0x124) -#define PRCM_ARMITMSK127TO96 (_PRCMU_BASE + 0x128) -#define PRCM_POWER_STATE_VAL (_PRCMU_BASE + 0x25C) -#define PRCM_ARMITVAL31TO0 (_PRCMU_BASE + 0x260) -#define PRCM_ARMITVAL63TO32 (_PRCMU_BASE + 0x264) -#define PRCM_ARMITVAL95TO64 (_PRCMU_BASE + 0x268) -#define PRCM_ARMITVAL127TO96 (_PRCMU_BASE + 0x26C) - -#define PRCM_HOSTACCESS_REQ (_PRCMU_BASE + 0x334) -#define ARM_WAKEUP_MODEM 0x1 - -#define PRCM_ARM_IT1_CLEAR (_PRCMU_BASE + 0x48C) -#define PRCM_ARM_IT1_VAL (_PRCMU_BASE + 0x494) -#define PRCM_HOLD_EVT (_PRCMU_BASE + 0x174) - -#define PRCM_ITSTATUS0 (_PRCMU_BASE + 0x148) -#define PRCM_ITSTATUS1 (_PRCMU_BASE + 0x150) -#define PRCM_ITSTATUS2 (_PRCMU_BASE + 0x158) -#define PRCM_ITSTATUS3 (_PRCMU_BASE + 0x160) -#define PRCM_ITSTATUS4 (_PRCMU_BASE + 0x168) -#define PRCM_ITSTATUS5 (_PRCMU_BASE + 0x484) -#define PRCM_ITCLEAR5 (_PRCMU_BASE + 0x488) -#define PRCM_ARMIT_MASKXP70_IT (_PRCMU_BASE + 0x1018) - -/* System reset register */ -#define PRCM_APE_SOFTRST (_PRCMU_BASE + 0x228) - -/* Level shifter and clamp control registers */ -#define PRCM_MMIP_LS_CLAMP_SET (_PRCMU_BASE + 0x420) -#define PRCM_MMIP_LS_CLAMP_CLR (_PRCMU_BASE + 0x424) - -/* PRCMU clock/PLL/reset registers */ -#define PRCM_PLLDSI_FREQ (_PRCMU_BASE + 0x500) -#define PRCM_PLLDSI_ENABLE (_PRCMU_BASE + 0x504) -#define PRCM_LCDCLK_MGT (_PRCMU_BASE + 0x044) -#define PRCM_MCDECLK_MGT (_PRCMU_BASE + 0x064) -#define PRCM_HDMICLK_MGT (_PRCMU_BASE + 0x058) -#define PRCM_TVCLK_MGT (_PRCMU_BASE 
+ 0x07c) -#define PRCM_DSI_PLLOUT_SEL (_PRCMU_BASE + 0x530) -#define PRCM_DSITVCLK_DIV (_PRCMU_BASE + 0x52C) -#define PRCM_APE_RESETN_SET (_PRCMU_BASE + 0x1E4) -#define PRCM_APE_RESETN_CLR (_PRCMU_BASE + 0x1E8) - -/* ePOD and memory power signal control registers */ -#define PRCM_EPOD_C_SET (_PRCMU_BASE + 0x410) -#define PRCM_SRAM_LS_SLEEP (_PRCMU_BASE + 0x304) - -/* Debug power control unit registers */ -#define PRCM_POWER_STATE_SET (_PRCMU_BASE + 0x254) - -/* Miscellaneous unit registers */ -#define PRCM_DSI_SW_RESET (_PRCMU_BASE + 0x324) - -#endif /* __MACH_PRCMU_REGS_H */ diff --git a/arch/arm/mach-ux500/include/mach/prcmu.h b/arch/arm/mach-ux500/include/mach/prcmu.h deleted file mode 100644 index c49e456162ef..000000000000 --- a/arch/arm/mach-ux500/include/mach/prcmu.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) STMicroelectronics 2009 - * Copyright (C) ST-Ericsson SA 2010 - * - * Author: Kumar Sanghvi - * Author: Sundar Iyer - * Author: Mattias Nilsson - * - * License Terms: GNU General Public License v2 - * - * PRCM Unit f/w API - */ -#ifndef __MACH_PRCMU_H -#define __MACH_PRCMU_H -#include - -void __init prcmu_early_init(void); -int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size); -int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size); -int prcmu_set_ape_opp(enum prcmu_ape_opp opp); -int prcmu_set_cpu_opp(enum prcmu_cpu_opp opp); -int prcmu_set_ape_cpu_opps(enum prcmu_ape_opp ape_opp, - enum prcmu_cpu_opp cpu_opp); -int prcmu_get_ape_opp(void); -int prcmu_get_cpu_opp(void); -bool prcmu_has_arm_maxopp(void); - -#endif /* __MACH_PRCMU_H */ diff --git a/arch/arm/mach-ux500/prcmu-db8500.c b/arch/arm/mach-ux500/prcmu-db8500.c deleted file mode 100644 index c522d26ef348..000000000000 --- a/arch/arm/mach-ux500/prcmu-db8500.c +++ /dev/null @@ -1,394 +0,0 @@ -/* - * Copyright (C) STMicroelectronics 2009 - * Copyright (C) ST-Ericsson SA 2010 - * - * License Terms: GNU General Public License v2 - * Author: Kumar Sanghvi - * Author: Sundar Iyer - * Author: Mattias Nilsson - * - * U8500 PRCM Unit interface driver - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -/* Global var to runtime determine TCDM base for v2 or v1 */ -static __iomem void *tcdm_base; - -#define _MBOX_HEADER (tcdm_base + 0xFE8) -#define MBOX_HEADER_REQ_MB0 (_MBOX_HEADER + 0x0) - -#define REQ_MB1 (tcdm_base + 0xFD0) -#define REQ_MB5 (tcdm_base + 0xE44) - -#define REQ_MB1_ARMOPP (REQ_MB1 + 0x0) -#define REQ_MB1_APEOPP (REQ_MB1 + 0x1) -#define REQ_MB1_BOOSTOPP (REQ_MB1 + 0x2) - -#define ACK_MB1 (tcdm_base + 0xE04) -#define ACK_MB5 (tcdm_base + 0xDF4) - -#define ACK_MB1_CURR_ARMOPP (ACK_MB1 + 0x0) -#define ACK_MB1_CURR_APEOPP (ACK_MB1 + 0x1) - -#define REQ_MB5_I2C_SLAVE_OP (REQ_MB5) -#define REQ_MB5_I2C_HW_BITS (REQ_MB5 + 1) -#define REQ_MB5_I2C_REG (REQ_MB5 + 2) -#define REQ_MB5_I2C_VAL (REQ_MB5 + 3) - -#define ACK_MB5_I2C_STATUS (ACK_MB5 + 1) -#define ACK_MB5_I2C_VAL (ACK_MB5 + 3) - -#define PRCM_AVS_VARM_MAX_OPP (tcdm_base + 0x2E4) -#define PRCM_AVS_ISMODEENABLE 7 -#define PRCM_AVS_ISMODEENABLE_MASK (1 << PRCM_AVS_ISMODEENABLE) - -#define I2C_WRITE(slave) \ - (((slave) << 1) | (cpu_is_u8500v2() ? BIT(6) : 0)) -#define I2C_READ(slave) \ - (((slave) << 1) | (cpu_is_u8500v2() ? 
BIT(6) : 0) | BIT(0)) -#define I2C_STOP_EN BIT(3) - -enum mb1_h { - MB1H_ARM_OPP = 1, - MB1H_APE_OPP, - MB1H_ARM_APE_OPP, -}; - -static struct { - struct mutex lock; - struct completion work; - struct { - u8 arm_opp; - u8 ape_opp; - u8 arm_status; - u8 ape_status; - } ack; -} mb1_transfer; - -enum ack_mb5_status { - I2C_WR_OK = 0x01, - I2C_RD_OK = 0x02, -}; - -#define MBOX_BIT BIT -#define NUM_MBOX 8 - -static struct { - struct mutex lock; - struct completion work; - bool failed; - struct { - u8 status; - u8 value; - } ack; -} mb5_transfer; - -/** - * prcmu_abb_read() - Read register value(s) from the ABB. - * @slave: The I2C slave address. - * @reg: The (start) register address. - * @value: The read out value(s). - * @size: The number of registers to read. - * - * Reads register value(s) from the ABB. - * @size has to be 1 for the current firmware version. - */ -int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size) -{ - int r; - - if (size != 1) - return -EINVAL; - - r = mutex_lock_interruptible(&mb5_transfer.lock); - if (r) - return r; - - while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5)) - cpu_relax(); - - writeb(I2C_READ(slave), REQ_MB5_I2C_SLAVE_OP); - writeb(I2C_STOP_EN, REQ_MB5_I2C_HW_BITS); - writeb(reg, REQ_MB5_I2C_REG); - - writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET); - if (!wait_for_completion_timeout(&mb5_transfer.work, - msecs_to_jiffies(500))) { - pr_err("prcmu: prcmu_abb_read timed out.\n"); - r = -EIO; - goto unlock_and_return; - } - r = ((mb5_transfer.ack.status == I2C_RD_OK) ? 0 : -EIO); - if (!r) - *value = mb5_transfer.ack.value; - -unlock_and_return: - mutex_unlock(&mb5_transfer.lock); - return r; -} -EXPORT_SYMBOL(prcmu_abb_read); - -/** - * prcmu_abb_write() - Write register value(s) to the ABB. - * @slave: The I2C slave address. - * @reg: The (start) register address. - * @value: The value(s) to write. - * @size: The number of registers to write. - * - * Reads register value(s) from the ABB. - * @size has to be 1 for the current firmware version. - */ -int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size) -{ - int r; - - if (size != 1) - return -EINVAL; - - r = mutex_lock_interruptible(&mb5_transfer.lock); - if (r) - return r; - - - while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5)) - cpu_relax(); - - writeb(I2C_WRITE(slave), REQ_MB5_I2C_SLAVE_OP); - writeb(I2C_STOP_EN, REQ_MB5_I2C_HW_BITS); - writeb(reg, REQ_MB5_I2C_REG); - writeb(*value, REQ_MB5_I2C_VAL); - - writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET); - if (!wait_for_completion_timeout(&mb5_transfer.work, - msecs_to_jiffies(500))) { - pr_err("prcmu: prcmu_abb_write timed out.\n"); - r = -EIO; - goto unlock_and_return; - } - r = ((mb5_transfer.ack.status == I2C_WR_OK) ? 
0 : -EIO); - -unlock_and_return: - mutex_unlock(&mb5_transfer.lock); - return r; -} -EXPORT_SYMBOL(prcmu_abb_write); - -static int set_ape_cpu_opps(u8 header, enum prcmu_ape_opp ape_opp, - enum prcmu_cpu_opp cpu_opp) -{ - bool do_ape; - bool do_arm; - int err = 0; - - do_ape = ((header == MB1H_APE_OPP) || (header == MB1H_ARM_APE_OPP)); - do_arm = ((header == MB1H_ARM_OPP) || (header == MB1H_ARM_APE_OPP)); - - mutex_lock(&mb1_transfer.lock); - - while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(1)) - cpu_relax(); - - writeb(0, MBOX_HEADER_REQ_MB0); - writeb(cpu_opp, REQ_MB1_ARMOPP); - writeb(ape_opp, REQ_MB1_APEOPP); - writeb(0, REQ_MB1_BOOSTOPP); - writel(MBOX_BIT(1), PRCM_MBOX_CPU_SET); - wait_for_completion(&mb1_transfer.work); - if ((do_ape) && (mb1_transfer.ack.ape_status != 0)) - err = -EIO; - if ((do_arm) && (mb1_transfer.ack.arm_status != 0)) - err = -EIO; - - mutex_unlock(&mb1_transfer.lock); - - return err; -} - -/** - * prcmu_set_ape_opp() - Set the OPP of the APE. - * @opp: The OPP to set. - * - * This function sets the OPP of the APE. - */ -int prcmu_set_ape_opp(enum prcmu_ape_opp opp) -{ - return set_ape_cpu_opps(MB1H_APE_OPP, opp, APE_OPP_NO_CHANGE); -} -EXPORT_SYMBOL(prcmu_set_ape_opp); - -/** - * prcmu_set_cpu_opp() - Set the OPP of the CPU. - * @opp: The OPP to set. - * - * This function sets the OPP of the CPU. - */ -int prcmu_set_cpu_opp(enum prcmu_cpu_opp opp) -{ - return set_ape_cpu_opps(MB1H_ARM_OPP, CPU_OPP_NO_CHANGE, opp); -} -EXPORT_SYMBOL(prcmu_set_cpu_opp); - -/** - * prcmu_set_ape_cpu_opps() - Set the OPPs of the APE and the CPU. - * @ape_opp: The APE OPP to set. - * @cpu_opp: The CPU OPP to set. - * - * This function sets the OPPs of the APE and the CPU. - */ -int prcmu_set_ape_cpu_opps(enum prcmu_ape_opp ape_opp, - enum prcmu_cpu_opp cpu_opp) -{ - return set_ape_cpu_opps(MB1H_ARM_APE_OPP, ape_opp, cpu_opp); -} -EXPORT_SYMBOL(prcmu_set_ape_cpu_opps); - -/** - * prcmu_get_ape_opp() - Get the OPP of the APE. - * - * This function gets the OPP of the APE. - */ -enum prcmu_ape_opp prcmu_get_ape_opp(void) -{ - return readb(ACK_MB1_CURR_APEOPP); -} -EXPORT_SYMBOL(prcmu_get_ape_opp); - -/** - * prcmu_get_cpu_opp() - Get the OPP of the CPU. - * - * This function gets the OPP of the CPU. The OPP is specified in %%. - * PRCMU_OPP_EXT is a special OPP value, not specified in %%. 
- */ -int prcmu_get_cpu_opp(void) -{ - return readb(ACK_MB1_CURR_ARMOPP); -} -EXPORT_SYMBOL(prcmu_get_cpu_opp); - -bool prcmu_has_arm_maxopp(void) -{ - return (readb(PRCM_AVS_VARM_MAX_OPP) & PRCM_AVS_ISMODEENABLE_MASK) - == PRCM_AVS_ISMODEENABLE_MASK; -} - -static void read_mailbox_0(void) -{ - writel(MBOX_BIT(0), PRCM_ARM_IT1_CLEAR); -} - -static void read_mailbox_1(void) -{ - mb1_transfer.ack.arm_opp = readb(ACK_MB1_CURR_ARMOPP); - mb1_transfer.ack.ape_opp = readb(ACK_MB1_CURR_APEOPP); - complete(&mb1_transfer.work); - writel(MBOX_BIT(1), PRCM_ARM_IT1_CLEAR); -} - -static void read_mailbox_2(void) -{ - writel(MBOX_BIT(2), PRCM_ARM_IT1_CLEAR); -} - -static void read_mailbox_3(void) -{ - writel(MBOX_BIT(3), PRCM_ARM_IT1_CLEAR); -} - -static void read_mailbox_4(void) -{ - writel(MBOX_BIT(4), PRCM_ARM_IT1_CLEAR); -} - -static void read_mailbox_5(void) -{ - mb5_transfer.ack.status = readb(ACK_MB5_I2C_STATUS); - mb5_transfer.ack.value = readb(ACK_MB5_I2C_VAL); - complete(&mb5_transfer.work); - writel(MBOX_BIT(5), PRCM_ARM_IT1_CLEAR); -} - -static void read_mailbox_6(void) -{ - writel(MBOX_BIT(6), PRCM_ARM_IT1_CLEAR); -} - -static void read_mailbox_7(void) -{ - writel(MBOX_BIT(7), PRCM_ARM_IT1_CLEAR); -} - -static void (* const read_mailbox[NUM_MBOX])(void) = { - read_mailbox_0, - read_mailbox_1, - read_mailbox_2, - read_mailbox_3, - read_mailbox_4, - read_mailbox_5, - read_mailbox_6, - read_mailbox_7 -}; - -static irqreturn_t prcmu_irq_handler(int irq, void *data) -{ - u32 bits; - u8 n; - - bits = (readl(PRCM_ARM_IT1_VAL) & (MBOX_BIT(NUM_MBOX) - 1)); - if (unlikely(!bits)) - return IRQ_NONE; - - for (n = 0; bits; n++) { - if (bits & MBOX_BIT(n)) { - bits -= MBOX_BIT(n); - read_mailbox[n](); - } - } - return IRQ_HANDLED; -} - -void __init prcmu_early_init(void) -{ - if (cpu_is_u8500v11() || cpu_is_u8500ed()) { - tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE_V1); - } else if (cpu_is_u8500v2()) { - tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE); - } else { - pr_err("prcmu: Unsupported chip version\n"); - BUG(); - } -} - -static int __init prcmu_init(void) -{ - if (cpu_is_u8500ed()) { - pr_err("prcmu: Unsupported chip version\n"); - return 0; - } - - mutex_init(&mb1_transfer.lock); - init_completion(&mb1_transfer.work); - mutex_init(&mb5_transfer.lock); - init_completion(&mb5_transfer.work); - - /* Clean up the mailbox interrupts after pre-kernel code. */ - writel((MBOX_BIT(NUM_MBOX) - 1), PRCM_ARM_IT1_CLEAR); - - return request_irq(IRQ_DB8500_PRCMU1, prcmu_irq_handler, 0, - "prcmu", NULL); -} - -arch_initcall(prcmu_init); diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 3ed3ff06be5d..7eaeb9750793 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -538,7 +538,7 @@ config AB8500_CORE config AB8500_I2C_CORE bool "AB8500 register access via PRCMU I2C" - depends on AB8500_CORE && UX500_SOC_DB8500 + depends on AB8500_CORE && MFD_DB8500_PRCMU default y help This enables register access to the AB8500 chip via PRCMU I2C. @@ -575,6 +575,16 @@ config AB3550_CORE LEDs, vibrator, system power and temperature, power management and ALSA sound. +config MFD_DB8500_PRCMU + bool "ST-Ericsson DB8500 Power Reset Control Management Unit" + depends on UX500_SOC_DB8500 + select MFD_CORE + help + Select this option to enable support for the DB8500 Power Reset + and Control Management Unit. This is basically an autonomous + system controller running an XP70 microprocessor, which is accessed + through a register map. 
+ config MFD_CS5535 tristate "Support for CS5535 and CS5536 southbridge core functions" select MFD_CORE diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 419caa9d7dcf..814c57a692a9 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -74,9 +74,11 @@ obj-$(CONFIG_AB3100_CORE) += ab3100-core.o obj-$(CONFIG_AB3100_OTP) += ab3100-otp.o obj-$(CONFIG_AB3550_CORE) += ab3550-core.o obj-$(CONFIG_AB8500_CORE) += ab8500-core.o ab8500-sysctrl.o -obj-$(CONFIG_AB8500_I2C_CORE) += ab8500-i2c.o obj-$(CONFIG_AB8500_DEBUG) += ab8500-debugfs.o obj-$(CONFIG_AB8500_GPADC) += ab8500-gpadc.o +obj-$(CONFIG_MFD_DB8500_PRCMU) += db8500-prcmu.o +# ab8500-i2c need to come after db8500-prcmu (which provides the channel) +obj-$(CONFIG_AB8500_I2C_CORE) += ab8500-i2c.o obj-$(CONFIG_MFD_TIMBERDALE) += timberdale.o obj-$(CONFIG_PMIC_ADP5520) += adp5520.o obj-$(CONFIG_LPC_SCH) += lpc_sch.o diff --git a/drivers/mfd/ab8500-i2c.c b/drivers/mfd/ab8500-i2c.c index 821e6b86afd2..9be541c6b004 100644 --- a/drivers/mfd/ab8500-i2c.c +++ b/drivers/mfd/ab8500-i2c.c @@ -11,8 +11,7 @@ #include #include #include - -#include +#include static int ab8500_i2c_write(struct ab8500 *ab8500, u16 addr, u8 data) { diff --git a/drivers/mfd/db8500-prcmu-regs.h b/drivers/mfd/db8500-prcmu-regs.h new file mode 100644 index 000000000000..c1226da19bfb --- /dev/null +++ b/drivers/mfd/db8500-prcmu-regs.h @@ -0,0 +1,94 @@ +/* + * Copyright (C) STMicroelectronics 2009 + * Copyright (C) ST-Ericsson SA 2010 + * + * Author: Kumar Sanghvi + * Author: Sundar Iyer + * + * License Terms: GNU General Public License v2 + * + * PRCM Unit registers + */ + +#ifndef __MACH_PRCMU_REGS_H +#define __MACH_PRCMU_REGS_H + +#include + +#define PRCM_ARM_PLLDIVPS (_PRCMU_BASE + 0x118) +#define PRCM_ARM_CHGCLKREQ (_PRCMU_BASE + 0x114) +#define PRCM_PLLARM_ENABLE (_PRCMU_BASE + 0x98) +#define PRCM_ARMCLKFIX_MGT (_PRCMU_BASE + 0x0) +#define PRCM_A9_RESETN_CLR (_PRCMU_BASE + 0x1f4) +#define PRCM_A9_RESETN_SET (_PRCMU_BASE + 0x1f0) +#define PRCM_ARM_LS_CLAMP (_PRCMU_BASE + 0x30c) +#define PRCM_SRAM_A9 (_PRCMU_BASE + 0x308) + +/* ARM WFI Standby signal register */ +#define PRCM_ARM_WFI_STANDBY (_PRCMU_BASE + 0x130) +#define PRCMU_IOCR (_PRCMU_BASE + 0x310) + +/* CPU mailbox registers */ +#define PRCM_MBOX_CPU_VAL (_PRCMU_BASE + 0x0fc) +#define PRCM_MBOX_CPU_SET (_PRCMU_BASE + 0x100) +#define PRCM_MBOX_CPU_CLR (_PRCMU_BASE + 0x104) + +/* Dual A9 core interrupt management unit registers */ +#define PRCM_A9_MASK_REQ (_PRCMU_BASE + 0x328) +#define PRCM_A9_MASK_ACK (_PRCMU_BASE + 0x32c) +#define PRCM_ARMITMSK31TO0 (_PRCMU_BASE + 0x11c) +#define PRCM_ARMITMSK63TO32 (_PRCMU_BASE + 0x120) +#define PRCM_ARMITMSK95TO64 (_PRCMU_BASE + 0x124) +#define PRCM_ARMITMSK127TO96 (_PRCMU_BASE + 0x128) +#define PRCM_POWER_STATE_VAL (_PRCMU_BASE + 0x25C) +#define PRCM_ARMITVAL31TO0 (_PRCMU_BASE + 0x260) +#define PRCM_ARMITVAL63TO32 (_PRCMU_BASE + 0x264) +#define PRCM_ARMITVAL95TO64 (_PRCMU_BASE + 0x268) +#define PRCM_ARMITVAL127TO96 (_PRCMU_BASE + 0x26C) + +#define PRCM_HOSTACCESS_REQ (_PRCMU_BASE + 0x334) +#define ARM_WAKEUP_MODEM 0x1 + +#define PRCM_ARM_IT1_CLEAR (_PRCMU_BASE + 0x48C) +#define PRCM_ARM_IT1_VAL (_PRCMU_BASE + 0x494) +#define PRCM_HOLD_EVT (_PRCMU_BASE + 0x174) + +#define PRCM_ITSTATUS0 (_PRCMU_BASE + 0x148) +#define PRCM_ITSTATUS1 (_PRCMU_BASE + 0x150) +#define PRCM_ITSTATUS2 (_PRCMU_BASE + 0x158) +#define PRCM_ITSTATUS3 (_PRCMU_BASE + 0x160) +#define PRCM_ITSTATUS4 (_PRCMU_BASE + 0x168) +#define PRCM_ITSTATUS5 (_PRCMU_BASE + 0x484) +#define PRCM_ITCLEAR5 
(_PRCMU_BASE + 0x488) +#define PRCM_ARMIT_MASKXP70_IT (_PRCMU_BASE + 0x1018) + +/* System reset register */ +#define PRCM_APE_SOFTRST (_PRCMU_BASE + 0x228) + +/* Level shifter and clamp control registers */ +#define PRCM_MMIP_LS_CLAMP_SET (_PRCMU_BASE + 0x420) +#define PRCM_MMIP_LS_CLAMP_CLR (_PRCMU_BASE + 0x424) + +/* PRCMU clock/PLL/reset registers */ +#define PRCM_PLLDSI_FREQ (_PRCMU_BASE + 0x500) +#define PRCM_PLLDSI_ENABLE (_PRCMU_BASE + 0x504) +#define PRCM_LCDCLK_MGT (_PRCMU_BASE + 0x044) +#define PRCM_MCDECLK_MGT (_PRCMU_BASE + 0x064) +#define PRCM_HDMICLK_MGT (_PRCMU_BASE + 0x058) +#define PRCM_TVCLK_MGT (_PRCMU_BASE + 0x07c) +#define PRCM_DSI_PLLOUT_SEL (_PRCMU_BASE + 0x530) +#define PRCM_DSITVCLK_DIV (_PRCMU_BASE + 0x52C) +#define PRCM_APE_RESETN_SET (_PRCMU_BASE + 0x1E4) +#define PRCM_APE_RESETN_CLR (_PRCMU_BASE + 0x1E8) + +/* ePOD and memory power signal control registers */ +#define PRCM_EPOD_C_SET (_PRCMU_BASE + 0x410) +#define PRCM_SRAM_LS_SLEEP (_PRCMU_BASE + 0x304) + +/* Debug power control unit registers */ +#define PRCM_POWER_STATE_SET (_PRCMU_BASE + 0x254) + +/* Miscellaneous unit registers */ +#define PRCM_DSI_SW_RESET (_PRCMU_BASE + 0x324) + +#endif /* __MACH_PRCMU_REGS_H */ diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c new file mode 100644 index 000000000000..31f18c8c6bf8 --- /dev/null +++ b/drivers/mfd/db8500-prcmu.c @@ -0,0 +1,395 @@ +/* + * Copyright (C) STMicroelectronics 2009 + * Copyright (C) ST-Ericsson SA 2010 + * + * License Terms: GNU General Public License v2 + * Author: Kumar Sanghvi + * Author: Sundar Iyer + * Author: Mattias Nilsson + * + * U8500 PRCM Unit interface driver + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "db8500-prcmu-regs.h" + +/* Global var to runtime determine TCDM base for v2 or v1 */ +static __iomem void *tcdm_base; + +#define _MBOX_HEADER (tcdm_base + 0xFE8) +#define MBOX_HEADER_REQ_MB0 (_MBOX_HEADER + 0x0) + +#define REQ_MB1 (tcdm_base + 0xFD0) +#define REQ_MB5 (tcdm_base + 0xE44) + +#define REQ_MB1_ARMOPP (REQ_MB1 + 0x0) +#define REQ_MB1_APEOPP (REQ_MB1 + 0x1) +#define REQ_MB1_BOOSTOPP (REQ_MB1 + 0x2) + +#define ACK_MB1 (tcdm_base + 0xE04) +#define ACK_MB5 (tcdm_base + 0xDF4) + +#define ACK_MB1_CURR_ARMOPP (ACK_MB1 + 0x0) +#define ACK_MB1_CURR_APEOPP (ACK_MB1 + 0x1) + +#define REQ_MB5_I2C_SLAVE_OP (REQ_MB5) +#define REQ_MB5_I2C_HW_BITS (REQ_MB5 + 1) +#define REQ_MB5_I2C_REG (REQ_MB5 + 2) +#define REQ_MB5_I2C_VAL (REQ_MB5 + 3) + +#define ACK_MB5_I2C_STATUS (ACK_MB5 + 1) +#define ACK_MB5_I2C_VAL (ACK_MB5 + 3) + +#define PRCM_AVS_VARM_MAX_OPP (tcdm_base + 0x2E4) +#define PRCM_AVS_ISMODEENABLE 7 +#define PRCM_AVS_ISMODEENABLE_MASK (1 << PRCM_AVS_ISMODEENABLE) + +#define I2C_WRITE(slave) \ + (((slave) << 1) | (cpu_is_u8500v2() ? BIT(6) : 0)) +#define I2C_READ(slave) \ + (((slave) << 1) | (cpu_is_u8500v2() ? BIT(6) : 0) | BIT(0)) +#define I2C_STOP_EN BIT(3) + +enum mb1_h { + MB1H_ARM_OPP = 1, + MB1H_APE_OPP, + MB1H_ARM_APE_OPP, +}; + +static struct { + struct mutex lock; + struct completion work; + struct { + u8 arm_opp; + u8 ape_opp; + u8 arm_status; + u8 ape_status; + } ack; +} mb1_transfer; + +enum ack_mb5_status { + I2C_WR_OK = 0x01, + I2C_RD_OK = 0x02, +}; + +#define MBOX_BIT BIT +#define NUM_MBOX 8 + +static struct { + struct mutex lock; + struct completion work; + bool failed; + struct { + u8 status; + u8 value; + } ack; +} mb5_transfer; + +/** + * prcmu_abb_read() - Read register value(s) from the ABB. 
+ * @slave: The I2C slave address. + * @reg: The (start) register address. + * @value: The read out value(s). + * @size: The number of registers to read. + * + * Reads register value(s) from the ABB. + * @size has to be 1 for the current firmware version. + */ +int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size) +{ + int r; + + if (size != 1) + return -EINVAL; + + r = mutex_lock_interruptible(&mb5_transfer.lock); + if (r) + return r; + + while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5)) + cpu_relax(); + + writeb(I2C_READ(slave), REQ_MB5_I2C_SLAVE_OP); + writeb(I2C_STOP_EN, REQ_MB5_I2C_HW_BITS); + writeb(reg, REQ_MB5_I2C_REG); + + writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET); + if (!wait_for_completion_timeout(&mb5_transfer.work, + msecs_to_jiffies(500))) { + pr_err("prcmu: prcmu_abb_read timed out.\n"); + r = -EIO; + goto unlock_and_return; + } + r = ((mb5_transfer.ack.status == I2C_RD_OK) ? 0 : -EIO); + if (!r) + *value = mb5_transfer.ack.value; + +unlock_and_return: + mutex_unlock(&mb5_transfer.lock); + return r; +} +EXPORT_SYMBOL(prcmu_abb_read); + +/** + * prcmu_abb_write() - Write register value(s) to the ABB. + * @slave: The I2C slave address. + * @reg: The (start) register address. + * @value: The value(s) to write. + * @size: The number of registers to write. + * + * Reads register value(s) from the ABB. + * @size has to be 1 for the current firmware version. + */ +int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size) +{ + int r; + + if (size != 1) + return -EINVAL; + + r = mutex_lock_interruptible(&mb5_transfer.lock); + if (r) + return r; + + + while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5)) + cpu_relax(); + + writeb(I2C_WRITE(slave), REQ_MB5_I2C_SLAVE_OP); + writeb(I2C_STOP_EN, REQ_MB5_I2C_HW_BITS); + writeb(reg, REQ_MB5_I2C_REG); + writeb(*value, REQ_MB5_I2C_VAL); + + writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET); + if (!wait_for_completion_timeout(&mb5_transfer.work, + msecs_to_jiffies(500))) { + pr_err("prcmu: prcmu_abb_write timed out.\n"); + r = -EIO; + goto unlock_and_return; + } + r = ((mb5_transfer.ack.status == I2C_WR_OK) ? 0 : -EIO); + +unlock_and_return: + mutex_unlock(&mb5_transfer.lock); + return r; +} +EXPORT_SYMBOL(prcmu_abb_write); + +static int set_ape_cpu_opps(u8 header, enum prcmu_ape_opp ape_opp, + enum prcmu_cpu_opp cpu_opp) +{ + bool do_ape; + bool do_arm; + int err = 0; + + do_ape = ((header == MB1H_APE_OPP) || (header == MB1H_ARM_APE_OPP)); + do_arm = ((header == MB1H_ARM_OPP) || (header == MB1H_ARM_APE_OPP)); + + mutex_lock(&mb1_transfer.lock); + + while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(1)) + cpu_relax(); + + writeb(0, MBOX_HEADER_REQ_MB0); + writeb(cpu_opp, REQ_MB1_ARMOPP); + writeb(ape_opp, REQ_MB1_APEOPP); + writeb(0, REQ_MB1_BOOSTOPP); + writel(MBOX_BIT(1), PRCM_MBOX_CPU_SET); + wait_for_completion(&mb1_transfer.work); + if ((do_ape) && (mb1_transfer.ack.ape_status != 0)) + err = -EIO; + if ((do_arm) && (mb1_transfer.ack.arm_status != 0)) + err = -EIO; + + mutex_unlock(&mb1_transfer.lock); + + return err; +} + +/** + * prcmu_set_ape_opp() - Set the OPP of the APE. + * @opp: The OPP to set. + * + * This function sets the OPP of the APE. + */ +int prcmu_set_ape_opp(enum prcmu_ape_opp opp) +{ + return set_ape_cpu_opps(MB1H_APE_OPP, opp, APE_OPP_NO_CHANGE); +} +EXPORT_SYMBOL(prcmu_set_ape_opp); + +/** + * prcmu_set_cpu_opp() - Set the OPP of the CPU. + * @opp: The OPP to set. + * + * This function sets the OPP of the CPU. 
+ */ +int prcmu_set_cpu_opp(enum prcmu_cpu_opp opp) +{ + return set_ape_cpu_opps(MB1H_ARM_OPP, CPU_OPP_NO_CHANGE, opp); +} +EXPORT_SYMBOL(prcmu_set_cpu_opp); + +/** + * prcmu_set_ape_cpu_opps() - Set the OPPs of the APE and the CPU. + * @ape_opp: The APE OPP to set. + * @cpu_opp: The CPU OPP to set. + * + * This function sets the OPPs of the APE and the CPU. + */ +int prcmu_set_ape_cpu_opps(enum prcmu_ape_opp ape_opp, + enum prcmu_cpu_opp cpu_opp) +{ + return set_ape_cpu_opps(MB1H_ARM_APE_OPP, ape_opp, cpu_opp); +} +EXPORT_SYMBOL(prcmu_set_ape_cpu_opps); + +/** + * prcmu_get_ape_opp() - Get the OPP of the APE. + * + * This function gets the OPP of the APE. + */ +enum prcmu_ape_opp prcmu_get_ape_opp(void) +{ + return readb(ACK_MB1_CURR_APEOPP); +} +EXPORT_SYMBOL(prcmu_get_ape_opp); + +/** + * prcmu_get_cpu_opp() - Get the OPP of the CPU. + * + * This function gets the OPP of the CPU. The OPP is specified in %%. + * PRCMU_OPP_EXT is a special OPP value, not specified in %%. + */ +int prcmu_get_cpu_opp(void) +{ + return readb(ACK_MB1_CURR_ARMOPP); +} +EXPORT_SYMBOL(prcmu_get_cpu_opp); + +bool prcmu_has_arm_maxopp(void) +{ + return (readb(PRCM_AVS_VARM_MAX_OPP) & PRCM_AVS_ISMODEENABLE_MASK) + == PRCM_AVS_ISMODEENABLE_MASK; +} + +static void read_mailbox_0(void) +{ + writel(MBOX_BIT(0), PRCM_ARM_IT1_CLEAR); +} + +static void read_mailbox_1(void) +{ + mb1_transfer.ack.arm_opp = readb(ACK_MB1_CURR_ARMOPP); + mb1_transfer.ack.ape_opp = readb(ACK_MB1_CURR_APEOPP); + complete(&mb1_transfer.work); + writel(MBOX_BIT(1), PRCM_ARM_IT1_CLEAR); +} + +static void read_mailbox_2(void) +{ + writel(MBOX_BIT(2), PRCM_ARM_IT1_CLEAR); +} + +static void read_mailbox_3(void) +{ + writel(MBOX_BIT(3), PRCM_ARM_IT1_CLEAR); +} + +static void read_mailbox_4(void) +{ + writel(MBOX_BIT(4), PRCM_ARM_IT1_CLEAR); +} + +static void read_mailbox_5(void) +{ + mb5_transfer.ack.status = readb(ACK_MB5_I2C_STATUS); + mb5_transfer.ack.value = readb(ACK_MB5_I2C_VAL); + complete(&mb5_transfer.work); + writel(MBOX_BIT(5), PRCM_ARM_IT1_CLEAR); +} + +static void read_mailbox_6(void) +{ + writel(MBOX_BIT(6), PRCM_ARM_IT1_CLEAR); +} + +static void read_mailbox_7(void) +{ + writel(MBOX_BIT(7), PRCM_ARM_IT1_CLEAR); +} + +static void (* const read_mailbox[NUM_MBOX])(void) = { + read_mailbox_0, + read_mailbox_1, + read_mailbox_2, + read_mailbox_3, + read_mailbox_4, + read_mailbox_5, + read_mailbox_6, + read_mailbox_7 +}; + +static irqreturn_t prcmu_irq_handler(int irq, void *data) +{ + u32 bits; + u8 n; + + bits = (readl(PRCM_ARM_IT1_VAL) & (MBOX_BIT(NUM_MBOX) - 1)); + if (unlikely(!bits)) + return IRQ_NONE; + + for (n = 0; bits; n++) { + if (bits & MBOX_BIT(n)) { + bits -= MBOX_BIT(n); + read_mailbox[n](); + } + } + return IRQ_HANDLED; +} + +void __init prcmu_early_init(void) +{ + if (cpu_is_u8500v11() || cpu_is_u8500ed()) { + tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE_V1); + } else if (cpu_is_u8500v2()) { + tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE); + } else { + pr_err("prcmu: Unsupported chip version\n"); + BUG(); + } +} + +static int __init prcmu_init(void) +{ + if (cpu_is_u8500ed()) { + pr_err("prcmu: Unsupported chip version\n"); + return 0; + } + + mutex_init(&mb1_transfer.lock); + init_completion(&mb1_transfer.work); + mutex_init(&mb5_transfer.lock); + init_completion(&mb5_transfer.work); + + /* Clean up the mailbox interrupts after pre-kernel code. 
*/ + writel((MBOX_BIT(NUM_MBOX) - 1), PRCM_ARM_IT1_CLEAR); + + return request_irq(IRQ_DB8500_PRCMU1, prcmu_irq_handler, 0, + "prcmu", NULL); +} + +arch_initcall(prcmu_init); diff --git a/include/linux/mfd/db8500-prcmu.h b/include/linux/mfd/db8500-prcmu.h new file mode 100644 index 000000000000..d591d79aa6f0 --- /dev/null +++ b/include/linux/mfd/db8500-prcmu.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) STMicroelectronics 2009 + * Copyright (C) ST-Ericsson SA 2010 + * + * Author: Sundar Iyer + * Author: Martin Persson + * + * License Terms: GNU General Public License v2 + * + * PRCM Unit definitions + */ + +#ifndef __MACH_PRCMU_DEFS_H +#define __MACH_PRCMU_DEFS_H + +enum prcmu_cpu_opp { + CPU_OPP_INIT = 0x00, + CPU_OPP_NO_CHANGE = 0x01, + CPU_OPP_100 = 0x02, + CPU_OPP_50 = 0x03, + CPU_OPP_MAX = 0x04, + CPU_OPP_EXT_CLK = 0x07 +}; +enum prcmu_ape_opp { + APE_OPP_NO_CHANGE = 0x00, + APE_OPP_100 = 0x02, + APE_OPP_50 = 0x03, +}; + +#endif /* __MACH_PRCMU_DEFS_H */ + +/* + * Copyright (C) STMicroelectronics 2009 + * Copyright (C) ST-Ericsson SA 2010 + * + * Author: Kumar Sanghvi + * Author: Sundar Iyer + * Author: Mattias Nilsson + * + * License Terms: GNU General Public License v2 + * + * PRCM Unit f/w API + */ +#ifndef __MACH_PRCMU_H +#define __MACH_PRCMU_H + +void __init prcmu_early_init(void); +int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size); +int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size); +int prcmu_set_ape_opp(enum prcmu_ape_opp opp); +int prcmu_set_cpu_opp(enum prcmu_cpu_opp opp); +int prcmu_set_ape_cpu_opps(enum prcmu_ape_opp ape_opp, + enum prcmu_cpu_opp cpu_opp); +enum prcmu_ape_opp prcmu_get_ape_opp(void); +int prcmu_get_cpu_opp(void); +bool prcmu_has_arm_maxopp(void); + +#endif /* __MACH_PRCMU_H */ -- cgit v1.2.3 From 3df57bcf5a6ba74572218a811bd0e311414f2aff Mon Sep 17 00:00:00 2001 From: Mattias Nilsson Date: Mon, 16 May 2011 00:15:05 +0200 Subject: mfd: update DB8500 PRCMU driver This updates the DB8500 PRCMU driver to the latest version available internally. Nominally we would update the dependent CPUfreq driver at the same time but since that is being moved around in this patch set we postpone that by simply deactivating it for the time being. This is a snapshot of the current PRCMU firmware API as it looks right now. The PRCMU firmware is still subject to change. This also updates the CPUfreq driver to a newer version that will utilize the new API. 
Acked-by: Samuel Ortiz Signed-off-by: Mattias Nilsson Signed-off-by: Martin Persson Signed-off-by: Per Fransson Signed-off-by: Jonas Aaberg Signed-off-by: Sebastien Rault Signed-off-by: Bengt Jonsson Signed-off-by: Rickard Andersson Signed-off-by: Linus Walleij --- arch/arm/mach-ux500/Makefile | 2 +- arch/arm/mach-ux500/cpu-db8500.c | 5 + drivers/mfd/db8500-prcmu-regs.h | 194 ++-- drivers/mfd/db8500-prcmu.c | 1879 ++++++++++++++++++++++++++++++++++---- include/linux/mfd/db8500-prcmu.h | 992 +++++++++++++++++++- 5 files changed, 2782 insertions(+), 290 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-ux500/Makefile b/arch/arm/mach-ux500/Makefile index 7a1d43e04f97..1694916e6822 100644 --- a/arch/arm/mach-ux500/Makefile +++ b/arch/arm/mach-ux500/Makefile @@ -17,4 +17,4 @@ obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o obj-$(CONFIG_LOCAL_TIMERS) += localtimer.o obj-$(CONFIG_U5500_MODEM_IRQ) += modem-irq-db5500.o obj-$(CONFIG_U5500_MBOX) += mbox-db5500.o -obj-$(CONFIG_CPU_FREQ) += cpufreq.o + diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c index 3d419fef0470..c3c417656bd9 100644 --- a/arch/arm/mach-ux500/cpu-db8500.c +++ b/arch/arm/mach-ux500/cpu-db8500.c @@ -131,9 +131,14 @@ static struct platform_device db8500_pmu_device = { .dev.platform_data = &db8500_pmu_platdata, }; +static struct platform_device db8500_prcmu_device = { + .name = "db8500-prcmu", +}; + static struct platform_device *platform_devs[] __initdata = { &u8500_dma40_device, &db8500_pmu_device, + &db8500_prcmu_device, }; static resource_size_t __initdata db8500_gpio_base[] = { diff --git a/drivers/mfd/db8500-prcmu-regs.h b/drivers/mfd/db8500-prcmu-regs.h index c1226da19bfb..3bbf04d58043 100644 --- a/drivers/mfd/db8500-prcmu-regs.h +++ b/drivers/mfd/db8500-prcmu-regs.h @@ -9,86 +9,158 @@ * * PRCM Unit registers */ +#ifndef __DB8500_PRCMU_REGS_H +#define __DB8500_PRCMU_REGS_H -#ifndef __MACH_PRCMU_REGS_H -#define __MACH_PRCMU_REGS_H - +#include #include -#define PRCM_ARM_PLLDIVPS (_PRCMU_BASE + 0x118) -#define PRCM_ARM_CHGCLKREQ (_PRCMU_BASE + 0x114) -#define PRCM_PLLARM_ENABLE (_PRCMU_BASE + 0x98) -#define PRCM_ARMCLKFIX_MGT (_PRCMU_BASE + 0x0) -#define PRCM_A9_RESETN_CLR (_PRCMU_BASE + 0x1f4) -#define PRCM_A9_RESETN_SET (_PRCMU_BASE + 0x1f0) -#define PRCM_ARM_LS_CLAMP (_PRCMU_BASE + 0x30c) -#define PRCM_SRAM_A9 (_PRCMU_BASE + 0x308) +#define BITS(_start, _end) ((BIT(_end) - BIT(_start)) + BIT(_end)) + +#define PRCM_ARM_PLLDIVPS 0x118 +#define PRCM_ARM_PLLDIVPS_ARM_BRM_RATE BITS(0, 5) +#define PRCM_ARM_PLLDIVPS_MAX_MASK 0xF + +#define PRCM_PLLARM_LOCKP 0x0A8 +#define PRCM_PLLARM_LOCKP_PRCM_PLLARM_LOCKP3 BIT(1) + +#define PRCM_ARM_CHGCLKREQ 0x114 +#define PRCM_ARM_CHGCLKREQ_PRCM_ARM_CHGCLKREQ BIT(0) + +#define PRCM_PLLARM_ENABLE 0x98 +#define PRCM_PLLARM_ENABLE_PRCM_PLLARM_ENABLE BIT(0) +#define PRCM_PLLARM_ENABLE_PRCM_PLLARM_COUNTON BIT(8) + +#define PRCM_ARMCLKFIX_MGT 0x0 +#define PRCM_A9_RESETN_CLR 0x1f4 +#define PRCM_A9_RESETN_SET 0x1f0 +#define PRCM_ARM_LS_CLAMP 0x30C +#define PRCM_SRAM_A9 0x308 /* ARM WFI Standby signal register */ -#define PRCM_ARM_WFI_STANDBY (_PRCMU_BASE + 0x130) -#define PRCMU_IOCR (_PRCMU_BASE + 0x310) +#define PRCM_ARM_WFI_STANDBY 0x130 +#define PRCM_IOCR 0x310 +#define PRCM_IOCR_IOFORCE BIT(0) /* CPU mailbox registers */ -#define PRCM_MBOX_CPU_VAL (_PRCMU_BASE + 0x0fc) -#define PRCM_MBOX_CPU_SET (_PRCMU_BASE + 0x100) -#define PRCM_MBOX_CPU_CLR (_PRCMU_BASE + 0x104) +#define PRCM_MBOX_CPU_VAL 0x0FC +#define PRCM_MBOX_CPU_SET 0x100 /* Dual A9 core interrupt 
management unit registers */ -#define PRCM_A9_MASK_REQ (_PRCMU_BASE + 0x328) -#define PRCM_A9_MASK_ACK (_PRCMU_BASE + 0x32c) -#define PRCM_ARMITMSK31TO0 (_PRCMU_BASE + 0x11c) -#define PRCM_ARMITMSK63TO32 (_PRCMU_BASE + 0x120) -#define PRCM_ARMITMSK95TO64 (_PRCMU_BASE + 0x124) -#define PRCM_ARMITMSK127TO96 (_PRCMU_BASE + 0x128) -#define PRCM_POWER_STATE_VAL (_PRCMU_BASE + 0x25C) -#define PRCM_ARMITVAL31TO0 (_PRCMU_BASE + 0x260) -#define PRCM_ARMITVAL63TO32 (_PRCMU_BASE + 0x264) -#define PRCM_ARMITVAL95TO64 (_PRCMU_BASE + 0x268) -#define PRCM_ARMITVAL127TO96 (_PRCMU_BASE + 0x26C) - -#define PRCM_HOSTACCESS_REQ (_PRCMU_BASE + 0x334) -#define ARM_WAKEUP_MODEM 0x1 - -#define PRCM_ARM_IT1_CLEAR (_PRCMU_BASE + 0x48C) -#define PRCM_ARM_IT1_VAL (_PRCMU_BASE + 0x494) -#define PRCM_HOLD_EVT (_PRCMU_BASE + 0x174) - -#define PRCM_ITSTATUS0 (_PRCMU_BASE + 0x148) -#define PRCM_ITSTATUS1 (_PRCMU_BASE + 0x150) -#define PRCM_ITSTATUS2 (_PRCMU_BASE + 0x158) -#define PRCM_ITSTATUS3 (_PRCMU_BASE + 0x160) -#define PRCM_ITSTATUS4 (_PRCMU_BASE + 0x168) -#define PRCM_ITSTATUS5 (_PRCMU_BASE + 0x484) -#define PRCM_ITCLEAR5 (_PRCMU_BASE + 0x488) -#define PRCM_ARMIT_MASKXP70_IT (_PRCMU_BASE + 0x1018) +#define PRCM_A9_MASK_REQ 0x328 +#define PRCM_A9_MASK_REQ_PRCM_A9_MASK_REQ BIT(0) + +#define PRCM_A9_MASK_ACK 0x32C +#define PRCM_ARMITMSK31TO0 0x11C +#define PRCM_ARMITMSK63TO32 0x120 +#define PRCM_ARMITMSK95TO64 0x124 +#define PRCM_ARMITMSK127TO96 0x128 +#define PRCM_POWER_STATE_VAL 0x25C +#define PRCM_ARMITVAL31TO0 0x260 +#define PRCM_ARMITVAL63TO32 0x264 +#define PRCM_ARMITVAL95TO64 0x268 +#define PRCM_ARMITVAL127TO96 0x26C + +#define PRCM_HOSTACCESS_REQ 0x334 +#define PRCM_HOSTACCESS_REQ_HOSTACCESS_REQ BIT(0) + +#define PRCM_ARM_IT1_CLR 0x48C +#define PRCM_ARM_IT1_VAL 0x494 + +#define PRCM_ITSTATUS0 0x148 +#define PRCM_ITSTATUS1 0x150 +#define PRCM_ITSTATUS2 0x158 +#define PRCM_ITSTATUS3 0x160 +#define PRCM_ITSTATUS4 0x168 +#define PRCM_ITSTATUS5 0x484 +#define PRCM_ITCLEAR5 0x488 +#define PRCM_ARMIT_MASKXP70_IT 0x1018 /* System reset register */ -#define PRCM_APE_SOFTRST (_PRCMU_BASE + 0x228) +#define PRCM_APE_SOFTRST 0x228 /* Level shifter and clamp control registers */ -#define PRCM_MMIP_LS_CLAMP_SET (_PRCMU_BASE + 0x420) -#define PRCM_MMIP_LS_CLAMP_CLR (_PRCMU_BASE + 0x424) +#define PRCM_MMIP_LS_CLAMP_SET 0x420 +#define PRCM_MMIP_LS_CLAMP_CLR 0x424 + +/* PRCMU HW semaphore */ +#define PRCM_SEM 0x400 +#define PRCM_SEM_PRCM_SEM BIT(0) /* PRCMU clock/PLL/reset registers */ -#define PRCM_PLLDSI_FREQ (_PRCMU_BASE + 0x500) -#define PRCM_PLLDSI_ENABLE (_PRCMU_BASE + 0x504) -#define PRCM_LCDCLK_MGT (_PRCMU_BASE + 0x044) -#define PRCM_MCDECLK_MGT (_PRCMU_BASE + 0x064) -#define PRCM_HDMICLK_MGT (_PRCMU_BASE + 0x058) -#define PRCM_TVCLK_MGT (_PRCMU_BASE + 0x07c) -#define PRCM_DSI_PLLOUT_SEL (_PRCMU_BASE + 0x530) -#define PRCM_DSITVCLK_DIV (_PRCMU_BASE + 0x52C) -#define PRCM_APE_RESETN_SET (_PRCMU_BASE + 0x1E4) -#define PRCM_APE_RESETN_CLR (_PRCMU_BASE + 0x1E8) +#define PRCM_PLLDSI_FREQ 0x500 +#define PRCM_PLLDSI_ENABLE 0x504 +#define PRCM_PLLDSI_LOCKP 0x508 +#define PRCM_DSI_PLLOUT_SEL 0x530 +#define PRCM_DSITVCLK_DIV 0x52C +#define PRCM_APE_RESETN_SET 0x1E4 +#define PRCM_APE_RESETN_CLR 0x1E8 + +#define PRCM_TCR 0x1C8 +#define PRCM_TCR_TENSEL_MASK BITS(0, 7) +#define PRCM_TCR_STOP_TIMERS BIT(16) +#define PRCM_TCR_DOZE_MODE BIT(17) + +#define PRCM_CLKOCR 0x1CC +#define PRCM_CLKOCR_CLKODIV0_SHIFT 0 +#define PRCM_CLKOCR_CLKODIV0_MASK BITS(0, 5) +#define PRCM_CLKOCR_CLKOSEL0_SHIFT 6 +#define PRCM_CLKOCR_CLKOSEL0_MASK BITS(6, 8) 
+#define PRCM_CLKOCR_CLKODIV1_SHIFT 16 +#define PRCM_CLKOCR_CLKODIV1_MASK BITS(16, 21) +#define PRCM_CLKOCR_CLKOSEL1_SHIFT 22 +#define PRCM_CLKOCR_CLKOSEL1_MASK BITS(22, 24) +#define PRCM_CLKOCR_CLK1TYPE BIT(28) + +#define PRCM_SGACLK_MGT 0x014 +#define PRCM_UARTCLK_MGT 0x018 +#define PRCM_MSP02CLK_MGT 0x01C +#define PRCM_MSP1CLK_MGT 0x288 +#define PRCM_I2CCLK_MGT 0x020 +#define PRCM_SDMMCCLK_MGT 0x024 +#define PRCM_SLIMCLK_MGT 0x028 +#define PRCM_PER1CLK_MGT 0x02C +#define PRCM_PER2CLK_MGT 0x030 +#define PRCM_PER3CLK_MGT 0x034 +#define PRCM_PER5CLK_MGT 0x038 +#define PRCM_PER6CLK_MGT 0x03C +#define PRCM_PER7CLK_MGT 0x040 +#define PRCM_LCDCLK_MGT 0x044 +#define PRCM_BMLCLK_MGT 0x04C +#define PRCM_HSITXCLK_MGT 0x050 +#define PRCM_HSIRXCLK_MGT 0x054 +#define PRCM_HDMICLK_MGT 0x058 +#define PRCM_APEATCLK_MGT 0x05C +#define PRCM_APETRACECLK_MGT 0x060 +#define PRCM_MCDECLK_MGT 0x064 +#define PRCM_IPI2CCLK_MGT 0x068 +#define PRCM_DSIALTCLK_MGT 0x06C +#define PRCM_DMACLK_MGT 0x074 +#define PRCM_B2R2CLK_MGT 0x078 +#define PRCM_TVCLK_MGT 0x07C +#define PRCM_UNIPROCLK_MGT 0x278 +#define PRCM_SSPCLK_MGT 0x280 +#define PRCM_RNGCLK_MGT 0x284 +#define PRCM_UICCCLK_MGT 0x27C + +#define PRCM_CLK_MGT_CLKPLLDIV_MASK BITS(0, 4) +#define PRCM_CLK_MGT_CLKPLLSW_MASK BITS(5, 7) +#define PRCM_CLK_MGT_CLKEN BIT(8) /* ePOD and memory power signal control registers */ -#define PRCM_EPOD_C_SET (_PRCMU_BASE + 0x410) -#define PRCM_SRAM_LS_SLEEP (_PRCMU_BASE + 0x304) +#define PRCM_EPOD_C_SET 0x410 +#define PRCM_SRAM_LS_SLEEP 0x304 /* Debug power control unit registers */ -#define PRCM_POWER_STATE_SET (_PRCMU_BASE + 0x254) +#define PRCM_POWER_STATE_SET 0x254 /* Miscellaneous unit registers */ -#define PRCM_DSI_SW_RESET (_PRCMU_BASE + 0x324) +#define PRCM_DSI_SW_RESET 0x324 +#define PRCM_GPIOCR 0x138 + +/* GPIOCR register */ +#define PRCM_GPIOCR_SPI2_SELECT BIT(23) + +#define PRCM_DDR_SUBSYS_APE_MINBW 0x438 -#endif /* __MACH_PRCMU_REGS_H */ +#endif /* __DB8500_PRCMU_REGS_H */ diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c index 31f18c8c6bf8..c44725bd8b9a 100644 --- a/drivers/mfd/db8500-prcmu.c +++ b/drivers/mfd/db8500-prcmu.c @@ -10,93 +10,1354 @@ * U8500 PRCM Unit interface driver * */ -#include #include +#include +#include #include #include +#include #include +#include #include #include +#include #include #include -#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include "db8500-prcmu-regs.h" + +/* Offset for the firmware version within the TCPM */ +#define PRCMU_FW_VERSION_OFFSET 0xA4 + +/* PRCMU project numbers, defined by PRCMU FW */ +#define PRCMU_PROJECT_ID_8500V1_0 1 +#define PRCMU_PROJECT_ID_8500V2_0 2 +#define PRCMU_PROJECT_ID_8400V2_0 3 + +/* Index of different voltages to be used when accessing AVSData */ +#define PRCM_AVS_BASE 0x2FC +#define PRCM_AVS_VBB_RET (PRCM_AVS_BASE + 0x0) +#define PRCM_AVS_VBB_MAX_OPP (PRCM_AVS_BASE + 0x1) +#define PRCM_AVS_VBB_100_OPP (PRCM_AVS_BASE + 0x2) +#define PRCM_AVS_VBB_50_OPP (PRCM_AVS_BASE + 0x3) +#define PRCM_AVS_VARM_MAX_OPP (PRCM_AVS_BASE + 0x4) +#define PRCM_AVS_VARM_100_OPP (PRCM_AVS_BASE + 0x5) +#define PRCM_AVS_VARM_50_OPP (PRCM_AVS_BASE + 0x6) +#define PRCM_AVS_VARM_RET (PRCM_AVS_BASE + 0x7) +#define PRCM_AVS_VAPE_100_OPP (PRCM_AVS_BASE + 0x8) +#define PRCM_AVS_VAPE_50_OPP (PRCM_AVS_BASE + 0x9) +#define PRCM_AVS_VMOD_100_OPP (PRCM_AVS_BASE + 0xA) +#define PRCM_AVS_VMOD_50_OPP (PRCM_AVS_BASE + 0xB) +#define PRCM_AVS_VSAFE (PRCM_AVS_BASE + 0xC) + +#define PRCM_AVS_VOLTAGE 0 +#define 
PRCM_AVS_VOLTAGE_MASK 0x3f +#define PRCM_AVS_ISSLOWSTARTUP 6 +#define PRCM_AVS_ISSLOWSTARTUP_MASK (1 << PRCM_AVS_ISSLOWSTARTUP) +#define PRCM_AVS_ISMODEENABLE 7 +#define PRCM_AVS_ISMODEENABLE_MASK (1 << PRCM_AVS_ISMODEENABLE) + +#define PRCM_BOOT_STATUS 0xFFF +#define PRCM_ROMCODE_A2P 0xFFE +#define PRCM_ROMCODE_P2A 0xFFD +#define PRCM_XP70_CUR_PWR_STATE 0xFFC /* 4 BYTES */ + +#define PRCM_SW_RST_REASON 0xFF8 /* 2 bytes */ + +#define _PRCM_MBOX_HEADER 0xFE8 /* 16 bytes */ +#define PRCM_MBOX_HEADER_REQ_MB0 (_PRCM_MBOX_HEADER + 0x0) +#define PRCM_MBOX_HEADER_REQ_MB1 (_PRCM_MBOX_HEADER + 0x1) +#define PRCM_MBOX_HEADER_REQ_MB2 (_PRCM_MBOX_HEADER + 0x2) +#define PRCM_MBOX_HEADER_REQ_MB3 (_PRCM_MBOX_HEADER + 0x3) +#define PRCM_MBOX_HEADER_REQ_MB4 (_PRCM_MBOX_HEADER + 0x4) +#define PRCM_MBOX_HEADER_REQ_MB5 (_PRCM_MBOX_HEADER + 0x5) +#define PRCM_MBOX_HEADER_ACK_MB0 (_PRCM_MBOX_HEADER + 0x8) + +/* Req Mailboxes */ +#define PRCM_REQ_MB0 0xFDC /* 12 bytes */ +#define PRCM_REQ_MB1 0xFD0 /* 12 bytes */ +#define PRCM_REQ_MB2 0xFC0 /* 16 bytes */ +#define PRCM_REQ_MB3 0xE4C /* 372 bytes */ +#define PRCM_REQ_MB4 0xE48 /* 4 bytes */ +#define PRCM_REQ_MB5 0xE44 /* 4 bytes */ + +/* Ack Mailboxes */ +#define PRCM_ACK_MB0 0xE08 /* 52 bytes */ +#define PRCM_ACK_MB1 0xE04 /* 4 bytes */ +#define PRCM_ACK_MB2 0xE00 /* 4 bytes */ +#define PRCM_ACK_MB3 0xDFC /* 4 bytes */ +#define PRCM_ACK_MB4 0xDF8 /* 4 bytes */ +#define PRCM_ACK_MB5 0xDF4 /* 4 bytes */ + +/* Mailbox 0 headers */ +#define MB0H_POWER_STATE_TRANS 0 +#define MB0H_CONFIG_WAKEUPS_EXE 1 +#define MB0H_READ_WAKEUP_ACK 3 +#define MB0H_CONFIG_WAKEUPS_SLEEP 4 + +#define MB0H_WAKEUP_EXE 2 +#define MB0H_WAKEUP_SLEEP 5 + +/* Mailbox 0 REQs */ +#define PRCM_REQ_MB0_AP_POWER_STATE (PRCM_REQ_MB0 + 0x0) +#define PRCM_REQ_MB0_AP_PLL_STATE (PRCM_REQ_MB0 + 0x1) +#define PRCM_REQ_MB0_ULP_CLOCK_STATE (PRCM_REQ_MB0 + 0x2) +#define PRCM_REQ_MB0_DO_NOT_WFI (PRCM_REQ_MB0 + 0x3) +#define PRCM_REQ_MB0_WAKEUP_8500 (PRCM_REQ_MB0 + 0x4) +#define PRCM_REQ_MB0_WAKEUP_4500 (PRCM_REQ_MB0 + 0x8) + +/* Mailbox 0 ACKs */ +#define PRCM_ACK_MB0_AP_PWRSTTR_STATUS (PRCM_ACK_MB0 + 0x0) +#define PRCM_ACK_MB0_READ_POINTER (PRCM_ACK_MB0 + 0x1) +#define PRCM_ACK_MB0_WAKEUP_0_8500 (PRCM_ACK_MB0 + 0x4) +#define PRCM_ACK_MB0_WAKEUP_0_4500 (PRCM_ACK_MB0 + 0x8) +#define PRCM_ACK_MB0_WAKEUP_1_8500 (PRCM_ACK_MB0 + 0x1C) +#define PRCM_ACK_MB0_WAKEUP_1_4500 (PRCM_ACK_MB0 + 0x20) +#define PRCM_ACK_MB0_EVENT_4500_NUMBERS 20 + +/* Mailbox 1 headers */ +#define MB1H_ARM_APE_OPP 0x0 +#define MB1H_RESET_MODEM 0x2 +#define MB1H_REQUEST_APE_OPP_100_VOLT 0x3 +#define MB1H_RELEASE_APE_OPP_100_VOLT 0x4 +#define MB1H_RELEASE_USB_WAKEUP 0x5 + +/* Mailbox 1 Requests */ +#define PRCM_REQ_MB1_ARM_OPP (PRCM_REQ_MB1 + 0x0) +#define PRCM_REQ_MB1_APE_OPP (PRCM_REQ_MB1 + 0x1) +#define PRCM_REQ_MB1_APE_OPP_100_RESTORE (PRCM_REQ_MB1 + 0x4) +#define PRCM_REQ_MB1_ARM_OPP_100_RESTORE (PRCM_REQ_MB1 + 0x8) + +/* Mailbox 1 ACKs */ +#define PRCM_ACK_MB1_CURRENT_ARM_OPP (PRCM_ACK_MB1 + 0x0) +#define PRCM_ACK_MB1_CURRENT_APE_OPP (PRCM_ACK_MB1 + 0x1) +#define PRCM_ACK_MB1_APE_VOLTAGE_STATUS (PRCM_ACK_MB1 + 0x2) +#define PRCM_ACK_MB1_DVFS_STATUS (PRCM_ACK_MB1 + 0x3) + +/* Mailbox 2 headers */ +#define MB2H_DPS 0x0 +#define MB2H_AUTO_PWR 0x1 + +/* Mailbox 2 REQs */ +#define PRCM_REQ_MB2_SVA_MMDSP (PRCM_REQ_MB2 + 0x0) +#define PRCM_REQ_MB2_SVA_PIPE (PRCM_REQ_MB2 + 0x1) +#define PRCM_REQ_MB2_SIA_MMDSP (PRCM_REQ_MB2 + 0x2) +#define PRCM_REQ_MB2_SIA_PIPE (PRCM_REQ_MB2 + 0x3) +#define PRCM_REQ_MB2_SGA (PRCM_REQ_MB2 + 0x4) +#define 
PRCM_REQ_MB2_B2R2_MCDE (PRCM_REQ_MB2 + 0x5) +#define PRCM_REQ_MB2_ESRAM12 (PRCM_REQ_MB2 + 0x6) +#define PRCM_REQ_MB2_ESRAM34 (PRCM_REQ_MB2 + 0x7) +#define PRCM_REQ_MB2_AUTO_PM_SLEEP (PRCM_REQ_MB2 + 0x8) +#define PRCM_REQ_MB2_AUTO_PM_IDLE (PRCM_REQ_MB2 + 0xC) + +/* Mailbox 2 ACKs */ +#define PRCM_ACK_MB2_DPS_STATUS (PRCM_ACK_MB2 + 0x0) +#define HWACC_PWR_ST_OK 0xFE + +/* Mailbox 3 headers */ +#define MB3H_ANC 0x0 +#define MB3H_SIDETONE 0x1 +#define MB3H_SYSCLK 0xE + +/* Mailbox 3 Requests */ +#define PRCM_REQ_MB3_ANC_FIR_COEFF (PRCM_REQ_MB3 + 0x0) +#define PRCM_REQ_MB3_ANC_IIR_COEFF (PRCM_REQ_MB3 + 0x20) +#define PRCM_REQ_MB3_ANC_SHIFTER (PRCM_REQ_MB3 + 0x60) +#define PRCM_REQ_MB3_ANC_WARP (PRCM_REQ_MB3 + 0x64) +#define PRCM_REQ_MB3_SIDETONE_FIR_GAIN (PRCM_REQ_MB3 + 0x68) +#define PRCM_REQ_MB3_SIDETONE_FIR_COEFF (PRCM_REQ_MB3 + 0x6C) +#define PRCM_REQ_MB3_SYSCLK_MGT (PRCM_REQ_MB3 + 0x16C) + +/* Mailbox 4 headers */ +#define MB4H_DDR_INIT 0x0 +#define MB4H_MEM_ST 0x1 +#define MB4H_HOTDOG 0x12 +#define MB4H_HOTMON 0x13 +#define MB4H_HOT_PERIOD 0x14 + +/* Mailbox 4 Requests */ +#define PRCM_REQ_MB4_DDR_ST_AP_SLEEP_IDLE (PRCM_REQ_MB4 + 0x0) +#define PRCM_REQ_MB4_DDR_ST_AP_DEEP_IDLE (PRCM_REQ_MB4 + 0x1) +#define PRCM_REQ_MB4_ESRAM0_ST (PRCM_REQ_MB4 + 0x3) +#define PRCM_REQ_MB4_HOTDOG_THRESHOLD (PRCM_REQ_MB4 + 0x0) +#define PRCM_REQ_MB4_HOTMON_LOW (PRCM_REQ_MB4 + 0x0) +#define PRCM_REQ_MB4_HOTMON_HIGH (PRCM_REQ_MB4 + 0x1) +#define PRCM_REQ_MB4_HOTMON_CONFIG (PRCM_REQ_MB4 + 0x2) +#define PRCM_REQ_MB4_HOT_PERIOD (PRCM_REQ_MB4 + 0x0) +#define HOTMON_CONFIG_LOW BIT(0) +#define HOTMON_CONFIG_HIGH BIT(1) + +/* Mailbox 5 Requests */ +#define PRCM_REQ_MB5_I2C_SLAVE_OP (PRCM_REQ_MB5 + 0x0) +#define PRCM_REQ_MB5_I2C_HW_BITS (PRCM_REQ_MB5 + 0x1) +#define PRCM_REQ_MB5_I2C_REG (PRCM_REQ_MB5 + 0x2) +#define PRCM_REQ_MB5_I2C_VAL (PRCM_REQ_MB5 + 0x3) +#define PRCMU_I2C_WRITE(slave) \ + (((slave) << 1) | (cpu_is_u8500v2() ? BIT(6) : 0)) +#define PRCMU_I2C_READ(slave) \ + (((slave) << 1) | BIT(0) | (cpu_is_u8500v2() ? BIT(6) : 0)) +#define PRCMU_I2C_STOP_EN BIT(3) + +/* Mailbox 5 ACKs */ +#define PRCM_ACK_MB5_I2C_STATUS (PRCM_ACK_MB5 + 0x1) +#define PRCM_ACK_MB5_I2C_VAL (PRCM_ACK_MB5 + 0x3) +#define I2C_WR_OK 0x1 +#define I2C_RD_OK 0x2 + +#define NUM_MB 8 +#define MBOX_BIT BIT +#define ALL_MBOX_BITS (MBOX_BIT(NUM_MB) - 1) + +/* + * Wakeups/IRQs + */ + +#define WAKEUP_BIT_RTC BIT(0) +#define WAKEUP_BIT_RTT0 BIT(1) +#define WAKEUP_BIT_RTT1 BIT(2) +#define WAKEUP_BIT_HSI0 BIT(3) +#define WAKEUP_BIT_HSI1 BIT(4) +#define WAKEUP_BIT_CA_WAKE BIT(5) +#define WAKEUP_BIT_USB BIT(6) +#define WAKEUP_BIT_ABB BIT(7) +#define WAKEUP_BIT_ABB_FIFO BIT(8) +#define WAKEUP_BIT_SYSCLK_OK BIT(9) +#define WAKEUP_BIT_CA_SLEEP BIT(10) +#define WAKEUP_BIT_AC_WAKE_ACK BIT(11) +#define WAKEUP_BIT_SIDE_TONE_OK BIT(12) +#define WAKEUP_BIT_ANC_OK BIT(13) +#define WAKEUP_BIT_SW_ERROR BIT(14) +#define WAKEUP_BIT_AC_SLEEP_ACK BIT(15) +#define WAKEUP_BIT_ARM BIT(17) +#define WAKEUP_BIT_HOTMON_LOW BIT(18) +#define WAKEUP_BIT_HOTMON_HIGH BIT(19) +#define WAKEUP_BIT_MODEM_SW_RESET_REQ BIT(20) +#define WAKEUP_BIT_GPIO0 BIT(23) +#define WAKEUP_BIT_GPIO1 BIT(24) +#define WAKEUP_BIT_GPIO2 BIT(25) +#define WAKEUP_BIT_GPIO3 BIT(26) +#define WAKEUP_BIT_GPIO4 BIT(27) +#define WAKEUP_BIT_GPIO5 BIT(28) +#define WAKEUP_BIT_GPIO6 BIT(29) +#define WAKEUP_BIT_GPIO7 BIT(30) +#define WAKEUP_BIT_GPIO8 BIT(31) + +/* + * This vector maps irq numbers to the bits in the bit field used in + * communication with the PRCMU firmware. 
+ * + * The reason for having this is to keep the irq numbers contiguous even though + * the bits in the bit field are not. (The bits also have a tendency to move + * around, to further complicate matters.) + */ +#define IRQ_INDEX(_name) ((IRQ_PRCMU_##_name) - IRQ_PRCMU_BASE) +#define IRQ_ENTRY(_name)[IRQ_INDEX(_name)] = (WAKEUP_BIT_##_name) +static u32 prcmu_irq_bit[NUM_PRCMU_WAKEUPS] = { + IRQ_ENTRY(RTC), + IRQ_ENTRY(RTT0), + IRQ_ENTRY(RTT1), + IRQ_ENTRY(HSI0), + IRQ_ENTRY(HSI1), + IRQ_ENTRY(CA_WAKE), + IRQ_ENTRY(USB), + IRQ_ENTRY(ABB), + IRQ_ENTRY(ABB_FIFO), + IRQ_ENTRY(CA_SLEEP), + IRQ_ENTRY(ARM), + IRQ_ENTRY(HOTMON_LOW), + IRQ_ENTRY(HOTMON_HIGH), + IRQ_ENTRY(MODEM_SW_RESET_REQ), + IRQ_ENTRY(GPIO0), + IRQ_ENTRY(GPIO1), + IRQ_ENTRY(GPIO2), + IRQ_ENTRY(GPIO3), + IRQ_ENTRY(GPIO4), + IRQ_ENTRY(GPIO5), + IRQ_ENTRY(GPIO6), + IRQ_ENTRY(GPIO7), + IRQ_ENTRY(GPIO8) +}; + +#define VALID_WAKEUPS (BIT(NUM_PRCMU_WAKEUP_INDICES) - 1) +#define WAKEUP_ENTRY(_name)[PRCMU_WAKEUP_INDEX_##_name] = (WAKEUP_BIT_##_name) +static u32 prcmu_wakeup_bit[NUM_PRCMU_WAKEUP_INDICES] = { + WAKEUP_ENTRY(RTC), + WAKEUP_ENTRY(RTT0), + WAKEUP_ENTRY(RTT1), + WAKEUP_ENTRY(HSI0), + WAKEUP_ENTRY(HSI1), + WAKEUP_ENTRY(USB), + WAKEUP_ENTRY(ABB), + WAKEUP_ENTRY(ABB_FIFO), + WAKEUP_ENTRY(ARM) +}; + +/* + * mb0_transfer - state needed for mailbox 0 communication. + * @lock: The transaction lock. + * @dbb_events_lock: A lock used to handle concurrent access to (parts of) + * the request data. + * @mask_work: Work structure used for (un)masking wakeup interrupts. + * @req: Request data that need to persist between requests. + */ +static struct { + spinlock_t lock; + spinlock_t dbb_irqs_lock; + struct work_struct mask_work; + struct mutex ac_wake_lock; + struct completion ac_wake_work; + struct { + u32 dbb_irqs; + u32 dbb_wakeups; + u32 abb_events; + } req; +} mb0_transfer; + +/* + * mb1_transfer - state needed for mailbox 1 communication. + * @lock: The transaction lock. + * @work: The transaction completion structure. + * @ack: Reply ("acknowledge") data. + */ +static struct { + struct mutex lock; + struct completion work; + struct { + u8 header; + u8 arm_opp; + u8 ape_opp; + u8 ape_voltage_status; + } ack; +} mb1_transfer; + +/* + * mb2_transfer - state needed for mailbox 2 communication. + * @lock: The transaction lock. + * @work: The transaction completion structure. + * @auto_pm_lock: The autonomous power management configuration lock. + * @auto_pm_enabled: A flag indicating whether autonomous PM is enabled. + * @req: Request data that need to persist between requests. + * @ack: Reply ("acknowledge") data. + */ +static struct { + struct mutex lock; + struct completion work; + spinlock_t auto_pm_lock; + bool auto_pm_enabled; + struct { + u8 status; + } ack; +} mb2_transfer; + +/* + * mb3_transfer - state needed for mailbox 3 communication. + * @lock: The request lock. + * @sysclk_lock: A lock used to handle concurrent sysclk requests. + * @sysclk_work: Work structure used for sysclk requests. + */ +static struct { + spinlock_t lock; + struct mutex sysclk_lock; + struct completion sysclk_work; +} mb3_transfer; + +/* + * mb4_transfer - state needed for mailbox 4 communication. + * @lock: The transaction lock. + * @work: The transaction completion structure. + */ +static struct { + struct mutex lock; + struct completion work; +} mb4_transfer; + +/* + * mb5_transfer - state needed for mailbox 5 communication. + * @lock: The transaction lock. + * @work: The transaction completion structure. + * @ack: Reply ("acknowledge") data. 
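+ *
+ * Mailbox 5 carries the register accesses towards the ABB over the PRCMU
+ * I2C channel; see prcmu_abb_read() and prcmu_abb_write() further down for
+ * the users of this state.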
+ */ +static struct { + struct mutex lock; + struct completion work; + struct { + u8 status; + u8 value; + } ack; +} mb5_transfer; + +static atomic_t ac_wake_req_state = ATOMIC_INIT(0); + +/* Spinlocks */ +static DEFINE_SPINLOCK(clkout_lock); +static DEFINE_SPINLOCK(gpiocr_lock); + +/* Global var to runtime determine TCDM base for v2 or v1 */ +static __iomem void *tcdm_base; + +struct clk_mgt { + unsigned int offset; + u32 pllsw; +}; + +static DEFINE_SPINLOCK(clk_mgt_lock); + +#define CLK_MGT_ENTRY(_name)[PRCMU_##_name] = { (PRCM_##_name##_MGT), 0 } +struct clk_mgt clk_mgt[PRCMU_NUM_REG_CLOCKS] = { + CLK_MGT_ENTRY(SGACLK), + CLK_MGT_ENTRY(UARTCLK), + CLK_MGT_ENTRY(MSP02CLK), + CLK_MGT_ENTRY(MSP1CLK), + CLK_MGT_ENTRY(I2CCLK), + CLK_MGT_ENTRY(SDMMCCLK), + CLK_MGT_ENTRY(SLIMCLK), + CLK_MGT_ENTRY(PER1CLK), + CLK_MGT_ENTRY(PER2CLK), + CLK_MGT_ENTRY(PER3CLK), + CLK_MGT_ENTRY(PER5CLK), + CLK_MGT_ENTRY(PER6CLK), + CLK_MGT_ENTRY(PER7CLK), + CLK_MGT_ENTRY(LCDCLK), + CLK_MGT_ENTRY(BMLCLK), + CLK_MGT_ENTRY(HSITXCLK), + CLK_MGT_ENTRY(HSIRXCLK), + CLK_MGT_ENTRY(HDMICLK), + CLK_MGT_ENTRY(APEATCLK), + CLK_MGT_ENTRY(APETRACECLK), + CLK_MGT_ENTRY(MCDECLK), + CLK_MGT_ENTRY(IPI2CCLK), + CLK_MGT_ENTRY(DSIALTCLK), + CLK_MGT_ENTRY(DMACLK), + CLK_MGT_ENTRY(B2R2CLK), + CLK_MGT_ENTRY(TVCLK), + CLK_MGT_ENTRY(SSPCLK), + CLK_MGT_ENTRY(RNGCLK), + CLK_MGT_ENTRY(UICCCLK), +}; + +/* +* Used by MCDE to setup all necessary PRCMU registers +*/ +#define PRCMU_RESET_DSIPLL 0x00004000 +#define PRCMU_UNCLAMP_DSIPLL 0x00400800 + +#define PRCMU_CLK_PLL_DIV_SHIFT 0 +#define PRCMU_CLK_PLL_SW_SHIFT 5 +#define PRCMU_CLK_38 (1 << 9) +#define PRCMU_CLK_38_SRC (1 << 10) +#define PRCMU_CLK_38_DIV (1 << 11) + +/* PLLDIV=12, PLLSW=4 (PLLDDR) */ +#define PRCMU_DSI_CLOCK_SETTING 0x0000008C + +/* PLLDIV=8, PLLSW=4 (PLLDDR) */ +#define PRCMU_DSI_CLOCK_SETTING_U8400 0x00000088 + +/* DPI 50000000 Hz */ +#define PRCMU_DPI_CLOCK_SETTING ((1 << PRCMU_CLK_PLL_SW_SHIFT) | \ + (16 << PRCMU_CLK_PLL_DIV_SHIFT)) +#define PRCMU_DSI_LP_CLOCK_SETTING 0x00000E00 + +/* D=101, N=1, R=4, SELDIV2=0 */ +#define PRCMU_PLLDSI_FREQ_SETTING 0x00040165 + +/* D=70, N=1, R=3, SELDIV2=0 */ +#define PRCMU_PLLDSI_FREQ_SETTING_U8400 0x00030146 + +#define PRCMU_ENABLE_PLLDSI 0x00000001 +#define PRCMU_DISABLE_PLLDSI 0x00000000 +#define PRCMU_RELEASE_RESET_DSS 0x0000400C +#define PRCMU_DSI_PLLOUT_SEL_SETTING 0x00000202 +/* ESC clk, div0=1, div1=1, div2=3 */ +#define PRCMU_ENABLE_ESCAPE_CLOCK_DIV 0x07030101 +#define PRCMU_DISABLE_ESCAPE_CLOCK_DIV 0x00030101 +#define PRCMU_DSI_RESET_SW 0x00000007 + +#define PRCMU_PLLDSI_LOCKP_LOCKED 0x3 + +static struct { + u8 project_number; + u8 api_version; + u8 func_version; + u8 errata; +} prcmu_version; + + +int prcmu_enable_dsipll(void) +{ + int i; + unsigned int plldsifreq; + + /* Clear DSIPLL_RESETN */ + writel(PRCMU_RESET_DSIPLL, (_PRCMU_BASE + PRCM_APE_RESETN_CLR)); + /* Unclamp DSIPLL in/out */ + writel(PRCMU_UNCLAMP_DSIPLL, (_PRCMU_BASE + PRCM_MMIP_LS_CLAMP_CLR)); + + if (prcmu_is_u8400()) + plldsifreq = PRCMU_PLLDSI_FREQ_SETTING_U8400; + else + plldsifreq = PRCMU_PLLDSI_FREQ_SETTING; + /* Set DSI PLL FREQ */ + writel(plldsifreq, (_PRCMU_BASE + PRCM_PLLDSI_FREQ)); + writel(PRCMU_DSI_PLLOUT_SEL_SETTING, + (_PRCMU_BASE + PRCM_DSI_PLLOUT_SEL)); + /* Enable Escape clocks */ + writel(PRCMU_ENABLE_ESCAPE_CLOCK_DIV, + (_PRCMU_BASE + PRCM_DSITVCLK_DIV)); + + /* Start DSI PLL */ + writel(PRCMU_ENABLE_PLLDSI, (_PRCMU_BASE + PRCM_PLLDSI_ENABLE)); + /* Reset DSI PLL */ + writel(PRCMU_DSI_RESET_SW, (_PRCMU_BASE + PRCM_DSI_SW_RESET)); + for (i = 
0; i < 10; i++) { + if ((readl(_PRCMU_BASE + PRCM_PLLDSI_LOCKP) & + PRCMU_PLLDSI_LOCKP_LOCKED) + == PRCMU_PLLDSI_LOCKP_LOCKED) + break; + udelay(100); + } + /* Set DSIPLL_RESETN */ + writel(PRCMU_RESET_DSIPLL, (_PRCMU_BASE + PRCM_APE_RESETN_SET)); + return 0; +} + +int prcmu_disable_dsipll(void) +{ + /* Disable dsi pll */ + writel(PRCMU_DISABLE_PLLDSI, (_PRCMU_BASE + PRCM_PLLDSI_ENABLE)); + /* Disable escapeclock */ + writel(PRCMU_DISABLE_ESCAPE_CLOCK_DIV, + (_PRCMU_BASE + PRCM_DSITVCLK_DIV)); + return 0; +} + +int prcmu_set_display_clocks(void) +{ + unsigned long flags; + unsigned int dsiclk; + + if (prcmu_is_u8400()) + dsiclk = PRCMU_DSI_CLOCK_SETTING_U8400; + else + dsiclk = PRCMU_DSI_CLOCK_SETTING; + + spin_lock_irqsave(&clk_mgt_lock, flags); + + /* Grab the HW semaphore. */ + while ((readl(_PRCMU_BASE + PRCM_SEM) & PRCM_SEM_PRCM_SEM) != 0) + cpu_relax(); + + writel(dsiclk, (_PRCMU_BASE + PRCM_HDMICLK_MGT)); + writel(PRCMU_DSI_LP_CLOCK_SETTING, (_PRCMU_BASE + PRCM_TVCLK_MGT)); + writel(PRCMU_DPI_CLOCK_SETTING, (_PRCMU_BASE + PRCM_LCDCLK_MGT)); + + /* Release the HW semaphore. */ + writel(0, (_PRCMU_BASE + PRCM_SEM)); + + spin_unlock_irqrestore(&clk_mgt_lock, flags); + + return 0; +} + +/** + * prcmu_enable_spi2 - Enables pin muxing for SPI2 on OtherAlternateC1. + */ +void prcmu_enable_spi2(void) +{ + u32 reg; + unsigned long flags; + + spin_lock_irqsave(&gpiocr_lock, flags); + reg = readl(_PRCMU_BASE + PRCM_GPIOCR); + writel(reg | PRCM_GPIOCR_SPI2_SELECT, _PRCMU_BASE + PRCM_GPIOCR); + spin_unlock_irqrestore(&gpiocr_lock, flags); +} + +/** + * prcmu_disable_spi2 - Disables pin muxing for SPI2 on OtherAlternateC1. + */ +void prcmu_disable_spi2(void) +{ + u32 reg; + unsigned long flags; + + spin_lock_irqsave(&gpiocr_lock, flags); + reg = readl(_PRCMU_BASE + PRCM_GPIOCR); + writel(reg & ~PRCM_GPIOCR_SPI2_SELECT, _PRCMU_BASE + PRCM_GPIOCR); + spin_unlock_irqrestore(&gpiocr_lock, flags); +} + +bool prcmu_has_arm_maxopp(void) +{ + return (readb(tcdm_base + PRCM_AVS_VARM_MAX_OPP) & + PRCM_AVS_ISMODEENABLE_MASK) == PRCM_AVS_ISMODEENABLE_MASK; +} + +bool prcmu_is_u8400(void) +{ + return prcmu_version.project_number == PRCMU_PROJECT_ID_8400V2_0; +} + +/** + * prcmu_get_boot_status - PRCMU boot status checking + * Returns: the current PRCMU boot status + */ +int prcmu_get_boot_status(void) +{ + return readb(tcdm_base + PRCM_BOOT_STATUS); +} + +/** + * prcmu_set_rc_a2p - This function is used to run few power state sequences + * @val: Value to be set, i.e. 
transition requested + * Returns: 0 on success, -EINVAL on invalid argument + * + * This function is used to run the following power state sequences - + * any state to ApReset, ApDeepSleep to ApExecute, ApExecute to ApDeepSleep + */ +int prcmu_set_rc_a2p(enum romcode_write val) +{ + if (val < RDY_2_DS || val > RDY_2_XP70_RST) + return -EINVAL; + writeb(val, (tcdm_base + PRCM_ROMCODE_A2P)); + return 0; +} + +/** + * prcmu_get_rc_p2a - This function is used to get power state sequences + * Returns: the power transition that has last happened + * + * This function can return the following transitions- + * any state to ApReset, ApDeepSleep to ApExecute, ApExecute to ApDeepSleep + */ +enum romcode_read prcmu_get_rc_p2a(void) +{ + return readb(tcdm_base + PRCM_ROMCODE_P2A); +} + +/** + * prcmu_get_current_mode - Return the current XP70 power mode + * Returns: Returns the current AP(ARM) power mode: init, + * apBoot, apExecute, apDeepSleep, apSleep, apIdle, apReset + */ +enum ap_pwrst prcmu_get_xp70_current_state(void) +{ + return readb(tcdm_base + PRCM_XP70_CUR_PWR_STATE); +} + +/** + * prcmu_config_clkout - Configure one of the programmable clock outputs. + * @clkout: The CLKOUT number (0 or 1). + * @source: The clock to be used (one of the PRCMU_CLKSRC_*). + * @div: The divider to be applied. + * + * Configures one of the programmable clock outputs (CLKOUTs). + * @div should be in the range [1,63] to request a configuration, or 0 to + * inform that the configuration is no longer requested. + */ +int prcmu_config_clkout(u8 clkout, u8 source, u8 div) +{ + static int requests[2]; + int r = 0; + unsigned long flags; + u32 val; + u32 bits; + u32 mask; + u32 div_mask; + + BUG_ON(clkout > 1); + BUG_ON(div > 63); + BUG_ON((clkout == 0) && (source > PRCMU_CLKSRC_CLK009)); + + if (!div && !requests[clkout]) + return -EINVAL; + + switch (clkout) { + case 0: + div_mask = PRCM_CLKOCR_CLKODIV0_MASK; + mask = (PRCM_CLKOCR_CLKODIV0_MASK | PRCM_CLKOCR_CLKOSEL0_MASK); + bits = ((source << PRCM_CLKOCR_CLKOSEL0_SHIFT) | + (div << PRCM_CLKOCR_CLKODIV0_SHIFT)); + break; + case 1: + div_mask = PRCM_CLKOCR_CLKODIV1_MASK; + mask = (PRCM_CLKOCR_CLKODIV1_MASK | PRCM_CLKOCR_CLKOSEL1_MASK | + PRCM_CLKOCR_CLK1TYPE); + bits = ((source << PRCM_CLKOCR_CLKOSEL1_SHIFT) | + (div << PRCM_CLKOCR_CLKODIV1_SHIFT)); + break; + } + bits &= mask; + + spin_lock_irqsave(&clkout_lock, flags); + + val = readl(_PRCMU_BASE + PRCM_CLKOCR); + if (val & div_mask) { + if (div) { + if ((val & mask) != bits) { + r = -EBUSY; + goto unlock_and_return; + } + } else { + if ((val & mask & ~div_mask) != bits) { + r = -EINVAL; + goto unlock_and_return; + } + } + } + writel((bits | (val & ~mask)), (_PRCMU_BASE + PRCM_CLKOCR)); + requests[clkout] += (div ? 1 : -1); + +unlock_and_return: + spin_unlock_irqrestore(&clkout_lock, flags); + + return r; +} + +int prcmu_set_power_state(u8 state, bool keep_ulp_clk, bool keep_ap_pll) +{ + unsigned long flags; + + BUG_ON((state < PRCMU_AP_SLEEP) || (PRCMU_AP_DEEP_IDLE < state)); + + spin_lock_irqsave(&mb0_transfer.lock, flags); + + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(0)) + cpu_relax(); + + writeb(MB0H_POWER_STATE_TRANS, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB0)); + writeb(state, (tcdm_base + PRCM_REQ_MB0_AP_POWER_STATE)); + writeb((keep_ap_pll ? 1 : 0), (tcdm_base + PRCM_REQ_MB0_AP_PLL_STATE)); + writeb((keep_ulp_clk ? 
1 : 0), + (tcdm_base + PRCM_REQ_MB0_ULP_CLOCK_STATE)); + writeb(0, (tcdm_base + PRCM_REQ_MB0_DO_NOT_WFI)); + writel(MBOX_BIT(0), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + + spin_unlock_irqrestore(&mb0_transfer.lock, flags); + + return 0; +} + +/* This function should only be called while mb0_transfer.lock is held. */ +static void config_wakeups(void) +{ + const u8 header[2] = { + MB0H_CONFIG_WAKEUPS_EXE, + MB0H_CONFIG_WAKEUPS_SLEEP + }; + static u32 last_dbb_events; + static u32 last_abb_events; + u32 dbb_events; + u32 abb_events; + unsigned int i; + + dbb_events = mb0_transfer.req.dbb_irqs | mb0_transfer.req.dbb_wakeups; + dbb_events |= (WAKEUP_BIT_AC_WAKE_ACK | WAKEUP_BIT_AC_SLEEP_ACK); + + abb_events = mb0_transfer.req.abb_events; + + if ((dbb_events == last_dbb_events) && (abb_events == last_abb_events)) + return; + + for (i = 0; i < 2; i++) { + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(0)) + cpu_relax(); + writel(dbb_events, (tcdm_base + PRCM_REQ_MB0_WAKEUP_8500)); + writel(abb_events, (tcdm_base + PRCM_REQ_MB0_WAKEUP_4500)); + writeb(header[i], (tcdm_base + PRCM_MBOX_HEADER_REQ_MB0)); + writel(MBOX_BIT(0), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + } + last_dbb_events = dbb_events; + last_abb_events = abb_events; +} + +void prcmu_enable_wakeups(u32 wakeups) +{ + unsigned long flags; + u32 bits; + int i; + + BUG_ON(wakeups != (wakeups & VALID_WAKEUPS)); + + for (i = 0, bits = 0; i < NUM_PRCMU_WAKEUP_INDICES; i++) { + if (wakeups & BIT(i)) + bits |= prcmu_wakeup_bit[i]; + } + + spin_lock_irqsave(&mb0_transfer.lock, flags); + + mb0_transfer.req.dbb_wakeups = bits; + config_wakeups(); + + spin_unlock_irqrestore(&mb0_transfer.lock, flags); +} + +void prcmu_config_abb_event_readout(u32 abb_events) +{ + unsigned long flags; + + spin_lock_irqsave(&mb0_transfer.lock, flags); + + mb0_transfer.req.abb_events = abb_events; + config_wakeups(); + + spin_unlock_irqrestore(&mb0_transfer.lock, flags); +} + +void prcmu_get_abb_event_buffer(void __iomem **buf) +{ + if (readb(tcdm_base + PRCM_ACK_MB0_READ_POINTER) & 1) + *buf = (tcdm_base + PRCM_ACK_MB0_WAKEUP_1_4500); + else + *buf = (tcdm_base + PRCM_ACK_MB0_WAKEUP_0_4500); +} + +/** + * prcmu_set_arm_opp - set the appropriate ARM OPP + * @opp: The new ARM operating point to which transition is to be made + * Returns: 0 on success, non-zero on failure + * + * This function sets the the operating point of the ARM. 
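+ *
+ * For example, a cpufreq backend would typically call
+ * prcmu_set_arm_opp(ARM_100_OPP) and check the return value, since the call
+ * blocks until the PRCMU firmware has acknowledged the request.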
+ */ +int prcmu_set_arm_opp(u8 opp) +{ + int r; + + if (opp < ARM_NO_CHANGE || opp > ARM_EXTCLK) + return -EINVAL; + + r = 0; + + mutex_lock(&mb1_transfer.lock); + + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(1)) + cpu_relax(); + + writeb(MB1H_ARM_APE_OPP, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB1)); + writeb(opp, (tcdm_base + PRCM_REQ_MB1_ARM_OPP)); + writeb(APE_NO_CHANGE, (tcdm_base + PRCM_REQ_MB1_APE_OPP)); + + writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + wait_for_completion(&mb1_transfer.work); + + if ((mb1_transfer.ack.header != MB1H_ARM_APE_OPP) || + (mb1_transfer.ack.arm_opp != opp)) + r = -EIO; + + mutex_unlock(&mb1_transfer.lock); + + return r; +} + +/** + * prcmu_get_arm_opp - get the current ARM OPP + * + * Returns: the current ARM OPP + */ +int prcmu_get_arm_opp(void) +{ + return readb(tcdm_base + PRCM_ACK_MB1_CURRENT_ARM_OPP); +} + +/** + * prcmu_get_ddr_opp - get the current DDR OPP + * + * Returns: the current DDR OPP + */ +int prcmu_get_ddr_opp(void) +{ + return readb(_PRCMU_BASE + PRCM_DDR_SUBSYS_APE_MINBW); +} + +/** + * set_ddr_opp - set the appropriate DDR OPP + * @opp: The new DDR operating point to which transition is to be made + * Returns: 0 on success, non-zero on failure + * + * This function sets the operating point of the DDR. + */ +int prcmu_set_ddr_opp(u8 opp) +{ + if (opp < DDR_100_OPP || opp > DDR_25_OPP) + return -EINVAL; + /* Changing the DDR OPP can hang the hardware pre-v21 */ + if (cpu_is_u8500v20_or_later() && !cpu_is_u8500v20()) + writeb(opp, (_PRCMU_BASE + PRCM_DDR_SUBSYS_APE_MINBW)); + + return 0; +} +/** + * set_ape_opp - set the appropriate APE OPP + * @opp: The new APE operating point to which transition is to be made + * Returns: 0 on success, non-zero on failure + * + * This function sets the operating point of the APE. + */ +int prcmu_set_ape_opp(u8 opp) +{ + int r = 0; + + mutex_lock(&mb1_transfer.lock); + + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(1)) + cpu_relax(); + + writeb(MB1H_ARM_APE_OPP, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB1)); + writeb(ARM_NO_CHANGE, (tcdm_base + PRCM_REQ_MB1_ARM_OPP)); + writeb(opp, (tcdm_base + PRCM_REQ_MB1_APE_OPP)); + + writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + wait_for_completion(&mb1_transfer.work); + + if ((mb1_transfer.ack.header != MB1H_ARM_APE_OPP) || + (mb1_transfer.ack.ape_opp != opp)) + r = -EIO; + + mutex_unlock(&mb1_transfer.lock); + + return r; +} + +/** + * prcmu_get_ape_opp - get the current APE OPP + * + * Returns: the current APE OPP + */ +int prcmu_get_ape_opp(void) +{ + return readb(tcdm_base + PRCM_ACK_MB1_CURRENT_APE_OPP); +} + +/** + * prcmu_request_ape_opp_100_voltage - Request APE OPP 100% voltage + * @enable: true to request the higher voltage, false to drop a request. + * + * Calls to this function to enable and disable requests must be balanced. 
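+ *
+ * For example, a driver that calls prcmu_request_ape_opp_100_voltage(true)
+ * before a bandwidth-hungry transfer should call
+ * prcmu_request_ape_opp_100_voltage(false) when it is done, so that the
+ * internal request count drops back to zero.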
+ */ +int prcmu_request_ape_opp_100_voltage(bool enable) +{ + int r = 0; + u8 header; + static unsigned int requests; + + mutex_lock(&mb1_transfer.lock); + + if (enable) { + if (0 != requests++) + goto unlock_and_return; + header = MB1H_REQUEST_APE_OPP_100_VOLT; + } else { + if (requests == 0) { + r = -EIO; + goto unlock_and_return; + } else if (1 != requests--) { + goto unlock_and_return; + } + header = MB1H_RELEASE_APE_OPP_100_VOLT; + } + + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(1)) + cpu_relax(); + + writeb(header, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB1)); + + writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + wait_for_completion(&mb1_transfer.work); + + if ((mb1_transfer.ack.header != header) || + ((mb1_transfer.ack.ape_voltage_status & BIT(0)) != 0)) + r = -EIO; + +unlock_and_return: + mutex_unlock(&mb1_transfer.lock); + + return r; +} + +/** + * prcmu_release_usb_wakeup_state - release the state required by a USB wakeup + * + * This function releases the power state requirements of a USB wakeup. + */ +int prcmu_release_usb_wakeup_state(void) +{ + int r = 0; + + mutex_lock(&mb1_transfer.lock); + + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(1)) + cpu_relax(); + + writeb(MB1H_RELEASE_USB_WAKEUP, + (tcdm_base + PRCM_MBOX_HEADER_REQ_MB1)); + + writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + wait_for_completion(&mb1_transfer.work); + + if ((mb1_transfer.ack.header != MB1H_RELEASE_USB_WAKEUP) || + ((mb1_transfer.ack.ape_voltage_status & BIT(0)) != 0)) + r = -EIO; + + mutex_unlock(&mb1_transfer.lock); + + return r; +} + +/** + * prcmu_set_epod - set the state of a EPOD (power domain) + * @epod_id: The EPOD to set + * @epod_state: The new EPOD state + * + * This function sets the state of a EPOD (power domain). It may not be called + * from interrupt context. + */ +int prcmu_set_epod(u16 epod_id, u8 epod_state) +{ + int r = 0; + bool ram_retention = false; + int i; + + /* check argument */ + BUG_ON(epod_id >= NUM_EPOD_ID); + + /* set flag if retention is possible */ + switch (epod_id) { + case EPOD_ID_SVAMMDSP: + case EPOD_ID_SIAMMDSP: + case EPOD_ID_ESRAM12: + case EPOD_ID_ESRAM34: + ram_retention = true; + break; + } + + /* check argument */ + BUG_ON(epod_state > EPOD_STATE_ON); + BUG_ON(epod_state == EPOD_STATE_RAMRET && !ram_retention); + + /* get lock */ + mutex_lock(&mb2_transfer.lock); + + /* wait for mailbox */ + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(2)) + cpu_relax(); + + /* fill in mailbox */ + for (i = 0; i < NUM_EPOD_ID; i++) + writeb(EPOD_STATE_NO_CHANGE, (tcdm_base + PRCM_REQ_MB2 + i)); + writeb(epod_state, (tcdm_base + PRCM_REQ_MB2 + epod_id)); + + writeb(MB2H_DPS, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB2)); + + writel(MBOX_BIT(2), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + + /* + * The current firmware version does not handle errors correctly, + * and we cannot recover if there is an error. + * This is expected to change when the firmware is updated. + */ + if (!wait_for_completion_timeout(&mb2_transfer.work, + msecs_to_jiffies(20000))) { + pr_err("prcmu: %s timed out (20 s) waiting for a reply.\n", + __func__); + r = -EIO; + goto unlock_and_return; + } + + if (mb2_transfer.ack.status != HWACC_PWR_ST_OK) + r = -EIO; + +unlock_and_return: + mutex_unlock(&mb2_transfer.lock); + return r; +} + +/** + * prcmu_configure_auto_pm - Configure autonomous power management. + * @sleep: Configuration for ApSleep. + * @idle: Configuration for ApIdle. 
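+ *
+ * Both pointers must be valid (the function BUG()s on NULL). The packed
+ * configuration words are written straight into mailbox 2 as shared
+ * variables; no message is sent to the firmware.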
+ */ +void prcmu_configure_auto_pm(struct prcmu_auto_pm_config *sleep, + struct prcmu_auto_pm_config *idle) +{ + u32 sleep_cfg; + u32 idle_cfg; + unsigned long flags; -#include + BUG_ON((sleep == NULL) || (idle == NULL)); -#include "db8500-prcmu-regs.h" + sleep_cfg = (sleep->sva_auto_pm_enable & 0xF); + sleep_cfg = ((sleep_cfg << 4) | (sleep->sia_auto_pm_enable & 0xF)); + sleep_cfg = ((sleep_cfg << 8) | (sleep->sva_power_on & 0xFF)); + sleep_cfg = ((sleep_cfg << 8) | (sleep->sia_power_on & 0xFF)); + sleep_cfg = ((sleep_cfg << 4) | (sleep->sva_policy & 0xF)); + sleep_cfg = ((sleep_cfg << 4) | (sleep->sia_policy & 0xF)); -/* Global var to runtime determine TCDM base for v2 or v1 */ -static __iomem void *tcdm_base; + idle_cfg = (idle->sva_auto_pm_enable & 0xF); + idle_cfg = ((idle_cfg << 4) | (idle->sia_auto_pm_enable & 0xF)); + idle_cfg = ((idle_cfg << 8) | (idle->sva_power_on & 0xFF)); + idle_cfg = ((idle_cfg << 8) | (idle->sia_power_on & 0xFF)); + idle_cfg = ((idle_cfg << 4) | (idle->sva_policy & 0xF)); + idle_cfg = ((idle_cfg << 4) | (idle->sia_policy & 0xF)); -#define _MBOX_HEADER (tcdm_base + 0xFE8) -#define MBOX_HEADER_REQ_MB0 (_MBOX_HEADER + 0x0) + spin_lock_irqsave(&mb2_transfer.auto_pm_lock, flags); -#define REQ_MB1 (tcdm_base + 0xFD0) -#define REQ_MB5 (tcdm_base + 0xE44) + /* + * The autonomous power management configuration is done through + * fields in mailbox 2, but these fields are only used as shared + * variables - i.e. there is no need to send a message. + */ + writel(sleep_cfg, (tcdm_base + PRCM_REQ_MB2_AUTO_PM_SLEEP)); + writel(idle_cfg, (tcdm_base + PRCM_REQ_MB2_AUTO_PM_IDLE)); -#define REQ_MB1_ARMOPP (REQ_MB1 + 0x0) -#define REQ_MB1_APEOPP (REQ_MB1 + 0x1) -#define REQ_MB1_BOOSTOPP (REQ_MB1 + 0x2) + mb2_transfer.auto_pm_enabled = + ((sleep->sva_auto_pm_enable == PRCMU_AUTO_PM_ON) || + (sleep->sia_auto_pm_enable == PRCMU_AUTO_PM_ON) || + (idle->sva_auto_pm_enable == PRCMU_AUTO_PM_ON) || + (idle->sia_auto_pm_enable == PRCMU_AUTO_PM_ON)); -#define ACK_MB1 (tcdm_base + 0xE04) -#define ACK_MB5 (tcdm_base + 0xDF4) + spin_unlock_irqrestore(&mb2_transfer.auto_pm_lock, flags); +} +EXPORT_SYMBOL(prcmu_configure_auto_pm); -#define ACK_MB1_CURR_ARMOPP (ACK_MB1 + 0x0) -#define ACK_MB1_CURR_APEOPP (ACK_MB1 + 0x1) +bool prcmu_is_auto_pm_enabled(void) +{ + return mb2_transfer.auto_pm_enabled; +} -#define REQ_MB5_I2C_SLAVE_OP (REQ_MB5) -#define REQ_MB5_I2C_HW_BITS (REQ_MB5 + 1) -#define REQ_MB5_I2C_REG (REQ_MB5 + 2) -#define REQ_MB5_I2C_VAL (REQ_MB5 + 3) +static int request_sysclk(bool enable) +{ + int r; + unsigned long flags; -#define ACK_MB5_I2C_STATUS (ACK_MB5 + 1) -#define ACK_MB5_I2C_VAL (ACK_MB5 + 3) + r = 0; -#define PRCM_AVS_VARM_MAX_OPP (tcdm_base + 0x2E4) -#define PRCM_AVS_ISMODEENABLE 7 -#define PRCM_AVS_ISMODEENABLE_MASK (1 << PRCM_AVS_ISMODEENABLE) + mutex_lock(&mb3_transfer.sysclk_lock); -#define I2C_WRITE(slave) \ - (((slave) << 1) | (cpu_is_u8500v2() ? BIT(6) : 0)) -#define I2C_READ(slave) \ - (((slave) << 1) | (cpu_is_u8500v2() ? BIT(6) : 0) | BIT(0)) -#define I2C_STOP_EN BIT(3) - -enum mb1_h { - MB1H_ARM_OPP = 1, - MB1H_APE_OPP, - MB1H_ARM_APE_OPP, -}; + spin_lock_irqsave(&mb3_transfer.lock, flags); -static struct { - struct mutex lock; - struct completion work; - struct { - u8 arm_opp; - u8 ape_opp; - u8 arm_status; - u8 ape_status; - } ack; -} mb1_transfer; + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(3)) + cpu_relax(); -enum ack_mb5_status { - I2C_WR_OK = 0x01, - I2C_RD_OK = 0x02, -}; + writeb((enable ? 
ON : OFF), (tcdm_base + PRCM_REQ_MB3_SYSCLK_MGT)); -#define MBOX_BIT BIT -#define NUM_MBOX 8 + writeb(MB3H_SYSCLK, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB3)); + writel(MBOX_BIT(3), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); -static struct { - struct mutex lock; - struct completion work; - bool failed; - struct { - u8 status; - u8 value; - } ack; -} mb5_transfer; + spin_unlock_irqrestore(&mb3_transfer.lock, flags); + + /* + * The firmware only sends an ACK if we want to enable the + * SysClk, and it succeeds. + */ + if (enable && !wait_for_completion_timeout(&mb3_transfer.sysclk_work, + msecs_to_jiffies(20000))) { + pr_err("prcmu: %s timed out (20 s) waiting for a reply.\n", + __func__); + r = -EIO; + } + + mutex_unlock(&mb3_transfer.sysclk_lock); + + return r; +} + +static int request_timclk(bool enable) +{ + u32 val = (PRCM_TCR_DOZE_MODE | PRCM_TCR_TENSEL_MASK); + + if (!enable) + val |= PRCM_TCR_STOP_TIMERS; + writel(val, (_PRCMU_BASE + PRCM_TCR)); + + return 0; +} + +static int request_reg_clock(u8 clock, bool enable) +{ + u32 val; + unsigned long flags; + + spin_lock_irqsave(&clk_mgt_lock, flags); + + /* Grab the HW semaphore. */ + while ((readl(_PRCMU_BASE + PRCM_SEM) & PRCM_SEM_PRCM_SEM) != 0) + cpu_relax(); + + val = readl(_PRCMU_BASE + clk_mgt[clock].offset); + if (enable) { + val |= (PRCM_CLK_MGT_CLKEN | clk_mgt[clock].pllsw); + } else { + clk_mgt[clock].pllsw = (val & PRCM_CLK_MGT_CLKPLLSW_MASK); + val &= ~(PRCM_CLK_MGT_CLKEN | PRCM_CLK_MGT_CLKPLLSW_MASK); + } + writel(val, (_PRCMU_BASE + clk_mgt[clock].offset)); + + /* Release the HW semaphore. */ + writel(0, (_PRCMU_BASE + PRCM_SEM)); + + spin_unlock_irqrestore(&clk_mgt_lock, flags); + + return 0; +} + +/** + * prcmu_request_clock() - Request for a clock to be enabled or disabled. + * @clock: The clock for which the request is made. + * @enable: Whether the clock should be enabled (true) or disabled (false). + * + * This function should only be used by the clock implementation. + * Do not use it from any other place! 
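+ *
+ * For example, the platform clock code might enable the SGA clock with
+ * prcmu_request_clock(PRCMU_SGACLK, true) and disable it again with
+ * prcmu_request_clock(PRCMU_SGACLK, false).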
+ */ +int prcmu_request_clock(u8 clock, bool enable) +{ + if (clock < PRCMU_NUM_REG_CLOCKS) + return request_reg_clock(clock, enable); + else if (clock == PRCMU_TIMCLK) + return request_timclk(enable); + else if (clock == PRCMU_SYSCLK) + return request_sysclk(enable); + else + return -EINVAL; +} + +int prcmu_config_esram0_deep_sleep(u8 state) +{ + if ((state > ESRAM0_DEEP_SLEEP_STATE_RET) || + (state < ESRAM0_DEEP_SLEEP_STATE_OFF)) + return -EINVAL; + + mutex_lock(&mb4_transfer.lock); + + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(4)) + cpu_relax(); + + writeb(MB4H_MEM_ST, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB4)); + writeb(((DDR_PWR_STATE_OFFHIGHLAT << 4) | DDR_PWR_STATE_ON), + (tcdm_base + PRCM_REQ_MB4_DDR_ST_AP_SLEEP_IDLE)); + writeb(DDR_PWR_STATE_ON, + (tcdm_base + PRCM_REQ_MB4_DDR_ST_AP_DEEP_IDLE)); + writeb(state, (tcdm_base + PRCM_REQ_MB4_ESRAM0_ST)); + + writel(MBOX_BIT(4), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + wait_for_completion(&mb4_transfer.work); + + mutex_unlock(&mb4_transfer.lock); + + return 0; +} + +int prcmu_config_hotdog(u8 threshold) +{ + mutex_lock(&mb4_transfer.lock); + + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(4)) + cpu_relax(); + + writeb(threshold, (tcdm_base + PRCM_REQ_MB4_HOTDOG_THRESHOLD)); + writeb(MB4H_HOTDOG, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB4)); + + writel(MBOX_BIT(4), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + wait_for_completion(&mb4_transfer.work); + + mutex_unlock(&mb4_transfer.lock); + + return 0; +} + +int prcmu_config_hotmon(u8 low, u8 high) +{ + mutex_lock(&mb4_transfer.lock); + + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(4)) + cpu_relax(); + + writeb(low, (tcdm_base + PRCM_REQ_MB4_HOTMON_LOW)); + writeb(high, (tcdm_base + PRCM_REQ_MB4_HOTMON_HIGH)); + writeb((HOTMON_CONFIG_LOW | HOTMON_CONFIG_HIGH), + (tcdm_base + PRCM_REQ_MB4_HOTMON_CONFIG)); + writeb(MB4H_HOTMON, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB4)); + + writel(MBOX_BIT(4), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + wait_for_completion(&mb4_transfer.work); + + mutex_unlock(&mb4_transfer.lock); + + return 0; +} + +static int config_hot_period(u16 val) +{ + mutex_lock(&mb4_transfer.lock); + + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(4)) + cpu_relax(); + + writew(val, (tcdm_base + PRCM_REQ_MB4_HOT_PERIOD)); + writeb(MB4H_HOT_PERIOD, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB4)); + + writel(MBOX_BIT(4), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + wait_for_completion(&mb4_transfer.work); + + mutex_unlock(&mb4_transfer.lock); + + return 0; +} + +int prcmu_start_temp_sense(u16 cycles32k) +{ + if (cycles32k == 0xFFFF) + return -EINVAL; + + return config_hot_period(cycles32k); +} + +int prcmu_stop_temp_sense(void) +{ + return config_hot_period(0xFFFF); +} + +/** + * prcmu_set_clock_divider() - Configure the clock divider. + * @clock: The clock for which the request is made. + * @divider: The clock divider. (< 32) + * + * This function should only be used by the clock implementation. + * Do not use it from any other place! + */ +int prcmu_set_clock_divider(u8 clock, u8 divider) +{ + u32 val; + unsigned long flags; + + if ((clock >= PRCMU_NUM_REG_CLOCKS) || (divider < 1) || (31 < divider)) + return -EINVAL; + + spin_lock_irqsave(&clk_mgt_lock, flags); + + /* Grab the HW semaphore. */ + while ((readl(_PRCMU_BASE + PRCM_SEM) & PRCM_SEM_PRCM_SEM) != 0) + cpu_relax(); + + val = readl(_PRCMU_BASE + clk_mgt[clock].offset); + val &= ~(PRCM_CLK_MGT_CLKPLLDIV_MASK); + val |= (u32)divider; + writel(val, (_PRCMU_BASE + clk_mgt[clock].offset)); + + /* Release the HW semaphore. 
*/ + writel(0, (_PRCMU_BASE + PRCM_SEM)); + + spin_unlock_irqrestore(&clk_mgt_lock, flags); + + return 0; +} /** * prcmu_abb_read() - Read register value(s) from the ABB. @@ -115,33 +1376,34 @@ int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size) if (size != 1) return -EINVAL; - r = mutex_lock_interruptible(&mb5_transfer.lock); - if (r) - return r; + mutex_lock(&mb5_transfer.lock); - while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5)) + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(5)) cpu_relax(); - writeb(I2C_READ(slave), REQ_MB5_I2C_SLAVE_OP); - writeb(I2C_STOP_EN, REQ_MB5_I2C_HW_BITS); - writeb(reg, REQ_MB5_I2C_REG); + writeb(PRCMU_I2C_READ(slave), (tcdm_base + PRCM_REQ_MB5_I2C_SLAVE_OP)); + writeb(PRCMU_I2C_STOP_EN, (tcdm_base + PRCM_REQ_MB5_I2C_HW_BITS)); + writeb(reg, (tcdm_base + PRCM_REQ_MB5_I2C_REG)); + writeb(0, (tcdm_base + PRCM_REQ_MB5_I2C_VAL)); + + writel(MBOX_BIT(5), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); - writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET); if (!wait_for_completion_timeout(&mb5_transfer.work, - msecs_to_jiffies(500))) { - pr_err("prcmu: prcmu_abb_read timed out.\n"); + msecs_to_jiffies(20000))) { + pr_err("prcmu: %s timed out (20 s) waiting for a reply.\n", + __func__); r = -EIO; - goto unlock_and_return; + } else { + r = ((mb5_transfer.ack.status == I2C_RD_OK) ? 0 : -EIO); } - r = ((mb5_transfer.ack.status == I2C_RD_OK) ? 0 : -EIO); + if (!r) *value = mb5_transfer.ack.value; -unlock_and_return: mutex_unlock(&mb5_transfer.lock); + return r; } -EXPORT_SYMBOL(prcmu_abb_read); /** * prcmu_abb_write() - Write register value(s) to the ABB. @@ -160,179 +1422,262 @@ int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size) if (size != 1) return -EINVAL; - r = mutex_lock_interruptible(&mb5_transfer.lock); - if (r) - return r; + mutex_lock(&mb5_transfer.lock); - - while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5)) + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(5)) cpu_relax(); - writeb(I2C_WRITE(slave), REQ_MB5_I2C_SLAVE_OP); - writeb(I2C_STOP_EN, REQ_MB5_I2C_HW_BITS); - writeb(reg, REQ_MB5_I2C_REG); - writeb(*value, REQ_MB5_I2C_VAL); + writeb(PRCMU_I2C_WRITE(slave), (tcdm_base + PRCM_REQ_MB5_I2C_SLAVE_OP)); + writeb(PRCMU_I2C_STOP_EN, (tcdm_base + PRCM_REQ_MB5_I2C_HW_BITS)); + writeb(reg, (tcdm_base + PRCM_REQ_MB5_I2C_REG)); + writeb(*value, (tcdm_base + PRCM_REQ_MB5_I2C_VAL)); + + writel(MBOX_BIT(5), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); - writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET); if (!wait_for_completion_timeout(&mb5_transfer.work, - msecs_to_jiffies(500))) { - pr_err("prcmu: prcmu_abb_write timed out.\n"); + msecs_to_jiffies(20000))) { + pr_err("prcmu: %s timed out (20 s) waiting for a reply.\n", + __func__); r = -EIO; - goto unlock_and_return; + } else { + r = ((mb5_transfer.ack.status == I2C_WR_OK) ? 0 : -EIO); } - r = ((mb5_transfer.ack.status == I2C_WR_OK) ? 
0 : -EIO); -unlock_and_return: mutex_unlock(&mb5_transfer.lock); + return r; } -EXPORT_SYMBOL(prcmu_abb_write); -static int set_ape_cpu_opps(u8 header, enum prcmu_ape_opp ape_opp, - enum prcmu_cpu_opp cpu_opp) +/** + * prcmu_ac_wake_req - should be called whenever ARM wants to wakeup Modem + */ +void prcmu_ac_wake_req(void) { - bool do_ape; - bool do_arm; - int err = 0; + u32 val; - do_ape = ((header == MB1H_APE_OPP) || (header == MB1H_ARM_APE_OPP)); - do_arm = ((header == MB1H_ARM_OPP) || (header == MB1H_ARM_APE_OPP)); + mutex_lock(&mb0_transfer.ac_wake_lock); - mutex_lock(&mb1_transfer.lock); + val = readl(_PRCMU_BASE + PRCM_HOSTACCESS_REQ); + if (val & PRCM_HOSTACCESS_REQ_HOSTACCESS_REQ) + goto unlock_and_return; - while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(1)) - cpu_relax(); + atomic_set(&ac_wake_req_state, 1); - writeb(0, MBOX_HEADER_REQ_MB0); - writeb(cpu_opp, REQ_MB1_ARMOPP); - writeb(ape_opp, REQ_MB1_APEOPP); - writeb(0, REQ_MB1_BOOSTOPP); - writel(MBOX_BIT(1), PRCM_MBOX_CPU_SET); - wait_for_completion(&mb1_transfer.work); - if ((do_ape) && (mb1_transfer.ack.ape_status != 0)) - err = -EIO; - if ((do_arm) && (mb1_transfer.ack.arm_status != 0)) - err = -EIO; + writel((val | PRCM_HOSTACCESS_REQ_HOSTACCESS_REQ), + (_PRCMU_BASE + PRCM_HOSTACCESS_REQ)); - mutex_unlock(&mb1_transfer.lock); + if (!wait_for_completion_timeout(&mb0_transfer.ac_wake_work, + msecs_to_jiffies(20000))) { + pr_err("prcmu: %s timed out (20 s) waiting for a reply.\n", + __func__); + } - return err; +unlock_and_return: + mutex_unlock(&mb0_transfer.ac_wake_lock); } /** - * prcmu_set_ape_opp() - Set the OPP of the APE. - * @opp: The OPP to set. - * - * This function sets the OPP of the APE. + * prcmu_ac_sleep_req - called when ARM no longer needs to talk to modem */ -int prcmu_set_ape_opp(enum prcmu_ape_opp opp) +void prcmu_ac_sleep_req() { - return set_ape_cpu_opps(MB1H_APE_OPP, opp, APE_OPP_NO_CHANGE); + u32 val; + + mutex_lock(&mb0_transfer.ac_wake_lock); + + val = readl(_PRCMU_BASE + PRCM_HOSTACCESS_REQ); + if (!(val & PRCM_HOSTACCESS_REQ_HOSTACCESS_REQ)) + goto unlock_and_return; + + writel((val & ~PRCM_HOSTACCESS_REQ_HOSTACCESS_REQ), + (_PRCMU_BASE + PRCM_HOSTACCESS_REQ)); + + if (!wait_for_completion_timeout(&mb0_transfer.ac_wake_work, + msecs_to_jiffies(20000))) { + pr_err("prcmu: %s timed out (20 s) waiting for a reply.\n", + __func__); + } + + atomic_set(&ac_wake_req_state, 0); + +unlock_and_return: + mutex_unlock(&mb0_transfer.ac_wake_lock); } -EXPORT_SYMBOL(prcmu_set_ape_opp); -/** - * prcmu_set_cpu_opp() - Set the OPP of the CPU. - * @opp: The OPP to set. - * - * This function sets the OPP of the CPU. - */ -int prcmu_set_cpu_opp(enum prcmu_cpu_opp opp) +bool prcmu_is_ac_wake_requested(void) { - return set_ape_cpu_opps(MB1H_ARM_OPP, CPU_OPP_NO_CHANGE, opp); + return (atomic_read(&ac_wake_req_state) != 0); } -EXPORT_SYMBOL(prcmu_set_cpu_opp); /** - * prcmu_set_ape_cpu_opps() - Set the OPPs of the APE and the CPU. - * @ape_opp: The APE OPP to set. - * @cpu_opp: The CPU OPP to set. + * prcmu_system_reset - System reset * - * This function sets the OPPs of the APE and the CPU. 
+ * Saves the reset reason code and then sets the APE_SOFRST register which + * fires interrupt to fw */ -int prcmu_set_ape_cpu_opps(enum prcmu_ape_opp ape_opp, - enum prcmu_cpu_opp cpu_opp) +void prcmu_system_reset(u16 reset_code) { - return set_ape_cpu_opps(MB1H_ARM_APE_OPP, ape_opp, cpu_opp); + writew(reset_code, (tcdm_base + PRCM_SW_RST_REASON)); + writel(1, (_PRCMU_BASE + PRCM_APE_SOFTRST)); } -EXPORT_SYMBOL(prcmu_set_ape_cpu_opps); /** - * prcmu_get_ape_opp() - Get the OPP of the APE. - * - * This function gets the OPP of the APE. + * prcmu_reset_modem - ask the PRCMU to reset modem */ -enum prcmu_ape_opp prcmu_get_ape_opp(void) +void prcmu_modem_reset(void) { - return readb(ACK_MB1_CURR_APEOPP); + mutex_lock(&mb1_transfer.lock); + + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(1)) + cpu_relax(); + + writeb(MB1H_RESET_MODEM, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB1)); + writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + wait_for_completion(&mb1_transfer.work); + + /* + * No need to check return from PRCMU as modem should go in reset state + * This state is already managed by upper layer + */ + + mutex_unlock(&mb1_transfer.lock); } -EXPORT_SYMBOL(prcmu_get_ape_opp); -/** - * prcmu_get_cpu_opp() - Get the OPP of the CPU. - * - * This function gets the OPP of the CPU. The OPP is specified in %%. - * PRCMU_OPP_EXT is a special OPP value, not specified in %%. - */ -int prcmu_get_cpu_opp(void) +static void ack_dbb_wakeup(void) { - return readb(ACK_MB1_CURR_ARMOPP); + unsigned long flags; + + spin_lock_irqsave(&mb0_transfer.lock, flags); + + while (readl(_PRCMU_BASE + PRCM_MBOX_CPU_VAL) & MBOX_BIT(0)) + cpu_relax(); + + writeb(MB0H_READ_WAKEUP_ACK, (tcdm_base + PRCM_MBOX_HEADER_REQ_MB0)); + writel(MBOX_BIT(0), (_PRCMU_BASE + PRCM_MBOX_CPU_SET)); + + spin_unlock_irqrestore(&mb0_transfer.lock, flags); } -EXPORT_SYMBOL(prcmu_get_cpu_opp); -bool prcmu_has_arm_maxopp(void) +static inline void print_unknown_header_warning(u8 n, u8 header) { - return (readb(PRCM_AVS_VARM_MAX_OPP) & PRCM_AVS_ISMODEENABLE_MASK) - == PRCM_AVS_ISMODEENABLE_MASK; + pr_warning("prcmu: Unknown message header (%d) in mailbox %d.\n", + header, n); } -static void read_mailbox_0(void) +static bool read_mailbox_0(void) { - writel(MBOX_BIT(0), PRCM_ARM_IT1_CLEAR); + bool r; + u32 ev; + unsigned int n; + u8 header; + + header = readb(tcdm_base + PRCM_MBOX_HEADER_ACK_MB0); + switch (header) { + case MB0H_WAKEUP_EXE: + case MB0H_WAKEUP_SLEEP: + if (readb(tcdm_base + PRCM_ACK_MB0_READ_POINTER) & 1) + ev = readl(tcdm_base + PRCM_ACK_MB0_WAKEUP_1_8500); + else + ev = readl(tcdm_base + PRCM_ACK_MB0_WAKEUP_0_8500); + + if (ev & (WAKEUP_BIT_AC_WAKE_ACK | WAKEUP_BIT_AC_SLEEP_ACK)) + complete(&mb0_transfer.ac_wake_work); + if (ev & WAKEUP_BIT_SYSCLK_OK) + complete(&mb3_transfer.sysclk_work); + + ev &= mb0_transfer.req.dbb_irqs; + + for (n = 0; n < NUM_PRCMU_WAKEUPS; n++) { + if (ev & prcmu_irq_bit[n]) + generic_handle_irq(IRQ_PRCMU_BASE + n); + } + r = true; + break; + default: + print_unknown_header_warning(0, header); + r = false; + break; + } + writel(MBOX_BIT(0), (_PRCMU_BASE + PRCM_ARM_IT1_CLR)); + return r; } -static void read_mailbox_1(void) +static bool read_mailbox_1(void) { - mb1_transfer.ack.arm_opp = readb(ACK_MB1_CURR_ARMOPP); - mb1_transfer.ack.ape_opp = readb(ACK_MB1_CURR_APEOPP); + mb1_transfer.ack.header = readb(tcdm_base + PRCM_MBOX_HEADER_REQ_MB1); + mb1_transfer.ack.arm_opp = readb(tcdm_base + + PRCM_ACK_MB1_CURRENT_ARM_OPP); + mb1_transfer.ack.ape_opp = readb(tcdm_base + + 
PRCM_ACK_MB1_CURRENT_APE_OPP); + mb1_transfer.ack.ape_voltage_status = readb(tcdm_base + + PRCM_ACK_MB1_APE_VOLTAGE_STATUS); + writel(MBOX_BIT(1), (_PRCMU_BASE + PRCM_ARM_IT1_CLR)); complete(&mb1_transfer.work); - writel(MBOX_BIT(1), PRCM_ARM_IT1_CLEAR); + return false; } -static void read_mailbox_2(void) +static bool read_mailbox_2(void) { - writel(MBOX_BIT(2), PRCM_ARM_IT1_CLEAR); + mb2_transfer.ack.status = readb(tcdm_base + PRCM_ACK_MB2_DPS_STATUS); + writel(MBOX_BIT(2), (_PRCMU_BASE + PRCM_ARM_IT1_CLR)); + complete(&mb2_transfer.work); + return false; } -static void read_mailbox_3(void) +static bool read_mailbox_3(void) { - writel(MBOX_BIT(3), PRCM_ARM_IT1_CLEAR); + writel(MBOX_BIT(3), (_PRCMU_BASE + PRCM_ARM_IT1_CLR)); + return false; } -static void read_mailbox_4(void) +static bool read_mailbox_4(void) { - writel(MBOX_BIT(4), PRCM_ARM_IT1_CLEAR); + u8 header; + bool do_complete = true; + + header = readb(tcdm_base + PRCM_MBOX_HEADER_REQ_MB4); + switch (header) { + case MB4H_MEM_ST: + case MB4H_HOTDOG: + case MB4H_HOTMON: + case MB4H_HOT_PERIOD: + break; + default: + print_unknown_header_warning(4, header); + do_complete = false; + break; + } + + writel(MBOX_BIT(4), (_PRCMU_BASE + PRCM_ARM_IT1_CLR)); + + if (do_complete) + complete(&mb4_transfer.work); + + return false; } -static void read_mailbox_5(void) +static bool read_mailbox_5(void) { - mb5_transfer.ack.status = readb(ACK_MB5_I2C_STATUS); - mb5_transfer.ack.value = readb(ACK_MB5_I2C_VAL); + mb5_transfer.ack.status = readb(tcdm_base + PRCM_ACK_MB5_I2C_STATUS); + mb5_transfer.ack.value = readb(tcdm_base + PRCM_ACK_MB5_I2C_VAL); + writel(MBOX_BIT(5), (_PRCMU_BASE + PRCM_ARM_IT1_CLR)); complete(&mb5_transfer.work); - writel(MBOX_BIT(5), PRCM_ARM_IT1_CLEAR); + return false; } -static void read_mailbox_6(void) +static bool read_mailbox_6(void) { - writel(MBOX_BIT(6), PRCM_ARM_IT1_CLEAR); + writel(MBOX_BIT(6), (_PRCMU_BASE + PRCM_ARM_IT1_CLR)); + return false; } -static void read_mailbox_7(void) +static bool read_mailbox_7(void) { - writel(MBOX_BIT(7), PRCM_ARM_IT1_CLEAR); + writel(MBOX_BIT(7), (_PRCMU_BASE + PRCM_ARM_IT1_CLR)); + return false; } -static void (* const read_mailbox[NUM_MBOX])(void) = { +static bool (* const read_mailbox[NUM_MB])(void) = { read_mailbox_0, read_mailbox_1, read_mailbox_2, @@ -347,49 +1692,199 @@ static irqreturn_t prcmu_irq_handler(int irq, void *data) { u32 bits; u8 n; + irqreturn_t r; - bits = (readl(PRCM_ARM_IT1_VAL) & (MBOX_BIT(NUM_MBOX) - 1)); + bits = (readl(_PRCMU_BASE + PRCM_ARM_IT1_VAL) & ALL_MBOX_BITS); if (unlikely(!bits)) return IRQ_NONE; + r = IRQ_HANDLED; for (n = 0; bits; n++) { if (bits & MBOX_BIT(n)) { bits -= MBOX_BIT(n); - read_mailbox[n](); + if (read_mailbox[n]()) + r = IRQ_WAKE_THREAD; } } + return r; +} + +static irqreturn_t prcmu_irq_thread_fn(int irq, void *data) +{ + ack_dbb_wakeup(); return IRQ_HANDLED; } +static void prcmu_mask_work(struct work_struct *work) +{ + unsigned long flags; + + spin_lock_irqsave(&mb0_transfer.lock, flags); + + config_wakeups(); + + spin_unlock_irqrestore(&mb0_transfer.lock, flags); +} + +static void prcmu_irq_mask(struct irq_data *d) +{ + unsigned long flags; + + spin_lock_irqsave(&mb0_transfer.dbb_irqs_lock, flags); + + mb0_transfer.req.dbb_irqs &= ~prcmu_irq_bit[d->irq - IRQ_PRCMU_BASE]; + + spin_unlock_irqrestore(&mb0_transfer.dbb_irqs_lock, flags); + + if (d->irq != IRQ_PRCMU_CA_SLEEP) + schedule_work(&mb0_transfer.mask_work); +} + +static void prcmu_irq_unmask(struct irq_data *d) +{ + unsigned long flags; + + 
spin_lock_irqsave(&mb0_transfer.dbb_irqs_lock, flags); + + mb0_transfer.req.dbb_irqs |= prcmu_irq_bit[d->irq - IRQ_PRCMU_BASE]; + + spin_unlock_irqrestore(&mb0_transfer.dbb_irqs_lock, flags); + + if (d->irq != IRQ_PRCMU_CA_SLEEP) + schedule_work(&mb0_transfer.mask_work); +} + +static void noop(struct irq_data *d) +{ +} + +static struct irq_chip prcmu_irq_chip = { + .name = "prcmu", + .irq_disable = prcmu_irq_mask, + .irq_ack = noop, + .irq_mask = prcmu_irq_mask, + .irq_unmask = prcmu_irq_unmask, +}; + void __init prcmu_early_init(void) { - if (cpu_is_u8500v11() || cpu_is_u8500ed()) { + unsigned int i; + + if (cpu_is_u8500v1()) { tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE_V1); } else if (cpu_is_u8500v2()) { + void *tcpm_base = ioremap_nocache(U8500_PRCMU_TCPM_BASE, SZ_4K); + + if (tcpm_base != NULL) { + int version; + version = readl(tcpm_base + PRCMU_FW_VERSION_OFFSET); + prcmu_version.project_number = version & 0xFF; + prcmu_version.api_version = (version >> 8) & 0xFF; + prcmu_version.func_version = (version >> 16) & 0xFF; + prcmu_version.errata = (version >> 24) & 0xFF; + pr_info("PRCMU firmware version %d.%d.%d\n", + (version >> 8) & 0xFF, (version >> 16) & 0xFF, + (version >> 24) & 0xFF); + iounmap(tcpm_base); + } + tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE); } else { pr_err("prcmu: Unsupported chip version\n"); BUG(); } -} - -static int __init prcmu_init(void) -{ - if (cpu_is_u8500ed()) { - pr_err("prcmu: Unsupported chip version\n"); - return 0; - } + spin_lock_init(&mb0_transfer.lock); + spin_lock_init(&mb0_transfer.dbb_irqs_lock); + mutex_init(&mb0_transfer.ac_wake_lock); + init_completion(&mb0_transfer.ac_wake_work); mutex_init(&mb1_transfer.lock); init_completion(&mb1_transfer.work); + mutex_init(&mb2_transfer.lock); + init_completion(&mb2_transfer.work); + spin_lock_init(&mb2_transfer.auto_pm_lock); + spin_lock_init(&mb3_transfer.lock); + mutex_init(&mb3_transfer.sysclk_lock); + init_completion(&mb3_transfer.sysclk_work); + mutex_init(&mb4_transfer.lock); + init_completion(&mb4_transfer.work); mutex_init(&mb5_transfer.lock); init_completion(&mb5_transfer.work); + INIT_WORK(&mb0_transfer.mask_work, prcmu_mask_work); + + /* Initalize irqs. */ + for (i = 0; i < NUM_PRCMU_WAKEUPS; i++) { + unsigned int irq; + + irq = IRQ_PRCMU_BASE + i; + irq_set_chip_and_handler(irq, &prcmu_irq_chip, + handle_simple_irq); + set_irq_flags(irq, IRQF_VALID); + } +} + +static struct mfd_cell db8500_prcmu_devs[] = { + { + .name = "db8500-prcmu-regulators", + }, + { + .name = "cpufreq-u8500", + }, +}; + +/** + * prcmu_fw_init - arch init call for the Linux PRCMU fw init logic + * + */ +static int __init db8500_prcmu_probe(struct platform_device *pdev) +{ + int err = 0; + + if (ux500_is_svp()) + return -ENODEV; + /* Clean up the mailbox interrupts after pre-kernel code. 
*/ - writel((MBOX_BIT(NUM_MBOX) - 1), PRCM_ARM_IT1_CLEAR); + writel(ALL_MBOX_BITS, (_PRCMU_BASE + PRCM_ARM_IT1_CLR)); + + err = request_threaded_irq(IRQ_DB8500_PRCMU1, prcmu_irq_handler, + prcmu_irq_thread_fn, IRQF_NO_SUSPEND, "prcmu", NULL); + if (err < 0) { + pr_err("prcmu: Failed to allocate IRQ_DB8500_PRCMU1.\n"); + err = -EBUSY; + goto no_irq_return; + } + + if (cpu_is_u8500v20_or_later()) + prcmu_config_esram0_deep_sleep(ESRAM0_DEEP_SLEEP_STATE_RET); + + err = mfd_add_devices(&pdev->dev, 0, db8500_prcmu_devs, + ARRAY_SIZE(db8500_prcmu_devs), NULL, + 0); - return request_irq(IRQ_DB8500_PRCMU1, prcmu_irq_handler, 0, - "prcmu", NULL); + if (err) + pr_err("prcmu: Failed to add subdevices\n"); + else + pr_info("DB8500 PRCMU initialized\n"); + +no_irq_return: + return err; +} + +static struct platform_driver db8500_prcmu_driver = { + .driver = { + .name = "db8500-prcmu", + .owner = THIS_MODULE, + }, +}; + +static int __init db8500_prcmu_init(void) +{ + return platform_driver_probe(&db8500_prcmu_driver, db8500_prcmu_probe); } -arch_initcall(prcmu_init); +arch_initcall(db8500_prcmu_init); + +MODULE_AUTHOR("Mattias Nilsson "); +MODULE_DESCRIPTION("DB8500 PRCM Unit driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/mfd/db8500-prcmu.h b/include/linux/mfd/db8500-prcmu.h index d591d79aa6f0..917dbcab701c 100644 --- a/include/linux/mfd/db8500-prcmu.h +++ b/include/linux/mfd/db8500-prcmu.h @@ -2,57 +2,977 @@ * Copyright (C) STMicroelectronics 2009 * Copyright (C) ST-Ericsson SA 2010 * - * Author: Sundar Iyer - * Author: Martin Persson - * * License Terms: GNU General Public License v2 + * Author: Kumar Sanghvi * - * PRCM Unit definitions + * PRCMU f/w APIs */ +#ifndef __MFD_DB8500_PRCMU_H +#define __MFD_DB8500_PRCMU_H + +#include +#include -#ifndef __MACH_PRCMU_DEFS_H -#define __MACH_PRCMU_DEFS_H +/* This portion previously known as */ -enum prcmu_cpu_opp { - CPU_OPP_INIT = 0x00, - CPU_OPP_NO_CHANGE = 0x01, - CPU_OPP_100 = 0x02, - CPU_OPP_50 = 0x03, - CPU_OPP_MAX = 0x04, - CPU_OPP_EXT_CLK = 0x07 +/** + * enum state - ON/OFF state definition + * @OFF: State is ON + * @ON: State is OFF + * + */ +enum state { + OFF = 0x0, + ON = 0x1, }; -enum prcmu_ape_opp { - APE_OPP_NO_CHANGE = 0x00, - APE_OPP_100 = 0x02, - APE_OPP_50 = 0x03, + +/** + * enum ret_state - general purpose On/Off/Retention states + * + */ +enum ret_state { + OFFST = 0, + ONST = 1, + RETST = 2 }; -#endif /* __MACH_PRCMU_DEFS_H */ +/** + * enum clk_arm - ARM Cortex A9 clock schemes + * @A9_OFF: + * @A9_BOOT: + * @A9_OPPT1: + * @A9_OPPT2: + * @A9_EXTCLK: + */ +enum clk_arm { + A9_OFF, + A9_BOOT, + A9_OPPT1, + A9_OPPT2, + A9_EXTCLK +}; -/* - * Copyright (C) STMicroelectronics 2009 - * Copyright (C) ST-Ericsson SA 2010 +/** + * enum clk_gen - GEN#0/GEN#1 clock schemes + * @GEN_OFF: + * @GEN_BOOT: + * @GEN_OPPT1: + */ +enum clk_gen { + GEN_OFF, + GEN_BOOT, + GEN_OPPT1, +}; + +/* some information between arm and xp70 */ + +/** + * enum romcode_write - Romcode message written by A9 AND read by XP70 + * @RDY_2_DS: Value set when ApDeepSleep state can be executed by XP70 + * @RDY_2_XP70_RST: Value set when 0x0F has been successfully polled by the + * romcode. 
The xp70 will go into self-reset + */ +enum romcode_write { + RDY_2_DS = 0x09, + RDY_2_XP70_RST = 0x10 +}; + +/** + * enum romcode_read - Romcode message written by XP70 and read by A9 + * @INIT: Init value when romcode field is not used + * @FS_2_DS: Value set when power state is going from ApExecute to + * ApDeepSleep + * @END_DS: Value set when ApDeepSleep power state is reached coming from + * ApExecute state + * @DS_TO_FS: Value set when power state is going from ApDeepSleep to + * ApExecute + * @END_FS: Value set when ApExecute power state is reached coming from + * ApDeepSleep state + * @SWR: Value set when power state is going to ApReset + * @END_SWR: Value set when the xp70 finished executing ApReset actions and + * waits for romcode acknowledgment to go to self-reset + */ +enum romcode_read { + INIT = 0x00, + FS_2_DS = 0x0A, + END_DS = 0x0B, + DS_TO_FS = 0x0C, + END_FS = 0x0D, + SWR = 0x0E, + END_SWR = 0x0F +}; + +/** + * enum ap_pwrst - current power states defined in PRCMU firmware + * @NO_PWRST: Current power state init + * @AP_BOOT: Current power state is apBoot + * @AP_EXECUTE: Current power state is apExecute + * @AP_DEEP_SLEEP: Current power state is apDeepSleep + * @AP_SLEEP: Current power state is apSleep + * @AP_IDLE: Current power state is apIdle + * @AP_RESET: Current power state is apReset + */ +enum ap_pwrst { + NO_PWRST = 0x00, + AP_BOOT = 0x01, + AP_EXECUTE = 0x02, + AP_DEEP_SLEEP = 0x03, + AP_SLEEP = 0x04, + AP_IDLE = 0x05, + AP_RESET = 0x06 +}; + +/** + * enum ap_pwrst_trans - Transition states defined in PRCMU firmware + * @NO_TRANSITION: No power state transition + * @APEXECUTE_TO_APSLEEP: Power state transition from ApExecute to ApSleep + * @APIDLE_TO_APSLEEP: Power state transition from ApIdle to ApSleep + * @APBOOT_TO_APEXECUTE: Power state transition from ApBoot to ApExecute + * @APEXECUTE_TO_APDEEPSLEEP: Power state transition from ApExecute to + * ApDeepSleep + * @APEXECUTE_TO_APIDLE: Power state transition from ApExecute to ApIdle + */ +enum ap_pwrst_trans { + NO_TRANSITION = 0x00, + APEXECUTE_TO_APSLEEP = 0x01, + APIDLE_TO_APSLEEP = 0x02, /* To be removed */ + PRCMU_AP_SLEEP = 0x01, + APBOOT_TO_APEXECUTE = 0x03, + APEXECUTE_TO_APDEEPSLEEP = 0x04, /* To be removed */ + PRCMU_AP_DEEP_SLEEP = 0x04, + APEXECUTE_TO_APIDLE = 0x05, /* To be removed */ + PRCMU_AP_IDLE = 0x05, + PRCMU_AP_DEEP_IDLE = 0x07, +}; + +/** + * enum ddr_pwrst - DDR power states definition + * @DDR_PWR_STATE_UNCHANGED: SDRAM and DDR controller state is unchanged + * @DDR_PWR_STATE_ON: + * @DDR_PWR_STATE_OFFLOWLAT: + * @DDR_PWR_STATE_OFFHIGHLAT: + */ +enum ddr_pwrst { + DDR_PWR_STATE_UNCHANGED = 0x00, + DDR_PWR_STATE_ON = 0x01, + DDR_PWR_STATE_OFFLOWLAT = 0x02, + DDR_PWR_STATE_OFFHIGHLAT = 0x03 +}; + +/** + * enum arm_opp - ARM OPP states definition + * @ARM_OPP_INIT: + * @ARM_NO_CHANGE: The ARM operating point is unchanged + * @ARM_100_OPP: The new ARM operating point is arm100opp + * @ARM_50_OPP: The new ARM operating point is arm50opp + * @ARM_MAX_OPP: Operating point is "max" (more than 100) + * @ARM_MAX_FREQ100OPP: Set max opp if available, else 100 + * @ARM_EXTCLK: The new ARM operating point is armExtClk + */ +enum arm_opp { + ARM_OPP_INIT = 0x00, + ARM_NO_CHANGE = 0x01, + ARM_100_OPP = 0x02, + ARM_50_OPP = 0x03, + ARM_MAX_OPP = 0x04, + ARM_MAX_FREQ100OPP = 0x05, + ARM_EXTCLK = 0x07 +}; + +/** + * enum ape_opp - APE OPP states definition + * @APE_OPP_INIT: + * @APE_NO_CHANGE: The APE operating point is unchanged + * @APE_100_OPP: The new APE operating point is ape100opp + * 
@APE_50_OPP: 50% + */ +enum ape_opp { + APE_OPP_INIT = 0x00, + APE_NO_CHANGE = 0x01, + APE_100_OPP = 0x02, + APE_50_OPP = 0x03 +}; + +/** + * enum hw_acc_state - State definition for hardware accelerator + * @HW_NO_CHANGE: The hardware accelerator state must remain unchanged + * @HW_OFF: The hardware accelerator must be switched off + * @HW_OFF_RAMRET: The hardware accelerator must be switched off with its + * internal RAM in retention + * @HW_ON: The hwa hardware accelerator hwa must be switched on * - * Author: Kumar Sanghvi - * Author: Sundar Iyer - * Author: Mattias Nilsson + * NOTE! Deprecated, to be removed when all users switched over to use the + * regulator API. + */ +enum hw_acc_state { + HW_NO_CHANGE = 0x00, + HW_OFF = 0x01, + HW_OFF_RAMRET = 0x02, + HW_ON = 0x04 +}; + +/** + * enum mbox_2_arm_stat - Status messages definition for mbox_arm + * @BOOT_TO_EXECUTEOK: The apBoot to apExecute state transition has been + * completed + * @DEEPSLEEPOK: The apExecute to apDeepSleep state transition has been + * completed + * @SLEEPOK: The apExecute to apSleep state transition has been completed + * @IDLEOK: The apExecute to apIdle state transition has been completed + * @SOFTRESETOK: The A9 watchdog/ SoftReset state has been completed + * @SOFTRESETGO : The A9 watchdog/SoftReset state is on going + * @BOOT_TO_EXECUTE: The apBoot to apExecute state transition is on going + * @EXECUTE_TO_DEEPSLEEP: The apExecute to apDeepSleep state transition is on + * going + * @DEEPSLEEP_TO_EXECUTE: The apDeepSleep to apExecute state transition is on + * going + * @DEEPSLEEP_TO_EXECUTEOK: The apDeepSleep to apExecute state transition has + * been completed + * @EXECUTE_TO_SLEEP: The apExecute to apSleep state transition is on going + * @SLEEP_TO_EXECUTE: The apSleep to apExecute state transition is on going + * @SLEEP_TO_EXECUTEOK: The apSleep to apExecute state transition has been + * completed + * @EXECUTE_TO_IDLE: The apExecute to apIdle state transition is on going + * @IDLE_TO_EXECUTE: The apIdle to apExecute state transition is on going + * @IDLE_TO_EXECUTEOK: The apIdle to apExecute state transition has been + * completed + * @INIT_STATUS: Status init + */ +enum ap_pwrsttr_status { + BOOT_TO_EXECUTEOK = 0xFF, + DEEPSLEEPOK = 0xFE, + SLEEPOK = 0xFD, + IDLEOK = 0xFC, + SOFTRESETOK = 0xFB, + SOFTRESETGO = 0xFA, + BOOT_TO_EXECUTE = 0xF9, + EXECUTE_TO_DEEPSLEEP = 0xF8, + DEEPSLEEP_TO_EXECUTE = 0xF7, + DEEPSLEEP_TO_EXECUTEOK = 0xF6, + EXECUTE_TO_SLEEP = 0xF5, + SLEEP_TO_EXECUTE = 0xF4, + SLEEP_TO_EXECUTEOK = 0xF3, + EXECUTE_TO_IDLE = 0xF2, + IDLE_TO_EXECUTE = 0xF1, + IDLE_TO_EXECUTEOK = 0xF0, + RDYTODS_RETURNTOEXE = 0xEF, + NORDYTODS_RETURNTOEXE = 0xEE, + EXETOSLEEP_RETURNTOEXE = 0xED, + EXETOIDLE_RETURNTOEXE = 0xEC, + INIT_STATUS = 0xEB, + + /*error messages */ + INITERROR = 0x00, + PLLARMLOCKP_ER = 0x01, + PLLDDRLOCKP_ER = 0x02, + PLLSOCLOCKP_ER = 0x03, + PLLSOCK1LOCKP_ER = 0x04, + ARMWFI_ER = 0x05, + SYSCLKOK_ER = 0x06, + I2C_NACK_DATA_ER = 0x07, + BOOT_ER = 0x08, + I2C_STATUS_ALWAYS_1 = 0x0A, + I2C_NACK_REG_ADDR_ER = 0x0B, + I2C_NACK_DATA0123_ER = 0x1B, + I2C_NACK_ADDR_ER = 0x1F, + CURAPPWRSTISNOT_BOOT = 0x20, + CURAPPWRSTISNOT_EXECUTE = 0x21, + CURAPPWRSTISNOT_SLEEPMODE = 0x22, + CURAPPWRSTISNOT_CORRECTFORIT10 = 0x23, + FIFO4500WUISNOT_WUPEVENT = 0x24, + PLL32KLOCKP_ER = 0x29, + DDRDEEPSLEEPOK_ER = 0x2A, + ROMCODEREADY_ER = 0x50, + WUPBEFOREDS = 0x51, + DDRCONFIG_ER = 0x52, + WUPBEFORESLEEP = 0x53, + WUPBEFOREIDLE = 0x54 +}; /* earlier called as mbox_2_arm_stat */ + +/** + * enum dvfs_stat - DVFS 
status messages definition + * @DVFS_GO: A state transition DVFS is on going + * @DVFS_ARM100OPPOK: The state transition DVFS has been completed for 100OPP + * @DVFS_ARM50OPPOK: The state transition DVFS has been completed for 50OPP + * @DVFS_ARMEXTCLKOK: The state transition DVFS has been completed for EXTCLK + * @DVFS_NOCHGTCLKOK: The state transition DVFS has been completed for + * NOCHGCLK + * @DVFS_INITSTATUS: Value init + */ +enum dvfs_stat { + DVFS_GO = 0xFF, + DVFS_ARM100OPPOK = 0xFE, + DVFS_ARM50OPPOK = 0xFD, + DVFS_ARMEXTCLKOK = 0xFC, + DVFS_NOCHGTCLKOK = 0xFB, + DVFS_INITSTATUS = 0x00 +}; + +/** + * enum sva_mmdsp_stat - SVA MMDSP status messages + * @SVA_MMDSP_GO: SVAMMDSP interrupt has happened + * @SVA_MMDSP_INIT: Status init + */ +enum sva_mmdsp_stat { + SVA_MMDSP_GO = 0xFF, + SVA_MMDSP_INIT = 0x00 +}; + +/** + * enum sia_mmdsp_stat - SIA MMDSP status messages + * @SIA_MMDSP_GO: SIAMMDSP interrupt has happened + * @SIA_MMDSP_INIT: Status init + */ +enum sia_mmdsp_stat { + SIA_MMDSP_GO = 0xFF, + SIA_MMDSP_INIT = 0x00 +}; + +/** + * enum mbox_to_arm_err - Error messages definition + * @INIT_ERR: Init value + * @PLLARMLOCKP_ERR: PLLARM has not been correctly locked in given time + * @PLLDDRLOCKP_ERR: PLLDDR has not been correctly locked in the given time + * @PLLSOC0LOCKP_ERR: PLLSOC0 has not been correctly locked in the given time + * @PLLSOC1LOCKP_ERR: PLLSOC1 has not been correctly locked in the given time + * @ARMWFI_ERR: The ARM WFI has not been correctly executed in the given time + * @SYSCLKOK_ERR: The SYSCLK is not available in the given time + * @BOOT_ERR: Romcode has not validated the XP70 self reset in the given time + * @ROMCODESAVECONTEXT: The Romcode didn.t correctly save it secure context + * @VARMHIGHSPEEDVALTO_ERR: The ARM high speed supply value transfered + * through I2C has not been correctly executed in the given time + * @VARMHIGHSPEEDACCESS_ERR: The command value of VarmHighSpeedVal transfered + * through I2C has not been correctly executed in the given time + * @VARMLOWSPEEDVALTO_ERR:The ARM low speed supply value transfered through + * I2C has not been correctly executed in the given time + * @VARMLOWSPEEDACCESS_ERR: The command value of VarmLowSpeedVal transfered + * through I2C has not been correctly executed in the given time + * @VARMRETENTIONVALTO_ERR: The ARM retention supply value transfered through + * I2C has not been correctly executed in the given time + * @VARMRETENTIONACCESS_ERR: The command value of VarmRetentionVal transfered + * through I2C has not been correctly executed in the given time + * @VAPEHIGHSPEEDVALTO_ERR: The APE highspeed supply value transfered through + * I2C has not been correctly executed in the given time + * @VSAFEHPVALTO_ERR: The SAFE high power supply value transfered through I2C + * has not been correctly executed in the given time + * @VMODSEL1VALTO_ERR: The MODEM sel1 supply value transfered through I2C has + * not been correctly executed in the given time + * @VMODSEL2VALTO_ERR: The MODEM sel2 supply value transfered through I2C has + * not been correctly executed in the given time + * @VARMOFFACCESS_ERR: The command value of Varm ON/OFF transfered through + * I2C has not been correctly executed in the given time + * @VAPEOFFACCESS_ERR: The command value of Vape ON/OFF transfered through + * I2C has not been correctly executed in the given time + * @VARMRETACCES_ERR: The command value of Varm retention ON/OFF transfered + * through I2C has not been correctly executed in the given time + * 
@CURAPPWRSTISNOTBOOT:Generated when Arm want to do power state transition + * ApBoot to ApExecute but the power current state is not Apboot + * @CURAPPWRSTISNOTEXECUTE: Generated when Arm want to do power state + * transition from ApExecute to others power state but the + * power current state is not ApExecute + * @CURAPPWRSTISNOTSLEEPMODE: Generated when wake up events are transmitted + * but the power current state is not ApDeepSleep/ApSleep/ApIdle + * @CURAPPWRSTISNOTCORRECTDBG: Generated when wake up events are transmitted + * but the power current state is not correct + * @ARMREGU1VALTO_ERR:The ArmRegu1 value transferred through I2C has not + * been correctly executed in the given time + * @ARMREGU2VALTO_ERR: The ArmRegu2 value transferred through I2C has not + * been correctly executed in the given time + * @VAPEREGUVALTO_ERR: The VApeRegu value transfered through I2C has not + * been correctly executed in the given time + * @VSMPS3REGUVALTO_ERR: The VSmps3Regu value transfered through I2C has not + * been correctly executed in the given time + * @VMODREGUVALTO_ERR: The VModemRegu value transfered through I2C has not + * been correctly executed in the given time + */ +enum mbox_to_arm_err { + INIT_ERR = 0x00, + PLLARMLOCKP_ERR = 0x01, + PLLDDRLOCKP_ERR = 0x02, + PLLSOC0LOCKP_ERR = 0x03, + PLLSOC1LOCKP_ERR = 0x04, + ARMWFI_ERR = 0x05, + SYSCLKOK_ERR = 0x06, + BOOT_ERR = 0x07, + ROMCODESAVECONTEXT = 0x08, + VARMHIGHSPEEDVALTO_ERR = 0x10, + VARMHIGHSPEEDACCESS_ERR = 0x11, + VARMLOWSPEEDVALTO_ERR = 0x12, + VARMLOWSPEEDACCESS_ERR = 0x13, + VARMRETENTIONVALTO_ERR = 0x14, + VARMRETENTIONACCESS_ERR = 0x15, + VAPEHIGHSPEEDVALTO_ERR = 0x16, + VSAFEHPVALTO_ERR = 0x17, + VMODSEL1VALTO_ERR = 0x18, + VMODSEL2VALTO_ERR = 0x19, + VARMOFFACCESS_ERR = 0x1A, + VAPEOFFACCESS_ERR = 0x1B, + VARMRETACCES_ERR = 0x1C, + CURAPPWRSTISNOTBOOT = 0x20, + CURAPPWRSTISNOTEXECUTE = 0x21, + CURAPPWRSTISNOTSLEEPMODE = 0x22, + CURAPPWRSTISNOTCORRECTDBG = 0x23, + ARMREGU1VALTO_ERR = 0x24, + ARMREGU2VALTO_ERR = 0x25, + VAPEREGUVALTO_ERR = 0x26, + VSMPS3REGUVALTO_ERR = 0x27, + VMODREGUVALTO_ERR = 0x28 +}; + +enum hw_acc { + SVAMMDSP = 0, + SVAPIPE = 1, + SIAMMDSP = 2, + SIAPIPE = 3, + SGA = 4, + B2R2MCDE = 5, + ESRAM12 = 6, + ESRAM34 = 7, +}; + +enum cs_pwrmgt { + PWRDNCS0 = 0, + WKUPCS0 = 1, + PWRDNCS1 = 2, + WKUPCS1 = 3 +}; + +/* Defs related to autonomous power management */ + +/** + * enum sia_sva_pwr_policy - Power policy + * @NO_CHGT: No change + * @DSPOFF_HWPOFF: + * @DSPOFFRAMRET_HWPOFF: + * @DSPCLKOFF_HWPOFF: + * @DSPCLKOFF_HWPCLKOFF: * - * License Terms: GNU General Public License v2 + */ +enum sia_sva_pwr_policy { + NO_CHGT = 0x0, + DSPOFF_HWPOFF = 0x1, + DSPOFFRAMRET_HWPOFF = 0x2, + DSPCLKOFF_HWPOFF = 0x3, + DSPCLKOFF_HWPCLKOFF = 0x4, +}; + +/** + * enum auto_enable - Auto Power enable + * @AUTO_OFF: + * @AUTO_ON: + * + */ +enum auto_enable { + AUTO_OFF = 0x0, + AUTO_ON = 0x1, +}; + +/* End of file previously known as prcmu-fw-defs_v1.h */ + +/* PRCMU Wakeup defines */ +enum prcmu_wakeup_index { + PRCMU_WAKEUP_INDEX_RTC, + PRCMU_WAKEUP_INDEX_RTT0, + PRCMU_WAKEUP_INDEX_RTT1, + PRCMU_WAKEUP_INDEX_HSI0, + PRCMU_WAKEUP_INDEX_HSI1, + PRCMU_WAKEUP_INDEX_USB, + PRCMU_WAKEUP_INDEX_ABB, + PRCMU_WAKEUP_INDEX_ABB_FIFO, + PRCMU_WAKEUP_INDEX_ARM, + NUM_PRCMU_WAKEUP_INDICES +}; +#define PRCMU_WAKEUP(_name) (BIT(PRCMU_WAKEUP_INDEX_##_name)) + +/* PRCMU QoS APE OPP class */ +#define PRCMU_QOS_APE_OPP 1 +#define PRCMU_QOS_DDR_OPP 2 +#define PRCMU_QOS_DEFAULT_VALUE -1 + +/** + * enum hw_acc_dev - enum for hw accelerators + * 
@HW_ACC_SVAMMDSP: for SVAMMDSP + * @HW_ACC_SVAPIPE: for SVAPIPE + * @HW_ACC_SIAMMDSP: for SIAMMDSP + * @HW_ACC_SIAPIPE: for SIAPIPE + * @HW_ACC_SGA: for SGA + * @HW_ACC_B2R2: for B2R2 + * @HW_ACC_MCDE: for MCDE + * @HW_ACC_ESRAM1: for ESRAM1 + * @HW_ACC_ESRAM2: for ESRAM2 + * @HW_ACC_ESRAM3: for ESRAM3 + * @HW_ACC_ESRAM4: for ESRAM4 + * @NUM_HW_ACC: number of hardware accelerators + * + * Different hw accelerators which can be turned ON/ + * OFF or put into retention (MMDSPs and ESRAMs). + * Used with EPOD API. * - * PRCM Unit f/w API + * NOTE! Deprecated, to be removed when all users switched over to use the + * regulator API. + */ +enum hw_acc_dev { + HW_ACC_SVAMMDSP, + HW_ACC_SVAPIPE, + HW_ACC_SIAMMDSP, + HW_ACC_SIAPIPE, + HW_ACC_SGA, + HW_ACC_B2R2, + HW_ACC_MCDE, + HW_ACC_ESRAM1, + HW_ACC_ESRAM2, + HW_ACC_ESRAM3, + HW_ACC_ESRAM4, + NUM_HW_ACC +}; + +/* + * Ids for all EPODs (power domains) + * - EPOD_ID_SVAMMDSP: power domain for SVA MMDSP + * - EPOD_ID_SVAPIPE: power domain for SVA pipe + * - EPOD_ID_SIAMMDSP: power domain for SIA MMDSP + * - EPOD_ID_SIAPIPE: power domain for SIA pipe + * - EPOD_ID_SGA: power domain for SGA + * - EPOD_ID_B2R2_MCDE: power domain for B2R2 and MCDE + * - EPOD_ID_ESRAM12: power domain for ESRAM 1 and 2 + * - EPOD_ID_ESRAM34: power domain for ESRAM 3 and 4 + * - NUM_EPOD_ID: number of power domains + */ +#define EPOD_ID_SVAMMDSP 0 +#define EPOD_ID_SVAPIPE 1 +#define EPOD_ID_SIAMMDSP 2 +#define EPOD_ID_SIAPIPE 3 +#define EPOD_ID_SGA 4 +#define EPOD_ID_B2R2_MCDE 5 +#define EPOD_ID_ESRAM12 6 +#define EPOD_ID_ESRAM34 7 +#define NUM_EPOD_ID 8 + +/* + * state definition for EPOD (power domain) + * - EPOD_STATE_NO_CHANGE: The EPOD should remain unchanged + * - EPOD_STATE_OFF: The EPOD is switched off + * - EPOD_STATE_RAMRET: The EPOD is switched off with its internal RAM in + * retention + * - EPOD_STATE_ON_CLK_OFF: The EPOD is switched on, clock is still off + * - EPOD_STATE_ON: Same as above, but with clock enabled */ -#ifndef __MACH_PRCMU_H -#define __MACH_PRCMU_H +#define EPOD_STATE_NO_CHANGE 0x00 +#define EPOD_STATE_OFF 0x01 +#define EPOD_STATE_RAMRET 0x02 +#define EPOD_STATE_ON_CLK_OFF 0x03 +#define EPOD_STATE_ON 0x04 +/* + * CLKOUT sources + */ +#define PRCMU_CLKSRC_CLK38M 0x00 +#define PRCMU_CLKSRC_ACLK 0x01 +#define PRCMU_CLKSRC_SYSCLK 0x02 +#define PRCMU_CLKSRC_LCDCLK 0x03 +#define PRCMU_CLKSRC_SDMMCCLK 0x04 +#define PRCMU_CLKSRC_TVCLK 0x05 +#define PRCMU_CLKSRC_TIMCLK 0x06 +#define PRCMU_CLKSRC_CLK009 0x07 +/* These are only valid for CLKOUT1: */ +#define PRCMU_CLKSRC_SIAMMDSPCLK 0x40 +#define PRCMU_CLKSRC_I2CCLK 0x41 +#define PRCMU_CLKSRC_MSP02CLK 0x42 +#define PRCMU_CLKSRC_ARMPLL_OBSCLK 0x43 +#define PRCMU_CLKSRC_HSIRXCLK 0x44 +#define PRCMU_CLKSRC_HSITXCLK 0x45 +#define PRCMU_CLKSRC_ARMCLKFIX 0x46 +#define PRCMU_CLKSRC_HDMICLK 0x47 + +/* + * Definitions for autonomous power management configuration. + */ + +#define PRCMU_AUTO_PM_OFF 0 +#define PRCMU_AUTO_PM_ON 1 + +#define PRCMU_AUTO_PM_POWER_ON_HSEM BIT(0) +#define PRCMU_AUTO_PM_POWER_ON_ABB_FIFO_IT BIT(1) + +enum prcmu_auto_pm_policy { + PRCMU_AUTO_PM_POLICY_NO_CHANGE, + PRCMU_AUTO_PM_POLICY_DSP_OFF_HWP_OFF, + PRCMU_AUTO_PM_POLICY_DSP_OFF_RAMRET_HWP_OFF, + PRCMU_AUTO_PM_POLICY_DSP_CLK_OFF_HWP_OFF, + PRCMU_AUTO_PM_POLICY_DSP_CLK_OFF_HWP_CLK_OFF, +}; + +/** + * struct prcmu_auto_pm_config - Autonomous power management configuration. + * @sia_auto_pm_enable: SIA autonomous pm enable. (PRCMU_AUTO_PM_{OFF,ON}) + * @sia_power_on: SIA power ON enable. 
(PRCMU_AUTO_PM_POWER_ON_* bitmask) + * @sia_policy: SIA power policy. (enum prcmu_auto_pm_policy) + * @sva_auto_pm_enable: SVA autonomous pm enable. (PRCMU_AUTO_PM_{OFF,ON}) + * @sva_power_on: SVA power ON enable. (PRCMU_AUTO_PM_POWER_ON_* bitmask) + * @sva_policy: SVA power policy. (enum prcmu_auto_pm_policy) + */ +struct prcmu_auto_pm_config { + u8 sia_auto_pm_enable; + u8 sia_power_on; + u8 sia_policy; + u8 sva_auto_pm_enable; + u8 sva_power_on; + u8 sva_policy; +}; + +/** + * enum ddr_opp - DDR OPP states definition + * @DDR_100_OPP: The new DDR operating point is ddr100opp + * @DDR_50_OPP: The new DDR operating point is ddr50opp + * @DDR_25_OPP: The new DDR operating point is ddr25opp + */ +enum ddr_opp { + DDR_100_OPP = 0x00, + DDR_50_OPP = 0x01, + DDR_25_OPP = 0x02, +}; + +/* + * Clock identifiers. + */ +enum prcmu_clock { + PRCMU_SGACLK, + PRCMU_UARTCLK, + PRCMU_MSP02CLK, + PRCMU_MSP1CLK, + PRCMU_I2CCLK, + PRCMU_SDMMCCLK, + PRCMU_SLIMCLK, + PRCMU_PER1CLK, + PRCMU_PER2CLK, + PRCMU_PER3CLK, + PRCMU_PER5CLK, + PRCMU_PER6CLK, + PRCMU_PER7CLK, + PRCMU_LCDCLK, + PRCMU_BMLCLK, + PRCMU_HSITXCLK, + PRCMU_HSIRXCLK, + PRCMU_HDMICLK, + PRCMU_APEATCLK, + PRCMU_APETRACECLK, + PRCMU_MCDECLK, + PRCMU_IPI2CCLK, + PRCMU_DSIALTCLK, + PRCMU_DMACLK, + PRCMU_B2R2CLK, + PRCMU_TVCLK, + PRCMU_SSPCLK, + PRCMU_RNGCLK, + PRCMU_UICCCLK, + PRCMU_NUM_REG_CLOCKS, + PRCMU_SYSCLK = PRCMU_NUM_REG_CLOCKS, + PRCMU_TIMCLK, +}; + +/* + * Definitions for controlling ESRAM0 in deep sleep. + */ +#define ESRAM0_DEEP_SLEEP_STATE_OFF 1 +#define ESRAM0_DEEP_SLEEP_STATE_RET 2 + +#ifdef CONFIG_MFD_DB8500_PRCMU void __init prcmu_early_init(void); +int prcmu_set_display_clocks(void); +int prcmu_disable_dsipll(void); +int prcmu_enable_dsipll(void); +#else +static inline void __init prcmu_early_init(void) {} +#endif + +#ifdef CONFIG_MFD_DB8500_PRCMU + +int prcmu_set_rc_a2p(enum romcode_write); +enum romcode_read prcmu_get_rc_p2a(void); +enum ap_pwrst prcmu_get_xp70_current_state(void); +int prcmu_set_power_state(u8 state, bool keep_ulp_clk, bool keep_ap_pll); + +void prcmu_enable_wakeups(u32 wakeups); +static inline void prcmu_disable_wakeups(void) +{ + prcmu_enable_wakeups(0); +} + +void prcmu_config_abb_event_readout(u32 abb_events); +void prcmu_get_abb_event_buffer(void __iomem **buf); +int prcmu_set_arm_opp(u8 opp); +int prcmu_get_arm_opp(void); +bool prcmu_has_arm_maxopp(void); +bool prcmu_is_u8400(void); +int prcmu_set_ape_opp(u8 opp); +int prcmu_get_ape_opp(void); +int prcmu_request_ape_opp_100_voltage(bool enable); +int prcmu_release_usb_wakeup_state(void); +int prcmu_set_ddr_opp(u8 opp); +int prcmu_get_ddr_opp(void); +unsigned long prcmu_qos_get_cpufreq_opp_delay(void); +void prcmu_qos_set_cpufreq_opp_delay(unsigned long); +/* NOTE! 
Use regulator framework instead */ +int prcmu_set_hwacc(u16 hw_acc_dev, u8 state); +int prcmu_set_epod(u16 epod_id, u8 epod_state); +void prcmu_configure_auto_pm(struct prcmu_auto_pm_config *sleep, + struct prcmu_auto_pm_config *idle); +bool prcmu_is_auto_pm_enabled(void); + +int prcmu_config_clkout(u8 clkout, u8 source, u8 div); +int prcmu_request_clock(u8 clock, bool enable); +int prcmu_set_clock_divider(u8 clock, u8 divider); +int prcmu_config_esram0_deep_sleep(u8 state); +int prcmu_config_hotdog(u8 threshold); +int prcmu_config_hotmon(u8 low, u8 high); +int prcmu_start_temp_sense(u16 cycles32k); +int prcmu_stop_temp_sense(void); int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size); int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size); -int prcmu_set_ape_opp(enum prcmu_ape_opp opp); -int prcmu_set_cpu_opp(enum prcmu_cpu_opp opp); -int prcmu_set_ape_cpu_opps(enum prcmu_ape_opp ape_opp, - enum prcmu_cpu_opp cpu_opp); -enum prcmu_ape_opp prcmu_get_ape_opp(void); -int prcmu_get_cpu_opp(void); -bool prcmu_has_arm_maxopp(void); -#endif /* __MACH_PRCMU_H */ +void prcmu_ac_wake_req(void); +void prcmu_ac_sleep_req(void); +void prcmu_system_reset(u16 reset_code); +void prcmu_modem_reset(void); +bool prcmu_is_ac_wake_requested(void); +void prcmu_enable_spi2(void); +void prcmu_disable_spi2(void); + +#else /* !CONFIG_MFD_DB8500_PRCMU */ + +static inline int prcmu_set_rc_a2p(enum romcode_write code) +{ + return 0; +} + +static inline enum romcode_read prcmu_get_rc_p2a(void) +{ + return INIT; +} + +static inline enum ap_pwrst prcmu_get_xp70_current_state(void) +{ + return AP_EXECUTE; +} + +static inline int prcmu_set_power_state(u8 state, bool keep_ulp_clk, + bool keep_ap_pll) +{ + return 0; +} + +static inline void prcmu_enable_wakeups(u32 wakeups) {} + +static inline void prcmu_disable_wakeups(void) {} + +static inline void prcmu_config_abb_event_readout(u32 abb_events) {} + +static inline int prcmu_set_arm_opp(u8 opp) +{ + return 0; +} + +static inline int prcmu_get_arm_opp(void) +{ + return ARM_100_OPP; +} + +static bool prcmu_has_arm_maxopp(void) +{ + return false; +} + +static bool prcmu_is_u8400(void) +{ + return false; +} + +static inline int prcmu_set_ape_opp(u8 opp) +{ + return 0; +} + +static inline int prcmu_get_ape_opp(void) +{ + return APE_100_OPP; +} + +static inline int prcmu_request_ape_opp_100_voltage(bool enable) +{ + return 0; +} + +static inline int prcmu_release_usb_wakeup_state(void) +{ + return 0; +} + +static inline int prcmu_set_ddr_opp(u8 opp) +{ + return 0; +} + +static inline int prcmu_get_ddr_opp(void) +{ + return DDR_100_OPP; +} + +static inline unsigned long prcmu_qos_get_cpufreq_opp_delay(void) +{ + return 0; +} + +static inline void prcmu_qos_set_cpufreq_opp_delay(unsigned long n) {} + +static inline int prcmu_set_hwacc(u16 hw_acc_dev, u8 state) +{ + return 0; +} + +static inline void prcmu_configure_auto_pm(struct prcmu_auto_pm_config *sleep, + struct prcmu_auto_pm_config *idle) +{ +} + +static inline bool prcmu_is_auto_pm_enabled(void) +{ + return false; +} + +static inline int prcmu_config_clkout(u8 clkout, u8 source, u8 div) +{ + return 0; +} + +static inline int prcmu_request_clock(u8 clock, bool enable) +{ + return 0; +} + +static inline int prcmu_set_clock_divider(u8 clock, u8 divider) +{ + return 0; +} + +int prcmu_config_esram0_deep_sleep(u8 state) +{ + return 0; +} + +static inline int prcmu_config_hotdog(u8 threshold) +{ + return 0; +} + +static inline int prcmu_config_hotmon(u8 low, u8 high) +{ + return 0; +} + +static inline int 
prcmu_start_temp_sense(u16 cycles32k) +{ + return 0; +} + +static inline int prcmu_stop_temp_sense(void) +{ + return 0; +} + +static inline int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size) +{ + return -ENOSYS; +} + +static inline int prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size) +{ + return -ENOSYS; +} + +static inline void prcmu_ac_wake_req(void) {} + +static inline void prcmu_ac_sleep_req(void) {} + +static inline void prcmu_system_reset(u16 reset_code) {} + +static inline void prcmu_modem_reset(void) {} + +static inline bool prcmu_is_ac_wake_requested(void) +{ + return false; +} + +#ifndef CONFIG_UX500_SOC_DB5500 +static inline int prcmu_set_display_clocks(void) +{ + return 0; +} + +static inline int prcmu_disable_dsipll(void) +{ + return 0; +} + +static inline int prcmu_enable_dsipll(void) +{ + return 0; +} +#endif + +static inline int prcmu_enable_spi2(void) +{ + return 0; +} + +static inline int prcmu_disable_spi2(void) +{ + return 0; +} + +#endif /* !CONFIG_MFD_DB8500_PRCMU */ + +#ifdef CONFIG_UX500_PRCMU_QOS_POWER +int prcmu_qos_requirement(int pm_qos_class); +int prcmu_qos_add_requirement(int pm_qos_class, char *name, s32 value); +int prcmu_qos_update_requirement(int pm_qos_class, char *name, s32 new_value); +void prcmu_qos_remove_requirement(int pm_qos_class, char *name); +int prcmu_qos_add_notifier(int prcmu_qos_class, + struct notifier_block *notifier); +int prcmu_qos_remove_notifier(int prcmu_qos_class, + struct notifier_block *notifier); +#else +static inline int prcmu_qos_requirement(int prcmu_qos_class) +{ + return 0; +} + +static inline int prcmu_qos_add_requirement(int prcmu_qos_class, + char *name, s32 value) +{ + return 0; +} + +static inline int prcmu_qos_update_requirement(int prcmu_qos_class, + char *name, s32 new_value) +{ + return 0; +} + +static inline void prcmu_qos_remove_requirement(int prcmu_qos_class, char *name) +{ +} + +static inline int prcmu_qos_add_notifier(int prcmu_qos_class, + struct notifier_block *notifier) +{ + return 0; +} +static inline int prcmu_qos_remove_notifier(int prcmu_qos_class, + struct notifier_block *notifier) +{ + return 0; +} + +#endif + +#endif /* __MFD_DB8500_PRCMU_H */ -- cgit v1.2.3 From 8317797ca657081ed81312ea3501f3a3d59d52e9 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 3 May 2011 18:14:48 +0200 Subject: mfd: add DB5500 PRCMU driver This adds the DB5500 PRCMU driver. Right now this one is pretty restricted in functionality, exposing a simple interface to send I2C messages. 
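For illustration only (not part of this change), the sketch below shows how a
client might use the exported accessors. db5500_prcmu_abb_read() and
db5500_prcmu_abb_write() are the functions added by this patch; the slave
address 0x0c, the register offset 0x01 and the wrapper name are placeholder
values invented for the example.

	#include <linux/mfd/db5500-prcmu.h>

	/*
	 * Read one ABB register over the PRCMU mailbox 5 I2C channel,
	 * set a bit and write the value back.  Transfers of 1 to 4 bytes
	 * are supported; anything else returns -EINVAL.
	 */
	static int example_abb_set_bit(void)
	{
		u8 val;
		int ret;

		ret = db5500_prcmu_abb_read(0x0c, 0x01, &val, 1);
		if (ret)
			return ret;	/* -EIO if the mailbox transfer failed */

		val |= 0x01;
		return db5500_prcmu_abb_write(0x0c, 0x01, &val, 1);
	}

Both accessors take a mutex and wait for the PRCMU firmware to complete the
transfer, so they may only be called from process context.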
Acked-by: Samuel Ortiz Signed-off-by: Linus Walleij --- arch/arm/mach-ux500/Kconfig | 1 + arch/arm/mach-ux500/cpu.c | 3 + drivers/mfd/Kconfig | 10 + drivers/mfd/Makefile | 1 + drivers/mfd/db5500-prcmu-regs.h | 115 ++++++++++ drivers/mfd/db5500-prcmu.c | 448 +++++++++++++++++++++++++++++++++++++++ include/linux/mfd/db5500-prcmu.h | 45 ++++ 7 files changed, 623 insertions(+) create mode 100644 drivers/mfd/db5500-prcmu-regs.h create mode 100644 drivers/mfd/db5500-prcmu.c create mode 100644 include/linux/mfd/db5500-prcmu.h (limited to 'include/linux') diff --git a/arch/arm/mach-ux500/Kconfig b/arch/arm/mach-ux500/Kconfig index 8071d2746f70..365c4c35faa7 100644 --- a/arch/arm/mach-ux500/Kconfig +++ b/arch/arm/mach-ux500/Kconfig @@ -12,6 +12,7 @@ menu "Ux500 SoC" config UX500_SOC_DB5500 bool "DB5500" + select MFD_DB5500_PRCMU config UX500_SOC_DB8500 bool "DB8500" diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c index 11360f734cec..1da23bb87c16 100644 --- a/arch/arm/mach-ux500/cpu.c +++ b/arch/arm/mach-ux500/cpu.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -49,6 +50,8 @@ void __init ux500_init_irq(void) * Init clocks here so that they are available for system timer * initialization. */ + if (cpu_is_u5500()) + db5500_prcmu_early_init(); if (cpu_is_u8500()) prcmu_early_init(); clk_init(); diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 7eaeb9750793..481770ab2716 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -585,6 +585,16 @@ config MFD_DB8500_PRCMU system controller running an XP70 microprocessor, which is accessed through a register map. +config MFD_DB5500_PRCMU + bool "ST-Ericsson DB5500 Power Reset Control Management Unit" + depends on UX500_SOC_DB5500 + select MFD_CORE + help + Select this option to enable support for the DB5500 Power Reset + and Control Management Unit. This is basically an autonomous + system controller running an XP70 microprocessor, which is accessed + through a register map. 
+ config MFD_CS5535 tristate "Support for CS5535 and CS5536 southbridge core functions" select MFD_CORE diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 814c57a692a9..24aa44448daf 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -79,6 +79,7 @@ obj-$(CONFIG_AB8500_GPADC) += ab8500-gpadc.o obj-$(CONFIG_MFD_DB8500_PRCMU) += db8500-prcmu.o # ab8500-i2c need to come after db8500-prcmu (which provides the channel) obj-$(CONFIG_AB8500_I2C_CORE) += ab8500-i2c.o +obj-$(CONFIG_MFD_DB5500_PRCMU) += db5500-prcmu.o obj-$(CONFIG_MFD_TIMBERDALE) += timberdale.o obj-$(CONFIG_PMIC_ADP5520) += adp5520.o obj-$(CONFIG_LPC_SCH) += lpc_sch.o diff --git a/drivers/mfd/db5500-prcmu-regs.h b/drivers/mfd/db5500-prcmu-regs.h new file mode 100644 index 000000000000..9a8e9e4ddd33 --- /dev/null +++ b/drivers/mfd/db5500-prcmu-regs.h @@ -0,0 +1,115 @@ +/* + * Copyright (C) STMicroelectronics 2009 + * Copyright (C) ST-Ericsson SA 2010 + * + * Author: Kumar Sanghvi + * Author: Sundar Iyer + * + * License Terms: GNU General Public License v2 + * + * PRCM Unit registers + */ + +#ifndef __MACH_PRCMU_REGS_H +#define __MACH_PRCMU_REGS_H + +#include + +#define PRCM_ARM_PLLDIVPS (_PRCMU_BASE + 0x118) +#define PRCM_ARM_PLLDIVPS_ARM_BRM_RATE 0x3f +#define PRCM_ARM_PLLDIVPS_MAX_MASK 0xf + +#define PRCM_PLLARM_LOCKP (_PRCMU_BASE + 0x0a8) +#define PRCM_PLLARM_LOCKP_PRCM_PLLARM_LOCKP3 0x2 + +#define PRCM_ARM_CHGCLKREQ (_PRCMU_BASE + 0x114) +#define PRCM_ARM_CHGCLKREQ_PRCM_ARM_CHGCLKREQ 0x1 + +#define PRCM_PLLARM_ENABLE (_PRCMU_BASE + 0x98) +#define PRCM_PLLARM_ENABLE_PRCM_PLLARM_ENABLE 0x1 +#define PRCM_PLLARM_ENABLE_PRCM_PLLARM_COUNTON 0x100 + +#define PRCM_ARMCLKFIX_MGT (_PRCMU_BASE + 0x0) +#define PRCM_A9_RESETN_CLR (_PRCMU_BASE + 0x1f4) +#define PRCM_A9_RESETN_SET (_PRCMU_BASE + 0x1f0) +#define PRCM_ARM_LS_CLAMP (_PRCMU_BASE + 0x30c) +#define PRCM_SRAM_A9 (_PRCMU_BASE + 0x308) + +/* ARM WFI Standby signal register */ +#define PRCM_ARM_WFI_STANDBY (_PRCMU_BASE + 0x130) +#define PRCM_IOCR (_PRCMU_BASE + 0x310) +#define PRCM_IOCR_IOFORCE 0x1 + +/* CPU mailbox registers */ +#define PRCM_MBOX_CPU_VAL (_PRCMU_BASE + 0x0fc) +#define PRCM_MBOX_CPU_SET (_PRCMU_BASE + 0x100) +#define PRCM_MBOX_CPU_CLR (_PRCMU_BASE + 0x104) + +/* Dual A9 core interrupt management unit registers */ +#define PRCM_A9_MASK_REQ (_PRCMU_BASE + 0x328) +#define PRCM_A9_MASK_REQ_PRCM_A9_MASK_REQ 0x1 + +#define PRCM_A9_MASK_ACK (_PRCMU_BASE + 0x32c) +#define PRCM_ARMITMSK31TO0 (_PRCMU_BASE + 0x11c) +#define PRCM_ARMITMSK63TO32 (_PRCMU_BASE + 0x120) +#define PRCM_ARMITMSK95TO64 (_PRCMU_BASE + 0x124) +#define PRCM_ARMITMSK127TO96 (_PRCMU_BASE + 0x128) +#define PRCM_POWER_STATE_VAL (_PRCMU_BASE + 0x25C) +#define PRCM_ARMITVAL31TO0 (_PRCMU_BASE + 0x260) +#define PRCM_ARMITVAL63TO32 (_PRCMU_BASE + 0x264) +#define PRCM_ARMITVAL95TO64 (_PRCMU_BASE + 0x268) +#define PRCM_ARMITVAL127TO96 (_PRCMU_BASE + 0x26C) + +#define PRCM_HOSTACCESS_REQ (_PRCMU_BASE + 0x334) +#define ARM_WAKEUP_MODEM 0x1 + +#define PRCM_ARM_IT1_CLEAR (_PRCMU_BASE + 0x48C) +#define PRCM_ARM_IT1_VAL (_PRCMU_BASE + 0x494) +#define PRCM_HOLD_EVT (_PRCMU_BASE + 0x174) + +#define PRCM_ITSTATUS0 (_PRCMU_BASE + 0x148) +#define PRCM_ITSTATUS1 (_PRCMU_BASE + 0x150) +#define PRCM_ITSTATUS2 (_PRCMU_BASE + 0x158) +#define PRCM_ITSTATUS3 (_PRCMU_BASE + 0x160) +#define PRCM_ITSTATUS4 (_PRCMU_BASE + 0x168) +#define PRCM_ITSTATUS5 (_PRCMU_BASE + 0x484) +#define PRCM_ITCLEAR5 (_PRCMU_BASE + 0x488) +#define PRCM_ARMIT_MASKXP70_IT (_PRCMU_BASE + 0x1018) + +/* System reset register */ +#define 
PRCM_APE_SOFTRST (_PRCMU_BASE + 0x228) + +/* Level shifter and clamp control registers */ +#define PRCM_MMIP_LS_CLAMP_SET (_PRCMU_BASE + 0x420) +#define PRCM_MMIP_LS_CLAMP_CLR (_PRCMU_BASE + 0x424) + +/* PRCMU clock/PLL/reset registers */ +#define PRCM_PLLDSI_FREQ (_PRCMU_BASE + 0x500) +#define PRCM_PLLDSI_ENABLE (_PRCMU_BASE + 0x504) +#define PRCM_PLLDSI_LOCKP (_PRCMU_BASE + 0x508) +#define PRCM_LCDCLK_MGT (_PRCMU_BASE + 0x044) +#define PRCM_MCDECLK_MGT (_PRCMU_BASE + 0x064) +#define PRCM_HDMICLK_MGT (_PRCMU_BASE + 0x058) +#define PRCM_TVCLK_MGT (_PRCMU_BASE + 0x07c) +#define PRCM_DSI_PLLOUT_SEL (_PRCMU_BASE + 0x530) +#define PRCM_DSITVCLK_DIV (_PRCMU_BASE + 0x52C) +#define PRCM_PLLDSI_LOCKP (_PRCMU_BASE + 0x508) +#define PRCM_APE_RESETN_SET (_PRCMU_BASE + 0x1E4) +#define PRCM_APE_RESETN_CLR (_PRCMU_BASE + 0x1E8) +#define PRCM_CLKOCR (_PRCMU_BASE + 0x1CC) + +/* ePOD and memory power signal control registers */ +#define PRCM_EPOD_C_SET (_PRCMU_BASE + 0x410) +#define PRCM_SRAM_LS_SLEEP (_PRCMU_BASE + 0x304) + +/* Debug power control unit registers */ +#define PRCM_POWER_STATE_SET (_PRCMU_BASE + 0x254) + +/* Miscellaneous unit registers */ +#define PRCM_DSI_SW_RESET (_PRCMU_BASE + 0x324) +#define PRCM_GPIOCR (_PRCMU_BASE + 0x138) +#define PRCM_GPIOCR_DBG_STM_MOD_CMD1 0x800 +#define PRCM_GPIOCR_DBG_UARTMOD_CMD0 0x1 + + +#endif /* __MACH_PRCMU__REGS_H */ diff --git a/drivers/mfd/db5500-prcmu.c b/drivers/mfd/db5500-prcmu.c new file mode 100644 index 000000000000..9dbb3cab4a6f --- /dev/null +++ b/drivers/mfd/db5500-prcmu.c @@ -0,0 +1,448 @@ +/* + * Copyright (C) ST-Ericsson SA 2010 + * + * License Terms: GNU General Public License v2 + * Author: Mattias Nilsson + * + * U5500 PRCM Unit interface driver + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "db5500-prcmu-regs.h" + +#define _PRCM_MB_HEADER (tcdm_base + 0xFE8) +#define PRCM_REQ_MB0_HEADER (_PRCM_MB_HEADER + 0x0) +#define PRCM_REQ_MB1_HEADER (_PRCM_MB_HEADER + 0x1) +#define PRCM_REQ_MB2_HEADER (_PRCM_MB_HEADER + 0x2) +#define PRCM_REQ_MB3_HEADER (_PRCM_MB_HEADER + 0x3) +#define PRCM_REQ_MB4_HEADER (_PRCM_MB_HEADER + 0x4) +#define PRCM_REQ_MB5_HEADER (_PRCM_MB_HEADER + 0x5) +#define PRCM_REQ_MB6_HEADER (_PRCM_MB_HEADER + 0x6) +#define PRCM_REQ_MB7_HEADER (_PRCM_MB_HEADER + 0x7) +#define PRCM_ACK_MB0_HEADER (_PRCM_MB_HEADER + 0x8) +#define PRCM_ACK_MB1_HEADER (_PRCM_MB_HEADER + 0x9) +#define PRCM_ACK_MB2_HEADER (_PRCM_MB_HEADER + 0xa) +#define PRCM_ACK_MB3_HEADER (_PRCM_MB_HEADER + 0xb) +#define PRCM_ACK_MB4_HEADER (_PRCM_MB_HEADER + 0xc) +#define PRCM_ACK_MB5_HEADER (_PRCM_MB_HEADER + 0xd) +#define PRCM_ACK_MB6_HEADER (_PRCM_MB_HEADER + 0xe) +#define PRCM_ACK_MB7_HEADER (_PRCM_MB_HEADER + 0xf) + +/* Req Mailboxes */ +#define PRCM_REQ_MB0 (tcdm_base + 0xFD8) +#define PRCM_REQ_MB1 (tcdm_base + 0xFCC) +#define PRCM_REQ_MB2 (tcdm_base + 0xFC4) +#define PRCM_REQ_MB3 (tcdm_base + 0xFC0) +#define PRCM_REQ_MB4 (tcdm_base + 0xF98) +#define PRCM_REQ_MB5 (tcdm_base + 0xF90) +#define PRCM_REQ_MB6 (tcdm_base + 0xF8C) +#define PRCM_REQ_MB7 (tcdm_base + 0xF84) + +/* Ack Mailboxes */ +#define PRCM_ACK_MB0 (tcdm_base + 0xF38) +#define PRCM_ACK_MB1 (tcdm_base + 0xF30) +#define PRCM_ACK_MB2 (tcdm_base + 0xF24) +#define PRCM_ACK_MB3 (tcdm_base + 0xF20) +#define PRCM_ACK_MB4 (tcdm_base + 0xF1C) +#define PRCM_ACK_MB5 (tcdm_base + 0xF14) +#define PRCM_ACK_MB6 (tcdm_base + 0xF0C) +#define PRCM_ACK_MB7 (tcdm_base + 0xF08) 
+ +enum mb_return_code { + RC_SUCCESS, + RC_FAIL, +}; + +/* Mailbox 0 headers. */ +enum mb0_header { + /* request */ + RMB0H_PWR_STATE_TRANS = 1, + RMB0H_WAKE_UP_CFG, + RMB0H_RD_WAKE_UP_ACK, + /* acknowledge */ + AMB0H_WAKE_UP = 1, +}; + +/* Mailbox 5 headers. */ +enum mb5_header { + MB5H_I2C_WRITE = 1, + MB5H_I2C_READ, +}; + +/* Request mailbox 5 fields. */ +#define PRCM_REQ_MB5_I2C_SLAVE (PRCM_REQ_MB5 + 0) +#define PRCM_REQ_MB5_I2C_REG (PRCM_REQ_MB5 + 1) +#define PRCM_REQ_MB5_I2C_SIZE (PRCM_REQ_MB5 + 2) +#define PRCM_REQ_MB5_I2C_DATA (PRCM_REQ_MB5 + 4) + +/* Acknowledge mailbox 5 fields. */ +#define PRCM_ACK_MB5_RETURN_CODE (PRCM_ACK_MB5 + 0) +#define PRCM_ACK_MB5_I2C_DATA (PRCM_ACK_MB5 + 4) + +#define NUM_MB 8 +#define MBOX_BIT BIT +#define ALL_MBOX_BITS (MBOX_BIT(NUM_MB) - 1) + +/* +* Used by MCDE to setup all necessary PRCMU registers +*/ +#define PRCMU_RESET_DSIPLL 0x00004000 +#define PRCMU_UNCLAMP_DSIPLL 0x00400800 + +/* HDMI CLK MGT PLLSW=001 (PLLSOC0), PLLDIV=0x8, = 50 Mhz*/ +#define PRCMU_DSI_CLOCK_SETTING 0x00000128 +/* TVCLK_MGT PLLSW=001 (PLLSOC0) PLLDIV=0x13, = 19.05 MHZ */ +#define PRCMU_DSI_LP_CLOCK_SETTING 0x00000135 +#define PRCMU_PLLDSI_FREQ_SETTING 0x0004013C +#define PRCMU_DSI_PLLOUT_SEL_SETTING 0x00000002 +#define PRCMU_ENABLE_ESCAPE_CLOCK_DIV 0x03000101 +#define PRCMU_DISABLE_ESCAPE_CLOCK_DIV 0x00000101 + +#define PRCMU_ENABLE_PLLDSI 0x00000001 +#define PRCMU_DISABLE_PLLDSI 0x00000000 + +#define PRCMU_DSI_RESET_SW 0x00000003 + +#define PRCMU_PLLDSI_LOCKP_LOCKED 0x3 + +/* + * mb0_transfer - state needed for mailbox 0 communication. + * @lock: The transaction lock. + */ +static struct { + spinlock_t lock; +} mb0_transfer; + +/* + * mb5_transfer - state needed for mailbox 5 communication. + * @lock: The transaction lock. + * @work: The transaction completion structure. + * @ack: Reply ("acknowledge") data. + */ +static struct { + struct mutex lock; + struct completion work; + struct { + u8 header; + u8 status; + u8 value[4]; + } ack; +} mb5_transfer; + +/* PRCMU TCDM base IO address. */ +static __iomem void *tcdm_base; + +/** + * db5500_prcmu_abb_read() - Read register value(s) from the ABB. + * @slave: The I2C slave address. + * @reg: The (start) register address. + * @value: The read out value(s). + * @size: The number of registers to read. + * + * Reads register value(s) from the ABB. + * @size has to be <= 4. + */ +int db5500_prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size) +{ + int r; + + if ((size < 1) || (4 < size)) + return -EINVAL; + + mutex_lock(&mb5_transfer.lock); + + while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5)) + cpu_relax(); + writeb(slave, PRCM_REQ_MB5_I2C_SLAVE); + writeb(reg, PRCM_REQ_MB5_I2C_REG); + writeb(size, PRCM_REQ_MB5_I2C_SIZE); + writeb(MB5H_I2C_READ, PRCM_REQ_MB5_HEADER); + + writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET); + wait_for_completion(&mb5_transfer.work); + + r = 0; + if ((mb5_transfer.ack.header == MB5H_I2C_READ) && + (mb5_transfer.ack.status == RC_SUCCESS)) + memcpy(value, mb5_transfer.ack.value, (size_t)size); + else + r = -EIO; + + mutex_unlock(&mb5_transfer.lock); + + return r; +} + +/** + * db5500_prcmu_abb_write() - Write register value(s) to the ABB. + * @slave: The I2C slave address. + * @reg: The (start) register address. + * @value: The value(s) to write. + * @size: The number of registers to write. + * + * Writes register value(s) to the ABB. + * @size has to be <= 4. 
+ */ +int db5500_prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size) +{ + int r; + + if ((size < 1) || (4 < size)) + return -EINVAL; + + mutex_lock(&mb5_transfer.lock); + + while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(5)) + cpu_relax(); + writeb(slave, PRCM_REQ_MB5_I2C_SLAVE); + writeb(reg, PRCM_REQ_MB5_I2C_REG); + writeb(size, PRCM_REQ_MB5_I2C_SIZE); + memcpy_toio(PRCM_REQ_MB5_I2C_DATA, value, size); + writeb(MB5H_I2C_WRITE, PRCM_REQ_MB5_HEADER); + + writel(MBOX_BIT(5), PRCM_MBOX_CPU_SET); + wait_for_completion(&mb5_transfer.work); + + if ((mb5_transfer.ack.header == MB5H_I2C_WRITE) && + (mb5_transfer.ack.status == RC_SUCCESS)) + r = 0; + else + r = -EIO; + + mutex_unlock(&mb5_transfer.lock); + + return r; +} + +int db5500_prcmu_enable_dsipll(void) +{ + int i; + + /* Enable DSIPLL_RESETN resets */ + writel(PRCMU_RESET_DSIPLL, PRCM_APE_RESETN_CLR); + /* Unclamp DSIPLL in/out */ + writel(PRCMU_UNCLAMP_DSIPLL, PRCM_MMIP_LS_CLAMP_CLR); + /* Set DSI PLL FREQ */ + writel(PRCMU_PLLDSI_FREQ_SETTING, PRCM_PLLDSI_FREQ); + writel(PRCMU_DSI_PLLOUT_SEL_SETTING, + PRCM_DSI_PLLOUT_SEL); + /* Enable Escape clocks */ + writel(PRCMU_ENABLE_ESCAPE_CLOCK_DIV, PRCM_DSITVCLK_DIV); + + /* Start DSI PLL */ + writel(PRCMU_ENABLE_PLLDSI, PRCM_PLLDSI_ENABLE); + /* Reset DSI PLL */ + writel(PRCMU_DSI_RESET_SW, PRCM_DSI_SW_RESET); + for (i = 0; i < 10; i++) { + if ((readl(PRCM_PLLDSI_LOCKP) & + PRCMU_PLLDSI_LOCKP_LOCKED) == PRCMU_PLLDSI_LOCKP_LOCKED) + break; + udelay(100); + } + /* Release DSIPLL_RESETN */ + writel(PRCMU_RESET_DSIPLL, PRCM_APE_RESETN_SET); + return 0; +} + +int db5500_prcmu_disable_dsipll(void) +{ + /* Disable dsi pll */ + writel(PRCMU_DISABLE_PLLDSI, PRCM_PLLDSI_ENABLE); + /* Disable escapeclock */ + writel(PRCMU_DISABLE_ESCAPE_CLOCK_DIV, PRCM_DSITVCLK_DIV); + return 0; +} + +int db5500_prcmu_set_display_clocks(void) +{ + /* HDMI and TVCLK Should be handled somewhere else */ + /* PLLDIV=8, PLLSW=2, CLKEN=1 */ + writel(PRCMU_DSI_CLOCK_SETTING, PRCM_HDMICLK_MGT); + /* PLLDIV=14, PLLSW=2, CLKEN=1 */ + writel(PRCMU_DSI_LP_CLOCK_SETTING, PRCM_TVCLK_MGT); + return 0; +} + +static void ack_dbb_wakeup(void) +{ + unsigned long flags; + + spin_lock_irqsave(&mb0_transfer.lock, flags); + + while (readl(PRCM_MBOX_CPU_VAL) & MBOX_BIT(0)) + cpu_relax(); + + writeb(RMB0H_RD_WAKE_UP_ACK, PRCM_REQ_MB0_HEADER); + writel(MBOX_BIT(0), PRCM_MBOX_CPU_SET); + + spin_unlock_irqrestore(&mb0_transfer.lock, flags); +} + +static inline void print_unknown_header_warning(u8 n, u8 header) +{ + pr_warning("prcmu: Unknown message header (%d) in mailbox %d.\n", + header, n); +} + +static bool read_mailbox_0(void) +{ + bool r; + u8 header; + + header = readb(PRCM_ACK_MB0_HEADER); + switch (header) { + case AMB0H_WAKE_UP: + r = true; + break; + default: + print_unknown_header_warning(0, header); + r = false; + break; + } + writel(MBOX_BIT(0), PRCM_ARM_IT1_CLEAR); + return r; +} + +static bool read_mailbox_1(void) +{ + writel(MBOX_BIT(1), PRCM_ARM_IT1_CLEAR); + return false; +} + +static bool read_mailbox_2(void) +{ + writel(MBOX_BIT(2), PRCM_ARM_IT1_CLEAR); + return false; +} + +static bool read_mailbox_3(void) +{ + writel(MBOX_BIT(3), PRCM_ARM_IT1_CLEAR); + return false; +} + +static bool read_mailbox_4(void) +{ + writel(MBOX_BIT(4), PRCM_ARM_IT1_CLEAR); + return false; +} + +static bool read_mailbox_5(void) +{ + u8 header; + + header = readb(PRCM_ACK_MB5_HEADER); + switch (header) { + case MB5H_I2C_READ: + memcpy_fromio(mb5_transfer.ack.value, PRCM_ACK_MB5_I2C_DATA, 4); + case MB5H_I2C_WRITE: + mb5_transfer.ack.header = header; + 
mb5_transfer.ack.status = readb(PRCM_ACK_MB5_RETURN_CODE); + complete(&mb5_transfer.work); + break; + default: + print_unknown_header_warning(5, header); + break; + } + writel(MBOX_BIT(5), PRCM_ARM_IT1_CLEAR); + return false; +} + +static bool read_mailbox_6(void) +{ + writel(MBOX_BIT(6), PRCM_ARM_IT1_CLEAR); + return false; +} + +static bool read_mailbox_7(void) +{ + writel(MBOX_BIT(7), PRCM_ARM_IT1_CLEAR); + return false; +} + +static bool (* const read_mailbox[NUM_MB])(void) = { + read_mailbox_0, + read_mailbox_1, + read_mailbox_2, + read_mailbox_3, + read_mailbox_4, + read_mailbox_5, + read_mailbox_6, + read_mailbox_7 +}; + +static irqreturn_t prcmu_irq_handler(int irq, void *data) +{ + u32 bits; + u8 n; + irqreturn_t r; + + bits = (readl(PRCM_ARM_IT1_VAL) & ALL_MBOX_BITS); + if (unlikely(!bits)) + return IRQ_NONE; + + r = IRQ_HANDLED; + for (n = 0; bits; n++) { + if (bits & MBOX_BIT(n)) { + bits -= MBOX_BIT(n); + if (read_mailbox[n]()) + r = IRQ_WAKE_THREAD; + } + } + return r; +} + +static irqreturn_t prcmu_irq_thread_fn(int irq, void *data) +{ + ack_dbb_wakeup(); + return IRQ_HANDLED; +} + +void __init db5500_prcmu_early_init(void) +{ + tcdm_base = __io_address(U5500_PRCMU_TCDM_BASE); + spin_lock_init(&mb0_transfer.lock); + mutex_init(&mb5_transfer.lock); + init_completion(&mb5_transfer.work); +} + +/** + * prcmu_fw_init - arch init call for the Linux PRCMU fw init logic + * + */ +int __init db5500_prcmu_init(void) +{ + int r = 0; + + if (ux500_is_svp() || !cpu_is_u5500()) + return -ENODEV; + + /* Clean up the mailbox interrupts after pre-kernel code. */ + writel(ALL_MBOX_BITS, PRCM_ARM_IT1_CLEAR); + + r = request_threaded_irq(IRQ_DB5500_PRCMU1, prcmu_irq_handler, + prcmu_irq_thread_fn, 0, "prcmu", NULL); + if (r < 0) { + pr_err("prcmu: Failed to allocate IRQ_DB5500_PRCMU1.\n"); + return -EBUSY; + } + return 0; +} + +arch_initcall(db5500_prcmu_init); diff --git a/include/linux/mfd/db5500-prcmu.h b/include/linux/mfd/db5500-prcmu.h new file mode 100644 index 000000000000..f0977986402c --- /dev/null +++ b/include/linux/mfd/db5500-prcmu.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) ST-Ericsson SA 2010 + * + * License Terms: GNU General Public License v2 + * + * U5500 PRCMU API. + */ +#ifndef __MACH_PRCMU_U5500_H +#define __MACH_PRCMU_U5500_H + +#ifdef CONFIG_UX500_SOC_DB5500 + +void db5500_prcmu_early_init(void); + +int db5500_prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size); +int db5500_prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size); + +#else /* !CONFIG_UX500_SOC_DB5500 */ + +static inline void db5500_prcmu_early_init(void) +{ +} + +static inline int db5500_prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size) +{ + return -ENOSYS; +} + +static inline int db5500_prcmu_abb_write(u8 slave, u8 reg, u8 *value, u8 size) +{ + return -ENOSYS; +} + +#endif /* CONFIG_UX500_SOC_DB5500 */ + +static inline int db5500_prcmu_config_abb_event_readout(u32 abb_events) +{ +#ifdef CONFIG_MACH_U5500_SIMULATOR + return 0; +#else + return -1; +#endif +} + +#endif /* __MACH_PRCMU_U5500_H */ -- cgit v1.2.3 From 1032fbfd792f2b384ac16a63993b8fae5eea9083 Mon Sep 17 00:00:00 2001 From: Bengt Jonsson Date: Fri, 1 Apr 2011 14:43:33 +0200 Subject: mach-ux500: voltage domain regulators for DB8500 The DB8500 has ePOD:s (electronic power domains) which are possible to switch on/off to deactivate silicon blocks on the DB8500 SoC by cutting their power without retention. We model these as simple regulators with one bit on/off settings. 
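For illustration only (not part of this change), a consumer gates its ePOD
through the standard regulator framework. The "vsupply" supply name and the
"mcde.0" device mapping come from the consumer tables added by this patch;
the function names and flow below are an invented sketch, and regulator_get(),
regulator_enable(), regulator_disable() and regulator_put() are the ordinary
regulator consumer API calls.

	#include <linux/device.h>
	#include <linux/err.h>
	#include <linux/regulator/consumer.h>

	static struct regulator *mcde_epod;

	/* Power the B2R2/MCDE ePOD up; ends up in prcmu_set_epod(EPOD_STATE_ON) */
	static int example_mcde_power_up(struct device *dev)
	{
		mcde_epod = regulator_get(dev, "vsupply");
		if (IS_ERR(mcde_epod))
			return PTR_ERR(mcde_epod);

		return regulator_enable(mcde_epod);
	}

	/* Power it back down; the switch takes the ePOD off (or to RAM retention) */
	static void example_mcde_power_down(void)
	{
		regulator_disable(mcde_epod);
		regulator_put(mcde_epod);
	}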
Acked-by: Liam Girdwood Acked-by: Mark Brown Signed-off-by: Bengt Jonsson Signed-off-by: Sundar Iyer Signed-off-by: Jonas Aberg Signed-off-by: Virupax Sadashivpetimath Signed-off-by: Martin Persson Signed-off-by: Linus Walleij --- arch/arm/mach-ux500/Kconfig | 1 + drivers/mfd/db8500-prcmu.c | 179 +++++++++++ drivers/regulator/Kconfig | 7 + drivers/regulator/Makefile | 1 + drivers/regulator/db8500-prcmu.c | 558 +++++++++++++++++++++++++++++++++ include/linux/regulator/db8500-prcmu.h | 45 +++ 6 files changed, 791 insertions(+) create mode 100644 drivers/regulator/db8500-prcmu.c create mode 100644 include/linux/regulator/db8500-prcmu.h (limited to 'include/linux') diff --git a/arch/arm/mach-ux500/Kconfig b/arch/arm/mach-ux500/Kconfig index 365c4c35faa7..54429d015954 100644 --- a/arch/arm/mach-ux500/Kconfig +++ b/arch/arm/mach-ux500/Kconfig @@ -17,6 +17,7 @@ config UX500_SOC_DB5500 config UX500_SOC_DB8500 bool "DB8500" select MFD_DB8500_PRCMU + select REGULATOR_DB8500_PRCMU endmenu diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c index c44725bd8b9a..e63782107e2f 100644 --- a/drivers/mfd/db8500-prcmu.c +++ b/drivers/mfd/db8500-prcmu.c @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include #include #include @@ -1824,9 +1826,186 @@ void __init prcmu_early_init(void) } } +/* + * Power domain switches (ePODs) modeled as regulators for the DB8500 SoC + */ +static struct regulator_consumer_supply db8500_vape_consumers[] = { + REGULATOR_SUPPLY("v-ape", NULL), + REGULATOR_SUPPLY("v-i2c", "nmk-i2c.0"), + REGULATOR_SUPPLY("v-i2c", "nmk-i2c.1"), + REGULATOR_SUPPLY("v-i2c", "nmk-i2c.2"), + REGULATOR_SUPPLY("v-i2c", "nmk-i2c.3"), + /* "v-mmc" changed to "vcore" in the mainline kernel */ + REGULATOR_SUPPLY("vcore", "sdi0"), + REGULATOR_SUPPLY("vcore", "sdi1"), + REGULATOR_SUPPLY("vcore", "sdi2"), + REGULATOR_SUPPLY("vcore", "sdi3"), + REGULATOR_SUPPLY("vcore", "sdi4"), + REGULATOR_SUPPLY("v-dma", "dma40.0"), + REGULATOR_SUPPLY("v-ape", "ab8500-usb.0"), + /* "v-uart" changed to "vcore" in the mainline kernel */ + REGULATOR_SUPPLY("vcore", "uart0"), + REGULATOR_SUPPLY("vcore", "uart1"), + REGULATOR_SUPPLY("vcore", "uart2"), + REGULATOR_SUPPLY("v-ape", "nmk-ske-keypad.0"), +}; + +static struct regulator_consumer_supply db8500_vsmps2_consumers[] = { + /* CG2900 and CW1200 power to off-chip peripherals */ + REGULATOR_SUPPLY("gbf_1v8", "cg2900-uart.0"), + REGULATOR_SUPPLY("wlan_1v8", "cw1200.0"), + REGULATOR_SUPPLY("musb_1v8", "ab8500-usb.0"), + /* AV8100 regulator */ + REGULATOR_SUPPLY("hdmi_1v8", "0-0070"), +}; + +static struct regulator_consumer_supply db8500_b2r2_mcde_consumers[] = { + REGULATOR_SUPPLY("vsupply", "b2r2.0"), + REGULATOR_SUPPLY("vsupply", "mcde.0"), +}; + +static struct regulator_init_data db8500_regulators[DB8500_NUM_REGULATORS] = { + [DB8500_REGULATOR_VAPE] = { + .constraints = { + .name = "db8500-vape", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + .consumer_supplies = db8500_vape_consumers, + .num_consumer_supplies = ARRAY_SIZE(db8500_vape_consumers), + }, + [DB8500_REGULATOR_VARM] = { + .constraints = { + .name = "db8500-varm", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_VMODEM] = { + .constraints = { + .name = "db8500-vmodem", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_VPLL] = { + .constraints = { + .name = "db8500-vpll", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_VSMPS1] = { + .constraints = { + .name = "db8500-vsmps1", + .valid_ops_mask = 
REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_VSMPS2] = { + .constraints = { + .name = "db8500-vsmps2", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + .consumer_supplies = db8500_vsmps2_consumers, + .num_consumer_supplies = ARRAY_SIZE(db8500_vsmps2_consumers), + }, + [DB8500_REGULATOR_VSMPS3] = { + .constraints = { + .name = "db8500-vsmps3", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_VRF1] = { + .constraints = { + .name = "db8500-vrf1", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_SWITCH_SVAMMDSP] = { + .supply_regulator = "db8500-vape", + .constraints = { + .name = "db8500-sva-mmdsp", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_SWITCH_SVAMMDSPRET] = { + .constraints = { + /* "ret" means "retention" */ + .name = "db8500-sva-mmdsp-ret", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_SWITCH_SVAPIPE] = { + .supply_regulator = "db8500-vape", + .constraints = { + .name = "db8500-sva-pipe", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_SWITCH_SIAMMDSP] = { + .supply_regulator = "db8500-vape", + .constraints = { + .name = "db8500-sia-mmdsp", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_SWITCH_SIAMMDSPRET] = { + .constraints = { + .name = "db8500-sia-mmdsp-ret", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_SWITCH_SIAPIPE] = { + .supply_regulator = "db8500-vape", + .constraints = { + .name = "db8500-sia-pipe", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_SWITCH_SGA] = { + .supply_regulator = "db8500-vape", + .constraints = { + .name = "db8500-sga", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_SWITCH_B2R2_MCDE] = { + .supply_regulator = "db8500-vape", + .constraints = { + .name = "db8500-b2r2-mcde", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + .consumer_supplies = db8500_b2r2_mcde_consumers, + .num_consumer_supplies = ARRAY_SIZE(db8500_b2r2_mcde_consumers), + }, + [DB8500_REGULATOR_SWITCH_ESRAM12] = { + .supply_regulator = "db8500-vape", + .constraints = { + .name = "db8500-esram12", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_SWITCH_ESRAM12RET] = { + .constraints = { + .name = "db8500-esram12-ret", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_SWITCH_ESRAM34] = { + .supply_regulator = "db8500-vape", + .constraints = { + .name = "db8500-esram34", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, + [DB8500_REGULATOR_SWITCH_ESRAM34RET] = { + .constraints = { + .name = "db8500-esram34-ret", + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + }, +}; + static struct mfd_cell db8500_prcmu_devs[] = { { .name = "db8500-prcmu-regulators", + .mfd_data = &db8500_regulators, }, { .name = "cpufreq-u8500", diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index b9f29e0d4295..f0b13a0d1851 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -274,6 +274,13 @@ config REGULATOR_AB8500 This driver supports the regulators found on the ST-Ericsson mixed signal AB8500 PMIC +config REGULATOR_DB8500_PRCMU + bool "ST-Ericsson DB8500 Voltage Domain Regulators" + depends on MFD_DB8500_PRCMU + help + This driver supports the voltage domain regulators controlled by the + DB8500 PRCMU + config REGULATOR_TPS6586X tristate "TI TPS6586X Power regulators" depends on MFD_TPS6586X diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile 
index d72a42756778..165ff5371e9e 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -41,5 +41,6 @@ obj-$(CONFIG_REGULATOR_TPS6524X) += tps6524x-regulator.o obj-$(CONFIG_REGULATOR_88PM8607) += 88pm8607.o obj-$(CONFIG_REGULATOR_ISL6271A) += isl6271a-regulator.o obj-$(CONFIG_REGULATOR_AB8500) += ab8500.o +obj-$(CONFIG_REGULATOR_DB8500_PRCMU) += db8500-prcmu.o ccflags-$(CONFIG_REGULATOR_DEBUG) += -DDEBUG diff --git a/drivers/regulator/db8500-prcmu.c b/drivers/regulator/db8500-prcmu.c new file mode 100644 index 000000000000..1089a961616e --- /dev/null +++ b/drivers/regulator/db8500-prcmu.c @@ -0,0 +1,558 @@ +/* + * Copyright (C) ST-Ericsson SA 2010 + * + * License Terms: GNU General Public License v2 + * Authors: Sundar Iyer for ST-Ericsson + * Bengt Jonsson for ST-Ericsson + * + * Power domain regulators on DB8500 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * power state reference count + */ +static int power_state_active_cnt; /* will initialize to zero */ +static DEFINE_SPINLOCK(power_state_active_lock); + +static void power_state_active_enable(void) +{ + unsigned long flags; + + spin_lock_irqsave(&power_state_active_lock, flags); + power_state_active_cnt++; + spin_unlock_irqrestore(&power_state_active_lock, flags); +} + +static int power_state_active_disable(void) +{ + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&power_state_active_lock, flags); + if (power_state_active_cnt <= 0) { + pr_err("power state: unbalanced enable/disable calls\n"); + ret = -EINVAL; + goto out; + } + + power_state_active_cnt--; +out: + spin_unlock_irqrestore(&power_state_active_lock, flags); + return ret; +} + +/* + * Exported interface for CPUIdle only. This function is called when interrupts + * are turned off. Hence, no locking. 
+ */ +int power_state_active_is_enabled(void) +{ + return (power_state_active_cnt > 0); +} + +/** + * struct db8500_regulator_info - db8500 regulator information + * @dev: device pointer + * @desc: regulator description + * @rdev: regulator device pointer + * @is_enabled: status of the regulator + * @epod_id: id for EPOD (power domain) + * @is_ramret: RAM retention switch for EPOD (power domain) + * @operating_point: operating point (only for vape, to be removed) + * + */ +struct db8500_regulator_info { + struct device *dev; + struct regulator_desc desc; + struct regulator_dev *rdev; + bool is_enabled; + u16 epod_id; + bool is_ramret; + bool exclude_from_power_state; + unsigned int operating_point; +}; + +static int db8500_regulator_enable(struct regulator_dev *rdev) +{ + struct db8500_regulator_info *info = rdev_get_drvdata(rdev); + + if (info == NULL) + return -EINVAL; + + dev_vdbg(rdev_get_dev(rdev), "regulator-%s-enable\n", + info->desc.name); + + info->is_enabled = true; + if (!info->exclude_from_power_state) + power_state_active_enable(); + + return 0; +} + +static int db8500_regulator_disable(struct regulator_dev *rdev) +{ + struct db8500_regulator_info *info = rdev_get_drvdata(rdev); + int ret = 0; + + if (info == NULL) + return -EINVAL; + + dev_vdbg(rdev_get_dev(rdev), "regulator-%s-disable\n", + info->desc.name); + + info->is_enabled = false; + if (!info->exclude_from_power_state) + ret = power_state_active_disable(); + + return ret; +} + +static int db8500_regulator_is_enabled(struct regulator_dev *rdev) +{ + struct db8500_regulator_info *info = rdev_get_drvdata(rdev); + + if (info == NULL) + return -EINVAL; + + dev_vdbg(rdev_get_dev(rdev), "regulator-%s-is_enabled (is_enabled):" + " %i\n", info->desc.name, info->is_enabled); + + return info->is_enabled; +} + +/* db8500 regulator operations */ +static struct regulator_ops db8500_regulator_ops = { + .enable = db8500_regulator_enable, + .disable = db8500_regulator_disable, + .is_enabled = db8500_regulator_is_enabled, +}; + +/* + * EPOD control + */ +static bool epod_on[NUM_EPOD_ID]; +static bool epod_ramret[NUM_EPOD_ID]; + +static int enable_epod(u16 epod_id, bool ramret) +{ + int ret; + + if (ramret) { + if (!epod_on[epod_id]) { + ret = prcmu_set_epod(epod_id, EPOD_STATE_RAMRET); + if (ret < 0) + return ret; + } + epod_ramret[epod_id] = true; + } else { + ret = prcmu_set_epod(epod_id, EPOD_STATE_ON); + if (ret < 0) + return ret; + epod_on[epod_id] = true; + } + + return 0; +} + +static int disable_epod(u16 epod_id, bool ramret) +{ + int ret; + + if (ramret) { + if (!epod_on[epod_id]) { + ret = prcmu_set_epod(epod_id, EPOD_STATE_OFF); + if (ret < 0) + return ret; + } + epod_ramret[epod_id] = false; + } else { + if (epod_ramret[epod_id]) { + ret = prcmu_set_epod(epod_id, EPOD_STATE_RAMRET); + if (ret < 0) + return ret; + } else { + ret = prcmu_set_epod(epod_id, EPOD_STATE_OFF); + if (ret < 0) + return ret; + } + epod_on[epod_id] = false; + } + + return 0; +} + +/* + * Regulator switch + */ +static int db8500_regulator_switch_enable(struct regulator_dev *rdev) +{ + struct db8500_regulator_info *info = rdev_get_drvdata(rdev); + int ret; + + if (info == NULL) + return -EINVAL; + + dev_vdbg(rdev_get_dev(rdev), "regulator-switch-%s-enable\n", + info->desc.name); + + ret = enable_epod(info->epod_id, info->is_ramret); + if (ret < 0) { + dev_err(rdev_get_dev(rdev), + "regulator-switch-%s-enable: prcmu call failed\n", + info->desc.name); + goto out; + } + + info->is_enabled = true; +out: + return ret; +} + +static int 
db8500_regulator_switch_disable(struct regulator_dev *rdev) +{ + struct db8500_regulator_info *info = rdev_get_drvdata(rdev); + int ret; + + if (info == NULL) + return -EINVAL; + + dev_vdbg(rdev_get_dev(rdev), "regulator-switch-%s-disable\n", + info->desc.name); + + ret = disable_epod(info->epod_id, info->is_ramret); + if (ret < 0) { + dev_err(rdev_get_dev(rdev), + "regulator_switch-%s-disable: prcmu call failed\n", + info->desc.name); + goto out; + } + + info->is_enabled = 0; +out: + return ret; +} + +static int db8500_regulator_switch_is_enabled(struct regulator_dev *rdev) +{ + struct db8500_regulator_info *info = rdev_get_drvdata(rdev); + + if (info == NULL) + return -EINVAL; + + dev_vdbg(rdev_get_dev(rdev), + "regulator-switch-%s-is_enabled (is_enabled): %i\n", + info->desc.name, info->is_enabled); + + return info->is_enabled; +} + +static struct regulator_ops db8500_regulator_switch_ops = { + .enable = db8500_regulator_switch_enable, + .disable = db8500_regulator_switch_disable, + .is_enabled = db8500_regulator_switch_is_enabled, +}; + +/* + * Regulator information + */ +static struct db8500_regulator_info + db8500_regulator_info[DB8500_NUM_REGULATORS] = { + [DB8500_REGULATOR_VAPE] = { + .desc = { + .name = "db8500-vape", + .id = DB8500_REGULATOR_VAPE, + .ops = &db8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + }, + [DB8500_REGULATOR_VARM] = { + .desc = { + .name = "db8500-varm", + .id = DB8500_REGULATOR_VARM, + .ops = &db8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + }, + [DB8500_REGULATOR_VMODEM] = { + .desc = { + .name = "db8500-vmodem", + .id = DB8500_REGULATOR_VMODEM, + .ops = &db8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + }, + [DB8500_REGULATOR_VPLL] = { + .desc = { + .name = "db8500-vpll", + .id = DB8500_REGULATOR_VPLL, + .ops = &db8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + }, + [DB8500_REGULATOR_VSMPS1] = { + .desc = { + .name = "db8500-vsmps1", + .id = DB8500_REGULATOR_VSMPS1, + .ops = &db8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + }, + [DB8500_REGULATOR_VSMPS2] = { + .desc = { + .name = "db8500-vsmps2", + .id = DB8500_REGULATOR_VSMPS2, + .ops = &db8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .exclude_from_power_state = true, + }, + [DB8500_REGULATOR_VSMPS3] = { + .desc = { + .name = "db8500-vsmps3", + .id = DB8500_REGULATOR_VSMPS3, + .ops = &db8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + }, + [DB8500_REGULATOR_VRF1] = { + .desc = { + .name = "db8500-vrf1", + .id = DB8500_REGULATOR_VRF1, + .ops = &db8500_regulator_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + }, + [DB8500_REGULATOR_SWITCH_SVAMMDSP] = { + .desc = { + .name = "db8500-sva-mmdsp", + .id = DB8500_REGULATOR_SWITCH_SVAMMDSP, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_SVAMMDSP, + }, + [DB8500_REGULATOR_SWITCH_SVAMMDSPRET] = { + .desc = { + .name = "db8500-sva-mmdsp-ret", + .id = DB8500_REGULATOR_SWITCH_SVAMMDSPRET, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_SVAMMDSP, + .is_ramret = true, + }, + [DB8500_REGULATOR_SWITCH_SVAPIPE] = { + .desc = { + .name = "db8500-sva-pipe", + .id = DB8500_REGULATOR_SWITCH_SVAPIPE, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + 
.epod_id = EPOD_ID_SVAPIPE, + }, + [DB8500_REGULATOR_SWITCH_SIAMMDSP] = { + .desc = { + .name = "db8500-sia-mmdsp", + .id = DB8500_REGULATOR_SWITCH_SIAMMDSP, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_SIAMMDSP, + }, + [DB8500_REGULATOR_SWITCH_SIAMMDSPRET] = { + .desc = { + .name = "db8500-sia-mmdsp-ret", + .id = DB8500_REGULATOR_SWITCH_SIAMMDSPRET, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_SIAMMDSP, + .is_ramret = true, + }, + [DB8500_REGULATOR_SWITCH_SIAPIPE] = { + .desc = { + .name = "db8500-sia-pipe", + .id = DB8500_REGULATOR_SWITCH_SIAPIPE, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_SIAPIPE, + }, + [DB8500_REGULATOR_SWITCH_SGA] = { + .desc = { + .name = "db8500-sga", + .id = DB8500_REGULATOR_SWITCH_SGA, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_SGA, + }, + [DB8500_REGULATOR_SWITCH_B2R2_MCDE] = { + .desc = { + .name = "db8500-b2r2-mcde", + .id = DB8500_REGULATOR_SWITCH_B2R2_MCDE, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_B2R2_MCDE, + }, + [DB8500_REGULATOR_SWITCH_ESRAM12] = { + .desc = { + .name = "db8500-esram12", + .id = DB8500_REGULATOR_SWITCH_ESRAM12, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_ESRAM12, + .is_enabled = true, + }, + [DB8500_REGULATOR_SWITCH_ESRAM12RET] = { + .desc = { + .name = "db8500-esram12-ret", + .id = DB8500_REGULATOR_SWITCH_ESRAM12RET, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_ESRAM12, + .is_ramret = true, + }, + [DB8500_REGULATOR_SWITCH_ESRAM34] = { + .desc = { + .name = "db8500-esram34", + .id = DB8500_REGULATOR_SWITCH_ESRAM34, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_ESRAM34, + .is_enabled = true, + }, + [DB8500_REGULATOR_SWITCH_ESRAM34RET] = { + .desc = { + .name = "db8500-esram34-ret", + .id = DB8500_REGULATOR_SWITCH_ESRAM34RET, + .ops = &db8500_regulator_switch_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + .epod_id = EPOD_ID_ESRAM34, + .is_ramret = true, + }, +}; + +static int __devinit db8500_regulator_probe(struct platform_device *pdev) +{ + struct regulator_init_data *db8500_init_data = mfd_get_data(pdev); + int i, err; + + /* register all regulators */ + for (i = 0; i < ARRAY_SIZE(db8500_regulator_info); i++) { + struct db8500_regulator_info *info; + struct regulator_init_data *init_data = &db8500_init_data[i]; + + /* assign per-regulator data */ + info = &db8500_regulator_info[i]; + info->dev = &pdev->dev; + + /* register with the regulator framework */ + info->rdev = regulator_register(&info->desc, &pdev->dev, + init_data, info); + if (IS_ERR(info->rdev)) { + err = PTR_ERR(info->rdev); + dev_err(&pdev->dev, "failed to register %s: err %i\n", + info->desc.name, err); + + /* if failing, unregister all earlier regulators */ + i--; + while (i >= 0) { + info = &db8500_regulator_info[i]; + regulator_unregister(info->rdev); + i--; + } + return err; + } + + dev_dbg(rdev_get_dev(info->rdev), + "regulator-%s-probed\n", info->desc.name); + } + + return 0; +} + +static int __exit db8500_regulator_remove(struct platform_device *pdev) +{ + int i; + + 
for (i = 0; i < ARRAY_SIZE(db8500_regulator_info); i++) { + struct db8500_regulator_info *info; + info = &db8500_regulator_info[i]; + + dev_vdbg(rdev_get_dev(info->rdev), + "regulator-%s-remove\n", info->desc.name); + + regulator_unregister(info->rdev); + } + + return 0; +} + +static struct platform_driver db8500_regulator_driver = { + .driver = { + .name = "db8500-prcmu-regulators", + .owner = THIS_MODULE, + }, + .probe = db8500_regulator_probe, + .remove = __exit_p(db8500_regulator_remove), +}; + +static int __init db8500_regulator_init(void) +{ + int ret; + + ret = platform_driver_register(&db8500_regulator_driver); + if (ret < 0) + return -ENODEV; + + return 0; +} + +static void __exit db8500_regulator_exit(void) +{ + platform_driver_unregister(&db8500_regulator_driver); +} + +arch_initcall(db8500_regulator_init); +module_exit(db8500_regulator_exit); + +MODULE_AUTHOR("STMicroelectronics/ST-Ericsson"); +MODULE_DESCRIPTION("DB8500 regulator driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/regulator/db8500-prcmu.h b/include/linux/regulator/db8500-prcmu.h new file mode 100644 index 000000000000..612062313b68 --- /dev/null +++ b/include/linux/regulator/db8500-prcmu.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) ST-Ericsson SA 2010 + * + * License Terms: GNU General Public License v2 + * + * Author: Bengt Jonsson for ST-Ericsson + * + * Interface to power domain regulators on DB8500 + */ + +#ifndef __REGULATOR_H__ +#define __REGULATOR_H__ + +/* Number of DB8500 regulators and regulator enumeration */ +enum db8500_regulator_id { + DB8500_REGULATOR_VAPE, + DB8500_REGULATOR_VARM, + DB8500_REGULATOR_VMODEM, + DB8500_REGULATOR_VPLL, + DB8500_REGULATOR_VSMPS1, + DB8500_REGULATOR_VSMPS2, + DB8500_REGULATOR_VSMPS3, + DB8500_REGULATOR_VRF1, + DB8500_REGULATOR_SWITCH_SVAMMDSP, + DB8500_REGULATOR_SWITCH_SVAMMDSPRET, + DB8500_REGULATOR_SWITCH_SVAPIPE, + DB8500_REGULATOR_SWITCH_SIAMMDSP, + DB8500_REGULATOR_SWITCH_SIAMMDSPRET, + DB8500_REGULATOR_SWITCH_SIAPIPE, + DB8500_REGULATOR_SWITCH_SGA, + DB8500_REGULATOR_SWITCH_B2R2_MCDE, + DB8500_REGULATOR_SWITCH_ESRAM12, + DB8500_REGULATOR_SWITCH_ESRAM12RET, + DB8500_REGULATOR_SWITCH_ESRAM34, + DB8500_REGULATOR_SWITCH_ESRAM34RET, + DB8500_NUM_REGULATORS +}; + +/* + * Exported interface for CPUIdle only. This function is called with all + * interrupts turned off. + */ +int power_state_active_is_enabled(void); + +#endif -- cgit v1.2.3 From 618e724b8d79c6232daac49cb39b0723bf5c4b08 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 11 May 2011 14:06:58 -0700 Subject: ns: Declare sys_setns in syscalls.h Ooops I overlooked this one, and missing it causes compile errors of the powerpc syscall. Signed-off-by: Eric W. 
Biederman --- include/linux/syscalls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 83ecc1749ef6..b8b380443d05 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -844,4 +844,5 @@ asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name, asmlinkage long sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); +asmlinkage long sys_setns(int fd, int nstype); #endif -- cgit v1.2.3 From a626743f579aa743473fd8b9215ca198bacf2ac4 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 18 Mar 2011 21:53:41 -0700 Subject: mtd: nand: renumber conflicting BBT flags The NAND_USE_FLASH_BBT_NO_OOB and NAND_CREATE_EMPTY_BBT flags conflict with the NAND_BBT_SCANBYTE1AND6 and NAND_BBT_DYNAMICSTRUCT flags, respectively. This change will allow us to utilize these options independently. Signed-off-by: Brian Norris Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- include/linux/mtd/nand.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index d44192740f6f..c2b9ac4fbc4a 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -237,9 +237,9 @@ typedef enum { * If passed additionally to NAND_USE_FLASH_BBT then BBT code will not touch * the OOB area. */ -#define NAND_USE_FLASH_BBT_NO_OOB 0x00100000 +#define NAND_USE_FLASH_BBT_NO_OOB 0x00800000 /* Create an empty BBT with no vendor information if the BBT is available */ -#define NAND_CREATE_EMPTY_BBT 0x00200000 +#define NAND_CREATE_EMPTY_BBT 0x01000000 /* Options set by nand scan */ /* Nand scan has allocated controller struct */ -- cgit v1.2.3 From 41e2a4893566ced3c46af15df5b727326881e47d Mon Sep 17 00:00:00 2001 From: Philip Rakity Date: Sat, 19 Mar 2011 14:10:33 -0400 Subject: mmc: Ensure linux starts in eMMC user partition uBoot sometimes leaves eMMC pointing to the private boot partition. Ensure we always start looking at the user partition. Signed-off-by: Philip Rakity Signed-off-by: Bruce Clemens Signed-off-by: Mark F. 
Brown Signed-off-by: Chris Ball --- drivers/mmc/core/mmc.c | 10 ++++++++++ include/linux/mmc/card.h | 1 + include/linux/mmc/mmc.h | 1 + 3 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 772d0d0a541b..5c611a6e0080 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -288,6 +288,7 @@ static int mmc_read_ext_csd(struct mmc_card *card) if (card->ext_csd.rev >= 3) { u8 sa_shift = ext_csd[EXT_CSD_S_A_TIMEOUT]; + card->ext_csd.bootconfig = ext_csd[EXT_CSD_BOOT_CONFIG]; /* Sleep / awake timeout in 100ns units */ if (sa_shift > 0 && sa_shift <= 0x17) @@ -567,6 +568,15 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, } } + /* + * Ensure eMMC user default partition is enabled + */ + if (card->ext_csd.bootconfig & 0x7) { + card->ext_csd.bootconfig &= ~0x7; + mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BOOT_CONFIG, + card->ext_csd.bootconfig); + } + /* * Activate high speed (if supported) */ diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index adb4888248be..557b73263390 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -45,6 +45,7 @@ struct mmc_ext_csd { u8 rev; u8 erase_group_def; u8 sec_feature_support; + u8 bootconfig; unsigned int sa_timeout; /* Units: 100ns */ unsigned int hs_max_dtr; unsigned int sectors; diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 264ba5451e3b..b5ec88fd1352 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -256,6 +256,7 @@ struct _mmc_csd { #define EXT_CSD_PARTITION_ATTRIBUTE 156 /* R/W */ #define EXT_CSD_PARTITION_SUPPORT 160 /* RO */ #define EXT_CSD_ERASE_GROUP_DEF 175 /* R/W */ +#define EXT_CSD_BOOT_CONFIG 179 /* R/W */ #define EXT_CSD_ERASED_MEM_CONT 181 /* RO */ #define EXT_CSD_BUS_WIDTH 183 /* R/W */ #define EXT_CSD_HS_TIMING 185 /* R/W */ -- cgit v1.2.3 From f4c5522b0a8827f39f83f928961d87e081bfe71c Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Thu, 31 Mar 2011 18:40:00 -0500 Subject: mmc: Reliable write support. Allows reliable writes to be used for MMC writes. Reliable writes are used to service write REQ_FUA/REQ_META requests. Handles both the legacy and the enhanced reliable write support in MMC cards. Signed-off-by: Andrei Warkentin Reviewed-by: Arnd Bergmann Signed-off-by: Chris Ball --- drivers/mmc/card/block.c | 81 +++++++++++++++++++++++++++++++++++++++++++++--- drivers/mmc/core/mmc.c | 5 +++ include/linux/mmc/card.h | 2 ++ include/linux/mmc/mmc.h | 4 +++ 4 files changed, 88 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 61d233a7c118..91a676773608 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -48,6 +48,10 @@ MODULE_ALIAS("mmc:block"); #endif #define MODULE_PARAM_PREFIX "mmcblk." +#define REL_WRITES_SUPPORTED(card) (mmc_card_mmc((card)) && \ + (((card)->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) || \ + ((card)->ext_csd.rel_sectors))) + static DEFINE_MUTEX(block_mutex); /* @@ -331,6 +335,57 @@ out: return err ? 0 : 1; } +static int mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req) +{ + struct mmc_blk_data *md = mq->data; + + /* + * No-op, only service this because we need REQ_FUA for reliable + * writes. + */ + spin_lock_irq(&md->lock); + __blk_end_request_all(req, 0); + spin_unlock_irq(&md->lock); + + return 1; +} + +/* + * Reformat current write as a reliable write, supporting + * both legacy and the enhanced reliable write MMC cards. 
+ * In each transfer we'll handle only as much as a single + * reliable write can handle, thus finish the request in + * partial completions. + */ +static inline int mmc_apply_rel_rw(struct mmc_blk_request *brq, + struct mmc_card *card, + struct request *req) +{ + int err; + struct mmc_command set_count; + + if (!(card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN)) { + /* Legacy mode imposes restrictions on transfers. */ + if (!IS_ALIGNED(brq->cmd.arg, card->ext_csd.rel_sectors)) + brq->data.blocks = 1; + + if (brq->data.blocks > card->ext_csd.rel_sectors) + brq->data.blocks = card->ext_csd.rel_sectors; + else if (brq->data.blocks < card->ext_csd.rel_sectors) + brq->data.blocks = 1; + } + + memset(&set_count, 0, sizeof(struct mmc_command)); + set_count.opcode = MMC_SET_BLOCK_COUNT; + set_count.arg = brq->data.blocks | (1 << 31); + set_count.flags = MMC_RSP_R1 | MMC_CMD_AC; + err = mmc_wait_for_cmd(card->host, &set_count, 0); + if (err) + printk(KERN_ERR "%s: error %d SET_BLOCK_COUNT\n", + req->rq_disk->disk_name, err); + return err; +} + static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) { struct mmc_blk_data *md = mq->data; @@ -338,6 +393,15 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) struct mmc_blk_request brq; int ret = 1, disable_multi = 0; + /* + * Reliable writes are used to implement Forced Unit Access and + * REQ_META accesses, and are supported only on MMCs. + */ + bool do_rel_wr = ((req->cmd_flags & REQ_FUA) || + (req->cmd_flags & REQ_META)) && + (rq_data_dir(req) == WRITE) && + REL_WRITES_SUPPORTED(card); + mmc_claim_host(card->host); do { @@ -374,12 +438,14 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) if (disable_multi && brq.data.blocks > 1) brq.data.blocks = 1; - if (brq.data.blocks > 1) { + if (brq.data.blocks > 1 || do_rel_wr) { /* SPI multiblock writes terminate using a special - * token, not a STOP_TRANSMISSION request. + * token, not a STOP_TRANSMISSION request. Reliable + * writes use SET_BLOCK_COUNT and do not use a + * STOP_TRANSMISSION request either. 
*/ - if (!mmc_host_is_spi(card->host) - || rq_data_dir(req) == READ) + if ((!mmc_host_is_spi(card->host) && !do_rel_wr) || + rq_data_dir(req) == READ) brq.mrq.stop = &brq.stop; readcmd = MMC_READ_MULTIPLE_BLOCK; writecmd = MMC_WRITE_MULTIPLE_BLOCK; @@ -396,6 +462,9 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) brq.data.flags |= MMC_DATA_WRITE; } + if (do_rel_wr && mmc_apply_rel_rw(&brq, card, req)) + goto cmd_err; + mmc_set_data_timeout(&brq.data, card); brq.data.sg = mq->sg; @@ -565,6 +634,8 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) return mmc_blk_issue_secdiscard_rq(mq, req); else return mmc_blk_issue_discard_rq(mq, req); + } else if (req->cmd_flags & REQ_FLUSH) { + return mmc_blk_issue_flush(mq, req); } else { return mmc_blk_issue_rw_rq(mq, req); } @@ -622,6 +693,8 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card) md->disk->queue = md->queue.queue; md->disk->driverfs_dev = &card->dev; set_disk_ro(md->disk, md->read_only); + if (REL_WRITES_SUPPORTED(card)) + blk_queue_flush(md->queue.queue, REQ_FLUSH | REQ_FUA); /* * As discussed on lkml, GENHD_FL_REMOVABLE should: diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 5c611a6e0080..ae6b8fd38800 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -300,6 +300,8 @@ static int mmc_read_ext_csd(struct mmc_card *card) ext_csd[EXT_CSD_ERASE_TIMEOUT_MULT]; card->ext_csd.hc_erase_size = ext_csd[EXT_CSD_HC_ERASE_GRP_SIZE] << 10; + + card->ext_csd.rel_sectors = ext_csd[EXT_CSD_REL_WR_SEC_C]; } if (card->ext_csd.rev >= 4) { @@ -351,6 +353,9 @@ static int mmc_read_ext_csd(struct mmc_card *card) ext_csd[EXT_CSD_TRIM_MULT]; } + if (card->ext_csd.rev >= 5) + card->ext_csd.rel_param = ext_csd[EXT_CSD_WR_REL_PARAM]; + if (ext_csd[EXT_CSD_ERASED_MEM_CONT]) card->erased_byte = 0xFF; else diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 557b73263390..c4e96fa5fb2b 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -45,6 +45,8 @@ struct mmc_ext_csd { u8 rev; u8 erase_group_def; u8 sec_feature_support; + u8 rel_sectors; + u8 rel_param; u8 bootconfig; unsigned int sa_timeout; /* Units: 100ns */ unsigned int hs_max_dtr; diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index b5ec88fd1352..390aa6eef676 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -255,6 +255,7 @@ struct _mmc_csd { #define EXT_CSD_PARTITION_ATTRIBUTE 156 /* R/W */ #define EXT_CSD_PARTITION_SUPPORT 160 /* RO */ +#define EXT_CSD_WR_REL_PARAM 166 /* RO */ #define EXT_CSD_ERASE_GROUP_DEF 175 /* R/W */ #define EXT_CSD_BOOT_CONFIG 179 /* R/W */ #define EXT_CSD_ERASED_MEM_CONT 181 /* RO */ @@ -265,6 +266,7 @@ struct _mmc_csd { #define EXT_CSD_CARD_TYPE 196 /* RO */ #define EXT_CSD_SEC_CNT 212 /* RO, 4 bytes */ #define EXT_CSD_S_A_TIMEOUT 217 /* RO */ +#define EXT_CSD_REL_WR_SEC_C 222 /* RO */ #define EXT_CSD_HC_WP_GRP_SIZE 221 /* RO */ #define EXT_CSD_ERASE_TIMEOUT_MULT 223 /* RO */ #define EXT_CSD_HC_ERASE_GRP_SIZE 224 /* RO */ @@ -277,6 +279,8 @@ struct _mmc_csd { * EXT_CSD field definitions */ +#define EXT_CSD_WR_REL_PARAM_EN (1<<2) + #define EXT_CSD_CMD_SET_NORMAL (1<<0) #define EXT_CSD_CMD_SET_SECURE (1<<1) #define EXT_CSD_CMD_SET_CPSECURE (1<<2) -- cgit v1.2.3 From a5e9425d2010978c5f85986cc70a9fa0c0d5b912 Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen Date: Tue, 5 Apr 2011 17:43:20 +0300 Subject: mmc: mmc_card_keep_power cleanups mmc_card_is_powered_resumed is a mouthful; instead, simply use mmc_card_keep_power, 
which also better explains the purpose of the macro. Employ mmc_card_keep_power() where possible. Signed-off-by: Ohad Ben-Cohen Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 4 ++-- drivers/mmc/core/sdio.c | 10 +++++----- include/linux/mmc/host.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 1f453acc8682..c2350e474159 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1746,7 +1746,7 @@ int mmc_suspend_host(struct mmc_host *host) } mmc_bus_put(host); - if (!err && !(host->pm_flags & MMC_PM_KEEP_POWER)) + if (!err && !mmc_card_keep_power(host)) mmc_power_off(host); return err; @@ -1764,7 +1764,7 @@ int mmc_resume_host(struct mmc_host *host) mmc_bus_get(host); if (host->bus_ops && !host->bus_dead) { - if (!(host->pm_flags & MMC_PM_KEEP_POWER)) { + if (!mmc_card_keep_power(host)) { mmc_power_up(host); mmc_select_voltage(host, host->ocr); /* diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index db0f0b44d684..0f7d4362d213 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -625,7 +625,7 @@ static int mmc_sdio_suspend(struct mmc_host *host) } } - if (!err && host->pm_flags & MMC_PM_KEEP_POWER) { + if (!err && mmc_card_keep_power(host)) { mmc_claim_host(host); sdio_disable_wide(host->card); mmc_release_host(host); @@ -645,10 +645,10 @@ static int mmc_sdio_resume(struct mmc_host *host) mmc_claim_host(host); /* No need to reinitialize powered-resumed nonremovable cards */ - if (mmc_card_is_removable(host) || !mmc_card_is_powered_resumed(host)) + if (mmc_card_is_removable(host) || !mmc_card_keep_power(host)) err = mmc_sdio_init_card(host, host->ocr, host->card, - (host->pm_flags & MMC_PM_KEEP_POWER)); - else if (mmc_card_is_powered_resumed(host)) { + mmc_card_keep_power(host)); + else if (mmc_card_keep_power(host)) { /* We may have switched to 1-bit mode during suspend */ err = sdio_enable_4bit_bus(host->card); if (err > 0) { @@ -691,7 +691,7 @@ static int mmc_sdio_power_restore(struct mmc_host *host) mmc_claim_host(host); ret = mmc_sdio_init_card(host, host->ocr, host->card, - (host->pm_flags & MMC_PM_KEEP_POWER)); + mmc_card_keep_power(host)); if (!ret && host->sdio_irqs) mmc_signal_sdio_irq(host); mmc_release_host(host); diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index eb792cb6d745..8ad3a9c6f495 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -320,7 +320,7 @@ static inline int mmc_card_is_removable(struct mmc_host *host) return !(host->caps & MMC_CAP_NONREMOVABLE) && mmc_assume_removable; } -static inline int mmc_card_is_powered_resumed(struct mmc_host *host) +static inline int mmc_card_keep_power(struct mmc_host *host) { return host->pm_flags & MMC_PM_KEEP_POWER; } -- cgit v1.2.3 From 33b53716bc4b3ff3da2bc41581226424443f9d5a Mon Sep 17 00:00:00 2001 From: Grant Erickson Date: Fri, 8 Apr 2011 08:51:32 -0700 Subject: mtd: create function to perform large allocations Introduce a common function to handle large, contiguous kmalloc buffer allocations by exponentially backing off on the size of the requested kernel transfer buffer until it succeeds or until the requested transfer buffer size falls below the page size. This helps ensure the operation can succeed under low-memory, highly- fragmented situations albeit somewhat more slowly. 
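A minimal caller-side sketch of the helper this patch adds (only mtd_kmalloc_up_to() and its in/out size parameter come from the patch; the surrounding read loop, names and error handling are illustrative assumptions):

	/* Illustrative sketch, not part of the patch. */
	static int example_mtd_read(struct mtd_info *mtd, loff_t from,
				    size_t count, u_char *dst)
	{
		size_t size = count;	/* ask for the ideal transfer size */
		size_t retlen, done = 0;
		void *kbuf;
		int err = 0;

		/* may return a smaller, writesize-aligned buffer; 'size' is updated */
		kbuf = mtd_kmalloc_up_to(mtd, &size);
		if (!kbuf)
			return -ENOMEM;

		while (done < count && !err) {
			size_t chunk = min(size, count - done);

			err = mtd->read(mtd, from + done, chunk, &retlen, kbuf);
			if (!err) {
				memcpy(dst + done, kbuf, retlen);
				done += retlen;
			}
		}

		kfree(kbuf);
		return err;
	}
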
Artem: so this patch solves the problem that the kernel tries to kmalloc too large buffers, which (a) may fail and does fail - people complain about this, and (b) slows down the system in case of high memory fragmentation, because the kernel starts dropping caches, writing back, swapping, etc. But we do not really have to allocate a lot of memory to do the I/O, we may do this even with as little as one min. I/O unit (NAND page) of RAM. So the idea of this patch is that if the user asks to read or write a lot, we try to kmalloc a lot, with GFP flags which make the kernel _not_ drop caches, etc. If we can allocate it - good, if not - we try to allocate twice as less, and so on, until we reach the min. I/O unit size, which is our last resort allocation and use the normal GFP_KERNEL flag. Artem: re-write the allocation function so that it makes sure the allocated buffer is aligned to the min. I/O size of the flash. Signed-off-by: Grant Erickson Tested-by: Ben Gardiner Tested-by: Stefano Babic Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/mtdcore.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/mtd.h | 2 ++ 2 files changed, 51 insertions(+) (limited to 'include/linux') diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index da69bc8a5a7d..a50348b60d79 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -638,6 +638,54 @@ int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, return ret; } +/** + * mtd_kmalloc_up_to - allocate a contiguous buffer up to the specified size + * @size: A pointer to the ideal or maximum size of the allocation. Points + * to the actual allocation size on success. + * + * This routine attempts to allocate a contiguous kernel buffer up to + * the specified size, backing off the size of the request exponentially + * until the request succeeds or until the allocation size falls below + * the system page size. This attempts to make sure it does not adversely + * impact system performance, so when allocating more than one page, we + * ask the memory allocator to avoid re-trying, swapping, writing back + * or performing I/O. + * + * Note, this function also makes sure that the allocated buffer is aligned to + * the MTD device's min. I/O unit, i.e. the "mtd->writesize" value. + * + * This is called, for example by mtd_{read,write} and jffs2_scan_medium, + * to handle smaller (i.e. degraded) buffer allocations under low- or + * fragmented-memory situations where such reduced allocations, from a + * requested ideal, are allowed. + * + * Returns a pointer to the allocated buffer on success; otherwise, NULL. + */ +void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size) +{ + gfp_t flags = __GFP_NOWARN | __GFP_WAIT | + __GFP_NORETRY | __GFP_NO_KSWAPD; + size_t min_alloc = max_t(size_t, mtd->writesize, PAGE_SIZE); + void *kbuf; + + *size = min_t(size_t, *size, KMALLOC_MAX_SIZE); + + while (*size > min_alloc) { + kbuf = kmalloc(*size, flags); + if (kbuf) + return kbuf; + + *size >>= 1; + *size = ALIGN(*size, mtd->writesize); + } + + /* + * For the last resort allocation allow 'kmalloc()' to do all sorts of + * things (write-back, dropping caches, etc) by using GFP_KERNEL. 
+ */ + return kmalloc(*size, GFP_KERNEL); +} + EXPORT_SYMBOL_GPL(add_mtd_device); EXPORT_SYMBOL_GPL(del_mtd_device); EXPORT_SYMBOL_GPL(get_mtd_device); @@ -648,6 +696,7 @@ EXPORT_SYMBOL_GPL(__put_mtd_device); EXPORT_SYMBOL_GPL(register_mtd_user); EXPORT_SYMBOL_GPL(unregister_mtd_user); EXPORT_SYMBOL_GPL(default_mtd_writev); +EXPORT_SYMBOL_GPL(mtd_kmalloc_up_to); #ifdef CONFIG_PROC_FS diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 9d5306bad117..06b489a7605b 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -348,6 +348,8 @@ int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, int default_mtd_readv(struct mtd_info *mtd, struct kvec *vecs, unsigned long count, loff_t from, size_t *retlen); +void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size); + #ifdef CONFIG_MTD_PARTITIONS void mtd_erase_callback(struct erase_info *instr); #else -- cgit v1.2.3 From 6b93d01fe5971951911a070f51f412d50e9536dc Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen Date: Tue, 5 Apr 2011 17:43:21 +0300 Subject: mmc: do not switch to 1-bit mode if not required 6b5eda36 followed SDIO spec part E1 section 8, which states that in case SDIO interrupts are being used to wake up a suspended host, then it is required to switch to 1-bit mode before stopping the clock. Before switching to 1-bit mode (or back to 4-bit mode on resume), make sure that SDIO interrupts are really being used to wake the host. This is helpful for devices which have an external irq line (e.g. wl1271), and do not use SDIO interrupts to wake up the host. In this case, switching to 1-bit mode (and back to 4-bit mode on resume) is not necessary. Reported-by: Eliad Peller Signed-off-by: Ohad Ben-Cohen Signed-off-by: Chris Ball --- drivers/mmc/core/sdio.c | 4 ++-- include/linux/mmc/host.h | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 0f7d4362d213..4221670bf82e 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -625,7 +625,7 @@ static int mmc_sdio_suspend(struct mmc_host *host) } } - if (!err && mmc_card_keep_power(host)) { + if (!err && mmc_card_keep_power(host) && mmc_card_wake_sdio_irq(host)) { mmc_claim_host(host); sdio_disable_wide(host->card); mmc_release_host(host); @@ -648,7 +648,7 @@ static int mmc_sdio_resume(struct mmc_host *host) if (mmc_card_is_removable(host) || !mmc_card_keep_power(host)) err = mmc_sdio_init_card(host, host->ocr, host->card, mmc_card_keep_power(host)); - else if (mmc_card_keep_power(host)) { + else if (mmc_card_keep_power(host) && mmc_card_wake_sdio_irq(host)) { /* We may have switched to 1-bit mode during suspend */ err = sdio_enable_4bit_bus(host->card); if (err > 0) { diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 8ad3a9c6f495..0fffa5cdc183 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -325,5 +325,9 @@ static inline int mmc_card_keep_power(struct mmc_host *host) return host->pm_flags & MMC_PM_KEEP_POWER; } +static inline int mmc_card_wake_sdio_irq(struct mmc_host *host) +{ + return host->pm_flags & MMC_PM_WAKE_SDIO_IRQ; +} #endif -- cgit v1.2.3 From eab4068795d670b065164096805cbf15a19e9690 Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen Date: Tue, 5 Apr 2011 17:50:14 +0300 Subject: mmc: add MMC_QUIRK_NONSTD_FUNC_IF Introduce MMC_QUIRK_NONSTD_FUNC_IF to ignore the "SDIO Standard Function interface code" as indicated by the card's FBR, and instead treat all functions as non-standard 
interfaces. This is required to prevent standard drivers from facing errors when trying to communicate with SDIO cards that erroneously indicate standard function interface codes. Signed-off-by: Ohad Ben-Cohen Signed-off-by: Chris Ball --- drivers/mmc/core/sdio.c | 6 ++++++ include/linux/mmc/card.h | 6 ++++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 4221670bf82e..c3c2bd0874e8 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "core.h" #include "bus.h" @@ -31,6 +32,11 @@ static int sdio_read_fbr(struct sdio_func *func) int ret; unsigned char data; + if (mmc_card_nonstd_func_interface(func->card)) { + func->class = SDIO_CLASS_NONE; + return 0; + } + ret = mmc_io_rw_direct(func->card, 0, 0, SDIO_FBR_BASE(func->num) + SDIO_FBR_STD_IF, 0, &data); if (ret) diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index c4e96fa5fb2b..317a0029e7d7 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -128,6 +128,7 @@ struct mmc_card { #define MMC_QUIRK_NONSTD_SDIO (1<<2) /* non-standard SDIO card attached */ /* (missing CIA registers) */ #define MMC_QUIRK_BROKEN_CLK_GATING (1<<3) /* clock gating the sdio bus will make card fail */ +#define MMC_QUIRK_NONSTD_FUNC_IF (1<<4) /* SDIO card has nonstd function interfaces */ unsigned int erase_size; /* erase size in sectors */ unsigned int erase_shift; /* if erase unit is power 2 */ @@ -183,6 +184,11 @@ static inline int mmc_blksz_for_byte_mode(const struct mmc_card *c) return c->quirks & MMC_QUIRK_BLKSZ_FOR_BYTE_MODE; } +static inline int mmc_card_nonstd_func_interface(const struct mmc_card *c) +{ + return c->quirks & MMC_QUIRK_NONSTD_FUNC_IF; +} + #define mmc_card_name(c) ((c)->cid.prod_name) #define mmc_card_id(c) (dev_name(&(c)->dev)) -- cgit v1.2.3 From 2059a02dcb84236f9db9197fa9b00418d7b8465b Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen Date: Tue, 5 Apr 2011 18:02:25 +0300 Subject: mmc: add MMC_QUIRK_DISABLE_CD 006ebd5d introduced sdio_disable_cd(), which disconnects the pull-up resistor on CD/DAT[3] (pin 1) of the card. Make it possible to start using sdio_disable_cd() by introducing MMC_QUIRK_DISABLE_CD. 
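Pulling the pieces together from the surrounding diffs, the quirk is set by a per-card fixup entry and consulted through a small helper, roughly as follows (both fragments appear in patches in this series and are collected here only for illustration):

	/* fixup table entry (TI wl1271): mark the card when it is detected */
	{ SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271,
	  add_quirk, MMC_QUIRK_DISABLE_CD },

	/* helper used by sdio_disable_cd() to test the bit */
	static inline int mmc_card_disable_cd(const struct mmc_card *c)
	{
		return c->quirks & MMC_QUIRK_DISABLE_CD;
	}
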
Signed-off-by: Ohad Ben-Cohen Signed-off-by: Chris Ball --- drivers/mmc/core/sdio.c | 2 +- include/linux/mmc/card.h | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index c3c2bd0874e8..a5840c0de2e4 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -187,7 +187,7 @@ static int sdio_disable_cd(struct mmc_card *card) int ret; u8 ctrl; - if (!card->cccr.disable_cd) + if (!mmc_card_disable_cd(card)) return 0; ret = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_IF, 0, &ctrl); diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 317a0029e7d7..2a7e54970c93 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -129,6 +129,7 @@ struct mmc_card { /* (missing CIA registers) */ #define MMC_QUIRK_BROKEN_CLK_GATING (1<<3) /* clock gating the sdio bus will make card fail */ #define MMC_QUIRK_NONSTD_FUNC_IF (1<<4) /* SDIO card has nonstd function interfaces */ +#define MMC_QUIRK_DISABLE_CD (1<<5) /* disconnect CD/DAT[3] resistor */ unsigned int erase_size; /* erase size in sectors */ unsigned int erase_shift; /* if erase unit is power 2 */ @@ -184,6 +185,11 @@ static inline int mmc_blksz_for_byte_mode(const struct mmc_card *c) return c->quirks & MMC_QUIRK_BLKSZ_FOR_BYTE_MODE; } +static inline int mmc_card_disable_cd(const struct mmc_card *c) +{ + return c->quirks & MMC_QUIRK_DISABLE_CD; +} + static inline int mmc_card_nonstd_func_interface(const struct mmc_card *c) { return c->quirks & MMC_QUIRK_NONSTD_FUNC_IF; -- cgit v1.2.3 From 32780cd1350e651e68bdf33b7f5b009d21d5b794 Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Mon, 11 Apr 2011 17:02:15 -0500 Subject: mmc: quirks: Extends card quirks with MMC/SD quirks matching the CID. The current mechanism is SDIO-only. This allows us to create function-specific quirks, without creating messy Kconfig dependencies, or polluting core/ with function-specific code. Signed-off-by: Andrei Warkentin Acked-by: Arnd Bergmann Signed-off-by: Chris Ball --- drivers/mmc/core/core.h | 2 - drivers/mmc/core/quirks.c | 93 +++++++++++++++++++++-------------------------- drivers/mmc/core/sdio.c | 2 +- include/linux/mmc/card.h | 91 +++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 133 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index 20b1c0831eac..ca1fdde29df6 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -61,8 +61,6 @@ int mmc_attach_mmc(struct mmc_host *host); int mmc_attach_sd(struct mmc_host *host); int mmc_attach_sdio(struct mmc_host *host); -void mmc_fixup_device(struct mmc_card *card); - /* Module parameters */ extern int use_spi_crc; diff --git a/drivers/mmc/core/quirks.c b/drivers/mmc/core/quirks.c index a4c42edc6cb3..3a596217029e 100644 --- a/drivers/mmc/core/quirks.c +++ b/drivers/mmc/core/quirks.c @@ -1,7 +1,8 @@ /* - * This file contains work-arounds for many known sdio hardware - * bugs. + * This file contains work-arounds for many known SD/MMC + * and SDIO hardware bugs. * + * Copyright (c) 2011 Andrei Warkentin * Copyright (c) 2011 Pierre Tardy * Inspired from pci fixup code: * Copyright (c) 1999 Martin Mares @@ -11,34 +12,14 @@ #include #include #include -#include -/* - * The world is not perfect and supplies us with broken mmc/sdio devices. 
- * For at least a part of these bugs we need a work-around - */ - -struct mmc_fixup { - u16 vendor, device; /* You can use SDIO_ANY_ID here of course */ - void (*vendor_fixup)(struct mmc_card *card, int data); - int data; -}; - -/* - * This hook just adds a quirk unconditionnally - */ -static void __maybe_unused add_quirk(struct mmc_card *card, int data) -{ - card->quirks |= data; -} +#ifndef SDIO_VENDOR_ID_TI +#define SDIO_VENDOR_ID_TI 0x0097 +#endif -/* - * This hook just removes a quirk unconditionnally - */ -static void __maybe_unused remove_quirk(struct mmc_card *card, int data) -{ - card->quirks &= ~data; -} +#ifndef SDIO_DEVICE_ID_TI_WL1271 +#define SDIO_DEVICE_ID_TI_WL1271 0x4076 +#endif /* * This hook just adds a quirk for all sdio devices @@ -49,37 +30,47 @@ static void add_quirk_for_sdio_devices(struct mmc_card *card, int data) card->quirks |= data; } -#ifndef SDIO_VENDOR_ID_TI -#define SDIO_VENDOR_ID_TI 0x0097 -#endif - -#ifndef SDIO_DEVICE_ID_TI_WL1271 -#define SDIO_DEVICE_ID_TI_WL1271 0x4076 -#endif - static const struct mmc_fixup mmc_fixup_methods[] = { /* by default sdio devices are considered CLK_GATING broken */ /* good cards will be whitelisted as they are tested */ - { SDIO_ANY_ID, SDIO_ANY_ID, - add_quirk_for_sdio_devices, MMC_QUIRK_BROKEN_CLK_GATING }, - { SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271, - remove_quirk, MMC_QUIRK_BROKEN_CLK_GATING }, - { SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271, - add_quirk, MMC_QUIRK_NONSTD_FUNC_IF }, - { SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271, - add_quirk, MMC_QUIRK_DISABLE_CD }, - { 0 } + SDIO_FIXUP(SDIO_ANY_ID, SDIO_ANY_ID, + add_quirk_for_sdio_devices, + MMC_QUIRK_BROKEN_CLK_GATING), + + SDIO_FIXUP(SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271, + remove_quirk, MMC_QUIRK_BROKEN_CLK_GATING), + + SDIO_FIXUP(SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271, + add_quirk, MMC_QUIRK_NONSTD_FUNC_IF), + + SDIO_FIXUP(SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271, + add_quirk, MMC_QUIRK_DISABLE_CD), + + END_FIXUP }; -void mmc_fixup_device(struct mmc_card *card) +void mmc_fixup_device(struct mmc_card *card, const struct mmc_fixup *table) { const struct mmc_fixup *f; + u64 rev = cid_rev_card(card); + + /* Non-core specific workarounds. 
*/ + if (!table) + table = mmc_fixup_methods; - for (f = mmc_fixup_methods; f->vendor_fixup; f++) { - if ((f->vendor == card->cis.vendor - || f->vendor == (u16) SDIO_ANY_ID) && - (f->device == card->cis.device - || f->device == (u16) SDIO_ANY_ID)) { + for (f = table; f->vendor_fixup; f++) { + if ((f->manfid == CID_MANFID_ANY || + f->manfid == card->cid.manfid) && + (f->oemid == CID_OEMID_ANY || + f->oemid == card->cid.oemid) && + (f->name == CID_NAME_ANY || + !strncmp(f->name, card->cid.prod_name, + sizeof(card->cid.prod_name))) && + (f->cis_vendor == card->cis.vendor || + f->cis_vendor == (u16) SDIO_ANY_ID) && + (f->cis_device == card->cis.device || + f->cis_device == (u16) SDIO_ANY_ID) && + rev >= f->rev_start && rev <= f->rev_end) { dev_dbg(&card->dev, "calling %pF\n", f->vendor_fixup); f->vendor_fixup(card, f->data); } diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index a5840c0de2e4..1e6095961500 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -472,7 +472,7 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr, card = oldcard; } - mmc_fixup_device(card); + mmc_fixup_device(card, NULL); if (card->type == MMC_TYPE_SD_COMBO) { err = mmc_sd_setup_card(host, card, oldcard != NULL); diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 2a7e54970c93..c6513175f7f1 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -11,6 +11,7 @@ #define LINUX_MMC_CARD_H #include +#include struct mmc_cid { unsigned int manfid; @@ -157,7 +158,92 @@ struct mmc_card { struct dentry *debugfs_root; }; -void mmc_fixup_device(struct mmc_card *dev); +/* + * The world is not perfect and supplies us with broken mmc/sdio devices. + * For at least some of these bugs we need a work-around. + */ + +struct mmc_fixup { + /* CID-specific fields. */ + const char *name; + + /* Valid revision range */ + u64 rev_start, rev_end; + + unsigned int manfid; + unsigned short oemid; + + /* SDIO-specfic fields. You can use SDIO_ANY_ID here of course */ + u16 cis_vendor, cis_device; + + void (*vendor_fixup)(struct mmc_card *card, int data); + int data; +}; + +#define CID_MANFID_ANY (-1ul) +#define CID_OEMID_ANY ((unsigned short) -1) +#define CID_NAME_ANY (NULL) + +#define END_FIXUP { 0 } + +#define _FIXUP_EXT(_name, _manfid, _oemid, _rev_start, _rev_end, \ + _cis_vendor, _cis_device, \ + _fixup, _data) \ + { \ + .name = (_name), \ + .manfid = (_manfid), \ + .oemid = (_oemid), \ + .rev_start = (_rev_start), \ + .rev_end = (_rev_end), \ + .cis_vendor = (_cis_vendor), \ + .cis_device = (_cis_device), \ + .vendor_fixup = (_fixup), \ + .data = (_data), \ + } + +#define MMC_FIXUP_REV(_name, _manfid, _oemid, _rev_start, _rev_end, \ + _fixup, _data) \ + _FIXUP_EXT(_name, _manfid, \ + _oemid, _rev_start, _rev_end, \ + SDIO_ANY_ID, SDIO_ANY_ID, \ + _fixup, _data) \ + +#define MMC_FIXUP(_name, _manfid, _oemid, _fixup, _data) \ + MMC_FIXUP_REV(_name, _manfid, _oemid, 0, -1ull, _fixup, _data) + +#define SDIO_FIXUP(_vendor, _device, _fixup, _data) \ + _FIXUP_EXT(CID_NAME_ANY, CID_MANFID_ANY, \ + CID_OEMID_ANY, 0, -1ull, \ + _vendor, _device, \ + _fixup, _data) \ + +#define cid_rev(hwrev, fwrev, year, month) \ + (((u64) hwrev) << 40 | \ + ((u64) fwrev) << 32 | \ + ((u64) year) << 16 | \ + ((u64) month)) + +#define cid_rev_card(card) \ + cid_rev(card->cid.hwrev, \ + card->cid.fwrev, \ + card->cid.year, \ + card->cid.month) + +/* + * This hook just adds a quirk unconditionally. 
+ */ +static inline void __maybe_unused add_quirk(struct mmc_card *card, int data) +{ + card->quirks |= data; +} + +/* + * This hook just removes a quirk unconditionally. + */ +static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data) +{ + card->quirks &= ~data; +} #define mmc_card_mmc(c) ((c)->type == MMC_TYPE_MMC) #define mmc_card_sd(c) ((c)->type == MMC_TYPE_SD) @@ -218,4 +304,7 @@ struct mmc_driver { extern int mmc_register_driver(struct mmc_driver *); extern void mmc_unregister_driver(struct mmc_driver *); +extern void mmc_fixup_device(struct mmc_card *card, + const struct mmc_fixup *table); + #endif -- cgit v1.2.3 From 853c6cac0dc0d9d330deb5b48c19eebafaed1841 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 12 Apr 2011 12:59:09 -0400 Subject: mmc: quirks: fix truncation warnings Fix data truncation warnings: .manfid is not unsigned long: drivers/mmc/core/quirks.c:36: warning: large integer implicitly truncated to unsigned type drivers/mmc/core/quirks.c:40: warning: large integer implicitly truncated to unsigned type drivers/mmc/core/quirks.c:43: warning: large integer implicitly truncated to unsigned type drivers/mmc/core/quirks.c:46: warning: large integer implicitly truncated to unsigned type Signed-off-by: Randy Dunlap Signed-off-by: Chris Ball --- include/linux/mmc/card.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index c6513175f7f1..937f852cf01e 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -180,7 +180,7 @@ struct mmc_fixup { int data; }; -#define CID_MANFID_ANY (-1ul) +#define CID_MANFID_ANY (-1u) #define CID_OEMID_ANY ((unsigned short) -1) #define CID_NAME_ANY (NULL) -- cgit v1.2.3 From eaa02f751ff4f8abfc2e55a15c20a5a274244418 Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Mon, 11 Apr 2011 16:13:41 -0500 Subject: mmc: core: Rename erase_timeout to cmd_timeout_ms. Renames erase_timeout to cmd_timeout_ms inside struct mmc_command. First step to making host honor timeouts for non-data-transfer commands. Cleans up erase timeout code. 
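To illustrate the direction this takes (the erase-path assignment is from this patch; the host-controller snippet is a hypothetical sketch, since no host driver is converted here and the names example_host and EXAMPLE_BUSY_TIMEOUT_REG are invented):

	/* core: the erase path now stores its timeout in the command itself */
	cmd.opcode = MMC_ERASE;
	cmd.arg = arg;
	cmd.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
	cmd.cmd_timeout_ms = mmc_erase_timeout(card, arg, qty);

	/* host (hypothetical): a driver could program its busy timeout from it */
	static void example_set_busy_timeout(struct example_host *host,
					     struct mmc_command *cmd)
	{
		unsigned int ms = cmd->cmd_timeout_ms ?: 1000;

		writel(ms, host->base + EXAMPLE_BUSY_TIMEOUT_REG);
	}
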
Signed-off-by: Andrei Warkentin Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 39 +++++++++++++++++++++------------------ include/linux/mmc/core.h | 2 +- 2 files changed, 22 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index c2350e474159..5178d5daa5f4 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1187,9 +1187,8 @@ void mmc_init_erase(struct mmc_card *card) } } -static void mmc_set_mmc_erase_timeout(struct mmc_card *card, - struct mmc_command *cmd, - unsigned int arg, unsigned int qty) +static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card, + unsigned int arg, unsigned int qty) { unsigned int erase_timeout; @@ -1246,38 +1245,42 @@ static void mmc_set_mmc_erase_timeout(struct mmc_card *card, if (mmc_host_is_spi(card->host) && erase_timeout < 1000) erase_timeout = 1000; - cmd->erase_timeout = erase_timeout; + return erase_timeout; } -static void mmc_set_sd_erase_timeout(struct mmc_card *card, - struct mmc_command *cmd, unsigned int arg, - unsigned int qty) +static unsigned int mmc_sd_erase_timeout(struct mmc_card *card, + unsigned int arg, + unsigned int qty) { + unsigned int erase_timeout; + if (card->ssr.erase_timeout) { /* Erase timeout specified in SD Status Register (SSR) */ - cmd->erase_timeout = card->ssr.erase_timeout * qty + - card->ssr.erase_offset; + erase_timeout = card->ssr.erase_timeout * qty + + card->ssr.erase_offset; } else { /* * Erase timeout not specified in SD Status Register (SSR) so * use 250ms per write block. */ - cmd->erase_timeout = 250 * qty; + erase_timeout = 250 * qty; } /* Must not be less than 1 second */ - if (cmd->erase_timeout < 1000) - cmd->erase_timeout = 1000; + if (erase_timeout < 1000) + erase_timeout = 1000; + + return erase_timeout; } -static void mmc_set_erase_timeout(struct mmc_card *card, - struct mmc_command *cmd, unsigned int arg, - unsigned int qty) +static unsigned int mmc_erase_timeout(struct mmc_card *card, + unsigned int arg, + unsigned int qty) { if (mmc_card_sd(card)) - mmc_set_sd_erase_timeout(card, cmd, arg, qty); + return mmc_sd_erase_timeout(card, arg, qty); else - mmc_set_mmc_erase_timeout(card, cmd, arg, qty); + return mmc_mmc_erase_timeout(card, arg, qty); } static int mmc_do_erase(struct mmc_card *card, unsigned int from, @@ -1351,7 +1354,7 @@ static int mmc_do_erase(struct mmc_card *card, unsigned int from, cmd.opcode = MMC_ERASE; cmd.arg = arg; cmd.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC; - mmc_set_erase_timeout(card, &cmd, arg, qty); + cmd.cmd_timeout_ms = mmc_erase_timeout(card, arg, qty); err = mmc_wait_for_cmd(card->host, &cmd, 0); if (err) { printk(KERN_ERR "mmc_erase: erase error %d, status %#x\n", diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 07f27af4dba5..811e96e8d1fe 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -92,7 +92,7 @@ struct mmc_command { * actively failing requests */ - unsigned int erase_timeout; /* in milliseconds */ + unsigned int cmd_timeout_ms; /* in milliseconds */ struct mmc_data *data; /* data segment associated with cmd */ struct mmc_request *mrq; /* associated request */ -- cgit v1.2.3 From d3a8d95dcbb726b9cf0bbc166b2473bdd236c88c Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Mon, 11 Apr 2011 16:13:43 -0500 Subject: mmc: core: Allow setting CMD timeout for CMD6 (SWITCH). CMD6 is an R1B-type command, where DAT is used as busy. Depending on register written using CMD6, timeout value can be different as per spec. 
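A short usage sketch of the extended helper (both calls mirror call sites appearing later in this series; a timeout of zero means "use the maximum possible timeout"):

	/* bound the busy wait with the EXT_CSD partition switch time */
	err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_PART_CONFIG,
			 card->ext_csd.part_config, card->ext_csd.part_time);

	/* or leave the timeout to the core/host by passing 0 */
	err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_HS_TIMING, 1, 0);
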
Signed-off-by: Andrei Warkentin Signed-off-by: Chris Ball --- drivers/mmc/core/mmc.c | 14 ++++++++------ drivers/mmc/core/mmc_ops.c | 16 +++++++++++++++- drivers/mmc/core/mmc_ops.h | 1 - include/linux/mmc/core.h | 1 + 4 files changed, 24 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index ae6b8fd38800..396cb23625d2 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -548,7 +548,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, */ if (card->ext_csd.enhanced_area_en) { err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_ERASE_GROUP_DEF, 1); + EXT_CSD_ERASE_GROUP_DEF, 1, 0); if (err && err != -EBADMSG) goto free_card; @@ -579,7 +579,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, if (card->ext_csd.bootconfig & 0x7) { card->ext_csd.bootconfig &= ~0x7; mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BOOT_CONFIG, - card->ext_csd.bootconfig); + card->ext_csd.bootconfig, 0); } /* @@ -588,7 +588,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, if ((card->ext_csd.hs_max_dtr != 0) && (host->caps & MMC_CAP_MMC_HIGHSPEED)) { err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_HS_TIMING, 1); + EXT_CSD_HS_TIMING, 1, 0); if (err && err != -EBADMSG) goto free_card; @@ -655,7 +655,8 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, ddr = 0; /* no DDR for 1-bit width */ err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BUS_WIDTH, - ext_csd_bits[idx][0]); + ext_csd_bits[idx][0], + 0); if (!err) { mmc_set_bus_width_ddr(card->host, bus_width, MMC_SDR_MODE); @@ -674,8 +675,9 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, if (!err && ddr) { err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_BUS_WIDTH, - ext_csd_bits[idx][1]); + EXT_CSD_BUS_WIDTH, + ext_csd_bits[idx][1], + 0); } if (err) { printk(KERN_WARNING "%s: switch to bus width %d ddr %d " diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c index f3b22bf89cc9..a2bae625332a 100644 --- a/drivers/mmc/core/mmc_ops.c +++ b/drivers/mmc/core/mmc_ops.c @@ -387,7 +387,19 @@ int mmc_spi_set_crc(struct mmc_host *host, int use_crc) return err; } -int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value) +/** + * mmc_switch - modify EXT_CSD register + * @card: the MMC card associated with the data transfer + * @set: cmd set values + * @index: EXT_CSD register index + * @value: value to program into EXT_CSD register + * @timeout_ms: timeout (ms) for operation performed by register write, + * timeout of zero implies maximum possible timeout + * + * Modifies the EXT_CSD register for selected card. 
+ */ +int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value, + unsigned int timeout_ms) { int err; struct mmc_command cmd; @@ -404,6 +416,7 @@ int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value) (value << 8) | set; cmd.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC; + cmd.cmd_timeout_ms = timeout_ms; err = mmc_wait_for_cmd(card->host, &cmd, MMC_CMD_RETRIES); if (err) @@ -433,6 +446,7 @@ int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value) return 0; } +EXPORT_SYMBOL_GPL(mmc_switch); int mmc_send_status(struct mmc_card *card, u32 *status) { diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h index e6d44b8a18db..9276946fa5b7 100644 --- a/drivers/mmc/core/mmc_ops.h +++ b/drivers/mmc/core/mmc_ops.h @@ -20,7 +20,6 @@ int mmc_all_send_cid(struct mmc_host *host, u32 *cid); int mmc_set_relative_addr(struct mmc_card *card); int mmc_send_csd(struct mmc_card *card, u32 *csd); int mmc_send_ext_csd(struct mmc_card *card, u8 *ext_csd); -int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value); int mmc_send_status(struct mmc_card *card, u32 *status); int mmc_send_cid(struct mmc_host *host, u32 *cid); int mmc_spi_read_ocr(struct mmc_host *host, int highcap, u32 *ocrp); diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 811e96e8d1fe..f8e4bcbd2846 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -135,6 +135,7 @@ extern void mmc_wait_for_req(struct mmc_host *, struct mmc_request *); extern int mmc_wait_for_cmd(struct mmc_host *, struct mmc_command *, int); extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *, struct mmc_command *, int); +extern int mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int); #define MMC_ERASE_ARG 0x00000000 #define MMC_SECURE_ERASE_ARG 0x80000000 -- cgit v1.2.3 From 371a689f64b0da140c3bcd3f55305ffa1c3a58ef Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Mon, 11 Apr 2011 18:10:25 -0500 Subject: mmc: MMC boot partitions support. Allows device MMC boot partitions to be accessed. MMC partitions are treated effectively as separate block devices on the same MMC card. Signed-off-by: Andrei Warkentin Acked-by: Arnd Bergmann Signed-off-by: Chris Ball --- Documentation/mmc/00-INDEX | 2 + Documentation/mmc/mmc-dev-attrs.txt | 10 ++ Documentation/mmc/mmc-dev-parts.txt | 27 ++++ drivers/mmc/card/block.c | 274 +++++++++++++++++++++++++++++++----- drivers/mmc/core/mmc.c | 22 ++- include/linux/mmc/card.h | 4 +- include/linux/mmc/mmc.h | 8 +- 7 files changed, 305 insertions(+), 42 deletions(-) create mode 100644 Documentation/mmc/mmc-dev-parts.txt (limited to 'include/linux') diff --git a/Documentation/mmc/00-INDEX b/Documentation/mmc/00-INDEX index fca586f5b853..93dd7a714075 100644 --- a/Documentation/mmc/00-INDEX +++ b/Documentation/mmc/00-INDEX @@ -2,3 +2,5 @@ - this file mmc-dev-attrs.txt - info on SD and MMC device attributes +mmc-dev-parts.txt + - info on SD and MMC device partitions diff --git a/Documentation/mmc/mmc-dev-attrs.txt b/Documentation/mmc/mmc-dev-attrs.txt index ff2bd685bced..8898a95b41e5 100644 --- a/Documentation/mmc/mmc-dev-attrs.txt +++ b/Documentation/mmc/mmc-dev-attrs.txt @@ -1,3 +1,13 @@ +SD and MMC Block Device Attributes +================================== + +These attributes are defined for the block devices associated with the +SD or MMC device. + +The following attributes are read/write. + + force_ro Enforce read-only access even if write protect switch is off. 
+ SD and MMC Device Attributes ============================ diff --git a/Documentation/mmc/mmc-dev-parts.txt b/Documentation/mmc/mmc-dev-parts.txt new file mode 100644 index 000000000000..2db28b8e662f --- /dev/null +++ b/Documentation/mmc/mmc-dev-parts.txt @@ -0,0 +1,27 @@ +SD and MMC Device Partitions +============================ + +Device partitions are additional logical block devices present on the +SD/MMC device. + +As of this writing, MMC boot partitions as supported and exposed as +/dev/mmcblkXboot0 and /dev/mmcblkXboot1, where X is the index of the +parent /dev/mmcblkX. + +MMC Boot Partitions +=================== + +Read and write access is provided to the two MMC boot partitions. Due to +the sensitive nature of the boot partition contents, which often store +a bootloader or bootloader configuration tables crucial to booting the +platform, write access is disabled by default to reduce the chance of +accidental bricking. + +To enable write access to /dev/mmcblkXbootY, disable the forced read-only +access with: + +echo 0 > /sys/block/mmcblkXbootY/force_ro + +To re-enable read-only access: + +echo 1 > /sys/block/mmcblkXbootY/force_ro diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 3e9082b729ff..1e6bd910e79e 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -76,9 +76,19 @@ struct mmc_blk_data { spinlock_t lock; struct gendisk *disk; struct mmc_queue queue; + struct list_head part; unsigned int usage; unsigned int read_only; + unsigned int part_type; + + /* + * Only set in main mmc_blk_data associated + * with mmc_card with mmc_set_drvdata, and keeps + * track of the current selected device partition. + */ + unsigned int part_curr; + struct device_attribute force_ro; }; static DEFINE_MUTEX(open_lock); @@ -101,17 +111,22 @@ static struct mmc_blk_data *mmc_blk_get(struct gendisk *disk) return md; } +static inline int mmc_get_devidx(struct gendisk *disk) +{ + int devmaj = MAJOR(disk_devt(disk)); + int devidx = MINOR(disk_devt(disk)) / perdev_minors; + + if (!devmaj) + devidx = disk->first_minor / perdev_minors; + return devidx; +} + static void mmc_blk_put(struct mmc_blk_data *md) { mutex_lock(&open_lock); md->usage--; if (md->usage == 0) { - int devmaj = MAJOR(disk_devt(md->disk)); - int devidx = MINOR(disk_devt(md->disk)) / perdev_minors; - - if (!devmaj) - devidx = md->disk->first_minor / perdev_minors; - + int devidx = mmc_get_devidx(md->disk); blk_cleanup_queue(md->queue.queue); __clear_bit(devidx, dev_use); @@ -122,6 +137,38 @@ static void mmc_blk_put(struct mmc_blk_data *md) mutex_unlock(&open_lock); } +static ssize_t force_ro_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + int ret; + struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev)); + + ret = snprintf(buf, PAGE_SIZE, "%d", + get_disk_ro(dev_to_disk(dev)) ^ + md->read_only); + mmc_blk_put(md); + return ret; +} + +static ssize_t force_ro_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + char *end; + struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev)); + unsigned long set = simple_strtoul(buf, &end, 0); + if (end == buf) { + ret = -EINVAL; + goto out; + } + + set_disk_ro(dev_to_disk(dev), set || md->read_only); + ret = count; +out: + mmc_blk_put(md); + return ret; +} + static int mmc_blk_open(struct block_device *bdev, fmode_t mode) { struct mmc_blk_data *md = mmc_blk_get(bdev->bd_disk); @@ -176,6 +223,29 @@ struct mmc_blk_request { struct mmc_data data; }; +static inline int 
mmc_blk_part_switch(struct mmc_card *card, + struct mmc_blk_data *md) +{ + int ret; + struct mmc_blk_data *main_md = mmc_get_drvdata(card); + if (main_md->part_curr == md->part_type) + return 0; + + if (mmc_card_mmc(card)) { + card->ext_csd.part_config &= ~EXT_CSD_PART_CONFIG_ACC_MASK; + card->ext_csd.part_config |= md->part_type; + + ret = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, + EXT_CSD_PART_CONFIG, card->ext_csd.part_config, + card->ext_csd.part_time); + if (ret) + return ret; +} + + main_md->part_curr = md->part_type; + return 0; +} + static u32 mmc_sd_num_wr_blocks(struct mmc_card *card) { int err; @@ -620,6 +690,11 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) struct mmc_card *card = md->queue.card; mmc_claim_host(card->host); + ret = mmc_blk_part_switch(card, md); + if (ret) { + ret = 0; + goto out; + } if (req->cmd_flags & REQ_DISCARD) { if (req->cmd_flags & REQ_SECURE) @@ -632,6 +707,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) ret = mmc_blk_issue_rw_rq(mq, req); } +out: mmc_release_host(card->host); return ret; } @@ -642,7 +718,11 @@ static inline int mmc_blk_readonly(struct mmc_card *card) !(card->csd.cmdclass & CCC_BLOCK_WRITE); } -static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card) +static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, + struct device *parent, + sector_t size, + bool default_ro, + const char *subname) { struct mmc_blk_data *md; int devidx, ret; @@ -658,7 +738,6 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card) goto out; } - /* * Set the read-only status based on the supported commands * and the write protect switch. @@ -672,6 +751,7 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card) } spin_lock_init(&md->lock); + INIT_LIST_HEAD(&md->part); md->usage = 1; ret = mmc_init_queue(&md->queue, card, &md->lock); @@ -686,8 +766,8 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card) md->disk->fops = &mmc_bdops; md->disk->private_data = md; md->disk->queue = md->queue.queue; - md->disk->driverfs_dev = &card->dev; - set_disk_ro(md->disk, md->read_only); + md->disk->driverfs_dev = parent; + set_disk_ro(md->disk, md->read_only || default_ro); if (REL_WRITES_SUPPORTED(card)) blk_queue_flush(md->queue.queue, REQ_FLUSH | REQ_FUA); @@ -703,33 +783,97 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card) * messages to tell when the card is present. */ - snprintf(md->disk->disk_name, sizeof(md->disk->disk_name), - "mmcblk%d", devidx); + if (subname) + snprintf(md->disk->disk_name, sizeof(md->disk->disk_name), + "mmcblk%d%s", + mmc_get_devidx(dev_to_disk(parent)), subname); + else + snprintf(md->disk->disk_name, sizeof(md->disk->disk_name), + "mmcblk%d", devidx); blk_queue_logical_block_size(md->queue.queue, 512); + set_capacity(md->disk, size); + return md; + + err_putdisk: + put_disk(md->disk); + err_kfree: + kfree(md); + out: + return ERR_PTR(ret); +} + +static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card) +{ + sector_t size; + struct mmc_blk_data *md; if (!mmc_card_sd(card) && mmc_card_blockaddr(card)) { /* * The EXT_CSD sector count is in number or 512 byte * sectors. */ - set_capacity(md->disk, card->ext_csd.sectors); + size = card->ext_csd.sectors; } else { /* * The CSD capacity field is in units of read_blkbits. * set_capacity takes units of 512 bytes. 
*/ - set_capacity(md->disk, - card->csd.capacity << (card->csd.read_blkbits - 9)); + size = card->csd.capacity << (card->csd.read_blkbits - 9); } + + md = mmc_blk_alloc_req(card, &card->dev, size, false, NULL); return md; +} - err_putdisk: - put_disk(md->disk); - err_kfree: - kfree(md); - out: - return ERR_PTR(ret); +static int mmc_blk_alloc_part(struct mmc_card *card, + struct mmc_blk_data *md, + unsigned int part_type, + sector_t size, + bool default_ro, + const char *subname) +{ + char cap_str[10]; + struct mmc_blk_data *part_md; + + part_md = mmc_blk_alloc_req(card, disk_to_dev(md->disk), size, default_ro, + subname); + if (IS_ERR(part_md)) + return PTR_ERR(part_md); + part_md->part_type = part_type; + list_add(&part_md->part, &md->part); + + string_get_size((u64)get_capacity(part_md->disk) << 9, STRING_UNITS_2, + cap_str, sizeof(cap_str)); + printk(KERN_INFO "%s: %s %s partition %u %s\n", + part_md->disk->disk_name, mmc_card_id(card), + mmc_card_name(card), part_md->part_type, cap_str); + return 0; +} + +static int mmc_blk_alloc_parts(struct mmc_card *card, struct mmc_blk_data *md) +{ + int ret = 0; + + if (!mmc_card_mmc(card)) + return 0; + + if (card->ext_csd.boot_size) { + ret = mmc_blk_alloc_part(card, md, EXT_CSD_PART_CONFIG_ACC_BOOT0, + card->ext_csd.boot_size >> 9, + true, + "boot0"); + if (ret) + return ret; + ret = mmc_blk_alloc_part(card, md, EXT_CSD_PART_CONFIG_ACC_BOOT1, + card->ext_csd.boot_size >> 9, + true, + "boot1"); + if (ret) + return ret; + } + + return ret; } static int @@ -750,9 +894,54 @@ mmc_blk_set_blksize(struct mmc_blk_data *md, struct mmc_card *card) return 0; } +static void mmc_blk_remove_req(struct mmc_blk_data *md) +{ + if (md) { + if (md->disk->flags & GENHD_FL_UP) { + device_remove_file(disk_to_dev(md->disk), &md->force_ro); + + /* Stop new requests from getting into the queue */ + del_gendisk(md->disk); + } + + /* Then flush out any already in there */ + mmc_cleanup_queue(&md->queue); + mmc_blk_put(md); + } +} + +static void mmc_blk_remove_parts(struct mmc_card *card, + struct mmc_blk_data *md) +{ + struct list_head *pos, *q; + struct mmc_blk_data *part_md; + + list_for_each_safe(pos, q, &md->part) { + part_md = list_entry(pos, struct mmc_blk_data, part); + list_del(pos); + mmc_blk_remove_req(part_md); + } +} + +static int mmc_add_disk(struct mmc_blk_data *md) +{ + int ret; + + add_disk(md->disk); + md->force_ro.show = force_ro_show; + md->force_ro.store = force_ro_store; + md->force_ro.attr.name = "force_ro"; + md->force_ro.attr.mode = S_IRUGO | S_IWUSR; + ret = device_create_file(disk_to_dev(md->disk), &md->force_ro); + if (ret) + del_gendisk(md->disk); + + return ret; +} + static int mmc_blk_probe(struct mmc_card *card) { - struct mmc_blk_data *md; + struct mmc_blk_data *md, *part_md; int err; char cap_str[10]; @@ -776,14 +965,22 @@ static int mmc_blk_probe(struct mmc_card *card) md->disk->disk_name, mmc_card_id(card), mmc_card_name(card), cap_str, md->read_only ? 
"(ro)" : ""); + if (mmc_blk_alloc_parts(card, md)) + goto out; + mmc_set_drvdata(card, md); - add_disk(md->disk); + if (mmc_add_disk(md)) + goto out; + + list_for_each_entry(part_md, &md->part, part) { + if (mmc_add_disk(part_md)) + goto out; + } return 0; out: - mmc_cleanup_queue(&md->queue); - mmc_blk_put(md); - + mmc_blk_remove_parts(card, md); + mmc_blk_remove_req(md); return err; } @@ -791,36 +988,43 @@ static void mmc_blk_remove(struct mmc_card *card) { struct mmc_blk_data *md = mmc_get_drvdata(card); - if (md) { - /* Stop new requests from getting into the queue */ - del_gendisk(md->disk); - - /* Then flush out any already in there */ - mmc_cleanup_queue(&md->queue); - - mmc_blk_put(md); - } + mmc_blk_remove_parts(card, md); + mmc_blk_remove_req(md); mmc_set_drvdata(card, NULL); } #ifdef CONFIG_PM static int mmc_blk_suspend(struct mmc_card *card, pm_message_t state) { + struct mmc_blk_data *part_md; struct mmc_blk_data *md = mmc_get_drvdata(card); if (md) { mmc_queue_suspend(&md->queue); + list_for_each_entry(part_md, &md->part, part) { + mmc_queue_suspend(&part_md->queue); + } } return 0; } static int mmc_blk_resume(struct mmc_card *card) { + struct mmc_blk_data *part_md; struct mmc_blk_data *md = mmc_get_drvdata(card); if (md) { mmc_blk_set_blksize(md, card); + + /* + * Resume involves the card going into idle state, + * so current partition is always the main one. + */ + md->part_curr = md->part_type; mmc_queue_resume(&md->queue); + list_for_each_entry(part_md, &md->part, part) { + mmc_queue_resume(&part_md->queue); + } } return 0; } diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 396cb23625d2..a2c795e8f9dd 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -288,7 +288,10 @@ static int mmc_read_ext_csd(struct mmc_card *card) if (card->ext_csd.rev >= 3) { u8 sa_shift = ext_csd[EXT_CSD_S_A_TIMEOUT]; - card->ext_csd.bootconfig = ext_csd[EXT_CSD_BOOT_CONFIG]; + card->ext_csd.part_config = ext_csd[EXT_CSD_PART_CONFIG]; + + /* EXT_CSD value is in units of 10ms, but we store in ms */ + card->ext_csd.part_time = 10 * ext_csd[EXT_CSD_PART_SWITCH_TIME]; /* Sleep / awake timeout in 100ns units */ if (sa_shift > 0 && sa_shift <= 0x17) @@ -302,6 +305,12 @@ static int mmc_read_ext_csd(struct mmc_card *card) ext_csd[EXT_CSD_HC_ERASE_GRP_SIZE] << 10; card->ext_csd.rel_sectors = ext_csd[EXT_CSD_REL_WR_SEC_C]; + + /* + * There are two boot regions of equal size, defined in + * multiples of 128K. 
+ */ + card->ext_csd.boot_size = ext_csd[EXT_CSD_BOOT_MULT] << 17; } if (card->ext_csd.rev >= 4) { @@ -576,10 +585,13 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, /* * Ensure eMMC user default partition is enabled */ - if (card->ext_csd.bootconfig & 0x7) { - card->ext_csd.bootconfig &= ~0x7; - mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BOOT_CONFIG, - card->ext_csd.bootconfig, 0); + if (card->ext_csd.part_config & EXT_CSD_PART_CONFIG_ACC_MASK) { + card->ext_csd.part_config &= ~EXT_CSD_PART_CONFIG_ACC_MASK; + err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_PART_CONFIG, + card->ext_csd.part_config, + card->ext_csd.part_time); + if (err && err != -EBADMSG) + goto free_card; } /* diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 937f852cf01e..0c7a58b14343 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -48,7 +48,8 @@ struct mmc_ext_csd { u8 sec_feature_support; u8 rel_sectors; u8 rel_param; - u8 bootconfig; + u8 part_config; + unsigned int part_time; /* Units: ms */ unsigned int sa_timeout; /* Units: 100ns */ unsigned int hs_max_dtr; unsigned int sectors; @@ -61,6 +62,7 @@ struct mmc_ext_csd { bool enhanced_area_en; /* enable bit */ unsigned long long enhanced_area_offset; /* Units: Byte */ unsigned int enhanced_area_size; /* Units: KB */ + unsigned int boot_size; /* in bytes */ }; struct sd_scr { diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 390aa6eef676..373b2bf5e5b5 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -257,19 +257,21 @@ struct _mmc_csd { #define EXT_CSD_PARTITION_SUPPORT 160 /* RO */ #define EXT_CSD_WR_REL_PARAM 166 /* RO */ #define EXT_CSD_ERASE_GROUP_DEF 175 /* R/W */ -#define EXT_CSD_BOOT_CONFIG 179 /* R/W */ +#define EXT_CSD_PART_CONFIG 179 /* R/W */ #define EXT_CSD_ERASED_MEM_CONT 181 /* RO */ #define EXT_CSD_BUS_WIDTH 183 /* R/W */ #define EXT_CSD_HS_TIMING 185 /* R/W */ #define EXT_CSD_REV 192 /* RO */ #define EXT_CSD_STRUCTURE 194 /* RO */ #define EXT_CSD_CARD_TYPE 196 /* RO */ +#define EXT_CSD_PART_SWITCH_TIME 199 /* RO */ #define EXT_CSD_SEC_CNT 212 /* RO, 4 bytes */ #define EXT_CSD_S_A_TIMEOUT 217 /* RO */ #define EXT_CSD_REL_WR_SEC_C 222 /* RO */ #define EXT_CSD_HC_WP_GRP_SIZE 221 /* RO */ #define EXT_CSD_ERASE_TIMEOUT_MULT 223 /* RO */ #define EXT_CSD_HC_ERASE_GRP_SIZE 224 /* RO */ +#define EXT_CSD_BOOT_MULT 226 /* RO */ #define EXT_CSD_SEC_TRIM_MULT 229 /* RO */ #define EXT_CSD_SEC_ERASE_MULT 230 /* RO */ #define EXT_CSD_SEC_FEATURE_SUPPORT 231 /* RO */ @@ -281,6 +283,10 @@ struct _mmc_csd { #define EXT_CSD_WR_REL_PARAM_EN (1<<2) +#define EXT_CSD_PART_CONFIG_ACC_MASK (0x7) +#define EXT_CSD_PART_CONFIG_ACC_BOOT0 (0x1) +#define EXT_CSD_PART_CONFIG_ACC_BOOT1 (0x2) + #define EXT_CSD_CMD_SET_NORMAL (1<<0) #define EXT_CSD_CMD_SET_SECURE (1<<1) #define EXT_CSD_CMD_SET_CPSECURE (1<<2) -- cgit v1.2.3 From 6a7a6b45f454686a1549729bfbae31f0b3b595d6 Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Tue, 12 Apr 2011 15:06:53 -0500 Subject: mmc: quirks: Fix erase/trim for certain SanDisk cards. CMD38 argument is passed through EXT_CSD[113]. 
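The sequence the patch implements can be summarised by the sketch below (illustrative only; the helper name is invented, but mmc_switch() and mmc_erase() are the existing core helpers used in the patch): before CMD38 is issued on an affected card, the erase/trim argument is first programmed into EXT_CSD byte 113 with CMD6.

    /*
     * Hedged sketch: on cards flagged MMC_QUIRK_INAND_CMD38, write the
     * CMD38 argument into EXT_CSD[113] before starting the erase.
     */
    static int example_trim(struct mmc_card *card, unsigned int from,
                            unsigned int nr)
    {
        int err;

        if (card->quirks & MMC_QUIRK_INAND_CMD38) {
            err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
                             INAND_CMD38_ARG_EXT_CSD,
                             INAND_CMD38_ARG_TRIM, 0);
            if (err)
                return err;
        }

        return mmc_erase(card, from, nr, MMC_TRIM_ARG);
    }

The secure-erase/trim paths follow the same pattern with the SECERASE/SECTRIM arguments.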
Signed-off-by: Andrei Warkentin Signed-off-by: Chris Ball --- drivers/mmc/card/block.c | 43 ++++++++++++++++++++++++++++++++++++++++++- include/linux/mmc/card.h | 1 + 2 files changed, 43 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 288d27394ef9..c20995323348 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -48,6 +48,13 @@ MODULE_ALIAS("mmc:block"); #endif #define MODULE_PARAM_PREFIX "mmcblk." +#define INAND_CMD38_ARG_EXT_CSD 113 +#define INAND_CMD38_ARG_ERASE 0x00 +#define INAND_CMD38_ARG_TRIM 0x01 +#define INAND_CMD38_ARG_SECERASE 0x80 +#define INAND_CMD38_ARG_SECTRIM1 0x81 +#define INAND_CMD38_ARG_SECTRIM2 0x88 + #define REL_WRITES_SUPPORTED(card) (mmc_card_mmc((card)) && \ (((card)->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) || \ ((card)->ext_csd.rel_sectors))) @@ -356,6 +363,16 @@ static int mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) else arg = MMC_ERASE_ARG; + if (card->quirks & MMC_QUIRK_INAND_CMD38) { + err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, + INAND_CMD38_ARG_EXT_CSD, + arg == MMC_TRIM_ARG ? + INAND_CMD38_ARG_TRIM : + INAND_CMD38_ARG_ERASE, + 0); + if (err) + goto out; + } err = mmc_erase(card, from, nr, arg); out: spin_lock_irq(&md->lock); @@ -386,9 +403,28 @@ static int mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq, else arg = MMC_SECURE_ERASE_ARG; + if (card->quirks & MMC_QUIRK_INAND_CMD38) { + err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, + INAND_CMD38_ARG_EXT_CSD, + arg == MMC_SECURE_TRIM1_ARG ? + INAND_CMD38_ARG_SECTRIM1 : + INAND_CMD38_ARG_SECERASE, + 0); + if (err) + goto out; + } err = mmc_erase(card, from, nr, arg); - if (!err && arg == MMC_SECURE_TRIM1_ARG) + if (!err && arg == MMC_SECURE_TRIM1_ARG) { + if (card->quirks & MMC_QUIRK_INAND_CMD38) { + err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, + INAND_CMD38_ARG_EXT_CSD, + INAND_CMD38_ARG_SECTRIM2, + 0); + if (err) + goto out; + } err = mmc_erase(card, from, nr, MMC_SECURE_TRIM2_ARG); + } out: spin_lock_irq(&md->lock); __blk_end_request(req, err, blk_rq_bytes(req)); @@ -941,6 +977,11 @@ static int mmc_add_disk(struct mmc_blk_data *md) static const struct mmc_fixup blk_fixups[] = { + MMC_FIXUP("SEM02G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38), + MMC_FIXUP("SEM04G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38), + MMC_FIXUP("SEM08G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38), + MMC_FIXUP("SEM16G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38), + MMC_FIXUP("SEM32G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38), END_FIXUP }; diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 0c7a58b14343..72a98681ef47 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -133,6 +133,7 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_CLK_GATING (1<<3) /* clock gating the sdio bus will make card fail */ #define MMC_QUIRK_NONSTD_FUNC_IF (1<<4) /* SDIO card has nonstd function interfaces */ #define MMC_QUIRK_DISABLE_CD (1<<5) /* disconnect CD/DAT[3] resistor */ +#define MMC_QUIRK_INAND_CMD38 (1<<6) /* iNAND devices have broken CMD38 */ unsigned int erase_size; /* erase size in sectors */ unsigned int erase_shift; /* if erase unit is power 2 */ -- cgit v1.2.3 From 82b0e23a295cc58d1290017ee97a40956ad68d94 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 21 Apr 2011 20:26:38 +0200 Subject: mmc: sdhci: Fix read-only detection with JMicron 388 chip On HP laptops with JMicron 388 chip, the write-locked SD card isn't detected correctly as read-only in many 
cases. This is because the PRESENT_STATE register becomes unsable just after plugging, and it returns the WRITE_PROTECT bit wrongly at the first read. This patch fixes the read-only detection by adding a new sdhci quirk indicating to check the register more intensively with a relatively long delay. The patch is tested with 2.6.39-rc4 kernel. Cc: Aries Lee Signed-off-by: Takashi Iwai Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci-pci.c | 5 +++++ drivers/mmc/host/sdhci.c | 28 ++++++++++++++++++++++++---- include/linux/mmc/sdhci.h | 2 ++ 3 files changed, 31 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index 82c5dc412679..936bbca19c0a 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -327,6 +327,11 @@ static int jmicron_probe(struct sdhci_pci_chip *chip) return ret; } + /* quirk for unsable RO-detection on JM388 chips */ + if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_SD || + chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD) + chip->quirks |= SDHCI_QUIRK_UNSTABLE_RO_DETECT; + return 0; } diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index a70a3d1ef35a..e5cfe70dc23b 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1256,14 +1256,11 @@ out: spin_unlock_irqrestore(&host->lock, flags); } -static int sdhci_get_ro(struct mmc_host *mmc) +static int check_ro(struct sdhci_host *host) { - struct sdhci_host *host; unsigned long flags; int is_readonly; - host = mmc_priv(mmc); - spin_lock_irqsave(&host->lock, flags); if (host->flags & SDHCI_DEVICE_DEAD) @@ -1281,6 +1278,29 @@ static int sdhci_get_ro(struct mmc_host *mmc) !is_readonly : is_readonly; } +#define SAMPLE_COUNT 5 + +static int sdhci_get_ro(struct mmc_host *mmc) +{ + struct sdhci_host *host; + int i, ro_count; + + host = mmc_priv(mmc); + + if (!(host->quirks & SDHCI_QUIRK_UNSTABLE_RO_DETECT)) + return check_ro(host); + + ro_count = 0; + for (i = 0; i < SAMPLE_COUNT; i++) { + if (check_ro(host)) { + if (++ro_count > SAMPLE_COUNT / 2) + return 1; + } + msleep(30); + } + return 0; +} + static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable) { struct sdhci_host *host; diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index 83bd9f76709a..92e1c9ad126c 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -85,6 +85,8 @@ struct sdhci_host { #define SDHCI_QUIRK_NO_HISPD_BIT (1<<29) /* Controller treats ADMA descriptors with length 0000h incorrectly */ #define SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC (1<<30) +/* The read-only detection via SDHCI_PRESENT_STATE register is unstable */ +#define SDHCI_QUIRK_UNSTABLE_RO_DETECT (1<<31) int irq; /* Device IRQ */ void __iomem *ioaddr; /* Mapped address */ -- cgit v1.2.3 From cb87ea28ed9e75a41eb456bfcb547b4e6f10e750 Mon Sep 17 00:00:00 2001 From: John Calixto Date: Tue, 26 Apr 2011 18:56:29 -0400 Subject: mmc: core: Add mmc CMD+ACMD passthrough ioctl Allows appropriately-privileged applications to send CMD (normal) and ACMD (application-specific; preceded with CMD55) commands to cards/devices on the mmc bus. This is primarily useful for enabling the security functionality built in to every SD card. It can also be used as a generic passthrough (e.g. to enable virtual machines to control mmc bus devices directly). However, this use case has not been tested rigorously. Generic passthrough testing was only conducted for a few non-security opcodes to prove the feasibility of the passthrough. 
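As an illustration of the intended calling convention (a hedged sketch, not part of this patch; the helper name and the choice of ACMD13 are examples only), a privileged process opens the whole block device and issues MMC_IOC_CMD with a filled-in struct mmc_ioc_cmd:

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/mmc/ioctl.h>

    /*
     * The response/command type flags mirror the kernel's MMC_RSP_R1 and
     * MMC_CMD_ADTC values from include/linux/mmc/core.h; they are restated
     * here only because this patch does not export them to userspace.
     */
    #define EX_MMC_RSP_PRESENT  (1 << 0)
    #define EX_MMC_RSP_CRC      (1 << 2)
    #define EX_MMC_RSP_OPCODE   (1 << 4)
    #define EX_MMC_RSP_R1       (EX_MMC_RSP_PRESENT | EX_MMC_RSP_CRC | EX_MMC_RSP_OPCODE)
    #define EX_MMC_CMD_ADTC     (1 << 5)

    /*
     * Hypothetical helper: read the 64-byte SD Status via ACMD13.  fd must
     * refer to the whole device (e.g. /dev/mmcblk0) and the caller needs
     * CAP_SYS_RAWIO.
     */
    static int read_sd_status(int fd, unsigned char buf[64])
    {
        struct mmc_ioc_cmd ic;

        memset(&ic, 0, sizeof(ic));
        ic.write_flag = 0;                 /* data flows card -> host */
        ic.is_acmd    = 1;                 /* kernel prefixes CMD55 */
        ic.opcode     = 13;                /* ACMD13, SD Status */
        ic.arg        = 0;
        ic.flags      = EX_MMC_RSP_R1 | EX_MMC_CMD_ADTC;
        ic.blksz      = 64;
        ic.blocks     = 1;
        mmc_ioc_cmd_set_data(ic, buf);

        return ioctl(fd, MMC_IOC_CMD, &ic);
    }
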
Since any opcode can be sent using this passthrough, it is very possible to render the card/device unusable. Applications that use this ioctl must have CAP_SYS_RAWIO. Security commands tested on TI PCIxx12 (SDHCI), Sigma Designs SMP8652 SoC, TI OMAP3621/OMAP3630 SoC, Samsung S5PC110 SoC, Qualcomm MSM7200A SoC. Signed-off-by: John Calixto Reviewed-by: Andrei Warkentin Reviewed-by: Arnd Bergmann Signed-off-by: Chris Ball --- Documentation/ioctl/ioctl-number.txt | 1 + drivers/mmc/card/block.c | 201 +++++++++++++++++++++++++++++++++++ drivers/mmc/core/sd_ops.c | 3 +- include/linux/Kbuild | 1 + include/linux/mmc/Kbuild | 1 + include/linux/mmc/core.h | 1 + include/linux/mmc/ioctl.h | 54 ++++++++++ 7 files changed, 261 insertions(+), 1 deletion(-) create mode 100644 include/linux/mmc/Kbuild create mode 100644 include/linux/mmc/ioctl.h (limited to 'include/linux') diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index a0a5d82b6b0b..2a34d822e6d6 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -304,6 +304,7 @@ Code Seq#(hex) Include File Comments 0xB0 all RATIO devices in development: 0xB1 00-1F PPPoX +0xB3 00 linux/mmc/ioctl.h 0xC0 00-0F linux/usb/iowarrior.h 0xCB 00-1F CBM serial IEC bus in development: diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index e7efd6faeaf9..407836d55712 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -31,7 +31,11 @@ #include #include #include +#include +#include +#include +#include #include #include #include @@ -218,11 +222,208 @@ mmc_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } +struct mmc_blk_ioc_data { + struct mmc_ioc_cmd ic; + unsigned char *buf; + u64 buf_bytes; +}; + +static struct mmc_blk_ioc_data *mmc_blk_ioctl_copy_from_user( + struct mmc_ioc_cmd __user *user) +{ + struct mmc_blk_ioc_data *idata; + int err; + + idata = kzalloc(sizeof(*idata), GFP_KERNEL); + if (!idata) { + err = -ENOMEM; + goto copy_err; + } + + if (copy_from_user(&idata->ic, user, sizeof(idata->ic))) { + err = -EFAULT; + goto copy_err; + } + + idata->buf_bytes = (u64) idata->ic.blksz * idata->ic.blocks; + if (idata->buf_bytes > MMC_IOC_MAX_BYTES) { + err = -EOVERFLOW; + goto copy_err; + } + + idata->buf = kzalloc(idata->buf_bytes, GFP_KERNEL); + if (!idata->buf) { + err = -ENOMEM; + goto copy_err; + } + + if (copy_from_user(idata->buf, (void __user *)(unsigned long) + idata->ic.data_ptr, idata->buf_bytes)) { + err = -EFAULT; + goto copy_err; + } + + return idata; + +copy_err: + kfree(idata->buf); + kfree(idata); + return ERR_PTR(err); + +} + +static int mmc_blk_ioctl_cmd(struct block_device *bdev, + struct mmc_ioc_cmd __user *ic_ptr) +{ + struct mmc_blk_ioc_data *idata; + struct mmc_blk_data *md; + struct mmc_card *card; + struct mmc_command cmd = {0}; + struct mmc_data data = {0}; + struct mmc_request mrq = {0}; + struct scatterlist sg; + int err; + + /* + * The caller must have CAP_SYS_RAWIO, and must be calling this on the + * whole block device, not on a partition. This prevents overspray + * between sibling partitions. 
+ */ + if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains)) + return -EPERM; + + idata = mmc_blk_ioctl_copy_from_user(ic_ptr); + if (IS_ERR(idata)) + return PTR_ERR(idata); + + cmd.opcode = idata->ic.opcode; + cmd.arg = idata->ic.arg; + cmd.flags = idata->ic.flags; + + data.sg = &sg; + data.sg_len = 1; + data.blksz = idata->ic.blksz; + data.blocks = idata->ic.blocks; + + sg_init_one(data.sg, idata->buf, idata->buf_bytes); + + if (idata->ic.write_flag) + data.flags = MMC_DATA_WRITE; + else + data.flags = MMC_DATA_READ; + + mrq.cmd = &cmd; + mrq.data = &data; + + md = mmc_blk_get(bdev->bd_disk); + if (!md) { + err = -EINVAL; + goto cmd_done; + } + + card = md->queue.card; + if (IS_ERR(card)) { + err = PTR_ERR(card); + goto cmd_done; + } + + mmc_claim_host(card->host); + + if (idata->ic.is_acmd) { + err = mmc_app_cmd(card->host, card); + if (err) + goto cmd_rel_host; + } + + /* data.flags must already be set before doing this. */ + mmc_set_data_timeout(&data, card); + /* Allow overriding the timeout_ns for empirical tuning. */ + if (idata->ic.data_timeout_ns) + data.timeout_ns = idata->ic.data_timeout_ns; + + if ((cmd.flags & MMC_RSP_R1B) == MMC_RSP_R1B) { + /* + * Pretend this is a data transfer and rely on the host driver + * to compute timeout. When all host drivers support + * cmd.cmd_timeout for R1B, this can be changed to: + * + * mrq.data = NULL; + * cmd.cmd_timeout = idata->ic.cmd_timeout_ms; + */ + data.timeout_ns = idata->ic.cmd_timeout_ms * 1000000; + } + + mmc_wait_for_req(card->host, &mrq); + + if (cmd.error) { + dev_err(mmc_dev(card->host), "%s: cmd error %d\n", + __func__, cmd.error); + err = cmd.error; + goto cmd_rel_host; + } + if (data.error) { + dev_err(mmc_dev(card->host), "%s: data error %d\n", + __func__, data.error); + err = data.error; + goto cmd_rel_host; + } + + /* + * According to the SD specs, some commands require a delay after + * issuing the command. 
+ */ + if (idata->ic.postsleep_min_us) + usleep_range(idata->ic.postsleep_min_us, idata->ic.postsleep_max_us); + + if (copy_to_user(&(ic_ptr->response), cmd.resp, sizeof(cmd.resp))) { + err = -EFAULT; + goto cmd_rel_host; + } + + if (!idata->ic.write_flag) { + if (copy_to_user((void __user *)(unsigned long) idata->ic.data_ptr, + idata->buf, idata->buf_bytes)) { + err = -EFAULT; + goto cmd_rel_host; + } + } + +cmd_rel_host: + mmc_release_host(card->host); + +cmd_done: + mmc_blk_put(md); + kfree(idata->buf); + kfree(idata); + return err; +} + +static int mmc_blk_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + int ret = -EINVAL; + if (cmd == MMC_IOC_CMD) + ret = mmc_blk_ioctl_cmd(bdev, (struct mmc_ioc_cmd __user *)arg); + return ret; +} + +#ifdef CONFIG_COMPAT +static int mmc_blk_compat_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + return mmc_blk_ioctl(bdev, mode, cmd, (unsigned long) compat_ptr(arg)); +} +#endif + static const struct block_device_operations mmc_bdops = { .open = mmc_blk_open, .release = mmc_blk_release, .getgeo = mmc_blk_getgeo, .owner = THIS_MODULE, + .ioctl = mmc_blk_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = mmc_blk_compat_ioctl, +#endif }; struct mmc_blk_request { diff --git a/drivers/mmc/core/sd_ops.c b/drivers/mmc/core/sd_ops.c index a206aea5360d..021fed153804 100644 --- a/drivers/mmc/core/sd_ops.c +++ b/drivers/mmc/core/sd_ops.c @@ -21,7 +21,7 @@ #include "core.h" #include "sd_ops.h" -static int mmc_app_cmd(struct mmc_host *host, struct mmc_card *card) +int mmc_app_cmd(struct mmc_host *host, struct mmc_card *card) { int err; struct mmc_command cmd = {0}; @@ -49,6 +49,7 @@ static int mmc_app_cmd(struct mmc_host *host, struct mmc_card *card) return 0; } +EXPORT_SYMBOL_GPL(mmc_app_cmd); /** * mmc_wait_for_app_cmd - start an application command and wait for diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 75cf611641e6..ed38527599ee 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -4,6 +4,7 @@ header-y += caif/ header-y += dvb/ header-y += hdlc/ header-y += isdn/ +header-y += mmc/ header-y += nfsd/ header-y += raid/ header-y += spi/ diff --git a/include/linux/mmc/Kbuild b/include/linux/mmc/Kbuild new file mode 100644 index 000000000000..1fb26448faa9 --- /dev/null +++ b/include/linux/mmc/Kbuild @@ -0,0 +1 @@ +header-y += ioctl.h diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index f8e4bcbd2846..cbe8d55f64c4 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -133,6 +133,7 @@ struct mmc_card; extern void mmc_wait_for_req(struct mmc_host *, struct mmc_request *); extern int mmc_wait_for_cmd(struct mmc_host *, struct mmc_command *, int); +extern int mmc_app_cmd(struct mmc_host *, struct mmc_card *); extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *, struct mmc_command *, int); extern int mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int); diff --git a/include/linux/mmc/ioctl.h b/include/linux/mmc/ioctl.h new file mode 100644 index 000000000000..5baf2983a12f --- /dev/null +++ b/include/linux/mmc/ioctl.h @@ -0,0 +1,54 @@ +#ifndef LINUX_MMC_IOCTL_H +#define LINUX_MMC_IOCTL_H +struct mmc_ioc_cmd { + /* Implies direction of data. true = write, false = read */ + int write_flag; + + /* Application-specific command. 
true = precede with CMD55 */ + int is_acmd; + + __u32 opcode; + __u32 arg; + __u32 response[4]; /* CMD response */ + unsigned int flags; + unsigned int blksz; + unsigned int blocks; + + /* + * Sleep at least postsleep_min_us useconds, and at most + * postsleep_max_us useconds *after* issuing command. Needed for + * some read commands for which cards have no other way of indicating + * they're ready for the next command (i.e. there is no equivalent of + * a "busy" indicator for read operations). + */ + unsigned int postsleep_min_us; + unsigned int postsleep_max_us; + + /* + * Override driver-computed timeouts. Note the difference in units! + */ + unsigned int data_timeout_ns; + unsigned int cmd_timeout_ms; + + /* + * For 64-bit machines, the next member, ``__u64 data_ptr``, wants to + * be 8-byte aligned. Make sure this struct is the same size when + * built for 32-bit. + */ + __u32 __pad; + + /* DAT buffer */ + __u64 data_ptr; +}; +#define mmc_ioc_cmd_set_data(ic, ptr) ic.data_ptr = (__u64)(unsigned long) ptr + +#define MMC_IOC_CMD _IOWR(MMC_BLOCK_MAJOR, 0, struct mmc_ioc_cmd) + +/* + * Since this ioctl is only meant to enhance (and not replace) normal access + * to the mmc bus device, an upper data transfer limit of MMC_IOC_MAX_BYTES + * is enforced per ioctl call. For larger data transfers, use the normal + * block device operations. + */ +#define MMC_IOC_MAX_BYTES (512L * 256) +#endif /* LINUX_MMC_IOCTL_H */ -- cgit v1.2.3 From f2119df6b764609af4baceb68caf1e848c1c8aa7 Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Thu, 5 May 2011 12:18:57 +0530 Subject: mmc: sd: add support for signal voltage switch procedure Host Controller v3.00 adds another Capabilities register. Apart from other things, this new register indicates whether the Host Controller supports SDR50, SDR104, and DDR50 UHS-I modes. The spec doesn't mention about explicit support for SDR12 and SDR25 UHS-I modes, so the Host Controller v3.00 should support them by default. Also if the controller supports SDR104 mode, it will also support SDR50 mode as well. So depending on the host support, we set the corresponding MMC_CAP_* flags. One more new register. Host Control2 is added in v3.00, which is used during Signal Voltage Switch procedure described below. Since as per v3.00 spec, UHS-I supported hosts should set S18R to 1, we set S18R (bit 24) of OCR before sending ACMD41. We also need to set XPC (bit 28) of OCR in case the host can supply >150mA. This support is indicated by the Maximum Current Capabilities register of the Host Controller. If the response of ACMD41 has both CCS and S18A set, we start the signal voltage switch procedure, which if successfull, will switch the card from 3.3V signalling to 1.8V signalling. Signal voltage switch procedure adds support for a new command CMD11 in the Physical Layer Spec v3.01. As part of this procedure, we need to set 1.8V Signalling Enable (bit 3) of Host Control2 register, which if remains set after 5ms, means the switch to 1.8V signalling is successfull. Otherwise, we clear bit 24 of OCR and retry the initialization sequence. When we remove the card, and insert the same or another card, we need to make sure that we start with 3.3V signalling voltage. So we call mmc_set_signal_voltage() with MMC_SIGNAL_VOLTAGE_330 set so that we are back to 3.3V signalling voltage before we actually start initializing the card. Tested by Zhangfei Gao with a Toshiba uhs card and general hs card, on mmp2 in SDMA mode. 
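A condensed sketch of the negotiation described above (illustrative only; the helper is hypothetical, but the OCR bits, capability flags and core calls are the ones added by this patch). Note that the 0x41000000 check in the patch is simply SD_OCR_CCS (bit 30) | SD_ROCR_S18A (bit 24):

    static int example_negotiate_uhs(struct mmc_host *host, u32 ocr)
    {
        u32 rocr;
        int err;

        if (host->caps & (MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 |
                          MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 |
                          MMC_CAP_UHS_DDR50))
            ocr |= SD_OCR_S18R;          /* request 1.8V signalling */

        if (host->caps & (MMC_CAP_SET_XPC_330 | MMC_CAP_SET_XPC_300 |
                          MMC_CAP_SET_XPC_180))
            ocr |= SD_OCR_XPC;           /* host can supply more than 150mA */

        err = mmc_send_app_op_cond(host, ocr, &rocr);
        if (err)
            return err;

        /* Only attempt CMD11 when the card reports both CCS and S18A. */
        if ((rocr & (SD_OCR_CCS | SD_ROCR_S18A)) ==
            (SD_OCR_CCS | SD_ROCR_S18A))
            return mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180);

        return 0;                        /* stay at 3.3V signalling */
    }

If the switch fails, the patch clears S18R and retries ACMD41; the sketch omits that retry loop.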
Signed-off-by: Arindam Nath Reviewed-by: Philip Rakity Tested-by: Philip Rakity Acked-by: Zhangfei Gao Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 32 ++++++++ drivers/mmc/core/core.h | 1 + drivers/mmc/core/sd.c | 36 ++++++++- drivers/mmc/host/sdhci.c | 192 +++++++++++++++++++++++++++++++++++++++++++---- drivers/mmc/host/sdhci.h | 18 ++++- include/linux/mmc/host.h | 15 ++++ include/linux/mmc/sd.h | 7 ++ 7 files changed, 284 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 1dbc18576219..5005a6323165 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -942,6 +942,38 @@ u32 mmc_select_voltage(struct mmc_host *host, u32 ocr) return ocr; } +int mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage) +{ + struct mmc_command cmd = {0}; + int err = 0; + + BUG_ON(!host); + + /* + * Send CMD11 only if the request is to switch the card to + * 1.8V signalling. + */ + if (signal_voltage == MMC_SIGNAL_VOLTAGE_180) { + cmd.opcode = SD_SWITCH_VOLTAGE; + cmd.arg = 0; + cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; + + err = mmc_wait_for_cmd(host, &cmd, 0); + if (err) + return err; + + if (!mmc_host_is_spi(host) && (cmd.resp[0] & R1_ERROR)) + return -EIO; + } + + host->ios.signal_voltage = signal_voltage; + + if (host->ops->start_signal_voltage_switch) + err = host->ops->start_signal_voltage_switch(host, &host->ios); + + return err; +} + /* * Select timing parameters for host. */ diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index ca1fdde29df6..7745dea430b8 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -41,6 +41,7 @@ void mmc_set_bus_width(struct mmc_host *host, unsigned int width); void mmc_set_bus_width_ddr(struct mmc_host *host, unsigned int width, unsigned int ddr); u32 mmc_select_voltage(struct mmc_host *host, u32 ocr); +int mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage); void mmc_set_timing(struct mmc_host *host, unsigned int timing); static inline void mmc_delay(unsigned int ms) diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 6dac89fe0535..b0cd285d272a 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -403,6 +403,7 @@ struct device_type sd_type = { int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid) { int err; + u32 rocr; /* * Since we're changing the OCR value, we seem to @@ -420,12 +421,38 @@ int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid) */ err = mmc_send_if_cond(host, ocr); if (!err) - ocr |= 1 << 30; + ocr |= SD_OCR_CCS; - err = mmc_send_app_op_cond(host, ocr, NULL); + /* + * If the host supports one of UHS-I modes, request the card + * to switch to 1.8V signaling level. + */ + if (host->caps & (MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 | + MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_DDR50)) + ocr |= SD_OCR_S18R; + + /* If the host can supply more than 150mA, XPC should be set to 1. */ + if (host->caps & (MMC_CAP_SET_XPC_330 | MMC_CAP_SET_XPC_300 | + MMC_CAP_SET_XPC_180)) + ocr |= SD_OCR_XPC; + +try_again: + err = mmc_send_app_op_cond(host, ocr, &rocr); if (err) return err; + /* + * In case CCS and S18A in the response is set, start Signal Voltage + * Switch procedure. SPI mode doesn't support CMD11. 
+ */ + if (!mmc_host_is_spi(host) && ((rocr & 0x41000000) == 0x41000000)) { + err = mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180); + if (err) { + ocr &= ~SD_OCR_S18R; + goto try_again; + } + } + if (mmc_host_is_spi(host)) err = mmc_send_cid(host, cid); else @@ -773,6 +800,11 @@ int mmc_attach_sd(struct mmc_host *host) BUG_ON(!host); WARN_ON(!host->claimed); + /* Make sure we are at 3.3V signalling voltage */ + err = mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_330); + if (err) + return err; + err = mmc_send_app_op_cond(host, 0, &ocr); if (err) return err; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index e5cfe70dc23b..4e2031449a23 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -83,6 +83,8 @@ static void sdhci_dumpregs(struct sdhci_host *host) printk(KERN_DEBUG DRIVER_NAME ": Cmd: 0x%08x | Max curr: 0x%08x\n", sdhci_readw(host, SDHCI_COMMAND), sdhci_readl(host, SDHCI_MAX_CURRENT)); + printk(KERN_DEBUG DRIVER_NAME ": Host ctl2: 0x%08x\n", + sdhci_readw(host, SDHCI_HOST_CONTROL2)); if (host->flags & SDHCI_USE_ADMA) printk(KERN_DEBUG DRIVER_NAME ": ADMA Err: 0x%08x | ADMA Ptr: 0x%08x\n", @@ -1323,11 +1325,114 @@ out: spin_unlock_irqrestore(&host->lock, flags); } +static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc, + struct mmc_ios *ios) +{ + struct sdhci_host *host; + u8 pwr; + u16 clk, ctrl; + u32 present_state; + + host = mmc_priv(mmc); + + /* + * Signal Voltage Switching is only applicable for Host Controllers + * v3.00 and above. + */ + if (host->version < SDHCI_SPEC_300) + return 0; + + /* + * We first check whether the request is to set signalling voltage + * to 3.3V. If so, we change the voltage to 3.3V and return quickly. + */ + ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); + if (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_330) { + /* Set 1.8V Signal Enable in the Host Control2 register to 0 */ + ctrl &= ~SDHCI_CTRL_VDD_180; + sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); + + /* Wait for 5ms */ + usleep_range(5000, 5500); + + /* 3.3V regulator output should be stable within 5 ms */ + ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); + if (!(ctrl & SDHCI_CTRL_VDD_180)) + return 0; + else { + printk(KERN_INFO DRIVER_NAME ": Switching to 3.3V " + "signalling voltage failed\n"); + return -EIO; + } + } else if (!(ctrl & SDHCI_CTRL_VDD_180) && + (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_180)) { + /* Stop SDCLK */ + clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL); + clk &= ~SDHCI_CLOCK_CARD_EN; + sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL); + + /* Check whether DAT[3:0] is 0000 */ + present_state = sdhci_readl(host, SDHCI_PRESENT_STATE); + if (!((present_state & SDHCI_DATA_LVL_MASK) >> + SDHCI_DATA_LVL_SHIFT)) { + /* + * Enable 1.8V Signal Enable in the Host Control2 + * register + */ + ctrl |= SDHCI_CTRL_VDD_180; + sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); + + /* Wait for 5ms */ + usleep_range(5000, 5500); + + ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); + if (ctrl & SDHCI_CTRL_VDD_180) { + /* Provide SDCLK again and wait for 1ms*/ + clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL); + clk |= SDHCI_CLOCK_CARD_EN; + sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL); + usleep_range(1000, 1500); + + /* + * If DAT[3:0] level is 1111b, then the card + * was successfully switched to 1.8V signaling. + */ + present_state = sdhci_readl(host, + SDHCI_PRESENT_STATE); + if ((present_state & SDHCI_DATA_LVL_MASK) == + SDHCI_DATA_LVL_MASK) + return 0; + } + } + + /* + * If we are here, that means the switch to 1.8V signaling + * failed. 
We power cycle the card, and retry initialization + * sequence by setting S18R to 0. + */ + pwr = sdhci_readb(host, SDHCI_POWER_CONTROL); + pwr &= ~SDHCI_POWER_ON; + sdhci_writeb(host, pwr, SDHCI_POWER_CONTROL); + + /* Wait for 1ms as per the spec */ + usleep_range(1000, 1500); + pwr |= SDHCI_POWER_ON; + sdhci_writeb(host, pwr, SDHCI_POWER_CONTROL); + + printk(KERN_INFO DRIVER_NAME ": Switching to 1.8V signalling " + "voltage failed, retrying with S18R set to 0\n"); + return -EAGAIN; + } else + /* No signal voltage switch required */ + return 0; +} + static const struct mmc_host_ops sdhci_ops = { .request = sdhci_request, .set_ios = sdhci_set_ios, .get_ro = sdhci_get_ro, .enable_sdio_irq = sdhci_enable_sdio_irq, + .start_signal_voltage_switch = sdhci_start_signal_voltage_switch, }; /*****************************************************************************\ @@ -1808,7 +1913,9 @@ EXPORT_SYMBOL_GPL(sdhci_alloc_host); int sdhci_add_host(struct sdhci_host *host) { struct mmc_host *mmc; - unsigned int caps, ocr_avail; + u32 caps[2]; + u32 max_current_caps; + unsigned int ocr_avail; int ret; WARN_ON(host == NULL); @@ -1831,12 +1938,15 @@ int sdhci_add_host(struct sdhci_host *host) host->version); } - caps = (host->quirks & SDHCI_QUIRK_MISSING_CAPS) ? host->caps : + caps[0] = (host->quirks & SDHCI_QUIRK_MISSING_CAPS) ? host->caps : sdhci_readl(host, SDHCI_CAPABILITIES); + caps[1] = (host->version >= SDHCI_SPEC_300) ? + sdhci_readl(host, SDHCI_CAPABILITIES_1) : 0; + if (host->quirks & SDHCI_QUIRK_FORCE_DMA) host->flags |= SDHCI_USE_SDMA; - else if (!(caps & SDHCI_CAN_DO_SDMA)) + else if (!(caps[0] & SDHCI_CAN_DO_SDMA)) DBG("Controller doesn't have SDMA capability\n"); else host->flags |= SDHCI_USE_SDMA; @@ -1847,7 +1957,8 @@ int sdhci_add_host(struct sdhci_host *host) host->flags &= ~SDHCI_USE_SDMA; } - if ((host->version >= SDHCI_SPEC_200) && (caps & SDHCI_CAN_DO_ADMA2)) + if ((host->version >= SDHCI_SPEC_200) && + (caps[0] & SDHCI_CAN_DO_ADMA2)) host->flags |= SDHCI_USE_ADMA; if ((host->quirks & SDHCI_QUIRK_BROKEN_ADMA) && @@ -1897,10 +2008,10 @@ int sdhci_add_host(struct sdhci_host *host) } if (host->version >= SDHCI_SPEC_300) - host->max_clk = (caps & SDHCI_CLOCK_V3_BASE_MASK) + host->max_clk = (caps[0] & SDHCI_CLOCK_V3_BASE_MASK) >> SDHCI_CLOCK_BASE_SHIFT; else - host->max_clk = (caps & SDHCI_CLOCK_BASE_MASK) + host->max_clk = (caps[0] & SDHCI_CLOCK_BASE_MASK) >> SDHCI_CLOCK_BASE_SHIFT; host->max_clk *= 1000000; @@ -1916,7 +2027,7 @@ int sdhci_add_host(struct sdhci_host *host) } host->timeout_clk = - (caps & SDHCI_TIMEOUT_CLK_MASK) >> SDHCI_TIMEOUT_CLK_SHIFT; + (caps[0] & SDHCI_TIMEOUT_CLK_MASK) >> SDHCI_TIMEOUT_CLK_SHIFT; if (host->timeout_clk == 0) { if (host->ops->get_timeout_clock) { host->timeout_clk = host->ops->get_timeout_clock(host); @@ -1928,7 +2039,7 @@ int sdhci_add_host(struct sdhci_host *host) return -ENODEV; } } - if (caps & SDHCI_TIMEOUT_CLK_UNIT) + if (caps[0] & SDHCI_TIMEOUT_CLK_UNIT) host->timeout_clk *= 1000; /* @@ -1955,21 +2066,76 @@ int sdhci_add_host(struct sdhci_host *host) if (!(host->quirks & SDHCI_QUIRK_FORCE_1_BIT_DATA)) mmc->caps |= MMC_CAP_4_BIT_DATA; - if (caps & SDHCI_CAN_DO_HISPD) + if (caps[0] & SDHCI_CAN_DO_HISPD) mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED; if ((host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) && mmc_card_is_removable(mmc)) mmc->caps |= MMC_CAP_NEEDS_POLL; + /* UHS-I mode(s) supported by the host controller. 
*/ + if (host->version >= SDHCI_SPEC_300) + mmc->caps |= MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25; + + /* SDR104 supports also implies SDR50 support */ + if (caps[1] & SDHCI_SUPPORT_SDR104) + mmc->caps |= MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_SDR50; + else if (caps[1] & SDHCI_SUPPORT_SDR50) + mmc->caps |= MMC_CAP_UHS_SDR50; + + if (caps[1] & SDHCI_SUPPORT_DDR50) + mmc->caps |= MMC_CAP_UHS_DDR50; + ocr_avail = 0; - if (caps & SDHCI_CAN_VDD_330) + /* + * According to SD Host Controller spec v3.00, if the Host System + * can afford more than 150mA, Host Driver should set XPC to 1. Also + * the value is meaningful only if Voltage Support in the Capabilities + * register is set. The actual current value is 4 times the register + * value. + */ + max_current_caps = sdhci_readl(host, SDHCI_MAX_CURRENT); + + if (caps[0] & SDHCI_CAN_VDD_330) { + int max_current_330; + ocr_avail |= MMC_VDD_32_33 | MMC_VDD_33_34; - if (caps & SDHCI_CAN_VDD_300) + + max_current_330 = ((max_current_caps & + SDHCI_MAX_CURRENT_330_MASK) >> + SDHCI_MAX_CURRENT_330_SHIFT) * + SDHCI_MAX_CURRENT_MULTIPLIER; + + if (max_current_330 > 150) + mmc->caps |= MMC_CAP_SET_XPC_330; + } + if (caps[0] & SDHCI_CAN_VDD_300) { + int max_current_300; + ocr_avail |= MMC_VDD_29_30 | MMC_VDD_30_31; - if (caps & SDHCI_CAN_VDD_180) + + max_current_300 = ((max_current_caps & + SDHCI_MAX_CURRENT_300_MASK) >> + SDHCI_MAX_CURRENT_300_SHIFT) * + SDHCI_MAX_CURRENT_MULTIPLIER; + + if (max_current_300 > 150) + mmc->caps |= MMC_CAP_SET_XPC_300; + } + if (caps[0] & SDHCI_CAN_VDD_180) { + int max_current_180; + ocr_avail |= MMC_VDD_165_195; + max_current_180 = ((max_current_caps & + SDHCI_MAX_CURRENT_180_MASK) >> + SDHCI_MAX_CURRENT_180_SHIFT) * + SDHCI_MAX_CURRENT_MULTIPLIER; + + if (max_current_180 > 150) + mmc->caps |= MMC_CAP_SET_XPC_180; + } + mmc->ocr_avail = ocr_avail; mmc->ocr_avail_sdio = ocr_avail; if (host->ocr_avail_sdio) @@ -2029,7 +2195,7 @@ int sdhci_add_host(struct sdhci_host *host) if (host->quirks & SDHCI_QUIRK_FORCE_BLK_SZ_2048) { mmc->max_blk_size = 2; } else { - mmc->max_blk_size = (caps & SDHCI_MAX_BLOCK_MASK) >> + mmc->max_blk_size = (caps[0] & SDHCI_MAX_BLOCK_MASK) >> SDHCI_MAX_BLOCK_SHIFT; if (mmc->max_blk_size >= 3) { printk(KERN_WARNING "%s: Invalid maximum block size, " diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index c6e25a76d269..5cba2fea46e0 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -68,6 +68,8 @@ #define SDHCI_DATA_AVAILABLE 0x00000800 #define SDHCI_CARD_PRESENT 0x00010000 #define SDHCI_WRITE_PROTECT 0x00080000 +#define SDHCI_DATA_LVL_MASK 0x00F00000 +#define SDHCI_DATA_LVL_SHIFT 20 #define SDHCI_HOST_CONTROL 0x28 #define SDHCI_CTRL_LED 0x01 @@ -146,7 +148,8 @@ #define SDHCI_ACMD12_ERR 0x3C -/* 3E-3F reserved */ +#define SDHCI_HOST_CONTROL2 0x3E +#define SDHCI_CTRL_VDD_180 0x0008 #define SDHCI_CAPABILITIES 0x40 #define SDHCI_TIMEOUT_CLK_MASK 0x0000003F @@ -167,9 +170,20 @@ #define SDHCI_CAN_VDD_180 0x04000000 #define SDHCI_CAN_64BIT 0x10000000 +#define SDHCI_SUPPORT_SDR50 0x00000001 +#define SDHCI_SUPPORT_SDR104 0x00000002 +#define SDHCI_SUPPORT_DDR50 0x00000004 + #define SDHCI_CAPABILITIES_1 0x44 -#define SDHCI_MAX_CURRENT 0x48 +#define SDHCI_MAX_CURRENT 0x48 +#define SDHCI_MAX_CURRENT_330_MASK 0x0000FF +#define SDHCI_MAX_CURRENT_330_SHIFT 0 +#define SDHCI_MAX_CURRENT_300_MASK 0x00FF00 +#define SDHCI_MAX_CURRENT_300_SHIFT 8 +#define SDHCI_MAX_CURRENT_180_MASK 0xFF0000 +#define SDHCI_MAX_CURRENT_180_SHIFT 16 +#define SDHCI_MAX_CURRENT_MULTIPLIER 4 /* 4C-4F reserved for more max 
current */ diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 0fffa5cdc183..bde5a0b1c47e 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -56,6 +56,11 @@ struct mmc_ios { #define MMC_SDR_MODE 0 #define MMC_1_2V_DDR_MODE 1 #define MMC_1_8V_DDR_MODE 2 + + unsigned char signal_voltage; /* signalling voltage (1.8V or 3.3V) */ + +#define MMC_SIGNAL_VOLTAGE_330 0 +#define MMC_SIGNAL_VOLTAGE_180 1 }; struct mmc_host_ops { @@ -117,6 +122,8 @@ struct mmc_host_ops { /* optional callback for HC quirks */ void (*init_card)(struct mmc_host *host, struct mmc_card *card); + + int (*start_signal_voltage_switch)(struct mmc_host *host, struct mmc_ios *ios); }; struct mmc_card; @@ -173,6 +180,14 @@ struct mmc_host { /* DDR mode at 1.2V */ #define MMC_CAP_POWER_OFF_CARD (1 << 13) /* Can power off after boot */ #define MMC_CAP_BUS_WIDTH_TEST (1 << 14) /* CMD14/CMD19 bus width ok */ +#define MMC_CAP_UHS_SDR12 (1 << 15) /* Host supports UHS SDR12 mode */ +#define MMC_CAP_UHS_SDR25 (1 << 16) /* Host supports UHS SDR25 mode */ +#define MMC_CAP_UHS_SDR50 (1 << 17) /* Host supports UHS SDR50 mode */ +#define MMC_CAP_UHS_SDR104 (1 << 18) /* Host supports UHS SDR104 mode */ +#define MMC_CAP_UHS_DDR50 (1 << 19) /* Host supports UHS DDR50 mode */ +#define MMC_CAP_SET_XPC_330 (1 << 20) /* Host supports >150mA current at 3.3V */ +#define MMC_CAP_SET_XPC_300 (1 << 21) /* Host supports >150mA current at 3.0V */ +#define MMC_CAP_SET_XPC_180 (1 << 22) /* Host supports >150mA current at 1.8V */ mmc_pm_flag_t pm_caps; /* supported pm features */ diff --git a/include/linux/mmc/sd.h b/include/linux/mmc/sd.h index 3fd85e088cc3..c648878f6734 100644 --- a/include/linux/mmc/sd.h +++ b/include/linux/mmc/sd.h @@ -17,6 +17,7 @@ /* This is basically the same command as for MMC with some quirks. */ #define SD_SEND_RELATIVE_ADDR 3 /* bcr R6 */ #define SD_SEND_IF_COND 8 /* bcr [11:0] See below R7 */ +#define SD_SWITCH_VOLTAGE 11 /* ac R1 */ /* class 10 */ #define SD_SWITCH 6 /* adtc [31:0] See below R1 */ @@ -32,6 +33,12 @@ #define SD_APP_OP_COND 41 /* bcr [31:0] OCR R3 */ #define SD_APP_SEND_SCR 51 /* adtc R1 */ +/* OCR bit definitions */ +#define SD_OCR_S18R (1 << 24) /* 1.8V switching request */ +#define SD_ROCR_S18A SD_OCR_S18R /* 1.8V switching accepted by card */ +#define SD_OCR_XPC (1 << 28) /* SDXC power control */ +#define SD_OCR_CCS (1 << 30) /* Card Capacity Status */ + /* * SD_SWITCH argument format: * -- cgit v1.2.3 From 013909c4ffd16ded4895528b856fd8782df04dc6 Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Thu, 5 May 2011 12:18:58 +0530 Subject: mmc: sd: query function modes for uhs cards SD cards which conform to Physical Layer Spec v3.01 can support additional Bus Speed Modes, Driver Strength, and Current Limit other than the default values. We use CMD6 mode 0 to read these additional card functions. The values read here will be used during UHS-I initialization steps. Tested by Zhangfei Gao with a Toshiba uhs card and general hs card, on mmp2 in SDMA mode. 
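The three CMD6 mode-0 ("check") queries added here can be summarised as follows (a hedged sketch; the helper name is invented, the group indices and status-byte offsets match the patch, and status must point to a kmalloc'd 64-byte buffer):

    static int example_read_uhs_caps(struct mmc_card *card, u8 *status)
    {
        int err;

        err = mmc_sd_switch(card, 0, 0, 1, status);   /* bus speed modes */
        if (err)
            return err;
        card->sw_caps.sd3_bus_mode = status[13];

        err = mmc_sd_switch(card, 0, 2, 1, status);   /* driver strength */
        if (err)
            return err;
        card->sw_caps.sd3_drv_type = status[9];

        err = mmc_sd_switch(card, 0, 3, 1, status);   /* current limit */
        if (err)
            return err;
        card->sw_caps.sd3_curr_limit = status[7];

        return 0;
    }

These cached values are consumed later by the UHS-I initialization steps.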
Signed-off-by: Arindam Nath Reviewed-by: Philip Rakity Tested-by: Philip Rakity Acked-by: Zhangfei Gao Signed-off-by: Chris Ball --- drivers/mmc/core/sd.c | 68 +++++++++++++++++++++++++++++++++++++++++------- include/linux/mmc/card.h | 4 +++ 2 files changed, 62 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index b0cd285d272a..8285842f19e9 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -189,6 +189,9 @@ static int mmc_decode_scr(struct mmc_card *card) scr->sda_vsn = UNSTUFF_BITS(resp, 56, 4); scr->bus_widths = UNSTUFF_BITS(resp, 48, 4); + if (scr->sda_vsn == SCR_SPEC_VER_2) + /* Check if Physical Layer Spec v3.0 is supported */ + scr->sda_spec3 = UNSTUFF_BITS(resp, 47, 1); if (UNSTUFF_BITS(resp, 55, 1)) card->erased_byte = 0xFF; @@ -274,29 +277,74 @@ static int mmc_read_switch(struct mmc_card *card) status = kmalloc(64, GFP_KERNEL); if (!status) { printk(KERN_ERR "%s: could not allocate a buffer for " - "switch capabilities.\n", mmc_hostname(card->host)); + "switch capabilities.\n", + mmc_hostname(card->host)); return -ENOMEM; } + /* Find out the supported Bus Speed Modes. */ err = mmc_sd_switch(card, 0, 0, 1, status); if (err) { - /* If the host or the card can't do the switch, - * fail more gracefully. */ - if ((err != -EINVAL) - && (err != -ENOSYS) - && (err != -EFAULT)) + /* + * If the host or the card can't do the switch, + * fail more gracefully. + */ + if (err != -EINVAL && err != -ENOSYS && err != -EFAULT) goto out; - printk(KERN_WARNING "%s: problem reading switch " - "capabilities, performance might suffer.\n", + printk(KERN_WARNING "%s: problem reading Bus Speed modes.\n", mmc_hostname(card->host)); err = 0; goto out; } - if (status[13] & 0x02) - card->sw_caps.hs_max_dtr = 50000000; + if (card->scr.sda_spec3) { + card->sw_caps.sd3_bus_mode = status[13]; + + /* Find out Driver Strengths supported by the card */ + err = mmc_sd_switch(card, 0, 2, 1, status); + if (err) { + /* + * If the host or the card can't do the switch, + * fail more gracefully. + */ + if (err != -EINVAL && err != -ENOSYS && err != -EFAULT) + goto out; + + printk(KERN_WARNING "%s: problem reading " + "Driver Strength.\n", + mmc_hostname(card->host)); + err = 0; + + goto out; + } + + card->sw_caps.sd3_drv_type = status[9]; + + /* Find out Current Limits supported by the card */ + err = mmc_sd_switch(card, 0, 3, 1, status); + if (err) { + /* + * If the host or the card can't do the switch, + * fail more gracefully. 
+ */ + if (err != -EINVAL && err != -ENOSYS && err != -EFAULT) + goto out; + + printk(KERN_WARNING "%s: problem reading " + "Current Limit.\n", + mmc_hostname(card->host)); + err = 0; + + goto out; + } + + card->sw_caps.sd3_curr_limit = status[7]; + } else { + if (status[13] & 0x02) + card->sw_caps.hs_max_dtr = 50000000; + } out: kfree(status); diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 72a98681ef47..56f4d9234a66 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -67,6 +67,7 @@ struct mmc_ext_csd { struct sd_scr { unsigned char sda_vsn; + unsigned char sda_spec3; unsigned char bus_widths; #define SD_SCR_BUS_WIDTH_1 (1<<0) #define SD_SCR_BUS_WIDTH_4 (1<<2) @@ -80,6 +81,9 @@ struct sd_ssr { struct sd_switch_caps { unsigned int hs_max_dtr; + unsigned int sd3_bus_mode; + unsigned int sd3_drv_type; + unsigned int sd3_curr_limit; }; struct sdio_cccr { -- cgit v1.2.3 From f5671ab3f67a10f7234de21464391c20c1ef8ebb Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Mon, 23 May 2011 17:15:46 +0100 Subject: mtd: introduce mtd_device_(un)register() To prepare for the removal of add_mtd_device and add_mtd_partitions(), introduce mtd_device_register(). This will create partitions if they are supplied or register the whole device if there are no partitions. Once all drivers are converted to use mtd_device_register(), add_mtd_device() and add_mtd_partitions() will be made internal only. v2: move kerneldoc to implementation file and fixup some kerneldoc warnings. Artem: tweak comments: remove junk tabs, use dots consistently. Signed-off-by: Jamie Iles Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/mtdcore.c | 45 ++++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/mtd.h | 6 ++++++ include/linux/mtd/partitions.h | 2 +- 3 files changed, 52 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index f3c94006ffe6..9af103bf9218 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -38,6 +38,7 @@ #include #include +#include #include "mtdcore.h" /* @@ -427,6 +428,50 @@ out_error: return ret; } +/** + * mtd_device_register - register an MTD device. + * + * @master: the MTD device to register + * @parts: the partitions to register - only valid if nr_parts > 0 + * @nr_parts: the number of partitions in parts. If zero then the full MTD + * device is registered + * + * Register an MTD device with the system and optionally, a number of + * partitions. If nr_parts is 0 then the whole device is registered, otherwise + * only the partitions are registered. To register both the full device *and* + * the partitions, call mtd_device_register() twice, once with nr_parts == 0 + * and once equal to the number of partitions. + */ +int mtd_device_register(struct mtd_info *master, + const struct mtd_partition *parts, + int nr_parts) +{ + return parts ? add_mtd_partitions(master, parts, nr_parts) : + add_mtd_device(master); +} +EXPORT_SYMBOL_GPL(mtd_device_register); + +/** + * mtd_device_unregister - unregister an existing MTD device. + * + * @master: the MTD device to unregister. This will unregister both the master + * and any partitions if registered. 
+ */ +int mtd_device_unregister(struct mtd_info *master) +{ + int err; + + err = del_mtd_partitions(master); + if (err) + return err; + + if (!device_is_registered(&master->dev)) + return 0; + + return del_mtd_device(master); +} +EXPORT_SYMBOL_GPL(mtd_device_unregister); + /** * register_mtd_user - register a 'user' of MTD devices. * @new: pointer to notifier info structure diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 06b489a7605b..f4b0b27a7bbe 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -325,6 +325,12 @@ static inline uint32_t mtd_mod_by_ws(uint64_t sz, struct mtd_info *mtd) extern int add_mtd_device(struct mtd_info *mtd); extern int del_mtd_device (struct mtd_info *mtd); +struct mtd_partition; + +extern int mtd_device_register(struct mtd_info *master, + const struct mtd_partition *parts, + int nr_parts); +extern int mtd_device_unregister(struct mtd_info *master); extern struct mtd_info *get_mtd_device(struct mtd_info *mtd, int num); extern int __get_mtd_device(struct mtd_info *mtd); extern void __put_mtd_device(struct mtd_info *mtd); diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index 4a0a8ba90a72..998a6cf35167 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -16,7 +16,7 @@ * Partition definition structure: * * An array of struct partition is passed along with a MTD object to - * add_mtd_partitions() to create them. + * mtd_device_register() to create them. * * For each partition, these fields are available: * name: string that will be used to label the partition's MTD device. -- cgit v1.2.3 From 11b73c8b10e58ae90c12e51be5531007e86a9c66 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Mon, 23 May 2011 10:22:44 +0100 Subject: mtd: provide of_mtd_parse_partitions for !CONFIG_MTD_OF_PARTS If we don't have OpenFirmware enabled then provide a stub of_mtd_parse_partitions that returns no partitions so drivers don't need ifdeffery inside. Signed-off-by: Jamie Iles Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- include/linux/mtd/partitions.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index 998a6cf35167..afe32db14a87 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -73,9 +73,18 @@ extern int parse_mtd_partitions(struct mtd_info *master, const char **types, struct device; struct device_node; +#ifdef CONFIG_MTD_OF_PARTS int __devinit of_mtd_parse_partitions(struct device *dev, struct device_node *node, struct mtd_partition **pparts); +#else +static inline int of_mtd_parse_partitions(struct device *dev, + struct device_node *node, + struct mtd_partition **pparts) +{ + return 0; +} +#endif #ifdef CONFIG_MTD_PARTITIONS static inline int mtd_has_partitions(void) { return 1; } -- cgit v1.2.3 From 984e6d8ec5abe0487e4c3c22d233cd6ba8695cda Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Mon, 23 May 2011 10:22:45 +0100 Subject: mtd: physmap: convert to mtd_device_register() Convert to mtd_device_register() and remove the CONFIG_MTD_PARTITIONS preprocessor conditionals as partitioning is always available. 
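For a driver outside physmap the conversion typically looks like the following before/after sketch (hedged illustration; mtd, parts and nr_parts stand for whatever the driver already has):

    /* Before: registration split by CONFIG_MTD_PARTITIONS. */
    #ifdef CONFIG_MTD_PARTITIONS
        if (nr_parts)
            add_mtd_partitions(mtd, parts, nr_parts);
        else
            add_mtd_device(mtd);
    #else
        add_mtd_device(mtd);
    #endif

    /* After: a single call; pass NULL/0 when there are no partitions. */
        mtd_device_register(mtd, nr_parts ? parts : NULL, nr_parts);
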
Signed-off-by: Jamie Iles Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/maps/physmap.c | 34 +++++++++------------------------- drivers/mtd/maps/physmap_of.c | 30 ++++-------------------------- include/linux/mtd/physmap.h | 4 ---- 3 files changed, 13 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c index 7522df4f71f1..d2059261ca8b 100644 --- a/drivers/mtd/maps/physmap.c +++ b/drivers/mtd/maps/physmap.c @@ -27,10 +27,8 @@ struct physmap_flash_info { struct mtd_info *mtd[MAX_RESOURCES]; struct mtd_info *cmtd; struct map_info map[MAX_RESOURCES]; -#ifdef CONFIG_MTD_PARTITIONS int nr_parts; struct mtd_partition *parts; -#endif }; static int physmap_flash_remove(struct platform_device *dev) @@ -47,18 +45,9 @@ static int physmap_flash_remove(struct platform_device *dev) physmap_data = dev->dev.platform_data; if (info->cmtd) { -#ifdef CONFIG_MTD_PARTITIONS - if (info->nr_parts || physmap_data->nr_parts) { - del_mtd_partitions(info->cmtd); - - if (info->nr_parts) - kfree(info->parts); - } else { - del_mtd_device(info->cmtd); - } -#else - del_mtd_device(info->cmtd); -#endif + mtd_device_unregister(info->cmtd); + if (info->nr_parts) + kfree(info->parts); if (info->cmtd != info->mtd[0]) mtd_concat_destroy(info->cmtd); } @@ -76,9 +65,7 @@ static const char *rom_probe_types[] = { "qinfo_probe", "map_rom", NULL }; -#ifdef CONFIG_MTD_PARTITIONS static const char *part_probe_types[] = { "cmdlinepart", "RedBoot", NULL }; -#endif static int physmap_flash_probe(struct platform_device *dev) { @@ -164,24 +151,23 @@ static int physmap_flash_probe(struct platform_device *dev) if (err) goto err_out; -#ifdef CONFIG_MTD_PARTITIONS err = parse_mtd_partitions(info->cmtd, part_probe_types, - &info->parts, 0); + &info->parts, 0); if (err > 0) { - add_mtd_partitions(info->cmtd, info->parts, err); + mtd_device_register(info->cmtd, info->parts, err); info->nr_parts = err; return 0; } if (physmap_data->nr_parts) { printk(KERN_NOTICE "Using physmap partition information\n"); - add_mtd_partitions(info->cmtd, physmap_data->parts, - physmap_data->nr_parts); + mtd_device_register(info->cmtd, physmap_data->parts, + physmap_data->nr_parts); return 0; } -#endif - add_mtd_device(info->cmtd); + mtd_device_register(info->cmtd, NULL, 0); + return 0; err_out: @@ -245,14 +231,12 @@ void physmap_configure(unsigned long addr, unsigned long size, physmap_flash_data.set_vpp = set_vpp; } -#ifdef CONFIG_MTD_PARTITIONS void physmap_set_partitions(struct mtd_partition *parts, int num_parts) { physmap_flash_data.nr_parts = num_parts; physmap_flash_data.parts = parts; } #endif -#endif static int __init physmap_init(void) { diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c index c1d33464aee8..d251d1db129b 100644 --- a/drivers/mtd/maps/physmap_of.c +++ b/drivers/mtd/maps/physmap_of.c @@ -34,16 +34,12 @@ struct of_flash_list { struct of_flash { struct mtd_info *cmtd; -#ifdef CONFIG_MTD_PARTITIONS struct mtd_partition *parts; -#endif int list_size; /* number of elements in of_flash_list */ struct of_flash_list list[0]; }; -#ifdef CONFIG_MTD_PARTITIONS #define OF_FLASH_PARTS(info) ((info)->parts) - static int parse_obsolete_partitions(struct platform_device *dev, struct of_flash *info, struct device_node *dp) @@ -89,10 +85,6 @@ static int parse_obsolete_partitions(struct platform_device *dev, return nr_parts; } -#else /* MTD_PARTITIONS */ -#define OF_FLASH_PARTS(info) (0) -#define parse_partitions(info, dev) (0) 
-#endif /* MTD_PARTITIONS */ static int of_flash_remove(struct platform_device *dev) { @@ -105,17 +97,14 @@ static int of_flash_remove(struct platform_device *dev) dev_set_drvdata(&dev->dev, NULL); if (info->cmtd != info->list[0].mtd) { - del_mtd_device(info->cmtd); + mtd_device_unregister(info->cmtd); mtd_concat_destroy(info->cmtd); } if (info->cmtd) { - if (OF_FLASH_PARTS(info)) { - del_mtd_partitions(info->cmtd); + if (OF_FLASH_PARTS(info)) kfree(OF_FLASH_PARTS(info)); - } else { - del_mtd_device(info->cmtd); - } + mtd_device_unregister(info->cmtd); } for (i = 0; i < info->list_size; i++) { @@ -172,7 +161,6 @@ static struct mtd_info * __devinit obsolete_probe(struct platform_device *dev, } } -#ifdef CONFIG_MTD_PARTITIONS /* When partitions are set we look for a linux,part-probe property which specifies the list of partition probers to use. If none is given then the default is use. These take precedence over other device tree @@ -212,14 +200,11 @@ static void __devinit of_free_probes(const char **probes) if (probes != part_probe_types_def) kfree(probes); } -#endif static struct of_device_id of_flash_match[]; static int __devinit of_flash_probe(struct platform_device *dev) { -#ifdef CONFIG_MTD_PARTITIONS const char **part_probe_types; -#endif const struct of_device_id *match; struct device_node *dp = dev->dev.of_node; struct resource res; @@ -346,7 +331,6 @@ static int __devinit of_flash_probe(struct platform_device *dev) if (err) goto err_out; -#ifdef CONFIG_MTD_PARTITIONS part_probe_types = of_get_probes(dp); err = parse_mtd_partitions(info->cmtd, part_probe_types, &info->parts, 0); @@ -356,13 +340,11 @@ static int __devinit of_flash_probe(struct platform_device *dev) } of_free_probes(part_probe_types); -#ifdef CONFIG_MTD_OF_PARTS if (err == 0) { err = of_mtd_parse_partitions(&dev->dev, dp, &info->parts); if (err < 0) goto err_out; } -#endif if (err == 0) { err = parse_obsolete_partitions(dev, info, dp); @@ -370,11 +352,7 @@ static int __devinit of_flash_probe(struct platform_device *dev) goto err_out; } - if (err > 0) - add_mtd_partitions(info->cmtd, info->parts, err); - else -#endif - add_mtd_device(info->cmtd); + mtd_device_register(info->cmtd, info->parts, err); kfree(mtd_list); diff --git a/include/linux/mtd/physmap.h b/include/linux/mtd/physmap.h index bcfd9f777454..e963b86e296b 100644 --- a/include/linux/mtd/physmap.h +++ b/include/linux/mtd/physmap.h @@ -35,8 +35,6 @@ struct physmap_flash_data { void physmap_configure(unsigned long addr, unsigned long size, int bankwidth, void (*set_vpp)(struct map_info *, int) ); -#ifdef CONFIG_MTD_PARTITIONS - /* * Machines that wish to do flash partition may want to call this function in * their setup routine. @@ -48,6 +46,4 @@ void physmap_configure(unsigned long addr, unsigned long size, */ void physmap_set_partitions(struct mtd_partition *parts, int num_parts); -#endif /* defined(CONFIG_MTD_PARTITIONS) */ - #endif /* __LINUX_MTD_PHYSMAP__ */ -- cgit v1.2.3 From eea72d5fdf59879edb2c6639b0b7cc385e0df646 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Mon, 23 May 2011 10:23:42 +0100 Subject: mtd: remove add_mtd_partitions, add_mtd_device and friends These symbols are replaced with mtd_device_register() (and removal with mtd_device_unregister()) for public registration. 
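In driver code this collapses the old conditional call sites into one unconditional call; the fragment below is an illustrative before/after only (the old symbols remain available to MTD-internal code via drivers/mtd/mtdcore.h).

	/* before: callers picked the right pair, usually under CONFIG_MTD_PARTITIONS */
#ifdef CONFIG_MTD_PARTITIONS
	if (nr_parts)
		add_mtd_partitions(mtd, parts, nr_parts);
	else
		add_mtd_device(mtd);
#else
	add_mtd_device(mtd);
#endif

	/* after: one call covers both the partitioned and the unpartitioned case */
	mtd_device_register(mtd, parts, nr_parts);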
Signed-off-by: Jamie Iles Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- drivers/mtd/mtdcore.c | 4 +--- drivers/mtd/mtdcore.h | 6 ++++++ drivers/mtd/mtdpart.c | 4 ++-- include/linux/mtd/mtd.h | 3 --- include/linux/mtd/partitions.h | 9 --------- 5 files changed, 9 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 9af103bf9218..c510aff289a8 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -393,7 +393,7 @@ fail_locked: * if the requested device does not appear to be present in the list. */ -int del_mtd_device (struct mtd_info *mtd) +int del_mtd_device(struct mtd_info *mtd) { int ret; struct mtd_notifier *not; @@ -723,8 +723,6 @@ void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size) return kmalloc(*size, GFP_KERNEL); } -EXPORT_SYMBOL_GPL(add_mtd_device); -EXPORT_SYMBOL_GPL(del_mtd_device); EXPORT_SYMBOL_GPL(get_mtd_device); EXPORT_SYMBOL_GPL(get_mtd_device_nm); EXPORT_SYMBOL_GPL(__get_mtd_device); diff --git a/drivers/mtd/mtdcore.h b/drivers/mtd/mtdcore.h index 6a64fdebc898..0ed6126b4c1f 100644 --- a/drivers/mtd/mtdcore.h +++ b/drivers/mtd/mtdcore.h @@ -10,6 +10,12 @@ extern struct mutex mtd_table_mutex; extern struct mtd_info *__mtd_next_device(int i); +extern int add_mtd_device(struct mtd_info *mtd); +extern int del_mtd_device(struct mtd_info *mtd); +extern int add_mtd_partitions(struct mtd_info *, const struct mtd_partition *, + int); +extern int del_mtd_partitions(struct mtd_info *); + #define mtd_for_each_device(mtd) \ for ((mtd) = __mtd_next_device(0); \ (mtd) != NULL; \ diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index cd631e773a7b..630be3e7da04 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -31,6 +31,8 @@ #include #include +#include "mtdcore.h" + /* Our partition linked list */ static LIST_HEAD(mtd_partitions); static DEFINE_MUTEX(mtd_partitions_mutex); @@ -376,7 +378,6 @@ int del_mtd_partitions(struct mtd_info *master) return err; } -EXPORT_SYMBOL(del_mtd_partitions); static struct mtd_part *allocate_partition(struct mtd_info *master, const struct mtd_partition *part, int partno, @@ -671,7 +672,6 @@ int add_mtd_partitions(struct mtd_info *master, return 0; } -EXPORT_SYMBOL(add_mtd_partitions); static DEFINE_SPINLOCK(part_parser_lock); static LIST_HEAD(part_parsers); diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index f4b0b27a7bbe..1e3887bc105c 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -322,9 +322,6 @@ static inline uint32_t mtd_mod_by_ws(uint64_t sz, struct mtd_info *mtd) /* Kernel-side ioctl definitions */ -extern int add_mtd_device(struct mtd_info *mtd); -extern int del_mtd_device (struct mtd_info *mtd); - struct mtd_partition; extern int mtd_device_register(struct mtd_info *master, diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index afe32db14a87..3a6f0372fc96 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -49,9 +49,6 @@ struct mtd_partition { struct mtd_info; -int add_mtd_partitions(struct mtd_info *, const struct mtd_partition *, int); -int del_mtd_partitions(struct mtd_info *); - /* * Functions dealing with the various ways of partitioning the space */ @@ -86,12 +83,6 @@ static inline int of_mtd_parse_partitions(struct device *dev, } #endif -#ifdef CONFIG_MTD_PARTITIONS -static inline int mtd_has_partitions(void) { return 1; } -#else -static inline int mtd_has_partitions(void) { return 0; } -#endif - #ifdef 
CONFIG_MTD_CMDLINE_PARTS static inline int mtd_has_cmdlinepart(void) { return 1; } #else -- cgit v1.2.3 From 6a8a98b22b10f1560d5f90aded4a54234b9b2724 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Mon, 23 May 2011 10:23:43 +0100 Subject: mtd: kill CONFIG_MTD_PARTITIONS Now that none of the drivers use CONFIG_MTD_PARTITIONS we can remove it from Kconfig and the last remaining uses. Signed-off-by: Jamie Iles Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- Documentation/DocBook/mtdnand.tmpl | 3 +-- drivers/mtd/Kconfig | 10 +--------- drivers/mtd/Makefile | 3 +-- drivers/mtd/maps/Kconfig | 14 +++++--------- drivers/mtd/mtdchar.c | 5 ----- drivers/mtd/nand/Kconfig | 3 +-- include/linux/mtd/mtd.h | 8 -------- 7 files changed, 9 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl index 6f242d5dee9a..17910e2052ad 100644 --- a/Documentation/DocBook/mtdnand.tmpl +++ b/Documentation/DocBook/mtdnand.tmpl @@ -189,8 +189,7 @@ static void __iomem *baseaddr; Partition defines If you want to divide your device into partitions, then - enable the configuration switch CONFIG_MTD_PARTITIONS and define - a partitioning scheme suitable to your board. + define a partitioning scheme suitable to your board. #define NUM_PARTITIONS 2 diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig index 8b61b0cc7b45..62b4fdbb9ad1 100644 --- a/drivers/mtd/Kconfig +++ b/drivers/mtd/Kconfig @@ -1,7 +1,6 @@ menuconfig MTD tristate "Memory Technology Device (MTD) support" depends on HAS_IOMEM - select MTD_PARTITIONS help Memory Technology Devices are flash, RAM and similar chips, often used for solid state file systems on embedded devices. This option @@ -34,11 +33,6 @@ config MTD_TESTS should normally be compiled as kernel modules. The modules perform various checks and verifications when loaded. -config MTD_PARTITIONS - bool - -if MTD_PARTITIONS - config MTD_REDBOOT_PARTS tristate "RedBoot partition table parsing" ---help--- @@ -91,7 +85,7 @@ endif # MTD_REDBOOT_PARTS config MTD_CMDLINE_PARTS bool "Command line partition table parsing" - depends on MTD_PARTITIONS = "y" && MTD = "y" + depends on MTD = "y" ---help--- Allow generic configuration of the MTD partition tables via the kernel command line. Multiple flash resources are supported for hardware where @@ -156,8 +150,6 @@ config MTD_AR7_PARTS ---help--- TI AR7 partitioning support -endif # MTD_PARTITIONS - comment "User Modules And Translation Layers" config MTD_CHAR diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile index d578095fb255..39664c4229ff 100644 --- a/drivers/mtd/Makefile +++ b/drivers/mtd/Makefile @@ -4,8 +4,7 @@ # Core functionality. 
obj-$(CONFIG_MTD) += mtd.o -mtd-y := mtdcore.o mtdsuper.o mtdconcat.o -mtd-$(CONFIG_MTD_PARTITIONS) += mtdpart.o +mtd-y := mtdcore.o mtdsuper.o mtdconcat.o mtdpart.o mtd-$(CONFIG_MTD_OF_PARTS) += ofpart.o obj-$(CONFIG_MTD_REDBOOT_PARTS) += redboot.o diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig index 44032433730d..195b9ef3b7ee 100644 --- a/drivers/mtd/maps/Kconfig +++ b/drivers/mtd/maps/Kconfig @@ -82,7 +82,6 @@ config MTD_PHYSMAP_OF config MTD_PMC_MSP_EVM tristate "CFI Flash device mapped on PMC-Sierra MSP" depends on PMC_MSP && MTD_CFI - select MTD_PARTITIONS help This provides a 'mapping' driver which supports the way in which user-programmable flash chips are connected on the @@ -122,7 +121,7 @@ config MTD_SC520CDP config MTD_NETSC520 tristate "CFI Flash device mapped on AMD NetSc520" - depends on X86 && MTD_CFI && MTD_PARTITIONS + depends on X86 && MTD_CFI help This enables access routines for the flash chips on the AMD NetSc520 demonstration board. If you have one of these boards and would like @@ -131,7 +130,6 @@ config MTD_NETSC520 config MTD_TS5500 tristate "JEDEC Flash device mapped on Technologic Systems TS-5500" depends on X86 - select MTD_PARTITIONS select MTD_JEDECPROBE select MTD_CFI_AMDSTD help @@ -149,7 +147,7 @@ config MTD_TS5500 config MTD_SBC_GXX tristate "CFI Flash device mapped on Arcom SBC-GXx boards" - depends on X86 && MTD_CFI_INTELEXT && MTD_PARTITIONS && MTD_COMPLEX_MAPPINGS + depends on X86 && MTD_CFI_INTELEXT && MTD_COMPLEX_MAPPINGS help This provides a driver for the on-board flash of Arcom Control Systems' SBC-GXn family of boards, formerly known as SBC-MediaGX. @@ -246,7 +244,7 @@ config MTD_TSUNAMI config MTD_NETtel tristate "CFI flash device on SnapGear/SecureEdge" - depends on X86 && MTD_PARTITIONS && MTD_JEDECPROBE + depends on X86 && MTD_JEDECPROBE help Support for flash chips on NETtel/SecureEdge/SnapGear boards. @@ -261,7 +259,7 @@ config MTD_BCM963XX config MTD_DILNETPC tristate "CFI Flash device mapped on DIL/Net PC" - depends on X86 && MTD_PARTITIONS && MTD_CFI_INTELEXT && BROKEN + depends on X86 && MTD_CFI_INTELEXT && BROKEN help MTD map driver for SSV DIL/Net PC Boards "DNP" and "ADNP". For details, see @@ -381,7 +379,7 @@ config MTD_IXP2000 config MTD_FORTUNET tristate "CFI Flash device mapped on the FortuNet board" - depends on MTD_CFI && MTD_PARTITIONS && SA1100_FORTUNET + depends on MTD_CFI && SA1100_FORTUNET help This enables access to the Flash on the FortuNet board. If you have such a board, say 'Y'. @@ -479,7 +477,6 @@ config MTD_UCLINUX config MTD_WRSBC8260 tristate "Map driver for WindRiver PowerQUICC II MPC82xx board" depends on (SBC82xx || SBC8560) - select MTD_PARTITIONS select MTD_MAP_BANK_WIDTH_4 select MTD_MAP_BANK_WIDTH_1 select MTD_CFI_I1 @@ -492,7 +489,6 @@ config MTD_WRSBC8260 config MTD_DMV182 tristate "Map driver for Dy-4 SVME/DMV-182 board." 
depends on DMV182 - select MTD_PARTITIONS select MTD_MAP_BANK_WIDTH_32 select MTD_CFI_I8 select MTD_CFI_AMDSTD diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index f488eabaa7b5..3f92731a5b9e 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -508,7 +508,6 @@ static int shrink_ecclayout(const struct nand_ecclayout *from, return 0; } -#ifdef CONFIG_MTD_PARTITIONS static int mtd_blkpg_ioctl(struct mtd_info *mtd, struct blkpg_ioctl_arg __user *arg) { @@ -544,8 +543,6 @@ static int mtd_blkpg_ioctl(struct mtd_info *mtd, return -EINVAL; } } -#endif - static int mtd_ioctl(struct file *file, u_int cmd, u_long arg) { @@ -937,7 +934,6 @@ static int mtd_ioctl(struct file *file, u_int cmd, u_long arg) break; } -#ifdef CONFIG_MTD_PARTITIONS case BLKPG: { ret = mtd_blkpg_ioctl(mtd, @@ -951,7 +947,6 @@ static int mtd_ioctl(struct file *file, u_int cmd, u_long arg) ret = 0; break; } -#endif default: ret = -ENOTTY; diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index 333d23122226..4c3425235adc 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -419,7 +419,6 @@ config MTD_NAND_TMIO config MTD_NAND_NANDSIM tristate "Support for NAND Flash Simulator" - depends on MTD_PARTITIONS help The simulator may simulate various NAND flash chips for the MTD nand layer. @@ -513,7 +512,7 @@ config MTD_NAND_SOCRATES config MTD_NAND_NUC900 tristate "Support for NAND on Nuvoton NUC9xx/w90p910 evaluation boards." - depends on ARCH_W90X900 && MTD_PARTITIONS + depends on ARCH_W90X900 help This enables the driver for the NAND Flash on evaluation board based on w90p910 / NUC9xx. diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 1e3887bc105c..2541fb848daa 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -353,15 +353,7 @@ int default_mtd_readv(struct mtd_info *mtd, struct kvec *vecs, void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size); -#ifdef CONFIG_MTD_PARTITIONS void mtd_erase_callback(struct erase_info *instr); -#else -static inline void mtd_erase_callback(struct erase_info *instr) -{ - if (instr->callback) - instr->callback(instr); -} -#endif /* * Debugging macro and defines -- cgit v1.2.3 From d6d50a15a2897d4133d536dd4343b5cf21163db3 Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Thu, 5 May 2011 12:18:59 +0530 Subject: mmc: sd: add support for driver type selection This patch adds support for setting driver strength during UHS-I initialization procedure. Since UHS-I cards set S18A (bit 24) in response to ACMD41, we use this as a base for UHS-I initialization. We modify the parameter list of mmc_sd_get_cid() so that we can save the ROCR from ACMD41 to check whether bit 24 is set. We decide whether the Host Controller supports A, C, or D driver type depending on the Capabilities register. Driver type B is suported by default. We then set the appropriate driver type for the card using CMD6 mode 1. As per Host Controller spec v3.00, we set driver type for the host only if Preset Value Enable in the Host Control2 register is not set. SDHCI_HOST_CONTROL has been renamed to SDHCI_HOST_CONTROL1 to conform to the spec. Tested by Zhangfei Gao with a Toshiba uhs card and general hs card, on mmp2 in SDMA mode. 
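The core-to-host hand-off reuses the existing ->set_ios() path: mmc_set_driver_type() records the chosen type in ios->drv_type and calls back into the host driver, which programs its output drive strength. A hypothetical non-SDHCI host would hook in roughly as sketched below; every example_ name is invented for illustration and not part of this patch.

#include <linux/mmc/host.h>

struct example_host {
	struct mmc_host *mmc;
	void __iomem *regs;
};

static void example_host_write_drv_strength(struct example_host *host, u8 drv_type)
{
	/* controller-specific pad/drive-strength register write would go here */
}

static void example_host_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
{
	struct example_host *host = mmc_priv(mmc);

	/*
	 * ios->drv_type carries one of MMC_SET_DRIVER_TYPE_{B,A,C,D},
	 * chosen by the core in sd_select_driver_type().
	 */
	example_host_write_drv_strength(host, ios->drv_type);

	/* ... existing clock, bus-width and power handling ... */
}

static void example_host_advertise_caps(struct example_host *host)
{
	/* advertise which non-default driver types the controller can drive */
	host->mmc->caps |= MMC_CAP_DRIVER_TYPE_A | MMC_CAP_DRIVER_TYPE_C;
}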
Signed-off-by: Arindam Nath Reviewed-by: Philip Rakity Tested-by: Philip Rakity Acked-by: Zhangfei Gao Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 9 +++ drivers/mmc/core/core.h | 1 + drivers/mmc/core/sd.c | 152 +++++++++++++++++++++++++++++++++++++++-------- drivers/mmc/core/sd.h | 2 +- drivers/mmc/core/sdio.c | 4 +- drivers/mmc/host/sdhci.c | 27 +++++++++ drivers/mmc/host/sdhci.h | 11 +++- include/linux/mmc/card.h | 4 ++ include/linux/mmc/host.h | 10 ++++ 9 files changed, 190 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 5005a6323165..61c6c0b8f0e0 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -983,6 +983,15 @@ void mmc_set_timing(struct mmc_host *host, unsigned int timing) mmc_set_ios(host); } +/* + * Select appropriate driver type for host. + */ +void mmc_set_driver_type(struct mmc_host *host, unsigned int drv_type) +{ + host->ios.drv_type = drv_type; + mmc_set_ios(host); +} + /* * Apply power to the MMC stack. This is a two-stage process. * First, we enable power to the card without the clock running. diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index 7745dea430b8..93f33973de39 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -43,6 +43,7 @@ void mmc_set_bus_width_ddr(struct mmc_host *host, unsigned int width, u32 mmc_select_voltage(struct mmc_host *host, u32 ocr); int mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage); void mmc_set_timing(struct mmc_host *host, unsigned int timing); +void mmc_set_driver_type(struct mmc_host *host, unsigned int drv_type); static inline void mmc_delay(unsigned int ms) { diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 8285842f19e9..5b7c99855635 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -400,6 +400,98 @@ out: return err; } +static int sd_select_driver_type(struct mmc_card *card, u8 *status) +{ + int host_drv_type = 0, card_drv_type = 0; + int err; + + /* + * If the host doesn't support any of the Driver Types A,C or D, + * default Driver Type B is used. + */ + if (!(card->host->caps & (MMC_CAP_DRIVER_TYPE_A | MMC_CAP_DRIVER_TYPE_C + | MMC_CAP_DRIVER_TYPE_D))) + return 0; + + if (card->host->caps & MMC_CAP_DRIVER_TYPE_A) { + host_drv_type = MMC_SET_DRIVER_TYPE_A; + if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_A) + card_drv_type = MMC_SET_DRIVER_TYPE_A; + else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_B) + card_drv_type = MMC_SET_DRIVER_TYPE_B; + else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C) + card_drv_type = MMC_SET_DRIVER_TYPE_C; + } else if (card->host->caps & MMC_CAP_DRIVER_TYPE_C) { + host_drv_type = MMC_SET_DRIVER_TYPE_C; + if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C) + card_drv_type = MMC_SET_DRIVER_TYPE_C; + } else if (!(card->host->caps & MMC_CAP_DRIVER_TYPE_D)) { + /* + * If we are here, that means only the default driver type + * B is supported by the host. 
+ */ + host_drv_type = MMC_SET_DRIVER_TYPE_B; + if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_B) + card_drv_type = MMC_SET_DRIVER_TYPE_B; + else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C) + card_drv_type = MMC_SET_DRIVER_TYPE_C; + } + + err = mmc_sd_switch(card, 1, 2, card_drv_type, status); + if (err) + return err; + + if ((status[15] & 0xF) != card_drv_type) { + printk(KERN_WARNING "%s: Problem setting driver strength!\n", + mmc_hostname(card->host)); + return 0; + } + + mmc_set_driver_type(card->host, host_drv_type); + + return 0; +} + +/* + * UHS-I specific initialization procedure + */ +static int mmc_sd_init_uhs_card(struct mmc_card *card) +{ + int err; + u8 *status; + + if (!card->scr.sda_spec3) + return 0; + + if (!(card->csd.cmdclass & CCC_SWITCH)) + return 0; + + status = kmalloc(64, GFP_KERNEL); + if (!status) { + printk(KERN_ERR "%s: could not allocate a buffer for " + "switch capabilities.\n", mmc_hostname(card->host)); + return -ENOMEM; + } + + /* Set 4-bit bus width */ + if ((card->host->caps & MMC_CAP_4_BIT_DATA) && + (card->scr.bus_widths & SD_SCR_BUS_WIDTH_4)) { + err = mmc_app_set_bus_width(card, MMC_BUS_WIDTH_4); + if (err) + goto out; + + mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4); + } + + /* Set the driver strength for the card */ + err = sd_select_driver_type(card, status); + +out: + kfree(status); + + return err; +} + MMC_DEV_ATTR(cid, "%08x%08x%08x%08x\n", card->raw_cid[0], card->raw_cid[1], card->raw_cid[2], card->raw_cid[3]); MMC_DEV_ATTR(csd, "%08x%08x%08x%08x\n", card->raw_csd[0], card->raw_csd[1], @@ -448,10 +540,9 @@ struct device_type sd_type = { /* * Fetch CID from card. */ -int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid) +int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid, u32 *rocr) { int err; - u32 rocr; /* * Since we're changing the OCR value, we seem to @@ -485,7 +576,7 @@ int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid) ocr |= SD_OCR_XPC; try_again: - err = mmc_send_app_op_cond(host, ocr, &rocr); + err = mmc_send_app_op_cond(host, ocr, rocr); if (err) return err; @@ -493,7 +584,8 @@ try_again: * In case CCS and S18A in the response is set, start Signal Voltage * Switch procedure. SPI mode doesn't support CMD11. */ - if (!mmc_host_is_spi(host) && ((rocr & 0x41000000) == 0x41000000)) { + if (!mmc_host_is_spi(host) && rocr && + ((*rocr & 0x41000000) == 0x41000000)) { err = mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180); if (err) { ocr &= ~SD_OCR_S18R; @@ -628,11 +720,12 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, struct mmc_card *card; int err; u32 cid[4]; + u32 rocr = 0; BUG_ON(!host); WARN_ON(!host->claimed); - err = mmc_sd_get_cid(host, ocr, cid); + err = mmc_sd_get_cid(host, ocr, cid, &rocr); if (err) return err; @@ -685,30 +778,37 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, if (err) goto free_card; - /* - * Attempt to change to high-speed (if supported) - */ - err = mmc_sd_switch_hs(card); - if (err > 0) - mmc_sd_go_highspeed(card); - else if (err) - goto free_card; - - /* - * Set bus speed. - */ - mmc_set_clock(host, mmc_sd_get_max_clock(card)); - - /* - * Switch to wider bus (if supported). 
- */ - if ((host->caps & MMC_CAP_4_BIT_DATA) && - (card->scr.bus_widths & SD_SCR_BUS_WIDTH_4)) { - err = mmc_app_set_bus_width(card, MMC_BUS_WIDTH_4); + /* Initialization sequence for UHS-I cards */ + if (rocr & SD_ROCR_S18A) { + err = mmc_sd_init_uhs_card(card); if (err) goto free_card; + } else { + /* + * Attempt to change to high-speed (if supported) + */ + err = mmc_sd_switch_hs(card); + if (err > 0) + mmc_sd_go_highspeed(card); + else if (err) + goto free_card; + + /* + * Set bus speed. + */ + mmc_set_clock(host, mmc_sd_get_max_clock(card)); - mmc_set_bus_width(host, MMC_BUS_WIDTH_4); + /* + * Switch to wider bus (if supported). + */ + if ((host->caps & MMC_CAP_4_BIT_DATA) && + (card->scr.bus_widths & SD_SCR_BUS_WIDTH_4)) { + err = mmc_app_set_bus_width(card, MMC_BUS_WIDTH_4); + if (err) + goto free_card; + + mmc_set_bus_width(host, MMC_BUS_WIDTH_4); + } } host->card = card; diff --git a/drivers/mmc/core/sd.h b/drivers/mmc/core/sd.h index 3d8800fa7600..4b34b24f3f76 100644 --- a/drivers/mmc/core/sd.h +++ b/drivers/mmc/core/sd.h @@ -5,7 +5,7 @@ extern struct device_type sd_type; -int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid); +int mmc_sd_get_cid(struct mmc_host *host, u32 ocr, u32 *cid, u32 *rocr); int mmc_sd_get_csd(struct mmc_host *host, struct mmc_card *card); void mmc_decode_cid(struct mmc_card *card); int mmc_sd_setup_card(struct mmc_host *host, struct mmc_card *card, diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 1e6095961500..4d0c15bfa514 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -369,8 +369,8 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr, goto err; } - if (ocr & R4_MEMORY_PRESENT - && mmc_sd_get_cid(host, host->ocr & ocr, card->raw_cid) == 0) { + if ((ocr & R4_MEMORY_PRESENT) && + mmc_sd_get_cid(host, host->ocr & ocr, card->raw_cid, NULL) == 0) { card->type = MMC_TYPE_SD_COMBO; if (oldcard && (oldcard->type != MMC_TYPE_SD_COMBO || diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 4e2031449a23..8072e16d6d46 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1245,6 +1245,25 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL); + if (host->version >= SDHCI_SPEC_300) { + u16 ctrl_2; + + ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2); + if (!(ctrl_2 & SDHCI_CTRL_PRESET_VAL_ENABLE)) { + /* + * We only need to set Driver Strength if the + * preset value enable is not set. + */ + ctrl_2 &= ~SDHCI_CTRL_DRV_TYPE_MASK; + if (ios->drv_type == MMC_SET_DRIVER_TYPE_A) + ctrl_2 |= SDHCI_CTRL_DRV_TYPE_A; + else if (ios->drv_type == MMC_SET_DRIVER_TYPE_C) + ctrl_2 |= SDHCI_CTRL_DRV_TYPE_C; + + sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2); + } + } + /* * Some (ENE) controllers go apeshit on some ios operation, * signalling timeout and CRC errors even on CMD0. 
Resetting @@ -2086,6 +2105,14 @@ int sdhci_add_host(struct sdhci_host *host) if (caps[1] & SDHCI_SUPPORT_DDR50) mmc->caps |= MMC_CAP_UHS_DDR50; + /* Driver Type(s) (A, C, D) supported by the host */ + if (caps[1] & SDHCI_DRIVER_TYPE_A) + mmc->caps |= MMC_CAP_DRIVER_TYPE_A; + if (caps[1] & SDHCI_DRIVER_TYPE_C) + mmc->caps |= MMC_CAP_DRIVER_TYPE_C; + if (caps[1] & SDHCI_DRIVER_TYPE_D) + mmc->caps |= MMC_CAP_DRIVER_TYPE_D; + ocr_avail = 0; /* * According to SD Host Controller spec v3.00, if the Host System diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 5cba2fea46e0..667bf8874be7 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -71,7 +71,7 @@ #define SDHCI_DATA_LVL_MASK 0x00F00000 #define SDHCI_DATA_LVL_SHIFT 20 -#define SDHCI_HOST_CONTROL 0x28 +#define SDHCI_HOST_CONTROL 0x28 #define SDHCI_CTRL_LED 0x01 #define SDHCI_CTRL_4BITBUS 0x02 #define SDHCI_CTRL_HISPD 0x04 @@ -150,6 +150,12 @@ #define SDHCI_HOST_CONTROL2 0x3E #define SDHCI_CTRL_VDD_180 0x0008 +#define SDHCI_CTRL_DRV_TYPE_MASK 0x0030 +#define SDHCI_CTRL_DRV_TYPE_B 0x0000 +#define SDHCI_CTRL_DRV_TYPE_A 0x0010 +#define SDHCI_CTRL_DRV_TYPE_C 0x0020 +#define SDHCI_CTRL_DRV_TYPE_D 0x0030 +#define SDHCI_CTRL_PRESET_VAL_ENABLE 0x8000 #define SDHCI_CAPABILITIES 0x40 #define SDHCI_TIMEOUT_CLK_MASK 0x0000003F @@ -173,6 +179,9 @@ #define SDHCI_SUPPORT_SDR50 0x00000001 #define SDHCI_SUPPORT_SDR104 0x00000002 #define SDHCI_SUPPORT_DDR50 0x00000004 +#define SDHCI_DRIVER_TYPE_A 0x00000010 +#define SDHCI_DRIVER_TYPE_C 0x00000020 +#define SDHCI_DRIVER_TYPE_D 0x00000040 #define SDHCI_CAPABILITIES_1 0x44 diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 56f4d9234a66..539327260dc1 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -83,6 +83,10 @@ struct sd_switch_caps { unsigned int hs_max_dtr; unsigned int sd3_bus_mode; unsigned int sd3_drv_type; +#define SD_DRIVER_TYPE_B 0x01 +#define SD_DRIVER_TYPE_A 0x02 +#define SD_DRIVER_TYPE_C 0x04 +#define SD_DRIVER_TYPE_D 0x08 unsigned int sd3_curr_limit; }; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index bde5a0b1c47e..949e4d525989 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -61,6 +61,13 @@ struct mmc_ios { #define MMC_SIGNAL_VOLTAGE_330 0 #define MMC_SIGNAL_VOLTAGE_180 1 + + unsigned char drv_type; /* driver type (A, B, C, D) */ + +#define MMC_SET_DRIVER_TYPE_B 0 +#define MMC_SET_DRIVER_TYPE_A 1 +#define MMC_SET_DRIVER_TYPE_C 2 +#define MMC_SET_DRIVER_TYPE_D 3 }; struct mmc_host_ops { @@ -188,6 +195,9 @@ struct mmc_host { #define MMC_CAP_SET_XPC_330 (1 << 20) /* Host supports >150mA current at 3.3V */ #define MMC_CAP_SET_XPC_300 (1 << 21) /* Host supports >150mA current at 3.0V */ #define MMC_CAP_SET_XPC_180 (1 << 22) /* Host supports >150mA current at 1.8V */ +#define MMC_CAP_DRIVER_TYPE_A (1 << 23) /* Host supports Driver Type A */ +#define MMC_CAP_DRIVER_TYPE_C (1 << 24) /* Host supports Driver Type C */ +#define MMC_CAP_DRIVER_TYPE_D (1 << 25) /* Host supports Driver Type D */ mmc_pm_flag_t pm_caps; /* supported pm features */ -- cgit v1.2.3 From 49c468fcf878d2c86e31920cf54aa90c88418a66 Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Thu, 5 May 2011 12:19:01 +0530 Subject: mmc: sd: add support for uhs bus speed mode selection This patch adds support for setting UHS-I bus speed mode during UHS-I initialization procedure. Since both the host and card can support more than one bus speed, we select the highest speed based on both of their capabilities. 
First we set the bus speed mode for the card using CMD6 mode 1, and then we program the host controller to support the required speed mode. We also set High Speed Enable in case one of the UHS-I modes is selected. We take care to reset SD clock before setting UHS mode in the Host Control2 register, and then re-enable it as per the Host Controller spec v3.00. We then set the clock frequency for the UHS-I mode selected. Tested by Zhangfei Gao with a Toshiba uhs card and general hs card, on mmp2 in SDMA mode. Signed-off-by: Arindam Nath Reviewed-by: Philip Rakity Tested-by: Philip Rakity Acked-by: Zhangfei Gao Signed-off-by: Chris Ball --- drivers/mmc/core/sd.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++ drivers/mmc/host/sdhci.c | 40 ++++++++++++++++++++++++++--- drivers/mmc/host/sdhci.h | 6 +++++ include/linux/mmc/card.h | 19 ++++++++++++++ include/linux/mmc/host.h | 5 ++++ 5 files changed, 132 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 5b7c99855635..6970b82171f7 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -452,6 +452,66 @@ static int sd_select_driver_type(struct mmc_card *card, u8 *status) return 0; } +static int sd_set_bus_speed_mode(struct mmc_card *card, u8 *status) +{ + unsigned int bus_speed = 0, timing = 0; + int err; + + /* + * If the host doesn't support any of the UHS-I modes, fallback on + * default speed. + */ + if (!(card->host->caps & (MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 | + MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_DDR50))) + return 0; + + if ((card->host->caps & MMC_CAP_UHS_SDR104) && + (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR104)) { + bus_speed = UHS_SDR104_BUS_SPEED; + timing = MMC_TIMING_UHS_SDR104; + card->sw_caps.uhs_max_dtr = UHS_SDR104_MAX_DTR; + } else if ((card->host->caps & MMC_CAP_UHS_DDR50) && + (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_DDR50)) { + bus_speed = UHS_DDR50_BUS_SPEED; + timing = MMC_TIMING_UHS_DDR50; + card->sw_caps.uhs_max_dtr = UHS_DDR50_MAX_DTR; + } else if ((card->host->caps & (MMC_CAP_UHS_SDR104 | + MMC_CAP_UHS_SDR50)) && (card->sw_caps.sd3_bus_mode & + SD_MODE_UHS_SDR50)) { + bus_speed = UHS_SDR50_BUS_SPEED; + timing = MMC_TIMING_UHS_SDR50; + card->sw_caps.uhs_max_dtr = UHS_SDR50_MAX_DTR; + } else if ((card->host->caps & (MMC_CAP_UHS_SDR104 | + MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR25)) && + (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR25)) { + bus_speed = UHS_SDR25_BUS_SPEED; + timing = MMC_TIMING_UHS_SDR25; + card->sw_caps.uhs_max_dtr = UHS_SDR25_MAX_DTR; + } else if ((card->host->caps & (MMC_CAP_UHS_SDR104 | + MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR25 | + MMC_CAP_UHS_SDR12)) && (card->sw_caps.sd3_bus_mode & + SD_MODE_UHS_SDR12)) { + bus_speed = UHS_SDR12_BUS_SPEED; + timing = MMC_TIMING_UHS_SDR12; + card->sw_caps.uhs_max_dtr = UHS_SDR12_MAX_DTR; + } + + card->sd_bus_speed = bus_speed; + err = mmc_sd_switch(card, 1, 0, bus_speed, status); + if (err) + return err; + + if ((status[16] & 0xF) != bus_speed) + printk(KERN_WARNING "%s: Problem setting bus speed mode!\n", + mmc_hostname(card->host)); + else { + mmc_set_timing(card->host, timing); + mmc_set_clock(card->host, card->sw_caps.uhs_max_dtr); + } + + return 0; +} + /* * UHS-I specific initialization procedure */ @@ -485,6 +545,11 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card) /* Set the driver strength for the card */ err = sd_select_driver_type(card, status); + if (err) + goto out; + + /* Set bus speed mode of the card */ + err = sd_set_bus_speed_mode(card, 
status); out: kfree(status); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 409cde5970ae..8994493dd940 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1244,7 +1244,16 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) ctrl &= ~SDHCI_CTRL_HISPD; if (host->version >= SDHCI_SPEC_300) { - u16 ctrl_2; + u16 clk, ctrl_2; + unsigned int clock; + + /* In case of UHS-I modes, set High Speed Enable */ + if ((ios->timing == MMC_TIMING_UHS_SDR50) || + (ios->timing == MMC_TIMING_UHS_SDR104) || + (ios->timing == MMC_TIMING_UHS_DDR50) || + (ios->timing == MMC_TIMING_UHS_SDR25) || + (ios->timing == MMC_TIMING_UHS_SDR12)) + ctrl |= SDHCI_CTRL_HISPD; ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2); if (!(ctrl_2 & SDHCI_CTRL_PRESET_VAL_ENABLE)) { @@ -1267,8 +1276,6 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) * need to reset SD Clock Enable before changing High * Speed Enable to avoid generating clock gliches. */ - u16 clk; - unsigned int clock; /* Reset SD Clock Enable */ clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL); @@ -1282,6 +1289,33 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->clock = 0; sdhci_set_clock(host, clock); } + + ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2); + + /* Select Bus Speed Mode for host */ + ctrl_2 &= ~SDHCI_CTRL_UHS_MASK; + if (ios->timing == MMC_TIMING_UHS_SDR12) + ctrl_2 |= SDHCI_CTRL_UHS_SDR12; + else if (ios->timing == MMC_TIMING_UHS_SDR25) + ctrl_2 |= SDHCI_CTRL_UHS_SDR25; + else if (ios->timing == MMC_TIMING_UHS_SDR50) + ctrl_2 |= SDHCI_CTRL_UHS_SDR50; + else if (ios->timing == MMC_TIMING_UHS_SDR104) + ctrl_2 |= SDHCI_CTRL_UHS_SDR104; + else if (ios->timing == MMC_TIMING_UHS_DDR50) + ctrl_2 |= SDHCI_CTRL_UHS_DDR50; + + /* Reset SD Clock Enable */ + clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL); + clk &= ~SDHCI_CLOCK_CARD_EN; + sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL); + + sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2); + + /* Re-enable SD Clock */ + clock = host->clock; + host->clock = 0; + sdhci_set_clock(host, clock); } else sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 667bf8874be7..d96f6afcca1f 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -149,6 +149,12 @@ #define SDHCI_ACMD12_ERR 0x3C #define SDHCI_HOST_CONTROL2 0x3E +#define SDHCI_CTRL_UHS_MASK 0x0007 +#define SDHCI_CTRL_UHS_SDR12 0x0000 +#define SDHCI_CTRL_UHS_SDR25 0x0001 +#define SDHCI_CTRL_UHS_SDR50 0x0002 +#define SDHCI_CTRL_UHS_SDR104 0x0003 +#define SDHCI_CTRL_UHS_DDR50 0x0004 #define SDHCI_CTRL_VDD_180 0x0008 #define SDHCI_CTRL_DRV_TYPE_MASK 0x0030 #define SDHCI_CTRL_DRV_TYPE_B 0x0000 diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 539327260dc1..4ef6ded6347d 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -81,7 +81,24 @@ struct sd_ssr { struct sd_switch_caps { unsigned int hs_max_dtr; + unsigned int uhs_max_dtr; +#define UHS_SDR104_MAX_DTR 208000000 +#define UHS_SDR50_MAX_DTR 100000000 +#define UHS_DDR50_MAX_DTR 50000000 +#define UHS_SDR25_MAX_DTR UHS_DDR50_MAX_DTR +#define UHS_SDR12_MAX_DTR 25000000 unsigned int sd3_bus_mode; +#define UHS_SDR12_BUS_SPEED 0 +#define UHS_SDR25_BUS_SPEED 1 +#define UHS_SDR50_BUS_SPEED 2 +#define UHS_SDR104_BUS_SPEED 3 +#define UHS_DDR50_BUS_SPEED 4 + +#define SD_MODE_UHS_SDR12 (1 << UHS_SDR12_BUS_SPEED) +#define SD_MODE_UHS_SDR25 (1 << UHS_SDR25_BUS_SPEED) +#define SD_MODE_UHS_SDR50 (1 << 
UHS_SDR50_BUS_SPEED) +#define SD_MODE_UHS_SDR104 (1 << UHS_SDR104_BUS_SPEED) +#define SD_MODE_UHS_DDR50 (1 << UHS_DDR50_BUS_SPEED) unsigned int sd3_drv_type; #define SD_DRIVER_TYPE_B 0x01 #define SD_DRIVER_TYPE_A 0x02 @@ -166,6 +183,8 @@ struct mmc_card { const char **info; /* info strings */ struct sdio_func_tuple *tuples; /* unknown common tuples */ + unsigned int sd_bus_speed; /* Bus Speed Mode set for the card */ + struct dentry *debugfs_root; }; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 949e4d525989..62375992bdd6 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -50,6 +50,11 @@ struct mmc_ios { #define MMC_TIMING_LEGACY 0 #define MMC_TIMING_MMC_HS 1 #define MMC_TIMING_SD_HS 2 +#define MMC_TIMING_UHS_SDR12 MMC_TIMING_LEGACY +#define MMC_TIMING_UHS_SDR25 MMC_TIMING_SD_HS +#define MMC_TIMING_UHS_SDR50 3 +#define MMC_TIMING_UHS_SDR104 4 +#define MMC_TIMING_UHS_DDR50 5 unsigned char ddr; /* dual data rate used */ -- cgit v1.2.3 From 5371c927bcd06a5c9dd6785bab2d452b87d9abc6 Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Thu, 5 May 2011 12:19:02 +0530 Subject: mmc: sd: set current limit for uhs cards We decide on the current limit to be set for the card based on the Capability of Host Controller to provide current at 1.8V signalling, and the maximum current limit of the card as indicated by CMD6 mode 0. We then set the current limit for the card using CMD6 mode 1. As per the Physical Layer Spec v3.01, the current limit switch is only applicable for SDR50, SDR104, and DDR50 bus speed modes. For other UHS-I modes, we set the default current limit of 200mA. Tested by Zhangfei Gao with a Toshiba uhs card and general hs card, on mmp2 in SDMA mode. Signed-off-by: Arindam Nath Reviewed-by: Philip Rakity Tested-by: Philip Rakity Acked-by: Zhangfei Gao Signed-off-by: Chris Ball --- drivers/mmc/core/sd.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++ drivers/mmc/host/sdhci.c | 10 ++++++++ include/linux/mmc/card.h | 9 +++++++ include/linux/mmc/host.h | 4 +++ 4 files changed, 86 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 6970b82171f7..8e2d8012e4cb 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -512,6 +512,64 @@ static int sd_set_bus_speed_mode(struct mmc_card *card, u8 *status) return 0; } +static int sd_set_current_limit(struct mmc_card *card, u8 *status) +{ + int current_limit = 0; + int err; + + /* + * Current limit switch is only defined for SDR50, SDR104, and DDR50 + * bus speed modes. For other bus speed modes, we set the default + * current limit of 200mA. 
+ */ + if ((card->sd_bus_speed == UHS_SDR50_BUS_SPEED) || + (card->sd_bus_speed == UHS_SDR104_BUS_SPEED) || + (card->sd_bus_speed == UHS_DDR50_BUS_SPEED)) { + if (card->host->caps & MMC_CAP_MAX_CURRENT_800) { + if (card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_800) + current_limit = SD_SET_CURRENT_LIMIT_800; + else if (card->sw_caps.sd3_curr_limit & + SD_MAX_CURRENT_600) + current_limit = SD_SET_CURRENT_LIMIT_600; + else if (card->sw_caps.sd3_curr_limit & + SD_MAX_CURRENT_400) + current_limit = SD_SET_CURRENT_LIMIT_400; + else if (card->sw_caps.sd3_curr_limit & + SD_MAX_CURRENT_200) + current_limit = SD_SET_CURRENT_LIMIT_200; + } else if (card->host->caps & MMC_CAP_MAX_CURRENT_600) { + if (card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_600) + current_limit = SD_SET_CURRENT_LIMIT_600; + else if (card->sw_caps.sd3_curr_limit & + SD_MAX_CURRENT_400) + current_limit = SD_SET_CURRENT_LIMIT_400; + else if (card->sw_caps.sd3_curr_limit & + SD_MAX_CURRENT_200) + current_limit = SD_SET_CURRENT_LIMIT_200; + } else if (card->host->caps & MMC_CAP_MAX_CURRENT_400) { + if (card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_400) + current_limit = SD_SET_CURRENT_LIMIT_400; + else if (card->sw_caps.sd3_curr_limit & + SD_MAX_CURRENT_200) + current_limit = SD_SET_CURRENT_LIMIT_200; + } else if (card->host->caps & MMC_CAP_MAX_CURRENT_200) { + if (card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_200) + current_limit = SD_SET_CURRENT_LIMIT_200; + } + } else + current_limit = SD_SET_CURRENT_LIMIT_200; + + err = mmc_sd_switch(card, 1, 3, current_limit, status); + if (err) + return err; + + if (((status[15] >> 4) & 0x0F) != current_limit) + printk(KERN_WARNING "%s: Problem setting current limit!\n", + mmc_hostname(card->host)); + + return 0; +} + /* * UHS-I specific initialization procedure */ @@ -550,6 +608,11 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card) /* Set bus speed mode of the card */ err = sd_set_bus_speed_mode(card, status); + if (err) + goto out; + + /* Set current limit for the card */ + err = sd_set_current_limit(card, status); out: kfree(status); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 8994493dd940..2a15aad2eba5 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2216,6 +2216,16 @@ int sdhci_add_host(struct sdhci_host *host) if (max_current_180 > 150) mmc->caps |= MMC_CAP_SET_XPC_180; + + /* Maximum current capabilities of the host at 1.8V */ + if (max_current_180 >= 800) + mmc->caps |= MMC_CAP_MAX_CURRENT_800; + else if (max_current_180 >= 600) + mmc->caps |= MMC_CAP_MAX_CURRENT_600; + else if (max_current_180 >= 400) + mmc->caps |= MMC_CAP_MAX_CURRENT_400; + else + mmc->caps |= MMC_CAP_MAX_CURRENT_200; } mmc->ocr_avail = ocr_avail; diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 4ef6ded6347d..47b5ad3960b7 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -105,6 +105,15 @@ struct sd_switch_caps { #define SD_DRIVER_TYPE_C 0x04 #define SD_DRIVER_TYPE_D 0x08 unsigned int sd3_curr_limit; +#define SD_SET_CURRENT_LIMIT_200 0 +#define SD_SET_CURRENT_LIMIT_400 1 +#define SD_SET_CURRENT_LIMIT_600 2 +#define SD_SET_CURRENT_LIMIT_800 3 + +#define SD_MAX_CURRENT_200 (1 << SD_SET_CURRENT_LIMIT_200) +#define SD_MAX_CURRENT_400 (1 << SD_SET_CURRENT_LIMIT_400) +#define SD_MAX_CURRENT_600 (1 << SD_SET_CURRENT_LIMIT_600) +#define SD_MAX_CURRENT_800 (1 << SD_SET_CURRENT_LIMIT_800) }; struct sdio_cccr { diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 62375992bdd6..52b5dc914a8c 100644 --- 
a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -203,6 +203,10 @@ struct mmc_host { #define MMC_CAP_DRIVER_TYPE_A (1 << 23) /* Host supports Driver Type A */ #define MMC_CAP_DRIVER_TYPE_C (1 << 24) /* Host supports Driver Type C */ #define MMC_CAP_DRIVER_TYPE_D (1 << 25) /* Host supports Driver Type D */ +#define MMC_CAP_MAX_CURRENT_200 (1 << 26) /* Host max current limit is 200mA */ +#define MMC_CAP_MAX_CURRENT_400 (1 << 27) /* Host max current limit is 400mA */ +#define MMC_CAP_MAX_CURRENT_600 (1 << 28) /* Host max current limit is 600mA */ +#define MMC_CAP_MAX_CURRENT_800 (1 << 29) /* Host max current limit is 800mA */ mmc_pm_flag_t pm_caps; /* supported pm features */ -- cgit v1.2.3 From 3a3035114307cd55e024662bb295a87b849f0bd4 Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Thu, 5 May 2011 12:19:03 +0530 Subject: mmc: sd: report correct speed and capacity of uhs cards Since only UHS-I cards respond with S18A set in response to ACMD41, we set the card as ultra-high-speed after successfull initialization. We need to decide whether a card is SDXC based on the C_SIZE field of CSDv2.0 register. According to Physical Layer spec v3.01, the minimum value of C_SIZE for SDXC card is 00FFFFh. Tested by Zhangfei Gao with a Toshiba uhs card and general hs card, on mmp2 in SDMA mode. Signed-off-by: Arindam Nath Reviewed-by: Philip Rakity Tested-by: Philip Rakity Acked-by: Zhangfei Gao Signed-off-by: Chris Ball --- drivers/mmc/core/bus.c | 11 ++++++++--- drivers/mmc/core/sd.c | 10 +++++++++- include/linux/mmc/card.h | 7 +++++++ 3 files changed, 24 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index d6d62fd07ee9..393d817ed040 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -274,8 +274,12 @@ int mmc_add_card(struct mmc_card *card) break; case MMC_TYPE_SD: type = "SD"; - if (mmc_card_blockaddr(card)) - type = "SDHC"; + if (mmc_card_blockaddr(card)) { + if (mmc_card_ext_capacity(card)) + type = "SDXC"; + else + type = "SDHC"; + } break; case MMC_TYPE_SDIO: type = "SDIO"; @@ -299,7 +303,8 @@ int mmc_add_card(struct mmc_card *card) } else { printk(KERN_INFO "%s: new %s%s%s card at address %04x\n", mmc_hostname(card->host), - mmc_card_highspeed(card) ? "high speed " : "", + mmc_sd_card_uhs(card) ? "ultra high speed " : + (mmc_card_highspeed(card) ? "high speed " : ""), mmc_card_ddr_mode(card) ? "DDR " : "", type, card->rca); } diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 8e2d8012e4cb..732c3171ceca 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -130,7 +130,7 @@ static int mmc_decode_csd(struct mmc_card *card) break; case 1: /* - * This is a block-addressed SDHC card. Most + * This is a block-addressed SDHC or SDXC card. Most * interesting fields are unused and have fixed * values. To avoid getting tripped by buggy cards, * we assume those fixed values ourselves. 
@@ -144,6 +144,11 @@ static int mmc_decode_csd(struct mmc_card *card) e = UNSTUFF_BITS(resp, 96, 3); csd->max_dtr = tran_exp[e] * tran_mant[m]; csd->cmdclass = UNSTUFF_BITS(resp, 84, 12); + csd->c_size = UNSTUFF_BITS(resp, 48, 22); + + /* SDXC cards have a minimum C_SIZE of 0x00FFFF */ + if (csd->c_size >= 0xFFFF) + mmc_card_set_ext_capacity(card); m = UNSTUFF_BITS(resp, 48, 22); csd->capacity = (1 + m) << 10; @@ -911,6 +916,9 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, err = mmc_sd_init_uhs_card(card); if (err) goto free_card; + + /* Card is an ultra-high-speed card */ + mmc_sd_card_set_uhs(card); } else { /* * Attempt to change to high-speed (if supported) diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 47b5ad3960b7..d8dffc992ce2 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -30,6 +30,7 @@ struct mmc_csd { unsigned short cmdclass; unsigned short tacc_clks; unsigned int tacc_ns; + unsigned int c_size; unsigned int r2w_factor; unsigned int max_dtr; unsigned int erase_size; /* In sectors */ @@ -158,6 +159,8 @@ struct mmc_card { #define MMC_STATE_HIGHSPEED (1<<2) /* card is in high speed mode */ #define MMC_STATE_BLOCKADDR (1<<3) /* card uses block-addressing */ #define MMC_STATE_HIGHSPEED_DDR (1<<4) /* card is in high speed mode */ +#define MMC_STATE_ULTRAHIGHSPEED (1<<5) /* card is in ultra high speed mode */ +#define MMC_CARD_SDXC (1<<6) /* card is SDXC */ unsigned int quirks; /* card quirks */ #define MMC_QUIRK_LENIENT_FN0 (1<<0) /* allow SDIO FN0 writes outside of the VS CCCR range */ #define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1) /* use func->cur_blksize */ @@ -293,12 +296,16 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data) #define mmc_card_highspeed(c) ((c)->state & MMC_STATE_HIGHSPEED) #define mmc_card_blockaddr(c) ((c)->state & MMC_STATE_BLOCKADDR) #define mmc_card_ddr_mode(c) ((c)->state & MMC_STATE_HIGHSPEED_DDR) +#define mmc_sd_card_uhs(c) ((c)->state & MMC_STATE_ULTRAHIGHSPEED) +#define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC) #define mmc_card_set_present(c) ((c)->state |= MMC_STATE_PRESENT) #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY) #define mmc_card_set_highspeed(c) ((c)->state |= MMC_STATE_HIGHSPEED) #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR) #define mmc_card_set_ddr_mode(c) ((c)->state |= MMC_STATE_HIGHSPEED_DDR) +#define mmc_sd_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED) +#define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC) static inline int mmc_card_lenient_fn0(const struct mmc_card *c) { -- cgit v1.2.3 From b513ea250eb7c36a8afb3df938d632ca6b4df7cd Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Thu, 5 May 2011 12:19:04 +0530 Subject: mmc: sd: add support for tuning during uhs initialization Host Controller needs tuning during initialization to operate SDR50 and SDR104 UHS-I cards. Whether SDR50 mode actually needs tuning is indicated by bit 45 of the Host Controller Capabilities register. A new command CMD19 has been defined in the Physical Layer spec v3.01 to request the card to send tuning pattern. We enable Buffer Read Ready interrupt at the very begining of tuning procedure, because that is the only interrupt generated by the Host Controller during tuning. We program the block size to 64 in the Block Size register. We make sure that DMA Enable and Multi Block Select in the Transfer Mode register are set to 0 before actually sending CMD19. 
The tuning block is sent by the card to the Host Controller using DAT lines, so we set Data Present Select (bit 5) in the Command register. The Host Controller is responsible for doing the verfication of tuning block sent by the card at the hardware level. After sending CMD19, we wait for Buffer Read Ready interrupt. In case we don't receive an interrupt after the specified timeout value, we fall back on fixed sampling clock by setting Execute Tuning (bit 6) and Sampling Clock Select (bit 7) of Host Control2 register to 0. Before exiting the tuning procedure, we disable Buffer Read Ready interrupt and re-enable other interrupts. Tested by Zhangfei Gao with a Toshiba uhs card and general hs card, on mmp2 in SDMA mode. Signed-off-by: Arindam Nath Reviewed-by: Philip Rakity Tested-by: Philip Rakity Acked-by: Zhangfei Gao Signed-off-by: Chris Ball --- drivers/mmc/core/sd.c | 6 ++ drivers/mmc/host/sdhci.c | 168 +++++++++++++++++++++++++++++++++++++++++++++- drivers/mmc/host/sdhci.h | 3 + include/linux/mmc/host.h | 1 + include/linux/mmc/mmc.h | 1 + include/linux/mmc/sdhci.h | 4 ++ 6 files changed, 182 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 732c3171ceca..fc65475a26ee 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -618,6 +618,12 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card) /* Set current limit for the card */ err = sd_set_current_limit(card, status); + if (err) + goto out; + + /* SPI mode doesn't define CMD19 */ + if (!mmc_host_is_spi(card->host) && card->host->ops->execute_tuning) + err = card->host->ops->execute_tuning(card->host); out: kfree(status); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 2a15aad2eba5..8a56eacea34d 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -38,6 +38,8 @@ #define SDHCI_USE_LEDS_CLASS #endif +#define MAX_TUNING_LOOP 40 + static unsigned int debug_quirks = 0; static void sdhci_finish_data(struct sdhci_host *); @@ -968,7 +970,9 @@ static void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd) flags |= SDHCI_CMD_CRC; if (cmd->flags & MMC_RSP_OPCODE) flags |= SDHCI_CMD_INDEX; - if (cmd->data) + + /* CMD19 is special in that the Data Present Select should be set */ + if (cmd->data || (cmd->opcode == MMC_SEND_TUNING_BLOCK)) flags |= SDHCI_CMD_DATA; sdhci_writew(host, SDHCI_MAKE_CMD(cmd->opcode, flags), SDHCI_COMMAND); @@ -1501,12 +1505,157 @@ static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc, return 0; } +static int sdhci_execute_tuning(struct mmc_host *mmc) +{ + struct sdhci_host *host; + u16 ctrl; + u32 ier; + int tuning_loop_counter = MAX_TUNING_LOOP; + unsigned long timeout; + int err = 0; + + host = mmc_priv(mmc); + + disable_irq(host->irq); + spin_lock(&host->lock); + + ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); + + /* + * Host Controller needs tuning only in case of SDR104 mode + * and for SDR50 mode when Use Tuning for SDR50 is set in + * Capabilities register. + */ + if (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR104) || + (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR50) && + (host->flags & SDHCI_SDR50_NEEDS_TUNING))) + ctrl |= SDHCI_CTRL_EXEC_TUNING; + else { + spin_unlock(&host->lock); + enable_irq(host->irq); + return 0; + } + + sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); + + /* + * As per the Host Controller spec v3.00, tuning command + * generates Buffer Read Ready interrupt, so enable that. 
+ * + * Note: The spec clearly says that when tuning sequence + * is being performed, the controller does not generate + * interrupts other than Buffer Read Ready interrupt. But + * to make sure we don't hit a controller bug, we _only_ + * enable Buffer Read Ready interrupt here. + */ + ier = sdhci_readl(host, SDHCI_INT_ENABLE); + sdhci_clear_set_irqs(host, ier, SDHCI_INT_DATA_AVAIL); + + /* + * Issue CMD19 repeatedly till Execute Tuning is set to 0 or the number + * of loops reaches 40 times or a timeout of 150ms occurs. + */ + timeout = 150; + do { + struct mmc_command cmd = {0}; + struct mmc_request mrq = {0}; + + if (!tuning_loop_counter && !timeout) + break; + + cmd.opcode = MMC_SEND_TUNING_BLOCK; + cmd.arg = 0; + cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC; + cmd.retries = 0; + cmd.data = NULL; + cmd.error = 0; + + mrq.cmd = &cmd; + host->mrq = &mrq; + + /* + * In response to CMD19, the card sends 64 bytes of tuning + * block to the Host Controller. So we set the block size + * to 64 here. + */ + sdhci_writew(host, SDHCI_MAKE_BLKSZ(7, 64), SDHCI_BLOCK_SIZE); + + /* + * The tuning block is sent by the card to the host controller. + * So we set the TRNS_READ bit in the Transfer Mode register. + * This also takes care of setting DMA Enable and Multi Block + * Select in the same register to 0. + */ + sdhci_writew(host, SDHCI_TRNS_READ, SDHCI_TRANSFER_MODE); + + sdhci_send_command(host, &cmd); + + host->cmd = NULL; + host->mrq = NULL; + + spin_unlock(&host->lock); + enable_irq(host->irq); + + /* Wait for Buffer Read Ready interrupt */ + wait_event_interruptible_timeout(host->buf_ready_int, + (host->tuning_done == 1), + msecs_to_jiffies(50)); + disable_irq(host->irq); + spin_lock(&host->lock); + + if (!host->tuning_done) { + printk(KERN_INFO DRIVER_NAME ": Timeout waiting for " + "Buffer Read Ready interrupt during tuning " + "procedure, falling back to fixed sampling " + "clock\n"); + ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); + ctrl &= ~SDHCI_CTRL_TUNED_CLK; + ctrl &= ~SDHCI_CTRL_EXEC_TUNING; + sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); + + err = -EIO; + goto out; + } + + host->tuning_done = 0; + + ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); + tuning_loop_counter--; + timeout--; + mdelay(1); + } while (ctrl & SDHCI_CTRL_EXEC_TUNING); + + /* + * The Host Driver has exhausted the maximum number of loops allowed, + * so use fixed sampling frequency. 
+ */ + if (!tuning_loop_counter || !timeout) { + ctrl &= ~SDHCI_CTRL_TUNED_CLK; + sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); + } else { + if (!(ctrl & SDHCI_CTRL_TUNED_CLK)) { + printk(KERN_INFO DRIVER_NAME ": Tuning procedure" + " failed, falling back to fixed sampling" + " clock\n"); + err = -EIO; + } + } + +out: + sdhci_clear_set_irqs(host, SDHCI_INT_DATA_AVAIL, ier); + spin_unlock(&host->lock); + enable_irq(host->irq); + + return err; +} + static const struct mmc_host_ops sdhci_ops = { .request = sdhci_request, .set_ios = sdhci_set_ios, .get_ro = sdhci_get_ro, .enable_sdio_irq = sdhci_enable_sdio_irq, .start_signal_voltage_switch = sdhci_start_signal_voltage_switch, + .execute_tuning = sdhci_execute_tuning, }; /*****************************************************************************\ @@ -1724,6 +1873,16 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask) { BUG_ON(intmask == 0); + /* CMD19 generates _only_ Buffer Read Ready interrupt */ + if (intmask & SDHCI_INT_DATA_AVAIL) { + if (SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND)) == + MMC_SEND_TUNING_BLOCK) { + host->tuning_done = 1; + wake_up(&host->buf_ready_int); + return; + } + } + if (!host->data) { /* * The "data complete" interrupt is also used to @@ -2160,6 +2319,10 @@ int sdhci_add_host(struct sdhci_host *host) if (caps[1] & SDHCI_SUPPORT_DDR50) mmc->caps |= MMC_CAP_UHS_DDR50; + /* Does the host needs tuning for SDR50? */ + if (caps[1] & SDHCI_USE_SDR50_TUNING) + host->flags |= SDHCI_SDR50_NEEDS_TUNING; + /* Driver Type(s) (A, C, D) supported by the host */ if (caps[1] & SDHCI_DRIVER_TYPE_A) mmc->caps |= MMC_CAP_DRIVER_TYPE_A; @@ -2313,6 +2476,9 @@ int sdhci_add_host(struct sdhci_host *host) setup_timer(&host->timer, sdhci_timeout_timer, (unsigned long)host); + if (host->version >= SDHCI_SPEC_300) + init_waitqueue_head(&host->buf_ready_int); + ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED, mmc_hostname(mmc), host); if (ret) diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index d96f6afcca1f..e62367491eee 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -161,6 +161,8 @@ #define SDHCI_CTRL_DRV_TYPE_A 0x0010 #define SDHCI_CTRL_DRV_TYPE_C 0x0020 #define SDHCI_CTRL_DRV_TYPE_D 0x0030 +#define SDHCI_CTRL_EXEC_TUNING 0x0040 +#define SDHCI_CTRL_TUNED_CLK 0x0080 #define SDHCI_CTRL_PRESET_VAL_ENABLE 0x8000 #define SDHCI_CAPABILITIES 0x40 @@ -188,6 +190,7 @@ #define SDHCI_DRIVER_TYPE_A 0x00000010 #define SDHCI_DRIVER_TYPE_C 0x00000020 #define SDHCI_DRIVER_TYPE_D 0x00000040 +#define SDHCI_USE_SDR50_TUNING 0x00002000 #define SDHCI_CAPABILITIES_1 0x44 diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 52b5dc914a8c..ca7007fdb399 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -136,6 +136,7 @@ struct mmc_host_ops { void (*init_card)(struct mmc_host *host, struct mmc_card *card); int (*start_signal_voltage_switch)(struct mmc_host *host, struct mmc_ios *ios); + int (*execute_tuning)(struct mmc_host *host); }; struct mmc_card; diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 373b2bf5e5b5..9fa5a73f393d 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -50,6 +50,7 @@ #define MMC_SET_BLOCKLEN 16 /* ac [31:0] block len R1 */ #define MMC_READ_SINGLE_BLOCK 17 /* adtc [31:0] data addr R1 */ #define MMC_READ_MULTIPLE_BLOCK 18 /* adtc [31:0] data addr R1 */ +#define MMC_SEND_TUNING_BLOCK 19 /* adtc R1 */ /* class 3 */ #define MMC_WRITE_DAT_UNTIL_STOP 20 /* adtc [31:0] data addr R1 */ diff --git 
a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index 92e1c9ad126c..b74c8530e959 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -111,6 +111,7 @@ struct sdhci_host { #define SDHCI_USE_ADMA (1<<1) /* Host is ADMA capable */ #define SDHCI_REQ_USE_DMA (1<<2) /* Use DMA for this req. */ #define SDHCI_DEVICE_DEAD (1<<3) /* Device unresponsive */ +#define SDHCI_SDR50_NEEDS_TUNING (1<<4) /* SDR50 needs tuning */ unsigned int version; /* SDHCI spec. version */ @@ -147,6 +148,9 @@ struct sdhci_host { unsigned int ocr_avail_sd; unsigned int ocr_avail_mmc; + wait_queue_head_t buf_ready_int; /* Waitqueue for Buffer Read Ready interrupt */ + unsigned int tuning_done; /* Condition flag set when CMD19 succeeds */ + unsigned long private[0] ____cacheline_aligned; }; #endif /* __SDHCI_H */ -- cgit v1.2.3 From 4d55c5a13a189a80d40383f02c8026f9a87d7c87 Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Thu, 5 May 2011 12:19:05 +0530 Subject: mmc: sdhci: enable preset value after uhs initialization According to the Host Controller spec v3.00, setting Preset Value Enable in the Host Control2 register lets SDCLK Frequency Select, Clock Generator Select and Driver Strength Select to be set automatically by the Host Controller based on the UHS-I mode set. This patch enables this feature. Since Preset Value Enable makes sense only for UHS-I cards, we enable this feature after successfull UHS-I initialization. We also reset Preset Value Enable next time before initialization. Tested by Zhangfei Gao with a Toshiba uhs card and general hs card, on mmp2 in SDMA mode. Signed-off-by: Arindam Nath Reviewed-by: Philip Rakity Tested-by: Philip Rakity Acked-by: Zhangfei Gao Signed-off-by: Chris Ball --- drivers/mmc/core/sd.c | 11 +++++++++++ drivers/mmc/host/sdhci.c | 32 ++++++++++++++++++++++++++++++++ include/linux/mmc/host.h | 1 + 3 files changed, 44 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index fc65475a26ee..b461b290ce25 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -925,6 +925,13 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, /* Card is an ultra-high-speed card */ mmc_sd_card_set_uhs(card); + + /* + * Since initialization is now complete, enable preset + * value registers for UHS-I cards. + */ + if (host->ops->enable_preset_value) + host->ops->enable_preset_value(host, true); } else { /* * Attempt to change to high-speed (if supported) @@ -1095,6 +1102,10 @@ int mmc_attach_sd(struct mmc_host *host) if (err) return err; + /* Disable preset value enable if already set since last time */ + if (host->ops->enable_preset_value) + host->ops->enable_preset_value(host, false); + err = mmc_send_app_op_cond(host, 0, &ocr); if (err) return err; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 8a56eacea34d..a1ab22415883 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1649,6 +1649,37 @@ out: return err; } +static void sdhci_enable_preset_value(struct mmc_host *mmc, bool enable) +{ + struct sdhci_host *host; + u16 ctrl; + unsigned long flags; + + host = mmc_priv(mmc); + + /* Host Controller v3.00 defines preset value registers */ + if (host->version < SDHCI_SPEC_300) + return; + + spin_lock_irqsave(&host->lock, flags); + + ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); + + /* + * We only enable or disable Preset Value if they are not already + * enabled or disabled respectively. Otherwise, we bail out. 
+ */ + if (enable && !(ctrl & SDHCI_CTRL_PRESET_VAL_ENABLE)) { + ctrl |= SDHCI_CTRL_PRESET_VAL_ENABLE; + sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); + } else if (!enable && (ctrl & SDHCI_CTRL_PRESET_VAL_ENABLE)) { + ctrl &= ~SDHCI_CTRL_PRESET_VAL_ENABLE; + sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); + } + + spin_unlock_irqrestore(&host->lock, flags); +} + static const struct mmc_host_ops sdhci_ops = { .request = sdhci_request, .set_ios = sdhci_set_ios, @@ -1656,6 +1687,7 @@ static const struct mmc_host_ops sdhci_ops = { .enable_sdio_irq = sdhci_enable_sdio_irq, .start_signal_voltage_switch = sdhci_start_signal_voltage_switch, .execute_tuning = sdhci_execute_tuning, + .enable_preset_value = sdhci_enable_preset_value, }; /*****************************************************************************\ diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index ca7007fdb399..6716bd12eccd 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -137,6 +137,7 @@ struct mmc_host_ops { int (*start_signal_voltage_switch)(struct mmc_host *host, struct mmc_ios *ios); int (*execute_tuning)(struct mmc_host *host); + void (*enable_preset_value)(struct mmc_host *host, bool enable); }; struct mmc_card; -- cgit v1.2.3 From c3ed3877625f10d600b0eca2ca48a68c46aed660 Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Thu, 5 May 2011 12:19:06 +0530 Subject: mmc: sdhci: add support for programmable clock mode Host Controller v3.00 supports programmable clock mode as an optional feature. The support for this mode is indicated by non-zero value in bits 48-55 of the Capabilities register. If supported, the actual value of Clock Multiplier is one more than the value provided in the bit fields. We only set Clock Generator Select (bit 5) and SDCLK Frequency Select (bits 8-15) of the Clock Control register in case Preset Value Enable is not set, otherwise these fields are automatically set by the Host Controller based on the UHS mode selected. Also, since the maximum and minimum clock frequency in this mode can be (Base Clock * Clock Mul) and (Base Clock * Clock Mul)/1024 respectively, f_max and f_min have been recalculated to reflect this change. Tested by Zhangfei Gao with a Toshiba uhs card and general hs card, on mmp2 in SDMA mode. Signed-off-by: Arindam Nath Reviewed-by: Philip Rakity Tested-by: Philip Rakity Acked-by: Zhangfei Gao Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci.c | 81 ++++++++++++++++++++++++++++++++++++++--------- drivers/mmc/host/sdhci.h | 3 ++ include/linux/mmc/sdhci.h | 1 + 3 files changed, 70 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index a1ab22415883..07cca557c4b5 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1013,8 +1013,8 @@ static void sdhci_finish_command(struct sdhci_host *host) static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock) { - int div; - u16 clk; + int div = 0; /* Initialized for compiler warning */ + u16 clk = 0; unsigned long timeout; if (clock == host->clock) @@ -1032,14 +1032,45 @@ static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock) goto out; if (host->version >= SDHCI_SPEC_300) { - /* Version 3.00 divisors must be a multiple of 2. */ - if (host->max_clk <= clock) - div = 1; - else { - for (div = 2; div < SDHCI_MAX_DIV_SPEC_300; div += 2) { - if ((host->max_clk / div) <= clock) - break; + /* + * Check if the Host Controller supports Programmable Clock + * Mode. 
+ */ + if (host->clk_mul) { + u16 ctrl; + + /* + * We need to figure out whether the Host Driver needs + * to select Programmable Clock Mode, or the value can + * be set automatically by the Host Controller based on + * the Preset Value registers. + */ + ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); + if (!(ctrl & SDHCI_CTRL_PRESET_VAL_ENABLE)) { + for (div = 1; div <= 1024; div++) { + if (((host->max_clk * host->clk_mul) / + div) <= clock) + break; + } + /* + * Set Programmable Clock Mode in the Clock + * Control register. + */ + clk = SDHCI_PROG_CLOCK_MODE; + div--; + } + } else { + /* Version 3.00 divisors must be a multiple of 2. */ + if (host->max_clk <= clock) + div = 1; + else { + for (div = 2; div < SDHCI_MAX_DIV_SPEC_300; + div += 2) { + if ((host->max_clk / div) <= clock) + break; + } } + div >>= 1; } } else { /* Version 2.00 divisors must be a power of 2. */ @@ -1047,10 +1078,10 @@ static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock) if ((host->max_clk / div) <= clock) break; } + div >>= 1; } - div >>= 1; - clk = (div & SDHCI_DIV_MASK) << SDHCI_DIVIDER_SHIFT; + clk |= (div & SDHCI_DIV_MASK) << SDHCI_DIVIDER_SHIFT; clk |= ((div & SDHCI_DIV_HI_MASK) >> SDHCI_DIV_MASK_LEN) << SDHCI_DIVIDER_HI_SHIFT; clk |= SDHCI_CLOCK_INT_EN; @@ -2307,18 +2338,38 @@ int sdhci_add_host(struct sdhci_host *host) if (caps[0] & SDHCI_TIMEOUT_CLK_UNIT) host->timeout_clk *= 1000; + /* + * In case of Host Controller v3.00, find out whether clock + * multiplier is supported. + */ + host->clk_mul = (caps[1] & SDHCI_CLOCK_MUL_MASK) >> + SDHCI_CLOCK_MUL_SHIFT; + + /* + * In case the value in Clock Multiplier is 0, then programmable + * clock mode is not supported, otherwise the actual clock + * multiplier is one more than the value of Clock Multiplier + * in the Capabilities Register. + */ + if (host->clk_mul) + host->clk_mul += 1; + /* * Set host parameters. 
*/ mmc->ops = &sdhci_ops; + mmc->f_max = host->max_clk; if (host->ops->get_min_clock) mmc->f_min = host->ops->get_min_clock(host); - else if (host->version >= SDHCI_SPEC_300) - mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_300; - else + else if (host->version >= SDHCI_SPEC_300) { + if (host->clk_mul) { + mmc->f_min = (host->max_clk * host->clk_mul) / 1024; + mmc->f_max = host->max_clk * host->clk_mul; + } else + mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_300; + } else mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_200; - mmc->f_max = host->max_clk; mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE; /* diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index e62367491eee..6b0a0ee9ac6e 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -101,6 +101,7 @@ #define SDHCI_DIV_MASK 0xFF #define SDHCI_DIV_MASK_LEN 8 #define SDHCI_DIV_HI_MASK 0x300 +#define SDHCI_PROG_CLOCK_MODE 0x0020 #define SDHCI_CLOCK_CARD_EN 0x0004 #define SDHCI_CLOCK_INT_STABLE 0x0002 #define SDHCI_CLOCK_INT_EN 0x0001 @@ -191,6 +192,8 @@ #define SDHCI_DRIVER_TYPE_C 0x00000020 #define SDHCI_DRIVER_TYPE_D 0x00000040 #define SDHCI_USE_SDR50_TUNING 0x00002000 +#define SDHCI_CLOCK_MUL_MASK 0x00FF0000 +#define SDHCI_CLOCK_MUL_SHIFT 16 #define SDHCI_CAPABILITIES_1 0x44 diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index b74c8530e959..a88a799ed7c3 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -117,6 +117,7 @@ struct sdhci_host { unsigned int max_clk; /* Max possible freq (MHz) */ unsigned int timeout_clk; /* Timeout freq (KHz) */ + unsigned int clk_mul; /* Clock Muliplier value */ unsigned int clock; /* Current clock (MHz) */ u8 pwr; /* Current voltage */ -- cgit v1.2.3 From cf2b5eea1ea0ff9b3184bc6771bcb93a9fdcd1d9 Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Thu, 5 May 2011 12:19:07 +0530 Subject: mmc: sdhci: add support for retuning mode 1 Host Controller v3.00 can support retuning modes 1,2 or 3 depending on the bits 46-47 of the Capabilities register. Also, the timer count for retuning is indicated by bits 40-43 of the same register. We initialize timer_list for retuning the first time we execute tuning procedure. This condition is indicated by SDHCI_NEEDS_RETUNING not being set. Since retuning mode 1 sets a limit of 4MB on the maximum data length, we set max_blk_count appropriately. Once the tuning timer expires, we set SDHCI_NEEDS_RETUNING flag, and if the flag is set, we execute tuning procedure before sending the next command. We need to restore mmc_request structure after executing retuning procedure since host->mrq is used inside the procedure to send CMD19. We also disable and re-enable this flag during suspend and resume respectively, as per the spec v3.00. Tested by Zhangfei Gao with a Toshiba uhs card and general hs card, on mmp2 in SDMA mode. 
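For illustration only, the following condenses the capability decode from the hunks below rather than adding new driver code; with bits 40-43 of the Capabilities register reading 4, for example, the re-tuning timer fires every 2^(4-1) = 8 seconds:

	host->tuning_count = (caps[1] & SDHCI_RETUNING_TIMER_COUNT_MASK) >>
			SDHCI_RETUNING_TIMER_COUNT_SHIFT;
	/* zero means the re-tuning timer is disabled; otherwise period = 2^(n-1) seconds */
	if (host->tuning_count)
		host->tuning_count = 1 << (host->tuning_count - 1);

	host->tuning_mode = (caps[1] & SDHCI_RETUNING_MODE_MASK) >>
			SDHCI_RETUNING_MODE_SHIFT;

	/* re-tuning mode 1 limits a single data transfer to 4MB */
	mmc->max_blk_count = (4 * 1024 * 1024) / mmc->max_blk_size;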
Signed-off-by: Arindam Nath Reviewed-by: Philip Rakity Tested-by: Philip Rakity Acked-by: Zhangfei Gao Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci.c | 109 +++++++++++++++++++++++++++++++++++++++++++++- drivers/mmc/host/sdhci.h | 6 ++- include/linux/mmc/sdhci.h | 6 +++ 3 files changed, 118 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 07cca557c4b5..fa3301644b15 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -46,6 +46,8 @@ static void sdhci_finish_data(struct sdhci_host *); static void sdhci_send_command(struct sdhci_host *, struct mmc_command *); static void sdhci_finish_command(struct sdhci_host *); +static int sdhci_execute_tuning(struct mmc_host *mmc); +static void sdhci_tuning_timer(unsigned long data); static void sdhci_dumpregs(struct sdhci_host *host) { @@ -1206,8 +1208,27 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq) if (!present || host->flags & SDHCI_DEVICE_DEAD) { host->mrq->cmd->error = -ENOMEDIUM; tasklet_schedule(&host->finish_tasklet); - } else + } else { + u32 present_state; + + present_state = sdhci_readl(host, SDHCI_PRESENT_STATE); + /* + * Check if the re-tuning timer has already expired and there + * is no on-going data transfer. If so, we need to execute + * tuning procedure before sending command. + */ + if ((host->flags & SDHCI_NEEDS_RETUNING) && + !(present_state & (SDHCI_DOING_WRITE | SDHCI_DOING_READ))) { + spin_unlock_irqrestore(&host->lock, flags); + sdhci_execute_tuning(mmc); + spin_lock_irqsave(&host->lock, flags); + + /* Restore original mmc_request structure */ + host->mrq = mrq; + } + sdhci_send_command(host, mrq->cmd); + } mmiowb(); spin_unlock_irqrestore(&host->lock, flags); @@ -1673,6 +1694,37 @@ static int sdhci_execute_tuning(struct mmc_host *mmc) } out: + /* + * If this is the very first time we are here, we start the retuning + * timer. Since only during the first time, SDHCI_NEEDS_RETUNING + * flag won't be set, we check this condition before actually starting + * the timer. + */ + if (!(host->flags & SDHCI_NEEDS_RETUNING) && host->tuning_count && + (host->tuning_mode == SDHCI_TUNING_MODE_1)) { + mod_timer(&host->tuning_timer, jiffies + + host->tuning_count * HZ); + /* Tuning mode 1 limits the maximum data length to 4MB */ + mmc->max_blk_count = (4 * 1024 * 1024) / mmc->max_blk_size; + } else { + host->flags &= ~SDHCI_NEEDS_RETUNING; + /* Reload the new initial value for timer */ + if (host->tuning_mode == SDHCI_TUNING_MODE_1) + mod_timer(&host->tuning_timer, jiffies + + host->tuning_count * HZ); + } + + /* + * In case tuning fails, host controllers which support re-tuning can + * try tuning again at a later time, when the re-tuning timer expires. + * So for these controllers, we return 0. Since there might be other + * controllers who do not have this capability, we return error for + * them. 
+ */ + if (err && host->tuning_count && + host->tuning_mode == SDHCI_TUNING_MODE_1) + err = 0; + sdhci_clear_set_irqs(host, SDHCI_INT_DATA_AVAIL, ier); spin_unlock(&host->lock); enable_irq(host->irq); @@ -1775,6 +1827,9 @@ static void sdhci_tasklet_finish(unsigned long param) del_timer(&host->timer); + if (host->version >= SDHCI_SPEC_300) + del_timer(&host->tuning_timer); + mrq = host->mrq; /* @@ -1848,6 +1903,20 @@ static void sdhci_timeout_timer(unsigned long data) spin_unlock_irqrestore(&host->lock, flags); } +static void sdhci_tuning_timer(unsigned long data) +{ + struct sdhci_host *host; + unsigned long flags; + + host = (struct sdhci_host *)data; + + spin_lock_irqsave(&host->lock, flags); + + host->flags |= SDHCI_NEEDS_RETUNING; + + spin_unlock_irqrestore(&host->lock, flags); +} + /*****************************************************************************\ * * * Interrupt handling * @@ -2122,6 +2191,14 @@ int sdhci_suspend_host(struct sdhci_host *host, pm_message_t state) sdhci_disable_card_detection(host); + /* Disable tuning since we are suspending */ + if (host->version >= SDHCI_SPEC_300 && host->tuning_count && + host->tuning_mode == SDHCI_TUNING_MODE_1) { + host->flags &= ~SDHCI_NEEDS_RETUNING; + mod_timer(&host->tuning_timer, jiffies + + host->tuning_count * HZ); + } + ret = mmc_suspend_host(host->mmc); if (ret) return ret; @@ -2163,6 +2240,11 @@ int sdhci_resume_host(struct sdhci_host *host) ret = mmc_resume_host(host->mmc); sdhci_enable_card_detection(host); + /* Set the re-tuning expiration flag */ + if ((host->version >= SDHCI_SPEC_300) && host->tuning_count && + (host->tuning_mode == SDHCI_TUNING_MODE_1)) + host->flags |= SDHCI_NEEDS_RETUNING; + return ret; } @@ -2414,6 +2496,21 @@ int sdhci_add_host(struct sdhci_host *host) if (caps[1] & SDHCI_DRIVER_TYPE_D) mmc->caps |= MMC_CAP_DRIVER_TYPE_D; + /* Initial value for re-tuning timer count */ + host->tuning_count = (caps[1] & SDHCI_RETUNING_TIMER_COUNT_MASK) >> + SDHCI_RETUNING_TIMER_COUNT_SHIFT; + + /* + * In case Re-tuning Timer is not disabled, the actual value of + * re-tuning timer will be 2 ^ (n - 1). 
+ */ + if (host->tuning_count) + host->tuning_count = 1 << (host->tuning_count - 1); + + /* Re-tuning mode supported by the Host Controller */ + host->tuning_mode = (caps[1] & SDHCI_RETUNING_MODE_MASK) >> + SDHCI_RETUNING_MODE_SHIFT; + ocr_avail = 0; /* * According to SD Host Controller spec v3.00, if the Host System @@ -2559,9 +2656,15 @@ int sdhci_add_host(struct sdhci_host *host) setup_timer(&host->timer, sdhci_timeout_timer, (unsigned long)host); - if (host->version >= SDHCI_SPEC_300) + if (host->version >= SDHCI_SPEC_300) { init_waitqueue_head(&host->buf_ready_int); + /* Initialize re-tuning timer */ + init_timer(&host->tuning_timer); + host->tuning_timer.data = (unsigned long)host; + host->tuning_timer.function = sdhci_tuning_timer; + } + ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED, mmc_hostname(mmc), host); if (ret) @@ -2655,6 +2758,8 @@ void sdhci_remove_host(struct sdhci_host *host, int dead) free_irq(host->irq, host); del_timer_sync(&host->timer); + if (host->version >= SDHCI_SPEC_300) + del_timer_sync(&host->tuning_timer); tasklet_kill(&host->card_tasklet); tasklet_kill(&host->finish_tasklet); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 6b0a0ee9ac6e..8ea11b7cf89a 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -191,7 +191,11 @@ #define SDHCI_DRIVER_TYPE_A 0x00000010 #define SDHCI_DRIVER_TYPE_C 0x00000020 #define SDHCI_DRIVER_TYPE_D 0x00000040 -#define SDHCI_USE_SDR50_TUNING 0x00002000 +#define SDHCI_RETUNING_TIMER_COUNT_MASK 0x00000F00 +#define SDHCI_RETUNING_TIMER_COUNT_SHIFT 8 +#define SDHCI_USE_SDR50_TUNING 0x00002000 +#define SDHCI_RETUNING_MODE_MASK 0x0000C000 +#define SDHCI_RETUNING_MODE_SHIFT 14 #define SDHCI_CLOCK_MUL_MASK 0x00FF0000 #define SDHCI_CLOCK_MUL_SHIFT 16 diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index a88a799ed7c3..e902618d68f4 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -112,6 +112,7 @@ struct sdhci_host { #define SDHCI_REQ_USE_DMA (1<<2) /* Use DMA for this req. */ #define SDHCI_DEVICE_DEAD (1<<3) /* Device unresponsive */ #define SDHCI_SDR50_NEEDS_TUNING (1<<4) /* SDR50 needs tuning */ +#define SDHCI_NEEDS_RETUNING (1<<5) /* Host needs retuning */ unsigned int version; /* SDHCI spec. version */ @@ -152,6 +153,11 @@ struct sdhci_host { wait_queue_head_t buf_ready_int; /* Waitqueue for Buffer Read Ready interrupt */ unsigned int tuning_done; /* Condition flag set when CMD19 succeeds */ + unsigned int tuning_count; /* Timer count for re-tuning */ + unsigned int tuning_mode; /* Re-tuning mode supported by host */ +#define SDHCI_TUNING_MODE_1 0 + struct timer_list tuning_timer; /* Timer for tuning */ + unsigned long private[0] ____cacheline_aligned; }; #endif /* __SDHCI_H */ -- cgit v1.2.3 From 06e8935febe687e2a561707d4c7ca4245d261dbe Mon Sep 17 00:00:00 2001 From: Stefan Nilsson XK Date: Wed, 11 May 2011 17:48:05 +0200 Subject: mmc: sdio: optimized SDIO IRQ handling for single irq If there is only 1 function interrupt registered it is possible to improve performance by directly calling the irq handler and avoiding the overhead of reading the CCCR registers. 
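As a usage sketch (my_func_irq and my_func_probe are made-up names, not part of this patch), a function driver that ends up being the sole IRQ claimant is now dispatched to directly, without the CCCR_INTx read:

	#include <linux/mmc/sdio_func.h>

	static void my_func_irq(struct sdio_func *func)
	{
		/* called with the host claimed; poll this function's own status registers */
	}

	static int my_func_probe(struct sdio_func *func,
				 const struct sdio_device_id *id)
	{
		int ret;

		sdio_claim_host(func);
		/* single claimant: the core takes the direct-call fast path */
		ret = sdio_claim_irq(func, my_func_irq);
		sdio_release_host(func);

		return ret;
	}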
Signed-off-by: Per Forlin Acked-by: Ulf Hansson Reviewed-by: Nicolas Pitre Signed-off-by: Chris Ball --- drivers/mmc/core/sdio_irq.c | 33 ++++++++++++++++++++++++++++++++- include/linux/mmc/card.h | 1 + 2 files changed, 33 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c index b3001617e67d..03ead028d2ce 100644 --- a/drivers/mmc/core/sdio_irq.c +++ b/drivers/mmc/core/sdio_irq.c @@ -31,6 +31,17 @@ static int process_sdio_pending_irqs(struct mmc_card *card) { int i, ret, count; unsigned char pending; + struct sdio_func *func; + + /* + * Optimization, if there is only 1 function interrupt registered + * call irq handler directly + */ + func = card->sdio_single_irq; + if (func) { + func->irq_handler(func); + return 1; + } ret = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_INTx, 0, &pending); if (ret) { @@ -42,7 +53,7 @@ static int process_sdio_pending_irqs(struct mmc_card *card) count = 0; for (i = 1; i <= 7; i++) { if (pending & (1 << i)) { - struct sdio_func *func = card->sdio_func[i - 1]; + func = card->sdio_func[i - 1]; if (!func) { printk(KERN_WARNING "%s: pending IRQ for " "non-existent function\n", @@ -186,6 +197,24 @@ static int sdio_card_irq_put(struct mmc_card *card) return 0; } +/* If there is only 1 function registered set sdio_single_irq */ +static void sdio_single_irq_set(struct mmc_card *card) +{ + struct sdio_func *func; + int i; + + card->sdio_single_irq = NULL; + if ((card->host->caps & MMC_CAP_SDIO_IRQ) && + card->host->sdio_irqs == 1) + for (i = 0; i < card->sdio_funcs; i++) { + func = card->sdio_func[i]; + if (func && func->irq_handler) { + card->sdio_single_irq = func; + break; + } + } +} + /** * sdio_claim_irq - claim the IRQ for a SDIO function * @func: SDIO function @@ -227,6 +256,7 @@ int sdio_claim_irq(struct sdio_func *func, sdio_irq_handler_t *handler) ret = sdio_card_irq_get(func->card); if (ret) func->irq_handler = NULL; + sdio_single_irq_set(func->card); return ret; } @@ -251,6 +281,7 @@ int sdio_release_irq(struct sdio_func *func) if (func->irq_handler) { func->irq_handler = NULL; sdio_card_irq_put(func->card); + sdio_single_irq_set(func->card); } ret = mmc_io_rw_direct(func->card, 0, 0, SDIO_CCCR_IENx, 0, ®); diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index d8dffc992ce2..4910dec47bb7 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -191,6 +191,7 @@ struct mmc_card { struct sdio_cccr cccr; /* common card info */ struct sdio_cis cis; /* common tuple info */ struct sdio_func *sdio_func[SDIO_MAX_FUNCS]; /* SDIO functions (devices) */ + struct sdio_func *sdio_single_irq; /* SDIO function when only one IRQ active */ unsigned num_info; /* number of info strings */ const char **info; /* info strings */ struct sdio_func_tuple *tuples; /* unknown common tuples */ -- cgit v1.2.3 From 7311bef0697bcfbbcb898c3c22e61e23f203ae9d Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Wed, 11 May 2011 16:51:11 +0000 Subject: mmc: tmio: runtime suspend the controller, where possible The TMIO MMC controller cannot be powered off to save power, when no card is plugged in, because then it will not be able to detect a new card-insertion event. On some implementations, however, it is possible to switch to using another source to detect card insertion. This patch adds support for such implementations. 
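A minimal board-code sketch of the new hook (board_mmc_data and board_cd_irq are hypothetical names, the real wiring depends on the platform): the platform marks the controller with TMIO_MMC_HAS_COLD_CD and, from its own card-detect interrupt, powers the controller back up via tmio_mmc_cd_wakeup():

	#include <linux/interrupt.h>
	#include <linux/mfd/tmio.h>

	static struct tmio_mmc_data board_mmc_data = {
		.flags	= TMIO_MMC_HAS_COLD_CD,	/* platform detects card insertion itself */
		/* ... other fields ... */
	};

	static irqreturn_t board_cd_irq(int irq, void *dev_id)
	{
		/* resume the controller so the driver can rescan the slot */
		tmio_mmc_cd_wakeup(&board_mmc_data);
		return IRQ_HANDLED;
	}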
Signed-off-by: Guennadi Liakhovetski Signed-off-by: Chris Ball --- drivers/mmc/host/tmio_mmc.h | 3 ++ drivers/mmc/host/tmio_mmc_pio.c | 64 +++++++++++++++++++++++++++++++++++++---- include/linux/mfd/tmio.h | 17 +++++++++++ 3 files changed, 78 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h index c6bf726c8f44..8260bc2c34e3 100644 --- a/drivers/mmc/host/tmio_mmc.h +++ b/drivers/mmc/host/tmio_mmc.h @@ -131,4 +131,7 @@ int tmio_mmc_host_resume(struct device *dev); #define tmio_mmc_host_resume NULL #endif +int tmio_mmc_host_runtime_suspend(struct device *dev); +int tmio_mmc_host_runtime_resume(struct device *dev); + #endif diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c index af5d4f6d2233..ad6347bb02dd 100644 --- a/drivers/mmc/host/tmio_mmc_pio.c +++ b/drivers/mmc/host/tmio_mmc_pio.c @@ -746,6 +746,7 @@ fail: static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) { struct tmio_mmc_host *host = mmc_priv(mmc); + struct tmio_mmc_data *pdata = host->pdata; unsigned long flags; spin_lock_irqsave(&host->lock, flags); @@ -775,13 +776,24 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) /* Power sequence - OFF -> UP -> ON */ if (ios->power_mode == MMC_POWER_UP) { + if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) && !pdata->power) { + pm_runtime_get_sync(&host->pdev->dev); + pdata->power = true; + } /* power up SD bus */ if (host->set_pwr) host->set_pwr(host->pdev, 1); } else if (ios->power_mode == MMC_POWER_OFF || !ios->clock) { /* power down SD bus */ - if (ios->power_mode == MMC_POWER_OFF && host->set_pwr) - host->set_pwr(host->pdev, 0); + if (ios->power_mode == MMC_POWER_OFF) { + if (host->set_pwr) + host->set_pwr(host->pdev, 0); + if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) && + pdata->power) { + pdata->power = false; + pm_runtime_put(&host->pdev->dev); + } + } tmio_mmc_clk_stop(host); } else { /* start bus clock */ @@ -853,6 +865,7 @@ int __devinit tmio_mmc_host_probe(struct tmio_mmc_host **host, if (!mmc) return -ENOMEM; + pdata->dev = &pdev->dev; _host = mmc_priv(mmc); _host->pdata = pdata; _host->mmc = mmc; @@ -886,6 +899,7 @@ int __devinit tmio_mmc_host_probe(struct tmio_mmc_host **host, else mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; + pdata->power = false; pm_runtime_enable(&pdev->dev); ret = pm_runtime_resume(&pdev->dev); if (ret < 0) @@ -907,7 +921,8 @@ int __devinit tmio_mmc_host_probe(struct tmio_mmc_host **host, tmio_mmc_request_dma(_host, pdata); /* We have to keep the device powered for its card detection to work */ - pm_runtime_get_noresume(&pdev->dev); + if (!(pdata->flags & TMIO_MMC_HAS_COLD_CD)) + pm_runtime_get_noresume(&pdev->dev); mmc_add_host(mmc); @@ -937,15 +952,25 @@ void tmio_mmc_host_remove(struct tmio_mmc_host *host) { struct platform_device *pdev = host->pdev; + /* + * We don't have to manipulate pdata->power here: if there is a card in + * the slot, the runtime PM is active and our .runtime_resume() will not + * be run. If there is no card in the slot and the platform can suspend + * the controller, the runtime PM is suspended and pdata->power == false, + * so, our .runtime_resume() will not try to detect a card in the slot. 
+ */ + if (host->pdata->flags & TMIO_MMC_HAS_COLD_CD) + pm_runtime_get_sync(&pdev->dev); + mmc_remove_host(host->mmc); cancel_delayed_work_sync(&host->delayed_reset_work); tmio_mmc_release_dma(host); - iounmap(host->ctl); - mmc_free_host(host->mmc); - /* Compensate for pm_runtime_get_sync() in probe() above */ pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); + + iounmap(host->ctl); + mmc_free_host(host->mmc); } EXPORT_SYMBOL(tmio_mmc_host_remove); @@ -970,6 +995,9 @@ int tmio_mmc_host_resume(struct device *dev) struct mmc_host *mmc = dev_get_drvdata(dev); struct tmio_mmc_host *host = mmc_priv(mmc); + /* The MMC core will perform the complete set up */ + host->pdata->power = false; + if (!host->pm_error) pm_runtime_get_sync(dev); @@ -982,4 +1010,28 @@ EXPORT_SYMBOL(tmio_mmc_host_resume); #endif /* CONFIG_PM */ +int tmio_mmc_host_runtime_suspend(struct device *dev) +{ + return 0; +} +EXPORT_SYMBOL(tmio_mmc_host_runtime_suspend); + +int tmio_mmc_host_runtime_resume(struct device *dev) +{ + struct mmc_host *mmc = dev_get_drvdata(dev); + struct tmio_mmc_host *host = mmc_priv(mmc); + struct tmio_mmc_data *pdata = host->pdata; + + tmio_mmc_reset(host); + + if (pdata->power) { + /* Only entered after a card-insert interrupt */ + tmio_mmc_set_ios(mmc, &mmc->ios); + mmc_detect_change(mmc, msecs_to_jiffies(100)); + } + + return 0; +} +EXPORT_SYMBOL(tmio_mmc_host_runtime_resume); + MODULE_LICENSE("GPL v2"); diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 8e70310ee945..5a90266c3a5a 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -4,6 +4,7 @@ #include #include #include +#include #define tmio_ioread8(addr) readb(addr) #define tmio_ioread16(addr) readw(addr) @@ -61,6 +62,12 @@ * Some controllers can support SDIO IRQ signalling. */ #define TMIO_MMC_SDIO_IRQ (1 << 2) +/* + * Some platforms can detect card insertion events with controller powered + * down, in which case they have to call tmio_mmc_cd_wakeup() to power up the + * controller and report the event to the driver. + */ +#define TMIO_MMC_HAS_COLD_CD (1 << 3) int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); @@ -82,11 +89,21 @@ struct tmio_mmc_data { unsigned long flags; u32 ocr_mask; /* available voltages */ struct tmio_mmc_dma *dma; + struct device *dev; + bool power; void (*set_pwr)(struct platform_device *host, int state); void (*set_clk_div)(struct platform_device *host, int state); int (*get_cd)(struct platform_device *host); }; +static inline void tmio_mmc_cd_wakeup(struct tmio_mmc_data *pdata) +{ + if (pdata && !pdata->power) { + pdata->power = true; + pm_runtime_get(pdata->dev); + } +} + /* * data for the NAND controller */ -- cgit v1.2.3 From 2595880481ac894d390365092de9aaf92b44e147 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Wed, 11 May 2011 16:51:15 +0000 Subject: mmc: sdhi: allow powering down controller with no card inserted Supply a link to TMIO private data for platforms to implement their own card detection. 
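For example (sdhi0_info and sdhi0_cd_irq are hypothetical board names), a platform's card-detect handler can reach the TMIO private data through the new pdata pointer, which sh_mobile_sdhi_probe() fills in, and wake the controller as in the previous patch:

	static struct sh_mobile_sdhi_info sdhi0_info = {
		.tmio_flags	= TMIO_MMC_HAS_COLD_CD,
		/* .pdata is filled in by sh_mobile_sdhi_probe() */
	};

	static irqreturn_t sdhi0_cd_irq(int irq, void *dev_id)
	{
		if (sdhi0_info.pdata)
			tmio_mmc_cd_wakeup(sdhi0_info.pdata);
		return IRQ_HANDLED;
	}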
Signed-off-by: Guennadi Liakhovetski Signed-off-by: Chris Ball --- drivers/mmc/host/sh_mobile_sdhi.c | 6 ++++++ include/linux/mmc/sh_mobile_sdhi.h | 4 ++++ 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c index d264bbeb529b..b3654293017b 100644 --- a/drivers/mmc/host/sh_mobile_sdhi.c +++ b/drivers/mmc/host/sh_mobile_sdhi.c @@ -71,6 +71,7 @@ static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev) } mmc_data = &priv->mmc_data; + p->pdata = mmc_data; snprintf(clk_name, sizeof(clk_name), "sdhi%d", pdev->id); priv->clk = clk_get(&pdev->dev, clk_name); @@ -159,8 +160,11 @@ static int sh_mobile_sdhi_remove(struct platform_device *pdev) struct mmc_host *mmc = platform_get_drvdata(pdev); struct tmio_mmc_host *host = mmc_priv(mmc); struct sh_mobile_sdhi *priv = container_of(host->pdata, struct sh_mobile_sdhi, mmc_data); + struct sh_mobile_sdhi_info *p = pdev->dev.platform_data; int i, irq; + p->pdata = NULL; + for (i = 0; i < 3; i++) { irq = platform_get_irq(pdev, i); if (irq >= 0) @@ -178,6 +182,8 @@ static int sh_mobile_sdhi_remove(struct platform_device *pdev) static const struct dev_pm_ops tmio_mmc_dev_pm_ops = { .suspend = tmio_mmc_host_suspend, .resume = tmio_mmc_host_resume, + .runtime_suspend = tmio_mmc_host_runtime_suspend, + .runtime_resume = tmio_mmc_host_runtime_resume, }; static struct platform_driver sh_mobile_sdhi_driver = { diff --git a/include/linux/mmc/sh_mobile_sdhi.h b/include/linux/mmc/sh_mobile_sdhi.h index c981b959760f..faf32b6ec185 100644 --- a/include/linux/mmc/sh_mobile_sdhi.h +++ b/include/linux/mmc/sh_mobile_sdhi.h @@ -3,12 +3,16 @@ #include +struct platform_device; +struct tmio_mmc_data; + struct sh_mobile_sdhi_info { int dma_slave_tx; int dma_slave_rx; unsigned long tmio_flags; unsigned long tmio_caps; u32 tmio_ocr_mask; /* available MMC voltages */ + struct tmio_mmc_data *pdata; void (*set_pwr)(struct platform_device *pdev, int state); int (*get_cd)(struct platform_device *pdev); }; -- cgit v1.2.3 From 4c4cb171054230c2e58ed6574d7faa1871c75bbe Mon Sep 17 00:00:00 2001 From: Philip Rakity Date: Fri, 13 May 2011 11:17:18 +0530 Subject: mmc: core: add support for eMMC Dual Data Rate eMMC voltage change not required for 1.8V. 3.3V and 1.8V vcc are capable of doing DDR. vccq of 1.8v is not required. Signed-off-by: Philip Rakity Reviewed-by: Arindam Nath Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 14 ++------------ drivers/mmc/core/core.h | 2 -- drivers/mmc/core/mmc.c | 35 ++++++++++++++++++++++++++++++----- include/linux/mmc/host.h | 1 + 4 files changed, 33 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 94c8d5a9ecae..7863eedf3d9a 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -717,23 +717,13 @@ void mmc_set_bus_mode(struct mmc_host *host, unsigned int mode) mmc_set_ios(host); } -/* - * Change data bus width and DDR mode of a host. - */ -void mmc_set_bus_width_ddr(struct mmc_host *host, unsigned int width, - unsigned int ddr) -{ - host->ios.bus_width = width; - host->ios.ddr = ddr; - mmc_set_ios(host); -} - /* * Change data bus width of a host. 
*/ void mmc_set_bus_width(struct mmc_host *host, unsigned int width) { - mmc_set_bus_width_ddr(host, width, MMC_SDR_MODE); + host->ios.bus_width = width; + mmc_set_ios(host); } /** diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index 53d23c240324..d9411ed2a39b 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -38,8 +38,6 @@ void mmc_ungate_clock(struct mmc_host *host); void mmc_set_ungated(struct mmc_host *host); void mmc_set_bus_mode(struct mmc_host *host, unsigned int mode); void mmc_set_bus_width(struct mmc_host *host, unsigned int width); -void mmc_set_bus_width_ddr(struct mmc_host *host, unsigned int width, - unsigned int ddr); u32 mmc_select_voltage(struct mmc_host *host, u32 ocr); int mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage, bool cmd11); diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index a2c795e8f9dd..0433fe66cbad 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -20,6 +20,7 @@ #include "core.h" #include "bus.h" #include "mmc_ops.h" +#include "sd_ops.h" static const unsigned int tran_exp[] = { 10000, 100000, 1000000, 10000000, @@ -633,10 +634,14 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, */ if (mmc_card_highspeed(card)) { if ((card->ext_csd.card_type & EXT_CSD_CARD_TYPE_DDR_1_8V) - && (host->caps & (MMC_CAP_1_8V_DDR))) + && ((host->caps & (MMC_CAP_1_8V_DDR | + MMC_CAP_UHS_DDR50)) + == (MMC_CAP_1_8V_DDR | MMC_CAP_UHS_DDR50))) ddr = MMC_1_8V_DDR_MODE; else if ((card->ext_csd.card_type & EXT_CSD_CARD_TYPE_DDR_1_2V) - && (host->caps & (MMC_CAP_1_2V_DDR))) + && ((host->caps & (MMC_CAP_1_2V_DDR | + MMC_CAP_UHS_DDR50)) + == (MMC_CAP_1_2V_DDR | MMC_CAP_UHS_DDR50))) ddr = MMC_1_2V_DDR_MODE; } @@ -670,8 +675,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, ext_csd_bits[idx][0], 0); if (!err) { - mmc_set_bus_width_ddr(card->host, - bus_width, MMC_SDR_MODE); + mmc_set_bus_width(card->host, bus_width); /* * If controller can't handle bus width test, * use the highest bus width to maintain @@ -697,8 +701,29 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, 1 << bus_width, ddr); goto free_card; } else if (ddr) { + /* + * eMMC cards can support 3.3V to 1.2V i/o (vccq) + * signaling. + * + * EXT_CSD_CARD_TYPE_DDR_1_8V means 3.3V or 1.8V vccq. + * + * 1.8V vccq at 3.3V core voltage (vcc) is not required + * in the JEDEC spec for DDR. + * + * Do not force change in vccq since we are obviously + * working and no change to vccq is needed. + * + * WARNING: eMMC rules are NOT the same as SD DDR + */ + if (ddr == EXT_CSD_CARD_TYPE_DDR_1_2V) { + err = mmc_set_signal_voltage(host, + MMC_SIGNAL_VOLTAGE_120, 0); + if (err) + goto err; + } mmc_card_set_ddr_mode(card); - mmc_set_bus_width_ddr(card->host, bus_width, ddr); + mmc_set_timing(card->host, MMC_TIMING_UHS_DDR50); + mmc_set_bus_width(card->host, bus_width); } } diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 6716bd12eccd..de32e6aa018a 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -66,6 +66,7 @@ struct mmc_ios { #define MMC_SIGNAL_VOLTAGE_330 0 #define MMC_SIGNAL_VOLTAGE_180 1 +#define MMC_SIGNAL_VOLTAGE_120 2 unsigned char drv_type; /* driver type (A, B, C, D) */ -- cgit v1.2.3 From c59de9287993b5c36f9005f745a3ce0b1008131d Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Mon, 23 May 2011 15:06:35 -0500 Subject: mmc: quirks: Add/remove quirks conditional support. Conditional add/remove quirks for MMC and SD. 
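A short usage sketch (MMC_QUIRK_EXAMPLE is a placeholder flag, not one defined by this patch): the new helpers apply a quirk only when the card type matches, so a shared fixup hook can carry both MMC and SD entries:

	/* in a card fixup hook; illustration only */
	add_quirk_mmc(card, MMC_QUIRK_EXAMPLE);		/* no-op unless mmc_card_mmc(card) */
	add_quirk_sd(card, MMC_QUIRK_EXAMPLE);		/* no-op unless mmc_card_sd(card) */
	remove_quirk(card, MMC_QUIRK_EXAMPLE);		/* unconditional removal */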
Signed-off-by: Andrei Warkentin Signed-off-by: Chris Ball --- include/linux/mmc/card.h | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 4910dec47bb7..7190aa2096f7 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -273,16 +273,14 @@ struct mmc_fixup { card->cid.month) /* - * This hook just adds a quirk unconditionally. + * Unconditionally quirk add/remove. */ + static inline void __maybe_unused add_quirk(struct mmc_card *card, int data) { card->quirks |= data; } -/* - * This hook just removes a quirk unconditionally. - */ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data) { card->quirks &= ~data; @@ -308,6 +306,40 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data) #define mmc_sd_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED) #define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC) +/* + * Quirk add/remove for MMC products. + */ + +static inline void __maybe_unused add_quirk_mmc(struct mmc_card *card, int data) +{ + if (mmc_card_mmc(card)) + card->quirks |= data; +} + +static inline void __maybe_unused remove_quirk_mmc(struct mmc_card *card, + int data) +{ + if (mmc_card_mmc(card)) + card->quirks &= ~data; +} + +/* + * Quirk add/remove for SD products. + */ + +static inline void __maybe_unused add_quirk_sd(struct mmc_card *card, int data) +{ + if (mmc_card_sd(card)) + card->quirks |= data; +} + +static inline void __maybe_unused remove_quirk_sd(struct mmc_card *card, + int data) +{ + if (mmc_card_sd(card)) + card->quirks &= ~data; +} + static inline int mmc_card_lenient_fn0(const struct mmc_card *c) { return c->quirks & MMC_QUIRK_LENIENT_FN0; -- cgit v1.2.3 From aecb7b64dd9e2512c7a4c7e61dd781415d3dac5a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 24 May 2011 14:04:09 +0530 Subject: dmaengine/dw_dmac: Update maintainer-ship Nobody is currently maintaining dw_dmac. We are using dw_dmac for SPEAr13xx and are currently maintaining it. After discussing with Vinod, sending this patch to update maintainer-ship of dw_dmac. Signed-off-by: Viresh Kumar Acked-by: Havard Skinnemoen Signed-off-by: Vinod Koul --- MAINTAINERS | 7 +++++++ drivers/dma/dw_dmac.c | 2 ++ drivers/dma/dw_dmac_regs.h | 1 + include/linux/dw_dmac.h | 1 + 4 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 6b4b9cdec370..8f4413014b2d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5401,6 +5401,13 @@ L: linux-serial@vger.kernel.org S: Maintained F: drivers/tty/serial +SYNOPSYS DESIGNWARE DMAC DRIVER +M: Viresh Kumar +S: Maintained +F: include/linux/dw_dmac.h +F: drivers/dma/dw_dmac_regs.h +F: drivers/dma/dw_dmac.c + TIMEKEEPING, NTP M: John Stultz M: Thomas Gleixner diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index eec675bf4f95..efd836dfb65a 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -3,6 +3,7 @@ * AVR32 systems.) 
* * Copyright (C) 2007-2008 Atmel Corporation + * Copyright (C) 2010-2011 ST Microelectronics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -1575,3 +1576,4 @@ module_exit(dw_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller driver"); MODULE_AUTHOR("Haavard Skinnemoen "); +MODULE_AUTHOR("Viresh Kumar "); diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h index c968597c32ab..c3419518d701 100644 --- a/drivers/dma/dw_dmac_regs.h +++ b/drivers/dma/dw_dmac_regs.h @@ -2,6 +2,7 @@ * Driver for the Synopsys DesignWare AHB DMA Controller * * Copyright (C) 2005-2007 Atmel Corporation + * Copyright (C) 2010-2011 ST Microelectronics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h index 6998d9376ef9..4bfe0a2f7d50 100644 --- a/include/linux/dw_dmac.h +++ b/include/linux/dw_dmac.h @@ -3,6 +3,7 @@ * AVR32 systems.) * * Copyright (C) 2007 Atmel Corporation + * Copyright (C) 2010-2011 ST Microelectronics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as -- cgit v1.2.3 From 7bf02ea22c6cdd09e2d3f1d3c3fe366b834ae9af Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 24 May 2011 17:11:16 -0700 Subject: arch, mm: filter disallowed nodes from arch specific show_mem functions Architectures that implement their own show_mem() function did not pass the filter argument to show_free_areas() to appropriately avoid emitting the state of nodes that are disallowed in the current context. This patch now passes the filter argument to show_free_areas() so those nodes are now avoided. This patch also removes the show_free_areas() wrapper around __show_free_areas() and converts existing callers to pass an empty filter. ia64 emits additional information for each node, so skip_free_areas_zone() must be made global to filter disallowed nodes and it is converted to use a nid argument rather than a zone for this use case. Signed-off-by: David Rientjes Cc: Russell King Cc: Tony Luck Cc: Fenghua Yu Cc: Kyle McMartin Cc: Helge Deller Cc: James Bottomley Cc: "David S. 
Miller" Cc: Guan Xuetao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/init.c | 2 +- arch/ia64/mm/contig.c | 10 ++++++---- arch/ia64/mm/discontig.c | 10 ++++++---- arch/parisc/mm/init.c | 2 +- arch/sparc/kernel/setup_32.c | 2 +- arch/sparc/mm/init_32.c | 2 +- arch/unicore32/mm/init.c | 2 +- drivers/net/ioc3-eth.c | 2 +- drivers/tty/serial/68328serial.c | 2 +- include/linux/mm.h | 6 +++--- lib/show_mem.c | 2 +- mm/nommu.c | 6 +++--- mm/page_alloc.c | 22 ++++++++-------------- 13 files changed, 34 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 76f82ae44efb..3f17ea146f0e 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -85,7 +85,7 @@ void show_mem(unsigned int filter) struct meminfo * mi = &meminfo; printk("Mem-info:\n"); - show_free_areas(); + show_free_areas(filter); for_each_bank (i, mi) { struct membank *bank = &mi->bank[i]; diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 9a018cde5d84..f114a3b14c6a 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -44,13 +44,16 @@ void show_mem(unsigned int filter) pg_data_t *pgdat; printk(KERN_INFO "Mem-info:\n"); - show_free_areas(); + show_free_areas(filter); printk(KERN_INFO "Node memory in pages:\n"); for_each_online_pgdat(pgdat) { unsigned long present; unsigned long flags; int shared = 0, cached = 0, reserved = 0; + int nid = pgdat->node_id; + if (skip_free_areas_node(filter, nid)) + continue; pgdat_resize_lock(pgdat, &flags); present = pgdat->node_present_pages; for(i = 0; i < pgdat->node_spanned_pages; i++) { @@ -64,8 +67,7 @@ void show_mem(unsigned int filter) if (max_gap < LARGE_GAP) continue; #endif - i = vmemmap_find_next_valid_pfn(pgdat->node_id, - i) - 1; + i = vmemmap_find_next_valid_pfn(nid, i) - 1; continue; } if (PageReserved(page)) @@ -81,7 +83,7 @@ void show_mem(unsigned int filter) total_cached += cached; total_shared += shared; printk(KERN_INFO "Node %4d: RAM: %11ld, rsvd: %8d, " - "shrd: %10d, swpd: %10d\n", pgdat->node_id, + "shrd: %10d, swpd: %10d\n", nid, present, reserved, shared, cached); } printk(KERN_INFO "%ld pages of RAM\n", total_present); diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 82ab1bc6afb1..c641333cd997 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -622,13 +622,16 @@ void show_mem(unsigned int filter) pg_data_t *pgdat; printk(KERN_INFO "Mem-info:\n"); - show_free_areas(); + show_free_areas(filter); printk(KERN_INFO "Node memory in pages:\n"); for_each_online_pgdat(pgdat) { unsigned long present; unsigned long flags; int shared = 0, cached = 0, reserved = 0; + int nid = pgdat->node_id; + if (skip_free_areas_node(filter, nid)) + continue; pgdat_resize_lock(pgdat, &flags); present = pgdat->node_present_pages; for(i = 0; i < pgdat->node_spanned_pages; i++) { @@ -638,8 +641,7 @@ void show_mem(unsigned int filter) if (pfn_valid(pgdat->node_start_pfn + i)) page = pfn_to_page(pgdat->node_start_pfn + i); else { - i = vmemmap_find_next_valid_pfn(pgdat->node_id, - i) - 1; + i = vmemmap_find_next_valid_pfn(nid, i) - 1; continue; } if (PageReserved(page)) @@ -655,7 +657,7 @@ void show_mem(unsigned int filter) total_cached += cached; total_shared += shared; printk(KERN_INFO "Node %4d: RAM: %11ld, rsvd: %8d, " - "shrd: %10d, swpd: %10d\n", pgdat->node_id, + "shrd: %10d, swpd: %10d\n", nid, present, reserved, shared, cached); } printk(KERN_INFO "%ld pages of RAM\n", total_present); diff --git a/arch/parisc/mm/init.c 
b/arch/parisc/mm/init.c index 5fa1e273006e..c5c9c65e502d 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -686,7 +686,7 @@ void show_mem(unsigned int filter) int shared = 0, cached = 0; printk(KERN_INFO "Mem-info:\n"); - show_free_areas(); + show_free_areas(filter); #ifndef CONFIG_DISCONTIGMEM i = max_mapnr; while (i-- > 0) { diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index 3609bdee9ed2..3249d3f3234d 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -82,7 +82,7 @@ static void prom_sync_me(void) "nop\n\t" : : "r" (&trapbase)); prom_printf("PROM SYNC COMMAND...\n"); - show_free_areas(); + show_free_areas(0); if(current->pid != 0) { local_irq_enable(); sys_sync(); diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index 4c31e2b6e71b..28c2cc81c9a9 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -78,7 +78,7 @@ void __init kmap_init(void) void show_mem(unsigned int filter) { printk("Mem-info:\n"); - show_free_areas(); + show_free_areas(filter); printk("Free swap: %6ldkB\n", nr_swap_pages << (PAGE_SHIFT-10)); printk("%ld pages of RAM\n", totalram_pages); diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index 1fc02633f700..2d3e7112d2a3 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -62,7 +62,7 @@ void show_mem(unsigned int filter) struct meminfo *mi = &meminfo; printk(KERN_DEFAULT "Mem-info:\n"); - show_free_areas(); + show_free_areas(filter); for_each_bank(i, mi) { struct membank *bank = &mi->bank[i]; diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c index 96c95617195f..32f07f868d89 100644 --- a/drivers/net/ioc3-eth.c +++ b/drivers/net/ioc3-eth.c @@ -915,7 +915,7 @@ static void ioc3_alloc_rings(struct net_device *dev) skb = ioc3_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC); if (!skb) { - show_free_areas(); + show_free_areas(0); continue; } diff --git a/drivers/tty/serial/68328serial.c b/drivers/tty/serial/68328serial.c index d5bfd41707e7..e0a77540b8ca 100644 --- a/drivers/tty/serial/68328serial.c +++ b/drivers/tty/serial/68328serial.c @@ -281,7 +281,7 @@ static void receive_chars(struct m68k_serial *info, unsigned short rx) #ifdef CONFIG_MAGIC_SYSRQ } else if (ch == 0x10) { /* ^P */ show_state(); - show_free_areas(); + show_free_areas(0); show_buffers(); /* show_net_buffers(); */ return; diff --git a/include/linux/mm.h b/include/linux/mm.h index 6507dde38b16..1746f67c33de 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -862,13 +862,13 @@ extern void pagefault_out_of_memory(void); #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) /* - * Flags passed to show_mem() and __show_free_areas() to suppress output in + * Flags passed to show_mem() and show_free_areas() to suppress output in * various contexts. 
*/ #define SHOW_MEM_FILTER_NODES (0x0001u) /* filter disallowed nodes */ -extern void show_free_areas(void); -extern void __show_free_areas(unsigned int flags); +extern void show_free_areas(unsigned int flags); +extern bool skip_free_areas_node(unsigned int flags, int nid); int shmem_lock(struct file *file, int lock, struct user_struct *user); struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); diff --git a/lib/show_mem.c b/lib/show_mem.c index 90cbe4bb5960..4407f8c9b1f7 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -16,7 +16,7 @@ void show_mem(unsigned int filter) nonshared = 0, highmem = 0; printk("Mem-Info:\n"); - __show_free_areas(filter); + show_free_areas(filter); for_each_online_pgdat(pgdat) { unsigned long i, flags; diff --git a/mm/nommu.c b/mm/nommu.c index c4c542c736a9..5afce48ce339 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1235,7 +1235,7 @@ error_free: enomem: printk("Allocation of length %lu from process %d (%s) failed\n", len, current->pid, current->comm); - show_free_areas(); + show_free_areas(0); return -ENOMEM; } @@ -1468,14 +1468,14 @@ error_getting_vma: printk(KERN_WARNING "Allocation of vma for %lu byte allocation" " from process %d failed\n", len, current->pid); - show_free_areas(); + show_free_areas(0); return -ENOMEM; error_getting_region: printk(KERN_WARNING "Allocation of vm region for %lu byte allocation" " from process %d failed\n", len, current->pid); - show_free_areas(); + show_free_areas(0); return -ENOMEM; } EXPORT_SYMBOL(do_mmap_pgoff); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9d5498e2d0f5..b1447522d346 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2473,10 +2473,10 @@ void si_meminfo_node(struct sysinfo *val, int nid) #endif /* - * Determine whether the zone's node should be displayed or not, depending on - * whether SHOW_MEM_FILTER_NODES was passed to __show_free_areas(). + * Determine whether the node should be displayed or not, depending on whether + * SHOW_MEM_FILTER_NODES was passed to show_free_areas(). */ -static bool skip_free_areas_zone(unsigned int flags, const struct zone *zone) +bool skip_free_areas_node(unsigned int flags, int nid) { bool ret = false; @@ -2484,8 +2484,7 @@ static bool skip_free_areas_zone(unsigned int flags, const struct zone *zone) goto out; get_mems_allowed(); - ret = !node_isset(zone->zone_pgdat->node_id, - cpuset_current_mems_allowed); + ret = !node_isset(nid, cpuset_current_mems_allowed); put_mems_allowed(); out: return ret; @@ -2500,13 +2499,13 @@ out: * Suppresses nodes that are not allowed by current's cpuset if * SHOW_MEM_FILTER_NODES is passed. 
*/ -void __show_free_areas(unsigned int filter) +void show_free_areas(unsigned int filter) { int cpu; struct zone *zone; for_each_populated_zone(zone) { - if (skip_free_areas_zone(filter, zone)) + if (skip_free_areas_node(filter, zone_to_nid(zone))) continue; show_node(zone); printk("%s per-cpu:\n", zone->name); @@ -2549,7 +2548,7 @@ void __show_free_areas(unsigned int filter) for_each_populated_zone(zone) { int i; - if (skip_free_areas_zone(filter, zone)) + if (skip_free_areas_node(filter, zone_to_nid(zone))) continue; show_node(zone); printk("%s" @@ -2618,7 +2617,7 @@ void __show_free_areas(unsigned int filter) for_each_populated_zone(zone) { unsigned long nr[MAX_ORDER], flags, order, total = 0; - if (skip_free_areas_zone(filter, zone)) + if (skip_free_areas_node(filter, zone_to_nid(zone))) continue; show_node(zone); printk("%s: ", zone->name); @@ -2639,11 +2638,6 @@ void __show_free_areas(unsigned int filter) show_swap_cache_info(); } -void show_free_areas(void) -{ - __show_free_areas(0); -} - static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref) { zoneref->zone = zone; -- cgit v1.2.3 From fa25c503dfa203b921199ea42c0046c89f2ed49f Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 24 May 2011 17:11:28 -0700 Subject: mm: per-node vmstat: show proper vmstats commit 2ac390370a ("writeback: add /sys/devices/system/node//vmstat") added vmstat entry. But strangely it only show nr_written and nr_dirtied. # cat /sys/devices/system/node/node20/vmstat nr_written 0 nr_dirtied 0 Of course, It's not adequate. With this patch, the vmstat show all vm stastics as /proc/vmstat. # cat /sys/devices/system/node/node0/vmstat nr_free_pages 899224 nr_inactive_anon 201 nr_active_anon 17380 nr_inactive_file 31572 nr_active_file 28277 nr_unevictable 0 nr_mlock 0 nr_anon_pages 17321 nr_mapped 8640 nr_file_pages 60107 nr_dirty 33 nr_writeback 0 nr_slab_reclaimable 6850 nr_slab_unreclaimable 7604 nr_page_table_pages 3105 nr_kernel_stack 175 nr_unstable 0 nr_bounce 0 nr_vmscan_write 0 nr_writeback_temp 0 nr_isolated_anon 0 nr_isolated_file 0 nr_shmem 260 nr_dirtied 1050 nr_written 938 numa_hit 962872 numa_miss 0 numa_foreign 0 numa_interleave 8617 numa_local 962872 numa_other 0 nr_anon_transparent_hugepages 0 [akpm@linux-foundation.org: no externs in .c files] Signed-off-by: KOSAKI Motohiro Cc: Michael Rubin Cc: Wu Fengguang Acked-by: David Rientjes Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 14 ++- include/linux/vmstat.h | 4 +- mm/vmstat.c | 261 +++++++++++++++++++++++++------------------------ 3 files changed, 144 insertions(+), 135 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/node.c b/drivers/base/node.c index b3b72d64e805..793f796c4da3 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -179,11 +180,14 @@ static ssize_t node_read_vmstat(struct sys_device *dev, struct sysdev_attribute *attr, char *buf) { int nid = dev->id; - return sprintf(buf, - "nr_written %lu\n" - "nr_dirtied %lu\n", - node_page_state(nid, NR_WRITTEN), - node_page_state(nid, NR_DIRTIED)); + int i; + int n = 0; + + for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) + n += sprintf(buf+n, "%s %lu\n", vmstat_text[i], + node_page_state(nid, i)); + + return n; } static SYSDEV_ATTR(vmstat, S_IRUGO, node_read_vmstat, NULL); diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 2b3831b58aa4..e73d1030f2f7 100644 --- a/include/linux/vmstat.h +++ 
b/include/linux/vmstat.h @@ -313,6 +313,8 @@ static inline void __dec_zone_page_state(struct page *page, #define set_pgdat_percpu_threshold(pgdat, callback) { } static inline void refresh_cpu_vm_stats(int cpu) { } -#endif +#endif /* CONFIG_SMP */ + +extern const char * const vmstat_text[]; #endif /* _LINUX_VMSTAT_H */ diff --git a/mm/vmstat.c b/mm/vmstat.c index 897ea9e88238..209546a8bdd5 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -659,6 +659,138 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat, } #endif +#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) +#ifdef CONFIG_ZONE_DMA +#define TEXT_FOR_DMA(xx) xx "_dma", +#else +#define TEXT_FOR_DMA(xx) +#endif + +#ifdef CONFIG_ZONE_DMA32 +#define TEXT_FOR_DMA32(xx) xx "_dma32", +#else +#define TEXT_FOR_DMA32(xx) +#endif + +#ifdef CONFIG_HIGHMEM +#define TEXT_FOR_HIGHMEM(xx) xx "_high", +#else +#define TEXT_FOR_HIGHMEM(xx) +#endif + +#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \ + TEXT_FOR_HIGHMEM(xx) xx "_movable", + +const char * const vmstat_text[] = { + /* Zoned VM counters */ + "nr_free_pages", + "nr_inactive_anon", + "nr_active_anon", + "nr_inactive_file", + "nr_active_file", + "nr_unevictable", + "nr_mlock", + "nr_anon_pages", + "nr_mapped", + "nr_file_pages", + "nr_dirty", + "nr_writeback", + "nr_slab_reclaimable", + "nr_slab_unreclaimable", + "nr_page_table_pages", + "nr_kernel_stack", + "nr_unstable", + "nr_bounce", + "nr_vmscan_write", + "nr_writeback_temp", + "nr_isolated_anon", + "nr_isolated_file", + "nr_shmem", + "nr_dirtied", + "nr_written", + +#ifdef CONFIG_NUMA + "numa_hit", + "numa_miss", + "numa_foreign", + "numa_interleave", + "numa_local", + "numa_other", +#endif + "nr_anon_transparent_hugepages", + "nr_dirty_threshold", + "nr_dirty_background_threshold", + +#ifdef CONFIG_VM_EVENT_COUNTERS + "pgpgin", + "pgpgout", + "pswpin", + "pswpout", + + TEXTS_FOR_ZONES("pgalloc") + + "pgfree", + "pgactivate", + "pgdeactivate", + + "pgfault", + "pgmajfault", + + TEXTS_FOR_ZONES("pgrefill") + TEXTS_FOR_ZONES("pgsteal") + TEXTS_FOR_ZONES("pgscan_kswapd") + TEXTS_FOR_ZONES("pgscan_direct") + +#ifdef CONFIG_NUMA + "zone_reclaim_failed", +#endif + "pginodesteal", + "slabs_scanned", + "kswapd_steal", + "kswapd_inodesteal", + "kswapd_low_wmark_hit_quickly", + "kswapd_high_wmark_hit_quickly", + "kswapd_skip_congestion_wait", + "pageoutrun", + "allocstall", + + "pgrotated", + +#ifdef CONFIG_COMPACTION + "compact_blocks_moved", + "compact_pages_moved", + "compact_pagemigrate_failed", + "compact_stall", + "compact_fail", + "compact_success", +#endif + +#ifdef CONFIG_HUGETLB_PAGE + "htlb_buddy_alloc_success", + "htlb_buddy_alloc_fail", +#endif + "unevictable_pgs_culled", + "unevictable_pgs_scanned", + "unevictable_pgs_rescued", + "unevictable_pgs_mlocked", + "unevictable_pgs_munlocked", + "unevictable_pgs_cleared", + "unevictable_pgs_stranded", + "unevictable_pgs_mlockfreed", + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + "thp_fault_alloc", + "thp_fault_fallback", + "thp_collapse_alloc", + "thp_collapse_alloc_failed", + "thp_split", +#endif + +#endif /* CONFIG_VM_EVENTS_COUNTERS */ +}; +#endif /* CONFIG_PROC_FS || CONFIG_SYSFS */ + + #ifdef CONFIG_PROC_FS static void frag_show_print(struct seq_file *m, pg_data_t *pgdat, struct zone *zone) @@ -831,135 +963,6 @@ static const struct file_operations pagetypeinfo_file_ops = { .release = seq_release, }; -#ifdef CONFIG_ZONE_DMA -#define TEXT_FOR_DMA(xx) xx "_dma", -#else -#define TEXT_FOR_DMA(xx) -#endif - -#ifdef CONFIG_ZONE_DMA32 -#define 
TEXT_FOR_DMA32(xx) xx "_dma32", -#else -#define TEXT_FOR_DMA32(xx) -#endif - -#ifdef CONFIG_HIGHMEM -#define TEXT_FOR_HIGHMEM(xx) xx "_high", -#else -#define TEXT_FOR_HIGHMEM(xx) -#endif - -#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \ - TEXT_FOR_HIGHMEM(xx) xx "_movable", - -static const char * const vmstat_text[] = { - /* Zoned VM counters */ - "nr_free_pages", - "nr_inactive_anon", - "nr_active_anon", - "nr_inactive_file", - "nr_active_file", - "nr_unevictable", - "nr_mlock", - "nr_anon_pages", - "nr_mapped", - "nr_file_pages", - "nr_dirty", - "nr_writeback", - "nr_slab_reclaimable", - "nr_slab_unreclaimable", - "nr_page_table_pages", - "nr_kernel_stack", - "nr_unstable", - "nr_bounce", - "nr_vmscan_write", - "nr_writeback_temp", - "nr_isolated_anon", - "nr_isolated_file", - "nr_shmem", - "nr_dirtied", - "nr_written", - -#ifdef CONFIG_NUMA - "numa_hit", - "numa_miss", - "numa_foreign", - "numa_interleave", - "numa_local", - "numa_other", -#endif - "nr_anon_transparent_hugepages", - "nr_dirty_threshold", - "nr_dirty_background_threshold", - -#ifdef CONFIG_VM_EVENT_COUNTERS - "pgpgin", - "pgpgout", - "pswpin", - "pswpout", - - TEXTS_FOR_ZONES("pgalloc") - - "pgfree", - "pgactivate", - "pgdeactivate", - - "pgfault", - "pgmajfault", - - TEXTS_FOR_ZONES("pgrefill") - TEXTS_FOR_ZONES("pgsteal") - TEXTS_FOR_ZONES("pgscan_kswapd") - TEXTS_FOR_ZONES("pgscan_direct") - -#ifdef CONFIG_NUMA - "zone_reclaim_failed", -#endif - "pginodesteal", - "slabs_scanned", - "kswapd_steal", - "kswapd_inodesteal", - "kswapd_low_wmark_hit_quickly", - "kswapd_high_wmark_hit_quickly", - "kswapd_skip_congestion_wait", - "pageoutrun", - "allocstall", - - "pgrotated", - -#ifdef CONFIG_COMPACTION - "compact_blocks_moved", - "compact_pages_moved", - "compact_pagemigrate_failed", - "compact_stall", - "compact_fail", - "compact_success", -#endif - -#ifdef CONFIG_HUGETLB_PAGE - "htlb_buddy_alloc_success", - "htlb_buddy_alloc_fail", -#endif - "unevictable_pgs_culled", - "unevictable_pgs_scanned", - "unevictable_pgs_rescued", - "unevictable_pgs_mlocked", - "unevictable_pgs_munlocked", - "unevictable_pgs_cleared", - "unevictable_pgs_stranded", - "unevictable_pgs_mlockfreed", - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - "thp_fault_alloc", - "thp_fault_fallback", - "thp_collapse_alloc", - "thp_collapse_alloc_failed", - "thp_split", -#endif - -#endif /* CONFIG_VM_EVENTS_COUNTERS */ -}; - static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, struct zone *zone) { -- cgit v1.2.3 From f62e00cc3a00bfbd394a79fc22b334c31f91bd5f Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 24 May 2011 17:11:29 -0700 Subject: mm: introduce wait_on_page_locked_killable() commit 2687a356 ("Add lock_page_killable") introduced killable lock_page(). Similarly this patch introdues killable wait_on_page_locked(). Signed-off-by: KOSAKI Motohiro Acked-by: KAMEZAWA Hiroyuki Reviewed-by: Minchan Kim Cc: Matthew Wilcox Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. 
Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 9 +++++++++ mm/filemap.c | 11 +++++++++++ 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index c11950652646..ea268080380d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -357,6 +357,15 @@ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm, */ extern void wait_on_page_bit(struct page *page, int bit_nr); +extern int wait_on_page_bit_killable(struct page *page, int bit_nr); + +static inline int wait_on_page_locked_killable(struct page *page) +{ + if (PageLocked(page)) + return wait_on_page_bit_killable(page, PG_locked); + return 0; +} + /* * Wait for a page to be unlocked. * diff --git a/mm/filemap.c b/mm/filemap.c index c641edf553a9..dea8a38bb2bb 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -562,6 +562,17 @@ void wait_on_page_bit(struct page *page, int bit_nr) } EXPORT_SYMBOL(wait_on_page_bit); +int wait_on_page_bit_killable(struct page *page, int bit_nr) +{ + DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); + + if (!test_bit(bit_nr, &page->flags)) + return 0; + + return __wait_on_bit(page_waitqueue(page), &wait, + sleep_on_page_killable, TASK_KILLABLE); +} + /** * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue * @page: Page defining the wait queue of interest -- cgit v1.2.3 From 37b23e0525d393d48a7d59f870b3bc061a30ccdb Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 24 May 2011 17:11:30 -0700 Subject: x86,mm: make pagefault killable When an oom killing occurs, almost all processes are getting stuck at the following two points. 1) __alloc_pages_nodemask 2) __lock_page_or_retry 1) is not very problematic because TIF_MEMDIE leads to an allocation failure and getting out from page allocator. 2) is more problematic. In an OOM situation, zones typically don't have page cache at all and memory starvation might lead to greatly reduced IO performance. When a fork bomb occurs, TIF_MEMDIE tasks don't die quickly, meaning that a fork bomb may create new process quickly rather than the oom-killer killing it. Then, the system may become livelocked. This patch makes the pagefault interruptible by SIGKILL. Signed-off-by: KOSAKI Motohiro Reviewed-by: KAMEZAWA Hiroyuki Cc: Minchan Kim Cc: Matthew Wilcox Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/fault.c | 12 +++++++++++- include/linux/mm.h | 1 + mm/filemap.c | 31 ++++++++++++++++++++++++------- 3 files changed, 36 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index bcb394dfbb35..f7a2a054a3c0 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -965,7 +965,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) struct mm_struct *mm; int fault; int write = error_code & PF_WRITE; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | (write ? FAULT_FLAG_WRITE : 0); tsk = current; @@ -1138,6 +1138,16 @@ good_area: return; } + /* + * Pagefault was interrupted by SIGKILL. We have no reason to + * continue pagefault. + */ + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { + if (!(error_code & PF_USER)) + no_context(regs, error_code, address); + return; + } + /* * Major/minor page fault accounting is only done on the * initial attempt. 
If we go through a retry, it is extremely diff --git a/include/linux/mm.h b/include/linux/mm.h index 1746f67c33de..57d3d5fade16 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -153,6 +153,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */ #define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */ #define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */ +#define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */ /* * This interface is used by x86 PAT code to identify a pfn mapping that is diff --git a/mm/filemap.c b/mm/filemap.c index dea8a38bb2bb..8144f87dcbb4 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -654,15 +654,32 @@ EXPORT_SYMBOL_GPL(__lock_page_killable); int __lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags) { - if (!(flags & FAULT_FLAG_ALLOW_RETRY)) { - __lock_page(page); - return 1; - } else { - if (!(flags & FAULT_FLAG_RETRY_NOWAIT)) { - up_read(&mm->mmap_sem); + if (flags & FAULT_FLAG_ALLOW_RETRY) { + /* + * CAUTION! In this case, mmap_sem is not released + * even though return 0. + */ + if (flags & FAULT_FLAG_RETRY_NOWAIT) + return 0; + + up_read(&mm->mmap_sem); + if (flags & FAULT_FLAG_KILLABLE) + wait_on_page_locked_killable(page); + else wait_on_page_locked(page); - } return 0; + } else { + if (flags & FAULT_FLAG_KILLABLE) { + int ret; + + ret = __lock_page_killable(page); + if (ret) { + up_read(&mm->mmap_sem); + return 0; + } + } else + __lock_page(page); + return 1; } } -- cgit v1.2.3 From 1b79acc91115ba47e744b70bb166b77bd94f5855 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 24 May 2011 17:11:32 -0700 Subject: mm, mem-hotplug: recalculate lowmem_reserve when memory hotplug occurs Currently, memory hotplug calls setup_per_zone_wmarks() and calculate_zone_inactive_ratio(), but doesn't call setup_per_zone_lowmem_reserve(). It means the number of reserved pages aren't updated even if memory hot plug occur. This patch fixes it. 
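For orientation, here is a rough sketch (not the verbatim kernel code; the body is abbreviated) of the consolidated helper this change relies on, with the function names as they appear in the patches below:

    /* Recompute watermarks and the related per-zone values together, so the
     * memory hotplug paths only need a single call site. */
    int __meminit init_per_zone_wmark_min(void)
    {
            /* ... derive min_free_kbytes from lowmem_kbytes ... */
            setup_per_zone_wmarks();
            setup_per_zone_lowmem_reserve();        /* the step hotplug used to skip */
            setup_per_zone_inactive_ratio();
            return 0;
    }

With that in place, online_pages() and offline_pages() can call init_per_zone_wmark_min() instead of the setup_per_zone_wmarks()/calculate_zone_inactive_ratio() pair.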
Signed-off-by: KOSAKI Motohiro Reviewed-by: KAMEZAWA Hiroyuki Acked-by: Mel Gorman Reviewed-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- mm/memory_hotplug.c | 9 +++++---- mm/page_alloc.c | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 57d3d5fade16..e173cd297d88 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1381,7 +1381,7 @@ extern void set_dma_reserve(unsigned long new_dma_reserve); extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long, enum memmap_context); extern void setup_per_zone_wmarks(void); -extern void calculate_zone_inactive_ratio(struct zone *zone); +extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); extern void __init mmap_init(void); extern void show_mem(unsigned int flags); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 2c4edc459fb0..59ac18fefd65 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -459,8 +459,9 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages) zone_pcp_update(zone); mutex_unlock(&zonelists_mutex); - setup_per_zone_wmarks(); - calculate_zone_inactive_ratio(zone); + + init_per_zone_wmark_min(); + if (onlined_pages) { kswapd_run(zone_to_nid(zone)); node_set_state(zone_to_nid(zone), N_HIGH_MEMORY); @@ -893,8 +894,8 @@ repeat: zone->zone_pgdat->node_present_pages -= offlined_pages; totalram_pages -= offlined_pages; - setup_per_zone_wmarks(); - calculate_zone_inactive_ratio(zone); + init_per_zone_wmark_min(); + if (!node_present_pages(node)) { node_clear_state(node, N_HIGH_MEMORY); kswapd_stop(node); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 56d0be36be9d..e133cea36932 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5094,7 +5094,7 @@ void setup_per_zone_wmarks(void) * 1TB 101 10GB * 10TB 320 32GB */ -void __meminit calculate_zone_inactive_ratio(struct zone *zone) +static void __meminit calculate_zone_inactive_ratio(struct zone *zone) { unsigned int gb, ratio; @@ -5140,7 +5140,7 @@ static void __meminit setup_per_zone_inactive_ratio(void) * 8192MB: 11584k * 16384MB: 16384k */ -static int __init init_per_zone_wmark_min(void) +int __meminit init_per_zone_wmark_min(void) { unsigned long lowmem_kbytes; -- cgit v1.2.3 From a6cccdc36c966e51fd969560d870cfd37afbfa9c Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 24 May 2011 17:11:33 -0700 Subject: mm, mem-hotplug: update pcp->stat_threshold when memory hotplug occur Currently, cpu hotplug updates pcp->stat_threshold, but memory hotplug doesn't. There is no reason for this. 
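A minimal sketch of the resulting interface shape (assumed from the vmstat.h hunk below, not a complete header): with CONFIG_SMP the thresholds are really recomputed, without it the call collapses to an empty inline, so init_per_zone_wmark_min() can call it unconditionally:

    #ifdef CONFIG_SMP
    void refresh_zone_stat_thresholds(void);                /* recomputes pcp->stat_threshold */
    #else
    static inline void refresh_zone_stat_thresholds(void) { }
    #endif

The no-op stub is what the CONFIG_SMP=n build fix noted below amounts to.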
[akpm@linux-foundation.org: fix CONFIG_SMP=n build] Signed-off-by: KOSAKI Motohiro Reviewed-by: KAMEZAWA Hiroyuki Acked-by: Mel Gorman Acked-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 3 +++ mm/page_alloc.c | 2 ++ mm/vmstat.c | 3 +-- 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index e73d1030f2f7..51359837511a 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -261,6 +261,7 @@ extern void dec_zone_state(struct zone *, enum zone_stat_item); extern void __dec_zone_state(struct zone *, enum zone_stat_item); void refresh_cpu_vm_stats(int); +void refresh_zone_stat_thresholds(void); int calculate_pressure_threshold(struct zone *zone); int calculate_normal_threshold(struct zone *zone); @@ -313,6 +314,8 @@ static inline void __dec_zone_page_state(struct page *page, #define set_pgdat_percpu_threshold(pgdat, callback) { } static inline void refresh_cpu_vm_stats(int cpu) { } +static inline void refresh_zone_stat_thresholds(void) { } + #endif /* CONFIG_SMP */ extern const char * const vmstat_text[]; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e133cea36932..77773329aa72 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -5152,6 +5153,7 @@ int __meminit init_per_zone_wmark_min(void) if (min_free_kbytes > 65536) min_free_kbytes = 65536; setup_per_zone_wmarks(); + refresh_zone_stat_thresholds(); setup_per_zone_lowmem_reserve(); setup_per_zone_inactive_ratio(); return 0; diff --git a/mm/vmstat.c b/mm/vmstat.c index 209546a8bdd5..20c18b7694b2 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -157,7 +157,7 @@ int calculate_normal_threshold(struct zone *zone) /* * Refresh the thresholds for each zone. */ -static void refresh_zone_stat_thresholds(void) +void refresh_zone_stat_thresholds(void) { struct zone *zone; int cpu; @@ -1201,7 +1201,6 @@ static int __init setup_vmstat(void) #ifdef CONFIG_SMP int cpu; - refresh_zone_stat_thresholds(); register_cpu_notifier(&vmstat_notifier); for_each_online_cpu(cpu) -- cgit v1.2.3 From 72788c385604523422592249c19cba0187021e9b Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 24 May 2011 17:11:40 -0700 Subject: oom: replace PF_OOM_ORIGIN with toggling oom_score_adj There's a kernel-wide shortage of per-process flags, so it's always helpful to trim one when possible without incurring a significant penalty. It's even more important when you're planning on adding a per- process flag yourself, which I plan to do shortly for transparent hugepages. PF_OOM_ORIGIN is used by ksm and swapoff to prefer current since it has a tendency to allocate large amounts of memory and should be preferred for killing over other tasks. We'd rather immediately kill the task making the errant syscall rather than penalizing an innocent task. This patch removes PF_OOM_ORIGIN since its behavior is equivalent to setting the process's oom_score_adj to OOM_SCORE_ADJ_MAX. The process's old oom_score_adj is stored and then set to OOM_SCORE_ADJ_MAX during the time it used to have PF_OOM_ORIGIN. The old value is then reinstated when the process should no longer be considered a high priority for oom killing. 
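The caller-side pattern this introduces can be sketched as follows (do_memory_hungry_work() is a placeholder for this illustration, not a real kernel function; the ksm and swapoff hunks below follow the same shape with unmerge_and_remove_all_rmap_items() and try_to_unuse()):

    int oom_score_adj;

    oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); /* prefer current for oom kill */
    err = do_memory_hungry_work();
    test_set_oom_score_adj(oom_score_adj);                     /* restore the saved value */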
Signed-off-by: David Rientjes Reviewed-by: KOSAKI Motohiro Reviewed-by: Minchan Kim Cc: Hugh Dickins Cc: Izik Eidus Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 2 ++ include/linux/sched.h | 1 - mm/ksm.c | 7 +++++-- mm/oom_kill.c | 36 +++++++++++++++++++++++++++--------- mm/swapfile.c | 6 ++++-- 5 files changed, 38 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index 5e3aa8311c5e..4952fb874ad3 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -40,6 +40,8 @@ enum oom_constraint { CONSTRAINT_MEMCG, }; +extern int test_set_oom_score_adj(int new_val); + extern unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, const nodemask_t *nodemask, unsigned long totalpages); extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); diff --git a/include/linux/sched.h b/include/linux/sched.h index aaf71e08222c..44b8faaac7c0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1753,7 +1753,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define PF_FROZEN 0x00010000 /* frozen for system suspend */ #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ #define PF_KSWAPD 0x00040000 /* I am kswapd */ -#define PF_OOM_ORIGIN 0x00080000 /* Allocating much memory to others */ #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ diff --git a/mm/ksm.c b/mm/ksm.c index 942dfc73a2ff..d708b3ef2260 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "internal.h" @@ -1894,9 +1895,11 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr, if (ksm_run != flags) { ksm_run = flags; if (flags & KSM_RUN_UNMERGE) { - current->flags |= PF_OOM_ORIGIN; + int oom_score_adj; + + oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); err = unmerge_and_remove_all_rmap_items(); - current->flags &= ~PF_OOM_ORIGIN; + test_set_oom_score_adj(oom_score_adj); if (err) { ksm_run = KSM_RUN_STOP; count = err; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index f52e85c80e8d..e4b0991ca351 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -38,6 +38,33 @@ int sysctl_oom_kill_allocating_task; int sysctl_oom_dump_tasks = 1; static DEFINE_SPINLOCK(zone_scan_lock); +/** + * test_set_oom_score_adj() - set current's oom_score_adj and return old value + * @new_val: new oom_score_adj value + * + * Sets the oom_score_adj value for current to @new_val with proper + * synchronization and returns the old value. Usually used to temporarily + * set a value, save the old value in the caller, and then reinstate it later. 
+ */ +int test_set_oom_score_adj(int new_val) +{ + struct sighand_struct *sighand = current->sighand; + int old_val; + + spin_lock_irq(&sighand->siglock); + old_val = current->signal->oom_score_adj; + if (new_val != old_val) { + if (new_val == OOM_SCORE_ADJ_MIN) + atomic_inc(¤t->mm->oom_disable_count); + else if (old_val == OOM_SCORE_ADJ_MIN) + atomic_dec(¤t->mm->oom_disable_count); + current->signal->oom_score_adj = new_val; + } + spin_unlock_irq(&sighand->siglock); + + return old_val; +} + #ifdef CONFIG_NUMA /** * has_intersects_mems_allowed() - check task eligiblity for kill @@ -154,15 +181,6 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, return 0; } - /* - * When the PF_OOM_ORIGIN bit is set, it indicates the task should have - * priority for oom killing. - */ - if (p->flags & PF_OOM_ORIGIN) { - task_unlock(p); - return 1000; - } - /* * The memory controller may have a limit of 0 bytes, so avoid a divide * by zero, if necessary. diff --git a/mm/swapfile.c b/mm/swapfile.c index 8c6b3ce38f09..d537d29e9b7b 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -1555,6 +1556,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) struct address_space *mapping; struct inode *inode; char *pathname; + int oom_score_adj; int i, type, prev; int err; @@ -1613,9 +1615,9 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) p->flags &= ~SWP_WRITEOK; spin_unlock(&swap_lock); - current->flags |= PF_OOM_ORIGIN; + oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); err = try_to_unuse(type); - current->flags &= ~PF_OOM_ORIGIN; + test_set_oom_score_adj(oom_score_adj); if (err) { /* -- cgit v1.2.3 From 15fa8f425557a0d698f933627771f520ef4ae34b Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 24 May 2011 17:11:41 -0700 Subject: include/linux/gfp.h: work around apparent sparse confusion Running sparse on page_alloc.c today, it errors out: include/linux/gfp.h:254:17: error: bad constant expression include/linux/gfp.h:254:17: error: cannot size expression which is a line in gfp_zone(): BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1); That's really unfortunate, because it ends up hiding all of the other legitimate sparse messages like this: mm/page_alloc.c:5315:59: warning: incorrect type in argument 1 (different base types) mm/page_alloc.c:5315:59: expected unsigned long [unsigned] [usertype] size mm/page_alloc.c:5315:59: got restricted gfp_t [usertype] ... Having sparse be able to catch these very oopsable bugs is a lot more important than keeping a BUILD_BUG_ON(). Kill the BUILD_BUG_ON(). Compiles on x86_64 with and without CONFIG_DEBUG_VM=y. defconfig boots fine for me. 
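The trade-off in miniature (illustrative only; the real hunk is in the diff below): the compile-time assertion needs a constant expression, which is what sparse trips over, while the runtime check kept under CONFIG_DEBUG_VM is evaluated when gfp_zone() actually runs:

    /* removed: only valid for __builtin_constant_p(bit), and breaks sparse */
    BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1);

    /* kept, under CONFIG_DEBUG_VM: checked at run time instead */
    BUG_ON((GFP_ZONE_BAD >> bit) & 1);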
Signed-off-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 56d8fc87fbbc..7e8f5d863c76 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -249,14 +249,9 @@ static inline enum zone_type gfp_zone(gfp_t flags) z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) & ((1 << ZONES_SHIFT) - 1); - - if (__builtin_constant_p(bit)) - BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1); - else { #ifdef CONFIG_DEBUG_VM - BUG_ON((GFP_ZONE_BAD >> bit) & 1); + BUG_ON((GFP_ZONE_BAD >> bit) & 1); #endif - } return z; } -- cgit v1.2.3 From 82d4b5779a75887750748609f3415f01c1bb9f81 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 24 May 2011 17:11:42 -0700 Subject: include/linux/gfp.h: convert BUG_ON() into VM_BUG_ON() VM_BUG_ON() is effectively a BUG_ON() under #ifdef CONFIG_DEBUG_VM. That is exactly what we have here now, and two different folks have suggested doing it this way. Signed-off-by: Dave Hansen Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 7e8f5d863c76..cb4089254f01 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -249,9 +249,7 @@ static inline enum zone_type gfp_zone(gfp_t flags) z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) & ((1 << ZONES_SHIFT) - 1); -#ifdef CONFIG_DEBUG_VM - BUG_ON((GFP_ZONE_BAD >> bit) & 1); -#endif + VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1); return z; } -- cgit v1.2.3 From d05f3169c0fbca16132ec7c2be71685c6de638b5 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 24 May 2011 17:11:44 -0700 Subject: mm: make expand_downwards() symmetrical with expand_upwards() Currently we have expand_upwards exported while expand_downwards is accessible only via expand_stack or expand_stack_downwards. check_stack_guard_page is a nice example of the asymmetry. It uses expand_stack for VM_GROWSDOWN while expand_upwards is called for the VM_GROWSUP case. Let's clean this up by exporting both functions and making those names consistent. Let's use expand_{upwards,downwards} because expanding doesn't always involve stack manipulation (an example is ia64_do_page_fault which uses expand_upwards for registers backing store expansion). expand_downwards has to be defined for both CONFIG_STACK_GROWS{UP,DOWN} because get_arg_page calls the downwards version in the early process initialization phase for growsup configuration. 
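In caller terms the symmetry looks roughly like this (an illustrative sketch, not the exact check_stack_guard_page() logic from the mm/memory.c hunk below):

    if (vma->vm_flags & VM_GROWSDOWN)
            expand_downwards(vma, address - PAGE_SIZE);     /* was expand_stack()/expand_stack_downwards() */
    else if (vma->vm_flags & VM_GROWSUP)
            expand_upwards(vma, address + PAGE_SIZE);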
Signed-off-by: Michal Hocko Acked-by: Hugh Dickins Cc: James Bottomley Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- include/linux/mm.h | 8 +++++--- mm/memory.c | 2 +- mm/mmap.c | 7 +------ 4 files changed, 8 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index c1cf372f17a7..e276d5e0abb9 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -200,7 +200,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, #ifdef CONFIG_STACK_GROWSUP if (write) { - ret = expand_stack_downwards(bprm->vma, pos); + ret = expand_downwards(bprm->vma, pos); if (ret < 0) return NULL; } diff --git a/include/linux/mm.h b/include/linux/mm.h index e173cd297d88..d2948af126ca 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1518,15 +1518,17 @@ unsigned long ra_submit(struct file_ra_state *ra, struct address_space *mapping, struct file *filp); -/* Do stack extension */ +/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); + +/* CONFIG_STACK_GROWSUP still needs to to grow downwards at some places */ +extern int expand_downwards(struct vm_area_struct *vma, + unsigned long address); #if VM_GROWSUP extern int expand_upwards(struct vm_area_struct *vma, unsigned long address); #else #define expand_upwards(vma, address) do { } while (0) #endif -extern int expand_stack_downwards(struct vm_area_struct *vma, - unsigned long address); /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr); diff --git a/mm/memory.c b/mm/memory.c index 61e66f026563..4c6ea10f3d18 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2966,7 +2966,7 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo if (prev && prev->vm_end == address) return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM; - expand_stack(vma, address - PAGE_SIZE); + expand_downwards(vma, address - PAGE_SIZE); } if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) { struct vm_area_struct *next = vma->vm_next; diff --git a/mm/mmap.c b/mm/mmap.c index e76f8d752884..adb12527fd0e 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1774,7 +1774,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) /* * vma is the first one with address < vma->vm_start. Have to extend vma. */ -static int expand_downwards(struct vm_area_struct *vma, +int expand_downwards(struct vm_area_struct *vma, unsigned long address) { int error; @@ -1821,11 +1821,6 @@ static int expand_downwards(struct vm_area_struct *vma, return error; } -int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address) -{ - return expand_downwards(vma, address); -} - #ifdef CONFIG_STACK_GROWSUP int expand_stack(struct vm_area_struct *vma, unsigned long address) { -- cgit v1.2.3 From d16dfc550f5326a4000f3322582a7c05dec91d7a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 May 2011 17:11:45 -0700 Subject: mm: mmu_gather rework Rework the existing mmu_gather infrastructure. The direct purpose of these patches was to allow preemptible mmu_gather, but even without that I think these patches provide an improvement to the status quo. The first 9 patches rework the mmu_gather infrastructure. For review purpose I've split them into generic and per-arch patches with the last of those a generic cleanup. 
The next patch provides generic RCU page-table freeing, and the followup is a patch converting s390 to use this. I've also got 4 patches from DaveM lined up (not included in this series) that uses this to implement gup_fast() for sparc64. Then there is one patch that extends the generic mmu_gather batching. After that follow the mm preemptibility patches, these make part of the mm a lot more preemptible. It converts i_mmap_lock and anon_vma->lock to mutexes which together with the mmu_gather rework makes mmu_gather preemptible as well. Making i_mmap_lock a mutex also enables a clean-up of the truncate code. This also allows for preemptible mmu_notifiers, something that XPMEM I think wants. Furthermore, it removes the new and universially detested unmap_mutex. This patch: Remove the first obstacle towards a fully preemptible mmu_gather. The current scheme assumes mmu_gather is always done with preemption disabled and uses per-cpu storage for the page batches. Change this to try and allocate a page for batching and in case of failure, use a small on-stack array to make some progress. Preemptible mmu_gather is desired in general and usable once i_mmap_lock becomes a mutex. Doing it before the mutex conversion saves us from having to rework the code by moving the mmu_gather bits inside the pte_lock. Also avoid flushing the tlb batches from under the pte lock, this is useful even without the i_mmap_lock conversion as it significantly reduces pte lock hold times. [akpm@linux-foundation.org: fix comment tpyo] Signed-off-by: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: David Miller Cc: Martin Schwidefsky Cc: Russell King Cc: Paul Mundt Cc: Jeff Dike Cc: Richard Weinberger Cc: Tony Luck Reviewed-by: KAMEZAWA Hiroyuki Acked-by: Hugh Dickins Acked-by: Mel Gorman Cc: KOSAKI Motohiro Cc: Nick Piggin Cc: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 10 ++--- include/asm-generic/tlb.h | 96 ++++++++++++++++++++++++++++++++++------------- include/linux/mm.h | 2 +- mm/memory.c | 46 +++++++++++------------ mm/mmap.c | 18 ++++----- 5 files changed, 107 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index e276d5e0abb9..936f5776655c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -600,7 +600,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) unsigned long length = old_end - old_start; unsigned long new_start = old_start - shift; unsigned long new_end = old_end - shift; - struct mmu_gather *tlb; + struct mmu_gather tlb; BUG_ON(new_start > new_end); @@ -626,12 +626,12 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) return -ENOMEM; lru_add_drain(); - tlb = tlb_gather_mmu(mm, 0); + tlb_gather_mmu(&tlb, mm, 0); if (new_end > old_start) { /* * when the old and new regions overlap clear from new_end. */ - free_pgd_range(tlb, new_end, old_end, new_end, + free_pgd_range(&tlb, new_end, old_end, new_end, vma->vm_next ? vma->vm_next->vm_start : 0); } else { /* @@ -640,10 +640,10 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) * have constraints on va-space that make this illegal (IA64) - * for the others its just a little faster. */ - free_pgd_range(tlb, old_start, old_end, new_end, + free_pgd_range(&tlb, old_start, old_end, new_end, vma->vm_next ? vma->vm_next->vm_start : 0); } - tlb_finish_mmu(tlb, new_end, old_end); + tlb_finish_mmu(&tlb, new_end, old_end); /* * Shrink the vma to just the new range. Always succeeds. 
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index e43f9766259f..2d3547c84235 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -5,6 +5,8 @@ * Copyright 2001 Red Hat, Inc. * Based on code from mm/memory.c Copyright Linus Torvalds and others. * + * Copyright 2011 Red Hat, Inc., Peter Zijlstra + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -22,51 +24,71 @@ * and page free order so much.. */ #ifdef CONFIG_SMP - #ifdef ARCH_FREE_PTR_NR - #define FREE_PTR_NR ARCH_FREE_PTR_NR - #else - #define FREE_PTE_NR 506 - #endif #define tlb_fast_mode(tlb) ((tlb)->nr == ~0U) #else - #define FREE_PTE_NR 1 #define tlb_fast_mode(tlb) 1 #endif +/* + * If we can't allocate a page to make a big batch of page pointers + * to work on, then just handle a few from the on-stack structure. + */ +#define MMU_GATHER_BUNDLE 8 + /* struct mmu_gather is an opaque type used by the mm code for passing around * any data needed by arch specific code for tlb_remove_page. */ struct mmu_gather { struct mm_struct *mm; unsigned int nr; /* set to ~0U means fast mode */ + unsigned int max; /* nr < max */ unsigned int need_flush;/* Really unmapped some ptes? */ unsigned int fullmm; /* non-zero means full mm flush */ - struct page * pages[FREE_PTE_NR]; +#ifdef HAVE_ARCH_MMU_GATHER + struct arch_mmu_gather arch; +#endif + struct page **pages; + struct page *local[MMU_GATHER_BUNDLE]; }; -/* Users of the generic TLB shootdown code must declare this storage space. */ -DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); +static inline void __tlb_alloc_page(struct mmu_gather *tlb) +{ + unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); + + if (addr) { + tlb->pages = (void *)addr; + tlb->max = PAGE_SIZE / sizeof(struct page *); + } +} /* tlb_gather_mmu - * Return a pointer to an initialized struct mmu_gather. + * Called to initialize an (on-stack) mmu_gather structure for page-table + * tear-down from @mm. The @fullmm argument is used when @mm is without + * users and we're going to destroy the full address space (exit/execve). */ -static inline struct mmu_gather * -tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush) +static inline void +tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm) { - struct mmu_gather *tlb = &get_cpu_var(mmu_gathers); - tlb->mm = mm; - /* Use fast mode if only one CPU is online */ - tlb->nr = num_online_cpus() > 1 ? 0U : ~0U; + tlb->max = ARRAY_SIZE(tlb->local); + tlb->pages = tlb->local; + + if (num_online_cpus() > 1) { + tlb->nr = 0; + __tlb_alloc_page(tlb); + } else /* Use fast mode if only one CPU is online */ + tlb->nr = ~0U; - tlb->fullmm = full_mm_flush; + tlb->fullmm = fullmm; - return tlb; +#ifdef HAVE_ARCH_MMU_GATHER + tlb->arch = ARCH_MMU_GATHER_INIT; +#endif } static inline void -tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +tlb_flush_mmu(struct mmu_gather *tlb) { if (!tlb->need_flush) return; @@ -75,6 +97,13 @@ tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) if (!tlb_fast_mode(tlb)) { free_pages_and_swap_cache(tlb->pages, tlb->nr); tlb->nr = 0; + /* + * If we are using the local on-stack array of pages for MMU + * gather, try allocating an off-stack array again as we have + * recently freed pages. 
+ */ + if (tlb->pages == tlb->local) + __tlb_alloc_page(tlb); } } @@ -85,29 +114,42 @@ tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) static inline void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { - tlb_flush_mmu(tlb, start, end); + tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ check_pgt_cache(); - put_cpu_var(mmu_gathers); + if (tlb->pages != tlb->local) + free_pages((unsigned long)tlb->pages, 0); } -/* tlb_remove_page +/* __tlb_remove_page * Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while * handling the additional races in SMP caused by other CPUs caching valid - * mappings in their TLBs. + * mappings in their TLBs. Returns the number of free page slots left. + * When out of page slots we must call tlb_flush_mmu(). */ -static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) +static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) { tlb->need_flush = 1; if (tlb_fast_mode(tlb)) { free_page_and_swap_cache(page); - return; + return 1; /* avoid calling tlb_flush_mmu() */ } tlb->pages[tlb->nr++] = page; - if (tlb->nr >= FREE_PTE_NR) - tlb_flush_mmu(tlb, 0, 0); + VM_BUG_ON(tlb->nr > tlb->max); + + return tlb->max - tlb->nr; +} + +/* tlb_remove_page + * Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when + * required. + */ +static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) +{ + if (!__tlb_remove_page(tlb, page)) + tlb_flush_mmu(tlb); } /** diff --git a/include/linux/mm.h b/include/linux/mm.h index d2948af126ca..ffcce9bf2b54 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -906,7 +906,7 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *); -unsigned long unmap_vmas(struct mmu_gather **tlb, +unsigned long unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long start_addr, unsigned long end_addr, unsigned long *nr_accounted, struct zap_details *); diff --git a/mm/memory.c b/mm/memory.c index 4c6ea10f3d18..19b2d44de9f0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -912,12 +912,13 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, long *zap_work, struct zap_details *details) { struct mm_struct *mm = tlb->mm; + int force_flush = 0; pte_t *pte; spinlock_t *ptl; int rss[NR_MM_COUNTERS]; init_rss_vec(rss); - +again: pte = pte_offset_map_lock(mm, pmd, addr, &ptl); arch_enter_lazy_mmu_mode(); do { @@ -974,7 +975,9 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, page_remove_rmap(page); if (unlikely(page_mapcount(page) < 0)) print_bad_pte(vma, addr, ptent, page); - tlb_remove_page(tlb, page); + force_flush = !__tlb_remove_page(tlb, page); + if (force_flush) + break; continue; } /* @@ -1001,6 +1004,18 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, arch_leave_lazy_mmu_mode(); pte_unmap_unlock(pte - 1, ptl); + /* + * mmu_gather ran out of room to batch pages, we break out of + * the PTE lock to avoid doing the potential expensive TLB invalidate + * and page-free while holding it. 
+ */ + if (force_flush) { + force_flush = 0; + tlb_flush_mmu(tlb); + if (addr != end) + goto again; + } + return addr; } @@ -1121,17 +1136,14 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb, * ensure that any thus-far unmapped pages are flushed before unmap_vmas() * drops the lock and schedules. */ -unsigned long unmap_vmas(struct mmu_gather **tlbp, +unsigned long unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start_addr, unsigned long end_addr, unsigned long *nr_accounted, struct zap_details *details) { long zap_work = ZAP_BLOCK_SIZE; - unsigned long tlb_start = 0; /* For tlb_finish_mmu */ - int tlb_start_valid = 0; unsigned long start = start_addr; spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL; - int fullmm = (*tlbp)->fullmm; struct mm_struct *mm = vma->vm_mm; mmu_notifier_invalidate_range_start(mm, start_addr, end_addr); @@ -1152,11 +1164,6 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, untrack_pfn_vma(vma, 0, 0); while (start != end) { - if (!tlb_start_valid) { - tlb_start = start; - tlb_start_valid = 1; - } - if (unlikely(is_vm_hugetlb_page(vma))) { /* * It is undesirable to test vma->vm_file as it @@ -1177,7 +1184,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, start = end; } else - start = unmap_page_range(*tlbp, vma, + start = unmap_page_range(tlb, vma, start, end, &zap_work, details); if (zap_work > 0) { @@ -1185,19 +1192,13 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, break; } - tlb_finish_mmu(*tlbp, tlb_start, start); - if (need_resched() || (i_mmap_lock && spin_needbreak(i_mmap_lock))) { - if (i_mmap_lock) { - *tlbp = NULL; + if (i_mmap_lock) goto out; - } cond_resched(); } - *tlbp = tlb_gather_mmu(vma->vm_mm, fullmm); - tlb_start_valid = 0; zap_work = ZAP_BLOCK_SIZE; } } @@ -1217,16 +1218,15 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *details) { struct mm_struct *mm = vma->vm_mm; - struct mmu_gather *tlb; + struct mmu_gather tlb; unsigned long end = address + size; unsigned long nr_accounted = 0; lru_add_drain(); - tlb = tlb_gather_mmu(mm, 0); + tlb_gather_mmu(&tlb, mm, 0); update_hiwater_rss(mm); end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details); - if (tlb) - tlb_finish_mmu(tlb, address, end); + tlb_finish_mmu(&tlb, address, end); return end; } diff --git a/mm/mmap.c b/mm/mmap.c index adb12527fd0e..40d49986e714 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1903,17 +1903,17 @@ static void unmap_region(struct mm_struct *mm, unsigned long start, unsigned long end) { struct vm_area_struct *next = prev? prev->vm_next: mm->mmap; - struct mmu_gather *tlb; + struct mmu_gather tlb; unsigned long nr_accounted = 0; lru_add_drain(); - tlb = tlb_gather_mmu(mm, 0); + tlb_gather_mmu(&tlb, mm, 0); update_hiwater_rss(mm); unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL); vm_unacct_memory(nr_accounted); - free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS, - next? next->vm_start: 0); - tlb_finish_mmu(tlb, start, end); + free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, + next ? next->vm_start : 0); + tlb_finish_mmu(&tlb, start, end); } /* @@ -2255,7 +2255,7 @@ EXPORT_SYMBOL(do_brk); /* Release all mmaps. 
*/ void exit_mmap(struct mm_struct *mm) { - struct mmu_gather *tlb; + struct mmu_gather tlb; struct vm_area_struct *vma; unsigned long nr_accounted = 0; unsigned long end; @@ -2280,14 +2280,14 @@ void exit_mmap(struct mm_struct *mm) lru_add_drain(); flush_cache_mm(mm); - tlb = tlb_gather_mmu(mm, 1); + tlb_gather_mmu(&tlb, mm, 1); /* update_hiwater_rss(mm) here? but nobody should be looking */ /* Use -1 here to ensure all VMAs in the mm are unmapped */ end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL); vm_unacct_memory(nr_accounted); - free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0); - tlb_finish_mmu(tlb, 0, end); + free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0); + tlb_finish_mmu(&tlb, 0, end); /* * Walk the list again, actually closing and freeing it, -- cgit v1.2.3 From e4c70a6629f9c74c4b0de258a3951890e9047c82 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 May 2011 17:12:03 -0700 Subject: lockdep, mutex: provide mutex_lock_nest_lock In order to convert i_mmap_lock to a mutex we need a mutex equivalent to spin_lock_nest_lock(), thus provide the mutex_lock_nest_lock() annotation. As with spin_lock_nest_lock(), mutex_lock_nest_lock() allows annotation of the locking pattern where an outer lock serializes the acquisition order of nested locks. That is, if every time you lock multiple locks A, say A1 and A2 you first acquire N, the order of acquiring A1 and A2 is irrelevant. Signed-off-by: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: David Miller Cc: Martin Schwidefsky Cc: Russell King Cc: Paul Mundt Cc: Jeff Dike Cc: Richard Weinberger Cc: Tony Luck Cc: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Nick Piggin Cc: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lockdep.h | 3 +++ include/linux/mutex.h | 9 +++++++++ kernel/mutex.c | 25 +++++++++++++++++-------- 3 files changed, 29 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 4aef1dda6406..ef820a3c378b 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -487,12 +487,15 @@ static inline void print_irqtrace_events(struct task_struct *curr) #ifdef CONFIG_DEBUG_LOCK_ALLOC # ifdef CONFIG_PROVE_LOCKING # define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, NULL, i) +# define mutex_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 2, n, i) # else # define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, NULL, i) +# define mutex_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i) # endif # define mutex_release(l, n, i) lock_release(l, n, i) #else # define mutex_acquire(l, s, t, i) do { } while (0) +# define mutex_acquire_nest(l, s, t, n, i) do { } while (0) # define mutex_release(l, n, i) do { } while (0) #endif diff --git a/include/linux/mutex.h b/include/linux/mutex.h index c75471db576e..a940fe435aca 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -132,6 +132,7 @@ static inline int mutex_is_locked(struct mutex *lock) */ #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass); +extern void _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass); extern int __must_check mutex_lock_killable_nested(struct mutex *lock, @@ -140,6 +141,13 @@ extern int __must_check mutex_lock_killable_nested(struct mutex *lock, #define mutex_lock(lock) mutex_lock_nested(lock, 0) 
#define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0) #define mutex_lock_killable(lock) mutex_lock_killable_nested(lock, 0) + +#define mutex_lock_nest_lock(lock, nest_lock) \ +do { \ + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ + _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ +} while (0) + #else extern void mutex_lock(struct mutex *lock); extern int __must_check mutex_lock_interruptible(struct mutex *lock); @@ -148,6 +156,7 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); # define mutex_lock_nested(lock, subclass) mutex_lock(lock) # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock) +# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) #endif /* diff --git a/kernel/mutex.c b/kernel/mutex.c index 2c938e2337cd..d607ed5dd441 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -131,14 +131,14 @@ EXPORT_SYMBOL(mutex_unlock); */ static inline int __sched __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, - unsigned long ip) + struct lockdep_map *nest_lock, unsigned long ip) { struct task_struct *task = current; struct mutex_waiter waiter; unsigned long flags; preempt_disable(); - mutex_acquire(&lock->dep_map, subclass, 0, ip); + mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER /* @@ -269,16 +269,25 @@ void __sched mutex_lock_nested(struct mutex *lock, unsigned int subclass) { might_sleep(); - __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, _RET_IP_); + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_); } EXPORT_SYMBOL_GPL(mutex_lock_nested); +void __sched +_mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) +{ + might_sleep(); + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, nest, _RET_IP_); +} + +EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock); + int __sched mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass) { might_sleep(); - return __mutex_lock_common(lock, TASK_KILLABLE, subclass, _RET_IP_); + return __mutex_lock_common(lock, TASK_KILLABLE, subclass, NULL, _RET_IP_); } EXPORT_SYMBOL_GPL(mutex_lock_killable_nested); @@ -287,7 +296,7 @@ mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) { might_sleep(); return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, - subclass, _RET_IP_); + subclass, NULL, _RET_IP_); } EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); @@ -393,7 +402,7 @@ __mutex_lock_slowpath(atomic_t *lock_count) { struct mutex *lock = container_of(lock_count, struct mutex, count); - __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_); + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_); } static noinline int __sched @@ -401,7 +410,7 @@ __mutex_lock_killable_slowpath(atomic_t *lock_count) { struct mutex *lock = container_of(lock_count, struct mutex, count); - return __mutex_lock_common(lock, TASK_KILLABLE, 0, _RET_IP_); + return __mutex_lock_common(lock, TASK_KILLABLE, 0, NULL, _RET_IP_); } static noinline int __sched @@ -409,7 +418,7 @@ __mutex_lock_interruptible_slowpath(atomic_t *lock_count) { struct mutex *lock = container_of(lock_count, struct mutex, count); - return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, _RET_IP_); + return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_); } #endif -- cgit v1.2.3 From 97a894136f29802da19a15541de3c019e1ca147e Mon Sep 17 00:00:00 2001 From: Peter 
Zijlstra Date: Tue, 24 May 2011 17:12:04 -0700 Subject: mm: Remove i_mmap_lock lockbreak Hugh says: "The only significant loser, I think, would be page reclaim (when concurrent with truncation): could spin for a long time waiting for the i_mmap_mutex it expects would soon be dropped? " Counter points: - cpu contention makes the spin stop (need_resched()) - zap pages should be freeing pages at a higher rate than reclaim ever can I think the simplification of the truncate code is definitely worth it. Effectively reverts: 2aa15890f3c ("mm: prevent concurrent unmap_mapping_range() on the same inode") and takes out the code that caused its problem. Signed-off-by: Peter Zijlstra Reviewed-by: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Benjamin Herrenschmidt Cc: David Miller Cc: Martin Schwidefsky Cc: Russell King Cc: Paul Mundt Cc: Jeff Dike Cc: Richard Weinberger Cc: Tony Luck Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Nick Piggin Cc: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 1 - include/linux/fs.h | 2 - include/linux/mm.h | 2 - include/linux/mm_types.h | 1 - kernel/fork.c | 1 - mm/memory.c | 195 +++++++---------------------------------------- mm/mmap.c | 13 +--- mm/mremap.c | 1 - 8 files changed, 28 insertions(+), 188 deletions(-) (limited to 'include/linux') diff --git a/fs/inode.c b/fs/inode.c index 05f4fa521325..7a7284c71abd 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -331,7 +331,6 @@ void address_space_init_once(struct address_space *mapping) spin_lock_init(&mapping->private_lock); INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap); INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); - mutex_init(&mapping->unmap_mutex); } EXPORT_SYMBOL(address_space_init_once); diff --git a/include/linux/fs.h b/include/linux/fs.h index cdf9495df204..5d2c86bdf5ba 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -635,7 +635,6 @@ struct address_space { struct prio_tree_root i_mmap; /* tree of private and shared mappings */ struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ spinlock_t i_mmap_lock; /* protect tree, count, list */ - unsigned int truncate_count; /* Cover race condition with truncate */ unsigned long nrpages; /* number of total pages */ pgoff_t writeback_index;/* writeback starts here */ const struct address_space_operations *a_ops; /* methods */ @@ -644,7 +643,6 @@ struct address_space { spinlock_t private_lock; /* for use by the address_space */ struct list_head private_list; /* ditto */ struct address_space *assoc_mapping; /* ditto */ - struct mutex unmap_mutex; /* to protect unmapping */ } __attribute__((aligned(sizeof(long)))); /* * On most architectures that alignment is already the case; but diff --git a/include/linux/mm.h b/include/linux/mm.h index ffcce9bf2b54..2ad0ac8c3f32 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -895,8 +895,6 @@ struct zap_details { struct address_space *check_mapping; /* Check page->mapping if set */ pgoff_t first_index; /* Lowest page->index to unmap */ pgoff_t last_index; /* Highest page->index to unmap */ - spinlock_t *i_mmap_lock; /* For unmap_mapping_range: */ - unsigned long truncate_count; /* Compare vm_truncate_count */ }; struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 02aa5619709b..201998e5b530 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -175,7 +175,6 @@ struct vm_area_struct { units, *not* PAGE_CACHE_SIZE */ struct file * vm_file; /* File we map to (can be NULL). 
*/ void * vm_private_data; /* was vm_pte (shared mem) */ - unsigned long vm_truncate_count;/* truncate_count or restart_addr */ #ifndef CONFIG_MMU struct vm_region *vm_region; /* NOMMU mapping region */ diff --git a/kernel/fork.c b/kernel/fork.c index 2b44d82b8237..4eef925477fc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -386,7 +386,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) spin_lock(&mapping->i_mmap_lock); if (tmp->vm_flags & VM_SHARED) mapping->i_mmap_writable++; - tmp->vm_truncate_count = mpnt->vm_truncate_count; flush_dcache_mmap_lock(mapping); /* insert tmp into the share list, just after mpnt */ vma_prio_tree_add(tmp, mpnt); diff --git a/mm/memory.c b/mm/memory.c index 17193d74f302..18655878b9f8 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -986,13 +986,13 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, static unsigned long zap_pte_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, - long *zap_work, struct zap_details *details) + struct zap_details *details) { struct mm_struct *mm = tlb->mm; int force_flush = 0; - pte_t *pte; - spinlock_t *ptl; int rss[NR_MM_COUNTERS]; + spinlock_t *ptl; + pte_t *pte; again: init_rss_vec(rss); @@ -1001,12 +1001,9 @@ again: do { pte_t ptent = *pte; if (pte_none(ptent)) { - (*zap_work)--; continue; } - (*zap_work) -= PAGE_SIZE; - if (pte_present(ptent)) { struct page *page; @@ -1075,7 +1072,7 @@ again: print_bad_pte(vma, addr, ptent, NULL); } pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); - } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0)); + } while (pte++, addr += PAGE_SIZE, addr != end); add_mm_rss_vec(mm, rss); arch_leave_lazy_mmu_mode(); @@ -1099,7 +1096,7 @@ again: static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, - long *zap_work, struct zap_details *details) + struct zap_details *details) { pmd_t *pmd; unsigned long next; @@ -1111,19 +1108,15 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, if (next-addr != HPAGE_PMD_SIZE) { VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem)); split_huge_page_pmd(vma->vm_mm, pmd); - } else if (zap_huge_pmd(tlb, vma, pmd)) { - (*zap_work)--; + } else if (zap_huge_pmd(tlb, vma, pmd)) continue; - } /* fall through */ } - if (pmd_none_or_clear_bad(pmd)) { - (*zap_work)--; + if (pmd_none_or_clear_bad(pmd)) continue; - } - next = zap_pte_range(tlb, vma, pmd, addr, next, - zap_work, details); - } while (pmd++, addr = next, (addr != end && *zap_work > 0)); + next = zap_pte_range(tlb, vma, pmd, addr, next, details); + cond_resched(); + } while (pmd++, addr = next, addr != end); return addr; } @@ -1131,7 +1124,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, static inline unsigned long zap_pud_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr, unsigned long end, - long *zap_work, struct zap_details *details) + struct zap_details *details) { pud_t *pud; unsigned long next; @@ -1139,13 +1132,10 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb, pud = pud_offset(pgd, addr); do { next = pud_addr_end(addr, end); - if (pud_none_or_clear_bad(pud)) { - (*zap_work)--; + if (pud_none_or_clear_bad(pud)) continue; - } - next = zap_pmd_range(tlb, vma, pud, addr, next, - zap_work, details); - } while (pud++, addr = next, (addr != end && *zap_work > 0)); + next = zap_pmd_range(tlb, vma, pud, addr, next, 
details); + } while (pud++, addr = next, addr != end); return addr; } @@ -1153,7 +1143,7 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb, static unsigned long unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end, - long *zap_work, struct zap_details *details) + struct zap_details *details) { pgd_t *pgd; unsigned long next; @@ -1167,13 +1157,10 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb, pgd = pgd_offset(vma->vm_mm, addr); do { next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) { - (*zap_work)--; + if (pgd_none_or_clear_bad(pgd)) continue; - } - next = zap_pud_range(tlb, vma, pgd, addr, next, - zap_work, details); - } while (pgd++, addr = next, (addr != end && *zap_work > 0)); + next = zap_pud_range(tlb, vma, pgd, addr, next, details); + } while (pgd++, addr = next, addr != end); tlb_end_vma(tlb, vma); mem_cgroup_uncharge_end(); @@ -1218,9 +1205,7 @@ unsigned long unmap_vmas(struct mmu_gather *tlb, unsigned long end_addr, unsigned long *nr_accounted, struct zap_details *details) { - long zap_work = ZAP_BLOCK_SIZE; unsigned long start = start_addr; - spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL; struct mm_struct *mm = vma->vm_mm; mmu_notifier_invalidate_range_start(mm, start_addr, end_addr); @@ -1253,33 +1238,15 @@ unsigned long unmap_vmas(struct mmu_gather *tlb, * Since no pte has actually been setup, it is * safe to do nothing in this case. */ - if (vma->vm_file) { + if (vma->vm_file) unmap_hugepage_range(vma, start, end, NULL); - zap_work -= (end - start) / - pages_per_huge_page(hstate_vma(vma)); - } start = end; } else - start = unmap_page_range(tlb, vma, - start, end, &zap_work, details); - - if (zap_work > 0) { - BUG_ON(start != end); - break; - } - - if (need_resched() || - (i_mmap_lock && spin_needbreak(i_mmap_lock))) { - if (i_mmap_lock) - goto out; - cond_resched(); - } - - zap_work = ZAP_BLOCK_SIZE; + start = unmap_page_range(tlb, vma, start, end, details); } } -out: + mmu_notifier_invalidate_range_end(mm, start_addr, end_addr); return start; /* which is now the end (or restart) address */ } @@ -2612,96 +2579,11 @@ unwritable_page: return ret; } -/* - * Helper functions for unmap_mapping_range(). - * - * __ Notes on dropping i_mmap_lock to reduce latency while unmapping __ - * - * We have to restart searching the prio_tree whenever we drop the lock, - * since the iterator is only valid while the lock is held, and anyway - * a later vma might be split and reinserted earlier while lock dropped. - * - * The list of nonlinear vmas could be handled more efficiently, using - * a placeholder, but handle it in the same way until a need is shown. - * It is important to search the prio_tree before nonlinear list: a vma - * may become nonlinear and be shifted from prio_tree to nonlinear list - * while the lock is dropped; but never shifted from list to prio_tree. - * - * In order to make forward progress despite restarting the search, - * vm_truncate_count is used to mark a vma as now dealt with, so we can - * quickly skip it next time around. Since the prio_tree search only - * shows us those vmas affected by unmapping the range in question, we - * can't efficiently keep all vmas in step with mapping->truncate_count: - * so instead reset them all whenever it wraps back to 0 (then go to 1). - * mapping->truncate_count and vma->vm_truncate_count are protected by - * i_mmap_lock. 
- * - * In order to make forward progress despite repeatedly restarting some - * large vma, note the restart_addr from unmap_vmas when it breaks out: - * and restart from that address when we reach that vma again. It might - * have been split or merged, shrunk or extended, but never shifted: so - * restart_addr remains valid so long as it remains in the vma's range. - * unmap_mapping_range forces truncate_count to leap over page-aligned - * values so we can save vma's restart_addr in its truncate_count field. - */ -#define is_restart_addr(truncate_count) (!((truncate_count) & ~PAGE_MASK)) - -static void reset_vma_truncate_counts(struct address_space *mapping) -{ - struct vm_area_struct *vma; - struct prio_tree_iter iter; - - vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX) - vma->vm_truncate_count = 0; - list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) - vma->vm_truncate_count = 0; -} - -static int unmap_mapping_range_vma(struct vm_area_struct *vma, +static void unmap_mapping_range_vma(struct vm_area_struct *vma, unsigned long start_addr, unsigned long end_addr, struct zap_details *details) { - unsigned long restart_addr; - int need_break; - - /* - * files that support invalidating or truncating portions of the - * file from under mmaped areas must have their ->fault function - * return a locked page (and set VM_FAULT_LOCKED in the return). - * This provides synchronisation against concurrent unmapping here. - */ - -again: - restart_addr = vma->vm_truncate_count; - if (is_restart_addr(restart_addr) && start_addr < restart_addr) { - start_addr = restart_addr; - if (start_addr >= end_addr) { - /* Top of vma has been split off since last time */ - vma->vm_truncate_count = details->truncate_count; - return 0; - } - } - - restart_addr = zap_page_range(vma, start_addr, - end_addr - start_addr, details); - need_break = need_resched() || spin_needbreak(details->i_mmap_lock); - - if (restart_addr >= end_addr) { - /* We have now completed this vma: mark it so */ - vma->vm_truncate_count = details->truncate_count; - if (!need_break) - return 0; - } else { - /* Note restart_addr in vma's truncate_count field */ - vma->vm_truncate_count = restart_addr; - if (!need_break) - goto again; - } - - spin_unlock(details->i_mmap_lock); - cond_resched(); - spin_lock(details->i_mmap_lock); - return -EINTR; + zap_page_range(vma, start_addr, end_addr - start_addr, details); } static inline void unmap_mapping_range_tree(struct prio_tree_root *root, @@ -2711,12 +2593,8 @@ static inline void unmap_mapping_range_tree(struct prio_tree_root *root, struct prio_tree_iter iter; pgoff_t vba, vea, zba, zea; -restart: vma_prio_tree_foreach(vma, &iter, root, details->first_index, details->last_index) { - /* Skip quickly over those we have already dealt with */ - if (vma->vm_truncate_count == details->truncate_count) - continue; vba = vma->vm_pgoff; vea = vba + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1; @@ -2728,11 +2606,10 @@ restart: if (zea > vea) zea = vea; - if (unmap_mapping_range_vma(vma, + unmap_mapping_range_vma(vma, ((zba - vba) << PAGE_SHIFT) + vma->vm_start, ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start, - details) < 0) - goto restart; + details); } } @@ -2747,15 +2624,9 @@ static inline void unmap_mapping_range_list(struct list_head *head, * across *all* the pages in each nonlinear VMA, not just the pages * whose virtual address lies outside the file truncation point. 
*/ -restart: list_for_each_entry(vma, head, shared.vm_set.list) { - /* Skip quickly over those we have already dealt with */ - if (vma->vm_truncate_count == details->truncate_count) - continue; details->nonlinear_vma = vma; - if (unmap_mapping_range_vma(vma, vma->vm_start, - vma->vm_end, details) < 0) - goto restart; + unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details); } } @@ -2794,26 +2665,14 @@ void unmap_mapping_range(struct address_space *mapping, details.last_index = hba + hlen - 1; if (details.last_index < details.first_index) details.last_index = ULONG_MAX; - details.i_mmap_lock = &mapping->i_mmap_lock; - mutex_lock(&mapping->unmap_mutex); - spin_lock(&mapping->i_mmap_lock); - - /* Protect against endless unmapping loops */ - mapping->truncate_count++; - if (unlikely(is_restart_addr(mapping->truncate_count))) { - if (mapping->truncate_count == 0) - reset_vma_truncate_counts(mapping); - mapping->truncate_count++; - } - details.truncate_count = mapping->truncate_count; + spin_lock(&mapping->i_mmap_lock); if (unlikely(!prio_tree_empty(&mapping->i_mmap))) unmap_mapping_range_tree(&mapping->i_mmap, &details); if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details); spin_unlock(&mapping->i_mmap_lock); - mutex_unlock(&mapping->unmap_mutex); } EXPORT_SYMBOL(unmap_mapping_range); diff --git a/mm/mmap.c b/mm/mmap.c index 40d49986e714..50cb04bb56bf 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -445,10 +445,8 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma, if (vma->vm_file) mapping = vma->vm_file->f_mapping; - if (mapping) { + if (mapping) spin_lock(&mapping->i_mmap_lock); - vma->vm_truncate_count = mapping->truncate_count; - } __vma_link(mm, vma, prev, rb_link, rb_parent); __vma_link_file(vma); @@ -558,16 +556,7 @@ again: remove_next = 1 + (end > next->vm_end); if (!(vma->vm_flags & VM_NONLINEAR)) root = &mapping->i_mmap; spin_lock(&mapping->i_mmap_lock); - if (importer && - vma->vm_truncate_count != next->vm_truncate_count) { - /* - * unmap_mapping_range might be in progress: - * ensure that the expanding vma is rescanned. - */ - importer->vm_truncate_count = 0; - } if (insert) { - insert->vm_truncate_count = vma->vm_truncate_count; /* * Put into prio_tree now, so instantiated pages * are visible to arm/parisc __flush_dcache_page diff --git a/mm/mremap.c b/mm/mremap.c index a7c1f9f9b941..909e1e1e99b1 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -94,7 +94,6 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, */ mapping = vma->vm_file->f_mapping; spin_lock(&mapping->i_mmap_lock); - new_vma->vm_truncate_count = 0; } /* -- cgit v1.2.3 From 3d48ae45e72390ddf8cc5256ac32ed6f7a19cbea Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 May 2011 17:12:06 -0700 Subject: mm: Convert i_mmap_lock to a mutex Straightforward conversion of i_mmap_lock to a mutex. 
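The conversion repeated throughout the diff below is mechanical: every spin_lock()/spin_unlock() pair on mapping->i_mmap_lock becomes mutex_lock()/mutex_unlock() on mapping->i_mmap_mutex, and the initialisation in address_space_init_once() moves from spin_lock_init() to mutex_init(). As a rough sketch (walk_file_mappings() below is a made-up helper, not something added by this patch), a caller that walks the i_mmap tree now looks like:

#include <linux/fs.h>
#include <linux/mutex.h>

/*
 * Illustrative only -- walk_file_mappings() does not exist in the tree.
 * The old code took spin_lock(&mapping->i_mmap_lock) here; under the
 * mutex the walk may sleep, so lock-break helpers such as
 * cond_resched_lock() are no longer needed.
 */
static void walk_file_mappings(struct address_space *mapping)
{
        mutex_lock(&mapping->i_mmap_mutex);
        /* iterate mapping->i_mmap; blocking operations are now allowed */
        mutex_unlock(&mapping->i_mmap_mutex);
}

That is also why try_to_unmap_file() further down can replace cond_resched_lock(&mapping->i_mmap_lock) with a plain cond_resched().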
Signed-off-by: Peter Zijlstra Acked-by: Hugh Dickins Cc: Benjamin Herrenschmidt Cc: David Miller Cc: Martin Schwidefsky Cc: Russell King Cc: Paul Mundt Cc: Jeff Dike Cc: Richard Weinberger Cc: Tony Luck Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Nick Piggin Cc: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/lockstat.txt | 2 +- Documentation/vm/locking | 2 +- arch/x86/mm/hugetlbpage.c | 4 ++-- fs/hugetlbfs/inode.c | 4 ++-- fs/inode.c | 2 +- include/linux/fs.h | 2 +- include/linux/mmu_notifier.h | 2 +- kernel/fork.c | 4 ++-- mm/filemap.c | 10 +++++----- mm/filemap_xip.c | 4 ++-- mm/fremap.c | 4 ++-- mm/hugetlb.c | 14 +++++++------- mm/memory-failure.c | 4 ++-- mm/memory.c | 4 ++-- mm/mmap.c | 22 +++++++++++----------- mm/mremap.c | 4 ++-- mm/rmap.c | 28 ++++++++++++++-------------- 17 files changed, 58 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt index 65f4c795015d..9c0a80d17a23 100644 --- a/Documentation/lockstat.txt +++ b/Documentation/lockstat.txt @@ -136,7 +136,7 @@ View the top contending locks: dcache_lock: 1037 1161 0.38 45.32 774.51 6611 243371 0.15 306.48 77387.24 &inode->i_mutex: 161 286 18446744073709 62882.54 1244614.55 3653 20598 18446744073709 62318.60 1693822.74 &zone->lru_lock: 94 94 0.53 7.33 92.10 4366 32690 0.29 59.81 16350.06 - &inode->i_data.i_mmap_lock: 79 79 0.40 3.77 53.03 11779 87755 0.28 116.93 29898.44 + &inode->i_data.i_mmap_mutex: 79 79 0.40 3.77 53.03 11779 87755 0.28 116.93 29898.44 &q->__queue_lock: 48 50 0.52 31.62 86.31 774 13131 0.17 113.08 12277.52 &rq->rq_lock_key: 43 47 0.74 68.50 170.63 3706 33929 0.22 107.99 17460.62 &rq->rq_lock_key#2: 39 46 0.75 6.68 49.03 2979 32292 0.17 125.17 17137.63 diff --git a/Documentation/vm/locking b/Documentation/vm/locking index 25fadb448760..f61228bd6395 100644 --- a/Documentation/vm/locking +++ b/Documentation/vm/locking @@ -66,7 +66,7 @@ in some cases it is not really needed. Eg, vm_start is modified by expand_stack(), it is hard to come up with a destructive scenario without having the vmlist protection in this case. -The page_table_lock nests with the inode i_mmap_lock and the kmem cache +The page_table_lock nests with the inode i_mmap_mutex and the kmem cache c_spinlock spinlocks. This is okay, since the kmem code asks for pages after dropping c_spinlock. 
The page_table_lock also nests with pagecache_lock and pagemap_lru_lock spinlocks, and no code asks for memory with these locks diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index d4203988504a..f581a18c0d4d 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -72,7 +72,7 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) if (!vma_shareable(vma, addr)) return; - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { if (svma == vma) continue; @@ -97,7 +97,7 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) put_page(virt_to_page(spte)); spin_unlock(&mm->page_table_lock); out: - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); } /* diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index b9eeb1cd03ff..e7a035781b7d 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -412,10 +412,10 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) pgoff = offset >> PAGE_SHIFT; i_size_write(inode, offset); - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); if (!prio_tree_empty(&mapping->i_mmap)) hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); truncate_hugepages(inode, offset); return 0; } diff --git a/fs/inode.c b/fs/inode.c index 7a7284c71abd..88aa3bcc7681 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -326,7 +326,7 @@ void address_space_init_once(struct address_space *mapping) memset(mapping, 0, sizeof(*mapping)); INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC); spin_lock_init(&mapping->tree_lock); - spin_lock_init(&mapping->i_mmap_lock); + mutex_init(&mapping->i_mmap_mutex); INIT_LIST_HEAD(&mapping->private_list); spin_lock_init(&mapping->private_lock); INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap); diff --git a/include/linux/fs.h b/include/linux/fs.h index 5d2c86bdf5ba..5bb9e826019b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -634,7 +634,7 @@ struct address_space { unsigned int i_mmap_writable;/* count VM_SHARED mappings */ struct prio_tree_root i_mmap; /* tree of private and shared mappings */ struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ - spinlock_t i_mmap_lock; /* protect tree, count, list */ + struct mutex i_mmap_mutex; /* protect tree, count, list */ unsigned long nrpages; /* number of total pages */ pgoff_t writeback_index;/* writeback starts here */ const struct address_space_operations *a_ops; /* methods */ diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index cc2e7dfea9d7..a877dfc243eb 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -150,7 +150,7 @@ struct mmu_notifier_ops { * Therefore notifier chains can only be traversed when either * * 1. mmap_sem is held. - * 2. One of the reverse map locks is held (i_mmap_lock or anon_vma->lock). + * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->lock). * 3. 
No other concurrent thread can access the list (release) */ struct mmu_notifier { diff --git a/kernel/fork.c b/kernel/fork.c index 4eef925477fc..927692734bcf 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -383,14 +383,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) get_file(file); if (tmp->vm_flags & VM_DENYWRITE) atomic_dec(&inode->i_writecount); - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); if (tmp->vm_flags & VM_SHARED) mapping->i_mmap_writable++; flush_dcache_mmap_lock(mapping); /* insert tmp into the share list, just after mpnt */ vma_prio_tree_add(tmp, mpnt); flush_dcache_mmap_unlock(mapping); - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); } /* diff --git a/mm/filemap.c b/mm/filemap.c index 8144f87dcbb4..88354ae0b1fd 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -58,16 +58,16 @@ /* * Lock ordering: * - * ->i_mmap_lock (truncate_pagecache) + * ->i_mmap_mutex (truncate_pagecache) * ->private_lock (__free_pte->__set_page_dirty_buffers) * ->swap_lock (exclusive_swap_page, others) * ->mapping->tree_lock * * ->i_mutex - * ->i_mmap_lock (truncate->unmap_mapping_range) + * ->i_mmap_mutex (truncate->unmap_mapping_range) * * ->mmap_sem - * ->i_mmap_lock + * ->i_mmap_mutex * ->page_table_lock or pte_lock (various, mainly in memory.c) * ->mapping->tree_lock (arch-dependent flush_dcache_mmap_lock) * @@ -84,7 +84,7 @@ * sb_lock (fs/fs-writeback.c) * ->mapping->tree_lock (__sync_single_inode) * - * ->i_mmap_lock + * ->i_mmap_mutex * ->anon_vma.lock (vma_adjust) * * ->anon_vma.lock @@ -106,7 +106,7 @@ * * (code doesn't rely on that order, so you could switch it around) * ->tasklist_lock (memory_failure, collect_procs_ao) - * ->i_mmap_lock + * ->i_mmap_mutex */ /* diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 83364df74a33..93356cd12828 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -183,7 +183,7 @@ __xip_unmap (struct address_space * mapping, return; retry: - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { mm = vma->vm_mm; address = vma->vm_start + @@ -201,7 +201,7 @@ retry: page_cache_release(page); } } - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); if (locked) { mutex_unlock(&xip_sparse_mutex); diff --git a/mm/fremap.c b/mm/fremap.c index ec520c7b28df..7f4123056e06 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -211,13 +211,13 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, } goto out; } - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); flush_dcache_mmap_lock(mapping); vma->vm_flags |= VM_NONLINEAR; vma_prio_tree_remove(vma, &mapping->i_mmap); vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear); flush_dcache_mmap_unlock(mapping); - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); } if (vma->vm_flags & VM_LOCKED) { diff --git a/mm/hugetlb.c b/mm/hugetlb.c index bbb4a5bbb958..5fd68b95c671 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2205,7 +2205,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long sz = huge_page_size(h); /* - * A page gathering list, protected by per file i_mmap_lock. The + * A page gathering list, protected by per file i_mmap_mutex. The * lock is used to avoid list corruption from multiple unmapping * of the same page since we are using page->lru. 
*/ @@ -2274,9 +2274,9 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page) { - spin_lock(&vma->vm_file->f_mapping->i_mmap_lock); + mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex); __unmap_hugepage_range(vma, start, end, ref_page); - spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock); + mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex); } /* @@ -2308,7 +2308,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, * this mapping should be shared between all the VMAs, * __unmap_hugepage_range() is called as the lock is already held */ - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) { /* Do not unmap the current VMA */ if (iter_vma == vma) @@ -2326,7 +2326,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, address, address + huge_page_size(h), page); } - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); return 1; } @@ -2810,7 +2810,7 @@ void hugetlb_change_protection(struct vm_area_struct *vma, BUG_ON(address >= end); flush_cache_range(vma, address, end); - spin_lock(&vma->vm_file->f_mapping->i_mmap_lock); + mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex); spin_lock(&mm->page_table_lock); for (; address < end; address += huge_page_size(h)) { ptep = huge_pte_offset(mm, address); @@ -2825,7 +2825,7 @@ void hugetlb_change_protection(struct vm_area_struct *vma, } } spin_unlock(&mm->page_table_lock); - spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock); + mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex); flush_tlb_range(vma, start, end); } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 2b9a5eef39e0..12178ec32ab5 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -429,7 +429,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, */ read_lock(&tasklist_lock); - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); for_each_process(tsk) { pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); @@ -449,7 +449,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, add_to_kill(tsk, page, vma, to_kill, tkc); } } - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); read_unlock(&tasklist_lock); } diff --git a/mm/memory.c b/mm/memory.c index 18655878b9f8..7bbe4d3df756 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2667,12 +2667,12 @@ void unmap_mapping_range(struct address_space *mapping, details.last_index = ULONG_MAX; - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); if (unlikely(!prio_tree_empty(&mapping->i_mmap))) unmap_mapping_range_tree(&mapping->i_mmap, &details); if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details); - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); } EXPORT_SYMBOL(unmap_mapping_range); diff --git a/mm/mmap.c b/mm/mmap.c index 50cb04bb56bf..26efbfca0b20 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -194,7 +194,7 @@ error: } /* - * Requires inode->i_mapping->i_mmap_lock + * Requires inode->i_mapping->i_mmap_mutex */ static void __remove_shared_vm_struct(struct vm_area_struct *vma, struct file *file, struct address_space *mapping) @@ -222,9 +222,9 @@ void unlink_file_vma(struct vm_area_struct *vma) if (file) { 
struct address_space *mapping = file->f_mapping; - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); __remove_shared_vm_struct(vma, file, mapping); - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); } } @@ -446,13 +446,13 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma, mapping = vma->vm_file->f_mapping; if (mapping) - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); __vma_link(mm, vma, prev, rb_link, rb_parent); __vma_link_file(vma); if (mapping) - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); mm->map_count++; validate_mm(mm); @@ -555,7 +555,7 @@ again: remove_next = 1 + (end > next->vm_end); mapping = file->f_mapping; if (!(vma->vm_flags & VM_NONLINEAR)) root = &mapping->i_mmap; - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); if (insert) { /* * Put into prio_tree now, so instantiated pages @@ -622,7 +622,7 @@ again: remove_next = 1 + (end > next->vm_end); if (anon_vma) anon_vma_unlock(anon_vma); if (mapping) - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); if (remove_next) { if (file) { @@ -2290,7 +2290,7 @@ void exit_mmap(struct mm_struct *mm) /* Insert vm structure into process list sorted by address * and into the inode's i_mmap tree. If vm_file is non-NULL - * then i_mmap_lock is taken here. + * then i_mmap_mutex is taken here. */ int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma) { @@ -2532,7 +2532,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping) */ if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags)) BUG(); - spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem); + mutex_lock_nest_lock(&mapping->i_mmap_mutex, &mm->mmap_sem); } } @@ -2559,7 +2559,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping) * vma in this mm is backed by the same anon_vma or address_space. * * We can take all the locks in random order because the VM code - * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never + * taking i_mmap_mutex or anon_vma->lock outside the mmap_sem never * takes more than one of them in a row. Secondly we're protected * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex. * @@ -2631,7 +2631,7 @@ static void vm_unlock_mapping(struct address_space *mapping) * AS_MM_ALL_LOCKS can't change to 0 from under us * because we hold the mm_all_locks_mutex. */ - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); if (!test_and_clear_bit(AS_MM_ALL_LOCKS, &mapping->flags)) BUG(); diff --git a/mm/mremap.c b/mm/mremap.c index 909e1e1e99b1..506fa44403df 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -93,7 +93,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, * and we propagate stale pages into the dst afterward. 
*/ mapping = vma->vm_file->f_mapping; - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); } /* @@ -122,7 +122,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, pte_unmap(new_pte - 1); pte_unmap_unlock(old_pte - 1, old_ptl); if (mapping) - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); mmu_notifier_invalidate_range_end(vma->vm_mm, old_start, old_end); } diff --git a/mm/rmap.c b/mm/rmap.c index 522e4a93cadd..f0ef7ea5423a 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -24,7 +24,7 @@ * inode->i_alloc_sem (vmtruncate_range) * mm->mmap_sem * page->flags PG_locked (lock_page) - * mapping->i_mmap_lock + * mapping->i_mmap_mutex * anon_vma->lock * mm->page_table_lock or pte_lock * zone->lru_lock (in mark_page_accessed, isolate_lru_page) @@ -646,14 +646,14 @@ static int page_referenced_file(struct page *page, * The page lock not only makes sure that page->mapping cannot * suddenly be NULLified by truncation, it makes sure that the * structure at mapping cannot be freed and reused yet, - * so we can safely take mapping->i_mmap_lock. + * so we can safely take mapping->i_mmap_mutex. */ BUG_ON(!PageLocked(page)); - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); /* - * i_mmap_lock does not stabilize mapcount at all, but mapcount + * i_mmap_mutex does not stabilize mapcount at all, but mapcount * is more likely to be accurate if we note it after spinning. */ mapcount = page_mapcount(page); @@ -675,7 +675,7 @@ static int page_referenced_file(struct page *page, break; } - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); return referenced; } @@ -762,7 +762,7 @@ static int page_mkclean_file(struct address_space *mapping, struct page *page) BUG_ON(PageAnon(page)); - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { if (vma->vm_flags & VM_SHARED) { unsigned long address = vma_address(page, vma); @@ -771,7 +771,7 @@ static int page_mkclean_file(struct address_space *mapping, struct page *page) ret += page_mkclean_one(page, vma, address); } } - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); return ret; } @@ -1119,7 +1119,7 @@ out_mlock: /* * We need mmap_sem locking, Otherwise VM_LOCKED check makes * unstable result and race. Plus, We can't wait here because - * we now hold anon_vma->lock or mapping->i_mmap_lock. + * we now hold anon_vma->lock or mapping->i_mmap_mutex. * if trylock failed, the page remain in evictable lru and later * vmscan could retry to move the page to unevictable lru if the * page is actually mlocked. 
@@ -1345,7 +1345,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) unsigned long max_nl_size = 0; unsigned int mapcount; - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { unsigned long address = vma_address(page, vma); if (address == -EFAULT) @@ -1391,7 +1391,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) mapcount = page_mapcount(page); if (!mapcount) goto out; - cond_resched_lock(&mapping->i_mmap_lock); + cond_resched(); max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK; if (max_nl_cursor == 0) @@ -1413,7 +1413,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) } vma->vm_private_data = (void *) max_nl_cursor; } - cond_resched_lock(&mapping->i_mmap_lock); + cond_resched(); max_nl_cursor += CLUSTER_SIZE; } while (max_nl_cursor <= max_nl_size); @@ -1425,7 +1425,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) vma->vm_private_data = NULL; out: - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); return ret; } @@ -1544,7 +1544,7 @@ static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *, if (!mapping) return ret; - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { unsigned long address = vma_address(page, vma); if (address == -EFAULT) @@ -1558,7 +1558,7 @@ static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *, * never contain migration ptes. Decide what to do about this * limitation to linear when we need rmap_walk() on nonlinear. */ - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); return ret; } -- cgit v1.2.3 From 25aeeb046e695c3093a86aa9386128ffb3b1bc32 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 May 2011 17:12:07 -0700 Subject: mm: revert page_lock_anon_vma() lock annotation Its beyond ugly and gets in the way. Signed-off-by: Peter Zijlstra Acked-by: Hugh Dickins Cc: Benjamin Herrenschmidt Cc: David Miller Cc: Martin Schwidefsky Cc: Russell King Cc: Paul Mundt Cc: Jeff Dike Cc: Richard Weinberger Cc: Tony Luck Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Namhyung Kim Cc: KOSAKI Motohiro Cc: Nick Piggin Cc: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 15 +-------------- mm/rmap.c | 4 +--- 2 files changed, 2 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 830e65dc01ee..590c291a8cd9 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -218,20 +218,7 @@ int try_to_munlock(struct page *); /* * Called by memory-failure.c to kill processes. 
*/ -struct anon_vma *__page_lock_anon_vma(struct page *page); - -static inline struct anon_vma *page_lock_anon_vma(struct page *page) -{ - struct anon_vma *anon_vma; - - __cond_lock(RCU, anon_vma = __page_lock_anon_vma(page)); - - /* (void) is needed to make gcc happy */ - (void) __cond_lock(&anon_vma->root->lock, anon_vma); - - return anon_vma; -} - +struct anon_vma *page_lock_anon_vma(struct page *page); void page_unlock_anon_vma(struct anon_vma *anon_vma); int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); diff --git a/mm/rmap.c b/mm/rmap.c index f0ef7ea5423a..c6044761617e 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -323,7 +323,7 @@ void __init anon_vma_init(void) * Getting a lock on a stable anon_vma from a page off the LRU is * tricky: page_lock_anon_vma rely on RCU to guard against the races. */ -struct anon_vma *__page_lock_anon_vma(struct page *page) +struct anon_vma *page_lock_anon_vma(struct page *page) { struct anon_vma *anon_vma, *root_anon_vma; unsigned long anon_mapping; @@ -357,8 +357,6 @@ out: } void page_unlock_anon_vma(struct anon_vma *anon_vma) - __releases(&anon_vma->root->lock) - __releases(RCU) { anon_vma_unlock(anon_vma); rcu_read_unlock(); -- cgit v1.2.3 From 2b575eb64f7a9c701fb4bfdb12388ac547f6c2b6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 May 2011 17:12:11 -0700 Subject: mm: convert anon_vma->lock to a mutex Straightforward conversion of anon_vma->lock to a mutex. Signed-off-by: Peter Zijlstra Acked-by: Hugh Dickins Reviewed-by: KOSAKI Motohiro Cc: Benjamin Herrenschmidt Cc: David Miller Cc: Martin Schwidefsky Cc: Russell King Cc: Paul Mundt Cc: Jeff Dike Cc: Richard Weinberger Cc: Tony Luck Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Nick Piggin Cc: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 8 ++------ include/linux/mmu_notifier.h | 2 +- include/linux/rmap.h | 14 +++++++------- mm/huge_memory.c | 4 ++-- mm/mmap.c | 10 +++++----- mm/rmap.c | 8 ++++---- 6 files changed, 21 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 8847c8c29791..48c32ebf65a7 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -92,12 +92,8 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd); #define wait_split_huge_page(__anon_vma, __pmd) \ do { \ pmd_t *____pmd = (__pmd); \ - spin_unlock_wait(&(__anon_vma)->root->lock); \ - /* \ - * spin_unlock_wait() is just a loop in C and so the \ - * CPU can reorder anything around it. \ - */ \ - smp_mb(); \ + anon_vma_lock(__anon_vma); \ + anon_vma_unlock(__anon_vma); \ BUG_ON(pmd_trans_splitting(*____pmd) || \ pmd_trans_huge(*____pmd)); \ } while (0) diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index a877dfc243eb..1d1b1e13f79f 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -150,7 +150,7 @@ struct mmu_notifier_ops { * Therefore notifier chains can only be traversed when either * * 1. mmap_sem is held. - * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->lock). + * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->mutex). * 3. 
No other concurrent thread can access the list (release) */ struct mmu_notifier { diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 590c291a8cd9..2148b122779b 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include /* @@ -26,7 +26,7 @@ */ struct anon_vma { struct anon_vma *root; /* Root of this anon_vma tree */ - spinlock_t lock; /* Serialize access to vma list */ + struct mutex mutex; /* Serialize access to vma list */ /* * The refcount is taken on an anon_vma when there is no * guarantee that the vma of page tables will exist for @@ -64,7 +64,7 @@ struct anon_vma_chain { struct vm_area_struct *vma; struct anon_vma *anon_vma; struct list_head same_vma; /* locked by mmap_sem & page_table_lock */ - struct list_head same_anon_vma; /* locked by anon_vma->lock */ + struct list_head same_anon_vma; /* locked by anon_vma->mutex */ }; #ifdef CONFIG_MMU @@ -93,24 +93,24 @@ static inline void vma_lock_anon_vma(struct vm_area_struct *vma) { struct anon_vma *anon_vma = vma->anon_vma; if (anon_vma) - spin_lock(&anon_vma->root->lock); + mutex_lock(&anon_vma->root->mutex); } static inline void vma_unlock_anon_vma(struct vm_area_struct *vma) { struct anon_vma *anon_vma = vma->anon_vma; if (anon_vma) - spin_unlock(&anon_vma->root->lock); + mutex_unlock(&anon_vma->root->mutex); } static inline void anon_vma_lock(struct anon_vma *anon_vma) { - spin_lock(&anon_vma->root->lock); + mutex_lock(&anon_vma->root->mutex); } static inline void anon_vma_unlock(struct anon_vma *anon_vma) { - spin_unlock(&anon_vma->root->lock); + mutex_unlock(&anon_vma->root->mutex); } /* diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 83326ad66d9b..90eef404ec2e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1139,7 +1139,7 @@ static int __split_huge_page_splitting(struct page *page, * We can't temporarily set the pmd to null in order * to split it, the pmd must remain marked huge at all * times or the VM won't take the pmd_trans_huge paths - * and it won't wait on the anon_vma->root->lock to + * and it won't wait on the anon_vma->root->mutex to * serialize against split_huge_page*. */ pmdp_splitting_flush_notify(vma, address, pmd); @@ -1333,7 +1333,7 @@ static int __split_huge_page_map(struct page *page, return ret; } -/* must be called with anon_vma->root->lock hold */ +/* must be called with anon_vma->root->mutex hold */ static void __split_huge_page(struct page *page, struct anon_vma *anon_vma) { diff --git a/mm/mmap.c b/mm/mmap.c index 26efbfca0b20..ac2631b7477f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2502,15 +2502,15 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma) * The LSB of head.next can't change from under us * because we hold the mm_all_locks_mutex. */ - spin_lock_nest_lock(&anon_vma->root->lock, &mm->mmap_sem); + mutex_lock_nest_lock(&anon_vma->root->mutex, &mm->mmap_sem); /* * We can safely modify head.next after taking the - * anon_vma->root->lock. If some other vma in this mm shares + * anon_vma->root->mutex. If some other vma in this mm shares * the same anon_vma we won't take it again. * * No need of atomic instructions here, head.next * can't change from under us thanks to the - * anon_vma->root->lock. + * anon_vma->root->mutex. */ if (__test_and_set_bit(0, (unsigned long *) &anon_vma->root->head.next)) @@ -2559,7 +2559,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping) * vma in this mm is backed by the same anon_vma or address_space. 
* * We can take all the locks in random order because the VM code - * taking i_mmap_mutex or anon_vma->lock outside the mmap_sem never + * taking i_mmap_mutex or anon_vma->mutex outside the mmap_sem never * takes more than one of them in a row. Secondly we're protected * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex. * @@ -2615,7 +2615,7 @@ static void vm_unlock_anon_vma(struct anon_vma *anon_vma) * * No need of atomic instructions here, head.next * can't change from under us until we release the - * anon_vma->root->lock. + * anon_vma->root->mutex. */ if (!__test_and_clear_bit(0, (unsigned long *) &anon_vma->root->head.next)) diff --git a/mm/rmap.c b/mm/rmap.c index d271845d7d15..ce29d405d093 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -25,7 +25,7 @@ * mm->mmap_sem * page->flags PG_locked (lock_page) * mapping->i_mmap_mutex - * anon_vma->lock + * anon_vma->mutex * mm->page_table_lock or pte_lock * zone->lru_lock (in mark_page_accessed, isolate_lru_page) * swap_lock (in swap_duplicate, swap_info_get) @@ -40,7 +40,7 @@ * * (code doesn't rely on that order so it could be switched around) * ->tasklist_lock - * anon_vma->lock (memory_failure, collect_procs_anon) + * anon_vma->mutex (memory_failure, collect_procs_anon) * pte map lock */ @@ -307,7 +307,7 @@ static void anon_vma_ctor(void *data) { struct anon_vma *anon_vma = data; - spin_lock_init(&anon_vma->lock); + mutex_init(&anon_vma->mutex); atomic_set(&anon_vma->refcount, 0); INIT_LIST_HEAD(&anon_vma->head); } @@ -1143,7 +1143,7 @@ out_mlock: /* * We need mmap_sem locking, Otherwise VM_LOCKED check makes * unstable result and race. Plus, We can't wait here because - * we now hold anon_vma->lock or mapping->i_mmap_mutex. + * we now hold anon_vma->mutex or mapping->i_mmap_mutex. * if trylock failed, the page remain in evictable lru and later * vmscan could retry to move the page to unevictable lru if the * page is actually mlocked. -- cgit v1.2.3 From de03c72cfce5b263a674d04348b58475ec50163c Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 24 May 2011 17:12:15 -0700 Subject: mm: convert mm->cpu_vm_cpumask into cpumask_var_t cpumask_t is very big struct and cpu_vm_mask is placed wrong position. It might lead to reduce cache hit ratio. This patch has two change. 1) Move the place of cpumask into last of mm_struct. Because usually cpumask is accessed only front bits when the system has cpu-hotplug capability 2) Convert cpu_vm_mask into cpumask_var_t. It may help to reduce memory footprint if cpumask_size() will use nr_cpumask_bits properly in future. In addition, this patch change the name of cpu_vm_mask with cpu_vm_mask_var. It may help to detect out of tree cpu_vm_mask users. This patch has no functional change. [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: KOSAKI Motohiro Cc: David Howells Cc: Koichi Yasutake Cc: Hugh Dickins Cc: Chris Metcalf Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cachetlb.txt | 2 +- arch/x86/kernel/tboot.c | 1 - include/linux/mm_types.h | 9 ++++++--- include/linux/sched.h | 1 + init/main.c | 2 ++ kernel/fork.c | 37 ++++++++++++++++++++++++++++++++++--- mm/init-mm.c | 1 - 7 files changed, 44 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt index 9164ae3b83bc..9b728dc17535 100644 --- a/Documentation/cachetlb.txt +++ b/Documentation/cachetlb.txt @@ -16,7 +16,7 @@ on all processors in the system. 
Don't let this scare you into thinking SMP cache/tlb flushing must be so inefficient, this is in fact an area where many optimizations are possible. For example, if it can be proven that a user address space has never executed -on a cpu (see vma->cpu_vm_mask), one need not perform a flush +on a cpu (see mm_cpumask()), one need not perform a flush for this address space on that cpu. First, the TLB flushing interfaces, since they are the simplest. The diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 998e972f3b1a..30ac65df7d4e 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -110,7 +110,6 @@ static struct mm_struct tboot_mm = { .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(init_mm.mmlist), - .cpu_vm_mask = CPU_MASK_ALL, }; static inline void switch_to_tboot_pt(void) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 201998e5b530..c2f9ea7922f4 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -265,8 +265,6 @@ struct mm_struct { struct linux_binfmt *binfmt; - cpumask_t cpu_vm_mask; - /* Architecture-specific MM context */ mm_context_t context; @@ -316,9 +314,14 @@ struct mm_struct { #ifdef CONFIG_TRANSPARENT_HUGEPAGE pgtable_t pmd_huge_pte; /* protected by page_table_lock */ #endif + + cpumask_var_t cpu_vm_mask_var; }; /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */ -#define mm_cpumask(mm) (&(mm)->cpu_vm_mask) +static inline cpumask_t *mm_cpumask(struct mm_struct *mm) +{ + return mm->cpu_vm_mask_var; +} #endif /* _LINUX_MM_TYPES_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 44b8faaac7c0..f18300eddfcb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2176,6 +2176,7 @@ static inline void mmdrop(struct mm_struct * mm) if (unlikely(atomic_dec_and_test(&mm->mm_count))) __mmdrop(mm); } +extern int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm); /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); diff --git a/init/main.c b/init/main.c index 48df882d51d2..22da33918aef 100644 --- a/init/main.c +++ b/init/main.c @@ -509,6 +509,8 @@ asmlinkage void __init start_kernel(void) sort_main_extable(); trap_init(); mm_init(); + BUG_ON(mm_init_cpumask(&init_mm, 0)); + /* * Set up the scheduler prior starting any interrupts (such as the * timer interrupt). 
Full topology setup happens at smp_init() diff --git a/kernel/fork.c b/kernel/fork.c index 927692734bcf..8e7e135d0817 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -485,6 +485,20 @@ static void mm_init_aio(struct mm_struct *mm) #endif } +int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm) +{ +#ifdef CONFIG_CPUMASK_OFFSTACK + if (!alloc_cpumask_var(&mm->cpu_vm_mask_var, GFP_KERNEL)) + return -ENOMEM; + + if (oldmm) + cpumask_copy(mm_cpumask(mm), mm_cpumask(oldmm)); + else + memset(mm_cpumask(mm), 0, cpumask_size()); +#endif + return 0; +} + static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) { atomic_set(&mm->mm_users, 1); @@ -521,10 +535,20 @@ struct mm_struct * mm_alloc(void) struct mm_struct * mm; mm = allocate_mm(); - if (mm) { - memset(mm, 0, sizeof(*mm)); - mm = mm_init(mm, current); + if (!mm) + return NULL; + + memset(mm, 0, sizeof(*mm)); + mm = mm_init(mm, current); + if (!mm) + return NULL; + + if (mm_init_cpumask(mm, NULL)) { + mm_free_pgd(mm); + free_mm(mm); + return NULL; } + return mm; } @@ -536,6 +560,7 @@ struct mm_struct * mm_alloc(void) void __mmdrop(struct mm_struct *mm) { BUG_ON(mm == &init_mm); + free_cpumask_var(mm->cpu_vm_mask_var); mm_free_pgd(mm); destroy_context(mm); mmu_notifier_mm_destroy(mm); @@ -690,6 +715,9 @@ struct mm_struct *dup_mm(struct task_struct *tsk) if (!mm_init(mm, tsk)) goto fail_nomem; + if (mm_init_cpumask(mm, oldmm)) + goto fail_nocpumask; + if (init_new_context(tsk, mm)) goto fail_nocontext; @@ -716,6 +744,9 @@ fail_nomem: return NULL; fail_nocontext: + free_cpumask_var(mm->cpu_vm_mask_var); + +fail_nocpumask: /* * If init_new_context() failed, we cannot use mmput() to free the mm * because it calls destroy_context() diff --git a/mm/init-mm.c b/mm/init-mm.c index 1d29cdfe8ebb..4019979b2637 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -21,6 +21,5 @@ struct mm_struct init_mm = { .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(init_mm.mmlist), - .cpu_vm_mask = CPU_MASK_ALL, INIT_MM_CONTEXT(init_mm) }; -- cgit v1.2.3 From a238ab5b0239575c179f4976064192c3f7409dad Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 24 May 2011 17:12:16 -0700 Subject: mm: break out page allocation warning code This originally started as a simple patch to give vmalloc() some more verbose output on failure on top of the plain page allocator messages. Johannes suggested that it might be nicer to lead with the vmalloc() info _before_ the page allocator messages. But, I do think there's a lot of value in what __alloc_pages_slowpath() does with its filtering and so forth. This patch creates a new function which other allocators can call instead of relying on the internal page allocator warnings. It also gives this function private rate-limiting which separates it from other printk_ratelimit() users. 
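As a rough illustration of the intended call pattern (not part of this patch), an allocator outside the page allocator can report failures through the shared helper instead of open-coding a rate-limited printk plus dump_stack(); my_alloc_buffer() below is hypothetical, only warn_alloc_failed() and its signature come from the diff that follows:

#include <linux/mm.h>
#include <linux/vmalloc.h>

/*
 * Hypothetical caller, for illustration only.  The optional format
 * string is printed ahead of the generic "page allocation failure"
 * report, and the helper applies its own private rate limit.
 */
static void *my_alloc_buffer(unsigned long size)
{
        void *buf = vmalloc(size);

        if (!buf)
                warn_alloc_failed(GFP_KERNEL, 0,
                                  "my_alloc_buffer: %lu bytes failed\n", size);
        return buf;
}

This is the vmalloc()-style consumer the first paragraph alludes to; the patch itself only introduces the helper and converts the page allocator's slowpath to use it.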
Signed-off-by: Dave Hansen Cc: Johannes Weiner Cc: David Rientjes Cc: Michal Nazarewicz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ mm/page_alloc.c | 62 ++++++++++++++++++++++++++++++++++++------------------ 2 files changed, 43 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2ad0ac8c3f32..8e2f2cd821f7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1387,6 +1387,8 @@ extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); extern int after_bootmem; +extern void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...); + extern void setup_per_cpu_pageset(void); extern void zone_pcp_update(struct zone *zone); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 83eaa2eb72f8..44019da9632e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -1736,6 +1737,45 @@ static inline bool should_suppress_show_mem(void) return ret; } +static DEFINE_RATELIMIT_STATE(nopage_rs, + DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + +void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...) +{ + va_list args; + unsigned int filter = SHOW_MEM_FILTER_NODES; + + if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs)) + return; + + /* + * This documents exceptions given to allocations in certain + * contexts that are allowed to allocate outside current's set + * of allowed nodes. + */ + if (!(gfp_mask & __GFP_NOMEMALLOC)) + if (test_thread_flag(TIF_MEMDIE) || + (current->flags & (PF_MEMALLOC | PF_EXITING))) + filter &= ~SHOW_MEM_FILTER_NODES; + if (in_interrupt() || !(gfp_mask & __GFP_WAIT)) + filter &= ~SHOW_MEM_FILTER_NODES; + + if (fmt) { + printk(KERN_WARNING); + va_start(args, fmt); + vprintk(fmt, args); + va_end(args); + } + + pr_warning("%s: page allocation failure: order:%d, mode:0x%x\n", + current->comm, order, gfp_mask); + + dump_stack(); + if (!should_suppress_show_mem()) + show_mem(filter); +} + static inline int should_alloc_retry(gfp_t gfp_mask, unsigned int order, unsigned long pages_reclaimed) @@ -2178,27 +2218,7 @@ rebalance: } nopage: - if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) { - unsigned int filter = SHOW_MEM_FILTER_NODES; - - /* - * This documents exceptions given to allocations in certain - * contexts that are allowed to allocate outside current's set - * of allowed nodes. - */ - if (!(gfp_mask & __GFP_NOMEMALLOC)) - if (test_thread_flag(TIF_MEMDIE) || - (current->flags & (PF_MEMALLOC | PF_EXITING))) - filter &= ~SHOW_MEM_FILTER_NODES; - if (in_interrupt() || !wait) - filter &= ~SHOW_MEM_FILTER_NODES; - - pr_warning("%s: page allocation failure. order:%d, mode:0x%x\n", - current->comm, order, gfp_mask); - dump_stack(); - if (!should_suppress_show_mem()) - show_mem(filter); - } + warn_alloc_failed(gfp_mask, order, NULL); return page; got_pg: if (kmemcheck_enabled) -- cgit v1.2.3 From 7b1de5868b124d8f399d8791ed30a9b679d64d4d Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Tue, 24 May 2011 17:12:25 -0700 Subject: readahead: readahead page allocations are OK to fail Pass __GFP_NORETRY|__GFP_NOWARN for readahead page allocations. readahead page allocations are completely optional. They are OK to fail and in particular shall not trigger OOM on themselves. 
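Concretely, a best-effort readahead loop can simply stop at the first failed allocation; the sketch below is hypothetical apart from page_cache_alloc_readahead(), which the diff that follows adds to <linux/pagemap.h>:

#include <linux/pagemap.h>

/*
 * Hypothetical best-effort loop.  __GFP_NORETRY | __GFP_NOWARN make the
 * allocation purely opportunistic: no retry loop, no log noise, and no
 * OOM kill on behalf of speculative I/O -- a miss just ends readahead
 * early.
 */
static unsigned long readahead_best_effort(struct address_space *mapping,
                                           unsigned long nr_pages)
{
        unsigned long done;

        for (done = 0; done < nr_pages; done++) {
                struct page *page = page_cache_alloc_readahead(mapping);

                if (!page)
                        break;  /* optional work: give up quietly */
                /* ... queue the page for read I/O here ... */
                page_cache_release(page);  /* sketch only: drop the reference */
        }
        return done;
}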
Reported-by: Dave Young Reviewed-by: KOSAKI Motohiro Signed-off-by: Wu Fengguang Reviewed-by: Minchan Kim Reviewed-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 6 ++++++ mm/readahead.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ea268080380d..716875e53520 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -219,6 +219,12 @@ static inline struct page *page_cache_alloc_cold(struct address_space *x) return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD); } +static inline struct page *page_cache_alloc_readahead(struct address_space *x) +{ + return __page_cache_alloc(mapping_gfp_mask(x) | + __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN); +} + typedef int filler_t(void *, struct page *); extern struct page * find_get_page(struct address_space *mapping, diff --git a/mm/readahead.c b/mm/readahead.c index 2c0cc489e288..867f9dd82dcd 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -180,7 +180,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp, if (page) continue; - page = page_cache_alloc_cold(mapping); + page = page_cache_alloc_readahead(mapping); if (!page) break; page->index = page_offset; -- cgit v1.2.3 From a09ed5e00084448453c8bada4dcd31e5fbfc2f21 Mon Sep 17 00:00:00 2001 From: Ying Han Date: Tue, 24 May 2011 17:12:26 -0700 Subject: vmscan: change shrink_slab() interfaces by passing shrink_control Consolidate the existing parameters to shrink_slab() into a new shrink_control struct. This is needed later to pass the same struct to shrinkers. Signed-off-by: Ying Han Cc: KOSAKI Motohiro Cc: Minchan Kim Acked-by: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Acked-by: Rik van Riel Cc: Johannes Weiner Cc: Hugh Dickins Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/drop_caches.c | 6 +++++- include/linux/mm.h | 13 +++++++++++-- mm/memory-failure.c | 7 ++++++- mm/vmscan.c | 46 +++++++++++++++++++++++++++++++++------------- 4 files changed, 55 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 98b77c89494c..440999c24353 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -40,9 +40,13 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) static void drop_slab(void) { int nr_objects; + struct shrink_control shrink = { + .gfp_mask = GFP_KERNEL, + .nr_scanned = 1000, + }; do { - nr_objects = shrink_slab(1000, GFP_KERNEL, 1000); + nr_objects = shrink_slab(&shrink, 1000); } while (nr_objects > 10); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 8e2f2cd821f7..32cfa9602d00 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1161,6 +1161,15 @@ static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm) } #endif +/* + * This struct is used to pass information from page reclaim to the shrinkers. + * We consolidate the values for easier extention later. + */ +struct shrink_control { + unsigned long nr_scanned; + gfp_t gfp_mask; +}; + /* * A callback you can register to apply pressure to ageable caches. 
* @@ -1630,8 +1639,8 @@ int in_gate_area_no_mm(unsigned long addr); int drop_caches_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); -unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, - unsigned long lru_pages); +unsigned long shrink_slab(struct shrink_control *shrink, + unsigned long lru_pages); #ifndef CONFIG_MMU #define randomize_va_space 0 diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 369b80e81416..341341b2b47b 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -239,7 +239,12 @@ void shake_page(struct page *p, int access) if (access) { int nr; do { - nr = shrink_slab(1000, GFP_KERNEL, 1000); + struct shrink_control shrink = { + .gfp_mask = GFP_KERNEL, + .nr_scanned = 1000, + }; + + nr = shrink_slab(&shrink, 1000); if (page_count(p) == 1) break; } while (nr > 10); diff --git a/mm/vmscan.c b/mm/vmscan.c index 890f54184d9a..e4e245ed1a5b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -222,11 +222,13 @@ EXPORT_SYMBOL(unregister_shrinker); * * Returns the number of slab objects which we shrunk. */ -unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, - unsigned long lru_pages) +unsigned long shrink_slab(struct shrink_control *shrink, + unsigned long lru_pages) { struct shrinker *shrinker; unsigned long ret = 0; + unsigned long scanned = shrink->nr_scanned; + gfp_t gfp_mask = shrink->gfp_mask; if (scanned == 0) scanned = SWAP_CLUSTER_MAX; @@ -2035,7 +2037,8 @@ static bool all_unreclaimable(struct zonelist *zonelist, * else, the number of pages reclaimed */ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, - struct scan_control *sc) + struct scan_control *sc, + struct shrink_control *shrink) { int priority; unsigned long total_scanned = 0; @@ -2069,7 +2072,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, lru_pages += zone_reclaimable_pages(zone); } - shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages); + shrink->nr_scanned = sc->nr_scanned; + shrink_slab(shrink, lru_pages); if (reclaim_state) { sc->nr_reclaimed += reclaim_state->reclaimed_slab; reclaim_state->reclaimed_slab = 0; @@ -2141,12 +2145,15 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, .mem_cgroup = NULL, .nodemask = nodemask, }; + struct shrink_control shrink = { + .gfp_mask = sc.gfp_mask, + }; trace_mm_vmscan_direct_reclaim_begin(order, sc.may_writepage, gfp_mask); - nr_reclaimed = do_try_to_free_pages(zonelist, &sc); + nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink); trace_mm_vmscan_direct_reclaim_end(nr_reclaimed); @@ -2206,17 +2213,20 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, .order = 0, .mem_cgroup = mem_cont, .nodemask = NULL, /* we don't care the placement */ + .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | + (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), + }; + struct shrink_control shrink = { + .gfp_mask = sc.gfp_mask, }; - sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | - (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); zonelist = NODE_DATA(numa_node_id())->node_zonelists; trace_mm_vmscan_memcg_reclaim_begin(0, sc.may_writepage, sc.gfp_mask); - nr_reclaimed = do_try_to_free_pages(zonelist, &sc); + nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink); trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); @@ -2344,6 +2354,9 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, .order = order, .mem_cgroup = NULL, }; + struct shrink_control shrink = { + .gfp_mask = sc.gfp_mask, + }; loop_again: total_scanned = 0; sc.nr_reclaimed = 0; 
@@ -2443,8 +2456,8 @@ loop_again: end_zone, 0)) shrink_zone(priority, zone, &sc); reclaim_state->reclaimed_slab = 0; - nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, - lru_pages); + shrink.nr_scanned = sc.nr_scanned; + nr_slab = shrink_slab(&shrink, lru_pages); sc.nr_reclaimed += reclaim_state->reclaimed_slab; total_scanned += sc.nr_scanned; @@ -2796,7 +2809,10 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) .swappiness = vm_swappiness, .order = 0, }; - struct zonelist * zonelist = node_zonelist(numa_node_id(), sc.gfp_mask); + struct shrink_control shrink = { + .gfp_mask = sc.gfp_mask, + }; + struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask); struct task_struct *p = current; unsigned long nr_reclaimed; @@ -2805,7 +2821,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) reclaim_state.reclaimed_slab = 0; p->reclaim_state = &reclaim_state; - nr_reclaimed = do_try_to_free_pages(zonelist, &sc); + nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink); p->reclaim_state = NULL; lockdep_clear_current_reclaim_state(); @@ -2980,6 +2996,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) .swappiness = vm_swappiness, .order = order, }; + struct shrink_control shrink = { + .gfp_mask = sc.gfp_mask, + }; unsigned long nr_slab_pages0, nr_slab_pages1; cond_resched(); @@ -3006,6 +3025,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) } nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE); + shrink.nr_scanned = sc.nr_scanned; if (nr_slab_pages0 > zone->min_slab_pages) { /* * shrink_slab() does not currently allow us to determine how @@ -3021,7 +3041,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) unsigned long lru_pages = zone_reclaimable_pages(zone); /* No reclaimable slab or very low memory pressure */ - if (!shrink_slab(sc.nr_scanned, gfp_mask, lru_pages)) + if (!shrink_slab(&shrink, lru_pages)) break; /* Freed enough memory */ -- cgit v1.2.3 From 1495f230fa7750479c79e3656286b9183d662077 Mon Sep 17 00:00:00 2001 From: Ying Han Date: Tue, 24 May 2011 17:12:27 -0700 Subject: vmscan: change shrinker API by passing shrink_control struct Change each shrinker's API by consolidating the existing parameters into shrink_control struct. This will simplify any further features added w/o touching each file of shrinker. 
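Before the per-subsystem churn below, it helps to see the shape of a converted shrinker in one place; everything named my_cache_* here is hypothetical, while struct shrink_control and its nr_to_scan/gfp_mask fields are what this patch passes to every callback:

#include <linux/mm.h>

/* hypothetical cache hooks, assumed to live elsewhere */
extern void my_cache_evict(int nr);
extern int my_cache_count(void);

/*
 * Old callback: int (*shrink)(struct shrinker *, int nr_to_scan, gfp_t gfp_mask);
 * New callback: int (*shrink)(struct shrinker *, struct shrink_control *sc);
 * The two former arguments are now read out of *sc.
 */
static int my_cache_shrink(struct shrinker *shrink, struct shrink_control *sc)
{
        if (!(sc->gfp_mask & __GFP_FS))
                return -1;              /* refuse to recurse into the FS */

        if (sc->nr_to_scan)
                my_cache_evict(sc->nr_to_scan);

        return my_cache_count();        /* objects still cached */
}

static struct shrinker my_cache_shrinker = {
        .shrink = my_cache_shrink,
        .seeks  = DEFAULT_SEEKS,
};

Registration through register_shrinker(&my_cache_shrinker) is unchanged; only the callback prototype moves to the consolidated argument.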
[akpm@linux-foundation.org: fix build] [akpm@linux-foundation.org: fix warning] [kosaki.motohiro@jp.fujitsu.com: fix up new shrinker API] [akpm@linux-foundation.org: fix xfs warning] [akpm@linux-foundation.org: update gfs2] Signed-off-by: Ying Han Cc: KOSAKI Motohiro Cc: Minchan Kim Acked-by: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Acked-by: Rik van Riel Cc: Johannes Weiner Cc: Hugh Dickins Cc: Dave Hansen Cc: Steven Whitehouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kvm/mmu.c | 3 ++- drivers/gpu/drm/i915/i915_gem.c | 9 +++------ drivers/gpu/drm/ttm/ttm_page_alloc.c | 4 +++- drivers/staging/zcache/zcache.c | 5 ++++- fs/dcache.c | 8 ++++++-- fs/drop_caches.c | 3 +-- fs/gfs2/glock.c | 5 ++++- fs/gfs2/quota.c | 12 +++++++----- fs/gfs2/quota.h | 4 +++- fs/inode.c | 6 +++++- fs/mbcache.c | 10 ++++++---- fs/nfs/dir.c | 5 ++++- fs/nfs/internal.h | 2 +- fs/quota/dquot.c | 5 ++++- fs/xfs/linux-2.6/xfs_buf.c | 4 ++-- fs/xfs/linux-2.6/xfs_sync.c | 5 +++-- fs/xfs/quota/xfs_qm.c | 6 +++--- include/linux/mm.h | 19 +++++++++++-------- mm/memory-failure.c | 3 +-- mm/vmscan.c | 34 +++++++++++++++++++--------------- net/sunrpc/auth.c | 4 +++- 21 files changed, 95 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 28418054b880..bd14bb4c8594 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3545,10 +3545,11 @@ static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, return kvm_mmu_prepare_zap_page(kvm, page, invalid_list); } -static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) +static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) { struct kvm *kvm; struct kvm *kvm_freed = NULL; + int nr_to_scan = sc->nr_to_scan; if (nr_to_scan == 0) goto out; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c6289034e29a..0b2e167d2bce 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -56,9 +56,7 @@ static int i915_gem_phys_pwrite(struct drm_device *dev, static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj); static int i915_gem_inactive_shrink(struct shrinker *shrinker, - int nr_to_scan, - gfp_t gfp_mask); - + struct shrink_control *sc); /* some bookkeeping */ static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, @@ -4092,9 +4090,7 @@ i915_gpu_is_active(struct drm_device *dev) } static int -i915_gem_inactive_shrink(struct shrinker *shrinker, - int nr_to_scan, - gfp_t gfp_mask) +i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) { struct drm_i915_private *dev_priv = container_of(shrinker, @@ -4102,6 +4098,7 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, mm.inactive_shrinker); struct drm_device *dev = dev_priv->dev; struct drm_i915_gem_object *obj, *next; + int nr_to_scan = sc->nr_to_scan; int cnt; if (!mutex_trylock(&dev->struct_mutex)) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index 9d9d92945f8c..d948575717bf 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -395,12 +395,14 @@ static int ttm_pool_get_num_unused_pages(void) /** * Callback for mm to request pool to reduce number of page held. 
*/ -static int ttm_pool_mm_shrink(struct shrinker *shrink, int shrink_pages, gfp_t gfp_mask) +static int ttm_pool_mm_shrink(struct shrinker *shrink, + struct shrink_control *sc) { static atomic_t start_pool = ATOMIC_INIT(0); unsigned i; unsigned pool_offset = atomic_add_return(1, &start_pool); struct ttm_page_pool *pool; + int shrink_pages = sc->nr_to_scan; pool_offset = pool_offset % NUM_POOLS; /* select start pool in round robin fashion */ diff --git a/drivers/staging/zcache/zcache.c b/drivers/staging/zcache/zcache.c index b8a2b30a1572..77ac2d4d3ef1 100644 --- a/drivers/staging/zcache/zcache.c +++ b/drivers/staging/zcache/zcache.c @@ -1181,9 +1181,12 @@ static bool zcache_freeze; /* * zcache shrinker interface (only useful for ephemeral pages, so zbud only) */ -static int shrink_zcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) +static int shrink_zcache_memory(struct shrinker *shrink, + struct shrink_control *sc) { int ret = -1; + int nr = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; if (nr >= 0) { if (!(gfp_mask & __GFP_FS)) diff --git a/fs/dcache.c b/fs/dcache.c index 18b2a1f10ed8..37f72ee5bf7c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1220,7 +1220,7 @@ void shrink_dcache_parent(struct dentry * parent) EXPORT_SYMBOL(shrink_dcache_parent); /* - * Scan `nr' dentries and return the number which remain. + * Scan `sc->nr_slab_to_reclaim' dentries and return the number which remain. * * We need to avoid reentering the filesystem if the caller is performing a * GFP_NOFS allocation attempt. One example deadlock is: @@ -1231,8 +1231,12 @@ EXPORT_SYMBOL(shrink_dcache_parent); * * In this case we return -1 to tell the caller that we baled. */ -static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) +static int shrink_dcache_memory(struct shrinker *shrink, + struct shrink_control *sc) { + int nr = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; + if (nr) { if (!(gfp_mask & __GFP_FS)) return -1; diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 440999c24353..c00e055b6282 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -42,11 +42,10 @@ static void drop_slab(void) int nr_objects; struct shrink_control shrink = { .gfp_mask = GFP_KERNEL, - .nr_scanned = 1000, }; do { - nr_objects = shrink_slab(&shrink, 1000); + nr_objects = shrink_slab(&shrink, 1000, 1000); } while (nr_objects > 10); } diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a2a6abbccc07..2792a790e50b 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1346,11 +1346,14 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) } -static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) +static int gfs2_shrink_glock_memory(struct shrinker *shrink, + struct shrink_control *sc) { struct gfs2_glock *gl; int may_demote; int nr_skipped = 0; + int nr = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; LIST_HEAD(skipped); if (nr == 0) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index e23d9864c418..42e8d23bc047 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -77,19 +78,20 @@ static LIST_HEAD(qd_lru_list); static atomic_t qd_lru_count = ATOMIC_INIT(0); static DEFINE_SPINLOCK(qd_lru_lock); -int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) +int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) { struct gfs2_quota_data *qd; struct gfs2_sbd *sdp; + int nr_to_scan = sc->nr_to_scan; - if (nr == 0) + if (nr_to_scan == 0) goto out; - if 
(!(gfp_mask & __GFP_FS)) + if (!(sc->gfp_mask & __GFP_FS)) return -1; spin_lock(&qd_lru_lock); - while (nr && !list_empty(&qd_lru_list)) { + while (nr_to_scan && !list_empty(&qd_lru_list)) { qd = list_entry(qd_lru_list.next, struct gfs2_quota_data, qd_reclaim); sdp = qd->qd_gl->gl_sbd; @@ -110,7 +112,7 @@ int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) spin_unlock(&qd_lru_lock); kmem_cache_free(gfs2_quotad_cachep, qd); spin_lock(&qd_lru_lock); - nr--; + nr_to_scan--; } spin_unlock(&qd_lru_lock); diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index e7d236ca48bd..90bf1c302a98 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -12,6 +12,7 @@ struct gfs2_inode; struct gfs2_sbd; +struct shrink_control; #define NO_QUOTA_CHANGE ((u32)-1) @@ -51,7 +52,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) return ret; } -extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask); +extern int gfs2_shrink_qd_memory(struct shrinker *shrink, + struct shrink_control *sc); extern const struct quotactl_ops gfs2_quotactl_ops; #endif /* __QUOTA_DOT_H__ */ diff --git a/fs/inode.c b/fs/inode.c index 88aa3bcc7681..990d284877a1 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -751,8 +751,12 @@ static void prune_icache(int nr_to_scan) * This function is passed the number of inodes to scan, and it returns the * total number of remaining possibly-reclaimable inodes. */ -static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) +static int shrink_icache_memory(struct shrinker *shrink, + struct shrink_control *sc) { + int nr = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; + if (nr) { /* * Nasty deadlock avoidance. We may hold various FS locks, diff --git a/fs/mbcache.c b/fs/mbcache.c index 2f174be06555..8c32ef3ba88e 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -90,7 +90,8 @@ static DEFINE_SPINLOCK(mb_cache_spinlock); * What the mbcache registers as to get shrunk dynamically. */ -static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask); +static int mb_cache_shrink_fn(struct shrinker *shrink, + struct shrink_control *sc); static struct shrinker mb_cache_shrinker = { .shrink = mb_cache_shrink_fn, @@ -156,18 +157,19 @@ forget: * gets low. * * @shrink: (ignored) - * @nr_to_scan: Number of objects to scan - * @gfp_mask: (ignored) + * @sc: shrink_control passed from reclaim * * Returns the number of objects which are present in the cache. */ static int -mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) +mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc) { LIST_HEAD(free_list); struct mb_cache *cache; struct mb_cache_entry *entry, *tmp; int count = 0; + int nr_to_scan = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; mb_debug("trying to free %d entries", nr_to_scan); spin_lock(&mb_cache_spinlock); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 7237672216c8..424e47773a84 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2042,11 +2042,14 @@ static void nfs_access_free_list(struct list_head *head) } } -int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) +int nfs_access_cache_shrinker(struct shrinker *shrink, + struct shrink_control *sc) { LIST_HEAD(head); struct nfs_inode *nfsi, *next; struct nfs_access_entry *cache; + int nr_to_scan = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) return (nr_to_scan == 0) ? 
0 : -1; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ce118ce885dd..2df6ca7b5898 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -234,7 +234,7 @@ extern int nfs_init_client(struct nfs_client *clp, /* dir.c */ extern int nfs_access_cache_shrinker(struct shrinker *shrink, - int nr_to_scan, gfp_t gfp_mask); + struct shrink_control *sc); /* inode.c */ extern struct workqueue_struct *nfsiod_workqueue; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index d3c032f5fa0a..5b572c89e6c4 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -691,8 +691,11 @@ static void prune_dqcache(int count) * This is called from kswapd when we think we need some * more memory */ -static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) +static int shrink_dqcache_memory(struct shrinker *shrink, + struct shrink_control *sc) { + int nr = sc->nr_to_scan; + if (nr) { spin_lock(&dq_list_lock); prune_dqcache(nr); diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 52b2b5da566e..5e68099db2a5 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1422,12 +1422,12 @@ restart: int xfs_buftarg_shrink( struct shrinker *shrink, - int nr_to_scan, - gfp_t mask) + struct shrink_control *sc) { struct xfs_buftarg *btp = container_of(shrink, struct xfs_buftarg, bt_shrinker); struct xfs_buf *bp; + int nr_to_scan = sc->nr_to_scan; LIST_HEAD(dispose); if (!nr_to_scan) diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index cb1bb2080e44..8ecad5ff9f9b 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -1032,13 +1032,14 @@ xfs_reclaim_inodes( static int xfs_reclaim_inode_shrink( struct shrinker *shrink, - int nr_to_scan, - gfp_t gfp_mask) + struct shrink_control *sc) { struct xfs_mount *mp; struct xfs_perag *pag; xfs_agnumber_t ag; int reclaimable; + int nr_to_scan = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; mp = container_of(shrink, struct xfs_mount, m_inode_shrink); if (nr_to_scan) { diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 69228aa8605a..b94dace4e785 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -60,7 +60,7 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); STATIC int xfs_qm_init_quotainos(xfs_mount_t *); STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); -STATIC int xfs_qm_shake(struct shrinker *, int, gfp_t); +STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); static struct shrinker xfs_qm_shaker = { .shrink = xfs_qm_shake, @@ -2009,10 +2009,10 @@ xfs_qm_shake_freelist( STATIC int xfs_qm_shake( struct shrinker *shrink, - int nr_to_scan, - gfp_t gfp_mask) + struct shrink_control *sc) { int ndqused, nfree, n; + gfp_t gfp_mask = sc->gfp_mask; if (!kmem_shake_allow(gfp_mask)) return 0; diff --git a/include/linux/mm.h b/include/linux/mm.h index 32cfa9602d00..5cbbf78eaac7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1166,18 +1166,20 @@ static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm) * We consolidate the values for easier extention later. */ struct shrink_control { - unsigned long nr_scanned; gfp_t gfp_mask; + + /* How many slab objects shrinker() should scan and try to reclaim */ + unsigned long nr_to_scan; }; /* * A callback you can register to apply pressure to ageable caches. * - * 'shrink' is passed a count 'nr_to_scan' and a 'gfpmask'. It should - * look through the least-recently-used 'nr_to_scan' entries and - * attempt to free them up. 
It should return the number of objects - * which remain in the cache. If it returns -1, it means it cannot do - * any scanning at this time (eg. there is a risk of deadlock). + * 'sc' is passed shrink_control which includes a count 'nr_to_scan' + * and a 'gfpmask'. It should look through the least-recently-used + * 'nr_to_scan' entries and attempt to free them up. It should return + * the number of objects which remain in the cache. If it returns -1, it means + * it cannot do any scanning at this time (eg. there is a risk of deadlock). * * The 'gfpmask' refers to the allocation we are currently trying to * fulfil. @@ -1186,7 +1188,7 @@ struct shrink_control { * querying the cache size, so a fastpath for that case is appropriate. */ struct shrinker { - int (*shrink)(struct shrinker *, int nr_to_scan, gfp_t gfp_mask); + int (*shrink)(struct shrinker *, struct shrink_control *sc); int seeks; /* seeks to recreate an obj */ /* These are for internal use */ @@ -1640,7 +1642,8 @@ int in_gate_area_no_mm(unsigned long addr); int drop_caches_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); unsigned long shrink_slab(struct shrink_control *shrink, - unsigned long lru_pages); + unsigned long nr_pages_scanned, + unsigned long lru_pages); #ifndef CONFIG_MMU #define randomize_va_space 0 diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 341341b2b47b..5c8f7e08928d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -241,10 +241,9 @@ void shake_page(struct page *p, int access) do { struct shrink_control shrink = { .gfp_mask = GFP_KERNEL, - .nr_scanned = 1000, }; - nr = shrink_slab(&shrink, 1000); + nr = shrink_slab(&shrink, 1000, 1000); if (page_count(p) == 1) break; } while (nr > 10); diff --git a/mm/vmscan.c b/mm/vmscan.c index e4e245ed1a5b..7e0116150dc7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -202,6 +202,14 @@ void unregister_shrinker(struct shrinker *shrinker) } EXPORT_SYMBOL(unregister_shrinker); +static inline int do_shrinker_shrink(struct shrinker *shrinker, + struct shrink_control *sc, + unsigned long nr_to_scan) +{ + sc->nr_to_scan = nr_to_scan; + return (*shrinker->shrink)(shrinker, sc); +} + #define SHRINK_BATCH 128 /* * Call the shrink functions to age shrinkable caches @@ -223,15 +231,14 @@ EXPORT_SYMBOL(unregister_shrinker); * Returns the number of slab objects which we shrunk. 
*/ unsigned long shrink_slab(struct shrink_control *shrink, + unsigned long nr_pages_scanned, unsigned long lru_pages) { struct shrinker *shrinker; unsigned long ret = 0; - unsigned long scanned = shrink->nr_scanned; - gfp_t gfp_mask = shrink->gfp_mask; - if (scanned == 0) - scanned = SWAP_CLUSTER_MAX; + if (nr_pages_scanned == 0) + nr_pages_scanned = SWAP_CLUSTER_MAX; if (!down_read_trylock(&shrinker_rwsem)) { /* Assume we'll be able to shrink next time */ @@ -244,8 +251,8 @@ unsigned long shrink_slab(struct shrink_control *shrink, unsigned long total_scan; unsigned long max_pass; - max_pass = (*shrinker->shrink)(shrinker, 0, gfp_mask); - delta = (4 * scanned) / shrinker->seeks; + max_pass = do_shrinker_shrink(shrinker, shrink, 0); + delta = (4 * nr_pages_scanned) / shrinker->seeks; delta *= max_pass; do_div(delta, lru_pages + 1); shrinker->nr += delta; @@ -272,9 +279,9 @@ unsigned long shrink_slab(struct shrink_control *shrink, int shrink_ret; int nr_before; - nr_before = (*shrinker->shrink)(shrinker, 0, gfp_mask); - shrink_ret = (*shrinker->shrink)(shrinker, this_scan, - gfp_mask); + nr_before = do_shrinker_shrink(shrinker, shrink, 0); + shrink_ret = do_shrinker_shrink(shrinker, shrink, + this_scan); if (shrink_ret == -1) break; if (shrink_ret < nr_before) @@ -2072,8 +2079,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, lru_pages += zone_reclaimable_pages(zone); } - shrink->nr_scanned = sc->nr_scanned; - shrink_slab(shrink, lru_pages); + shrink_slab(shrink, sc->nr_scanned, lru_pages); if (reclaim_state) { sc->nr_reclaimed += reclaim_state->reclaimed_slab; reclaim_state->reclaimed_slab = 0; @@ -2456,8 +2462,7 @@ loop_again: end_zone, 0)) shrink_zone(priority, zone, &sc); reclaim_state->reclaimed_slab = 0; - shrink.nr_scanned = sc.nr_scanned; - nr_slab = shrink_slab(&shrink, lru_pages); + nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages); sc.nr_reclaimed += reclaim_state->reclaimed_slab; total_scanned += sc.nr_scanned; @@ -3025,7 +3030,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) } nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE); - shrink.nr_scanned = sc.nr_scanned; if (nr_slab_pages0 > zone->min_slab_pages) { /* * shrink_slab() does not currently allow us to determine how @@ -3041,7 +3045,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) unsigned long lru_pages = zone_reclaimable_pages(zone); /* No reclaimable slab or very low memory pressure */ - if (!shrink_slab(&shrink, lru_pages)) + if (!shrink_slab(&shrink, sc.nr_scanned, lru_pages)) break; /* Freed enough memory */ diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 67e31276682a..cd6e4aa19dbf 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -326,10 +326,12 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) * Run memory cache shrinker. */ static int -rpcauth_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) +rpcauth_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc) { LIST_HEAD(free); int res; + int nr_to_scan = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) return (nr_to_scan == 0) ? 
0 : -1; -- cgit v1.2.3 From bf4e8902ee5080f5d2c810b639e7e778c8082b52 Mon Sep 17 00:00:00 2001 From: Daniel Kiper Date: Tue, 24 May 2011 17:12:32 -0700 Subject: mm: enable set_page_section() only if CONFIG_SPARSEMEM and !CONFIG_SPARSEMEM_VMEMMAP set_page_section() is meaningful only in CONFIG_SPARSEMEM and !CONFIG_SPARSEMEM_VMEMMAP context. Move it to proper place and amend accordingly functions which are using it. Signed-off-by: Daniel Kiper Acked-by: Dave Hansen Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5cbbf78eaac7..6702171f8d0a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -682,6 +682,12 @@ static inline struct zone *page_zone(struct page *page) } #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) +static inline void set_page_section(struct page *page, unsigned long section) +{ + page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT); + page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; +} + static inline unsigned long page_to_section(struct page *page) { return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; @@ -700,18 +706,14 @@ static inline void set_page_node(struct page *page, unsigned long node) page->flags |= (node & NODES_MASK) << NODES_PGSHIFT; } -static inline void set_page_section(struct page *page, unsigned long section) -{ - page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT); - page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; -} - static inline void set_page_links(struct page *page, enum zone_type zone, unsigned long node, unsigned long pfn) { set_page_zone(page, zone); set_page_node(page, node); +#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) set_page_section(page, pfn_to_section_nr(pfn)); +#endif } /* -- cgit v1.2.3 From e3c40f379a144f35e53864a2cd970e238071afd7 Mon Sep 17 00:00:00 2001 From: Daniel Kiper Date: Tue, 24 May 2011 17:12:33 -0700 Subject: mm: pfn_to_section_nr()/section_nr_to_pfn() is valid only in CONFIG_SPARSEMEM context pfn_to_section_nr()/section_nr_to_pfn() is valid only in CONFIG_SPARSEMEM context. Move it to proper place. Signed-off-by: Daniel Kiper Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e56f835274c9..d7152002e18d 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -928,9 +928,6 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn) #define pfn_to_nid(pfn) (0) #endif -#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT) -#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT) - #ifdef CONFIG_SPARSEMEM /* @@ -956,6 +953,9 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn) #error Allocator MAX_ORDER exceeds SECTION_SIZE #endif +#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT) +#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT) + struct page; struct page_cgroup; struct mem_section { -- cgit v1.2.3 From ba93fa81b5f2bf0076407a3a777fff122ce16220 Mon Sep 17 00:00:00 2001 From: Daniel Kiper Date: Tue, 24 May 2011 17:12:34 -0700 Subject: mm: do not define PFN_SECTION_SHIFT if !CONFIG_SPARSEMEM Do not define PFN_SECTION_SHIFT if !CONFIG_SPARSEMEM. 
Signed-off-by: Daniel Kiper Acked-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6702171f8d0a..32309f6542e8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -605,10 +605,6 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) #define NODE_NOT_IN_PAGE_FLAGS #endif -#ifndef PFN_SECTION_SHIFT -#define PFN_SECTION_SHIFT 0 -#endif - /* * Define the bit shifts to access each section. For non-existent * sections we define the shift as 0; that plus a 0 mask ensures -- cgit v1.2.3 From 172703b08cd05e2d5196ac13e94cc186f629d58b Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 24 May 2011 17:12:36 -0700 Subject: mm: delete non-atomic mm counter implementation The problem with having two different types of counters is that developers adding new code need to keep in mind whether it's safe to use both the atomic and non-atomic implementations. For example, when adding new callers of the *_mm_counter() functions a developer needs to ensure that those paths are always executed with page_table_lock held, in case we're using the non-atomic implementation of mm counters. Hugh Dickins introduced the atomic mm counters in commit f412ac08c986 ("[PATCH] mm: fix rss and mmlist locking"). When asked why he left the non-atomic counters around he said, | The only reason was to avoid adding costly atomic operations into a | configuration that had no need for them there: the page_table_lock | sufficed. | | Certainly it would be simpler just to delete the non-atomic variant. | | And I think it's fair to say that any configuration on which we're | measuring performance to that degree (rather than "does it boot fast?" | type measurements), would already be going the split ptlocks route. Removing the non-atomic counters eases the maintenance burden because developers no longer have to mindful of the two implementations when using *_mm_counter(). Note that all architectures provide a means of atomically updating atomic_long_t variables, even if they have to revert to the generic spinlock implementation because they don't support 64-bit atomic instructions (see lib/atomic64.c). Signed-off-by: Matt Fleming Acked-by: Dave Hansen Acked-by: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 44 +++++++------------------------------------- include/linux/mm_types.h | 9 +++------ 2 files changed, 10 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 32309f6542e8..48e458190d88 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1053,65 +1053,35 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, /* * per-process(per-mm_struct) statistics. */ -#if defined(SPLIT_RSS_COUNTING) -/* - * The mm counters are not protected by its page_table_lock, - * so must be incremented atomically. 
- */ static inline void set_mm_counter(struct mm_struct *mm, int member, long value) { atomic_long_set(&mm->rss_stat.count[member], value); } +#if defined(SPLIT_RSS_COUNTING) unsigned long get_mm_counter(struct mm_struct *mm, int member); - -static inline void add_mm_counter(struct mm_struct *mm, int member, long value) -{ - atomic_long_add(value, &mm->rss_stat.count[member]); -} - -static inline void inc_mm_counter(struct mm_struct *mm, int member) -{ - atomic_long_inc(&mm->rss_stat.count[member]); -} - -static inline void dec_mm_counter(struct mm_struct *mm, int member) -{ - atomic_long_dec(&mm->rss_stat.count[member]); -} - -#else /* !USE_SPLIT_PTLOCKS */ -/* - * The mm counters are protected by its page_table_lock, - * so can be incremented directly. - */ -static inline void set_mm_counter(struct mm_struct *mm, int member, long value) -{ - mm->rss_stat.count[member] = value; -} - +#else static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) { - return mm->rss_stat.count[member]; + return atomic_long_read(&mm->rss_stat.count[member]); } +#endif static inline void add_mm_counter(struct mm_struct *mm, int member, long value) { - mm->rss_stat.count[member] += value; + atomic_long_add(value, &mm->rss_stat.count[member]); } static inline void inc_mm_counter(struct mm_struct *mm, int member) { - mm->rss_stat.count[member]++; + atomic_long_inc(&mm->rss_stat.count[member]); } static inline void dec_mm_counter(struct mm_struct *mm, int member) { - mm->rss_stat.count[member]--; + atomic_long_dec(&mm->rss_stat.count[member]); } -#endif /* !USE_SPLIT_PTLOCKS */ - static inline unsigned long get_mm_rss(struct mm_struct *mm) { return get_mm_counter(mm, MM_FILEPAGES) + diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c2f9ea7922f4..071d459e866b 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -204,19 +204,16 @@ enum { #if USE_SPLIT_PTLOCKS && defined(CONFIG_MMU) #define SPLIT_RSS_COUNTING -struct mm_rss_stat { - atomic_long_t count[NR_MM_COUNTERS]; -}; /* per-thread cached information, */ struct task_rss_stat { int events; /* for synchronization threshold */ int count[NR_MM_COUNTERS]; }; -#else /* !USE_SPLIT_PTLOCKS */ +#endif /* USE_SPLIT_PTLOCKS */ + struct mm_rss_stat { - unsigned long count[NR_MM_COUNTERS]; + atomic_long_t count[NR_MM_COUNTERS]; }; -#endif /* !USE_SPLIT_PTLOCKS */ struct mm_struct { struct vm_area_struct * mmap; /* list of VMAs */ -- cgit v1.2.3 From 8bba154ef29e16331e578029e9050c74b87b7ff9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 24 May 2011 17:12:37 -0700 Subject: memblock/nobootmem: allow alloc_bootmem() to take 0 as low limit The bootmem wrapper with memblock supports top-down now, so we do not need to set the low limit to __pa(MAX_DMA_ADDRESS). The logic should be: good to allocate above __pa(MAX_DMA_ADDRESS), but it is ok if we can not find memory above 16M on system that has a small amount of RAM. Signed-off-by: Yinghai LU Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: Olaf Hering Cc: Tejun Heo Cc: Lucas De Marchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 01eca1794e14..ab344a521105 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -99,24 +99,31 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long align, unsigned long goal); +#ifdef CONFIG_NO_BOOTMEM +/* We are using top down, so it is safe to use 0 here */ +#define BOOTMEM_LOW_LIMIT 0 +#else +#define BOOTMEM_LOW_LIMIT __pa(MAX_DMA_ADDRESS) +#endif + #define alloc_bootmem(x) \ - __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_align(x, align) \ - __alloc_bootmem(x, align, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem(x, align, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_nopanic(x) \ - __alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_pages(x) \ - __alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_pages_nopanic(x) \ - __alloc_bootmem_nopanic(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_nopanic(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_node(pgdat, x) \ - __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_node_nopanic(pgdat, x) \ - __alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_pages_node(pgdat, x) \ - __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_node(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_pages_node_nopanic(pgdat, x) \ - __alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_low(x) \ __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) -- cgit v1.2.3 From b09e0fa4b4ea66266058eead43350bd7d55fec67 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 24 May 2011 17:12:39 -0700 Subject: tmpfs: implement generic xattr support Implement generic xattrs for tmpfs filesystems. The Feodra project, while trying to replace suid apps with file capabilities, realized that tmpfs, which is used on the build systems, does not support file capabilities and thus cannot be used to build packages which use file capabilities. Xattrs are also needed for overlayfs. The xattr interface is a bit odd. If a filesystem does not implement any {get,set,list}xattr functions the VFS will call into some random LSM hooks and the running LSM can then implement some method for handling xattrs. SELinux for example provides a method to support security.selinux but no other security.* xattrs. As it stands today when one enables CONFIG_TMPFS_POSIX_ACL tmpfs will have xattr handler routines specifically to handle acls. Because of this tmpfs would loose the VFS/LSM helpers to support the running LSM. To make up for that tmpfs had stub functions that did nothing but call into the LSM hooks which implement the helpers. 
This new patch does not use the LSM fallback functions and instead just implements a native get/set/list xattr feature for the full security.* and trusted.* namespace like a normal filesystem. This means that tmpfs can now support both security.selinux and security.capability, which was not previously possible. The basic implementation is that I attach a: struct shmem_xattr { struct list_head list; /* anchored by shmem_inode_info->xattr_list */ char *name; size_t size; char value[0]; }; Into the struct shmem_inode_info for each xattr that is set. This implementation could easily support the user.* namespace as well, except some care needs to be taken to prevent large amounts of unswappable memory being allocated for unprivileged users. [mszeredi@suse.cz: new config option, suport trusted.*, support symlinks] Signed-off-by: Eric Paris Signed-off-by: Miklos Szeredi Acked-by: Serge Hallyn Tested-by: Serge Hallyn Cc: Kyle McMartin Acked-by: Hugh Dickins Tested-by: Jordi Pujol Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Kconfig | 18 ++- include/linux/shmem_fs.h | 8 +- mm/shmem.c | 320 +++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 290 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/fs/Kconfig b/fs/Kconfig index f3aa9b08b228..979992dcb386 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -121,9 +121,25 @@ config TMPFS See for details. +config TMPFS_XATTR + bool "Tmpfs extended attributes" + depends on TMPFS + default n + help + Extended attributes are name:value pairs associated with inodes by + the kernel or by users (see the attr(5) manual page, or visit + for details). + + Currently this enables support for the trusted.* and + security.* namespaces. + + If unsure, say N. + + You need this for POSIX ACL support on tmpfs. 
+ config TMPFS_POSIX_ACL bool "Tmpfs POSIX Access Control Lists" - depends on TMPFS + depends on TMPFS_XATTR select GENERIC_ACL help POSIX Access Control Lists (ACLs) support permissions for users and diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 399be5ad2f99..2b7fec840517 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -9,6 +9,8 @@ #define SHMEM_NR_DIRECT 16 +#define SHMEM_SYMLINK_INLINE_LEN (SHMEM_NR_DIRECT * sizeof(swp_entry_t)) + struct shmem_inode_info { spinlock_t lock; unsigned long flags; @@ -17,8 +19,12 @@ struct shmem_inode_info { unsigned long next_index; /* highest alloced index + 1 */ struct shared_policy policy; /* NUMA memory alloc policy */ struct page *i_indirect; /* top indirect blocks page */ - swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* first blocks */ + union { + swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* first blocks */ + char inline_symlink[SHMEM_SYMLINK_INLINE_LEN]; + }; struct list_head swaplist; /* chain of maybes on swap */ + struct list_head xattr_list; /* list of shmem_xattr */ struct inode vfs_inode; }; diff --git a/mm/shmem.c b/mm/shmem.c index ba4ad28b7db6..69edb45a9f28 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -99,6 +99,13 @@ static struct vfsmount *shm_mnt; /* Pretend that each entry is of this size in directory's i_size */ #define BOGO_DIRENT_SIZE 20 +struct shmem_xattr { + struct list_head list; /* anchored by shmem_inode_info->xattr_list */ + char *name; /* xattr name */ + size_t size; + char value[0]; +}; + /* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */ enum sgp_type { SGP_READ, /* don't exceed i_size, don't allocate page */ @@ -822,6 +829,7 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) static void shmem_evict_inode(struct inode *inode) { struct shmem_inode_info *info = SHMEM_I(inode); + struct shmem_xattr *xattr, *nxattr; if (inode->i_mapping->a_ops == &shmem_aops) { truncate_inode_pages(inode->i_mapping, 0); @@ -834,6 +842,11 @@ static void shmem_evict_inode(struct inode *inode) mutex_unlock(&shmem_swaplist_mutex); } } + + list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) { + kfree(xattr->name); + kfree(xattr); + } BUG_ON(inode->i_blocks); shmem_free_inode(inode->i_sb); end_writeback(inode); @@ -1615,6 +1628,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode spin_lock_init(&info->lock); info->flags = flags & VM_NORESERVE; INIT_LIST_HEAD(&info->swaplist); + INIT_LIST_HEAD(&info->xattr_list); cache_no_acl(inode); switch (mode & S_IFMT) { @@ -2014,9 +2028,9 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s info = SHMEM_I(inode); inode->i_size = len-1; - if (len <= (char *)inode - (char *)info) { + if (len <= SHMEM_SYMLINK_INLINE_LEN) { /* do it inline */ - memcpy(info, symname, len); + memcpy(info->inline_symlink, symname, len); inode->i_op = &shmem_symlink_inline_operations; } else { error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL); @@ -2042,7 +2056,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) { - nd_set_link(nd, (char *)SHMEM_I(dentry->d_inode)); + nd_set_link(nd, SHMEM_I(dentry->d_inode)->inline_symlink); return NULL; } @@ -2066,63 +2080,253 @@ static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *co } } -static const struct inode_operations shmem_symlink_inline_operations = { - .readlink = generic_readlink, - 
.follow_link = shmem_follow_link_inline, -}; - -static const struct inode_operations shmem_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = shmem_follow_link, - .put_link = shmem_put_link, -}; - -#ifdef CONFIG_TMPFS_POSIX_ACL +#ifdef CONFIG_TMPFS_XATTR /* - * Superblocks without xattr inode operations will get security.* xattr - * support from the VFS "for free". As soon as we have any other xattrs + * Superblocks without xattr inode operations may get some security.* xattr + * support from the LSM "for free". As soon as we have any other xattrs * like ACLs, we also need to implement the security.* handlers at * filesystem level, though. */ -static size_t shmem_xattr_security_list(struct dentry *dentry, char *list, - size_t list_len, const char *name, - size_t name_len, int handler_flags) +static int shmem_xattr_get(struct dentry *dentry, const char *name, + void *buffer, size_t size) { - return security_inode_listsecurity(dentry->d_inode, list, list_len); -} + struct shmem_inode_info *info; + struct shmem_xattr *xattr; + int ret = -ENODATA; -static int shmem_xattr_security_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int handler_flags) -{ - if (strcmp(name, "") == 0) - return -EINVAL; - return xattr_getsecurity(dentry->d_inode, name, buffer, size); + info = SHMEM_I(dentry->d_inode); + + spin_lock(&info->lock); + list_for_each_entry(xattr, &info->xattr_list, list) { + if (strcmp(name, xattr->name)) + continue; + + ret = xattr->size; + if (buffer) { + if (size < xattr->size) + ret = -ERANGE; + else + memcpy(buffer, xattr->value, xattr->size); + } + break; + } + spin_unlock(&info->lock); + return ret; } -static int shmem_xattr_security_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int handler_flags) +static int shmem_xattr_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { - if (strcmp(name, "") == 0) - return -EINVAL; - return security_inode_setsecurity(dentry->d_inode, name, value, - size, flags); + struct inode *inode = dentry->d_inode; + struct shmem_inode_info *info = SHMEM_I(inode); + struct shmem_xattr *xattr; + struct shmem_xattr *new_xattr = NULL; + size_t len; + int err = 0; + + /* value == NULL means remove */ + if (value) { + /* wrap around? 
*/ + len = sizeof(*new_xattr) + size; + if (len <= sizeof(*new_xattr)) + return -ENOMEM; + + new_xattr = kmalloc(len, GFP_KERNEL); + if (!new_xattr) + return -ENOMEM; + + new_xattr->name = kstrdup(name, GFP_KERNEL); + if (!new_xattr->name) { + kfree(new_xattr); + return -ENOMEM; + } + + new_xattr->size = size; + memcpy(new_xattr->value, value, size); + } + + spin_lock(&info->lock); + list_for_each_entry(xattr, &info->xattr_list, list) { + if (!strcmp(name, xattr->name)) { + if (flags & XATTR_CREATE) { + xattr = new_xattr; + err = -EEXIST; + } else if (new_xattr) { + list_replace(&xattr->list, &new_xattr->list); + } else { + list_del(&xattr->list); + } + goto out; + } + } + if (flags & XATTR_REPLACE) { + xattr = new_xattr; + err = -ENODATA; + } else { + list_add(&new_xattr->list, &info->xattr_list); + xattr = NULL; + } +out: + spin_unlock(&info->lock); + if (xattr) + kfree(xattr->name); + kfree(xattr); + return err; } -static const struct xattr_handler shmem_xattr_security_handler = { - .prefix = XATTR_SECURITY_PREFIX, - .list = shmem_xattr_security_list, - .get = shmem_xattr_security_get, - .set = shmem_xattr_security_set, -}; static const struct xattr_handler *shmem_xattr_handlers[] = { +#ifdef CONFIG_TMPFS_POSIX_ACL &generic_acl_access_handler, &generic_acl_default_handler, - &shmem_xattr_security_handler, +#endif NULL }; + +static int shmem_xattr_validate(const char *name) +{ + struct { const char *prefix; size_t len; } arr[] = { + { XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN }, + { XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN } + }; + int i; + + for (i = 0; i < ARRAY_SIZE(arr); i++) { + size_t preflen = arr[i].len; + if (strncmp(name, arr[i].prefix, preflen) == 0) { + if (!name[preflen]) + return -EINVAL; + return 0; + } + } + return -EOPNOTSUPP; +} + +static ssize_t shmem_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size) +{ + int err; + + /* + * If this is a request for a synthetic attribute in the system.* + * namespace use the generic infrastructure to resolve a handler + * for it via sb->s_xattr. + */ + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return generic_getxattr(dentry, name, buffer, size); + + err = shmem_xattr_validate(name); + if (err) + return err; + + return shmem_xattr_get(dentry, name, buffer, size); +} + +static int shmem_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + int err; + + /* + * If this is a request for a synthetic attribute in the system.* + * namespace use the generic infrastructure to resolve a handler + * for it via sb->s_xattr. + */ + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return generic_setxattr(dentry, name, value, size, flags); + + err = shmem_xattr_validate(name); + if (err) + return err; + + if (size == 0) + value = ""; /* empty EA, do not remove */ + + return shmem_xattr_set(dentry, name, value, size, flags); + +} + +static int shmem_removexattr(struct dentry *dentry, const char *name) +{ + int err; + + /* + * If this is a request for a synthetic attribute in the system.* + * namespace use the generic infrastructure to resolve a handler + * for it via sb->s_xattr. 
+ */ + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return generic_removexattr(dentry, name); + + err = shmem_xattr_validate(name); + if (err) + return err; + + return shmem_xattr_set(dentry, name, NULL, 0, XATTR_REPLACE); +} + +static bool xattr_is_trusted(const char *name) +{ + return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); +} + +static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ + bool trusted = capable(CAP_SYS_ADMIN); + struct shmem_xattr *xattr; + struct shmem_inode_info *info; + size_t used = 0; + + info = SHMEM_I(dentry->d_inode); + + spin_lock(&info->lock); + list_for_each_entry(xattr, &info->xattr_list, list) { + size_t len; + + /* skip "trusted." attributes for unprivileged callers */ + if (!trusted && xattr_is_trusted(xattr->name)) + continue; + + len = strlen(xattr->name) + 1; + used += len; + if (buffer) { + if (size < used) { + used = -ERANGE; + break; + } + memcpy(buffer, xattr->name, len); + buffer += len; + } + } + spin_unlock(&info->lock); + + return used; +} +#endif /* CONFIG_TMPFS_XATTR */ + +static const struct inode_operations shmem_symlink_inline_operations = { + .readlink = generic_readlink, + .follow_link = shmem_follow_link_inline, +#ifdef CONFIG_TMPFS_XATTR + .setxattr = shmem_setxattr, + .getxattr = shmem_getxattr, + .listxattr = shmem_listxattr, + .removexattr = shmem_removexattr, +#endif +}; + +static const struct inode_operations shmem_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = shmem_follow_link, + .put_link = shmem_put_link, +#ifdef CONFIG_TMPFS_XATTR + .setxattr = shmem_setxattr, + .getxattr = shmem_getxattr, + .listxattr = shmem_listxattr, + .removexattr = shmem_removexattr, #endif +}; static struct dentry *shmem_get_parent(struct dentry *child) { @@ -2402,8 +2606,10 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = TMPFS_MAGIC; sb->s_op = &shmem_ops; sb->s_time_gran = 1; -#ifdef CONFIG_TMPFS_POSIX_ACL +#ifdef CONFIG_TMPFS_XATTR sb->s_xattr = shmem_xattr_handlers; +#endif +#ifdef CONFIG_TMPFS_POSIX_ACL sb->s_flags |= MS_POSIXACL; #endif @@ -2501,11 +2707,13 @@ static const struct file_operations shmem_file_operations = { static const struct inode_operations shmem_inode_operations = { .setattr = shmem_notify_change, .truncate_range = shmem_truncate_range, +#ifdef CONFIG_TMPFS_XATTR + .setxattr = shmem_setxattr, + .getxattr = shmem_getxattr, + .listxattr = shmem_listxattr, + .removexattr = shmem_removexattr, +#endif #ifdef CONFIG_TMPFS_POSIX_ACL - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = generic_listxattr, - .removexattr = generic_removexattr, .check_acl = generic_check_acl, #endif @@ -2523,23 +2731,27 @@ static const struct inode_operations shmem_dir_inode_operations = { .mknod = shmem_mknod, .rename = shmem_rename, #endif +#ifdef CONFIG_TMPFS_XATTR + .setxattr = shmem_setxattr, + .getxattr = shmem_getxattr, + .listxattr = shmem_listxattr, + .removexattr = shmem_removexattr, +#endif #ifdef CONFIG_TMPFS_POSIX_ACL .setattr = shmem_notify_change, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = generic_listxattr, - .removexattr = generic_removexattr, .check_acl = generic_check_acl, #endif }; static const struct inode_operations shmem_special_inode_operations = { +#ifdef CONFIG_TMPFS_XATTR + .setxattr = shmem_setxattr, + .getxattr = shmem_getxattr, + .listxattr = shmem_listxattr, + .removexattr = shmem_removexattr, +#endif #ifdef 
CONFIG_TMPFS_POSIX_ACL .setattr = shmem_notify_change, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = generic_listxattr, - .removexattr = generic_removexattr, .check_acl = generic_check_acl, #endif }; -- cgit v1.2.3 From c856507f2b2b47a49d8587afb58930b463f6bff4 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 24 May 2011 17:12:40 -0700 Subject: mm: remove last trace of shmem_get_unmapped_area Remove noMMU declaration of shmem_get_unmapped_area() from mm.h: it fell out of use in 2.6.21 and ceased to exist in 2.6.29. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 48e458190d88..8eb969ebf904 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -873,14 +873,6 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user); struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); int shmem_zero_setup(struct vm_area_struct *); -#ifndef CONFIG_MMU -extern unsigned long shmem_get_unmapped_area(struct file *file, - unsigned long addr, - unsigned long len, - unsigned long pgoff, - unsigned long flags); -#endif - extern int can_do_mlock(void); extern int user_shm_lock(size_t, struct user_struct *); extern void user_shm_unlock(size_t, struct user_struct *); -- cgit v1.2.3 From d98f6cb67fb5b9376d4957d7ba9f32eac35c2e08 Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Tue, 24 May 2011 17:12:41 -0700 Subject: mm: export get_vma_policy() In commit 48fce3429d ("mempolicies: unexport get_vma_policy()") get_vma_policy() was marked static as all clients were local to mempolicy.c. However, the decision to generate /proc/pid/numa_maps in the numa memory policy code and outside the procfs subsystem introduces an artificial interdependency between the two systems. Exporting get_vma_policy() once again is the first step to clean up this interdependency. Signed-off-by: Stephen Wilson Reviewed-by: KOSAKI Motohiro Cc: Hugh Dickins Cc: David Rientjes Cc: Lee Schermerhorn Cc: Alexey Dobriyan Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 3 +++ mm/mempolicy.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 31ac26ca4acf..c2f603218fbc 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -199,6 +199,9 @@ void mpol_free_shared_policy(struct shared_policy *p); struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx); +struct mempolicy *get_vma_policy(struct task_struct *tsk, + struct vm_area_struct *vma, unsigned long addr); + extern void numa_default_policy(void); extern void numa_policy_init(void); extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new, diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 959a8b8c7350..5bfb03ef3cb0 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1489,7 +1489,7 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, * freeing by another task. It is the caller's responsibility to free the * extra reference for shared policies. 
*/ -static struct mempolicy *get_vma_policy(struct task_struct *task, +struct mempolicy *get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned long addr) { struct mempolicy *pol = task->mempolicy; -- cgit v1.2.3 From 13057efb0a0063eb8042d99093ec88a52c4f1593 Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Tue, 24 May 2011 17:12:46 -0700 Subject: mm: declare mpol_to_str() when CONFIG_TMPFS=n When CONFIG_TMPFS=n mpol_to_str() is not declared in mempolicy.h. However, in the NUMA case, the definition is always compiled. Since it is not strictly true that tmpfs is the only client, and since the symbol was always lurking around anyways, export mpol_to_str() unconditionally. Furthermore, this will allow us to move show_numa_map() out of mempolicy.c and into the procfs subsystem. Signed-off-by: Stephen Wilson Cc: KOSAKI Motohiro Cc: Hugh Dickins Cc: David Rientjes Cc: Lee Schermerhorn Cc: Alexey Dobriyan Cc: Christoph Lameter Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index c2f603218fbc..7978eec1b7d9 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -231,10 +231,10 @@ int do_migrate_pages(struct mm_struct *mm, #ifdef CONFIG_TMPFS extern int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context); +#endif extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context); -#endif /* Check if a vma is migratable */ static inline int vma_migratable(struct vm_area_struct *vma) @@ -371,13 +371,13 @@ static inline int mpol_parse_str(char *str, struct mempolicy **mpol, { return 1; /* error */ } +#endif static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context) { return 0; } -#endif #endif /* CONFIG_NUMA */ #endif /* __KERNEL__ */ -- cgit v1.2.3 From f2beb7983613ecca20a61604f01ab50cc7a797e6 Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Tue, 24 May 2011 17:12:48 -0700 Subject: proc: make struct proc_maps_private truly private Now that mm/mempolicy.c is no longer implementing /proc/pid/numa_maps there is no need to export struct proc_maps_private to the world. Move it to fs/proc/internal.h instead. 
Signed-off-by: Stephen Wilson Reviewed-by: KOSAKI Motohiro Cc: Hugh Dickins Cc: David Rientjes Cc: Lee Schermerhorn Cc: Alexey Dobriyan Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/internal.h | 8 ++++++++ include/linux/proc_fs.h | 8 -------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/internal.h b/fs/proc/internal.h index c03e8d3a3a5b..3763b436e69d 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -61,6 +61,14 @@ extern const struct file_operations proc_pagemap_operations; extern const struct file_operations proc_net_operations; extern const struct inode_operations proc_net_inode_operations; +struct proc_maps_private { + struct pid *pid; + struct task_struct *task; +#ifdef CONFIG_MMU + struct vm_area_struct *tail_vma; +#endif +}; + void proc_init_inodecache(void); static inline struct pid *proc_pid(struct inode *inode) diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index eaf4350c0f90..3686cd6c9aca 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -288,12 +288,4 @@ static inline struct net *PDE_NET(struct proc_dir_entry *pde) return pde->parent->data; } -struct proc_maps_private { - struct pid *pid; - struct task_struct *task; -#ifdef CONFIG_MMU - struct vm_area_struct *tail_vma; -#endif -}; - #endif /* _LINUX_PROC_FS_H */ -- cgit v1.2.3 From a539f3533b78e39a22723d6d3e1e11b6c14454d9 Mon Sep 17 00:00:00 2001 From: Daniel Kiper Date: Tue, 24 May 2011 17:12:51 -0700 Subject: mm: add SECTION_ALIGN_UP() and SECTION_ALIGN_DOWN() macro Add SECTION_ALIGN_UP() and SECTION_ALIGN_DOWN() macro which aligns given pfn to upper section and lower section boundary accordingly. Required for the latest memory hotplug support for the Xen balloon driver. Signed-off-by: Daniel Kiper Reviewed-by: Konrad Rzeszutek Wilk David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d7152002e18d..217bcf6bca77 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -956,6 +956,9 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn) #define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT) #define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT) +#define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK) +#define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK) + struct page; struct page_cgroup; struct mem_section { -- cgit v1.2.3 From 0ac1ee0bfec2a4ad118f907ce586d0dfd8db7641 Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Tue, 24 May 2011 17:13:05 -0700 Subject: ulimit: raise default hard ulimit on number of files to 4096 Apps are increasingly using more than 1024 file descriptors. See discussion in several distro bug trackers, e.g. BugLink: http://bugs.launchpad.net/bugs/663090 https://issues.rpath.com/browse/RPL-2054 You don't want to raise the default soft limit, since that might break apps that use select(), but it's safe to raise the default hard limit; that way, apps that know they need lots of file descriptors can raise their soft limit without needing root, and without user intervention. Ubuntu is doing this with a kernel change because they have a policy of not changing kernel defaults in userland. While 4096 might not be enough for *all* apps, it seems to be plenty for the apps I've seen lately that are unhappy with 1024. 
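For illustration only (not part of the patch), an unprivileged program that knows it needs many descriptors can raise its own soft limit up to the new 4096 hard limit without any capability:

#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
	struct rlimit rl;

	if (getrlimit(RLIMIT_NOFILE, &rl) < 0) {
		perror("getrlimit");
		return 1;
	}
	rl.rlim_cur = rl.rlim_max;	/* soft := hard, no privilege needed */
	if (setrlimit(RLIMIT_NOFILE, &rl) < 0) {
		perror("setrlimit");
		return 1;
	}
	printf("RLIMIT_NOFILE soft limit is now %llu\n",
	       (unsigned long long)rl.rlim_cur);
	return 0;
}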
Signed-off-by: Tim Gardner Cc: Dan Kegel Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/resource.h | 2 +- include/linux/fs.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/resource.h b/include/asm-generic/resource.h index 587566f95f6c..61fa862fe08d 100644 --- a/include/asm-generic/resource.h +++ b/include/asm-generic/resource.h @@ -78,7 +78,7 @@ [RLIMIT_CORE] = { 0, RLIM_INFINITY }, \ [RLIMIT_RSS] = { RLIM_INFINITY, RLIM_INFINITY }, \ [RLIMIT_NPROC] = { 0, 0 }, \ - [RLIMIT_NOFILE] = { INR_OPEN, INR_OPEN }, \ + [RLIMIT_NOFILE] = { INR_OPEN_CUR, INR_OPEN_MAX }, \ [RLIMIT_MEMLOCK] = { MLOCK_LIMIT, MLOCK_LIMIT }, \ [RLIMIT_AS] = { RLIM_INFINITY, RLIM_INFINITY }, \ [RLIMIT_LOCKS] = { RLIM_INFINITY, RLIM_INFINITY }, \ diff --git a/include/linux/fs.h b/include/linux/fs.h index 5bb9e826019b..3f9d3251790d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -23,7 +23,8 @@ /* Fixed constants first: */ #undef NR_OPEN -#define INR_OPEN 1024 /* Initial setting for nfile rlimits */ +#define INR_OPEN_CUR 1024 /* Initial setting for nfile rlimits */ +#define INR_OPEN_MAX 4096 /* Hard limit for nfile rlimits */ #define BLOCK_SIZE_BITS 10 #define BLOCK_SIZE (1< Date: Tue, 24 May 2011 17:13:06 -0700 Subject: include/linux/c2port.h: remove wrong and never used macros The macro to_class_dev() uses the deprecated structure class_device, and the c2port_device has no member named class in the definition of the macro to_c2port_device. Signed-off-by: Wanlong Gao Cc: Rodolfo Giometti Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/c2port.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/c2port.h b/include/linux/c2port.h index 2a5cd867c365..a2f7d7413f30 100644 --- a/include/linux/c2port.h +++ b/include/linux/c2port.h @@ -60,9 +60,6 @@ struct c2port_ops { * Exported functions */ -#define to_class_dev(obj) container_of((obj), struct class_device, kobj) -#define to_c2port_device(obj) container_of((obj), struct c2port_device, class) - extern struct c2port_device *c2port_device_register(char *name, struct c2port_ops *ops, void *devdata); extern void c2port_device_unregister(struct c2port_device *dev); -- cgit v1.2.3 From e50c1f609c63223adaa38f5a79b18759a00adf72 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Tue, 24 May 2011 17:13:11 -0700 Subject: fscache: remove dead code under CONFIG_WORKQUEUE_DEBUGFS There is no CONFIG_WORKQUEUE_DEBUGFS any more, so this code is dead. 
Signed-off-by: WANG Cong Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fscache/operation.c | 10 ---------- fs/fscache/page.c | 13 ------------- include/linux/fscache-cache.h | 12 ------------ 3 files changed, 35 deletions(-) (limited to 'include/linux') diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 48a18f184d50..30afdfa7aec7 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -33,8 +33,6 @@ void fscache_enqueue_operation(struct fscache_operation *op) _enter("{OBJ%x OP%x,%u}", op->object->debug_id, op->debug_id, atomic_read(&op->usage)); - fscache_set_op_state(op, "EnQ"); - ASSERT(list_empty(&op->pend_link)); ASSERT(op->processor != NULL); ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); @@ -66,8 +64,6 @@ EXPORT_SYMBOL(fscache_enqueue_operation); static void fscache_run_op(struct fscache_object *object, struct fscache_operation *op) { - fscache_set_op_state(op, "Run"); - object->n_in_progress++; if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) wake_up_bit(&op->flags, FSCACHE_OP_WAITING); @@ -88,8 +84,6 @@ int fscache_submit_exclusive_op(struct fscache_object *object, _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); - fscache_set_op_state(op, "SubmitX"); - spin_lock(&object->lock); ASSERTCMP(object->n_ops, >=, object->n_in_progress); ASSERTCMP(object->n_ops, >=, object->n_exclusive); @@ -194,8 +188,6 @@ int fscache_submit_op(struct fscache_object *object, ASSERTCMP(atomic_read(&op->usage), >, 0); - fscache_set_op_state(op, "Submit"); - spin_lock(&object->lock); ASSERTCMP(object->n_ops, >=, object->n_in_progress); ASSERTCMP(object->n_ops, >=, object->n_exclusive); @@ -335,8 +327,6 @@ void fscache_put_operation(struct fscache_operation *op) if (!atomic_dec_and_test(&op->usage)) return; - fscache_set_op_state(op, "Put"); - _debug("PUT OP"); if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags)) BUG(); diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 41c441c2058d..a2a5d19ece6a 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -155,11 +155,9 @@ static void fscache_attr_changed_op(struct fscache_operation *op) fscache_stat(&fscache_n_attr_changed_calls); if (fscache_object_is_active(object)) { - fscache_set_op_state(op, "CallFS"); fscache_stat(&fscache_n_cop_attr_changed); ret = object->cache->ops->attr_changed(object); fscache_stat_d(&fscache_n_cop_attr_changed); - fscache_set_op_state(op, "Done"); if (ret < 0) fscache_abort_object(object); } @@ -190,7 +188,6 @@ int __fscache_attr_changed(struct fscache_cookie *cookie) fscache_operation_init(op, fscache_attr_changed_op, NULL); op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE); - fscache_set_op_name(op, "Attr"); spin_lock(&cookie->lock); @@ -257,7 +254,6 @@ static struct fscache_retrieval *fscache_alloc_retrieval( op->context = context; op->start_time = jiffies; INIT_LIST_HEAD(&op->to_do); - fscache_set_op_name(&op->op, "Retr"); return op; } @@ -368,7 +364,6 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, _leave(" = -ENOMEM"); return -ENOMEM; } - fscache_set_op_name(&op->op, "RetrRA1"); spin_lock(&cookie->lock); @@ -487,7 +482,6 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, op = fscache_alloc_retrieval(mapping, end_io_func, context); if (!op) return -ENOMEM; - fscache_set_op_name(&op->op, "RetrRAN"); spin_lock(&cookie->lock); @@ -589,7 +583,6 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, op = fscache_alloc_retrieval(page->mapping, NULL, NULL); if (!op) return -ENOMEM; - 
fscache_set_op_name(&op->op, "RetrAL1"); spin_lock(&cookie->lock); @@ -662,8 +655,6 @@ static void fscache_write_op(struct fscache_operation *_op) _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage)); - fscache_set_op_state(&op->op, "GetPage"); - spin_lock(&object->lock); cookie = object->cookie; @@ -698,15 +689,12 @@ static void fscache_write_op(struct fscache_operation *_op) spin_unlock(&cookie->stores_lock); spin_unlock(&object->lock); - fscache_set_op_state(&op->op, "Store"); fscache_stat(&fscache_n_store_pages); fscache_stat(&fscache_n_cop_write_page); ret = object->cache->ops->write_page(op, page); fscache_stat_d(&fscache_n_cop_write_page); - fscache_set_op_state(&op->op, "EndWrite"); fscache_end_page_write(object, page); if (ret < 0) { - fscache_set_op_state(&op->op, "Abort"); fscache_abort_object(object); } else { fscache_enqueue_operation(&op->op); @@ -778,7 +766,6 @@ int __fscache_write_page(struct fscache_cookie *cookie, fscache_operation_init(&op->op, fscache_write_op, fscache_release_write_op); op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING); - fscache_set_op_name(&op->op, "Write1"); ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); if (ret < 0) diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 76427e688d15..af095b54502e 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -100,17 +100,6 @@ struct fscache_operation { /* operation releaser */ fscache_operation_release_t release; - -#ifdef CONFIG_WORKQUEUE_DEBUGFS - struct work_struct put_work; /* work to delay operation put */ - const char *name; /* operation name */ - const char *state; /* operation state */ -#define fscache_set_op_name(OP, N) do { (OP)->name = (N); } while(0) -#define fscache_set_op_state(OP, S) do { (OP)->state = (S); } while(0) -#else -#define fscache_set_op_name(OP, N) do { } while(0) -#define fscache_set_op_state(OP, S) do { } while(0) -#endif }; extern atomic_t fscache_op_debug_id; @@ -137,7 +126,6 @@ static inline void fscache_operation_init(struct fscache_operation *op, op->processor = processor; op->release = release; INIT_LIST_HEAD(&op->pend_link); - fscache_set_op_state(op, "Init"); } /* -- cgit v1.2.3 From 4b060420a596095869a6d7849caa798d23839cd1 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 24 May 2011 17:13:12 -0700 Subject: bitmap, irq: add smp_affinity_list interface to /proc/irq Manually adjusting the smp_affinity for IRQ's becomes unwieldy when the cpu count is large. Setting smp affinity to cpus 256 to 263 would be: echo 000000ff,00000000,00000000,00000000,00000000,00000000,00000000,00000000 > smp_affinity instead of: echo 256-263 > smp_affinity_list Think about what it looks like for cpus around say, 4088 to 4095. We already have many alternate "list" interfaces: /sys/devices/system/cpu/cpuX/indexY/shared_cpu_list /sys/devices/system/cpu/cpuX/topology/thread_siblings_list /sys/devices/system/cpu/cpuX/topology/core_siblings_list /sys/devices/system/node/nodeX/cpulist /sys/devices/pci***/***/local_cpulist Add a companion interface, smp_affinity_list to use cpu lists instead of cpu maps. This conforms to other companion interfaces where both a map and a list interface exists. This required adding a bitmap_parselist_user() function in a manner similar to the bitmap_parse_user() function. 
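To make the new list interface concrete, here is a hedged userspace sketch: IRQ 44 is just the number reused from the documentation example further down, the write requires root, and it fails if the interrupt controller does not support affinity changes.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/proc/irq/44/smp_affinity_list";
	char buf[64];
	ssize_t n;
	int fd;

	fd = open(path, O_RDWR);
	if (fd < 0) {
		perror(path);
		return 1;
	}

	/* Restrict IRQ 44 to CPUs 0-3 using the list syntax. */
	if (write(fd, "0-3\n", 4) < 0)
		perror("write");

	/* Read the setting back; the kernel reports it in the same format. */
	lseek(fd, 0, SEEK_SET);
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("smp_affinity_list: %s", buf);
	}

	close(fd);
	return 0;
}
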
[akpm@linux-foundation.org: make __bitmap_parselist() static] Signed-off-by: Mike Travis Cc: Thomas Gleixner Cc: Jack Steiner Cc: Lee Schermerhorn Cc: Andy Shevchenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/IRQ-affinity.txt | 17 ++++-- Documentation/filesystems/proc.txt | 11 +++- include/linux/bitmap.h | 5 +- include/linux/cpumask.h | 15 +++++ kernel/irq/proc.c | 54 ++++++++++++++++-- lib/bitmap.c | 109 +++++++++++++++++++++++++++++++++---- 6 files changed, 188 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/Documentation/IRQ-affinity.txt b/Documentation/IRQ-affinity.txt index b4a615b78403..7890fae18529 100644 --- a/Documentation/IRQ-affinity.txt +++ b/Documentation/IRQ-affinity.txt @@ -4,10 +4,11 @@ ChangeLog: SMP IRQ affinity -/proc/irq/IRQ#/smp_affinity specifies which target CPUs are permitted -for a given IRQ source. It's a bitmask of allowed CPUs. It's not allowed -to turn off all CPUs, and if an IRQ controller does not support IRQ -affinity then the value will not change from the default 0xffffffff. +/proc/irq/IRQ#/smp_affinity and /proc/irq/IRQ#/smp_affinity_list specify +which target CPUs are permitted for a given IRQ source. It's a bitmask +(smp_affinity) or cpu list (smp_affinity_list) of allowed CPUs. It's not +allowed to turn off all CPUs, and if an IRQ controller does not support +IRQ affinity then the value will not change from the default of all cpus. /proc/irq/default_smp_affinity specifies default affinity mask that applies to all non-active IRQs. Once IRQ is allocated/activated its affinity bitmask @@ -54,3 +55,11 @@ round-trip min/avg/max = 0.1/0.5/585.4 ms This time around IRQ44 was delivered only to the last four processors. i.e counters for the CPU0-3 did not change. +Here is an example of limiting that same irq (44) to cpus 1024 to 1031: + +[root@moon 44]# echo 1024-1031 > smp_affinity +[root@moon 44]# cat smp_affinity +1024-1031 + +Note that to do this with a bitmask would require 32 bitmasks of zero +to follow the pertinent one. diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 60740e8ecb37..f48178024067 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -574,6 +574,12 @@ The contents of each smp_affinity file is the same by default: > cat /proc/irq/0/smp_affinity ffffffff +There is an alternate interface, smp_affinity_list which allows specifying +a cpu range instead of a bitmask: + + > cat /proc/irq/0/smp_affinity_list + 1024-1031 + The default_smp_affinity mask applies to all non-active IRQs, which are the IRQs which have not yet been allocated/activated, and hence which lack a /proc/irq/[0-9]* directory. @@ -583,12 +589,13 @@ reports itself as being attached. This hardware locality information does not include information about any possible driver locality preference. prof_cpu_mask specifies which CPUs are to be profiled by the system wide -profiler. Default value is ffffffff (all cpus). +profiler. Default value is ffffffff (all cpus if there are only 32 of them). The way IRQs are routed is handled by the IO-APIC, and it's Round Robin between all the CPUs which are allowed to handle it. As usual the kernel has more info than you and does a better job than you, so the defaults are the -best choice for almost everyone. +best choice for almost everyone. [Note this applies only to those IO-APIC's +that support "Round Robin" interrupt distribution.] There are three more important subdirectories in /proc: net, scsi, and sys. 
The general rule is that the contents, or even the existence of these diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index daf8c480c786..dcafe0bf0005 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -55,7 +55,8 @@ * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf * bitmap_scnlistprintf(buf, len, src, nbits) Print bitmap src as list to buf - * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from list + * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from kernel buf + * bitmap_parselist_user(buf, dst, nbits) Parse bitmap dst from user buf * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region * bitmap_release_region(bitmap, pos, order) Free specified bit region * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region @@ -129,6 +130,8 @@ extern int bitmap_scnlistprintf(char *buf, unsigned int len, const unsigned long *src, int nbits); extern int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits); +extern int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, + unsigned long *dst, int nbits); extern void bitmap_remap(unsigned long *dst, const unsigned long *src, const unsigned long *old, const unsigned long *new, int bits); extern int bitmap_bitremap(int oldbit, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index bae6fe24d1f9..b24ac56477b4 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -546,6 +546,21 @@ static inline int cpumask_parse_user(const char __user *buf, int len, return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits); } +/** + * cpumask_parselist_user - extract a cpumask from a user string + * @buf: the buffer to extract from + * @len: the length of the buffer + * @dstp: the cpumask to set. + * + * Returns -errno, or 0 for success. 
+ */ +static inline int cpumask_parselist_user(const char __user *buf, int len, + struct cpumask *dstp) +{ + return bitmap_parselist_user(buf, len, cpumask_bits(dstp), + nr_cpumask_bits); +} + /** * cpulist_scnprintf - print a cpumask into a string as comma-separated list * @buf: the buffer to sprintf into diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 834899f2500f..64e3df6ab1ef 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -19,7 +19,7 @@ static struct proc_dir_entry *root_irq_dir; #ifdef CONFIG_SMP -static int irq_affinity_proc_show(struct seq_file *m, void *v) +static int show_irq_affinity(int type, struct seq_file *m, void *v) { struct irq_desc *desc = irq_to_desc((long)m->private); const struct cpumask *mask = desc->irq_data.affinity; @@ -28,7 +28,10 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v) if (irqd_is_setaffinity_pending(&desc->irq_data)) mask = desc->pending_mask; #endif - seq_cpumask(m, mask); + if (type) + seq_cpumask_list(m, mask); + else + seq_cpumask(m, mask); seq_putc(m, '\n'); return 0; } @@ -59,7 +62,18 @@ static int irq_affinity_hint_proc_show(struct seq_file *m, void *v) #endif int no_irq_affinity; -static ssize_t irq_affinity_proc_write(struct file *file, +static int irq_affinity_proc_show(struct seq_file *m, void *v) +{ + return show_irq_affinity(0, m, v); +} + +static int irq_affinity_list_proc_show(struct seq_file *m, void *v) +{ + return show_irq_affinity(1, m, v); +} + + +static ssize_t write_irq_affinity(int type, struct file *file, const char __user *buffer, size_t count, loff_t *pos) { unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data; @@ -72,7 +86,10 @@ static ssize_t irq_affinity_proc_write(struct file *file, if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) return -ENOMEM; - err = cpumask_parse_user(buffer, count, new_value); + if (type) + err = cpumask_parselist_user(buffer, count, new_value); + else + err = cpumask_parse_user(buffer, count, new_value); if (err) goto free_cpumask; @@ -100,11 +117,28 @@ free_cpumask: return err; } +static ssize_t irq_affinity_proc_write(struct file *file, + const char __user *buffer, size_t count, loff_t *pos) +{ + return write_irq_affinity(0, file, buffer, count, pos); +} + +static ssize_t irq_affinity_list_proc_write(struct file *file, + const char __user *buffer, size_t count, loff_t *pos) +{ + return write_irq_affinity(1, file, buffer, count, pos); +} + static int irq_affinity_proc_open(struct inode *inode, struct file *file) { return single_open(file, irq_affinity_proc_show, PDE(inode)->data); } +static int irq_affinity_list_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, irq_affinity_list_proc_show, PDE(inode)->data); +} + static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file) { return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data); @@ -125,6 +159,14 @@ static const struct file_operations irq_affinity_hint_proc_fops = { .release = single_release, }; +static const struct file_operations irq_affinity_list_proc_fops = { + .open = irq_affinity_list_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = irq_affinity_list_proc_write, +}; + static int default_affinity_show(struct seq_file *m, void *v) { seq_cpumask(m, irq_default_affinity); @@ -289,6 +331,10 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) proc_create_data("affinity_hint", 0400, desc->dir, &irq_affinity_hint_proc_fops, (void *)(long)irq); + /* create 
/proc/irq//smp_affinity_list */ + proc_create_data("smp_affinity_list", 0600, desc->dir, + &irq_affinity_list_proc_fops, (void *)(long)irq); + proc_create_data("node", 0444, desc->dir, &irq_node_proc_fops, (void *)(long)irq); #endif diff --git a/lib/bitmap.c b/lib/bitmap.c index 91e0ccfdb424..41baf02924e6 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -571,8 +571,11 @@ int bitmap_scnlistprintf(char *buf, unsigned int buflen, EXPORT_SYMBOL(bitmap_scnlistprintf); /** - * bitmap_parselist - convert list format ASCII string to bitmap + * __bitmap_parselist - convert list format ASCII string to bitmap * @bp: read nul-terminated user string from this buffer + * @buflen: buffer size in bytes. If string is smaller than this + * then it must be terminated with a \0. + * @is_user: location of buffer, 0 indicates kernel space * @maskp: write resulting mask here * @nmaskbits: number of bits in mask to be written * @@ -587,20 +590,63 @@ EXPORT_SYMBOL(bitmap_scnlistprintf); * %-EINVAL: invalid character in string * %-ERANGE: bit number specified too large for mask */ -int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits) +static int __bitmap_parselist(const char *buf, unsigned int buflen, + int is_user, unsigned long *maskp, + int nmaskbits) { unsigned a, b; + int c, old_c, totaldigits; + const char __user *ubuf = buf; + int exp_digit, in_range; + totaldigits = c = 0; bitmap_zero(maskp, nmaskbits); do { - if (!isdigit(*bp)) - return -EINVAL; - b = a = simple_strtoul(bp, (char **)&bp, BASEDEC); - if (*bp == '-') { - bp++; - if (!isdigit(*bp)) + exp_digit = 1; + in_range = 0; + a = b = 0; + + /* Get the next cpu# or a range of cpu#'s */ + while (buflen) { + old_c = c; + if (is_user) { + if (__get_user(c, ubuf++)) + return -EFAULT; + } else + c = *buf++; + buflen--; + if (isspace(c)) + continue; + + /* + * If the last character was a space and the current + * character isn't '\0', we've got embedded whitespace. + * This is a no-no, so throw an error. + */ + if (totaldigits && c && isspace(old_c)) + return -EINVAL; + + /* A '\0' or a ',' signal the end of a cpu# or range */ + if (c == '\0' || c == ',') + break; + + if (c == '-') { + if (exp_digit || in_range) + return -EINVAL; + b = 0; + in_range = 1; + exp_digit = 1; + continue; + } + + if (!isdigit(c)) return -EINVAL; - b = simple_strtoul(bp, (char **)&bp, BASEDEC); + + b = b * 10 + (c - '0'); + if (!in_range) + a = b; + exp_digit = 0; + totaldigits++; } if (!(a <= b)) return -EINVAL; @@ -610,13 +656,52 @@ int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits) set_bit(a, maskp); a++; } - if (*bp == ',') - bp++; - } while (*bp != '\0' && *bp != '\n'); + } while (buflen && c == ','); return 0; } + +int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits) +{ + char *nl = strchr(bp, '\n'); + int len; + + if (nl) + len = nl - bp; + else + len = strlen(bp); + + return __bitmap_parselist(bp, len, 0, maskp, nmaskbits); +} EXPORT_SYMBOL(bitmap_parselist); + +/** + * bitmap_parselist_user() + * + * @ubuf: pointer to user buffer containing string. + * @ulen: buffer size in bytes. If string is smaller than this + * then it must be terminated with a \0. + * @maskp: pointer to bitmap array that will contain result. + * @nmaskbits: size of bitmap, in bits. + * + * Wrapper for bitmap_parselist(), providing it with user buffer. + * + * We cannot have this as an inline function in bitmap.h because it needs + * linux/uaccess.h to get the access_ok() declaration and this causes + * cyclic dependencies. 
+ */ +int bitmap_parselist_user(const char __user *ubuf, + unsigned int ulen, unsigned long *maskp, + int nmaskbits) +{ + if (!access_ok(VERIFY_READ, ubuf, ulen)) + return -EFAULT; + return __bitmap_parselist((const char *)ubuf, + ulen, 1, maskp, nmaskbits); +} +EXPORT_SYMBOL(bitmap_parselist_user); + + /** * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap * @buf: pointer to a bitmap -- cgit v1.2.3 From 1dbe39424a43e56a6c9aed12661192af51dcdb9f Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 24 May 2011 17:13:13 -0700 Subject: xattr.h: expose string defines to userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit af4f136056c9 ("security: move LSM xattrnames to xattr.h") moved the XATTR_CAPS_SUFFIX define from capability.h to xattr.h. This makes sense except it was previously exports to userspace but xattr.h does not export it to userspace. This patch exports these headers to userspace to fix the ABI regression. There is some slight possibility that this will cause problems in other applications which used these #defines differently (wrongly) and I could JUST export the capabilities xattr name that we broke. Does anyonehave an idea how exposing these headers could cause a problem? Below is what is being exposed to userspace, included here since it isn't clear exactly what is going to be made available from the patch. /* Namespaces */ #define XATTR_OS2_PREFIX "os2." #define XATTR_OS2_PREFIX_LEN (sizeof (XATTR_OS2_PREFIX) - 1) #define XATTR_SECURITY_PREFIX "security." #define XATTR_SECURITY_PREFIX_LEN (sizeof (XATTR_SECURITY_PREFIX) - 1) #define XATTR_SYSTEM_PREFIX "system." #define XATTR_SYSTEM_PREFIX_LEN (sizeof (XATTR_SYSTEM_PREFIX) - 1) #define XATTR_TRUSTED_PREFIX "trusted." #define XATTR_TRUSTED_PREFIX_LEN (sizeof (XATTR_TRUSTED_PREFIX) - 1) #define XATTR_USER_PREFIX "user." #define XATTR_USER_PREFIX_LEN (sizeof (XATTR_USER_PREFIX) - 1) /* Security namespace */ #define XATTR_SELINUX_SUFFIX "selinux" #define XATTR_NAME_SELINUX XATTR_SECURITY_PREFIX XATTR_SELINUX_SUFFIX #define XATTR_SMACK_SUFFIX "SMACK64" #define XATTR_SMACK_IPIN "SMACK64IPIN" #define XATTR_SMACK_IPOUT "SMACK64IPOUT" #define XATTR_NAME_SMACK XATTR_SECURITY_PREFIX XATTR_SMACK_SUFFIX #define XATTR_NAME_SMACKIPIN XATTR_SECURITY_PREFIX XATTR_SMACK_IPIN #define XATTR_NAME_SMACKIPOUT XATTR_SECURITY_PREFIX XATTR_SMACK_IPOUT #define XATTR_CAPS_SUFFIX "capability" #define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX Reported-by: Ozan Çaglayan Signed-off-by: Eric Paris Cc: Mimi Zohar Cc: Serge Hallyn Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/xattr.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 6050783005bd..aed54c50aa66 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -13,10 +13,6 @@ #define XATTR_CREATE 0x1 /* set value, fail if attr already exists */ #define XATTR_REPLACE 0x2 /* set value, fail if attr does not exist */ -#ifdef __KERNEL__ - -#include - /* Namespaces */ #define XATTR_OS2_PREFIX "os2." 
#define XATTR_OS2_PREFIX_LEN (sizeof (XATTR_OS2_PREFIX) - 1) @@ -53,6 +49,10 @@ #define XATTR_CAPS_SUFFIX "capability" #define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX +#ifdef __KERNEL__ + +#include + struct inode; struct dentry; -- cgit v1.2.3 From 903c0c7cdc21f2ccb7562a7bbc70289c0c2b16ad Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 24 May 2011 17:13:16 -0700 Subject: sparse: define dummy BUILD_BUG_ON definition for sparse BUILD_BUG_ON() causes a syntax error to detect coding errors. So it causes sparse to detect an error too. This reduces sparse's usefulness. This patch makes a dummy BUILD_BUG_ON() definition for sparse. Signed-off-by: KOSAKI Motohiro Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f37ba716ef8b..22295244de18 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -638,6 +638,13 @@ struct sysinfo { char _f[20-2*sizeof(long)-sizeof(int)]; /* Padding: libc5 uses this.. */ }; +#ifdef __CHECKER__ +#define BUILD_BUG_ON_NOT_POWER_OF_2(n) +#define BUILD_BUG_ON_ZERO(e) +#define BUILD_BUG_ON_NULL(e) +#define BUILD_BUG_ON(condition) +#else /* __CHECKER__ */ + /* Force a compilation error if a constant expression is not a power of 2 */ #define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) @@ -674,6 +681,7 @@ extern int __build_bug_on_failed; if (condition) __build_bug_on_failed = 1; \ } while(0) #endif +#endif /* __CHECKER__ */ /* Trap pasters of __FUNCTION__ at compile-time */ #define __FUNCTION__ (__func__) -- cgit v1.2.3 From 5bd7e6a30e1eb38f58f0046c0cd42dfc38e90d64 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 24 May 2011 17:13:17 -0700 Subject: sparse: define __must_be_array() for __CHECKER__ commit c5e631cf65f ("ARRAY_SIZE: check for type") added __must_be_array(). But sparse can't parse this gcc extention. Now make C=2 makes following sparse errors a lot. kernel/futex.c:2699:25: error: No right hand side of '+'-expression Because __must_be_array() is used for ARRAY_SIZE() macro and it is used very widely. This patch fixes it. Signed-off-by: KOSAKI Motohiro Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index cb4c1eb7778e..59e4028e833d 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -34,8 +34,12 @@ __asm__ ("" : "=r"(__ptr) : "0"(ptr)); \ (typeof(ptr)) (__ptr + (off)); }) +#ifdef __CHECKER__ +#define __must_be_array(arr) 0 +#else /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +#endif /* * Force always-inline if the user requests it so via the .config, -- cgit v1.2.3 From 746a2a838deec3ef86ef6b7c3edd4207b9a351aa Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 24 May 2011 17:13:17 -0700 Subject: sparse: Undef __compiletime_{warning,error} if __CHECKER__ is defined sparse can't parse warning and error attribute. then they should be hidden from sparse. 
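The three sparse-related patches above all follow the same pattern: the gcc-specific construct is compiled for real builds, and an empty stand-in is used when __CHECKER__ (sparse) is defined so the checker never has to parse it. A simplified standalone sketch of that pattern (not the kernel's exact definitions) that builds and runs with gcc:

#include <stdio.h>

#ifdef __CHECKER__
/* Keep the checker away from the gcc-only trick below. */
#define BUILD_BUG_ON(cond)	do { } while (0)
#else
/* A negative array size forces a compile-time error when 'cond' is true. */
#define BUILD_BUG_ON(cond)	((void)sizeof(char[1 - 2 * !!(cond)]))
#endif

int main(void)
{
	BUILD_BUG_ON(sizeof(long) < sizeof(int));	/* false: compiles away */
	/* BUILD_BUG_ON(sizeof(int) == 4); */		/* would break the build on most targets */
	printf("compile-time assertions passed\n");
	return 0;
}
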
Signed-off-by: KOSAKI Motohiro Cc: Arjan van de Ven Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 64b7c003fd7a..dfadc96e9d63 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -51,7 +51,7 @@ #if __GNUC_MINOR__ > 0 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0) #endif -#if __GNUC_MINOR__ >= 4 +#if __GNUC_MINOR__ >= 4 && !defined(__CHECKER__) #define __compiletime_warning(message) __attribute__((warning(message))) #define __compiletime_error(message) __attribute__((error(message))) #endif -- cgit v1.2.3 From 95dde501907b06e7203c74f8435acfdab9eb2659 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 24 May 2011 17:13:19 -0700 Subject: memblock: add error return when CONFIG_HAVE_MEMBLOCK is not set On larger systems, information in the kernel log is lost because there is so much early text printed, that it overflows the static log buffer before the log_buf_len kernel parameter can be processed, and a bigger log buffer allocated. Distros are relunctant to increase memory usage by increasing the size of the static log buffer, so minimize the problem by allocating the new log buffer as early as possible. This patch: Add an error return if CONFIG_HAVE_MEMBLOCK is not set instead of having to add #ifdef CONFIG_HAVE_MEMBLOCK around blocks of code calling that function. Signed-off-by: Mike Travis Cc: Yinghai Lu Cc: "H. Peter Anvin" Cc: Jack Steiner Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 62a10c2a11f2..7525e38c434d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -2,6 +2,8 @@ #define _LINUX_MEMBLOCK_H #ifdef __KERNEL__ +#define MEMBLOCK_ERROR 0 + #ifdef CONFIG_HAVE_MEMBLOCK /* * Logical memory blocks. @@ -20,7 +22,6 @@ #include #define INIT_MEMBLOCK_REGIONS 128 -#define MEMBLOCK_ERROR 0 struct memblock_region { phys_addr_t base; @@ -160,6 +161,12 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo #define __initdata_memblock #endif +#else +static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) +{ + return MEMBLOCK_ERROR; +} + #endif /* CONFIG_HAVE_MEMBLOCK */ #endif /* __KERNEL__ */ -- cgit v1.2.3 From 162a7e7500f9664636e649ba59defe541b7c2c60 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 24 May 2011 17:13:20 -0700 Subject: printk: allocate kernel log buffer earlier On larger systems, because of the numerous ACPI, Bootmem and EFI messages, the static log buffer overflows before the larger one specified by the log_buf_len param is allocated. Minimize the overflow by allocating the new log buffer as soon as possible. On kernels without memblock, a later call to setup_log_buf from kernel/init.c is the fallback. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: fix CONFIG_PRINTK=n build] Signed-off-by: Mike Travis Cc: Yinghai Lu Cc: "H. 
Peter Anvin" Cc: Jack Steiner Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/setup.c | 2 ++ include/linux/printk.h | 7 ++++ init/main.c | 1 + kernel/printk.c | 87 ++++++++++++++++++++++++++++++++----------------- 4 files changed, 68 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 605e5ae19c7f..a3e5948670c2 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -946,6 +946,8 @@ void __init setup_arch(char **cmdline_p) if (init_ohci1394_dma_early) init_ohci1394_dma_on_all_controllers(); #endif + /* Allocate bigger log buffer */ + setup_log_buf(1); reserve_initrd(); diff --git a/include/linux/printk.h b/include/linux/printk.h index ee048e77e1ae..0101d55d9651 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -1,6 +1,8 @@ #ifndef __KERNEL_PRINTK__ #define __KERNEL_PRINTK__ +#include + extern const char linux_banner[]; extern const char linux_proc_banner[]; @@ -113,6 +115,7 @@ extern int dmesg_restrict; extern int kptr_restrict; void log_buf_kexec_setup(void); +void __init setup_log_buf(int early); #else static inline __attribute__ ((format (printf, 1, 0))) int vprintk(const char *s, va_list args) @@ -137,6 +140,10 @@ static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, static inline void log_buf_kexec_setup(void) { } + +static inline void setup_log_buf(int early) +{ +} #endif extern void dump_stack(void) __cold; diff --git a/init/main.c b/init/main.c index 22da33918aef..d2f1e086bf33 100644 --- a/init/main.c +++ b/init/main.c @@ -504,6 +504,7 @@ asmlinkage void __init start_kernel(void) * These use large bootmem allocations and must precede * kmem_cache_init() */ + setup_log_buf(0); pidhash_init(); vfs_caches_init_early(); sort_main_extable(); diff --git a/kernel/printk.c b/kernel/printk.c index da8ca817eae3..35185392173f 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -167,46 +168,74 @@ void log_buf_kexec_setup(void) } #endif +/* requested log_buf_len from kernel cmdline */ +static unsigned long __initdata new_log_buf_len; + +/* save requested log_buf_len since it's too early to process it */ static int __init log_buf_len_setup(char *str) { unsigned size = memparse(str, &str); - unsigned long flags; if (size) size = roundup_pow_of_two(size); - if (size > log_buf_len) { - unsigned start, dest_idx, offset; - char *new_log_buf; + if (size > log_buf_len) + new_log_buf_len = size; - new_log_buf = alloc_bootmem(size); - if (!new_log_buf) { - printk(KERN_WARNING "log_buf_len: allocation failed\n"); - goto out; - } + return 0; +} +early_param("log_buf_len", log_buf_len_setup); - spin_lock_irqsave(&logbuf_lock, flags); - log_buf_len = size; - log_buf = new_log_buf; - - offset = start = min(con_start, log_start); - dest_idx = 0; - while (start != log_end) { - log_buf[dest_idx] = __log_buf[start & (__LOG_BUF_LEN - 1)]; - start++; - dest_idx++; - } - log_start -= offset; - con_start -= offset; - log_end -= offset; - spin_unlock_irqrestore(&logbuf_lock, flags); +void __init setup_log_buf(int early) +{ + unsigned long flags; + unsigned start, dest_idx, offset; + char *new_log_buf; + int free; + + if (!new_log_buf_len) + return; + + if (early) { + unsigned long mem; - printk(KERN_NOTICE "log_buf_len: %d\n", log_buf_len); + mem = memblock_alloc(new_log_buf_len, PAGE_SIZE); + if (mem == MEMBLOCK_ERROR) + return; + new_log_buf = __va(mem); + } 
else { + new_log_buf = alloc_bootmem_nopanic(new_log_buf_len); } -out: - return 1; -} -__setup("log_buf_len=", log_buf_len_setup); + if (unlikely(!new_log_buf)) { + pr_err("log_buf_len: %ld bytes not available\n", + new_log_buf_len); + return; + } + + spin_lock_irqsave(&logbuf_lock, flags); + log_buf_len = new_log_buf_len; + log_buf = new_log_buf; + new_log_buf_len = 0; + free = __LOG_BUF_LEN - log_end; + + offset = start = min(con_start, log_start); + dest_idx = 0; + while (start != log_end) { + unsigned log_idx_mask = start & (__LOG_BUF_LEN - 1); + + log_buf[dest_idx] = __log_buf[log_idx_mask]; + start++; + dest_idx++; + } + log_start -= offset; + con_start -= offset; + log_end -= offset; + spin_unlock_irqrestore(&logbuf_lock, flags); + + pr_info("log_buf_len: %d\n", log_buf_len); + pr_info("early log buf free: %d(%d%%)\n", + free, (free * 100) / __LOG_BUF_LEN); +} #ifdef CONFIG_BOOT_PRINTK_DELAY -- cgit v1.2.3 From 3c1ab50d0a31b27bb4e55168f4901dd91e6e5ea4 Mon Sep 17 00:00:00 2001 From: Joachim Eastwood Date: Tue, 24 May 2011 17:13:23 -0700 Subject: drivers/leds/leds-pca9532.c: add gpio capability Allow unused leds on pca9532 to be used as gpio. The board I am working on now has no less than 6 pca9532 chips. One chips is used for only leds, one has 14 leds and 2 gpio and the rest of the chips are gpio only. There is also one board in mainline which could use this capabilty; arch/arm/mach-iop32x/n2100.c 232 { .type = PCA9532_TYPE_NONE }, /* power OFF gpio */ 233 { .type = PCA9532_TYPE_NONE }, /* reset gpio */ This patch defines a new pin type, PCA9532_TYPE_GPIO, and registers a gpiochip if any pin has this type set. The gpio will registers all chip pins but will filter on gpio_request. [randy.dunlap@oracle.com: fix build when GPIOLIB is not enabled] Signed-off-by: Joachim Eastwood Reviewed-by: Wolfram Sang Reviewed-by: H Hartley Sweeten Cc: Richard Purdie Cc: Grant Likely Signed-off-by: Randy Dunlap Cc: Jan Weitzel Cc: Juergen Kilb Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/leds/Kconfig | 10 ++++ drivers/leds/leds-pca9532.c | 113 +++++++++++++++++++++++++++++++++++++++++-- include/linux/leds-pca9532.h | 3 +- 3 files changed, 122 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 9bec8699b8a3..09de8dac8a73 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -162,6 +162,16 @@ config LEDS_PCA9532 LED controller. It is generally only useful as a platform driver +config LEDS_PCA9532_GPIO + bool "Enable GPIO support for PCA9532" + depends on LEDS_PCA9532 + depends on GPIOLIB + help + Allow unused pins on PCA9532 to be used as gpio. + + To use a pin as gpio pca9532_type in pca9532_platform data needs to + set to PCA9532_TYPE_GPIO. 
+ config LEDS_GPIO tristate "LED Support for GPIO connected LEDs" depends on LEDS_CLASS diff --git a/drivers/leds/leds-pca9532.c b/drivers/leds/leds-pca9532.c index 5bf63af09ddf..ebea85603f43 100644 --- a/drivers/leds/leds-pca9532.c +++ b/drivers/leds/leds-pca9532.c @@ -19,7 +19,9 @@ #include #include #include +#include +#define PCA9532_REG_INPUT(i) ((i)/8) #define PCA9532_REG_PSC(i) (0x2+(i)*2) #define PCA9532_REG_PWM(i) (0x3+(i)*2) #define PCA9532_REG_LS0 0x6 @@ -34,6 +36,9 @@ struct pca9532_data { struct mutex update_lock; struct input_dev *idev; struct work_struct work; +#ifdef CONFIG_LEDS_PCA9532_GPIO + struct gpio_chip gpio; +#endif u8 pwm[2]; u8 psc[2]; }; @@ -200,16 +205,68 @@ static void pca9532_led_work(struct work_struct *work) pca9532_setled(led); } -static void pca9532_destroy_devices(struct pca9532_data *data, int n_devs) +#ifdef CONFIG_LEDS_PCA9532_GPIO +static int pca9532_gpio_request_pin(struct gpio_chip *gc, unsigned offset) +{ + struct pca9532_data *data = container_of(gc, struct pca9532_data, gpio); + struct pca9532_led *led = &data->leds[offset]; + + if (led->type == PCA9532_TYPE_GPIO) + return 0; + + return -EBUSY; +} + +static void pca9532_gpio_set_value(struct gpio_chip *gc, unsigned offset, int val) +{ + struct pca9532_data *data = container_of(gc, struct pca9532_data, gpio); + struct pca9532_led *led = &data->leds[offset]; + + if (val) + led->state = PCA9532_ON; + else + led->state = PCA9532_OFF; + + pca9532_setled(led); +} + +static int pca9532_gpio_get_value(struct gpio_chip *gc, unsigned offset) +{ + struct pca9532_data *data = container_of(gc, struct pca9532_data, gpio); + unsigned char reg; + + reg = i2c_smbus_read_byte_data(data->client, PCA9532_REG_INPUT(offset)); + + return !!(reg & (1 << (offset % 8))); +} + +static int pca9532_gpio_direction_input(struct gpio_chip *gc, unsigned offset) +{ + /* To use as input ensure pin is not driven */ + pca9532_gpio_set_value(gc, offset, 0); + + return 0; +} + +static int pca9532_gpio_direction_output(struct gpio_chip *gc, unsigned offset, int val) +{ + pca9532_gpio_set_value(gc, offset, val); + + return 0; +} +#endif /* CONFIG_LEDS_PCA9532_GPIO */ + +static int pca9532_destroy_devices(struct pca9532_data *data, int n_devs) { int i = n_devs; if (!data) - return; + return -EINVAL; while (--i >= 0) { switch (data->leds[i].type) { case PCA9532_TYPE_NONE: + case PCA9532_TYPE_GPIO: break; case PCA9532_TYPE_LED: led_classdev_unregister(&data->leds[i].ldev); @@ -224,12 +281,26 @@ static void pca9532_destroy_devices(struct pca9532_data *data, int n_devs) break; } } + +#ifdef CONFIG_LEDS_PCA9532_GPIO + if (data->gpio.dev) { + int err = gpiochip_remove(&data->gpio); + if (err) { + dev_err(&data->client->dev, "%s failed, %d\n", + "gpiochip_remove()", err); + return err; + } + } +#endif + + return 0; } static int pca9532_configure(struct i2c_client *client, struct pca9532_data *data, struct pca9532_platform_data *pdata) { int i, err = 0; + int gpios = 0; for (i = 0; i < 2; i++) { data->pwm[i] = pdata->pwm[i]; @@ -249,6 +320,9 @@ static int pca9532_configure(struct i2c_client *client, switch (led->type) { case PCA9532_TYPE_NONE: break; + case PCA9532_TYPE_GPIO: + gpios++; + break; case PCA9532_TYPE_LED: led->state = pled->state; led->name = pled->name; @@ -297,6 +371,34 @@ static int pca9532_configure(struct i2c_client *client, break; } } + +#ifdef CONFIG_LEDS_PCA9532_GPIO + if (gpios) { + data->gpio.label = "gpio-pca9532"; + data->gpio.direction_input = pca9532_gpio_direction_input; + data->gpio.direction_output = 
pca9532_gpio_direction_output; + data->gpio.set = pca9532_gpio_set_value; + data->gpio.get = pca9532_gpio_get_value; + data->gpio.request = pca9532_gpio_request_pin; + data->gpio.can_sleep = 1; + data->gpio.base = pdata->gpio_base; + data->gpio.ngpio = 16; + data->gpio.dev = &client->dev; + data->gpio.owner = THIS_MODULE; + + err = gpiochip_add(&data->gpio); + if (err) { + /* Use data->gpio.dev as a flag for freeing gpiochip */ + data->gpio.dev = NULL; + dev_warn(&client->dev, "could not add gpiochip\n"); + } else { + dev_info(&client->dev, "gpios %i...%i\n", + data->gpio.base, data->gpio.base + + data->gpio.ngpio - 1); + } + } +#endif + return 0; exit: @@ -337,7 +439,12 @@ static int pca9532_probe(struct i2c_client *client, static int pca9532_remove(struct i2c_client *client) { struct pca9532_data *data = i2c_get_clientdata(client); - pca9532_destroy_devices(data, 16); + int err; + + err = pca9532_destroy_devices(data, 16); + if (err) + return err; + kfree(data); return 0; } diff --git a/include/linux/leds-pca9532.h b/include/linux/leds-pca9532.h index f158eb1149aa..b8d6fffed4d8 100644 --- a/include/linux/leds-pca9532.h +++ b/include/linux/leds-pca9532.h @@ -25,7 +25,7 @@ enum pca9532_state { }; enum pca9532_type { PCA9532_TYPE_NONE, PCA9532_TYPE_LED, - PCA9532_TYPE_N2100_BEEP }; + PCA9532_TYPE_N2100_BEEP, PCA9532_TYPE_GPIO }; struct pca9532_led { u8 id; @@ -41,6 +41,7 @@ struct pca9532_platform_data { struct pca9532_led leds[16]; u8 pwm[2]; u8 psc[2]; + int gpio_base; }; #endif /* __LINUX_PCA9532_H */ -- cgit v1.2.3 From 4440673a95e63ad888a41db596edaa0c55d3a332 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Tue, 24 May 2011 17:13:29 -0700 Subject: leds: provide helper to register "leds-gpio" devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function makes a deep copy of the platform data to allow it to live in init memory. For a kernel that supports several machines and so includes the definition for several leds-gpio devices this saves quite some memory because all but one definition can be free'd after boot. As the function is used by arch code it must be builtin and so cannot go into leds-gpio.c. [akpm@linux-foundation.org: s/CONFIG_LED_REGISTER_GPIO/CONFIG_LEDS_REGISTER_GPIO/] Signed-off-by: Uwe Kleine-König Cc: Russell King Acked-by: Richard Purdie Cc: Fabio Estevam Cc: Sascha Hauer Tested-by: H Hartley Sweeten Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/Makefile | 2 +- drivers/leds/Kconfig | 7 +++++++ drivers/leds/Makefile | 1 + drivers/leds/leds-gpio-register.c | 42 +++++++++++++++++++++++++++++++++++++++ include/linux/leds.h | 2 ++ 5 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 drivers/leds/leds-gpio-register.c (limited to 'include/linux') diff --git a/drivers/Makefile b/drivers/Makefile index 145aeadb6c03..bceb60c85c01 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -94,7 +94,7 @@ obj-$(CONFIG_CPU_IDLE) += cpuidle/ obj-$(CONFIG_DMA_ENGINE) += dma/ obj-$(CONFIG_MMC) += mmc/ obj-$(CONFIG_MEMSTICK) += memstick/ -obj-$(CONFIG_NEW_LEDS) += leds/ +obj-y += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ obj-y += firmware/ diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index a4a77734ab6e..1d027b475b22 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -14,6 +14,13 @@ config LEDS_CLASS This option enables the led sysfs class in /sys/class/leds. You'll need this to do anything useful with LEDs. If unsure, say N. 
+config LEDS_GPIO_REGISTER + bool + help + This option provides the function gpio_led_register_device. + As this function is used by arch code it must not be compiled as a + module. + if NEW_LEDS comment "LED drivers" diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index 0b6ad375bfdb..bccb96c9bb45 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_LEDS_COBALT_QUBE) += leds-cobalt-qube.o obj-$(CONFIG_LEDS_COBALT_RAQ) += leds-cobalt-raq.o obj-$(CONFIG_LEDS_SUNFIRE) += leds-sunfire.o obj-$(CONFIG_LEDS_PCA9532) += leds-pca9532.o +obj-$(CONFIG_LEDS_GPIO_REGISTER) += leds-gpio-register.o obj-$(CONFIG_LEDS_GPIO) += leds-gpio.o obj-$(CONFIG_LEDS_LP3944) += leds-lp3944.o obj-$(CONFIG_LEDS_LP5521) += leds-lp5521.o diff --git a/drivers/leds/leds-gpio-register.c b/drivers/leds/leds-gpio-register.c new file mode 100644 index 000000000000..1c4ed5510f35 --- /dev/null +++ b/drivers/leds/leds-gpio-register.c @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2011 Pengutronix + * Uwe Kleine-Koenig + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + */ +#include +#include +#include +#include + +/** + * gpio_led_register_device - register a gpio-led device + * @pdata: the platform data used for the new device + * + * Makes a copy of pdata and pdata->leds and registers a new leds-gpio device + * with the result. This allows to have pdata and pdata-leds in .init.rodata + * and so saves some bytes compared to a static struct platform_device with + * static platform data. + * + * Returns the registered device or an error pointer. + */ +struct platform_device *__init gpio_led_register_device( + int id, const struct gpio_led_platform_data *pdata) +{ + struct platform_device *ret; + struct gpio_led_platform_data _pdata = *pdata; + + _pdata.leds = kmemdup(pdata->leds, + pdata->num_leds * sizeof(*pdata->leds), GFP_KERNEL); + if (!_pdata.leds) + return ERR_PTR(-ENOMEM); + + ret = platform_device_register_resndata(NULL, "leds-gpio", id, + NULL, 0, &_pdata, sizeof(_pdata)); + if (IS_ERR(ret)) + kfree(_pdata.leds); + + return ret; +} diff --git a/include/linux/leds.h b/include/linux/leds.h index 61e0340a4b77..5884def15a24 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -207,5 +207,7 @@ struct gpio_led_platform_data { unsigned long *delay_off); }; +struct platform_device *gpio_led_register_device( + int id, const struct gpio_led_platform_data *pdata); #endif /* __LINUX_LEDS_H_INCLUDED */ -- cgit v1.2.3 From c196e32a111b0ee356d67acceb938ae0b5e63ef0 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 24 May 2011 17:13:31 -0700 Subject: lib: add kstrto*_from_user() There is quite a lot of code which does copy_from_user() + strict_strto*() or simple_strto*() combo in slightly different ways. Before doing conversions all over tree, let's get final API correct. Enter kstrtoull_from_user() and friends. Typical code which uses them looks very simple: TYPE val; int rv; rv = kstrtoTYPE_from_user(buf, count, 0, &val); if (rv < 0) return rv; [use val] return count; There is a tiny semantic difference from the plain kstrto*() API -- the latter allows any amount of leading zeroes, while the former copies data into buffer on stack and thus allows leading zeroes as long as it fits into buffer. This shouldn't be a problem for typical usecase "echo 42 > /proc/x". 
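To make the "typical code" above concrete, here is a hedged kernel-style sketch of a /proc write handler using the new helper; the handler and the demo_threshold variable are invented names for illustration only, not part of the patch.

#include <linux/fs.h>
#include <linux/kernel.h>

static int demo_threshold;

static ssize_t demo_threshold_write(struct file *file, const char __user *buf,
				    size_t count, loff_t *ppos)
{
	int val;
	int rv;

	/* One call replaces the copy_from_user() + strict_strtol() dance. */
	rv = kstrtoint_from_user(buf, count, 0, &val);
	if (rv < 0)
		return rv;

	demo_threshold = val;
	return count;
}
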
The point is to make reading one integer from userspace _very_ simple and very bug free. Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 31 +++++++++++++++++++++++++++++++ lib/kstrtox.c | 26 ++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 22295244de18..fb0e7329fee1 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -248,6 +248,37 @@ int __must_check kstrtos16(const char *s, unsigned int base, s16 *res); int __must_check kstrtou8(const char *s, unsigned int base, u8 *res); int __must_check kstrtos8(const char *s, unsigned int base, s8 *res); +int __must_check kstrtoull_from_user(const char __user *s, size_t count, unsigned int base, unsigned long long *res); +int __must_check kstrtoll_from_user(const char __user *s, size_t count, unsigned int base, long long *res); +int __must_check kstrtoul_from_user(const char __user *s, size_t count, unsigned int base, unsigned long *res); +int __must_check kstrtol_from_user(const char __user *s, size_t count, unsigned int base, long *res); +int __must_check kstrtouint_from_user(const char __user *s, size_t count, unsigned int base, unsigned int *res); +int __must_check kstrtoint_from_user(const char __user *s, size_t count, unsigned int base, int *res); +int __must_check kstrtou16_from_user(const char __user *s, size_t count, unsigned int base, u16 *res); +int __must_check kstrtos16_from_user(const char __user *s, size_t count, unsigned int base, s16 *res); +int __must_check kstrtou8_from_user(const char __user *s, size_t count, unsigned int base, u8 *res); +int __must_check kstrtos8_from_user(const char __user *s, size_t count, unsigned int base, s8 *res); + +static inline int __must_check kstrtou64_from_user(const char __user *s, size_t count, unsigned int base, u64 *res) +{ + return kstrtoull_from_user(s, count, base, res); +} + +static inline int __must_check kstrtos64_from_user(const char __user *s, size_t count, unsigned int base, s64 *res) +{ + return kstrtoll_from_user(s, count, base, res); +} + +static inline int __must_check kstrtou32_from_user(const char __user *s, size_t count, unsigned int base, u32 *res) +{ + return kstrtouint_from_user(s, count, base, res); +} + +static inline int __must_check kstrtos32_from_user(const char __user *s, size_t count, unsigned int base, s32 *res) +{ + return kstrtoint_from_user(s, count, base, res); +} + extern unsigned long simple_strtoul(const char *,char **,unsigned int); extern long simple_strtol(const char *,char **,unsigned int); extern unsigned long long simple_strtoull(const char *,char **,unsigned int); diff --git a/lib/kstrtox.c b/lib/kstrtox.c index a235f3cc471c..2dbae88090ac 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -17,6 +17,7 @@ #include #include #include +#include static inline char _tolower(const char c) { @@ -222,3 +223,28 @@ int kstrtos8(const char *s, unsigned int base, s8 *res) return 0; } EXPORT_SYMBOL(kstrtos8); + +#define kstrto_from_user(f, g, type) \ +int f(const char __user *s, size_t count, unsigned int base, type *res) \ +{ \ + /* sign, base 2 representation, newline, terminator */ \ + char buf[1 + sizeof(type) * 8 + 1 + 1]; \ + \ + count = min(count, sizeof(buf) - 1); \ + if (copy_from_user(buf, s, count)) \ + return -EFAULT; \ + buf[count] = '\0'; \ + return g(buf, base, res); \ +} \ +EXPORT_SYMBOL(f) + +kstrto_from_user(kstrtoull_from_user, kstrtoull, unsigned long long); 
+kstrto_from_user(kstrtoll_from_user, kstrtoll, long long); +kstrto_from_user(kstrtoul_from_user, kstrtoul, unsigned long); +kstrto_from_user(kstrtol_from_user, kstrtol, long); +kstrto_from_user(kstrtouint_from_user, kstrtouint, unsigned int); +kstrto_from_user(kstrtoint_from_user, kstrtoint, int); +kstrto_from_user(kstrtou16_from_user, kstrtou16, u16); +kstrto_from_user(kstrtos16_from_user, kstrtos16, s16); +kstrto_from_user(kstrtou8_from_user, kstrtou8, u8); +kstrto_from_user(kstrtos8_from_user, kstrtos8, s8); -- cgit v1.2.3 From 6aae6e0304d33e537298867dafb2703ec58c2e4f Mon Sep 17 00:00:00 2001 From: Jean-Christophe PLAGNIOL-VILLARD Date: Tue, 24 May 2011 17:13:33 -0700 Subject: include/linux/genalloc.h: add multiple-inclusion guards Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD Cc: Nicolas Ferre Cc: Patrice VILCHEZ Cc: Jes Sorensen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/genalloc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 9869ef3674ac..b1c70f10c42b 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -9,6 +9,8 @@ */ +#ifndef __GENALLOC_H__ +#define __GENALLOC_H__ /* * General purpose special memory pool descriptor. */ @@ -34,3 +36,4 @@ extern int gen_pool_add(struct gen_pool *, unsigned long, size_t, int); extern void gen_pool_destroy(struct gen_pool *); extern unsigned long gen_pool_alloc(struct gen_pool *, size_t); extern void gen_pool_free(struct gen_pool *, unsigned long, size_t); +#endif /* __GENALLOC_H__ */ -- cgit v1.2.3 From 3c8f370ded3483b27f1218ff0051fcf0c7a2facd Mon Sep 17 00:00:00 2001 From: Jean-Christophe PLAGNIOL-VILLARD Date: Tue, 24 May 2011 17:13:34 -0700 Subject: lib/genalloc.c: add support for specifying the physical address So we can specify the virtual address as the base of the pool chunk and then get physical addresses for hardware IP. 
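Taken together with the virt-to-phys helper added below, the new call lets a driver carve allocations out of device-local memory while still handing physical addresses to its hardware. A hedged kernel-style sketch (the SRAM naming, sizes and minimum allocation order are invented; only the gen_pool_* calls come from this patch):

#include <linux/errno.h>
#include <linux/genalloc.h>
#include <linux/types.h>

static struct gen_pool *sram_pool;

/* Register one chunk of on-chip SRAM, remembering both its CPU (virtual)
 * and bus (physical) base addresses. */
static int demo_sram_pool_init(void __iomem *virt, phys_addr_t phys, size_t size)
{
	int ret;

	sram_pool = gen_pool_create(5, -1);	/* 32-byte allocation granularity */
	if (!sram_pool)
		return -ENOMEM;

	ret = gen_pool_add_virt(sram_pool, (unsigned long)virt, phys, size, -1);
	if (ret)
		gen_pool_destroy(sram_pool);
	return ret;
}

/* Allocate a buffer and return the physical address the peripheral should use. */
static int demo_sram_alloc(size_t len, void **cpu_addr, phys_addr_t *phys_addr)
{
	unsigned long vaddr = gen_pool_alloc(sram_pool, len);

	if (!vaddr)
		return -ENOMEM;

	*cpu_addr = (void *)vaddr;
	*phys_addr = gen_pool_virt_to_phys(sram_pool, vaddr);
	return 0;
}
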
For example on at91 we will use this on spi, uart or macb Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD Cc: Nicolas Ferre Cc: Patrice VILCHEZ Cc: Jes Sorensen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/genalloc.h | 22 +++++++++++++++++++++- lib/genalloc.c | 45 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 58 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index b1c70f10c42b..5bbebda78b02 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -26,13 +26,33 @@ struct gen_pool { struct gen_pool_chunk { spinlock_t lock; struct list_head next_chunk; /* next chunk in pool */ + phys_addr_t phys_addr; /* physical starting address of memory chunk */ unsigned long start_addr; /* starting address of memory chunk */ unsigned long end_addr; /* ending address of memory chunk */ unsigned long bits[0]; /* bitmap for allocating memory chunk */ }; extern struct gen_pool *gen_pool_create(int, int); -extern int gen_pool_add(struct gen_pool *, unsigned long, size_t, int); +extern phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long); +extern int gen_pool_add_virt(struct gen_pool *, unsigned long, phys_addr_t, + size_t, int); +/** + * gen_pool_add - add a new chunk of special memory to the pool + * @pool: pool to add new memory chunk to + * @addr: starting address of memory chunk to add to pool + * @size: size in bytes of the memory chunk to add to pool + * @nid: node id of the node the chunk structure and bitmap should be + * allocated on, or -1 + * + * Add a new chunk of special memory to the specified pool. + * + * Returns 0 on success or a -ve errno on failure. + */ +static inline int gen_pool_add(struct gen_pool *pool, unsigned long addr, + size_t size, int nid) +{ + return gen_pool_add_virt(pool, addr, -1, size, nid); +} extern void gen_pool_destroy(struct gen_pool *); extern unsigned long gen_pool_alloc(struct gen_pool *, size_t); extern void gen_pool_free(struct gen_pool *, unsigned long, size_t); diff --git a/lib/genalloc.c b/lib/genalloc.c index 1923f1490e72..577ddf805975 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -39,17 +39,20 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid) EXPORT_SYMBOL(gen_pool_create); /** - * gen_pool_add - add a new chunk of special memory to the pool + * gen_pool_add_virt - add a new chunk of special memory to the pool * @pool: pool to add new memory chunk to - * @addr: starting address of memory chunk to add to pool + * @virt: virtual starting address of memory chunk to add to pool + * @phys: physical starting address of memory chunk to add to pool * @size: size in bytes of the memory chunk to add to pool * @nid: node id of the node the chunk structure and bitmap should be * allocated on, or -1 * * Add a new chunk of special memory to the specified pool. + * + * Returns 0 on success or a -ve errno on failure. 
*/ -int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, - int nid) +int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phys, + size_t size, int nid) { struct gen_pool_chunk *chunk; int nbits = size >> pool->min_alloc_order; @@ -58,11 +61,12 @@ int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, chunk = kmalloc_node(nbytes, GFP_KERNEL | __GFP_ZERO, nid); if (unlikely(chunk == NULL)) - return -1; + return -ENOMEM; spin_lock_init(&chunk->lock); - chunk->start_addr = addr; - chunk->end_addr = addr + size; + chunk->phys_addr = phys; + chunk->start_addr = virt; + chunk->end_addr = virt + size; write_lock(&pool->lock); list_add(&chunk->next_chunk, &pool->chunks); @@ -70,7 +74,32 @@ int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, return 0; } -EXPORT_SYMBOL(gen_pool_add); +EXPORT_SYMBOL(gen_pool_add_virt); + +/** + * gen_pool_virt_to_phys - return the physical address of memory + * @pool: pool to allocate from + * @addr: starting address of memory + * + * Returns the physical address on success, or -1 on error. + */ +phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + if (addr >= chunk->start_addr && addr < chunk->end_addr) + return chunk->phys_addr + addr - chunk->start_addr; + } + read_unlock(&pool->lock); + + return -1; +} +EXPORT_SYMBOL(gen_pool_virt_to_phys); /** * gen_pool_destroy - destroy a special memory pool -- cgit v1.2.3 From c84598bbfa756b7d042da31aa4e198ae866a6c7d Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 24 May 2011 17:13:35 -0700 Subject: percpu_counter: change return value and add comments The percpu_counter_*_positive() API in UP case doesn't check if return value is positive. Add comments to explain why we don't. Also if count < 0, returns 0 instead of 1 for *read_positive(). [akpm@linux-foundation.org: tweak comment] Signed-off-by: Shaohua Li Acked-by: Eric Dumazet Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu_counter.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 46f6ba56fa91..5edc9014263a 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -75,7 +75,7 @@ static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc) barrier(); /* Prevent reloads of fbc->count */ if (ret >= 0) return ret; - return 1; + return 0; } static inline int percpu_counter_initialized(struct percpu_counter *fbc) @@ -133,6 +133,10 @@ static inline s64 percpu_counter_read(struct percpu_counter *fbc) return fbc->count; } +/* + * percpu_counter is intended to track positive numbers. In the UP case the + * number should never be negative. + */ static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc) { return fbc->count; -- cgit v1.2.3 From 774466add7c810fd7e4c8bcf41995b6799608880 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 25 May 2011 20:43:32 +0200 Subject: hwmon: (jc42) Change detection class While the JC42-compatible chips are temperature sensors, I2C_CLASS_SPD makes more sense because these chips always live on memory modules. 
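For context on what the class bit changes in practice, here is a hedged sketch (not the jc42 driver itself): i2c-core only runs a driver's .detect() callback on adapters whose class shares a bit with the driver's .class, so after this change jc42-style drivers are only probed on buses flagged as carrying memory-module SPD devices.

#include <linux/i2c.h>

static const unsigned short demo_addrs[] = { 0x18, 0x19, I2C_CLIENT_END };

/* Called only for adapters whose ->class intersects .class below. */
static int demo_detect(struct i2c_client *client, struct i2c_board_info *info)
{
	/* Poke identification registers here; on success name the device. */
	strlcpy(info->type, "demo_temp", I2C_NAME_SIZE);
	return 0;
}

static struct i2c_driver demo_driver = {
	.class		= I2C_CLASS_SPD,
	.driver		= { .name = "demo_temp" },
	.detect		= demo_detect,
	.address_list	= demo_addrs,
};
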
Signed-off-by: Jean Delvare Cc: Guenter Roeck --- drivers/hwmon/jc42.c | 2 +- include/linux/i2c.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwmon/jc42.c b/drivers/hwmon/jc42.c index 934991237061..02cebb74e206 100644 --- a/drivers/hwmon/jc42.c +++ b/drivers/hwmon/jc42.c @@ -213,7 +213,7 @@ static const struct dev_pm_ops jc42_dev_pm_ops = { /* This is the driver that will be inserted */ static struct i2c_driver jc42_driver = { - .class = I2C_CLASS_HWMON, + .class = I2C_CLASS_SPD, .driver = { .name = "jc42", .pm = JC42_DEV_PM_OPS, diff --git a/include/linux/i2c.h b/include/linux/i2c.h index f1e3ff5880a9..a6c652ef516d 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -409,7 +409,7 @@ void i2c_unlock_adapter(struct i2c_adapter *); /* i2c adapter classes (bitmask) */ #define I2C_CLASS_HWMON (1<<0) /* lm_sensors, ... */ #define I2C_CLASS_DDC (1<<3) /* DDC bus on graphics adapters */ -#define I2C_CLASS_SPD (1<<7) /* SPD EEPROMs and similar */ +#define I2C_CLASS_SPD (1<<7) /* Memory modules */ /* Internal numbers to terminate lists */ #define I2C_CLIENT_END 0xfffeU -- cgit v1.2.3 From d0c97cfb81ebc5b416c0f92fa2fc18d2773e3023 Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Mon, 23 May 2011 15:06:36 -0500 Subject: mmc: core: Use CMD23 for multiblock transfers when we can. CMD23-prefixed instead of open-ended multiblock transfers have a performance advantage on some MMC cards. Signed-off-by: Andrei Warkentin Signed-off-by: Chris Ball --- drivers/mmc/card/block.c | 108 ++++++++++++++++++++++++++++++++++------------- include/linux/mmc/card.h | 1 + include/linux/mmc/core.h | 1 + include/linux/mmc/host.h | 6 +++ include/linux/mmc/mmc.h | 6 +++ 5 files changed, 93 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 126c7f41c5a3..a380accaba9a 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -59,10 +59,6 @@ MODULE_ALIAS("mmc:block"); #define INAND_CMD38_ARG_SECTRIM1 0x81 #define INAND_CMD38_ARG_SECTRIM2 0x88 -#define REL_WRITES_SUPPORTED(card) (mmc_card_mmc((card)) && \ - (((card)->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) || \ - ((card)->ext_csd.rel_sectors))) - static DEFINE_MUTEX(block_mutex); /* @@ -90,6 +86,10 @@ struct mmc_blk_data { struct mmc_queue queue; struct list_head part; + unsigned int flags; +#define MMC_BLK_CMD23 (1 << 0) /* Can do SET_BLOCK_COUNT for multiblock */ +#define MMC_BLK_REL_WR (1 << 1) /* MMC Reliable write support */ + unsigned int usage; unsigned int read_only; unsigned int part_type; @@ -429,6 +429,7 @@ static const struct block_device_operations mmc_bdops = { struct mmc_blk_request { struct mmc_request mrq; + struct mmc_command sbc; struct mmc_command cmd; struct mmc_command stop; struct mmc_data data; @@ -652,13 +653,10 @@ static int mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req) * reliable write can handle, thus finish the request in * partial completions. */ -static inline int mmc_apply_rel_rw(struct mmc_blk_request *brq, - struct mmc_card *card, - struct request *req) +static inline void mmc_apply_rel_rw(struct mmc_blk_request *brq, + struct mmc_card *card, + struct request *req) { - int err; - struct mmc_command set_count = {0}; - if (!(card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN)) { /* Legacy mode imposes restrictions on transfers. 
*/ if (!IS_ALIGNED(brq->cmd.arg, card->ext_csd.rel_sectors)) @@ -669,15 +667,6 @@ static inline int mmc_apply_rel_rw(struct mmc_blk_request *brq, else if (brq->data.blocks < card->ext_csd.rel_sectors) brq->data.blocks = 1; } - - set_count.opcode = MMC_SET_BLOCK_COUNT; - set_count.arg = brq->data.blocks | (1 << 31); - set_count.flags = MMC_RSP_R1 | MMC_CMD_AC; - err = mmc_wait_for_cmd(card->host, &set_count, 0); - if (err) - printk(KERN_ERR "%s: error %d SET_BLOCK_COUNT\n", - req->rq_disk->disk_name, err); - return err; } static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) @@ -694,7 +683,7 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) bool do_rel_wr = ((req->cmd_flags & REQ_FUA) || (req->cmd_flags & REQ_META)) && (rq_data_dir(req) == WRITE) && - REL_WRITES_SUPPORTED(card); + (md->flags & MMC_BLK_REL_WR); do { struct mmc_command cmd = {0}; @@ -732,11 +721,9 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) if (brq.data.blocks > 1 || do_rel_wr) { /* SPI multiblock writes terminate using a special - * token, not a STOP_TRANSMISSION request. Reliable - * writes use SET_BLOCK_COUNT and do not use a - * STOP_TRANSMISSION request either. + * token, not a STOP_TRANSMISSION request. */ - if ((!mmc_host_is_spi(card->host) && !do_rel_wr) || + if (!mmc_host_is_spi(card->host) || rq_data_dir(req) == READ) brq.mrq.stop = &brq.stop; readcmd = MMC_READ_MULTIPLE_BLOCK; @@ -754,8 +741,37 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) brq.data.flags |= MMC_DATA_WRITE; } - if (do_rel_wr && mmc_apply_rel_rw(&brq, card, req)) - goto cmd_err; + if (do_rel_wr) + mmc_apply_rel_rw(&brq, card, req); + + /* + * Pre-defined multi-block transfers are preferable to + * open ended-ones (and necessary for reliable writes). + * However, it is not sufficient to just send CMD23, + * and avoid the final CMD12, as on an error condition + * CMD12 (stop) needs to be sent anyway. This, coupled + * with Auto-CMD23 enhancements provided by some + * hosts, means that the complexity of dealing + * with this is best left to the host. If CMD23 is + * supported by card and host, we'll fill sbc in and let + * the host deal with handling it correctly. This means + * that for hosts that don't expose MMC_CAP_CMD23, no + * change of behavior will be observed. + * + * N.B: Some MMC cards experience perf degradation. + * We'll avoid using CMD23-bounded multiblock writes for + * these, while retaining features like reliable writes. + */ + + if ((md->flags & MMC_BLK_CMD23) && + mmc_op_multi(brq.cmd.opcode) && + (do_rel_wr || !(card->quirks & MMC_QUIRK_BLK_NO_CMD23))) { + brq.sbc.opcode = MMC_SET_BLOCK_COUNT; + brq.sbc.arg = brq.data.blocks | + (do_rel_wr ? (1 << 31) : 0); + brq.sbc.flags = MMC_RSP_R1 | MMC_CMD_AC; + brq.mrq.sbc = &brq.sbc; + } mmc_set_data_timeout(&brq.data, card); @@ -792,7 +808,8 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) * until later as we need to wait for the card to leave * programming mode even when things go wrong. 
*/ - if (brq.cmd.error || brq.data.error || brq.stop.error) { + if (brq.sbc.error || brq.cmd.error || + brq.data.error || brq.stop.error) { if (brq.data.blocks > 1 && rq_data_dir(req) == READ) { /* Redo read one sector at a time */ printk(KERN_WARNING "%s: retrying using single " @@ -803,6 +820,13 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) status = get_card_status(card, req); } + if (brq.sbc.error) { + printk(KERN_ERR "%s: error %d sending SET_BLOCK_COUNT " + "command, response %#x, card status %#x\n", + req->rq_disk->disk_name, brq.sbc.error, + brq.sbc.resp[0], status); + } + if (brq.cmd.error) { printk(KERN_ERR "%s: error %d sending read/write " "command, response %#x, card status %#x\n", @@ -1014,8 +1038,6 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, md->disk->queue = md->queue.queue; md->disk->driverfs_dev = parent; set_disk_ro(md->disk, md->read_only || default_ro); - if (REL_WRITES_SUPPORTED(card)) - blk_queue_flush(md->queue.queue, REQ_FLUSH | REQ_FUA); /* * As discussed on lkml, GENHD_FL_REMOVABLE should: @@ -1034,6 +1056,19 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, blk_queue_logical_block_size(md->queue.queue, 512); set_capacity(md->disk, size); + + if (mmc_host_cmd23(card->host) && + mmc_card_mmc(card)) + md->flags |= MMC_BLK_CMD23; + + if (mmc_card_mmc(card) && + md->flags & MMC_BLK_CMD23 && + ((card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) || + card->ext_csd.rel_sectors)) { + md->flags |= MMC_BLK_REL_WR; + blk_queue_flush(md->queue.queue, REQ_FLUSH | REQ_FUA); + } + return md; err_putdisk: @@ -1189,6 +1224,21 @@ static const struct mmc_fixup blk_fixups[] = MMC_FIXUP("SEM08G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38), MMC_FIXUP("SEM16G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38), MMC_FIXUP("SEM32G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38), + + /* + * Some MMC cards experience performance degradation with CMD23 + * instead of CMD12-bounded multiblock transfers. For now we'll + * black list what's bad... + * - Certain Toshiba cards. + * + * N.B. This doesn't affect SD cards. 
+ */ + MMC_FIXUP("MMC08G", 0x11, CID_OEMID_ANY, add_quirk_mmc, + MMC_QUIRK_BLK_NO_CMD23), + MMC_FIXUP("MMC16G", 0x11, CID_OEMID_ANY, add_quirk_mmc, + MMC_QUIRK_BLK_NO_CMD23), + MMC_FIXUP("MMC32G", 0x11, CID_OEMID_ANY, add_quirk_mmc, + MMC_QUIRK_BLK_NO_CMD23), END_FIXUP }; diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 7190aa2096f7..4a0e27baaea0 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -171,6 +171,7 @@ struct mmc_card { #define MMC_QUIRK_NONSTD_FUNC_IF (1<<4) /* SDIO card has nonstd function interfaces */ #define MMC_QUIRK_DISABLE_CD (1<<5) /* disconnect CD/DAT[3] resistor */ #define MMC_QUIRK_INAND_CMD38 (1<<6) /* iNAND devices have broken CMD38 */ +#define MMC_QUIRK_BLK_NO_CMD23 (1<<7) /* Avoid CMD23 for regular multiblock */ unsigned int erase_size; /* erase size in sectors */ unsigned int erase_shift; /* if erase unit is power 2 */ diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index cbe8d55f64c4..b6718e549a51 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -120,6 +120,7 @@ struct mmc_data { }; struct mmc_request { + struct mmc_command *sbc; /* SET_BLOCK_COUNT for multiblock */ struct mmc_command *cmd; struct mmc_data *data; struct mmc_command *stop; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index de32e6aa018a..e946bd10fe3f 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -210,6 +210,7 @@ struct mmc_host { #define MMC_CAP_MAX_CURRENT_400 (1 << 27) /* Host max current limit is 400mA */ #define MMC_CAP_MAX_CURRENT_600 (1 << 28) /* Host max current limit is 600mA */ #define MMC_CAP_MAX_CURRENT_800 (1 << 29) /* Host max current limit is 800mA */ +#define MMC_CAP_CMD23 (1 << 30) /* CMD23 supported. */ mmc_pm_flag_t pm_caps; /* supported pm features */ @@ -366,5 +367,10 @@ static inline int mmc_card_wake_sdio_irq(struct mmc_host *host) { return host->pm_flags & MMC_PM_WAKE_SDIO_IRQ; } + +static inline int mmc_host_cmd23(struct mmc_host *host) +{ + return host->caps & MMC_CAP_CMD23; +} #endif diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 9fa5a73f393d..ac26a685cca8 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -83,6 +83,12 @@ #define MMC_APP_CMD 55 /* ac [31:16] RCA R1 */ #define MMC_GEN_CMD 56 /* adtc [0] RD/WR R1 */ +static inline bool mmc_op_multi(u32 opcode) +{ + return opcode == MMC_WRITE_MULTIPLE_BLOCK || + opcode == MMC_READ_MULTIPLE_BLOCK; +} + /* * MMC_SWITCH argument format: * -- cgit v1.2.3 From e89d456fcdde2df008c032bf928e69e628e07a28 Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Mon, 23 May 2011 15:06:37 -0500 Subject: mmc: sdhci: Implement MMC_CAP_CMD23 for SDHCI. Implements support for multiblock transfers bounded by SET_BLOCK_COUNT (CMD23). 
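[ editor's note: the core patch above added the sbc ("set block count") slot
  to struct mmc_request, the MMC_CAP_CMD23 host capability and the
  mmc_op_multi() helper, which this SDHCI patch consumes. A minimal,
  illustrative sketch of how a caller builds a CMD23-bounded multi-block
  request with those interfaces follows; the function name and parameters
  are hypothetical, not part of either patch. ]

#include <linux/mmc/core.h>
#include <linux/mmc/host.h>
#include <linux/mmc/mmc.h>

/* Sketch: attach CMD23 to a multi-block request when host and opcode allow. */
static void example_setup_cmd23(struct mmc_host *host, struct mmc_request *mrq,
                                struct mmc_command *sbc, unsigned int nr_blocks,
                                bool reliable_write)
{
        /* CMD23 only makes sense for multi-block reads/writes on CMD23 hosts;
         * mrq->cmd is assumed to be filled in already. */
        if (!mmc_host_cmd23(host) || !mmc_op_multi(mrq->cmd->opcode))
                return;

        sbc->opcode = MMC_SET_BLOCK_COUNT;
        /* Bit 31 of the argument requests a reliable write, as in block.c above. */
        sbc->arg = nr_blocks | (reliable_write ? (1 << 31) : 0);
        sbc->flags = MMC_RSP_R1 | MMC_CMD_AC;

        /* With mrq->sbc set, CMD23 precedes the data command; on successful
         * completion no closing CMD12 is needed. */
        mrq->sbc = sbc;
}
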
Signed-off-by: Andrei Warkentin Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci.c | 63 +++++++++++++++++++++++++++++++++++------------ drivers/mmc/host/sdhci.h | 2 +- include/linux/mmc/sdhci.h | 1 + 3 files changed, 49 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index cc63f5ed2310..b9ed66307ac3 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -838,9 +838,10 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd) } static void sdhci_set_transfer_mode(struct sdhci_host *host, - struct mmc_data *data) + struct mmc_command *cmd) { u16 mode; + struct mmc_data *data = cmd->data; if (data == NULL) return; @@ -848,11 +849,14 @@ static void sdhci_set_transfer_mode(struct sdhci_host *host, WARN_ON(!host->data); mode = SDHCI_TRNS_BLK_CNT_EN; - if (data->blocks > 1) { - if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12) - mode |= SDHCI_TRNS_MULTI | SDHCI_TRNS_ACMD12; - else - mode |= SDHCI_TRNS_MULTI; + if (mmc_op_multi(cmd->opcode) || data->blocks > 1) { + mode |= SDHCI_TRNS_MULTI; + /* + * If we are sending CMD23, CMD12 never gets sent + * on successful completion (so no Auto-CMD12). + */ + if (!host->mrq->sbc && (host->flags & SDHCI_AUTO_CMD12)) + mode |= SDHCI_TRNS_AUTO_CMD12; } if (data->flags & MMC_DATA_READ) mode |= SDHCI_TRNS_READ; @@ -893,7 +897,15 @@ static void sdhci_finish_data(struct sdhci_host *host) else data->bytes_xfered = data->blksz * data->blocks; - if (data->stop) { + /* + * Need to send CMD12 if - + * a) open-ended multiblock transfer (no CMD23) + * b) error in multiblock transfer + */ + if (data->stop && + (data->error || + !host->mrq->sbc)) { + /* * The controller needs a reset of internal state machines * upon error conditions. @@ -949,7 +961,7 @@ static void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd) sdhci_writel(host, cmd->arg, SDHCI_ARGUMENT); - sdhci_set_transfer_mode(host, cmd->data); + sdhci_set_transfer_mode(host, cmd); if ((cmd->flags & MMC_RSP_136) && (cmd->flags & MMC_RSP_BUSY)) { printk(KERN_ERR "%s: Unsupported response type!\n", @@ -1004,13 +1016,21 @@ static void sdhci_finish_command(struct sdhci_host *host) host->cmd->error = 0; - if (host->data && host->data_early) - sdhci_finish_data(host); + /* Finished CMD23, now send actual command. */ + if (host->cmd == host->mrq->sbc) { + host->cmd = NULL; + sdhci_send_command(host, host->mrq->cmd); + } else { - if (!host->cmd->data) - tasklet_schedule(&host->finish_tasklet); + /* Processed actual command. */ + if (host->data && host->data_early) + sdhci_finish_data(host); - host->cmd = NULL; + if (!host->cmd->data) + tasklet_schedule(&host->finish_tasklet); + + host->cmd = NULL; + } } static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock) @@ -1189,7 +1209,12 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq) #ifndef SDHCI_USE_LEDS_CLASS sdhci_activate_led(host); #endif - if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12) { + + /* + * Ensure we don't send the STOP for non-SET_BLOCK_COUNTED + * requests if Auto-CMD12 is enabled. 
+ */ + if (!mrq->sbc && (host->flags & SDHCI_AUTO_CMD12)) { if (mrq->stop) { mrq->data->stop = NULL; mrq->stop = NULL; @@ -1227,7 +1252,10 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq) host->mrq = mrq; } - sdhci_send_command(host, mrq->cmd); + if (mrq->sbc) + sdhci_send_command(host, mrq->sbc); + else + sdhci_send_command(host, mrq->cmd); } mmiowb(); @@ -2455,7 +2483,10 @@ int sdhci_add_host(struct sdhci_host *host) } else mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_200; - mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE; + mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE | MMC_CAP_CMD23; + + if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12) + host->flags |= SDHCI_AUTO_CMD12; /* * A controller may support 8-bit width, but the board itself diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 7e28eec97f04..2c3fbc5a4c07 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -36,7 +36,7 @@ #define SDHCI_TRANSFER_MODE 0x0C #define SDHCI_TRNS_DMA 0x01 #define SDHCI_TRNS_BLK_CNT_EN 0x02 -#define SDHCI_TRNS_ACMD12 0x04 +#define SDHCI_TRNS_AUTO_CMD12 0x04 #define SDHCI_TRNS_READ 0x10 #define SDHCI_TRNS_MULTI 0x20 diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index e902618d68f4..73e27ba51e99 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -113,6 +113,7 @@ struct sdhci_host { #define SDHCI_DEVICE_DEAD (1<<3) /* Device unresponsive */ #define SDHCI_SDR50_NEEDS_TUNING (1<<4) /* SDR50 needs tuning */ #define SDHCI_NEEDS_RETUNING (1<<5) /* Host needs retuning */ +#define SDHCI_AUTO_CMD12 (1<<6) /* Auto CMD12 support */ unsigned int version; /* SDHCI spec. version */ -- cgit v1.2.3 From f0d89972b01798cf9d245dfa1cacfa0ee78a3593 Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Mon, 23 May 2011 15:06:38 -0500 Subject: mmc: core: Block CMD23 support for UHS104/SDXC cards. SD cards operating at UHS104 or better support SET_BLOCK_COUNT. 
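[ editor's note: the hunks below record the SD 3.0 "command support" bits
  from the SCR register in card->scr.cmds and teach the block driver to use
  CMD23 only for SD cards that advertise it. An illustrative sketch of that
  decision, using the SD_SCR_CMD23_SUPPORT flag introduced here, might look
  like this; the helper name is hypothetical. ]

#include <linux/mmc/card.h>
#include <linux/mmc/host.h>

/* Sketch: may CMD23-bounded transfers be used with this card? */
static bool example_card_allows_cmd23(struct mmc_card *card)
{
        if (!mmc_host_cmd23(card->host))
                return false;           /* host lacks MMC_CAP_CMD23 */

        if (mmc_card_mmc(card))
                return true;            /* MMC/eMMC always implements CMD23 */

        /* SD cards must advertise it in the SCR command-support bits (SD 3.0+). */
        return mmc_card_sd(card) && (card->scr.cmds & SD_SCR_CMD23_SUPPORT);
}
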
Signed-off-by: Andrei Warkentin Reviewed-by: Arindam Nath Signed-off-by: Chris Ball --- drivers/mmc/card/block.c | 9 ++++++--- drivers/mmc/core/sd.c | 2 ++ include/linux/mmc/card.h | 3 +++ include/linux/mmc/sd.h | 2 +- 4 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index a380accaba9a..71da5641e258 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -1057,9 +1057,12 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, blk_queue_logical_block_size(md->queue.queue, 512); set_capacity(md->disk, size); - if (mmc_host_cmd23(card->host) && - mmc_card_mmc(card)) - md->flags |= MMC_BLK_CMD23; + if (mmc_host_cmd23(card->host)) { + if (mmc_card_mmc(card) || + (mmc_card_sd(card) && + card->scr.cmds & SD_SCR_CMD23_SUPPORT)) + md->flags |= MMC_BLK_CMD23; + } if (mmc_card_mmc(card) && md->flags & MMC_BLK_CMD23 && diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 596d0b9d30b8..ff2774128aa9 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -203,6 +203,8 @@ static int mmc_decode_scr(struct mmc_card *card) else card->erased_byte = 0x0; + if (scr->sda_spec3) + scr->cmds = UNSTUFF_BITS(resp, 32, 2); return 0; } diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 4a0e27baaea0..c6927a4d157f 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -72,6 +72,9 @@ struct sd_scr { unsigned char bus_widths; #define SD_SCR_BUS_WIDTH_1 (1<<0) #define SD_SCR_BUS_WIDTH_4 (1<<2) + unsigned char cmds; +#define SD_SCR_CMD20_SUPPORT (1<<0) +#define SD_SCR_CMD23_SUPPORT (1<<1) }; struct sd_ssr { diff --git a/include/linux/mmc/sd.h b/include/linux/mmc/sd.h index c648878f6734..7d35d52c3df3 100644 --- a/include/linux/mmc/sd.h +++ b/include/linux/mmc/sd.h @@ -66,7 +66,7 @@ #define SCR_SPEC_VER_0 0 /* Implements system specification 1.0 - 1.01 */ #define SCR_SPEC_VER_1 1 /* Implements system specification 1.10 */ -#define SCR_SPEC_VER_2 2 /* Implements system specification 2.00 */ +#define SCR_SPEC_VER_2 2 /* Implements system specification 2.00-3.0X */ /* * SD bus widths -- cgit v1.2.3 From 8edf63710bd43e62d59bfe017df542fa0713bbb3 Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Mon, 23 May 2011 15:06:39 -0500 Subject: mmc: sdhci: Auto-CMD23 support. 
Enables Auto-CMD23 support where available (SDHCI 3.0 controllers) Signed-off-by: Andrei Warkentin Tested-by: Arindam Nath Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci.c | 17 ++++++++++++++++- drivers/mmc/host/sdhci.h | 2 ++ include/linux/mmc/sdhci.h | 1 + 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index b9ed66307ac3..f845fd6a64ae 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -857,7 +857,12 @@ static void sdhci_set_transfer_mode(struct sdhci_host *host, */ if (!host->mrq->sbc && (host->flags & SDHCI_AUTO_CMD12)) mode |= SDHCI_TRNS_AUTO_CMD12; + else if (host->mrq->sbc && (host->flags & SDHCI_AUTO_CMD23)) { + mode |= SDHCI_TRNS_AUTO_CMD23; + sdhci_writel(host, host->mrq->sbc->arg, SDHCI_ARGUMENT2); + } } + if (data->flags & MMC_DATA_READ) mode |= SDHCI_TRNS_READ; if (host->flags & SDHCI_REQ_USE_DMA) @@ -1252,7 +1257,7 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq) host->mrq = mrq; } - if (mrq->sbc) + if (mrq->sbc && !(host->flags & SDHCI_AUTO_CMD23)) sdhci_send_command(host, mrq->sbc); else sdhci_send_command(host, mrq->cmd); @@ -2488,6 +2493,16 @@ int sdhci_add_host(struct sdhci_host *host) if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12) host->flags |= SDHCI_AUTO_CMD12; + /* Auto-CMD23 stuff only works in ADMA or PIO. */ + if ((host->version == SDHCI_SPEC_300) && + ((host->flags & SDHCI_USE_ADMA) || + !(host->flags & SDHCI_REQ_USE_DMA))) { + host->flags |= SDHCI_AUTO_CMD23; + DBG("%s: Auto-CMD23 available\n", mmc_hostname(mmc)); + } else { + DBG("%s: Auto-CMD23 unavailable\n", mmc_hostname(mmc)); + } + /* * A controller may support 8-bit width, but the board itself * might not have the pins brought out. Boards that support diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 2c3fbc5a4c07..745c42fa41ed 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -25,6 +25,7 @@ */ #define SDHCI_DMA_ADDRESS 0x00 +#define SDHCI_ARGUMENT2 SDHCI_DMA_ADDRESS #define SDHCI_BLOCK_SIZE 0x04 #define SDHCI_MAKE_BLKSZ(dma, blksz) (((dma & 0x7) << 12) | (blksz & 0xFFF)) @@ -37,6 +38,7 @@ #define SDHCI_TRNS_DMA 0x01 #define SDHCI_TRNS_BLK_CNT_EN 0x02 #define SDHCI_TRNS_AUTO_CMD12 0x04 +#define SDHCI_TRNS_AUTO_CMD23 0x08 #define SDHCI_TRNS_READ 0x10 #define SDHCI_TRNS_MULTI 0x20 diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index 73e27ba51e99..6a68c4eb4e44 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -114,6 +114,7 @@ struct sdhci_host { #define SDHCI_SDR50_NEEDS_TUNING (1<<4) /* SDR50 needs tuning */ #define SDHCI_NEEDS_RETUNING (1<<5) /* Host needs retuning */ #define SDHCI_AUTO_CMD12 (1<<6) /* Auto CMD12 support */ +#define SDHCI_AUTO_CMD23 (1<<7) /* Auto CMD23 support */ unsigned int version; /* SDHCI spec. version */ -- cgit v1.2.3 From 6dcbbe25dcc9bd2bdeb4f685f8fb874ffc10e6be Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 24 May 2011 08:31:08 +0000 Subject: net: move is_vlan_dev into public header file (v2) Migrate is_vlan_dev() to if_vlan.h so that core networkig can use it Signed-off-by: Neil Horman CC: davem@davemloft.net CC: bhutchings@solarflare.com Signed-off-by: David S. 
Miller --- include/linux/if_vlan.h | 5 +++++ net/8021q/vlan.h | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 290bd8ac94cf..dc01681fbb42 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -110,6 +110,11 @@ static inline void vlan_group_set_device(struct vlan_group *vg, array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev; } +static inline int is_vlan_dev(struct net_device *dev) +{ + return dev->priv_flags & IFF_802_1Q_VLAN; +} + #define vlan_tx_tag_present(__skb) ((__skb)->vlan_tci & VLAN_TAG_PRESENT) #define vlan_tx_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index c3408def8a19..9da07e30d1a2 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -118,11 +118,6 @@ extern void vlan_netlink_fini(void); extern struct rtnl_link_ops vlan_link_ops; -static inline int is_vlan_dev(struct net_device *dev) -{ - return dev->priv_flags & IFF_802_1Q_VLAN; -} - extern int vlan_net_id; struct proc_dir_entry; -- cgit v1.2.3 From f29c50419c8d1998edd759f1990c4243a248f469 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 19 May 2011 14:35:33 -0400 Subject: maccess,probe_kernel: Make write/read src const void * The functions probe_kernel_write() and probe_kernel_read() do not modify the src pointer. Allow const pointers to be passed in without the need of a typecast. Acked-by: Mike Frysinger Acked-by: Heiko Carstens Acked-by: Martin Schwidefsky Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/r/1305824936.1465.4.camel@gandalf.stny.rr.com --- arch/blackfin/mm/maccess.c | 4 ++-- arch/s390/mm/maccess.c | 4 ++-- include/linux/uaccess.h | 8 ++++---- mm/maccess.c | 8 ++++---- 4 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/arch/blackfin/mm/maccess.c b/arch/blackfin/mm/maccess.c index b71cebc1f8a3..e2532114c5fd 100644 --- a/arch/blackfin/mm/maccess.c +++ b/arch/blackfin/mm/maccess.c @@ -16,7 +16,7 @@ static int validate_memory_access_address(unsigned long addr, int size) return bfin_mem_access_type(addr, size); } -long probe_kernel_read(void *dst, void *src, size_t size) +long probe_kernel_read(void *dst, const void *src, size_t size) { unsigned long lsrc = (unsigned long)src; int mem_type; @@ -55,7 +55,7 @@ long probe_kernel_read(void *dst, void *src, size_t size) return -EFAULT; } -long probe_kernel_write(void *dst, void *src, size_t size) +long probe_kernel_write(void *dst, const void *src, size_t size) { unsigned long ldst = (unsigned long)dst; int mem_type; diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 71a4b0d34be0..51e5cd9b906a 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -19,7 +19,7 @@ * using the stura instruction. * Returns the number of bytes copied or -EFAULT. */ -static long probe_kernel_write_odd(void *dst, void *src, size_t size) +static long probe_kernel_write_odd(void *dst, const void *src, size_t size) { unsigned long count, aligned; int offset, mask; @@ -45,7 +45,7 @@ static long probe_kernel_write_odd(void *dst, void *src, size_t size) return rc ? 
rc : count; } -long probe_kernel_write(void *dst, void *src, size_t size) +long probe_kernel_write(void *dst, const void *src, size_t size) { long copied = 0; diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index d512d98dfb7d..5ca0951e1855 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -93,8 +93,8 @@ static inline unsigned long __copy_from_user_nocache(void *to, * Safely read from address @src to the buffer at @dst. If a kernel fault * happens, handle that and return -EFAULT. */ -extern long probe_kernel_read(void *dst, void *src, size_t size); -extern long __probe_kernel_read(void *dst, void *src, size_t size); +extern long probe_kernel_read(void *dst, const void *src, size_t size); +extern long __probe_kernel_read(void *dst, const void *src, size_t size); /* * probe_kernel_write(): safely attempt to write to a location @@ -105,7 +105,7 @@ extern long __probe_kernel_read(void *dst, void *src, size_t size); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -extern long notrace probe_kernel_write(void *dst, void *src, size_t size); -extern long notrace __probe_kernel_write(void *dst, void *src, size_t size); +extern long notrace probe_kernel_write(void *dst, const void *src, size_t size); +extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size); #endif /* __LINUX_UACCESS_H__ */ diff --git a/mm/maccess.c b/mm/maccess.c index e2b6f5634e0d..4cee182ab5f3 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -15,10 +15,10 @@ * happens, handle that and return -EFAULT. */ -long __weak probe_kernel_read(void *dst, void *src, size_t size) +long __weak probe_kernel_read(void *dst, const void *src, size_t size) __attribute__((alias("__probe_kernel_read"))); -long __probe_kernel_read(void *dst, void *src, size_t size) +long __probe_kernel_read(void *dst, const void *src, size_t size) { long ret; mm_segment_t old_fs = get_fs(); @@ -43,10 +43,10 @@ EXPORT_SYMBOL_GPL(probe_kernel_read); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -long __weak probe_kernel_write(void *dst, void *src, size_t size) +long __weak probe_kernel_write(void *dst, const void *src, size_t size) __attribute__((alias("__probe_kernel_write"))); -long __probe_kernel_write(void *dst, void *src, size_t size) +long __probe_kernel_write(void *dst, const void *src, size_t size) { long ret; mm_segment_t old_fs = get_fs(); -- cgit v1.2.3 From 916f676f8dc016103f983c7ec54c18ecdbb6e349 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Wed, 25 May 2011 09:53:13 -0400 Subject: x86, efi: Retain boot service code until after switching to virtual mode UEFI stands for "Unified Extensible Firmware Interface", where "Firmware" is an ancient African word meaning "Why do something right when you can do it so wrong that children will weep and brave adults will cower before you", and "UEI" is Celtic for "We missed DOS so we burned it into your ROMs". The UEFI specification provides for runtime services (ie, another way for the operating system to be forced to depend on the firmware) and we rely on these for certain trivial tasks such as setting up the bootloader. But some hardware fails to work if we attempt to use these runtime services from physical mode, and so we have to switch into virtual mode. So far so dreadful. 
The specification makes it clear that the operating system is free to do whatever it wants with boot services code after ExitBootServices() has been called. SetVirtualAddressMap() can't be called until ExitBootServices() has been. So, obviously, a whole bunch of EFI implementations call into boot services code when we do that. Since we've been charmingly naive and trusted that the specification may be somehow relevant to the real world, we've already stuffed a picture of a penguin or something in that address space. And just to make things more entertaining, we've also marked it non-executable. This patch allocates the boot services regions during EFI init and makes sure that they're executable. Then, after SetVirtualAddressMap(), it discards them and everyone lives happily ever after. Except for the ones who have to work on EFI, who live sad lives haunted by the knowledge that someone's eventually going to write yet another firmware specification. [ hpa: adding this to urgent with a stable tag since it fixes currently-broken hardware. However, I do not know what the dependencies are and so I do not know which -stable versions this may be a candidate for. ] Signed-off-by: Matthew Garrett Link: http://lkml.kernel.org/r/1306331593-28715-1-git-send-email-mjg@redhat.com Signed-off-by: H. Peter Anvin Cc: Tony Luck Cc: --- arch/x86/kernel/setup.c | 7 +++++++ arch/x86/platform/efi/efi.c | 45 +++++++++++++++++++++++++++++++++++++++++- arch/x86/platform/efi/efi_64.c | 5 +++-- include/linux/efi.h | 1 + 4 files changed, 55 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 605e5ae19c7f..b9ca498f5602 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -910,6 +910,13 @@ void __init setup_arch(char **cmdline_p) memblock.current_limit = get_max_mapped(); memblock_x86_fill(); + /* + * The EFI specification says that boot service code won't be called + * after ExitBootServices(). This is, in fact, a lie. 
+ */ + if (efi_enabled) + efi_reserve_boot_services(); + /* preallocate 4k for mptable mpc */ early_reserve_e820_mpc_new(); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index b30aa26a8df2..0d3a4fa34560 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -304,6 +304,40 @@ static void __init print_efi_memmap(void) } #endif /* EFI_DEBUG */ +void __init efi_reserve_boot_services(void) +{ + void *p; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + efi_memory_desc_t *md = p; + unsigned long long start = md->phys_addr; + unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) + continue; + + memblock_x86_reserve_range(start, start + size, "EFI Boot"); + } +} + +static void __init efi_free_boot_services(void) +{ + void *p; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + efi_memory_desc_t *md = p; + unsigned long long start = md->phys_addr; + unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) + continue; + + free_bootmem_late(start, size); + } +} + void __init efi_init(void) { efi_config_table_t *config_tables; @@ -536,7 +570,9 @@ void __init efi_enter_virtual_mode(void) for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; - if (!(md->attribute & EFI_MEMORY_RUNTIME)) + if (!(md->attribute & EFI_MEMORY_RUNTIME) && + md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) continue; size = md->num_pages << EFI_PAGE_SHIFT; @@ -592,6 +628,13 @@ void __init efi_enter_virtual_mode(void) panic("EFI call to SetVirtualAddressMap() failed!"); } + /* + * Thankfully, it does seem that no runtime services other than + * SetVirtualAddressMap() will touch boot services code, so we can + * get rid of it all at this point + */ + efi_free_boot_services(); + /* * Now that EFI is in virtual mode, update the function * pointers in the runtime service table to the new virtual addresses. 
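[ editor's note: both helpers added above iterate the EFI memory map one
  descriptor at a time; because the descriptor size is reported by the
  firmware, the walk must advance by memmap.desc_size rather than by
  sizeof(efi_memory_desc_t). A minimal sketch of that pattern, assuming the
  same memmap global used in efi.c, is shown below; the function itself is
  illustrative only. ]

#include <linux/efi.h>
#include <linux/init.h>
#include <linux/kernel.h>

/* Sketch: walk boot services regions the way efi_reserve_boot_services() does. */
static void __init example_walk_boot_services(void)
{
        void *p;

        for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
                efi_memory_desc_t *md = p;
                unsigned long long start = md->phys_addr;
                unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;

                /* Only boot services code/data is of interest here. */
                if (md->type != EFI_BOOT_SERVICES_CODE &&
                    md->type != EFI_BOOT_SERVICES_DATA)
                        continue;

                pr_info("EFI boot services region: 0x%llx-0x%llx\n",
                        start, start + size);
        }
}
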
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 2649426a7905..ac3aa54e2654 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -49,10 +49,11 @@ static void __init early_code_mapping_set_exec(int executable) if (!(__supported_pte_mask & _PAGE_NX)) return; - /* Make EFI runtime service code area executable */ + /* Make EFI service code area executable */ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; - if (md->type == EFI_RUNTIME_SERVICES_CODE) + if (md->type == EFI_RUNTIME_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_CODE) efi_set_executable(md, executable); } } diff --git a/include/linux/efi.h b/include/linux/efi.h index 33fa1203024e..e376270cd26e 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -299,6 +299,7 @@ extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); extern unsigned long efi_get_time(void); extern int efi_set_rtc_mmss(unsigned long nowtime); +extern void efi_reserve_boot_services(void); extern struct efi_memory_map memmap; /** -- cgit v1.2.3 From 2fc1b6f0d0a719e1e2a30bf076a3a799feaf6af2 Mon Sep 17 00:00:00 2001 From: liubo Date: Tue, 19 Apr 2011 09:35:28 +0800 Subject: tracing: Add __print_symbolic_u64 to avoid warnings on 32bit machine Filesystem, like Btrfs, has some "ULL" macros, and when these macros are passed to tracepoints'__print_symbolic(), there will be 64->32 truncate WARNINGS during compiling on 32bit box. Signed-off-by: Liu Bo Link: http://lkml.kernel.org/r/4DACE6E0.7000507@cn.fujitsu.com Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 12 ++++++++++++ include/trace/ftrace.h | 13 +++++++++++++ kernel/trace/trace_output.c | 27 +++++++++++++++++++++++++++ 3 files changed, 52 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index b5a550a39a70..59d3ef100eb9 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -16,6 +16,11 @@ struct trace_print_flags { const char *name; }; +struct trace_print_flags_u64 { + unsigned long long mask; + const char *name; +}; + const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim, unsigned long flags, const struct trace_print_flags *flag_array); @@ -23,6 +28,13 @@ const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim, const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, const struct trace_print_flags *symbol_array); +#if BITS_PER_LONG == 32 +const char *ftrace_print_symbols_seq_u64(struct trace_seq *p, + unsigned long long val, + const struct trace_print_flags_u64 + *symbol_array); +#endif + const char *ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int len); diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 3e68366d485a..533c49f48047 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -205,6 +205,19 @@ ftrace_print_symbols_seq(p, value, symbols); \ }) +#undef __print_symbolic_u64 +#if BITS_PER_LONG == 32 +#define __print_symbolic_u64(value, symbol_array...) \ + ({ \ + static const struct trace_print_flags_u64 symbols[] = \ + { symbol_array, { -1, NULL } }; \ + ftrace_print_symbols_seq_u64(p, value, symbols); \ + }) +#else +#define __print_symbolic_u64(value, symbol_array...) 
\ + __print_symbolic(value, symbol_array) +#endif + #undef __print_hex #define __print_hex(buf, buf_len) ftrace_print_hex_seq(p, buf, buf_len) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index cf535ccedc86..e37de492a9e1 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -353,6 +353,33 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, } EXPORT_SYMBOL(ftrace_print_symbols_seq); +#if BITS_PER_LONG == 32 +const char * +ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, + const struct trace_print_flags_u64 *symbol_array) +{ + int i; + const char *ret = p->buffer + p->len; + + for (i = 0; symbol_array[i].name; i++) { + + if (val != symbol_array[i].mask) + continue; + + trace_seq_puts(p, symbol_array[i].name); + break; + } + + if (!p->len) + trace_seq_printf(p, "0x%llx", val); + + trace_seq_putc(p, 0); + + return ret; +} +EXPORT_SYMBOL(ftrace_print_symbols_seq_u64); +#endif + const char * ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) { -- cgit v1.2.3 From b1cff0ad1062621ae63cb6c5dc4165191fe2e9f1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 25 May 2011 14:27:43 -0400 Subject: ftrace: Add internal recursive checks Witold reported a reboot caused by the selftests of the dynamic function tracer. He sent me a config and I used ktest to do a config_bisect on it (as my config did not cause the crash). It pointed out that the problem config was CONFIG_PROVE_RCU. What happened was that if multiple callbacks are attached to the function tracer, we iterate a list of callbacks. Because the list is managed by synchronize_sched() and preempt_disable, the access to the pointers uses rcu_dereference_raw(). When PROVE_RCU is enabled, the rcu_dereference_raw() calls some debugging functions, which happen to be traced. The tracing of the debug function would then call rcu_dereference_raw() which would then call the debug function and then... well you get the idea. I first wrote two different patches to solve this bug. 1) add a __rcu_dereference_raw() that would not do any checks. 2) add notrace to the offending debug functions. Both of these patches worked. Talking with Paul McKenney on IRC, he suggested to add recursion detection instead. This seemed to be a better solution, so I decided to implement it. As the task_struct already has a trace_recursion to detect recursion in the ring buffer, and that has a very small number it allows, I decided to use that same variable to add flags that can detect the recursion inside the infrastructure of the function tracer. I plan to change it so that the task struct bit can be checked in mcount, but as that requires changes to all archs, I will hold that off to the next merge window. Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Frederic Weisbecker Cc: Paul E. 
McKenney Link: http://lkml.kernel.org/r/1306348063.1465.116.camel@gandalf.stny.rr.com Reported-by: Witold Baryluk Signed-off-by: Steven Rostedt --- include/linux/sched.h | 2 +- kernel/trace/ftrace.c | 13 ++++++++++++- kernel/trace/ring_buffer.c | 10 +++++----- kernel/trace/trace.h | 15 +++++++++++++++ 4 files changed, 33 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d8b2d0bec0d8..7b78d9cad471 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1513,7 +1513,7 @@ struct task_struct { #ifdef CONFIG_TRACING /* state flags for use by tracers */ unsigned long trace; - /* bitmask of trace recursion */ + /* bitmask and counter of trace recursion */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 25949b33057c..1ee417fcbfa5 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -109,12 +109,18 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); static void ftrace_global_list_func(unsigned long ip, unsigned long parent_ip) { - struct ftrace_ops *op = rcu_dereference_raw(ftrace_global_list); /*see above*/ + struct ftrace_ops *op; + + if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT))) + return; + trace_recursion_set(TRACE_GLOBAL_BIT); + op = rcu_dereference_raw(ftrace_global_list); /*see above*/ while (op != &ftrace_list_end) { op->func(ip, parent_ip); op = rcu_dereference_raw(op->next); /*see above*/ }; + trace_recursion_clear(TRACE_GLOBAL_BIT); } static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip) @@ -3490,6 +3496,10 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) { struct ftrace_ops *op; + if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT))) + return; + + trace_recursion_set(TRACE_INTERNAL_BIT); /* * Some of the ops may be dynamically allocated, * they must be freed after a synchronize_sched(). 
@@ -3502,6 +3512,7 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) op = rcu_dereference_raw(op->next); }; preempt_enable_notrace(); + trace_recursion_clear(TRACE_INTERNAL_BIT); } static void clear_ftrace_swapper(void) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 0ef7b4b2a1f7..b0c7aa407943 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2216,7 +2216,7 @@ static noinline void trace_recursive_fail(void) printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:" "HC[%lu]:SC[%lu]:NMI[%lu]\n", - current->trace_recursion, + trace_recursion_buffer(), hardirq_count() >> HARDIRQ_SHIFT, softirq_count() >> SOFTIRQ_SHIFT, in_nmi()); @@ -2226,9 +2226,9 @@ static noinline void trace_recursive_fail(void) static inline int trace_recursive_lock(void) { - current->trace_recursion++; + trace_recursion_inc(); - if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) + if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH)) return 0; trace_recursive_fail(); @@ -2238,9 +2238,9 @@ static inline int trace_recursive_lock(void) static inline void trace_recursive_unlock(void) { - WARN_ON_ONCE(!current->trace_recursion); + WARN_ON_ONCE(!trace_recursion_buffer()); - current->trace_recursion--; + trace_recursion_dec(); } #else diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6b69c4bd306f..229f8591f61d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -784,4 +784,19 @@ extern const char *__stop___trace_bprintk_fmt[]; FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) #include "trace_entries.h" +/* Only current can touch trace_recursion */ +#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0) +#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0) + +/* Ring buffer has the 10 LSB bits to count */ +#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff) + +/* for function tracing recursion */ +#define TRACE_INTERNAL_BIT (1<<11) +#define TRACE_GLOBAL_BIT (1<<12) + +#define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0) +#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0) +#define trace_recursion_test(bit) ((current)->trace_recursion & (bit)) + #endif /* _LINUX_KERNEL_TRACE_H */ -- cgit v1.2.3 From ce2a40458ebf9c2e47fa0806fec31f845bfcb9d5 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Tue, 10 May 2011 19:29:13 +0000 Subject: Remove unused MSG_ flags in linux/smp.h Now that powerpc has removed its use of MSG_ALL_BUT_SELF and MSG_ALL all these MSG_ flags are unused. 
Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- include/linux/smp.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 74243c86ba39..7ad824d510a2 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -98,16 +98,6 @@ void ipi_call_unlock_irq(void); */ int on_each_cpu(smp_call_func_t func, void *info, int wait); -#define MSG_ALL_BUT_SELF 0x8000 /* Assume <32768 CPU's */ -#define MSG_ALL 0x8001 - -#define MSG_INVALIDATE_TLB 0x0001 /* Remote processor TLB invalidate */ -#define MSG_STOP_CPU 0x0002 /* Sent to shut down slave CPU's - * when rebooting - */ -#define MSG_RESCHEDULE 0x0003 /* Reschedule request from master CPU*/ -#define MSG_CALL_FUNCTION 0x0004 /* Call function on all other CPUs */ - /* * Mark the boot cpu "online" so that it can call console drivers in * printk() and can access its per-cpu storage. -- cgit v1.2.3 From 7b7bf499f79de3f6c85a340c8453a78789523f85 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 19 May 2011 13:21:14 +0100 Subject: ARM: 6913/1: sparsemem: allow pfn_valid to be overridden when using SPARSEMEM In commit eb33575c ("[ARM] Double check memmap is actually valid with a memmap has unexpected holes V2"), a new function, memmap_valid_within, was introduced to mmzone.h so that holes in the memmap which pass pfn_valid in SPARSEMEM configurations can be detected and avoided. The fix to this problem checks that the pfn <-> page linkages are correct by calculating the page for the pfn and then checking that page_to_pfn on that page returns the original pfn. Unfortunately, in SPARSEMEM configurations, this results in reading from the page flags to determine the correct section. Since the memmap here has been freed, junk is read from memory and the check is no longer robust. In the best case, reading from /proc/pagetypeinfo will give you the wrong answer. In the worst case, you get SEGVs, Kernel OOPses and hung CPUs. Furthermore, ioremap implementations that use pfn_valid to disallow the remapping of normal memory will break. This patch allows architectures to provide their own pfn_valid function instead of using the default implementation used by sparsemem. The architecture-specific version is aware of the memmap state and will return false when passed a pfn for a freed page within a valid section. 
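[ editor's note: the mechanism is an opt-in: an architecture selects
  HAVE_ARCH_PFN_VALID and supplies its own pfn_valid(), and the generic
  sparsemem implementation in mmzone.h compiles out. A minimal sketch of
  such an override, mirroring the ARM hunk below and assuming memblock knows
  which physical ranges are real memory, is: ]

#ifdef CONFIG_HAVE_ARCH_PFN_VALID
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/module.h>

/*
 * Sketch: a pfn is valid only if it falls inside a memblock-registered
 * memory range, so freed memmap holes within an otherwise valid sparsemem
 * section are correctly rejected.
 */
int pfn_valid(unsigned long pfn)
{
        return memblock_is_memory(pfn << PAGE_SHIFT);
}
EXPORT_SYMBOL(pfn_valid);
#endif
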
Acked-by: Mel Gorman Acked-by: Catalin Marinas Tested-by: H Hartley Sweeten Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/Kconfig | 3 +++ arch/arm/include/asm/page.h | 2 +- arch/arm/mm/init.c | 4 +++- include/linux/mmzone.h | 2 ++ 4 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 7275009686e6..5be55d950ab5 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1516,6 +1516,9 @@ config ARCH_SPARSEMEM_DEFAULT config ARCH_SELECT_MEMORY_MODEL def_bool ARCH_SPARSEMEM_ENABLE +config HAVE_ARCH_PFN_VALID + def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM + config HIGHMEM bool "High Memory Support" depends on MMU diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index f51a69595f6e..ac75d0848889 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -197,7 +197,7 @@ typedef unsigned long pgprot_t; typedef struct page *pgtable_t; -#ifndef CONFIG_SPARSEMEM +#ifdef CONFIG_HAVE_ARCH_PFN_VALID extern int pfn_valid(unsigned long); #endif diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 3f17ea146f0e..bbc3346e8bcd 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -273,13 +273,15 @@ static void __init arm_bootmem_free(unsigned long min, unsigned long max_low, free_area_init_node(0, zone_size, min, zhole_size); } -#ifndef CONFIG_SPARSEMEM +#ifdef CONFIG_HAVE_ARCH_PFN_VALID int pfn_valid(unsigned long pfn) { return memblock_is_memory(pfn << PAGE_SHIFT); } EXPORT_SYMBOL(pfn_valid); +#endif +#ifndef CONFIG_SPARSEMEM static void arm_memory_present(void) { } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 217bcf6bca77..261f299c9441 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1056,12 +1056,14 @@ static inline struct mem_section *__pfn_to_section(unsigned long pfn) return __nr_to_section(pfn_to_section_nr(pfn)); } +#ifndef CONFIG_HAVE_ARCH_PFN_VALID static inline int pfn_valid(unsigned long pfn) { if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; return valid_section(__nr_to_section(pfn_to_section_nr(pfn))); } +#endif static inline int pfn_present(unsigned long pfn) { -- cgit v1.2.3 From 24da4fab5a617ecbf0f0c64e7ba7703383faa411 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 24 May 2011 00:23:34 +0200 Subject: vfs: Create __block_page_mkwrite() helper passing error values back Create __block_page_mkwrite() helper which does all what block_page_mkwrite() does except that it passes back errors from __block_write_begin / block_commit_write calls. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/buffer.c | 37 ++++++++++++++++++++----------------- include/linux/buffer_head.h | 14 ++++++++++++++ 2 files changed, 34 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index a08bb8e61c6f..f6ad8f9b8fa5 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2332,23 +2332,22 @@ EXPORT_SYMBOL(block_commit_write); * beyond EOF, then the page is guaranteed safe against truncation until we * unlock the page. 
*/ -int -block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, - get_block_t get_block) +int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, + get_block_t get_block) { struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; unsigned long end; loff_t size; - int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ + int ret; lock_page(page); size = i_size_read(inode); if ((page->mapping != inode->i_mapping) || (page_offset(page) > size)) { - /* page got truncated out from underneath us */ - unlock_page(page); - goto out; + /* We overload EFAULT to mean page got truncated */ + ret = -EFAULT; + goto out_unlock; } /* page is wholly or partially inside EOF */ @@ -2361,18 +2360,22 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, if (!ret) ret = block_commit_write(page, 0, end); - if (unlikely(ret)) { - unlock_page(page); - if (ret == -ENOMEM) - ret = VM_FAULT_OOM; - else /* -ENOSPC, -EIO, etc */ - ret = VM_FAULT_SIGBUS; - } else - ret = VM_FAULT_LOCKED; - -out: + if (unlikely(ret < 0)) + goto out_unlock; + return 0; +out_unlock: + unlock_page(page); return ret; } +EXPORT_SYMBOL(__block_page_mkwrite); + +int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, + get_block_t get_block) +{ + int ret = __block_page_mkwrite(vma, vmf, get_block); + + return block_page_mkwrite_return(ret); +} EXPORT_SYMBOL(block_page_mkwrite); /* diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index f5df23561b96..2bf6a9136a94 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -217,8 +217,22 @@ int cont_write_begin(struct file *, struct address_space *, loff_t, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); +int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, + get_block_t get_block); int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block); +/* Convert errno to return value from ->page_mkwrite() call */ +static inline int block_page_mkwrite_return(int err) +{ + if (err == 0) + return VM_FAULT_LOCKED; + if (err == -EFAULT) + return VM_FAULT_NOPAGE; + if (err == -ENOMEM) + return VM_FAULT_OOM; + /* -ENOSPC, -EDQUOT, -EIO ... */ + return VM_FAULT_SIGBUS; +} sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int block_truncate_page(struct address_space *, loff_t, get_block_t *); int nobh_write_begin(struct address_space *, loff_t, unsigned, unsigned, -- cgit v1.2.3 From ea13a86463fd0c26c2c209c53dc46b8eff81bad4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 24 May 2011 00:23:35 +0200 Subject: vfs: Block mmapped writes while the fs is frozen We should not allow file modification via mmap while the filesystem is frozen. So block in block_page_mkwrite() while the filesystem is frozen. We cannot do the blocking wait in __block_page_mkwrite() since e.g. ext4 will want to call that function with transaction started in some cases and that would deadlock. But we can at least do the non-blocking reliable check in __block_page_mkwrite() which is the hardest part anyway. We have to check for frozen filesystem with the page marked dirty and under page lock with which we then return from ->page_mkwrite(). 
Only that way we cannot race with writeback done by freezing code - either we mark the page dirty after the writeback has started, see freezing in progress and block, or writeback will wait for our page lock which is released only when the fault is done and then writeback will writeout and writeprotect the page again. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/buffer.c | 24 +++++++++++++++++++++++- include/linux/buffer_head.h | 2 ++ 2 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index f6ad8f9b8fa5..b0675bfe8207 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2331,6 +2331,9 @@ EXPORT_SYMBOL(block_commit_write); * page lock we can determine safely if the page is beyond EOF. If it is not * beyond EOF, then the page is guaranteed safe against truncation until we * unlock the page. + * + * Direct callers of this function should call vfs_check_frozen() so that page + * fault does not busyloop until the fs is thawed. */ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block) @@ -2362,6 +2365,18 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, if (unlikely(ret < 0)) goto out_unlock; + /* + * Freezing in progress? We check after the page is marked dirty and + * with page lock held so if the test here fails, we are sure freezing + * code will wait during syncing until the page fault is done - at that + * point page will be dirty and unlocked so freezing code will write it + * and writeprotect it again. + */ + set_page_dirty(page); + if (inode->i_sb->s_frozen != SB_UNFROZEN) { + ret = -EAGAIN; + goto out_unlock; + } return 0; out_unlock: unlock_page(page); @@ -2372,8 +2387,15 @@ EXPORT_SYMBOL(__block_page_mkwrite); int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block) { - int ret = __block_page_mkwrite(vma, vmf, get_block); + int ret; + struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb; + /* + * This check is racy but catches the common case. The check in + * __block_page_mkwrite() is reliable. + */ + vfs_check_frozen(sb, SB_FREEZE_WRITE); + ret = __block_page_mkwrite(vma, vmf, get_block); return block_page_mkwrite_return(ret); } EXPORT_SYMBOL(block_page_mkwrite); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 2bf6a9136a94..503c8a6b3079 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -230,6 +230,8 @@ static inline int block_page_mkwrite_return(int err) return VM_FAULT_NOPAGE; if (err == -ENOMEM) return VM_FAULT_OOM; + if (err == -EAGAIN) + return VM_FAULT_RETRY; /* -ENOSPC, -EDQUOT, -EIO ... */ return VM_FAULT_SIGBUS; } -- cgit v1.2.3 From 9fdfdcf17151e8326c4d18cc133abc6e58f47568 Mon Sep 17 00:00:00 2001 From: Dan Magenheimer Date: Thu, 26 May 2011 10:01:19 -0600 Subject: fs: add field to superblock to support cleancache This second patch of eight in this cleancache series adds a field to the generic superblock to squirrel away a pool identifier that is dynamically provided by cleancache-enabled filesystems at mount time to uniquely identify files and pages belonging to this mounted filesystem. 
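[ editor's note: the new field is consumed by the cleancache hooks added in
  the following patch -- a filesystem that opts in calls cleancache_init_fs()
  at mount time and a registered backend fills in the pool id, while -1
  keeps every per-page hook a cheap no-op for that superblock. A schematic,
  illustrative sketch (the fill_super function is hypothetical, and it
  assumes the VFS has already initialised the field to -1, per the
  "(-1 means none)" comment below): ]

#include <linux/cleancache.h>
#include <linux/fs.h>

/* Sketch: a cleancache-enabled filesystem opting in while mounting. */
static int example_fill_super(struct super_block *sb)
{
        /*
         * If a cleancache backend is registered, this assigns a pool id to
         * sb->cleancache_poolid; otherwise the field stays at -1 and the
         * per-page hooks remain disabled for this filesystem.
         */
        cleancache_init_fs(sb);
        return 0;
}
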
Details and a FAQ can be found in Documentation/vm/cleancache.txt [v8: trivial merge conflict update] Signed-off-by: Dan Magenheimer Reviewed-by: Jeremy Fitzhardinge Reviewed-by: Konrad Rzeszutek Wilk Cc: Andrew Morton Cc: Al Viro Cc: Matthew Wilcox Cc: Nick Piggin Cc: Mel Gorman Cc: Rik Van Riel Cc: Jan Beulich Cc: Chris Mason Cc: Andreas Dilger Cc: Ted Ts'o Cc: Mark Fasheh Cc: Joel Becker Cc: Nitin Gupta --- include/linux/fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index cdf9495df204..0169ed3f106e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1429,6 +1429,11 @@ struct super_block { */ char __rcu *s_options; const struct dentry_operations *s_d_op; /* default d_op for dentries */ + + /* + * Saved pool identifier for cleancache (-1 means none) + */ + int cleancache_poolid; }; extern struct timespec current_fs_time(struct super_block *sb); -- cgit v1.2.3 From 077b1f83a69d94f2918630a882d74939baca0bce Mon Sep 17 00:00:00 2001 From: Dan Magenheimer Date: Thu, 26 May 2011 10:01:36 -0600 Subject: mm: cleancache core ops functions and config This third patch of eight in this cleancache series provides the core code for cleancache that interfaces between the hooks in VFS and individual filesystems and a cleancache backend. It also includes build and config patches. Two new files are added: mm/cleancache.c and include/linux/cleancache.h. Note that CONFIG_CLEANCACHE can default to on; in systems that do not provide a cleancache backend, all hooks devolve to a simple check of a global enable flag, so performance impact should be negligible but can be reduced to zero impact if config'ed off. However for this first commit, it defaults to off. Details and a FAQ can be found in Documentation/vm/cleancache.txt Credits: Cleancache_ops design derived from Jeremy Fitzhardinge design for tmem [v8: dan.magenheimer@oracle.com: fix exportfs call affecting btrfs] [v8: akpm@linux-foundation.org: use static inline function, not macro] [v7: dan.magenheimer@oracle.com: cleanup sysfs and remove cleancache prefix] [v6: JBeulich@novell.com: robustly handle buggy fs encode_fh actor definition] [v5: jeremy@goop.org: clean up global usage and static var names] [v5: jeremy@goop.org: simplify init hook and any future fs init changes] [v5: hch@infradead.org: cleaner non-global interface for ops registration] [v4: adilger@sun.com: interface must support exportfs FS's] [v4: hch@infradead.org: interface must support 64-bit FS on 32-bit kernel] [v3: akpm@linux-foundation.org: use one ops struct to avoid pointer hops] [v3: akpm@linux-foundation.org: document and ensure PageLocked reqts are met] [v3: ngupta@vflare.org: fix success/fail codes, change funcs to void] [v2: viro@ZenIV.linux.org.uk: use sane types] Signed-off-by: Dan Magenheimer Reviewed-by: Jeremy Fitzhardinge Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Al Viro Acked-by: Andrew Morton Acked-by: Nitin Gupta Acked-by: Minchan Kim Acked-by: Andreas Dilger Acked-by: Jan Beulich Cc: Matthew Wilcox Cc: Nick Piggin Cc: Mel Gorman Cc: Rik Van Riel Cc: Chris Mason Cc: Ted Ts'o Cc: Mark Fasheh Cc: Joel Becker --- include/linux/cleancache.h | 122 +++++++++++++++++++++++ mm/Kconfig | 23 +++++ mm/Makefile | 1 + mm/cleancache.c | 244 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 390 insertions(+) create mode 100644 include/linux/cleancache.h create mode 100644 mm/cleancache.c (limited to 'include/linux') diff --git a/include/linux/cleancache.h 
b/include/linux/cleancache.h new file mode 100644 index 000000000000..04ffb2e6c9d0 --- /dev/null +++ b/include/linux/cleancache.h @@ -0,0 +1,122 @@ +#ifndef _LINUX_CLEANCACHE_H +#define _LINUX_CLEANCACHE_H + +#include +#include +#include + +#define CLEANCACHE_KEY_MAX 6 + +/* + * cleancache requires every file with a page in cleancache to have a + * unique key unless/until the file is removed/truncated. For some + * filesystems, the inode number is unique, but for "modern" filesystems + * an exportable filehandle is required (see exportfs.h) + */ +struct cleancache_filekey { + union { + ino_t ino; + __u32 fh[CLEANCACHE_KEY_MAX]; + u32 key[CLEANCACHE_KEY_MAX]; + } u; +}; + +struct cleancache_ops { + int (*init_fs)(size_t); + int (*init_shared_fs)(char *uuid, size_t); + int (*get_page)(int, struct cleancache_filekey, + pgoff_t, struct page *); + void (*put_page)(int, struct cleancache_filekey, + pgoff_t, struct page *); + void (*flush_page)(int, struct cleancache_filekey, pgoff_t); + void (*flush_inode)(int, struct cleancache_filekey); + void (*flush_fs)(int); +}; + +extern struct cleancache_ops + cleancache_register_ops(struct cleancache_ops *ops); +extern void __cleancache_init_fs(struct super_block *); +extern void __cleancache_init_shared_fs(char *, struct super_block *); +extern int __cleancache_get_page(struct page *); +extern void __cleancache_put_page(struct page *); +extern void __cleancache_flush_page(struct address_space *, struct page *); +extern void __cleancache_flush_inode(struct address_space *); +extern void __cleancache_flush_fs(struct super_block *); +extern int cleancache_enabled; + +#ifdef CONFIG_CLEANCACHE +static inline bool cleancache_fs_enabled(struct page *page) +{ + return page->mapping->host->i_sb->cleancache_poolid >= 0; +} +static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping) +{ + return mapping->host->i_sb->cleancache_poolid >= 0; +} +#else +#define cleancache_enabled (0) +#define cleancache_fs_enabled(_page) (0) +#define cleancache_fs_enabled_mapping(_page) (0) +#endif + +/* + * The shim layer provided by these inline functions allows the compiler + * to reduce all cleancache hooks to nothingness if CONFIG_CLEANCACHE + * is disabled, to a single global variable check if CONFIG_CLEANCACHE + * is enabled but no cleancache "backend" has dynamically enabled it, + * and, for the most frequent cleancache ops, to a single global variable + * check plus a superblock element comparison if CONFIG_CLEANCACHE is enabled + * and a cleancache backend has dynamically enabled cleancache, but the + * filesystem referenced by that cleancache op has not enabled cleancache. + * As a result, CONFIG_CLEANCACHE can be enabled by default with essentially + * no measurable performance impact. + */ + +static inline void cleancache_init_fs(struct super_block *sb) +{ + if (cleancache_enabled) + __cleancache_init_fs(sb); +} + +static inline void cleancache_init_shared_fs(char *uuid, struct super_block *sb) +{ + if (cleancache_enabled) + __cleancache_init_shared_fs(uuid, sb); +} + +static inline int cleancache_get_page(struct page *page) +{ + int ret = -1; + + if (cleancache_enabled && cleancache_fs_enabled(page)) + ret = __cleancache_get_page(page); + return ret; +} + +static inline void cleancache_put_page(struct page *page) +{ + if (cleancache_enabled && cleancache_fs_enabled(page)) + __cleancache_put_page(page); +} + +static inline void cleancache_flush_page(struct address_space *mapping, + struct page *page) +{ + /* careful... 
page->mapping is NULL sometimes when this is called */ + if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping)) + __cleancache_flush_page(mapping, page); +} + +static inline void cleancache_flush_inode(struct address_space *mapping) +{ + if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping)) + __cleancache_flush_inode(mapping); +} + +static inline void cleancache_flush_fs(struct super_block *sb) +{ + if (cleancache_enabled) + __cleancache_flush_fs(sb); +} + +#endif /* _LINUX_CLEANCACHE_H */ diff --git a/mm/Kconfig b/mm/Kconfig index e9c0c61f2ddd..8ca47a5ee9c8 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -347,3 +347,26 @@ config NEED_PER_CPU_KM depends on !SMP bool default y + +config CLEANCACHE + bool "Enable cleancache driver to cache clean pages if tmem is present" + default n + help + Cleancache can be thought of as a page-granularity victim cache + for clean pages that the kernel's pageframe replacement algorithm + (PFRA) would like to keep around, but can't since there isn't enough + memory. So when the PFRA "evicts" a page, it first attempts to use + cleancacne code to put the data contained in that page into + "transcendent memory", memory that is not directly accessible or + addressable by the kernel and is of unknown and possibly + time-varying size. And when a cleancache-enabled + filesystem wishes to access a page in a file on disk, it first + checks cleancache to see if it already contains it; if it does, + the page is copied into the kernel and a disk access is avoided. + When a transcendent memory driver is available (such as zcache or + Xen transcendent memory), a significant I/O reduction + may be achieved. When none is available, all cleancache calls + are reduced to a single pointer-compare-against-NULL resulting + in a negligible performance hit. + + If unsure, say Y to enable cleancache diff --git a/mm/Makefile b/mm/Makefile index 42a8326c3e3d..836e4163c1bf 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -49,3 +49,4 @@ obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o +obj-$(CONFIG_CLEANCACHE) += cleancache.o diff --git a/mm/cleancache.c b/mm/cleancache.c new file mode 100644 index 000000000000..bcaae4c2a770 --- /dev/null +++ b/mm/cleancache.c @@ -0,0 +1,244 @@ +/* + * Cleancache frontend + * + * This code provides the generic "frontend" layer to call a matching + * "backend" driver implementation of cleancache. See + * Documentation/vm/cleancache.txt for more information. + * + * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. + * Author: Dan Magenheimer + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#include +#include +#include +#include +#include + +/* + * This global enablement flag may be read thousands of times per second + * by cleancache_get/put/flush even on systems where cleancache_ops + * is not claimed (e.g. cleancache is config'ed on but remains + * disabled), so is preferred to the slower alternative: a function + * call that checks a non-global. + */ +int cleancache_enabled; +EXPORT_SYMBOL(cleancache_enabled); + +/* + * cleancache_ops is set by cleancache_ops_register to contain the pointers + * to the cleancache "backend" implementation functions. 
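As an illustration of the registration interface above (not part of this patch; every "mybackend" name below is hypothetical), a backend fills in a struct cleancache_ops with its callbacks and hands it to cleancache_register_ops(), which returns any previously registered ops by value:

#include <linux/module.h>
#include <linux/cleancache.h>

/* hypothetical backend stubs; .init_shared_fs is left NULL for brevity */
static int mybackend_init_fs(size_t pagesize)
{
	return 0;				/* pool id for this filesystem */
}

static int mybackend_get_page(int pool, struct cleancache_filekey key,
			      pgoff_t index, struct page *page)
{
	return -1;				/* miss: caller falls back to disk */
}

static void mybackend_put_page(int pool, struct cleancache_filekey key,
			       pgoff_t index, struct page *page) { }
static void mybackend_flush_page(int pool, struct cleancache_filekey key,
				 pgoff_t index) { }
static void mybackend_flush_inode(int pool, struct cleancache_filekey key) { }
static void mybackend_flush_fs(int pool) { }

static struct cleancache_ops mybackend_ops = {
	.init_fs	= mybackend_init_fs,
	.get_page	= mybackend_get_page,
	.put_page	= mybackend_put_page,
	.flush_page	= mybackend_flush_page,
	.flush_inode	= mybackend_flush_inode,
	.flush_fs	= mybackend_flush_fs,
};

static int __init mybackend_init(void)
{
	/* the returned struct holds the previously registered callbacks */
	struct cleancache_ops old = cleancache_register_ops(&mybackend_ops);

	(void)old;				/* normally all NULL */
	return 0;
}
module_init(mybackend_init);

Once a backend has registered, cleancache_enabled is set and the inline shims in cleancache.h start forwarding into it.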
+ */ +static struct cleancache_ops cleancache_ops; + +/* useful stats available in /sys/kernel/mm/cleancache */ +static unsigned long cleancache_succ_gets; +static unsigned long cleancache_failed_gets; +static unsigned long cleancache_puts; +static unsigned long cleancache_flushes; + +/* + * register operations for cleancache, returning previous thus allowing + * detection of multiple backends and possible nesting + */ +struct cleancache_ops cleancache_register_ops(struct cleancache_ops *ops) +{ + struct cleancache_ops old = cleancache_ops; + + cleancache_ops = *ops; + cleancache_enabled = 1; + return old; +} +EXPORT_SYMBOL(cleancache_register_ops); + +/* Called by a cleancache-enabled filesystem at time of mount */ +void __cleancache_init_fs(struct super_block *sb) +{ + sb->cleancache_poolid = (*cleancache_ops.init_fs)(PAGE_SIZE); +} +EXPORT_SYMBOL(__cleancache_init_fs); + +/* Called by a cleancache-enabled clustered filesystem at time of mount */ +void __cleancache_init_shared_fs(char *uuid, struct super_block *sb) +{ + sb->cleancache_poolid = + (*cleancache_ops.init_shared_fs)(uuid, PAGE_SIZE); +} +EXPORT_SYMBOL(__cleancache_init_shared_fs); + +/* + * If the filesystem uses exportable filehandles, use the filehandle as + * the key, else use the inode number. + */ +static int cleancache_get_key(struct inode *inode, + struct cleancache_filekey *key) +{ + int (*fhfn)(struct dentry *, __u32 *fh, int *, int); + int len = 0, maxlen = CLEANCACHE_KEY_MAX; + struct super_block *sb = inode->i_sb; + + key->u.ino = inode->i_ino; + if (sb->s_export_op != NULL) { + fhfn = sb->s_export_op->encode_fh; + if (fhfn) { + struct dentry d; + d.d_inode = inode; + len = (*fhfn)(&d, &key->u.fh[0], &maxlen, 0); + if (len <= 0 || len == 255) + return -1; + if (maxlen > CLEANCACHE_KEY_MAX) + return -1; + } + } + return 0; +} + +/* + * "Get" data from cleancache associated with the poolid/inode/index + * that were specified when the data was put to cleanache and, if + * successful, use it to fill the specified page with data and return 0. + * The pageframe is unchanged and returns -1 if the get fails. + * Page must be locked by caller. + */ +int __cleancache_get_page(struct page *page) +{ + int ret = -1; + int pool_id; + struct cleancache_filekey key = { .u.key = { 0 } }; + + VM_BUG_ON(!PageLocked(page)); + pool_id = page->mapping->host->i_sb->cleancache_poolid; + if (pool_id < 0) + goto out; + + if (cleancache_get_key(page->mapping->host, &key) < 0) + goto out; + + ret = (*cleancache_ops.get_page)(pool_id, key, page->index, page); + if (ret == 0) + cleancache_succ_gets++; + else + cleancache_failed_gets++; +out: + return ret; +} +EXPORT_SYMBOL(__cleancache_get_page); + +/* + * "Put" data from a page to cleancache and associate it with the + * (previously-obtained per-filesystem) poolid and the page's, + * inode and page index. Page must be locked. Note that a put_page + * always "succeeds", though a subsequent get_page may succeed or fail. + */ +void __cleancache_put_page(struct page *page) +{ + int pool_id; + struct cleancache_filekey key = { .u.key = { 0 } }; + + VM_BUG_ON(!PageLocked(page)); + pool_id = page->mapping->host->i_sb->cleancache_poolid; + if (pool_id >= 0 && + cleancache_get_key(page->mapping->host, &key) >= 0) { + (*cleancache_ops.put_page)(pool_id, key, page->index, page); + cleancache_puts++; + } +} +EXPORT_SYMBOL(__cleancache_put_page); + +/* + * Flush any data from cleancache associated with the poolid and the + * page's inode and page index so that a subsequent "get" will fail. 
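To make the calling convention of these hooks concrete, here is a sketch of a filesystem read path that tries cleancache before doing real I/O (this is not the actual VFS hook placement added elsewhere in the series; myfs_readpage and myfs_get_block are invented names):

#include <linux/cleancache.h>
#include <linux/mpage.h>
#include <linux/pagemap.h>

extern int myfs_get_block(struct inode *inode, sector_t iblock,
			  struct buffer_head *bh_result, int create);

static int myfs_readpage(struct file *file, struct page *page)
{
	/* ->readpage is entered with the page locked, which is exactly
	 * what cleancache_get_page() requires */
	if (cleancache_get_page(page) == 0) {
		SetPageUptodate(page);
		unlock_page(page);
		return 0;	/* filled from transcendent memory, no disk I/O */
	}
	return mpage_readpage(page, myfs_get_block);	/* normal slow path */
}

The complementary put happens when the VFS evicts a clean page, and truncation or invalidation goes through cleancache_flush_page() so that a later get cannot return stale data.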
+ */ +void __cleancache_flush_page(struct address_space *mapping, struct page *page) +{ + /* careful... page->mapping is NULL sometimes when this is called */ + int pool_id = mapping->host->i_sb->cleancache_poolid; + struct cleancache_filekey key = { .u.key = { 0 } }; + + if (pool_id >= 0) { + VM_BUG_ON(!PageLocked(page)); + if (cleancache_get_key(mapping->host, &key) >= 0) { + (*cleancache_ops.flush_page)(pool_id, key, page->index); + cleancache_flushes++; + } + } +} +EXPORT_SYMBOL(__cleancache_flush_page); + +/* + * Flush all data from cleancache associated with the poolid and the + * mappings's inode so that all subsequent gets to this poolid/inode + * will fail. + */ +void __cleancache_flush_inode(struct address_space *mapping) +{ + int pool_id = mapping->host->i_sb->cleancache_poolid; + struct cleancache_filekey key = { .u.key = { 0 } }; + + if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) + (*cleancache_ops.flush_inode)(pool_id, key); +} +EXPORT_SYMBOL(__cleancache_flush_inode); + +/* + * Called by any cleancache-enabled filesystem at time of unmount; + * note that pool_id is surrendered and may be reutrned by a subsequent + * cleancache_init_fs or cleancache_init_shared_fs + */ +void __cleancache_flush_fs(struct super_block *sb) +{ + if (sb->cleancache_poolid >= 0) { + int old_poolid = sb->cleancache_poolid; + sb->cleancache_poolid = -1; + (*cleancache_ops.flush_fs)(old_poolid); + } +} +EXPORT_SYMBOL(__cleancache_flush_fs); + +#ifdef CONFIG_SYSFS + +/* see Documentation/ABI/xxx/sysfs-kernel-mm-cleancache */ + +#define CLEANCACHE_SYSFS_RO(_name) \ + static ssize_t cleancache_##_name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf) \ + { \ + return sprintf(buf, "%lu\n", cleancache_##_name); \ + } \ + static struct kobj_attribute cleancache_##_name##_attr = { \ + .attr = { .name = __stringify(_name), .mode = 0444 }, \ + .show = cleancache_##_name##_show, \ + } + +CLEANCACHE_SYSFS_RO(succ_gets); +CLEANCACHE_SYSFS_RO(failed_gets); +CLEANCACHE_SYSFS_RO(puts); +CLEANCACHE_SYSFS_RO(flushes); + +static struct attribute *cleancache_attrs[] = { + &cleancache_succ_gets_attr.attr, + &cleancache_failed_gets_attr.attr, + &cleancache_puts_attr.attr, + &cleancache_flushes_attr.attr, + NULL, +}; + +static struct attribute_group cleancache_attr_group = { + .attrs = cleancache_attrs, + .name = "cleancache", +}; + +#endif /* CONFIG_SYSFS */ + +static int __init init_cleancache(void) +{ +#ifdef CONFIG_SYSFS + int err; + + err = sysfs_create_group(mm_kobj, &cleancache_attr_group); +#endif /* CONFIG_SYSFS */ + return 0; +} +module_init(init_cleancache) -- cgit v1.2.3 From ca16d140af91febe25daeb9e032bf8bd46b8c31f Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 26 May 2011 19:16:19 +0900 Subject: mm: don't access vm_flags as 'int' The type of vma->vm_flags is 'unsigned long'. Neither 'int' nor 'unsigned int'. This patch fixes such misuse. Signed-off-by: KOSAKI Motohiro [ Changed to use a typedef - we'll extend it to cover more cases later, since there has been discussion about making it a 64-bit type.. 
- Linus ] Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 3 ++- fs/proc/task_mmu.c | 2 +- include/linux/hugetlb.h | 6 +++--- include/linux/hugetlb_inline.h | 2 +- include/linux/mm.h | 6 +++--- include/linux/mm_types.h | 4 +++- ipc/shm.c | 2 +- mm/fremap.c | 2 +- mm/hugetlb.c | 4 ++-- mm/memory.c | 2 +- mm/mlock.c | 8 ++++---- mm/mmap.c | 8 ++++---- 12 files changed, 26 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index e7a035781b7d..7aafeb8fa300 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -921,7 +921,8 @@ static int can_do_hugetlb_shm(void) return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); } -struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, +struct file *hugetlb_file_setup(const char *name, size_t size, + vm_flags_t acctflag, struct user_struct **user, int creat_flags) { int error = -ENOMEM; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 2c9db29ea358..db15935fa757 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -211,7 +211,7 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) { struct mm_struct *mm = vma->vm_mm; struct file *file = vma->vm_file; - int flags = vma->vm_flags; + vm_flags_t flags = vma->vm_flags; unsigned long ino = 0; unsigned long long pgoff = 0; unsigned long start, end; diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 943c76b3d4bb..cf8931e1dd9d 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -41,7 +41,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags); int hugetlb_reserve_pages(struct inode *inode, long from, long to, struct vm_area_struct *vma, - int acctflags); + vm_flags_t vm_flags); void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); int dequeue_hwpoisoned_huge_page(struct page *page); void copy_huge_page(struct page *dst, struct page *src); @@ -168,7 +168,7 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) extern const struct file_operations hugetlbfs_file_operations; extern const struct vm_operations_struct hugetlb_vm_ops; -struct file *hugetlb_file_setup(const char *name, size_t size, int acct, +struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, struct user_struct **user, int creat_flags); int hugetlb_get_quota(struct address_space *mapping, long delta); void hugetlb_put_quota(struct address_space *mapping, long delta); @@ -192,7 +192,7 @@ static inline void set_file_hugepages(struct file *file) #define is_file_hugepages(file) 0 #define set_file_hugepages(file) BUG() static inline struct file *hugetlb_file_setup(const char *name, size_t size, - int acctflag, struct user_struct **user, int creat_flags) + vm_flags_t acctflag, struct user_struct **user, int creat_flags) { return ERR_PTR(-ENOSYS); } diff --git a/include/linux/hugetlb_inline.h b/include/linux/hugetlb_inline.h index 6931489a5c14..2bb681fbeb35 100644 --- a/include/linux/hugetlb_inline.h +++ b/include/linux/hugetlb_inline.h @@ -7,7 +7,7 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) { - return vma->vm_flags & VM_HUGETLB; + return !!(vma->vm_flags & VM_HUGETLB); } #else diff --git a/include/linux/mm.h b/include/linux/mm.h index 8eb969ebf904..fb8e814f78dc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -165,12 +165,12 @@ extern pgprot_t protection_map[16]; */ static inline int 
is_linear_pfn_mapping(struct vm_area_struct *vma) { - return (vma->vm_flags & VM_PFN_AT_MMAP); + return !!(vma->vm_flags & VM_PFN_AT_MMAP); } static inline int is_pfn_mapping(struct vm_area_struct *vma) { - return (vma->vm_flags & VM_PFNMAP); + return !!(vma->vm_flags & VM_PFNMAP); } /* @@ -1432,7 +1432,7 @@ extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long flag, unsigned long pgoff); extern unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, unsigned long flags, - unsigned int vm_flags, unsigned long pgoff); + vm_flags_t vm_flags, unsigned long pgoff); static inline unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 071d459e866b..6fe96c19f85e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -102,6 +102,8 @@ struct page { #endif }; +typedef unsigned long __nocast vm_flags_t; + /* * A region containing a mapping of a non-memory backed file under NOMMU * conditions. These are held in a global tree and are pinned by the VMAs that @@ -109,7 +111,7 @@ struct page { */ struct vm_region { struct rb_node vm_rb; /* link in global region tree */ - unsigned long vm_flags; /* VMA vm_flags */ + vm_flags_t vm_flags; /* VMA vm_flags */ unsigned long vm_start; /* start address of region */ unsigned long vm_end; /* region initialised to here */ unsigned long vm_top; /* region allocated to here */ diff --git a/ipc/shm.c b/ipc/shm.c index 729acb7e3148..ab3385a21b27 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -347,7 +347,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) struct file * file; char name[13]; int id; - int acctflag = 0; + vm_flags_t acctflag = 0; if (size < SHMMIN || size > ns->shm_ctlmax) return -EINVAL; diff --git a/mm/fremap.c b/mm/fremap.c index 7f4123056e06..b8e0e2d468af 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -224,7 +224,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, /* * drop PG_Mlocked flag for over-mapped range */ - unsigned int saved_flags = vma->vm_flags; + vm_flags_t saved_flags = vma->vm_flags; munlock_vma_pages_range(vma, start, start + size); vma->vm_flags = saved_flags; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 5fd68b95c671..f33bb319b73f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2833,7 +2833,7 @@ void hugetlb_change_protection(struct vm_area_struct *vma, int hugetlb_reserve_pages(struct inode *inode, long from, long to, struct vm_area_struct *vma, - int acctflag) + vm_flags_t vm_flags) { long ret, chg; struct hstate *h = hstate_inode(inode); @@ -2843,7 +2843,7 @@ int hugetlb_reserve_pages(struct inode *inode, * attempt will be made for VM_NORESERVE to allocate a page * and filesystem quota without using reserves */ - if (acctflag & VM_NORESERVE) + if (vm_flags & VM_NORESERVE) return 0; /* diff --git a/mm/memory.c b/mm/memory.c index b73f677f0bb1..fc24f7d788bd 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -730,7 +730,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, add_taint(TAINT_BAD_PAGE); } -static inline int is_cow_mapping(unsigned int flags) +static inline int is_cow_mapping(vm_flags_t flags) { return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; } diff --git a/mm/mlock.c b/mm/mlock.c index 516b2c2ddd5a..048260c4e02e 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -307,13 +307,13 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, * For vmas that pass 
the filters, merge/split as appropriate. */ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, - unsigned long start, unsigned long end, unsigned int newflags) + unsigned long start, unsigned long end, vm_flags_t newflags) { struct mm_struct *mm = vma->vm_mm; pgoff_t pgoff; int nr_pages; int ret = 0; - int lock = newflags & VM_LOCKED; + int lock = !!(newflags & VM_LOCKED); if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm)) @@ -385,7 +385,7 @@ static int do_mlock(unsigned long start, size_t len, int on) prev = vma; for (nstart = start ; ; ) { - unsigned int newflags; + vm_flags_t newflags; /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ @@ -524,7 +524,7 @@ static int do_mlockall(int flags) goto out; for (vma = current->mm->mmap; vma ; vma = prev->vm_next) { - unsigned int newflags; + vm_flags_t newflags; newflags = vma->vm_flags | VM_LOCKED; if (!(flags & MCL_CURRENT)) diff --git a/mm/mmap.c b/mm/mmap.c index ac2631b7477f..bbdc9af5e117 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -960,7 +960,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, { struct mm_struct * mm = current->mm; struct inode *inode; - unsigned int vm_flags; + vm_flags_t vm_flags; int error; unsigned long reqprot = prot; @@ -1165,7 +1165,7 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) */ int vma_wants_writenotify(struct vm_area_struct *vma) { - unsigned int vm_flags = vma->vm_flags; + vm_flags_t vm_flags = vma->vm_flags; /* If it was private or non-writable, the write bit is already clear */ if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED))) @@ -1193,7 +1193,7 @@ int vma_wants_writenotify(struct vm_area_struct *vma) * We account for memory if it's a private writeable mapping, * not hugepages and VM_NORESERVE wasn't set. 
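Stepping back from the individual hunks, the reason an 'int' (or 'unsigned int') copy of vm_flags is dangerous is easiest to see with a small, hypothetical example (VM_HIGHFLAG is invented; no flag above bit 31 exists yet, but the typedef is meant to allow one):

#include <linux/mm_types.h>

#define VM_HIGHFLAG	(1UL << 32)	/* hypothetical flag, 64-bit kernels only */

static int vma_has_highflag(vm_flags_t flags)
{
	unsigned int truncated = flags;		/* high bits silently dropped */

	if (truncated & VM_HIGHFLAG)		/* never true: the bug */
		return 1;
	return !!(flags & VM_HIGHFLAG);		/* full width, forced to 0/1 */
}

This is also why helpers that report a flag through an int return value now wrap the mask test in !!(): the truth value survives even when the flag bit itself would not fit in the return type. vm_flags_t stays an unsigned long for now, the __nocast annotation is intended to let sparse catch accidental mixes with plain integers, and the typedef leaves room to widen the type later as discussed above.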
*/ -static inline int accountable_mapping(struct file *file, unsigned int vm_flags) +static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags) { /* * hugetlb has its own accounting separate from the core VM @@ -1207,7 +1207,7 @@ static inline int accountable_mapping(struct file *file, unsigned int vm_flags) unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, unsigned long flags, - unsigned int vm_flags, unsigned long pgoff) + vm_flags_t vm_flags, unsigned long pgoff) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; -- cgit v1.2.3 From 249ddc79a38a8918ad53ac22606ca8af694344a5 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 24 May 2011 10:20:17 +0200 Subject: netfilter: ipset: Use proper timeout value to jiffies conversion Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set_timeout.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h index 9f30c5f2ec1c..bcdd40ad39ed 100644 --- a/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -45,7 +45,7 @@ ip_set_timeout_test(unsigned long timeout) { return timeout != IPSET_ELEM_UNSET && (timeout == IPSET_ELEM_PERMANENT || - time_after(timeout, jiffies)); + time_is_after_jiffies(timeout)); } static inline bool @@ -53,7 +53,7 @@ ip_set_timeout_expired(unsigned long timeout) { return timeout != IPSET_ELEM_UNSET && timeout != IPSET_ELEM_PERMANENT && - time_before(timeout, jiffies); + time_is_before_jiffies(timeout); } static inline unsigned long @@ -64,7 +64,7 @@ ip_set_timeout_set(u32 timeout) if (!timeout) return IPSET_ELEM_PERMANENT; - t = timeout * HZ + jiffies; + t = msecs_to_jiffies(timeout * 1000) + jiffies; if (t == IPSET_ELEM_UNSET || t == IPSET_ELEM_PERMANENT) /* Bingo! */ t++; @@ -75,7 +75,8 @@ ip_set_timeout_set(u32 timeout) static inline u32 ip_set_timeout_get(unsigned long timeout) { - return timeout == IPSET_ELEM_PERMANENT ? 0 : (timeout - jiffies)/HZ; + return timeout == IPSET_ELEM_PERMANENT ? 0 : + jiffies_to_msecs(timeout - jiffies)/1000; } #else @@ -89,14 +90,14 @@ static inline bool ip_set_timeout_test(unsigned long timeout) { return timeout == IPSET_ELEM_PERMANENT || - time_after(timeout, jiffies); + time_is_after_jiffies(timeout); } static inline bool ip_set_timeout_expired(unsigned long timeout) { return timeout != IPSET_ELEM_PERMANENT && - time_before(timeout, jiffies); + time_is_before_jiffies(timeout); } static inline unsigned long @@ -107,7 +108,7 @@ ip_set_timeout_set(u32 timeout) if (!timeout) return IPSET_ELEM_PERMANENT; - t = timeout * HZ + jiffies; + t = msecs_to_jiffies(timeout * 1000) + jiffies; if (t == IPSET_ELEM_PERMANENT) /* Bingo! :-) */ t++; @@ -118,7 +119,8 @@ ip_set_timeout_set(u32 timeout) static inline u32 ip_set_timeout_get(unsigned long timeout) { - return timeout == IPSET_ELEM_PERMANENT ? 0 : (timeout - jiffies)/HZ; + return timeout == IPSET_ELEM_PERMANENT ? 0 : + jiffies_to_msecs(timeout - jiffies)/1000; } #endif /* ! IP_SET_BITMAP_TIMEOUT */ -- cgit v1.2.3 From b141c242ff978b63cdf0f3d1a767a5152750166b Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 24 May 2011 10:20:18 +0200 Subject: netfilter: ipset: remove unused variable from type_pf_tdel() Variable 'ret' is set in type_pf_tdel() but not used, remove. 
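For reference, typical use of the timeout helpers switched to in the preceding ip_set_timeout patch looks like this (a sketch only; example_timeout_use is an invented name):

#include <linux/types.h>
#include <linux/jiffies.h>
#include <linux/printk.h>

static void example_timeout_use(u32 timeout_secs)
{
	/* store an absolute deadline in jiffies, as ip_set_timeout_set() does */
	unsigned long t = msecs_to_jiffies(timeout_secs * 1000) + jiffies;

	/* same test as time_after(t, jiffies), but reads as intended */
	if (time_is_after_jiffies(t))
		pr_debug("still valid for %u ms\n", jiffies_to_msecs(t - jiffies));
}

Compared with the open-coded 'timeout * HZ' arithmetic, msecs_to_jiffies()/jiffies_to_msecs() handle the rounding for any HZ value and clamp oversized inputs instead of wrapping.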
Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set_ahash.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set_ahash.h b/include/linux/netfilter/ipset/ip_set_ahash.h index a0196ac79051..ac3c822eb39a 100644 --- a/include/linux/netfilter/ipset/ip_set_ahash.h +++ b/include/linux/netfilter/ipset/ip_set_ahash.h @@ -839,7 +839,7 @@ type_pf_tdel(struct ip_set *set, void *value, u32 timeout) struct htable *t = h->table; const struct type_pf_elem *d = value; struct hbucket *n; - int i, ret = 0; + int i; struct type_pf_elem *data; u32 key; @@ -850,7 +850,7 @@ type_pf_tdel(struct ip_set *set, void *value, u32 timeout) if (!type_pf_data_equal(data, d)) continue; if (type_pf_data_expired(data)) - ret = -IPSET_ERR_EXIST; + return -IPSET_ERR_EXIST; if (i != n->pos - 1) /* Not last one */ type_pf_data_copy(data, ahash_tdata(n, n->pos - 1)); -- cgit v1.2.3 From eb8956074e7652e802be5f078080c704c2c87104 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 6 Apr 2011 16:52:52 +0200 Subject: mfd: Add platform data pointer back Now that we have a way to pass MFD cells down to the sub drivers, we can gradually get rid of mfd_data by putting the platform pointer back in place. Signed-off-by: Samuel Ortiz --- drivers/mfd/mfd-core.c | 7 +++++++ include/linux/mfd/core.h | 4 ++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index f4c8c844b913..0902523af62d 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -88,6 +88,13 @@ static int mfd_add_device(struct device *parent, int id, pdev->dev.parent = parent; + if (cell->pdata_size) { + ret = platform_device_add_data(pdev, + cell->platform_data, cell->pdata_size); + if (ret) + goto fail_res; + } + ret = mfd_platform_add_cell(pdev, cell); if (ret) goto fail_res; diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index aef23309a742..68c13e52a50c 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -36,6 +36,10 @@ struct mfd_cell { /* mfd_data can be used to pass data to client drivers */ void *mfd_data; + /* platform data passed to the sub devices drivers */ + void *platform_data; + size_t pdata_size; + /* * These resources can be specified relative to the parent device. * For accessing hardware you should use resources from the platform dev -- cgit v1.2.3 From 1f235a3785dbcfb324ff228048b859a3bd1e1a59 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 6 Apr 2011 00:20:24 +0200 Subject: mfd: Use mfd cell platform_data for ab3550 cells platform bits With the addition of a platform device mfd_cell pointer, MFD drivers can go back to passing platform data back to their sub drivers. This allows for an mfd_cell->mfd_data removal and thus keep the sub drivers MFD agnostic. This is mostly needed for non MFD aware sub drivers. Acked-by: Linus Walleij Signed-off-by: Samuel Ortiz --- drivers/mfd/ab3550-core.c | 6 ++++-- include/linux/mfd/abx500.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/ab3550-core.c b/drivers/mfd/ab3550-core.c index ff86acf3e6bd..3d7dce671b93 100644 --- a/drivers/mfd/ab3550-core.c +++ b/drivers/mfd/ab3550-core.c @@ -1320,8 +1320,10 @@ static int __init ab3550_probe(struct i2c_client *client, goto exit_no_ops; /* Set up and register the platform devices. 
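The per-cell data assigned in the loop just below travels through platform_device_add_data() in mfd-core and lands in each sub device's platform_data, where the client driver picks it up in the usual way; a minimal sketch with invented names:

#include <linux/errno.h>
#include <linux/platform_device.h>

struct mysub_platform_data {
	int example_field;		/* whatever the sub driver expects */
};

static int __devinit mysub_probe(struct platform_device *pdev)
{
	/* a copy of the cell's platform_data, made by platform_device_add_data() */
	struct mysub_platform_data *pdata = pdev->dev.platform_data;

	if (!pdata)
		return -EINVAL;
	return 0;
}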
*/ - for (i = 0; i < AB3550_NUM_DEVICES; i++) - ab3550_devs[i].mfd_data = ab3550_plf_data->dev_data[i]; + for (i = 0; i < AB3550_NUM_DEVICES; i++) { + ab3550_devs[i].platform_data = ab3550_plf_data->dev_data[i]; + ab3550_devs[i].pdata_size = ab3550_plf_data->dev_data_sz[i]; + } err = mfd_add_devices(&client->dev, 0, ab3550_devs, ARRAY_SIZE(ab3550_devs), NULL, diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h index 7d9b6ae1c203..67bd6f7ecf32 100644 --- a/include/linux/mfd/abx500.h +++ b/include/linux/mfd/abx500.h @@ -186,6 +186,7 @@ struct abx500_init_settings { struct ab3550_platform_data { struct {unsigned int base; unsigned int count; } irq; void *dev_data[AB3550_NUM_DEVICES]; + size_t dev_data_sz[AB3550_NUM_DEVICES]; struct abx500_init_settings *init_settings; unsigned int init_settings_sz; }; -- cgit v1.2.3 From ba279f58c6148c1dc76265da98eb292e76c15a32 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 8 Apr 2011 02:04:50 +0200 Subject: mfd: Remove mfd_data Cell pointers are passed through device->mfd_cell and platform data is passed through the MFD cell platform_data pointer. Signed-off-by: Samuel Ortiz --- include/linux/mfd/core.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index 68c13e52a50c..4e76163dd862 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -33,9 +33,6 @@ struct mfd_cell { int (*suspend)(struct platform_device *dev); int (*resume)(struct platform_device *dev); - /* mfd_data can be used to pass data to client drivers */ - void *mfd_data; - /* platform data passed to the sub devices drivers */ void *platform_data; size_t pdata_size; @@ -93,24 +90,6 @@ static inline const struct mfd_cell *mfd_get_cell(struct platform_device *pdev) return pdev->mfd_cell; } -/* - * Given a platform device that's been created by mfd_add_devices(), fetch - * the .mfd_data entry from the mfd_cell that created it. - * Otherwise just return the platform_data pointer. - * This maintains compatibility with platform drivers whose devices aren't - * created by the mfd layer, and expect platform_data to contain what would've - * otherwise been in mfd_data. - */ -static inline void *mfd_get_data(struct platform_device *pdev) -{ - const struct mfd_cell *cell = mfd_get_cell(pdev); - - if (cell) - return cell->mfd_data; - else - return pdev->dev.platform_data; -} - extern int mfd_add_devices(struct device *parent, int id, struct mfd_cell *cells, int n_devs, struct resource *mem_base, -- cgit v1.2.3 From 8997619a045bef5d138f0f45141a398557f809e6 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 4 Apr 2011 11:04:12 +0900 Subject: mfd: Remove compatibility interface for WM831x specific IRQ API The last user was removed in the merge window. 
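For any driver that still used them, the conversion is mechanical, since the removed helpers (shown in the hunk below) were one-line wrappers around the generic threaded-IRQ API; a hedged sketch, with the child-device and handler names invented:

#include <linux/interrupt.h>
#include <linux/platform_device.h>
#include <linux/mfd/wm831x/core.h>

static irqreturn_t my_handler(int irq, void *data)
{
	return IRQ_HANDLED;
}

static int my_wm831x_child_probe(struct platform_device *pdev)
{
	struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent);
	int irq = platform_get_irq(pdev, 0);

	/* previously: wm831x_request_irq(wm831x, irq, my_handler, ...) */
	return request_threaded_irq(irq, NULL, my_handler, IRQF_TRIGGER_RISING,
				    "wm831x-child", wm831x);
}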
Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- include/linux/mfd/wm831x/core.h | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h index 903280d21866..0d515ee1c247 100644 --- a/include/linux/mfd/wm831x/core.h +++ b/include/linux/mfd/wm831x/core.h @@ -301,30 +301,4 @@ int wm831x_device_suspend(struct wm831x *wm831x); int wm831x_irq_init(struct wm831x *wm831x, int irq); void wm831x_irq_exit(struct wm831x *wm831x); -static inline int __must_check wm831x_request_irq(struct wm831x *wm831x, - unsigned int irq, - irq_handler_t handler, - unsigned long flags, - const char *name, - void *dev) -{ - return request_threaded_irq(irq, NULL, handler, flags, name, dev); -} - -static inline void wm831x_free_irq(struct wm831x *wm831x, - unsigned int irq, void *dev) -{ - free_irq(irq, dev); -} - -static inline void wm831x_disable_irq(struct wm831x *wm831x, int irq) -{ - disable_irq(irq); -} - -static inline void wm831x_enable_irq(struct wm831x *wm831x, int irq) -{ - enable_irq(irq); -} - #endif -- cgit v1.2.3 From 0b14c22ea1e0226d894df76176971d06e8886aa7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 4 Apr 2011 11:04:42 +0900 Subject: mfd: Provide platform data for WM831x GPIO configuration Allow the GPIO mode of WM831x devices to be configured using platform data. Users may provide a table of GPIO register values in gpio_defaults[]. In order to allow 0 to be set explicitly out of range values are accepted and masked off, with a WM831X_GPIO_CONFIGURE define provided to set an out of range value. This can be used to configure higher numbered GPIOs or override values set in OTP for GPIOs configured using OTP. Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/wm831x-core.c | 13 ++++++++++++- include/linux/mfd/wm831x/pdata.h | 4 ++++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c index 3fe9a58fe6c7..265f75fc6a25 100644 --- a/drivers/mfd/wm831x-core.c +++ b/drivers/mfd/wm831x-core.c @@ -1442,7 +1442,7 @@ int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq) struct wm831x_pdata *pdata = wm831x->dev->platform_data; int rev; enum wm831x_parent parent; - int ret; + int ret, i; mutex_init(&wm831x->io_lock); mutex_init(&wm831x->key_lock); @@ -1581,6 +1581,17 @@ int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq) } } + if (pdata) { + for (i = 0; i < ARRAY_SIZE(pdata->gpio_defaults); i++) { + if (!pdata->gpio_defaults[i]) + continue; + + wm831x_reg_write(wm831x, + WM831X_GPIO1_CONTROL + i, + pdata->gpio_defaults[i] & 0xffff); + } + } + ret = wm831x_irq_init(wm831x, irq); if (ret != 0) goto err; diff --git a/include/linux/mfd/wm831x/pdata.h b/include/linux/mfd/wm831x/pdata.h index 632d1567a1b6..ff42d700293f 100644 --- a/include/linux/mfd/wm831x/pdata.h +++ b/include/linux/mfd/wm831x/pdata.h @@ -105,6 +105,9 @@ struct wm831x_watchdog_pdata { #define WM831X_MAX_LDO 11 #define WM831X_MAX_ISINK 2 +#define WM831X_GPIO_CONFIGURE 0x10000 +#define WM831X_GPIO_NUM 16 + struct wm831x_pdata { /** Used to distinguish multiple WM831x chips */ int wm831x_num; @@ -119,6 +122,7 @@ struct wm831x_pdata { int irq_base; int gpio_base; + int gpio_defaults[WM831X_GPIO_NUM]; struct wm831x_backlight_pdata *backlight; struct wm831x_backup_pdata *backup; struct wm831x_battery_pdata *battery; -- cgit v1.2.3 From d7ac829fa30d44d6553a0ead41f47bb92ee4d73e Mon 
Sep 17 00:00:00 2001 From: Lesly A M Date: Thu, 14 Apr 2011 17:57:51 +0530 Subject: mfd: Modifying the twl4030-power macro name Main_Ref to all caps Modifying the macro name Main_Ref to all caps(MAIN_REF). Suggested by Nishanth Menon Signed-off-by: Lesly A M Cc: Nishanth Menon Cc: David Derrick Signed-off-by: Samuel Ortiz --- arch/arm/mach-omap2/board-rx51-peripherals.c | 2 +- drivers/mfd/twl4030-power.c | 2 +- include/linux/i2c/twl.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c index bbcb6775a6a3..01ee0a15ef88 100644 --- a/arch/arm/mach-omap2/board-rx51-peripherals.c +++ b/arch/arm/mach-omap2/board-rx51-peripherals.c @@ -730,7 +730,7 @@ static struct twl4030_resconfig twl4030_rconfig[] __initdata = { { .resource = RES_RESET, .devgroup = -1, .type = 1, .type2 = -1, .remap_off = -1, .remap_sleep = -1 }, - { .resource = RES_Main_Ref, .devgroup = -1, + { .resource = RES_MAIN_REF, .devgroup = -1, .type = 1, .type2 = -1, .remap_off = -1, .remap_sleep = -1 }, { 0, 0}, diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c index 8373d79323cc..8162e435c9ff 100644 --- a/drivers/mfd/twl4030-power.c +++ b/drivers/mfd/twl4030-power.c @@ -120,7 +120,7 @@ static u8 res_config_addrs[] = { [RES_HFCLKOUT] = 0x8b, [RES_32KCLKOUT] = 0x8e, [RES_RESET] = 0x91, - [RES_Main_Ref] = 0x94, + [RES_MAIN_REF] = 0x94, }; static int __init twl4030_write_script_byte(u8 address, u8 byte) diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index 0c0d1ae79981..4ebd7c32bc59 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -501,7 +501,7 @@ static inline int twl6030_mmc_card_detect(struct device *dev, int slot) #define RES_32KCLKOUT 26 #define RES_RESET 27 /* Power Reference */ -#define RES_Main_Ref 28 +#define RES_MAIN_REF 28 #define TOTAL_RESOURCES 28 /* -- cgit v1.2.3 From ca972d13382436530896e90591e2793e7a9e7eba Mon Sep 17 00:00:00 2001 From: Lesly A M Date: Thu, 14 Apr 2011 17:57:53 +0530 Subject: mfd: TWL5030 version checking in twl-core Added API to get the TWL5030 Si version from the IDCODE register. It is used for enabling the workaround for TWL erratum 27. Signed-off-by: Lesly A M Cc: Nishanth Menon Cc: David Derrick Signed-off-by: Samuel Ortiz --- drivers/mfd/twl-core.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/i2c/twl.h | 17 +++++++++++++- 2 files changed, 78 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c index 960b5bed7f52..2bd9e0676bc2 100644 --- a/drivers/mfd/twl-core.c +++ b/drivers/mfd/twl-core.c @@ -229,6 +229,9 @@ /* is driver active, bound to a chip? */ static bool inuse; +/* TWL IDCODE Register value */ +static u32 twl_idcode; + static unsigned int twl_id; unsigned int twl_rev(void) { @@ -487,6 +490,58 @@ EXPORT_SYMBOL(twl_i2c_read_u8); /*----------------------------------------------------------------------*/ +/** + * twl_read_idcode_register - API to read the IDCODE register. + * + * Unlocks the IDCODE register and read the 32 bit value. 
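The intended consumer is version-conditional code such as the erratum 27 workaround mentioned above; with the accessors added below, a check might look like this (a sketch; the actual workaround, and which revisions need it, land in a separate patch):

#include <linux/i2c/twl.h>

static void my_apply_quirks(void)
{
	/* illustrative only: the affected revisions are defined by the erratum */
	if (twl_get_type() == TWL_SIL_5030 &&
	    twl_get_version() == TWL5030_REV_1_1) {
		/* apply the TWL5030 erratum 27 workaround here */
	}
}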
+ */ +static int twl_read_idcode_register(void) +{ + int err; + + err = twl_i2c_write_u8(TWL4030_MODULE_INTBR, TWL_EEPROM_R_UNLOCK, + REG_UNLOCK_TEST_REG); + if (err) { + pr_err("TWL4030 Unable to unlock IDCODE registers -%d\n", err); + goto fail; + } + + err = twl_i2c_read(TWL4030_MODULE_INTBR, (u8 *)(&twl_idcode), + REG_IDCODE_7_0, 4); + if (err) { + pr_err("TWL4030: unable to read IDCODE -%d\n", err); + goto fail; + } + + err = twl_i2c_write_u8(TWL4030_MODULE_INTBR, 0x0, REG_UNLOCK_TEST_REG); + if (err) + pr_err("TWL4030 Unable to relock IDCODE registers -%d\n", err); +fail: + return err; +} + +/** + * twl_get_type - API to get TWL Si type. + * + * Api to get the TWL Si type from IDCODE value. + */ +int twl_get_type(void) +{ + return TWL_SIL_TYPE(twl_idcode); +} +EXPORT_SYMBOL_GPL(twl_get_type); + +/** + * twl_get_version - API to get TWL Si version. + * + * Api to get the TWL Si version from IDCODE value. + */ +int twl_get_version(void) +{ + return TWL_SIL_REV(twl_idcode); +} +EXPORT_SYMBOL_GPL(twl_get_version); + static struct device * add_numbered_child(unsigned chip, const char *name, int num, void *pdata, unsigned pdata_len, @@ -1014,6 +1069,7 @@ twl_probe(struct i2c_client *client, const struct i2c_device_id *id) unsigned i; struct twl4030_platform_data *pdata = client->dev.platform_data; u8 temp; + int ret = 0; if (!pdata) { dev_dbg(&client->dev, "no platform data?\n"); @@ -1060,6 +1116,12 @@ twl_probe(struct i2c_client *client, const struct i2c_device_id *id) /* setup clock framework */ clocks_init(&client->dev, pdata->clock); + /* read TWL IDCODE Register */ + if (twl_id == TWL4030_CLASS_ID) { + ret = twl_read_idcode_register(); + WARN(ret < 0, "Error: reading twl_idcode register value\n"); + } + /* load power event scripts */ if (twl_has_power() && pdata->power) twl4030_power_init(pdata->power); diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index 4ebd7c32bc59..314218e79c4a 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -150,7 +150,12 @@ #define MMC_PU (0x1 << 3) #define MMC_PD (0x1 << 2) - +#define TWL_SIL_TYPE(rev) ((rev) & 0x00FFFFFF) +#define TWL_SIL_REV(rev) ((rev) >> 24) +#define TWL_SIL_5030 0x09002F +#define TWL5030_REV_1_0 0x00 +#define TWL5030_REV_1_1 0x10 +#define TWL5030_REV_1_2 0x30 #define TWL4030_CLASS_ID 0x4030 #define TWL6030_CLASS_ID 0x6030 @@ -180,6 +185,9 @@ int twl_i2c_read_u8(u8 mod_no, u8 *val, u8 reg); int twl_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes); int twl_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes); +int twl_get_type(void); +int twl_get_version(void); + int twl6030_interrupt_unmask(u8 bit_mask, u8 offset); int twl6030_interrupt_mask(u8 bit_mask, u8 offset); @@ -279,7 +287,12 @@ static inline int twl6030_mmc_card_detect(struct device *dev, int slot) *(Use TWL_4030_MODULE_INTBR) */ +#define REG_IDCODE_7_0 0x00 +#define REG_IDCODE_15_8 0x01 +#define REG_IDCODE_16_23 0x02 +#define REG_IDCODE_31_24 0x03 #define REG_GPPUPDCTR1 0x0F +#define REG_UNLOCK_TEST_REG 0x12 /*I2C1 and I2C4(SR) SDA/SCL pull-up control bits */ @@ -288,6 +301,8 @@ static inline int twl6030_mmc_card_detect(struct device *dev, int slot) #define SR_I2C_SCL_CTRL_PU BIT(4) #define SR_I2C_SDA_CTRL_PU BIT(6) +#define TWL_EEPROM_R_UNLOCK 0x49 + /*----------------------------------------------------------------------*/ /* -- cgit v1.2.3 From cbdb53e1f33baf60ded045dc79cd0dd4e9705fa5 Mon Sep 17 00:00:00 2001 From: Abhijeet Dharmapurikar Date: Tue, 5 Apr 2011 14:40:52 -0700 Subject: mfd: Add Qualcomm PMIC 8921 core driver 
Add support for the Qualcomm PM8921 PMIC chip. The core driver will communicate with the PMIC chip via the MSM SSBI bus. Signed-off-by: Abhijeet Dharmapurikar Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 18 +++++ drivers/mfd/Makefile | 1 + drivers/mfd/pm8921-core.c | 158 ++++++++++++++++++++++++++++++++++++++ include/linux/mfd/pm8xxx/core.h | 71 +++++++++++++++++ include/linux/mfd/pm8xxx/pm8921.h | 27 +++++++ 5 files changed, 275 insertions(+) create mode 100644 drivers/mfd/pm8921-core.c create mode 100644 include/linux/mfd/pm8xxx/core.h create mode 100644 include/linux/mfd/pm8xxx/pm8921.h (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index ec2da8392466..d42328873dc9 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -691,6 +691,24 @@ config MFD_OMAP_USB_HOST This MFD driver does the required setup functionalities for OMAP USB Host drivers. +config MFD_PM8XXX + tristate + +config MFD_PM8921_CORE + tristate "Qualcomm PM8921 PMIC chip" + depends on MSM_SSBI + select MFD_CORE + select MFD_PM8XXX + help + If you say yes to this option, support will be included for the + built-in PM8921 PMIC chip. + + This is required if your board has a PM8921 and uses its features, + such as: MPPs, GPIOs, regulators, interrupts, and PWM. + + Say M here if you want to include support for PM8921 chip as a module. + This will build a module called "pm8921-core". + endif # MFD_SUPPORT menu "Multimedia Capabilities Port drivers" diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 24aa44448daf..d32a7b8f57f7 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -91,3 +91,4 @@ obj-$(CONFIG_MFD_VX855) += vx855.o obj-$(CONFIG_MFD_WL1273_CORE) += wl1273-core.o obj-$(CONFIG_MFD_CS5535) += cs5535-mfd.o obj-$(CONFIG_MFD_OMAP_USB_HOST) += omap-usb-host.o +obj-$(CONFIG_MFD_PM8921_CORE) += pm8921-core.o diff --git a/drivers/mfd/pm8921-core.c b/drivers/mfd/pm8921-core.c new file mode 100644 index 000000000000..a2ecd3233b1b --- /dev/null +++ b/drivers/mfd/pm8921-core.c @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#define pr_fmt(fmt) "%s: " fmt, __func__ + +#include +#include +#include +#include +#include +#include +#include + +#define REG_HWREV 0x002 /* PMIC4 revision */ +#define REG_HWREV_2 0x0E8 /* PMIC4 revision 2 */ + +struct pm8921 { + struct device *dev; +}; + +static int pm8921_readb(const struct device *dev, u16 addr, u8 *val) +{ + const struct pm8xxx_drvdata *pm8921_drvdata = dev_get_drvdata(dev); + const struct pm8921 *pmic = pm8921_drvdata->pm_chip_data; + + return msm_ssbi_read(pmic->dev->parent, addr, val, 1); +} + +static int pm8921_writeb(const struct device *dev, u16 addr, u8 val) +{ + const struct pm8xxx_drvdata *pm8921_drvdata = dev_get_drvdata(dev); + const struct pm8921 *pmic = pm8921_drvdata->pm_chip_data; + + return msm_ssbi_write(pmic->dev->parent, addr, &val, 1); +} + +static int pm8921_read_buf(const struct device *dev, u16 addr, u8 *buf, + int cnt) +{ + const struct pm8xxx_drvdata *pm8921_drvdata = dev_get_drvdata(dev); + const struct pm8921 *pmic = pm8921_drvdata->pm_chip_data; + + return msm_ssbi_read(pmic->dev->parent, addr, buf, cnt); +} + +static int pm8921_write_buf(const struct device *dev, u16 addr, u8 *buf, + int cnt) +{ + const struct pm8xxx_drvdata *pm8921_drvdata = dev_get_drvdata(dev); + const struct pm8921 *pmic = pm8921_drvdata->pm_chip_data; + + return msm_ssbi_write(pmic->dev->parent, addr, buf, cnt); +} + +static struct pm8xxx_drvdata pm8921_drvdata = { + .pmic_readb = pm8921_readb, + .pmic_writeb = pm8921_writeb, + .pmic_read_buf = pm8921_read_buf, + .pmic_write_buf = pm8921_write_buf, +}; + +static int __devinit pm8921_probe(struct platform_device *pdev) +{ + const struct pm8921_platform_data *pdata = pdev->dev.platform_data; + struct pm8921 *pmic; + int rc; + u8 val; + + if (!pdata) { + pr_err("missing platform data\n"); + return -EINVAL; + } + + pmic = kzalloc(sizeof(struct pm8921), GFP_KERNEL); + if (!pmic) { + pr_err("Cannot alloc pm8921 struct\n"); + return -ENOMEM; + } + + /* Read PMIC chip revision */ + rc = msm_ssbi_read(pdev->dev.parent, REG_HWREV, &val, sizeof(val)); + if (rc) { + pr_err("Failed to read hw rev reg %d:rc=%d\n", REG_HWREV, rc); + goto err_read_rev; + } + pr_info("PMIC revision 1: %02X\n", val); + + /* Read PMIC chip revision 2 */ + rc = msm_ssbi_read(pdev->dev.parent, REG_HWREV_2, &val, sizeof(val)); + if (rc) { + pr_err("Failed to read hw rev 2 reg %d:rc=%d\n", + REG_HWREV_2, rc); + goto err_read_rev; + } + pr_info("PMIC revision 2: %02X\n", val); + + pmic->dev = &pdev->dev; + pm8921_drvdata.pm_chip_data = pmic; + platform_set_drvdata(pdev, &pm8921_drvdata); + + return 0; + +err_read_rev: + kfree(pmic); + return rc; +} + +static int __devexit pm8921_remove(struct platform_device *pdev) +{ + struct pm8xxx_drvdata *drvdata; + struct pm8921 *pmic = NULL; + + drvdata = platform_get_drvdata(pdev); + if (drvdata) + pmic = drvdata->pm_chip_data; + if (pmic) + mfd_remove_devices(pmic->dev); + platform_set_drvdata(pdev, NULL); + kfree(pmic); + + return 0; +} + +static struct platform_driver pm8921_driver = { + .probe = pm8921_probe, + .remove = __devexit_p(pm8921_remove), + .driver = { + .name = "pm8921-core", + .owner = THIS_MODULE, + }, +}; + +static int __init pm8921_init(void) +{ + return platform_driver_register(&pm8921_driver); +} +subsys_initcall(pm8921_init); + +static void __exit pm8921_exit(void) +{ + platform_driver_unregister(&pm8921_driver); +} +module_exit(pm8921_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("PMIC 8921 core driver"); +MODULE_VERSION("1.0"); +MODULE_ALIAS("platform:pm8921-core"); diff 
--git a/include/linux/mfd/pm8xxx/core.h b/include/linux/mfd/pm8xxx/core.h new file mode 100644 index 000000000000..36ccb33332ed --- /dev/null +++ b/include/linux/mfd/pm8xxx/core.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +/* + * Qualcomm PMIC 8xxx driver header file + * + */ + +#ifndef __MFD_PM8XXX_CORE_H +#define __MFD_PM8XXX_CORE_H + +#include + +struct pm8xxx_drvdata { + int (*pmic_readb) (const struct device *dev, u16 addr, u8 *val); + int (*pmic_writeb) (const struct device *dev, u16 addr, u8 val); + int (*pmic_read_buf) (const struct device *dev, u16 addr, u8 *buf, + int n); + int (*pmic_write_buf) (const struct device *dev, u16 addr, u8 *buf, + int n); + void *pm_chip_data; +}; + +static inline int pm8xxx_readb(const struct device *dev, u16 addr, u8 *val) +{ + struct pm8xxx_drvdata *dd = dev_get_drvdata(dev); + + if (!dd) + return -EINVAL; + return dd->pmic_readb(dev, addr, val); +} + +static inline int pm8xxx_writeb(const struct device *dev, u16 addr, u8 val) +{ + struct pm8xxx_drvdata *dd = dev_get_drvdata(dev); + + if (!dd) + return -EINVAL; + return dd->pmic_writeb(dev, addr, val); +} + +static inline int pm8xxx_read_buf(const struct device *dev, u16 addr, u8 *buf, + int n) +{ + struct pm8xxx_drvdata *dd = dev_get_drvdata(dev); + + if (!dd) + return -EINVAL; + return dd->pmic_read_buf(dev, addr, buf, n); +} + +static inline int pm8xxx_write_buf(const struct device *dev, u16 addr, u8 *buf, + int n) +{ + struct pm8xxx_drvdata *dd = dev_get_drvdata(dev); + + if (!dd) + return -EINVAL; + return dd->pmic_write_buf(dev, addr, buf, n); +} + +#endif diff --git a/include/linux/mfd/pm8xxx/pm8921.h b/include/linux/mfd/pm8xxx/pm8921.h new file mode 100644 index 000000000000..33fbe9c960a3 --- /dev/null +++ b/include/linux/mfd/pm8xxx/pm8921.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +/* + * Qualcomm PMIC 8921 driver header file + * + */ + +#ifndef __MFD_PM8921_H +#define __MFD_PM8921_H + +#include + +struct pm8921_platform_data { + int irq_base; +}; + +#endif -- cgit v1.2.3 From c013f0a56c56b88ac63c4037f2dfaaf2422fa863 Mon Sep 17 00:00:00 2001 From: Abhijeet Dharmapurikar Date: Tue, 5 Apr 2011 14:40:53 -0700 Subject: mfd: Add pm8xxx irq support Add support for the irq controller in Qualcomm 8xxx pmic. The 8xxx interrupt controller provides control for gpio and mpp configured as interrupts in addition to other subdevice interrupts. The interrupt controller also provides a way to read the real time status of an interrupt. 
This real time status is the only way one can get the input values of gpio and mpp lines. Signed-off-by: Abhijeet Dharmapurikar Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 10 + drivers/mfd/Makefile | 1 + drivers/mfd/pm8921-core.c | 54 ++++++ drivers/mfd/pm8xxx-irq.c | 371 ++++++++++++++++++++++++++++++++++++++ include/linux/mfd/pm8xxx/core.h | 10 + include/linux/mfd/pm8xxx/irq.h | 59 ++++++ include/linux/mfd/pm8xxx/pm8921.h | 4 + 7 files changed, 509 insertions(+) create mode 100644 drivers/mfd/pm8xxx-irq.c create mode 100644 include/linux/mfd/pm8xxx/irq.h (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index d42328873dc9..8344fc0ab858 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -709,6 +709,16 @@ config MFD_PM8921_CORE Say M here if you want to include support for PM8921 chip as a module. This will build a module called "pm8921-core". +config MFD_PM8XXX_IRQ + bool "Support for Qualcomm PM8xxx IRQ features" + depends on MFD_PM8XXX + default y if MFD_PM8XXX + help + This is the IRQ driver for Qualcomm PM 8xxx PMIC chips. + + This is required to use certain other PM 8xxx features, such as GPIO + and MPP. + endif # MFD_SUPPORT menu "Multimedia Capabilities Port drivers" diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index d32a7b8f57f7..1acb8f29a96c 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -92,3 +92,4 @@ obj-$(CONFIG_MFD_WL1273_CORE) += wl1273-core.o obj-$(CONFIG_MFD_CS5535) += cs5535-mfd.o obj-$(CONFIG_MFD_OMAP_USB_HOST) += omap-usb-host.o obj-$(CONFIG_MFD_PM8921_CORE) += pm8921-core.o +obj-$(CONFIG_MFD_PM8XXX_IRQ) += pm8xxx-irq.o diff --git a/drivers/mfd/pm8921-core.c b/drivers/mfd/pm8921-core.c index a2ecd3233b1b..e873b15753d8 100644 --- a/drivers/mfd/pm8921-core.c +++ b/drivers/mfd/pm8921-core.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +27,7 @@ struct pm8921 { struct device *dev; + struct pm_irq_chip *irq_chip; }; static int pm8921_readb(const struct device *dev, u16 addr, u8 *val) @@ -62,19 +64,53 @@ static int pm8921_write_buf(const struct device *dev, u16 addr, u8 *buf, return msm_ssbi_write(pmic->dev->parent, addr, buf, cnt); } +static int pm8921_read_irq_stat(const struct device *dev, int irq) +{ + const struct pm8xxx_drvdata *pm8921_drvdata = dev_get_drvdata(dev); + const struct pm8921 *pmic = pm8921_drvdata->pm_chip_data; + + return pm8xxx_get_irq_stat(pmic->irq_chip, irq); +} + static struct pm8xxx_drvdata pm8921_drvdata = { .pmic_readb = pm8921_readb, .pmic_writeb = pm8921_writeb, .pmic_read_buf = pm8921_read_buf, .pmic_write_buf = pm8921_write_buf, + .pmic_read_irq_stat = pm8921_read_irq_stat, }; +static int __devinit pm8921_add_subdevices(const struct pm8921_platform_data + *pdata, + struct pm8921 *pmic, + u32 rev) +{ + int ret = 0, irq_base = 0; + struct pm_irq_chip *irq_chip; + + if (pdata->irq_pdata) { + pdata->irq_pdata->irq_cdata.nirqs = PM8921_NR_IRQS; + pdata->irq_pdata->irq_cdata.rev = rev; + irq_base = pdata->irq_pdata->irq_base; + irq_chip = pm8xxx_irq_init(pmic->dev, pdata->irq_pdata); + + if (IS_ERR(irq_chip)) { + pr_err("Failed to init interrupts ret=%ld\n", + PTR_ERR(irq_chip)); + return PTR_ERR(irq_chip); + } + pmic->irq_chip = irq_chip; + } + return ret; +} + static int __devinit pm8921_probe(struct platform_device *pdev) { const struct pm8921_platform_data *pdata = pdev->dev.platform_data; struct pm8921 *pmic; int rc; u8 val; + u32 rev; if (!pdata) { pr_err("missing platform data\n"); @@ -94,6 +130,7 @@ static int 
__devinit pm8921_probe(struct platform_device *pdev) goto err_read_rev; } pr_info("PMIC revision 1: %02X\n", val); + rev = val; /* Read PMIC chip revision 2 */ rc = msm_ssbi_read(pdev->dev.parent, REG_HWREV_2, &val, sizeof(val)); @@ -103,13 +140,26 @@ static int __devinit pm8921_probe(struct platform_device *pdev) goto err_read_rev; } pr_info("PMIC revision 2: %02X\n", val); + rev |= val << BITS_PER_BYTE; pmic->dev = &pdev->dev; pm8921_drvdata.pm_chip_data = pmic; platform_set_drvdata(pdev, &pm8921_drvdata); + rc = pm8921_add_subdevices(pdata, pmic, rev); + if (rc) { + pr_err("Cannot add subdevices rc=%d\n", rc); + goto err; + } + + /* gpio might not work if no irq device is found */ + WARN_ON(pmic->irq_chip == NULL); + return 0; +err: + mfd_remove_devices(pmic->dev); + platform_set_drvdata(pdev, NULL); err_read_rev: kfree(pmic); return rc; @@ -125,6 +175,10 @@ static int __devexit pm8921_remove(struct platform_device *pdev) pmic = drvdata->pm_chip_data; if (pmic) mfd_remove_devices(pmic->dev); + if (pmic->irq_chip) { + pm8xxx_irq_exit(pmic->irq_chip); + pmic->irq_chip = NULL; + } platform_set_drvdata(pdev, NULL); kfree(pmic); diff --git a/drivers/mfd/pm8xxx-irq.c b/drivers/mfd/pm8xxx-irq.c new file mode 100644 index 000000000000..d452dd013081 --- /dev/null +++ b/drivers/mfd/pm8xxx-irq.c @@ -0,0 +1,371 @@ +/* + * Copyright (c) 2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#define pr_fmt(fmt) "%s: " fmt, __func__ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* PMIC8xxx IRQ */ + +#define SSBI_REG_ADDR_IRQ_BASE 0x1BB + +#define SSBI_REG_ADDR_IRQ_ROOT (SSBI_REG_ADDR_IRQ_BASE + 0) +#define SSBI_REG_ADDR_IRQ_M_STATUS1 (SSBI_REG_ADDR_IRQ_BASE + 1) +#define SSBI_REG_ADDR_IRQ_M_STATUS2 (SSBI_REG_ADDR_IRQ_BASE + 2) +#define SSBI_REG_ADDR_IRQ_M_STATUS3 (SSBI_REG_ADDR_IRQ_BASE + 3) +#define SSBI_REG_ADDR_IRQ_M_STATUS4 (SSBI_REG_ADDR_IRQ_BASE + 4) +#define SSBI_REG_ADDR_IRQ_BLK_SEL (SSBI_REG_ADDR_IRQ_BASE + 5) +#define SSBI_REG_ADDR_IRQ_IT_STATUS (SSBI_REG_ADDR_IRQ_BASE + 6) +#define SSBI_REG_ADDR_IRQ_CONFIG (SSBI_REG_ADDR_IRQ_BASE + 7) +#define SSBI_REG_ADDR_IRQ_RT_STATUS (SSBI_REG_ADDR_IRQ_BASE + 8) + +#define PM_IRQF_LVL_SEL 0x01 /* level select */ +#define PM_IRQF_MASK_FE 0x02 /* mask falling edge */ +#define PM_IRQF_MASK_RE 0x04 /* mask rising edge */ +#define PM_IRQF_CLR 0x08 /* clear interrupt */ +#define PM_IRQF_BITS_MASK 0x70 +#define PM_IRQF_BITS_SHIFT 4 +#define PM_IRQF_WRITE 0x80 + +#define PM_IRQF_MASK_ALL (PM_IRQF_MASK_FE | \ + PM_IRQF_MASK_RE) + +struct pm_irq_chip { + struct device *dev; + spinlock_t pm_irq_lock; + unsigned int devirq; + unsigned int irq_base; + unsigned int num_irqs; + unsigned int num_blocks; + unsigned int num_masters; + u8 config[0]; +}; + +static int pm8xxx_read_root_irq(const struct pm_irq_chip *chip, u8 *rp) +{ + return pm8xxx_readb(chip->dev, SSBI_REG_ADDR_IRQ_ROOT, rp); +} + +static int pm8xxx_read_master_irq(const struct pm_irq_chip *chip, u8 m, u8 *bp) +{ + return pm8xxx_readb(chip->dev, + SSBI_REG_ADDR_IRQ_M_STATUS1 + m, bp); +} + +static int pm8xxx_read_block_irq(struct pm_irq_chip *chip, u8 bp, u8 *ip) +{ + int rc; + + spin_lock(&chip->pm_irq_lock); + rc = pm8xxx_writeb(chip->dev, SSBI_REG_ADDR_IRQ_BLK_SEL, bp); + if (rc) { + pr_err("Failed Selecting Block %d rc=%d\n", bp, rc); + goto bail; + } + + rc = pm8xxx_readb(chip->dev, SSBI_REG_ADDR_IRQ_IT_STATUS, ip); + if (rc) + pr_err("Failed Reading Status rc=%d\n", rc); +bail: + spin_unlock(&chip->pm_irq_lock); + return rc; +} + +static int pm8xxx_config_irq(struct pm_irq_chip *chip, u8 bp, u8 cp) +{ + int rc; + + spin_lock(&chip->pm_irq_lock); + rc = pm8xxx_writeb(chip->dev, SSBI_REG_ADDR_IRQ_BLK_SEL, bp); + if (rc) { + pr_err("Failed Selecting Block %d rc=%d\n", bp, rc); + goto bail; + } + + cp |= PM_IRQF_WRITE; + rc = pm8xxx_writeb(chip->dev, SSBI_REG_ADDR_IRQ_CONFIG, cp); + if (rc) + pr_err("Failed Configuring IRQ rc=%d\n", rc); +bail: + spin_unlock(&chip->pm_irq_lock); + return rc; +} + +static int pm8xxx_irq_block_handler(struct pm_irq_chip *chip, int block) +{ + int pmirq, irq, i, ret = 0; + u8 bits; + + ret = pm8xxx_read_block_irq(chip, block, &bits); + if (ret) { + pr_err("Failed reading %d block ret=%d", block, ret); + return ret; + } + if (!bits) { + pr_err("block bit set in master but no irqs: %d", block); + return 0; + } + + /* Check IRQ bits */ + for (i = 0; i < 8; i++) { + if (bits & (1 << i)) { + pmirq = block * 8 + i; + irq = pmirq + chip->irq_base; + generic_handle_irq(irq); + } + } + return 0; +} + +static int pm8xxx_irq_master_handler(struct pm_irq_chip *chip, int master) +{ + u8 blockbits; + int block_number, i, ret = 0; + + ret = pm8xxx_read_master_irq(chip, master, &blockbits); + if (ret) { + pr_err("Failed to read master %d ret=%d\n", master, ret); + return ret; + } + if (!blockbits) { + pr_err("master bit set in root but no blocks: %d", master); + return 0; + } + + for (i = 0; i < 8; i++) + if (blockbits & 
(1 << i)) { + block_number = master * 8 + i; /* block # */ + ret |= pm8xxx_irq_block_handler(chip, block_number); + } + return ret; +} + +static void pm8xxx_irq_handler(unsigned int irq, struct irq_desc *desc) +{ + struct pm_irq_chip *chip = irq_desc_get_handler_data(desc); + struct irq_chip *irq_chip = irq_desc_get_chip(desc); + u8 root; + int i, ret, masters = 0; + + ret = pm8xxx_read_root_irq(chip, &root); + if (ret) { + pr_err("Can't read root status ret=%d\n", ret); + return; + } + + /* on pm8xxx series masters start from bit 1 of the root */ + masters = root >> 1; + + /* Read allowed masters for blocks. */ + for (i = 0; i < chip->num_masters; i++) + if (masters & (1 << i)) + pm8xxx_irq_master_handler(chip, i); + + irq_chip->irq_ack(&desc->irq_data); +} + +static void pm8xxx_irq_mask_ack(struct irq_data *d) +{ + struct pm_irq_chip *chip = irq_data_get_irq_chip_data(d); + unsigned int pmirq = d->irq - chip->irq_base; + int master, irq_bit; + u8 block, config; + + block = pmirq / 8; + master = block / 8; + irq_bit = pmirq % 8; + + config = chip->config[pmirq] | PM_IRQF_MASK_ALL | PM_IRQF_CLR; + pm8xxx_config_irq(chip, block, config); +} + +static void pm8xxx_irq_unmask(struct irq_data *d) +{ + struct pm_irq_chip *chip = irq_data_get_irq_chip_data(d); + unsigned int pmirq = d->irq - chip->irq_base; + int master, irq_bit; + u8 block, config; + + block = pmirq / 8; + master = block / 8; + irq_bit = pmirq % 8; + + config = chip->config[pmirq]; + pm8xxx_config_irq(chip, block, config); +} + +static int pm8xxx_irq_set_type(struct irq_data *d, unsigned int flow_type) +{ + struct pm_irq_chip *chip = irq_data_get_irq_chip_data(d); + unsigned int pmirq = d->irq - chip->irq_base; + int master, irq_bit; + u8 block, config; + + block = pmirq / 8; + master = block / 8; + irq_bit = pmirq % 8; + + chip->config[pmirq] = (irq_bit << PM_IRQF_BITS_SHIFT) + | PM_IRQF_MASK_ALL; + if (flow_type & (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING)) { + if (flow_type & IRQF_TRIGGER_RISING) + chip->config[pmirq] &= ~PM_IRQF_MASK_RE; + if (flow_type & IRQF_TRIGGER_FALLING) + chip->config[pmirq] &= ~PM_IRQF_MASK_FE; + } else { + chip->config[pmirq] |= PM_IRQF_LVL_SEL; + + if (flow_type & IRQF_TRIGGER_HIGH) + chip->config[pmirq] &= ~PM_IRQF_MASK_RE; + else + chip->config[pmirq] &= ~PM_IRQF_MASK_FE; + } + + config = chip->config[pmirq] | PM_IRQF_CLR; + return pm8xxx_config_irq(chip, block, config); +} + +static int pm8xxx_irq_set_wake(struct irq_data *d, unsigned int on) +{ + return 0; +} + +static struct irq_chip pm8xxx_irq_chip = { + .name = "pm8xxx", + .irq_mask_ack = pm8xxx_irq_mask_ack, + .irq_unmask = pm8xxx_irq_unmask, + .irq_set_type = pm8xxx_irq_set_type, + .irq_set_wake = pm8xxx_irq_set_wake, + .flags = IRQCHIP_MASK_ON_SUSPEND, +}; + +/** + * pm8xxx_get_irq_stat - get the status of the irq line + * @chip: pointer to identify a pmic irq controller + * @irq: the irq number + * + * The pm8xxx gpio and mpp rely on the interrupt block to read + * the values on their pins. This function is to facilitate reading + * the status of a gpio or an mpp line. The caller has to convert the + * gpio number to irq number. 
+ * + * RETURNS: + * an int indicating the value read on that line + */ +int pm8xxx_get_irq_stat(struct pm_irq_chip *chip, int irq) +{ + int pmirq, rc; + u8 block, bits, bit; + unsigned long flags; + + if (chip == NULL || irq < chip->irq_base || + irq >= chip->irq_base + chip->num_irqs) + return -EINVAL; + + pmirq = irq - chip->irq_base; + + block = pmirq / 8; + bit = pmirq % 8; + + spin_lock_irqsave(&chip->pm_irq_lock, flags); + + rc = pm8xxx_writeb(chip->dev, SSBI_REG_ADDR_IRQ_BLK_SEL, block); + if (rc) { + pr_err("Failed Selecting block irq=%d pmirq=%d blk=%d rc=%d\n", + irq, pmirq, block, rc); + goto bail_out; + } + + rc = pm8xxx_readb(chip->dev, SSBI_REG_ADDR_IRQ_RT_STATUS, &bits); + if (rc) { + pr_err("Failed Configuring irq=%d pmirq=%d blk=%d rc=%d\n", + irq, pmirq, block, rc); + goto bail_out; + } + + rc = (bits & (1 << bit)) ? 1 : 0; + +bail_out: + spin_unlock_irqrestore(&chip->pm_irq_lock, flags); + + return rc; +} +EXPORT_SYMBOL_GPL(pm8xxx_get_irq_stat); + +struct pm_irq_chip * __devinit pm8xxx_irq_init(struct device *dev, + const struct pm8xxx_irq_platform_data *pdata) +{ + struct pm_irq_chip *chip; + int devirq, rc; + unsigned int pmirq; + + if (!pdata) { + pr_err("No platform data\n"); + return ERR_PTR(-EINVAL); + } + + devirq = pdata->devirq; + if (devirq < 0) { + pr_err("missing devirq\n"); + rc = devirq; + return ERR_PTR(-EINVAL); + } + + chip = kzalloc(sizeof(struct pm_irq_chip) + + sizeof(u8) * pdata->irq_cdata.nirqs, GFP_KERNEL); + if (!chip) { + pr_err("Cannot alloc pm_irq_chip struct\n"); + return ERR_PTR(-EINVAL); + } + + chip->dev = dev; + chip->devirq = devirq; + chip->irq_base = pdata->irq_base; + chip->num_irqs = pdata->irq_cdata.nirqs; + chip->num_blocks = DIV_ROUND_UP(chip->num_irqs, 8); + chip->num_masters = DIV_ROUND_UP(chip->num_blocks, 8); + spin_lock_init(&chip->pm_irq_lock); + + for (pmirq = 0; pmirq < chip->num_irqs; pmirq++) { + irq_set_chip_and_handler(chip->irq_base + pmirq, + &pm8xxx_irq_chip, + handle_level_irq); + irq_set_chip_data(chip->irq_base + pmirq, chip); +#ifdef CONFIG_ARM + set_irq_flags(chip->irq_base + pmirq, IRQF_VALID); +#else + irq_set_noprobe(chip->irq_base + pmirq); +#endif + } + + irq_set_irq_type(devirq, pdata->irq_trigger_flag); + irq_set_handler_data(devirq, chip); + irq_set_chained_handler(devirq, pm8xxx_irq_handler); + set_irq_wake(devirq, 1); + + return chip; +} + +int __devexit pm8xxx_irq_exit(struct pm_irq_chip *chip) +{ + irq_set_chained_handler(chip->devirq, NULL); + kfree(chip); + return 0; +} diff --git a/include/linux/mfd/pm8xxx/core.h b/include/linux/mfd/pm8xxx/core.h index 36ccb33332ed..bd2f4f64e931 100644 --- a/include/linux/mfd/pm8xxx/core.h +++ b/include/linux/mfd/pm8xxx/core.h @@ -27,6 +27,7 @@ struct pm8xxx_drvdata { int n); int (*pmic_write_buf) (const struct device *dev, u16 addr, u8 *buf, int n); + int (*pmic_read_irq_stat) (const struct device *dev, int irq); void *pm_chip_data; }; @@ -68,4 +69,13 @@ static inline int pm8xxx_write_buf(const struct device *dev, u16 addr, u8 *buf, return dd->pmic_write_buf(dev, addr, buf, n); } +static inline int pm8xxx_read_irq_stat(const struct device *dev, int irq) +{ + struct pm8xxx_drvdata *dd = dev_get_drvdata(dev); + + if (!dd) + return -EINVAL; + return dd->pmic_read_irq_stat(dev, irq); +} + #endif diff --git a/include/linux/mfd/pm8xxx/irq.h b/include/linux/mfd/pm8xxx/irq.h new file mode 100644 index 000000000000..4b21769f4483 --- /dev/null +++ b/include/linux/mfd/pm8xxx/irq.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2011, Code Aurora Forum. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +/* + * Qualcomm PMIC irq 8xxx driver header file + * + */ + +#ifndef __MFD_PM8XXX_IRQ_H +#define __MFD_PM8XXX_IRQ_H + +#include +#include + +struct pm8xxx_irq_core_data { + u32 rev; + int nirqs; +}; + +struct pm8xxx_irq_platform_data { + int irq_base; + struct pm8xxx_irq_core_data irq_cdata; + int devirq; + int irq_trigger_flag; +}; + +struct pm_irq_chip; + +#ifdef CONFIG_MFD_PM8XXX_IRQ +int pm8xxx_get_irq_stat(struct pm_irq_chip *chip, int irq); +struct pm_irq_chip * __devinit pm8xxx_irq_init(struct device *dev, + const struct pm8xxx_irq_platform_data *pdata); +int __devexit pm8xxx_irq_exit(struct pm_irq_chip *chip); +#else +static inline int pm8xxx_get_irq_stat(struct pm_irq_chip *chip, int irq) +{ + return -ENXIO; +} +static inline struct pm_irq_chip * __devinit pm8xxx_irq_init( + const struct device *dev, + const struct pm8xxx_irq_platform_data *pdata) +{ + return ERR_PTR(-ENXIO); +} +static inline int __devexit pm8xxx_irq_exit(struct pm_irq_chip *chip) +{ + return -ENXIO; +} +#endif /* CONFIG_MFD_PM8XXX_IRQ */ +#endif /* __MFD_PM8XXX_IRQ_H */ diff --git a/include/linux/mfd/pm8xxx/pm8921.h b/include/linux/mfd/pm8xxx/pm8921.h index 33fbe9c960a3..d5517fd32d1b 100644 --- a/include/linux/mfd/pm8xxx/pm8921.h +++ b/include/linux/mfd/pm8xxx/pm8921.h @@ -19,9 +19,13 @@ #define __MFD_PM8921_H #include +#include + +#define PM8921_NR_IRQS 256 struct pm8921_platform_data { int irq_base; + struct pm8xxx_irq_platform_data *irq_pdata; }; #endif -- cgit v1.2.3 From 008b30408c40ede5985397e7daac45d30b375a01 Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Fri, 6 May 2011 17:21:20 +0800 Subject: mfd: Add rtc support to 88pm860x Enable rtc function in 88pm860x PMIC. 
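The new RTC cell takes its configuration from the parent 88pm860x platform data added later in this patch: struct pm860x_rtc_pdata carries a vrtc trim value and an optional sync callback that the driver invokes with the tick count whenever the wall-clock time is set. A minimal board-code sketch, with a made-up callback name and purely illustrative values, could look like this:

static int board_rtc_sync(unsigned int ticks)
{
	/* e.g. mirror the PMIC tick count into an SoC-side counter */
	return 0;
}

static struct pm860x_rtc_pdata board_rtc_pdata = {
	.sync	= board_rtc_sync,
	.vrtc	= 1,	/* <01> = 2.9V VRTC, per the driver comment */
};

static struct pm860x_platform_data board_pm860x_pdata = {
	/* ... backlight, led, touch, power, regulator ... */
	.rtc	= &board_rtc_pdata,
};

If no RTC platform data is supplied, the probe path below only warns and falls back to its defaults (VRTC level <01> and no sync callback).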
Signed-off-by: Haojian Zhuang Cc: Alessandro Zummo Signed-off-by: Samuel Ortiz --- drivers/mfd/88pm860x-core.c | 29 +++ drivers/rtc/Kconfig | 10 + drivers/rtc/Makefile | 1 + drivers/rtc/rtc-88pm860x.c | 427 +++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/88pm860x.h | 6 + 5 files changed, 473 insertions(+) create mode 100644 drivers/rtc/rtc-88pm860x.c (limited to 'include/linux') diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c index 7ba4aafb051d..f2cac9287756 100644 --- a/drivers/mfd/88pm860x-core.c +++ b/drivers/mfd/88pm860x-core.c @@ -90,6 +90,10 @@ static struct resource charger_resources[] __devinitdata = { {PM8607_IRQ_VCHG, PM8607_IRQ_VCHG, "vchg voltage", IORESOURCE_IRQ,}, }; +static struct resource rtc_resources[] __devinitdata = { + {PM8607_IRQ_RTC, PM8607_IRQ_RTC, "rtc", IORESOURCE_IRQ,}, +}; + static struct mfd_cell bk_devs[] = { {"88pm860x-backlight", 0,}, {"88pm860x-backlight", 1,}, @@ -143,6 +147,10 @@ static struct mfd_cell power_devs[] = { {"88pm860x-charger", -1,}, }; +static struct mfd_cell rtc_devs[] = { + {"88pm860x-rtc", -1,}, +}; + static struct pm860x_backlight_pdata bk_pdata[ARRAY_SIZE(bk_devs)]; static struct pm860x_led_pdata led_pdata[ARRAY_SIZE(led_devs)]; static struct regulator_init_data regulator_pdata[ARRAY_SIZE(regulator_devs)]; @@ -635,6 +643,26 @@ out: return; } +static void __devinit device_rtc_init(struct pm860x_chip *chip, + struct i2c_client *i2c, + struct pm860x_platform_data *pdata) +{ + int ret; + + if ((pdata == NULL)) + return; + + rtc_devs[0].platform_data = pdata->rtc; + rtc_devs[0].pdata_size = sizeof(struct pm860x_rtc_pdata); + rtc_devs[0].num_resources = ARRAY_SIZE(rtc_resources); + rtc_devs[0].resources = &rtc_resources[0]; + ret = mfd_add_devices(chip->dev, 0, &rtc_devs[0], + ARRAY_SIZE(rtc_devs), &rtc_resources[0], + chip->irq_base); + if (ret < 0) + dev_err(chip->dev, "Failed to add rtc subdev\n"); +} + static void __devinit device_touch_init(struct pm860x_chip *chip, struct i2c_client *i2c, struct pm860x_platform_data *pdata) @@ -770,6 +798,7 @@ static void __devinit device_8607_init(struct pm860x_chip *chip, goto out; device_regulator_init(chip, i2c, pdata); + device_rtc_init(chip, i2c, pdata); device_onkey_init(chip, i2c, pdata); device_touch_init(chip, i2c, pdata); device_power_init(chip, i2c, pdata); diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index b8f4e9e66cd5..8e437e2f6281 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -125,6 +125,16 @@ comment "I2C RTC drivers" if I2C +config RTC_DRV_88PM860X + tristate "Marvell 88PM860x" + depends on RTC_CLASS && I2C && MFD_88PM860X + help + If you say yes here you get support for RTC function in Marvell + 88PM860x chips. + + This driver can also be built as a module. If so, the module + will be called rtc-88pm860x. + config RTC_DRV_DS1307 tristate "Dallas/Maxim DS1307/37/38/39/40, ST M41T00, EPSON RX-8025" help diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 9574748d1c73..612f5a88a8ee 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -15,6 +15,7 @@ rtc-core-$(CONFIG_RTC_INTF_SYSFS) += rtc-sysfs.o # Keep the list ordered. 
+obj-$(CONFIG_RTC_DRV_88PM860X) += rtc-88pm860x.o obj-$(CONFIG_RTC_DRV_AB3100) += rtc-ab3100.o obj-$(CONFIG_RTC_DRV_AB8500) += rtc-ab8500.o obj-$(CONFIG_RTC_DRV_AT32AP700X)+= rtc-at32ap700x.o diff --git a/drivers/rtc/rtc-88pm860x.c b/drivers/rtc/rtc-88pm860x.c new file mode 100644 index 000000000000..64b847b7f970 --- /dev/null +++ b/drivers/rtc/rtc-88pm860x.c @@ -0,0 +1,427 @@ +/* + * Real Time Clock driver for Marvell 88PM860x PMIC + * + * Copyright (c) 2010 Marvell International Ltd. + * Author: Haojian Zhuang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define VRTC_CALIBRATION + +struct pm860x_rtc_info { + struct pm860x_chip *chip; + struct i2c_client *i2c; + struct rtc_device *rtc_dev; + struct device *dev; + struct delayed_work calib_work; + + int irq; + int vrtc; + int (*sync)(unsigned int ticks); +}; + +#define REG_VRTC_MEAS1 0x7D + +#define REG0_ADDR 0xB0 +#define REG1_ADDR 0xB2 +#define REG2_ADDR 0xB4 +#define REG3_ADDR 0xB6 + +#define REG0_DATA 0xB1 +#define REG1_DATA 0xB3 +#define REG2_DATA 0xB5 +#define REG3_DATA 0xB7 + +/* bit definitions of Measurement Enable Register 2 (0x51) */ +#define MEAS2_VRTC (1 << 0) + +/* bit definitions of RTC Register 1 (0xA0) */ +#define ALARM_EN (1 << 3) +#define ALARM_WAKEUP (1 << 4) +#define ALARM (1 << 5) +#define RTC1_USE_XO (1 << 7) + +#define VRTC_CALIB_INTERVAL (HZ * 60 * 10) /* 10 minutes */ + +static irqreturn_t rtc_update_handler(int irq, void *data) +{ + struct pm860x_rtc_info *info = (struct pm860x_rtc_info *)data; + int mask; + + mask = ALARM | ALARM_WAKEUP; + pm860x_set_bits(info->i2c, PM8607_RTC1, mask | ALARM_EN, mask); + rtc_update_irq(info->rtc_dev, 1, RTC_AF); + return IRQ_HANDLED; +} + +static int pm860x_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) +{ + struct pm860x_rtc_info *info = dev_get_drvdata(dev); + + if (enabled) + pm860x_set_bits(info->i2c, PM8607_RTC1, ALARM, ALARM); + else + pm860x_set_bits(info->i2c, PM8607_RTC1, ALARM, 0); + return 0; +} + +/* + * Calculate the next alarm time given the requested alarm time mask + * and the current time. 
+ */ +static void rtc_next_alarm_time(struct rtc_time *next, struct rtc_time *now, + struct rtc_time *alrm) +{ + unsigned long next_time; + unsigned long now_time; + + next->tm_year = now->tm_year; + next->tm_mon = now->tm_mon; + next->tm_mday = now->tm_mday; + next->tm_hour = alrm->tm_hour; + next->tm_min = alrm->tm_min; + next->tm_sec = alrm->tm_sec; + + rtc_tm_to_time(now, &now_time); + rtc_tm_to_time(next, &next_time); + + if (next_time < now_time) { + /* Advance one day */ + next_time += 60 * 60 * 24; + rtc_time_to_tm(next_time, next); + } +} + +static int pm860x_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + struct pm860x_rtc_info *info = dev_get_drvdata(dev); + unsigned char buf[8]; + unsigned long ticks, base, data; + + pm860x_page_bulk_read(info->i2c, REG0_ADDR, 8, buf); + dev_dbg(info->dev, "%x-%x-%x-%x-%x-%x-%x-%x\n", buf[0], buf[1], + buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); + base = (buf[1] << 24) | (buf[3] << 16) | (buf[5] << 8) | buf[7]; + + /* load 32-bit read-only counter */ + pm860x_bulk_read(info->i2c, PM8607_RTC_COUNTER1, 4, buf); + data = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + ticks = base + data; + dev_dbg(info->dev, "get base:0x%lx, RO count:0x%lx, ticks:0x%lx\n", + base, data, ticks); + + rtc_time_to_tm(ticks, tm); + + return 0; +} + +static int pm860x_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + struct pm860x_rtc_info *info = dev_get_drvdata(dev); + unsigned char buf[4]; + unsigned long ticks, base, data; + + if ((tm->tm_year < 70) || (tm->tm_year > 138)) { + dev_dbg(info->dev, "Set time %d out of range. " + "Please set time between 1970 to 2038.\n", + 1900 + tm->tm_year); + return -EINVAL; + } + rtc_tm_to_time(tm, &ticks); + + /* load 32-bit read-only counter */ + pm860x_bulk_read(info->i2c, PM8607_RTC_COUNTER1, 4, buf); + data = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + base = ticks - data; + dev_dbg(info->dev, "set base:0x%lx, RO count:0x%lx, ticks:0x%lx\n", + base, data, ticks); + + pm860x_page_reg_write(info->i2c, REG0_DATA, (base >> 24) & 0xFF); + pm860x_page_reg_write(info->i2c, REG1_DATA, (base >> 16) & 0xFF); + pm860x_page_reg_write(info->i2c, REG2_DATA, (base >> 8) & 0xFF); + pm860x_page_reg_write(info->i2c, REG3_DATA, base & 0xFF); + + if (info->sync) + info->sync(ticks); + return 0; +} + +static int pm860x_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct pm860x_rtc_info *info = dev_get_drvdata(dev); + unsigned char buf[8]; + unsigned long ticks, base, data; + int ret; + + pm860x_page_bulk_read(info->i2c, REG0_ADDR, 8, buf); + dev_dbg(info->dev, "%x-%x-%x-%x-%x-%x-%x-%x\n", buf[0], buf[1], + buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); + base = (buf[1] << 24) | (buf[3] << 16) | (buf[5] << 8) | buf[7]; + + pm860x_bulk_read(info->i2c, PM8607_RTC_EXPIRE1, 4, buf); + data = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + ticks = base + data; + dev_dbg(info->dev, "get base:0x%lx, RO count:0x%lx, ticks:0x%lx\n", + base, data, ticks); + + rtc_time_to_tm(ticks, &alrm->time); + ret = pm860x_reg_read(info->i2c, PM8607_RTC1); + alrm->enabled = (ret & ALARM_EN) ? 1 : 0; + alrm->pending = (ret & (ALARM | ALARM_WAKEUP)) ? 
1 : 0; + return 0; +} + +static int pm860x_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct pm860x_rtc_info *info = dev_get_drvdata(dev); + struct rtc_time now_tm, alarm_tm; + unsigned long ticks, base, data; + unsigned char buf[8]; + int mask; + + pm860x_set_bits(info->i2c, PM8607_RTC1, ALARM_EN, 0); + + pm860x_page_bulk_read(info->i2c, REG0_ADDR, 8, buf); + dev_dbg(info->dev, "%x-%x-%x-%x-%x-%x-%x-%x\n", buf[0], buf[1], + buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); + base = (buf[1] << 24) | (buf[3] << 16) | (buf[5] << 8) | buf[7]; + + /* load 32-bit read-only counter */ + pm860x_bulk_read(info->i2c, PM8607_RTC_COUNTER1, 4, buf); + data = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + ticks = base + data; + dev_dbg(info->dev, "get base:0x%lx, RO count:0x%lx, ticks:0x%lx\n", + base, data, ticks); + + rtc_time_to_tm(ticks, &now_tm); + rtc_next_alarm_time(&alarm_tm, &now_tm, &alrm->time); + /* get new ticks for alarm in 24 hours */ + rtc_tm_to_time(&alarm_tm, &ticks); + data = ticks - base; + + buf[0] = data & 0xff; + buf[1] = (data >> 8) & 0xff; + buf[2] = (data >> 16) & 0xff; + buf[3] = (data >> 24) & 0xff; + pm860x_bulk_write(info->i2c, PM8607_RTC_EXPIRE1, 4, buf); + if (alrm->enabled) { + mask = ALARM | ALARM_WAKEUP | ALARM_EN; + pm860x_set_bits(info->i2c, PM8607_RTC1, mask, mask); + } else { + mask = ALARM | ALARM_WAKEUP | ALARM_EN; + pm860x_set_bits(info->i2c, PM8607_RTC1, mask, + ALARM | ALARM_WAKEUP); + } + return 0; +} + +static const struct rtc_class_ops pm860x_rtc_ops = { + .read_time = pm860x_rtc_read_time, + .set_time = pm860x_rtc_set_time, + .read_alarm = pm860x_rtc_read_alarm, + .set_alarm = pm860x_rtc_set_alarm, + .alarm_irq_enable = pm860x_rtc_alarm_irq_enable, +}; + +#ifdef VRTC_CALIBRATION +static void calibrate_vrtc_work(struct work_struct *work) +{ + struct pm860x_rtc_info *info = container_of(work, + struct pm860x_rtc_info, calib_work.work); + unsigned char buf[2]; + unsigned int sum, data, mean, vrtc_set; + int i; + + for (i = 0, sum = 0; i < 16; i++) { + msleep(100); + pm860x_bulk_read(info->i2c, REG_VRTC_MEAS1, 2, buf); + data = (buf[0] << 4) | buf[1]; + data = (data * 5400) >> 12; /* convert to mv */ + sum += data; + } + mean = sum >> 4; + vrtc_set = 2700 + (info->vrtc & 0x3) * 200; + dev_dbg(info->dev, "mean:%d, vrtc_set:%d\n", mean, vrtc_set); + + sum = pm860x_reg_read(info->i2c, PM8607_RTC_MISC1); + data = sum & 0x3; + if ((mean + 200) < vrtc_set) { + /* try higher voltage */ + if (++data == 4) + goto out; + data = (sum & 0xf8) | (data & 0x3); + pm860x_reg_write(info->i2c, PM8607_RTC_MISC1, data); + } else if ((mean - 200) > vrtc_set) { + /* try lower voltage */ + if (data-- == 0) + goto out; + data = (sum & 0xf8) | (data & 0x3); + pm860x_reg_write(info->i2c, PM8607_RTC_MISC1, data); + } else + goto out; + dev_dbg(info->dev, "set 0x%x to RTC_MISC1\n", data); + /* trigger next calibration since VRTC is updated */ + schedule_delayed_work(&info->calib_work, VRTC_CALIB_INTERVAL); + return; +out: + /* disable measurement */ + pm860x_set_bits(info->i2c, PM8607_MEAS_EN2, MEAS2_VRTC, 0); + dev_dbg(info->dev, "finish VRTC calibration\n"); + return; +} +#endif + +static int __devinit pm860x_rtc_probe(struct platform_device *pdev) +{ + struct pm860x_chip *chip = dev_get_drvdata(pdev->dev.parent); + struct pm860x_rtc_pdata *pdata = NULL; + struct pm860x_rtc_info *info; + struct rtc_time tm; + unsigned long ticks = 0; + int ret; + + pdata = pdev->dev.platform_data; + if (pdata == NULL) + dev_warn(&pdev->dev, "No platform data!\n"); + + 
info = kzalloc(sizeof(struct pm860x_rtc_info), GFP_KERNEL); + if (!info) + return -ENOMEM; + info->irq = platform_get_irq(pdev, 0); + if (info->irq < 0) { + dev_err(&pdev->dev, "No IRQ resource!\n"); + ret = -EINVAL; + goto out; + } + + info->chip = chip; + info->i2c = (chip->id == CHIP_PM8607) ? chip->client : chip->companion; + info->dev = &pdev->dev; + dev_set_drvdata(&pdev->dev, info); + + ret = request_threaded_irq(info->irq, NULL, rtc_update_handler, + IRQF_ONESHOT, "rtc", info); + if (ret < 0) { + dev_err(chip->dev, "Failed to request IRQ: #%d: %d\n", + info->irq, ret); + goto out; + } + + /* set addresses of 32-bit base value for RTC time */ + pm860x_page_reg_write(info->i2c, REG0_ADDR, REG0_DATA); + pm860x_page_reg_write(info->i2c, REG1_ADDR, REG1_DATA); + pm860x_page_reg_write(info->i2c, REG2_ADDR, REG2_DATA); + pm860x_page_reg_write(info->i2c, REG3_ADDR, REG3_DATA); + + ret = pm860x_rtc_read_time(&pdev->dev, &tm); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to read initial time.\n"); + goto out_rtc; + } + if ((tm.tm_year < 70) || (tm.tm_year > 138)) { + tm.tm_year = 70; + tm.tm_mon = 0; + tm.tm_mday = 1; + tm.tm_hour = 0; + tm.tm_min = 0; + tm.tm_sec = 0; + ret = pm860x_rtc_set_time(&pdev->dev, &tm); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to set initial time.\n"); + goto out_rtc; + } + } + rtc_tm_to_time(&tm, &ticks); + if (pdata && pdata->sync) { + pdata->sync(ticks); + info->sync = pdata->sync; + } + + info->rtc_dev = rtc_device_register("88pm860x-rtc", &pdev->dev, + &pm860x_rtc_ops, THIS_MODULE); + ret = PTR_ERR(info->rtc_dev); + if (IS_ERR(info->rtc_dev)) { + dev_err(&pdev->dev, "Failed to register RTC device: %d\n", ret); + goto out_rtc; + } + + /* + * enable internal XO instead of internal 3.25MHz clock since it can + * free running in PMIC power-down state. 
+ */ + pm860x_set_bits(info->i2c, PM8607_RTC1, RTC1_USE_XO, RTC1_USE_XO); + +#ifdef VRTC_CALIBRATION + /* <00> -- 2.7V, <01> -- 2.9V, <10> -- 3.1V, <11> -- 3.3V */ + if (pdata && pdata->vrtc) + info->vrtc = pdata->vrtc & 0x3; + else + info->vrtc = 1; + pm860x_set_bits(info->i2c, PM8607_MEAS_EN2, MEAS2_VRTC, MEAS2_VRTC); + + /* calibrate VRTC */ + INIT_DELAYED_WORK(&info->calib_work, calibrate_vrtc_work); + schedule_delayed_work(&info->calib_work, VRTC_CALIB_INTERVAL); +#endif /* VRTC_CALIBRATION */ + return 0; +out_rtc: + free_irq(info->irq, info); +out: + kfree(info); + return ret; +} + +static int __devexit pm860x_rtc_remove(struct platform_device *pdev) +{ + struct pm860x_rtc_info *info = platform_get_drvdata(pdev); + +#ifdef VRTC_CALIBRATION + flush_scheduled_work(); + /* disable measurement */ + pm860x_set_bits(info->i2c, PM8607_MEAS_EN2, MEAS2_VRTC, 0); +#endif /* VRTC_CALIBRATION */ + + platform_set_drvdata(pdev, NULL); + rtc_device_unregister(info->rtc_dev); + free_irq(info->irq, info); + kfree(info); + return 0; +} + +static struct platform_driver pm860x_rtc_driver = { + .driver = { + .name = "88pm860x-rtc", + .owner = THIS_MODULE, + }, + .probe = pm860x_rtc_probe, + .remove = __devexit_p(pm860x_rtc_remove), +}; + +static int __init pm860x_rtc_init(void) +{ + return platform_driver_register(&pm860x_rtc_driver); +} +module_init(pm860x_rtc_init); + +static void __exit pm860x_rtc_exit(void) +{ + platform_driver_unregister(&pm860x_rtc_driver); +} +module_exit(pm860x_rtc_exit); + +MODULE_DESCRIPTION("Marvell 88PM860x RTC driver"); +MODULE_AUTHOR("Haojian Zhuang "); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mfd/88pm860x.h b/include/linux/mfd/88pm860x.h index 8fba7972ff5f..63b4fb8e3b6f 100644 --- a/include/linux/mfd/88pm860x.h +++ b/include/linux/mfd/88pm860x.h @@ -330,6 +330,11 @@ struct pm860x_led_pdata { unsigned long flags; }; +struct pm860x_rtc_pdata { + int (*sync)(unsigned int ticks); + int vrtc; +}; + struct pm860x_touch_pdata { int gpadc_prebias; int slot_cycle; @@ -349,6 +354,7 @@ struct pm860x_power_pdata { struct pm860x_platform_data { struct pm860x_backlight_pdata *backlight; struct pm860x_led_pdata *led; + struct pm860x_rtc_pdata *rtc; struct pm860x_touch_pdata *touch; struct pm860x_power_pdata *power; struct regulator_init_data *regulator; -- cgit v1.2.3 From 521d8ec3f0d8069bea3b3afa70f487cdb5118018 Mon Sep 17 00:00:00 2001 From: Graeme Gregory Date: Thu, 12 May 2011 14:27:55 +0100 Subject: mfd: Add phoenix lite (twl6025) support to twl6030 Phoenix Lite is based on the twl6030 family of PMICs. It has mostly the same feature set of twl6030 but with small changes. The codec block has also been removed. It also has a new charger block and new features in its ADC block. VUSB handling also differs. 
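Because the 6025 reuses the twl6030 core, it is selected purely by I2C device name: the new twl_ids entry supplies TWL6030_CLASS | TWL6025_SUBCLASS as the driver_data, which twl-core then threads through add_regulator() as the "features" word. A board sketch (illustrative only; the 0x48 slave address is the usual twl address but is shown here merely as an example, and the variable names are made up) might register the chip as:

static struct twl4030_platform_data board_twl_pdata = {
	/* ldo1..ldo7, ldoln, ldousb, smps3/smps4, vio6025 init data ... */
};

static struct i2c_board_info __initdata board_pmic[] = {
	{
		I2C_BOARD_INFO("twl6025", 0x48),
		.platform_data	= &board_twl_pdata,
	},
};

Code that receives the features word can then branch on the subclass bit, exactly as the VUSB/LDOUSB handling in this patch does with its check of features & TWL6025_SUBCLASS.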
Signed-off-by: Graeme Gregory Reviewed-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/twl-core.c | 190 ++++++++++++++++++++++++++++++++++++++---------- include/linux/i2c/twl.h | 34 +++++++++ 2 files changed, 187 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c index 2bd9e0676bc2..b8f2a4e7f6e7 100644 --- a/drivers/mfd/twl-core.c +++ b/drivers/mfd/twl-core.c @@ -198,6 +198,7 @@ #define TWL6030_BASEADD_GASGAUGE 0x00C0 #define TWL6030_BASEADD_PIH 0x00D0 #define TWL6030_BASEADD_CHARGER 0x00E0 +#define TWL6025_BASEADD_CHARGER 0x00DA /* subchip/slave 2 0x4A - DFT */ #define TWL6030_BASEADD_DIEID 0x00C0 @@ -331,6 +332,7 @@ static struct twl_mapping twl6030_map[] = { { SUB_CHIP_ID0, TWL6030_BASEADD_RTC }, { SUB_CHIP_ID0, TWL6030_BASEADD_MEM }, + { SUB_CHIP_ID1, TWL6025_BASEADD_CHARGER }, }; /*----------------------------------------------------------------------*/ @@ -604,7 +606,7 @@ static inline struct device *add_child(unsigned chip, const char *name, static struct device * add_regulator_linked(int num, struct regulator_init_data *pdata, struct regulator_consumer_supply *consumers, - unsigned num_consumers) + unsigned num_consumers, unsigned long features) { unsigned sub_chip_id; /* regulator framework demands init_data ... */ @@ -616,6 +618,8 @@ add_regulator_linked(int num, struct regulator_init_data *pdata, pdata->num_consumer_supplies = num_consumers; } + pdata->driver_data = (void *)features; + /* NOTE: we currently ignore regulator IRQs, e.g. for short circuits */ sub_chip_id = twl_map[TWL_MODULE_PM_MASTER].sid; return add_numbered_child(sub_chip_id, "twl_reg", num, @@ -623,9 +627,10 @@ add_regulator_linked(int num, struct regulator_init_data *pdata, } static struct device * -add_regulator(int num, struct regulator_init_data *pdata) +add_regulator(int num, struct regulator_init_data *pdata, + unsigned long features) { - return add_regulator_linked(num, pdata, NULL, 0); + return add_regulator_linked(num, pdata, NULL, 0, features); } /* @@ -705,17 +710,20 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features) }; child = add_regulator_linked(TWL4030_REG_VUSB1V5, - &usb_fixed, &usb1v5, 1); + &usb_fixed, &usb1v5, 1, + features); if (IS_ERR(child)) return PTR_ERR(child); child = add_regulator_linked(TWL4030_REG_VUSB1V8, - &usb_fixed, &usb1v8, 1); + &usb_fixed, &usb1v8, 1, + features); if (IS_ERR(child)) return PTR_ERR(child); child = add_regulator_linked(TWL4030_REG_VUSB3V1, - &usb_fixed, &usb3v1, 1); + &usb_fixed, &usb3v1, 1, + features); if (IS_ERR(child)) return PTR_ERR(child); @@ -740,9 +748,8 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features) } if (twl_has_usb() && pdata->usb && twl_class_is_6030()) { - static struct regulator_consumer_supply usb3v3 = { - .supply = "vusb", - }; + static struct regulator_consumer_supply usb3v3; + int regulator; if (twl_has_regulator()) { /* this is a template that gets copied */ @@ -755,12 +762,22 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features) | REGULATOR_CHANGE_STATUS, }; - child = add_regulator_linked(TWL6030_REG_VUSB, - &usb_fixed, &usb3v3, 1); + if (features & TWL6025_SUBCLASS) { + usb3v3.supply = "ldousb"; + regulator = TWL6025_REG_LDOUSB; + } else { + usb3v3.supply = "vusb"; + regulator = TWL6030_REG_VUSB; + } + child = add_regulator_linked(regulator, &usb_fixed, + &usb3v3, 1, + features); if (IS_ERR(child)) return PTR_ERR(child); } + pdata->usb->features = features; + child = add_child(0, 
"twl6030_usb", pdata->usb, sizeof(*pdata->usb), true, @@ -773,7 +790,16 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features) /* we need to connect regulators to this transceiver */ if (twl_has_regulator() && child) usb3v3.dev = child; + } else if (twl_has_regulator() && twl_class_is_6030()) { + if (features & TWL6025_SUBCLASS) + child = add_regulator(TWL6025_REG_LDOUSB, + pdata->ldousb, features); + else + child = add_regulator(TWL6030_REG_VUSB, + pdata->vusb, features); + if (IS_ERR(child)) + return PTR_ERR(child); } if (twl_has_watchdog() && twl_class_is_4030()) { @@ -810,46 +836,55 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features) /* twl4030 regulators */ if (twl_has_regulator() && twl_class_is_4030()) { - child = add_regulator(TWL4030_REG_VPLL1, pdata->vpll1); + child = add_regulator(TWL4030_REG_VPLL1, pdata->vpll1, + features); if (IS_ERR(child)) return PTR_ERR(child); - child = add_regulator(TWL4030_REG_VIO, pdata->vio); + child = add_regulator(TWL4030_REG_VIO, pdata->vio, + features); if (IS_ERR(child)) return PTR_ERR(child); - child = add_regulator(TWL4030_REG_VDD1, pdata->vdd1); + child = add_regulator(TWL4030_REG_VDD1, pdata->vdd1, + features); if (IS_ERR(child)) return PTR_ERR(child); - child = add_regulator(TWL4030_REG_VDD2, pdata->vdd2); + child = add_regulator(TWL4030_REG_VDD2, pdata->vdd2, + features); if (IS_ERR(child)) return PTR_ERR(child); - child = add_regulator(TWL4030_REG_VMMC1, pdata->vmmc1); + child = add_regulator(TWL4030_REG_VMMC1, pdata->vmmc1, + features); if (IS_ERR(child)) return PTR_ERR(child); - child = add_regulator(TWL4030_REG_VDAC, pdata->vdac); + child = add_regulator(TWL4030_REG_VDAC, pdata->vdac, + features); if (IS_ERR(child)) return PTR_ERR(child); child = add_regulator((features & TWL4030_VAUX2) ? 
TWL4030_REG_VAUX2_4030 : TWL4030_REG_VAUX2, - pdata->vaux2); + pdata->vaux2, features); if (IS_ERR(child)) return PTR_ERR(child); - child = add_regulator(TWL4030_REG_VINTANA1, pdata->vintana1); + child = add_regulator(TWL4030_REG_VINTANA1, pdata->vintana1, + features); if (IS_ERR(child)) return PTR_ERR(child); - child = add_regulator(TWL4030_REG_VINTANA2, pdata->vintana2); + child = add_regulator(TWL4030_REG_VINTANA2, pdata->vintana2, + features); if (IS_ERR(child)) return PTR_ERR(child); - child = add_regulator(TWL4030_REG_VINTDIG, pdata->vintdig); + child = add_regulator(TWL4030_REG_VINTDIG, pdata->vintdig, + features); if (IS_ERR(child)) return PTR_ERR(child); } @@ -857,72 +892,152 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features) /* maybe add LDOs that are omitted on cost-reduced parts */ if (twl_has_regulator() && !(features & TPS_SUBSET) && twl_class_is_4030()) { - child = add_regulator(TWL4030_REG_VPLL2, pdata->vpll2); + child = add_regulator(TWL4030_REG_VPLL2, pdata->vpll2, + features); if (IS_ERR(child)) return PTR_ERR(child); - child = add_regulator(TWL4030_REG_VMMC2, pdata->vmmc2); + child = add_regulator(TWL4030_REG_VMMC2, pdata->vmmc2, + features); if (IS_ERR(child)) return PTR_ERR(child); - child = add_regulator(TWL4030_REG_VSIM, pdata->vsim); + child = add_regulator(TWL4030_REG_VSIM, pdata->vsim, + features); if (IS_ERR(child)) return PTR_ERR(child); - child = add_regulator(TWL4030_REG_VAUX1, pdata->vaux1); + child = add_regulator(TWL4030_REG_VAUX1, pdata->vaux1, + features); if (IS_ERR(child)) return PTR_ERR(child); - child = add_regulator(TWL4030_REG_VAUX3, pdata->vaux3); + child = add_regulator(TWL4030_REG_VAUX3, pdata->vaux3, + features); if (IS_ERR(child)) return PTR_ERR(child); - child = add_regulator(TWL4030_REG_VAUX4, pdata->vaux4); + child = add_regulator(TWL4030_REG_VAUX4, pdata->vaux4, + features); if (IS_ERR(child)) return PTR_ERR(child); } /* twl6030 regulators */ + if (twl_has_regulator() && twl_class_is_6030() && + !(features & TWL6025_SUBCLASS)) { + child = add_regulator(TWL6030_REG_VMMC, pdata->vmmc, + features); + if (IS_ERR(child)) + return PTR_ERR(child); + + child = add_regulator(TWL6030_REG_VPP, pdata->vpp, + features); + if (IS_ERR(child)) + return PTR_ERR(child); + + child = add_regulator(TWL6030_REG_VUSIM, pdata->vusim, + features); + if (IS_ERR(child)) + return PTR_ERR(child); + + child = add_regulator(TWL6030_REG_VCXIO, pdata->vcxio, + features); + if (IS_ERR(child)) + return PTR_ERR(child); + + child = add_regulator(TWL6030_REG_VDAC, pdata->vdac, + features); + if (IS_ERR(child)) + return PTR_ERR(child); + + child = add_regulator(TWL6030_REG_VAUX1_6030, pdata->vaux1, + features); + if (IS_ERR(child)) + return PTR_ERR(child); + + child = add_regulator(TWL6030_REG_VAUX2_6030, pdata->vaux2, + features); + if (IS_ERR(child)) + return PTR_ERR(child); + + child = add_regulator(TWL6030_REG_VAUX3_6030, pdata->vaux3, + features); + if (IS_ERR(child)) + return PTR_ERR(child); + + child = add_regulator(TWL6030_REG_CLK32KG, pdata->clk32kg, + features); + if (IS_ERR(child)) + return PTR_ERR(child); + } + + /* 6030 and 6025 share this regulator */ if (twl_has_regulator() && twl_class_is_6030()) { - child = add_regulator(TWL6030_REG_VMMC, pdata->vmmc); + child = add_regulator(TWL6030_REG_VANA, pdata->vana, + features); + if (IS_ERR(child)) + return PTR_ERR(child); + } + + /* twl6025 regulators */ + if (twl_has_regulator() && twl_class_is_6030() && + (features & TWL6025_SUBCLASS)) { + child = add_regulator(TWL6025_REG_LDO5, pdata->ldo5, 
+ features); if (IS_ERR(child)) return PTR_ERR(child); - child = add_regulator(TWL6030_REG_VPP, pdata->vpp); + child = add_regulator(TWL6025_REG_LDO1, pdata->ldo1, + features); if (IS_ERR(child)) return PTR_ERR(child); - child = add_regulator(TWL6030_REG_VUSIM, pdata->vusim); + child = add_regulator(TWL6025_REG_LDO7, pdata->ldo7, + features); if (IS_ERR(child)) return PTR_ERR(child); - child = add_regulator(TWL6030_REG_VANA, pdata->vana); + child = add_regulator(TWL6025_REG_LDO6, pdata->ldo6, + features); if (IS_ERR(child)) return PTR_ERR(child); - child = add_regulator(TWL6030_REG_VCXIO, pdata->vcxio); + child = add_regulator(TWL6025_REG_LDOLN, pdata->ldoln, + features); if (IS_ERR(child)) return PTR_ERR(child); - child = add_regulator(TWL6030_REG_VDAC, pdata->vdac); + child = add_regulator(TWL6025_REG_LDO2, pdata->ldo2, + features); if (IS_ERR(child)) return PTR_ERR(child); - child = add_regulator(TWL6030_REG_VAUX1_6030, pdata->vaux1); + child = add_regulator(TWL6025_REG_LDO4, pdata->ldo4, + features); if (IS_ERR(child)) return PTR_ERR(child); - child = add_regulator(TWL6030_REG_VAUX2_6030, pdata->vaux2); + child = add_regulator(TWL6025_REG_LDO3, pdata->ldo3, + features); if (IS_ERR(child)) return PTR_ERR(child); - child = add_regulator(TWL6030_REG_VAUX3_6030, pdata->vaux3); + child = add_regulator(TWL6025_REG_SMPS3, pdata->smps3, + features); if (IS_ERR(child)) return PTR_ERR(child); - child = add_regulator(TWL6030_REG_CLK32KG, pdata->clk32kg); + child = add_regulator(TWL6025_REG_SMPS4, pdata->smps4, + features); if (IS_ERR(child)) return PTR_ERR(child); + + child = add_regulator(TWL6025_REG_VIO, pdata->vio6025, + features); + if (IS_ERR(child)) + return PTR_ERR(child); + } if (twl_has_bci() && pdata->bci && @@ -1170,6 +1285,7 @@ static const struct i2c_device_id twl_ids[] = { { "tps65930", TPS_SUBSET }, /* fewer LDOs and DACs; no charger */ { "tps65920", TPS_SUBSET }, /* fewer LDOs; no codec or charger */ { "twl6030", TWL6030_CLASS }, /* "Phoenix power chip" */ + { "twl6025", TWL6030_CLASS | TWL6025_SUBCLASS }, /* "Phoenix lite" */ { /* end of list */ }, }; MODULE_DEVICE_TABLE(i2c, twl_ids); diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index 314218e79c4a..d70e704cbbb6 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -170,6 +170,8 @@ static inline int twl_class_is_ ##class(void) \ TWL_CLASS_IS(4030, TWL4030_CLASS_ID) TWL_CLASS_IS(6030, TWL6030_CLASS_ID) +#define TWL6025_SUBCLASS BIT(4) /* TWL6025 has changed registers */ + /* * Read and write single 8-bit registers */ @@ -608,6 +610,7 @@ enum twl4030_usb_mode { struct twl4030_usb_data { enum twl4030_usb_mode usb_mode; + unsigned long features; int (*phy_init)(struct device *dev); int (*phy_exit)(struct device *dev); @@ -714,6 +717,20 @@ struct twl4030_platform_data { struct regulator_init_data *vcxio; struct regulator_init_data *vusb; struct regulator_init_data *clk32kg; + /* TWL6025 LDO regulators */ + struct regulator_init_data *ldo1; + struct regulator_init_data *ldo2; + struct regulator_init_data *ldo3; + struct regulator_init_data *ldo4; + struct regulator_init_data *ldo5; + struct regulator_init_data *ldo6; + struct regulator_init_data *ldo7; + struct regulator_init_data *ldoln; + struct regulator_init_data *ldousb; + /* TWL6025 DCDC regulators */ + struct regulator_init_data *smps3; + struct regulator_init_data *smps4; + struct regulator_init_data *vio6025; }; /*----------------------------------------------------------------------*/ @@ -795,4 +812,21 @@ static inline int 
twl4030charger_usb_en(int enable) { return 0; } #define TWL6030_REG_VRTC 47 #define TWL6030_REG_CLK32KG 48 +/* LDOs on 6025 have different names */ +#define TWL6025_REG_LDO2 49 +#define TWL6025_REG_LDO4 50 +#define TWL6025_REG_LDO3 51 +#define TWL6025_REG_LDO5 52 +#define TWL6025_REG_LDO1 53 +#define TWL6025_REG_LDO7 54 +#define TWL6025_REG_LDO6 55 +#define TWL6025_REG_LDOLN 56 +#define TWL6025_REG_LDOUSB 57 + +/* 6025 DCDC supplies */ +#define TWL6025_REG_SMPS3 58 +#define TWL6025_REG_SMPS4 59 +#define TWL6025_REG_VIO 60 + + #endif /* End of __TWL4030_H */ -- cgit v1.2.3 From 6523b148b44be38d89c2ee9865d34da30d9f5f1c Mon Sep 17 00:00:00 2001 From: Graeme Gregory Date: Thu, 12 May 2011 14:27:56 +0100 Subject: mfd: Fix twl6030 irq definitions The charger fault IRQs from the twl will in future patches be handled by a seperate IRQ handler in the charger driver than the general charger IRQ. Give them different IRQ numbers now to allow the charger driver to be merged in the future. Signed-off-by: Graeme Gregory Signed-off-by: Samuel Ortiz --- drivers/mfd/twl6030-irq.c | 4 ++-- include/linux/i2c/twl.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/twl6030-irq.c b/drivers/mfd/twl6030-irq.c index dfbae34e1804..eb3b5f88e566 100644 --- a/drivers/mfd/twl6030-irq.c +++ b/drivers/mfd/twl6030-irq.c @@ -76,8 +76,8 @@ static int twl6030_interrupt_mapping[24] = { USBOTG_INTR_OFFSET, /* Bit 18 ID */ USB_PRES_INTR_OFFSET, /* Bit 19 VBUS */ CHARGER_INTR_OFFSET, /* Bit 20 CHRG_CTRL */ - CHARGER_INTR_OFFSET, /* Bit 21 EXT_CHRG */ - CHARGER_INTR_OFFSET, /* Bit 22 INT_CHRG */ + CHARGERFAULT_INTR_OFFSET, /* Bit 21 EXT_CHRG */ + CHARGERFAULT_INTR_OFFSET, /* Bit 22 INT_CHRG */ RSV_INTR_OFFSET, /* Bit 23 Reserved */ }; /*----------------------------------------------------------------------*/ diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index d70e704cbbb6..ba4f88624fcd 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -91,6 +91,7 @@ #define BCI_INTR_OFFSET 2 #define MADC_INTR_OFFSET 3 #define USB_INTR_OFFSET 4 +#define CHARGERFAULT_INTR_OFFSET 5 #define BCI_PRES_INTR_OFFSET 9 #define USB_PRES_INTR_OFFSET 10 #define RTC_INTR_OFFSET 11 -- cgit v1.2.3 From 863dde5bfa3c48f459c6302daf64f94a11c7c1e5 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 8 May 2011 00:54:45 +0200 Subject: mfd: Provide ab8500-core enumerators for chip cuts Since functionality in MFD cells may need to be adjusted according to chip revision, let's enumerate them and keep track of them. 
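With the cut levels named, revision checks in the core and in cell drivers can compare against an enumerator instead of a bare 0x20. A small illustrative sketch of how a cell driver might use this (the helper name is made up; struct ab8500 and its chip_id field already exist in the ab8500 headers, while the AB8500_CUT* constants are what this patch adds to abx500.h):

static bool ab8500_has_irq_reg12(const struct ab8500 *ab8500)
{
	/* interrupt register 12 only exists from cut 2.0 onwards */
	return ab8500->chip_id >= AB8500_CUT2P0;
}

This mirrors the interrupt register 12 checks that ab8500-core.c itself converts to AB8500_CUT2P0 in the diff below.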
Signed-off-by: Linus Walleij Signed-off-by: Samuel Ortiz --- drivers/mfd/ab8500-core.c | 32 ++++++++++++++++---------------- include/linux/mfd/abx500.h | 7 +++++++ 2 files changed, 23 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c index 67d01c938284..fc0c1af1566e 100644 --- a/drivers/mfd/ab8500-core.c +++ b/drivers/mfd/ab8500-core.c @@ -254,8 +254,9 @@ static void ab8500_irq_sync_unlock(struct irq_data *data) if (new == old) continue; - /* Interrupt register 12 does'nt exist prior to version 0x20 */ - if (ab8500_irq_regoffset[i] == 11 && ab8500->chip_id < 0x20) + /* Interrupt register 12 doesn't exist prior to version 2.0 */ + if (ab8500_irq_regoffset[i] == 11 && + ab8500->chip_id < AB8500_CUT2P0) continue; ab8500->oldmask[i] = new; @@ -307,8 +308,8 @@ static irqreturn_t ab8500_irq(int irq, void *dev) int status; u8 value; - /* Interrupt register 12 does'nt exist prior to version 0x20 */ - if (regoffset == 11 && ab8500->chip_id < 0x20) + /* Interrupt register 12 doesn't exist prior to version 2.0 */ + if (regoffset == 11 && ab8500->chip_id < AB8500_CUT2P0) continue; status = get_register_interruptible(ab8500, AB8500_INTERRUPT, @@ -724,17 +725,15 @@ int __devinit ab8500_init(struct ab8500 *ab8500) if (ret < 0) return ret; - /* - * 0x0 - Early Drop - * 0x10 - Cut 1.0 - * 0x11 - Cut 1.1 - * 0x20 - Cut 2.0 - * 0x30 - Cut 3.0 - */ - if (value == 0x0 || value == 0x10 || value == 0x11 || value == 0x20 || - value == 0x30) { + switch (value) { + case AB8500_CUTEARLY: + case AB8500_CUT1P0: + case AB8500_CUT1P1: + case AB8500_CUT2P0: + case AB8500_CUT3P0: dev_info(ab8500->dev, "detected chip, revision: %#x\n", value); - } else { + break; + default: dev_err(ab8500->dev, "unknown chip, revision: %#x\n", value); return -EINVAL; } @@ -763,8 +762,9 @@ int __devinit ab8500_init(struct ab8500 *ab8500) /* Clear and mask all interrupts */ for (i = 0; i < AB8500_NUM_IRQ_REGS; i++) { - /* Interrupt register 12 does'nt exist prior to version 0x20 */ - if (ab8500_irq_regoffset[i] == 11 && ab8500->chip_id < 0x20) + /* Interrupt register 12 doesn't exist prior to version 2.0 */ + if (ab8500_irq_regoffset[i] == 11 && + ab8500->chip_id < AB8500_CUT2P0) continue; get_register_interruptible(ab8500, AB8500_INTERRUPT, diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h index 67bd6f7ecf32..896b5e47f16e 100644 --- a/include/linux/mfd/abx500.h +++ b/include/linux/mfd/abx500.h @@ -34,6 +34,13 @@ #define AB5500_2_0 0x21 #define AB5500_2_1 0x22 +/* AB8500 CIDs*/ +#define AB8500_CUTEARLY 0x00 +#define AB8500_CUT1P0 0x10 +#define AB8500_CUT1P1 0x11 +#define AB8500_CUT2P0 0x20 +#define AB8500_CUT3P0 0x30 + /* * AB3100, EVENTA1, A2 and A3 event register flags * these are catenated into a single 32-bit flag in the code -- cgit v1.2.3 From 4a7c00cd94d4ca7061c481fe823a256e37436044 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 10 May 2011 08:59:23 +0300 Subject: mfd: Update twl4030-code maintainer e-mail address Signed-off-by: Peter Ujfalusi Signed-off-by: Samuel Ortiz --- drivers/mfd/twl4030-codec.c | 4 ++-- include/linux/mfd/twl4030-codec.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/twl4030-codec.c b/drivers/mfd/twl4030-codec.c index 315b5ead8437..2bf4136464c1 100644 --- a/drivers/mfd/twl4030-codec.c +++ b/drivers/mfd/twl4030-codec.c @@ -1,7 +1,7 @@ /* * MFD driver for twl4030 codec submodule * - * Author: Peter Ujfalusi + * Author: Peter Ujfalusi * * 
Copyright: (C) 2009 Nokia Corporation * @@ -272,6 +272,6 @@ static void __devexit twl4030_codec_exit(void) } module_exit(twl4030_codec_exit); -MODULE_AUTHOR("Peter Ujfalusi "); +MODULE_AUTHOR("Peter Ujfalusi "); MODULE_LICENSE("GPL"); diff --git a/include/linux/mfd/twl4030-codec.h b/include/linux/mfd/twl4030-codec.h index 2ec317c68e59..5cc16bbd1da1 100644 --- a/include/linux/mfd/twl4030-codec.h +++ b/include/linux/mfd/twl4030-codec.h @@ -1,7 +1,7 @@ /* * MFD driver for twl4030 codec submodule * - * Author: Peter Ujfalusi + * Author: Peter Ujfalusi * * Copyright: (C) 2009 Nokia Corporation * -- cgit v1.2.3 From 7d9e7e9fbd3041a0596394579d800788bbf94939 Mon Sep 17 00:00:00 2001 From: Paul Parsons Date: Fri, 13 May 2011 18:52:56 +0000 Subject: leds: Add ASIC3 LED support Add LED support for the HTC ASIC3. Underlying support is provided by the mfd/asic3 and leds/leds-asic3 drivers. An example configuration is provided by the pxa/hx4700 platform. Signed-off-by: Paul Parsons Signed-off-by: Samuel Ortiz --- drivers/leds/Kconfig | 10 +++ drivers/leds/Makefile | 1 + drivers/leds/leds-asic3.c | 165 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/asic3.h | 22 ++++++- 4 files changed, 195 insertions(+), 3 deletions(-) create mode 100644 drivers/leds/leds-asic3.c (limited to 'include/linux') diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 1d027b475b22..23f0d5e99f35 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -389,6 +389,16 @@ config LEDS_NETXBIG and 5Big Network v2 boards. The LEDs are wired to a CPLD and are controlled through a GPIO extension bus. +config LEDS_ASIC3 + bool "LED support for the HTC ASIC3" + depends on MFD_ASIC3 + default y + help + This option enables support for the LEDs on the HTC ASIC3. The HTC + ASIC3 LED GPIOs are inputs, not outputs, thus the leds-gpio driver + cannot be used. This driver supports hardware blinking with an on+off + period from 62ms to 125s. Say Y to enable LEDs on the HP iPAQ hx4700. + config LEDS_TRIGGERS bool "LED Trigger support" depends on LEDS_CLASS diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index bccb96c9bb45..bbfd2e367dc0 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_LEDS_DELL_NETBOOKS) += dell-led.o obj-$(CONFIG_LEDS_MC13783) += leds-mc13783.o obj-$(CONFIG_LEDS_NS2) += leds-ns2.o obj-$(CONFIG_LEDS_NETXBIG) += leds-netxbig.o +obj-$(CONFIG_LEDS_ASIC3) += leds-asic3.o # LED SPI Drivers obj-$(CONFIG_LEDS_DAC124S085) += leds-dac124s085.o diff --git a/drivers/leds/leds-asic3.c b/drivers/leds/leds-asic3.c new file mode 100644 index 000000000000..22f847c890c9 --- /dev/null +++ b/drivers/leds/leds-asic3.c @@ -0,0 +1,165 @@ +/* + * Copyright (C) 2011 Paul Parsons + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +#include +#include + +/* + * The HTC ASIC3 LED GPIOs are inputs, not outputs. + * Hence we turn the LEDs on/off via the TimeBase register. + */ + +/* + * When TimeBase is 4 the clock resolution is about 32Hz. + * This driver supports hardware blinking with an on+off + * period from 62ms (2 clocks) to 125s (4000 clocks). 
+ */ +#define MS_TO_CLK(ms) DIV_ROUND_CLOSEST(((ms)*1024), 32000) +#define CLK_TO_MS(clk) (((clk)*32000)/1024) +#define MAX_CLK 4000 /* Fits into 12-bit Time registers */ +#define MAX_MS CLK_TO_MS(MAX_CLK) + +static const unsigned int led_n_base[ASIC3_NUM_LEDS] = { + [0] = ASIC3_LED_0_Base, + [1] = ASIC3_LED_1_Base, + [2] = ASIC3_LED_2_Base, +}; + +static void brightness_set(struct led_classdev *cdev, + enum led_brightness value) +{ + struct platform_device *pdev = to_platform_device(cdev->dev->parent); + const struct mfd_cell *cell = mfd_get_cell(pdev); + struct asic3 *asic = dev_get_drvdata(pdev->dev.parent); + u32 timebase; + unsigned int base; + + timebase = (value == LED_OFF) ? 0 : (LED_EN|0x4); + + base = led_n_base[cell->id]; + asic3_write_register(asic, (base + ASIC3_LED_PeriodTime), 32); + asic3_write_register(asic, (base + ASIC3_LED_DutyTime), 32); + asic3_write_register(asic, (base + ASIC3_LED_AutoStopCount), 0); + asic3_write_register(asic, (base + ASIC3_LED_TimeBase), timebase); +} + +static int blink_set(struct led_classdev *cdev, + unsigned long *delay_on, + unsigned long *delay_off) +{ + struct platform_device *pdev = to_platform_device(cdev->dev->parent); + const struct mfd_cell *cell = mfd_get_cell(pdev); + struct asic3 *asic = dev_get_drvdata(pdev->dev.parent); + u32 on; + u32 off; + unsigned int base; + + if (*delay_on > MAX_MS || *delay_off > MAX_MS) + return -EINVAL; + + if (*delay_on == 0 && *delay_off == 0) { + /* If both are zero then a sensible default should be chosen */ + on = MS_TO_CLK(500); + off = MS_TO_CLK(500); + } else { + on = MS_TO_CLK(*delay_on); + off = MS_TO_CLK(*delay_off); + if ((on + off) > MAX_CLK) + return -EINVAL; + } + + base = led_n_base[cell->id]; + asic3_write_register(asic, (base + ASIC3_LED_PeriodTime), (on + off)); + asic3_write_register(asic, (base + ASIC3_LED_DutyTime), on); + asic3_write_register(asic, (base + ASIC3_LED_AutoStopCount), 0); + asic3_write_register(asic, (base + ASIC3_LED_TimeBase), (LED_EN|0x4)); + + *delay_on = CLK_TO_MS(on); + *delay_off = CLK_TO_MS(off); + + return 0; +} + +static int __devinit asic3_led_probe(struct platform_device *pdev) +{ + struct asic3_led *led = pdev->dev.platform_data; + int ret; + + ret = mfd_cell_enable(pdev); + if (ret < 0) + goto ret0; + + led->cdev = kzalloc(sizeof(struct led_classdev), GFP_KERNEL); + if (!led->cdev) { + ret = -ENOMEM; + goto ret1; + } + + led->cdev->name = led->name; + led->cdev->default_trigger = led->default_trigger; + led->cdev->brightness_set = brightness_set; + led->cdev->blink_set = blink_set; + + ret = led_classdev_register(&pdev->dev, led->cdev); + if (ret < 0) + goto ret2; + + return 0; + +ret2: + kfree(led->cdev); +ret1: + (void) mfd_cell_disable(pdev); +ret0: + return ret; +} + +static int __devexit asic3_led_remove(struct platform_device *pdev) +{ + struct asic3_led *led = pdev->dev.platform_data; + + led_classdev_unregister(led->cdev); + + kfree(led->cdev); + + return mfd_cell_disable(pdev); +} + +static struct platform_driver asic3_led_driver = { + .probe = asic3_led_probe, + .remove = __devexit_p(asic3_led_remove), + .driver = { + .name = "leds-asic3", + .owner = THIS_MODULE, + }, +}; + +MODULE_ALIAS("platform:leds-asic3"); + +static int __init asic3_led_init(void) +{ + return platform_driver_register(&asic3_led_driver); +} + +static void __exit asic3_led_exit(void) +{ + platform_driver_unregister(&asic3_led_driver); +} + +module_init(asic3_led_init); +module_exit(asic3_led_exit); + +MODULE_AUTHOR("Paul Parsons "); +MODULE_DESCRIPTION("HTC ASIC3 LED 
driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mfd/asic3.h b/include/linux/mfd/asic3.h index de3c4ad19afb..d0dd3ebd8488 100644 --- a/include/linux/mfd/asic3.h +++ b/include/linux/mfd/asic3.h @@ -16,6 +16,13 @@ #include +struct led_classdev; +struct asic3_led { + const char *name; + const char *default_trigger; + struct led_classdev *cdev; +}; + struct asic3_platform_data { u16 *gpio_config; unsigned int gpio_config_num; @@ -23,6 +30,8 @@ struct asic3_platform_data { unsigned int irq_base; unsigned int gpio_base; + + struct asic3_led *leds; }; #define ASIC3_NUM_GPIO_BANKS 4 @@ -111,9 +120,9 @@ struct asic3_platform_data { #define ASIC3_GPIOA11_PWM0 ASIC3_CONFIG_GPIO(11, 1, 1, 0) #define ASIC3_GPIOA12_PWM1 ASIC3_CONFIG_GPIO(12, 1, 1, 0) #define ASIC3_GPIOA15_CONTROL_CX ASIC3_CONFIG_GPIO(15, 1, 1, 0) -#define ASIC3_GPIOC0_LED0 ASIC3_CONFIG_GPIO(32, 1, 1, 0) -#define ASIC3_GPIOC1_LED1 ASIC3_CONFIG_GPIO(33, 1, 1, 0) -#define ASIC3_GPIOC2_LED2 ASIC3_CONFIG_GPIO(34, 1, 1, 0) +#define ASIC3_GPIOC0_LED0 ASIC3_CONFIG_GPIO(32, 1, 0, 0) +#define ASIC3_GPIOC1_LED1 ASIC3_CONFIG_GPIO(33, 1, 0, 0) +#define ASIC3_GPIOC2_LED2 ASIC3_CONFIG_GPIO(34, 1, 0, 0) #define ASIC3_GPIOC3_SPI_RXD ASIC3_CONFIG_GPIO(35, 1, 0, 0) #define ASIC3_GPIOC4_CF_nCD ASIC3_CONFIG_GPIO(36, 1, 0, 0) #define ASIC3_GPIOC4_SPI_TXD ASIC3_CONFIG_GPIO(36, 1, 1, 0) @@ -152,6 +161,7 @@ struct asic3_platform_data { #define PWM_TIMEBASE_VALUE(x) ((x)&0xf) /* Low 4 bits sets time base */ #define PWM_TIMEBASE_ENABLE (1 << 4) /* Enable clock */ +#define ASIC3_NUM_LEDS 3 #define ASIC3_LED_0_Base 0x0700 #define ASIC3_LED_1_Base 0x0800 #define ASIC3_LED_2_Base 0x0900 @@ -293,4 +303,10 @@ struct asic3_platform_data { #define ASIC3_MAP_SIZE_32BIT 0x2000 #define ASIC3_MAP_SIZE_16BIT 0x1000 +/* Functions needed by leds-asic3 */ + +struct asic3; +extern void asic3_write_register(struct asic3 *asic, unsigned int reg, u32 val); +extern u32 asic3_read_register(struct asic3 *asic, unsigned int reg); + #endif /* __ASIC3_H__ */ -- cgit v1.2.3 From 74e32d1b68f177f9c998041d789253df9c7f3575 Mon Sep 17 00:00:00 2001 From: Paul Parsons Date: Sun, 15 May 2011 14:13:11 +0000 Subject: mfd: Fix ASIC3 SD Host Controller Configuration size The size of the TC6380AF SD Host Controller Configuration area is 0x200 bytes (assuming registers are aligned on 32-bit boundaries), not 0x400 bytes. 
Source: Toshiba TC6380AF Specification sections 4.2 and 4.3.1 Signed-off-by: Paul Parsons Signed-off-by: Samuel Ortiz --- drivers/mfd/asic3.c | 3 ++- include/linux/mfd/asic3.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c index 52e56ea43a85..c27fd1fc3b86 100644 --- a/drivers/mfd/asic3.c +++ b/drivers/mfd/asic3.c @@ -856,7 +856,8 @@ static int __init asic3_mfd_probe(struct platform_device *pdev, /* MMC */ asic->tmio_cnf = ioremap((ASIC3_SD_CONFIG_BASE >> asic->bus_shift) + - mem_sdio->start, 0x400 >> asic->bus_shift); + mem_sdio->start, + ASIC3_SD_CONFIG_SIZE >> asic->bus_shift); if (!asic->tmio_cnf) { ret = -ENOMEM; dev_dbg(asic->dev, "Couldn't ioremap SD_CONFIG\n"); diff --git a/include/linux/mfd/asic3.h b/include/linux/mfd/asic3.h index d0dd3ebd8488..ed793b77a1c5 100644 --- a/include/linux/mfd/asic3.h +++ b/include/linux/mfd/asic3.h @@ -297,6 +297,7 @@ struct asic3_platform_data { * *****************************************************************************/ #define ASIC3_SD_CONFIG_BASE 0x0400 /* Assumes 32 bit addressing */ +#define ASIC3_SD_CONFIG_SIZE 0x0200 /* Assumes 32 bit addressing */ #define ASIC3_SD_CTRL_BASE 0x1000 #define ASIC3_SDIO_CTRL_BASE 0x1200 -- cgit v1.2.3 From 39325b59d88b42ba2ccf2e62c234059e9941a47c Mon Sep 17 00:00:00 2001 From: Trilok Soni Date: Thu, 19 May 2011 10:54:04 +0530 Subject: input: Add Qualcomm pm8xxx keypad controller driver Add Qualcomm PMIC8XXX based keypad controller driver supporting upto 18x8 matrix configuration. Acked-by: Dmitry Torokhov Signed-off-by: Trilok Soni Signed-off-by: Anirudh Ghayal Signed-off-by: Samuel Ortiz --- drivers/input/keyboard/Kconfig | 11 + drivers/input/keyboard/Makefile | 1 + drivers/input/keyboard/pmic8xxx-keypad.c | 799 +++++++++++++++++++++++++++++++ include/linux/input/pmic8xxx-keypad.h | 52 ++ 4 files changed, 863 insertions(+) create mode 100644 drivers/input/keyboard/pmic8xxx-keypad.c create mode 100644 include/linux/input/pmic8xxx-keypad.h (limited to 'include/linux') diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 69badb4e06aa..b4dee9d5a055 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -412,6 +412,17 @@ config KEYBOARD_PXA930_ROTARY To compile this driver as a module, choose M here: the module will be called pxa930_rotary. +config KEYBOARD_PMIC8XXX + tristate "Qualcomm PMIC8XXX keypad support" + depends on MFD_PM8XXX + help + Say Y here if you want to enable the driver for the PMIC8XXX + keypad provided as a reference design from Qualcomm. This is intended + to support upto 18x8 matrix based keypad design. + + To compile this driver as a module, choose M here: the module will + be called pmic8xxx-keypad. 
+ config KEYBOARD_SAMSUNG tristate "Samsung keypad support" depends on SAMSUNG_DEV_KEYPAD diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile index c49cf8e04cd7..ddde0fd476f7 100644 --- a/drivers/input/keyboard/Makefile +++ b/drivers/input/keyboard/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_KEYBOARD_NOMADIK) += nomadik-ske-keypad.o obj-$(CONFIG_KEYBOARD_OMAP) += omap-keypad.o obj-$(CONFIG_KEYBOARD_OMAP4) += omap4-keypad.o obj-$(CONFIG_KEYBOARD_OPENCORES) += opencores-kbd.o +obj-$(CONFIG_KEYBOARD_PMIC8XXX) += pmic8xxx-keypad.o obj-$(CONFIG_KEYBOARD_PXA27x) += pxa27x_keypad.o obj-$(CONFIG_KEYBOARD_PXA930_ROTARY) += pxa930_rotary.o obj-$(CONFIG_KEYBOARD_QT1070) += qt1070.o diff --git a/drivers/input/keyboard/pmic8xxx-keypad.c b/drivers/input/keyboard/pmic8xxx-keypad.c new file mode 100644 index 000000000000..40b02ae96f86 --- /dev/null +++ b/drivers/input/keyboard/pmic8xxx-keypad.c @@ -0,0 +1,799 @@ +/* Copyright (c) 2009-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define PM8XXX_MAX_ROWS 18 +#define PM8XXX_MAX_COLS 8 +#define PM8XXX_ROW_SHIFT 3 +#define PM8XXX_MATRIX_MAX_SIZE (PM8XXX_MAX_ROWS * PM8XXX_MAX_COLS) + +#define PM8XXX_MIN_ROWS 5 +#define PM8XXX_MIN_COLS 5 + +#define MAX_SCAN_DELAY 128 +#define MIN_SCAN_DELAY 1 + +/* in nanoseconds */ +#define MAX_ROW_HOLD_DELAY 122000 +#define MIN_ROW_HOLD_DELAY 30500 + +#define MAX_DEBOUNCE_TIME 20 +#define MIN_DEBOUNCE_TIME 5 + +#define KEYP_CTRL 0x148 + +#define KEYP_CTRL_EVNTS BIT(0) +#define KEYP_CTRL_EVNTS_MASK 0x3 + +#define KEYP_CTRL_SCAN_COLS_SHIFT 5 +#define KEYP_CTRL_SCAN_COLS_MIN 5 +#define KEYP_CTRL_SCAN_COLS_BITS 0x3 + +#define KEYP_CTRL_SCAN_ROWS_SHIFT 2 +#define KEYP_CTRL_SCAN_ROWS_MIN 5 +#define KEYP_CTRL_SCAN_ROWS_BITS 0x7 + +#define KEYP_CTRL_KEYP_EN BIT(7) + +#define KEYP_SCAN 0x149 + +#define KEYP_SCAN_READ_STATE BIT(0) +#define KEYP_SCAN_DBOUNCE_SHIFT 1 +#define KEYP_SCAN_PAUSE_SHIFT 3 +#define KEYP_SCAN_ROW_HOLD_SHIFT 6 + +#define KEYP_TEST 0x14A + +#define KEYP_TEST_CLEAR_RECENT_SCAN BIT(6) +#define KEYP_TEST_CLEAR_OLD_SCAN BIT(5) +#define KEYP_TEST_READ_RESET BIT(4) +#define KEYP_TEST_DTEST_EN BIT(3) +#define KEYP_TEST_ABORT_READ BIT(0) + +#define KEYP_TEST_DBG_SELECT_SHIFT 1 + +/* bits of these registers represent + * '0' for key press + * '1' for key release + */ +#define KEYP_RECENT_DATA 0x14B +#define KEYP_OLD_DATA 0x14C + +#define KEYP_CLOCK_FREQ 32768 + +/** + * struct pmic8xxx_kp - internal keypad data structure + * @pdata - keypad platform data pointer + * @input - input device pointer for keypad + * @key_sense_irq - key press/release irq number + * @key_stuck_irq - key stuck notification irq number + * @keycodes - array to hold the key codes + * @dev - parent device pointer + * @keystate - present key press/release state + * @stuckstate - present state when key stuck irq + * @ctrl_reg - control register value + */ +struct pmic8xxx_kp { + const struct pm8xxx_keypad_platform_data *pdata; + struct 
input_dev *input; + int key_sense_irq; + int key_stuck_irq; + + unsigned short keycodes[PM8XXX_MATRIX_MAX_SIZE]; + + struct device *dev; + u16 keystate[PM8XXX_MAX_ROWS]; + u16 stuckstate[PM8XXX_MAX_ROWS]; + + u8 ctrl_reg; +}; + +static int pmic8xxx_kp_write_u8(struct pmic8xxx_kp *kp, + u8 data, u16 reg) +{ + int rc; + + rc = pm8xxx_writeb(kp->dev->parent, reg, data); + return rc; +} + +static int pmic8xxx_kp_read(struct pmic8xxx_kp *kp, + u8 *data, u16 reg, unsigned num_bytes) +{ + int rc; + + rc = pm8xxx_read_buf(kp->dev->parent, reg, data, num_bytes); + return rc; +} + +static int pmic8xxx_kp_read_u8(struct pmic8xxx_kp *kp, + u8 *data, u16 reg) +{ + int rc; + + rc = pmic8xxx_kp_read(kp, data, reg, 1); + return rc; +} + +static u8 pmic8xxx_col_state(struct pmic8xxx_kp *kp, u8 col) +{ + /* all keys pressed on that particular row? */ + if (col == 0x00) + return 1 << kp->pdata->num_cols; + else + return col & ((1 << kp->pdata->num_cols) - 1); +} + +/* + * Synchronous read protocol for RevB0 onwards: + * + * 1. Write '1' to ReadState bit in KEYP_SCAN register + * 2. Wait 2*32KHz clocks, so that HW can successfully enter read mode + * synchronously + * 3. Read rows in old array first if events are more than one + * 4. Read rows in recent array + * 5. Wait 4*32KHz clocks + * 6. Write '0' to ReadState bit of KEYP_SCAN register so that hw can + * synchronously exit read mode. + */ +static int pmic8xxx_chk_sync_read(struct pmic8xxx_kp *kp) +{ + int rc; + u8 scan_val; + + rc = pmic8xxx_kp_read_u8(kp, &scan_val, KEYP_SCAN); + if (rc < 0) { + dev_err(kp->dev, "Error reading KEYP_SCAN reg, rc=%d\n", rc); + return rc; + } + + scan_val |= 0x1; + + rc = pmic8xxx_kp_write_u8(kp, scan_val, KEYP_SCAN); + if (rc < 0) { + dev_err(kp->dev, "Error writing KEYP_SCAN reg, rc=%d\n", rc); + return rc; + } + + /* 2 * 32KHz clocks */ + udelay((2 * DIV_ROUND_UP(USEC_PER_SEC, KEYP_CLOCK_FREQ)) + 1); + + return rc; +} + +static int pmic8xxx_kp_read_data(struct pmic8xxx_kp *kp, u16 *state, + u16 data_reg, int read_rows) +{ + int rc, row; + u8 new_data[PM8XXX_MAX_ROWS]; + + rc = pmic8xxx_kp_read(kp, new_data, data_reg, read_rows); + if (rc) + return rc; + + for (row = 0; row < kp->pdata->num_rows; row++) { + dev_dbg(kp->dev, "new_data[%d] = %d\n", row, + new_data[row]); + state[row] = pmic8xxx_col_state(kp, new_data[row]); + } + + return rc; +} + +static int pmic8xxx_kp_read_matrix(struct pmic8xxx_kp *kp, u16 *new_state, + u16 *old_state) +{ + int rc, read_rows; + u8 scan_val; + + if (kp->pdata->num_rows < PM8XXX_MIN_ROWS) + read_rows = PM8XXX_MIN_ROWS; + else + read_rows = kp->pdata->num_rows; + + pmic8xxx_chk_sync_read(kp); + + if (old_state) { + rc = pmic8xxx_kp_read_data(kp, old_state, KEYP_OLD_DATA, + read_rows); + if (rc < 0) { + dev_err(kp->dev, + "Error reading KEYP_OLD_DATA, rc=%d\n", rc); + return rc; + } + } + + rc = pmic8xxx_kp_read_data(kp, new_state, KEYP_RECENT_DATA, + read_rows); + if (rc < 0) { + dev_err(kp->dev, + "Error reading KEYP_RECENT_DATA, rc=%d\n", rc); + return rc; + } + + /* 4 * 32KHz clocks */ + udelay((4 * DIV_ROUND_UP(USEC_PER_SEC, KEYP_CLOCK_FREQ)) + 1); + + rc = pmic8xxx_kp_read_u8(kp, &scan_val, KEYP_SCAN); + if (rc < 0) { + dev_err(kp->dev, "Error reading KEYP_SCAN reg, rc=%d\n", rc); + return rc; + } + + scan_val &= 0xFE; + rc = pmic8xxx_kp_write_u8(kp, scan_val, KEYP_SCAN); + if (rc < 0) + dev_err(kp->dev, "Error writing KEYP_SCAN reg, rc=%d\n", rc); + + return rc; +} + +static void __pmic8xxx_kp_scan_matrix(struct pmic8xxx_kp *kp, u16 *new_state, + u16 *old_state) +{ + int row, col, 
code; + + for (row = 0; row < kp->pdata->num_rows; row++) { + int bits_changed = new_state[row] ^ old_state[row]; + + if (!bits_changed) + continue; + + for (col = 0; col < kp->pdata->num_cols; col++) { + if (!(bits_changed & (1 << col))) + continue; + + dev_dbg(kp->dev, "key [%d:%d] %s\n", row, col, + !(new_state[row] & (1 << col)) ? + "pressed" : "released"); + + code = MATRIX_SCAN_CODE(row, col, PM8XXX_ROW_SHIFT); + + input_event(kp->input, EV_MSC, MSC_SCAN, code); + input_report_key(kp->input, + kp->keycodes[code], + !(new_state[row] & (1 << col))); + + input_sync(kp->input); + } + } +} + +static bool pmic8xxx_detect_ghost_keys(struct pmic8xxx_kp *kp, u16 *new_state) +{ + int row, found_first = -1; + u16 check, row_state; + + check = 0; + for (row = 0; row < kp->pdata->num_rows; row++) { + row_state = (~new_state[row]) & + ((1 << kp->pdata->num_cols) - 1); + + if (hweight16(row_state) > 1) { + if (found_first == -1) + found_first = row; + if (check & row_state) { + dev_dbg(kp->dev, "detected ghost key on row[%d]" + " and row[%d]\n", found_first, row); + return true; + } + } + check |= row_state; + } + return false; +} + +static int pmic8xxx_kp_scan_matrix(struct pmic8xxx_kp *kp, unsigned int events) +{ + u16 new_state[PM8XXX_MAX_ROWS]; + u16 old_state[PM8XXX_MAX_ROWS]; + int rc; + + switch (events) { + case 0x1: + rc = pmic8xxx_kp_read_matrix(kp, new_state, NULL); + if (rc < 0) + return rc; + + /* detecting ghost key is not an error */ + if (pmic8xxx_detect_ghost_keys(kp, new_state)) + return 0; + __pmic8xxx_kp_scan_matrix(kp, new_state, kp->keystate); + memcpy(kp->keystate, new_state, sizeof(new_state)); + break; + case 0x3: /* two events - eventcounter is gray-coded */ + rc = pmic8xxx_kp_read_matrix(kp, new_state, old_state); + if (rc < 0) + return rc; + + __pmic8xxx_kp_scan_matrix(kp, old_state, kp->keystate); + __pmic8xxx_kp_scan_matrix(kp, new_state, old_state); + memcpy(kp->keystate, new_state, sizeof(new_state)); + break; + case 0x2: + dev_dbg(kp->dev, "Some key events were lost\n"); + rc = pmic8xxx_kp_read_matrix(kp, new_state, old_state); + if (rc < 0) + return rc; + __pmic8xxx_kp_scan_matrix(kp, old_state, kp->keystate); + __pmic8xxx_kp_scan_matrix(kp, new_state, old_state); + memcpy(kp->keystate, new_state, sizeof(new_state)); + break; + default: + rc = -EINVAL; + } + return rc; +} + +/* + * NOTE: We are reading recent and old data registers blindly + * whenever key-stuck interrupt happens, because events counter doesn't + * get updated when this interrupt happens due to key stuck doesn't get + * considered as key state change. + * + * We are not using old data register contents after they are being read + * because it might report the key which was pressed before the key being stuck + * as stuck key because it's pressed status is stored in the old data + * register. 
+ */ +static irqreturn_t pmic8xxx_kp_stuck_irq(int irq, void *data) +{ + u16 new_state[PM8XXX_MAX_ROWS]; + u16 old_state[PM8XXX_MAX_ROWS]; + int rc; + struct pmic8xxx_kp *kp = data; + + rc = pmic8xxx_kp_read_matrix(kp, new_state, old_state); + if (rc < 0) { + dev_err(kp->dev, "failed to read keypad matrix\n"); + return IRQ_HANDLED; + } + + __pmic8xxx_kp_scan_matrix(kp, new_state, kp->stuckstate); + + return IRQ_HANDLED; +} + +static irqreturn_t pmic8xxx_kp_irq(int irq, void *data) +{ + struct pmic8xxx_kp *kp = data; + u8 ctrl_val, events; + int rc; + + rc = pmic8xxx_kp_read(kp, &ctrl_val, KEYP_CTRL, 1); + if (rc < 0) { + dev_err(kp->dev, "failed to read keyp_ctrl register\n"); + return IRQ_HANDLED; + } + + events = ctrl_val & KEYP_CTRL_EVNTS_MASK; + + rc = pmic8xxx_kp_scan_matrix(kp, events); + if (rc < 0) + dev_err(kp->dev, "failed to scan matrix\n"); + + return IRQ_HANDLED; +} + +static int __devinit pmic8xxx_kpd_init(struct pmic8xxx_kp *kp) +{ + int bits, rc, cycles; + u8 scan_val = 0, ctrl_val = 0; + static const u8 row_bits[] = { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 7, 7, 7, + }; + + /* Find column bits */ + if (kp->pdata->num_cols < KEYP_CTRL_SCAN_COLS_MIN) + bits = 0; + else + bits = kp->pdata->num_cols - KEYP_CTRL_SCAN_COLS_MIN; + ctrl_val = (bits & KEYP_CTRL_SCAN_COLS_BITS) << + KEYP_CTRL_SCAN_COLS_SHIFT; + + /* Find row bits */ + if (kp->pdata->num_rows < KEYP_CTRL_SCAN_ROWS_MIN) + bits = 0; + else + bits = row_bits[kp->pdata->num_rows - KEYP_CTRL_SCAN_ROWS_MIN]; + + ctrl_val |= (bits << KEYP_CTRL_SCAN_ROWS_SHIFT); + + rc = pmic8xxx_kp_write_u8(kp, ctrl_val, KEYP_CTRL); + if (rc < 0) { + dev_err(kp->dev, "Error writing KEYP_CTRL reg, rc=%d\n", rc); + return rc; + } + + bits = (kp->pdata->debounce_ms / 5) - 1; + + scan_val |= (bits << KEYP_SCAN_DBOUNCE_SHIFT); + + bits = fls(kp->pdata->scan_delay_ms) - 1; + scan_val |= (bits << KEYP_SCAN_PAUSE_SHIFT); + + /* Row hold time is a multiple of 32KHz cycles. 
*/ + cycles = (kp->pdata->row_hold_ns * KEYP_CLOCK_FREQ) / NSEC_PER_SEC; + + scan_val |= (cycles << KEYP_SCAN_ROW_HOLD_SHIFT); + + rc = pmic8xxx_kp_write_u8(kp, scan_val, KEYP_SCAN); + if (rc) + dev_err(kp->dev, "Error writing KEYP_SCAN reg, rc=%d\n", rc); + + return rc; + +} + +static int __devinit pmic8xxx_kp_config_gpio(int gpio_start, int num_gpios, + struct pmic8xxx_kp *kp, struct pm_gpio *gpio_config) +{ + int rc, i; + + if (gpio_start < 0 || num_gpios < 0) + return -EINVAL; + + for (i = 0; i < num_gpios; i++) { + rc = pm8xxx_gpio_config(gpio_start + i, gpio_config); + if (rc) { + dev_err(kp->dev, "%s: FAIL pm8xxx_gpio_config():" + "for PM GPIO [%d] rc=%d.\n", + __func__, gpio_start + i, rc); + return rc; + } + } + + return 0; +} + +static int pmic8xxx_kp_enable(struct pmic8xxx_kp *kp) +{ + int rc; + + kp->ctrl_reg |= KEYP_CTRL_KEYP_EN; + + rc = pmic8xxx_kp_write_u8(kp, kp->ctrl_reg, KEYP_CTRL); + if (rc < 0) + dev_err(kp->dev, "Error writing KEYP_CTRL reg, rc=%d\n", rc); + + return rc; +} + +static int pmic8xxx_kp_disable(struct pmic8xxx_kp *kp) +{ + int rc; + + kp->ctrl_reg &= ~KEYP_CTRL_KEYP_EN; + + rc = pmic8xxx_kp_write_u8(kp, kp->ctrl_reg, KEYP_CTRL); + if (rc < 0) + return rc; + + return rc; +} + +static int pmic8xxx_kp_open(struct input_dev *dev) +{ + struct pmic8xxx_kp *kp = input_get_drvdata(dev); + + return pmic8xxx_kp_enable(kp); +} + +static void pmic8xxx_kp_close(struct input_dev *dev) +{ + struct pmic8xxx_kp *kp = input_get_drvdata(dev); + + pmic8xxx_kp_disable(kp); +} + +/* + * keypad controller should be initialized in the following sequence + * only, otherwise it might get into FSM stuck state. + * + * - Initialize keypad control parameters, like no. of rows, columns, + * timing values etc., + * - configure rows and column gpios pull up/down. + * - set irq edge type. + * - enable the keypad controller. 
+ */ +static int __devinit pmic8xxx_kp_probe(struct platform_device *pdev) +{ + const struct pm8xxx_keypad_platform_data *pdata = mfd_get_data(pdev); + const struct matrix_keymap_data *keymap_data; + struct pmic8xxx_kp *kp; + int rc; + u8 ctrl_val; + + struct pm_gpio kypd_drv = { + .direction = PM_GPIO_DIR_OUT, + .output_buffer = PM_GPIO_OUT_BUF_OPEN_DRAIN, + .output_value = 0, + .pull = PM_GPIO_PULL_NO, + .vin_sel = PM_GPIO_VIN_S3, + .out_strength = PM_GPIO_STRENGTH_LOW, + .function = PM_GPIO_FUNC_1, + .inv_int_pol = 1, + }; + + struct pm_gpio kypd_sns = { + .direction = PM_GPIO_DIR_IN, + .pull = PM_GPIO_PULL_UP_31P5, + .vin_sel = PM_GPIO_VIN_S3, + .out_strength = PM_GPIO_STRENGTH_NO, + .function = PM_GPIO_FUNC_NORMAL, + .inv_int_pol = 1, + }; + + + if (!pdata || !pdata->num_cols || !pdata->num_rows || + pdata->num_cols > PM8XXX_MAX_COLS || + pdata->num_rows > PM8XXX_MAX_ROWS || + pdata->num_cols < PM8XXX_MIN_COLS) { + dev_err(&pdev->dev, "invalid platform data\n"); + return -EINVAL; + } + + if (!pdata->scan_delay_ms || + pdata->scan_delay_ms > MAX_SCAN_DELAY || + pdata->scan_delay_ms < MIN_SCAN_DELAY || + !is_power_of_2(pdata->scan_delay_ms)) { + dev_err(&pdev->dev, "invalid keypad scan time supplied\n"); + return -EINVAL; + } + + if (!pdata->row_hold_ns || + pdata->row_hold_ns > MAX_ROW_HOLD_DELAY || + pdata->row_hold_ns < MIN_ROW_HOLD_DELAY || + ((pdata->row_hold_ns % MIN_ROW_HOLD_DELAY) != 0)) { + dev_err(&pdev->dev, "invalid keypad row hold time supplied\n"); + return -EINVAL; + } + + if (!pdata->debounce_ms || + ((pdata->debounce_ms % 5) != 0) || + pdata->debounce_ms > MAX_DEBOUNCE_TIME || + pdata->debounce_ms < MIN_DEBOUNCE_TIME) { + dev_err(&pdev->dev, "invalid debounce time supplied\n"); + return -EINVAL; + } + + keymap_data = pdata->keymap_data; + if (!keymap_data) { + dev_err(&pdev->dev, "no keymap data supplied\n"); + return -EINVAL; + } + + kp = kzalloc(sizeof(*kp), GFP_KERNEL); + if (!kp) + return -ENOMEM; + + platform_set_drvdata(pdev, kp); + + kp->pdata = pdata; + kp->dev = &pdev->dev; + + kp->input = input_allocate_device(); + if (!kp->input) { + dev_err(&pdev->dev, "unable to allocate input device\n"); + rc = -ENOMEM; + goto err_alloc_device; + } + + kp->key_sense_irq = platform_get_irq(pdev, 0); + if (kp->key_sense_irq < 0) { + dev_err(&pdev->dev, "unable to get keypad sense irq\n"); + rc = -ENXIO; + goto err_get_irq; + } + + kp->key_stuck_irq = platform_get_irq(pdev, 1); + if (kp->key_stuck_irq < 0) { + dev_err(&pdev->dev, "unable to get keypad stuck irq\n"); + rc = -ENXIO; + goto err_get_irq; + } + + kp->input->name = pdata->input_name ? : "PMIC8XXX keypad"; + kp->input->phys = pdata->input_phys_device ? 
: "pmic8xxx_keypad/input0"; + + kp->input->dev.parent = &pdev->dev; + + kp->input->id.bustype = BUS_I2C; + kp->input->id.version = 0x0001; + kp->input->id.product = 0x0001; + kp->input->id.vendor = 0x0001; + + kp->input->evbit[0] = BIT_MASK(EV_KEY); + + if (pdata->rep) + __set_bit(EV_REP, kp->input->evbit); + + kp->input->keycode = kp->keycodes; + kp->input->keycodemax = PM8XXX_MATRIX_MAX_SIZE; + kp->input->keycodesize = sizeof(kp->keycodes); + kp->input->open = pmic8xxx_kp_open; + kp->input->close = pmic8xxx_kp_close; + + matrix_keypad_build_keymap(keymap_data, PM8XXX_ROW_SHIFT, + kp->input->keycode, kp->input->keybit); + + input_set_capability(kp->input, EV_MSC, MSC_SCAN); + input_set_drvdata(kp->input, kp); + + /* initialize keypad state */ + memset(kp->keystate, 0xff, sizeof(kp->keystate)); + memset(kp->stuckstate, 0xff, sizeof(kp->stuckstate)); + + rc = pmic8xxx_kpd_init(kp); + if (rc < 0) { + dev_err(&pdev->dev, "unable to initialize keypad controller\n"); + goto err_get_irq; + } + + rc = pmic8xxx_kp_config_gpio(pdata->cols_gpio_start, + pdata->num_cols, kp, &kypd_sns); + if (rc < 0) { + dev_err(&pdev->dev, "unable to configure keypad sense lines\n"); + goto err_gpio_config; + } + + rc = pmic8xxx_kp_config_gpio(pdata->rows_gpio_start, + pdata->num_rows, kp, &kypd_drv); + if (rc < 0) { + dev_err(&pdev->dev, "unable to configure keypad drive lines\n"); + goto err_gpio_config; + } + + rc = request_any_context_irq(kp->key_sense_irq, pmic8xxx_kp_irq, + IRQF_TRIGGER_RISING, "pmic-keypad", kp); + if (rc < 0) { + dev_err(&pdev->dev, "failed to request keypad sense irq\n"); + goto err_get_irq; + } + + rc = request_any_context_irq(kp->key_stuck_irq, pmic8xxx_kp_stuck_irq, + IRQF_TRIGGER_RISING, "pmic-keypad-stuck", kp); + if (rc < 0) { + dev_err(&pdev->dev, "failed to request keypad stuck irq\n"); + goto err_req_stuck_irq; + } + + rc = pmic8xxx_kp_read_u8(kp, &ctrl_val, KEYP_CTRL); + if (rc < 0) { + dev_err(&pdev->dev, "failed to read KEYP_CTRL register\n"); + goto err_pmic_reg_read; + } + + kp->ctrl_reg = ctrl_val; + + rc = input_register_device(kp->input); + if (rc < 0) { + dev_err(&pdev->dev, "unable to register keypad input device\n"); + goto err_pmic_reg_read; + } + + device_init_wakeup(&pdev->dev, pdata->wakeup); + + return 0; + +err_pmic_reg_read: + free_irq(kp->key_stuck_irq, NULL); +err_req_stuck_irq: + free_irq(kp->key_sense_irq, NULL); +err_gpio_config: +err_get_irq: + input_free_device(kp->input); +err_alloc_device: + platform_set_drvdata(pdev, NULL); + kfree(kp); + return rc; +} + +static int __devexit pmic8xxx_kp_remove(struct platform_device *pdev) +{ + struct pmic8xxx_kp *kp = platform_get_drvdata(pdev); + + device_init_wakeup(&pdev->dev, 0); + free_irq(kp->key_stuck_irq, NULL); + free_irq(kp->key_sense_irq, NULL); + input_unregister_device(kp->input); + kfree(kp); + + platform_set_drvdata(pdev, NULL); + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int pmic8xxx_kp_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct pmic8xxx_kp *kp = platform_get_drvdata(pdev); + struct input_dev *input_dev = kp->input; + + if (device_may_wakeup(dev)) { + enable_irq_wake(kp->key_sense_irq); + } else { + mutex_lock(&input_dev->mutex); + + if (input_dev->users) + pmic8xxx_kp_disable(kp); + + mutex_unlock(&input_dev->mutex); + } + + return 0; +} + +static int pmic8xxx_kp_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct pmic8xxx_kp *kp = platform_get_drvdata(pdev); + struct input_dev *input_dev = 
kp->input; + + if (device_may_wakeup(dev)) { + disable_irq_wake(kp->key_sense_irq); + } else { + mutex_lock(&input_dev->mutex); + + if (input_dev->users) + pmic8xxx_kp_enable(kp); + + mutex_unlock(&input_dev->mutex); + } + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(pm8xxx_kp_pm_ops, + pmic8xxx_kp_suspend, pmic8xxx_kp_resume); + +static struct platform_driver pmic8xxx_kp_driver = { + .probe = pmic8xxx_kp_probe, + .remove = __devexit_p(pmic8xxx_kp_remove), + .driver = { + .name = PM8XXX_KEYPAD_DEV_NAME, + .owner = THIS_MODULE, + .pm = &pm8xxx_kp_pm_ops, + }, +}; + +static int __init pmic8xxx_kp_init(void) +{ + return platform_driver_register(&pmic8xxx_kp_driver); +} +module_init(pmic8xxx_kp_init); + +static void __exit pmic8xxx_kp_exit(void) +{ + platform_driver_unregister(&pmic8xxx_kp_driver); +} +module_exit(pmic8xxx_kp_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("PMIC8XXX keypad driver"); +MODULE_VERSION("1.0"); +MODULE_ALIAS("platform:pmic8xxx_keypad"); +MODULE_AUTHOR("Trilok Soni "); diff --git a/include/linux/input/pmic8xxx-keypad.h b/include/linux/input/pmic8xxx-keypad.h new file mode 100644 index 000000000000..5f1e2f9ad959 --- /dev/null +++ b/include/linux/input/pmic8xxx-keypad.h @@ -0,0 +1,52 @@ +/* Copyright (c) 2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __PMIC8XXX_KEYPAD_H__ +#define __PMIC8XXX_KEYPAD_H__ + +#include + +#define PM8XXX_KEYPAD_DEV_NAME "pm8xxx-keypad" + +/** + * struct pm8xxx_keypad_platform_data - platform data for keypad + * @keymap_data - matrix keymap data + * @input_name - input device name + * @input_phys_device - input device name + * @num_cols - number of columns of keypad + * @num_rows - number of row of keypad + * @debounce_ms - debounce period in milliseconds + * @scan_delay_ms - scan delay in milliseconds + * @row_hold_ns - row hold period in nanoseconds + * @wakeup - configure keypad as wakeup + * @rep - enable or disable key repeat bit + */ +struct pm8xxx_keypad_platform_data { + const struct matrix_keymap_data *keymap_data; + + const char *input_name; + const char *input_phys_device; + + unsigned int num_cols; + unsigned int num_rows; + unsigned int rows_gpio_start; + unsigned int cols_gpio_start; + + unsigned int debounce_ms; + unsigned int scan_delay_ms; + unsigned int row_hold_ns; + + bool wakeup; + bool rep; +}; + +#endif /*__PMIC8XXX_KEYPAD_H__ */ -- cgit v1.2.3 From 92d57a73e41047bff7d0812e06f893567876d455 Mon Sep 17 00:00:00 2001 From: Trilok Soni Date: Fri, 13 May 2011 15:17:51 +0530 Subject: input: Add support for Qualcomm PMIC8XXX power key Add support for PMIC8XXX power key driven over dedicated KYPD_PWR_N pin. 
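As a minimal sketch (not part of this patch), a board file could describe the power key to this driver roughly as follows; the field values and the variable name are hypothetical, and only struct pm8xxx_pwrkey_platform_data and its fields come from the change itself. The trigger delay must stay within the 62500 us limit enforced in probe.

	#include <linux/input/pmic8xxx-pwrkey.h>

	/*
	 * Hypothetical board-level platform data for the power key:
	 * pull_up enables the internal pull-up on KYPD_PWR_N, and
	 * kpd_trigger_delay_us selects the press-detect delay that the
	 * driver programs into PON_CNTL_1.
	 */
	static struct pm8xxx_pwrkey_platform_data board_pwrkey_pdata = {
		.pull_up		= true,
		.kpd_trigger_delay_us	= 15625,	/* must be <= 62500 */
		.wakeup			= 1,
	};
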
Acked-by: Dmitry Torokhov Signed-off-by: Trilok Soni Signed-off-by: Anirudh Ghayal Signed-off-by: Samuel Ortiz --- drivers/input/misc/Kconfig | 11 ++ drivers/input/misc/Makefile | 1 + drivers/input/misc/pmic8xxx-pwrkey.c | 231 ++++++++++++++++++++++++++++++++++ include/linux/input/pmic8xxx-pwrkey.h | 31 +++++ 4 files changed, 274 insertions(+) create mode 100644 drivers/input/misc/pmic8xxx-pwrkey.c create mode 100644 include/linux/input/pmic8xxx-pwrkey.h (limited to 'include/linux') diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index f9cf0881b0e3..45dc6aa62ba4 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -330,6 +330,17 @@ config INPUT_PWM_BEEPER To compile this driver as a module, choose M here: the module will be called pwm-beeper. +config INPUT_PMIC8XXX_PWRKEY + tristate "PMIC8XXX power key support" + depends on MFD_PM8XXX + help + Say Y here if you want support for the PMIC8XXX power key. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called pmic8xxx-pwrkey. + config INPUT_GPIO_ROTARY_ENCODER tristate "Rotary encoders connected to GPIO pins" depends on GPIOLIB && GENERIC_GPIO diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile index e3f7984e6274..38efb2cb182b 100644 --- a/drivers/input/misc/Makefile +++ b/drivers/input/misc/Makefile @@ -33,6 +33,7 @@ obj-$(CONFIG_INPUT_PCF8574) += pcf8574_keypad.o obj-$(CONFIG_INPUT_PCSPKR) += pcspkr.o obj-$(CONFIG_INPUT_POWERMATE) += powermate.o obj-$(CONFIG_INPUT_PWM_BEEPER) += pwm-beeper.o +obj-$(CONFIG_INPUT_PMIC8XXX_PWRKEY) += pmic8xxx-pwrkey.o obj-$(CONFIG_INPUT_RB532_BUTTON) += rb532_button.o obj-$(CONFIG_INPUT_GPIO_ROTARY_ENCODER) += rotary_encoder.o obj-$(CONFIG_INPUT_SGI_BTNS) += sgi_btns.o diff --git a/drivers/input/misc/pmic8xxx-pwrkey.c b/drivers/input/misc/pmic8xxx-pwrkey.c new file mode 100644 index 000000000000..97e07e786e41 --- /dev/null +++ b/drivers/input/misc/pmic8xxx-pwrkey.c @@ -0,0 +1,231 @@ +/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define PON_CNTL_1 0x1C +#define PON_CNTL_PULL_UP BIT(7) +#define PON_CNTL_TRIG_DELAY_MASK (0x7) + +/** + * struct pmic8xxx_pwrkey - pmic8xxx pwrkey information + * @key_press_irq: key press irq number + */ +struct pmic8xxx_pwrkey { + struct input_dev *pwr; + int key_press_irq; +}; + +static irqreturn_t pwrkey_press_irq(int irq, void *_pwrkey) +{ + struct pmic8xxx_pwrkey *pwrkey = _pwrkey; + + input_report_key(pwrkey->pwr, KEY_POWER, 1); + input_sync(pwrkey->pwr); + + return IRQ_HANDLED; +} + +static irqreturn_t pwrkey_release_irq(int irq, void *_pwrkey) +{ + struct pmic8xxx_pwrkey *pwrkey = _pwrkey; + + input_report_key(pwrkey->pwr, KEY_POWER, 0); + input_sync(pwrkey->pwr); + + return IRQ_HANDLED; +} + +#ifdef CONFIG_PM_SLEEP +static int pmic8xxx_pwrkey_suspend(struct device *dev) +{ + struct pmic8xxx_pwrkey *pwrkey = dev_get_drvdata(dev); + + if (device_may_wakeup(dev)) + enable_irq_wake(pwrkey->key_press_irq); + + return 0; +} + +static int pmic8xxx_pwrkey_resume(struct device *dev) +{ + struct pmic8xxx_pwrkey *pwrkey = dev_get_drvdata(dev); + + if (device_may_wakeup(dev)) + disable_irq_wake(pwrkey->key_press_irq); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(pm8xxx_pwr_key_pm_ops, + pmic8xxx_pwrkey_suspend, pmic8xxx_pwrkey_resume); + +static int __devinit pmic8xxx_pwrkey_probe(struct platform_device *pdev) +{ + struct input_dev *pwr; + int key_release_irq = platform_get_irq(pdev, 0); + int key_press_irq = platform_get_irq(pdev, 1); + int err; + unsigned int delay; + u8 pon_cntl; + struct pmic8xxx_pwrkey *pwrkey; + const struct pm8xxx_pwrkey_platform_data *pdata = mfd_get_data(pdev); + + if (!pdata) { + dev_err(&pdev->dev, "power key platform data not supplied\n"); + return -EINVAL; + } + + if (pdata->kpd_trigger_delay_us > 62500) { + dev_err(&pdev->dev, "invalid power key trigger delay\n"); + return -EINVAL; + } + + pwrkey = kzalloc(sizeof(*pwrkey), GFP_KERNEL); + if (!pwrkey) + return -ENOMEM; + + pwr = input_allocate_device(); + if (!pwr) { + dev_dbg(&pdev->dev, "Can't allocate power button\n"); + err = -ENOMEM; + goto free_pwrkey; + } + + input_set_capability(pwr, EV_KEY, KEY_POWER); + + pwr->name = "pmic8xxx_pwrkey"; + pwr->phys = "pmic8xxx_pwrkey/input0"; + pwr->dev.parent = &pdev->dev; + + delay = (pdata->kpd_trigger_delay_us << 10) / USEC_PER_SEC; + delay = 1 + ilog2(delay); + + err = pm8xxx_readb(pdev->dev.parent, PON_CNTL_1, &pon_cntl); + if (err < 0) { + dev_err(&pdev->dev, "failed reading PON_CNTL_1 err=%d\n", err); + goto free_input_dev; + } + + pon_cntl &= ~PON_CNTL_TRIG_DELAY_MASK; + pon_cntl |= (delay & PON_CNTL_TRIG_DELAY_MASK); + if (pdata->pull_up) + pon_cntl |= PON_CNTL_PULL_UP; + else + pon_cntl &= ~PON_CNTL_PULL_UP; + + err = pm8xxx_writeb(pdev->dev.parent, PON_CNTL_1, pon_cntl); + if (err < 0) { + dev_err(&pdev->dev, "failed writing PON_CNTL_1 err=%d\n", err); + goto free_input_dev; + } + + err = input_register_device(pwr); + if (err) { + dev_dbg(&pdev->dev, "Can't register power key: %d\n", err); + goto free_input_dev; + } + + pwrkey->key_press_irq = key_press_irq; + pwrkey->pwr = pwr; + + platform_set_drvdata(pdev, pwrkey); + + err = request_irq(key_press_irq, pwrkey_press_irq, + IRQF_TRIGGER_RISING, "pmic8xxx_pwrkey_press", pwrkey); + if (err < 0) { + dev_dbg(&pdev->dev, "Can't get %d IRQ for pwrkey: %d\n", + key_press_irq, err); + goto unreg_input_dev; + } + + err = request_irq(key_release_irq, pwrkey_release_irq, + 
IRQF_TRIGGER_RISING, "pmic8xxx_pwrkey_release", pwrkey); + if (err < 0) { + dev_dbg(&pdev->dev, "Can't get %d IRQ for pwrkey: %d\n", + key_release_irq, err); + + goto free_press_irq; + } + + device_init_wakeup(&pdev->dev, pdata->wakeup); + + return 0; + +free_press_irq: + free_irq(key_press_irq, NULL); +unreg_input_dev: + platform_set_drvdata(pdev, NULL); + input_unregister_device(pwr); + pwr = NULL; +free_input_dev: + input_free_device(pwr); +free_pwrkey: + kfree(pwrkey); + return err; +} + +static int __devexit pmic8xxx_pwrkey_remove(struct platform_device *pdev) +{ + struct pmic8xxx_pwrkey *pwrkey = platform_get_drvdata(pdev); + int key_release_irq = platform_get_irq(pdev, 0); + int key_press_irq = platform_get_irq(pdev, 1); + + device_init_wakeup(&pdev->dev, 0); + + free_irq(key_press_irq, pwrkey); + free_irq(key_release_irq, pwrkey); + input_unregister_device(pwrkey->pwr); + platform_set_drvdata(pdev, NULL); + kfree(pwrkey); + + return 0; +} + +static struct platform_driver pmic8xxx_pwrkey_driver = { + .probe = pmic8xxx_pwrkey_probe, + .remove = __devexit_p(pmic8xxx_pwrkey_remove), + .driver = { + .name = PM8XXX_PWRKEY_DEV_NAME, + .owner = THIS_MODULE, + .pm = &pm8xxx_pwr_key_pm_ops, + }, +}; + +static int __init pmic8xxx_pwrkey_init(void) +{ + return platform_driver_register(&pmic8xxx_pwrkey_driver); +} +module_init(pmic8xxx_pwrkey_init); + +static void __exit pmic8xxx_pwrkey_exit(void) +{ + platform_driver_unregister(&pmic8xxx_pwrkey_driver); +} +module_exit(pmic8xxx_pwrkey_exit); + +MODULE_ALIAS("platform:pmic8xxx_pwrkey"); +MODULE_DESCRIPTION("PMIC8XXX Power Key driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Trilok Soni "); diff --git a/include/linux/input/pmic8xxx-pwrkey.h b/include/linux/input/pmic8xxx-pwrkey.h new file mode 100644 index 000000000000..6d2974e57109 --- /dev/null +++ b/include/linux/input/pmic8xxx-pwrkey.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __PMIC8XXX_PWRKEY_H__ +#define __PMIC8XXX_PWRKEY_H__ + +#define PM8XXX_PWRKEY_DEV_NAME "pm8xxx-pwrkey" + +/** + * struct pm8xxx_pwrkey_platform_data - platform data for pwrkey driver + * @pull up: power on register control for pull up/down configuration + * @kpd_trigger_delay_us: time delay for power key state change interrupt + * trigger. + * @wakeup: configure power key as wakeup source + */ +struct pm8xxx_pwrkey_platform_data { + bool pull_up; + u32 kpd_trigger_delay_us; + u32 wakeup; +}; + +#endif /* __PMIC8XXX_PWRKEY_H__ */ -- cgit v1.2.3 From 86e4ca66e81bba0f8640f1fa19b8b8f72cbd0561 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 26 May 2011 15:00:31 -0400 Subject: bug.h: Move ratelimit warn interfaces to ratelimit.h As reported by Ingo Molnar, we still have configuration combinations where use of the WARN_RATELIMIT interfaces break the build because dependencies don't get met. Instead of going down the long road of trying to make it so that ratelimit.h can get included by kernel.h or asm-generic/bug.h, just move the interface into ratelimit.h and make users have to include that. 
Reported-by: Ingo Molnar Signed-off-by: David S. Miller Acked-by: Randy Dunlap --- include/asm-generic/bug.h | 40 ---------------------------------------- include/linux/ratelimit.h | 40 ++++++++++++++++++++++++++++++++++++++++ net/core/filter.c | 1 + 3 files changed, 41 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 91784841e407..dfb0ec666c94 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -162,46 +162,6 @@ extern void warn_slowpath_null(const char *file, const int line); unlikely(__ret_warn_once); \ }) -#ifdef CONFIG_PRINTK - -#define WARN_ON_RATELIMIT(condition, state) \ - WARN_ON((condition) && __ratelimit(state)) - -#define __WARN_RATELIMIT(condition, state, format...) \ -({ \ - int rtn = 0; \ - if (unlikely(__ratelimit(state))) \ - rtn = WARN(condition, format); \ - rtn; \ -}) - -#define WARN_RATELIMIT(condition, format...) \ -({ \ - static DEFINE_RATELIMIT_STATE(_rs, \ - DEFAULT_RATELIMIT_INTERVAL, \ - DEFAULT_RATELIMIT_BURST); \ - __WARN_RATELIMIT(condition, &_rs, format); \ -}) - -#else - -#define WARN_ON_RATELIMIT(condition, state) \ - WARN_ON(condition) - -#define __WARN_RATELIMIT(condition, state, format...) \ -({ \ - int rtn = WARN(condition, format); \ - rtn; \ -}) - -#define WARN_RATELIMIT(condition, format...) \ -({ \ - int rtn = WARN(condition, format); \ - rtn; \ -}) - -#endif - /* * WARN_ON_SMP() is for cases that the warning is either * meaningless for !SMP or may even cause failures. diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h index 03ff67b0cdf5..2f007157fab9 100644 --- a/include/linux/ratelimit.h +++ b/include/linux/ratelimit.h @@ -41,4 +41,44 @@ extern struct ratelimit_state printk_ratelimit_state; extern int ___ratelimit(struct ratelimit_state *rs, const char *func); #define __ratelimit(state) ___ratelimit(state, __func__) +#ifdef CONFIG_PRINTK + +#define WARN_ON_RATELIMIT(condition, state) \ + WARN_ON((condition) && __ratelimit(state)) + +#define __WARN_RATELIMIT(condition, state, format...) \ +({ \ + int rtn = 0; \ + if (unlikely(__ratelimit(state))) \ + rtn = WARN(condition, format); \ + rtn; \ +}) + +#define WARN_RATELIMIT(condition, format...) \ +({ \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + __WARN_RATELIMIT(condition, &_rs, format); \ +}) + +#else + +#define WARN_ON_RATELIMIT(condition, state) \ + WARN_ON(condition) + +#define __WARN_RATELIMIT(condition, state, format...) \ +({ \ + int rtn = WARN(condition, format); \ + rtn; \ +}) + +#define WARN_RATELIMIT(condition, format...) \ +({ \ + int rtn = WARN(condition, format); \ + rtn; \ +}) + +#endif + #endif /* _LINUX_RATELIMIT_H */ diff --git a/net/core/filter.c b/net/core/filter.c index 0e3622f1dcb1..36f975fa87cb 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -38,6 +38,7 @@ #include #include #include +#include /* No hurry in this branch */ static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size) -- cgit v1.2.3 From be93d8cfbae1996052e91b2883d306a5d9d0fe18 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 26 May 2011 12:03:50 -0700 Subject: Fix build with !HUGETLBFS I stupidly broke the case of CONFIG_HUGETLBFS=n when doing the conversion to vm_flags_t in commit ca16d140af91 ("mm: don't access vm_flags as 'int'"). And my 'allyesconfig' build didn't find it, for obvious reasons.. Include in . 
The problem could have been avoided by just turning the hugetlb_file_setup() error wrapper into a macro, but mm_types.h is a reasonable include in this file. Reported-by: Richard -rw- Weinberger Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index cf8931e1dd9d..59225ef27d15 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -1,6 +1,7 @@ #ifndef _LINUX_HUGETLB_H #define _LINUX_HUGETLB_H +#include #include #include -- cgit v1.2.3 From 7fe4fc881d1340f17396f52c836e597dadadea42 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Wed, 25 May 2011 10:33:17 -0700 Subject: Intel xhci: Add PCI id for Panther Point xHCI host. This adds the PCI ID for the xHCI (USB 3.0) host controller in the Intel Panther Point chipset. It will be used by both the EHCI and xHCI driver in the following patches. Signed-off-by: Sarah Sharp --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 24787b751286..a311008af5e1 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2483,6 +2483,7 @@ #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX 0x1c5f #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0 0x1d40 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1 0x1d41 +#define PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI 0x1e31 #define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MIN 0x1e40 #define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MAX 0x1e5f #define PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MIN 0x2310 -- cgit v1.2.3 From 704f15ddb5fc2a7f25a12eb0913302d8ad9ffab3 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Thu, 26 May 2011 16:25:02 -0700 Subject: flex_array: avoid divisions when accessing elements On most architectures division is an expensive operation and accessing an element currently requires four of them. This performance penalty effectively precludes flex arrays from being used on any kind of fast path. However, two of these divisions can be handled at creation time and the others can be replaced by a reciprocal divide, completely avoiding real divisions on access. 
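In other words, the one unavoidable division is paid once at flex_array_alloc() time, and every later lookup becomes a multiply-and-shift plus a subtraction. A rough sketch with illustrative variable names, using the same reciprocal_value()/reciprocal_divide() helpers the patch itself uses:

	#include <linux/reciprocal_div.h>

	unsigned int elems_per_part, part_nr, part_offset;
	u32 reciprocal_elems;

	/* Done once at creation: the only real divide. */
	elems_per_part   = FLEX_ARRAY_ELEMENTS_PER_PART(element_size);
	reciprocal_elems = reciprocal_value(elems_per_part);

	/* Done on every access: no hardware division at all. */
	part_nr     = reciprocal_divide(element_nr, reciprocal_elems);	/* replaces '/' */
	part_offset = element_nr - part_nr * elems_per_part;		/* replaces '%' */
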
[eparis@redhat.com: rebase on top of changes to support 0 len elements] [eparis@redhat.com: initialize part_nr when array fits entirely in base] Signed-off-by: Jesse Gross Signed-off-by: Eric Paris Cc: Dave Hansen Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/flex_array.h | 2 ++ lib/flex_array.c | 51 ++++++++++++++++++++++++++-------------------- 2 files changed, 31 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h index ebeb2f3ad068..6843cf193a44 100644 --- a/include/linux/flex_array.h +++ b/include/linux/flex_array.h @@ -21,6 +21,8 @@ struct flex_array { struct { int element_size; int total_nr_elements; + int elems_per_part; + u32 reciprocal_elems; struct flex_array_part *parts[]; }; /* diff --git a/lib/flex_array.c b/lib/flex_array.c index cab7621f98aa..9b8b89458c4c 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -24,6 +24,7 @@ #include #include #include +#include struct flex_array_part { char elements[FLEX_ARRAY_PART_SIZE]; @@ -70,15 +71,15 @@ static inline int elements_fit_in_base(struct flex_array *fa) * Element size | Objects | Objects | * PAGE_SIZE=4k | 32-bit | 64-bit | * ---------------------------------| - * 1 bytes | 4186112 | 2093056 | - * 2 bytes | 2093056 | 1046528 | - * 3 bytes | 1395030 | 697515 | - * 4 bytes | 1046528 | 523264 | - * 32 bytes | 130816 | 65408 | - * 33 bytes | 126728 | 63364 | - * 2048 bytes | 2044 | 1022 | - * 2049 bytes | 1022 | 511 | - * void * | 1046528 | 261632 | + * 1 bytes | 4177920 | 2088960 | + * 2 bytes | 2088960 | 1044480 | + * 3 bytes | 1392300 | 696150 | + * 4 bytes | 1044480 | 522240 | + * 32 bytes | 130560 | 65408 | + * 33 bytes | 126480 | 63240 | + * 2048 bytes | 2040 | 1020 | + * 2049 bytes | 1020 | 510 | + * void * | 1044480 | 261120 | * * Since 64-bit pointers are twice the size, we lose half the * capacity in the base structure. 
Also note that no effort is made @@ -88,11 +89,15 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total, gfp_t flags) { struct flex_array *ret; + int elems_per_part = 0; + int reciprocal_elems = 0; int max_size = 0; - if (element_size) - max_size = FLEX_ARRAY_NR_BASE_PTRS * - FLEX_ARRAY_ELEMENTS_PER_PART(element_size); + if (element_size) { + elems_per_part = FLEX_ARRAY_ELEMENTS_PER_PART(element_size); + reciprocal_elems = reciprocal_value(elems_per_part); + max_size = FLEX_ARRAY_NR_BASE_PTRS * elems_per_part; + } /* max_size will end up 0 if element_size > PAGE_SIZE */ if (total > max_size) @@ -102,6 +107,8 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total, return NULL; ret->element_size = element_size; ret->total_nr_elements = total; + ret->elems_per_part = elems_per_part; + ret->reciprocal_elems = reciprocal_elems; if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO)) memset(&ret->parts[0], FLEX_ARRAY_FREE, FLEX_ARRAY_BASE_BYTES_LEFT); @@ -112,7 +119,7 @@ EXPORT_SYMBOL(flex_array_alloc); static int fa_element_to_part_nr(struct flex_array *fa, unsigned int element_nr) { - return element_nr / FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size); + return reciprocal_divide(element_nr, fa->reciprocal_elems); } /** @@ -141,12 +148,12 @@ void flex_array_free(struct flex_array *fa) EXPORT_SYMBOL(flex_array_free); static unsigned int index_inside_part(struct flex_array *fa, - unsigned int element_nr) + unsigned int element_nr, + unsigned int part_nr) { unsigned int part_offset; - part_offset = element_nr % - FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size); + part_offset = element_nr - part_nr * fa->elems_per_part; return part_offset * fa->element_size; } @@ -186,7 +193,7 @@ __fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags) int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, gfp_t flags) { - int part_nr; + int part_nr = 0; struct flex_array_part *part; void *dst; @@ -202,7 +209,7 @@ int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, if (!part) return -ENOMEM; } - dst = &part->elements[index_inside_part(fa, element_nr)]; + dst = &part->elements[index_inside_part(fa, element_nr, part_nr)]; memcpy(dst, src, fa->element_size); return 0; } @@ -217,7 +224,7 @@ EXPORT_SYMBOL(flex_array_put); */ int flex_array_clear(struct flex_array *fa, unsigned int element_nr) { - int part_nr; + int part_nr = 0; struct flex_array_part *part; void *dst; @@ -233,7 +240,7 @@ int flex_array_clear(struct flex_array *fa, unsigned int element_nr) if (!part) return -EINVAL; } - dst = &part->elements[index_inside_part(fa, element_nr)]; + dst = &part->elements[index_inside_part(fa, element_nr, part_nr)]; memset(dst, FLEX_ARRAY_FREE, fa->element_size); return 0; } @@ -302,7 +309,7 @@ EXPORT_SYMBOL(flex_array_prealloc); */ void *flex_array_get(struct flex_array *fa, unsigned int element_nr) { - int part_nr; + int part_nr = 0; struct flex_array_part *part; if (!fa->element_size) @@ -317,7 +324,7 @@ void *flex_array_get(struct flex_array *fa, unsigned int element_nr) if (!part) return NULL; } - return &part->elements[index_inside_part(fa, element_nr)]; + return &part->elements[index_inside_part(fa, element_nr, part_nr)]; } EXPORT_SYMBOL(flex_array_get); -- cgit v1.2.3 From 9796cc964daf7f18b4cd84c86975b3bc3804ca5a Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 26 May 2011 16:25:12 -0700 Subject: drivers/rtc/rtc-mxc.c: remove defines already included in rtc.h [akpm@linux-foundation.org: retain the code comments] 
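The retained comments spell out what each bit means; for reference, a hypothetical sketch of a consumer of these flags (rtc_fd and the handle_*() helpers are made up, only the RTC_IRQF/RTC_PF/RTC_AF/RTC_UF meanings come from rtc.h):

	unsigned long events;

	/* RTC_IRQF says "some RTC interrupt fired"; the other bits say which. */
	if (read(rtc_fd, &events, sizeof(events)) > 0 && (events & RTC_IRQF)) {
		if (events & RTC_AF)		/* alarm */
			handle_alarm();
		if (events & RTC_PF)		/* periodic tick */
			handle_tick();
		if (events & RTC_UF)		/* 1Hz update */
			handle_update();
	}
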
Signed-off-by: Wolfram Sang Cc: Vladimir Zapolskiy Cc: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-mxc.c | 6 ------ include/linux/rtc.h | 8 ++++---- 2 files changed, 4 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c index d814417bee8c..39e41fbdf08b 100644 --- a/drivers/rtc/rtc-mxc.c +++ b/drivers/rtc/rtc-mxc.c @@ -55,12 +55,6 @@ static const u32 PIE_BIT_DEF[MAX_PIE_NUM][2] = { { MAX_PIE_FREQ, RTC_SAM7_BIT }, }; -/* Those are the bits from a classic RTC we want to mimic */ -#define RTC_IRQF 0x80 /* any of the following 3 is active */ -#define RTC_PF 0x40 /* Periodic interrupt */ -#define RTC_AF 0x20 /* Alarm interrupt */ -#define RTC_UF 0x10 /* Update interrupt for 1Hz RTC */ - #define MXC_RTC_TIME 0 #define MXC_RTC_ALARM 1 diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 877ece45426f..b27ebea25660 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -92,10 +92,10 @@ struct rtc_pll_info { #define RTC_PLL_SET _IOW('p', 0x12, struct rtc_pll_info) /* Set PLL correction */ /* interrupt flags */ -#define RTC_IRQF 0x80 /* any of the following is active */ -#define RTC_PF 0x40 -#define RTC_AF 0x20 -#define RTC_UF 0x10 +#define RTC_IRQF 0x80 /* Any of the following is active */ +#define RTC_PF 0x40 /* Periodic interrupt */ +#define RTC_AF 0x20 /* Alarm interrupt */ +#define RTC_UF 0x10 /* Update interrupt for 1Hz RTC */ #ifdef __KERNEL__ -- cgit v1.2.3 From 4714d1d32d97239fb5ae3e10521d3f133a899b66 Mon Sep 17 00:00:00 2001 From: Ben Blum Date: Thu, 26 May 2011 16:25:18 -0700 Subject: cgroups: read-write lock CLONE_THREAD forking per threadgroup Adds functionality to read/write lock CLONE_THREAD fork()ing per-threadgroup Add an rwsem that lives in a threadgroup's signal_struct that's taken for reading in the fork path, under CONFIG_CGROUPS. If another part of the kernel later wants to use such a locking mechanism, the CONFIG_CGROUPS ifdefs should be changed to a higher-up flag that CGROUPS and the other system would both depend on. This is a pre-patch for cgroup-procs-write.patch. Signed-off-by: Ben Blum Cc: "Eric W. 
Biederman" Cc: Li Zefan Cc: Matt Helsley Reviewed-by: Paul Menage Cc: Oleg Nesterov Cc: David Rientjes Cc: Miao Xie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init_task.h | 9 +++++++++ include/linux/sched.h | 36 ++++++++++++++++++++++++++++++++++++ kernel/fork.c | 10 ++++++++++ 3 files changed, 55 insertions(+) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index bafc58c00fc3..580f70c02391 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -22,6 +22,14 @@ extern struct files_struct init_files; extern struct fs_struct init_fs; +#ifdef CONFIG_CGROUPS +#define INIT_THREADGROUP_FORK_LOCK(sig) \ + .threadgroup_fork_lock = \ + __RWSEM_INITIALIZER(sig.threadgroup_fork_lock), +#else +#define INIT_THREADGROUP_FORK_LOCK(sig) +#endif + #define INIT_SIGNALS(sig) { \ .nr_threads = 1, \ .wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\ @@ -38,6 +46,7 @@ extern struct fs_struct init_fs; }, \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ + INIT_THREADGROUP_FORK_LOCK(sig) \ } extern struct nsproxy init_nsproxy; diff --git a/include/linux/sched.h b/include/linux/sched.h index f18300eddfcb..dc8871295a5a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -513,6 +513,7 @@ struct thread_group_cputimer { spinlock_t lock; }; +#include struct autogroup; /* @@ -632,6 +633,16 @@ struct signal_struct { unsigned audit_tty; struct tty_audit_buf *tty_audit_buf; #endif +#ifdef CONFIG_CGROUPS + /* + * The threadgroup_fork_lock prevents threads from forking with + * CLONE_THREAD while held for writing. Use this for fork-sensitive + * threadgroup-wide operations. It's taken for reading in fork.c in + * copy_process(). + * Currently only needed write-side by cgroups. + */ + struct rw_semaphore threadgroup_fork_lock; +#endif int oom_adj; /* OOM kill score adjustment (bit shift) */ int oom_score_adj; /* OOM kill score adjustment */ @@ -2323,6 +2334,31 @@ static inline void unlock_task_sighand(struct task_struct *tsk, spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); } +/* See the declaration of threadgroup_fork_lock in signal_struct. 
*/ +#ifdef CONFIG_CGROUPS +static inline void threadgroup_fork_read_lock(struct task_struct *tsk) +{ + down_read(&tsk->signal->threadgroup_fork_lock); +} +static inline void threadgroup_fork_read_unlock(struct task_struct *tsk) +{ + up_read(&tsk->signal->threadgroup_fork_lock); +} +static inline void threadgroup_fork_write_lock(struct task_struct *tsk) +{ + down_write(&tsk->signal->threadgroup_fork_lock); +} +static inline void threadgroup_fork_write_unlock(struct task_struct *tsk) +{ + up_write(&tsk->signal->threadgroup_fork_lock); +} +#else +static inline void threadgroup_fork_read_lock(struct task_struct *tsk) {} +static inline void threadgroup_fork_read_unlock(struct task_struct *tsk) {} +static inline void threadgroup_fork_write_lock(struct task_struct *tsk) {} +static inline void threadgroup_fork_write_unlock(struct task_struct *tsk) {} +#endif + #ifndef __HAVE_THREAD_FUNCTIONS #define task_thread_info(task) ((struct thread_info *)(task)->stack) diff --git a/kernel/fork.c b/kernel/fork.c index 8e7e135d0817..1fa9d940e301 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -957,6 +957,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) tty_audit_fork(sig); sched_autogroup_fork(sig); +#ifdef CONFIG_CGROUPS + init_rwsem(&sig->threadgroup_fork_lock); +#endif + sig->oom_adj = current->signal->oom_adj; sig->oom_score_adj = current->signal->oom_score_adj; sig->oom_score_adj_min = current->signal->oom_score_adj_min; @@ -1138,6 +1142,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, monotonic_to_bootbased(&p->real_start_time); p->io_context = NULL; p->audit_context = NULL; + if (clone_flags & CLONE_THREAD) + threadgroup_fork_read_lock(current); cgroup_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_dup(p->mempolicy); @@ -1342,6 +1348,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, write_unlock_irq(&tasklist_lock); proc_fork_connector(p); cgroup_post_fork(p); + if (clone_flags & CLONE_THREAD) + threadgroup_fork_read_unlock(current); perf_event_fork(p); return p; @@ -1380,6 +1388,8 @@ bad_fork_cleanup_policy: mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: #endif + if (clone_flags & CLONE_THREAD) + threadgroup_fork_read_unlock(current); cgroup_exit(p, cgroup_callbacks_done); delayacct_tsk_free(p); module_put(task_thread_info(p)->exec_domain->module); -- cgit v1.2.3 From f780bdb7c1c73009cb57adcf99ef50027d80bf3c Mon Sep 17 00:00:00 2001 From: Ben Blum Date: Thu, 26 May 2011 16:25:19 -0700 Subject: cgroups: add per-thread subsystem callbacks Add cgroup subsystem callbacks for per-thread attachment in atomic contexts Add can_attach_task(), pre_attach(), and attach_task() as new callbacks for cgroups's subsystem interface. Unlike can_attach and attach, these are for per-thread operations, to be called potentially many times when attaching an entire threadgroup. Also, the old "bool threadgroup" interface is removed, as replaced by this. All subsystems are modified for the new interface - of note is cpuset, which requires from/to nodemasks for attach to be globally scoped (though per-cpuset would work too) to persist from its pre_attach to attach_task and attach. This is a pre-patch for cgroup-procs-writable.patch. Signed-off-by: Ben Blum Cc: "Eric W. 
Biederman" Cc: Li Zefan Cc: Matt Helsley Reviewed-by: Paul Menage Cc: Oleg Nesterov Cc: David Rientjes Cc: Miao Xie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/cgroups.txt | 30 ++++++++---- block/blk-cgroup.c | 18 +++----- include/linux/cgroup.h | 10 ++-- kernel/cgroup.c | 17 +++++-- kernel/cgroup_freezer.c | 26 ++++------- kernel/cpuset.c | 96 +++++++++++++++++++-------------------- kernel/sched.c | 38 ++-------------- mm/memcontrol.c | 18 +++----- security/device_cgroup.c | 3 +- 9 files changed, 114 insertions(+), 142 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index aedf1bd02fdd..b3bd3bdbe202 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -575,7 +575,7 @@ rmdir() will fail with it. From this behavior, pre_destroy() can be called multiple times against a cgroup. int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct task_struct *task, bool threadgroup) + struct task_struct *task) (cgroup_mutex held by caller) Called prior to moving a task into a cgroup; if the subsystem @@ -584,9 +584,14 @@ task is passed, then a successful result indicates that *any* unspecified task can be moved into the cgroup. Note that this isn't called on a fork. If this method returns 0 (success) then this should remain valid while the caller holds cgroup_mutex and it is ensured that either -attach() or cancel_attach() will be called in future. If threadgroup is -true, then a successful result indicates that all threads in the given -thread's threadgroup can be moved together. +attach() or cancel_attach() will be called in future. + +int can_attach_task(struct cgroup *cgrp, struct task_struct *tsk); +(cgroup_mutex held by caller) + +As can_attach, but for operations that must be run once per task to be +attached (possibly many when using cgroup_attach_proc). Called after +can_attach. void cancel_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, struct task_struct *task, bool threadgroup) @@ -598,15 +603,24 @@ function, so that the subsystem can implement a rollback. If not, not necessary. This will be called only about subsystems whose can_attach() operation have succeeded. +void pre_attach(struct cgroup *cgrp); +(cgroup_mutex held by caller) + +For any non-per-thread attachment work that needs to happen before +attach_task. Needed by cpuset. + void attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup *old_cgrp, struct task_struct *task, - bool threadgroup) + struct cgroup *old_cgrp, struct task_struct *task) (cgroup_mutex held by caller) Called after the task has been attached to the cgroup, to allow any post-attachment activity that requires memory allocations or blocking. -If threadgroup is true, the subsystem should take care of all threads -in the specified thread's threadgroup. Currently does not support any + +void attach_task(struct cgroup *cgrp, struct task_struct *tsk); +(cgroup_mutex held by caller) + +As attach, but for operations that must be run once per task to be attached, +like can_attach_task. Called before attach. Currently does not support any subsystem that might need the old_cgrp for every thread in the group. 
void fork(struct cgroup_subsy *ss, struct task_struct *task) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 07371cfdfae6..bcaf16ee6ad1 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -30,10 +30,8 @@ EXPORT_SYMBOL_GPL(blkio_root_cgroup); static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *, struct cgroup *); -static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *, - struct task_struct *, bool); -static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *, - struct cgroup *, struct task_struct *, bool); +static int blkiocg_can_attach_task(struct cgroup *, struct task_struct *); +static void blkiocg_attach_task(struct cgroup *, struct task_struct *); static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *); static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *); @@ -46,8 +44,8 @@ static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *); struct cgroup_subsys blkio_subsys = { .name = "blkio", .create = blkiocg_create, - .can_attach = blkiocg_can_attach, - .attach = blkiocg_attach, + .can_attach_task = blkiocg_can_attach_task, + .attach_task = blkiocg_attach_task, .destroy = blkiocg_destroy, .populate = blkiocg_populate, #ifdef CONFIG_BLK_CGROUP @@ -1616,9 +1614,7 @@ done: * of the main cic data structures. For now we allow a task to change * its cgroup only if it's the only owner of its ioc. */ -static int blkiocg_can_attach(struct cgroup_subsys *subsys, - struct cgroup *cgroup, struct task_struct *tsk, - bool threadgroup) +static int blkiocg_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk) { struct io_context *ioc; int ret = 0; @@ -1633,9 +1629,7 @@ static int blkiocg_can_attach(struct cgroup_subsys *subsys, return ret; } -static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup, - struct cgroup *prev, struct task_struct *tsk, - bool threadgroup) +static void blkiocg_attach_task(struct cgroup *cgrp, struct task_struct *tsk) { struct io_context *ioc; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 5ac7ebc36dbb..1e6cde21fa3f 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -467,12 +467,14 @@ struct cgroup_subsys { int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct task_struct *tsk, bool threadgroup); + struct task_struct *tsk); + int (*can_attach_task)(struct cgroup *cgrp, struct task_struct *tsk); void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct task_struct *tsk, bool threadgroup); + struct task_struct *tsk); + void (*pre_attach)(struct cgroup *cgrp); + void (*attach_task)(struct cgroup *cgrp, struct task_struct *tsk); void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup *old_cgrp, struct task_struct *tsk, - bool threadgroup); + struct cgroup *old_cgrp, struct task_struct *tsk); void (*fork)(struct cgroup_subsys *ss, struct task_struct *task); void (*exit)(struct cgroup_subsys *ss, struct cgroup *cgrp, struct cgroup *old_cgrp, struct task_struct *task); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 909a35510af5..38fb0ad1cb46 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1759,7 +1759,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) for_each_subsys(root, ss) { if (ss->can_attach) { - retval = ss->can_attach(ss, cgrp, tsk, false); + retval = ss->can_attach(ss, cgrp, tsk); if 
(retval) { /* * Remember on which subsystem the can_attach() @@ -1771,6 +1771,13 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) goto out; } } + if (ss->can_attach_task) { + retval = ss->can_attach_task(cgrp, tsk); + if (retval) { + failed_ss = ss; + goto out; + } + } } task_lock(tsk); @@ -1805,8 +1812,12 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) write_unlock(&css_set_lock); for_each_subsys(root, ss) { + if (ss->pre_attach) + ss->pre_attach(cgrp); + if (ss->attach_task) + ss->attach_task(cgrp, tsk); if (ss->attach) - ss->attach(ss, cgrp, oldcgrp, tsk, false); + ss->attach(ss, cgrp, oldcgrp, tsk); } set_bit(CGRP_RELEASABLE, &oldcgrp->flags); synchronize_rcu(); @@ -1829,7 +1840,7 @@ out: */ break; if (ss->cancel_attach) - ss->cancel_attach(ss, cgrp, tsk, false); + ss->cancel_attach(ss, cgrp, tsk); } } return retval; diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index e7bebb7c6c38..e691818d7e45 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -160,7 +160,7 @@ static void freezer_destroy(struct cgroup_subsys *ss, */ static int freezer_can_attach(struct cgroup_subsys *ss, struct cgroup *new_cgroup, - struct task_struct *task, bool threadgroup) + struct task_struct *task) { struct freezer *freezer; @@ -172,26 +172,17 @@ static int freezer_can_attach(struct cgroup_subsys *ss, if (freezer->state != CGROUP_THAWED) return -EBUSY; + return 0; +} + +static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk) +{ rcu_read_lock(); - if (__cgroup_freezing_or_frozen(task)) { + if (__cgroup_freezing_or_frozen(tsk)) { rcu_read_unlock(); return -EBUSY; } rcu_read_unlock(); - - if (threadgroup) { - struct task_struct *c; - - rcu_read_lock(); - list_for_each_entry_rcu(c, &task->thread_group, thread_group) { - if (__cgroup_freezing_or_frozen(c)) { - rcu_read_unlock(); - return -EBUSY; - } - } - rcu_read_unlock(); - } - return 0; } @@ -390,6 +381,9 @@ struct cgroup_subsys freezer_subsys = { .populate = freezer_populate, .subsys_id = freezer_subsys_id, .can_attach = freezer_can_attach, + .can_attach_task = freezer_can_attach_task, + .pre_attach = NULL, + .attach_task = NULL, .attach = NULL, .fork = freezer_fork, .exit = NULL, diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 2bb8c2e98fff..55b297d78adc 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1367,14 +1367,10 @@ static int fmeter_getrate(struct fmeter *fmp) return val; } -/* Protected by cgroup_lock */ -static cpumask_var_t cpus_attach; - /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, - struct task_struct *tsk, bool threadgroup) + struct task_struct *tsk) { - int ret; struct cpuset *cs = cgroup_cs(cont); if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) @@ -1391,29 +1387,42 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, if (tsk->flags & PF_THREAD_BOUND) return -EINVAL; - ret = security_task_setscheduler(tsk); - if (ret) - return ret; - if (threadgroup) { - struct task_struct *c; - - rcu_read_lock(); - list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { - ret = security_task_setscheduler(c); - if (ret) { - rcu_read_unlock(); - return ret; - } - } - rcu_read_unlock(); - } return 0; } -static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to, - struct cpuset *cs) +static int cpuset_can_attach_task(struct cgroup *cgrp, struct task_struct *task) +{ + return 
security_task_setscheduler(task); +} + +/* + * Protected by cgroup_lock. The nodemasks must be stored globally because + * dynamically allocating them is not allowed in pre_attach, and they must + * persist among pre_attach, attach_task, and attach. + */ +static cpumask_var_t cpus_attach; +static nodemask_t cpuset_attach_nodemask_from; +static nodemask_t cpuset_attach_nodemask_to; + +/* Set-up work for before attaching each task. */ +static void cpuset_pre_attach(struct cgroup *cont) +{ + struct cpuset *cs = cgroup_cs(cont); + + if (cs == &top_cpuset) + cpumask_copy(cpus_attach, cpu_possible_mask); + else + guarantee_online_cpus(cs, cpus_attach); + + guarantee_online_mems(cs, &cpuset_attach_nodemask_to); +} + +/* Per-thread attachment work. */ +static void cpuset_attach_task(struct cgroup *cont, struct task_struct *tsk) { int err; + struct cpuset *cs = cgroup_cs(cont); + /* * can_attach beforehand should guarantee that this doesn't fail. * TODO: have a better way to handle failure here @@ -1421,45 +1430,29 @@ static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to, err = set_cpus_allowed_ptr(tsk, cpus_attach); WARN_ON_ONCE(err); - cpuset_change_task_nodemask(tsk, to); + cpuset_change_task_nodemask(tsk, &cpuset_attach_nodemask_to); cpuset_update_task_spread_flag(cs, tsk); - } static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, - struct cgroup *oldcont, struct task_struct *tsk, - bool threadgroup) + struct cgroup *oldcont, struct task_struct *tsk) { struct mm_struct *mm; struct cpuset *cs = cgroup_cs(cont); struct cpuset *oldcs = cgroup_cs(oldcont); - static nodemask_t to; /* protected by cgroup_mutex */ - if (cs == &top_cpuset) { - cpumask_copy(cpus_attach, cpu_possible_mask); - } else { - guarantee_online_cpus(cs, cpus_attach); - } - guarantee_online_mems(cs, &to); - - /* do per-task migration stuff possibly for each in the threadgroup */ - cpuset_attach_task(tsk, &to, cs); - if (threadgroup) { - struct task_struct *c; - rcu_read_lock(); - list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { - cpuset_attach_task(c, &to, cs); - } - rcu_read_unlock(); - } - - /* change mm; only needs to be done once even if threadgroup */ - to = cs->mems_allowed; + /* + * Change mm, possibly for multiple threads in a threadgroup. This is + * expensive and may sleep. 
+ */ + cpuset_attach_nodemask_from = oldcs->mems_allowed; + cpuset_attach_nodemask_to = cs->mems_allowed; mm = get_task_mm(tsk); if (mm) { - mpol_rebind_mm(mm, &to); + mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); if (is_memory_migrate(cs)) - cpuset_migrate_mm(mm, &oldcs->mems_allowed, &to); + cpuset_migrate_mm(mm, &cpuset_attach_nodemask_from, + &cpuset_attach_nodemask_to); mmput(mm); } } @@ -1911,6 +1904,9 @@ struct cgroup_subsys cpuset_subsys = { .create = cpuset_create, .destroy = cpuset_destroy, .can_attach = cpuset_can_attach, + .can_attach_task = cpuset_can_attach_task, + .pre_attach = cpuset_pre_attach, + .attach_task = cpuset_attach_task, .attach = cpuset_attach, .populate = cpuset_populate, .post_clone = cpuset_post_clone, diff --git a/kernel/sched.c b/kernel/sched.c index 2d12893b8b0f..5e43e9dc65d1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -8764,42 +8764,10 @@ cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk) return 0; } -static int -cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct task_struct *tsk, bool threadgroup) -{ - int retval = cpu_cgroup_can_attach_task(cgrp, tsk); - if (retval) - return retval; - if (threadgroup) { - struct task_struct *c; - rcu_read_lock(); - list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { - retval = cpu_cgroup_can_attach_task(cgrp, c); - if (retval) { - rcu_read_unlock(); - return retval; - } - } - rcu_read_unlock(); - } - return 0; -} - static void -cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup *old_cont, struct task_struct *tsk, - bool threadgroup) +cpu_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) { sched_move_task(tsk); - if (threadgroup) { - struct task_struct *c; - rcu_read_lock(); - list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { - sched_move_task(c); - } - rcu_read_unlock(); - } } static void @@ -8887,8 +8855,8 @@ struct cgroup_subsys cpu_cgroup_subsys = { .name = "cpu", .create = cpu_cgroup_create, .destroy = cpu_cgroup_destroy, - .can_attach = cpu_cgroup_can_attach, - .attach = cpu_cgroup_attach, + .can_attach_task = cpu_cgroup_can_attach_task, + .attach_task = cpu_cgroup_attach_task, .exit = cpu_cgroup_exit, .populate = cpu_cgroup_populate, .subsys_id = cpu_cgroup_subsys_id, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d5fd3dcd3f2e..fc259926c170 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4953,8 +4953,7 @@ static void mem_cgroup_clear_mc(void) static int mem_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgroup, - struct task_struct *p, - bool threadgroup) + struct task_struct *p) { int ret = 0; struct mem_cgroup *mem = mem_cgroup_from_cont(cgroup); @@ -4993,8 +4992,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss, static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss, struct cgroup *cgroup, - struct task_struct *p, - bool threadgroup) + struct task_struct *p) { mem_cgroup_clear_mc(); } @@ -5112,8 +5110,7 @@ retry: static void mem_cgroup_move_task(struct cgroup_subsys *ss, struct cgroup *cont, struct cgroup *old_cont, - struct task_struct *p, - bool threadgroup) + struct task_struct *p) { struct mm_struct *mm; @@ -5131,22 +5128,19 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, #else /* !CONFIG_MMU */ static int mem_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgroup, - struct task_struct *p, - bool threadgroup) + struct task_struct *p) { return 0; } static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss, 
struct cgroup *cgroup, - struct task_struct *p, - bool threadgroup) + struct task_struct *p) { } static void mem_cgroup_move_task(struct cgroup_subsys *ss, struct cgroup *cont, struct cgroup *old_cont, - struct task_struct *p, - bool threadgroup) + struct task_struct *p) { } #endif diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 8d9c48f13774..cd1f779fa51d 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -62,8 +62,7 @@ static inline struct dev_cgroup *task_devcgroup(struct task_struct *task) struct cgroup_subsys devices_subsys; static int devcgroup_can_attach(struct cgroup_subsys *ss, - struct cgroup *new_cgroup, struct task_struct *task, - bool threadgroup) + struct cgroup *new_cgroup, struct task_struct *task) { if (current != task && !capable(CAP_SYS_ADMIN)) return -EPERM; -- cgit v1.2.3 From a77aea92010acf54ad785047234418d5d68772e2 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 26 May 2011 16:25:23 -0700 Subject: cgroup: remove the ns_cgroup The ns_cgroup is an annoying cgroup at the namespace / cgroup frontier and leads to some problems: * cgroup creation is out-of-control * cgroup name can conflict when pids are looping * it is not possible to have a single process handling a lot of namespaces without falling in a exponential creation time * we may want to create a namespace without creating a cgroup The ns_cgroup was replaced by a compatibility flag 'clone_children', where a newly created cgroup will copy the parent cgroup values. The userspace has to manually create a cgroup and add a task to the 'tasks' file. This patch removes the ns_cgroup as suggested in the following thread: https://lists.linux-foundation.org/pipermail/containers/2009-June/018616.html The 'cgroup_clone' function is removed because it is no longer used. This is a userspace-visible change. Commit 45531757b45c ("cgroup: notify ns_cgroup deprecated") (merged into 2.6.27) caused the kernel to emit a printk warning users that the feature is planned for removal. Since that time we have heard from XXX users who were affected by this. Signed-off-by: Daniel Lezcano Signed-off-by: Serge E. Hallyn Cc: Eric W. Biederman Cc: Jamal Hadi Salim Reviewed-by: Li Zefan Acked-by: Paul Menage Acked-by: Matt Helsley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/cgroups.txt | 2 +- arch/mips/configs/bcm47xx_defconfig | 1 - arch/mn10300/configs/asb2364_defconfig | 1 - arch/powerpc/configs/ppc6xx_defconfig | 1 - arch/powerpc/configs/pseries_defconfig | 1 - arch/sh/configs/apsh4ad0a_defconfig | 1 - arch/sh/configs/sdk7786_defconfig | 1 - arch/sh/configs/se7206_defconfig | 1 - arch/sh/configs/shx3_defconfig | 1 - arch/sh/configs/urquell_defconfig | 1 - arch/x86/configs/i386_defconfig | 1 - arch/x86/configs/x86_64_defconfig | 1 - include/linux/cgroup.h | 3 - include/linux/cgroup_subsys.h | 6 -- include/linux/nsproxy.h | 9 --- init/Kconfig | 8 --- kernel/Makefile | 1 - kernel/cgroup.c | 116 -------------------------------- kernel/cpuset.c | 7 +- kernel/fork.c | 6 -- kernel/ns_cgroup.c | 118 --------------------------------- kernel/nsproxy.c | 4 -- 22 files changed, 4 insertions(+), 287 deletions(-) delete mode 100644 kernel/ns_cgroup.c (limited to 'include/linux') diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 8c4f3466c894..0ed99f08f1f3 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -651,7 +651,7 @@ always handled well. 
void post_clone(struct cgroup_subsys *ss, struct cgroup *cgrp) (cgroup_mutex held by caller) -Called at the end of cgroup_clone() to do any parameter +Called during cgroup_create() to do any parameter initialization which might be required before a task could attach. For example in cpusets, no task may attach before 'cpus' and 'mems' are set up. diff --git a/arch/mips/configs/bcm47xx_defconfig b/arch/mips/configs/bcm47xx_defconfig index 22fdf2f0cc23..ad15fb10322b 100644 --- a/arch/mips/configs/bcm47xx_defconfig +++ b/arch/mips/configs/bcm47xx_defconfig @@ -16,7 +16,6 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_AUDIT=y CONFIG_TINY_RCU=y CONFIG_CGROUPS=y -CONFIG_CGROUP_NS=y CONFIG_CGROUP_CPUACCT=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y diff --git a/arch/mn10300/configs/asb2364_defconfig b/arch/mn10300/configs/asb2364_defconfig index 31d76261a3d5..fbb96ae3122a 100644 --- a/arch/mn10300/configs/asb2364_defconfig +++ b/arch/mn10300/configs/asb2364_defconfig @@ -8,7 +8,6 @@ CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_CGROUPS=y -CONFIG_CGROUP_NS=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 214208924a9c..04360f9b0109 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -10,7 +10,6 @@ CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_AUDIT=y CONFIG_CGROUPS=y -CONFIG_CGROUP_NS=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 7de13865508c..c9f212b5f3de 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -15,7 +15,6 @@ CONFIG_AUDITSYSCALL=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CGROUPS=y -CONFIG_CGROUP_NS=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig index 77ec0e7b8ddf..e7583484cc07 100644 --- a/arch/sh/configs/apsh4ad0a_defconfig +++ b/arch/sh/configs/apsh4ad0a_defconfig @@ -7,7 +7,6 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_CGROUPS=y -CONFIG_CGROUP_NS=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig index c41650572d79..8a7dd7b59c5c 100644 --- a/arch/sh/configs/sdk7786_defconfig +++ b/arch/sh/configs/sdk7786_defconfig @@ -12,7 +12,6 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CGROUPS=y CONFIG_CGROUP_DEBUG=y -CONFIG_CGROUP_NS=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig index a468ff227fc6..72c3fad7383f 100644 --- a/arch/sh/configs/se7206_defconfig +++ b/arch/sh/configs/se7206_defconfig @@ -8,7 +8,6 @@ CONFIG_RCU_TRACE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_CGROUPS=y CONFIG_CGROUP_DEBUG=y -CONFIG_CGROUP_NS=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y diff --git a/arch/sh/configs/shx3_defconfig b/arch/sh/configs/shx3_defconfig index 3f92d37c6374..6bb413036892 100644 --- a/arch/sh/configs/shx3_defconfig +++ b/arch/sh/configs/shx3_defconfig @@ -9,7 +9,6 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_CGROUPS=y -CONFIG_CGROUP_NS=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y diff --git 
a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig index 7b3daec6fefe..8bfa4d056d7a 100644 --- a/arch/sh/configs/urquell_defconfig +++ b/arch/sh/configs/urquell_defconfig @@ -9,7 +9,6 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_CGROUPS=y CONFIG_CGROUP_DEBUG=y -CONFIG_CGROUP_NS=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 6f9872658dd2..2bf18059fbea 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -10,7 +10,6 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_AUDIT=y CONFIG_LOG_BUF_SHIFT=18 CONFIG_CGROUPS=y -CONFIG_CGROUP_NS=y CONFIG_CGROUP_FREEZER=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index ee01a9d5d4f0..22a0dc8e51dd 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -11,7 +11,6 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_AUDIT=y CONFIG_LOG_BUF_SHIFT=18 CONFIG_CGROUPS=y -CONFIG_CGROUP_NS=y CONFIG_CGROUP_FREEZER=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 1e6cde21fa3f..ab4ac0ccb857 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -555,9 +555,6 @@ static inline struct cgroup* task_cgroup(struct task_struct *task, return task_subsys_state(task, subsys_id)->cgroup; } -int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss, - char *nodename); - /* A cgroup_iter should be treated as an opaque object */ struct cgroup_iter { struct list_head *cg_link; diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index cdbfcb8780ec..ac663c18776c 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -19,12 +19,6 @@ SUBSYS(debug) /* */ -#ifdef CONFIG_CGROUP_NS -SUBSYS(ns) -#endif - -/* */ - #ifdef CONFIG_CGROUP_SCHED SUBSYS(cpu_cgroup) #endif diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 7b370c7cfeff..50d20aba57d3 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -81,13 +81,4 @@ static inline void get_nsproxy(struct nsproxy *ns) atomic_inc(&ns->count); } -#ifdef CONFIG_CGROUP_NS -int ns_cgroup_clone(struct task_struct *tsk, struct pid *pid); -#else -static inline int ns_cgroup_clone(struct task_struct *tsk, struct pid *pid) -{ - return 0; -} -#endif - #endif diff --git a/init/Kconfig b/init/Kconfig index 332aac649966..ebafac4231ee 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -589,14 +589,6 @@ config CGROUP_DEBUG Say N if unsure. -config CGROUP_NS - bool "Namespace cgroup subsystem" - help - Provides a simple namespace cgroup subsystem to - provide hierarchical naming of sets of namespaces, - for instance virtual servers and checkpoint/restart - jobs. 
- config CGROUP_FREEZER bool "Freezer cgroup subsystem" help diff --git a/kernel/Makefile b/kernel/Makefile index e9cf19155b46..2d64cfcc8b42 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -61,7 +61,6 @@ obj-$(CONFIG_COMPAT) += compat.o obj-$(CONFIG_CGROUPS) += cgroup.o obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o obj-$(CONFIG_CPUSETS) += cpuset.o -obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o obj-$(CONFIG_UTS_NS) += utsname.o obj-$(CONFIG_USER_NS) += user_namespace.o obj-$(CONFIG_PID_NS) += pid_namespace.o diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 00a884342d3d..2731d115d725 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4629,122 +4629,6 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) put_css_set_taskexit(cg); } -/** - * cgroup_clone - clone the cgroup the given subsystem is attached to - * @tsk: the task to be moved - * @subsys: the given subsystem - * @nodename: the name for the new cgroup - * - * Duplicate the current cgroup in the hierarchy that the given - * subsystem is attached to, and move this task into the new - * child. - */ -int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, - char *nodename) -{ - struct dentry *dentry; - int ret = 0; - struct cgroup *parent, *child; - struct inode *inode; - struct css_set *cg; - struct cgroupfs_root *root; - struct cgroup_subsys *ss; - - /* We shouldn't be called by an unregistered subsystem */ - BUG_ON(!subsys->active); - - /* First figure out what hierarchy and cgroup we're dealing - * with, and pin them so we can drop cgroup_mutex */ - mutex_lock(&cgroup_mutex); - again: - root = subsys->root; - if (root == &rootnode) { - mutex_unlock(&cgroup_mutex); - return 0; - } - - /* Pin the hierarchy */ - if (!atomic_inc_not_zero(&root->sb->s_active)) { - /* We race with the final deactivate_super() */ - mutex_unlock(&cgroup_mutex); - return 0; - } - - /* Keep the cgroup alive */ - task_lock(tsk); - parent = task_cgroup(tsk, subsys->subsys_id); - cg = tsk->cgroups; - get_css_set(cg); - task_unlock(tsk); - - mutex_unlock(&cgroup_mutex); - - /* Now do the VFS work to create a cgroup */ - inode = parent->dentry->d_inode; - - /* Hold the parent directory mutex across this operation to - * stop anyone else deleting the new cgroup */ - mutex_lock(&inode->i_mutex); - dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename)); - if (IS_ERR(dentry)) { - printk(KERN_INFO - "cgroup: Couldn't allocate dentry for %s: %ld\n", nodename, - PTR_ERR(dentry)); - ret = PTR_ERR(dentry); - goto out_release; - } - - /* Create the cgroup directory, which also creates the cgroup */ - ret = vfs_mkdir(inode, dentry, 0755); - child = __d_cgrp(dentry); - dput(dentry); - if (ret) { - printk(KERN_INFO - "Failed to create cgroup %s: %d\n", nodename, - ret); - goto out_release; - } - - /* The cgroup now exists. Retake cgroup_mutex and check - * that we're still in the same state that we thought we - * were. */ - mutex_lock(&cgroup_mutex); - if ((root != subsys->root) || - (parent != task_cgroup(tsk, subsys->subsys_id))) { - /* Aargh, we raced ... */ - mutex_unlock(&inode->i_mutex); - put_css_set(cg); - - deactivate_super(root->sb); - /* The cgroup is still accessible in the VFS, but - * we're not going to try to rmdir() it at this - * point. */ - printk(KERN_INFO - "Race in cgroup_clone() - leaking cgroup %s\n", - nodename); - goto again; - } - - /* do any required auto-setup */ - for_each_subsys(root, ss) { - if (ss->post_clone) - ss->post_clone(ss, child); - } - - /* All seems fine. 
Finish by moving the task into the new cgroup */ - ret = cgroup_attach_task(child, tsk); - mutex_unlock(&cgroup_mutex); - - out_release: - mutex_unlock(&inode->i_mutex); - - mutex_lock(&cgroup_mutex); - put_css_set(cg); - mutex_unlock(&cgroup_mutex); - deactivate_super(root->sb); - return ret; -} - /** * cgroup_is_descendant - see if @cgrp is a descendant of @task's cgrp * @cgrp: the cgroup in question diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 55b297d78adc..1ceeb049c827 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1802,10 +1802,9 @@ static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont) } /* - * post_clone() is called at the end of cgroup_clone(). - * 'cgroup' was just created automatically as a result of - * a cgroup_clone(), and the current task is about to - * be moved into 'cgroup'. + * post_clone() is called during cgroup_create() when the + * clone_children mount argument was specified. The cgroup + * can not yet have any tasks. * * Currently we refuse to set up the cgroup - thereby * refusing the task to be entered, and as a result refusing diff --git a/kernel/fork.c b/kernel/fork.c index 1fa9d940e301..1f84099ecce6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1229,12 +1229,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; - if (current->nsproxy != p->nsproxy) { - retval = ns_cgroup_clone(p, pid); - if (retval) - goto bad_fork_free_pid; - } - p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c deleted file mode 100644 index 2c98ad94ba0e..000000000000 --- a/kernel/ns_cgroup.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * ns_cgroup.c - namespace cgroup subsystem - * - * Copyright 2006, 2007 IBM Corp - */ - -#include -#include -#include -#include -#include -#include - -struct ns_cgroup { - struct cgroup_subsys_state css; -}; - -struct cgroup_subsys ns_subsys; - -static inline struct ns_cgroup *cgroup_to_ns( - struct cgroup *cgroup) -{ - return container_of(cgroup_subsys_state(cgroup, ns_subsys_id), - struct ns_cgroup, css); -} - -int ns_cgroup_clone(struct task_struct *task, struct pid *pid) -{ - char name[PROC_NUMBUF]; - - snprintf(name, PROC_NUMBUF, "%d", pid_vnr(pid)); - return cgroup_clone(task, &ns_subsys, name); -} - -/* - * Rules: - * 1. you can only enter a cgroup which is a descendant of your current - * cgroup - * 2. you can only place another process into a cgroup if - * a. you have CAP_SYS_ADMIN - * b. your cgroup is an ancestor of task's destination cgroup - * (hence either you are in the same cgroup as task, or in an - * ancestor cgroup thereof) - */ -static int ns_can_attach(struct cgroup_subsys *ss, struct cgroup *new_cgroup, - struct task_struct *task, bool threadgroup) -{ - if (current != task) { - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (!cgroup_is_descendant(new_cgroup, current)) - return -EPERM; - } - - if (!cgroup_is_descendant(new_cgroup, task)) - return -EPERM; - - if (threadgroup) { - struct task_struct *c; - rcu_read_lock(); - list_for_each_entry_rcu(c, &task->thread_group, thread_group) { - if (!cgroup_is_descendant(new_cgroup, c)) { - rcu_read_unlock(); - return -EPERM; - } - } - rcu_read_unlock(); - } - - return 0; -} - -/* - * Rules: you can only create a cgroup if - * 1. you are capable(CAP_SYS_ADMIN) - * 2. 
the target cgroup is a descendant of your own cgroup - */ -static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss, - struct cgroup *cgroup) -{ - struct ns_cgroup *ns_cgroup; - - if (!capable(CAP_SYS_ADMIN)) - return ERR_PTR(-EPERM); - if (!cgroup_is_descendant(cgroup, current)) - return ERR_PTR(-EPERM); - if (test_bit(CGRP_CLONE_CHILDREN, &cgroup->flags)) { - printk("ns_cgroup can't be created with parent " - "'clone_children' set.\n"); - return ERR_PTR(-EINVAL); - } - - printk_once("ns_cgroup deprecated: consider using the " - "'clone_children' flag without the ns_cgroup.\n"); - - ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL); - if (!ns_cgroup) - return ERR_PTR(-ENOMEM); - return &ns_cgroup->css; -} - -static void ns_destroy(struct cgroup_subsys *ss, - struct cgroup *cgroup) -{ - struct ns_cgroup *ns_cgroup; - - ns_cgroup = cgroup_to_ns(cgroup); - kfree(ns_cgroup); -} - -struct cgroup_subsys ns_subsys = { - .name = "ns", - .can_attach = ns_can_attach, - .create = ns_create, - .destroy = ns_destroy, - .subsys_id = ns_subsys_id, -}; diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 5424e37673ed..d6a00f3de15d 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -201,10 +201,6 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, goto out; } - err = ns_cgroup_clone(current, task_pid(current)); - if (err) - put_nsproxy(*new_nsp); - out: return err; } -- cgit v1.2.3 From f042e707ee671e4beb5389abeb9a1819a2cf5532 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 26 May 2011 16:25:24 -0700 Subject: mm: move enum vm_event_item into a standalone header file enums are problematic because they cannot be forward-declared: akpm2:/home/akpm> cat t.c enum foo; static inline void bar(enum foo f) { } akpm2:/home/akpm> gcc -c t.c t.c:4: error: parameter 1 ('f') has incomplete type So move the enum's definition into a standalone header file which can be used wherever its definition is needed. 
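As a minimal illustration (not part of the patch itself) of what the standalone header buys: a header that only needs the enum for a prototype can include the small vm_event_item.h instead of pulling in all of vmstat.h. The consumer below is only a sketch; include/linux/memcontrol.h adopts exactly this pattern in a later patch in this series.

    #include <linux/vm_event_item.h>

    struct mm_struct;

    /* compiles standalone: enum vm_event_item is fully defined above */
    void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);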
Cc: Ying Han Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Minchan Kim Cc: Daisuke Nishimura Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vm_event_item.h | 64 +++++++++++++++++++++++++++++++++++++++++++ include/linux/vmstat.h | 62 +---------------------------------------- 2 files changed, 65 insertions(+), 61 deletions(-) create mode 100644 include/linux/vm_event_item.h (limited to 'include/linux') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h new file mode 100644 index 000000000000..03b90cdc1921 --- /dev/null +++ b/include/linux/vm_event_item.h @@ -0,0 +1,64 @@ +#ifndef VM_EVENT_ITEM_H_INCLUDED +#define VM_EVENT_ITEM_H_INCLUDED + +#ifdef CONFIG_ZONE_DMA +#define DMA_ZONE(xx) xx##_DMA, +#else +#define DMA_ZONE(xx) +#endif + +#ifdef CONFIG_ZONE_DMA32 +#define DMA32_ZONE(xx) xx##_DMA32, +#else +#define DMA32_ZONE(xx) +#endif + +#ifdef CONFIG_HIGHMEM +#define HIGHMEM_ZONE(xx) , xx##_HIGH +#else +#define HIGHMEM_ZONE(xx) +#endif + +#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx) , xx##_MOVABLE + +enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, + FOR_ALL_ZONES(PGALLOC), + PGFREE, PGACTIVATE, PGDEACTIVATE, + PGFAULT, PGMAJFAULT, + FOR_ALL_ZONES(PGREFILL), + FOR_ALL_ZONES(PGSTEAL), + FOR_ALL_ZONES(PGSCAN_KSWAPD), + FOR_ALL_ZONES(PGSCAN_DIRECT), +#ifdef CONFIG_NUMA + PGSCAN_ZONE_RECLAIM_FAILED, +#endif + PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL, + KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY, + KSWAPD_SKIP_CONGESTION_WAIT, + PAGEOUTRUN, ALLOCSTALL, PGROTATED, +#ifdef CONFIG_COMPACTION + COMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED, + COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS, +#endif +#ifdef CONFIG_HUGETLB_PAGE + HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, +#endif + UNEVICTABLE_PGCULLED, /* culled to noreclaim list */ + UNEVICTABLE_PGSCANNED, /* scanned for reclaimability */ + UNEVICTABLE_PGRESCUED, /* rescued from noreclaim list */ + UNEVICTABLE_PGMLOCKED, + UNEVICTABLE_PGMUNLOCKED, + UNEVICTABLE_PGCLEARED, /* on COW, page truncate */ + UNEVICTABLE_PGSTRANDED, /* unable to isolate on unlock */ + UNEVICTABLE_MLOCKFREED, +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + THP_FAULT_ALLOC, + THP_FAULT_FALLBACK, + THP_COLLAPSE_ALLOC, + THP_COLLAPSE_ALLOC_FAILED, + THP_SPLIT, +#endif + NR_VM_EVENT_ITEMS +}; + +#endif /* VM_EVENT_ITEM_H_INCLUDED */ diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 51359837511a..bcd942fa611c 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -5,69 +5,9 @@ #include #include #include +#include #include -#ifdef CONFIG_ZONE_DMA -#define DMA_ZONE(xx) xx##_DMA, -#else -#define DMA_ZONE(xx) -#endif - -#ifdef CONFIG_ZONE_DMA32 -#define DMA32_ZONE(xx) xx##_DMA32, -#else -#define DMA32_ZONE(xx) -#endif - -#ifdef CONFIG_HIGHMEM -#define HIGHMEM_ZONE(xx) , xx##_HIGH -#else -#define HIGHMEM_ZONE(xx) -#endif - - -#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx) , xx##_MOVABLE - -enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, - FOR_ALL_ZONES(PGALLOC), - PGFREE, PGACTIVATE, PGDEACTIVATE, - PGFAULT, PGMAJFAULT, - FOR_ALL_ZONES(PGREFILL), - FOR_ALL_ZONES(PGSTEAL), - FOR_ALL_ZONES(PGSCAN_KSWAPD), - FOR_ALL_ZONES(PGSCAN_DIRECT), -#ifdef CONFIG_NUMA - PGSCAN_ZONE_RECLAIM_FAILED, -#endif - PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL, - KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY, - KSWAPD_SKIP_CONGESTION_WAIT, - PAGEOUTRUN, ALLOCSTALL, 
PGROTATED, -#ifdef CONFIG_COMPACTION - COMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED, - COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS, -#endif -#ifdef CONFIG_HUGETLB_PAGE - HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, -#endif - UNEVICTABLE_PGCULLED, /* culled to noreclaim list */ - UNEVICTABLE_PGSCANNED, /* scanned for reclaimability */ - UNEVICTABLE_PGRESCUED, /* rescued from noreclaim list */ - UNEVICTABLE_PGMLOCKED, - UNEVICTABLE_PGMUNLOCKED, - UNEVICTABLE_PGCLEARED, /* on COW, page truncate */ - UNEVICTABLE_PGSTRANDED, /* unable to isolate on unlock */ - UNEVICTABLE_MLOCKFREED, -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - THP_FAULT_ALLOC, - THP_FAULT_FALLBACK, - THP_COLLAPSE_ALLOC, - THP_COLLAPSE_ALLOC_FAILED, - THP_SPLIT, -#endif - NR_VM_EVENT_ITEMS -}; - extern int sysctl_stat_interval; #ifdef CONFIG_VM_EVENT_COUNTERS -- cgit v1.2.3 From 0ae5e89c60c9eb87da36a2614836bc434b0ec2ad Mon Sep 17 00:00:00 2001 From: Ying Han Date: Thu, 26 May 2011 16:25:25 -0700 Subject: memcg: count the soft_limit reclaim in global background reclaim The global kswapd scans per-zone LRU and reclaims pages regardless of the cgroup. It breaks memory isolation since one cgroup can end up reclaiming pages from another cgroup. Instead we should rely on memcg-aware target reclaim including per-memcg kswapd and soft_limit hierarchical reclaim under memory pressure. In the global background reclaim, we do soft reclaim before scanning the per-zone LRU. However, the return value is ignored. This patch is the first step to skip shrink_zone() if soft_limit reclaim does enough work. This is part of the effort which tries to reduce reclaiming pages in global LRU in memcg. The per-memcg background reclaim patchset further enhances the per-cgroup targetting reclaim, which I should have V4 posted shortly. Try running multiple memory intensive workloads within seperate memcgs. Watch the counters of soft_steal in memory.stat. $ cat /dev/cgroup/A/memory.stat | grep 'soft' soft_steal 240000 soft_scan 240000 total_soft_steal 240000 total_soft_scan 240000 This patch: In the global background reclaim, we do soft reclaim before scanning the per-zone LRU. However, the return value is ignored. We would like to skip shrink_zone() if soft_limit reclaim does enough work. Also, we need to make the memory pressure balanced across per-memcg zones, like the logic vm-core. This patch is the first step where we start with counting the nr_scanned and nr_reclaimed from soft_limit reclaim into the global scan_control. 
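A rough sketch of the resulting calling convention (mirroring the mm/vmscan.c hunk below): mem_cgroup_soft_limit_reclaim() gains an out-parameter for the number of pages scanned, and the kswapd loop folds both values into its scan_control accounting instead of discarding them.

    unsigned long nr_soft_scanned = 0;
    unsigned long nr_soft_reclaimed;

    /* soft limit reclaim ahead of shrink_zone(); account what it did */
    nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, order,
                                                      sc.gfp_mask,
                                                      &nr_soft_scanned);
    sc.nr_reclaimed += nr_soft_reclaimed;
    total_scanned += nr_soft_scanned;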
Signed-off-by: Ying Han Cc: KOSAKI Motohiro Cc: Minchan Kim Cc: Rik van Riel Cc: Mel Gorman Cc: KAMEZAWA Hiroyuki Cc: Balbir Singh Acked-by: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++++-- include/linux/swap.h | 3 ++- mm/memcontrol.c | 29 ++++++++++++++++++++--------- mm/vmscan.c | 16 +++++++++++++--- 4 files changed, 39 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5e9840f50980..0629121f2c0b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -144,7 +144,8 @@ static inline void mem_cgroup_dec_page_stat(struct page *page, } unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, - gfp_t gfp_mask); + gfp_t gfp_mask, + unsigned long *total_scanned); u64 mem_cgroup_get_limit(struct mem_cgroup *mem); #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -338,7 +339,8 @@ static inline void mem_cgroup_dec_page_stat(struct page *page, static inline unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, - gfp_t gfp_mask) + gfp_t gfp_mask, + unsigned long *total_scanned) { return 0; } diff --git a/include/linux/swap.h b/include/linux/swap.h index a5c6da5d8df8..384eb5fe530b 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -257,7 +257,8 @@ extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, unsigned int swappiness, - struct zone *zone); + struct zone *zone, + unsigned long *nr_scanned); extern int __isolate_lru_page(struct page *page, int mode, int file); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fc259926c170..e41a6c26f1e7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1433,7 +1433,8 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem) static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, struct zone *zone, gfp_t gfp_mask, - unsigned long reclaim_options) + unsigned long reclaim_options, + unsigned long *total_scanned) { struct mem_cgroup *victim; int ret, total = 0; @@ -1442,6 +1443,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; unsigned long excess; + unsigned long nr_scanned; excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; @@ -1484,10 +1486,12 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, continue; } /* we use swappiness of local cgroup */ - if (check_soft) + if (check_soft) { ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, - noswap, get_swappiness(victim), zone); - else + noswap, get_swappiness(victim), zone, + &nr_scanned); + *total_scanned += nr_scanned; + } else ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, noswap, get_swappiness(victim)); css_put(&victim->css); @@ -1928,7 +1932,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask, return CHARGE_WOULDBLOCK; ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL, - gfp_mask, flags); + gfp_mask, flags, NULL); if (mem_cgroup_margin(mem_over_limit) >= nr_pages) return CHARGE_RETRY; /* @@ -3211,7 +3215,8 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, break; mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL, - 
MEM_CGROUP_RECLAIM_SHRINK); + MEM_CGROUP_RECLAIM_SHRINK, + NULL); curusage = res_counter_read_u64(&memcg->res, RES_USAGE); /* Usage is reduced ? */ if (curusage >= oldusage) @@ -3271,7 +3276,8 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL, MEM_CGROUP_RECLAIM_NOSWAP | - MEM_CGROUP_RECLAIM_SHRINK); + MEM_CGROUP_RECLAIM_SHRINK, + NULL); curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); /* Usage is reduced ? */ if (curusage >= oldusage) @@ -3285,7 +3291,8 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, } unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, - gfp_t gfp_mask) + gfp_t gfp_mask, + unsigned long *total_scanned) { unsigned long nr_reclaimed = 0; struct mem_cgroup_per_zone *mz, *next_mz = NULL; @@ -3293,6 +3300,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, int loop = 0; struct mem_cgroup_tree_per_zone *mctz; unsigned long long excess; + unsigned long nr_scanned; if (order > 0) return 0; @@ -3311,10 +3319,13 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, if (!mz) break; + nr_scanned = 0; reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone, gfp_mask, - MEM_CGROUP_RECLAIM_SOFT); + MEM_CGROUP_RECLAIM_SOFT, + &nr_scanned); nr_reclaimed += reclaimed; + *total_scanned += nr_scanned; spin_lock(&mctz->lock); /* diff --git a/mm/vmscan.c b/mm/vmscan.c index 7e0116150dc7..9ce6ec84328e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2171,9 +2171,11 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, unsigned int swappiness, - struct zone *zone) + struct zone *zone, + unsigned long *nr_scanned) { struct scan_control sc = { + .nr_scanned = 0, .nr_to_reclaim = SWAP_CLUSTER_MAX, .may_writepage = !laptop_mode, .may_unmap = 1, @@ -2182,6 +2184,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, .order = 0, .mem_cgroup = mem, }; + sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); @@ -2200,6 +2203,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); + *nr_scanned = sc.nr_scanned; return sc.nr_reclaimed; } @@ -2347,6 +2351,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ unsigned long total_scanned; struct reclaim_state *reclaim_state = current->reclaim_state; + unsigned long nr_soft_reclaimed; + unsigned long nr_soft_scanned; struct scan_control sc = { .gfp_mask = GFP_KERNEL, .may_unmap = 1, @@ -2439,11 +2445,15 @@ loop_again: sc.nr_scanned = 0; + nr_soft_scanned = 0; /* * Call soft limit reclaim before calling shrink_zone. - * For now we ignore the return value */ - mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask); + nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, + order, sc.gfp_mask, + &nr_soft_scanned); + sc.nr_reclaimed += nr_soft_reclaimed; + total_scanned += nr_soft_scanned; /* * We put equal pressure on every zone, unless -- cgit v1.2.3 From 889976dbcb1218119fdd950fb7819084e37d7d37 Mon Sep 17 00:00:00 2001 From: Ying Han Date: Thu, 26 May 2011 16:25:33 -0700 Subject: memcg: reclaim memory from nodes in round-robin order Presently, memory cgroup's direct reclaim frees memory from the current node. But this has some troubles. 
Usually when a set of threads works in a cooperative way, they tend to operate on the same node. So if they hit limits under memcg they will reclaim memory from themselves, damaging the active working set. For example, assume 2 node system which has Node 0 and Node 1 and a memcg which has 1G limit. After some work, file cache remains and the usages are Node 0: 1M Node 1: 998M. and run an application on Node 0, it will eat its foot before freeing unnecessary file caches. This patch adds round-robin for NUMA and adds equal pressure to each node. When using cpuset's spread memory feature, this will work very well. But yes, a better algorithm is needed. [akpm@linux-foundation.org: comment editing] [kamezawa.hiroyu@jp.fujitsu.com: fix time comparisons] Signed-off-by: Ying Han Signed-off-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: KOSAKI Motohiro Cc: Daisuke Nishimura Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 1 + mm/memcontrol.c | 102 ++++++++++++++++++++++++++++++++++++++++++--- mm/vmscan.c | 10 ++++- 3 files changed, 106 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0629121f2c0b..16052117131e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -106,6 +106,7 @@ extern void mem_cgroup_end_migration(struct mem_cgroup *mem, */ int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg); int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg); +int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone, enum lru_list lru); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fc62c714f3b6..1520efd1c7c4 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -231,6 +231,11 @@ struct mem_cgroup { * reclaimed from. */ int last_scanned_child; + int last_scanned_node; +#if MAX_NUMNODES > 1 + nodemask_t scan_nodes; + unsigned long next_scan_node_update; +#endif /* * Should the accounting and control be hierarchical, per subtree? */ @@ -624,18 +629,27 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, preempt_enable(); } +static unsigned long +mem_cgroup_get_zonestat_node(struct mem_cgroup *mem, int nid, enum lru_list idx) +{ + struct mem_cgroup_per_zone *mz; + u64 total = 0; + int zid; + + for (zid = 0; zid < MAX_NR_ZONES; zid++) { + mz = mem_cgroup_zoneinfo(mem, nid, zid); + total += MEM_CGROUP_ZSTAT(mz, idx); + } + return total; +} static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem, enum lru_list idx) { - int nid, zid; - struct mem_cgroup_per_zone *mz; + int nid; u64 total = 0; for_each_online_node(nid) - for (zid = 0; zid < MAX_NR_ZONES; zid++) { - mz = mem_cgroup_zoneinfo(mem, nid, zid); - total += MEM_CGROUP_ZSTAT(mz, idx); - } + total += mem_cgroup_get_zonestat_node(mem, nid, idx); return total; } @@ -1418,6 +1432,81 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem) return ret; } +#if MAX_NUMNODES > 1 + +/* + * Always updating the nodemask is not very good - even if we have an empty + * list or the wrong list here, we can start from some node and traverse all + * nodes based on the zonelist. So update the list loosely once per 10 secs. 
+ * + */ +static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem) +{ + int nid; + + if (time_after(mem->next_scan_node_update, jiffies)) + return; + + mem->next_scan_node_update = jiffies + 10*HZ; + /* make a nodemask where this memcg uses memory from */ + mem->scan_nodes = node_states[N_HIGH_MEMORY]; + + for_each_node_mask(nid, node_states[N_HIGH_MEMORY]) { + + if (mem_cgroup_get_zonestat_node(mem, nid, LRU_INACTIVE_FILE) || + mem_cgroup_get_zonestat_node(mem, nid, LRU_ACTIVE_FILE)) + continue; + + if (total_swap_pages && + (mem_cgroup_get_zonestat_node(mem, nid, LRU_INACTIVE_ANON) || + mem_cgroup_get_zonestat_node(mem, nid, LRU_ACTIVE_ANON))) + continue; + node_clear(nid, mem->scan_nodes); + } +} + +/* + * Selecting a node where we start reclaim from. Because what we need is just + * reducing usage counter, start from anywhere is O,K. Considering + * memory reclaim from current node, there are pros. and cons. + * + * Freeing memory from current node means freeing memory from a node which + * we'll use or we've used. So, it may make LRU bad. And if several threads + * hit limits, it will see a contention on a node. But freeing from remote + * node means more costs for memory reclaim because of memory latency. + * + * Now, we use round-robin. Better algorithm is welcomed. + */ +int mem_cgroup_select_victim_node(struct mem_cgroup *mem) +{ + int node; + + mem_cgroup_may_update_nodemask(mem); + node = mem->last_scanned_node; + + node = next_node(node, mem->scan_nodes); + if (node == MAX_NUMNODES) + node = first_node(mem->scan_nodes); + /* + * We call this when we hit limit, not when pages are added to LRU. + * No LRU may hold pages because all pages are UNEVICTABLE or + * memcg is too small and all pages are not on LRU. In that case, + * we use curret node. + */ + if (unlikely(node == MAX_NUMNODES)) + node = numa_node_id(); + + mem->last_scanned_node = node; + return node; +} + +#else +int mem_cgroup_select_victim_node(struct mem_cgroup *mem) +{ + return 0; +} +#endif + /* * Scan the hierarchy if needed to reclaim memory. We remember the last child * we reclaimed from, so that we don't end up penalizing one child extensively @@ -4606,6 +4695,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) res_counter_init(&mem->memsw, NULL); } mem->last_scanned_child = 0; + mem->last_scanned_node = MAX_NUMNODES; INIT_LIST_HEAD(&mem->oom_notify); if (parent) diff --git a/mm/vmscan.c b/mm/vmscan.c index 884ae08c16cc..b0875871820d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2226,6 +2226,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, { struct zonelist *zonelist; unsigned long nr_reclaimed; + int nid; struct scan_control sc = { .may_writepage = !laptop_mode, .may_unmap = 1, @@ -2242,7 +2243,14 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, .gfp_mask = sc.gfp_mask, }; - zonelist = NODE_DATA(numa_node_id())->node_zonelists; + /* + * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't + * take care of from where we get pages. So the node where we start the + * scan does not need to be the current node. 
+ */ + nid = mem_cgroup_select_victim_node(mem_cont); + + zonelist = NODE_DATA(nid)->node_zonelists; trace_mm_vmscan_memcg_reclaim_begin(0, sc.may_writepage, -- cgit v1.2.3 From 246e87a9393448c20873bc5dee64be68ed559e24 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Thu, 26 May 2011 16:25:34 -0700 Subject: memcg: fix get_scan_count() for small targets During memory reclaim we determine the number of pages to be scanned per zone as (anon + file) >> priority. Assume scan = (anon + file) >> priority. If scan < SWAP_CLUSTER_MAX, the scan will be skipped for this time and priority gets higher. This has some problems. 1. This increases priority as 1 without any scan. To do scan in this priority, amount of pages should be larger than 512M. If pages>>priority < SWAP_CLUSTER_MAX, it's recorded and scan will be batched, later. (But we lose 1 priority.) If memory size is below 16M, pages >> priority is 0 and no scan in DEF_PRIORITY forever. 2. If zone->all_unreclaimabe==true, it's scanned only when priority==0. So, x86's ZONE_DMA will never be recoverred until the user of pages frees memory by itself. 3. With memcg, the limit of memory can be small. When using small memcg, it gets priority < DEF_PRIORITY-2 very easily and need to call wait_iff_congested(). For doing scan before priorty=9, 64MB of memory should be used. Then, this patch tries to scan SWAP_CLUSTER_MAX of pages in force...when 1. the target is enough small. 2. it's kswapd or memcg reclaim. Then we can avoid rapid priority drop and may be able to recover all_unreclaimable in a small zones. And this patch removes nr_saved_scan. This will allow scanning in this priority even when pages >> priority is very small. Signed-off-by: KAMEZAWA Hiroyuki Acked-by: Ying Han Cc: Balbir Singh Cc: KOSAKI Motohiro Cc: Daisuke Nishimura Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 5 ----- mm/page_alloc.c | 4 +--- mm/vmscan.c | 60 +++++++++++++++++++++++++++----------------------- 3 files changed, 34 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 217bcf6bca77..29312bdf119f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -273,11 +273,6 @@ struct zone_reclaim_stat { */ unsigned long recent_rotated[2]; unsigned long recent_scanned[2]; - - /* - * accumulated for batching - */ - unsigned long nr_saved_scan[NR_LRU_LISTS]; }; struct zone { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2a00f17c3bf4..a4e1db3f1981 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4323,10 +4323,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, zone->zone_pgdat = pgdat; zone_pcp_init(zone); - for_each_lru(l) { + for_each_lru(l) INIT_LIST_HEAD(&zone->lru[l].list); - zone->reclaim_stat.nr_saved_scan[l] = 0; - } zone->reclaim_stat.recent_rotated[0] = 0; zone->reclaim_stat.recent_rotated[1] = 0; zone->reclaim_stat.recent_scanned[0] = 0; diff --git a/mm/vmscan.c b/mm/vmscan.c index b0875871820d..2e8fbacd8744 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1717,26 +1717,6 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, return shrink_inactive_list(nr_to_scan, zone, sc, priority, file); } -/* - * Smallish @nr_to_scan's are deposited in @nr_saved_scan, - * until we collected @swap_cluster_max pages to scan. 
- */ -static unsigned long nr_scan_try_batch(unsigned long nr_to_scan, - unsigned long *nr_saved_scan) -{ - unsigned long nr; - - *nr_saved_scan += nr_to_scan; - nr = *nr_saved_scan; - - if (nr >= SWAP_CLUSTER_MAX) - *nr_saved_scan = 0; - else - nr = 0; - - return nr; -} - /* * Determine how aggressively the anon and file LRU lists should be * scanned. The relative value of each set of LRU lists is determined @@ -1755,6 +1735,22 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc, u64 fraction[2], denominator; enum lru_list l; int noswap = 0; + int force_scan = 0; + + + anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) + + zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON); + file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) + + zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE); + + if (((anon + file) >> priority) < SWAP_CLUSTER_MAX) { + /* kswapd does zone balancing and need to scan this zone */ + if (scanning_global_lru(sc) && current_is_kswapd()) + force_scan = 1; + /* memcg may have small limit and need to avoid priority drop */ + if (!scanning_global_lru(sc)) + force_scan = 1; + } /* If we have no swap space, do not bother scanning anon pages. */ if (!sc->may_swap || (nr_swap_pages <= 0)) { @@ -1765,11 +1761,6 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc, goto out; } - anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) + - zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON); - file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) + - zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE); - if (scanning_global_lru(sc)) { free = zone_page_state(zone, NR_FREE_PAGES); /* If we have very few page cache pages, @@ -1836,8 +1827,23 @@ out: scan >>= priority; scan = div64_u64(scan * fraction[file], denominator); } - nr[l] = nr_scan_try_batch(scan, - &reclaim_stat->nr_saved_scan[l]); + + /* + * If zone is small or memcg is small, nr[l] can be 0. + * This results no-scan on this priority and priority drop down. + * For global direct reclaim, it can visit next zone and tend + * not to have problems. For global kswapd, it's for zone + * balancing and it need to scan a small amounts. When using + * memcg, priority drop can cause big latency. So, it's better + * to scan small amount. See may_noscan above. + */ + if (!scan && force_scan) { + if (file) + scan = SWAP_CLUSTER_MAX; + else if (!noswap) + scan = SWAP_CLUSTER_MAX; + } + nr[l] = scan; } } -- cgit v1.2.3 From 1bac180bd29e03989f50054af97b53b8d37a364a Mon Sep 17 00:00:00 2001 From: Ying Han Date: Thu, 26 May 2011 16:25:36 -0700 Subject: memcg: rename mem_cgroup_zone_nr_pages() to mem_cgroup_zone_nr_lru_pages() The caller of the function has been renamed to zone_nr_lru_pages(), and this is just fixing up in the memcg code. The current name is easily to be mis-read as zone's total number of pages. 
Signed-off-by: Ying Han Acked-by: Johannes Weiner Acked-by: KAMEZAWA Hiroyuki Reviewed-by: Minchan Kim Cc: Balbir Singh Cc: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 10 +++++----- mm/memcontrol.c | 6 +++--- mm/vmscan.c | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 16052117131e..ac1e5d20916a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -107,9 +107,9 @@ extern void mem_cgroup_end_migration(struct mem_cgroup *mem, int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg); int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg); int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); -unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, - struct zone *zone, - enum lru_list lru); +unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, + struct zone *zone, + enum lru_list lru); struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, struct zone *zone); struct zone_reclaim_stat* @@ -304,8 +304,8 @@ mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg) } static inline unsigned long -mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone, - enum lru_list lru) +mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, struct zone *zone, + enum lru_list lru) { return 0; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index bcb0a0bee1fc..cc48a6854f7e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1078,9 +1078,9 @@ int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg) return (active > inactive); } -unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, - struct zone *zone, - enum lru_list lru) +unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, + struct zone *zone, + enum lru_list lru) { int nid = zone_to_nid(zone); int zid = zone_idx(zone); diff --git a/mm/vmscan.c b/mm/vmscan.c index 2e8fbacd8744..faa0a088f9cc 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -173,7 +173,7 @@ static unsigned long zone_nr_lru_pages(struct zone *zone, struct scan_control *sc, enum lru_list lru) { if (!scanning_global_lru(sc)) - return mem_cgroup_zone_nr_pages(sc->mem_cgroup, zone, lru); + return mem_cgroup_zone_nr_lru_pages(sc->mem_cgroup, zone, lru); return zone_page_state(zone, NR_LRU_BASE + lru); } -- cgit v1.2.3 From 456f998ec817ebfa254464be4f089542fa390645 Mon Sep 17 00:00:00 2001 From: Ying Han Date: Thu, 26 May 2011 16:25:38 -0700 Subject: memcg: add the pagefault count into memcg stats Two new stats in per-memcg memory.stat which tracks the number of page faults and number of major page faults. "pgfault" "pgmajfault" They are different from "pgpgin"/"pgpgout" stat which count number of pages charged/discharged to the cgroup and have no meaning of reading/ writing page to disk. It is valuable to track the two stats for both measuring application's performance as well as the efficiency of the kernel page reclaim path. Counting pagefaults per process is useful, but we also need the aggregated value since processes are monitored and controlled in cgroup basis in memcg. 
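The new counters are bumped right next to the existing global ones; a minimal sketch of the pattern used at each fault site (taken from the mm/filemap.c major-fault hunk below, names as in the patch):

    count_vm_event(PGMAJFAULT);                        /* global vmstat */
    mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); /* per-memcg stat */

The same pairing is added for PGFAULT in handle_mm_fault().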
Functional test: check the total number of pgfault/pgmajfault of all memcgs and compare with global vmstat value: $ cat /proc/vmstat | grep fault pgfault 1070751 pgmajfault 553 $ cat /dev/cgroup/memory.stat | grep fault pgfault 1071138 pgmajfault 553 total_pgfault 1071142 total_pgmajfault 553 $ cat /dev/cgroup/A/memory.stat | grep fault pgfault 199 pgmajfault 0 total_pgfault 199 total_pgmajfault 0 Performance test: run page fault test(pft) wit 16 thread on faulting in 15G anon pages in 16G container. There is no regression noticed on the "flt/cpu/s" Sample output from pft: TAG pft:anon-sys-default: Gb Thr CLine User System Wall flt/cpu/s fault/wsec 15 16 1 0.67s 233.41s 14.76s 16798.546 266356.260 +-------------------------------------------------------------------------+ N Min Max Median Avg Stddev x 10 16682.962 17344.027 16913.524 16928.812 166.5362 + 10 16695.568 16923.896 16820.604 16824.652 84.816568 No difference proven at 95.0% confidence [akpm@linux-foundation.org: fix build] [hughd@google.com: shmem fix] Signed-off-by: Ying Han Acked-by: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Reviewed-by: Minchan Kim Cc: Daisuke Nishimura Acked-by: Balbir Singh Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ncpfs/mmap.c | 2 ++ include/linux/memcontrol.h | 7 +++++++ mm/filemap.c | 1 + mm/memcontrol.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++ mm/memory.c | 2 ++ mm/shmem.c | 11 ++++++----- 6 files changed, 65 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index a7c07b44b100..e5d71b27a5b0 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -92,6 +93,7 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area, * -- wli */ count_vm_event(PGMAJFAULT); + mem_cgroup_count_vm_event(area->vm_mm, PGMAJFAULT); return VM_FAULT_MAJOR; } diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ac1e5d20916a..9724a38ee69d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -20,6 +20,8 @@ #ifndef _LINUX_MEMCONTROL_H #define _LINUX_MEMCONTROL_H #include +#include + struct mem_cgroup; struct page_cgroup; struct page; @@ -149,6 +151,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, unsigned long *total_scanned); u64 mem_cgroup_get_limit(struct mem_cgroup *mem); +void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx); #ifdef CONFIG_TRANSPARENT_HUGEPAGE void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail); #endif @@ -357,6 +360,10 @@ static inline void mem_cgroup_split_huge_fixup(struct page *head, { } +static inline +void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx) +{ +} #endif /* CONFIG_CGROUP_MEM_CONT */ #if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM) diff --git a/mm/filemap.c b/mm/filemap.c index 7455ccd8bda8..bcdc393b6580 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1661,6 +1661,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) /* No page in the page cache at all */ do_sync_mmap_readahead(vma, ra, file, offset); count_vm_event(PGMAJFAULT); + mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); ret = VM_FAULT_MAJOR; retry_find: page = find_get_page(mapping, offset); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4021fcd71b60..bd9052a5d3ad 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -94,6 +94,8 @@ enum mem_cgroup_events_index { 
MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */ MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */ MEM_CGROUP_EVENTS_COUNT, /* # of pages paged in/out */ + MEM_CGROUP_EVENTS_PGFAULT, /* # of page-faults */ + MEM_CGROUP_EVENTS_PGMAJFAULT, /* # of major page-faults */ MEM_CGROUP_EVENTS_NSTATS, }; /* @@ -590,6 +592,16 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *mem, this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_SWAPOUT], val); } +void mem_cgroup_pgfault(struct mem_cgroup *mem, int val) +{ + this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGFAULT], val); +} + +void mem_cgroup_pgmajfault(struct mem_cgroup *mem, int val) +{ + this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT], val); +} + static unsigned long mem_cgroup_read_events(struct mem_cgroup *mem, enum mem_cgroup_events_index idx) { @@ -827,6 +839,33 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *mem) return (mem == root_mem_cgroup); } +void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx) +{ + struct mem_cgroup *mem; + + if (!mm) + return; + + rcu_read_lock(); + mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); + if (unlikely(!mem)) + goto out; + + switch (idx) { + case PGMAJFAULT: + mem_cgroup_pgmajfault(mem, 1); + break; + case PGFAULT: + mem_cgroup_pgfault(mem, 1); + break; + default: + BUG(); + } +out: + rcu_read_unlock(); +} +EXPORT_SYMBOL(mem_cgroup_count_vm_event); + /* * Following LRU functions are allowed to be used without PCG_LOCK. * Operations are called by routine of global LRU independently from memcg. @@ -3958,6 +3997,8 @@ enum { MCS_PGPGIN, MCS_PGPGOUT, MCS_SWAP, + MCS_PGFAULT, + MCS_PGMAJFAULT, MCS_INACTIVE_ANON, MCS_ACTIVE_ANON, MCS_INACTIVE_FILE, @@ -3980,6 +4021,8 @@ struct { {"pgpgin", "total_pgpgin"}, {"pgpgout", "total_pgpgout"}, {"swap", "total_swap"}, + {"pgfault", "total_pgfault"}, + {"pgmajfault", "total_pgmajfault"}, {"inactive_anon", "total_inactive_anon"}, {"active_anon", "total_active_anon"}, {"inactive_file", "total_inactive_file"}, @@ -4008,6 +4051,10 @@ mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s) val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_SWAPOUT); s->stat[MCS_SWAP] += val * PAGE_SIZE; } + val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGFAULT); + s->stat[MCS_PGFAULT] += val; + val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGMAJFAULT); + s->stat[MCS_PGMAJFAULT] += val; /* per zone stat */ val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON); diff --git a/mm/memory.c b/mm/memory.c index fc24f7d788bd..6953d3926e01 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2874,6 +2874,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, /* Had to read the page from swap area: Major fault */ ret = VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); + mem_cgroup_count_vm_event(mm, PGMAJFAULT); } else if (PageHWPoison(page)) { /* * hwpoisoned dirty swapcache pages are kept for killing @@ -3413,6 +3414,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, __set_current_state(TASK_RUNNING); count_vm_event(PGFAULT); + mem_cgroup_count_vm_event(mm, PGFAULT); /* do counter updates before entering really critical section. 
*/ check_sync_rss_stat(current); diff --git a/mm/shmem.c b/mm/shmem.c index 69edb45a9f28..1acfb2687bfa 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1305,12 +1305,10 @@ repeat: swappage = lookup_swap_cache(swap); if (!swappage) { shmem_swp_unmap(entry); + spin_unlock(&info->lock); /* here we actually do the io */ - if (type && !(*type & VM_FAULT_MAJOR)) { - __count_vm_event(PGMAJFAULT); + if (type) *type |= VM_FAULT_MAJOR; - } - spin_unlock(&info->lock); swappage = shmem_swapin(swap, gfp, info, idx); if (!swappage) { spin_lock(&info->lock); @@ -1549,7 +1547,10 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); if (error) return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); - + if (ret & VM_FAULT_MAJOR) { + count_vm_event(PGMAJFAULT); + mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); + } return ret | VM_FAULT_LOCKED; } -- cgit v1.2.3 From 3864601387cf4196371e3c1897fdffa5228296f9 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 26 May 2011 16:25:46 -0700 Subject: mm: extract exe_file handling from procfs Setup and cleanup of mm_struct->exe_file is currently done in fs/proc/. This was because exe_file was needed only for /proc//exe. Since we will need the exe_file functionality also for core dumps (so core name can contain full binary path), built this functionality always into the kernel. To achieve that move that out of proc FS to the kernel/ where in fact it should belong. By doing that we can make dup_mm_exe_file static. Also we can drop linux/proc_fs.h inclusion in fs/exec.c and kernel/fork.c. Signed-off-by: Jiri Slaby Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 1 - fs/proc/base.c | 51 ----------------------------------------------- include/linux/mm.h | 10 ++-------- include/linux/mm_types.h | 2 -- include/linux/proc_fs.h | 19 ------------------ kernel/fork.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++- 6 files changed, 53 insertions(+), 82 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 936f5776655c..88a16c572282 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/proc/base.c b/fs/proc/base.c index dc8bca72b002..c2ac2fb123c8 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1576,57 +1576,6 @@ static const struct file_operations proc_pid_set_comm_operations = { .release = single_release, }; -/* - * We added or removed a vma mapping the executable. The vmas are only mapped - * during exec and are not mapped with the mmap system call. 
- * Callers must hold down_write() on the mm's mmap_sem for these - */ -void added_exe_file_vma(struct mm_struct *mm) -{ - mm->num_exe_file_vmas++; -} - -void removed_exe_file_vma(struct mm_struct *mm) -{ - mm->num_exe_file_vmas--; - if ((mm->num_exe_file_vmas == 0) && mm->exe_file){ - fput(mm->exe_file); - mm->exe_file = NULL; - } - -} - -void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) -{ - if (new_exe_file) - get_file(new_exe_file); - if (mm->exe_file) - fput(mm->exe_file); - mm->exe_file = new_exe_file; - mm->num_exe_file_vmas = 0; -} - -struct file *get_mm_exe_file(struct mm_struct *mm) -{ - struct file *exe_file; - - /* We need mmap_sem to protect against races with removal of - * VM_EXECUTABLE vmas */ - down_read(&mm->mmap_sem); - exe_file = mm->exe_file; - if (exe_file) - get_file(exe_file); - up_read(&mm->mmap_sem); - return exe_file; -} - -void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm) -{ - /* It's safe to write the exe_file pointer without exe_file_lock because - * this is called during fork when the task is not yet in /proc */ - newmm->exe_file = get_mm_exe_file(oldmm); -} - static int proc_exe_link(struct inode *inode, struct path *exe_path) { struct task_struct *task; diff --git a/include/linux/mm.h b/include/linux/mm.h index fb8e814f78dc..9670f71d7be9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1408,17 +1408,11 @@ extern void exit_mmap(struct mm_struct *); extern int mm_take_all_locks(struct mm_struct *mm); extern void mm_drop_all_locks(struct mm_struct *mm); -#ifdef CONFIG_PROC_FS /* From fs/proc/base.c. callers must _not_ hold the mm's exe_file_lock */ extern void added_exe_file_vma(struct mm_struct *mm); extern void removed_exe_file_vma(struct mm_struct *mm); -#else -static inline void added_exe_file_vma(struct mm_struct *mm) -{} - -static inline void removed_exe_file_vma(struct mm_struct *mm) -{} -#endif /* CONFIG_PROC_FS */ +extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); +extern struct file *get_mm_exe_file(struct mm_struct *mm); extern int may_expand_vm(struct mm_struct *mm, unsigned long npages); extern int install_special_mapping(struct mm_struct *mm, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6fe96c19f85e..2a78aae78c69 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -302,11 +302,9 @@ struct mm_struct { struct task_struct __rcu *owner; #endif -#ifdef CONFIG_PROC_FS /* store ref to file /proc//exe symlink points to */ struct file *exe_file; unsigned long num_exe_file_vmas; -#endif #ifdef CONFIG_MMU_NOTIFIER struct mmu_notifier_mm *mmu_notifier_mm; #endif diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 648c9c58add7..e7576cf9e32d 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -173,12 +173,6 @@ extern void proc_net_remove(struct net *net, const char *name); extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, struct proc_dir_entry *parent); -/* While the {get|set|dup}_mm_exe_file functions are for mm_structs, they are - * only needed to implement /proc/|self/exe so we define them here. 
*/ -extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); -extern struct file *get_mm_exe_file(struct mm_struct *mm); -extern void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm); - extern struct file *proc_ns_fget(int fd); #else @@ -230,19 +224,6 @@ static inline void pid_ns_release_proc(struct pid_namespace *ns) { } -static inline void set_mm_exe_file(struct mm_struct *mm, - struct file *new_exe_file) -{} - -static inline struct file *get_mm_exe_file(struct mm_struct *mm) -{ - return NULL; -} - -static inline void dup_mm_exe_file(struct mm_struct *oldmm, - struct mm_struct *newmm) -{} - static inline struct file *proc_ns_fget(int fd) { return ERR_PTR(-EINVAL); diff --git a/kernel/fork.c b/kernel/fork.c index 1f84099ecce6..ca406d916713 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -59,7 +59,6 @@ #include #include #include -#include #include #include #include @@ -597,6 +596,57 @@ void mmput(struct mm_struct *mm) } EXPORT_SYMBOL_GPL(mmput); +/* + * We added or removed a vma mapping the executable. The vmas are only mapped + * during exec and are not mapped with the mmap system call. + * Callers must hold down_write() on the mm's mmap_sem for these + */ +void added_exe_file_vma(struct mm_struct *mm) +{ + mm->num_exe_file_vmas++; +} + +void removed_exe_file_vma(struct mm_struct *mm) +{ + mm->num_exe_file_vmas--; + if ((mm->num_exe_file_vmas == 0) && mm->exe_file){ + fput(mm->exe_file); + mm->exe_file = NULL; + } + +} + +void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) +{ + if (new_exe_file) + get_file(new_exe_file); + if (mm->exe_file) + fput(mm->exe_file); + mm->exe_file = new_exe_file; + mm->num_exe_file_vmas = 0; +} + +struct file *get_mm_exe_file(struct mm_struct *mm) +{ + struct file *exe_file; + + /* We need mmap_sem to protect against races with removal of + * VM_EXECUTABLE vmas */ + down_read(&mm->mmap_sem); + exe_file = mm->exe_file; + if (exe_file) + get_file(exe_file); + up_read(&mm->mmap_sem); + return exe_file; +} + +static void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm) +{ + /* It's safe to write the exe_file pointer without exe_file_lock because + * this is called during fork when the task is not yet in /proc */ + newmm->exe_file = get_mm_exe_file(oldmm); +} + /** * get_task_mm - acquire a reference to the task's mm * -- cgit v1.2.3 From 997c136f518c5debd63847e78e2a8694f56dcf90 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Thu, 26 May 2011 16:25:54 -0700 Subject: fs/proc/vmcore.c: add hook to read_from_oldmem() to check for non-ram pages The balloon driver in a Xen guest frees guest pages and marks them as mmio. When the kernel crashes and the crash kernel attempts to read the oldmem via /proc/vmcore a read from ballooned pages will generate 100% load in dom0 because Xen asks qemu-dm for the page content. Since the reads come in as 8byte requests each ballooned page is tried 512 times. With this change a hook can be registered which checks wether the given pfn is really ram. The hook has to return a value > 0 for ram pages, a value < 0 on error (because the hypercall is not known) and 0 for non-ram pages. This will reduce the time to read /proc/vmcore. Without this change a 512M guest with 128M crashkernel region needs 200 seconds to read it, with this change it takes just 2 seconds. 
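For a driver that knows which pfns are not backed by real memory, wiring up the
hook is a matter of registering a checker function. A minimal sketch follows;
the non-RAM pfn range is a made-up placeholder standing in for whatever the
hypervisor or firmware actually reports, and is not part of this patch:

	#include <linux/crash_dump.h>
	#include <linux/module.h>

	/* made-up bounds standing in for the ballooned/mmio region */
	static unsigned long example_non_ram_start, example_non_ram_end;

	static int example_oldmem_pfn_is_ram(unsigned long pfn)
	{
		if (pfn >= example_non_ram_start && pfn < example_non_ram_end)
			return 0;	/* not RAM: vmcore substitutes zeros */
		return 1;		/* RAM: read via copy_oldmem_page() */
	}

	static int __init example_init(void)
	{
		/* fails with -EBUSY if another hook is already registered */
		return register_oldmem_pfn_is_ram(example_oldmem_pfn_is_ram);
	}

	static void __exit example_exit(void)
	{
		unregister_oldmem_pfn_is_ram();
	}

	module_init(example_init);
	module_exit(example_exit);

With such a hook in place, read_from_oldmem() fills the affected parts of
/proc/vmcore with zeros instead of asking the hypervisor about every 8 byte
chunk of a ballooned page.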
Signed-off-by: Olaf Hering Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/vmcore.c | 52 +++++++++++++++++++++++++++++++++++++++++++--- include/linux/crash_dump.h | 5 +++++ 2 files changed, 54 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 74802bc5ded9..cd99bf557650 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -35,6 +35,46 @@ static u64 vmcore_size; static struct proc_dir_entry *proc_vmcore = NULL; +/* + * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error + * The called function has to take care of module refcounting. + */ +static int (*oldmem_pfn_is_ram)(unsigned long pfn); + +int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn)) +{ + if (oldmem_pfn_is_ram) + return -EBUSY; + oldmem_pfn_is_ram = fn; + return 0; +} +EXPORT_SYMBOL_GPL(register_oldmem_pfn_is_ram); + +void unregister_oldmem_pfn_is_ram(void) +{ + oldmem_pfn_is_ram = NULL; + wmb(); +} +EXPORT_SYMBOL_GPL(unregister_oldmem_pfn_is_ram); + +static int pfn_is_ram(unsigned long pfn) +{ + int (*fn)(unsigned long pfn); + /* pfn is ram unless fn() checks pagetype */ + int ret = 1; + + /* + * Ask hypervisor if the pfn is really ram. + * A ballooned page contains no data and reading from such a page + * will cause high load in the hypervisor. + */ + fn = oldmem_pfn_is_ram; + if (fn) + ret = fn(pfn); + + return ret; +} + /* Reads a page from the oldmem device from given offset. */ static ssize_t read_from_oldmem(char *buf, size_t count, u64 *ppos, int userbuf) @@ -55,9 +95,15 @@ static ssize_t read_from_oldmem(char *buf, size_t count, else nr_bytes = count; - tmp = copy_oldmem_page(pfn, buf, nr_bytes, offset, userbuf); - if (tmp < 0) - return tmp; + /* If pfn is not ram, return zeros for sparse dump files */ + if (pfn_is_ram(pfn) == 0) + memset(buf, 0, nr_bytes); + else { + tmp = copy_oldmem_page(pfn, buf, nr_bytes, + offset, userbuf); + if (tmp < 0) + return tmp; + } *ppos += nr_bytes; count -= nr_bytes; buf += nr_bytes; diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 088cd4ace4ef..74054074e876 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -66,6 +66,11 @@ static inline void vmcore_unusable(void) if (is_kdump_kernel()) elfcorehdr_addr = ELFCORE_ADDR_ERR; } + +#define HAVE_OLDMEM_PFN_IS_RAM 1 +extern int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn)); +extern void unregister_oldmem_pfn_is_ram(void); + #else /* !CONFIG_CRASH_DUMP */ static inline int is_kdump_kernel(void) { return 0; } #endif /* CONFIG_CRASH_DUMP */ -- cgit v1.2.3 From 074127367a503de0168e2ca5d0b36a6f761f026a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 26 May 2011 16:25:55 -0700 Subject: ipmi: convert to seq_file interface The ->read_proc interface is going away, convert to seq_file. 
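The conversion follows the usual single_open() pattern: each former
->read_proc handler becomes a seq_file show routine paired with an open
routine and a const file_operations, and the entry is created with
proc_create_data() instead of create_proc_entry(). Schematically (a generic
sketch; the foo structure and its counter are placeholders, not code from this
patch):

	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	struct foo {
		unsigned int some_counter;
	};

	static int foo_proc_show(struct seq_file *m, void *v)
	{
		struct foo *foo = m->private;	/* set up by single_open() */

		seq_printf(m, "some_counter: %u\n", foo->some_counter);
		return 0;
	}

	static int foo_proc_open(struct inode *inode, struct file *file)
	{
		return single_open(file, foo_proc_show, PDE(inode)->data);
	}

	static const struct file_operations foo_proc_ops = {
		.open		= foo_proc_open,
		.read		= seq_read,
		.llseek		= seq_lseek,
		.release	= single_release,
	};

	/* registration: proc_create_data(name, 0, parent, &foo_proc_ops, foo); */

This removes the manual buffer arithmetic of the old page-based interface and
lets the seq_file core take care of offsets and short reads.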
Signed-off-by: Alexey Dobriyan Cc:Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ipmi/ipmi_msghandler.c | 138 +++++++++++++++++++++--------------- drivers/char/ipmi/ipmi_si_intf.c | 88 +++++++++++++++-------- include/linux/ipmi_smi.h | 2 +- 3 files changed, 142 insertions(+), 86 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 38223e93aa98..58c0e6387cf7 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -1896,102 +1897,128 @@ int ipmi_request_supply_msgs(ipmi_user_t user, EXPORT_SYMBOL(ipmi_request_supply_msgs); #ifdef CONFIG_PROC_FS -static int ipmb_file_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int smi_ipmb_proc_show(struct seq_file *m, void *v) { - char *out = (char *) page; - ipmi_smi_t intf = data; + ipmi_smi_t intf = m->private; int i; - int rv = 0; - for (i = 0; i < IPMI_MAX_CHANNELS; i++) - rv += sprintf(out+rv, "%x ", intf->channels[i].address); - out[rv-1] = '\n'; /* Replace the final space with a newline */ - out[rv] = '\0'; - rv++; - return rv; + seq_printf(m, "%x", intf->channels[0].address); + for (i = 1; i < IPMI_MAX_CHANNELS; i++) + seq_printf(m, " %x", intf->channels[i].address); + return seq_putc(m, '\n'); } -static int version_file_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int smi_ipmb_proc_open(struct inode *inode, struct file *file) { - char *out = (char *) page; - ipmi_smi_t intf = data; + return single_open(file, smi_ipmb_proc_show, PDE(inode)->data); +} - return sprintf(out, "%u.%u\n", +static const struct file_operations smi_ipmb_proc_ops = { + .open = smi_ipmb_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int smi_version_proc_show(struct seq_file *m, void *v) +{ + ipmi_smi_t intf = m->private; + + return seq_printf(m, "%u.%u\n", ipmi_version_major(&intf->bmc->id), ipmi_version_minor(&intf->bmc->id)); } -static int stat_file_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int smi_version_proc_open(struct inode *inode, struct file *file) { - char *out = (char *) page; - ipmi_smi_t intf = data; + return single_open(file, smi_version_proc_show, PDE(inode)->data); +} + +static const struct file_operations smi_version_proc_ops = { + .open = smi_version_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; - out += sprintf(out, "sent_invalid_commands: %u\n", +static int smi_stats_proc_show(struct seq_file *m, void *v) +{ + ipmi_smi_t intf = m->private; + + seq_printf(m, "sent_invalid_commands: %u\n", ipmi_get_stat(intf, sent_invalid_commands)); - out += sprintf(out, "sent_local_commands: %u\n", + seq_printf(m, "sent_local_commands: %u\n", ipmi_get_stat(intf, sent_local_commands)); - out += sprintf(out, "handled_local_responses: %u\n", + seq_printf(m, "handled_local_responses: %u\n", ipmi_get_stat(intf, handled_local_responses)); - out += sprintf(out, "unhandled_local_responses: %u\n", + seq_printf(m, "unhandled_local_responses: %u\n", ipmi_get_stat(intf, unhandled_local_responses)); - out += sprintf(out, "sent_ipmb_commands: %u\n", + seq_printf(m, "sent_ipmb_commands: %u\n", ipmi_get_stat(intf, sent_ipmb_commands)); - out += sprintf(out, "sent_ipmb_command_errs: %u\n", + seq_printf(m, 
"sent_ipmb_command_errs: %u\n", ipmi_get_stat(intf, sent_ipmb_command_errs)); - out += sprintf(out, "retransmitted_ipmb_commands: %u\n", + seq_printf(m, "retransmitted_ipmb_commands: %u\n", ipmi_get_stat(intf, retransmitted_ipmb_commands)); - out += sprintf(out, "timed_out_ipmb_commands: %u\n", + seq_printf(m, "timed_out_ipmb_commands: %u\n", ipmi_get_stat(intf, timed_out_ipmb_commands)); - out += sprintf(out, "timed_out_ipmb_broadcasts: %u\n", + seq_printf(m, "timed_out_ipmb_broadcasts: %u\n", ipmi_get_stat(intf, timed_out_ipmb_broadcasts)); - out += sprintf(out, "sent_ipmb_responses: %u\n", + seq_printf(m, "sent_ipmb_responses: %u\n", ipmi_get_stat(intf, sent_ipmb_responses)); - out += sprintf(out, "handled_ipmb_responses: %u\n", + seq_printf(m, "handled_ipmb_responses: %u\n", ipmi_get_stat(intf, handled_ipmb_responses)); - out += sprintf(out, "invalid_ipmb_responses: %u\n", + seq_printf(m, "invalid_ipmb_responses: %u\n", ipmi_get_stat(intf, invalid_ipmb_responses)); - out += sprintf(out, "unhandled_ipmb_responses: %u\n", + seq_printf(m, "unhandled_ipmb_responses: %u\n", ipmi_get_stat(intf, unhandled_ipmb_responses)); - out += sprintf(out, "sent_lan_commands: %u\n", + seq_printf(m, "sent_lan_commands: %u\n", ipmi_get_stat(intf, sent_lan_commands)); - out += sprintf(out, "sent_lan_command_errs: %u\n", + seq_printf(m, "sent_lan_command_errs: %u\n", ipmi_get_stat(intf, sent_lan_command_errs)); - out += sprintf(out, "retransmitted_lan_commands: %u\n", + seq_printf(m, "retransmitted_lan_commands: %u\n", ipmi_get_stat(intf, retransmitted_lan_commands)); - out += sprintf(out, "timed_out_lan_commands: %u\n", + seq_printf(m, "timed_out_lan_commands: %u\n", ipmi_get_stat(intf, timed_out_lan_commands)); - out += sprintf(out, "sent_lan_responses: %u\n", + seq_printf(m, "sent_lan_responses: %u\n", ipmi_get_stat(intf, sent_lan_responses)); - out += sprintf(out, "handled_lan_responses: %u\n", + seq_printf(m, "handled_lan_responses: %u\n", ipmi_get_stat(intf, handled_lan_responses)); - out += sprintf(out, "invalid_lan_responses: %u\n", + seq_printf(m, "invalid_lan_responses: %u\n", ipmi_get_stat(intf, invalid_lan_responses)); - out += sprintf(out, "unhandled_lan_responses: %u\n", + seq_printf(m, "unhandled_lan_responses: %u\n", ipmi_get_stat(intf, unhandled_lan_responses)); - out += sprintf(out, "handled_commands: %u\n", + seq_printf(m, "handled_commands: %u\n", ipmi_get_stat(intf, handled_commands)); - out += sprintf(out, "invalid_commands: %u\n", + seq_printf(m, "invalid_commands: %u\n", ipmi_get_stat(intf, invalid_commands)); - out += sprintf(out, "unhandled_commands: %u\n", + seq_printf(m, "unhandled_commands: %u\n", ipmi_get_stat(intf, unhandled_commands)); - out += sprintf(out, "invalid_events: %u\n", + seq_printf(m, "invalid_events: %u\n", ipmi_get_stat(intf, invalid_events)); - out += sprintf(out, "events: %u\n", + seq_printf(m, "events: %u\n", ipmi_get_stat(intf, events)); - out += sprintf(out, "failed rexmit LAN msgs: %u\n", + seq_printf(m, "failed rexmit LAN msgs: %u\n", ipmi_get_stat(intf, dropped_rexmit_lan_commands)); - out += sprintf(out, "failed rexmit IPMB msgs: %u\n", + seq_printf(m, "failed rexmit IPMB msgs: %u\n", ipmi_get_stat(intf, dropped_rexmit_ipmb_commands)); + return 0; +} - return (out - ((char *) page)); +static int smi_stats_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, smi_stats_proc_show, PDE(inode)->data); } + +static const struct file_operations smi_stats_proc_ops = { + .open = smi_stats_proc_open, + .read = seq_read, + .llseek = 
seq_lseek, + .release = single_release, +}; #endif /* CONFIG_PROC_FS */ int ipmi_smi_add_proc_entry(ipmi_smi_t smi, char *name, - read_proc_t *read_proc, + const struct file_operations *proc_ops, void *data) { int rv = 0; @@ -2010,15 +2037,12 @@ int ipmi_smi_add_proc_entry(ipmi_smi_t smi, char *name, } strcpy(entry->name, name); - file = create_proc_entry(name, 0, smi->proc_dir); + file = proc_create_data(name, 0, smi->proc_dir, proc_ops, data); if (!file) { kfree(entry->name); kfree(entry); rv = -ENOMEM; } else { - file->data = data; - file->read_proc = read_proc; - mutex_lock(&smi->proc_entry_lock); /* Stick it on the list. */ entry->next = smi->proc_entries; @@ -2043,17 +2067,17 @@ static int add_proc_entries(ipmi_smi_t smi, int num) if (rv == 0) rv = ipmi_smi_add_proc_entry(smi, "stats", - stat_file_read_proc, + &smi_stats_proc_ops, smi); if (rv == 0) rv = ipmi_smi_add_proc_entry(smi, "ipmb", - ipmb_file_read_proc, + &smi_ipmb_proc_ops, smi); if (rv == 0) rv = ipmi_smi_add_proc_entry(smi, "version", - version_file_read_proc, + &smi_version_proc_ops, smi); #endif /* CONFIG_PROC_FS */ diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 64c6b8530615..9397ab49b72e 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -2805,54 +2806,73 @@ static int try_enable_event_buffer(struct smi_info *smi_info) return rv; } -static int type_file_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int smi_type_proc_show(struct seq_file *m, void *v) { - struct smi_info *smi = data; + struct smi_info *smi = m->private; - return sprintf(page, "%s\n", si_to_str[smi->si_type]); + return seq_printf(m, "%s\n", si_to_str[smi->si_type]); } -static int stat_file_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int smi_type_proc_open(struct inode *inode, struct file *file) { - char *out = (char *) page; - struct smi_info *smi = data; + return single_open(file, smi_type_proc_show, PDE(inode)->data); +} + +static const struct file_operations smi_type_proc_ops = { + .open = smi_type_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int smi_si_stats_proc_show(struct seq_file *m, void *v) +{ + struct smi_info *smi = m->private; - out += sprintf(out, "interrupts_enabled: %d\n", + seq_printf(m, "interrupts_enabled: %d\n", smi->irq && !smi->interrupt_disabled); - out += sprintf(out, "short_timeouts: %u\n", + seq_printf(m, "short_timeouts: %u\n", smi_get_stat(smi, short_timeouts)); - out += sprintf(out, "long_timeouts: %u\n", + seq_printf(m, "long_timeouts: %u\n", smi_get_stat(smi, long_timeouts)); - out += sprintf(out, "idles: %u\n", + seq_printf(m, "idles: %u\n", smi_get_stat(smi, idles)); - out += sprintf(out, "interrupts: %u\n", + seq_printf(m, "interrupts: %u\n", smi_get_stat(smi, interrupts)); - out += sprintf(out, "attentions: %u\n", + seq_printf(m, "attentions: %u\n", smi_get_stat(smi, attentions)); - out += sprintf(out, "flag_fetches: %u\n", + seq_printf(m, "flag_fetches: %u\n", smi_get_stat(smi, flag_fetches)); - out += sprintf(out, "hosed_count: %u\n", + seq_printf(m, "hosed_count: %u\n", smi_get_stat(smi, hosed_count)); - out += sprintf(out, "complete_transactions: %u\n", + seq_printf(m, "complete_transactions: %u\n", smi_get_stat(smi, complete_transactions)); - out += sprintf(out, "events: %u\n", + seq_printf(m, "events: %u\n", 
smi_get_stat(smi, events)); - out += sprintf(out, "watchdog_pretimeouts: %u\n", + seq_printf(m, "watchdog_pretimeouts: %u\n", smi_get_stat(smi, watchdog_pretimeouts)); - out += sprintf(out, "incoming_messages: %u\n", + seq_printf(m, "incoming_messages: %u\n", smi_get_stat(smi, incoming_messages)); + return 0; +} - return out - page; +static int smi_si_stats_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, smi_si_stats_proc_show, PDE(inode)->data); } -static int param_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static const struct file_operations smi_si_stats_proc_ops = { + .open = smi_si_stats_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int smi_params_proc_show(struct seq_file *m, void *v) { - struct smi_info *smi = data; + struct smi_info *smi = m->private; - return sprintf(page, + return seq_printf(m, "%s,%s,0x%lx,rsp=%d,rsi=%d,rsh=%d,irq=%d,ipmb=%d\n", si_to_str[smi->si_type], addr_space_to_str[smi->io.addr_type], @@ -2864,6 +2884,18 @@ static int param_read_proc(char *page, char **start, off_t off, smi->slave_addr); } +static int smi_params_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, smi_params_proc_show, PDE(inode)->data); +} + +static const struct file_operations smi_params_proc_ops = { + .open = smi_params_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /* * oem_data_avail_to_receive_msg_avail * @info - smi_info structure with msg_flags set @@ -3257,7 +3289,7 @@ static int try_smi_init(struct smi_info *new_smi) } rv = ipmi_smi_add_proc_entry(new_smi->intf, "type", - type_file_read_proc, + &smi_type_proc_ops, new_smi); if (rv) { dev_err(new_smi->dev, "Unable to create proc entry: %d\n", rv); @@ -3265,7 +3297,7 @@ static int try_smi_init(struct smi_info *new_smi) } rv = ipmi_smi_add_proc_entry(new_smi->intf, "si_stats", - stat_file_read_proc, + &smi_si_stats_proc_ops, new_smi); if (rv) { dev_err(new_smi->dev, "Unable to create proc entry: %d\n", rv); @@ -3273,7 +3305,7 @@ static int try_smi_init(struct smi_info *new_smi) } rv = ipmi_smi_add_proc_entry(new_smi->intf, "params", - param_read_proc, + &smi_params_proc_ops, new_smi); if (rv) { dev_err(new_smi->dev, "Unable to create proc entry: %d\n", rv); diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index 906590aa6907..204f9cd26c16 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -236,7 +236,7 @@ static inline void ipmi_free_smi_msg(struct ipmi_smi_msg *msg) directory for this interface. Note that the entry will automatically be dstroyed when the interface is destroyed. */ int ipmi_smi_add_proc_entry(ipmi_smi_t smi, char *name, - read_proc_t *read_proc, + const struct file_operations *proc_ops, void *data); #endif /* __LINUX_IPMI_SMI_H */ -- cgit v1.2.3 From 26498e89e83c62cffcb8836a2ac2c5b795d84258 Mon Sep 17 00:00:00 2001 From: Sisir Koppaka Date: Thu, 26 May 2011 16:25:57 -0700 Subject: pid: fix typo in function description finds is misspelt as finr. No functional change. Signed-off-by: Sisir Koppaka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pid.h b/include/linux/pid.h index cdced84261d7..b152d44fb181 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -105,7 +105,7 @@ extern struct pid_namespace init_pid_ns; * or rcu_read_lock() held. 
* * find_pid_ns() finds the pid in the namespace specified - * find_vpid() finr the pid by its virtual id, i.e. in the current namespace + * find_vpid() finds the pid by its virtual id, i.e. in the current namespace * * see also find_task_by_vpid() set in include/linux/sched.h */ -- cgit v1.2.3 From 19de85ef574c3a2182e3ccad9581805052f14946 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 26 May 2011 16:26:09 -0700 Subject: bitops: add #ifndef for each of find bitops The style that we normally use in asm-generic is to test the macro itself for existence, so in asm-generic, do: #ifndef find_next_zero_bit_le extern unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset); #endif and in the architectures, write static inline unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset) #define find_next_zero_bit_le find_next_zero_bit_le This adds the #ifndef for each of the find bitops in the generic header and source files. Suggested-by: Arnd Bergmann Signed-off-by: Akinobu Mita Acked-by: Russell King Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/bitops/find.h | 4 ++++ include/asm-generic/bitops/le.h | 7 +++++++ include/linux/bitops.h | 2 ++ lib/find_last_bit.c | 4 ++++ lib/find_next_bit.c | 12 ++++++++++++ 5 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h index 110fa700f853..71c778033f57 100644 --- a/include/asm-generic/bitops/find.h +++ b/include/asm-generic/bitops/find.h @@ -1,6 +1,7 @@ #ifndef _ASM_GENERIC_BITOPS_FIND_H_ #define _ASM_GENERIC_BITOPS_FIND_H_ +#ifndef find_next_bit /** * find_next_bit - find the next set bit in a memory region * @addr: The address to base the search on @@ -9,7 +10,9 @@ */ extern unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset); +#endif +#ifndef find_next_zero_bit /** * find_next_zero_bit - find the next cleared bit in a memory region * @addr: The address to base the search on @@ -18,6 +21,7 @@ extern unsigned long find_next_bit(const unsigned long *addr, unsigned long */ extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset); +#endif #ifdef CONFIG_GENERIC_FIND_FIRST_BIT diff --git a/include/asm-generic/bitops/le.h b/include/asm-generic/bitops/le.h index 946a21b1b5dc..f95c663a6a41 100644 --- a/include/asm-generic/bitops/le.h +++ b/include/asm-generic/bitops/le.h @@ -30,13 +30,20 @@ static inline unsigned long find_first_zero_bit_le(const void *addr, #define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) +#ifndef find_next_zero_bit_le extern unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset); +#endif + +#ifndef find_next_bit_le extern unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset); +#endif +#ifndef find_first_zero_bit_le #define find_first_zero_bit_le(addr, size) \ find_next_zero_bit_le((addr), (size), 0) +#endif #else #error "Please fix " diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 2184c6b97aeb..4829252d7cfa 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -149,6 +149,7 @@ static inline unsigned long __ffs64(u64 word) #ifdef __KERNEL__ #ifdef CONFIG_GENERIC_FIND_LAST_BIT +#ifndef find_last_bit /** * find_last_bit - find the last set bit in a memory region * @addr: The 
address to start the search at @@ -158,6 +159,7 @@ static inline unsigned long __ffs64(u64 word) */ extern unsigned long find_last_bit(const unsigned long *addr, unsigned long size); +#endif #endif /* CONFIG_GENERIC_FIND_LAST_BIT */ #endif /* __KERNEL__ */ diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c index 5d202e36bdd8..d903959ad695 100644 --- a/lib/find_last_bit.c +++ b/lib/find_last_bit.c @@ -15,6 +15,8 @@ #include #include +#ifndef find_last_bit + unsigned long find_last_bit(const unsigned long *addr, unsigned long size) { unsigned long words; @@ -43,3 +45,5 @@ found: return size; } EXPORT_SYMBOL(find_last_bit); + +#endif diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c index b0a8767282bf..c02d09f37d58 100644 --- a/lib/find_next_bit.c +++ b/lib/find_next_bit.c @@ -17,6 +17,7 @@ #define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) #ifdef CONFIG_GENERIC_FIND_NEXT_BIT +#ifndef find_next_bit /* * Find the next set bit in a memory region. */ @@ -59,7 +60,9 @@ found_middle: return result + __ffs(tmp); } EXPORT_SYMBOL(find_next_bit); +#endif +#ifndef find_next_zero_bit /* * This implementation of find_{first,next}_zero_bit was stolen from * Linus' asm-alpha/bitops.h. @@ -103,9 +106,11 @@ found_middle: return result + ffz(tmp); } EXPORT_SYMBOL(find_next_zero_bit); +#endif #endif /* CONFIG_GENERIC_FIND_NEXT_BIT */ #ifdef CONFIG_GENERIC_FIND_FIRST_BIT +#ifndef find_first_bit /* * Find the first set bit in a memory region. */ @@ -131,7 +136,9 @@ found: return result + __ffs(tmp); } EXPORT_SYMBOL(find_first_bit); +#endif +#ifndef find_first_zero_bit /* * Find the first cleared bit in a memory region. */ @@ -157,6 +164,7 @@ found: return result + ffz(tmp); } EXPORT_SYMBOL(find_first_zero_bit); +#endif #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ #ifdef __BIG_ENDIAN @@ -186,6 +194,7 @@ static inline unsigned long ext2_swab(const unsigned long y) #endif } +#ifndef find_next_zero_bit_le unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset) { @@ -229,7 +238,9 @@ found_middle_swap: return result + ffz(ext2_swab(tmp)); } EXPORT_SYMBOL(find_next_zero_bit_le); +#endif +#ifndef find_next_bit_le unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset) { @@ -274,6 +285,7 @@ found_middle_swap: return result + __ffs(ext2_swab(tmp)); } EXPORT_SYMBOL(find_next_bit_le); +#endif #endif /* CONFIG_GENERIC_FIND_BIT_LE */ #endif /* __BIG_ENDIAN */ -- cgit v1.2.3 From 63e424c84429903c92a0f1e9654c31ccaf6694d0 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 26 May 2011 16:26:10 -0700 Subject: arch: remove CONFIG_GENERIC_FIND_{NEXT_BIT,BIT_LE,LAST_BIT} By the previous style change, CONFIG_GENERIC_FIND_NEXT_BIT, CONFIG_GENERIC_FIND_BIT_LE, and CONFIG_GENERIC_FIND_LAST_BIT are not used to test for existence of find bitops anymore. 
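With the config symbols gone, an architecture that wants its own optimized
helper simply defines it, together with the matching macro, in its
asm/bitops.h, and the generic copy in lib/find_next_bit.c compiles out behind
its #ifndef guard. As an illustration only (this deliberately naive
bit-at-a-time body just shows the shape; a real override would use the
architecture's bit-search instructions):

	static inline unsigned long find_next_bit(const unsigned long *addr,
						  unsigned long size,
						  unsigned long offset)
	{
		for (; offset < size; offset++)
			if (test_bit(offset, addr))
				return offset;
		return size;
	}
	#define find_next_bit find_next_bit

The lib/ objects are now built unconditionally, so no per-architecture Kconfig
selection is needed any more.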
Signed-off-by: Akinobu Mita Acked-by: Greg Ungerer Cc: Arnd Bergmann Cc: Russell King Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/Kconfig | 4 ---- arch/blackfin/Kconfig | 3 --- arch/cris/Kconfig | 4 ---- arch/frv/Kconfig | 8 -------- arch/h8300/Kconfig | 8 -------- arch/ia64/Kconfig | 4 ---- arch/m32r/Kconfig | 8 -------- arch/m68k/Kconfig.nommu | 4 ---- arch/microblaze/Kconfig | 6 ------ arch/mips/Kconfig | 8 -------- arch/mn10300/Kconfig | 3 --- arch/parisc/Kconfig | 8 -------- arch/powerpc/Kconfig | 8 -------- arch/score/Kconfig | 3 --- arch/sh/Kconfig | 6 ------ arch/sparc/Kconfig | 8 -------- arch/tile/Kconfig | 1 - arch/um/Kconfig.x86 | 1 - arch/x86/Kconfig | 1 - arch/xtensa/Kconfig | 6 ------ include/linux/bitops.h | 2 -- lib/Kconfig | 10 ---------- lib/Makefile | 8 ++------ lib/find_next_bit.c | 6 ------ 24 files changed, 2 insertions(+), 126 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index e3a82775f9da..60219bf94198 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -41,10 +41,6 @@ config ARCH_HAS_ILOG2_U64 bool default n -config GENERIC_FIND_NEXT_BIT - bool - default y - config GENERIC_CALIBRATE_DELAY bool default y diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index a18180f2d007..d619b17c4413 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -47,9 +47,6 @@ config GENERIC_BUG config ZONE_DMA def_bool y -config GENERIC_FIND_NEXT_BIT - def_bool y - config GENERIC_GPIO def_bool y diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index a6d03069d0ff..b6b94a27d276 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -31,10 +31,6 @@ config ARCH_HAS_ILOG2_U64 bool default n -config GENERIC_FIND_NEXT_BIT - bool - default y - config GENERIC_HWEIGHT bool default y diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index 064f62196745..cb884e489425 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -19,14 +19,6 @@ config RWSEM_GENERIC_SPINLOCK config RWSEM_XCHGADD_ALGORITHM bool -config GENERIC_FIND_NEXT_BIT - bool - default y - -config GENERIC_FIND_BIT_LE - bool - default y - config GENERIC_HWEIGHT bool default y diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index e20322ffcaf8..091ed6192ae8 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -41,14 +41,6 @@ config ARCH_HAS_ILOG2_U64 bool default n -config GENERIC_FIND_NEXT_BIT - bool - default y - -config GENERIC_FIND_BIT_LE - bool - default y - config GENERIC_HWEIGHT bool default y diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index e5cc56ae6ce3..38280ef4a2af 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -78,10 +78,6 @@ config HUGETLB_PAGE_SIZE_VARIABLE depends on HUGETLB_PAGE default y -config GENERIC_FIND_NEXT_BIT - bool - default y - config GENERIC_CALIBRATE_DELAY bool default y diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 736b808d2291..85b44e858225 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -256,14 +256,6 @@ config ARCH_HAS_ILOG2_U64 bool default n -config GENERIC_FIND_NEXT_BIT - bool - default y - -config GENERIC_FIND_BIT_LE - bool - default y - config GENERIC_HWEIGHT bool default y diff --git a/arch/m68k/Kconfig.nommu b/arch/m68k/Kconfig.nommu index 273bccab9517..fc98f9b9d4d2 100644 --- a/arch/m68k/Kconfig.nommu +++ b/arch/m68k/Kconfig.nommu @@ -2,10 +2,6 @@ config FPU bool default n -config GENERIC_FIND_NEXT_BIT - bool - default y - config GENERIC_GPIO bool default n diff --git a/arch/microblaze/Kconfig 
b/arch/microblaze/Kconfig index eccdefe70d4e..e446bab2427b 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -33,12 +33,6 @@ config ARCH_HAS_ILOG2_U32 config ARCH_HAS_ILOG2_U64 def_bool n -config GENERIC_FIND_NEXT_BIT - def_bool y - -config GENERIC_FIND_BIT_LE - def_bool y - config GENERIC_HWEIGHT def_bool y diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index cef1a854487d..653da62d0682 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -821,14 +821,6 @@ config ARCH_SUPPORTS_OPROFILE bool default y if !MIPS_MT_SMTC -config GENERIC_FIND_NEXT_BIT - bool - default y - -config GENERIC_FIND_BIT_LE - bool - default y - config GENERIC_HWEIGHT bool default y diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index feaf09cc8632..1f870340ebdd 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -44,9 +44,6 @@ config GENERIC_CALIBRATE_DELAY config GENERIC_CMOS_UPDATE def_bool n -config GENERIC_FIND_NEXT_BIT - def_bool y - config GENERIC_HWEIGHT def_bool y diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 69ff049c8571..65adc86a230e 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -47,14 +47,6 @@ config ARCH_HAS_ILOG2_U64 bool default n -config GENERIC_FIND_NEXT_BIT - bool - default y - -config GENERIC_FIND_BIT_LE - bool - default y - config GENERIC_BUG bool default y diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2f6a22e8e935..2729c6663d8a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -91,14 +91,6 @@ config GENERIC_HWEIGHT bool default y -config GENERIC_FIND_NEXT_BIT - bool - default y - -config GENERIC_FIND_BIT_LE - bool - default y - config GENERIC_GPIO bool help diff --git a/arch/score/Kconfig b/arch/score/Kconfig index e73bc781cc14..288add8d168f 100644 --- a/arch/score/Kconfig +++ b/arch/score/Kconfig @@ -43,9 +43,6 @@ config NO_DMA config RWSEM_GENERIC_SPINLOCK def_bool y -config GENERIC_FIND_NEXT_BIT - def_bool y - config GENERIC_HWEIGHT def_bool y diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index b44e37753b9a..74495a5ea027 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -71,12 +71,6 @@ config GENERIC_CSUM def_bool y depends on SUPERH64 -config GENERIC_FIND_NEXT_BIT - def_bool y - -config GENERIC_FIND_BIT_LE - def_bool y - config GENERIC_HWEIGHT def_bool y diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 63a027c9ada5..af32e17fa170 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -190,14 +190,6 @@ config RWSEM_XCHGADD_ALGORITHM bool default y if SPARC64 -config GENERIC_FIND_NEXT_BIT - bool - default y - -config GENERIC_FIND_BIT_LE - bool - default y - config GENERIC_HWEIGHT bool default y if !ULTRA_HAS_POPULATION_COUNT diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 635e1bfb1c5d..e1e50101b3bb 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -5,7 +5,6 @@ config TILE def_bool y select HAVE_KVM if !TILEGX select GENERIC_FIND_FIRST_BIT - select GENERIC_FIND_NEXT_BIT select USE_GENERIC_SMP_HELPERS select CC_OPTIMIZE_FOR_SIZE select HAVE_GENERIC_HARDIRQS diff --git a/arch/um/Kconfig.x86 b/arch/um/Kconfig.x86 index 795ea8e869f4..8aae429a56e2 100644 --- a/arch/um/Kconfig.x86 +++ b/arch/um/Kconfig.x86 @@ -15,7 +15,6 @@ endmenu config UML_X86 def_bool y select GENERIC_FIND_FIRST_BIT - select GENERIC_FIND_NEXT_BIT config 64BIT bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 483775f42d2a..da349723d411 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -64,7 +64,6 @@ config X86 select HAVE_GENERIC_HARDIRQS select HAVE_SPARSE_IRQ select 
GENERIC_FIND_FIRST_BIT - select GENERIC_FIND_NEXT_BIT select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP select GENERIC_IRQ_SHOW diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 7c275f5d0df0..5d43c1f8ada8 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -20,12 +20,6 @@ config XTENSA config RWSEM_XCHGADD_ALGORITHM def_bool y -config GENERIC_FIND_NEXT_BIT - def_bool y - -config GENERIC_FIND_BIT_LE - def_bool y - config GENERIC_HWEIGHT def_bool y diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 4829252d7cfa..a3ef66a2a083 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -148,7 +148,6 @@ static inline unsigned long __ffs64(u64 word) #ifdef __KERNEL__ -#ifdef CONFIG_GENERIC_FIND_LAST_BIT #ifndef find_last_bit /** * find_last_bit - find the last set bit in a memory region @@ -160,7 +159,6 @@ static inline unsigned long __ffs64(u64 word) extern unsigned long find_last_bit(const unsigned long *addr, unsigned long size); #endif -#endif /* CONFIG_GENERIC_FIND_LAST_BIT */ #endif /* __KERNEL__ */ #endif diff --git a/lib/Kconfig b/lib/Kconfig index 9c10e38fc609..830181cc7a83 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -19,16 +19,6 @@ config RATIONAL config GENERIC_FIND_FIRST_BIT bool -config GENERIC_FIND_NEXT_BIT - bool - -config GENERIC_FIND_BIT_LE - bool - -config GENERIC_FIND_LAST_BIT - bool - default y - config CRC_CCITT tristate "CRC-CCITT functions" help diff --git a/lib/Makefile b/lib/Makefile index 4b49a249064b..6b597fdb1898 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -12,7 +12,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o prio_heap.o ratelimit.o show_mem.o \ - is_single_threaded.o plist.o decompress.o + is_single_threaded.o plist.o decompress.o find_next_bit.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o @@ -22,7 +22,7 @@ lib-y += kobject.o kref.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o \ - bsearch.o + bsearch.o find_last_bit.o obj-y += kstrtox.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o @@ -39,10 +39,6 @@ obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o -lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o -lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o -lib-$(CONFIG_GENERIC_FIND_BIT_LE) += find_next_bit.o -obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c index c02d09f37d58..4bd75a73ba00 100644 --- a/lib/find_next_bit.c +++ b/lib/find_next_bit.c @@ -16,7 +16,6 @@ #define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) -#ifdef CONFIG_GENERIC_FIND_NEXT_BIT #ifndef find_next_bit /* * Find the next set bit in a memory region. @@ -107,9 +106,7 @@ found_middle: } EXPORT_SYMBOL(find_next_zero_bit); #endif -#endif /* CONFIG_GENERIC_FIND_NEXT_BIT */ -#ifdef CONFIG_GENERIC_FIND_FIRST_BIT #ifndef find_first_bit /* * Find the first set bit in a memory region. 
@@ -165,10 +162,8 @@ found: } EXPORT_SYMBOL(find_first_zero_bit); #endif -#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ #ifdef __BIG_ENDIAN -#ifdef CONFIG_GENERIC_FIND_BIT_LE /* include/linux/byteorder does not support "unsigned long" type */ static inline unsigned long ext2_swabp(const unsigned long * x) @@ -287,5 +282,4 @@ found_middle_swap: EXPORT_SYMBOL(find_next_bit_le); #endif -#endif /* CONFIG_GENERIC_FIND_BIT_LE */ #endif /* __BIG_ENDIAN */ -- cgit v1.2.3 From 3474cb3cc0140f9cf6ca56983f8180b4b4c5c36a Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 10 May 2011 16:23:07 -0700 Subject: gpio: Convert gpio_is_valid to return bool Make the code a bit more readable. Instead of casting an int to an unsigned then comparing to MAX_NR_GPIOS, add a >= 0 test and let the compiler optimizer do the conversion to unsigned. The generated code should be the same. Signed-off-by: Joe Perches Acked-by: Linus Walleij Signed-off-by: Grant Likely --- include/asm-generic/gpio.h | 6 +++--- include/linux/gpio.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index ff5c66080c8c..a98b52cb970b 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -35,9 +35,9 @@ * platform data and other tables. */ -static inline int gpio_is_valid(int number) +static inline bool gpio_is_valid(int number) { - return ((unsigned)number) < ARCH_NR_GPIOS; + return number >= 0 && number < ARCH_NR_GPIOS; } struct device; @@ -212,7 +212,7 @@ extern void gpio_unexport(unsigned gpio); #else /* !CONFIG_GPIOLIB */ -static inline int gpio_is_valid(int number) +static inline bool gpio_is_valid(int number) { /* only non-negative numbers are valid */ return number >= 0; diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 32720baf70f1..0f8265f8e8c3 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -25,9 +25,9 @@ struct gpio_chip; * warning when something is wrongly called. */ -static inline int gpio_is_valid(int number) +static inline bool gpio_is_valid(int number) { - return 0; + return false; } static inline int gpio_request(unsigned gpio, const char *label) -- cgit v1.2.3 From 492c826b9facefa84995f4dea917e301b5ee0884 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 8 May 2011 22:30:18 +0100 Subject: regulator: Remove supply_regulator_dev from machine configuration supply_regulator_dev (using a struct pointer) has been deprecated in favour of supply_regulator (using a regulator name) for quite a few releases now with a warning generated if it is used and there are no current in tree users so just remove the code. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- Documentation/power/regulator/machine.txt | 4 ++-- drivers/regulator/core.c | 16 ---------------- include/linux/regulator/machine.h | 3 --- 3 files changed, 2 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/Documentation/power/regulator/machine.txt b/Documentation/power/regulator/machine.txt index bdec39b9bd75..b42419b52e44 100644 --- a/Documentation/power/regulator/machine.txt +++ b/Documentation/power/regulator/machine.txt @@ -53,11 +53,11 @@ static struct regulator_init_data regulator1_data = { Regulator-1 supplies power to Regulator-2. This relationship must be registered with the core so that Regulator-1 is also enabled when Consumer A enables its -supply (Regulator-2). The supply regulator is set by the supply_regulator_dev +supply (Regulator-2). 
The supply regulator is set by the supply_regulator field below:- static struct regulator_init_data regulator2_data = { - .supply_regulator_dev = &platform_regulator1_device.dev, + .supply_regulator = "regulator_name", .constraints = { .min_uV = 1800000, .max_uV = 2000000, diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 98d25fcb3930..432faa5cb8af 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -2596,14 +2596,6 @@ struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc, if (ret < 0) goto scrub; - /* set supply regulator if it exists */ - if (init_data->supply_regulator && init_data->supply_regulator_dev) { - dev_err(dev, - "Supply regulator specified by both name and dev\n"); - ret = -EINVAL; - goto scrub; - } - if (init_data->supply_regulator) { struct regulator_dev *r; int found = 0; @@ -2628,14 +2620,6 @@ struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc, goto scrub; } - if (init_data->supply_regulator_dev) { - dev_warn(dev, "Uses supply_regulator_dev instead of regulator_supply\n"); - ret = set_supply(rdev, - dev_get_drvdata(init_data->supply_regulator_dev)); - if (ret < 0) - goto scrub; - } - /* add consumers devices */ for (i = 0; i < init_data->num_consumer_supplies; i++) { ret = set_consumer_device_supply(rdev, diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index c4c4fc45f856..8f1a55d99494 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -160,8 +160,6 @@ struct regulator_consumer_supply { * @supply_regulator: Parent regulator. Specified using the regulator name * as it appears in the name field in sysfs, which can * be explicitly set using the constraints field 'name'. - * @supply_regulator_dev: Parent regulator (if any) - DEPRECATED in favour - * of supply_regulator. * * @constraints: Constraints. These must be specified for the regulator to * be usable. @@ -173,7 +171,6 @@ struct regulator_consumer_supply { */ struct regulator_init_data { const char *supply_regulator; /* or NULL for system supply */ - struct device *supply_regulator_dev; /* or NULL for system supply */ struct regulation_constraints constraints; -- cgit v1.2.3 From bf5892a8167e4aa5a9a6d72f803fde850e0c5753 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 8 May 2011 22:13:37 +0100 Subject: regulator: Support voltage offsets to compensate for drops in system Some systems, particularly physically large systems used for early prototyping, may experience substantial voltage drops between the regulator and the consumers as a result of long traces in the system. With these systems voltages may need to be set higher than requested in order to ensure reliable system operation. Allow systems to work around such hardware issues by allowing constraints to supply an offset to be applied to any requested and reported voltages. This is not ideal, especially since the voltage drop may be load dependant, but is sufficient for most affected systems, it is not expected to be used in production hardware. The offset is applied after all constraint processing so constraints should be specified in terms of consumer values not physically configured values. 
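For example, a board that measures roughly a 50mV drop along the trace to a
consumer could describe it in its machine constraints like this (the regulator
name and numbers are made up for illustration):

	#include <linux/regulator/machine.h>

	static struct regulator_init_data example_buck_data = {
		.constraints = {
			.min_uV		= 1800000,	/* what consumers request */
			.max_uV		= 3300000,
			.uV_offset	= 50000,	/* assumed drop to the load */
			.valid_ops_mask	= REGULATOR_CHANGE_VOLTAGE,
		},
	};

A consumer asking for 1.8V then causes the regulator hardware to be programmed
to 1.85V, while regulator_get_voltage() still reports 1.8V back, so consumers
never see the compensation.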
Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/core.c | 15 ++++++++++++--- include/linux/regulator/machine.h | 4 ++++ 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 432faa5cb8af..58452ac0f165 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -724,6 +724,10 @@ static void print_constraints(struct regulator_dev *rdev) count += sprintf(buf + count, "at %d mV ", ret / 1000); } + if (constraints->uV_offset) + count += sprintf(buf, "%dmV offset ", + constraints->uV_offset / 1000); + if (constraints->min_uA && constraints->max_uA) { if (constraints->min_uA == constraints->max_uA) count += sprintf(buf + count, "%d mA ", @@ -1641,6 +1645,9 @@ static int _regulator_do_set_voltage(struct regulator_dev *rdev, trace_regulator_set_voltage(rdev_get_name(rdev), min_uV, max_uV); + min_uV += rdev->constraints->uV_offset; + max_uV += rdev->constraints->uV_offset; + if (rdev->desc->ops->set_voltage) { ret = rdev->desc->ops->set_voltage(rdev, min_uV, max_uV, &selector); @@ -1865,18 +1872,20 @@ EXPORT_SYMBOL_GPL(regulator_sync_voltage); static int _regulator_get_voltage(struct regulator_dev *rdev) { - int sel; + int sel, ret; if (rdev->desc->ops->get_voltage_sel) { sel = rdev->desc->ops->get_voltage_sel(rdev); if (sel < 0) return sel; - return rdev->desc->ops->list_voltage(rdev, sel); + ret = rdev->desc->ops->list_voltage(rdev, sel); } if (rdev->desc->ops->get_voltage) - return rdev->desc->ops->get_voltage(rdev); + ret = rdev->desc->ops->get_voltage(rdev); else return -EINVAL; + + return ret - rdev->constraints->uV_offset; } /** diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 8f1a55d99494..ce3127a75c88 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -68,6 +68,8 @@ struct regulator_state { * * @min_uV: Smallest voltage consumers may set. * @max_uV: Largest voltage consumers may set. + * @uV_offset: Offset applied to voltages from consumer to compensate for + * voltage drops. * * @min_uA: Smallest consumers consumers may set. * @max_uA: Largest current consumers may set. @@ -99,6 +101,8 @@ struct regulation_constraints { int min_uV; int max_uV; + int uV_offset; + /* current output range (inclusive) - for current control */ int min_uA; int max_uA; -- cgit v1.2.3 From 27c6750ec56fd3b22c670d9333d519a322996eb2 Mon Sep 17 00:00:00 2001 From: Graeme Gregory Date: Mon, 2 May 2011 16:19:46 -0500 Subject: MFD: TPS65910: Add new mfd device for TPS65910 The TPS65910 chip is a power management IC for multimedia and handheld devices. It contains the following components: - Regulators - GPIO controller - RTC The tps65910 core driver is registered as a platform driver and provides communication through I2C with the host device for the different components. 
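The sub-devices are spawned as mfd cells and reach the shared I2C channel
through the parent's exported helpers. Roughly, a child driver would do
something like the sketch below; the register address and bit mask are
placeholders, not values from the TPS65910 datasheet:

	#include <linux/platform_device.h>
	#include <linux/mfd/tps65910.h>

	static int tps65910_child_probe(struct platform_device *pdev)
	{
		/* the mfd core made us a child of the I2C client's device,
		 * whose drvdata is the struct tps65910 set up at probe time */
		struct tps65910 *tps65910 = dev_get_drvdata(pdev->dev.parent);

		/* set a hypothetical enable bit in a hypothetical register */
		return tps65910_set_bits(tps65910, 0x3f, 0x01);
	}

The helpers serialise all accesses on io_mutex, so children never need to talk
to the I2C adapter directly.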
Signed-off-by: Graeme Gregory Signed-off-by: Jorge Eduardo Candelaria Acked-by: Samuel Ortiz Signed-off-by: Liam Girdwood --- drivers/mfd/Kconfig | 8 + drivers/mfd/Makefile | 1 + drivers/mfd/tps65910.c | 208 ++++++++++++ include/linux/mfd/tps65910.h | 753 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 970 insertions(+) create mode 100644 drivers/mfd/tps65910.c create mode 100644 include/linux/mfd/tps65910.h (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 8344fc0ab858..d67511a67fdd 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -719,6 +719,14 @@ config MFD_PM8XXX_IRQ This is required to use certain other PM 8xxx features, such as GPIO and MPP. +config MFD_TPS65910 + tristate "TPS65910 Power Management chip" + depends on I2C && GPIOLIB + select MFD_CORE + help + if you say yes here you get support for the TPS65910 series of + Power Management chips. + endif # MFD_SUPPORT menu "Multimedia Capabilities Port drivers" diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 1acb8f29a96c..f43ef5bba3ba 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -93,3 +93,4 @@ obj-$(CONFIG_MFD_CS5535) += cs5535-mfd.o obj-$(CONFIG_MFD_OMAP_USB_HOST) += omap-usb-host.o obj-$(CONFIG_MFD_PM8921_CORE) += pm8921-core.o obj-$(CONFIG_MFD_PM8XXX_IRQ) += pm8xxx-irq.o +obj-$(CONFIG_MFD_TPS65910) += tps65910.o diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c new file mode 100644 index 000000000000..dbcdfb55bb8d --- /dev/null +++ b/drivers/mfd/tps65910.c @@ -0,0 +1,208 @@ +/* + * tps65910.c -- TI TPS6591x + * + * Copyright 2010 Texas Instruments Inc. + * + * Author: Graeme Gregory + * Author: Jorge Eduardo Candelaria + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static struct mfd_cell tps65910s[] = { + { + .name = "tps65910-pmic", + }, + { + .name = "tps65910-rtc", + }, + { + .name = "tps65910-power", + }, +}; + + +static int tps65910_i2c_read(struct tps65910 *tps65910, u8 reg, + int bytes, void *dest) +{ + struct i2c_client *i2c = tps65910->i2c_client; + struct i2c_msg xfer[2]; + int ret; + + /* Write register */ + xfer[0].addr = i2c->addr; + xfer[0].flags = 0; + xfer[0].len = 1; + xfer[0].buf = ® + + /* Read data */ + xfer[1].addr = i2c->addr; + xfer[1].flags = I2C_M_RD; + xfer[1].len = bytes; + xfer[1].buf = dest; + + ret = i2c_transfer(i2c->adapter, xfer, 2); + if (ret == 2) + ret = 0; + else if (ret >= 0) + ret = -EIO; + + return ret; +} + +static int tps65910_i2c_write(struct tps65910 *tps65910, u8 reg, + int bytes, void *src) +{ + struct i2c_client *i2c = tps65910->i2c_client; + /* we add 1 byte for device register */ + u8 msg[TPS65910_MAX_REGISTER + 1]; + int ret; + + if (bytes > (TPS65910_MAX_REGISTER + 1)) + return -EINVAL; + + msg[0] = reg; + memcpy(&msg[1], src, bytes); + + ret = i2c_master_send(i2c, msg, bytes + 1); + if (ret < 0) + return ret; + if (ret != bytes + 1) + return -EIO; + return 0; +} + +int tps65910_set_bits(struct tps65910 *tps65910, u8 reg, u8 mask) +{ + u8 data; + int err; + + mutex_lock(&tps65910->io_mutex); + err = tps65910_i2c_read(tps65910, reg, 1, &data); + if (err) { + dev_err(tps65910->dev, "read from reg %x failed\n", reg); + goto out; + } + + data |= mask; + err = tps65910_i2c_write(tps65910, reg, 1, &data); + if (err) + dev_err(tps65910->dev, "write to reg %x failed\n", reg); + +out: + mutex_unlock(&tps65910->io_mutex); + return err; +} +EXPORT_SYMBOL_GPL(tps65910_set_bits); + +int tps65910_clear_bits(struct tps65910 *tps65910, u8 reg, u8 mask) +{ + u8 data; + int err; + + mutex_lock(&tps65910->io_mutex); + err = tps65910_i2c_read(tps65910, reg, 1, &data); + if (err) { + dev_err(tps65910->dev, "read from reg %x failed\n", reg); + goto out; + } + + data &= mask; + err = tps65910_i2c_write(tps65910, reg, 1, &data); + if (err) + dev_err(tps65910->dev, "write to reg %x failed\n", reg); + +out: + mutex_unlock(&tps65910->io_mutex); + return err; +} +EXPORT_SYMBOL_GPL(tps65910_clear_bits); + +static int tps65910_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct tps65910 *tps65910; + int ret = 0; + + tps65910 = kzalloc(sizeof(struct tps65910), GFP_KERNEL); + if (tps65910 == NULL) + return -ENOMEM; + + i2c_set_clientdata(i2c, tps65910); + tps65910->dev = &i2c->dev; + tps65910->i2c_client = i2c; + tps65910->read = tps65910_i2c_read; + tps65910->write = tps65910_i2c_write; + mutex_init(&tps65910->io_mutex); + + ret = mfd_add_devices(tps65910->dev, -1, + tps65910s, ARRAY_SIZE(tps65910s), + NULL, 0); + if (ret < 0) + goto err; + + return ret; + +err: + mfd_remove_devices(tps65910->dev); + kfree(tps65910); + return ret; +} + +static int tps65910_i2c_remove(struct i2c_client *i2c) +{ + struct tps65910 *tps65910 = i2c_get_clientdata(i2c); + + mfd_remove_devices(tps65910->dev); + kfree(tps65910); + + return 0; +} + +static const struct i2c_device_id tps65910_i2c_id[] = { + { "tps65910", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, tps65910_i2c_id); + + +static struct i2c_driver tps65910_i2c_driver = { + .driver = { + .name = "tps65910", + .owner = THIS_MODULE, + }, + .probe = tps65910_i2c_probe, + .remove = tps65910_i2c_remove, + .id_table = tps65910_i2c_id, +}; + +static int __init tps65910_i2c_init(void) 
+{ + return i2c_add_driver(&tps65910_i2c_driver); +} +/* init early so consumer devices can complete system boot */ +subsys_initcall(tps65910_i2c_init); + +static void __exit tps65910_i2c_exit(void) +{ + i2c_del_driver(&tps65910_i2c_driver); +} +module_exit(tps65910_i2c_exit); + +MODULE_AUTHOR("Graeme Gregory "); +MODULE_AUTHOR("Jorge Eduardo Candelaria "); +MODULE_DESCRIPTION("TPS6591x chip family multi-function driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h new file mode 100644 index 000000000000..fa1b029e5499 --- /dev/null +++ b/include/linux/mfd/tps65910.h @@ -0,0 +1,753 @@ +/* + * tps65910.h -- TI TPS6591x + * + * Copyright 2010-2011 Texas Instruments Inc. + * + * Author: Graeme Gregory + * Author: Jorge Eduardo Candelaria + * Author: Arnaud Deconinck + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef __LINUX_MFD_TPS65910_H +#define __LINUX_MFD_TPS65910_H + +/* + * List of registers for component TPS65910 + * + */ + +#define TPS65910_SECONDS 0x0 +#define TPS65910_MINUTES 0x1 +#define TPS65910_HOURS 0x2 +#define TPS65910_DAYS 0x3 +#define TPS65910_MONTHS 0x4 +#define TPS65910_YEARS 0x5 +#define TPS65910_WEEKS 0x6 +#define TPS65910_ALARM_SECONDS 0x8 +#define TPS65910_ALARM_MINUTES 0x9 +#define TPS65910_ALARM_HOURS 0xA +#define TPS65910_ALARM_DAYS 0xB +#define TPS65910_ALARM_MONTHS 0xC +#define TPS65910_ALARM_YEARS 0xD +#define TPS65910_RTC_CTRL 0x10 +#define TPS65910_RTC_STATUS 0x11 +#define TPS65910_RTC_INTERRUPTS 0x12 +#define TPS65910_RTC_COMP_LSB 0x13 +#define TPS65910_RTC_COMP_MSB 0x14 +#define TPS65910_RTC_RES_PROG 0x15 +#define TPS65910_RTC_RESET_STATUS 0x16 +#define TPS65910_BCK1 0x17 +#define TPS65910_BCK2 0x18 +#define TPS65910_BCK3 0x19 +#define TPS65910_BCK4 0x1A +#define TPS65910_BCK5 0x1B +#define TPS65910_PUADEN 0x1C +#define TPS65910_REF 0x1D +#define TPS65910_VRTC 0x1E +#define TPS65910_VIO 0x20 +#define TPS65910_VDD1 0x21 +#define TPS65910_VDD1_OP 0x22 +#define TPS65910_VDD1_SR 0x23 +#define TPS65910_VDD2 0x24 +#define TPS65910_VDD2_OP 0x25 +#define TPS65910_VDD2_SR 0x26 +#define TPS65910_VDD3 0x27 +#define TPS65910_VDIG1 0x30 +#define TPS65910_VDIG2 0x31 +#define TPS65910_VAUX1 0x32 +#define TPS65910_VAUX2 0x33 +#define TPS65910_VAUX33 0x34 +#define TPS65910_VMMC 0x35 +#define TPS65910_VPLL 0x36 +#define TPS65910_VDAC 0x37 +#define TPS65910_THERM 0x38 +#define TPS65910_BBCH 0x39 +#define TPS65910_DCDCCTRL 0x3E +#define TPS65910_DEVCTRL 0x3F +#define TPS65910_DEVCTRL2 0x40 +#define TPS65910_SLEEP_KEEP_LDO_ON 0x41 +#define TPS65910_SLEEP_KEEP_RES_ON 0x42 +#define TPS65910_SLEEP_SET_LDO_OFF 0x43 +#define TPS65910_SLEEP_SET_RES_OFF 0x44 +#define TPS65910_EN1_LDO_ASS 0x45 +#define TPS65910_EN1_SMPS_ASS 0x46 +#define TPS65910_EN2_LDO_ASS 0x47 +#define TPS65910_EN2_SMPS_ASS 0x48 +#define TPS65910_EN3_LDO_ASS 0x49 +#define TPS65910_SPARE 0x4A +#define TPS65910_INT_STS 0x50 +#define TPS65910_INT_MSK 0x51 +#define TPS65910_INT_STS2 0x52 +#define TPS65910_INT_MSK2 0x53 +#define TPS65910_INT_STS3 0x54 +#define TPS65910_INT_MSK3 0x55 +#define TPS65910_GPIO0 0x60 +#define TPS65910_GPIO1 0x61 +#define TPS65910_GPIO2 0x62 +#define TPS65910_GPIO3 0x63 +#define TPS65910_GPIO4 0x64 +#define TPS65910_GPIO5 0x65 +#define TPS65910_JTAGVERNUM 0x80 +#define TPS65910_MAX_REGISTER 0x80 + +/* + * List of 
register bitfields for component TPS65910 + * + */ + + +/*Register BCK1 (0x80) register.RegisterDescription */ +#define BCK1_BCKUP_MASK 0xFF +#define BCK1_BCKUP_SHIFT 0 + + +/*Register BCK2 (0x80) register.RegisterDescription */ +#define BCK2_BCKUP_MASK 0xFF +#define BCK2_BCKUP_SHIFT 0 + + +/*Register BCK3 (0x80) register.RegisterDescription */ +#define BCK3_BCKUP_MASK 0xFF +#define BCK3_BCKUP_SHIFT 0 + + +/*Register BCK4 (0x80) register.RegisterDescription */ +#define BCK4_BCKUP_MASK 0xFF +#define BCK4_BCKUP_SHIFT 0 + + +/*Register BCK5 (0x80) register.RegisterDescription */ +#define BCK5_BCKUP_MASK 0xFF +#define BCK5_BCKUP_SHIFT 0 + + +/*Register PUADEN (0x80) register.RegisterDescription */ +#define PUADEN_EN3P_MASK 0x80 +#define PUADEN_EN3P_SHIFT 7 +#define PUADEN_I2CCTLP_MASK 0x40 +#define PUADEN_I2CCTLP_SHIFT 6 +#define PUADEN_I2CSRP_MASK 0x20 +#define PUADEN_I2CSRP_SHIFT 5 +#define PUADEN_PWRONP_MASK 0x10 +#define PUADEN_PWRONP_SHIFT 4 +#define PUADEN_SLEEPP_MASK 0x08 +#define PUADEN_SLEEPP_SHIFT 3 +#define PUADEN_PWRHOLDP_MASK 0x04 +#define PUADEN_PWRHOLDP_SHIFT 2 +#define PUADEN_BOOT1P_MASK 0x02 +#define PUADEN_BOOT1P_SHIFT 1 +#define PUADEN_BOOT0P_MASK 0x01 +#define PUADEN_BOOT0P_SHIFT 0 + + +/*Register REF (0x80) register.RegisterDescription */ +#define REF_VMBCH_SEL_MASK 0x0C +#define REF_VMBCH_SEL_SHIFT 2 +#define REF_ST_MASK 0x03 +#define REF_ST_SHIFT 0 + + +/*Register VRTC (0x80) register.RegisterDescription */ +#define VRTC_VRTC_OFFMASK_MASK 0x08 +#define VRTC_VRTC_OFFMASK_SHIFT 3 +#define VRTC_ST_MASK 0x03 +#define VRTC_ST_SHIFT 0 + + +/*Register VIO (0x80) register.RegisterDescription */ +#define VIO_ILMAX_MASK 0xC0 +#define VIO_ILMAX_SHIFT 6 +#define VIO_SEL_MASK 0x0C +#define VIO_SEL_SHIFT 2 +#define VIO_ST_MASK 0x03 +#define VIO_ST_SHIFT 0 + + +/*Register VDD1 (0x80) register.RegisterDescription */ +#define VDD1_VGAIN_SEL_MASK 0xC0 +#define VDD1_VGAIN_SEL_SHIFT 6 +#define VDD1_ILMAX_MASK 0x20 +#define VDD1_ILMAX_SHIFT 5 +#define VDD1_TSTEP_MASK 0x1C +#define VDD1_TSTEP_SHIFT 2 +#define VDD1_ST_MASK 0x03 +#define VDD1_ST_SHIFT 0 + + +/*Register VDD1_OP (0x80) register.RegisterDescription */ +#define VDD1_OP_CMD_MASK 0x80 +#define VDD1_OP_CMD_SHIFT 7 +#define VDD1_OP_SEL_MASK 0x7F +#define VDD1_OP_SEL_SHIFT 0 + + +/*Register VDD1_SR (0x80) register.RegisterDescription */ +#define VDD1_SR_SEL_MASK 0x7F +#define VDD1_SR_SEL_SHIFT 0 + + +/*Register VDD2 (0x80) register.RegisterDescription */ +#define VDD2_VGAIN_SEL_MASK 0xC0 +#define VDD2_VGAIN_SEL_SHIFT 6 +#define VDD2_ILMAX_MASK 0x20 +#define VDD2_ILMAX_SHIFT 5 +#define VDD2_TSTEP_MASK 0x1C +#define VDD2_TSTEP_SHIFT 2 +#define VDD2_ST_MASK 0x03 +#define VDD2_ST_SHIFT 0 + + +/*Register VDD2_OP (0x80) register.RegisterDescription */ +#define VDD2_OP_CMD_MASK 0x80 +#define VDD2_OP_CMD_SHIFT 7 +#define VDD2_OP_SEL_MASK 0x7F +#define VDD2_OP_SEL_SHIFT 0 + + +/*Register VDD2_SR (0x80) register.RegisterDescription */ +#define VDD2_SR_SEL_MASK 0x7F +#define VDD2_SR_SEL_SHIFT 0 + + +/*Register VDD3 (0x80) register.RegisterDescription */ +#define VDD3_CKINEN_MASK 0x04 +#define VDD3_CKINEN_SHIFT 2 +#define VDD3_ST_MASK 0x03 +#define VDD3_ST_SHIFT 0 + + +/*Register VDIG1 (0x80) register.RegisterDescription */ +#define VDIG1_SEL_MASK 0x0C +#define VDIG1_SEL_SHIFT 2 +#define VDIG1_ST_MASK 0x03 +#define VDIG1_ST_SHIFT 0 + + +/*Register VDIG2 (0x80) register.RegisterDescription */ +#define VDIG2_SEL_MASK 0x0C +#define VDIG2_SEL_SHIFT 2 +#define VDIG2_ST_MASK 0x03 +#define VDIG2_ST_SHIFT 0 + + +/*Register VAUX1 (0x80) 
register.RegisterDescription */ +#define VAUX1_SEL_MASK 0x0C +#define VAUX1_SEL_SHIFT 2 +#define VAUX1_ST_MASK 0x03 +#define VAUX1_ST_SHIFT 0 + + +/*Register VAUX2 (0x80) register.RegisterDescription */ +#define VAUX2_SEL_MASK 0x0C +#define VAUX2_SEL_SHIFT 2 +#define VAUX2_ST_MASK 0x03 +#define VAUX2_ST_SHIFT 0 + + +/*Register VAUX33 (0x80) register.RegisterDescription */ +#define VAUX33_SEL_MASK 0x0C +#define VAUX33_SEL_SHIFT 2 +#define VAUX33_ST_MASK 0x03 +#define VAUX33_ST_SHIFT 0 + + +/*Register VMMC (0x80) register.RegisterDescription */ +#define VMMC_SEL_MASK 0x0C +#define VMMC_SEL_SHIFT 2 +#define VMMC_ST_MASK 0x03 +#define VMMC_ST_SHIFT 0 + + +/*Register VPLL (0x80) register.RegisterDescription */ +#define VPLL_SEL_MASK 0x0C +#define VPLL_SEL_SHIFT 2 +#define VPLL_ST_MASK 0x03 +#define VPLL_ST_SHIFT 0 + + +/*Register VDAC (0x80) register.RegisterDescription */ +#define VDAC_SEL_MASK 0x0C +#define VDAC_SEL_SHIFT 2 +#define VDAC_ST_MASK 0x03 +#define VDAC_ST_SHIFT 0 + + +/*Register THERM (0x80) register.RegisterDescription */ +#define THERM_THERM_HD_MASK 0x20 +#define THERM_THERM_HD_SHIFT 5 +#define THERM_THERM_TS_MASK 0x10 +#define THERM_THERM_TS_SHIFT 4 +#define THERM_THERM_HDSEL_MASK 0x0C +#define THERM_THERM_HDSEL_SHIFT 2 +#define THERM_RSVD1_MASK 0x02 +#define THERM_RSVD1_SHIFT 1 +#define THERM_THERM_STATE_MASK 0x01 +#define THERM_THERM_STATE_SHIFT 0 + + +/*Register BBCH (0x80) register.RegisterDescription */ +#define BBCH_BBSEL_MASK 0x06 +#define BBCH_BBSEL_SHIFT 1 +#define BBCH_BBCHEN_MASK 0x01 +#define BBCH_BBCHEN_SHIFT 0 + + +/*Register DCDCCTRL (0x80) register.RegisterDescription */ +#define DCDCCTRL_VDD2_PSKIP_MASK 0x20 +#define DCDCCTRL_VDD2_PSKIP_SHIFT 5 +#define DCDCCTRL_VDD1_PSKIP_MASK 0x10 +#define DCDCCTRL_VDD1_PSKIP_SHIFT 4 +#define DCDCCTRL_VIO_PSKIP_MASK 0x08 +#define DCDCCTRL_VIO_PSKIP_SHIFT 3 +#define DCDCCTRL_DCDCCKEXT_MASK 0x04 +#define DCDCCTRL_DCDCCKEXT_SHIFT 2 +#define DCDCCTRL_DCDCCKSYNC_MASK 0x03 +#define DCDCCTRL_DCDCCKSYNC_SHIFT 0 + + +/*Register DEVCTRL (0x80) register.RegisterDescription */ +#define DEVCTRL_RTC_PWDN_MASK 0x40 +#define DEVCTRL_RTC_PWDN_SHIFT 6 +#define DEVCTRL_CK32K_CTRL_MASK 0x20 +#define DEVCTRL_CK32K_CTRL_SHIFT 5 +#define DEVCTRL_SR_CTL_I2C_SEL_MASK 0x10 +#define DEVCTRL_SR_CTL_I2C_SEL_SHIFT 4 +#define DEVCTRL_DEV_OFF_RST_MASK 0x08 +#define DEVCTRL_DEV_OFF_RST_SHIFT 3 +#define DEVCTRL_DEV_ON_MASK 0x04 +#define DEVCTRL_DEV_ON_SHIFT 2 +#define DEVCTRL_DEV_SLP_MASK 0x02 +#define DEVCTRL_DEV_SLP_SHIFT 1 +#define DEVCTRL_DEV_OFF_MASK 0x01 +#define DEVCTRL_DEV_OFF_SHIFT 0 + + +/*Register DEVCTRL2 (0x80) register.RegisterDescription */ +#define DEVCTRL2_TSLOT_LENGTH_MASK 0x30 +#define DEVCTRL2_TSLOT_LENGTH_SHIFT 4 +#define DEVCTRL2_SLEEPSIG_POL_MASK 0x08 +#define DEVCTRL2_SLEEPSIG_POL_SHIFT 3 +#define DEVCTRL2_PWON_LP_OFF_MASK 0x04 +#define DEVCTRL2_PWON_LP_OFF_SHIFT 2 +#define DEVCTRL2_PWON_LP_RST_MASK 0x02 +#define DEVCTRL2_PWON_LP_RST_SHIFT 1 +#define DEVCTRL2_IT_POL_MASK 0x01 +#define DEVCTRL2_IT_POL_SHIFT 0 + + +/*Register SLEEP_KEEP_LDO_ON (0x80) register.RegisterDescription */ +#define SLEEP_KEEP_LDO_ON_VDAC_KEEPON_MASK 0x80 +#define SLEEP_KEEP_LDO_ON_VDAC_KEEPON_SHIFT 7 +#define SLEEP_KEEP_LDO_ON_VPLL_KEEPON_MASK 0x40 +#define SLEEP_KEEP_LDO_ON_VPLL_KEEPON_SHIFT 6 +#define SLEEP_KEEP_LDO_ON_VAUX33_KEEPON_MASK 0x20 +#define SLEEP_KEEP_LDO_ON_VAUX33_KEEPON_SHIFT 5 +#define SLEEP_KEEP_LDO_ON_VAUX2_KEEPON_MASK 0x10 +#define SLEEP_KEEP_LDO_ON_VAUX2_KEEPON_SHIFT 4 +#define SLEEP_KEEP_LDO_ON_VAUX1_KEEPON_MASK 0x08 +#define 
SLEEP_KEEP_LDO_ON_VAUX1_KEEPON_SHIFT 3 +#define SLEEP_KEEP_LDO_ON_VDIG2_KEEPON_MASK 0x04 +#define SLEEP_KEEP_LDO_ON_VDIG2_KEEPON_SHIFT 2 +#define SLEEP_KEEP_LDO_ON_VDIG1_KEEPON_MASK 0x02 +#define SLEEP_KEEP_LDO_ON_VDIG1_KEEPON_SHIFT 1 +#define SLEEP_KEEP_LDO_ON_VMMC_KEEPON_MASK 0x01 +#define SLEEP_KEEP_LDO_ON_VMMC_KEEPON_SHIFT 0 + + +/*Register SLEEP_KEEP_RES_ON (0x80) register.RegisterDescription */ +#define SLEEP_KEEP_RES_ON_THERM_KEEPON_MASK 0x80 +#define SLEEP_KEEP_RES_ON_THERM_KEEPON_SHIFT 7 +#define SLEEP_KEEP_RES_ON_CLKOUT32K_KEEPON_MASK 0x40 +#define SLEEP_KEEP_RES_ON_CLKOUT32K_KEEPON_SHIFT 6 +#define SLEEP_KEEP_RES_ON_VRTC_KEEPON_MASK 0x20 +#define SLEEP_KEEP_RES_ON_VRTC_KEEPON_SHIFT 5 +#define SLEEP_KEEP_RES_ON_I2CHS_KEEPON_MASK 0x10 +#define SLEEP_KEEP_RES_ON_I2CHS_KEEPON_SHIFT 4 +#define SLEEP_KEEP_RES_ON_VDD3_KEEPON_MASK 0x08 +#define SLEEP_KEEP_RES_ON_VDD3_KEEPON_SHIFT 3 +#define SLEEP_KEEP_RES_ON_VDD2_KEEPON_MASK 0x04 +#define SLEEP_KEEP_RES_ON_VDD2_KEEPON_SHIFT 2 +#define SLEEP_KEEP_RES_ON_VDD1_KEEPON_MASK 0x02 +#define SLEEP_KEEP_RES_ON_VDD1_KEEPON_SHIFT 1 +#define SLEEP_KEEP_RES_ON_VIO_KEEPON_MASK 0x01 +#define SLEEP_KEEP_RES_ON_VIO_KEEPON_SHIFT 0 + + +/*Register SLEEP_SET_LDO_OFF (0x80) register.RegisterDescription */ +#define SLEEP_SET_LDO_OFF_VDAC_SETOFF_MASK 0x80 +#define SLEEP_SET_LDO_OFF_VDAC_SETOFF_SHIFT 7 +#define SLEEP_SET_LDO_OFF_VPLL_SETOFF_MASK 0x40 +#define SLEEP_SET_LDO_OFF_VPLL_SETOFF_SHIFT 6 +#define SLEEP_SET_LDO_OFF_VAUX33_SETOFF_MASK 0x20 +#define SLEEP_SET_LDO_OFF_VAUX33_SETOFF_SHIFT 5 +#define SLEEP_SET_LDO_OFF_VAUX2_SETOFF_MASK 0x10 +#define SLEEP_SET_LDO_OFF_VAUX2_SETOFF_SHIFT 4 +#define SLEEP_SET_LDO_OFF_VAUX1_SETOFF_MASK 0x08 +#define SLEEP_SET_LDO_OFF_VAUX1_SETOFF_SHIFT 3 +#define SLEEP_SET_LDO_OFF_VDIG2_SETOFF_MASK 0x04 +#define SLEEP_SET_LDO_OFF_VDIG2_SETOFF_SHIFT 2 +#define SLEEP_SET_LDO_OFF_VDIG1_SETOFF_MASK 0x02 +#define SLEEP_SET_LDO_OFF_VDIG1_SETOFF_SHIFT 1 +#define SLEEP_SET_LDO_OFF_VMMC_SETOFF_MASK 0x01 +#define SLEEP_SET_LDO_OFF_VMMC_SETOFF_SHIFT 0 + + +/*Register SLEEP_SET_RES_OFF (0x80) register.RegisterDescription */ +#define SLEEP_SET_RES_OFF_DEFAULT_VOLT_MASK 0x80 +#define SLEEP_SET_RES_OFF_DEFAULT_VOLT_SHIFT 7 +#define SLEEP_SET_RES_OFF_RSVD_MASK 0x60 +#define SLEEP_SET_RES_OFF_RSVD_SHIFT 5 +#define SLEEP_SET_RES_OFF_SPARE_SETOFF_MASK 0x10 +#define SLEEP_SET_RES_OFF_SPARE_SETOFF_SHIFT 4 +#define SLEEP_SET_RES_OFF_VDD3_SETOFF_MASK 0x08 +#define SLEEP_SET_RES_OFF_VDD3_SETOFF_SHIFT 3 +#define SLEEP_SET_RES_OFF_VDD2_SETOFF_MASK 0x04 +#define SLEEP_SET_RES_OFF_VDD2_SETOFF_SHIFT 2 +#define SLEEP_SET_RES_OFF_VDD1_SETOFF_MASK 0x02 +#define SLEEP_SET_RES_OFF_VDD1_SETOFF_SHIFT 1 +#define SLEEP_SET_RES_OFF_VIO_SETOFF_MASK 0x01 +#define SLEEP_SET_RES_OFF_VIO_SETOFF_SHIFT 0 + + +/*Register EN1_LDO_ASS (0x80) register.RegisterDescription */ +#define EN1_LDO_ASS_VDAC_EN1_MASK 0x80 +#define EN1_LDO_ASS_VDAC_EN1_SHIFT 7 +#define EN1_LDO_ASS_VPLL_EN1_MASK 0x40 +#define EN1_LDO_ASS_VPLL_EN1_SHIFT 6 +#define EN1_LDO_ASS_VAUX33_EN1_MASK 0x20 +#define EN1_LDO_ASS_VAUX33_EN1_SHIFT 5 +#define EN1_LDO_ASS_VAUX2_EN1_MASK 0x10 +#define EN1_LDO_ASS_VAUX2_EN1_SHIFT 4 +#define EN1_LDO_ASS_VAUX1_EN1_MASK 0x08 +#define EN1_LDO_ASS_VAUX1_EN1_SHIFT 3 +#define EN1_LDO_ASS_VDIG2_EN1_MASK 0x04 +#define EN1_LDO_ASS_VDIG2_EN1_SHIFT 2 +#define EN1_LDO_ASS_VDIG1_EN1_MASK 0x02 +#define EN1_LDO_ASS_VDIG1_EN1_SHIFT 1 +#define EN1_LDO_ASS_VMMC_EN1_MASK 0x01 +#define EN1_LDO_ASS_VMMC_EN1_SHIFT 0 + + +/*Register EN1_SMPS_ASS (0x80) register.RegisterDescription */ +#define 
EN1_SMPS_ASS_RSVD_MASK 0xE0 +#define EN1_SMPS_ASS_RSVD_SHIFT 5 +#define EN1_SMPS_ASS_SPARE_EN1_MASK 0x10 +#define EN1_SMPS_ASS_SPARE_EN1_SHIFT 4 +#define EN1_SMPS_ASS_VDD3_EN1_MASK 0x08 +#define EN1_SMPS_ASS_VDD3_EN1_SHIFT 3 +#define EN1_SMPS_ASS_VDD2_EN1_MASK 0x04 +#define EN1_SMPS_ASS_VDD2_EN1_SHIFT 2 +#define EN1_SMPS_ASS_VDD1_EN1_MASK 0x02 +#define EN1_SMPS_ASS_VDD1_EN1_SHIFT 1 +#define EN1_SMPS_ASS_VIO_EN1_MASK 0x01 +#define EN1_SMPS_ASS_VIO_EN1_SHIFT 0 + + +/*Register EN2_LDO_ASS (0x80) register.RegisterDescription */ +#define EN2_LDO_ASS_VDAC_EN2_MASK 0x80 +#define EN2_LDO_ASS_VDAC_EN2_SHIFT 7 +#define EN2_LDO_ASS_VPLL_EN2_MASK 0x40 +#define EN2_LDO_ASS_VPLL_EN2_SHIFT 6 +#define EN2_LDO_ASS_VAUX33_EN2_MASK 0x20 +#define EN2_LDO_ASS_VAUX33_EN2_SHIFT 5 +#define EN2_LDO_ASS_VAUX2_EN2_MASK 0x10 +#define EN2_LDO_ASS_VAUX2_EN2_SHIFT 4 +#define EN2_LDO_ASS_VAUX1_EN2_MASK 0x08 +#define EN2_LDO_ASS_VAUX1_EN2_SHIFT 3 +#define EN2_LDO_ASS_VDIG2_EN2_MASK 0x04 +#define EN2_LDO_ASS_VDIG2_EN2_SHIFT 2 +#define EN2_LDO_ASS_VDIG1_EN2_MASK 0x02 +#define EN2_LDO_ASS_VDIG1_EN2_SHIFT 1 +#define EN2_LDO_ASS_VMMC_EN2_MASK 0x01 +#define EN2_LDO_ASS_VMMC_EN2_SHIFT 0 + + +/*Register EN2_SMPS_ASS (0x80) register.RegisterDescription */ +#define EN2_SMPS_ASS_RSVD_MASK 0xE0 +#define EN2_SMPS_ASS_RSVD_SHIFT 5 +#define EN2_SMPS_ASS_SPARE_EN2_MASK 0x10 +#define EN2_SMPS_ASS_SPARE_EN2_SHIFT 4 +#define EN2_SMPS_ASS_VDD3_EN2_MASK 0x08 +#define EN2_SMPS_ASS_VDD3_EN2_SHIFT 3 +#define EN2_SMPS_ASS_VDD2_EN2_MASK 0x04 +#define EN2_SMPS_ASS_VDD2_EN2_SHIFT 2 +#define EN2_SMPS_ASS_VDD1_EN2_MASK 0x02 +#define EN2_SMPS_ASS_VDD1_EN2_SHIFT 1 +#define EN2_SMPS_ASS_VIO_EN2_MASK 0x01 +#define EN2_SMPS_ASS_VIO_EN2_SHIFT 0 + + +/*Register EN3_LDO_ASS (0x80) register.RegisterDescription */ +#define EN3_LDO_ASS_VDAC_EN3_MASK 0x80 +#define EN3_LDO_ASS_VDAC_EN3_SHIFT 7 +#define EN3_LDO_ASS_VPLL_EN3_MASK 0x40 +#define EN3_LDO_ASS_VPLL_EN3_SHIFT 6 +#define EN3_LDO_ASS_VAUX33_EN3_MASK 0x20 +#define EN3_LDO_ASS_VAUX33_EN3_SHIFT 5 +#define EN3_LDO_ASS_VAUX2_EN3_MASK 0x10 +#define EN3_LDO_ASS_VAUX2_EN3_SHIFT 4 +#define EN3_LDO_ASS_VAUX1_EN3_MASK 0x08 +#define EN3_LDO_ASS_VAUX1_EN3_SHIFT 3 +#define EN3_LDO_ASS_VDIG2_EN3_MASK 0x04 +#define EN3_LDO_ASS_VDIG2_EN3_SHIFT 2 +#define EN3_LDO_ASS_VDIG1_EN3_MASK 0x02 +#define EN3_LDO_ASS_VDIG1_EN3_SHIFT 1 +#define EN3_LDO_ASS_VMMC_EN3_MASK 0x01 +#define EN3_LDO_ASS_VMMC_EN3_SHIFT 0 + + +/*Register SPARE (0x80) register.RegisterDescription */ +#define SPARE_SPARE_MASK 0xFF +#define SPARE_SPARE_SHIFT 0 + + +/*Register INT_STS (0x80) register.RegisterDescription */ +#define INT_STS_RTC_PERIOD_IT_MASK 0x80 +#define INT_STS_RTC_PERIOD_IT_SHIFT 7 +#define INT_STS_RTC_ALARM_IT_MASK 0x40 +#define INT_STS_RTC_ALARM_IT_SHIFT 6 +#define INT_STS_HOTDIE_IT_MASK 0x20 +#define INT_STS_HOTDIE_IT_SHIFT 5 +#define INT_STS_PWRHOLD_IT_MASK 0x10 +#define INT_STS_PWRHOLD_IT_SHIFT 4 +#define INT_STS_PWRON_LP_IT_MASK 0x08 +#define INT_STS_PWRON_LP_IT_SHIFT 3 +#define INT_STS_PWRON_IT_MASK 0x04 +#define INT_STS_PWRON_IT_SHIFT 2 +#define INT_STS_VMBHI_IT_MASK 0x02 +#define INT_STS_VMBHI_IT_SHIFT 1 +#define INT_STS_VMBDCH_IT_MASK 0x01 +#define INT_STS_VMBDCH_IT_SHIFT 0 + + +/*Register INT_MSK (0x80) register.RegisterDescription */ +#define INT_MSK_RTC_PERIOD_IT_MSK_MASK 0x80 +#define INT_MSK_RTC_PERIOD_IT_MSK_SHIFT 7 +#define INT_MSK_RTC_ALARM_IT_MSK_MASK 0x40 +#define INT_MSK_RTC_ALARM_IT_MSK_SHIFT 6 +#define INT_MSK_HOTDIE_IT_MSK_MASK 0x20 +#define INT_MSK_HOTDIE_IT_MSK_SHIFT 5 +#define INT_MSK_PWRHOLD_IT_MSK_MASK 0x10 +#define 
INT_MSK_PWRHOLD_IT_MSK_SHIFT 4 +#define INT_MSK_PWRON_LP_IT_MSK_MASK 0x08 +#define INT_MSK_PWRON_LP_IT_MSK_SHIFT 3 +#define INT_MSK_PWRON_IT_MSK_MASK 0x04 +#define INT_MSK_PWRON_IT_MSK_SHIFT 2 +#define INT_MSK_VMBHI_IT_MSK_MASK 0x02 +#define INT_MSK_VMBHI_IT_MSK_SHIFT 1 +#define INT_MSK_VMBDCH_IT_MSK_MASK 0x01 +#define INT_MSK_VMBDCH_IT_MSK_SHIFT 0 + + +/*Register INT_STS2 (0x80) register.RegisterDescription */ +#define INT_STS2_GPIO3_F_IT_MASK 0x80 +#define INT_STS2_GPIO3_F_IT_SHIFT 7 +#define INT_STS2_GPIO3_R_IT_MASK 0x40 +#define INT_STS2_GPIO3_R_IT_SHIFT 6 +#define INT_STS2_GPIO2_F_IT_MASK 0x20 +#define INT_STS2_GPIO2_F_IT_SHIFT 5 +#define INT_STS2_GPIO2_R_IT_MASK 0x10 +#define INT_STS2_GPIO2_R_IT_SHIFT 4 +#define INT_STS2_GPIO1_F_IT_MASK 0x08 +#define INT_STS2_GPIO1_F_IT_SHIFT 3 +#define INT_STS2_GPIO1_R_IT_MASK 0x04 +#define INT_STS2_GPIO1_R_IT_SHIFT 2 +#define INT_STS2_GPIO0_F_IT_MASK 0x02 +#define INT_STS2_GPIO0_F_IT_SHIFT 1 +#define INT_STS2_GPIO0_R_IT_MASK 0x01 +#define INT_STS2_GPIO0_R_IT_SHIFT 0 + + +/*Register INT_MSK2 (0x80) register.RegisterDescription */ +#define INT_MSK2_GPIO3_F_IT_MSK_MASK 0x80 +#define INT_MSK2_GPIO3_F_IT_MSK_SHIFT 7 +#define INT_MSK2_GPIO3_R_IT_MSK_MASK 0x40 +#define INT_MSK2_GPIO3_R_IT_MSK_SHIFT 6 +#define INT_MSK2_GPIO2_F_IT_MSK_MASK 0x20 +#define INT_MSK2_GPIO2_F_IT_MSK_SHIFT 5 +#define INT_MSK2_GPIO2_R_IT_MSK_MASK 0x10 +#define INT_MSK2_GPIO2_R_IT_MSK_SHIFT 4 +#define INT_MSK2_GPIO1_F_IT_MSK_MASK 0x08 +#define INT_MSK2_GPIO1_F_IT_MSK_SHIFT 3 +#define INT_MSK2_GPIO1_R_IT_MSK_MASK 0x04 +#define INT_MSK2_GPIO1_R_IT_MSK_SHIFT 2 +#define INT_MSK2_GPIO0_F_IT_MSK_MASK 0x02 +#define INT_MSK2_GPIO0_F_IT_MSK_SHIFT 1 +#define INT_MSK2_GPIO0_R_IT_MSK_MASK 0x01 +#define INT_MSK2_GPIO0_R_IT_MSK_SHIFT 0 + + +/*Register INT_STS3 (0x80) register.RegisterDescription */ +#define INT_STS3_GPIO5_F_IT_MASK 0x08 +#define INT_STS3_GPIO5_F_IT_SHIFT 3 +#define INT_STS3_GPIO5_R_IT_MASK 0x04 +#define INT_STS3_GPIO5_R_IT_SHIFT 2 +#define INT_STS3_GPIO4_F_IT_MASK 0x02 +#define INT_STS3_GPIO4_F_IT_SHIFT 1 +#define INT_STS3_GPIO4_R_IT_MASK 0x01 +#define INT_STS3_GPIO4_R_IT_SHIFT 0 + + +/*Register INT_MSK3 (0x80) register.RegisterDescription */ +#define INT_MSK3_GPIO5_F_IT_MSK_MASK 0x08 +#define INT_MSK3_GPIO5_F_IT_MSK_SHIFT 3 +#define INT_MSK3_GPIO5_R_IT_MSK_MASK 0x04 +#define INT_MSK3_GPIO5_R_IT_MSK_SHIFT 2 +#define INT_MSK3_GPIO4_F_IT_MSK_MASK 0x02 +#define INT_MSK3_GPIO4_F_IT_MSK_SHIFT 1 +#define INT_MSK3_GPIO4_R_IT_MSK_MASK 0x01 +#define INT_MSK3_GPIO4_R_IT_MSK_SHIFT 0 + + +/*Register GPIO0 (0x80) register.RegisterDescription */ +#define GPIO0_GPIO_DEB_MASK 0x10 +#define GPIO0_GPIO_DEB_SHIFT 4 +#define GPIO0_GPIO_PUEN_MASK 0x08 +#define GPIO0_GPIO_PUEN_SHIFT 3 +#define GPIO0_GPIO_CFG_MASK 0x04 +#define GPIO0_GPIO_CFG_SHIFT 2 +#define GPIO0_GPIO_STS_MASK 0x02 +#define GPIO0_GPIO_STS_SHIFT 1 +#define GPIO0_GPIO_SET_MASK 0x01 +#define GPIO0_GPIO_SET_SHIFT 0 + + +/*Register GPIO1 (0x80) register.RegisterDescription */ +#define GPIO1_GPIO_DEB_MASK 0x10 +#define GPIO1_GPIO_DEB_SHIFT 4 +#define GPIO1_GPIO_PUEN_MASK 0x08 +#define GPIO1_GPIO_PUEN_SHIFT 3 +#define GPIO1_GPIO_CFG_MASK 0x04 +#define GPIO1_GPIO_CFG_SHIFT 2 +#define GPIO1_GPIO_STS_MASK 0x02 +#define GPIO1_GPIO_STS_SHIFT 1 +#define GPIO1_GPIO_SET_MASK 0x01 +#define GPIO1_GPIO_SET_SHIFT 0 + + +/*Register GPIO2 (0x80) register.RegisterDescription */ +#define GPIO2_GPIO_DEB_MASK 0x10 +#define GPIO2_GPIO_DEB_SHIFT 4 +#define GPIO2_GPIO_PUEN_MASK 0x08 +#define GPIO2_GPIO_PUEN_SHIFT 3 +#define GPIO2_GPIO_CFG_MASK 0x04 +#define 
GPIO2_GPIO_CFG_SHIFT 2 +#define GPIO2_GPIO_STS_MASK 0x02 +#define GPIO2_GPIO_STS_SHIFT 1 +#define GPIO2_GPIO_SET_MASK 0x01 +#define GPIO2_GPIO_SET_SHIFT 0 + + +/*Register GPIO3 (0x80) register.RegisterDescription */ +#define GPIO3_GPIO_DEB_MASK 0x10 +#define GPIO3_GPIO_DEB_SHIFT 4 +#define GPIO3_GPIO_PUEN_MASK 0x08 +#define GPIO3_GPIO_PUEN_SHIFT 3 +#define GPIO3_GPIO_CFG_MASK 0x04 +#define GPIO3_GPIO_CFG_SHIFT 2 +#define GPIO3_GPIO_STS_MASK 0x02 +#define GPIO3_GPIO_STS_SHIFT 1 +#define GPIO3_GPIO_SET_MASK 0x01 +#define GPIO3_GPIO_SET_SHIFT 0 + + +/*Register GPIO4 (0x80) register.RegisterDescription */ +#define GPIO4_GPIO_DEB_MASK 0x10 +#define GPIO4_GPIO_DEB_SHIFT 4 +#define GPIO4_GPIO_PUEN_MASK 0x08 +#define GPIO4_GPIO_PUEN_SHIFT 3 +#define GPIO4_GPIO_CFG_MASK 0x04 +#define GPIO4_GPIO_CFG_SHIFT 2 +#define GPIO4_GPIO_STS_MASK 0x02 +#define GPIO4_GPIO_STS_SHIFT 1 +#define GPIO4_GPIO_SET_MASK 0x01 +#define GPIO4_GPIO_SET_SHIFT 0 + + +/*Register GPIO5 (0x80) register.RegisterDescription */ +#define GPIO5_GPIO_DEB_MASK 0x10 +#define GPIO5_GPIO_DEB_SHIFT 4 +#define GPIO5_GPIO_PUEN_MASK 0x08 +#define GPIO5_GPIO_PUEN_SHIFT 3 +#define GPIO5_GPIO_CFG_MASK 0x04 +#define GPIO5_GPIO_CFG_SHIFT 2 +#define GPIO5_GPIO_STS_MASK 0x02 +#define GPIO5_GPIO_STS_SHIFT 1 +#define GPIO5_GPIO_SET_MASK 0x01 +#define GPIO5_GPIO_SET_SHIFT 0 + + +/*Register JTAGVERNUM (0x80) register.RegisterDescription */ +#define JTAGVERNUM_VERNUM_MASK 0x0F +#define JTAGVERNUM_VERNUM_SHIFT 0 + + +/* IRQ Definitions */ +#define TPS65910_IRQ_VBAT_VMBDCH 0 +#define TPS65910_IRQ_VBAT_VMHI 1 +#define TPS65910_IRQ_PWRON 2 +#define TPS65910_IRQ_PWRON_LP 3 +#define TPS65910_IRQ_PWRHOLD 4 +#define TPS65910_IRQ_HOTDIE 5 +#define TPS65910_IRQ_RTC_ALARM 6 +#define TPS65910_IRQ_RTC_PERIOD 7 +#define TPS65910_IRQ_GPIO_R 8 +#define TPS65910_IRQ_GPIO_F 9 +#define TPS65910_NUM_IRQ 10 + +/* GPIO Register Definitions */ +#define TPS65910_GPIO_DEB BIT(2) +#define TPS65910_GPIO_PUEN BIT(3) +#define TPS65910_GPIO_CFG BIT(2) +#define TPS65910_GPIO_STS BIT(1) +#define TPS65910_GPIO_SET BIT(0) + +/** + * struct tps65910_board + * Board platform data may be used to initialize regulators. + */ + +struct tps65910_board { + struct regulator_init_data *tps65910_pmic_init_data; +}; + +/** + * struct tps65910 - tps65910 sub-driver chip access routines + */ + +struct tps65910 { + struct device *dev; + struct i2c_client *i2c_client; + struct mutex io_mutex; + int (*read)(struct tps65910 *tps65910, u8 reg, int size, void *dest); + int (*write)(struct tps65910 *tps65910, u8 reg, int size, void *src); + + /* Client devices */ + struct tps65910_pmic *pmic; + struct tps65910_rtc *rtc; + struct tps65910_power *power; + + /* GPIO Handling */ + struct gpio_chip gpio; + + /* IRQ Handling */ + struct mutex irq_lock; + int chip_irq; + int irq_base; + u16 irq_mask; +}; + +struct tps65910_platform_data { + int irq_base; +}; + +int tps65910_set_bits(struct tps65910 *tps65910, u8 reg, u8 mask); +int tps65910_clear_bits(struct tps65910 *tps65910, u8 reg, u8 mask); + +#endif /* __LINUX_MFD_TPS65910_H */ -- cgit v1.2.3 From 2537df722d338ab687d7ed91dc589265c0d14aec Mon Sep 17 00:00:00 2001 From: Graeme Gregory Date: Mon, 2 May 2011 16:19:52 -0500 Subject: TPS65910: GPIO: Add GPIO driver TPS65910 has one configurable GPIO that can be used for several purposes. Subsequent versions of the TPS chip support more than one GPIO. 
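
As a rough usage sketch (not part of this patch; the base number and label are invented), a board passes a non-zero gpio_base in struct tps65910_board and the pin can then be driven through the ordinary gpiolib calls:

/* Hypothetical consumer code; the GPIO number is board specific. */
#define BOARD_TPS65910_GPIO_BASE	200	/* value passed as pdata->gpio_base */

	int err;

	err = gpio_request(BOARD_TPS65910_GPIO_BASE, "pmic-gpio0");
	if (err)
		return err;

	/* configure GPIO0 as an output driven high */
	err = gpio_direction_output(BOARD_TPS65910_GPIO_BASE, 1);

Leaving gpio_base at zero skips gpiochip registration entirely, since the init function added below simply returns in that case.
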
Signed-off-by: Graeme Gregory Signed-off-by: Jorge Eduardo Candelaria Acked-by: Grant Likely Signed-off-by: Liam Girdwood --- drivers/mfd/Makefile | 2 +- drivers/mfd/tps65910-gpio.c | 91 ++++++++++++++++++++++++++++++++++++++++++++ drivers/mfd/tps65910.c | 7 ++++ include/linux/mfd/tps65910.h | 2 + 4 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 drivers/mfd/tps65910-gpio.c (limited to 'include/linux') diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index f43ef5bba3ba..c64fa8d48f45 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -93,4 +93,4 @@ obj-$(CONFIG_MFD_CS5535) += cs5535-mfd.o obj-$(CONFIG_MFD_OMAP_USB_HOST) += omap-usb-host.o obj-$(CONFIG_MFD_PM8921_CORE) += pm8921-core.o obj-$(CONFIG_MFD_PM8XXX_IRQ) += pm8xxx-irq.o -obj-$(CONFIG_MFD_TPS65910) += tps65910.o +obj-$(CONFIG_MFD_TPS65910) += tps65910.o tps65910-gpio.o diff --git a/drivers/mfd/tps65910-gpio.c b/drivers/mfd/tps65910-gpio.c new file mode 100644 index 000000000000..f3ae37652446 --- /dev/null +++ b/drivers/mfd/tps65910-gpio.c @@ -0,0 +1,91 @@ +/* + * tps65910-gpio.c -- TI TPS6591x + * + * Copyright 2010 Texas Instruments Inc. + * + * Author: Graeme Gregory + * Author: Jorge Eduardo Candelaria jedu@slimlogic.co.uk> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include + +static int tps65910_gpio_get(struct gpio_chip *gc, unsigned offset) +{ + struct tps65910 *tps65910 = container_of(gc, struct tps65910, gpio); + uint8_t val; + + tps65910->read(tps65910, TPS65910_GPIO0, 1, &val); + + if (val & GPIO0_GPIO_STS_MASK) + return 1; + + return 0; +} + +static void tps65910_gpio_set(struct gpio_chip *gc, unsigned offset, + int value) +{ + struct tps65910 *tps65910 = container_of(gc, struct tps65910, gpio); + + if (value) + tps65910_set_bits(tps65910, TPS65910_GPIO0, + GPIO0_GPIO_SET_MASK); + else + tps65910_clear_bits(tps65910, TPS65910_GPIO0, + GPIO0_GPIO_SET_MASK); +} + +static int tps65910_gpio_output(struct gpio_chip *gc, unsigned offset, + int value) +{ + struct tps65910 *tps65910 = container_of(gc, struct tps65910, gpio); + + /* Set the initial value */ + tps65910_gpio_set(gc, 0, value); + + return tps65910_set_bits(tps65910, TPS65910_GPIO0, GPIO0_GPIO_CFG_MASK); +} + +static int tps65910_gpio_input(struct gpio_chip *gc, unsigned offset) +{ + struct tps65910 *tps65910 = container_of(gc, struct tps65910, gpio); + + return tps65910_clear_bits(tps65910, TPS65910_GPIO0, + GPIO0_GPIO_CFG_MASK); +} + +void tps65910_gpio_init(struct tps65910 *tps65910, int gpio_base) +{ + int ret; + + if (!gpio_base) + return; + + tps65910->gpio.owner = THIS_MODULE; + tps65910->gpio.label = tps65910->i2c_client->name; + tps65910->gpio.dev = tps65910->dev; + tps65910->gpio.base = gpio_base; + tps65910->gpio.ngpio = 1; + tps65910->gpio.can_sleep = 1; + + tps65910->gpio.direction_input = tps65910_gpio_input; + tps65910->gpio.direction_output = tps65910_gpio_output; + tps65910->gpio.set = tps65910_gpio_set; + tps65910->gpio.get = tps65910_gpio_get; + + ret = gpiochip_add(&tps65910->gpio); + + if (ret) + dev_warn(tps65910->dev, "GPIO registration failed: %d\n", ret); +} diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c index dbcdfb55bb8d..5a6a7be1f8cf 100644 --- a/drivers/mfd/tps65910.c +++ b/drivers/mfd/tps65910.c @@ -135,8 
+135,13 @@ static int tps65910_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { struct tps65910 *tps65910; + struct tps65910_board *pmic_plat_data; int ret = 0; + pmic_plat_data = dev_get_platdata(&i2c->dev); + if (!pmic_plat_data) + return -EINVAL; + tps65910 = kzalloc(sizeof(struct tps65910), GFP_KERNEL); if (tps65910 == NULL) return -ENOMEM; @@ -154,6 +159,8 @@ static int tps65910_i2c_probe(struct i2c_client *i2c, if (ret < 0) goto err; + tps65910_gpio_init(tps65910, pmic_plat_data->gpio_base); + return ret; err: diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index fa1b029e5499..0e01d504ab58 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -714,6 +714,7 @@ */ struct tps65910_board { + int gpio_base; struct regulator_init_data *tps65910_pmic_init_data; }; @@ -749,5 +750,6 @@ struct tps65910_platform_data { int tps65910_set_bits(struct tps65910 *tps65910, u8 reg, u8 mask); int tps65910_clear_bits(struct tps65910 *tps65910, u8 reg, u8 mask); +void tps65910_gpio_init(struct tps65910 *tps65910, int gpio_base); #endif /* __LINUX_MFD_TPS65910_H */ -- cgit v1.2.3 From e3471bdc2784ee20a0d636c5904200c2d1148ef9 Mon Sep 17 00:00:00 2001 From: Graeme Gregory Date: Mon, 2 May 2011 16:20:04 -0500 Subject: TPS65910: IRQ: Add interrupt controller This module controls the interrupt handling for the tps chip. The interrupt sources are the following: - GPIO falling/rising edge detection - Battery voltage below/above threshold - PWRON signal - PWRHOLD signal - Temperature detection - RTC alarm and periodic event Signed-off-by: Graeme Gregory Signed-off-by: Jorge Eduardo Candelaria Reviewed-by: Mark Brown Acked-by: Samuel Ortiz Signed-off-by: Liam Girdwood --- drivers/mfd/Makefile | 2 +- drivers/mfd/tps65910-irq.c | 187 +++++++++++++++++++++++++++++++++++++++++++ drivers/mfd/tps65910.c | 12 +++ include/linux/mfd/tps65910.h | 5 ++ 4 files changed, 205 insertions(+), 1 deletion(-) create mode 100644 drivers/mfd/tps65910-irq.c (limited to 'include/linux') diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index c64fa8d48f45..bdb8eab5668b 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -93,4 +93,4 @@ obj-$(CONFIG_MFD_CS5535) += cs5535-mfd.o obj-$(CONFIG_MFD_OMAP_USB_HOST) += omap-usb-host.o obj-$(CONFIG_MFD_PM8921_CORE) += pm8921-core.o obj-$(CONFIG_MFD_PM8XXX_IRQ) += pm8xxx-irq.o -obj-$(CONFIG_MFD_TPS65910) += tps65910.o tps65910-gpio.o +obj-$(CONFIG_MFD_TPS65910) += tps65910.o tps65910-gpio.o tps65910-irq.o diff --git a/drivers/mfd/tps65910-irq.c b/drivers/mfd/tps65910-irq.c new file mode 100644 index 000000000000..b8435e045f81 --- /dev/null +++ b/drivers/mfd/tps65910-irq.c @@ -0,0 +1,187 @@ +/* + * tps65910-irq.c -- TI TPS6591x + * + * Copyright 2010 Texas Instruments Inc. + * + * Author: Graeme Gregory + * Author: Jorge Eduardo Candelaria + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static inline int irq_to_tps65910_irq(struct tps65910 *tps65910, + int irq) +{ + return (irq - tps65910->irq_base); +} + +/* + * This is a threaded IRQ handler so can access I2C/SPI. 
Since all + * interrupts are clear on read the IRQ line will be reasserted and + * the physical IRQ will be handled again if another interrupt is + * asserted while we run - in the normal course of events this is a + * rare occurrence so we save I2C/SPI reads. We're also assuming that + * it's rare to get lots of interrupts firing simultaneously so try to + * minimise I/O. + */ +static irqreturn_t tps65910_irq(int irq, void *irq_data) +{ + struct tps65910 *tps65910 = irq_data; + u16 irq_sts; + u16 irq_mask; + u8 reg; + int i; + + tps65910->read(tps65910, TPS65910_INT_STS, 1, ®); + irq_sts = reg; + tps65910->read(tps65910, TPS65910_INT_STS2, 1, ®); + irq_sts |= reg << 8; + + tps65910->read(tps65910, TPS65910_INT_MSK, 1, ®); + irq_mask = reg; + tps65910->read(tps65910, TPS65910_INT_MSK2, 1, ®); + irq_mask |= reg << 8; + + irq_sts &= ~irq_mask; + + if (!irq_sts) + return IRQ_NONE; + + for (i = 0; i < TPS65910_NUM_IRQ; i++) { + + if (!(irq_sts & (1 << i))) + continue; + + handle_nested_irq(tps65910->irq_base + i); + } + + /* Write the STS register back to clear IRQs we handled */ + reg = irq_sts & 0xFF; + tps65910->write(tps65910, TPS65910_INT_STS, 1, ®); + reg = irq_sts >> 8; + tps65910->write(tps65910, TPS65910_INT_STS2, 1, ®); + + return IRQ_HANDLED; +} + +static void tps65910_irq_lock(struct irq_data *data) +{ + struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data); + + mutex_lock(&tps65910->irq_lock); +} + +static void tps65910_irq_sync_unlock(struct irq_data *data) +{ + struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data); + u16 reg_mask; + u8 reg; + + tps65910->read(tps65910, TPS65910_INT_MSK, 1, ®); + reg_mask = reg; + tps65910->read(tps65910, TPS65910_INT_MSK2, 1, ®); + reg_mask |= reg << 8; + + if (tps65910->irq_mask != reg_mask) { + reg = tps65910->irq_mask & 0xFF; + tps65910->write(tps65910, TPS65910_INT_MSK, 1, ®); + reg = tps65910->irq_mask >> 8; + tps65910->write(tps65910, TPS65910_INT_MSK2, 1, ®); + } + mutex_unlock(&tps65910->irq_lock); +} + +static void tps65910_irq_enable(struct irq_data *data) +{ + struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data); + + tps65910->irq_mask &= ~( 1 << irq_to_tps65910_irq(tps65910, data->irq)); +} + +static void tps65910_irq_disable(struct irq_data *data) +{ + struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data); + + tps65910->irq_mask |= ( 1 << irq_to_tps65910_irq(tps65910, data->irq)); +} + +static struct irq_chip tps65910_irq_chip = { + .name = "tps65910", + .irq_bus_lock = tps65910_irq_lock, + .irq_bus_sync_unlock = tps65910_irq_sync_unlock, + .irq_disable = tps65910_irq_disable, + .irq_enable = tps65910_irq_enable, +}; + +int tps65910_irq_init(struct tps65910 *tps65910, int irq, + struct tps65910_platform_data *pdata) +{ + int ret, cur_irq; + int flags = IRQF_ONESHOT; + u8 reg; + + if (!irq) { + dev_warn(tps65910->dev, "No interrupt support, no core IRQ\n"); + return -EINVAL; + } + + if (!pdata || !pdata->irq_base) { + dev_warn(tps65910->dev, "No interrupt support, no IRQ base\n"); + return -EINVAL; + } + + /* Mask top level interrupts */ + reg = 0xFF; + tps65910->write(tps65910, TPS65910_INT_MSK, 1, ®); + reg = 0x03; + tps65910->write(tps65910, TPS65910_INT_MSK2, 1, ®); + + mutex_init(&tps65910->irq_lock); + tps65910->chip_irq = irq; + tps65910->irq_base = pdata->irq_base; + + /* Register with genirq */ + for (cur_irq = tps65910->irq_base; + cur_irq < TPS65910_NUM_IRQ + tps65910->irq_base; + cur_irq++) { + irq_set_chip_data(cur_irq, tps65910); + irq_set_chip_and_handler(cur_irq, &tps65910_irq_chip, + 
handle_edge_irq); + irq_set_nested_thread(cur_irq, 1); + + /* ARM needs us to explicitly flag the IRQ as valid + * and will set them noprobe when we do so. */ +#ifdef CONFIG_ARM + set_irq_flags(cur_irq, IRQF_VALID); +#else + irq_set_noprobe(cur_irq); +#endif + } + + ret = request_threaded_irq(irq, NULL, tps65910_irq, flags, + "tps65910", tps65910); + if (ret != 0) + dev_err(tps65910->dev, "Failed to request IRQ: %d\n", ret); + + return ret; +} + +int tps65910_irq_exit(struct tps65910 *tps65910) +{ + free_irq(tps65910->chip_irq, tps65910); + return 0; +} diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c index 5a6a7be1f8cf..bf649cf6a0ae 100644 --- a/drivers/mfd/tps65910.c +++ b/drivers/mfd/tps65910.c @@ -136,12 +136,20 @@ static int tps65910_i2c_probe(struct i2c_client *i2c, { struct tps65910 *tps65910; struct tps65910_board *pmic_plat_data; + struct tps65910_platform_data *init_data; int ret = 0; pmic_plat_data = dev_get_platdata(&i2c->dev); if (!pmic_plat_data) return -EINVAL; + init_data = kzalloc(sizeof(struct tps65910_platform_data), GFP_KERNEL); + if (init_data == NULL) + return -ENOMEM; + + init_data->irq = pmic_plat_data->irq; + init_data->irq_base = pmic_plat_data->irq; + tps65910 = kzalloc(sizeof(struct tps65910), GFP_KERNEL); if (tps65910 == NULL) return -ENOMEM; @@ -161,6 +169,10 @@ static int tps65910_i2c_probe(struct i2c_client *i2c, tps65910_gpio_init(tps65910, pmic_plat_data->gpio_base); + ret = tps65910_irq_init(tps65910, init_data->irq, init_data); + if (ret < 0) + goto err; + return ret; err: diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index 0e01d504ab58..8afe91c85587 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -715,6 +715,8 @@ struct tps65910_board { int gpio_base; + int irq; + int irq_base; struct regulator_init_data *tps65910_pmic_init_data; }; @@ -745,11 +747,14 @@ struct tps65910 { }; struct tps65910_platform_data { + int irq; int irq_base; }; int tps65910_set_bits(struct tps65910 *tps65910, u8 reg, u8 mask); int tps65910_clear_bits(struct tps65910 *tps65910, u8 reg, u8 mask); void tps65910_gpio_init(struct tps65910 *tps65910, int gpio_base); +int tps65910_irq_init(struct tps65910 *tps65910, int irq, + struct tps65910_platform_data *pdata); #endif /* __LINUX_MFD_TPS65910_H */ -- cgit v1.2.3 From 518fb721de3685c8326e72746151b534a241feda Mon Sep 17 00:00:00 2001 From: Graeme Gregory Date: Mon, 2 May 2011 16:20:08 -0500 Subject: TPS65910: Add tps65910 regulator driver The regulator module consists of 3 DCDCs and 8 LDOs. The output voltages are configurable and are meant to supply power to the main processor and other components Signed-off-by: Graeme Gregory Signed-off-by: Jorge Eduardo Candelaria Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/Kconfig | 6 + drivers/regulator/Makefile | 1 + drivers/regulator/tps65910-regulator.c | 705 +++++++++++++++++++++++++++++++++ include/linux/mfd/tps65910.h | 14 + 4 files changed, 726 insertions(+) create mode 100644 drivers/regulator/tps65910-regulator.c (limited to 'include/linux') diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index f0b13a0d1851..d7ed20f293d7 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -297,5 +297,11 @@ config REGULATOR_TPS6524X serial interface currently supported on the sequencer serial port controller. +config REGULATOR_TPS65910 + tristate "TI TPS65910 Power Regulator" + depends on MFD_TPS65910 + help + This driver supports TPS65910 voltage regulator chips. 
+ endif diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index 165ff5371e9e..3932d2ec38f3 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -42,5 +42,6 @@ obj-$(CONFIG_REGULATOR_88PM8607) += 88pm8607.o obj-$(CONFIG_REGULATOR_ISL6271A) += isl6271a-regulator.o obj-$(CONFIG_REGULATOR_AB8500) += ab8500.o obj-$(CONFIG_REGULATOR_DB8500_PRCMU) += db8500-prcmu.o +obj-$(CONFIG_REGULATOR_TPS65910) += tps65910-regulator.o ccflags-$(CONFIG_REGULATOR_DEBUG) += -DDEBUG diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c new file mode 100644 index 000000000000..d461fa7906b8 --- /dev/null +++ b/drivers/regulator/tps65910-regulator.c @@ -0,0 +1,705 @@ +/* + * tps65910.c -- TI tps65910 + * + * Copyright 2010 Texas Instruments Inc. + * + * Author: Graeme Gregory + * Author: Jorge Eduardo Candelaria + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define TPS65910_REG_VRTC 0 +#define TPS65910_REG_VIO 1 +#define TPS65910_REG_VDD1 2 +#define TPS65910_REG_VDD2 3 +#define TPS65910_REG_VDD3 4 +#define TPS65910_REG_VDIG1 5 +#define TPS65910_REG_VDIG2 6 +#define TPS65910_REG_VPLL 7 +#define TPS65910_REG_VDAC 8 +#define TPS65910_REG_VAUX1 9 +#define TPS65910_REG_VAUX2 10 +#define TPS65910_REG_VAUX33 11 +#define TPS65910_REG_VMMC 12 + +#define TPS65910_NUM_REGULATOR 13 + +#define TPS65910_SUPPLY_STATE_ENABLED 0x1 + +/* supported VIO voltages in milivolts */ +static const u16 VIO_VSEL_table[] = { + 1500, 1800, 2500, 3300, +}; + +/* supported VIO voltages in milivolts */ +static const u16 VDD3_VSEL_table[] = { + 5000, +}; + +/* supported VDIG1 voltages in milivolts */ +static const u16 VDIG1_VSEL_table[] = { + 1200, 1500, 1800, 2700, +}; + +/* supported VDIG2 voltages in milivolts */ +static const u16 VDIG2_VSEL_table[] = { + 1000, 1100, 1200, 1800, +}; + +/* supported VPLL voltages in milivolts */ +static const u16 VPLL_VSEL_table[] = { + 1000, 1100, 1800, 2500, +}; + +/* supported VDAC voltages in milivolts */ +static const u16 VDAC_VSEL_table[] = { + 1800, 2600, 2800, 2850, +}; + +/* supported VAUX1 voltages in milivolts */ +static const u16 VAUX1_VSEL_table[] = { + 1800, 2500, 2800, 2850, +}; + +/* supported VAUX2 voltages in milivolts */ +static const u16 VAUX2_VSEL_table[] = { + 1800, 2800, 2900, 3300, +}; + +/* supported VAUX33 voltages in milivolts */ +static const u16 VAUX33_VSEL_table[] = { + 1800, 2000, 2800, 3300, +}; + +/* supported VMMC voltages in milivolts */ +static const u16 VMMC_VSEL_table[] = { + 1800, 2800, 3000, 3300, +}; + +struct tps_info { + const char *name; + unsigned min_uV; + unsigned max_uV; + u8 table_len; + const u16 *table; +}; + +static struct tps_info tps65910_regs[] = { + { + .name = "VRTC", + }, + { + .name = "VIO", + .min_uV = 1500000, + .max_uV = 3300000, + .table_len = ARRAY_SIZE(VIO_VSEL_table), + .table = VIO_VSEL_table, + }, + { + .name = "VDD1", + .min_uV = 600000, + .max_uV = 4500000, + }, + { + .name = "VDD2", + .min_uV = 600000, + .max_uV = 4500000, + }, + { + .name = "VDD3", + .min_uV = 5000000, + .max_uV = 5000000, + .table_len = ARRAY_SIZE(VDD3_VSEL_table), + .table = VDD3_VSEL_table, + }, + { + .name = "VDIG1", + .min_uV = 1200000, + .max_uV = 2700000, + 
.table_len = ARRAY_SIZE(VDIG1_VSEL_table), + .table = VDIG1_VSEL_table, + }, + { + .name = "VDIG2", + .min_uV = 1000000, + .max_uV = 1800000, + .table_len = ARRAY_SIZE(VDIG2_VSEL_table), + .table = VDIG2_VSEL_table, + }, + { + .name = "VPLL", + .min_uV = 1000000, + .max_uV = 2500000, + .table_len = ARRAY_SIZE(VPLL_VSEL_table), + .table = VPLL_VSEL_table, + }, + { + .name = "VDAC", + .min_uV = 1800000, + .max_uV = 2850000, + .table_len = ARRAY_SIZE(VDAC_VSEL_table), + .table = VDAC_VSEL_table, + }, + { + .name = "VAUX1", + .min_uV = 1800000, + .max_uV = 2850000, + .table_len = ARRAY_SIZE(VAUX1_VSEL_table), + .table = VAUX1_VSEL_table, + }, + { + .name = "VAUX2", + .min_uV = 1800000, + .max_uV = 3300000, + .table_len = ARRAY_SIZE(VAUX2_VSEL_table), + .table = VAUX2_VSEL_table, + }, + { + .name = "VAUX33", + .min_uV = 1800000, + .max_uV = 3300000, + .table_len = ARRAY_SIZE(VAUX33_VSEL_table), + .table = VAUX33_VSEL_table, + }, + { + .name = "VMMC", + .min_uV = 1800000, + .max_uV = 3300000, + .table_len = ARRAY_SIZE(VMMC_VSEL_table), + .table = VMMC_VSEL_table, + }, +}; + +struct tps65910_reg { + struct regulator_desc desc[TPS65910_NUM_REGULATOR]; + struct tps65910 *mfd; + struct regulator_dev *rdev[TPS65910_NUM_REGULATOR]; + struct tps_info *info[TPS65910_NUM_REGULATOR]; + struct mutex mutex; + int mode; +}; + +static inline int tps65910_read(struct tps65910_reg *pmic, u8 reg) +{ + u8 val; + int err; + + err = pmic->mfd->read(pmic->mfd, reg, 1, &val); + if (err) + return err; + + return val; +} + +static inline int tps65910_write(struct tps65910_reg *pmic, u8 reg, u8 val) +{ + return pmic->mfd->write(pmic->mfd, reg, 1, &val); +} + +static int tps65910_modify_bits(struct tps65910_reg *pmic, u8 reg, + u8 set_mask, u8 clear_mask) +{ + int err, data; + + mutex_lock(&pmic->mutex); + + data = tps65910_read(pmic, reg); + if (data < 0) { + dev_err(pmic->mfd->dev, "Read from reg 0x%x failed\n", reg); + err = data; + goto out; + } + + data &= ~clear_mask; + data |= set_mask; + err = tps65910_write(pmic, reg, data); + if (err) + dev_err(pmic->mfd->dev, "Write for reg 0x%x failed\n", reg); + +out: + mutex_unlock(&pmic->mutex); + return err; +} + +static int tps65910_reg_read(struct tps65910_reg *pmic, u8 reg) +{ + int data; + + mutex_lock(&pmic->mutex); + + data = tps65910_read(pmic, reg); + if (data < 0) + dev_err(pmic->mfd->dev, "Read from reg 0x%x failed\n", reg); + + mutex_unlock(&pmic->mutex); + return data; +} + +static int tps65910_reg_write(struct tps65910_reg *pmic, u8 reg, u8 val) +{ + int err; + + mutex_lock(&pmic->mutex); + + err = tps65910_write(pmic, reg, val); + if (err < 0) + dev_err(pmic->mfd->dev, "Write for reg 0x%x failed\n", reg); + + mutex_unlock(&pmic->mutex); + return err; +} + +static int tps65910_get_ctrl_register(int id) +{ + switch (id) { + case TPS65910_REG_VRTC: + return TPS65910_VRTC; + case TPS65910_REG_VIO: + return TPS65910_VIO; + case TPS65910_REG_VDD1: + return TPS65910_VDD1; + case TPS65910_REG_VDD2: + return TPS65910_VDD2; + case TPS65910_REG_VDD3: + return TPS65910_VDD3; + case TPS65910_REG_VDIG1: + return TPS65910_VDIG1; + case TPS65910_REG_VDIG2: + return TPS65910_VDIG2; + case TPS65910_REG_VPLL: + return TPS65910_VPLL; + case TPS65910_REG_VDAC: + return TPS65910_VDAC; + case TPS65910_REG_VAUX1: + return TPS65910_VAUX1; + case TPS65910_REG_VAUX2: + return TPS65910_VAUX2; + case TPS65910_REG_VAUX33: + return TPS65910_VAUX33; + case TPS65910_REG_VMMC: + return TPS65910_VMMC; + default: + return -EINVAL; + } +} + +static int tps65910_is_enabled(struct regulator_dev 
*dev) +{ + struct tps65910_reg *pmic = rdev_get_drvdata(dev); + int reg, value, id = rdev_get_id(dev); + + reg = tps65910_get_ctrl_register(id); + if (reg < 0) + return reg; + + value = tps65910_reg_read(pmic, reg); + if (value < 0) + return value; + + return value & TPS65910_SUPPLY_STATE_ENABLED; +} + +static int tps65910_enable(struct regulator_dev *dev) +{ + struct tps65910_reg *pmic = rdev_get_drvdata(dev); + struct tps65910 *mfd = pmic->mfd; + int reg, id = rdev_get_id(dev); + + reg = tps65910_get_ctrl_register(id); + if (reg < 0) + return reg; + + return tps65910_set_bits(mfd, reg, TPS65910_SUPPLY_STATE_ENABLED); +} + +static int tps65910_disable(struct regulator_dev *dev) +{ + struct tps65910_reg *pmic = rdev_get_drvdata(dev); + struct tps65910 *mfd = pmic->mfd; + int reg, id = rdev_get_id(dev); + + reg = tps65910_get_ctrl_register(id); + if (reg < 0) + return reg; + + return tps65910_clear_bits(mfd, reg, TPS65910_SUPPLY_STATE_ENABLED); +} + + +static int tps65910_set_mode(struct regulator_dev *dev, unsigned int mode) +{ + struct tps65910_reg *pmic = rdev_get_drvdata(dev); + struct tps65910 *mfd = pmic->mfd; + int reg, value, id = rdev_get_id(dev); + reg = tps65910_get_ctrl_register(id); + if (reg < 0) + return reg; + + switch (mode) { + case REGULATOR_MODE_NORMAL: + return tps65910_modify_bits(pmic, reg, LDO_ST_ON_BIT, + LDO_ST_MODE_BIT); + case REGULATOR_MODE_IDLE: + value = LDO_ST_ON_BIT | LDO_ST_MODE_BIT; + return tps65910_set_bits(mfd, reg, value); + case REGULATOR_MODE_STANDBY: + return tps65910_clear_bits(mfd, reg, LDO_ST_ON_BIT); + } + + return -EINVAL; +} + +static unsigned int tps65910_get_mode(struct regulator_dev *dev) +{ + struct tps65910_reg *pmic = rdev_get_drvdata(dev); + int reg, value, id = rdev_get_id(dev); + + reg = tps65910_get_ctrl_register(id); + if (reg < 0) + return reg; + + value = tps65910_reg_read(pmic, reg); + if (value < 0) + return value; + + if (value & LDO_ST_ON_BIT) + return REGULATOR_MODE_STANDBY; + else if (value & LDO_ST_MODE_BIT) + return REGULATOR_MODE_IDLE; + else + return REGULATOR_MODE_NORMAL; +} + +static int tps65910_get_voltage_dcdc(struct regulator_dev *dev) +{ + struct tps65910_reg *pmic = rdev_get_drvdata(dev); + int id = rdev_get_id(dev), voltage = 0; + int opvsel = 0, srvsel = 0, mult = 0, sr = 0; + + switch (id) { + case TPS65910_REG_VDD1: + opvsel = tps65910_reg_read(pmic, TPS65910_VDD1_OP); + mult = tps65910_reg_read(pmic, TPS65910_VDD1); + mult = (mult & VDD1_VGAIN_SEL_MASK) >> VDD1_VGAIN_SEL_SHIFT; + srvsel = tps65910_reg_read(pmic, TPS65910_VDD1_SR); + sr = opvsel & VDD1_OP_CMD_MASK; + opvsel &= VDD1_OP_SEL_MASK; + srvsel &= VDD1_SR_SEL_MASK; + break; + case TPS65910_REG_VDD2: + opvsel = tps65910_reg_read(pmic, TPS65910_VDD2_OP); + mult = tps65910_reg_read(pmic, TPS65910_VDD2); + mult = (mult & VDD2_VGAIN_SEL_MASK) >> VDD2_VGAIN_SEL_SHIFT; + srvsel = tps65910_reg_read(pmic, TPS65910_VDD2_SR); + sr = opvsel & VDD2_OP_CMD_MASK; + opvsel &= VDD2_OP_SEL_MASK; + srvsel &= VDD2_SR_SEL_MASK; + break; + } + + /* multiplier 0 == 1 but 2,3 normal */ + if (!mult) + mult=1; + + if (sr) { + /* Valid range is 3-75 so normalise */ + if (srvsel < 3) srvsel = 3; + if (srvsel > 75) srvsel = 75; + srvsel -= 3; + + voltage = (srvsel * VDD1_2_OFFSET + VDD1_2_MIN_VOLT) * 100; + } else { + + /* Valid range is 3-75 so normalise */ + if (opvsel < 3) opvsel = 3; + if (opvsel > 75) opvsel = 75; + opvsel -= 3; + + voltage = (opvsel * VDD1_2_OFFSET + VDD1_2_MIN_VOLT) * 100; + } + + voltage *= mult; + + return voltage; +} + +static int 
tps65910_get_voltage(struct regulator_dev *dev) +{ + struct tps65910_reg *pmic = rdev_get_drvdata(dev); + int reg, value, id = rdev_get_id(dev), voltage = 0; + + reg = tps65910_get_ctrl_register(id); + if (reg < 0) + return reg; + + value = tps65910_reg_read(pmic, reg); + if (value < 0) + return value; + + switch (id) { + case TPS65910_REG_VIO: + case TPS65910_REG_VDIG1: + case TPS65910_REG_VDIG2: + case TPS65910_REG_VPLL: + case TPS65910_REG_VDAC: + case TPS65910_REG_VAUX1: + case TPS65910_REG_VAUX2: + case TPS65910_REG_VAUX33: + case TPS65910_REG_VMMC: + value &= LDO_SEL_MASK; + value >>= LDO_SEL_SHIFT; + break; + default: + return -EINVAL; + } + + voltage = pmic->info[id]->table[value] * 1000; + + return voltage; +} + +static int tps65910_get_voltage_vdd3(struct regulator_dev *dev) +{ + return 5 * 1000 * 1000; +} + +static int tps65910_set_voltage_dcdc(struct regulator_dev *dev, + unsigned selector) +{ + struct tps65910_reg *pmic = rdev_get_drvdata(dev); + int id = rdev_get_id(dev), vsel; + int dcdc_mult; + + /* Split vsel into appropriate registers */ + dcdc_mult = (selector / VDD1_2_NUM_VOLTS) + 1; + if (dcdc_mult == 1) dcdc_mult--; + + vsel = (selector % VDD1_2_NUM_VOLTS) + 3; + + if (id == TPS65910_REG_VDD1) { + tps65910_modify_bits(pmic, TPS65910_VDD1, + (dcdc_mult << VDD1_VGAIN_SEL_SHIFT), + VDD1_VGAIN_SEL_MASK); + tps65910_reg_write(pmic, TPS65910_VDD1_OP, vsel); + } else { + tps65910_modify_bits(pmic, TPS65910_VDD2, + (dcdc_mult << VDD2_VGAIN_SEL_SHIFT), + VDD1_VGAIN_SEL_MASK); + tps65910_reg_write(pmic, TPS65910_VDD2_OP, vsel); + } + + return 0; +} + +static int tps65910_set_voltage(struct regulator_dev *dev, unsigned selector) +{ + struct tps65910_reg *pmic = rdev_get_drvdata(dev); + int reg, id = rdev_get_id(dev); + + reg = tps65910_get_ctrl_register(id); + if (reg < 0) + return reg; + + switch (id) { + case TPS65910_REG_VIO: + case TPS65910_REG_VDIG1: + case TPS65910_REG_VDIG2: + case TPS65910_REG_VPLL: + case TPS65910_REG_VDAC: + case TPS65910_REG_VAUX1: + case TPS65910_REG_VAUX2: + case TPS65910_REG_VAUX33: + case TPS65910_REG_VMMC: + return tps65910_modify_bits(pmic, reg, + (selector << LDO_SEL_SHIFT), LDO_SEL_MASK); + } + + return -EINVAL; +} + +static int tps65910_list_voltage_dcdc(struct regulator_dev *dev, + unsigned selector) +{ + int mult, volt; + + mult = (selector / VDD1_2_NUM_VOLTS) + 1; + + volt = VDD1_2_MIN_VOLT + (selector % VDD1_2_NUM_VOLTS) * VDD1_2_OFFSET; + + return volt * 100 * mult; +} + +static int tps65910_list_voltage(struct regulator_dev *dev, + unsigned selector) +{ + struct tps65910_reg *pmic = rdev_get_drvdata(dev); + int id = rdev_get_id(dev), voltage; + + if (id < TPS65910_REG_VIO || id > TPS65910_REG_VMMC) + return -EINVAL; + + if (selector >= pmic->info[id]->table_len) + return -EINVAL; + else + voltage = pmic->info[id]->table[selector] * 1000; + + return voltage; +} + +/* Regulator ops (except VRTC) */ +static struct regulator_ops tps65910_ops_dcdc = { + .is_enabled = tps65910_is_enabled, + .enable = tps65910_enable, + .disable = tps65910_disable, + .set_mode = tps65910_set_mode, + .get_mode = tps65910_get_mode, + .get_voltage = tps65910_get_voltage_dcdc, + .set_voltage_sel = tps65910_set_voltage_dcdc, + .list_voltage = tps65910_list_voltage_dcdc, +}; + +static struct regulator_ops tps65910_ops_vdd3 = { + .is_enabled = tps65910_is_enabled, + .enable = tps65910_enable, + .disable = tps65910_disable, + .set_mode = tps65910_set_mode, + .get_mode = tps65910_get_mode, + .get_voltage = tps65910_get_voltage_vdd3, + .list_voltage = 
tps65910_list_voltage, +}; + +static struct regulator_ops tps65910_ops = { + .is_enabled = tps65910_is_enabled, + .enable = tps65910_enable, + .disable = tps65910_disable, + .set_mode = tps65910_set_mode, + .get_mode = tps65910_get_mode, + .get_voltage = tps65910_get_voltage, + .set_voltage_sel = tps65910_set_voltage, + .list_voltage = tps65910_list_voltage, +}; + +static __devinit int tps65910_probe(struct platform_device *pdev) +{ + struct tps65910 *tps65910 = dev_get_drvdata(pdev->dev.parent); + struct tps_info *info = tps65910_regs; + struct regulator_init_data *reg_data; + struct regulator_dev *rdev; + struct tps65910_reg *pmic; + struct tps65910_board *pmic_plat_data; + static int desc_id; + int i, err; + + pmic_plat_data = dev_get_platdata(tps65910->dev); + if (!pmic_plat_data) + return -EINVAL; + + reg_data = pmic_plat_data->tps65910_pmic_init_data; + + pmic = kzalloc(sizeof(*pmic), GFP_KERNEL); + if (!pmic) + return -ENOMEM; + + mutex_init(&pmic->mutex); + pmic->mfd = tps65910; + platform_set_drvdata(pdev, pmic); + + /* Give control of all register to control port */ + tps65910_set_bits(pmic->mfd, TPS65910_DEVCTRL, + DEVCTRL_SR_CTL_I2C_SEL_MASK); + + for (i = 0; i < TPS65910_NUM_REGULATOR; i++, info++, reg_data++) { + /* Register the regulators */ + pmic->info[i] = info; + + pmic->desc[i].name = info->name; + pmic->desc[i].id = desc_id++; + pmic->desc[i].n_voltages = info->table_len; + + if ((i == TPS65910_REG_VDD1) || (i == TPS65910_REG_VDD2)) + pmic->desc[i].ops = &tps65910_ops_dcdc; + else if (i == TPS65910_REG_VDD3) + pmic->desc[i].ops = &tps65910_ops_vdd3; + else + pmic->desc[i].ops = &tps65910_ops; + + pmic->desc[i].type = REGULATOR_VOLTAGE; + pmic->desc[i].owner = THIS_MODULE; + + rdev = regulator_register(&pmic->desc[i], + tps65910->dev, reg_data, pmic); + if (IS_ERR(rdev)) { + dev_err(tps65910->dev, + "failed to register %s regulator\n", + pdev->name); + err = PTR_ERR(rdev); + goto err; + } + + /* Save regulator for cleanup */ + pmic->rdev[i] = rdev; + } + return 0; + +err: + while (--i >= 0) + regulator_unregister(pmic->rdev[i]); + + kfree(pmic); + return err; +} + +static int __devexit tps65910_remove(struct platform_device *pdev) +{ + struct tps65910_reg *tps65910_reg = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < TPS65910_NUM_REGULATOR; i++) + regulator_unregister(tps65910_reg->rdev[i]); + + kfree(tps65910_reg); + return 0; +} + +static struct platform_driver tps65910_driver = { + .driver = { + .name = "tps65910-pmic", + .owner = THIS_MODULE, + }, + .probe = tps65910_probe, + .remove = __devexit_p(tps65910_remove), +}; + +static int __init tps65910_init(void) +{ + return platform_driver_register(&tps65910_driver); +} +subsys_initcall(tps65910_init); + +static void __exit tps65910_cleanup(void) +{ + platform_driver_unregister(&tps65910_driver); +} +module_exit(tps65910_cleanup); + +MODULE_AUTHOR("Graeme Gregory "); +MODULE_DESCRIPTION("TPS6507x voltage regulator driver"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:tps65910-pmic"); diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index 8afe91c85587..be08a0c7e7de 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -215,12 +215,26 @@ #define VDD2_SR_SEL_SHIFT 0 +/*Registers VDD1, VDD2 voltage values definitions */ +#define VDD1_2_NUM_VOLTS 73 +#define VDD1_2_MIN_VOLT 6000 +#define VDD1_2_OFFSET 125 + + /*Register VDD3 (0x80) register.RegisterDescription */ #define VDD3_CKINEN_MASK 0x04 #define VDD3_CKINEN_SHIFT 2 #define VDD3_ST_MASK 0x03 #define 
VDD3_ST_SHIFT 0 +/*Registers VDIG (0x80) to VDAC register.RegisterDescription */ +#define LDO_SEL_MASK 0x0C +#define LDO_SEL_SHIFT 2 +#define LDO_ST_MASK 0x03 +#define LDO_ST_SHIFT 0 +#define LDO_ST_ON_BIT 0x01 +#define LDO_ST_MODE_BIT 0x02 + /*Register VDIG1 (0x80) register.RegisterDescription */ #define VDIG1_SEL_MASK 0x0C -- cgit v1.2.3 From ecb9c4f5956afa2ca0a20a1f99932b9ff81de854 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 16 May 2011 18:20:34 +0800 Subject: regulator: Remove MAX8997_REG_BUCK1DVS/MAX8997_REG_BUCK2DVS/MAX8997_REG_BUCK5DVS macros In current implementation, the original macro implementation assumes the caller pass the parameter starting from 1 (to match the register names in datasheet). Thus we have unneeded plus one then minus one operations when using MAX8997_REG_BUCK1DVS/MAX8997_REG_BUCK2DVS/MAX8997_REG_BUCK5DVS macros. This patch removes these macros to avoid unneeded plus one then minus one operations without reducing readability. Signed-off-by: Axel Lin Acked-by: Kyungmin Park Acked-by: MyungJoo Ham Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/max8997.c | 12 ++++++------ include/linux/mfd/max8997-private.h | 4 ---- 2 files changed, 6 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/max8997.c b/drivers/regulator/max8997.c index b1c1444864b3..10d5a1d9768e 100644 --- a/drivers/regulator/max8997.c +++ b/drivers/regulator/max8997.c @@ -1032,11 +1032,11 @@ static __devinit int max8997_pmic_probe(struct platform_device *pdev) /* For the safety, set max voltage before setting up */ for (i = 0; i < 8; i++) { - max8997_update_reg(i2c, MAX8997_REG_BUCK1DVS(i + 1), + max8997_update_reg(i2c, MAX8997_REG_BUCK1DVS1 + i, max_buck1, 0x3f); - max8997_update_reg(i2c, MAX8997_REG_BUCK2DVS(i + 1), + max8997_update_reg(i2c, MAX8997_REG_BUCK2DVS1 + i, max_buck2, 0x3f); - max8997_update_reg(i2c, MAX8997_REG_BUCK5DVS(i + 1), + max8997_update_reg(i2c, MAX8997_REG_BUCK5DVS1 + i, max_buck5, 0x3f); } @@ -1113,13 +1113,13 @@ static __devinit int max8997_pmic_probe(struct platform_device *pdev) /* Initialize all the DVS related BUCK registers */ for (i = 0; i < 8; i++) { - max8997_update_reg(i2c, MAX8997_REG_BUCK1DVS(i + 1), + max8997_update_reg(i2c, MAX8997_REG_BUCK1DVS1 + i, max8997->buck1_vol[i], 0x3f); - max8997_update_reg(i2c, MAX8997_REG_BUCK2DVS(i + 1), + max8997_update_reg(i2c, MAX8997_REG_BUCK2DVS1 + i, max8997->buck2_vol[i], 0x3f); - max8997_update_reg(i2c, MAX8997_REG_BUCK5DVS(i + 1), + max8997_update_reg(i2c, MAX8997_REG_BUCK5DVS1 + i, max8997->buck5_vol[i], 0x3f); } diff --git a/include/linux/mfd/max8997-private.h b/include/linux/mfd/max8997-private.h index 69d1010e2e51..5ff2400ad46c 100644 --- a/include/linux/mfd/max8997-private.h +++ b/include/linux/mfd/max8997-private.h @@ -311,10 +311,6 @@ enum max8997_irq { MAX8997_IRQ_NR, }; -#define MAX8997_REG_BUCK1DVS(x) (MAX8997_REG_BUCK1DVS1 + (x) - 1) -#define MAX8997_REG_BUCK2DVS(x) (MAX8997_REG_BUCK2DVS1 + (x) - 1) -#define MAX8997_REG_BUCK5DVS(x) (MAX8997_REG_BUCK5DVS1 + (x) - 1) - #define MAX8997_NUM_GPIO 12 struct max8997_dev { struct device *dev; -- cgit v1.2.3 From 795570561cc9c8dc7f7582ed6c4d07121b1c4831 Mon Sep 17 00:00:00 2001 From: Jorge Eduardo Candelaria Date: Mon, 16 May 2011 18:34:59 -0500 Subject: MFD: TPS65910: Add support for TPS65911 device The TPS65911 is the next generation of the TPS65910 family of PMIC chips. 
It adds a few features: - Watchdog Timer - PWM & LED generators - Comparators for system control status It also adds a set of Interrupts and GPIOs, among other things. The driver exports a function to identify between different versions of the tps65910 family, allowing other modules to identify the capabilities of the current chip. Signed-off-by: Jorge Eduardo Candelaria Acked-by: Samuel Ortiz Signed-off-by: Liam Girdwood --- drivers/mfd/tps65910.c | 4 +++- include/linux/mfd/tps65910.h | 46 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c index e31824870b9f..2229e66d80db 100644 --- a/drivers/mfd/tps65910.c +++ b/drivers/mfd/tps65910.c @@ -157,6 +157,7 @@ static int tps65910_i2c_probe(struct i2c_client *i2c, i2c_set_clientdata(i2c, tps65910); tps65910->dev = &i2c->dev; tps65910->i2c_client = i2c; + tps65910->id = id->driver_data; tps65910->read = tps65910_i2c_read; tps65910->write = tps65910_i2c_write; mutex_init(&tps65910->io_mutex); @@ -192,7 +193,8 @@ static int tps65910_i2c_remove(struct i2c_client *i2c) } static const struct i2c_device_id tps65910_i2c_id[] = { - { "tps65910", 0 }, + { "tps65910", TPS65910 }, + { "tps65911", TPS65911 }, { } }; MODULE_DEVICE_TABLE(i2c, tps65910_i2c_id); diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index be08a0c7e7de..6ceea0dab8d7 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -17,6 +17,14 @@ #ifndef __LINUX_MFD_TPS65910_H #define __LINUX_MFD_TPS65910_H +/* TPS chip id list */ +#define TPS65910 0 +#define TPS65911 1 + +/* TPS regulator type list */ +#define REGULATOR_LDO 0 +#define REGULATOR_DCDC 1 + /* * List of registers for component TPS65910 * @@ -96,6 +104,21 @@ #define TPS65910_JTAGVERNUM 0x80 #define TPS65910_MAX_REGISTER 0x80 +/* + * List of registers specific to TPS65911 + */ +#define TPS65911_VDDCTRL 0x27 +#define TPS65911_VDDCTRL_OP 0x28 +#define TPS65911_VDDCTRL_SR 0x29 +#define TPS65911_LDO1 0x30 +#define TPS65911_LDO2 0x31 +#define TPS65911_LDO5 0x32 +#define TPS65911_LDO8 0x33 +#define TPS65911_LDO7 0x34 +#define TPS65911_LDO6 0x35 +#define TPS65911_LDO4 0x36 +#define TPS65911_LDO3 0x37 + /* * List of register bitfields for component TPS65910 * @@ -702,6 +725,23 @@ #define JTAGVERNUM_VERNUM_SHIFT 0 +/* Register VDDCTRL (0x27) bit definitions */ +#define VDDCTRL_ST_MASK 0x03 +#define VDDCTRL_ST_SHIFT 0 + + +/*Register VDDCTRL_OP (0x28) bit definitios */ +#define VDDCTRL_OP_CMD_MASK 0x80 +#define VDDCTRL_OP_CMD_SHIFT 7 +#define VDDCTRL_OP_SEL_MASK 0x7F +#define VDDCTRL_OP_SEL_SHIFT 0 + + +/*Register VDDCTRL_SR (0x29) bit definitions */ +#define VDDCTRL_SR_SEL_MASK 0x7F +#define VDDCTRL_SR_SEL_SHIFT 0 + + /* IRQ Definitions */ #define TPS65910_IRQ_VBAT_VMBDCH 0 #define TPS65910_IRQ_VBAT_VMHI 1 @@ -742,6 +782,7 @@ struct tps65910 { struct device *dev; struct i2c_client *i2c_client; struct mutex io_mutex; + unsigned int id; int (*read)(struct tps65910 *tps65910, u8 reg, int size, void *dest); int (*write)(struct tps65910 *tps65910, u8 reg, int size, void *src); @@ -771,4 +812,9 @@ void tps65910_gpio_init(struct tps65910 *tps65910, int gpio_base); int tps65910_irq_init(struct tps65910 *tps65910, int irq, struct tps65910_platform_data *pdata); +static inline int tps65910_chip_id(struct tps65910 *tps65910) +{ + return tps65910->id; +} + #endif /* __LINUX_MFD_TPS65910_H */ -- cgit v1.2.3 From a320e3c3d6351814afa5182159df88d2637e0f6f Mon Sep 17 00:00:00 2001 
From: Jorge Eduardo Candelaria Date: Mon, 16 May 2011 18:35:03 -0500 Subject: regulator: tps65911: Add new chip version The tps65911 chip introduces new features, including changes in the regulator module. - VDD1 and VDD2 remain unchanged. - VDD3 is now named VDDCTRL and has a wider voltage range. - LDOs are now named LDO1...8 and voltage ranges are sequential, making LDOs easier to handle. Signed-off-by: Jorge Eduardo Candelaria Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/tps65910-regulator.c | 357 +++++++++++++++++++++++++++++---- include/linux/mfd/tps65910.h | 10 +- 2 files changed, 332 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c index 048ba47e058f..55dd4e6650db 100644 --- a/drivers/regulator/tps65910-regulator.c +++ b/drivers/regulator/tps65910-regulator.c @@ -39,8 +39,17 @@ #define TPS65910_REG_VAUX33 11 #define TPS65910_REG_VMMC 12 -#define TPS65910_NUM_REGULATOR 13 +#define TPS65911_REG_VDDCTRL 4 +#define TPS65911_REG_LDO1 5 +#define TPS65911_REG_LDO2 6 +#define TPS65911_REG_LDO3 7 +#define TPS65911_REG_LDO4 8 +#define TPS65911_REG_LDO5 9 +#define TPS65911_REG_LDO6 10 +#define TPS65911_REG_LDO7 11 +#define TPS65911_REG_LDO8 12 +#define TPS65910_NUM_REGULATOR 13 #define TPS65910_SUPPLY_STATE_ENABLED 0x1 /* supported VIO voltages in milivolts */ @@ -48,7 +57,9 @@ static const u16 VIO_VSEL_table[] = { 1500, 1800, 2500, 3300, }; -/* supported VIO voltages in milivolts */ +/* VSEL tables for TPS65910 specific LDOs and dcdc's */ + +/* supported VDD3 voltages in milivolts */ static const u16 VDD3_VSEL_table[] = { 5000, }; @@ -187,6 +198,71 @@ static struct tps_info tps65910_regs[] = { }, }; +static struct tps_info tps65911_regs[] = { + { + .name = "VIO", + .min_uV = 1500000, + .max_uV = 3300000, + .table_len = ARRAY_SIZE(VIO_VSEL_table), + .table = VIO_VSEL_table, + }, + { + .name = "VDD1", + .min_uV = 600000, + .max_uV = 4500000, + }, + { + .name = "VDD2", + .min_uV = 600000, + .max_uV = 4500000, + }, + { + .name = "VDDCTRL", + .min_uV = 600000, + .max_uV = 1400000, + }, + { + .name = "LDO1", + .min_uV = 1000000, + .max_uV = 3300000, + }, + { + .name = "LDO2", + .min_uV = 1000000, + .max_uV = 3300000, + }, + { + .name = "LDO3", + .min_uV = 1000000, + .max_uV = 3300000, + }, + { + .name = "LDO4", + .min_uV = 1000000, + .max_uV = 3300000, + }, + { + .name = "LDO5", + .min_uV = 1000000, + .max_uV = 3300000, + }, + { + .name = "LDO6", + .min_uV = 1000000, + .max_uV = 3300000, + }, + { + .name = "LDO7", + .min_uV = 1000000, + .max_uV = 3300000, + }, + { + .name = "LDO8", + .min_uV = 1000000, + .max_uV = 3300000, + }, +}; + struct tps65910_reg { struct regulator_desc desc[TPS65910_NUM_REGULATOR]; struct tps65910 *mfd; @@ -194,6 +270,7 @@ struct tps65910_reg { struct tps_info *info[TPS65910_NUM_REGULATOR]; struct mutex mutex; int mode; + int (*get_ctrl_reg)(int); }; static inline int tps65910_read(struct tps65910_reg *pmic, u8 reg) @@ -300,12 +377,46 @@ static int tps65910_get_ctrl_register(int id) } } +static int tps65911_get_ctrl_register(int id) +{ + switch (id) { + case TPS65910_REG_VRTC: + return TPS65910_VRTC; + case TPS65910_REG_VIO: + return TPS65910_VIO; + case TPS65910_REG_VDD1: + return TPS65910_VDD1; + case TPS65910_REG_VDD2: + return TPS65910_VDD2; + case TPS65911_REG_VDDCTRL: + return TPS65911_VDDCTRL; + case TPS65911_REG_LDO1: + return TPS65911_LDO1; + case TPS65911_REG_LDO2: + return TPS65911_LDO2; + case TPS65911_REG_LDO3: + return 
TPS65911_LDO3; + case TPS65911_REG_LDO4: + return TPS65911_LDO4; + case TPS65911_REG_LDO5: + return TPS65911_LDO5; + case TPS65911_REG_LDO6: + return TPS65911_LDO6; + case TPS65911_REG_LDO7: + return TPS65911_LDO7; + case TPS65911_REG_LDO8: + return TPS65911_LDO8; + default: + return -EINVAL; + } +} + static int tps65910_is_enabled(struct regulator_dev *dev) { struct tps65910_reg *pmic = rdev_get_drvdata(dev); int reg, value, id = rdev_get_id(dev); - reg = tps65910_get_ctrl_register(id); + reg = pmic->get_ctrl_reg(id); if (reg < 0) return reg; @@ -322,7 +433,7 @@ static int tps65910_enable(struct regulator_dev *dev) struct tps65910 *mfd = pmic->mfd; int reg, id = rdev_get_id(dev); - reg = tps65910_get_ctrl_register(id); + reg = pmic->get_ctrl_reg(id); if (reg < 0) return reg; @@ -335,7 +446,7 @@ static int tps65910_disable(struct regulator_dev *dev) struct tps65910 *mfd = pmic->mfd; int reg, id = rdev_get_id(dev); - reg = tps65910_get_ctrl_register(id); + reg = pmic->get_ctrl_reg(id); if (reg < 0) return reg; @@ -348,7 +459,8 @@ static int tps65910_set_mode(struct regulator_dev *dev, unsigned int mode) struct tps65910_reg *pmic = rdev_get_drvdata(dev); struct tps65910 *mfd = pmic->mfd; int reg, value, id = rdev_get_id(dev); - reg = tps65910_get_ctrl_register(id); + + reg = pmic->get_ctrl_reg(id); if (reg < 0) return reg; @@ -371,7 +483,7 @@ static unsigned int tps65910_get_mode(struct regulator_dev *dev) struct tps65910_reg *pmic = rdev_get_drvdata(dev); int reg, value, id = rdev_get_id(dev); - reg = tps65910_get_ctrl_register(id); + reg = pmic->get_ctrl_reg(id); if (reg < 0) return reg; @@ -391,7 +503,7 @@ static int tps65910_get_voltage_dcdc(struct regulator_dev *dev) { struct tps65910_reg *pmic = rdev_get_drvdata(dev); int id = rdev_get_id(dev), voltage = 0; - int opvsel = 0, srvsel = 0, mult = 0, sr = 0; + int opvsel = 0, srvsel = 0, vselmax = 0, mult = 0, sr = 0; switch (id) { case TPS65910_REG_VDD1: @@ -402,6 +514,7 @@ static int tps65910_get_voltage_dcdc(struct regulator_dev *dev) sr = opvsel & VDD1_OP_CMD_MASK; opvsel &= VDD1_OP_SEL_MASK; srvsel &= VDD1_SR_SEL_MASK; + vselmax = 75; break; case TPS65910_REG_VDD2: opvsel = tps65910_reg_read(pmic, TPS65910_VDD2_OP); @@ -411,6 +524,15 @@ static int tps65910_get_voltage_dcdc(struct regulator_dev *dev) sr = opvsel & VDD2_OP_CMD_MASK; opvsel &= VDD2_OP_SEL_MASK; srvsel &= VDD2_SR_SEL_MASK; + vselmax = 75; + break; + case TPS65911_REG_VDDCTRL: + opvsel = tps65910_reg_read(pmic, TPS65911_VDDCTRL_OP); + srvsel = tps65910_reg_read(pmic, TPS65911_VDDCTRL_SR); + sr = opvsel & VDDCTRL_OP_CMD_MASK; + opvsel &= VDDCTRL_OP_SEL_MASK; + srvsel &= VDDCTRL_SR_SEL_MASK; + vselmax = 64; break; } @@ -419,17 +541,21 @@ static int tps65910_get_voltage_dcdc(struct regulator_dev *dev) mult=1; if (sr) { - /* Valid range is 3-75 so normalise */ - if (srvsel < 3) srvsel = 3; - if (srvsel > 75) srvsel = 75; + /* normalise to valid range */ + if (srvsel < 3) + srvsel = 3; + if (srvsel > vselmax) + srvsel = vselmax; srvsel -= 3; voltage = (srvsel * VDD1_2_OFFSET + VDD1_2_MIN_VOLT) * 100; } else { - /* Valid range is 3-75 so normalise */ - if (opvsel < 3) opvsel = 3; - if (opvsel > 75) opvsel = 75; + /* normalise to valid range*/ + if (opvsel < 3) + opvsel = 3; + if (opvsel > vselmax) + opvsel = vselmax; opvsel -= 3; voltage = (opvsel * VDD1_2_OFFSET + VDD1_2_MIN_VOLT) * 100; @@ -445,7 +571,7 @@ static int tps65910_get_voltage(struct regulator_dev *dev) struct tps65910_reg *pmic = rdev_get_drvdata(dev); int reg, value, id = rdev_get_id(dev), voltage = 0; - reg = 
tps65910_get_ctrl_register(id); + reg = pmic->get_ctrl_reg(id); if (reg < 0) return reg; @@ -480,29 +606,88 @@ static int tps65910_get_voltage_vdd3(struct regulator_dev *dev) return 5 * 1000 * 1000; } +static int tps65911_get_voltage(struct regulator_dev *dev) +{ + struct tps65910_reg *pmic = rdev_get_drvdata(dev); + int step_mv, id = rdev_get_id(dev); + u8 value, reg; + + reg = pmic->get_ctrl_reg(id); + + value = tps65910_reg_read(pmic, reg); + + switch (id) { + case TPS65911_REG_LDO1: + case TPS65911_REG_LDO2: + case TPS65911_REG_LDO4: + value &= LDO1_SEL_MASK; + value >>= LDO_SEL_SHIFT; + /* The first 5 values of the selector correspond to 1V */ + if (value < 5) + value = 0; + else + value -= 4; + + step_mv = 50; + break; + case TPS65911_REG_LDO3: + case TPS65911_REG_LDO5: + case TPS65911_REG_LDO6: + case TPS65911_REG_LDO7: + case TPS65911_REG_LDO8: + value &= LDO3_SEL_MASK; + value >>= LDO_SEL_SHIFT; + /* The first 3 values of the selector correspond to 1V */ + if (value < 3) + value = 0; + else + value -= 2; + + step_mv = 100; + break; + case TPS65910_REG_VIO: + return pmic->info[id]->table[value] * 1000; + break; + default: + return -EINVAL; + } + + return (LDO_MIN_VOLT + value * step_mv) * 1000; +} + static int tps65910_set_voltage_dcdc(struct regulator_dev *dev, unsigned selector) { struct tps65910_reg *pmic = rdev_get_drvdata(dev); int id = rdev_get_id(dev), vsel; - int dcdc_mult; + int dcdc_mult = 0; - /* Split vsel into appropriate registers */ - dcdc_mult = (selector / VDD1_2_NUM_VOLTS) + 1; - if (dcdc_mult == 1) dcdc_mult--; - - vsel = (selector % VDD1_2_NUM_VOLTS) + 3; + switch (id) { + case TPS65910_REG_VDD1: + dcdc_mult = (selector / VDD1_2_NUM_VOLTS) + 1; + if (dcdc_mult == 1) + dcdc_mult--; + vsel = (selector % VDD1_2_NUM_VOLTS) + 3; - if (id == TPS65910_REG_VDD1) { tps65910_modify_bits(pmic, TPS65910_VDD1, (dcdc_mult << VDD1_VGAIN_SEL_SHIFT), VDD1_VGAIN_SEL_MASK); tps65910_reg_write(pmic, TPS65910_VDD1_OP, vsel); - } else { + break; + case TPS65910_REG_VDD2: + dcdc_mult = (selector / VDD1_2_NUM_VOLTS) + 1; + if (dcdc_mult == 1) + dcdc_mult--; + vsel = (selector % VDD1_2_NUM_VOLTS) + 3; + tps65910_modify_bits(pmic, TPS65910_VDD2, (dcdc_mult << VDD2_VGAIN_SEL_SHIFT), VDD1_VGAIN_SEL_MASK); tps65910_reg_write(pmic, TPS65910_VDD2_OP, vsel); + break; + case TPS65911_REG_VDDCTRL: + vsel = selector; + tps65910_reg_write(pmic, TPS65911_VDDCTRL_OP, vsel); } return 0; @@ -513,7 +698,7 @@ static int tps65910_set_voltage(struct regulator_dev *dev, unsigned selector) struct tps65910_reg *pmic = rdev_get_drvdata(dev); int reg, id = rdev_get_id(dev); - reg = tps65910_get_ctrl_register(id); + reg = pmic->get_ctrl_reg(id); if (reg < 0) return reg; @@ -534,14 +719,49 @@ static int tps65910_set_voltage(struct regulator_dev *dev, unsigned selector) return -EINVAL; } +static int tps65911_set_voltage(struct regulator_dev *dev, unsigned selector) +{ + struct tps65910_reg *pmic = rdev_get_drvdata(dev); + int reg, id = rdev_get_id(dev); + + reg = pmic->get_ctrl_reg(id); + if (reg < 0) + return reg; + + switch (id) { + case TPS65911_REG_LDO1: + case TPS65911_REG_LDO2: + case TPS65911_REG_LDO4: + return tps65910_modify_bits(pmic, reg, + (selector << LDO_SEL_SHIFT), LDO1_SEL_MASK); + case TPS65911_REG_LDO3: + case TPS65911_REG_LDO5: + case TPS65911_REG_LDO6: + case TPS65911_REG_LDO7: + case TPS65911_REG_LDO8: + case TPS65910_REG_VIO: + return tps65910_modify_bits(pmic, reg, + (selector << LDO_SEL_SHIFT), LDO3_SEL_MASK); + } + + return -EINVAL; +} + + static int tps65910_list_voltage_dcdc(struct 
regulator_dev *dev, unsigned selector) { - int mult, volt; - - mult = (selector / VDD1_2_NUM_VOLTS) + 1; + int volt, mult = 1, id = rdev_get_id(dev); - volt = VDD1_2_MIN_VOLT + (selector % VDD1_2_NUM_VOLTS) * VDD1_2_OFFSET; + switch (id) { + case TPS65910_REG_VDD1: + case TPS65910_REG_VDD2: + mult = (selector / VDD1_2_NUM_VOLTS) + 1; + volt = VDD1_2_MIN_VOLT + + (selector % VDD1_2_NUM_VOLTS) * VDD1_2_OFFSET; + case TPS65911_REG_VDDCTRL: + volt = VDDCTRL_MIN_VOLT + (selector * VDDCTRL_OFFSET); + } return volt * 100 * mult; } @@ -563,6 +783,45 @@ static int tps65910_list_voltage(struct regulator_dev *dev, return voltage; } +static int tps65911_list_voltage(struct regulator_dev *dev, unsigned selector) +{ + struct tps65910_reg *pmic = rdev_get_drvdata(dev); + int step_mv = 0, id = rdev_get_id(dev); + + switch(id) { + case TPS65911_REG_LDO1: + case TPS65911_REG_LDO2: + case TPS65911_REG_LDO4: + /* The first 5 values of the selector correspond to 1V */ + if (selector < 5) + selector = 0; + else + selector -= 4; + + step_mv = 50; + break; + case TPS65911_REG_LDO3: + case TPS65911_REG_LDO5: + case TPS65911_REG_LDO6: + case TPS65911_REG_LDO7: + case TPS65911_REG_LDO8: + /* The first 3 values of the selector correspond to 1V */ + if (selector < 3) + selector = 0; + else + selector -= 2; + + step_mv = 100; + break; + case TPS65910_REG_VIO: + return pmic->info[id]->table[selector] * 1000; + default: + return -EINVAL; + } + + return (LDO_MIN_VOLT + selector * step_mv) * 1000; +} + /* Regulator ops (except VRTC) */ static struct regulator_ops tps65910_ops_dcdc = { .is_enabled = tps65910_is_enabled, @@ -596,10 +855,21 @@ static struct regulator_ops tps65910_ops = { .list_voltage = tps65910_list_voltage, }; +static struct regulator_ops tps65911_ops = { + .is_enabled = tps65910_is_enabled, + .enable = tps65910_enable, + .disable = tps65910_disable, + .set_mode = tps65910_set_mode, + .get_mode = tps65910_get_mode, + .get_voltage = tps65911_get_voltage, + .set_voltage_sel = tps65911_set_voltage, + .list_voltage = tps65911_list_voltage, +}; + static __devinit int tps65910_probe(struct platform_device *pdev) { struct tps65910 *tps65910 = dev_get_drvdata(pdev->dev.parent); - struct tps_info *info = tps65910_regs; + struct tps_info *info; struct regulator_init_data *reg_data; struct regulator_dev *rdev; struct tps65910_reg *pmic; @@ -624,6 +894,18 @@ static __devinit int tps65910_probe(struct platform_device *pdev) tps65910_set_bits(pmic->mfd, TPS65910_DEVCTRL, DEVCTRL_SR_CTL_I2C_SEL_MASK); + switch(tps65910_chip_id(tps65910)) { + case TPS65910: + pmic->get_ctrl_reg = &tps65910_get_ctrl_register; + info = tps65910_regs; + case TPS65911: + pmic->get_ctrl_reg = &tps65911_get_ctrl_register; + info = tps65911_regs; + default: + pr_err("Invalid tps chip version\n"); + return -ENODEV; + } + for (i = 0; i < TPS65910_NUM_REGULATOR; i++, info++, reg_data++) { /* Register the regulators */ pmic->info[i] = info; @@ -632,12 +914,19 @@ static __devinit int tps65910_probe(struct platform_device *pdev) pmic->desc[i].id = i; pmic->desc[i].n_voltages = info->table_len; - if ((i == TPS65910_REG_VDD1) || (i == TPS65910_REG_VDD2)) + if (i == TPS65910_REG_VDD1 || i == TPS65910_REG_VDD2) { pmic->desc[i].ops = &tps65910_ops_dcdc; - else if (i == TPS65910_REG_VDD3) - pmic->desc[i].ops = &tps65910_ops_vdd3; - else - pmic->desc[i].ops = &tps65910_ops; + } else if (i == TPS65910_REG_VDD3) { + if (tps65910_chip_id(tps65910) == TPS65910) + pmic->desc[i].ops = &tps65910_ops_vdd3; + else + pmic->desc[i].ops = &tps65910_ops_dcdc; + } else { + 
if (tps65910_chip_id(tps65910) == TPS65910) + pmic->desc[i].ops = &tps65910_ops; + else + pmic->desc[i].ops = &tps65911_ops; + } pmic->desc[i].type = REGULATOR_VOLTAGE; pmic->desc[i].owner = THIS_MODULE; diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index 6ceea0dab8d7..20359e669ae7 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -232,7 +232,6 @@ #define VDD2_OP_SEL_MASK 0x7F #define VDD2_OP_SEL_SHIFT 0 - /*Register VDD2_SR (0x80) register.RegisterDescription */ #define VDD2_SR_SEL_MASK 0x7F #define VDD2_SR_SEL_SHIFT 0 @@ -249,6 +248,8 @@ #define VDD3_CKINEN_SHIFT 2 #define VDD3_ST_MASK 0x03 #define VDD3_ST_SHIFT 0 +#define VDDCTRL_MIN_VOLT 6000 +#define VDDCTRL_OFFSET 125 /*Registers VDIG (0x80) to VDAC register.RegisterDescription */ #define LDO_SEL_MASK 0x0C @@ -259,6 +260,13 @@ #define LDO_ST_MODE_BIT 0x02 +/* Registers LDO1 to LDO8 in tps65910 */ +#define LDO1_SEL_MASK 0xFC +#define LDO3_SEL_MASK 0x7C +#define LDO_MIN_VOLT 1000 +#define LDO_MAX_VOLT 3300; + + /*Register VDIG1 (0x80) register.RegisterDescription */ #define VDIG1_SEL_MASK 0x0C #define VDIG1_SEL_SHIFT 2 -- cgit v1.2.3 From a2974732ca7614aaf0baf9d6dd3ad893d50ce1c5 Mon Sep 17 00:00:00 2001 From: Jorge Eduardo Candelaria Date: Mon, 16 May 2011 18:35:07 -0500 Subject: TPS65911: Add new irq definitions TPS65911 adds new interrupt sources, as well as two new registers to handle them, one for interrupt status and one for interrupt masking. The added irqs are: -VMBCH2 - Low and High threshold -GPIO1-8 - Rising and falling edge detection -WTCHDG - Watchdog interrupt -PWRDN - PWRDN reset interrupt The code should handle these new registers only when the chip version is TPS65911. Signed-off-by: Jorge Eduardo Candelaria Acked-by: Samuel Ortiz Signed-off-by: Liam Girdwood --- drivers/mfd/tps65910-irq.c | 57 ++++++++++++++++++++++++++++++++++---------- include/linux/mfd/tps65910.h | 32 ++++++++++++++++++++++++- 2 files changed, 75 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/tps65910-irq.c b/drivers/mfd/tps65910-irq.c index b8435e045f81..2bfad5c86cc7 100644 --- a/drivers/mfd/tps65910-irq.c +++ b/drivers/mfd/tps65910-irq.c @@ -41,8 +41,8 @@ static inline int irq_to_tps65910_irq(struct tps65910 *tps65910, static irqreturn_t tps65910_irq(int irq, void *irq_data) { struct tps65910 *tps65910 = irq_data; - u16 irq_sts; - u16 irq_mask; + u32 irq_sts; + u32 irq_mask; u8 reg; int i; @@ -50,18 +50,28 @@ static irqreturn_t tps65910_irq(int irq, void *irq_data) irq_sts = reg; tps65910->read(tps65910, TPS65910_INT_STS2, 1, ®); irq_sts |= reg << 8; + switch (tps65910_chip_id(tps65910)) { + case TPS65911: + tps65910->read(tps65910, TPS65910_INT_STS3, 1, ®); + irq_sts |= reg << 16; + } tps65910->read(tps65910, TPS65910_INT_MSK, 1, ®); irq_mask = reg; tps65910->read(tps65910, TPS65910_INT_MSK2, 1, ®); irq_mask |= reg << 8; + switch (tps65910_chip_id(tps65910)) { + case TPS65911: + tps65910->read(tps65910, TPS65910_INT_MSK3, 1, ®); + irq_mask |= reg << 16; + } irq_sts &= ~irq_mask; if (!irq_sts) return IRQ_NONE; - for (i = 0; i < TPS65910_NUM_IRQ; i++) { + for (i = 0; i < tps65910->irq_num; i++) { if (!(irq_sts & (1 << i))) continue; @@ -71,9 +81,15 @@ static irqreturn_t tps65910_irq(int irq, void *irq_data) /* Write the STS register back to clear IRQs we handled */ reg = irq_sts & 0xFF; + irq_sts >>= 8; tps65910->write(tps65910, TPS65910_INT_STS, 1, ®); - reg = irq_sts >> 8; + reg = irq_sts & 0xFF; tps65910->write(tps65910, TPS65910_INT_STS2, 1, ®); + 
switch (tps65910_chip_id(tps65910)) { + case TPS65911: + reg = irq_sts >> 8; + tps65910->write(tps65910, TPS65910_INT_STS3, 1, ®); + } return IRQ_HANDLED; } @@ -88,19 +104,29 @@ static void tps65910_irq_lock(struct irq_data *data) static void tps65910_irq_sync_unlock(struct irq_data *data) { struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data); - u16 reg_mask; + u32 reg_mask; u8 reg; tps65910->read(tps65910, TPS65910_INT_MSK, 1, ®); reg_mask = reg; tps65910->read(tps65910, TPS65910_INT_MSK2, 1, ®); reg_mask |= reg << 8; + switch (tps65910_chip_id(tps65910)) { + case TPS65911: + tps65910->read(tps65910, TPS65910_INT_MSK3, 1, ®); + reg_mask |= reg << 16; + } if (tps65910->irq_mask != reg_mask) { reg = tps65910->irq_mask & 0xFF; tps65910->write(tps65910, TPS65910_INT_MSK, 1, ®); - reg = tps65910->irq_mask >> 8; + reg = tps65910->irq_mask >> 8 & 0xFF; tps65910->write(tps65910, TPS65910_INT_MSK2, 1, ®); + switch (tps65910_chip_id(tps65910)) { + case TPS65911: + reg = tps65910->irq_mask >> 16; + tps65910->write(tps65910, TPS65910_INT_MSK3, 1, ®); + } } mutex_unlock(&tps65910->irq_lock); } @@ -132,7 +158,6 @@ int tps65910_irq_init(struct tps65910 *tps65910, int irq, { int ret, cur_irq; int flags = IRQF_ONESHOT; - u8 reg; if (!irq) { dev_warn(tps65910->dev, "No interrupt support, no core IRQ\n"); @@ -144,19 +169,22 @@ int tps65910_irq_init(struct tps65910 *tps65910, int irq, return -EINVAL; } - /* Mask top level interrupts */ - reg = 0xFF; - tps65910->write(tps65910, TPS65910_INT_MSK, 1, ®); - reg = 0x03; - tps65910->write(tps65910, TPS65910_INT_MSK2, 1, ®); + tps65910->irq_mask = 0xFFFFFF; mutex_init(&tps65910->irq_lock); tps65910->chip_irq = irq; tps65910->irq_base = pdata->irq_base; + switch (tps65910_chip_id(tps65910)) { + case TPS65910: + tps65910->irq_num = TPS65910_NUM_IRQ; + case TPS65911: + tps65910->irq_num = TPS65911_NUM_IRQ; + } + /* Register with genirq */ for (cur_irq = tps65910->irq_base; - cur_irq < TPS65910_NUM_IRQ + tps65910->irq_base; + cur_irq < tps65910->irq_num + tps65910->irq_base; cur_irq++) { irq_set_chip_data(cur_irq, tps65910); irq_set_chip_and_handler(cur_irq, &tps65910_irq_chip, @@ -174,6 +202,9 @@ int tps65910_irq_init(struct tps65910 *tps65910, int irq, ret = request_threaded_irq(irq, NULL, tps65910_irq, flags, "tps65910", tps65910); + + irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW); + if (ret != 0) dev_err(tps65910->dev, "Failed to request IRQ: %d\n", ret); diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index 20359e669ae7..32bb7b81f713 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -763,6 +763,35 @@ #define TPS65910_IRQ_GPIO_F 9 #define TPS65910_NUM_IRQ 10 +#define TPS65911_IRQ_VBAT_VMBDCH 0 +#define TPS65911_IRQ_VBAT_VMBDCH2L 1 +#define TPS65911_IRQ_VBAT_VMBDCH2H 2 +#define TPS65911_IRQ_VBAT_VMHI 3 +#define TPS65911_IRQ_PWRON 4 +#define TPS65911_IRQ_PWRON_LP 5 +#define TPS65911_IRQ_PWRHOLD_F 6 +#define TPS65911_IRQ_PWRHOLD_R 7 +#define TPS65911_IRQ_HOTDIE 8 +#define TPS65911_IRQ_RTC_ALARM 9 +#define TPS65911_IRQ_RTC_PERIOD 10 +#define TPS65911_IRQ_GPIO0_R 11 +#define TPS65911_IRQ_GPIO0_F 12 +#define TPS65911_IRQ_GPIO1_R 13 +#define TPS65911_IRQ_GPIO1_F 14 +#define TPS65911_IRQ_GPIO2_R 15 +#define TPS65911_IRQ_GPIO2_F 16 +#define TPS65911_IRQ_GPIO3_R 17 +#define TPS65911_IRQ_GPIO3_F 18 +#define TPS65911_IRQ_GPIO4_R 19 +#define TPS65911_IRQ_GPIO4_F 20 +#define TPS65911_IRQ_GPIO5_R 21 +#define TPS65911_IRQ_GPIO5_F 22 +#define TPS65911_IRQ_WTCHDG 23 +#define TPS65911_IRQ_PWRDN 24 + +#define TPS65911_NUM_IRQ 
25 + + /* GPIO Register Definitions */ #define TPS65910_GPIO_DEB BIT(2) #define TPS65910_GPIO_PUEN BIT(3) @@ -806,7 +835,8 @@ struct tps65910 { struct mutex irq_lock; int chip_irq; int irq_base; - u16 irq_mask; + int irq_num; + u32 irq_mask; }; struct tps65910_platform_data { -- cgit v1.2.3 From 11ad14f86a7847b084d3e3f114180be39b1c7322 Mon Sep 17 00:00:00 2001 From: Jorge Eduardo Candelaria Date: Mon, 16 May 2011 18:35:42 -0500 Subject: TPS65911: Add support for added GPIO lines GPIO 1 to 8 are added for TPS65911 chip version. The gpio driver now handles more than one gpio lines. Subsequent versions of the chip family can add new GPIO lines with minimal driver changes. Signed-off-by: Jorge Eduardo Candelaria Acked-by: Grant Likely Signed-off-by: Liam Girdwood --- drivers/gpio/tps65910-gpio.c | 29 +++++++++----- include/linux/mfd/tps65910.h | 90 +++++++------------------------------------- 2 files changed, 33 insertions(+), 86 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/tps65910-gpio.c b/drivers/gpio/tps65910-gpio.c index f3ae37652446..8d1ddfdd63eb 100644 --- a/drivers/gpio/tps65910-gpio.c +++ b/drivers/gpio/tps65910-gpio.c @@ -25,9 +25,9 @@ static int tps65910_gpio_get(struct gpio_chip *gc, unsigned offset) struct tps65910 *tps65910 = container_of(gc, struct tps65910, gpio); uint8_t val; - tps65910->read(tps65910, TPS65910_GPIO0, 1, &val); + tps65910->read(tps65910, TPS65910_GPIO0 + offset, 1, &val); - if (val & GPIO0_GPIO_STS_MASK) + if (val & GPIO_STS_MASK) return 1; return 0; @@ -39,11 +39,11 @@ static void tps65910_gpio_set(struct gpio_chip *gc, unsigned offset, struct tps65910 *tps65910 = container_of(gc, struct tps65910, gpio); if (value) - tps65910_set_bits(tps65910, TPS65910_GPIO0, - GPIO0_GPIO_SET_MASK); + tps65910_set_bits(tps65910, TPS65910_GPIO0 + offset, + GPIO_SET_MASK); else - tps65910_clear_bits(tps65910, TPS65910_GPIO0, - GPIO0_GPIO_SET_MASK); + tps65910_clear_bits(tps65910, TPS65910_GPIO0 + offset, + GPIO_SET_MASK); } static int tps65910_gpio_output(struct gpio_chip *gc, unsigned offset, @@ -54,15 +54,16 @@ static int tps65910_gpio_output(struct gpio_chip *gc, unsigned offset, /* Set the initial value */ tps65910_gpio_set(gc, 0, value); - return tps65910_set_bits(tps65910, TPS65910_GPIO0, GPIO0_GPIO_CFG_MASK); + return tps65910_set_bits(tps65910, TPS65910_GPIO0 + offset, + GPIO_CFG_MASK); } static int tps65910_gpio_input(struct gpio_chip *gc, unsigned offset) { struct tps65910 *tps65910 = container_of(gc, struct tps65910, gpio); - return tps65910_clear_bits(tps65910, TPS65910_GPIO0, - GPIO0_GPIO_CFG_MASK); + return tps65910_clear_bits(tps65910, TPS65910_GPIO0 + offset, + GPIO_CFG_MASK); } void tps65910_gpio_init(struct tps65910 *tps65910, int gpio_base) @@ -76,7 +77,15 @@ void tps65910_gpio_init(struct tps65910 *tps65910, int gpio_base) tps65910->gpio.label = tps65910->i2c_client->name; tps65910->gpio.dev = tps65910->dev; tps65910->gpio.base = gpio_base; - tps65910->gpio.ngpio = 1; + + switch(tps65910_chip_id(tps65910)) { + case TPS65910: + tps65910->gpio.ngpio = 6; + case TPS65911: + tps65910->gpio.ngpio = 9; + default: + return; + } tps65910->gpio.can_sleep = 1; tps65910->gpio.direction_input = tps65910_gpio_input; diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index 32bb7b81f713..5f770064e0c5 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -101,6 +101,9 @@ #define TPS65910_GPIO3 0x63 #define TPS65910_GPIO4 0x64 #define TPS65910_GPIO5 0x65 +#define TPS65910_GPIO6 0x66 +#define 
TPS65910_GPIO7 0x67 +#define TPS65910_GPIO8 0x68 #define TPS65910_JTAGVERNUM 0x80 #define TPS65910_MAX_REGISTER 0x80 @@ -650,82 +653,17 @@ #define INT_MSK3_GPIO4_R_IT_MSK_SHIFT 0 -/*Register GPIO0 (0x80) register.RegisterDescription */ -#define GPIO0_GPIO_DEB_MASK 0x10 -#define GPIO0_GPIO_DEB_SHIFT 4 -#define GPIO0_GPIO_PUEN_MASK 0x08 -#define GPIO0_GPIO_PUEN_SHIFT 3 -#define GPIO0_GPIO_CFG_MASK 0x04 -#define GPIO0_GPIO_CFG_SHIFT 2 -#define GPIO0_GPIO_STS_MASK 0x02 -#define GPIO0_GPIO_STS_SHIFT 1 -#define GPIO0_GPIO_SET_MASK 0x01 -#define GPIO0_GPIO_SET_SHIFT 0 - - -/*Register GPIO1 (0x80) register.RegisterDescription */ -#define GPIO1_GPIO_DEB_MASK 0x10 -#define GPIO1_GPIO_DEB_SHIFT 4 -#define GPIO1_GPIO_PUEN_MASK 0x08 -#define GPIO1_GPIO_PUEN_SHIFT 3 -#define GPIO1_GPIO_CFG_MASK 0x04 -#define GPIO1_GPIO_CFG_SHIFT 2 -#define GPIO1_GPIO_STS_MASK 0x02 -#define GPIO1_GPIO_STS_SHIFT 1 -#define GPIO1_GPIO_SET_MASK 0x01 -#define GPIO1_GPIO_SET_SHIFT 0 - - -/*Register GPIO2 (0x80) register.RegisterDescription */ -#define GPIO2_GPIO_DEB_MASK 0x10 -#define GPIO2_GPIO_DEB_SHIFT 4 -#define GPIO2_GPIO_PUEN_MASK 0x08 -#define GPIO2_GPIO_PUEN_SHIFT 3 -#define GPIO2_GPIO_CFG_MASK 0x04 -#define GPIO2_GPIO_CFG_SHIFT 2 -#define GPIO2_GPIO_STS_MASK 0x02 -#define GPIO2_GPIO_STS_SHIFT 1 -#define GPIO2_GPIO_SET_MASK 0x01 -#define GPIO2_GPIO_SET_SHIFT 0 - - -/*Register GPIO3 (0x80) register.RegisterDescription */ -#define GPIO3_GPIO_DEB_MASK 0x10 -#define GPIO3_GPIO_DEB_SHIFT 4 -#define GPIO3_GPIO_PUEN_MASK 0x08 -#define GPIO3_GPIO_PUEN_SHIFT 3 -#define GPIO3_GPIO_CFG_MASK 0x04 -#define GPIO3_GPIO_CFG_SHIFT 2 -#define GPIO3_GPIO_STS_MASK 0x02 -#define GPIO3_GPIO_STS_SHIFT 1 -#define GPIO3_GPIO_SET_MASK 0x01 -#define GPIO3_GPIO_SET_SHIFT 0 - - -/*Register GPIO4 (0x80) register.RegisterDescription */ -#define GPIO4_GPIO_DEB_MASK 0x10 -#define GPIO4_GPIO_DEB_SHIFT 4 -#define GPIO4_GPIO_PUEN_MASK 0x08 -#define GPIO4_GPIO_PUEN_SHIFT 3 -#define GPIO4_GPIO_CFG_MASK 0x04 -#define GPIO4_GPIO_CFG_SHIFT 2 -#define GPIO4_GPIO_STS_MASK 0x02 -#define GPIO4_GPIO_STS_SHIFT 1 -#define GPIO4_GPIO_SET_MASK 0x01 -#define GPIO4_GPIO_SET_SHIFT 0 - - -/*Register GPIO5 (0x80) register.RegisterDescription */ -#define GPIO5_GPIO_DEB_MASK 0x10 -#define GPIO5_GPIO_DEB_SHIFT 4 -#define GPIO5_GPIO_PUEN_MASK 0x08 -#define GPIO5_GPIO_PUEN_SHIFT 3 -#define GPIO5_GPIO_CFG_MASK 0x04 -#define GPIO5_GPIO_CFG_SHIFT 2 -#define GPIO5_GPIO_STS_MASK 0x02 -#define GPIO5_GPIO_STS_SHIFT 1 -#define GPIO5_GPIO_SET_MASK 0x01 -#define GPIO5_GPIO_SET_SHIFT 0 +/*Register GPIO (0x80) register.RegisterDescription */ +#define GPIO_DEB_MASK 0x10 +#define GPIO_DEB_SHIFT 4 +#define GPIO_PUEN_MASK 0x08 +#define GPIO_PUEN_SHIFT 3 +#define GPIO_CFG_MASK 0x04 +#define GPIO_CFG_SHIFT 2 +#define GPIO_STS_MASK 0x02 +#define GPIO_STS_SHIFT 1 +#define GPIO_SET_MASK 0x01 +#define GPIO_SET_SHIFT 0 /*Register JTAGVERNUM (0x80) register.RegisterDescription */ -- cgit v1.2.3 From 6851ad3ab3461966adfffe8789372fe8256da792 Mon Sep 17 00:00:00 2001 From: Jorge Eduardo Candelaria Date: Mon, 16 May 2011 18:35:48 -0500 Subject: TPS65911: Comparator: Add comparator driver This driver adds functionality to the tps65911 chip driver. Two of the comparators are configurable by software and measures VCCS voltage to detect high or low voltage scenarios. 
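As a rough standalone illustration of the threshold programming described above (the actual driver follows below), this user-space sketch maps a requested threshold in millivolts to a selector index by scanning a monotonic voltage table. The table values mirror COMP_VSEL_TABLE from the patch; the function and program around it are hypothetical and not part of the commit.

/*
 * Hypothetical sketch: pick the first table entry that reaches the
 * requested threshold, the same policy comp_threshold_set() uses below.
 */
#include <stdio.h>

static const unsigned short vsel_table[] = {
        0, 2500, 2500, 2500, 2500, 2550, 2600, 2650,
        2700, 2750, 2800, 2850, 2900, 2950, 3000, 3050,
        3100, 3150, 3200, 3250, 3300, 3350, 3400, 3450,
        3500,
};

static int threshold_to_sel(int threshold_mv)
{
        unsigned int i;

        for (i = 0; i < sizeof(vsel_table) / sizeof(vsel_table[0]); i++)
                if (vsel_table[i] >= threshold_mv)
                        return i;
        return -1;                      /* threshold above the table range */
}

int main(void)
{
        /* 2725 mV is not an exact entry, so the next step up is chosen. */
        printf("selector = %d\n", threshold_to_sel(2725));      /* prints 9 */
        return 0;
}

The driver below applies the same policy and then writes the selector, shifted left by one bit, into the TPS65911_VMBCH or TPS65911_VMBCH2 register.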
Signed-off-by: Jorge Eduardo Candelaria Acked-by: Samuel Ortiz Signed-off-by: Liam Girdwood --- drivers/mfd/tps65911-comparator.c | 188 ++++++++++++++++++++++++++++++++++++++ include/linux/mfd/tps65910.h | 4 + 2 files changed, 192 insertions(+) create mode 100644 drivers/mfd/tps65911-comparator.c (limited to 'include/linux') diff --git a/drivers/mfd/tps65911-comparator.c b/drivers/mfd/tps65911-comparator.c new file mode 100644 index 000000000000..3d2dc56a3d40 --- /dev/null +++ b/drivers/mfd/tps65911-comparator.c @@ -0,0 +1,188 @@ +/* + * tps65910.c -- TI TPS6591x + * + * Copyright 2010 Texas Instruments Inc. + * + * Author: Jorge Eduardo Candelaria + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define COMP 0 +#define COMP1 1 +#define COMP2 2 + +/* Comparator 1 voltage selection table in milivolts */ +static const u16 COMP_VSEL_TABLE[] = { + 0, 2500, 2500, 2500, 2500, 2550, 2600, 2650, + 2700, 2750, 2800, 2850, 2900, 2950, 3000, 3050, + 3100, 3150, 3200, 3250, 3300, 3350, 3400, 3450, + 3500, +}; + +struct comparator { + const char *name; + int reg; + int uV_max; + const u16 *vsel_table; +}; + +static struct comparator tps_comparators[] = { + { + .name = "COMP1", + .reg = TPS65911_VMBCH, + .uV_max = 3500, + .vsel_table = COMP_VSEL_TABLE, + }, + { + .name = "COMP2", + .reg = TPS65911_VMBCH2, + .uV_max = 3500, + .vsel_table = COMP_VSEL_TABLE, + }, +}; + +static int comp_threshold_set(struct tps65910 *tps65910, int id, int voltage) +{ + struct comparator tps_comp = tps_comparators[id]; + int curr_voltage = 0; + int ret; + u8 index = 0, val; + + if (id == COMP) + return 0; + + while (curr_voltage < tps_comp.uV_max) { + curr_voltage = tps_comp.vsel_table[index]; + if (curr_voltage >= voltage) + break; + else if (curr_voltage < voltage) + index ++; + } + + if (curr_voltage > tps_comp.uV_max) + return -EINVAL; + + val = index << 1; + ret = tps65910->write(tps65910, tps_comp.reg, 1, &val); + + return ret; +} + +static int comp_threshold_get(struct tps65910 *tps65910, int id) +{ + struct comparator tps_comp = tps_comparators[id]; + int ret; + u8 val; + + if (id == COMP) + return 0; + + ret = tps65910->read(tps65910, tps_comp.reg, 1, &val); + if (ret < 0) + return ret; + + val >>= 1; + return tps_comp.vsel_table[val]; +} + +static ssize_t comp_threshold_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct tps65910 *tps65910 = dev_get_drvdata(dev->parent); + struct attribute comp_attr = attr->attr; + int id, uVolt; + + if (!strcmp(comp_attr.name, "comp1_threshold")) + id = COMP1; + else if (!strcmp(comp_attr.name, "comp2_threshold")) + id = COMP2; + else + return -EINVAL; + + uVolt = comp_threshold_get(tps65910, id); + + return sprintf(buf, "%d\n", uVolt); +} + +static DEVICE_ATTR(comp1_threshold, S_IRUGO, comp_threshold_show, NULL); +static DEVICE_ATTR(comp2_threshold, S_IRUGO, comp_threshold_show, NULL); + +static __devinit int tps65911_comparator_probe(struct platform_device *pdev) +{ + struct tps65910 *tps65910 = dev_get_drvdata(pdev->dev.parent); + struct tps65910_platform_data *pdata = dev_get_platdata(tps65910->dev); + int ret; + + ret = comp_threshold_set(tps65910, COMP1, pdata->vmbch_threshold); + if (ret < 0) { + dev_err(&pdev->dev, "cannot set COMP1 
threshold\n"); + return ret; + } + + ret = comp_threshold_set(tps65910, COMP2, pdata->vmbch2_threshold); + if (ret < 0) { + dev_err(&pdev->dev, "cannot set COMP2 theshold\n"); + return ret; + } + + /* Create sysfs entry */ + ret = device_create_file(&pdev->dev, &dev_attr_comp1_threshold); + if (ret < 0) + dev_err(&pdev->dev, "failed to add COMP1 sysfs file\n"); + + ret = device_create_file(&pdev->dev, &dev_attr_comp2_threshold); + if (ret < 0) + dev_err(&pdev->dev, "failed to add COMP2 sysfs file\n"); + + return ret; +} + +static __devexit int tps65911_comparator_remove(struct platform_device *pdev) +{ + struct tps65910 *tps65910; + + tps65910 = dev_get_drvdata(pdev->dev.parent); + + return 0; +} + +static struct platform_driver tps65911_comparator_driver = { + .driver = { + .name = "tps65911-comparator", + .owner = THIS_MODULE, + }, + .probe = tps65911_comparator_probe, + .remove = __devexit_p(tps65911_comparator_remove), +}; + +static int __init tps65911_comparator_init(void) +{ + return platform_driver_register(&tps65911_comparator_driver); +} +subsys_initcall(tps65911_comparator_init); + +static void __exit tps65911_comparator_exit(void) +{ + platform_driver_unregister(&tps65911_comparator_driver); +} +module_exit(tps65911_comparator_exit); + +MODULE_AUTHOR("Jorge Eduardo Candelaria "); +MODULE_DESCRIPTION("TPS65911 comparator driver"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:tps65911-comparator"); diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index 5f770064e0c5..8bb85b930c07 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -121,6 +121,8 @@ #define TPS65911_LDO6 0x35 #define TPS65911_LDO4 0x36 #define TPS65911_LDO3 0x37 +#define TPS65911_VMBCH 0x6A +#define TPS65911_VMBCH2 0x6B /* * List of register bitfields for component TPS65910 @@ -746,6 +748,8 @@ struct tps65910_board { int gpio_base; int irq; int irq_base; + int vmbch_threshold; + int vmbch2_threshold; struct regulator_init_data *tps65910_pmic_init_data; }; -- cgit v1.2.3 From aa38572954ade525817fe88c54faebf85e5a61c0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 May 2011 06:53:02 -0400 Subject: fs: pass exact type of data dirties to ->dirty_inode Tell the filesystem if we just updated timestamp (I_DIRTY_SYNC) or anything else, so that the filesystem can track internally if it needs to push out a transaction for fdatasync or not. This is just the prototype change with no user for it yet. I plan to push large XFS changes for the next merge window, and getting this trivial infrastructure in this window would help a lot to avoid tree interdependencies. Also remove incorrect comments that ->dirty_inode can't block. That has been changed a long time ago, and many implementations rely on it. 
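The new flags argument makes that distinction visible to the filesystem. Below is a minimal sketch of how a filesystem could use it, assuming only the existing I_DIRTY_SYNC flag from include/linux/fs.h; the examplefs_* names are hypothetical and are not part of this patch.

#include <linux/fs.h>

static void examplefs_log_inode(struct inode *inode);  /* hypothetical helper */

/*
 * Hypothetical ->dirty_inode(): when only a timestamp update was recorded
 * (I_DIRTY_SYNC alone), no transaction has to be pushed out for a later
 * fdatasync(), so the expensive logging path can be skipped.
 */
static void examplefs_dirty_inode(struct inode *inode, int flags)
{
        if (flags == I_DIRTY_SYNC)
                return;                 /* timestamps only */

        examplefs_log_inode(inode);     /* real metadata was dirtied */
}

The conversions in the diff below only adjust prototypes; as the changelog notes, no in-tree filesystem makes use of the argument yet.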
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 4 ++-- Documentation/filesystems/vfs.txt | 2 +- fs/btrfs/ctree.h | 2 +- fs/btrfs/inode.c | 2 +- fs/ext3/inode.c | 2 +- fs/ext4/ext4.h | 2 +- fs/ext4/inode.c | 2 +- fs/fs-writeback.c | 5 +---- fs/jffs2/fs.c | 2 +- fs/jffs2/os-linux.h | 2 +- fs/jfs/inode.c | 2 +- fs/jfs/jfs_inode.h | 2 +- fs/nilfs2/inode.c | 2 +- fs/nilfs2/nilfs.h | 2 +- fs/reiserfs/super.c | 2 +- fs/ubifs/super.c | 2 +- fs/xfs/linux-2.6/xfs_super.c | 3 ++- include/linux/ext3_fs.h | 2 +- include/linux/fs.h | 2 +- 19 files changed, 21 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 61b31acb9176..57d827d6071d 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -104,7 +104,7 @@ of the locking scheme for directory operations. prototypes: struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); - void (*dirty_inode) (struct inode *); + void (*dirty_inode) (struct inode *, int flags); int (*write_inode) (struct inode *, struct writeback_control *wbc); int (*drop_inode) (struct inode *); void (*evict_inode) (struct inode *); @@ -126,7 +126,7 @@ locking rules: s_umount alloc_inode: destroy_inode: -dirty_inode: (must not sleep) +dirty_inode: write_inode: drop_inode: !!!inode->i_lock!!! evict_inode: diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 21a7dc467bba..88b9f5519af9 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -211,7 +211,7 @@ struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); - void (*dirty_inode) (struct inode *); + void (*dirty_inode) (struct inode *, int flags); int (*write_inode) (struct inode *, int); void (*drop_inode) (struct inode *); void (*delete_inode) (struct inode *); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8f4b81de3ae2..d2177e7ad647 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2522,7 +2522,7 @@ int btrfs_readpage(struct file *file, struct page *page); void btrfs_evict_inode(struct inode *inode); void btrfs_put_inode(struct inode *inode); int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); -void btrfs_dirty_inode(struct inode *inode); +void btrfs_dirty_inode(struct inode *inode, int flags); struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); int btrfs_drop_inode(struct inode *inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7cd8ab0ef04d..ecff7d7a505f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4396,7 +4396,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) * FIXME, needs more benchmarking...there are no reasons other than performance * to keep or drop this code. */ -void btrfs_dirty_inode(struct inode *inode) +void btrfs_dirty_inode(struct inode *inode, int flags) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 68b2e43d7c35..3451d23c3bae 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3392,7 +3392,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) * so would cause a commit on atime updates, which we don't bother doing. * We handle synchronous inodes at the highest possible level. 
*/ -void ext3_dirty_inode(struct inode *inode) +void ext3_dirty_inode(struct inode *inode, int flags) { handle_t *current_handle = ext3_journal_current_handle(); handle_t *handle; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index a74b89c09f90..1921392cd708 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1813,7 +1813,7 @@ extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, extern void ext4_evict_inode(struct inode *); extern void ext4_clear_inode(struct inode *); extern int ext4_sync_inode(handle_t *, struct inode *); -extern void ext4_dirty_inode(struct inode *); +extern void ext4_dirty_inode(struct inode *, int); extern int ext4_change_inode_journal_flag(struct inode *, int); extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); extern int ext4_can_truncate(struct inode *inode); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 50d0e9c64584..a5763e3505ba 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5733,7 +5733,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) * so would cause a commit on atime updates, which we don't bother doing. * We handle synchronous inodes at the highest possible level. */ -void ext4_dirty_inode(struct inode *inode) +void ext4_dirty_inode(struct inode *inode, int flags) { handle_t *handle; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 34591ee804b5..0f015a0468de 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1007,9 +1007,6 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode) * In short, make sure you hash any inodes _before_ you start marking * them dirty. * - * This function *must* be atomic for the I_DIRTY_PAGES case - - * set_page_dirty() is called under spinlock in several places. - * * Note that for blockdevs, inode->dirtied_when represents the dirtying time of * the block-special inode (/dev/hda1) itself. 
And the ->dirtied_when field of * the kernel-internal blockdev inode represents the dirtying time of the @@ -1028,7 +1025,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) */ if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { if (sb->s_op->dirty_inode) - sb->s_op->dirty_inode(inode); + sb->s_op->dirty_inode(inode, flags); } /* diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index e896e67767eb..46ad619b6124 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -357,7 +357,7 @@ error: return ERR_PTR(ret); } -void jffs2_dirty_inode(struct inode *inode) +void jffs2_dirty_inode(struct inode *inode, int flags) { struct iattr iattr; diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 00bae7cc2e48..65c6c43ca482 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -172,7 +172,7 @@ int jffs2_setattr (struct dentry *, struct iattr *); int jffs2_do_setattr (struct inode *, struct iattr *); struct inode *jffs2_iget(struct super_block *, unsigned long); void jffs2_evict_inode (struct inode *); -void jffs2_dirty_inode(struct inode *inode); +void jffs2_dirty_inode(struct inode *inode, int flags); struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_inode *ri); int jffs2_statfs (struct dentry *, struct kstatfs *); diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index eddbb373209e..109655904bbc 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -173,7 +173,7 @@ void jfs_evict_inode(struct inode *inode) dquot_drop(inode); } -void jfs_dirty_inode(struct inode *inode) +void jfs_dirty_inode(struct inode *inode, int flags) { static int noisy = 5; diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 155e91eff07d..ec2fb8b945fc 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -28,7 +28,7 @@ extern struct inode *jfs_iget(struct super_block *, unsigned long); extern int jfs_commit_inode(struct inode *, int); extern int jfs_write_inode(struct inode *, struct writeback_control *); extern void jfs_evict_inode(struct inode *); -extern void jfs_dirty_inode(struct inode *); +extern void jfs_dirty_inode(struct inode *, int); extern void jfs_truncate(struct inode *); extern void jfs_truncate_nolock(struct inode *, loff_t); extern void jfs_free_zero_link(struct inode *); diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 587f18432832..b954878ad6ce 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -917,7 +917,7 @@ int nilfs_mark_inode_dirty(struct inode *inode) * construction. This function can be called both as a single operation * and as a part of indivisible file operations. 
*/ -void nilfs_dirty_inode(struct inode *inode) +void nilfs_dirty_inode(struct inode *inode, int flags) { struct nilfs_transaction_info ti; struct nilfs_mdt_info *mdi = NILFS_MDT(inode); diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index a9c6a531f80c..f02b9ad43a21 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -269,7 +269,7 @@ int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh); extern int nilfs_inode_dirty(struct inode *); int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty); extern int nilfs_mark_inode_dirty(struct inode *); -extern void nilfs_dirty_inode(struct inode *); +extern void nilfs_dirty_inode(struct inode *, int flags); int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index b216ff6be1c9..aa91089162cb 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -568,7 +568,7 @@ static void destroy_inodecache(void) } /* we don't mark inodes dirty, we just log them */ -static void reiserfs_dirty_inode(struct inode *inode) +static void reiserfs_dirty_inode(struct inode *inode, int flags) { struct reiserfs_transaction_handle th; diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 6db0bdaa9f74..1ab0d22e4c94 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -382,7 +382,7 @@ done: end_writeback(inode); } -static void ubifs_dirty_inode(struct inode *inode) +static void ubifs_dirty_inode(struct inode *inode, int flags) { struct ubifs_inode *ui = ubifs_inode(inode); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 98b9c91fcdf1..1e3a7ce804dc 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -925,7 +925,8 @@ xfs_fs_inode_init_once( */ STATIC void xfs_fs_dirty_inode( - struct inode *inode) + struct inode *inode, + int flags) { barrier(); XFS_I(inode)->i_update_core = 1; diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 85c1d302c12e..5e06acf95d0f 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -909,7 +909,7 @@ extern int ext3_setattr (struct dentry *, struct iattr *); extern void ext3_evict_inode (struct inode *); extern int ext3_sync_inode (handle_t *, struct inode *); extern void ext3_discard_reservation (struct inode *); -extern void ext3_dirty_inode(struct inode *); +extern void ext3_dirty_inode(struct inode *, int); extern int ext3_change_inode_journal_flag(struct inode *, int); extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *); extern int ext3_can_truncate(struct inode *inode); diff --git a/include/linux/fs.h b/include/linux/fs.h index 241609346dfb..573028df050d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1618,7 +1618,7 @@ struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); - void (*dirty_inode) (struct inode *); + void (*dirty_inode) (struct inode *, int flags); int (*write_inode) (struct inode *, struct writeback_control *wbc); int (*drop_inode) (struct inode *); void (*evict_inode) (struct inode *); -- cgit v1.2.3 From 9d931dd2ed62c14d7bf7c4c3ab3ef7610c46ca9b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 26 May 2011 16:30:57 -0400 Subject: net: Kill ether_table[] declaration. This got missed back in 2006 when Jes Sorensen deleted net/ethernet/sysctl_net_ether.c Signed-off-by: David S. 
Miller --- include/linux/if_ether.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 0f1325d98295..0065ffd3226b 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -132,10 +132,6 @@ static inline struct ethhdr *eth_hdr(const struct sk_buff *skb) int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr); -#ifdef CONFIG_SYSCTL -extern struct ctl_table ether_table[]; -#endif - int mac_pton(const char *s, u8 *mac); extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len); -- cgit v1.2.3 From bee95250f015ffc3a6efb99516489e70d1b52da2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 26 May 2011 16:40:37 -0400 Subject: net: Add linux/sysctl.h includes where needed. Several networking headers were depending upon the implicit linux/sysctl.h include they get when including linux/net.h Add explicit includes. Signed-off-by: David S. Miller --- include/linux/netfilter.h | 1 + include/net/net_namespace.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 7fa95df60146..857f5026ced6 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -13,6 +13,7 @@ #endif #include #include +#include /* Responses from hook functions. */ #define NF_DROP 0 diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 3ae491932bc8..07a60e8b749e 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From c5c177b4aca83338781e72be2e6dd1601c560cb3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 27 May 2011 13:41:33 -0400 Subject: net: Kill ratelimit.h dependency in linux/net.h Ingo Molnar noticed that we have this unnecessary ratelimit.h dependency in linux/net.h, which hid compilation problems from people doing builds only with CONFIG_NET enabled. Move this stuff out to a seperate net/net_ratelimit.h file and include that in the only two places where this thing is needed. Signed-off-by: David S. 
Miller Acked-by: Ingo Molnar --- include/linux/net.h | 6 ------ include/net/net_ratelimit.h | 8 ++++++++ net/core/sysctl_net_core.c | 1 + net/core/utils.c | 1 + 4 files changed, 10 insertions(+), 6 deletions(-) create mode 100644 include/net/net_ratelimit.h (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 1da55e9b6f01..b29923006b11 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -289,11 +289,5 @@ extern int kernel_sock_shutdown(struct socket *sock, MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto) \ "-type-" __stringify(type)) -#ifdef CONFIG_SYSCTL -#include -#include -extern struct ratelimit_state net_ratelimit_state; -#endif - #endif /* __KERNEL__ */ #endif /* _LINUX_NET_H */ diff --git a/include/net/net_ratelimit.h b/include/net/net_ratelimit.h new file mode 100644 index 000000000000..7727b4247daf --- /dev/null +++ b/include/net/net_ratelimit.h @@ -0,0 +1,8 @@ +#ifndef _LINUX_NET_RATELIMIT_H +#define _LINUX_NET_RATELIMIT_H + +#include + +extern struct ratelimit_state net_ratelimit_state; + +#endif /* _LINUX_NET_RATELIMIT_H */ diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index a829e3f60aeb..77a65f031488 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -17,6 +17,7 @@ #include #include +#include #ifdef CONFIG_RPS static int rps_sock_flow_sysctl(ctl_table *table, int write, diff --git a/net/core/utils.c b/net/core/utils.c index 2012bc797f9c..386e263f6066 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -27,6 +27,7 @@ #include #include +#include #include #include -- cgit v1.2.3 From 176e21ee2ec89cae8d45cf1a850ea45a45428fb8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 9 May 2011 15:22:44 -0400 Subject: SUNRPC: Support for RPC over AF_LOCAL transports TI-RPC introduces the capability of performing RPC over AF_LOCAL sockets. It uses this mainly for registering and unregistering local RPC services securely with the local rpcbind, but we could also conceivably use it as a generic upcall mechanism. This patch provides a client-side only implementation for the moment. We might also consider a server-side implementation to provide AF_LOCAL access to NLM (for statd downcalls, and such like). Autobinding is not supported on kernel AF_LOCAL transports at this time. Kernel ULPs must specify the pathname of the remote endpoint when an AF_LOCAL transport is created. rpcbind supports registering services available via AF_LOCAL, so the kernel could handle it with some adjustment to ->rpcbind and ->set_port. But we don't need this feature for doing upcalls via well-known named sockets. This has not been tested with ULPs that move a substantial amount of data. Thus, I can't attest to how robust the write_space and congestion management logic is. 
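Below is a hedged sketch of how a kernel ULP might request such a transport once this patch is applied. The socket path, program and version are placeholders, and the rpc_create_args fields are assumed to match this tree rather than quoted from it.

#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xprt.h>
#include <linux/un.h>
#include <net/net_namespace.h>

/* Hypothetical caller: connect an RPC client to a named AF_LOCAL socket. */
static struct rpc_clnt *example_create_local_client(struct rpc_program *prog)
{
        static const struct sockaddr_un addr = {
                .sun_family     = AF_LOCAL,
                .sun_path       = "/var/run/example.sock",      /* placeholder */
        };
        struct rpc_create_args args = {
                .net            = &init_net,
                .protocol       = XPRT_TRANSPORT_LOCAL,        /* new 257 value */
                .address        = (struct sockaddr *)&addr,
                .addrsize       = sizeof(addr),
                .program        = prog,
                .version        = 1,                            /* placeholder */
                .authflavor     = RPC_AUTH_NULL,
        };

        /*
         * No autobinding on AF_LOCAL: the path must name a socket that the
         * server side is already listening on, as rpcbind does.
         */
        return rpc_create(&args);
}

With args->servername left NULL, rpc_create() in this patch derives the displayed server name from sun_path.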
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/msg_prot.h | 1 + include/linux/sunrpc/xprt.h | 3 +- net/sunrpc/clnt.c | 8 + net/sunrpc/xprtsock.c | 395 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 403 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 77e624883393..c68a147939a6 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -145,6 +145,7 @@ typedef __be32 rpc_fraghdr; #define RPCBIND_NETID_TCP "tcp" #define RPCBIND_NETID_UDP6 "udp6" #define RPCBIND_NETID_TCP6 "tcp6" +#define RPCBIND_NETID_LOCAL "local" /* * Note that RFC 1833 does not put any size restrictions on the diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index a0f998c07c65..81cce3b3ee66 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -141,7 +141,8 @@ enum xprt_transports { XPRT_TRANSPORT_UDP = IPPROTO_UDP, XPRT_TRANSPORT_TCP = IPPROTO_TCP, XPRT_TRANSPORT_BC_TCP = IPPROTO_TCP | XPRT_TRANSPORT_BC, - XPRT_TRANSPORT_RDMA = 256 + XPRT_TRANSPORT_RDMA = 256, + XPRT_TRANSPORT_LOCAL = 257, }; struct rpc_xprt { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 08ed49629b86..b84d7395535e 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -28,7 +28,9 @@ #include #include #include +#include #include +#include #include #include @@ -294,6 +296,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) * up a string representation of the passed-in address. */ if (args->servername == NULL) { + struct sockaddr_un *sun = + (struct sockaddr_un *)args->address; struct sockaddr_in *sin = (struct sockaddr_in *)args->address; struct sockaddr_in6 *sin6 = @@ -301,6 +305,10 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) servername[0] = '\0'; switch (args->address->sa_family) { + case AF_LOCAL: + snprintf(servername, sizeof(servername), "%s", + sun->sun_path); + break; case AF_INET: snprintf(servername, sizeof(servername), "%pI4", &sin->sin_addr.s_addr); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 079366974e1f..72abb7358933 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -19,6 +19,7 @@ */ #include +#include #include #include #include @@ -28,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -45,6 +47,9 @@ #include #include "sunrpc.h" + +static void xs_close(struct rpc_xprt *xprt); + /* * xprtsock tunables */ @@ -261,6 +266,11 @@ static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) return (struct sockaddr *) &xprt->addr; } +static inline struct sockaddr_un *xs_addr_un(struct rpc_xprt *xprt) +{ + return (struct sockaddr_un *) &xprt->addr; +} + static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt) { return (struct sockaddr_in *) &xprt->addr; @@ -276,23 +286,34 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) struct sockaddr *sap = xs_addr(xprt); struct sockaddr_in6 *sin6; struct sockaddr_in *sin; + struct sockaddr_un *sun; char buf[128]; - (void)rpc_ntop(sap, buf, sizeof(buf)); - xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); - switch (sap->sa_family) { + case AF_LOCAL: + sun = xs_addr_un(xprt); + strlcpy(buf, sun->sun_path, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = + kstrdup(buf, GFP_KERNEL); + break; case AF_INET: + (void)rpc_ntop(sap, buf, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = + kstrdup(buf, GFP_KERNEL); sin = xs_addr_in(xprt); 
snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); break; case AF_INET6: + (void)rpc_ntop(sap, buf, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = + kstrdup(buf, GFP_KERNEL); sin6 = xs_addr_in6(xprt); snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); break; default: BUG(); } + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); } @@ -505,6 +526,60 @@ static inline void xs_encode_stream_record_marker(struct xdr_buf *buf) *base = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | reclen); } +/** + * xs_local_send_request - write an RPC request to an AF_LOCAL socket + * @task: RPC task that manages the state of an RPC request + * + * Return values: + * 0: The request has been sent + * EAGAIN: The socket was blocked, please call again later to + * complete the request + * ENOTCONN: Caller needs to invoke connect logic then call again + * other: Some other error occured, the request was not sent + */ +static int xs_local_send_request(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + struct xdr_buf *xdr = &req->rq_snd_buf; + int status; + + xs_encode_stream_record_marker(&req->rq_snd_buf); + + xs_pktdump("packet data:", + req->rq_svec->iov_base, req->rq_svec->iov_len); + + status = xs_sendpages(transport->sock, NULL, 0, + xdr, req->rq_bytes_sent); + dprintk("RPC: %s(%u) = %d\n", + __func__, xdr->len - req->rq_bytes_sent, status); + if (likely(status >= 0)) { + req->rq_bytes_sent += status; + req->rq_xmit_bytes_sent += status; + if (likely(req->rq_bytes_sent >= req->rq_slen)) { + req->rq_bytes_sent = 0; + return 0; + } + status = -EAGAIN; + } + + switch (status) { + case -EAGAIN: + status = xs_nospace(task); + break; + default: + dprintk("RPC: sendmsg returned unrecognized error %d\n", + -status); + case -EPIPE: + xs_close(xprt); + status = -ENOTCONN; + } + + return status; +} + /** * xs_udp_send_request - write an RPC request to a UDP socket * @task: address of RPC task that manages the state of an RPC request @@ -788,6 +863,88 @@ static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) return (struct rpc_xprt *) sk->sk_user_data; } +static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) +{ + struct xdr_skb_reader desc = { + .skb = skb, + .offset = sizeof(rpc_fraghdr), + .count = skb->len - sizeof(rpc_fraghdr), + }; + + if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0) + return -1; + if (desc.count) + return -1; + return 0; +} + +/** + * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets + * @sk: socket with data to read + * @len: how much data to read + * + * Currently this assumes we can read the whole reply in a single gulp. + */ +static void xs_local_data_ready(struct sock *sk, int len) +{ + struct rpc_task *task; + struct rpc_xprt *xprt; + struct rpc_rqst *rovr; + struct sk_buff *skb; + int err, repsize, copied; + u32 _xid; + __be32 *xp; + + read_lock_bh(&sk->sk_callback_lock); + dprintk("RPC: %s...\n", __func__); + xprt = xprt_from_sock(sk); + if (xprt == NULL) + goto out; + + skb = skb_recv_datagram(sk, 0, 1, &err); + if (skb == NULL) + goto out; + + if (xprt->shutdown) + goto dropit; + + repsize = skb->len - sizeof(rpc_fraghdr); + if (repsize < 4) { + dprintk("RPC: impossible RPC reply size %d\n", repsize); + goto dropit; + } + + /* Copy the XID from the skb... 
*/ + xp = skb_header_pointer(skb, sizeof(rpc_fraghdr), sizeof(_xid), &_xid); + if (xp == NULL) + goto dropit; + + /* Look up and lock the request corresponding to the given XID */ + spin_lock(&xprt->transport_lock); + rovr = xprt_lookup_rqst(xprt, *xp); + if (!rovr) + goto out_unlock; + task = rovr->rq_task; + + copied = rovr->rq_private_buf.buflen; + if (copied > repsize) + copied = repsize; + + if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) { + dprintk("RPC: sk_buff copy failed\n"); + goto out_unlock; + } + + xprt_complete_rqst(task, copied); + + out_unlock: + spin_unlock(&xprt->transport_lock); + dropit: + skb_free_datagram(sk, skb); + out: + read_unlock_bh(&sk->sk_callback_lock); +} + /** * xs_udp_data_ready - "data ready" callback for UDP sockets * @sk: socket with data to read @@ -1573,11 +1730,31 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) return err; } +/* + * We don't support autobind on AF_LOCAL sockets + */ +static void xs_local_rpcbind(struct rpc_task *task) +{ + xprt_set_bound(task->tk_xprt); +} + +static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port) +{ +} #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key xs_key[2]; static struct lock_class_key xs_slock_key[2]; +static inline void xs_reclassify_socketu(struct socket *sock) +{ + struct sock *sk = sock->sk; + + BUG_ON(sock_owned_by_user(sk)); + sock_lock_init_class_and_name(sk, "slock-AF_LOCAL-RPC", + &xs_slock_key[1], "sk_lock-AF_LOCAL-RPC", &xs_key[1]); +} + static inline void xs_reclassify_socket4(struct socket *sock) { struct sock *sk = sock->sk; @@ -1599,6 +1776,9 @@ static inline void xs_reclassify_socket6(struct socket *sock) static inline void xs_reclassify_socket(int family, struct socket *sock) { switch (family) { + case AF_LOCAL: + xs_reclassify_socketu(sock); + break; case AF_INET: xs_reclassify_socket4(sock); break; @@ -1608,6 +1788,10 @@ static inline void xs_reclassify_socket(int family, struct socket *sock) } } #else +static inline void xs_reclassify_socketu(struct socket *sock) +{ +} + static inline void xs_reclassify_socket4(struct socket *sock) { } @@ -1646,6 +1830,94 @@ out: return ERR_PTR(err); } +static int xs_local_finish_connecting(struct rpc_xprt *xprt, + struct socket *sock) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, + xprt); + + if (!transport->inet) { + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + xs_save_old_callbacks(transport, sk); + + sk->sk_user_data = xprt; + sk->sk_data_ready = xs_local_data_ready; + sk->sk_write_space = xs_udp_write_space; + sk->sk_error_report = xs_error_report; + sk->sk_allocation = GFP_ATOMIC; + + xprt_clear_connected(xprt); + + /* Reset to new socket */ + transport->sock = sock; + transport->inet = sk; + + write_unlock_bh(&sk->sk_callback_lock); + } + + /* Tell the socket layer to start connecting... */ + xprt->stat.connect_count++; + xprt->stat.connect_start = jiffies; + return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0); +} + +/** + * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint + * @xprt: RPC transport to connect + * @transport: socket transport to connect + * @create_sock: function to create a socket of the correct type + * + * Invoked by a work queue tasklet. 
+ */ +static void xs_local_setup_socket(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_xprt *xprt = &transport->xprt; + struct socket *sock; + int status = -EIO; + + if (xprt->shutdown) + goto out; + + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); + status = __sock_create(xprt->xprt_net, AF_LOCAL, + SOCK_STREAM, 0, &sock, 1); + if (status < 0) { + dprintk("RPC: can't create AF_LOCAL " + "transport socket (%d).\n", -status); + goto out; + } + xs_reclassify_socketu(sock); + + dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); + + status = xs_local_finish_connecting(xprt, sock); + switch (status) { + case 0: + dprintk("RPC: xprt %p connected to %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); + xprt_set_connected(xprt); + break; + case -ENOENT: + dprintk("RPC: xprt %p: socket %s does not exist\n", + xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); + break; + default: + printk(KERN_ERR "%s: unhandled error (%d) connecting to %s\n", + __func__, -status, + xprt->address_strings[RPC_DISPLAY_ADDR]); + } + +out: + xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); +} + static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -1929,6 +2201,32 @@ static void xs_connect(struct rpc_task *task) } } +/** + * xs_local_print_stats - display AF_LOCAL socket-specifc stats + * @xprt: rpc_xprt struct containing statistics + * @seq: output file + * + */ +static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) +{ + long idle_time = 0; + + if (xprt_connected(xprt)) + idle_time = (long)(jiffies - xprt->last_used) / HZ; + + seq_printf(seq, "\txprt:\tlocal %lu %lu %lu %ld %lu %lu %lu " + "%llu %llu\n", + xprt->stat.bind_count, + xprt->stat.connect_count, + xprt->stat.connect_time, + idle_time, + xprt->stat.sends, + xprt->stat.recvs, + xprt->stat.bad_xids, + xprt->stat.req_u, + xprt->stat.bklog_u); +} + /** * xs_udp_print_stats - display UDP socket-specifc stats * @xprt: rpc_xprt struct containing statistics @@ -2099,6 +2397,21 @@ static void bc_destroy(struct rpc_xprt *xprt) { } +static struct rpc_xprt_ops xs_local_ops = { + .reserve_xprt = xprt_reserve_xprt, + .release_xprt = xs_tcp_release_xprt, + .rpcbind = xs_local_rpcbind, + .set_port = xs_local_set_port, + .connect = xs_connect, + .buf_alloc = rpc_malloc, + .buf_free = rpc_free, + .send_request = xs_local_send_request, + .set_retrans_timeout = xprt_set_retrans_timeout_def, + .close = xs_close, + .destroy = xs_destroy, + .print_stats = xs_local_print_stats, +}; + static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt_cong, @@ -2160,6 +2473,8 @@ static int xs_init_anyaddr(const int family, struct sockaddr *sap) }; switch (family) { + case AF_LOCAL: + break; case AF_INET: memcpy(sap, &sin, sizeof(sin)); break; @@ -2207,6 +2522,70 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, return xprt; } +static const struct rpc_timeout xs_local_default_timeout = { + .to_initval = 10 * HZ, + .to_maxval = 10 * HZ, + .to_retries = 2, +}; + +/** + * xs_setup_local - Set up transport to use an AF_LOCAL socket + * @args: rpc transport creation arguments + * + * AF_LOCAL is a "tpi_cots_ord" transport, just like TCP + */ +static struct rpc_xprt *xs_setup_local(struct xprt_create *args) +{ + struct 
sockaddr_un *sun = (struct sockaddr_un *)args->dstaddr; + struct sock_xprt *transport; + struct rpc_xprt *xprt; + struct rpc_xprt *ret; + + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); + if (IS_ERR(xprt)) + return xprt; + transport = container_of(xprt, struct sock_xprt, xprt); + + xprt->prot = 0; + xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); + xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; + + xprt->bind_timeout = XS_BIND_TO; + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; + xprt->idle_timeout = XS_IDLE_DISC_TO; + + xprt->ops = &xs_local_ops; + xprt->timeout = &xs_local_default_timeout; + + switch (sun->sun_family) { + case AF_LOCAL: + if (sun->sun_path[0] != '/') { + dprintk("RPC: bad AF_LOCAL address: %s\n", + sun->sun_path); + ret = ERR_PTR(-EINVAL); + goto out_err; + } + xprt_set_bound(xprt); + INIT_DELAYED_WORK(&transport->connect_worker, + xs_local_setup_socket); + xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL); + break; + default: + ret = ERR_PTR(-EAFNOSUPPORT); + goto out_err; + } + + dprintk("RPC: set up xprt to %s via AF_LOCAL\n", + xprt->address_strings[RPC_DISPLAY_ADDR]); + + if (try_module_get(THIS_MODULE)) + return xprt; + ret = ERR_PTR(-EINVAL); +out_err: + xprt_free(xprt); + return ret; +} + static const struct rpc_timeout xs_udp_default_timeout = { .to_initval = 5 * HZ, .to_maxval = 30 * HZ, @@ -2448,6 +2827,14 @@ out_err: return ret; } +static struct xprt_class xs_local_transport = { + .list = LIST_HEAD_INIT(xs_local_transport.list), + .name = "named UNIX socket", + .owner = THIS_MODULE, + .ident = XPRT_TRANSPORT_LOCAL, + .setup = xs_setup_local, +}; + static struct xprt_class xs_udp_transport = { .list = LIST_HEAD_INIT(xs_udp_transport.list), .name = "udp", @@ -2483,6 +2870,7 @@ int init_socket_xprt(void) sunrpc_table_header = register_sysctl_table(sunrpc_table); #endif + xprt_register_transport(&xs_local_transport); xprt_register_transport(&xs_udp_transport); xprt_register_transport(&xs_tcp_transport); xprt_register_transport(&xs_bc_tcp_transport); @@ -2503,6 +2891,7 @@ void cleanup_socket_xprt(void) } #endif + xprt_unregister_transport(&xs_local_transport); xprt_unregister_transport(&xs_udp_transport); xprt_unregister_transport(&xs_tcp_transport); xprt_unregister_transport(&xs_bc_tcp_transport); -- cgit v1.2.3 From 7c295975a85b049385dfe0d5ee0d4d543619fbdc Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 25 May 2011 16:20:31 -0700 Subject: gpio: make gpio_{request,free}_array gpio array parameter const gpio_{request,free}_array should not (and do not) modify the passed gpio array, so make the parameter const. 
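As a usage sketch (not part of the patch), a caller can now keep its request table in read-only data; the GPIO numbers, flags and labels below are invented for illustration.

#include <linux/kernel.h>
#include <linux/gpio.h>

/* Hypothetical board table: numbers and labels are placeholders. */
static const struct gpio example_gpios[] = {
        { 42, GPIOF_OUT_INIT_LOW, "example-reset" },
        { 43, GPIOF_IN,           "example-card-detect" },
};

static int example_claim_pins(void)
{
        int err;

        err = gpio_request_array(example_gpios, ARRAY_SIZE(example_gpios));
        if (err)
                return err;

        /* ... use the pins ... */

        gpio_free_array(example_gpios, ARRAY_SIZE(example_gpios));
        return 0;
}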
Signed-off-by: Lars-Peter Clausen Acked-by: Eric Miao Acked-by: Wolfram Sang Signed-off-by: Andrew Morton Signed-off-by: Grant Likely --- drivers/gpio/gpiolib.c | 4 ++-- include/asm-generic/gpio.h | 4 ++-- include/linux/gpio.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 137a8ca67822..a971e3d043ba 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1296,7 +1296,7 @@ EXPORT_SYMBOL_GPL(gpio_request_one); * @array: array of the 'struct gpio' * @num: how many GPIOs in the array */ -int gpio_request_array(struct gpio *array, size_t num) +int gpio_request_array(const struct gpio *array, size_t num) { int i, err; @@ -1319,7 +1319,7 @@ EXPORT_SYMBOL_GPL(gpio_request_array); * @array: array of the 'struct gpio' * @num: how many GPIOs in the array */ -void gpio_free_array(struct gpio *array, size_t num) +void gpio_free_array(const struct gpio *array, size_t num) { while (num--) gpio_free((array++)->gpio); diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index a98b52cb970b..fcdcb5d5c995 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -193,8 +193,8 @@ struct gpio { }; extern int gpio_request_one(unsigned gpio, unsigned long flags, const char *label); -extern int gpio_request_array(struct gpio *array, size_t num); -extern void gpio_free_array(struct gpio *array, size_t num); +extern int gpio_request_array(const struct gpio *array, size_t num); +extern void gpio_free_array(const struct gpio *array, size_t num); #ifdef CONFIG_GPIO_SYSFS diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 0f8265f8e8c3..32d47e710661 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -41,7 +41,7 @@ static inline int gpio_request_one(unsigned gpio, return -ENOSYS; } -static inline int gpio_request_array(struct gpio *array, size_t num) +static inline int gpio_request_array(const struct gpio *array, size_t num) { return -ENOSYS; } @@ -54,7 +54,7 @@ static inline void gpio_free(unsigned gpio) WARN_ON(1); } -static inline void gpio_free_array(struct gpio *array, size_t num) +static inline void gpio_free_array(const struct gpio *array, size_t num) { might_sleep(); -- cgit v1.2.3 From 1e1b6c511d1b23cb7c3b619d82fc7bd9f620565d Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 19 May 2011 15:08:58 +0900 Subject: cpuset: Fix cpuset_cpus_allowed_fallback(), don't update tsk->rt.nr_cpus_allowed The rule is that we have to update tsk->rt.nr_cpus_allowed whenever we change tsk->cpus_allowed. Otherwise the RT scheduler may get confused.
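The invariant is easiest to see in a small sketch. This is illustrative caller code only, not part of the patch; it assumes the do_set_cpus_allowed() helper the patch introduces.

#include <linux/cpumask.h>
#include <linux/sched.h>

static void example_pin_task(struct task_struct *p, const struct cpumask *mask)
{
        /*
         * Wrong (old open-coded pattern): updates ->cpus_allowed but leaves
         * ->rt.nr_cpus_allowed stale, confusing the RT scheduler:
         *
         *      cpumask_copy(&p->cpus_allowed, mask);
         *
         * Right: let the helper keep both fields in sync, or defer to the
         * scheduling class when it implements ->set_cpus_allowed().
         */
        do_set_cpus_allowed(p, mask);
}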
Signed-off-by: KOSAKI Motohiro Cc: Oleg Nesterov Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/4DD4B3FA.5060901@jp.fujitsu.com Signed-off-by: Ingo Molnar --- include/linux/cpuset.h | 2 +- include/linux/sched.h | 7 +++++++ kernel/cpuset.c | 4 ++-- kernel/kthread.c | 4 ++-- kernel/sched.c | 19 ++++++++++++------- 5 files changed, 24 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index f20eb8f16025..e9eaec522655 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -146,7 +146,7 @@ static inline void cpuset_cpus_allowed(struct task_struct *p, static inline int cpuset_cpus_allowed_fallback(struct task_struct *p) { - cpumask_copy(&p->cpus_allowed, cpu_possible_mask); + do_set_cpus_allowed(p, cpu_possible_mask); return cpumask_any(cpu_active_mask); } diff --git a/include/linux/sched.h b/include/linux/sched.h index dc8871295a5a..8da84b7bc1b8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1841,9 +1841,16 @@ static inline void rcu_copy_process(struct task_struct *p) #endif #ifdef CONFIG_SMP +extern void do_set_cpus_allowed(struct task_struct *p, + const struct cpumask *new_mask); + extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); #else +static inline void do_set_cpus_allowed(struct task_struct *p, + const struct cpumask *new_mask) +{ +} static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) { diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 1ceeb049c827..9c9b7545c810 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2190,7 +2190,7 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk) rcu_read_lock(); cs = task_cs(tsk); if (cs) - cpumask_copy(&tsk->cpus_allowed, cs->cpus_allowed); + do_set_cpus_allowed(tsk, cs->cpus_allowed); rcu_read_unlock(); /* @@ -2217,7 +2217,7 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk) * Like above we can temporary set any mask and rely on * set_cpus_allowed_ptr() as synchronization point. */ - cpumask_copy(&tsk->cpus_allowed, cpu_possible_mask); + do_set_cpus_allowed(tsk, cpu_possible_mask); cpu = cpumask_any(cpu_active_mask); } diff --git a/kernel/kthread.c b/kernel/kthread.c index 3b34d2732bce..4ba7cccb4994 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -202,8 +202,8 @@ void kthread_bind(struct task_struct *p, unsigned int cpu) return; } - p->cpus_allowed = cpumask_of_cpu(cpu); - p->rt.nr_cpus_allowed = 1; + /* It's safe because the task is inactive. 
*/ + do_set_cpus_allowed(p, cpumask_of(cpu)); p->flags |= PF_THREAD_BOUND; } EXPORT_SYMBOL(kthread_bind); diff --git a/kernel/sched.c b/kernel/sched.c index a80ee911900e..cbb3a0eee58e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5860,7 +5860,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) idle->state = TASK_RUNNING; idle->se.exec_start = sched_clock(); - cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); + do_set_cpus_allowed(idle, cpumask_of(cpu)); /* * We're having a chicken and egg problem, even though we are * holding rq->lock, the cpu isn't yet set to this cpu so the @@ -5948,6 +5948,16 @@ static inline void sched_init_granularity(void) } #ifdef CONFIG_SMP +void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) +{ + if (p->sched_class && p->sched_class->set_cpus_allowed) + p->sched_class->set_cpus_allowed(p, new_mask); + else { + cpumask_copy(&p->cpus_allowed, new_mask); + p->rt.nr_cpus_allowed = cpumask_weight(new_mask); + } +} + /* * This is how migration works: * @@ -5993,12 +6003,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) goto out; } - if (p->sched_class->set_cpus_allowed) - p->sched_class->set_cpus_allowed(p, new_mask); - else { - cpumask_copy(&p->cpus_allowed, new_mask); - p->rt.nr_cpus_allowed = cpumask_weight(new_mask); - } + do_set_cpus_allowed(p, new_mask); /* Can the task run on the task's current CPU? If so, we're done */ if (cpumask_test_cpu(task_cpu(p), new_mask)) -- cgit v1.2.3 From 55c2945aa9d4d907ec5ca4f6a4e30ae908d8d30d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 11 May 2011 05:33:33 -0700 Subject: atomic: Add atomic_or() An atomic_or() function is needed by TREE_RCU to avoid deadlock, so add a generic version. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 96c038e43d66..ee456c79b0e6 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -34,4 +34,17 @@ static inline int atomic_inc_not_zero_hint(atomic_t *v, int hint) } #endif +#ifndef CONFIG_ARCH_HAS_ATOMIC_OR +static inline void atomic_or(int i, atomic_t *v) +{ + int old; + int new; + + do { + old = atomic_read(v); + new = old | i; + } while (atomic_cmpxchg(v, old, new) != old); +} +#endif /* #ifndef CONFIG_ARCH_HAS_ATOMIC_OR */ + #endif /* _LINUX_ATOMIC_H */ -- cgit v1.2.3 From 69b4573296469fd3f70cf7044693074980517067 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 28 May 2011 08:25:51 -0700 Subject: Cache xattr security drop check for write v2 Some recent benchmarking on btrfs showed that a major scaling bottleneck on large systems on btrfs is currently the xattr lookup on every write. Why xattr lookup on every write I hear you ask? write wants to drop suid and security related xattrs that could set o capabilities for executables. To do that it currently looks up security.capability on EVERY write (even for non executables) to decide whether to drop it or not. In btrfs this causes an additional tree walk, hitting some per file system locks and quite bad scalability. In a simple read workload on a 8S system I saw over 90% CPU time in spinlocks related to that. Chris Mason tells me this is also a problem in ext4, where it hits the global mbcache lock. This patch adds a simple per inode to avoid this problem. 
We only do the lookup once per file and then, if there is no such xattr, cache the decision. All xattr changes clear the flag. I also used the same flag to avoid the suid check, although that one is pretty cheap. A file system can also set this flag when it creates the inode, if it has a cheap way to do so. This is done for some common file systems in follow-on patches. With this patch a major part of the lock contention disappears for btrfs. Some testing on smaller systems didn't show significant performance changes, but at least it helps the larger systems and is generally more efficient. v2: Rename is_sgid. Add file system helper. Cc: chris.mason@oracle.com Cc: josef@redhat.com Cc: viro@zeniv.linux.org.uk Cc: agruen@linbit.com Cc: Serge E. Hallyn Signed-off-by: Andi Kleen Signed-off-by: Al Viro --- fs/attr.c | 7 +++++++ fs/xattr.c | 7 +++++-- include/linux/fs.h | 13 +++++++++++++ mm/filemap.c | 14 ++++++++++++-- 4 files changed, 37 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/attr.c b/fs/attr.c index 91dbe2a107f2..caf2aa521e2b 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -175,6 +175,13 @@ int notify_change(struct dentry * dentry, struct iattr * attr) return -EPERM; } + if ((ia_valid & ATTR_MODE)) { + mode_t amode = attr->ia_mode; + /* Flag setting protected by i_mutex */ + if (is_sxid(amode)) + inode->i_flags &= ~S_NOSEC; + } + now = current_fs_time(inode->i_sb); attr->ia_ctime = now; diff --git a/fs/xattr.c b/fs/xattr.c index 4be2e7666d02..f060663ab70c 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -91,7 +91,11 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, { struct inode *inode = dentry->d_inode; int error = -EOPNOTSUPP; + int issec = !strncmp(name, XATTR_SECURITY_PREFIX, + XATTR_SECURITY_PREFIX_LEN); + if (issec) + inode->i_flags &= ~S_NOSEC; if (inode->i_op->setxattr) { error = inode->i_op->setxattr(dentry, name, value, size, flags); if (!error) { @@ -99,8 +103,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, security_inode_post_setxattr(dentry, name, value, size, flags); } - } else if (!strncmp(name, XATTR_SECURITY_PREFIX, - XATTR_SECURITY_PREFIX_LEN)) { + } else if (issec) { const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; error = security_inode_setsecurity(inode, suffix, value, size, flags); diff --git a/include/linux/fs.h b/include/linux/fs.h index 573028df050d..c55d6b7cd5d6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -237,6 +237,7 @@ struct inodes_stat_t { #define S_PRIVATE 512 /* Inode is fs-internal */ #define S_IMA 1024 /* Inode has an associated IMA struct */ #define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */ +#define S_NOSEC 4096 /* no suid or xattr security attributes */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -273,6 +274,7 @@ struct inodes_stat_t { #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) #define IS_IMA(inode) ((inode)->i_flags & S_IMA) #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) +#define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) /* the read-only stuff doesn't really belong here, but any other place is probably as bad and I don't want to create yet another include file.
*/ @@ -2582,5 +2584,16 @@ int __init get_filesystem_list(char *buf); #define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \ (flag & __FMODE_NONOTIFY))) +static inline int is_sxid(mode_t mode) +{ + return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); +} + +static inline void inode_has_no_xattr(struct inode *inode) +{ + if (!is_sxid(inode->i_mode)) + inode->i_flags |= S_NOSEC; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_FS_H */ diff --git a/mm/filemap.c b/mm/filemap.c index dac95a24deac..d7b10578a64b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1982,16 +1982,26 @@ static int __remove_suid(struct dentry *dentry, int kill) int file_remove_suid(struct file *file) { struct dentry *dentry = file->f_path.dentry; - int killsuid = should_remove_suid(dentry); - int killpriv = security_inode_need_killpriv(dentry); + struct inode *inode = dentry->d_inode; + int killsuid; + int killpriv; int error = 0; + /* Fast path for nothing security related */ + if (IS_NOSEC(inode)) + return 0; + + killsuid = should_remove_suid(dentry); + killpriv = security_inode_need_killpriv(dentry); + if (killpriv < 0) return killpriv; if (killpriv) error = security_inode_killpriv(dentry); if (!error && killsuid) error = __remove_suid(dentry, killsuid); + if (!error) + inode->i_flags |= S_NOSEC; return error; } -- cgit v1.2.3 From 333c5ae9948194428fe6c5ef5c088304fc98263b Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Fri, 11 Feb 2011 12:49:04 -0800 Subject: idle governor: Avoid lock acquisition to read pm_qos before entering idle Thanks to the reviews and comments by Rafael, James, Mark and Andi. Here's version 2 of the patch incorporating your comments and also some update to my previous patch comments. I noticed that before entering idle state, the menu idle governor will look up the current pm_qos target value according to the list of qos requests received. This look up currently needs the acquisition of a lock to access the list of qos requests to find the qos target value, slowing down the entrance into idle state due to contention by multiple cpus to access this list. The contention is severe when there are a lot of cpus waking and going into idle. For example, for a simple workload that has 32 pair of processes ping ponging messages to each other, where 64 cpu cores are active in test system, I see the following profile with 37.82% of cpu cycles spent in contention of pm_qos_lock: - 37.82% swapper [kernel.kallsyms] [k] _raw_spin_lock_irqsave - _raw_spin_lock_irqsave - 95.65% pm_qos_request menu_select cpuidle_idle_call - cpu_idle 99.98% start_secondary A better approach will be to cache the updated pm_qos target value so reading it does not require lock acquisition as in the patch below. With this patch the contention for pm_qos_lock is removed and I saw a 2.2X increase in throughput for my message passing workload. 
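The pattern is roughly the following (a simplified sketch, not the pm_qos code itself; all names are illustrative): updates still take the lock and recompute the aggregate, but readers on the idle path just load a cached 32-bit value, relying on naturally atomic aligned 32-bit loads.

#include <linux/spinlock.h>
#include <linux/types.h>

struct example_qos {
        spinlock_t lock;        /* protects the request list and updates */
        s32 target_value;       /* cached aggregate; kept 32-bit on purpose */
};

/* Writer: recompute the aggregate under the lock, then publish it. */
static void example_update_target(struct example_qos *q, s32 new_target)
{
        unsigned long flags;

        spin_lock_irqsave(&q->lock, flags);
        q->target_value = new_target;
        spin_unlock_irqrestore(&q->lock, flags);
}

/* Reader (idle path): no lock taken, just a plain load of the cached value. */
static s32 example_read_target(struct example_qos *q)
{
        return q->target_value;
}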
cc: stable@kernel.org Signed-off-by: Tim Chen Acked-by: Andi Kleen Acked-by: James Bottomley Acked-by: mark gross Signed-off-by: Len Brown --- include/linux/pm_qos_params.h | 4 ++++ kernel/pm_qos_params.c | 37 +++++++++++++++++++++++++------------ 2 files changed, 29 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_qos_params.h b/include/linux/pm_qos_params.h index 77cbddb3784c..a7d87f911cab 100644 --- a/include/linux/pm_qos_params.h +++ b/include/linux/pm_qos_params.h @@ -16,6 +16,10 @@ #define PM_QOS_NUM_CLASSES 4 #define PM_QOS_DEFAULT_VALUE -1 +#define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) +#define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) +#define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0 + struct pm_qos_request_list { struct plist_node list; int pm_qos_class; diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index aeaa7f846821..6a8fad82a3ad 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c @@ -53,11 +53,17 @@ enum pm_qos_type { PM_QOS_MIN /* return the smallest value */ }; +/* + * Note: The lockless read path depends on the CPU accessing + * target_value atomically. Atomic access is only guaranteed on all CPU + * types linux supports for 32 bit quantites + */ struct pm_qos_object { struct plist_head requests; struct blocking_notifier_head *notifiers; struct miscdevice pm_qos_power_miscdev; char *name; + s32 target_value; /* Do not change to 64 bit */ s32 default_value; enum pm_qos_type type; }; @@ -70,7 +76,8 @@ static struct pm_qos_object cpu_dma_pm_qos = { .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests, pm_qos_lock), .notifiers = &cpu_dma_lat_notifier, .name = "cpu_dma_latency", - .default_value = 2000 * USEC_PER_SEC, + .target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, + .default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, .type = PM_QOS_MIN, }; @@ -79,7 +86,8 @@ static struct pm_qos_object network_lat_pm_qos = { .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests, pm_qos_lock), .notifiers = &network_lat_notifier, .name = "network_latency", - .default_value = 2000 * USEC_PER_SEC, + .target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, + .default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, .type = PM_QOS_MIN }; @@ -89,7 +97,8 @@ static struct pm_qos_object network_throughput_pm_qos = { .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests, pm_qos_lock), .notifiers = &network_throughput_notifier, .name = "network_throughput", - .default_value = 0, + .target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, + .default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, .type = PM_QOS_MAX, }; @@ -132,6 +141,16 @@ static inline int pm_qos_get_value(struct pm_qos_object *o) } } +static inline s32 pm_qos_read_value(struct pm_qos_object *o) +{ + return o->target_value; +} + +static inline void pm_qos_set_value(struct pm_qos_object *o, s32 value) +{ + o->target_value = value; +} + static void update_target(struct pm_qos_object *o, struct plist_node *node, int del, int value) { @@ -156,6 +175,7 @@ static void update_target(struct pm_qos_object *o, struct plist_node *node, plist_add(node, &o->requests); } curr_value = pm_qos_get_value(o); + pm_qos_set_value(o, curr_value); spin_unlock_irqrestore(&pm_qos_lock, flags); if (prev_value != curr_value) @@ -190,18 +210,11 @@ static int find_pm_qos_object_by_minor(int minor) * pm_qos_request - returns current system wide qos expectation * @pm_qos_class: identification of which qos value is requested * - * This function returns the current target 
value in an atomic manner. + * This function returns the current target value. */ int pm_qos_request(int pm_qos_class) { - unsigned long flags; - int value; - - spin_lock_irqsave(&pm_qos_lock, flags); - value = pm_qos_get_value(pm_qos_array[pm_qos_class]); - spin_unlock_irqrestore(&pm_qos_lock, flags); - - return value; + return pm_qos_read_value(pm_qos_array[pm_qos_class]); } EXPORT_SYMBOL_GPL(pm_qos_request); -- cgit v1.2.3 From a43a9d93d40a69eceeb4e4a4c860cc20186d475c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 29 May 2011 12:40:50 +0200 Subject: [S390] mm: fix storage key handling page_get_storage_key() and page_set_storage_key() expect a page address and not its page frame number. This got inconsistent with 2d42552d "[S390] merge page_test_dirty and page_clear_dirty". Result is that we read/write storage keys from random pages and do not have a working dirty bit tracking at all. E.g. SetPageUpdate() doesn't clear the dirty bit of requested pages, which for example ext4 doesn't like very much and panics after a while. Unable to handle kernel paging request at virtual user address (null) Oops: 0004 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: CPU: 1 Not tainted 2.6.39-07551-g139f37f-dirty #152 Process flush-94:0 (pid: 1576, task: 000000003eb34538, ksp: 000000003c287b70) Krnl PSW : 0704c00180000000 0000000000316b12 (jbd2_journal_file_inode+0x10e/0x138) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 EA:3 Krnl GPRS: 0000000000000000 0000000000000000 0000000000000000 0700000000000000 0000000000316a62 000000003eb34cd0 0000000000000025 000000003c287b88 0000000000000001 000000003c287a70 000000003f1ec678 000000003f1ec000 0000000000000000 000000003e66ec00 0000000000316a62 000000003c287988 Krnl Code: 0000000000316b04: f0a0000407f4 srp 4(11,%r0),2036,0 0000000000316b0a: b9020022 ltgr %r2,%r2 0000000000316b0e: a7740015 brc 7,316b38 >0000000000316b12: e3d0c0000024 stg %r13,0(%r12) 0000000000316b18: 4120c010 la %r2,16(%r12) 0000000000316b1c: 4130d060 la %r3,96(%r13) 0000000000316b20: e340d0600004 lg %r4,96(%r13) 0000000000316b26: c0e50002b567 brasl %r14,36d5f4 Call Trace: ([<0000000000316a62>] jbd2_journal_file_inode+0x5e/0x138) [<00000000002da13c>] mpage_da_map_and_submit+0x2e8/0x42c [<00000000002daac2>] ext4_da_writepages+0x2da/0x504 [<00000000002597e8>] writeback_single_inode+0xf8/0x268 [<0000000000259f06>] writeback_sb_inodes+0xd2/0x18c [<000000000025a700>] writeback_inodes_wb+0x80/0x168 [<000000000025aa92>] wb_writeback+0x2aa/0x324 [<000000000025abde>] wb_do_writeback+0xd2/0x274 [<000000000025ae3a>] bdi_writeback_thread+0xba/0x1c4 [<00000000001737be>] kthread+0xa6/0xb0 [<000000000056c1da>] kernel_thread_starter+0x6/0xc [<000000000056c1d4>] kernel_thread_starter+0x0/0xc INFO: lockdep is turned off. 
Last Breaking-Event-Address: [<0000000000316a8a>] jbd2_journal_file_inode+0x86/0x138 Reported-by: Sebastian Ott Signed-off-by: Heiko Carstens --- arch/s390/include/asm/pgtable.h | 16 ++++++++-------- include/linux/page-flags.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index c4773a2ef3d3..e4efacfe1b63 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -577,16 +577,16 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - unsigned long pfn, bits; + unsigned long address, bits; unsigned char skey; - pfn = pte_val(*ptep) >> PAGE_SHIFT; - skey = page_get_storage_key(pfn); + address = pte_val(*ptep) & PAGE_MASK; + skey = page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); /* Clear page changed & referenced bit in the storage key */ if (bits) { skey ^= bits; - page_set_storage_key(pfn, skey, 1); + page_set_storage_key(address, skey, 1); } /* Transfer page changed & referenced bit to guest bits in pgste */ pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ @@ -628,16 +628,16 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - unsigned long pfn; + unsigned long address; unsigned long okey, nkey; - pfn = pte_val(*ptep) >> PAGE_SHIFT; - okey = nkey = page_get_storage_key(pfn); + address = pte_val(*ptep) & PAGE_MASK; + okey = nkey = page_get_storage_key(address); nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT); /* Set page access key and fetch protection bit from pgste */ nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56; if (okey != nkey) - page_set_storage_key(pfn, nkey, 1); + page_set_storage_key(address, nkey, 1); #endif } diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 79a6700b7162..6081493db68f 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -308,7 +308,7 @@ static inline void SetPageUptodate(struct page *page) { #ifdef CONFIG_S390 if (!test_and_set_bit(PG_uptodate, &page->flags)) - page_set_storage_key(page_to_pfn(page), PAGE_DEFAULT_KEY, 0); + page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY, 0); #else /* * Memory barrier must be issued before setting the PG_uptodate bit, -- cgit v1.2.3 From 4c2593270133708698d4b8cea2dab469479ad13b Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sun, 29 May 2011 12:52:55 +0100 Subject: dm table: allow targets to support discards internally Permit a target to support discards regardless of whether or not all its underlying devices do. Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-table.c | 6 +++++- include/linux/device-mapper.h | 6 ++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index cb8380c9767f..215e112d153e 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1346,7 +1346,8 @@ bool dm_table_supports_discards(struct dm_table *t) return 0; /* - * Ensure that at least one underlying device supports discards. + * Unless any target used by the table set discards_supported, + * require at least one underlying device to support discards. 
* t->devices includes internal dm devices such as mirror logs * so we need to use iterate_devices here, which targets * supporting discard must provide. @@ -1354,6 +1355,9 @@ bool dm_table_supports_discards(struct dm_table *t) while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); + if (ti->discards_supported) + return 1; + if (ti->type->iterate_devices && ti->type->iterate_devices(ti, device_discard_capable, NULL)) return 1; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 32a4423710f5..4427e0454051 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -191,6 +191,12 @@ struct dm_target { /* Used to provide an error string from the ctr */ char *error; + + /* + * Set if this target needs to receive discards regardless of + * whether or not its underlying devices have support. + */ + unsigned discards_supported:1; }; /* Each target can link one of these into the table */ -- cgit v1.2.3 From bda8efec5c706a672e0714d341a342e811f0262a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 29 May 2011 13:03:09 +0100 Subject: dm io: use fixed initial mempool size Replace the arbitrary calculation of an initial io struct mempool size with a constant. The code calculated the number of reserved structures based on the request size and used a "magic" multiplication constant of 4. This patch changes it to reserve a fixed number - itself still chosen quite arbitrarily. Further testing might show if there is a better number to choose. Note that if there is no memory pressure, we can still allocate an arbitrary number of "struct io" structures. One structure is enough to process the whole request. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-io.c | 27 +++++---------------------- drivers/md/dm-kcopyd.c | 2 +- drivers/md/dm-log.c | 3 +-- drivers/md/dm-raid1.c | 3 +-- drivers/md/dm-snap-persistent.c | 13 +------------ include/linux/dm-io.h | 3 +-- 6 files changed, 10 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 76a5af00a26b..2067288f61f9 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -19,6 +19,8 @@ #define DM_MSG_PREFIX "io" #define DM_IO_MAX_REGIONS BITS_PER_LONG +#define MIN_IOS 16 +#define MIN_BIOS 16 struct dm_io_client { mempool_t *pool; @@ -40,34 +42,22 @@ struct io { static struct kmem_cache *_dm_io_cache; -/* - * io contexts are only dynamically allocated for asynchronous - * io. Since async io is likely to be the majority of io we'll - * have the same number of io contexts as bios! (FIXME: must reduce this). - */ - -static unsigned int pages_to_ios(unsigned int pages) -{ - return 4 * pages; /* too many ? */ -} - /* * Create a client with mempool and bioset. 
*/ -struct dm_io_client *dm_io_client_create(unsigned num_pages) +struct dm_io_client *dm_io_client_create(void) { - unsigned ios = pages_to_ios(num_pages); struct dm_io_client *client; client = kmalloc(sizeof(*client), GFP_KERNEL); if (!client) return ERR_PTR(-ENOMEM); - client->pool = mempool_create_slab_pool(ios, _dm_io_cache); + client->pool = mempool_create_slab_pool(MIN_IOS, _dm_io_cache); if (!client->pool) goto bad; - client->bios = bioset_create(16, 0); + client->bios = bioset_create(MIN_BIOS, 0); if (!client->bios) goto bad; @@ -81,13 +71,6 @@ struct dm_io_client *dm_io_client_create(unsigned num_pages) } EXPORT_SYMBOL(dm_io_client_create); -int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client) -{ - return mempool_resize(client->pool, pages_to_ios(num_pages), - GFP_KERNEL); -} -EXPORT_SYMBOL(dm_io_client_resize); - void dm_io_client_destroy(struct dm_io_client *client) { mempool_destroy(client->pool); diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 5dfbdcb40a47..719693340d1d 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -667,7 +667,7 @@ int dm_kcopyd_client_create(unsigned min_pages, if (r) goto bad_client_pages; - kc->io_client = dm_io_client_create(min_pages); + kc->io_client = dm_io_client_create(); if (IS_ERR(kc->io_client)) { r = PTR_ERR(kc->io_client); goto bad_io_client; diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index a1f321889676..948e3f4925bf 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -449,8 +449,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, lc->io_req.mem.type = DM_IO_VMA; lc->io_req.notify.fn = NULL; - lc->io_req.client = dm_io_client_create(dm_div_up(buf_size, - PAGE_SIZE)); + lc->io_req.client = dm_io_client_create(); if (IS_ERR(lc->io_req.client)) { r = PTR_ERR(lc->io_req.client); DMWARN("couldn't allocate disk io client"); diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 976ad4688afc..53089aa11387 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -22,7 +22,6 @@ #define DM_MSG_PREFIX "raid1" #define MAX_RECOVERY 1 /* Maximum number of regions recovered in parallel. 
*/ -#define DM_IO_PAGES 64 #define DM_KCOPYD_PAGES 64 #define DM_RAID1_HANDLE_ERRORS 0x01 @@ -887,7 +886,7 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, return NULL; } - ms->io_client = dm_io_client_create(DM_IO_PAGES); + ms->io_client = dm_io_client_create(); if (IS_ERR(ms->io_client)) { ti->error = "Error creating dm_io client"; mempool_destroy(ms->read_record_pool); diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 95891dfcbca0..135c2f1fdbfc 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -154,11 +154,6 @@ struct pstore { struct workqueue_struct *metadata_wq; }; -static unsigned sectors_to_pages(unsigned sectors) -{ - return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9); -} - static int alloc_area(struct pstore *ps) { int r = -ENOMEM; @@ -318,8 +313,7 @@ static int read_header(struct pstore *ps, int *new_snapshot) chunk_size_supplied = 0; } - ps->io_client = dm_io_client_create(sectors_to_pages(ps->store-> - chunk_size)); + ps->io_client = dm_io_client_create(); if (IS_ERR(ps->io_client)) return PTR_ERR(ps->io_client); @@ -368,11 +362,6 @@ static int read_header(struct pstore *ps, int *new_snapshot) return r; } - r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size), - ps->io_client); - if (r) - return r; - r = alloc_area(ps); return r; diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index 5c9186b93fff..f4b0aa3126f5 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -69,8 +69,7 @@ struct dm_io_request { * * Create/destroy may block. */ -struct dm_io_client *dm_io_client_create(unsigned num_pages); -int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client); +struct dm_io_client *dm_io_client_create(void); void dm_io_client_destroy(struct dm_io_client *client); /* -- cgit v1.2.3 From 5f43ba2950414dc0abf4ac44c397d88069056746 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 29 May 2011 13:03:11 +0100 Subject: dm kcopyd: reserve fewer pages Reserve just the minimum of pages needed to process one job. Because we allocate pages from page allocator, we don't need to reserve a large number of pages. The maximum job size is SUB_JOB_SIZE and we calculate the number of reserved pages based on this. 
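To make the sizing concrete, assuming the common 4 KiB PAGE_SIZE (architectures with larger pages reserve fewer pages):

/*
 * SUB_JOB_SIZE      = 128 sectors
 * bytes per sub-job = 128 << SECTOR_SHIFT = 128 * 512 = 65536
 * RESERVE_PAGES     = DIV_ROUND_UP(65536, PAGE_SIZE)
 *                   = DIV_ROUND_UP(65536, 4096) = 16 pages per client
 */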
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-kcopyd.c | 6 +++--- drivers/md/dm-raid1.c | 3 +-- drivers/md/dm-snap.c | 7 +------ include/linux/dm-kcopyd.h | 3 +-- 4 files changed, 6 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 719693340d1d..579647f8b4d2 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -30,6 +30,7 @@ #define SUB_JOB_SIZE 128 #define SPLIT_COUNT 8 #define MIN_JOBS 8 +#define RESERVE_PAGES (DIV_ROUND_UP(SUB_JOB_SIZE << SECTOR_SHIFT, PAGE_SIZE)) /*----------------------------------------------------------------- * Each kcopyd client has its own little pool of preallocated @@ -636,8 +637,7 @@ int kcopyd_cancel(struct kcopyd_job *job, int block) /*----------------------------------------------------------------- * Client setup *---------------------------------------------------------------*/ -int dm_kcopyd_client_create(unsigned min_pages, - struct dm_kcopyd_client **result) +int dm_kcopyd_client_create(struct dm_kcopyd_client **result) { int r = -ENOMEM; struct dm_kcopyd_client *kc; @@ -663,7 +663,7 @@ int dm_kcopyd_client_create(unsigned min_pages, kc->pages = NULL; kc->nr_reserved_pages = kc->nr_free_pages = 0; - r = client_reserve_pages(kc, min_pages); + r = client_reserve_pages(kc, RESERVE_PAGES); if (r) goto bad_client_pages; diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 53089aa11387..9defad045418 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -22,7 +22,6 @@ #define DM_MSG_PREFIX "raid1" #define MAX_RECOVERY 1 /* Maximum number of regions recovered in parallel. */ -#define DM_KCOPYD_PAGES 64 #define DM_RAID1_HANDLE_ERRORS 0x01 #define errors_handled(p) ((p)->features & DM_RAID1_HANDLE_ERRORS) @@ -1116,7 +1115,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto err_destroy_wq; } - r = dm_kcopyd_client_create(DM_KCOPYD_PAGES, &ms->kcopyd_client); + r = dm_kcopyd_client_create(&ms->kcopyd_client); if (r) goto err_destroy_wq; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index a2d330942cb2..5a2296de84a3 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -39,11 +39,6 @@ static const char dm_snapshot_merge_target_name[] = "snapshot-merge"; */ #define SNAPSHOT_COPY_PRIORITY 2 -/* - * Reserve 1MB for each snapshot initially (with minimum of 1 page). - */ -#define SNAPSHOT_PAGES (((1UL << 20) >> PAGE_SHIFT) ? : 1) - /* * The size of the mempool used to track chunks in use. */ @@ -1116,7 +1111,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_hash_tables; } - r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client); + r = dm_kcopyd_client_create(&s->kcopyd_client); if (r) { ti->error = "Could not create kcopyd client"; goto bad_kcopyd; diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h index 5db216311695..312513d4741a 100644 --- a/include/linux/dm-kcopyd.h +++ b/include/linux/dm-kcopyd.h @@ -25,8 +25,7 @@ * To use kcopyd you must first create a dm_kcopyd_client object. 
*/ struct dm_kcopyd_client; -int dm_kcopyd_client_create(unsigned num_pages, - struct dm_kcopyd_client **result); +int dm_kcopyd_client_create(struct dm_kcopyd_client **result); void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc); /* -- cgit v1.2.3 From fa34ce73072f90ecd90dcc43f29d82e70e5f8676 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 29 May 2011 13:03:13 +0100 Subject: dm kcopyd: return client directly and not through a pointer Return client directly from dm_kcopyd_client_create, not through a parameter, making it consistent with dm_io_client_create. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-kcopyd.c | 9 ++++----- drivers/md/dm-raid1.c | 6 ++++-- drivers/md/dm-snap.c | 5 +++-- include/linux/dm-kcopyd.h | 2 +- 4 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 579647f8b4d2..819e37eaaeba 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -637,14 +637,14 @@ int kcopyd_cancel(struct kcopyd_job *job, int block) /*----------------------------------------------------------------- * Client setup *---------------------------------------------------------------*/ -int dm_kcopyd_client_create(struct dm_kcopyd_client **result) +struct dm_kcopyd_client *dm_kcopyd_client_create(void) { int r = -ENOMEM; struct dm_kcopyd_client *kc; kc = kmalloc(sizeof(*kc), GFP_KERNEL); if (!kc) - return -ENOMEM; + return ERR_PTR(-ENOMEM); spin_lock_init(&kc->job_lock); INIT_LIST_HEAD(&kc->complete_jobs); @@ -676,8 +676,7 @@ int dm_kcopyd_client_create(struct dm_kcopyd_client **result) init_waitqueue_head(&kc->destroyq); atomic_set(&kc->nr_jobs, 0); - *result = kc; - return 0; + return kc; bad_io_client: client_free_pages(kc); @@ -688,7 +687,7 @@ bad_workqueue: bad_slab: kfree(kc); - return r; + return ERR_PTR(r); } EXPORT_SYMBOL(dm_kcopyd_client_create); diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 9defad045418..9bfd057be686 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1115,9 +1115,11 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto err_destroy_wq; } - r = dm_kcopyd_client_create(&ms->kcopyd_client); - if (r) + ms->kcopyd_client = dm_kcopyd_client_create(); + if (IS_ERR(ms->kcopyd_client)) { + r = PTR_ERR(ms->kcopyd_client); goto err_destroy_wq; + } wakeup_mirrord(ms); return 0; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 5a2296de84a3..9ecff5f3023a 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1111,8 +1111,9 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_hash_tables; } - r = dm_kcopyd_client_create(&s->kcopyd_client); - if (r) { + s->kcopyd_client = dm_kcopyd_client_create(); + if (IS_ERR(s->kcopyd_client)) { + r = PTR_ERR(s->kcopyd_client); ti->error = "Could not create kcopyd client"; goto bad_kcopyd; } diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h index 312513d4741a..298d587e349b 100644 --- a/include/linux/dm-kcopyd.h +++ b/include/linux/dm-kcopyd.h @@ -25,7 +25,7 @@ * To use kcopyd you must first create a dm_kcopyd_client object. 
*/ struct dm_kcopyd_client; -int dm_kcopyd_client_create(struct dm_kcopyd_client **result); +struct dm_kcopyd_client *dm_kcopyd_client_create(void); void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc); /* -- cgit v1.2.3 From f7da7a129d57bfe0f74573dc03531c63e1360fae Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Thu, 19 May 2011 14:16:47 -0400 Subject: SUNRPC: introduce xdr_init_decode_pages Initialize xdr_stream and xdr_buf using an array of page pointers and length of buffer. Signed-off-by: Benny Halevy --- fs/nfs/dir.c | 9 ++------- fs/nfs/nfs4filelayout.c | 9 ++------- fs/nfs/nfs4filelayoutdev.c | 9 ++------- include/linux/sunrpc/xdr.h | 2 ++ net/sunrpc/xdr.c | 19 +++++++++++++++++++ 5 files changed, 27 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 7237672216c8..f673a9e1d95d 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -512,12 +512,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en struct page **xdr_pages, struct page *page, unsigned int buflen) { struct xdr_stream stream; - struct xdr_buf buf = { - .pages = xdr_pages, - .page_len = buflen, - .buflen = buflen, - .len = buflen, - }; + struct xdr_buf buf; struct page *scratch; struct nfs_cache_array *array; unsigned int count = 0; @@ -527,7 +522,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en if (scratch == NULL) return -ENOMEM; - xdr_init_decode(&stream, &buf, NULL); + xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen); xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); do { diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 501a9b86b318..33bda24e8cd2 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -510,12 +510,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, gfp_t gfp_flags) { struct xdr_stream stream; - struct xdr_buf buf = { - .pages = lgr->layoutp->pages, - .page_len = lgr->layoutp->len, - .buflen = lgr->layoutp->len, - .len = lgr->layoutp->len, - }; + struct xdr_buf buf; struct page *scratch; __be32 *p; uint32_t nfl_util; @@ -527,7 +522,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, if (!scratch) return -ENOMEM; - xdr_init_decode(&stream, &buf, NULL); + xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len); xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); /* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8), diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 5914659c8ec5..3b7bf1377264 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -308,12 +308,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) u8 max_stripe_index; struct nfs4_file_layout_dsaddr *dsaddr = NULL; struct xdr_stream stream; - struct xdr_buf buf = { - .pages = pdev->pages, - .page_len = pdev->pglen, - .buflen = pdev->pglen, - .len = pdev->pglen, - }; + struct xdr_buf buf; struct page *scratch; /* set up xdr stream */ @@ -321,7 +316,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) if (!scratch) goto out_err; - xdr_init_decode(&stream, &buf, NULL); + xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); /* Get the stripe count (number of stripe index) */ diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index fc84b7a19ca3..a20970ef9e4e 100644 --- a/include/linux/sunrpc/xdr.h +++ 
b/include/linux/sunrpc/xdr.h @@ -216,6 +216,8 @@ extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); +extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, + struct page **pages, unsigned int len); extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen); extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 679cd674b81d..f008c14ad34c 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -638,6 +638,25 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) } EXPORT_SYMBOL_GPL(xdr_init_decode); +/** + * xdr_init_decode_pages - Initialize an xdr_stream for decoding data. + * @xdr: pointer to xdr_stream struct + * @buf: pointer to XDR buffer from which to decode data + * @pages: list of pages to decode into + * @len: length in bytes of buffer in pages + */ +void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, + struct page **pages, unsigned int len) +{ + memset(buf, 0, sizeof(*buf)); + buf->pages = pages; + buf->page_len = len; + buf->buflen = len; + buf->len = len; + xdr_init_decode(xdr, buf, NULL); +} +EXPORT_SYMBOL_GPL(xdr_init_decode_pages); + static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) { __be32 *p = xdr->p; -- cgit v1.2.3 From 38b7c401f6ade50543f246c4bc2c971edf2b19dd Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:49:32 +0300 Subject: pnfs-obj: pnfs_osd XDR definitions * Add the pnfs_osd_xdr.h header * Define the pnfs_osd_layout structure including all its sub-types and constants. * Declare the pnfs_osd_xdr_decode_layout API + all needed inline helpers. * Define the pnfs_osd_deviceaddr structure and all its subtypes and constants. * Declare API for decoding of a pnfs_osd_deviceaddr from XDR stream. * Define the pnfs_osd_ioerr structure, its substructures and constants. * Declare API for encoding of a pnfs_osd_ioerr into XDR stream. * Define the pnfs_osd_layoutupdate structure and its substructures. * Declare API for encoding of a pnfs_osd_layoutupdate into XDR stream. [Remove server definitions] Signed-off-by: Boaz Harrosh Signed-off-by: Benny Halevy --- include/linux/pnfs_osd_xdr.h | 345 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 345 insertions(+) create mode 100644 include/linux/pnfs_osd_xdr.h (limited to 'include/linux') diff --git a/include/linux/pnfs_osd_xdr.h b/include/linux/pnfs_osd_xdr.h new file mode 100644 index 000000000000..76efbdd01622 --- /dev/null +++ b/include/linux/pnfs_osd_xdr.h @@ -0,0 +1,345 @@ +/* + * pNFS-osd on-the-wire data structures + * + * Copyright (C) 2007 Panasas Inc. [year of first publication] + * All rights reserved. + * + * Benny Halevy + * Boaz Harrosh + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * See the file COPYING included with this distribution for more details. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Panasas company nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef __PNFS_OSD_XDR_H__ +#define __PNFS_OSD_XDR_H__ + +#include +#include +#include + +#define PNFS_OSD_OSDNAME_MAXSIZE 256 + +/* + * draft-ietf-nfsv4-minorversion-22 + * draft-ietf-nfsv4-pnfs-obj-12 + */ + +/* Layout Structure */ + +enum pnfs_osd_raid_algorithm4 { + PNFS_OSD_RAID_0 = 1, + PNFS_OSD_RAID_4 = 2, + PNFS_OSD_RAID_5 = 3, + PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */ +}; + +/* struct pnfs_osd_data_map4 { + * uint32_t odm_num_comps; + * length4 odm_stripe_unit; + * uint32_t odm_group_width; + * uint32_t odm_group_depth; + * uint32_t odm_mirror_cnt; + * pnfs_osd_raid_algorithm4 odm_raid_algorithm; + * }; + */ +struct pnfs_osd_data_map { + u32 odm_num_comps; + u64 odm_stripe_unit; + u32 odm_group_width; + u32 odm_group_depth; + u32 odm_mirror_cnt; + u32 odm_raid_algorithm; +}; + +/* struct pnfs_osd_objid4 { + * deviceid4 oid_device_id; + * uint64_t oid_partition_id; + * uint64_t oid_object_id; + * }; + */ +struct pnfs_osd_objid { + struct nfs4_deviceid oid_device_id; + u64 oid_partition_id; + u64 oid_object_id; +}; + +/* For printout. 
I use: + * kprint("dev(%llx:%llx)", _DEVID_LO(pointer), _DEVID_HI(pointer)); + * BE style + */ +#define _DEVID_LO(oid_device_id) \ + (unsigned long long)be64_to_cpup((__be64 *)(oid_device_id)->data) + +#define _DEVID_HI(oid_device_id) \ + (unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id)->data) + 1) + +static inline int +pnfs_osd_objid_xdr_sz(void) +{ + return (NFS4_DEVICEID4_SIZE / 4) + 2 + 2; +} + +enum pnfs_osd_version { + PNFS_OSD_MISSING = 0, + PNFS_OSD_VERSION_1 = 1, + PNFS_OSD_VERSION_2 = 2 +}; + +struct pnfs_osd_opaque_cred { + u32 cred_len; + void *cred; +}; + +enum pnfs_osd_cap_key_sec { + PNFS_OSD_CAP_KEY_SEC_NONE = 0, + PNFS_OSD_CAP_KEY_SEC_SSV = 1, +}; + +/* struct pnfs_osd_object_cred4 { + * pnfs_osd_objid4 oc_object_id; + * pnfs_osd_version4 oc_osd_version; + * pnfs_osd_cap_key_sec4 oc_cap_key_sec; + * opaque oc_capability_key<>; + * opaque oc_capability<>; + * }; + */ +struct pnfs_osd_object_cred { + struct pnfs_osd_objid oc_object_id; + u32 oc_osd_version; + u32 oc_cap_key_sec; + struct pnfs_osd_opaque_cred oc_cap_key; + struct pnfs_osd_opaque_cred oc_cap; +}; + +/* struct pnfs_osd_layout4 { + * pnfs_osd_data_map4 olo_map; + * uint32_t olo_comps_index; + * pnfs_osd_object_cred4 olo_components<>; + * }; + */ +struct pnfs_osd_layout { + struct pnfs_osd_data_map olo_map; + u32 olo_comps_index; + u32 olo_num_comps; + struct pnfs_osd_object_cred *olo_comps; +}; + +/* Device Address */ +enum pnfs_osd_targetid_type { + OBJ_TARGET_ANON = 1, + OBJ_TARGET_SCSI_NAME = 2, + OBJ_TARGET_SCSI_DEVICE_ID = 3, +}; + +/* union pnfs_osd_targetid4 switch (pnfs_osd_targetid_type4 oti_type) { + * case OBJ_TARGET_SCSI_NAME: + * string oti_scsi_name<>; + * + * case OBJ_TARGET_SCSI_DEVICE_ID: + * opaque oti_scsi_device_id<>; + * + * default: + * void; + * }; + * + * union pnfs_osd_targetaddr4 switch (bool ota_available) { + * case TRUE: + * netaddr4 ota_netaddr; + * case FALSE: + * void; + * }; + * + * struct pnfs_osd_deviceaddr4 { + * pnfs_osd_targetid4 oda_targetid; + * pnfs_osd_targetaddr4 oda_targetaddr; + * uint64_t oda_lun; + * opaque oda_systemid<>; + * pnfs_osd_object_cred4 oda_root_obj_cred; + * opaque oda_osdname<>; + * }; + */ +struct pnfs_osd_targetid { + u32 oti_type; + struct nfs4_string oti_scsi_device_id; +}; + +enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 }; + +/* struct netaddr4 { + * // see struct rpcb in RFC1833 + * string r_netid<>; // network id + * string r_addr<>; // universal address + * }; + */ +struct pnfs_osd_net_addr { + struct nfs4_string r_netid; + struct nfs4_string r_addr; +}; + +struct pnfs_osd_targetaddr { + u32 ota_available; + struct pnfs_osd_net_addr ota_netaddr; +}; + +enum { + NETWORK_ID_MAX = 16 / 4, + UNIVERSAL_ADDRESS_MAX = 64 / 4, + PNFS_OSD_TARGETADDR_MAX = 3 + NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX, +}; + +struct pnfs_osd_deviceaddr { + struct pnfs_osd_targetid oda_targetid; + struct pnfs_osd_targetaddr oda_targetaddr; + u8 oda_lun[8]; + struct nfs4_string oda_systemid; + struct pnfs_osd_object_cred oda_root_obj_cred; + struct nfs4_string oda_osdname; +}; + +enum { + ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4, + PNFS_OSD_DEVICEADDR_MAX = + PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX + + 2 /*oda_lun*/ + + 1 + OSD_SYSTEMID_LEN + + 1 + ODA_OSDNAME_MAX, +}; + +/* LAYOUTCOMMIT: layoutupdate */ + +/* union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) { + * case TRUE: + * int64_t dsu_delta; + * case FALSE: + * void; + * }; + * + * struct pnfs_osd_layoutupdate4 { + * pnfs_osd_deltaspaceused4 olu_delta_space_used; + * 
bool olu_ioerr_flag; + * }; + */ +struct pnfs_osd_layoutupdate { + u32 dsu_valid; + s64 dsu_delta; + u32 olu_ioerr_flag; +}; + +/* LAYOUTRETURN: I/O Error Report */ + +enum pnfs_osd_errno { + PNFS_OSD_ERR_EIO = 1, + PNFS_OSD_ERR_NOT_FOUND = 2, + PNFS_OSD_ERR_NO_SPACE = 3, + PNFS_OSD_ERR_BAD_CRED = 4, + PNFS_OSD_ERR_NO_ACCESS = 5, + PNFS_OSD_ERR_UNREACHABLE = 6, + PNFS_OSD_ERR_RESOURCE = 7 +}; + +/* struct pnfs_osd_ioerr4 { + * pnfs_osd_objid4 oer_component; + * length4 oer_comp_offset; + * length4 oer_comp_length; + * bool oer_iswrite; + * pnfs_osd_errno4 oer_errno; + * }; + */ +struct pnfs_osd_ioerr { + struct pnfs_osd_objid oer_component; + u64 oer_comp_offset; + u64 oer_comp_length; + u32 oer_iswrite; + u32 oer_errno; +}; + +/* OSD XDR API */ +/* Layout helpers */ +/* Layout decoding is done in two parts: + * 1. First call pnfs_osd_xdr_decode_layout_map to read in only the header part + * of the layout. @iter members need not be initialized. + * Returned: + * @layout members are set. (@layout->olo_comps set to NULL). + * + * Zero on success, or negative error if passed xdr is broken. + * + * 2. Then call pnfs_osd_xdr_decode_layout_comp() in a loop until it returns + * false, to decode the next component. + * Returned: + * true if there is more to decode or false if we are done or error. + * + * Example: + * struct pnfs_osd_xdr_decode_layout_iter iter; + * struct pnfs_osd_layout layout; + * struct pnfs_osd_object_cred comp; + * int status; + * + * status = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); + * if (unlikely(status)) + * goto err; + * while (pnfs_osd_xdr_decode_layout_comp(&comp, &iter, xdr, &status)) { + * // All of @comp strings point to inside the xdr_buffer + * // or scratch buffer. Copy them out to user memory, e.g. + * copy_single_comp(dest_comp++, &comp); + * } + * if (unlikely(status)) + * goto err; + */ + +struct pnfs_osd_xdr_decode_layout_iter { + unsigned total_comps; + unsigned decoded_comps; +}; + +extern int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout, + struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr); + +extern bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp, + struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr, + int *err); + +/* Device Info helpers */ + +/* Note: All strings inside @deviceaddr point to space inside @p. + * @p should stay valid while @deviceaddr is in use. + */ +extern void pnfs_osd_xdr_decode_deviceaddr( + struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p); + +/* layoutupdate (layout_commit) xdr helpers */ +extern int +pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr, + struct pnfs_osd_layoutupdate *lou); + +/* osd_ioerror encoding/decoding (layout_return) */ +/* Client */ +extern __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr); +extern void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr); + +#endif /* __PNFS_OSD_XDR_H__ */ -- cgit v1.2.3 From d20581aa4be11407c9eeeb75992df5ef176bba0f Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:52:03 +0300 Subject: pnfs: support for non-rpc layout drivers Non-rpc layout drivers, such as those for objects and blocks, implement their own I/O path and error handling logic. Therefore bypass NFS-based error handling for these layout drivers.
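To illustrate the calling convention this enables (a minimal sketch only; the completion handler below and the name objio_read_complete are hypothetical and not part of this patch), a non-rpc layout driver finishes its own I/O, records any failure in the new pnfs_error field, and hands the request back to the generic pNFS code, which either completes it through the saved mds_ops or resends it through the MDS:

/* Sketch: hypothetical read-completion hook in a non-rpc layout driver. */
static void objio_read_complete(struct nfs_read_data *data, ssize_t ios_status)
{
	if (ios_status >= 0) {
		data->res.count = ios_status;	/* bytes the driver actually read */
		data->task.tk_status = 0;
	} else {
		data->task.tk_status = ios_status;
		data->pnfs_error = ios_status;	/* ask generic code to retry via the MDS */
	}
	/* Completes via mds_ops on success, or falls back to nfs_initiate_read(). */
	pnfs_ld_read_done(data);
}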
[fix lseg ref-count bugs, and null de-refs] [Fall out from: non-rpc layout drivers] Signed-off-by: Boaz Harrosh [get rid of PNFS_USE_RPC_CODE] [get rid of __nfs4_write_done_cb] [revert useless change in nfs4_write_done_cb] Signed-off-by: Benny Halevy --- fs/nfs/internal.h | 1 + fs/nfs/nfs4proc.c | 13 ++++++++++--- fs/nfs/pnfs.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++- fs/nfs/pnfs.h | 2 ++ include/linux/nfs_xdr.h | 2 ++ 5 files changed, 62 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ce118ce885dd..bcf0f0ff5eeb 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -310,6 +310,7 @@ extern int nfs_migrate_page(struct address_space *, #endif /* nfs4proc.c */ +extern void __nfs4_read_done_cb(struct nfs_read_data *); extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data); extern int nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cf1b339c3937..92c8bc4b5f97 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3175,6 +3175,11 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return err; } +void __nfs4_read_done_cb(struct nfs_read_data *data) +{ + nfs_invalidate_atime(data->inode); +} + static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) { struct nfs_server *server = NFS_SERVER(data->inode); @@ -3184,7 +3189,7 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) return -EAGAIN; } - nfs_invalidate_atime(data->inode); + __nfs4_read_done_cb(data); if (task->tk_status > 0) renew_lease(server, data->timestamp); return 0; @@ -3198,7 +3203,8 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) if (!nfs4_sequence_done(task, &data->res.seq_res)) return -EAGAIN; - return data->read_done_cb(task, data); + return data->read_done_cb ? data->read_done_cb(task, data) : + nfs4_read_done_cb(task, data); } static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) @@ -3243,7 +3249,8 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) { if (!nfs4_sequence_done(task, &data->res.seq_res)) return -EAGAIN; - return data->write_done_cb(task, data); + return data->write_done_cb ? data->write_done_cb(task, data) : + nfs4_write_done_cb(task, data); } /* Reset the the nfs_write_data to send the write to the MDS. */ diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ef535f2a2c74..171662114fdd 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -243,7 +243,7 @@ put_lseg_common(struct pnfs_layout_segment *lseg) { struct inode *inode = lseg->pls_layout->plh_inode; - BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); + WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); list_del_init(&lseg->pls_list); if (list_empty(&lseg->pls_layout->plh_segs)) { set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); @@ -1054,6 +1054,29 @@ pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode) pgio->pg_test = (ld && ld->pg_test) ? 
pnfs_write_pg_test : NULL; } +/* + * Called by non rpc-based layout drivers + */ +int +pnfs_ld_write_done(struct nfs_write_data *data) +{ + int status; + + if (!data->pnfs_error) { + pnfs_set_layoutcommit(data); + data->mds_ops->rpc_call_done(&data->task, data); + data->mds_ops->rpc_release(data); + return 0; + } + + dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__, + data->pnfs_error); + status = nfs_initiate_write(data, NFS_CLIENT(data->inode), + data->mds_ops, NFS_FILE_SYNC); + return status ? : -EAGAIN; +} +EXPORT_SYMBOL_GPL(pnfs_ld_write_done); + enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *wdata, const struct rpc_call_ops *call_ops, int how) @@ -1078,6 +1101,29 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, return trypnfs; } +/* + * Called by non rpc-based layout drivers + */ +int +pnfs_ld_read_done(struct nfs_read_data *data) +{ + int status; + + if (!data->pnfs_error) { + __nfs4_read_done_cb(data); + data->mds_ops->rpc_call_done(&data->task, data); + data->mds_ops->rpc_release(data); + return 0; + } + + dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__, + data->pnfs_error); + status = nfs_initiate_read(data, NFS_CLIENT(data->inode), + data->mds_ops); + return status ? : -EAGAIN; +} +EXPORT_SYMBOL_GPL(pnfs_ld_read_done); + /* * Call the appropriate parallel I/O subsystem read function. */ diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 925d6ef8ba79..0383e66e71f0 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -165,6 +165,8 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier); void pnfs_set_layoutcommit(struct nfs_write_data *wdata); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); +int pnfs_ld_write_done(struct nfs_write_data *); +int pnfs_ld_read_done(struct nfs_read_data *); /* pnfs_dev.c */ struct nfs4_deviceid_node { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7e371f7df9c4..7c8ff0984a84 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1087,6 +1087,7 @@ struct nfs_read_data { const struct rpc_call_ops *mds_ops; int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); __u64 mds_offset; + int pnfs_error; struct page *page_array[NFS_PAGEVEC_SIZE]; }; @@ -1112,6 +1113,7 @@ struct nfs_write_data { unsigned long timestamp; /* For lease renewal */ #endif __u64 mds_offset; /* Filelayout dense stripe */ + int pnfs_error; struct page *page_array[NFS_PAGEVEC_SIZE]; }; -- cgit v1.2.3 From cbe8260369c9f88eafa035cd327dc3e02fad528c Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:52:37 +0300 Subject: pnfs: layoutreturn NFSv4.1 LAYOUTRETURN implementation Currently, does not support layout-type payload encoding. Signed-off-by: Alexandros Batsakis Signed-off-by: Andy Adamson Signed-off-by: Andy Adamson Signed-off-by: Dean Hildebrand Signed-off-by: Fred Isaman Signed-off-by: Fred Isaman Signed-off-by: Marc Eshel Signed-off-by: Zhang Jingwang [call pnfs_return_layout right before pnfs_destroy_layout] [remove assert_spin_locked from pnfs_clear_lseg_list] [remove wait parameter from the layoutreturn path.] 
[remove return_type field from nfs4_layoutreturn_args] [remove range from nfs4_layoutreturn_args] [no need to send layoutcommit from _pnfs_return_layout] [don't wait on sync layoutreturn] [fix layout stateid in layoutreturn args] [fixed NULL deref in _pnfs_return_layout] [removed recaim member of nfs4_layoutreturn_args] Signed-off-by: Benny Halevy --- fs/nfs/inode.c | 3 +- fs/nfs/nfs4proc.c | 82 +++++++++++++++++++++++++++++++++++ fs/nfs/nfs4xdr.c | 111 +++++++++++++++++++++++++++++++++++++++++++++--- fs/nfs/pnfs.c | 45 ++++++++++++++++++++ fs/nfs/pnfs.h | 18 ++++++++ include/linux/nfs4.h | 1 + include/linux/nfs_xdr.h | 21 +++++++++ 7 files changed, 274 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 57bb31ad7a5e..e9c6d9f8f7e8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1424,9 +1424,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) */ void nfs4_evict_inode(struct inode *inode) { - pnfs_destroy_layout(NFS_I(inode)); truncate_inode_pages(&inode->i_data, 0); end_writeback(inode); + pnfs_return_layout(inode); + pnfs_destroy_layout(NFS_I(inode)); /* If we are holding a delegation, return it! */ nfs_inode_return_delegation_noreclaim(inode); /* First call standard NFS clear_inode() code */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 92c8bc4b5f97..5b4124e4c22f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5673,6 +5673,88 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) return status; } +static void +nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs4_layoutreturn *lrp = calldata; + + dprintk("--> %s\n", __func__); + if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args, + &lrp->res.seq_res, 0, task)) + return; + rpc_call_start(task); +} + +static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_layoutreturn *lrp = calldata; + struct nfs_server *server; + + dprintk("--> %s\n", __func__); + + if (!nfs4_sequence_done(task, &lrp->res.seq_res)) + return; + + server = NFS_SERVER(lrp->args.inode); + if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { + nfs_restart_rpc(task, lrp->clp); + return; + } + if (task->tk_status == 0) { + struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; + + if (lrp->res.lrs_present) { + spin_lock(&lo->plh_inode->i_lock); + pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); + spin_unlock(&lo->plh_inode->i_lock); + } else + BUG_ON(!list_empty(&lo->plh_segs)); + } + dprintk("<-- %s\n", __func__); +} + +static void nfs4_layoutreturn_release(void *calldata) +{ + struct nfs4_layoutreturn *lrp = calldata; + + dprintk("--> %s\n", __func__); + put_layout_hdr(NFS_I(lrp->args.inode)->layout); + kfree(calldata); + dprintk("<-- %s\n", __func__); +} + +static const struct rpc_call_ops nfs4_layoutreturn_call_ops = { + .rpc_call_prepare = nfs4_layoutreturn_prepare, + .rpc_call_done = nfs4_layoutreturn_done, + .rpc_release = nfs4_layoutreturn_release, +}; + +int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) +{ + struct rpc_task *task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN], + .rpc_argp = &lrp->args, + .rpc_resp = &lrp->res, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = lrp->clp->cl_rpcclient, + .rpc_message = &msg, + .callback_ops = &nfs4_layoutreturn_call_ops, + .callback_data = lrp, + }; + int status; + + dprintk("--> %s\n", __func__); + task = rpc_run_task(&task_setup_data); + if 
(IS_ERR(task)) + return PTR_ERR(task); + status = task->tk_status; + dprintk("<-- %s status=%d\n", __func__, status); + rpc_put_task(task); + return status; +} + static int _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c3ccd2c46834..f24212064356 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -338,7 +338,11 @@ static int nfs4_stat_to_errno(int); 1 /* layoutupdate4 layout type */ + \ 1 /* NULL filelayout layoutupdate4 payload */) #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3) - +#define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \ + encode_stateid_maxsz + \ + 1 /* FIXME: opaque lrf_body always empty at the moment */) +#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \ + 1 + decode_stateid_maxsz) #else /* CONFIG_NFS_V4_1 */ #define encode_sequence_maxsz 0 #define decode_sequence_maxsz 0 @@ -760,7 +764,14 @@ static int nfs4_stat_to_errno(int); decode_putfh_maxsz + \ decode_layoutcommit_maxsz + \ decode_getattr_maxsz) - +#define NFS4_enc_layoutreturn_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_layoutreturn_maxsz) +#define NFS4_dec_layoutreturn_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_layoutreturn_maxsz) const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + compound_encode_hdr_maxsz + @@ -1889,6 +1900,31 @@ encode_layoutcommit(struct xdr_stream *xdr, hdr->replen += decode_layoutcommit_maxsz; return 0; } + +static void +encode_layoutreturn(struct xdr_stream *xdr, + const struct nfs4_layoutreturn_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 20); + *p++ = cpu_to_be32(OP_LAYOUTRETURN); + *p++ = cpu_to_be32(0); /* reclaim. 
always 0 for now */ + *p++ = cpu_to_be32(args->layout_type); + *p++ = cpu_to_be32(IOMODE_ANY); + *p = cpu_to_be32(RETURN_FILE); + p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE); + p = xdr_encode_hyper(p, 0); + p = xdr_encode_hyper(p, NFS4_MAX_UINT64); + spin_lock(&args->inode->i_lock); + xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE); + spin_unlock(&args->inode->i_lock); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(0); + hdr->nops++; + hdr->replen += decode_layoutreturn_maxsz; +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -2706,9 +2742,9 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req, /* * Encode LAYOUTCOMMIT request */ -static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, - struct xdr_stream *xdr, - struct nfs4_layoutcommit_args *args) +static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs4_layoutcommit_args *args) { struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), @@ -2720,7 +2756,24 @@ static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, encode_layoutcommit(xdr, args, &hdr); encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); - return 0; +} + +/* + * Encode LAYOUTRETURN request + */ +static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs4_layoutreturn_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, NFS_FH(args->inode), &hdr); + encode_layoutreturn(xdr, args, &hdr); + encode_nops(&hdr); } #endif /* CONFIG_NFS_V4_1 */ @@ -5203,6 +5256,27 @@ out_overflow: return -EIO; } +static int decode_layoutreturn(struct xdr_stream *xdr, + struct nfs4_layoutreturn_res *res) +{ + __be32 *p; + int status; + + status = decode_op_hdr(xdr, OP_LAYOUTRETURN); + if (status) + return status; + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + res->lrs_present = be32_to_cpup(p); + if (res->lrs_present) + status = decode_stateid(xdr, &res->stateid); + return status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + static int decode_layoutcommit(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_layoutcommit_res *res) @@ -6319,6 +6393,30 @@ out: return status; } +/* + * Decode LAYOUTRETURN response + */ +static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs4_layoutreturn_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_layoutreturn(xdr, res); +out: + return status; +} + /* * Decode LAYOUTCOMMIT response */ @@ -6547,6 +6645,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), PROC(LAYOUTGET, enc_layoutget, dec_layoutget), PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), + PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), #endif /* CONFIG_NFS_V4_1 */ }; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 171662114fdd..00b128241746 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -619,6 +619,51 @@ out_err_free: return NULL; } +/* Initiates a LAYOUTRETURN(FILE) */ +int +_pnfs_return_layout(struct inode *ino) +{ + struct pnfs_layout_hdr *lo = NULL; + struct nfs_inode *nfsi = NFS_I(ino); + 
LIST_HEAD(tmp_list); + struct nfs4_layoutreturn *lrp; + nfs4_stateid stateid; + int status = 0; + + dprintk("--> %s\n", __func__); + + spin_lock(&ino->i_lock); + lo = nfsi->layout; + if (!lo || !mark_matching_lsegs_invalid(lo, &tmp_list, NULL)) { + spin_unlock(&ino->i_lock); + dprintk("%s: no layout segments to return\n", __func__); + goto out; + } + stateid = nfsi->layout->plh_stateid; + /* Reference matched in nfs4_layoutreturn_release */ + get_layout_hdr(lo); + spin_unlock(&ino->i_lock); + pnfs_free_lseg_list(&tmp_list); + + WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)); + + lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); + if (unlikely(lrp == NULL)) { + status = -ENOMEM; + goto out; + } + + lrp->args.stateid = stateid; + lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; + lrp->args.inode = ino; + lrp->clp = NFS_SERVER(ino)->nfs_client; + + status = nfs4_proc_layoutreturn(lrp); +out: + dprintk("<-- %s status: %d\n", __func__, status); + return status; +} + bool pnfs_roc(struct inode *ino) { struct pnfs_layout_hdr *lo; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 0383e66e71f0..c34f7a0e3bc2 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -129,6 +129,7 @@ extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev); extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); +extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ void get_layout_hdr(struct pnfs_layout_hdr *lo); @@ -165,6 +166,7 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier); void pnfs_set_layoutcommit(struct nfs_write_data *wdata); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); +int _pnfs_return_layout(struct inode *); int pnfs_ld_write_done(struct nfs_write_data *); int pnfs_ld_read_done(struct nfs_read_data *); @@ -256,6 +258,17 @@ static inline void pnfs_clear_request_commit(struct nfs_page *req) put_lseg(req->wb_commit_lseg); } +static inline int pnfs_return_layout(struct inode *ino) +{ + struct nfs_inode *nfsi = NFS_I(ino); + struct nfs_server *nfss = NFS_SERVER(ino); + + if (pnfs_enabled_sb(nfss) && nfsi->layout) + return _pnfs_return_layout(ino); + + return 0; +} + #else /* CONFIG_NFS_V4_1 */ static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) @@ -298,6 +311,11 @@ pnfs_try_to_write_data(struct nfs_write_data *data, return PNFS_NOT_ATTEMPTED; } +static inline int pnfs_return_layout(struct inode *ino) +{ + return 0; +} + static inline bool pnfs_roc(struct inode *ino) { diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 178fafe0ff93..9376eaf26e15 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -562,6 +562,7 @@ enum { NFSPROC4_CLNT_LAYOUTGET, NFSPROC4_CLNT_GETDEVICEINFO, NFSPROC4_CLNT_LAYOUTCOMMIT, + NFSPROC4_CLNT_LAYOUTRETURN, }; /* nfs41 types */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7c8ff0984a84..5e8444a11adf 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -269,6 +269,27 @@ struct nfs4_layoutcommit_data { struct nfs4_layoutcommit_res res; }; +struct nfs4_layoutreturn_args { + __u32 layout_type; + struct inode *inode; + nfs4_stateid stateid; + struct nfs4_sequence_args seq_args; +}; + +struct nfs4_layoutreturn_res { + struct nfs4_sequence_res seq_res; + u32 lrs_present; + nfs4_stateid stateid; +}; + +struct nfs4_layoutreturn { + struct nfs4_layoutreturn_args args; + struct nfs4_layoutreturn_res res; + 
struct rpc_cred *cred; + struct nfs_client *clp; + int rpc_status; +}; + /* * Arguments to the open call. */ -- cgit v1.2.3 From 18ad0a9f2ccd260d37dd6bc5fa04c7819def4c84 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 25 May 2011 21:03:56 +0300 Subject: NFSv4.1: change pg_test return type to bool Signed-off-by: Benny Halevy --- fs/nfs/nfs4filelayout.c | 6 +++--- fs/nfs/objlayout/objio_osd.c | 7 +++---- fs/nfs/pagelist.c | 22 +++++++++++----------- fs/nfs/pnfs.c | 4 ++-- fs/nfs/pnfs.h | 4 ++-- include/linux/nfs_page.h | 2 +- 6 files changed, 22 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 33bda24e8cd2..24f05720daf1 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -651,10 +651,10 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, /* * filelayout_pg_test(). Called by nfs_can_coalesce_requests() * - * return 1 : coalesce page - * return 0 : don't coalesce page + * return true : coalesce page + * return false : don't coalesce page */ -int +bool filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 3be124160e97..8c2bd3eb8e8d 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -991,14 +991,13 @@ ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable) /* * objlayout_pg_test(). Called by nfs_can_coalesce_requests() * - * return 1 : coalesce page - * return 0 : don't coalesce page + * return true iff coalesce page */ -int +bool objlayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { - return 1; + return true; } static struct pnfs_layoutdriver_type objlayout_type = { diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index b8704fedcd16..5344371a257c 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -244,29 +244,29 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, * * Return 'true' if this is the case, else return 'false'. */ -static int nfs_can_coalesce_requests(struct nfs_page *prev, - struct nfs_page *req, - struct nfs_pageio_descriptor *pgio) +static bool nfs_can_coalesce_requests(struct nfs_page *prev, + struct nfs_page *req, + struct nfs_pageio_descriptor *pgio) { if (req->wb_context->cred != prev->wb_context->cred) - return 0; + return false; if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner) - return 0; + return false; if (req->wb_context->state != prev->wb_context->state) - return 0; + return false; if (req->wb_index != (prev->wb_index + 1)) - return 0; + return false; if (req->wb_pgbase != 0) - return 0; + return false; if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) - return 0; + return false; /* * Non-whole file layouts need to check that req is inside of * pgio->pg_lseg. 
*/ if (pgio->pg_test && !pgio->pg_test(pgio, prev, req)) - return 0; - return 1; + return false; + return true; } /** diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 568ab0eef677..212fc292761a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1043,7 +1043,7 @@ out_forget_reply: goto out; } -int +bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { @@ -1070,7 +1070,7 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, } else if (pgio->pg_lseg && req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, pgio->pg_lseg->pls_range.length)) - return 0; + return false; return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index c056688ee92b..65daae59c8ae 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -87,7 +87,7 @@ struct pnfs_layoutdriver_type { void (*free_lseg) (struct pnfs_layout_segment *lseg); /* test for nfs page cache coalescing */ - int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); + bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); /* Returns true if layoutdriver wants to divert this request to * driver's commit routine. @@ -158,7 +158,7 @@ enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, const struct rpc_call_ops *, int); enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, const struct rpc_call_ops *); -int pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); +bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); int pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 91af2e49fa3a..3a34e80ae92f 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -68,7 +68,7 @@ struct nfs_pageio_descriptor { int pg_ioflags; int pg_error; struct pnfs_layout_segment *pg_lseg; - int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); + bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); }; #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) -- cgit v1.2.3 From 6345d24daf0c1fffe6642081d783cdf653ebaa5c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 29 May 2011 11:32:28 -0700 Subject: mm: Fix boot crash in mm_alloc() Thomas Gleixner reports that we now have a boot crash triggered by CONFIG_CPUMASK_OFFSTACK=y: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] find_next_bit+0x55/0xb0 Call Trace: [] cpumask_any_but+0x2a/0x70 [] flush_tlb_mm+0x2b/0x80 [] pud_populate+0x35/0x50 [] pgd_alloc+0x9a/0xf0 [] mm_init+0xec/0x120 [] mm_alloc+0x53/0xd0 which was introduced by commit de03c72cfce5 ("mm: convert mm->cpu_vm_cpumask into cpumask_var_t"), and is due to wrong ordering of mm_init() vs mm_init_cpumask Thomas wrote a patch to just fix the ordering of initialization, but I hate the new double allocation in the fork path, so I ended up instead doing some more radical surgery to clean it all up. 
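The resulting pattern, visible in the mm_struct hunk below, generalizes: embed the struct cpumask at the end of the containing structure for the CONFIG_CPUMASK_OFFSTACK case and point the cpumask_var_t at it before anything touches the mask. A minimal sketch with an illustrative structure (foo and its helper are hypothetical; only the cpumask helpers are kernel API):

struct foo {
	cpumask_var_t cpus;			/* what users dereference via cpumask helpers */
#ifdef CONFIG_CPUMASK_OFFSTACK
	struct cpumask cpus_allocation;		/* kept last so the slab size could later be trimmed */
#endif
};

static inline void foo_init_cpumask(struct foo *f)
{
#ifdef CONFIG_CPUMASK_OFFSTACK
	f->cpus = &f->cpus_allocation;		/* no separate allocation, so this cannot fail */
#endif
	cpumask_clear(f->cpus);			/* only safe once the pointer is wired up */
}

The boot crash happened precisely because mm_init() could reach mm_cpumask() before the pointer was set; making the setup a trivial, allocation-free assignment lets it run first in every path (mm_alloc, dup_mm, init_mm).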
Reported-by: Thomas Gleixner Reported-by: Ingo Molnar Cc: KOSAKI Motohiro Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 14 ++++++++++++-- include/linux/sched.h | 1 - init/main.c | 2 +- kernel/fork.c | 42 ++++++++++-------------------------------- 4 files changed, 23 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 2a78aae78c69..027935c86c68 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -264,6 +264,8 @@ struct mm_struct { struct linux_binfmt *binfmt; + cpumask_var_t cpu_vm_mask_var; + /* Architecture-specific MM context */ mm_context_t context; @@ -311,10 +313,18 @@ struct mm_struct { #ifdef CONFIG_TRANSPARENT_HUGEPAGE pgtable_t pmd_huge_pte; /* protected by page_table_lock */ #endif - - cpumask_var_t cpu_vm_mask_var; +#ifdef CONFIG_CPUMASK_OFFSTACK + struct cpumask cpumask_allocation; +#endif }; +static inline void mm_init_cpumask(struct mm_struct *mm) +{ +#ifdef CONFIG_CPUMASK_OFFSTACK + mm->cpu_vm_mask_var = &mm->cpumask_allocation; +#endif +} + /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) { diff --git a/include/linux/sched.h b/include/linux/sched.h index bcddd0138105..2a8621c4be1e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2194,7 +2194,6 @@ static inline void mmdrop(struct mm_struct * mm) if (unlikely(atomic_dec_and_test(&mm->mm_count))) __mmdrop(mm); } -extern int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm); /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); diff --git a/init/main.c b/init/main.c index d2f1e086bf33..cafba67c13bf 100644 --- a/init/main.c +++ b/init/main.c @@ -487,6 +487,7 @@ asmlinkage void __init start_kernel(void) printk(KERN_NOTICE "%s", linux_banner); setup_arch(&command_line); mm_init_owner(&init_mm, &init_task); + mm_init_cpumask(&init_mm); setup_command_line(command_line); setup_nr_cpu_ids(); setup_per_cpu_areas(); @@ -510,7 +511,6 @@ asmlinkage void __init start_kernel(void) sort_main_extable(); trap_init(); mm_init(); - BUG_ON(mm_init_cpumask(&init_mm, 0)); /* * Set up the scheduler prior starting any interrupts (such as the diff --git a/kernel/fork.c b/kernel/fork.c index ca406d916713..0276c30401a0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -484,20 +484,6 @@ static void mm_init_aio(struct mm_struct *mm) #endif } -int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm) -{ -#ifdef CONFIG_CPUMASK_OFFSTACK - if (!alloc_cpumask_var(&mm->cpu_vm_mask_var, GFP_KERNEL)) - return -ENOMEM; - - if (oldmm) - cpumask_copy(mm_cpumask(mm), mm_cpumask(oldmm)); - else - memset(mm_cpumask(mm), 0, cpumask_size()); -#endif - return 0; -} - static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) { atomic_set(&mm->mm_users, 1); @@ -538,17 +524,8 @@ struct mm_struct * mm_alloc(void) return NULL; memset(mm, 0, sizeof(*mm)); - mm = mm_init(mm, current); - if (!mm) - return NULL; - - if (mm_init_cpumask(mm, NULL)) { - mm_free_pgd(mm); - free_mm(mm); - return NULL; - } - - return mm; + mm_init_cpumask(mm); + return mm_init(mm, current); } /* @@ -559,7 +536,6 @@ struct mm_struct * mm_alloc(void) void __mmdrop(struct mm_struct *mm) { BUG_ON(mm == &init_mm); - free_cpumask_var(mm->cpu_vm_mask_var); mm_free_pgd(mm); destroy_context(mm); mmu_notifier_mm_destroy(mm); @@ -753,6 +729,7 @@ struct mm_struct *dup_mm(struct task_struct *tsk) goto fail_nomem; 
memcpy(mm, oldmm, sizeof(*mm)); + mm_init_cpumask(mm); /* Initializing for Swap token stuff */ mm->token_priority = 0; @@ -765,9 +742,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk) if (!mm_init(mm, tsk)) goto fail_nomem; - if (mm_init_cpumask(mm, oldmm)) - goto fail_nocpumask; - if (init_new_context(tsk, mm)) goto fail_nocontext; @@ -794,9 +768,6 @@ fail_nomem: return NULL; fail_nocontext: - free_cpumask_var(mm->cpu_vm_mask_var); - -fail_nocpumask: /* * If init_new_context() failed, we cannot use mmput() to free the mm * because it calls destroy_context() @@ -1591,6 +1562,13 @@ void __init proc_caches_init(void) fs_cachep = kmem_cache_create("fs_cache", sizeof(struct fs_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); + /* + * FIXME! The "sizeof(struct mm_struct)" currently includes the + * whole struct cpumask for the OFFSTACK case. We could change + * this to *only* allocate as much of it as required by the + * maximum number of CPU's we can ever have. The cpumask_allocation + * is at the end of the structure, exactly for that reason. + */ mm_cachep = kmem_cache_create("mm_struct", sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); -- cgit v1.2.3 From a1b383870a28cfbd1657d4922c0fafc634a62ebd Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 May 2011 11:14:13 -0600 Subject: virtio: add full three-clause BSD text to headers. It's unclear to me if it's important, but it's obviously causing my technical colleages some headaches and I'd hate such imprecision to slow virtio adoption. I've emailed this to all non-trivial contributors for approval, too. Signed-off-by: Rusty Russell Acked-by: Grant Likely Acked-by: Ryan Harper Acked-by: Anthony Liguori Acked-by: Eric Van Hensbergen Acked-by: john cooper Acked-by: Aneesh Kumar K.V Acked-by: Christian Borntraeger Acked-by: Fernando Luis Vazquez Cao --- include/linux/virtio_9p.h | 25 ++++++++++++++++++++++++- include/linux/virtio_balloon.h | 25 ++++++++++++++++++++++++- include/linux/virtio_blk.h | 25 ++++++++++++++++++++++++- include/linux/virtio_config.h | 25 ++++++++++++++++++++++++- include/linux/virtio_console.h | 26 +++++++++++++++++++++++++- include/linux/virtio_ids.h | 24 +++++++++++++++++++++++- include/linux/virtio_net.h | 25 ++++++++++++++++++++++++- include/linux/virtio_pci.h | 23 +++++++++++++++++++++++ include/linux/virtio_ring.h | 23 +++++++++++++++++++++++ 9 files changed, 214 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h index e68b439b2860..277c4ad44e84 100644 --- a/include/linux/virtio_9p.h +++ b/include/linux/virtio_9p.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_9P_H #define _LINUX_VIRTIO_9P_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include #include #include diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h index a50ecd1b81a2..652dc8bea921 100644 --- a/include/linux/virtio_balloon.h +++ b/include/linux/virtio_balloon.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_BALLOON_H #define _LINUX_VIRTIO_BALLOON_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include #include diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index 167720d695ed..e0edb40ca7aa 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_BLK_H #define _LINUX_VIRTIO_BLK_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include #include #include diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 800617b4ddd5..39c88c5ad19d 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_CONFIG_H #define _LINUX_VIRTIO_CONFIG_H /* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so - * anyone can use the definitions to implement compatible drivers/servers. */ + * anyone can use the definitions to implement compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
*/ /* Virtio devices use a standardized configuration space to define their * features and pass configuration information, but each implementation can diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h index e4d333543a33..bdf4b0034739 100644 --- a/include/linux/virtio_console.h +++ b/include/linux/virtio_console.h @@ -5,7 +5,31 @@ #include /* * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so - * anyone can use the definitions to implement compatible drivers/servers. + * anyone can use the definitions to implement compatible drivers/servers: + * + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. * * Copyright (C) Red Hat, Inc., 2009, 2010, 2011 * Copyright (C) Amit Shah , 2009, 2010, 2011 diff --git a/include/linux/virtio_ids.h b/include/linux/virtio_ids.h index 06660c0a78d7..85bb0bb66ffc 100644 --- a/include/linux/virtio_ids.h +++ b/include/linux/virtio_ids.h @@ -5,7 +5,29 @@ * * This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. - */ + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #define VIRTIO_ID_NET 1 /* virtio net */ #define VIRTIO_ID_BLOCK 2 /* virtio block */ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 085e42298ce5..136040bba3e3 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_NET_H #define _LINUX_VIRTIO_NET_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include #include #include diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h index 9a3d7c48c622..ea66f3f60d63 100644 --- a/include/linux/virtio_pci.h +++ b/include/linux/virtio_pci.h @@ -11,6 +11,29 @@ * * This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #ifndef _LINUX_VIRTIO_PCI_H diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index e4d144b132b5..a813e5d460eb 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -7,6 +7,29 @@ * This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * * Copyright Rusty Russell IBM Corporation 2007. */ #include -- cgit v1.2.3 From 770b31a85e000b0194974922f238a30ade4246b6 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 May 2011 02:10:17 +0300 Subject: virtio: event index interface Define a new feature bit for the guest and host to utilize an event index (like Xen) instead if a flag bit to enable/disable interrupts and kicks. Signed-off-by: Michael S. 
Tsirkin Signed-off-by: Rusty Russell --- include/linux/virtio_ring.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index a813e5d460eb..c4eef73deb3f 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -52,6 +52,12 @@ /* We support indirect buffer descriptors */ #define VIRTIO_RING_F_INDIRECT_DESC 28 +/* The Guest publishes the used index for which it expects an interrupt + * at the end of the avail ring. Host should ignore the avail->flags field. */ +/* The Host publishes the avail index for which it expects a kick + * at the end of the used ring. Guest should ignore the used->flags field. */ +#define VIRTIO_RING_F_EVENT_IDX 29 + /* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ struct vring_desc { /* Address (guest-physical). */ @@ -106,6 +112,7 @@ struct vring { * __u16 avail_flags; * __u16 avail_idx; * __u16 available[num]; + * __u16 used_event_idx; * * // Padding to the next align boundary. * char pad[]; @@ -114,8 +121,14 @@ struct vring { * __u16 used_flags; * __u16 used_idx; * struct vring_used_elem used[num]; + * __u16 avail_event_idx; * }; */ +/* We publish the used event index at the end of the available ring, and vice + * versa. They are at the end for backwards compatibility. */ +#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num]) +#define vring_avail_event(vr) (*(__u16 *)&(vr)->used->ring[(vr)->num]) + static inline void vring_init(struct vring *vr, unsigned int num, void *p, unsigned long align) { @@ -130,7 +143,7 @@ static inline unsigned vring_size(unsigned int num, unsigned long align) { return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num) + align - 1) & ~(align - 1)) - + sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num; + + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num; } #ifdef __KERNEL__ -- cgit v1.2.3 From bf7035bf20563a6cadcb9e870406e7b21daf5e30 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 May 2011 02:10:27 +0300 Subject: virtio ring: inline function to check for events With the new used_event and avail_event and features, both host and guest need similar logic to check whether events are enabled, so it helps to put the common code in the header. Note that Xen has similar logic for notification hold-off in include/xen/interface/io/ring.h with req_event and req_prod corresponding to event_idx + 1 and new_idx respectively. +1 comes from the fact that req_event and req_prod in Xen start at 1, while event index in virtio starts at 0. Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- include/linux/virtio_ring.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index c4eef73deb3f..4a32cb6da425 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -146,6 +146,20 @@ static inline unsigned vring_size(unsigned int num, unsigned long align) + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num; } +/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */ +/* Assuming a given event_idx value from the other size, if + * we have just incremented index from old to new_idx, + * should we trigger an event? 
*/ +static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old) +{ + /* Note: Xen has similar logic for notification hold-off + * in include/xen/interface/io/ring.h with req_event and req_prod + * corresponding to event_idx + 1 and new_idx respectively. + * Note also that req_event and req_prod in Xen start at 1, + * event indexes in virtio start at 0. */ + return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old); +} + #ifdef __KERNEL__ #include struct virtio_device; -- cgit v1.2.3 From 7ab358c23cbf15cea08129cd722d1ce77433a94d Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 May 2011 02:11:14 +0300 Subject: virtio: add api for delayed callbacks Add an API that tells the other side that callbacks should be delayed until a lot of work has been done. Implement using the new event_idx feature. Note: it might seem advantageous to let the drivers ask for a callback after a specific capacity has been reached. However, as a single head can free many entries in the descriptor table, we don't really have a clue about capacity until get_buf is called. The API is the simplest to implement at the moment, we'll see what kind of hints drivers can pass when there's more than one user of the feature. Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- drivers/virtio/virtio_ring.c | 27 +++++++++++++++++++++++++++ include/linux/virtio.h | 9 +++++++++ 2 files changed, 36 insertions(+) (limited to 'include/linux') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index a5fadc40c448..68b9136847af 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -376,6 +376,33 @@ bool virtqueue_enable_cb(struct virtqueue *_vq) } EXPORT_SYMBOL_GPL(virtqueue_enable_cb); +bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + u16 bufs; + + START_USE(vq); + + /* We optimistically turn back on interrupts, then check if there was + * more to do. */ + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to + * either clear the flags bit or point the event index at the next + * entry. Always do both to keep code simple. */ + vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; + /* TODO: tune this threshold */ + bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4; + vring_used_event(&vq->vring) = vq->last_used_idx + bufs; + virtio_mb(); + if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) { + END_USE(vq); + return false; + } + + END_USE(vq); + return true; +} +EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); + void *virtqueue_detach_unused_buf(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index aff5b4f74041..710885749605 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -51,6 +51,13 @@ struct virtqueue { * This re-enables callbacks; it returns "false" if there are pending * buffers in the queue, to detect a possible race between the driver * checking for more work, and enabling callbacks. + * virtqueue_enable_cb_delayed: restart callbacks after disable_cb. + * vq: the struct virtqueue we're talking about. + * This re-enables callbacks but hints to the other side to delay + * interrupts until most of the available buffers have been processed; + * it returns "false" if there are many pending buffers in the queue, + * to detect a possible race between the driver checking for more work, + * and enabling callbacks. 
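For a concrete feel for the wrap-safe comparison, the helper from the virtio_ring.h hunk above can be exercised on its own. The following user-space sketch is illustrative only; it reuses vring_need_event() exactly as defined in the patch (with __u16 spelled uint16_t) and feeds it made-up index values, including a pair that straddles the 16-bit wrap.

#include <stdio.h>
#include <stdint.h>

/* Copied from the virtio_ring.h hunk above. */
static inline int vring_need_event(uint16_t event_idx, uint16_t new_idx,
				   uint16_t old)
{
	return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old);
}

int main(void)
{
	/* Other side asked to be notified once entry 10 has been consumed. */
	printf("%d\n", vring_need_event(10, 12, 8));		/* 1: 10 lies in [8, 11] */
	printf("%d\n", vring_need_event(20, 12, 8));		/* 0: not reached yet */
	/* Still correct when the indices wrap around 0xffff. */
	printf("%d\n", vring_need_event(0xfffe, 2, 0xfffc));	/* 1 */
	return 0;
}

The unsigned 16-bit casts are what keep the comparison correct across wrap-around, which is why both sides can use free-running indices.
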
* virtqueue_detach_unused_buf: detach first unused buffer * vq: the struct virtqueue we're talking about. * Returns NULL or the "data" token handed to add_buf @@ -86,6 +93,8 @@ void virtqueue_disable_cb(struct virtqueue *vq); bool virtqueue_enable_cb(struct virtqueue *vq); +bool virtqueue_enable_cb_delayed(struct virtqueue *vq); + void *virtqueue_detach_unused_buf(struct virtqueue *vq); /** -- cgit v1.2.3 From a1706ac4c0201ea0143dc0db0659001b26ceeabb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 30 May 2011 07:42:51 +0200 Subject: Revert "block: Remove extra discard_alignment from hd_struct." It was not a good idea to start dereferencing disk->queue from the fs sysfs strategy for displaying discard alignment. We ran into first a NULL pointer deref, and after fixing that we sometimes see unvalid disk->queue pointer values. Since discard is the only one of the bunch actually looking into the queue, just revert the change. This reverts commit 23ceb5b7719e9276d4fa72a3ecf94dd396755276. Conflicts: fs/partitions/check.c --- fs/partitions/check.c | 10 +++------- include/linux/genhd.h | 1 + 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/partitions/check.c b/fs/partitions/check.c index f82e762eeca2..d545e97d99c3 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -255,13 +255,7 @@ ssize_t part_discard_alignment_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); - struct gendisk *disk = dev_to_disk(dev); - unsigned int alignment = 0; - - if (disk->queue) - alignment = queue_limit_discard_alignment(&disk->queue->limits, - p->start_sect); - return sprintf(buf, "%u\n", alignment); + return sprintf(buf, "%u\n", p->discard_alignment); } ssize_t part_stat_show(struct device *dev, @@ -455,6 +449,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, p->start_sect = start; p->alignment_offset = queue_limit_alignment_offset(&disk->queue->limits, start); + p->discard_alignment = + queue_limit_discard_alignment(&disk->queue->limits, start); p->nr_sects = len; p->partno = partno; p->policy = get_disk_ro(disk); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index b78956b3c2e7..300d7582006e 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -100,6 +100,7 @@ struct hd_struct { sector_t start_sect; sector_t nr_sects; sector_t alignment_offset; + unsigned int discard_alignment; struct device __dev; struct kobject *holder_dir; int policy, partno; -- cgit v1.2.3 From ea9d6553b3b3044e7374774cc33bb1b2eee19dd3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 31 May 2011 13:45:53 +0200 Subject: block: remove unwanted semicolons Since those defined functions require additional semicolon from the caller, they could cause potential syntax errors when used in if-else statements. Signed-off-by: Namhyung Kim Acked-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ae9091a68480..1a23722e8878 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1282,8 +1282,8 @@ queue_max_integrity_segments(struct request_queue *q) #define blk_get_integrity(a) (0) #define blk_integrity_compare(a, b) (0) #define blk_integrity_register(a, b) (0) -#define blk_integrity_unregister(a) do { } while (0); -#define blk_queue_max_integrity_segments(a, b) do { } while (0); +#define blk_integrity_unregister(a) do { } while (0) +#define blk_queue_max_integrity_segments(a, b) do { } while (0) #define queue_max_integrity_segments(a) (0) #define blk_integrity_merge_rq(a, b, c) (0) #define blk_integrity_merge_bio(a, b, c) (0) -- cgit v1.2.3 From f339b9dc1f03591761d5d930800db24bc0eda1e1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 31 May 2011 10:49:20 +0200 Subject: sched: Fix schedstat.nr_wakeups_migrate While looking over the code I found that with the ttwu rework the nr_wakeups_migrate test broke since we now switch cpus prior to calling ttwu_stat(), hence the test is always true. Cure this by passing the migration state in wake_flags. Also move the whole test under CONFIG_SMP, its hard to migrate tasks on UP :-) Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-pwwxl7gdqs5676f1d4cx6pj7@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + kernel/sched.c | 11 +++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8da84b7bc1b8..483c1ed5bc4d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1063,6 +1063,7 @@ struct sched_domain; */ #define WF_SYNC 0x01 /* waker goes to sleep after wakup */ #define WF_FORK 0x02 /* child wakeup after fork */ +#define WF_MIGRATED 0x04 /* internal use, task got migrated */ #define ENQUEUE_WAKEUP 1 #define ENQUEUE_HEAD 2 diff --git a/kernel/sched.c b/kernel/sched.c index 49cc70b152cf..2fe98ed474da 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2447,6 +2447,10 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) } rcu_read_unlock(); } + + if (wake_flags & WF_MIGRATED) + schedstat_inc(p, se.statistics.nr_wakeups_migrate); + #endif /* CONFIG_SMP */ schedstat_inc(rq, ttwu_count); @@ -2455,9 +2459,6 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) if (wake_flags & WF_SYNC) schedstat_inc(p, se.statistics.nr_wakeups_sync); - if (cpu != task_cpu(p)) - schedstat_inc(p, se.statistics.nr_wakeups_migrate); - #endif /* CONFIG_SCHEDSTATS */ } @@ -2675,8 +2676,10 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) p->sched_class->task_waking(p); cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); - if (task_cpu(p) != cpu) + if (task_cpu(p) != cpu) { + wake_flags |= WF_MIGRATED; set_task_cpu(p, cpu); + } #endif /* CONFIG_SMP */ ttwu_queue(p, cpu); -- cgit v1.2.3 From 63da029015b5255915cd6d61f19ffc276ad4635d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 23 May 2011 11:37:09 -0700 Subject: mtd: fix physmap.h warnings Fix build warnings in physmap.h: include/linux/mtd/physmap.h:25: warning: 'struct platform_device' declared inside parameter list include/linux/mtd/physmap.h:25: warning: its scope is only this definition or declaration, which is probably not what you want include/linux/mtd/physmap.h:26: warning: 'struct 
platform_device' declared inside parameter list include/linux/mtd/physmap.h:27: warning: 'struct platform_device' declared inside parameter list Signed-off-by: Randy Dunlap Signed-off-by: David Woodhouse --- include/linux/mtd/physmap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/physmap.h b/include/linux/mtd/physmap.h index d40bfa1d9c91..e5f21d293c70 100644 --- a/include/linux/mtd/physmap.h +++ b/include/linux/mtd/physmap.h @@ -19,6 +19,7 @@ #include struct map_info; +struct platform_device; struct physmap_flash_data { unsigned int width; -- cgit v1.2.3 From 6dd9a7c73761a8a5f5475d5cfdc15368a0f4c06d Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Wed, 25 May 2011 19:13:49 +0100 Subject: intel-iommu: Enable super page (2MiB, 1GiB, etc.) support There are no externally-visible changes with this. In the loop in the internal __domain_mapping() function, we simply detect if we are mapping: - size >= 2MiB, and - virtual address aligned to 2MiB, and - physical address aligned to 2MiB, and - on hardware that supports superpages. (and likewise for larger superpages). We automatically use a superpage for such mappings. We never have to worry about *breaking* superpages, since we trust that we will always *unmap* the same range that was mapped. So all we need to do is ensure that dma_pte_clear_range() will also cope with superpages. Adjust pfn_to_dma_pte() to take a superpage 'level' as an argument, so it can return a PTE at the appropriate level rather than always extending the page tables all the way down to level 1. Again, this is simplified by the fact that we should never encounter existing small pages when we're creating a mapping; any old mapping that used the same virtual range will have been entirely removed and its obsolete page tables freed. Provide an 'intel_iommu=sp_off' argument on the command line as a chicken bit. Not that it should ever be required. == The original commit seen in the iommu-2.6.git was Youquan's implementation (and completion) of my own half-baked code which I'd typed into an email. Followed by half a dozen subsequent 'fixes'. I've taken the unusual step of rewriting history and collapsing the original commits in order to keep the main history simpler, and make life easier for the people who are going to have to backport this to older kernels. And also so I can give it a more coherent commit comment which (hopefully) gives a better explanation of what's going on. The original sequence of commits leading to identical code was: Youquan Song (3): intel-iommu: super page support intel-iommu: Fix superpage alignment calculation error intel-iommu: Fix superpage level calculation error in dma_pfn_level_pte() David Woodhouse (4): intel-iommu: Precalculate superpage support for dmar_domain intel-iommu: Fix hardware_largepage_caps() intel-iommu: Fix inappropriate use of superpages in __domain_mapping() intel-iommu: Fix phys_pfn in __domain_mapping for sglist pages Signed-off-by: Youquan Song Signed-off-by: David Woodhouse --- Documentation/kernel-parameters.txt | 5 +- drivers/pci/intel-iommu.c | 157 +++++++++++++++++++++++++++++++----- include/linux/dma_remapping.h | 4 + 3 files changed, 147 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index cc85a9278190..d005487c1a22 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -999,7 +999,10 @@ bytes respectively. 
Such letter suffixes can also be entirely omitted. With this option on every unmap_single operation will result in a hardware IOTLB flush operation as opposed to batching them for performance. - + sp_off [Default Off] + By default, super page will be supported if Intel IOMMU + has the capability. With this option, super page will + not be supported. intremap= [X86-64, Intel-IOMMU] Format: { on (default) | off | nosid } on enable Interrupt Remapping (default) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 395f253c0494..e6fe1994f9d3 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -115,6 +115,11 @@ static inline unsigned long align_to_level(unsigned long pfn, int level) return (pfn + level_size(level) - 1) & level_mask(level); } +static inline unsigned long lvl_to_nr_pages(unsigned int lvl) +{ + return 1 << ((lvl - 1) * LEVEL_STRIDE); +} + /* VT-d pages must always be _smaller_ than MM pages. Otherwise things are never going to work. */ static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn) @@ -343,6 +348,9 @@ struct dmar_domain { int iommu_coherency;/* indicate coherency of iommu access */ int iommu_snooping; /* indicate snooping control feature*/ int iommu_count; /* reference count of iommu */ + int iommu_superpage;/* Level of superpages supported: + 0 == 4KiB (no superpages), 1 == 2MiB, + 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ spinlock_t iommu_lock; /* protect iommu set in domain */ u64 max_addr; /* maximum mapped address */ }; @@ -392,6 +400,7 @@ int dmar_disabled = 1; static int dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; +static int intel_iommu_superpage = 1; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) static DEFINE_SPINLOCK(device_domain_lock); @@ -422,6 +431,10 @@ static int __init intel_iommu_setup(char *str) printk(KERN_INFO "Intel-IOMMU: disable batched IOTLB flush\n"); intel_iommu_strict = 1; + } else if (!strncmp(str, "sp_off", 6)) { + printk(KERN_INFO + "Intel-IOMMU: disable supported super page\n"); + intel_iommu_superpage = 0; } str += strcspn(str, ","); @@ -560,11 +573,32 @@ static void domain_update_iommu_snooping(struct dmar_domain *domain) } } +static void domain_update_iommu_superpage(struct dmar_domain *domain) +{ + int i, mask = 0xf; + + if (!intel_iommu_superpage) { + domain->iommu_superpage = 0; + return; + } + + domain->iommu_superpage = 4; /* 1TiB */ + + for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) { + mask |= cap_super_page_val(g_iommus[i]->cap); + if (!mask) { + break; + } + } + domain->iommu_superpage = fls(mask); +} + /* Some capabilities may be different across iommus */ static void domain_update_iommu_cap(struct dmar_domain *domain) { domain_update_iommu_coherency(domain); domain_update_iommu_snooping(domain); + domain_update_iommu_superpage(domain); } static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) @@ -694,23 +728,31 @@ out: } static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, - unsigned long pfn) + unsigned long pfn, int large_level) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; struct dma_pte *parent, *pte = NULL; int level = agaw_to_level(domain->agaw); - int offset; + int offset, target_level; BUG_ON(!domain->pgd); BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width); parent = domain->pgd; + /* Search pte */ + if (!large_level) + target_level = 1; + else + target_level = large_level; + while (level > 0) { void *tmp_page; offset = pfn_level_offset(pfn, level); pte = 
&parent[offset]; - if (level == 1) + if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE)) + break; + if (level == target_level) break; if (!dma_pte_present(pte)) { @@ -738,10 +780,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, return pte; } + /* return address's pte at specific level */ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, unsigned long pfn, - int level) + int level, int *large_page) { struct dma_pte *parent, *pte = NULL; int total = agaw_to_level(domain->agaw); @@ -754,8 +797,16 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, if (level == total) return pte; - if (!dma_pte_present(pte)) + if (!dma_pte_present(pte)) { + *large_page = total; break; + } + + if (pte->val & DMA_PTE_LARGE_PAGE) { + *large_page = total; + return pte; + } + parent = phys_to_virt(dma_pte_addr(pte)); total--; } @@ -768,6 +819,7 @@ static void dma_pte_clear_range(struct dmar_domain *domain, unsigned long last_pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; + unsigned int large_page = 1; struct dma_pte *first_pte, *pte; BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); @@ -776,14 +828,15 @@ static void dma_pte_clear_range(struct dmar_domain *domain, /* we don't need lock here; nobody else touches the iova range */ do { - first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1); + large_page = 1; + first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page); if (!pte) { - start_pfn = align_to_level(start_pfn + 1, 2); + start_pfn = align_to_level(start_pfn + 1, large_page + 1); continue; } - do { + do { dma_clear_pte(pte); - start_pfn++; + start_pfn += lvl_to_nr_pages(large_page); pte++; } while (start_pfn <= last_pfn && !first_pte_in_page(pte)); @@ -803,6 +856,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, int total = agaw_to_level(domain->agaw); int level; unsigned long tmp; + int large_page = 2; BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); @@ -818,7 +872,10 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, return; do { - first_pte = pte = dma_pfn_level_pte(domain, tmp, level); + large_page = level; + first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page); + if (large_page > level) + level = large_page + 1; if (!pte) { tmp = align_to_level(tmp + 1, level + 1); continue; @@ -1402,6 +1459,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width) else domain->iommu_snooping = 0; + domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); domain->iommu_count = 1; domain->nid = iommu->node; @@ -1657,6 +1715,34 @@ static inline unsigned long aligned_nrpages(unsigned long host_addr, return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; } +/* Return largest possible superpage level for a given mapping */ +static inline int hardware_largepage_caps(struct dmar_domain *domain, + unsigned long iov_pfn, + unsigned long phy_pfn, + unsigned long pages) +{ + int support, level = 1; + unsigned long pfnmerge; + + support = domain->iommu_superpage; + + /* To use a large page, the virtual *and* physical addresses + must be aligned to 2MiB/1GiB/etc. Lower bits set in either + of them will mean we have to use smaller pages. So just + merge them and check both at once. 
*/ + pfnmerge = iov_pfn | phy_pfn; + + while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) { + pages >>= VTD_STRIDE_SHIFT; + if (!pages) + break; + pfnmerge >>= VTD_STRIDE_SHIFT; + level++; + support--; + } + return level; +} + static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, struct scatterlist *sg, unsigned long phys_pfn, unsigned long nr_pages, int prot) @@ -1665,6 +1751,8 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, phys_addr_t uninitialized_var(pteval); int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; unsigned long sg_res; + unsigned int largepage_lvl = 0; + unsigned long lvl_pages = 0; BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); @@ -1680,7 +1768,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot; } - while (nr_pages--) { + while (nr_pages > 0) { uint64_t tmp; if (!sg_res) { @@ -1688,11 +1776,21 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset; sg->dma_length = sg->length; pteval = page_to_phys(sg_page(sg)) | prot; + phys_pfn = pteval >> VTD_PAGE_SHIFT; } + if (!pte) { - first_pte = pte = pfn_to_dma_pte(domain, iov_pfn); + largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res); + + first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl); if (!pte) return -ENOMEM; + /* It is large page*/ + if (largepage_lvl > 1) + pteval |= DMA_PTE_LARGE_PAGE; + else + pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; + } /* We don't need lock here, nobody else * touches the iova range @@ -1708,16 +1806,38 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, } WARN_ON(1); } + + lvl_pages = lvl_to_nr_pages(largepage_lvl); + + BUG_ON(nr_pages < lvl_pages); + BUG_ON(sg_res < lvl_pages); + + nr_pages -= lvl_pages; + iov_pfn += lvl_pages; + phys_pfn += lvl_pages; + pteval += lvl_pages * VTD_PAGE_SIZE; + sg_res -= lvl_pages; + + /* If the next PTE would be the first in a new page, then we + need to flush the cache on the entries we've just written. + And then we'll need to recalculate 'pte', so clear it and + let it get set again in the if (!pte) block above. + + If we're done (!nr_pages) we need to flush the cache too. + + Also if we've been setting superpages, we may need to + recalculate 'pte' and switch back to smaller pages for the + end of the mapping, if the trailing size is not enough to + use another superpage (i.e. sg_res < lvl_pages). 
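The level calculation in hardware_largepage_caps() can also be tried in isolation. The sketch below is illustrative only: it copies the pfn-merging loop, takes the number of supported superpage levels as an explicit parameter instead of reading domain->iommu_superpage, and reuses the VTD_STRIDE_* values that this patch adds to dma_remapping.h.

#include <stdio.h>

#define VTD_STRIDE_SHIFT 9
#define VTD_STRIDE_MASK  (((unsigned long long)-1) << VTD_STRIDE_SHIFT)

/* Mirror of the loop above: how large a superpage may cover this
 * (virtual pfn, physical pfn, length) triple when the IOMMU supports
 * 'support' extra levels (1 == 2MiB, 2 == 1GiB, ...). */
static int largepage_level(unsigned long iov_pfn, unsigned long phy_pfn,
			   unsigned long pages, int support)
{
	unsigned long long pfnmerge = iov_pfn | phy_pfn;
	int level = 1;

	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
		pages >>= VTD_STRIDE_SHIFT;
		if (!pages)
			break;
		pfnmerge >>= VTD_STRIDE_SHIFT;
		level++;
		support--;
	}
	return level;
}

int main(void)
{
	/* Both pfns 2MiB-aligned (low 9 bits clear) and at least 512 pages
	 * remain, so a 2MiB superpage (level 2) can be used. */
	printf("%d\n", largepage_level(0xa00, 0x1400, 1024, 1));
	/* Physical side only 4KiB-aligned: fall back to level 1 (4KiB PTEs). */
	printf("%d\n", largepage_level(0xa00, 0x1401, 1024, 1));
	return 0;
}

Each additional level multiplies the coverage by 512 base pages, matching lvl_to_nr_pages() earlier in the patch.
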
*/ pte++; - if (!nr_pages || first_pte_in_page(pte)) { + if (!nr_pages || first_pte_in_page(pte) || + (largepage_lvl > 1 && sg_res < lvl_pages)) { domain_flush_cache(domain, first_pte, (void *)pte - (void *)first_pte); pte = NULL; } - iov_pfn++; - pteval += VTD_PAGE_SIZE; - sg_res--; - if (!sg_res) + + if (!sg_res && nr_pages) sg = sg_next(sg); } return 0; @@ -3527,6 +3647,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) domain->iommu_count = 0; domain->iommu_coherency = 0; domain->iommu_snooping = 0; + domain->iommu_superpage = 0; domain->max_addr = 0; domain->nid = -1; @@ -3742,7 +3863,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, struct dma_pte *pte; u64 phys = 0; - pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT); + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0); if (pte) phys = dma_pte_addr(pte); diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 5619f8522738..bbd8661b3473 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -9,8 +9,12 @@ #define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) #define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) +#define VTD_STRIDE_SHIFT (9) +#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT) + #define DMA_PTE_READ (1) #define DMA_PTE_WRITE (2) +#define DMA_PTE_LARGE_PAGE (1 << 7) #define DMA_PTE_SNP (1 << 11) #define CONTEXT_TT_MULTI_LEVEL 0 -- cgit v1.2.3 From 333ba7325213f0a09dfa5ceeddb056d6ad74b3b5 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Sun, 29 May 2011 15:53:20 +0300 Subject: cfg80211: don't drop p2p probe responses Commit 0a35d36 ("cfg80211: Use capability info to detect mesh beacons") assumed that probe response with both ESS and IBSS bits cleared means that the frame was sent by a mesh sta. However, these capabilities are also being used in the p2p_find phase, and the mesh-validation broke it. Rename the WLAN_CAPABILITY_IS_MBSS macro, and verify that mesh ies exist before assuming this frame was sent by a mesh sta. Signed-off-by: Eliad Peller Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 8 ++++++-- net/wireless/scan.c | 43 ++++++++++++++++++++++++------------------- 2 files changed, 30 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index b2eee5879883..bf56b6f78270 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1003,8 +1003,12 @@ struct ieee80211_ht_info { #define WLAN_CAPABILITY_ESS (1<<0) #define WLAN_CAPABILITY_IBSS (1<<1) -/* A mesh STA sets the ESS and IBSS capability bits to zero */ -#define WLAN_CAPABILITY_IS_MBSS(cap) \ +/* + * A mesh STA sets the ESS and IBSS capability bits to zero. + * however, this holds true for p2p probe responses (in the p2p_find + * phase) as well. 
+ */ +#define WLAN_CAPABILITY_IS_STA_BSS(cap) \ (!((cap) & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS))) #define WLAN_CAPABILITY_CF_POLLABLE (1<<2) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 73a441d237b5..7a6c67667d70 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -267,13 +267,35 @@ static bool is_bss(struct cfg80211_bss *a, return memcmp(ssidie + 2, ssid, ssid_len) == 0; } +static bool is_mesh_bss(struct cfg80211_bss *a) +{ + const u8 *ie; + + if (!WLAN_CAPABILITY_IS_STA_BSS(a->capability)) + return false; + + ie = cfg80211_find_ie(WLAN_EID_MESH_ID, + a->information_elements, + a->len_information_elements); + if (!ie) + return false; + + ie = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, + a->information_elements, + a->len_information_elements); + if (!ie) + return false; + + return true; +} + static bool is_mesh(struct cfg80211_bss *a, const u8 *meshid, size_t meshidlen, const u8 *meshcfg) { const u8 *ie; - if (!WLAN_CAPABILITY_IS_MBSS(a->capability)) + if (!WLAN_CAPABILITY_IS_STA_BSS(a->capability)) return false; ie = cfg80211_find_ie(WLAN_EID_MESH_ID, @@ -311,7 +333,7 @@ static int cmp_bss(struct cfg80211_bss *a, if (a->channel != b->channel) return b->channel->center_freq - a->channel->center_freq; - if (WLAN_CAPABILITY_IS_MBSS(a->capability | b->capability)) { + if (is_mesh_bss(a) && is_mesh_bss(b)) { r = cmp_ies(WLAN_EID_MESH_ID, a->information_elements, a->len_information_elements, @@ -457,7 +479,6 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, struct cfg80211_internal_bss *res) { struct cfg80211_internal_bss *found = NULL; - const u8 *meshid, *meshcfg; /* * The reference to "res" is donated to this function. @@ -470,22 +491,6 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, res->ts = jiffies; - if (WLAN_CAPABILITY_IS_MBSS(res->pub.capability)) { - /* must be mesh, verify */ - meshid = cfg80211_find_ie(WLAN_EID_MESH_ID, - res->pub.information_elements, - res->pub.len_information_elements); - meshcfg = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, - res->pub.information_elements, - res->pub.len_information_elements); - if (!meshid || !meshcfg || - meshcfg[1] != sizeof(struct ieee80211_meshconf_ie)) { - /* bogus mesh */ - kref_put(&res->ref, bss_release); - return NULL; - } - } - spin_lock_bh(&dev->bss_lock); found = rb_find_bss(dev, res); -- cgit v1.2.3 From a3bcc23e890a6d49d6763d9eb073d711de2e0469 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Wed, 1 Jun 2011 06:49:10 +0000 Subject: af-packet: Add flag to distinguish VID 0 from no-vlan. Currently, user-space cannot determine if a 0 tcp_vlan_tci means there is no VLAN tag or the VLAN ID was zero. Add flag to make this explicit. User-space can check for TP_STATUS_VLAN_VALID || tp_vlan_tci > 0, which will be backwards compatible. Older could would have just checked for tp_vlan_tci, so it will work no worse than before. Signed-off-by: Ben Greear Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/if_packet.h | 1 + net/packet/af_packet.c | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index 72bfa5a034dd..6d66ce1791a9 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h @@ -70,6 +70,7 @@ struct tpacket_auxdata { #define TP_STATUS_COPY 0x2 #define TP_STATUS_LOSING 0x4 #define TP_STATUS_CSUMNOTREADY 0x8 +#define TP_STATUS_VLAN_VALID 0x10 /* auxdata has valid tp_vlan_tci */ /* Tx ring - header status */ #define TP_STATUS_AVAILABLE 0x0 diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 925f715686a5..ba248d93399a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -798,7 +798,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, getnstimeofday(&ts); h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; - h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); + if (vlan_tx_tag_present(skb)) { + h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); + status |= TP_STATUS_VLAN_VALID; + } else { + h.h2->tp_vlan_tci = 0; + } hdrlen = sizeof(*h.h2); break; default: @@ -1725,8 +1730,12 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, aux.tp_snaplen = skb->len; aux.tp_mac = 0; aux.tp_net = skb_network_offset(skb); - aux.tp_vlan_tci = vlan_tx_tag_get(skb); - + if (vlan_tx_tag_present(skb)) { + aux.tp_vlan_tci = vlan_tx_tag_get(skb); + aux.tp_status |= TP_STATUS_VLAN_VALID; + } else { + aux.tp_vlan_tci = 0; + } put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } -- cgit v1.2.3 From 3a43e05f4d0600e906fa09f4a65d749288c44592 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 31 May 2011 08:56:11 +0200 Subject: irq: Handle spurios irq detection for threaded irqs The detection of spurios interrupts is currently limited to first level handler. In force-threaded mode we never notice if the threaded irq does not feel responsible. This patch catches the return value of the threaded handler and forwards it to the spurious detector. If the primary handler returns only IRQ_WAKE_THREAD then the spourious detector ignores it because it gets called again from the threaded handler. [ tglx: Report the erroneous return value early and bail out ] Signed-off-by: Sebastian Andrzej Siewior Link: http://lkml.kernel.org/r/1306824972-27067-2-git-send-email-sebastian@breakpoint.cc Signed-off-by: Thomas Gleixner --- include/linux/irqreturn.h | 6 +++--- kernel/irq/handle.c | 6 ------ kernel/irq/manage.c | 24 ++++++++++++++++++------ kernel/irq/spurious.c | 22 ++++++++++++++++++---- 4 files changed, 39 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqreturn.h b/include/linux/irqreturn.h index 819acaaac3f5..714ba08dc092 100644 --- a/include/linux/irqreturn.h +++ b/include/linux/irqreturn.h @@ -8,9 +8,9 @@ * @IRQ_WAKE_THREAD handler requests to wake the handler thread */ enum irqreturn { - IRQ_NONE, - IRQ_HANDLED, - IRQ_WAKE_THREAD, + IRQ_NONE = (0 << 0), + IRQ_HANDLED = (1 << 0), + IRQ_WAKE_THREAD = (1 << 1), }; typedef enum irqreturn irqreturn_t; diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 90cb55f6d7eb..470d08c82bbe 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -132,12 +132,6 @@ handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action) switch (res) { case IRQ_WAKE_THREAD: - /* - * Set result to handled so the spurious check - * does not trigger. 
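From a driver's point of view, the change means that the value returned by the threaded handler is now what the spurious-IRQ detector judges. A minimal sketch of the resulting pattern follows; demo_hardirq, demo_thread_fn and fake_hw_pending are made-up placeholders, not code from this patch.

#include <linux/interrupt.h>

static bool fake_hw_pending;	/* stands in for reading a device status register */

static irqreturn_t demo_hardirq(int irq, void *dev_id)
{
	if (!fake_hw_pending)
		return IRQ_NONE;	/* counted as unhandled, as before */
	return IRQ_WAKE_THREAD;		/* ignored by the spurious check... */
}

static irqreturn_t demo_thread_fn(int irq, void *dev_id)
{
	fake_hw_pending = false;
	return IRQ_HANDLED;		/* ...which now judges this value instead */
}

/* Registration is unchanged, e.g.:
 *	request_threaded_irq(irq, demo_hardirq, demo_thread_fn, 0,
 *			     "demo", NULL);
 */

A forced-threaded handler that wrongly returns IRQ_NONE from its thread function therefore shows up in the unhandled-IRQ statistics instead of being silently treated as handled.
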
- */ - res = IRQ_HANDLED; - /* * Catch drivers which return WAKE_THREAD but * did not set up a thread function diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index f7ce0021e1c4..d64bafb1afd0 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -723,13 +723,16 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } * context. So we need to disable bh here to avoid deadlocks and other * side effects. */ -static void +static irqreturn_t irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) { + irqreturn_t ret; + local_bh_disable(); - action->thread_fn(action->irq, action->dev_id); + ret = action->thread_fn(action->irq, action->dev_id); irq_finalize_oneshot(desc, action, false); local_bh_enable(); + return ret; } /* @@ -737,10 +740,14 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) * preemtible - many of them need to sleep and wait for slow busses to * complete. */ -static void irq_thread_fn(struct irq_desc *desc, struct irqaction *action) +static irqreturn_t irq_thread_fn(struct irq_desc *desc, + struct irqaction *action) { - action->thread_fn(action->irq, action->dev_id); + irqreturn_t ret; + + ret = action->thread_fn(action->irq, action->dev_id); irq_finalize_oneshot(desc, action, false); + return ret; } /* @@ -753,7 +760,8 @@ static int irq_thread(void *data) }; struct irqaction *action = data; struct irq_desc *desc = irq_to_desc(action->irq); - void (*handler_fn)(struct irq_desc *desc, struct irqaction *action); + irqreturn_t (*handler_fn)(struct irq_desc *desc, + struct irqaction *action); int wake; if (force_irqthreads & test_bit(IRQTF_FORCED_THREAD, @@ -783,8 +791,12 @@ static int irq_thread(void *data) desc->istate |= IRQS_PENDING; raw_spin_unlock_irq(&desc->lock); } else { + irqreturn_t action_ret; + raw_spin_unlock_irq(&desc->lock); - handler_fn(desc, action); + action_ret = handler_fn(desc, action); + if (!noirqdebug) + note_interrupt(action->irq, desc, action_ret); } wake = atomic_dec_and_test(&desc->threads_active); diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index c9a78ba30b6f..aa57d5da18c1 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -167,6 +167,13 @@ out: jiffies + POLL_SPURIOUS_IRQ_INTERVAL); } +static inline int bad_action_ret(irqreturn_t action_ret) +{ + if (likely(action_ret <= (IRQ_HANDLED | IRQ_WAKE_THREAD))) + return 0; + return 1; +} + /* * If 99,900 of the previous 100,000 interrupts have not been handled * then assume that the IRQ is stuck in some manner. 
Drop a diagnostic @@ -182,7 +189,7 @@ __report_bad_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *action; unsigned long flags; - if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) { + if (bad_action_ret(action_ret)) { printk(KERN_ERR "irq event %d: bogus return value %x\n", irq, action_ret); } else { @@ -263,7 +270,16 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, if (desc->istate & IRQS_POLL_INPROGRESS) return; - if (unlikely(action_ret != IRQ_HANDLED)) { + /* we get here again via the threaded handler */ + if (action_ret == IRQ_WAKE_THREAD) + return; + + if (bad_action_ret(action_ret)) { + report_bad_irq(irq, desc, action_ret); + return; + } + + if (unlikely(action_ret == IRQ_NONE)) { /* * If we are seeing only the odd spurious IRQ caused by * bus asynchronicity then don't eventually trigger an error, @@ -275,8 +291,6 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, else desc->irqs_unhandled++; desc->last_unhandled = jiffies; - if (unlikely(action_ret != IRQ_NONE)) - report_bad_irq(irq, desc, action_ret); } if (unlikely(try_misrouted_irq(irq, desc, action_ret))) { -- cgit v1.2.3 From d4d84fef6d0366b585b7de13527a0faeca84d9ce Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 2 Jun 2011 10:19:41 -0400 Subject: slub: always align cpu_slab to honor cmpxchg_double requirement On an architecture without CMPXCHG_LOCAL but with DEBUG_VM enabled, the VM_BUG_ON() in __pcpu_double_call_return_bool() will cause an early panic during boot unless we always align cpu_slab properly. In principle we could remove the alignment-testing VM_BUG_ON() for architectures that don't have CMPXCHG_LOCAL, but leaving it in means that new code will tend not to break x86 even if it is introduced on another platform, and it's low cost to require alignment. Acked-by: David Rientjes Acked-by: Christoph Lameter Signed-off-by: Chris Metcalf Signed-off-by: Pekka Enberg --- include/linux/percpu.h | 3 +++ mm/slub.c | 12 ++++-------- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 8b97308e65df..9ca008f0c542 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -259,6 +259,9 @@ extern void __bad_size_call_parameter(void); * Special handling for cmpxchg_double. cmpxchg_double is passed two * percpu variables. The first has to be aligned to a double word * boundary and the second has to follow directly thereafter. + * We enforce this on all architectures even if they don't support + * a double cmpxchg instruction, since it's a cheap requirement, and it + * avoids breaking the requirement for architectures with the instruction. */ #define __pcpu_double_call_return_bool(stem, pcp1, pcp2, ...) \ ({ \ diff --git a/mm/slub.c b/mm/slub.c index 7be0223531b0..35f351f26193 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2320,16 +2320,12 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu)); -#ifdef CONFIG_CMPXCHG_LOCAL /* - * Must align to double word boundary for the double cmpxchg instructions - * to work. + * Must align to double word boundary for the double cmpxchg + * instructions to work; see __pcpu_double_call_return_bool(). 
*/ - s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu), 2 * sizeof(void *)); -#else - /* Regular alignment is sufficient */ - s->cpu_slab = alloc_percpu(struct kmem_cache_cpu); -#endif + s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu), + 2 * sizeof(void *)); if (!s->cpu_slab) return 0; -- cgit v1.2.3 From 55db4c64eddf37e31279ec15fe90314713bc9cfa Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 4 Jun 2011 06:33:24 +0900 Subject: Revert "tty: make receive_buf() return the amout of bytes received" This reverts commit b1c43f82c5aa265442f82dba31ce985ebb7aa71c. It was broken in so many ways, and results in random odd pty issues. It re-introduced the buggy schedule_work() in flush_to_ldisc() that can cause endless work-loops (see commit a5660b41af6a: "tty: fix endless work loop when the buffer fills up"). It also used an "unsigned int" return value fo the ->receive_buf() function, but then made multiple functions return a negative error code, and didn't actually check for the error in the caller. And it didn't actually work at all. BenH bisected down odd tty behavior to it: "It looks like the patch is causing some major malfunctions of the X server for me, possibly related to PTYs. For example, cat'ing a large file in a gnome terminal hangs the kernel for -minutes- in a loop of what looks like flush_to_ldisc/workqueue code, (some ftrace data in the quoted bits further down). ... Some more data: It -looks- like what happens is that the flush_to_ldisc work queue entry constantly re-queues itself (because the PTY is full ?) and the workqueue thread will basically loop forver calling it without ever scheduling, thus starving the consumer process that could have emptied the PTY." which is pretty much exactly the problem we fixed in a5660b41af6a. Milton Miller pointed out the 'unsigned int' issue. 
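With the revert applied, a line discipline's receive hook is back to the void-returning prototype visible throughout the diff below. A minimal sketch of that shape (the "demo" names are placeholders, not part of the patch):

#include <linux/module.h>
#include <linux/tty.h>

static void demo_ldisc_receive_buf(struct tty_struct *tty,
				   const unsigned char *cp, char *fp,
				   int count)
{
	int i;

	for (i = 0; i < count; i++) {
		if (fp && fp[i])
			continue;	/* flagged (error) byte: skip it */
		/* consume cp[i] here */
	}
}

static struct tty_ldisc_ops demo_ldisc_ops = {
	.owner	     = THIS_MODULE,
	.name	     = "demo",
	.receive_buf = demo_ldisc_receive_buf,
};

Since there is no byte count to hand back, a discipline that cannot keep up has to rely on tty_throttle()/tty_unthrottle() for flow control rather than returning a partial count, which is the behaviour the reverted commit had tried to introduce.
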
Reported-by: Benjamin Herrenschmidt Reported-by: Milton Miller Cc: Stefan Bigler Cc: Toby Gray Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: Alan Cox Signed-off-by: Linus Torvalds --- drivers/bluetooth/hci_ldisc.c | 17 ++++------- drivers/input/serio/serport.c | 10 ++----- drivers/isdn/gigaset/ser-gigaset.c | 8 ++--- drivers/misc/ti-st/st_core.c | 6 ++-- drivers/net/caif/caif_serial.c | 6 ++-- drivers/net/can/slcan.c | 9 ++---- drivers/net/hamradio/6pack.c | 8 ++--- drivers/net/hamradio/mkiss.c | 11 +++---- drivers/net/irda/irtty-sir.c | 16 +++++----- drivers/net/ppp_async.c | 6 ++-- drivers/net/ppp_synctty.c | 6 ++-- drivers/net/slip.c | 11 +++---- drivers/net/wan/x25_asy.c | 7 ++--- drivers/tty/n_gsm.c | 6 ++-- drivers/tty/n_hdlc.c | 18 +++++------ drivers/tty/n_r3964.c | 10 +++---- drivers/tty/n_tty.c | 61 +++++++++++++++++++++++++++++--------- drivers/tty/tty_buffer.c | 15 ++++------ drivers/tty/vt/selection.c | 3 +- include/linux/tty_ldisc.h | 9 +++--- 20 files changed, 115 insertions(+), 128 deletions(-) (limited to 'include/linux') diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index b3f01996318f..48ad2a7ab080 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -355,29 +355,24 @@ static void hci_uart_tty_wakeup(struct tty_struct *tty) * flags pointer to flags for data * count count of received data in bytes * - * Return Value: Number of bytes received + * Return Value: None */ -static unsigned int hci_uart_tty_receive(struct tty_struct *tty, - const u8 *data, char *flags, int count) +static void hci_uart_tty_receive(struct tty_struct *tty, const u8 *data, char *flags, int count) { struct hci_uart *hu = (void *)tty->disc_data; - int received; if (!hu || tty != hu->tty) - return -ENODEV; + return; if (!test_bit(HCI_UART_PROTO_SET, &hu->flags)) - return -EINVAL; + return; spin_lock(&hu->rx_lock); - received = hu->proto->recv(hu, (void *) data, count); - if (received > 0) - hu->hdev->stat.byte_rx += received; + hu->proto->recv(hu, (void *) data, count); + hu->hdev->stat.byte_rx += count; spin_unlock(&hu->rx_lock); tty_unthrottle(tty); - - return received; } static int hci_uart_register_dev(struct hci_uart *hu) diff --git a/drivers/input/serio/serport.c b/drivers/input/serio/serport.c index f3698967edf6..8755f5f3ad37 100644 --- a/drivers/input/serio/serport.c +++ b/drivers/input/serio/serport.c @@ -120,21 +120,17 @@ static void serport_ldisc_close(struct tty_struct *tty) * 'interrupt' routine. */ -static unsigned int serport_ldisc_receive(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void serport_ldisc_receive(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct serport *serport = (struct serport*) tty->disc_data; unsigned long flags; unsigned int ch_flags; - int ret = 0; int i; spin_lock_irqsave(&serport->lock, flags); - if (!test_bit(SERPORT_ACTIVE, &serport->flags)) { - ret = -EINVAL; + if (!test_bit(SERPORT_ACTIVE, &serport->flags)) goto out; - } for (i = 0; i < count; i++) { switch (fp[i]) { @@ -156,8 +152,6 @@ static unsigned int serport_ldisc_receive(struct tty_struct *tty, out: spin_unlock_irqrestore(&serport->lock, flags); - - return ret == 0 ? 
count : ret; } /* diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c index 1d44d470897c..86a5c4f7775e 100644 --- a/drivers/isdn/gigaset/ser-gigaset.c +++ b/drivers/isdn/gigaset/ser-gigaset.c @@ -674,7 +674,7 @@ gigaset_tty_ioctl(struct tty_struct *tty, struct file *file, * cflags buffer containing error flags for received characters (ignored) * count number of received characters */ -static unsigned int +static void gigaset_tty_receive(struct tty_struct *tty, const unsigned char *buf, char *cflags, int count) { @@ -683,12 +683,12 @@ gigaset_tty_receive(struct tty_struct *tty, const unsigned char *buf, struct inbuf_t *inbuf; if (!cs) - return -ENODEV; + return; inbuf = cs->inbuf; if (!inbuf) { dev_err(cs->dev, "%s: no inbuf\n", __func__); cs_put(cs); - return -EINVAL; + return; } tail = inbuf->tail; @@ -725,8 +725,6 @@ gigaset_tty_receive(struct tty_struct *tty, const unsigned char *buf, gig_dbg(DEBUG_INTR, "%s-->BH", __func__); gigaset_schedule_event(cs); cs_put(cs); - - return count; } /* diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c index 1a05fe08e2cb..f91f82eabda7 100644 --- a/drivers/misc/ti-st/st_core.c +++ b/drivers/misc/ti-st/st_core.c @@ -747,8 +747,8 @@ static void st_tty_close(struct tty_struct *tty) pr_debug("%s: done ", __func__); } -static unsigned int st_tty_receive(struct tty_struct *tty, - const unsigned char *data, char *tty_flags, int count) +static void st_tty_receive(struct tty_struct *tty, const unsigned char *data, + char *tty_flags, int count) { #ifdef VERBOSE print_hex_dump(KERN_DEBUG, ">in>", DUMP_PREFIX_NONE, @@ -761,8 +761,6 @@ static unsigned int st_tty_receive(struct tty_struct *tty, */ st_recv(tty->disc_data, data, count); pr_debug("done %s", __func__); - - return count; } /* wake-up function called in from the TTY layer diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index 73c7e03617ec..3df0c0f8b8bf 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -167,8 +167,8 @@ static inline void debugfs_tx(struct ser_device *ser, const u8 *data, int size) #endif -static unsigned int ldisc_receive(struct tty_struct *tty, - const u8 *data, char *flags, int count) +static void ldisc_receive(struct tty_struct *tty, const u8 *data, + char *flags, int count) { struct sk_buff *skb = NULL; struct ser_device *ser; @@ -215,8 +215,6 @@ static unsigned int ldisc_receive(struct tty_struct *tty, } else ++ser->dev->stats.rx_dropped; update_tty_status(ser); - - return count; } static int handle_tx(struct ser_device *ser) diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index 75622d54581f..1b49df6b2470 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -425,17 +425,16 @@ static void slc_setup(struct net_device *dev) * in parallel */ -static unsigned int slcan_receive_buf(struct tty_struct *tty, +static void slcan_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct slcan *sl = (struct slcan *) tty->disc_data; - int bytes = count; if (!sl || sl->magic != SLCAN_MAGIC || !netif_running(sl->dev)) - return -ENODEV; + return; /* Read the characters out of the buffer */ - while (bytes--) { + while (count--) { if (fp && *fp++) { if (!test_and_set_bit(SLF_ERROR, &sl->flags)) sl->dev->stats.rx_errors++; @@ -444,8 +443,6 @@ static unsigned int slcan_receive_buf(struct tty_struct *tty, } slcan_unesc(sl, *cp++); } - - return count; } /************************************ diff --git 
a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 992089639ea4..3e5d0b6b6516 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -456,7 +456,7 @@ out: * a block of 6pack data has been received, which can now be decapsulated * and sent on to some IP layer for further processing. */ -static unsigned int sixpack_receive_buf(struct tty_struct *tty, +static void sixpack_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct sixpack *sp; @@ -464,11 +464,11 @@ static unsigned int sixpack_receive_buf(struct tty_struct *tty, int count1; if (!count) - return 0; + return; sp = sp_get(tty); if (!sp) - return -ENODEV; + return; memcpy(buf, cp, count < sizeof(buf) ? count : sizeof(buf)); @@ -487,8 +487,6 @@ static unsigned int sixpack_receive_buf(struct tty_struct *tty, sp_put(sp); tty_unthrottle(tty); - - return count1; } /* diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 0e4f23531140..4c628393c8b1 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -923,14 +923,13 @@ static long mkiss_compat_ioctl(struct tty_struct *tty, struct file *file, * a block of data has been received, which can now be decapsulated * and sent on to the AX.25 layer for further processing. */ -static unsigned int mkiss_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void mkiss_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { struct mkiss *ax = mkiss_get(tty); - int bytes = count; if (!ax) - return -ENODEV; + return; /* * Argh! mtu change time! - costs us the packet part received @@ -940,7 +939,7 @@ static unsigned int mkiss_receive_buf(struct tty_struct *tty, ax_changedmtu(ax); /* Read the characters out of the buffer */ - while (bytes--) { + while (count--) { if (fp != NULL && *fp++) { if (!test_and_set_bit(AXF_ERROR, &ax->flags)) ax->dev->stats.rx_errors++; @@ -953,8 +952,6 @@ static unsigned int mkiss_receive_buf(struct tty_struct *tty, mkiss_put(ax); tty_unthrottle(tty); - - return count; } /* diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c index 035861d8acb1..3352b2443e58 100644 --- a/drivers/net/irda/irtty-sir.c +++ b/drivers/net/irda/irtty-sir.c @@ -216,23 +216,23 @@ static int irtty_do_write(struct sir_dev *dev, const unsigned char *ptr, size_t * usbserial: urb-complete-interrupt / softint */ -static unsigned int irtty_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void irtty_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { struct sir_dev *dev; struct sirtty_cb *priv = tty->disc_data; int i; - IRDA_ASSERT(priv != NULL, return -ENODEV;); - IRDA_ASSERT(priv->magic == IRTTY_MAGIC, return -EINVAL;); + IRDA_ASSERT(priv != NULL, return;); + IRDA_ASSERT(priv->magic == IRTTY_MAGIC, return;); if (unlikely(count==0)) /* yes, this happens */ - return 0; + return; dev = priv->dev; if (!dev) { IRDA_WARNING("%s(), not ready yet!\n", __func__); - return -ENODEV; + return; } for (i = 0; i < count; i++) { @@ -242,13 +242,11 @@ static unsigned int irtty_receive_buf(struct tty_struct *tty, if (fp && *fp++) { IRDA_DEBUG(0, "Framing or parity error!\n"); sirdev_receive(dev, NULL, 0); /* notify sir_dev (updating stats) */ - return -EINVAL; + return; } } sirdev_receive(dev, cp, count); - - return count; } /* diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c index 53872d7d7382..a1b82c9c67d2 100644 --- 
a/drivers/net/ppp_async.c +++ b/drivers/net/ppp_async.c @@ -340,7 +340,7 @@ ppp_asynctty_poll(struct tty_struct *tty, struct file *file, poll_table *wait) } /* May sleep, don't call from interrupt level or with interrupts disabled */ -static unsigned int +static void ppp_asynctty_receive(struct tty_struct *tty, const unsigned char *buf, char *cflags, int count) { @@ -348,7 +348,7 @@ ppp_asynctty_receive(struct tty_struct *tty, const unsigned char *buf, unsigned long flags; if (!ap) - return -ENODEV; + return; spin_lock_irqsave(&ap->recv_lock, flags); ppp_async_input(ap, buf, cflags, count); spin_unlock_irqrestore(&ap->recv_lock, flags); @@ -356,8 +356,6 @@ ppp_asynctty_receive(struct tty_struct *tty, const unsigned char *buf, tasklet_schedule(&ap->tsk); ap_put(ap); tty_unthrottle(tty); - - return count; } static void diff --git a/drivers/net/ppp_synctty.c b/drivers/net/ppp_synctty.c index 0815790a5cf9..2573f525f11c 100644 --- a/drivers/net/ppp_synctty.c +++ b/drivers/net/ppp_synctty.c @@ -381,7 +381,7 @@ ppp_sync_poll(struct tty_struct *tty, struct file *file, poll_table *wait) } /* May sleep, don't call from interrupt level or with interrupts disabled */ -static unsigned int +static void ppp_sync_receive(struct tty_struct *tty, const unsigned char *buf, char *cflags, int count) { @@ -389,7 +389,7 @@ ppp_sync_receive(struct tty_struct *tty, const unsigned char *buf, unsigned long flags; if (!ap) - return -ENODEV; + return; spin_lock_irqsave(&ap->recv_lock, flags); ppp_sync_input(ap, buf, cflags, count); spin_unlock_irqrestore(&ap->recv_lock, flags); @@ -397,8 +397,6 @@ ppp_sync_receive(struct tty_struct *tty, const unsigned char *buf, tasklet_schedule(&ap->tsk); sp_put(ap); tty_unthrottle(tty); - - return count; } static void diff --git a/drivers/net/slip.c b/drivers/net/slip.c index 584809c656d5..8ec1a9a0bb9a 100644 --- a/drivers/net/slip.c +++ b/drivers/net/slip.c @@ -670,17 +670,16 @@ static void sl_setup(struct net_device *dev) * in parallel */ -static unsigned int slip_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void slip_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { struct slip *sl = tty->disc_data; - int bytes = count; if (!sl || sl->magic != SLIP_MAGIC || !netif_running(sl->dev)) - return -ENODEV; + return; /* Read the characters out of the buffer */ - while (bytes--) { + while (count--) { if (fp && *fp++) { if (!test_and_set_bit(SLF_ERROR, &sl->flags)) sl->dev->stats.rx_errors++; @@ -694,8 +693,6 @@ static unsigned int slip_receive_buf(struct tty_struct *tty, #endif slip_unesc(sl, *cp++); } - - return count; } /************************************ diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 40398bf7d036..24297b274cd4 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -517,18 +517,17 @@ static int x25_asy_close(struct net_device *dev) * and sent on to some IP layer for further processing. 
*/ -static unsigned int x25_asy_receive_buf(struct tty_struct *tty, +static void x25_asy_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct x25_asy *sl = tty->disc_data; - int bytes = count; if (!sl || sl->magic != X25_ASY_MAGIC || !netif_running(sl->dev)) return; /* Read the characters out of the buffer */ - while (bytes--) { + while (count--) { if (fp && *fp++) { if (!test_and_set_bit(SLF_ERROR, &sl->flags)) sl->dev->stats.rx_errors++; @@ -537,8 +536,6 @@ static unsigned int x25_asy_receive_buf(struct tty_struct *tty, } x25_asy_unesc(sl, *cp++); } - - return count; } /* diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index a4c42a75a3bf..09e8c7d53af3 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2128,8 +2128,8 @@ static void gsmld_detach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) gsm->tty = NULL; } -static unsigned int gsmld_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void gsmld_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { struct gsm_mux *gsm = tty->disc_data; const unsigned char *dp; @@ -2162,8 +2162,6 @@ static unsigned int gsmld_receive_buf(struct tty_struct *tty, } /* FASYNC if needed ? */ /* If clogged call tty_throttle(tty); */ - - return count; } /** diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index cac666314aef..cea56033b34c 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -188,8 +188,8 @@ static unsigned int n_hdlc_tty_poll(struct tty_struct *tty, struct file *filp, poll_table *wait); static int n_hdlc_tty_open(struct tty_struct *tty); static void n_hdlc_tty_close(struct tty_struct *tty); -static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, - const __u8 *cp, char *fp, int count); +static void n_hdlc_tty_receive(struct tty_struct *tty, const __u8 *cp, + char *fp, int count); static void n_hdlc_tty_wakeup(struct tty_struct *tty); #define bset(p,b) ((p)[(b) >> 5] |= (1 << ((b) & 0x1f))) @@ -509,8 +509,8 @@ static void n_hdlc_tty_wakeup(struct tty_struct *tty) * Called by tty low level driver when receive data is available. Data is * interpreted as one HDLC frame. 
*/ -static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, - const __u8 *data, char *flags, int count) +static void n_hdlc_tty_receive(struct tty_struct *tty, const __u8 *data, + char *flags, int count) { register struct n_hdlc *n_hdlc = tty2n_hdlc (tty); register struct n_hdlc_buf *buf; @@ -521,20 +521,20 @@ static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, /* This can happen if stuff comes in on the backup tty */ if (!n_hdlc || tty != n_hdlc->tty) - return -ENODEV; + return; /* verify line is using HDLC discipline */ if (n_hdlc->magic != HDLC_MAGIC) { printk("%s(%d) line not using HDLC discipline\n", __FILE__,__LINE__); - return -EINVAL; + return; } if ( count>maxframe ) { if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d) rx count>maxframesize, data discarded\n", __FILE__,__LINE__); - return -EINVAL; + return; } /* get a free HDLC buffer */ @@ -550,7 +550,7 @@ static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d) no more rx buffers, data discarded\n", __FILE__,__LINE__); - return -EINVAL; + return; } /* copy received data to HDLC buffer */ @@ -565,8 +565,6 @@ static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, if (n_hdlc->tty->fasync != NULL) kill_fasync (&n_hdlc->tty->fasync, SIGIO, POLL_IN); - return count; - } /* end of n_hdlc_tty_receive() */ /** diff --git a/drivers/tty/n_r3964.c b/drivers/tty/n_r3964.c index a4bc39c21a43..5c6c31459a2f 100644 --- a/drivers/tty/n_r3964.c +++ b/drivers/tty/n_r3964.c @@ -139,8 +139,8 @@ static int r3964_ioctl(struct tty_struct *tty, struct file *file, static void r3964_set_termios(struct tty_struct *tty, struct ktermios *old); static unsigned int r3964_poll(struct tty_struct *tty, struct file *file, struct poll_table_struct *wait); -static unsigned int r3964_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count); +static void r3964_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count); static struct tty_ldisc_ops tty_ldisc_N_R3964 = { .owner = THIS_MODULE, @@ -1239,8 +1239,8 @@ static unsigned int r3964_poll(struct tty_struct *tty, struct file *file, return result; } -static unsigned int r3964_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void r3964_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { struct r3964_info *pInfo = tty->disc_data; const unsigned char *p; @@ -1257,8 +1257,6 @@ static unsigned int r3964_receive_buf(struct tty_struct *tty, } } - - return count; } MODULE_LICENSE("GPL"); diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 95d0a9c2dd13..0ad32888091c 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -81,6 +81,38 @@ static inline int tty_put_user(struct tty_struct *tty, unsigned char x, return put_user(x, ptr); } +/** + * n_tty_set__room - receive space + * @tty: terminal + * + * Called by the driver to find out how much data it is + * permitted to feed to the line discipline without any being lost + * and thus to manage flow control. Not serialized. Answers for the + * "instant". + */ + +static void n_tty_set_room(struct tty_struct *tty) +{ + /* tty->read_cnt is not read locked ? */ + int left = N_TTY_BUF_SIZE - tty->read_cnt - 1; + int old_left; + + /* + * If we are doing input canonicalization, and there are no + * pending newlines, let characters through without limit, so + * that erase characters will be handled. Other excess + * characters will be beeped. 
+ */ + if (left <= 0) + left = tty->icanon && !tty->canon_data; + old_left = tty->receive_room; + tty->receive_room = left; + + /* Did this open up the receive buffer? We may need to flip */ + if (left && !old_left) + schedule_work(&tty->buf.work); +} + static void put_tty_queue_nolock(unsigned char c, struct tty_struct *tty) { if (tty->read_cnt < N_TTY_BUF_SIZE) { @@ -152,6 +184,7 @@ static void reset_buffer_flags(struct tty_struct *tty) tty->canon_head = tty->canon_data = tty->erasing = 0; memset(&tty->read_flags, 0, sizeof tty->read_flags); + n_tty_set_room(tty); check_unthrottle(tty); } @@ -1327,19 +1360,17 @@ static void n_tty_write_wakeup(struct tty_struct *tty) * calls one at a time and in order (or using flush_to_ldisc) */ -static unsigned int n_tty_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { const unsigned char *p; char *f, flags = TTY_NORMAL; int i; char buf[64]; unsigned long cpuflags; - int left; - int ret = 0; if (!tty->read_buf) - return 0; + return; if (tty->real_raw) { spin_lock_irqsave(&tty->read_lock, cpuflags); @@ -1349,7 +1380,6 @@ static unsigned int n_tty_receive_buf(struct tty_struct *tty, memcpy(tty->read_buf + tty->read_head, cp, i); tty->read_head = (tty->read_head + i) & (N_TTY_BUF_SIZE-1); tty->read_cnt += i; - ret += i; cp += i; count -= i; @@ -1359,10 +1389,8 @@ static unsigned int n_tty_receive_buf(struct tty_struct *tty, memcpy(tty->read_buf + tty->read_head, cp, i); tty->read_head = (tty->read_head + i) & (N_TTY_BUF_SIZE-1); tty->read_cnt += i; - ret += i; spin_unlock_irqrestore(&tty->read_lock, cpuflags); } else { - ret = count; for (i = count, p = cp, f = fp; i; i--, p++) { if (f) flags = *f++; @@ -1390,6 +1418,8 @@ static unsigned int n_tty_receive_buf(struct tty_struct *tty, tty->ops->flush_chars(tty); } + n_tty_set_room(tty); + if ((!tty->icanon && (tty->read_cnt >= tty->minimum_to_wake)) || L_EXTPROC(tty)) { kill_fasync(&tty->fasync, SIGIO, POLL_IN); @@ -1402,12 +1432,8 @@ static unsigned int n_tty_receive_buf(struct tty_struct *tty, * mode. We don't want to throttle the driver if we're in * canonical mode and don't have a newline yet! */ - left = N_TTY_BUF_SIZE - tty->read_cnt - 1; - - if (left < TTY_THRESHOLD_THROTTLE) + if (tty->receive_room < TTY_THRESHOLD_THROTTLE) tty_throttle(tty); - - return ret; } int is_ignored(int sig) @@ -1451,6 +1477,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old) if (test_bit(TTY_HW_COOK_IN, &tty->flags)) { tty->raw = 1; tty->real_raw = 1; + n_tty_set_room(tty); return; } if (I_ISTRIP(tty) || I_IUCLC(tty) || I_IGNCR(tty) || @@ -1503,6 +1530,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old) else tty->real_raw = 0; } + n_tty_set_room(tty); /* The termios change make the tty ready for I/O */ wake_up_interruptible(&tty->write_wait); wake_up_interruptible(&tty->read_wait); @@ -1784,6 +1812,8 @@ do_it_again: retval = -ERESTARTSYS; break; } + /* FIXME: does n_tty_set_room need locking ? */ + n_tty_set_room(tty); timeout = schedule_timeout(timeout); continue; } @@ -1855,8 +1885,10 @@ do_it_again: * longer than TTY_THRESHOLD_UNTHROTTLE in canonical mode, * we won't get any more characters. 
*/ - if (n_tty_chars_in_buffer(tty) <= TTY_THRESHOLD_UNTHROTTLE) + if (n_tty_chars_in_buffer(tty) <= TTY_THRESHOLD_UNTHROTTLE) { + n_tty_set_room(tty); check_unthrottle(tty); + } if (b - buf >= minimum) break; @@ -1878,6 +1910,7 @@ do_it_again: } else if (test_and_clear_bit(TTY_PUSH, &tty->flags)) goto do_it_again; + n_tty_set_room(tty); return retval; } diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index 46de2e075dac..f1a7918d71aa 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -416,7 +416,6 @@ static void flush_to_ldisc(struct work_struct *work) struct tty_buffer *head, *tail = tty->buf.tail; int seen_tail = 0; while ((head = tty->buf.head) != NULL) { - int copied; int count; char *char_buf; unsigned char *flag_buf; @@ -443,19 +442,17 @@ static void flush_to_ldisc(struct work_struct *work) line discipline as we want to empty the queue */ if (test_bit(TTY_FLUSHPENDING, &tty->flags)) break; + if (!tty->receive_room || seen_tail) + break; + if (count > tty->receive_room) + count = tty->receive_room; char_buf = head->char_buf_ptr + head->read; flag_buf = head->flag_buf_ptr + head->read; + head->read += count; spin_unlock_irqrestore(&tty->buf.lock, flags); - copied = disc->ops->receive_buf(tty, char_buf, + disc->ops->receive_buf(tty, char_buf, flag_buf, count); spin_lock_irqsave(&tty->buf.lock, flags); - - head->read += copied; - - if (copied == 0 || seen_tail) { - schedule_work(&tty->buf.work); - break; - } } clear_bit(TTY_FLUSHING, &tty->flags); } diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 67b1d0d7c8ac..fb864e7fcd13 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -332,7 +332,8 @@ int paste_selection(struct tty_struct *tty) continue; } count = sel_buffer_lth - pasted; - count = tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted, + count = min(count, tty->receive_room); + tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted, NULL, count); pasted += count; } diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index 5b07792ccb46..ff7dc08696a8 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -76,7 +76,7 @@ * tty device. It is solely the responsibility of the line * discipline to handle poll requests. * - * unsigned int (*receive_buf)(struct tty_struct *, const unsigned char *cp, + * void (*receive_buf)(struct tty_struct *, const unsigned char *cp, * char *fp, int count); * * This function is called by the low-level tty driver to send @@ -84,8 +84,7 @@ * processing. is a pointer to the buffer of input * character received by the device. is a pointer to a * pointer of flag bytes which indicate whether a character was - * received with a parity error, etc. Returns the amount of bytes - * received. + * received with a parity error, etc. * * void (*write_wakeup)(struct tty_struct *); * @@ -141,8 +140,8 @@ struct tty_ldisc_ops { /* * The following routines are called from below. */ - unsigned int (*receive_buf)(struct tty_struct *, - const unsigned char *cp, char *fp, int count); + void (*receive_buf)(struct tty_struct *, const unsigned char *cp, + char *fp, int count); void (*write_wakeup)(struct tty_struct *); void (*dcd_change)(struct tty_struct *, unsigned int, struct pps_event_time *); -- cgit v1.2.3 From 9e1f1de02c2275d7172e18dc4e7c2065777611bf Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 3 Jun 2011 18:24:58 -0400 Subject: more conservative S_NOSEC handling Caching "we have already removed suid/caps" was overenthusiastic as merged. 
On network filesystems we might have had suid/caps set on another client, silently picked by this client on revalidate, all of that *without* clearing the S_NOSEC flag. AFAICS, the only reasonably sane way to deal with that is * new superblock flag; unless set, S_NOSEC is not going to be set. * local block filesystems set it in their ->mount() (more accurately, mount_bdev() does, so does btrfs ->mount(), users of mount_bdev() other than local block ones clear it) * if any network filesystem (or a cluster one) wants to use S_NOSEC, it'll need to set MS_NOSEC in sb->s_flags *AND* take care to clear S_NOSEC when inode attribute changes are picked from other clients. It's not an earth-shattering hole (anybody that can set suid on another client will almost certainly be able to write to the file before doing that anyway), but it's a bug that needs fixing. Signed-off-by: Al Viro --- fs/btrfs/super.c | 2 +- fs/fuse/inode.c | 2 ++ fs/ocfs2/super.c | 2 +- fs/super.c | 2 +- include/linux/fs.h | 3 ++- mm/filemap.c | 2 +- 6 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9b2e7e5bc3ef..d158b672a2d2 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -819,7 +819,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, } else { char b[BDEVNAME_SIZE]; - s->s_flags = flags; + s->s_flags = flags | MS_NOSEC; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); error = btrfs_fill_super(s, fs_devices, data, flags & MS_SILENT ? 1 : 0); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index cc6ec4b2f0ff..38f84cd48b67 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -921,6 +921,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (sb->s_flags & MS_MANDLOCK) goto err; + sb->s_flags &= ~MS_NOSEC; + if (!parse_fuse_opt((char *) data, &d, is_bdev)) goto err; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index cdbaf5e97308..56f61027236b 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1072,7 +1072,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = OCFS2_SUPER_MAGIC; - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + sb->s_flags = (sb->s_flags & ~(MS_POSIXACL | MS_NOSEC)) | ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? 
MS_POSIXACL : 0); /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, diff --git a/fs/super.c b/fs/super.c index c75593953c52..ab3d672db0de 100644 --- a/fs/super.c +++ b/fs/super.c @@ -822,7 +822,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, } else { char b[BDEVNAME_SIZE]; - s->s_flags = flags; + s->s_flags = flags | MS_NOSEC; s->s_mode = mode; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(bdev)); diff --git a/include/linux/fs.h b/include/linux/fs.h index c55d6b7cd5d6..646a1836152a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -208,6 +208,7 @@ struct inodes_stat_t { #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ #define MS_I_VERSION (1<<23) /* Update inode I_version field */ #define MS_STRICTATIME (1<<24) /* Always perform atime updates */ +#define MS_NOSEC (1<<28) #define MS_BORN (1<<29) #define MS_ACTIVE (1<<30) #define MS_NOUSER (1<<31) @@ -2591,7 +2592,7 @@ static inline int is_sxid(mode_t mode) static inline void inode_has_no_xattr(struct inode *inode) { - if (!is_sxid(inode->i_mode)) + if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & MS_NOSEC)) inode->i_flags |= S_NOSEC; } diff --git a/mm/filemap.c b/mm/filemap.c index d7b10578a64b..a8251a8d3457 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2000,7 +2000,7 @@ int file_remove_suid(struct file *file) error = security_inode_killpriv(dentry); if (!error && killsuid) error = __remove_suid(dentry, killsuid); - if (!error) + if (!error && (inode->i_sb->s_flags & MS_NOSEC)) inode->i_flags |= S_NOSEC; return error; -- cgit v1.2.3 From d7ebe75b065a7c2d58ffc12f9d2e00d5ea4e71eb Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Fri, 3 Jun 2011 17:59:51 -0400 Subject: perf: Fix comments in include/linux/perf_event.h Fix include/linux/perf_event.h comments to be consistent with the actual #define names. This is trivial, but it can be a bit confusing when first reading through the file. 
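For illustration only (this sketch is not part of the patch; struct single_read and the other local names exist only in the example): a minimal user-space reader that sets both timing bits and consumes the documented single-event (!PERF_FORMAT_GROUP) layout looks roughly like this.

/* Reads one hardware counter with both timing fields enabled. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

struct single_read {
	uint64_t value;		/* u64 value                          */
	uint64_t time_enabled;	/* && PERF_FORMAT_TOTAL_TIME_ENABLED  */
	uint64_t time_running;	/* && PERF_FORMAT_TOTAL_TIME_RUNNING  */
};

int main(void)
{
	struct perf_event_attr attr;
	struct single_read r;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
			   PERF_FORMAT_TOTAL_TIME_RUNNING;

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	if (read(fd, &r, sizeof(r)) == sizeof(r))
		printf("value=%llu enabled=%llu running=%llu\n",
		       (unsigned long long)r.value,
		       (unsigned long long)r.time_enabled,
		       (unsigned long long)r.time_running);
	close(fd);
	return 0;
}

If the counter was never multiplexed, time_enabled equals time_running.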
Signed-off-by: Vince Weaver Cc: Peter Zijlstra Cc: paulus@samba.org Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Steven Rostedt Link: http://lkml.kernel.org/r/alpine.DEB.2.00.1106031757090.29381@cl320.eecs.utk.edu Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3412684ce5d5..e0786e35f247 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -137,14 +137,14 @@ enum perf_event_sample_format { * * struct read_format { * { u64 value; - * { u64 time_enabled; } && PERF_FORMAT_ENABLED - * { u64 time_running; } && PERF_FORMAT_RUNNING + * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED + * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING * { u64 id; } && PERF_FORMAT_ID * } && !PERF_FORMAT_GROUP * * { u64 nr; - * { u64 time_enabled; } && PERF_FORMAT_ENABLED - * { u64 time_running; } && PERF_FORMAT_RUNNING + * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED + * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING * { u64 value; * { u64 id; } && PERF_FORMAT_ID * } cntr[nr]; -- cgit v1.2.3 From fb04883371f2cb7867d24783e7d590036dc9b548 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 May 2011 15:44:27 +0200 Subject: netfilter: add more values to enum ip_conntrack_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Following error is raised (and other similar ones) : net/ipv4/netfilter/nf_nat_standalone.c: In function ‘nf_nat_fn’: net/ipv4/netfilter/nf_nat_standalone.c:119:2: warning: case value ‘4’ not in enumerated type ‘enum ip_conntrack_info’ gcc barfs on adding two enum values and getting a not enumerated result : case IP_CT_RELATED+IP_CT_IS_REPLY: Add missing enum values Signed-off-by: Eric Dumazet CC: David Miller Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_common.h | 3 +++ net/ipv4/netfilter/ipt_CLUSTERIP.c | 6 +++--- net/ipv4/netfilter/ipt_MASQUERADE.c | 2 +- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- net/ipv4/netfilter/nf_nat_core.c | 2 +- net/ipv4/netfilter/nf_nat_rule.c | 2 +- net/ipv4/netfilter/nf_nat_standalone.c | 4 ++-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 +- net/netfilter/nf_conntrack_core.c | 4 ++-- net/netfilter/nf_conntrack_ftp.c | 2 +- net/netfilter/nf_conntrack_h323_main.c | 10 ++++------ net/netfilter/nf_conntrack_irc.c | 3 +-- net/netfilter/nf_conntrack_pptp.c | 3 +-- net/netfilter/nf_conntrack_sane.c | 2 +- net/netfilter/nf_conntrack_sip.c | 2 +- net/netfilter/xt_socket.c | 4 ++-- 16 files changed, 26 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 50cdc2559a5a..0d3dd66322ec 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -18,6 +18,9 @@ enum ip_conntrack_info { /* >= this indicates reply direction */ IP_CT_IS_REPLY, + IP_CT_ESTABLISHED_REPLY = IP_CT_ESTABLISHED + IP_CT_IS_REPLY, + IP_CT_RELATED_REPLY = IP_CT_RELATED + IP_CT_IS_REPLY, + IP_CT_NEW_REPLY = IP_CT_NEW + IP_CT_IS_REPLY, /* Number of distinct IP_CT types (no NEW in reply dirn). 
*/ IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1 }; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index d609ac3cb9a4..5c9e97c79017 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -307,7 +307,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) * error messages (RELATED) and information requests (see below) */ if (ip_hdr(skb)->protocol == IPPROTO_ICMP && (ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)) + ctinfo == IP_CT_RELATED_REPLY)) return XT_CONTINUE; /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, @@ -321,12 +321,12 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) ct->mark = hash; break; case IP_CT_RELATED: - case IP_CT_RELATED+IP_CT_IS_REPLY: + case IP_CT_RELATED_REPLY: /* FIXME: we don't handle expectations at the * moment. they can arrive on a different node than * the master connection (e.g. FTP passive mode) */ case IP_CT_ESTABLISHED: - case IP_CT_ESTABLISHED+IP_CT_IS_REPLY: + case IP_CT_ESTABLISHED_REPLY: break; default: break; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index d2ed9dc74ebc..9931152a78b5 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -60,7 +60,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) nat = nfct_nat(ct); NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); + ctinfo == IP_CT_RELATED_REPLY)); /* Source address is 0.0.0.0 - locally generated packet that is * probably not supposed to be masqueraded. diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 5a03c02af999..db10075dd88e 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -101,7 +101,7 @@ static unsigned int ipv4_confirm(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); - if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) + if (!ct || ctinfo == IP_CT_RELATED_REPLY) goto out; help = nfct_help(ct); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 9c71b2755ce3..3346de5d94d0 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -433,7 +433,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, /* Must be RELATED */ NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED || - skb->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY); + skb->nfctinfo == IP_CT_RELATED_REPLY); /* Redirects on non-null nats must be dropped, else they'll start talking to each other without our translation, and be diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 21c30426480b..733c9abc1cbd 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -53,7 +53,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) /* Connection must be valid and new. 
*/ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); + ctinfo == IP_CT_RELATED_REPLY)); NF_CT_ASSERT(par->out != NULL); return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC); diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 7317bdf1d457..483b76d042da 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -116,7 +116,7 @@ nf_nat_fn(unsigned int hooknum, switch (ctinfo) { case IP_CT_RELATED: - case IP_CT_RELATED+IP_CT_IS_REPLY: + case IP_CT_RELATED_REPLY: if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(ct, ctinfo, hooknum, skb)) @@ -144,7 +144,7 @@ nf_nat_fn(unsigned int hooknum, default: /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || - ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY)); + ctinfo == IP_CT_ESTABLISHED_REPLY); } return nf_nat_packet(ct, ctinfo, hooknum, skb); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index c8af58b22562..4111050a9fc5 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -160,7 +160,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); - if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) + if (!ct || ctinfo == IP_CT_RELATED_REPLY) goto out; help = nfct_help(ct); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 2e1c11f78419..0bd568929403 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -850,7 +850,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, /* It exists; we have (non-exclusive) reference. */ if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) { - *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY; + *ctinfo = IP_CT_ESTABLISHED_REPLY; /* Please set reply bit if this packet OK */ *set_reply = 1; } else { @@ -1143,7 +1143,7 @@ static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) /* This ICMP is in reverse direction to the packet which caused it */ ct = nf_ct_get(skb, &ctinfo); if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) - ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY; + ctinfo = IP_CT_RELATED_REPLY; else ctinfo = IP_CT_RELATED; diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index e17cb7c7dd8f..6f5801eac999 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -368,7 +368,7 @@ static int help(struct sk_buff *skb, /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + ctinfo != IP_CT_ESTABLISHED_REPLY) { pr_debug("ftp: Conntrackinfo = %u\n", ctinfo); return NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 18b2ce5c8ced..f03c2d4539f6 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -571,10 +571,9 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff, int ret; /* Until there's been traffic both ways, don't look in packets. 
*/ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; - } + pr_debug("nf_ct_h245: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); @@ -1125,10 +1124,9 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff, int ret; /* Until there's been traffic both ways, don't look in packets. */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; - } + pr_debug("nf_ct_q931: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index b394aa318776..4f9390b98697 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -125,8 +125,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, return NF_ACCEPT; /* Until there's been traffic both ways, don't look in packets. */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; /* Not a full tcp header? */ diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 088944824e13..2fd4565144de 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -519,8 +519,7 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, u_int16_t msg; /* don't do any tracking before tcp handshake complete */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; nexthdr_off = protoff; diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index d9e27734b2a2..8501823b3f9b 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -78,7 +78,7 @@ static int help(struct sk_buff *skb, ct_sane_info = &nfct_help(ct)->help.ct_sane_info; /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) + ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; /* Not a full tcp header? */ diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index cb5a28581782..93faf6a3a637 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1423,7 +1423,7 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust; if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; /* No Data ? 
*/ diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 9cc46356b577..fe39f7e913df 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -143,9 +143,9 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct) && ((iph->protocol != IPPROTO_ICMP && - ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) || + ctinfo == IP_CT_ESTABLISHED_REPLY) || (iph->protocol == IPPROTO_ICMP && - ctinfo == IP_CT_IS_REPLY + IP_CT_RELATED)) && + ctinfo == IP_CT_RELATED_REPLY)) && (ct->status & IPS_SRC_NAT_DONE)) { daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; -- cgit v1.2.3 From b084f598df36b62dfae83c10ed17f0b66b50f442 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 31 May 2011 12:24:58 -0400 Subject: nfsd: fix dependency of nfsd on auth_rpcgss Commit b0b0c0a26e84 "nfsd: add proc file listing kernel's gss_krb5 enctypes" added an nunnecessary dependency of nfsd on the auth_rpcgss module. It's a little ad hoc, but since the only piece of information nfsd needs from rpcsec_gss_krb5 is a single static string, one solution is just to share it with an include file. Cc: stable@kernel.org Reported-by: Michael Guntsche Cc: Kevin Coffman Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 19 ++++++------------- include/linux/sunrpc/gss_krb5_enctypes.h | 4 ++++ net/sunrpc/auth_gss/gss_krb5_mech.c | 3 ++- 3 files changed, 12 insertions(+), 14 deletions(-) create mode 100644 include/linux/sunrpc/gss_krb5_enctypes.h (limited to 'include/linux') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 1f5eae40f34e..2b1449dd2f49 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "idmap.h" #include "nfsd.h" @@ -189,18 +190,10 @@ static struct file_operations export_features_operations = { .release = single_release, }; -#ifdef CONFIG_SUNRPC_GSS +#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) static int supported_enctypes_show(struct seq_file *m, void *v) { - struct gss_api_mech *k5mech; - - k5mech = gss_mech_get_by_name("krb5"); - if (k5mech == NULL) - goto out; - if (k5mech->gm_upcall_enctypes != NULL) - seq_printf(m, k5mech->gm_upcall_enctypes); - gss_mech_put(k5mech); -out: + seq_printf(m, KRB5_SUPPORTED_ENCTYPES); return 0; } @@ -215,7 +208,7 @@ static struct file_operations supported_enctypes_ops = { .llseek = seq_lseek, .release = single_release, }; -#endif /* CONFIG_SUNRPC_GSS */ +#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); @@ -1427,9 +1420,9 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, -#ifdef CONFIG_SUNRPC_GSS +#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, -#endif /* CONFIG_SUNRPC_GSS */ +#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, diff --git a/include/linux/sunrpc/gss_krb5_enctypes.h 
b/include/linux/sunrpc/gss_krb5_enctypes.h new file mode 100644 index 000000000000..ec6234eee89c --- /dev/null +++ b/include/linux/sunrpc/gss_krb5_enctypes.h @@ -0,0 +1,4 @@ +/* + * Dumb way to share this static piece of information with nfsd + */ +#define KRB5_SUPPORTED_ENCTYPES "18,17,16,23,3,1,2" diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 0a9a2ec2e469..c3b75333b821 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -43,6 +43,7 @@ #include #include #include +#include #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH @@ -750,7 +751,7 @@ static struct gss_api_mech gss_kerberos_mech = { .gm_ops = &gss_kerberos_ops, .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs), .gm_pfs = gss_kerberos_pfs, - .gm_upcall_enctypes = "18,17,16,23,3,1,2", + .gm_upcall_enctypes = KRB5_SUPPORTED_ENCTYPES, }; static int __init init_kerberos_module(void) -- cgit v1.2.3 From 5f98ecdbcef1920323d8777c0ba55dbd4335d3cf Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sun, 5 Jun 2011 11:47:29 +0900 Subject: swiotlb: Export swioltb_nr_tbl and utilize it as appropiate. By default the io_tlb_nslabs is set to zero, and gets set to whatever value is passed in via swiotlb_init_with_tbl function. The default value passed in is 64MB. However, if the user provides the 'swiotlb=' the default value is ignored and the value provided by the user is used... Except when the SWIOTLB is used under Xen - there the default value of 64MB is used and the Xen-SWIOTLB has no mechanism to get the 'io_tlb_nslabs' filled out by setup_io_tlb_npages functions. This patch provides a function for the Xen-SWIOTLB to call to see if the io_tlb_nslabs is set and if so use that value. Signed-off-by: FUJITA Tomonori Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/swiotlb-xen.c | 12 +++++++++--- include/linux/swiotlb.h | 1 + lib/swiotlb.c | 5 +++++ 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 54469c3eeacd..5ac3f4843f5c 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -147,9 +147,15 @@ void __init xen_swiotlb_init(int verbose) { unsigned long bytes; int rc; - - xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT); - xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE); + unsigned long nr_tbl; + + nr_tbl = swioltb_nr_tbl(); + if (nr_tbl) + xen_io_tlb_nslabs = nr_tbl; + else { + xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT); + xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE); + } bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT; diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 8c0e349f4a6c..445702c60d04 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -24,6 +24,7 @@ extern int swiotlb_force; extern void swiotlb_init(int verbose); extern void swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); +extern unsigned long swioltb_nr_tbl(void); /* * Enumeration for sync targets diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 93ca08b8a451..99093b396145 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -110,6 +110,11 @@ setup_io_tlb_npages(char *str) __setup("swiotlb=", setup_io_tlb_npages); /* make io_tlb_overflow tunable too? 
*/ +unsigned long swioltb_nr_tbl(void) +{ + return io_tlb_nslabs; +} + /* Note that this doesn't work with highmem page */ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, volatile void *address) -- cgit v1.2.3 From 3019de124b9f5b1526cb3668b74af14371e21795 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 6 Jun 2011 16:41:33 -0700 Subject: net: Rework netdev_drivername() to avoid warning. This interface uses a temporary buffer, but for no real reason. And now can generate warnings like: net/sched/sch_generic.c: In function dev_watchdog net/sched/sch_generic.c:254:10: warning: unused variable drivername Just return driver->name directly or "". Reported-by: Connor Hansen Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- net/core/dev.c | 16 +++++----------- net/sched/sch_generic.c | 3 +-- 3 files changed, 7 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ca333e79e10f..54b8b4d7b68f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2555,7 +2555,7 @@ extern void netdev_class_remove_file(struct class_attribute *class_attr); extern struct kobj_ns_type_operations net_ns_type_operations; -extern char *netdev_drivername(const struct net_device *dev, char *buffer, int len); +extern const char *netdev_drivername(const struct net_device *dev); extern void linkwatch_run_queue(void); diff --git a/net/core/dev.c b/net/core/dev.c index 939307891e71..1af6cb27f67a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6264,29 +6264,23 @@ err_name: /** * netdev_drivername - network driver for the device * @dev: network device - * @buffer: buffer for resulting name - * @len: size of buffer * * Determine network driver for device. */ -char *netdev_drivername(const struct net_device *dev, char *buffer, int len) +const char *netdev_drivername(const struct net_device *dev) { const struct device_driver *driver; const struct device *parent; - - if (len <= 0 || !buffer) - return buffer; - buffer[0] = 0; + const char *empty = ""; parent = dev->dev.parent; - if (!parent) - return buffer; + return empty; driver = parent->driver; if (driver && driver->name) - strlcpy(buffer, driver->name, len); - return buffer; + return driver->name; + return empty; } static int __netdev_printk(const char *level, const struct net_device *dev, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index b1721d71c27c..b4c680900d7a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -251,9 +251,8 @@ static void dev_watchdog(unsigned long arg) } if (some_queue_timedout) { - char drivername[64]; WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n", - dev->name, netdev_drivername(dev, drivername, 64), i); + dev->name, netdev_drivername(dev), i); dev->netdev_ops->ndo_tx_timeout(dev); } if (!mod_timer(&dev->watchdog_timer, -- cgit v1.2.3 From 13fcb7bd322164c67926ffe272846d4860196dc6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jun 2011 22:42:06 -0700 Subject: af_packet: prevent information leak In 2.6.27, commit 393e52e33c6c2 (packet: deliver VLAN TCI to userspace) added a small information leak. Add padding field and make sure its zeroed before copy to user. Signed-off-by: Eric Dumazet CC: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/if_packet.h | 2 ++ net/packet/af_packet.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index 6d66ce1791a9..7b318630139f 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h @@ -62,6 +62,7 @@ struct tpacket_auxdata { __u16 tp_mac; __u16 tp_net; __u16 tp_vlan_tci; + __u16 tp_padding; }; /* Rx ring - header status */ @@ -101,6 +102,7 @@ struct tpacket2_hdr { __u32 tp_sec; __u32 tp_nsec; __u16 tp_vlan_tci; + __u16 tp_padding; }; #define TPACKET2_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll)) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ba248d93399a..c0c3cda19712 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -804,6 +804,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, } else { h.h2->tp_vlan_tci = 0; } + h.h2->tp_padding = 0; hdrlen = sizeof(*h.h2); break; default: @@ -1736,6 +1737,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, } else { aux.tp_vlan_tci = 0; } + aux.tp_padding = 0; put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } -- cgit v1.2.3 From 21c13a4f7bc185552c4b402b792c3bbb9aa69df0 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 7 Jun 2011 11:35:52 -0400 Subject: usb-storage: redo incorrect reads Some USB mass-storage devices have bugs that cause them not to handle the first READ(10) command they receive correctly. The Corsair Padlock v2 returns completely bogus data for its first read (possibly it returns the data in encrypted form even though the device is supposed to be unlocked). The Feiya SD/SDHC card reader fails to complete the first READ(10) command after it is plugged in or after a new card is inserted, returning a status code that indicates it thinks the command was invalid, which prevents the kernel from retrying the read. Since the first read of a new device or a new medium is for the partition sector, the kernel is unable to retrieve the device's partition table. Users have to manually issue an "hdparm -z" or "blockdev --rereadpt" command before they can access the device. This patch (as1470) works around the problem. It adds a new quirk flag, US_FL_INVALID_READ10, indicating that the first READ(10) should always be retried immediately, as should any failing READ(10) commands (provided the preceding READ(10) command succeeded, to avoid getting stuck in a loop). The patch also adds appropriate unusual_devs entries containing the new flag. Signed-off-by: Alan Stern Tested-by: Sven Geggus Tested-by: Paul Hartman CC: Matthew Dharm CC: Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 2 ++ drivers/usb/storage/transport.c | 29 +++++++++++++++++++++++++++++ drivers/usb/storage/unusual_devs.h | 19 +++++++++++++++++++ drivers/usb/storage/usb.c | 13 ++++++++++++- drivers/usb/storage/usb.h | 2 ++ include/linux/usb_usual.h | 4 +++- 6 files changed, 67 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index d9a203b058f1..fd248a318211 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2598,6 +2598,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. 
unlock ejectable media); m = MAX_SECTORS_64 (don't transfer more than 64 sectors = 32 KB at a time); + n = INITIAL_READ10 (force a retry of the + initial READ(10) command); o = CAPACITY_OK (accept the capacity reported by the device); r = IGNORE_RESIDUE (the device reports diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c index 00418995d8e9..e8ae21b2d387 100644 --- a/drivers/usb/storage/transport.c +++ b/drivers/usb/storage/transport.c @@ -819,6 +819,35 @@ Retry_Sense: } } + /* + * Some devices don't work or return incorrect data the first + * time they get a READ(10) command, or for the first READ(10) + * after a media change. If the INITIAL_READ10 flag is set, + * keep track of whether READ(10) commands succeed. If the + * previous one succeeded and this one failed, set the REDO_READ10 + * flag to force a retry. + */ + if (unlikely((us->fflags & US_FL_INITIAL_READ10) && + srb->cmnd[0] == READ_10)) { + if (srb->result == SAM_STAT_GOOD) { + set_bit(US_FLIDX_READ10_WORKED, &us->dflags); + } else if (test_bit(US_FLIDX_READ10_WORKED, &us->dflags)) { + clear_bit(US_FLIDX_READ10_WORKED, &us->dflags); + set_bit(US_FLIDX_REDO_READ10, &us->dflags); + } + + /* + * Next, if the REDO_READ10 flag is set, return a result + * code that will cause the SCSI core to retry the READ(10) + * command immediately. + */ + if (test_bit(US_FLIDX_REDO_READ10, &us->dflags)) { + clear_bit(US_FLIDX_REDO_READ10, &us->dflags); + srb->result = DID_IMM_RETRY << 16; + srb->sense_buffer[0] = 0; + } + } + /* Did we transfer less than the minimum amount required? */ if ((srb->result == SAM_STAT_GOOD || srb->sense_buffer[2] == 0) && scsi_bufflen(srb) - scsi_get_resid(srb) < srb->underflow) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index c1602b8c5594..ccff3483eebc 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1114,6 +1114,16 @@ UNUSUAL_DEV( 0x090c, 0x1132, 0x0000, 0xffff, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_FIX_CAPACITY ), +/* Reported by Paul Hartman + * This card reader returns "Illegal Request, Logical Block Address + * Out of Range" for the first READ(10) after a new card is inserted. + */ +UNUSUAL_DEV( 0x090c, 0x6000, 0x0100, 0x0100, + "Feiya", + "SD/SDHC Card Reader", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_INITIAL_READ10 ), + /* This Pentax still camera is not conformant * to the USB storage specification: - * - It does not like the INQUIRY command. So we must handle this command @@ -1888,6 +1898,15 @@ UNUSUAL_DEV( 0x1908, 0x3335, 0x0200, 0x0200, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_READ_DISC_INFO ), +/* Reported by Sven Geggus + * This encrypted pen drive returns bogus data for the initial READ(10). + */ +UNUSUAL_DEV( 0x1b1c, 0x1ab5, 0x0200, 0x0200, + "Corsair", + "Padlock v2", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_INITIAL_READ10 ), + /* Patch by Richard Schütz * This external hard drive enclosure uses a JMicron chip which * needs the US_FL_IGNORE_RESIDUE flag to work properly. 
*/ diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index 5ee7ac42e08f..0ca095820f3e 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -440,7 +440,8 @@ static void adjust_quirks(struct us_data *us) US_FL_NOT_LOCKABLE | US_FL_MAX_SECTORS_64 | US_FL_CAPACITY_OK | US_FL_IGNORE_RESIDUE | US_FL_SINGLE_LUN | US_FL_NO_WP_DETECT | - US_FL_NO_READ_DISC_INFO | US_FL_NO_READ_CAPACITY_16); + US_FL_NO_READ_DISC_INFO | US_FL_NO_READ_CAPACITY_16 | + US_FL_INITIAL_READ10); p = quirks; while (*p) { @@ -490,6 +491,9 @@ static void adjust_quirks(struct us_data *us) case 'm': f |= US_FL_MAX_SECTORS_64; break; + case 'n': + f |= US_FL_INITIAL_READ10; + break; case 'o': f |= US_FL_CAPACITY_OK; break; @@ -953,6 +957,13 @@ int usb_stor_probe2(struct us_data *us) if (result) goto BadDevice; + /* + * If the device returns invalid data for the first READ(10) + * command, indicate the command should be retried. + */ + if (us->fflags & US_FL_INITIAL_READ10) + set_bit(US_FLIDX_REDO_READ10, &us->dflags); + /* Acquire all the other resources and add the host */ result = usb_stor_acquire_resources(us); if (result) diff --git a/drivers/usb/storage/usb.h b/drivers/usb/storage/usb.h index 89d3bfff98df..7b0f2113632e 100644 --- a/drivers/usb/storage/usb.h +++ b/drivers/usb/storage/usb.h @@ -73,6 +73,8 @@ struct us_unusual_dev { #define US_FLIDX_RESETTING 4 /* device reset in progress */ #define US_FLIDX_TIMED_OUT 5 /* SCSI midlayer timed out */ #define US_FLIDX_DONT_SCAN 6 /* don't scan (disconnect) */ +#define US_FLIDX_REDO_READ10 7 /* redo READ(10) command */ +#define US_FLIDX_READ10_WORKED 8 /* previous READ(10) succeeded */ #define USB_STOR_STRING_LEN 32 diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index 71693d4a4fe1..17df3600bcef 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -62,7 +62,9 @@ US_FLAG(NO_READ_DISC_INFO, 0x00040000) \ /* cannot handle READ_DISC_INFO */ \ US_FLAG(NO_READ_CAPACITY_16, 0x00080000) \ - /* cannot handle READ_CAPACITY_16 */ + /* cannot handle READ_CAPACITY_16 */ \ + US_FLAG(INITIAL_READ10, 0x00100000) \ + /* Initial READ(10) (and others) must be retried */ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; -- cgit v1.2.3 From 13e12d14e2dccc7995b8f15a5678a338ab4e6a8c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Jun 2011 15:18:19 -0700 Subject: vfs: reorganize 'struct inode' layout a bit This tries to make the 'struct inode' accesses denser in the data cache by moving a commonly accessed field (i_security) closer to other fields that are accessed often. It also makes 'i_state' just an 'unsigned int' rather than 'unsigned long', since we only use a few bits of that field, and moves it next to the existing 'i_flags' so that we potentially get better structure layout (although depending on config options, i_flags may already have packed in the same word as i_lock, so this improves packing only for the case of spinlock debugging) Out 'struct inode' is still way too big, and we should probably move some other fields around too (the acl fields in particular) for better data cache access density. Other fields (like the inode hash) are likely to be entirely irrelevant under most loads. 
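As a rough way to see the kind of packing being chased here (illustration only; struct inode_sketch below is a made-up stand-in, not the kernel structure), one can print field offsets with offsetof(), or run pahole against a built vmlinux for the real layout.

/* Toy stand-in showing two 32-bit fields sharing one word. */
#include <stdio.h>
#include <stddef.h>

struct inode_sketch {
	unsigned int i_flags;		/* 4 bytes                     */
	unsigned int i_state;		/* packs in the same word      */
	void *i_security;		/* hot pointer kept nearby     */
	unsigned long dirtied_when;
};

int main(void)
{
	printf("i_flags    @ %zu\n", offsetof(struct inode_sketch, i_flags));
	printf("i_state    @ %zu\n", offsetof(struct inode_sketch, i_state));
	printf("i_security @ %zu\n", offsetof(struct inode_sketch, i_security));
	printf("total size   %zu\n", sizeof(struct inode_sketch));
	return 0;
}

On a real tree, `pahole -C inode vmlinux` shows the resulting holes and cache-line boundaries directly.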
Signed-off-by: Linus Torvalds --- include/linux/fs.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 646a1836152a..1c777878f1ea 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -744,9 +744,13 @@ struct inode { spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned int i_flags; + unsigned int i_state; +#ifdef CONFIG_SECURITY + void *i_security; +#endif struct mutex i_mutex; - unsigned long i_state; + unsigned long dirtied_when; /* jiffies of first dirtying */ struct hlist_node i_hash; @@ -798,9 +802,6 @@ struct inode { atomic_t i_readcount; /* struct files open RO */ #endif atomic_t i_writecount; -#ifdef CONFIG_SECURITY - void *i_security; -#endif #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *i_acl; struct posix_acl *i_default_acl; -- cgit v1.2.3 From b4c8cc88c18213688268d1d53a51d97ce2f19a64 Mon Sep 17 00:00:00 2001 From: Yegor Yefremov Date: Thu, 9 Jun 2011 15:05:48 -0700 Subject: ethtool.h: fix typos Signed-off-by: Yegor Yefremov Signed-off-by: David S. Miller --- include/linux/ethtool.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c6a850ab2ec5..439b173c5882 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -268,7 +268,7 @@ struct ethtool_pauseparam { __u32 cmd; /* ETHTOOL_{G,S}PAUSEPARAM */ /* If the link is being auto-negotiated (via ethtool_cmd.autoneg - * being true) the user may set 'autonet' here non-zero to have the + * being true) the user may set 'autoneg' here non-zero to have the * pause parameters be auto-negotiated too. In such a case, the * {rx,tx}_pause values below determine what capabilities are * advertised. @@ -811,7 +811,7 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); * @get_tx_csum: Deprecated as redundant. Report whether transmit checksums * are turned on or off. * @set_tx_csum: Deprecated in favour of generic netdev features. Turn - * transmit checksums on or off. Returns a egative error code or zero. + * transmit checksums on or off. Returns a negative error code or zero. * @get_sg: Deprecated as redundant. Report whether scatter-gather is * enabled. * @set_sg: Deprecated in favour of generic netdev features. Turn @@ -1087,7 +1087,7 @@ struct ethtool_ops { /* The following are all involved in forcing a particular link * mode for the device for setting things. When getting the * devices settings, these indicate the current mode and whether - * it was foced up into this mode or autonegotiated. + * it was forced up into this mode or autonegotiated. */ /* The forced speed, 10Mb, 100Mb, gigabit, 2.5Gb, 10GbE. */ -- cgit v1.2.3 From e5ea3f12d41d96edf4ad9374db2b1725e457acec Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Fri, 10 Jun 2011 13:44:49 +0100 Subject: gpio/basic_mmio: add missing include of spinlock_types.h include/linux/basic_mmio_gpio.h uses a spinlock_t without including any of the spinlock headers resulting in this compiler warning. include/linux/basic_mmio_gpio.h:51:2: error: expected specifier-qualifier-list before 'spinlock_t' Explicitly include linux/spinlock_types.h to fix it. 
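The general rule the fix follows, sketched here on a made-up header (not the actual basic_mmio_gpio.h), is that any header embedding a spinlock_t must pull in the type definition itself rather than rely on whoever includes it.

/* example_gpio.h -- illustrative only */
#ifndef _EXAMPLE_GPIO_H
#define _EXAMPLE_GPIO_H

#include <linux/spinlock_types.h>	/* defines spinlock_t */

struct example_gpio_chip {
	spinlock_t lock;	/* would not compile without the include above */
	void *regs;
};

#endif /* _EXAMPLE_GPIO_H */

spinlock_types.h is preferred over spinlock.h in headers because only the type is needed there, not the locking API.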
Signed-off-by: Jamie Iles Signed-off-by: Grant Likely --- include/linux/basic_mmio_gpio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/basic_mmio_gpio.h b/include/linux/basic_mmio_gpio.h index 1ae12710d732..98999cf107ce 100644 --- a/include/linux/basic_mmio_gpio.h +++ b/include/linux/basic_mmio_gpio.h @@ -16,6 +16,7 @@ #include #include #include +#include struct bgpio_pdata { int base; -- cgit v1.2.3 From 56a210526adb2854d5b7c398a40260720390ee05 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 11 Jun 2011 12:29:58 +0100 Subject: linux/seqlock.h should #include asm/processor.h for cpu_relax() It uses cpu_relax(), and so needs Without this patch, I see: CC arch/mn10300/kernel/asm-offsets.s In file included from include/linux/time.h:8, from include/linux/timex.h:56, from include/linux/sched.h:57, from arch/mn10300/kernel/asm-offsets.c:7: include/linux/seqlock.h: In function 'read_seqbegin': include/linux/seqlock.h:91: error: implicit declaration of function 'cpu_relax' whilst building asb2364_defconfig on MN10300. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/linux/seqlock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index e9811892844f..c6db9fb33c44 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -28,6 +28,7 @@ #include #include +#include typedef struct { unsigned sequence; -- cgit v1.2.3 From 0b5c9db1b11d3175bb42b80663a9f072f801edf5 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 10 Jun 2011 06:56:58 +0000 Subject: vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check Testing of VLAN_FLAG_REORDER_HDR does not belong in vlan_untag but rather in vlan_do_receive. Otherwise the vlan header will not be properly put on the packet in the case of vlan header accelleration. As we remove the check from vlan_check_reorder_header rename it vlan_reorder_header to keep the naming clean. Fix up the skb->pkt_type early so we don't look at the packet after adding the vlan tag, which guarantees we don't goof and look at the wrong field. Use a simple if statement instead of a complicated switch statement to decided that we need to increment rx_stats for a multicast packet. Hopefully at somepoint we will just declare the case where VLAN_FLAG_REORDER_HDR is cleared as unsupported and remove the code. Until then this keeps it working correctly. Signed-off-by: Eric W. Biederman Signed-off-by: Jiri Pirko Acked-by: Changli Gao Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 25 ++++++++++++++++++--- include/linux/skbuff.h | 5 +++++ net/8021q/vlan_core.c | 60 +++++++++++++++++++++++++++---------------------- net/core/dev.c | 2 +- 4 files changed, 61 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index dc01681fbb42..affa27380b72 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -225,7 +225,7 @@ static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb, } /** - * __vlan_put_tag - regular VLAN tag inserting + * vlan_insert_tag - regular VLAN tag inserting * @skb: skbuff to tag * @vlan_tci: VLAN TCI to insert * @@ -234,8 +234,10 @@ static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb, * * Following the skb_unshare() example, in case of error, the calling function * doesn't have to worry about freeing the original skb. + * + * Does not change skb->protocol so this function can be used during receive. 
*/ -static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci) +static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, u16 vlan_tci) { struct vlan_ethhdr *veth; @@ -255,8 +257,25 @@ static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci) /* now, the TCI */ veth->h_vlan_TCI = htons(vlan_tci); - skb->protocol = htons(ETH_P_8021Q); + return skb; +} +/** + * __vlan_put_tag - regular VLAN tag inserting + * @skb: skbuff to tag + * @vlan_tci: VLAN TCI to insert + * + * Inserts the VLAN tag into @skb as part of the payload + * Returns a VLAN tagged skb. If a new skb is created, @skb is freed. + * + * Following the skb_unshare() example, in case of error, the calling function + * doesn't have to worry about freeing the original skb. + */ +static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci) +{ + skb = vlan_insert_tag(skb, vlan_tci); + if (skb) + skb->protocol = htons(ETH_P_8021Q); return skb; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e8b78ce14474..c0a4f3ab0cc0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1256,6 +1256,11 @@ static inline void skb_reserve(struct sk_buff *skb, int len) skb->tail += len; } +static inline void skb_reset_mac_len(struct sk_buff *skb) +{ + skb->mac_len = skb->network_header - skb->mac_header; +} + #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_transport_header(const struct sk_buff *skb) { diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 41495dc2a4c9..fcc684678af6 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -23,6 +23,31 @@ bool vlan_do_receive(struct sk_buff **skbp) return false; skb->dev = vlan_dev; + if (skb->pkt_type == PACKET_OTHERHOST) { + /* Our lower layer thinks this is not local, let's make sure. + * This allows the VLAN to have a different MAC than the + * underlying device, and still route correctly. */ + if (!compare_ether_addr(eth_hdr(skb)->h_dest, + vlan_dev->dev_addr)) + skb->pkt_type = PACKET_HOST; + } + + if (!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) { + unsigned int offset = skb->data - skb_mac_header(skb); + + /* + * vlan_insert_tag expect skb->data pointing to mac header. + * So change skb->data before calling it and change back to + * original position later + */ + skb_push(skb, offset); + skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci); + if (!skb) + return false; + skb_pull(skb, offset + VLAN_HLEN); + skb_reset_mac_len(skb); + } + skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci); skb->vlan_tci = 0; @@ -31,22 +56,8 @@ bool vlan_do_receive(struct sk_buff **skbp) u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; rx_stats->rx_bytes += skb->len; - - switch (skb->pkt_type) { - case PACKET_BROADCAST: - break; - case PACKET_MULTICAST: + if (skb->pkt_type == PACKET_MULTICAST) rx_stats->rx_multicast++; - break; - case PACKET_OTHERHOST: - /* Our lower layer thinks this is not local, let's make sure. - * This allows the VLAN to have a different MAC than the - * underlying device, and still route correctly. 
*/ - if (!compare_ether_addr(eth_hdr(skb)->h_dest, - vlan_dev->dev_addr)) - skb->pkt_type = PACKET_HOST; - break; - } u64_stats_update_end(&rx_stats->syncp); return true; @@ -89,18 +100,13 @@ gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, } EXPORT_SYMBOL(vlan_gro_frags); -static struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) +static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) { - if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) { - if (skb_cow(skb, skb_headroom(skb)) < 0) - skb = NULL; - if (skb) { - /* Lifted from Gleb's VLAN code... */ - memmove(skb->data - ETH_HLEN, - skb->data - VLAN_ETH_HLEN, 12); - skb->mac_header += VLAN_HLEN; - } - } + if (skb_cow(skb, skb_headroom(skb)) < 0) + return NULL; + memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); + skb->mac_header += VLAN_HLEN; + skb_reset_mac_len(skb); return skb; } @@ -161,7 +167,7 @@ struct sk_buff *vlan_untag(struct sk_buff *skb) skb_pull_rcsum(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vhdr); - skb = vlan_check_reorder_header(skb); + skb = vlan_reorder_header(skb); if (unlikely(!skb)) goto err_free; diff --git a/net/core/dev.c b/net/core/dev.c index a54c9f87ddbb..9c58c1ec41a9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3114,7 +3114,7 @@ static int __netif_receive_skb(struct sk_buff *skb) skb_reset_network_header(skb); skb_reset_transport_header(skb); - skb->mac_len = skb->network_header - skb->mac_header; + skb_reset_mac_len(skb); pt_prev = NULL; -- cgit v1.2.3 From a685e08987d1edf1995b76511d4c98ea0e905377 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 8 Jun 2011 21:13:01 -0400 Subject: Delay struct net freeing while there's a sysfs instance refering to it * new refcount in struct net, controlling actual freeing of the memory * new method in kobj_ns_type_operations (->drop_ns()) * ->current_ns() semantics change - it's supposed to be followed by corresponding ->drop_ns(). For struct net in case of CONFIG_NET_NS it bumps the new refcount; net_drop_ns() decrements it and calls net_free() if the last reference has been dropped. Method renamed to ->grab_current_ns(). * old net_free() callers call net_drop_ns() instead. * sysfs_exit_ns() is gone, along with a large part of callchain leading to it; now that the references stored in ->ns[...] stay valid we do not need to hunt them down and replace them with NULL. That fixes problems in sysfs_lookup() and sysfs_readdir(), along with getting rid of sb->s_instances abuse. Note that struct net *shutdown* logics has not changed - net_cleanup() is called exactly when it used to be called. The only thing postponed by having a sysfs instance refering to that struct net is actual freeing of memory occupied by struct net. 
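To make the new lifetime rules concrete, the split between "count" (which drives shutdown) and "passive" (which drives the actual freeing) can be modelled in a few lines of user-space C. This is an illustrative sketch only, not kernel code: struct ns and the ns_grab()/ns_drop()/ns_put()/ns_cleanup()/ns_free() helpers are invented for the example and merely stand in for kobj_ns_grab_current(), net_drop_ns(), put_net(), cleanup_net() and net_free().

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct ns {
        atomic_int count;    /* live users: drives shutdown (models net->count) */
        atomic_int passive;  /* memory pins: drives freeing (models net->passive) */
        int shut_down;
    };

    static void ns_free(struct ns *n)    { free(n); }          /* models net_free() */
    static void ns_cleanup(struct ns *n) { n->shut_down = 1; } /* models cleanup_net() */

    static struct ns *ns_grab(struct ns *n)   /* models kobj_ns_grab_current() */
    {
        atomic_fetch_add(&n->passive, 1);
        return n;
    }

    static void ns_drop(struct ns *n)         /* models net_drop_ns() */
    {
        if (atomic_fetch_sub(&n->passive, 1) == 1)
            ns_free(n);
    }

    static void ns_put(struct ns *n)          /* models put_net() */
    {
        if (atomic_fetch_sub(&n->count, 1) == 1) {
            ns_cleanup(n);  /* shutdown still happens here, exactly as before */
            ns_drop(n);     /* freeing now waits for the last passive reference */
        }
    }

    int main(void)
    {
        struct ns *n = calloc(1, sizeof(*n));
        atomic_init(&n->count, 1);
        atomic_init(&n->passive, 1);

        struct ns *sysfs_ref = ns_grab(n); /* a sysfs mount pins the memory */
        ns_put(n);                         /* last user gone: namespace shut down */
        printf("shut down: %d\n", sysfs_ref->shut_down); /* memory still valid */
        ns_drop(sysfs_ref);                /* sysfs lets go: memory freed now */
        return 0;
    }

The point is the last two steps: after the final ns_put() the namespace is already shut down, but the memory stays valid until the sysfs reference is dropped, which is what lets sysfs_lookup() and sysfs_readdir() keep using the stored tag without the old sysfs_exit_ns() pass that nulled it out.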
Signed-off-by: Al Viro --- fs/sysfs/mount.c | 37 +++++++++++-------------------------- fs/sysfs/sysfs.h | 2 +- include/linux/kobject_ns.h | 10 ++++++---- include/linux/sysfs.h | 7 ------- include/net/net_namespace.h | 10 +++++++++- lib/kobject.c | 26 +++++++++----------------- net/core/net-sysfs.c | 23 +++++++++-------------- net/core/net_namespace.c | 12 ++++++++++-- 8 files changed, 55 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 266895783b47..e34f0d99ea4e 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -95,6 +95,14 @@ static int sysfs_set_super(struct super_block *sb, void *data) return error; } +static void free_sysfs_super_info(struct sysfs_super_info *info) +{ + int type; + for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) + kobj_ns_drop(type, info->ns[type]); + kfree(info); +} + static struct dentry *sysfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { @@ -108,11 +116,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, return ERR_PTR(-ENOMEM); for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) - info->ns[type] = kobj_ns_current(type); + info->ns[type] = kobj_ns_grab_current(type); sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info); if (IS_ERR(sb) || sb->s_fs_info != info) - kfree(info); + free_sysfs_super_info(info); if (IS_ERR(sb)) return ERR_CAST(sb); if (!sb->s_root) { @@ -131,12 +139,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, static void sysfs_kill_sb(struct super_block *sb) { struct sysfs_super_info *info = sysfs_info(sb); - /* Remove the superblock from fs_supers/s_instances * so we can't find it, before freeing sysfs_super_info. */ kill_anon_super(sb); - kfree(info); + free_sysfs_super_info(info); } static struct file_system_type sysfs_fs_type = { @@ -145,28 +152,6 @@ static struct file_system_type sysfs_fs_type = { .kill_sb = sysfs_kill_sb, }; -void sysfs_exit_ns(enum kobj_ns_type type, const void *ns) -{ - struct super_block *sb; - - mutex_lock(&sysfs_mutex); - spin_lock(&sb_lock); - list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) { - struct sysfs_super_info *info = sysfs_info(sb); - /* - * If we see a superblock on the fs_supers/s_instances - * list the unmount has not completed and sb->s_fs_info - * points to a valid struct sysfs_super_info. - */ - /* Ignore superblocks with the wrong ns */ - if (info->ns[type] != ns) - continue; - info->ns[type] = NULL; - } - spin_unlock(&sb_lock); - mutex_unlock(&sysfs_mutex); -} - int __init sysfs_init(void) { int err = -ENOMEM; diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 3d28af31d863..2ed2404f3113 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -136,7 +136,7 @@ struct sysfs_addrm_cxt { * instance). */ struct sysfs_super_info { - const void *ns[KOBJ_NS_TYPES]; + void *ns[KOBJ_NS_TYPES]; }; #define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info)) extern struct sysfs_dirent sysfs_root; diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h index 82cb5bf461fb..f66b065a8b5f 100644 --- a/include/linux/kobject_ns.h +++ b/include/linux/kobject_ns.h @@ -32,15 +32,17 @@ enum kobj_ns_type { /* * Callbacks so sysfs can determine namespaces - * @current_ns: return calling task's namespace + * @grab_current_ns: return a new reference to calling task's namespace * @netlink_ns: return namespace to which a sock belongs (right?) * @initial_ns: return the initial namespace (i.e. 
init_net_ns) + * @drop_ns: drops a reference to namespace */ struct kobj_ns_type_operations { enum kobj_ns_type type; - const void *(*current_ns)(void); + void *(*grab_current_ns)(void); const void *(*netlink_ns)(struct sock *sk); const void *(*initial_ns)(void); + void (*drop_ns)(void *); }; int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); @@ -48,9 +50,9 @@ int kobj_ns_type_registered(enum kobj_ns_type type); const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); -const void *kobj_ns_current(enum kobj_ns_type type); +void *kobj_ns_grab_current(enum kobj_ns_type type); const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk); const void *kobj_ns_initial(enum kobj_ns_type type); -void kobj_ns_exit(enum kobj_ns_type type, const void *ns); +void kobj_ns_drop(enum kobj_ns_type type, void *ns); #endif /* _LINUX_KOBJECT_NS_H */ diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index c3acda60eee0..e2696d76a599 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -177,9 +177,6 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd); void sysfs_put(struct sysfs_dirent *sd); -/* Called to clear a ns tag when it is no longer valid */ -void sysfs_exit_ns(enum kobj_ns_type type, const void *tag); - int __must_check sysfs_init(void); #else /* CONFIG_SYSFS */ @@ -338,10 +335,6 @@ static inline void sysfs_put(struct sysfs_dirent *sd) { } -static inline void sysfs_exit_ns(int type, const void *tag) -{ -} - static inline int __must_check sysfs_init(void) { return 0; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 2bf9ed9ef26b..aef430d779bd 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -35,8 +35,11 @@ struct netns_ipvs; #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) struct net { + atomic_t passive; /* To decided when the network + * namespace should be freed. + */ atomic_t count; /* To decided when the network - * namespace should be freed. + * namespace should be shut down. */ #ifdef NETNS_REFCNT_DEBUG atomic_t use_count; /* To track references we @@ -154,6 +157,9 @@ int net_eq(const struct net *net1, const struct net *net2) { return net1 == net2; } + +extern void net_drop_ns(void *); + #else static inline struct net *get_net(struct net *net) @@ -175,6 +181,8 @@ int net_eq(const struct net *net1, const struct net *net2) { return 1; } + +#define net_drop_ns NULL #endif diff --git a/lib/kobject.c b/lib/kobject.c index 82dc34c095c2..640bd98a4c8a 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -948,14 +948,14 @@ const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj) } -const void *kobj_ns_current(enum kobj_ns_type type) +void *kobj_ns_grab_current(enum kobj_ns_type type) { - const void *ns = NULL; + void *ns = NULL; spin_lock(&kobj_ns_type_lock); if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && kobj_ns_ops_tbl[type]) - ns = kobj_ns_ops_tbl[type]->current_ns(); + ns = kobj_ns_ops_tbl[type]->grab_current_ns(); spin_unlock(&kobj_ns_type_lock); return ns; @@ -987,23 +987,15 @@ const void *kobj_ns_initial(enum kobj_ns_type type) return ns; } -/* - * kobj_ns_exit - invalidate a namespace tag - * - * @type: the namespace type (i.e. KOBJ_NS_TYPE_NET) - * @ns: the actual namespace being invalidated - * - * This is called when a tag is no longer valid. 
For instance, - * when a network namespace exits, it uses this helper to - * make sure no sb's sysfs_info points to the now-invalidated - * netns. - */ -void kobj_ns_exit(enum kobj_ns_type type, const void *ns) +void kobj_ns_drop(enum kobj_ns_type type, void *ns) { - sysfs_exit_ns(type, ns); + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && + kobj_ns_ops_tbl[type] && kobj_ns_ops_tbl[type]->drop_ns) + kobj_ns_ops_tbl[type]->drop_ns(ns); + spin_unlock(&kobj_ns_type_lock); } - EXPORT_SYMBOL(kobject_get); EXPORT_SYMBOL(kobject_put); EXPORT_SYMBOL(kobject_del); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 11b98bc2aa8f..33d2a1fba131 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1179,9 +1179,14 @@ static void remove_queue_kobjects(struct net_device *net) #endif } -static const void *net_current_ns(void) +static void *net_grab_current_ns(void) { - return current->nsproxy->net_ns; + struct net *ns = current->nsproxy->net_ns; +#ifdef CONFIG_NET_NS + if (ns) + atomic_inc(&ns->passive); +#endif + return ns; } static const void *net_initial_ns(void) @@ -1196,22 +1201,13 @@ static const void *net_netlink_ns(struct sock *sk) struct kobj_ns_type_operations net_ns_type_operations = { .type = KOBJ_NS_TYPE_NET, - .current_ns = net_current_ns, + .grab_current_ns = net_grab_current_ns, .netlink_ns = net_netlink_ns, .initial_ns = net_initial_ns, + .drop_ns = net_drop_ns, }; EXPORT_SYMBOL_GPL(net_ns_type_operations); -static void net_kobj_ns_exit(struct net *net) -{ - kobj_ns_exit(KOBJ_NS_TYPE_NET, net); -} - -static struct pernet_operations kobj_net_ops = { - .exit = net_kobj_ns_exit, -}; - - #ifdef CONFIG_HOTPLUG static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) { @@ -1339,6 +1335,5 @@ EXPORT_SYMBOL(netdev_class_remove_file); int netdev_kobject_init(void) { kobj_ns_type_register(&net_ns_type_operations); - register_pernet_subsys(&kobj_net_ops); return class_register(&net_class); } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 6c6b86d0da15..cdcbc3cb00a9 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -128,6 +128,7 @@ static __net_init int setup_net(struct net *net) LIST_HEAD(net_exit_list); atomic_set(&net->count, 1); + atomic_set(&net->passive, 1); #ifdef NETNS_REFCNT_DEBUG atomic_set(&net->use_count, 0); @@ -210,6 +211,13 @@ static void net_free(struct net *net) kmem_cache_free(net_cachep, net); } +void net_drop_ns(void *p) +{ + struct net *ns = p; + if (ns && atomic_dec_and_test(&ns->passive)) + net_free(ns); +} + struct net *copy_net_ns(unsigned long flags, struct net *old_net) { struct net *net; @@ -230,7 +238,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) } mutex_unlock(&net_mutex); if (rv < 0) { - net_free(net); + net_drop_ns(net); return ERR_PTR(rv); } return net; @@ -286,7 +294,7 @@ static void cleanup_net(struct work_struct *work) /* Finally it is safe to free my network namespace structure */ list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { list_del_init(&net->exit_list); - net_free(net); + net_drop_ns(net); } } static DECLARE_WORK(net_cleanup_work, cleanup_net); -- cgit v1.2.3 From 09223371deac67d08ca0b70bd18787920284c967 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 14 Jun 2011 13:26:25 +0800 Subject: rcu: Use softirq to address performance regression Commit a26ac2455ffcf3(rcu: move TREE_RCU from softirq to kthread) introduced performance regression. 
In an AIM7 test, this commit degraded performance by about 40%. The commit runs rcu callbacks in a kthread instead of softirq. We observed a high rate of context switches caused by this. Our test system has 64 CPUs and HZ is 1000, so we saw more than 64k context switches per second caused by RCU's per-CPU kthread. A trace showed that most of the time the RCU per-CPU kthread doesn't actually handle any callbacks, but instead just does a very small amount of work handling grace periods. This means that RCU's per-CPU kthreads are making the scheduler do quite a bit of work in order to allow a very small amount of RCU-related processing to be done. Alex Shi's analysis determined that this slowdown is due to lock contention within the scheduler. Unfortunately, as Peter Zijlstra points out, the scheduler's real-time semantics require global action, which means that this contention is inherent in real-time scheduling. (Yes, perhaps someone will come up with a workaround -- otherwise, -rt is not going to do well on large SMP systems -- but this patch will work around this issue in the meantime. And "the meantime" might well be forever.) This patch therefore re-introduces softirq processing to RCU, but only for core RCU work. RCU callbacks are still executed in kthread context, so that only a small amount of RCU work runs in softirq context in the common case. This should minimize ksoftirqd execution, allowing us to skip boosting of ksoftirqd for CONFIG_RCU_BOOST=y kernels. Signed-off-by: Shaohua Li Tested-by: "Alex,Shi" Signed-off-by: Paul E. McKenney --- Documentation/filesystems/proc.txt | 1 + include/linux/interrupt.h | 1 + include/trace/events/irq.h | 3 ++- kernel/rcutree.c | 23 +++++++++++++++++++---- kernel/rcutree.h | 1 + kernel/rcutree_plugin.h | 9 +++++++++ kernel/softirq.c | 2 +- tools/perf/util/trace-event-parse.c | 1 + 8 files changed, 35 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index f48178024067..db3b1aba32a3 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -843,6 +843,7 @@ Provides counts of softirq handlers serviced since boot time, for each cpu.
TASKLET: 0 0 0 290 SCHED: 27035 26983 26971 26746 HRTIMER: 0 0 0 0 + RCU: 1678 1769 2178 2250 1.3 IDE devices in /proc/ide diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 6c12989839d9..f6efed0039ed 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -414,6 +414,7 @@ enum TASKLET_SOFTIRQ, SCHED_SOFTIRQ, HRTIMER_SOFTIRQ, + RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ NR_SOFTIRQS }; diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index ae045ca7d356..1c09820df585 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -20,7 +20,8 @@ struct softirq_action; softirq_name(BLOCK_IOPOLL), \ softirq_name(TASKLET), \ softirq_name(SCHED), \ - softirq_name(HRTIMER)) + softirq_name(HRTIMER), \ + softirq_name(RCU)) /** * irq_handler_entry - called immediately before the irq action handler diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 0a8ec5b2e208..ae5c9ea68662 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -100,6 +100,7 @@ static char rcu_kthreads_spawnable; static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); static void invoke_rcu_cpu_kthread(void); +static void __invoke_rcu_cpu_kthread(void); #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ @@ -1442,13 +1443,21 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) } /* If there are callbacks ready, invoke them. */ - rcu_do_batch(rsp, rdp); + if (cpu_has_callbacks_ready_to_invoke(rdp)) + __invoke_rcu_cpu_kthread(); +} + +static void rcu_kthread_do_work(void) +{ + rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); + rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); + rcu_preempt_do_callbacks(); } /* * Do softirq processing for the current CPU. */ -static void rcu_process_callbacks(void) +static void rcu_process_callbacks(struct softirq_action *unused) { __rcu_process_callbacks(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); @@ -1465,7 +1474,7 @@ static void rcu_process_callbacks(void) * the current CPU with interrupts disabled, the rcu_cpu_kthread_task * cannot disappear out from under us. */ -static void invoke_rcu_cpu_kthread(void) +static void __invoke_rcu_cpu_kthread(void) { unsigned long flags; @@ -1479,6 +1488,11 @@ static void invoke_rcu_cpu_kthread(void) local_irq_restore(flags); } +static void invoke_rcu_cpu_kthread(void) +{ + raise_softirq(RCU_SOFTIRQ); +} + /* * Wake up the specified per-rcu_node-structure kthread. 
* Because the per-rcu_node kthreads are immortal, we don't need @@ -1613,7 +1627,7 @@ static int rcu_cpu_kthread(void *arg) *workp = 0; local_irq_restore(flags); if (work) - rcu_process_callbacks(); + rcu_kthread_do_work(); local_bh_enable(); if (*workp != 0) spincnt++; @@ -2387,6 +2401,7 @@ void __init rcu_init(void) rcu_init_one(&rcu_sched_state, &rcu_sched_data); rcu_init_one(&rcu_bh_state, &rcu_bh_data); __rcu_init_preempt(); + open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); /* * We don't need protection against CPU-hotplug here because diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 7b9a08b4aaea..0fed6b934d2a 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -439,6 +439,7 @@ static void rcu_preempt_offline_cpu(int cpu); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ static void rcu_preempt_check_callbacks(int cpu); static void rcu_preempt_process_callbacks(void); +static void rcu_preempt_do_callbacks(void); void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index ea2e2fb79e81..38d09c5f2b41 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -602,6 +602,11 @@ static void rcu_preempt_process_callbacks(void) &__get_cpu_var(rcu_preempt_data)); } +static void rcu_preempt_do_callbacks(void) +{ + rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data)); +} + /* * Queue a preemptible-RCU callback for invocation after a grace period. */ @@ -997,6 +1002,10 @@ static void rcu_preempt_process_callbacks(void) { } +static void rcu_preempt_do_callbacks(void) +{ +} + /* * Wait for an rcu-preempt grace period, but make it happen quickly. * But because preemptible RCU does not exist, map to rcu-sched. 
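Before the softirq.c hunk that names the new vector, here is a small user-space model of the open_softirq()/raise_softirq() pattern the patch relies on: raising the softirq is cheap and can be done from the old invoke_rcu_cpu_kthread() call sites, while the registered handler runs later from a dispatch loop. This is an illustrative sketch only, not kernel code; the *_MODEL macros, the *_model() helpers and rcu_core_work() are invented names, whereas the real calls are open_softirq(RCU_SOFTIRQ, rcu_process_callbacks) in rcu_init() and raise_softirq(RCU_SOFTIRQ) in invoke_rcu_cpu_kthread() above.

    #include <stdbool.h>
    #include <stdio.h>

    #define NR_SOFTIRQS_MODEL 10
    #define RCU_SOFTIRQ_MODEL 9

    typedef void (*softirq_handler)(void);

    static softirq_handler handlers[NR_SOFTIRQS_MODEL];
    static bool pending[NR_SOFTIRQS_MODEL];

    /* models open_softirq(): register a handler once at init time */
    static void open_softirq_model(int nr, softirq_handler h) { handlers[nr] = h; }

    /* models raise_softirq(): cheap, just marks the vector pending */
    static void raise_softirq_model(int nr) { pending[nr] = true; }

    /* models the softirq dispatch: run whatever became pending since last time */
    static void do_softirq_model(void)
    {
        for (int nr = 0; nr < NR_SOFTIRQS_MODEL; nr++) {
            if (pending[nr] && handlers[nr]) {
                pending[nr] = false;
                handlers[nr]();
            }
        }
    }

    /* stands in for rcu_process_callbacks(): core grace-period work only;
     * actual callback invocation stays in the per-CPU kthreads */
    static void rcu_core_work(void) { puts("RCU core work ran in softirq context"); }

    int main(void)
    {
        open_softirq_model(RCU_SOFTIRQ_MODEL, rcu_core_work);
        raise_softirq_model(RCU_SOFTIRQ_MODEL);
        do_softirq_model();
        return 0;
    }

In the kernel the dispatch loop is the normal softirq machinery run on interrupt exit or by ksoftirqd; the model just shows why raising a softirq is cheap enough to replace the wake_up-based kthread invocation for the core work.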
diff --git a/kernel/softirq.c b/kernel/softirq.c index 13960170cad4..40cf63ddd4b3 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -58,7 +58,7 @@ DEFINE_PER_CPU(struct task_struct *, ksoftirqd); char *softirq_to_name[NR_SOFTIRQS] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", - "TASKLET", "SCHED", "HRTIMER" + "TASKLET", "SCHED", "HRTIMER", "RCU" }; /* diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 1e88485c16a0..0a7ed5b5e281 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -2187,6 +2187,7 @@ static const struct flag flags[] = { { "TASKLET_SOFTIRQ", 6 }, { "SCHED_SOFTIRQ", 7 }, { "HRTIMER_SOFTIRQ", 8 }, + { "RCU_SOFTIRQ", 9 }, { "HRTIMER_NORESTART", 0 }, { "HRTIMER_RESTART", 1 }, -- cgit v1.2.3 From a59ec1e7ff98cc4365d5b1bff4e7102e86b5716b Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Wed, 15 Jun 2011 15:08:11 -0700 Subject: backlight: new driver for the ADP8870 backlight devices Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .../testing/sysfs-class-backlight-driver-adp8870 | 56 ++ drivers/video/backlight/Kconfig | 12 + drivers/video/backlight/Makefile | 1 + drivers/video/backlight/adp8870_bl.c | 1011 ++++++++++++++++++++ include/linux/i2c/adp8870.h | 153 +++ 5 files changed, 1233 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 create mode 100644 drivers/video/backlight/adp8870_bl.c create mode 100644 include/linux/i2c/adp8870.h (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 b/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 new file mode 100644 index 000000000000..aa11dbdd794b --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 @@ -0,0 +1,56 @@ +What: /sys/class/backlight//_max +What: /sys/class/backlight//l1_daylight_max +What: /sys/class/backlight//l2_bright_max +What: /sys/class/backlight//l3_office_max +What: /sys/class/backlight//l4_indoor_max +What: /sys/class/backlight//l5_dark_max +Date: Mai 2011 +KernelVersion: 2.6.40 +Contact: device-drivers-devel@blackfin.uclinux.org +Description: + Control the maximum brightness for + on this . Values are between 0 and 127. This file + will also show the brightness level stored for this + . + +What: /sys/class/backlight//_dim +What: /sys/class/backlight//l2_bright_dim +What: /sys/class/backlight//l3_office_dim +What: /sys/class/backlight//l4_indoor_dim +What: /sys/class/backlight//l5_dark_dim +Date: Mai 2011 +KernelVersion: 2.6.40 +Contact: device-drivers-devel@blackfin.uclinux.org +Description: + Control the dim brightness for + on this . Values are between 0 and 127, typically + set to 0. Full off when the backlight is disabled. + This file will also show the dim brightness level stored for + this . + +What: /sys/class/backlight//ambient_light_level +Date: Mai 2011 +KernelVersion: 2.6.40 +Contact: device-drivers-devel@blackfin.uclinux.org +Description: + Get conversion value of the light sensor. + This value is updated every 80 ms (when the light sensor + is enabled). Returns integer between 0 (dark) and + 8000 (max ambient brightness) + +What: /sys/class/backlight//ambient_light_zone +Date: Mai 2011 +KernelVersion: 2.6.40 +Contact: device-drivers-devel@blackfin.uclinux.org +Description: + Get/Set current ambient light zone. 
Reading returns + integer between 1..5 (1 = daylight, 2 = bright, ..., 5 = dark). + Writing a value between 1..5 forces the backlight controller + to enter the corresponding ambient light zone. + Writing 0 returns to normal/automatic ambient light level + operation. The ambient light sensing feature on these devices + is an extension to the API documented in + Documentation/ABI/stable/sysfs-class-backlight. + It can be enabled by writing the value stored in + /sys/class/backlight//max_brightness to + /sys/class/backlight//brightness. \ No newline at end of file diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index 0c9373bedd1f..2d93c8d61ad5 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -302,6 +302,18 @@ config BACKLIGHT_ADP8860 To compile this driver as a module, choose M here: the module will be called adp8860_bl. +config BACKLIGHT_ADP8870 + tristate "Backlight Driver for ADP8870 using WLED" + depends on BACKLIGHT_CLASS_DEVICE && I2C + select NEW_LEDS + select LEDS_CLASS + help + If you have a LCD backlight connected to the ADP8870, + say Y here to enable this driver. + + To compile this driver as a module, choose M here: the module will + be called adp8870_bl. + config BACKLIGHT_88PM860X tristate "Backlight Driver for 88PM8606 using WLED" depends on MFD_88PM860X diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile index b9ca8490df87..ee72adb8786e 100644 --- a/drivers/video/backlight/Makefile +++ b/drivers/video/backlight/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_BACKLIGHT_WM831X) += wm831x_bl.o obj-$(CONFIG_BACKLIGHT_ADX) += adx_bl.o obj-$(CONFIG_BACKLIGHT_ADP5520) += adp5520_bl.o obj-$(CONFIG_BACKLIGHT_ADP8860) += adp8860_bl.o +obj-$(CONFIG_BACKLIGHT_ADP8870) += adp8870_bl.o obj-$(CONFIG_BACKLIGHT_88PM860X) += 88pm860x_bl.o obj-$(CONFIG_BACKLIGHT_PCF50633) += pcf50633-backlight.o diff --git a/drivers/video/backlight/adp8870_bl.c b/drivers/video/backlight/adp8870_bl.c new file mode 100644 index 000000000000..e320b62576e2 --- /dev/null +++ b/drivers/video/backlight/adp8870_bl.c @@ -0,0 +1,1011 @@ +/* + * Backlight driver for Analog Devices ADP8870 Backlight Devices + * + * Copyright 2009-2011 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#define ADP8870_EXT_FEATURES +#define ADP8870_USE_LEDS + + +#define ADP8870_MFDVID 0x00 /* Manufacturer and device ID */ +#define ADP8870_MDCR 0x01 /* Device mode and status */ +#define ADP8870_INT_STAT 0x02 /* Interrupts status */ +#define ADP8870_INT_EN 0x03 /* Interrupts enable */ +#define ADP8870_CFGR 0x04 /* Configuration register */ +#define ADP8870_BLSEL 0x05 /* Sink enable backlight or independent */ +#define ADP8870_PWMLED 0x06 /* PWM Enable Selection Register */ +#define ADP8870_BLOFF 0x07 /* Backlight off timeout */ +#define ADP8870_BLDIM 0x08 /* Backlight dim timeout */ +#define ADP8870_BLFR 0x09 /* Backlight fade in and out rates */ +#define ADP8870_BLMX1 0x0A /* Backlight (Brightness Level 1-daylight) maximum current */ +#define ADP8870_BLDM1 0x0B /* Backlight (Brightness Level 1-daylight) dim current */ +#define ADP8870_BLMX2 0x0C /* Backlight (Brightness Level 2-bright) maximum current */ +#define ADP8870_BLDM2 0x0D /* Backlight (Brightness Level 2-bright) dim current */ +#define ADP8870_BLMX3 0x0E /* Backlight (Brightness Level 3-office) maximum current */ +#define ADP8870_BLDM3 0x0F /* Backlight (Brightness Level 3-office) dim current */ +#define ADP8870_BLMX4 0x10 /* Backlight (Brightness Level 4-indoor) maximum current */ +#define ADP8870_BLDM4 0x11 /* Backlight (Brightness Level 4-indoor) dim current */ +#define ADP8870_BLMX5 0x12 /* Backlight (Brightness Level 5-dark) maximum current */ +#define ADP8870_BLDM5 0x13 /* Backlight (Brightness Level 5-dark) dim current */ +#define ADP8870_ISCLAW 0x1A /* Independent sink current fade law register */ +#define ADP8870_ISCC 0x1B /* Independent sink current control register */ +#define ADP8870_ISCT1 0x1C /* Independent Sink Current Timer Register LED[7:5] */ +#define ADP8870_ISCT2 0x1D /* Independent Sink Current Timer Register LED[4:1] */ +#define ADP8870_ISCF 0x1E /* Independent sink current fade register */ +#define ADP8870_ISC1 0x1F /* Independent Sink Current LED1 */ +#define ADP8870_ISC2 0x20 /* Independent Sink Current LED2 */ +#define ADP8870_ISC3 0x21 /* Independent Sink Current LED3 */ +#define ADP8870_ISC4 0x22 /* Independent Sink Current LED4 */ +#define ADP8870_ISC5 0x23 /* Independent Sink Current LED5 */ +#define ADP8870_ISC6 0x24 /* Independent Sink Current LED6 */ +#define ADP8870_ISC7 0x25 /* Independent Sink Current LED7 (Brightness Level 1-daylight) */ +#define ADP8870_ISC7_L2 0x26 /* Independent Sink Current LED7 (Brightness Level 2-bright) */ +#define ADP8870_ISC7_L3 0x27 /* Independent Sink Current LED7 (Brightness Level 3-office) */ +#define ADP8870_ISC7_L4 0x28 /* Independent Sink Current LED7 (Brightness Level 4-indoor) */ +#define ADP8870_ISC7_L5 0x29 /* Independent Sink Current LED7 (Brightness Level 5-dark) */ +#define ADP8870_CMP_CTL 0x2D /* ALS Comparator Control Register */ +#define ADP8870_ALS1_EN 0x2E /* Main ALS comparator level enable */ +#define ADP8870_ALS2_EN 0x2F /* Second ALS comparator level enable */ +#define ADP8870_ALS1_STAT 0x30 /* Main ALS Comparator Status Register */ +#define ADP8870_ALS2_STAT 0x31 /* Second ALS Comparator Status Register */ +#define ADP8870_L2TRP 0x32 /* L2 comparator reference */ +#define ADP8870_L2HYS 0x33 /* L2 hysteresis */ +#define ADP8870_L3TRP 0x34 /* L3 comparator reference */ +#define ADP8870_L3HYS 0x35 /* L3 hysteresis */ +#define ADP8870_L4TRP 0x36 /* L4 comparator reference */ +#define ADP8870_L4HYS 0x37 /* L4 
hysteresis */ +#define ADP8870_L5TRP 0x38 /* L5 comparator reference */ +#define ADP8870_L5HYS 0x39 /* L5 hysteresis */ +#define ADP8870_PH1LEVL 0x40 /* First phototransistor ambient light level-low byte register */ +#define ADP8870_PH1LEVH 0x41 /* First phototransistor ambient light level-high byte register */ +#define ADP8870_PH2LEVL 0x42 /* Second phototransistor ambient light level-low byte register */ +#define ADP8870_PH2LEVH 0x43 /* Second phototransistor ambient light level-high byte register */ + +#define ADP8870_MANUFID 0x3 /* Analog Devices AD8870 Manufacturer and device ID */ +#define ADP8870_DEVID(x) ((x) & 0xF) +#define ADP8870_MANID(x) ((x) >> 4) + +/* MDCR Device mode and status */ +#define D7ALSEN (1 << 7) +#define INT_CFG (1 << 6) +#define NSTBY (1 << 5) +#define DIM_EN (1 << 4) +#define GDWN_DIS (1 << 3) +#define SIS_EN (1 << 2) +#define CMP_AUTOEN (1 << 1) +#define BLEN (1 << 0) + +/* ADP8870_ALS1_EN Main ALS comparator level enable */ +#define L5_EN (1 << 3) +#define L4_EN (1 << 2) +#define L3_EN (1 << 1) +#define L2_EN (1 << 0) + +#define CFGR_BLV_SHIFT 3 +#define CFGR_BLV_MASK 0x7 +#define ADP8870_FLAG_LED_MASK 0xFF + +#define FADE_VAL(in, out) ((0xF & (in)) | ((0xF & (out)) << 4)) +#define BL_CFGR_VAL(law, blv) ((((blv) & CFGR_BLV_MASK) << CFGR_BLV_SHIFT) | ((0x3 & (law)) << 1)) +#define ALS_CMPR_CFG_VAL(filt) ((0x7 & (filt)) << 1) + +struct adp8870_bl { + struct i2c_client *client; + struct backlight_device *bl; + struct adp8870_led *led; + struct adp8870_backlight_platform_data *pdata; + struct mutex lock; + unsigned long cached_daylight_max; + int id; + int revid; + int current_brightness; +}; + +struct adp8870_led { + struct led_classdev cdev; + struct work_struct work; + struct i2c_client *client; + enum led_brightness new_brightness; + int id; + int flags; +}; + +static int adp8870_read(struct i2c_client *client, int reg, uint8_t *val) +{ + int ret; + + ret = i2c_smbus_read_byte_data(client, reg); + if (ret < 0) { + dev_err(&client->dev, "failed reading at 0x%02x\n", reg); + return ret; + } + + *val = ret; + return 0; +} + + +static int adp8870_write(struct i2c_client *client, u8 reg, u8 val) +{ + int ret = i2c_smbus_write_byte_data(client, reg, val); + if (ret) + dev_err(&client->dev, "failed to write\n"); + + return ret; +} + +static int adp8870_set_bits(struct i2c_client *client, int reg, uint8_t bit_mask) +{ + struct adp8870_bl *data = i2c_get_clientdata(client); + uint8_t reg_val; + int ret; + + mutex_lock(&data->lock); + + ret = adp8870_read(client, reg, ®_val); + + if (!ret && ((reg_val & bit_mask) == 0)) { + reg_val |= bit_mask; + ret = adp8870_write(client, reg, reg_val); + } + + mutex_unlock(&data->lock); + return ret; +} + +static int adp8870_clr_bits(struct i2c_client *client, int reg, uint8_t bit_mask) +{ + struct adp8870_bl *data = i2c_get_clientdata(client); + uint8_t reg_val; + int ret; + + mutex_lock(&data->lock); + + ret = adp8870_read(client, reg, ®_val); + + if (!ret && (reg_val & bit_mask)) { + reg_val &= ~bit_mask; + ret = adp8870_write(client, reg, reg_val); + } + + mutex_unlock(&data->lock); + return ret; +} + +/* + * Independent sink / LED + */ +#if defined(ADP8870_USE_LEDS) +static void adp8870_led_work(struct work_struct *work) +{ + struct adp8870_led *led = container_of(work, struct adp8870_led, work); + adp8870_write(led->client, ADP8870_ISC1 + led->id - 1, + led->new_brightness >> 1); +} + +static void adp8870_led_set(struct led_classdev *led_cdev, + enum led_brightness value) +{ + struct adp8870_led *led; + + led = 
container_of(led_cdev, struct adp8870_led, cdev); + led->new_brightness = value; + /* + * Use workqueue for IO since I2C operations can sleep. + */ + schedule_work(&led->work); +} + +static int adp8870_led_setup(struct adp8870_led *led) +{ + struct i2c_client *client = led->client; + int ret = 0; + + ret = adp8870_write(client, ADP8870_ISC1 + led->id - 1, 0); + if (ret) + return ret; + + ret = adp8870_set_bits(client, ADP8870_ISCC, 1 << (led->id - 1)); + if (ret) + return ret; + + if (led->id > 4) + ret = adp8870_set_bits(client, ADP8870_ISCT1, + (led->flags & 0x3) << ((led->id - 5) * 2)); + else + ret = adp8870_set_bits(client, ADP8870_ISCT2, + (led->flags & 0x3) << ((led->id - 1) * 2)); + + return ret; +} + +static int __devinit adp8870_led_probe(struct i2c_client *client) +{ + struct adp8870_backlight_platform_data *pdata = + client->dev.platform_data; + struct adp8870_bl *data = i2c_get_clientdata(client); + struct adp8870_led *led, *led_dat; + struct led_info *cur_led; + int ret, i; + + + led = kcalloc(pdata->num_leds, sizeof(*led), GFP_KERNEL); + if (led == NULL) { + dev_err(&client->dev, "failed to alloc memory\n"); + return -ENOMEM; + } + + ret = adp8870_write(client, ADP8870_ISCLAW, pdata->led_fade_law); + if (ret) + goto err_free; + + ret = adp8870_write(client, ADP8870_ISCT1, + (pdata->led_on_time & 0x3) << 6); + if (ret) + goto err_free; + + ret = adp8870_write(client, ADP8870_ISCF, + FADE_VAL(pdata->led_fade_in, pdata->led_fade_out)); + if (ret) + goto err_free; + + for (i = 0; i < pdata->num_leds; ++i) { + cur_led = &pdata->leds[i]; + led_dat = &led[i]; + + led_dat->id = cur_led->flags & ADP8870_FLAG_LED_MASK; + + if (led_dat->id > 7 || led_dat->id < 1) { + dev_err(&client->dev, "Invalid LED ID %d\n", + led_dat->id); + goto err; + } + + if (pdata->bl_led_assign & (1 << (led_dat->id - 1))) { + dev_err(&client->dev, "LED %d used by Backlight\n", + led_dat->id); + goto err; + } + + led_dat->cdev.name = cur_led->name; + led_dat->cdev.default_trigger = cur_led->default_trigger; + led_dat->cdev.brightness_set = adp8870_led_set; + led_dat->cdev.brightness = LED_OFF; + led_dat->flags = cur_led->flags >> FLAG_OFFT_SHIFT; + led_dat->client = client; + led_dat->new_brightness = LED_OFF; + INIT_WORK(&led_dat->work, adp8870_led_work); + + ret = led_classdev_register(&client->dev, &led_dat->cdev); + if (ret) { + dev_err(&client->dev, "failed to register LED %d\n", + led_dat->id); + goto err; + } + + ret = adp8870_led_setup(led_dat); + if (ret) { + dev_err(&client->dev, "failed to write\n"); + i++; + goto err; + } + } + + data->led = led; + + return 0; + + err: + for (i = i - 1; i >= 0; --i) { + led_classdev_unregister(&led[i].cdev); + cancel_work_sync(&led[i].work); + } + + err_free: + kfree(led); + + return ret; +} + +static int __devexit adp8870_led_remove(struct i2c_client *client) +{ + struct adp8870_backlight_platform_data *pdata = + client->dev.platform_data; + struct adp8870_bl *data = i2c_get_clientdata(client); + int i; + + for (i = 0; i < pdata->num_leds; i++) { + led_classdev_unregister(&data->led[i].cdev); + cancel_work_sync(&data->led[i].work); + } + + kfree(data->led); + return 0; +} +#else +static int __devinit adp8870_led_probe(struct i2c_client *client) +{ + return 0; +} + +static int __devexit adp8870_led_remove(struct i2c_client *client) +{ + return 0; +} +#endif + +static int adp8870_bl_set(struct backlight_device *bl, int brightness) +{ + struct adp8870_bl *data = bl_get_data(bl); + struct i2c_client *client = data->client; + int ret = 0; + + if 
(data->pdata->en_ambl_sens) { + if ((brightness > 0) && (brightness < ADP8870_MAX_BRIGHTNESS)) { + /* Disable Ambient Light auto adjust */ + ret = adp8870_clr_bits(client, ADP8870_MDCR, + CMP_AUTOEN); + if (ret) + return ret; + ret = adp8870_write(client, ADP8870_BLMX1, brightness); + if (ret) + return ret; + } else { + /* + * MAX_BRIGHTNESS -> Enable Ambient Light auto adjust + * restore daylight l1 sysfs brightness + */ + ret = adp8870_write(client, ADP8870_BLMX1, + data->cached_daylight_max); + if (ret) + return ret; + + ret = adp8870_set_bits(client, ADP8870_MDCR, + CMP_AUTOEN); + if (ret) + return ret; + } + } else { + ret = adp8870_write(client, ADP8870_BLMX1, brightness); + if (ret) + return ret; + } + + if (data->current_brightness && brightness == 0) + ret = adp8870_set_bits(client, + ADP8870_MDCR, DIM_EN); + else if (data->current_brightness == 0 && brightness) + ret = adp8870_clr_bits(client, + ADP8870_MDCR, DIM_EN); + + if (!ret) + data->current_brightness = brightness; + + return ret; +} + +static int adp8870_bl_update_status(struct backlight_device *bl) +{ + int brightness = bl->props.brightness; + if (bl->props.power != FB_BLANK_UNBLANK) + brightness = 0; + + if (bl->props.fb_blank != FB_BLANK_UNBLANK) + brightness = 0; + + return adp8870_bl_set(bl, brightness); +} + +static int adp8870_bl_get_brightness(struct backlight_device *bl) +{ + struct adp8870_bl *data = bl_get_data(bl); + + return data->current_brightness; +} + +static const struct backlight_ops adp8870_bl_ops = { + .update_status = adp8870_bl_update_status, + .get_brightness = adp8870_bl_get_brightness, +}; + +static int adp8870_bl_setup(struct backlight_device *bl) +{ + struct adp8870_bl *data = bl_get_data(bl); + struct i2c_client *client = data->client; + struct adp8870_backlight_platform_data *pdata = data->pdata; + int ret = 0; + + ret = adp8870_write(client, ADP8870_BLSEL, ~pdata->bl_led_assign); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_PWMLED, pdata->pwm_assign); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLMX1, pdata->l1_daylight_max); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLDM1, pdata->l1_daylight_dim); + if (ret) + return ret; + + if (pdata->en_ambl_sens) { + data->cached_daylight_max = pdata->l1_daylight_max; + ret = adp8870_write(client, ADP8870_BLMX2, + pdata->l2_bright_max); + if (ret) + return ret; + ret = adp8870_write(client, ADP8870_BLDM2, + pdata->l2_bright_dim); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLMX3, + pdata->l3_office_max); + if (ret) + return ret; + ret = adp8870_write(client, ADP8870_BLDM3, + pdata->l3_office_dim); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLMX4, + pdata->l4_indoor_max); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLDM4, + pdata->l4_indor_dim); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLMX5, + pdata->l5_dark_max); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLDM5, + pdata->l5_dark_dim); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L2TRP, pdata->l2_trip); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L2HYS, pdata->l2_hyst); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L3TRP, pdata->l3_trip); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L3HYS, pdata->l3_hyst); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L4TRP, pdata->l4_trip); + if (ret) + return ret; + + ret = adp8870_write(client, 
ADP8870_L4HYS, pdata->l4_hyst); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L5TRP, pdata->l5_trip); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L5HYS, pdata->l5_hyst); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_ALS1_EN, L5_EN | L4_EN | + L3_EN | L2_EN); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_CMP_CTL, + ALS_CMPR_CFG_VAL(pdata->abml_filt)); + if (ret) + return ret; + } + + ret = adp8870_write(client, ADP8870_CFGR, + BL_CFGR_VAL(pdata->bl_fade_law, 0)); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLFR, FADE_VAL(pdata->bl_fade_in, + pdata->bl_fade_out)); + if (ret) + return ret; + /* + * ADP8870 Rev0 requires GDWN_DIS bit set + */ + + ret = adp8870_set_bits(client, ADP8870_MDCR, BLEN | DIM_EN | NSTBY | + (data->revid == 0 ? GDWN_DIS : 0)); + + return ret; +} + +static ssize_t adp8870_show(struct device *dev, char *buf, int reg) +{ + struct adp8870_bl *data = dev_get_drvdata(dev); + int error; + uint8_t reg_val; + + mutex_lock(&data->lock); + error = adp8870_read(data->client, reg, ®_val); + mutex_unlock(&data->lock); + + if (error < 0) + return error; + + return sprintf(buf, "%u\n", reg_val); +} + +static ssize_t adp8870_store(struct device *dev, const char *buf, + size_t count, int reg) +{ + struct adp8870_bl *data = dev_get_drvdata(dev); + unsigned long val; + int ret; + + ret = strict_strtoul(buf, 10, &val); + if (ret) + return ret; + + mutex_lock(&data->lock); + adp8870_write(data->client, reg, val); + mutex_unlock(&data->lock); + + return count; +} + +static ssize_t adp8870_bl_l5_dark_max_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLMX5); +} + +static ssize_t adp8870_bl_l5_dark_max_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLMX5); +} +static DEVICE_ATTR(l5_dark_max, 0664, adp8870_bl_l5_dark_max_show, + adp8870_bl_l5_dark_max_store); + + +static ssize_t adp8870_bl_l4_indoor_max_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLMX4); +} + +static ssize_t adp8870_bl_l4_indoor_max_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLMX4); +} +static DEVICE_ATTR(l4_indoor_max, 0664, adp8870_bl_l4_indoor_max_show, + adp8870_bl_l4_indoor_max_store); + + +static ssize_t adp8870_bl_l3_office_max_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLMX3); +} + +static ssize_t adp8870_bl_l3_office_max_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLMX3); +} + +static DEVICE_ATTR(l3_office_max, 0664, adp8870_bl_l3_office_max_show, + adp8870_bl_l3_office_max_store); + +static ssize_t adp8870_bl_l2_bright_max_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLMX2); +} + +static ssize_t adp8870_bl_l2_bright_max_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLMX2); +} +static DEVICE_ATTR(l2_bright_max, 0664, adp8870_bl_l2_bright_max_show, + adp8870_bl_l2_bright_max_store); + +static ssize_t adp8870_bl_l1_daylight_max_show(struct device *dev, + struct device_attribute 
*attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLMX1); +} + +static ssize_t adp8870_bl_l1_daylight_max_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct adp8870_bl *data = dev_get_drvdata(dev); + int ret = strict_strtoul(buf, 10, &data->cached_daylight_max); + if (ret) + return ret; + + return adp8870_store(dev, buf, count, ADP8870_BLMX1); +} +static DEVICE_ATTR(l1_daylight_max, 0664, adp8870_bl_l1_daylight_max_show, + adp8870_bl_l1_daylight_max_store); + +static ssize_t adp8870_bl_l5_dark_dim_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLDM5); +} + +static ssize_t adp8870_bl_l5_dark_dim_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLDM5); +} +static DEVICE_ATTR(l5_dark_dim, 0664, adp8870_bl_l5_dark_dim_show, + adp8870_bl_l5_dark_dim_store); + +static ssize_t adp8870_bl_l4_indoor_dim_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLDM4); +} + +static ssize_t adp8870_bl_l4_indoor_dim_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLDM4); +} +static DEVICE_ATTR(l4_indoor_dim, 0664, adp8870_bl_l4_indoor_dim_show, + adp8870_bl_l4_indoor_dim_store); + + +static ssize_t adp8870_bl_l3_office_dim_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLDM3); +} + +static ssize_t adp8870_bl_l3_office_dim_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLDM3); +} +static DEVICE_ATTR(l3_office_dim, 0664, adp8870_bl_l3_office_dim_show, + adp8870_bl_l3_office_dim_store); + +static ssize_t adp8870_bl_l2_bright_dim_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLDM2); +} + +static ssize_t adp8870_bl_l2_bright_dim_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLDM2); +} +static DEVICE_ATTR(l2_bright_dim, 0664, adp8870_bl_l2_bright_dim_show, + adp8870_bl_l2_bright_dim_store); + +static ssize_t adp8870_bl_l1_daylight_dim_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLDM1); +} + +static ssize_t adp8870_bl_l1_daylight_dim_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLDM1); +} +static DEVICE_ATTR(l1_daylight_dim, 0664, adp8870_bl_l1_daylight_dim_show, + adp8870_bl_l1_daylight_dim_store); + +#ifdef ADP8870_EXT_FEATURES +static ssize_t adp8870_bl_ambient_light_level_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct adp8870_bl *data = dev_get_drvdata(dev); + int error; + uint8_t reg_val; + uint16_t ret_val; + + mutex_lock(&data->lock); + error = adp8870_read(data->client, ADP8870_PH1LEVL, ®_val); + if (error < 0) { + mutex_unlock(&data->lock); + return error; + } + ret_val = reg_val; + error = adp8870_read(data->client, ADP8870_PH1LEVH, ®_val); + mutex_unlock(&data->lock); + + if (error < 0) + return error; + + /* Return 13-bit conversion value for the first light sensor */ + ret_val += (reg_val & 0x1F) << 8; + + return 
sprintf(buf, "%u\n", ret_val); +} +static DEVICE_ATTR(ambient_light_level, 0444, + adp8870_bl_ambient_light_level_show, NULL); + +static ssize_t adp8870_bl_ambient_light_zone_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct adp8870_bl *data = dev_get_drvdata(dev); + int error; + uint8_t reg_val; + + mutex_lock(&data->lock); + error = adp8870_read(data->client, ADP8870_CFGR, ®_val); + mutex_unlock(&data->lock); + + if (error < 0) + return error; + + return sprintf(buf, "%u\n", + ((reg_val >> CFGR_BLV_SHIFT) & CFGR_BLV_MASK) + 1); +} + +static ssize_t adp8870_bl_ambient_light_zone_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct adp8870_bl *data = dev_get_drvdata(dev); + unsigned long val; + uint8_t reg_val; + int ret; + + ret = strict_strtoul(buf, 10, &val); + if (ret) + return ret; + + if (val == 0) { + /* Enable automatic ambient light sensing */ + adp8870_set_bits(data->client, ADP8870_MDCR, CMP_AUTOEN); + } else if ((val > 0) && (val < 6)) { + /* Disable automatic ambient light sensing */ + adp8870_clr_bits(data->client, ADP8870_MDCR, CMP_AUTOEN); + + /* Set user supplied ambient light zone */ + mutex_lock(&data->lock); + adp8870_read(data->client, ADP8870_CFGR, ®_val); + reg_val &= ~(CFGR_BLV_MASK << CFGR_BLV_SHIFT); + reg_val |= (val - 1) << CFGR_BLV_SHIFT; + adp8870_write(data->client, ADP8870_CFGR, reg_val); + mutex_unlock(&data->lock); + } + + return count; +} +static DEVICE_ATTR(ambient_light_zone, 0664, + adp8870_bl_ambient_light_zone_show, + adp8870_bl_ambient_light_zone_store); +#endif + +static struct attribute *adp8870_bl_attributes[] = { + &dev_attr_l5_dark_max.attr, + &dev_attr_l5_dark_dim.attr, + &dev_attr_l4_indoor_max.attr, + &dev_attr_l4_indoor_dim.attr, + &dev_attr_l3_office_max.attr, + &dev_attr_l3_office_dim.attr, + &dev_attr_l2_bright_max.attr, + &dev_attr_l2_bright_dim.attr, + &dev_attr_l1_daylight_max.attr, + &dev_attr_l1_daylight_dim.attr, +#ifdef ADP8870_EXT_FEATURES + &dev_attr_ambient_light_level.attr, + &dev_attr_ambient_light_zone.attr, +#endif + NULL +}; + +static const struct attribute_group adp8870_bl_attr_group = { + .attrs = adp8870_bl_attributes, +}; + +static int __devinit adp8870_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct backlight_properties props; + struct backlight_device *bl; + struct adp8870_bl *data; + struct adp8870_backlight_platform_data *pdata = + client->dev.platform_data; + uint8_t reg_val; + int ret; + + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_BYTE_DATA)) { + dev_err(&client->dev, "SMBUS Byte Data not Supported\n"); + return -EIO; + } + + if (!pdata) { + dev_err(&client->dev, "no platform data?\n"); + return -EINVAL; + } + + ret = adp8870_read(client, ADP8870_MFDVID, ®_val); + if (ret < 0) + return -EIO; + + if (ADP8870_MANID(reg_val) != ADP8870_MANUFID) { + dev_err(&client->dev, "failed to probe\n"); + return -ENODEV; + } + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + + data->revid = ADP8870_DEVID(reg_val); + data->client = client; + data->pdata = pdata; + data->id = id->driver_data; + data->current_brightness = 0; + i2c_set_clientdata(client, data); + + mutex_init(&data->lock); + + memset(&props, 0, sizeof(props)); + props.max_brightness = props.brightness = ADP8870_MAX_BRIGHTNESS; + bl = backlight_device_register(dev_driver_string(&client->dev), + &client->dev, data, &adp8870_bl_ops, &props); + if (IS_ERR(bl)) { + dev_err(&client->dev, "failed to 
register backlight\n"); + ret = PTR_ERR(bl); + goto out2; + } + + data->bl = bl; + + if (pdata->en_ambl_sens) + ret = sysfs_create_group(&bl->dev.kobj, + &adp8870_bl_attr_group); + + if (ret) { + dev_err(&client->dev, "failed to register sysfs\n"); + goto out1; + } + + ret = adp8870_bl_setup(bl); + if (ret) { + ret = -EIO; + goto out; + } + + backlight_update_status(bl); + + dev_info(&client->dev, "Rev.%d Backlight\n", data->revid); + + if (pdata->num_leds) + adp8870_led_probe(client); + + return 0; + +out: + if (data->pdata->en_ambl_sens) + sysfs_remove_group(&data->bl->dev.kobj, + &adp8870_bl_attr_group); +out1: + backlight_device_unregister(bl); +out2: + i2c_set_clientdata(client, NULL); + kfree(data); + + return ret; +} + +static int __devexit adp8870_remove(struct i2c_client *client) +{ + struct adp8870_bl *data = i2c_get_clientdata(client); + + adp8870_clr_bits(client, ADP8870_MDCR, NSTBY); + + if (data->led) + adp8870_led_remove(client); + + if (data->pdata->en_ambl_sens) + sysfs_remove_group(&data->bl->dev.kobj, + &adp8870_bl_attr_group); + + backlight_device_unregister(data->bl); + i2c_set_clientdata(client, NULL); + kfree(data); + + return 0; +} + +#ifdef CONFIG_PM +static int adp8870_i2c_suspend(struct i2c_client *client, pm_message_t message) +{ + adp8870_clr_bits(client, ADP8870_MDCR, NSTBY); + + return 0; +} + +static int adp8870_i2c_resume(struct i2c_client *client) +{ + adp8870_set_bits(client, ADP8870_MDCR, NSTBY); + + return 0; +} +#else +#define adp8870_i2c_suspend NULL +#define adp8870_i2c_resume NULL +#endif + +static const struct i2c_device_id adp8870_id[] = { + { "adp8870", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, adp8870_id); + +static struct i2c_driver adp8870_driver = { + .driver = { + .name = KBUILD_MODNAME, + }, + .probe = adp8870_probe, + .remove = __devexit_p(adp8870_remove), + .suspend = adp8870_i2c_suspend, + .resume = adp8870_i2c_resume, + .id_table = adp8870_id, +}; + +static int __init adp8870_init(void) +{ + return i2c_add_driver(&adp8870_driver); +} +module_init(adp8870_init); + +static void __exit adp8870_exit(void) +{ + i2c_del_driver(&adp8870_driver); +} +module_exit(adp8870_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Michael Hennerich "); +MODULE_DESCRIPTION("ADP8870 Backlight driver"); +MODULE_ALIAS("platform:adp8870-backlight"); diff --git a/include/linux/i2c/adp8870.h b/include/linux/i2c/adp8870.h new file mode 100644 index 000000000000..624dceccbd5b --- /dev/null +++ b/include/linux/i2c/adp8870.h @@ -0,0 +1,153 @@ +/* + * Definitions and platform data for Analog Devices + * Backlight drivers ADP8870 + * + * Copyright 2009-2010 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. 
+ */ + +#ifndef __LINUX_I2C_ADP8870_H +#define __LINUX_I2C_ADP8870_H + +#define ID_ADP8870 8870 + +#define ADP8870_MAX_BRIGHTNESS 0x7F +#define FLAG_OFFT_SHIFT 8 + +/* + * LEDs subdevice platform data + */ + +#define ADP8870_LED_DIS_BLINK (0 << FLAG_OFFT_SHIFT) +#define ADP8870_LED_OFFT_600ms (1 << FLAG_OFFT_SHIFT) +#define ADP8870_LED_OFFT_1200ms (2 << FLAG_OFFT_SHIFT) +#define ADP8870_LED_OFFT_1800ms (3 << FLAG_OFFT_SHIFT) + +#define ADP8870_LED_ONT_200ms 0 +#define ADP8870_LED_ONT_600ms 1 +#define ADP8870_LED_ONT_800ms 2 +#define ADP8870_LED_ONT_1200ms 3 + +#define ADP8870_LED_D7 (7) +#define ADP8870_LED_D6 (6) +#define ADP8870_LED_D5 (5) +#define ADP8870_LED_D4 (4) +#define ADP8870_LED_D3 (3) +#define ADP8870_LED_D2 (2) +#define ADP8870_LED_D1 (1) + +/* + * Backlight subdevice platform data + */ + +#define ADP8870_BL_D7 (1 << 6) +#define ADP8870_BL_D6 (1 << 5) +#define ADP8870_BL_D5 (1 << 4) +#define ADP8870_BL_D4 (1 << 3) +#define ADP8870_BL_D3 (1 << 2) +#define ADP8870_BL_D2 (1 << 1) +#define ADP8870_BL_D1 (1 << 0) + +#define ADP8870_FADE_T_DIS 0 /* Fade Timer Disabled */ +#define ADP8870_FADE_T_300ms 1 /* 0.3 Sec */ +#define ADP8870_FADE_T_600ms 2 +#define ADP8870_FADE_T_900ms 3 +#define ADP8870_FADE_T_1200ms 4 +#define ADP8870_FADE_T_1500ms 5 +#define ADP8870_FADE_T_1800ms 6 +#define ADP8870_FADE_T_2100ms 7 +#define ADP8870_FADE_T_2400ms 8 +#define ADP8870_FADE_T_2700ms 9 +#define ADP8870_FADE_T_3000ms 10 +#define ADP8870_FADE_T_3500ms 11 +#define ADP8870_FADE_T_4000ms 12 +#define ADP8870_FADE_T_4500ms 13 +#define ADP8870_FADE_T_5000ms 14 +#define ADP8870_FADE_T_5500ms 15 /* 5.5 Sec */ + +#define ADP8870_FADE_LAW_LINEAR 0 +#define ADP8870_FADE_LAW_SQUARE 1 +#define ADP8870_FADE_LAW_CUBIC1 2 +#define ADP8870_FADE_LAW_CUBIC2 3 + +#define ADP8870_BL_AMBL_FILT_80ms 0 /* Light sensor filter time */ +#define ADP8870_BL_AMBL_FILT_160ms 1 +#define ADP8870_BL_AMBL_FILT_320ms 2 +#define ADP8870_BL_AMBL_FILT_640ms 3 +#define ADP8870_BL_AMBL_FILT_1280ms 4 +#define ADP8870_BL_AMBL_FILT_2560ms 5 +#define ADP8870_BL_AMBL_FILT_5120ms 6 +#define ADP8870_BL_AMBL_FILT_10240ms 7 /* 10.24 sec */ + +/* + * Blacklight current 0..30mA + */ +#define ADP8870_BL_CUR_mA(I) ((I * 127) / 30) + +/* + * L2 comparator current 0..1106uA + */ +#define ADP8870_L2_COMP_CURR_uA(I) ((I * 255) / 1106) + +/* + * L3 comparator current 0..551uA + */ +#define ADP8870_L3_COMP_CURR_uA(I) ((I * 255) / 551) + +/* + * L4 comparator current 0..275uA + */ +#define ADP8870_L4_COMP_CURR_uA(I) ((I * 255) / 275) + +/* + * L5 comparator current 0..138uA + */ +#define ADP8870_L5_COMP_CURR_uA(I) ((I * 255) / 138) + +struct adp8870_backlight_platform_data { + u8 bl_led_assign; /* 1 = Backlight 0 = Individual LED */ + u8 pwm_assign; /* 1 = Enables PWM mode */ + + u8 bl_fade_in; /* Backlight Fade-In Timer */ + u8 bl_fade_out; /* Backlight Fade-Out Timer */ + u8 bl_fade_law; /* fade-on/fade-off transfer characteristic */ + + u8 en_ambl_sens; /* 1 = enable ambient light sensor */ + u8 abml_filt; /* Light sensor filter time */ + + u8 l1_daylight_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l1_daylight_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l2_bright_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l2_bright_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l3_office_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l3_office_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l4_indoor_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l4_indor_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 
l5_dark_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l5_dark_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + + u8 l2_trip; /* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */ + u8 l2_hyst; /* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */ + u8 l3_trip; /* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */ + u8 l3_hyst; /* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */ + u8 l4_trip; /* use L4_COMP_CURR_uA(I) 0 <= I <= 275 uA */ + u8 l4_hyst; /* use L4_COMP_CURR_uA(I) 0 <= I <= 275 uA */ + u8 l5_trip; /* use L5_COMP_CURR_uA(I) 0 <= I <= 138 uA */ + u8 l5_hyst; /* use L6_COMP_CURR_uA(I) 0 <= I <= 138 uA */ + + /** + * Independent Current Sinks / LEDS + * Sinks not assigned to the Backlight can be exposed to + * user space using the LEDS CLASS interface + */ + + int num_leds; + struct led_info *leds; + u8 led_fade_in; /* LED Fade-In Timer */ + u8 led_fade_out; /* LED Fade-Out Timer */ + u8 led_fade_law; /* fade-on/fade-off transfer characteristic */ + u8 led_on_time; +}; + +#endif /* __LINUX_I2C_ADP8870_H */ -- cgit v1.2.3 From a433658c30974fc87ba3ff52d7e4e6299762aa3d Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 15 Jun 2011 15:08:13 -0700 Subject: vmscan,memcg: memcg aware swap token Currently, memcg reclaim can disable swap token even if the swap token mm doesn't belong in its memory cgroup. It's slightly risky. If an admin creates very small mem-cgroup and silly guy runs contentious heavy memory pressure workload, every tasks are going to lose swap token and then system may become unresponsive. That's bad. This patch adds 'memcg' parameter into disable_swap_token(). and if the parameter doesn't match swap token, VM doesn't disable it. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: KOSAKI Motohiro Reviewed-by: KAMEZAWA Hiroyuki Reviewed-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++++ include/linux/swap.h | 8 ++--- mm/memcontrol.c | 16 ++++----- mm/thrash.c | 87 ++++++++++++++++++++++++++++++++++++---------- mm/vmscan.c | 4 +-- 5 files changed, 85 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 9724a38ee69d..50940da6adf3 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -84,6 +84,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page); extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); +extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm); static inline int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) @@ -246,6 +247,11 @@ static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) return NULL; } +static inline struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) +{ + return NULL; +} + static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem) { return 1; diff --git a/include/linux/swap.h b/include/linux/swap.h index 384eb5fe530b..e70564647039 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -358,6 +358,7 @@ struct backing_dev_info; extern struct mm_struct *swap_token_mm; extern void grab_swap_token(struct mm_struct *); extern void __put_swap_token(struct mm_struct *); +extern void disable_swap_token(struct mem_cgroup *memcg); static inline int has_swap_token(struct mm_struct *mm) { @@ -370,11 +371,6 @@ static inline void put_swap_token(struct mm_struct 
*mm) __put_swap_token(mm); } -static inline void disable_swap_token(void) -{ - put_swap_token(swap_token_mm); -} - #ifdef CONFIG_CGROUP_MEM_RES_CTLR extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout); @@ -500,7 +496,7 @@ static inline int has_swap_token(struct mm_struct *mm) return 0; } -static inline void disable_swap_token(void) +static inline void disable_swap_token(struct mem_cgroup *memcg) { } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index bd9052a5d3ad..e37c44d88d46 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -735,7 +735,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) struct mem_cgroup, css); } -static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) +struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) { struct mem_cgroup *mem = NULL; @@ -5414,18 +5414,16 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, struct cgroup *old_cont, struct task_struct *p) { - struct mm_struct *mm; + struct mm_struct *mm = get_task_mm(p); - if (!mc.to) - /* no need to move charge */ - return; - - mm = get_task_mm(p); if (mm) { - mem_cgroup_move_charge(mm); + if (mc.to) + mem_cgroup_move_charge(mm); + put_swap_token(mm); mmput(mm); } - mem_cgroup_clear_mc(); + if (mc.to) + mem_cgroup_clear_mc(); } #else /* !CONFIG_MMU */ static int mem_cgroup_can_attach(struct cgroup_subsys *ss, diff --git a/mm/thrash.c b/mm/thrash.c index 2372d4ed5dd8..6cdf86511385 100644 --- a/mm/thrash.c +++ b/mm/thrash.c @@ -21,11 +21,31 @@ #include #include #include +#include static DEFINE_SPINLOCK(swap_token_lock); struct mm_struct *swap_token_mm; +struct mem_cgroup *swap_token_memcg; static unsigned int global_faults; +#ifdef CONFIG_CGROUP_MEM_RES_CTLR +static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm) +{ + struct mem_cgroup *memcg; + + memcg = try_get_mem_cgroup_from_mm(mm); + if (memcg) + css_put(mem_cgroup_css(memcg)); + + return memcg; +} +#else +static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm) +{ + return NULL; +} +#endif + void grab_swap_token(struct mm_struct *mm) { int current_interval; @@ -38,40 +58,69 @@ void grab_swap_token(struct mm_struct *mm) return; /* First come first served */ - if (swap_token_mm == NULL) { - mm->token_priority = mm->token_priority + 2; - swap_token_mm = mm; + if (!swap_token_mm) + goto replace_token; + + if (mm == swap_token_mm) { + mm->token_priority += 2; goto out; } - if (mm != swap_token_mm) { - if (current_interval < mm->last_interval) - mm->token_priority++; - else { - if (likely(mm->token_priority > 0)) - mm->token_priority--; - } - /* Check if we deserve the token */ - if (mm->token_priority > swap_token_mm->token_priority) { - mm->token_priority += 2; - swap_token_mm = mm; - } - } else { - /* Token holder came in again! */ - mm->token_priority += 2; + if (current_interval < mm->last_interval) + mm->token_priority++; + else { + if (likely(mm->token_priority > 0)) + mm->token_priority--; } + /* Check if we deserve the token */ + if (mm->token_priority > swap_token_mm->token_priority) + goto replace_token; + out: mm->faultstamp = global_faults; mm->last_interval = current_interval; spin_unlock(&swap_token_lock); + return; + +replace_token: + mm->token_priority += 2; + swap_token_mm = mm; + swap_token_memcg = swap_token_memcg_from_mm(mm); + goto out; } /* Called on process exit. 
*/ void __put_swap_token(struct mm_struct *mm) { spin_lock(&swap_token_lock); - if (likely(mm == swap_token_mm)) + if (likely(mm == swap_token_mm)) { swap_token_mm = NULL; + swap_token_memcg = NULL; + } spin_unlock(&swap_token_lock); } + +static bool match_memcg(struct mem_cgroup *a, struct mem_cgroup *b) +{ + if (!a) + return true; + if (!b) + return true; + if (a == b) + return true; + return false; +} + +void disable_swap_token(struct mem_cgroup *memcg) +{ + /* memcg reclaim don't disable unrelated mm token. */ + if (match_memcg(memcg, swap_token_memcg)) { + spin_lock(&swap_token_lock); + if (match_memcg(memcg, swap_token_memcg)) { + swap_token_mm = NULL; + swap_token_memcg = NULL; + } + spin_unlock(&swap_token_lock); + } +} diff --git a/mm/vmscan.c b/mm/vmscan.c index faa0a088f9cc..dbe6ea321df4 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2081,7 +2081,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, for (priority = DEF_PRIORITY; priority >= 0; priority--) { sc->nr_scanned = 0; if (!priority) - disable_swap_token(); + disable_swap_token(sc->mem_cgroup); total_scanned += shrink_zones(priority, zonelist, sc); /* * Don't shrink slabs when reclaiming memory from @@ -2407,7 +2407,7 @@ loop_again: /* The swap token gets in the way of swapout... */ if (!priority) - disable_swap_token(); + disable_swap_token(NULL); all_zones_ok = 1; balanced = 0; -- cgit v1.2.3 From ac5622418bbff9cd3dc607aa57dfb4f62a7f2043 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jun 2011 15:08:17 -0700 Subject: kmsg_dump.h: fix build when CONFIG_PRINTK is disabled Fix when CONFIG_PRINTK is not enabled: include/linux/kmsg_dump.h:56: error: 'EINVAL' undeclared (first use in this function) include/linux/kmsg_dump.h:61: error: 'EINVAL' undeclared (first use in this function) Looks like commit 595dd3d8bf95 ("kmsg_dump: fix build for CONFIG_PRINTK=n") uses EINVAL without having the needed header file(s), but I'm sure that I build tested that patch also. oh well. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmsg_dump.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 2a0d7d651dc3..ee0c952188de 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -12,6 +12,7 @@ #ifndef _LINUX_KMSG_DUMP_H #define _LINUX_KMSG_DUMP_H +#include #include enum kmsg_dump_reason { -- cgit v1.2.3 From 32e45ff43eaf5c17f5a82c9ad358d515622c2562 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 15 Jun 2011 15:08:20 -0700 Subject: mm: increase RECLAIM_DISTANCE to 30 Recently, Robert Mueller reported (http://lkml.org/lkml/2010/9/12/236) that zone_reclaim_mode doesn't work properly on his new NUMA server (Dual Xeon E5520 + Intel S5520UR MB). He is using Cyrus IMAPd and it's built on a very traditional single-process model. * a master process which reads config files and manages the other process * multiple imapd processes, one per connection * multiple pop3d processes, one per connection * multiple lmtpd processes, one per connection * periodical "cleanup" processes. There are thousands of independent processes. The problem is, recent Intel motherboard turn on zone_reclaim_mode by default and traditional prefork model software don't work well on it. Unfortunatelly, such models are still typical even in the 21st century. We can't ignore them. This patch raises the zone_reclaim_mode threshold to 30. 30 doesn't have any specific meaning. 
but 20 means that one-hop QPI/Hypertransport and such relatively cheap 2-4 socket machine are often used for traditional servers as above. The intention is that these machines don't use zone_reclaim_mode. Note: ia64 and Power have arch specific RECLAIM_DISTANCE definitions. This patch doesn't change such high-end NUMA machine behavior. Dave Hansen said: : I know specifically of pieces of x86 hardware that set the information : in the BIOS to '21' *specifically* so they'll get the zone_reclaim_mode : behavior which that implies. : : They've done performance testing and run very large and scary benchmarks : to make sure that they _want_ this turned on. What this means for them : is that they'll probably be de-optimized, at least on newer versions of : the kernel. : : If you want to do this for particular systems, maybe _that_'s what we : should do. Have a list of specific configurations that need the : defaults overridden either because they're buggy, or they have an : unusual hardware configuration not really reflected in the distance : table. And later said: : The original change in the hardware tables was for the benefit of a : benchmark. Said benchmark isn't going to get run on mainline until the : next batch of enterprise distros drops, at which point the hardware where : this was done will be irrelevant for the benchmark. I'm sure any new : hardware will just set this distance to another yet arbitrary value to : make the kernel do what it wants. :) : : Also, when the hardware got _set_ to this initially, I complained. So, I : guess I'm getting my way now, with this patch. I'm cool with it. Reported-by: Robert Mueller Signed-off-by: KOSAKI Motohiro Acked-by: Christoph Lameter Acked-by: David Rientjes Reviewed-by: KAMEZAWA Hiroyuki Cc: Benjamin Herrenschmidt Cc: "Luck, Tony" Acked-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index b91a40e847d2..fc839bfa7935 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -60,7 +60,7 @@ int arch_update_cpu_topology(void); * (in whatever arch specific measurement units returned by node_distance()) * then switch on zone reclaim on boot. */ -#define RECLAIM_DISTANCE 20 +#define RECLAIM_DISTANCE 30 #endif #ifndef PENALTY_FOR_NODE_WITH_CPUS #define PENALTY_FOR_NODE_WITH_CPUS (1) -- cgit v1.2.3 From ca39599c633fb02aceac31a7e67563612e4fe347 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 15 Jun 2011 15:08:27 -0700 Subject: BUILD_BUG_ON_ZERO: fix sparse breakage BUILD_BUG_ON_ZERO and BUILD_BUG_ON_NULL must return values, even in the CHECKER case otherwise various users of it become syntactically invalid. Signed-off-by: Dr. 
David Alan Gilbert Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index fb0e7329fee1..953352a88336 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -671,8 +671,8 @@ struct sysinfo { #ifdef __CHECKER__ #define BUILD_BUG_ON_NOT_POWER_OF_2(n) -#define BUILD_BUG_ON_ZERO(e) -#define BUILD_BUG_ON_NULL(e) +#define BUILD_BUG_ON_ZERO(e) (0) +#define BUILD_BUG_ON_NULL(e) ((void*)0) #define BUILD_BUG_ON(condition) #else /* __CHECKER__ */ -- cgit v1.2.3 From bd5dc17be87b3a3073d50b23802647db3ae3fa8e Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Wed, 15 Jun 2011 15:08:28 -0700 Subject: uts: make default hostname configurable, rather than always using "(none)" The "hostname" tool falls back to setting the hostname to "localhost" if /etc/hostname does not exist. Distribution init scripts have the same fallback. However, if userspace never calls sethostname, such as when booting with init=/bin/sh, or otherwise booting a minimal system without the usual init scripts, the default hostname of "(none)" remains, unhelpfully appearing in various places such as prompts ("root@(none):~#") and logs. Furthermore, "(none)" doesn't typically resolve to anything useful. Make the default hostname configurable. This removes the need for the standard fallback, provides a useful default for systems that never call sethostname, and makes minimal systems that much more useful with less configuration. Distributions could choose to use "localhost" here to avoid the fallback, while embedded systems may wish to use a specific target hostname. Signed-off-by: Josh Triplett Acked-by: Linus Torvalds Acked-by: David Miller Cc: Serge Hallyn Cc: Kel Modderman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/uts.h | 2 +- init/Kconfig | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uts.h b/include/linux/uts.h index 73eb1ed36ec4..6ddbd86377de 100644 --- a/include/linux/uts.h +++ b/include/linux/uts.h @@ -9,7 +9,7 @@ #endif #ifndef UTS_NODENAME -#define UTS_NODENAME "(none)" /* set by sethostname() */ +#define UTS_NODENAME CONFIG_DEFAULT_HOSTNAME /* set by sethostname() */ #endif #ifndef UTS_DOMAINNAME diff --git a/init/Kconfig b/init/Kconfig index ebafac4231ee..e0a22e42d39a 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -204,6 +204,15 @@ config KERNEL_LZO endchoice +config DEFAULT_HOSTNAME + string "Default hostname" + default "(none)" + help + This option determines the default system hostname before userspace + calls sethostname(2). The kernel traditionally uses "(none)" here, + but you may wish to use a different default here to make a minimal + system more usable with less configuration. + config SWAP bool "Support for paging of anonymous memory (swap)" depends on MMU && BLOCK -- cgit v1.2.3 From c001fb72a7b705f902bdfdd05b5d2408efe6f848 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 14 Jun 2011 17:05:11 -0700 Subject: gpio: add GPIOF_ values regardless on kconfig settings Make GPIOF_ defined values available even when GPIOLIB nor GENERIC_GPIO is enabled by moving them to . 
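For reference, the sort of request-time use these flags serve, as a hedged sketch (the GPIO number and label here are invented, not taken from this patch):

    /* drive the line low as soon as it is requested; GPIOF_OUT_INIT_LOW must
     * be defined for this to compile, which is what moving the flag values
     * into <linux/gpio.h> guarantees regardless of the GPIO kconfig options */
    err = gpio_request_one(power_gpio, GPIOF_OUT_INIT_LOW, "codec power");
    if (err)
        return err;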
Fixes these build errors in linux-next: sound/soc/codecs/ak4641.c:524: error: 'GPIOF_OUT_INIT_LOW' undeclared (first use in this function) sound/soc/codecs/wm8915.c:2921: error: 'GPIOF_OUT_INIT_LOW' undeclared (first use in this function) Signed-off-by: Randy Dunlap Signed-off-by: Grant Likely --- include/asm-generic/gpio.h | 10 ---------- include/linux/gpio.h | 11 +++++++++++ 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index fcdcb5d5c995..d494001b1226 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -170,16 +170,6 @@ extern int __gpio_cansleep(unsigned gpio); extern int __gpio_to_irq(unsigned gpio); -#define GPIOF_DIR_OUT (0 << 0) -#define GPIOF_DIR_IN (1 << 0) - -#define GPIOF_INIT_LOW (0 << 1) -#define GPIOF_INIT_HIGH (1 << 1) - -#define GPIOF_IN (GPIOF_DIR_IN) -#define GPIOF_OUT_INIT_LOW (GPIOF_DIR_OUT | GPIOF_INIT_LOW) -#define GPIOF_OUT_INIT_HIGH (GPIOF_DIR_OUT | GPIOF_INIT_HIGH) - /** * struct gpio - a structure describing a GPIO with configuration * @gpio: the GPIO number diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 32d47e710661..17b5a0d80e42 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -3,6 +3,17 @@ /* see Documentation/gpio.txt */ +/* make these flag values available regardless of GPIO kconfig options */ +#define GPIOF_DIR_OUT (0 << 0) +#define GPIOF_DIR_IN (1 << 0) + +#define GPIOF_INIT_LOW (0 << 1) +#define GPIOF_INIT_HIGH (1 << 1) + +#define GPIOF_IN (GPIOF_DIR_IN) +#define GPIOF_OUT_INIT_LOW (GPIOF_DIR_OUT | GPIOF_INIT_LOW) +#define GPIOF_OUT_INIT_HIGH (GPIOF_DIR_OUT | GPIOF_INIT_HIGH) + #ifdef CONFIG_GENERIC_GPIO #include -- cgit v1.2.3 From b5199515c25cca622495eb9c6a8a1d275e775088 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 16 Jun 2011 16:22:08 +0200 Subject: clocksource: Make watchdog robust vs. interruption The clocksource watchdog code is interruptible and it has been observed that this can trigger false positives which disable the TSC. The reason is that an interrupt storm or a long running interrupt handler between the read of the watchdog source and the read of the TSC brings the two far enough apart that the delta is larger than the unstable treshold. Move both reads into a short interrupt disabled region to avoid that. 
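A condensed sketch of the pattern the patch adopts (it mirrors the hunk below and is not standalone code; cs is the clocksource being checked, watchdog is the reference):

    local_irq_disable();
    csnow = cs->read(cs);                 /* clocksource under test */
    wdnow = watchdog->read(watchdog);     /* reference clocksource */
    local_irq_enable();

    /* both deltas now cover almost the same interval, so a long-running
     * interrupt can no longer push them apart past the threshold */
    wd_nsec = clocksource_cyc2ns((wdnow - cs->wd_last) & watchdog->mask,
                                 watchdog->mult, watchdog->shift);
    cs_nsec = clocksource_cyc2ns((csnow - cs->cs_last) & cs->mask,
                                 cs->mult, cs->shift);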
Reported-and-tested-by: Vernon Mauery Signed-off-by: Thomas Gleixner Cc: stable@kernel.org --- include/linux/clocksource.h | 1 + kernel/time/clocksource.c | 24 +++++++++++++----------- 2 files changed, 14 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index d4646b48dc4a..18a1baf31f2d 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -188,6 +188,7 @@ struct clocksource { #ifdef CONFIG_CLOCKSOURCE_WATCHDOG /* Watchdog related data, used by the framework */ struct list_head wd_list; + cycle_t cs_last; cycle_t wd_last; #endif } ____cacheline_aligned; diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1c95fd677328..e0980f0d9a0a 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -185,7 +185,6 @@ static struct clocksource *watchdog; static struct timer_list watchdog_timer; static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); static DEFINE_SPINLOCK(watchdog_lock); -static cycle_t watchdog_last; static int watchdog_running; static int clocksource_watchdog_kthread(void *data); @@ -254,11 +253,6 @@ static void clocksource_watchdog(unsigned long data) if (!watchdog_running) goto out; - wdnow = watchdog->read(watchdog); - wd_nsec = clocksource_cyc2ns((wdnow - watchdog_last) & watchdog->mask, - watchdog->mult, watchdog->shift); - watchdog_last = wdnow; - list_for_each_entry(cs, &watchdog_list, wd_list) { /* Clocksource already marked unstable? */ @@ -268,19 +262,28 @@ static void clocksource_watchdog(unsigned long data) continue; } + local_irq_disable(); csnow = cs->read(cs); + wdnow = watchdog->read(watchdog); + local_irq_enable(); /* Clocksource initialized ? */ if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { cs->flags |= CLOCK_SOURCE_WATCHDOG; - cs->wd_last = csnow; + cs->wd_last = wdnow; + cs->cs_last = csnow; continue; } - /* Check the deviation from the watchdog clocksource. */ - cs_nsec = clocksource_cyc2ns((csnow - cs->wd_last) & + wd_nsec = clocksource_cyc2ns((wdnow - cs->wd_last) & watchdog->mask, + watchdog->mult, watchdog->shift); + + cs_nsec = clocksource_cyc2ns((csnow - cs->cs_last) & cs->mask, cs->mult, cs->shift); - cs->wd_last = csnow; + cs->cs_last = csnow; + cs->wd_last = wdnow; + + /* Check the deviation from the watchdog clocksource. */ if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { clocksource_unstable(cs, cs_nsec - wd_nsec); continue; @@ -318,7 +321,6 @@ static inline void clocksource_start_watchdog(void) return; init_timer(&watchdog_timer); watchdog_timer.function = clocksource_watchdog; - watchdog_last = watchdog->read(watchdog); watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask)); watchdog_running = 1; -- cgit v1.2.3 From d8ad7d1123a960cc9f276bd499f9325c6f5e1bd1 Mon Sep 17 00:00:00 2001 From: Takao Indoh Date: Tue, 29 Mar 2011 12:35:04 -0400 Subject: generic-ipi: Fix kexec boot crash by initializing call_single_queue before enabling interrupts There is a problem that kdump(2nd kernel) sometimes hangs up due to a pending IPI from 1st kernel. Kernel panic occurs because IPI comes before call_single_queue is initialized. To fix the crash, rename init_call_single_data() to call_function_init() and call it in start_kernel() so that call_single_queue can be initialized before enabling interrupts. The details of the crash are: (1) 2nd kernel boots up (2) A pending IPI from 1st kernel comes when irqs are first enabled in start_kernel(). 
(3) Kernel tries to handle the interrupt, but call_single_queue is not initialized yet at this point. As a result, in the generic_smp_call_function_single_interrupt(), NULL pointer dereference occurs when list_replace_init() tries to access &q->list.next. Therefore this patch changes the name of init_call_single_data() to call_function_init() and calls it before local_irq_enable() in start_kernel(). Signed-off-by: Takao Indoh Reviewed-by: WANG Cong Acked-by: Neil Horman Acked-by: Vivek Goyal Acked-by: Peter Zijlstra Cc: Milton Miller Cc: Jens Axboe Cc: Paul E. McKenney Cc: kexec@lists.infradead.org Link: http://lkml.kernel.org/r/D6CBEE2F420741indou.takao@jp.fujitsu.com Signed-off-by: Ingo Molnar --- include/linux/smp.h | 5 ++++- init/main.c | 1 + kernel/smp.c | 5 +---- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 7ad824d510a2..8cc38d3bab0c 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -85,12 +85,15 @@ int smp_call_function_any(const struct cpumask *mask, * Generic and arch helpers */ #ifdef CONFIG_USE_GENERIC_SMP_HELPERS +void __init call_function_init(void); void generic_smp_call_function_single_interrupt(void); void generic_smp_call_function_interrupt(void); void ipi_call_lock(void); void ipi_call_unlock(void); void ipi_call_lock_irq(void); void ipi_call_unlock_irq(void); +#else +static inline void call_function_init(void) { } #endif /* @@ -134,7 +137,7 @@ static inline void smp_send_reschedule(int cpu) { } #define smp_prepare_boot_cpu() do {} while (0) #define smp_call_function_many(mask, func, info, wait) \ (up_smp_call_function(func, info)) -static inline void init_call_single_data(void) { } +static inline void call_function_init(void) { } static inline int smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, diff --git a/init/main.c b/init/main.c index cafba67c13bf..d7211faed2ad 100644 --- a/init/main.c +++ b/init/main.c @@ -542,6 +542,7 @@ asmlinkage void __init start_kernel(void) timekeeping_init(); time_init(); profile_init(); + call_function_init(); if (!irqs_disabled()) printk(KERN_CRIT "start_kernel(): bug: interrupts were " "enabled early\n"); diff --git a/kernel/smp.c b/kernel/smp.c index 73a195193558..fb67dfa8394e 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -74,7 +74,7 @@ static struct notifier_block __cpuinitdata hotplug_cfd_notifier = { .notifier_call = hotplug_cfd, }; -static int __cpuinit init_call_single_data(void) +void __init call_function_init(void) { void *cpu = (void *)(long)smp_processor_id(); int i; @@ -88,10 +88,7 @@ static int __cpuinit init_call_single_data(void) hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu); register_cpu_notifier(&hotplug_cfd_notifier); - - return 0; } -early_initcall(init_call_single_data); /* * csd_lock/csd_unlock used to serialize access to per-cpu csd resources -- cgit v1.2.3 From 879669961b11e7f40b518784863a259f735a72bf Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 17 Jun 2011 11:25:59 +0100 Subject: KEYS/DNS: Fix ____call_usermodehelper() to not lose the session keyring ____call_usermodehelper() now erases any credentials set by the subprocess_inf::init() function. The problem is that commit 17f60a7da150 ("capabilites: allow the application of capability limits to usermode helpers") creates and commits new credentials with prepare_kernel_cred() after the call to the init() function. This wipes all keyrings after umh_keys_init() is called. 
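In outline, the broken ordering looked like this (a condensed sketch of the old ____call_usermodehelper() flow, not verbatim kernel code):

    sub_info->init(sub_info);            /* umh_keys_init() installs the session keyring */
    new = prepare_kernel_cred(current);  /* builds a fresh set of credentials */
    /* capability limits applied to 'new' here */
    commit_creds(new);                   /* publishes them, discarding what init() set up */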
The best way to deal with this is to put the init() call just prior to the commit_creds() call, and pass the cred pointer to init(). That means that umh_keys_init() and suchlike can modify the credentials _before_ they are published and potentially in use by the rest of the system. This prevents request_key() from working as it is prevented from passing the session keyring it set up with the authorisation token to /sbin/request-key, and so the latter can't assume the authority to instantiate the key. This causes the in-kernel DNS resolver to fail with ENOKEY unconditionally. Signed-off-by: David Howells Acked-by: Eric Paris Tested-by: Jeff Layton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- include/linux/kmod.h | 8 ++++---- kernel/kmod.c | 16 +++++++++------- security/keys/request_key.c | 3 +-- 4 files changed, 15 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 97e0d52d72fd..6075a1e727ae 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1996,7 +1996,7 @@ static void wait_for_dump_helpers(struct file *file) * is a special value that we use to trap recursive * core dumps */ -static int umh_pipe_setup(struct subprocess_info *info) +static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) { struct file *rp, *wp; struct fdtable *fdt; diff --git a/include/linux/kmod.h b/include/linux/kmod.h index d4a5c84c503d..0da38cf7db7b 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -45,7 +45,7 @@ static inline int request_module_nowait(const char *name, ...) { return -ENOSYS; #endif -struct key; +struct cred; struct file; enum umh_wait { @@ -62,7 +62,7 @@ struct subprocess_info { char **envp; enum umh_wait wait; int retval; - int (*init)(struct subprocess_info *info); + int (*init)(struct subprocess_info *info, struct cred *new); void (*cleanup)(struct subprocess_info *info); void *data; }; @@ -73,7 +73,7 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, /* Set various pieces of state into the subprocess_info structure */ void call_usermodehelper_setfns(struct subprocess_info *info, - int (*init)(struct subprocess_info *info), + int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *info), void *data); @@ -87,7 +87,7 @@ void call_usermodehelper_freeinfo(struct subprocess_info *info); static inline int call_usermodehelper_fns(char *path, char **argv, char **envp, enum umh_wait wait, - int (*init)(struct subprocess_info *info), + int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *), void *data) { struct subprocess_info *info; diff --git a/kernel/kmod.c b/kernel/kmod.c index ad6a81c58b44..47613dfb7b28 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -156,12 +156,6 @@ static int ____call_usermodehelper(void *data) */ set_user_nice(current, 0); - if (sub_info->init) { - retval = sub_info->init(sub_info); - if (retval) - goto fail; - } - retval = -ENOMEM; new = prepare_kernel_cred(current); if (!new) @@ -173,6 +167,14 @@ static int ____call_usermodehelper(void *data) new->cap_inheritable); spin_unlock(&umh_sysctl_lock); + if (sub_info->init) { + retval = sub_info->init(sub_info, new); + if (retval) { + abort_creds(new); + goto fail; + } + } + commit_creds(new); retval = kernel_execve(sub_info->path, @@ -388,7 +390,7 @@ EXPORT_SYMBOL(call_usermodehelper_setup); * context in which call_usermodehelper_exec is called. 
*/ void call_usermodehelper_setfns(struct subprocess_info *info, - int (*init)(struct subprocess_info *info), + int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *info), void *data) { diff --git a/security/keys/request_key.c b/security/keys/request_key.c index d31862e0aa1c..8e319a416eec 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -71,9 +71,8 @@ EXPORT_SYMBOL(complete_request_key); * This is called in context of freshly forked kthread before kernel_execve(), * so we can simply install the desired session_keyring at this point. */ -static int umh_keys_init(struct subprocess_info *info) +static int umh_keys_init(struct subprocess_info *info, struct cred *cred) { - struct cred *cred = (struct cred*)current_cred(); struct key *keyring = info->data; return install_session_keyring_to_cred(cred, keyring); -- cgit v1.2.3 From 482e0cd3dbaa70f2a2bead4b5f2c0d203ef654ba Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 19 Jun 2011 13:01:04 -0400 Subject: devcgroup_inode_permission: take "is it a device node" checks to inlined wrapper inode_permission() calls devcgroup_inode_permission() and almost all such calls are _not_ for device nodes; let's at least keep the common path straight... Signed-off-by: Al Viro --- include/linux/device_cgroup.h | 10 +++++++++- security/device_cgroup.c | 8 +------- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h index 0b0d9c39ed67..7aad1f440867 100644 --- a/include/linux/device_cgroup.h +++ b/include/linux/device_cgroup.h @@ -2,8 +2,16 @@ #include #ifdef CONFIG_CGROUP_DEVICE -extern int devcgroup_inode_permission(struct inode *inode, int mask); +extern int __devcgroup_inode_permission(struct inode *inode, int mask); extern int devcgroup_inode_mknod(int mode, dev_t dev); +static inline int devcgroup_inode_permission(struct inode *inode, int mask) +{ + if (likely(!inode->i_rdev)) + return 0; + if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)) + return 0; + return __devcgroup_inode_permission(inode, mask); +} #else static inline int devcgroup_inode_permission(struct inode *inode, int mask) { return 0; } diff --git a/security/device_cgroup.c b/security/device_cgroup.c index cd1f779fa51d..1be68269e1c2 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -474,17 +474,11 @@ struct cgroup_subsys devices_subsys = { .subsys_id = devices_subsys_id, }; -int devcgroup_inode_permission(struct inode *inode, int mask) +int __devcgroup_inode_permission(struct inode *inode, int mask) { struct dev_cgroup *dev_cgroup; struct dev_whitelist_item *wh; - dev_t device = inode->i_rdev; - if (!device) - return 0; - if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)) - return 0; - rcu_read_lock(); dev_cgroup = task_devcgroup(current); -- cgit v1.2.3 From 79568f5be06c91071697c065f01f3ebfbeb25a61 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 20 Jun 2011 20:13:49 -0700 Subject: vfs: i_state needs to be 'unsigned long' for now Commit 13e12d14e2dc ("vfs: reorganize 'struct inode' layout a bit") moved things around a bit changed i_state to be unsigned int instead of unsigned long. That was to help structure layout for the 64-bit case, and shrink 'struct inode' a bit (admittedly that only happened when spinlock debugging was on and i_flags didn't pack with i_lock). 
However, Meelis Roos reports that this results in unaligned exceptions on sprc, and it turns out that the bit-locking primitives that we use for the I_NEW bit want to use the bitops. Which want 'unsigned long', not 'unsigned int'. We really should fix the bit locking code to not have that kind of requirement, but that's a much bigger change. So for now, revert that field back to 'unsigned long' (but keep the other re-ordering changes from the commit that caused this). Andi points out that we have played games with this in 'struct page', so it's solvable with other hacks too, but since right now the struct inode size advantage only happens with some rare config options, it's not worth fighting. It _would_ be worth fixing the bitlocking code, though. Especially since there is no type safety in the bitlocking code (this never caused any warnings, and worked fine on x86-64, because the bitlocks take a 'void *' and x86-64 doesn't care that deeply about alignment). So it's currently a very easy problem to trigger by mistake and never notice. Reported-by: Meelis Roos Cc: Andi Kleen Cc: David Miller Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 1c777878f1ea..6e73e2e9ae33 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -744,7 +744,7 @@ struct inode { spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned int i_flags; - unsigned int i_state; + unsigned long i_state; #ifdef CONFIG_SECURITY void *i_security; #endif -- cgit v1.2.3 From 2456689b3b11ddecc091cd5f00b9adea6a9854cf Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 4 Jul 2011 19:22:00 -0700 Subject: Input: wm97xx - remove redundant define Signed-off-by: Wolfram Sang Acked-by: Mark Brown Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/atmel-wm97xx.c | 2 +- drivers/input/touchscreen/mainstone-wm97xx.c | 6 +++--- drivers/input/touchscreen/wm9713.c | 4 ++-- drivers/input/touchscreen/zylonite-wm97xx.c | 6 +++--- include/linux/wm97xx.h | 3 +-- 5 files changed, 10 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/touchscreen/atmel-wm97xx.c b/drivers/input/touchscreen/atmel-wm97xx.c index c804189ccc3b..8034cbb20f74 100644 --- a/drivers/input/touchscreen/atmel-wm97xx.c +++ b/drivers/input/touchscreen/atmel-wm97xx.c @@ -164,7 +164,7 @@ static irqreturn_t atmel_wm97xx_channel_b_interrupt(int irq, void *dev_id) data = ac97c_readl(atmel_wm97xx, CBRHR); value = data & 0x0fff; - source = data & WM97XX_ADCSRC_MASK; + source = data & WM97XX_ADCSEL_MASK; pen_down = (data & WM97XX_PEN_DOWN) >> 8; if (source == WM97XX_ADCSEL_X) diff --git a/drivers/input/touchscreen/mainstone-wm97xx.c b/drivers/input/touchscreen/mainstone-wm97xx.c index 3242e7076258..e966c29ff1bb 100644 --- a/drivers/input/touchscreen/mainstone-wm97xx.c +++ b/drivers/input/touchscreen/mainstone-wm97xx.c @@ -157,9 +157,9 @@ static int wm97xx_acc_pen_down(struct wm97xx *wm) x, y, p); /* are samples valid */ - if ((x & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_X || - (y & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_Y || - (p & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_PRES) + if ((x & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_X || + (y & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_Y || + (p & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_PRES) goto up; /* coordinate is good */ diff --git a/drivers/input/touchscreen/wm9713.c b/drivers/input/touchscreen/wm9713.c index 73ec99568f12..13cd9d1fa07a 100644 --- 
a/drivers/input/touchscreen/wm9713.c +++ b/drivers/input/touchscreen/wm9713.c @@ -304,9 +304,9 @@ static int wm9713_poll_sample(struct wm97xx *wm, int adcsel, int *sample) wm->mach_ops->post_sample(adcsel); /* check we have correct sample */ - if ((*sample & WM97XX_ADCSRC_MASK) != ffs(adcsel >> 1) << 12) { + if ((*sample & WM97XX_ADCSEL_MASK) != ffs(adcsel >> 1) << 12) { dev_dbg(wm->dev, "adc wrong sample, read %x got %x", adcsel, - *sample & WM97XX_ADCSRC_MASK); + *sample & WM97XX_ADCSEL_MASK); return RC_PENUP; } diff --git a/drivers/input/touchscreen/zylonite-wm97xx.c b/drivers/input/touchscreen/zylonite-wm97xx.c index 5b0f15ec874a..f6328c0cded6 100644 --- a/drivers/input/touchscreen/zylonite-wm97xx.c +++ b/drivers/input/touchscreen/zylonite-wm97xx.c @@ -122,9 +122,9 @@ static int wm97xx_acc_pen_down(struct wm97xx *wm) x, y, p); /* are samples valid */ - if ((x & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_X || - (y & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_Y || - (p & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_PRES) + if ((x & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_X || + (y & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_Y || + (p & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_PRES) goto up; /* coordinate is good */ diff --git a/include/linux/wm97xx.h b/include/linux/wm97xx.h index 38e8c4d9289e..10b9e614fccd 100644 --- a/include/linux/wm97xx.h +++ b/include/linux/wm97xx.h @@ -38,7 +38,7 @@ #define WM97XX_ADCSEL_X 0x1000 /* x coord measurement */ #define WM97XX_ADCSEL_Y 0x2000 /* y coord measurement */ #define WM97XX_ADCSEL_PRES 0x3000 /* pressure measurement */ -#define WM97XX_ADCSEL_MASK 0x7000 +#define WM97XX_ADCSEL_MASK 0x7000 /* ADC selection mask */ #define WM97XX_COO 0x0800 /* enable coordinate mode */ #define WM97XX_CTC 0x0400 /* enable continuous mode */ #define WM97XX_CM_RATE_93 0x0000 /* 93.75Hz continuous rate */ @@ -61,7 +61,6 @@ #define WM97XX_PRP_DET_DIG 0xc000 /* setect on, digitise on */ #define WM97XX_RPR 0x2000 /* wake up on pen down */ #define WM97XX_PEN_DOWN 0x8000 /* pen is down */ -#define WM97XX_ADCSRC_MASK 0x7000 /* ADC source mask */ #define WM97XX_AUX_ID1 0x8001 #define WM97XX_AUX_ID2 0x8002 -- cgit v1.2.3 From c8f205258bc8942e79cd37ebc1c8ec4652a1a501 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 4 Jul 2011 19:22:00 -0700 Subject: Input: wm97xx - refactor channel selection in poll_sample() The current implementation of poll_sample() has the problem that one of its arguments, the channel to be selected, differs from wm9713 to other variants. This parameter gets passed to the (currently unused) mach-specific functions pre_sample() and post_sample() which thus have to deal with codec-specific differences. Refactor the routine so that the argument to poll_sample() is generic for all codecs and do necessary conversions only in the codec-specific driver. The outcome even uses less code and removes the non-standard use of the PEN_DOWN bit to mark the AUX-channels. 
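The two pieces the refactor rests on, condensed from the hunks below (a sketch, not complete functions):

    /* generic validity check, identical for every codec: the returned
     * sample must carry the channel bits that were requested */
    if ((*sample ^ adcsel) & WM97XX_ADCSEL_MASK)
        return RC_PENUP;        /* wrong channel came back, treat as pen up */

    /* wm9713 only: convert the generic WM97XX_ADCSEL_* value into the
     * bit format this codec's DIG1 register expects */
    dig1 |= 1 << ((adcsel & WM97XX_ADCSEL_MASK) >> 12);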
Signed-off-by: Wolfram Sang Acked-by: Mark Brown Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/wm9705.c | 14 ++++++-------- drivers/input/touchscreen/wm9712.c | 14 ++++++-------- drivers/input/touchscreen/wm9713.c | 18 +++++++++--------- include/linux/wm97xx.h | 10 ++++------ 4 files changed, 25 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/touchscreen/wm9705.c b/drivers/input/touchscreen/wm9705.c index 98e61175d3f5..363d61dbe83c 100644 --- a/drivers/input/touchscreen/wm9705.c +++ b/drivers/input/touchscreen/wm9705.c @@ -224,13 +224,10 @@ static int wm9705_poll_sample(struct wm97xx *wm, int adcsel, int *sample) } /* set up digitiser */ - if (adcsel & 0x8000) - adcsel = ((adcsel & 0x7fff) + 3) << 12; - if (wm->mach_ops && wm->mach_ops->pre_sample) wm->mach_ops->pre_sample(adcsel); - wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1, - adcsel | WM97XX_POLL | WM97XX_DELAY(delay)); + wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1, (adcsel & WM97XX_ADCSEL_MASK) + | WM97XX_POLL | WM97XX_DELAY(delay)); /* wait 3 AC97 time slots + delay for conversion */ poll_delay(delay); @@ -256,9 +253,10 @@ static int wm9705_poll_sample(struct wm97xx *wm, int adcsel, int *sample) wm->mach_ops->post_sample(adcsel); /* check we have correct sample */ - if ((*sample & WM97XX_ADCSEL_MASK) != adcsel) { - dev_dbg(wm->dev, "adc wrong sample, read %x got %x", adcsel, - *sample & WM97XX_ADCSEL_MASK); + if ((*sample ^ adcsel) & WM97XX_ADCSEL_MASK) { + dev_dbg(wm->dev, "adc wrong sample, wanted %x got %x", + adcsel & WM97XX_ADCSEL_MASK, + *sample & WM97XX_ADCSEL_MASK); return RC_PENUP; } diff --git a/drivers/input/touchscreen/wm9712.c b/drivers/input/touchscreen/wm9712.c index 2bc2fb801009..d26093f479d1 100644 --- a/drivers/input/touchscreen/wm9712.c +++ b/drivers/input/touchscreen/wm9712.c @@ -264,13 +264,10 @@ static int wm9712_poll_sample(struct wm97xx *wm, int adcsel, int *sample) } /* set up digitiser */ - if (adcsel & 0x8000) - adcsel = ((adcsel & 0x7fff) + 3) << 12; - if (wm->mach_ops && wm->mach_ops->pre_sample) wm->mach_ops->pre_sample(adcsel); - wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1, - adcsel | WM97XX_POLL | WM97XX_DELAY(delay)); + wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1, (adcsel & WM97XX_ADCSEL_MASK) + | WM97XX_POLL | WM97XX_DELAY(delay)); /* wait 3 AC97 time slots + delay for conversion */ poll_delay(delay); @@ -296,9 +293,10 @@ static int wm9712_poll_sample(struct wm97xx *wm, int adcsel, int *sample) wm->mach_ops->post_sample(adcsel); /* check we have correct sample */ - if ((*sample & WM97XX_ADCSEL_MASK) != adcsel) { - dev_dbg(wm->dev, "adc wrong sample, read %x got %x", adcsel, - *sample & WM97XX_ADCSEL_MASK); + if ((*sample ^ adcsel) & WM97XX_ADCSEL_MASK) { + dev_dbg(wm->dev, "adc wrong sample, wanted %x got %x", + adcsel & WM97XX_ADCSEL_MASK, + *sample & WM97XX_ADCSEL_MASK); return RC_PENUP; } diff --git a/drivers/input/touchscreen/wm9713.c b/drivers/input/touchscreen/wm9713.c index 13cd9d1fa07a..a7558015429b 100644 --- a/drivers/input/touchscreen/wm9713.c +++ b/drivers/input/touchscreen/wm9713.c @@ -270,15 +270,14 @@ static int wm9713_poll_sample(struct wm97xx *wm, int adcsel, int *sample) } /* set up digitiser */ - if (adcsel & 0x8000) - adcsel = 1 << ((adcsel & 0x7fff) + 3); - dig1 = wm97xx_reg_read(wm, AC97_WM9713_DIG1); dig1 &= ~WM9713_ADCSEL_MASK; + /* WM97XX_ADCSEL_* channels need to be converted to WM9713 format */ + dig1 |= 1 << ((adcsel & WM97XX_ADCSEL_MASK) >> 12); if (wm->mach_ops && wm->mach_ops->pre_sample) 
wm->mach_ops->pre_sample(adcsel); - wm97xx_reg_write(wm, AC97_WM9713_DIG1, dig1 | adcsel | WM9713_POLL); + wm97xx_reg_write(wm, AC97_WM9713_DIG1, dig1 | WM9713_POLL); /* wait 3 AC97 time slots + delay for conversion */ poll_delay(delay); @@ -304,8 +303,9 @@ static int wm9713_poll_sample(struct wm97xx *wm, int adcsel, int *sample) wm->mach_ops->post_sample(adcsel); /* check we have correct sample */ - if ((*sample & WM97XX_ADCSEL_MASK) != ffs(adcsel >> 1) << 12) { - dev_dbg(wm->dev, "adc wrong sample, read %x got %x", adcsel, + if ((*sample ^ adcsel) & WM97XX_ADCSEL_MASK) { + dev_dbg(wm->dev, "adc wrong sample, wanted %x got %x", + adcsel & WM97XX_ADCSEL_MASK, *sample & WM97XX_ADCSEL_MASK); return RC_PENUP; } @@ -400,14 +400,14 @@ static int wm9713_poll_touch(struct wm97xx *wm, struct wm97xx_data *data) if (rc != RC_VALID) return rc; } else { - rc = wm9713_poll_sample(wm, WM9713_ADCSEL_X, &data->x); + rc = wm9713_poll_sample(wm, WM97XX_ADCSEL_X, &data->x); if (rc != RC_VALID) return rc; - rc = wm9713_poll_sample(wm, WM9713_ADCSEL_Y, &data->y); + rc = wm9713_poll_sample(wm, WM97XX_ADCSEL_Y, &data->y); if (rc != RC_VALID) return rc; if (pil) { - rc = wm9713_poll_sample(wm, WM9713_ADCSEL_PRES, + rc = wm9713_poll_sample(wm, WM97XX_ADCSEL_PRES, &data->p); if (rc != RC_VALID) return rc; diff --git a/include/linux/wm97xx.h b/include/linux/wm97xx.h index 10b9e614fccd..fd98bb968219 100644 --- a/include/linux/wm97xx.h +++ b/include/linux/wm97xx.h @@ -38,6 +38,10 @@ #define WM97XX_ADCSEL_X 0x1000 /* x coord measurement */ #define WM97XX_ADCSEL_Y 0x2000 /* y coord measurement */ #define WM97XX_ADCSEL_PRES 0x3000 /* pressure measurement */ +#define WM97XX_AUX_ID1 0x4000 +#define WM97XX_AUX_ID2 0x5000 +#define WM97XX_AUX_ID3 0x6000 +#define WM97XX_AUX_ID4 0x7000 #define WM97XX_ADCSEL_MASK 0x7000 /* ADC selection mask */ #define WM97XX_COO 0x0800 /* enable coordinate mode */ #define WM97XX_CTC 0x0400 /* enable continuous mode */ @@ -62,12 +66,6 @@ #define WM97XX_RPR 0x2000 /* wake up on pen down */ #define WM97XX_PEN_DOWN 0x8000 /* pen is down */ -#define WM97XX_AUX_ID1 0x8001 -#define WM97XX_AUX_ID2 0x8002 -#define WM97XX_AUX_ID3 0x8003 -#define WM97XX_AUX_ID4 0x8004 - - /* WM9712 Bits */ #define WM9712_45W 0x1000 /* set for 5-wire touchscreen */ #define WM9712_PDEN 0x0800 /* measure only when pen down */ -- cgit v1.2.3 From 7bed4b2c97d548c652ada6111604568ea6b0c423 Mon Sep 17 00:00:00 2001 From: Simon Budig Date: Mon, 4 Jul 2011 19:56:01 -0700 Subject: Input: add guarding parentheses to macros Put parentheses around macro argument uses. This avoids pitfalls for the programmer, where the argument expansion does not give the expected result, for example: ioctl (fd, EVIOCGABS (have_mt ? 
ABS_MT_POSITION_X : ABS_X), &abs); Signed-off-by: Simon Budig Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index 771d6d85667d..068784e17972 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -119,9 +119,9 @@ struct input_keymap_entry { #define EVIOCGSND(len) _IOC(_IOC_READ, 'E', 0x1a, len) /* get all sounds status */ #define EVIOCGSW(len) _IOC(_IOC_READ, 'E', 0x1b, len) /* get all switch states */ -#define EVIOCGBIT(ev,len) _IOC(_IOC_READ, 'E', 0x20 + ev, len) /* get event bits */ -#define EVIOCGABS(abs) _IOR('E', 0x40 + abs, struct input_absinfo) /* get abs value/limits */ -#define EVIOCSABS(abs) _IOW('E', 0xc0 + abs, struct input_absinfo) /* set abs value/limits */ +#define EVIOCGBIT(ev,len) _IOC(_IOC_READ, 'E', 0x20 + (ev), len) /* get event bits */ +#define EVIOCGABS(abs) _IOR('E', 0x40 + (abs), struct input_absinfo) /* get abs value/limits */ +#define EVIOCSABS(abs) _IOW('E', 0xc0 + (abs), struct input_absinfo) /* set abs value/limits */ #define EVIOCSFF _IOC(_IOC_WRITE, 'E', 0x80, sizeof(struct ff_effect)) /* send a force effect to a force feedback device */ #define EVIOCRMFF _IOW('E', 0x81, int) /* Erase a force effect */ -- cgit v1.2.3 From e8e70d83912b40c5c9ea7b85a6110b9925fbed62 Mon Sep 17 00:00:00 2001 From: Chris Hudson Date: Wed, 6 Jul 2011 18:36:51 -0700 Subject: Input: add support for Kionix KXTJ9 accelerometer Signed-off-by: Chris Hudson Signed-off-by: Dmitry Torokhov --- drivers/input/misc/Kconfig | 17 ++ drivers/input/misc/Makefile | 1 + drivers/input/misc/kxtj9.c | 671 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/input/kxtj9.h | 70 +++++ 4 files changed, 759 insertions(+) create mode 100644 drivers/input/misc/kxtj9.c create mode 100644 include/linux/input/kxtj9.h (limited to 'include/linux') diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index 6fdce4b86856..01344280e145 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -230,6 +230,23 @@ config INPUT_KEYSPAN_REMOTE To compile this driver as a module, choose M here: the module will be called keyspan_remote. +config INPUT_KXTJ9 + tristate "Kionix KXTJ9 tri-axis digital accelerometer" + depends on I2C + help + Say Y here to enable support for the Kionix KXTJ9 digital tri-axis + accelerometer. + + To compile this driver as a module, choose M here: the module will + be called kxtj9. + +config INPUT_KXTJ9_POLLED_MODE + bool "Enable polling mode support" + depends on INPUT_KXTJ9 + select INPUT_POLLDEV + help + Say Y here if you need accelerometer to work in polling mode. 
+ config INPUT_POWERMATE tristate "Griffin PowerMate and Contour Jog support" depends on USB_ARCH_HAS_HCD diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile index b7e227aa117a..be39d813354d 100644 --- a/drivers/input/misc/Makefile +++ b/drivers/input/misc/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_INPUT_DM355EVM) += dm355evm_keys.o obj-$(CONFIG_HP_SDC_RTC) += hp_sdc_rtc.o obj-$(CONFIG_INPUT_IXP4XX_BEEPER) += ixp4xx-beeper.o obj-$(CONFIG_INPUT_KEYSPAN_REMOTE) += keyspan_remote.o +obj-$(CONFIG_INPUT_KXTJ9) += kxtj9.o obj-$(CONFIG_INPUT_M68K_BEEP) += m68kspkr.o obj-$(CONFIG_INPUT_MAX8925_ONKEY) += max8925_onkey.o obj-$(CONFIG_INPUT_MMA8450) += mma8450.o diff --git a/drivers/input/misc/kxtj9.c b/drivers/input/misc/kxtj9.c new file mode 100644 index 000000000000..a416f7f06738 --- /dev/null +++ b/drivers/input/misc/kxtj9.c @@ -0,0 +1,671 @@ +/* + * Copyright (C) 2011 Kionix, Inc. + * Written by Chris Hudson + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307, USA + */ + +#include +#include +#include +#include +#include +#include +#include + +#define NAME "kxtj9" +#define G_MAX 8000 +/* OUTPUT REGISTERS */ +#define XOUT_L 0x06 +#define WHO_AM_I 0x0F +/* CONTROL REGISTERS */ +#define INT_REL 0x1A +#define CTRL_REG1 0x1B +#define INT_CTRL1 0x1E +#define DATA_CTRL 0x21 +/* CONTROL REGISTER 1 BITS */ +#define PC1_OFF 0x7F +#define PC1_ON (1 << 7) +/* Data ready funtion enable bit: set during probe if using irq mode */ +#define DRDYE (1 << 5) +/* INTERRUPT CONTROL REGISTER 1 BITS */ +/* Set these during probe if using irq mode */ +#define KXTJ9_IEL (1 << 3) +#define KXTJ9_IEA (1 << 4) +#define KXTJ9_IEN (1 << 5) +/* INPUT_ABS CONSTANTS */ +#define FUZZ 3 +#define FLAT 3 +/* RESUME STATE INDICES */ +#define RES_DATA_CTRL 0 +#define RES_CTRL_REG1 1 +#define RES_INT_CTRL1 2 +#define RESUME_ENTRIES 3 + +/* + * The following table lists the maximum appropriate poll interval for each + * available output data rate. 
+ */ +static const struct { + unsigned int cutoff; + u8 mask; +} kxtj9_odr_table[] = { + { 3, ODR800F }, + { 5, ODR400F }, + { 10, ODR200F }, + { 20, ODR100F }, + { 40, ODR50F }, + { 80, ODR25F }, + { 0, ODR12_5F}, +}; + +struct kxtj9_data { + struct i2c_client *client; + struct kxtj9_platform_data pdata; + struct input_dev *input_dev; +#ifdef CONFIG_INPUT_KXTJ9_POLLED_MODE + struct input_polled_dev *poll_dev; +#endif + unsigned int last_poll_interval; + u8 shift; + u8 ctrl_reg1; + u8 data_ctrl; + u8 int_ctrl; +}; + +static int kxtj9_i2c_read(struct kxtj9_data *tj9, u8 addr, u8 *data, int len) +{ + struct i2c_msg msgs[] = { + { + .addr = tj9->client->addr, + .flags = tj9->client->flags, + .len = 1, + .buf = &addr, + }, + { + .addr = tj9->client->addr, + .flags = tj9->client->flags | I2C_M_RD, + .len = len, + .buf = data, + }, + }; + + return i2c_transfer(tj9->client->adapter, msgs, 2); +} + +static void kxtj9_report_acceleration_data(struct kxtj9_data *tj9) +{ + s16 acc_data[3]; /* Data bytes from hardware xL, xH, yL, yH, zL, zH */ + s16 x, y, z; + int err; + + err = kxtj9_i2c_read(tj9, XOUT_L, (u8 *)acc_data, 6); + if (err < 0) + dev_err(&tj9->client->dev, "accelerometer data read failed\n"); + + x = le16_to_cpu(acc_data[tj9->pdata.axis_map_x]) >> tj9->shift; + y = le16_to_cpu(acc_data[tj9->pdata.axis_map_y]) >> tj9->shift; + z = le16_to_cpu(acc_data[tj9->pdata.axis_map_z]) >> tj9->shift; + + input_report_abs(tj9->input_dev, ABS_X, tj9->pdata.negate_x ? -x : x); + input_report_abs(tj9->input_dev, ABS_Y, tj9->pdata.negate_y ? -y : y); + input_report_abs(tj9->input_dev, ABS_Z, tj9->pdata.negate_z ? -z : z); + input_sync(tj9->input_dev); +} + +static irqreturn_t kxtj9_isr(int irq, void *dev) +{ + struct kxtj9_data *tj9 = dev; + int err; + + /* data ready is the only possible interrupt type */ + kxtj9_report_acceleration_data(tj9); + + err = i2c_smbus_read_byte_data(tj9->client, INT_REL); + if (err < 0) + dev_err(&tj9->client->dev, + "error clearing interrupt status: %d\n", err); + + return IRQ_HANDLED; +} + +static int kxtj9_update_g_range(struct kxtj9_data *tj9, u8 new_g_range) +{ + switch (new_g_range) { + case KXTJ9_G_2G: + tj9->shift = 4; + break; + case KXTJ9_G_4G: + tj9->shift = 3; + break; + case KXTJ9_G_8G: + tj9->shift = 2; + break; + default: + return -EINVAL; + } + + tj9->ctrl_reg1 &= 0xe7; + tj9->ctrl_reg1 |= new_g_range; + + return 0; +} + +static int kxtj9_update_odr(struct kxtj9_data *tj9, unsigned int poll_interval) +{ + int err; + int i; + + /* Use the lowest ODR that can support the requested poll interval */ + for (i = 0; i < ARRAY_SIZE(kxtj9_odr_table); i++) { + tj9->data_ctrl = kxtj9_odr_table[i].mask; + if (poll_interval < kxtj9_odr_table[i].cutoff) + break; + } + + err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, 0); + if (err < 0) + return err; + + err = i2c_smbus_write_byte_data(tj9->client, DATA_CTRL, tj9->data_ctrl); + if (err < 0) + return err; + + err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, tj9->ctrl_reg1); + if (err < 0) + return err; + + return 0; +} + +static int kxtj9_device_power_on(struct kxtj9_data *tj9) +{ + if (tj9->pdata.power_on) + return tj9->pdata.power_on(); + + return 0; +} + +static void kxtj9_device_power_off(struct kxtj9_data *tj9) +{ + int err; + + tj9->ctrl_reg1 &= PC1_OFF; + err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, tj9->ctrl_reg1); + if (err < 0) + dev_err(&tj9->client->dev, "soft power off failed\n"); + + if (tj9->pdata.power_off) + tj9->pdata.power_off(); +} + +static int kxtj9_enable(struct kxtj9_data 
*tj9) +{ + int err; + + err = kxtj9_device_power_on(tj9); + if (err < 0) + return err; + + /* ensure that PC1 is cleared before updating control registers */ + err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, 0); + if (err < 0) + return err; + + /* only write INT_CTRL_REG1 if in irq mode */ + if (tj9->client->irq) { + err = i2c_smbus_write_byte_data(tj9->client, + INT_CTRL1, tj9->int_ctrl); + if (err < 0) + return err; + } + + err = kxtj9_update_g_range(tj9, tj9->pdata.g_range); + if (err < 0) + return err; + + /* turn on outputs */ + tj9->ctrl_reg1 |= PC1_ON; + err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, tj9->ctrl_reg1); + if (err < 0) + return err; + + err = kxtj9_update_odr(tj9, tj9->last_poll_interval); + if (err < 0) + return err; + + /* clear initial interrupt if in irq mode */ + if (tj9->client->irq) { + err = i2c_smbus_read_byte_data(tj9->client, INT_REL); + if (err < 0) { + dev_err(&tj9->client->dev, + "error clearing interrupt: %d\n", err); + goto fail; + } + } + + return 0; + +fail: + kxtj9_device_power_off(tj9); + return err; +} + +static void kxtj9_disable(struct kxtj9_data *tj9) +{ + kxtj9_device_power_off(tj9); +} + +static int kxtj9_input_open(struct input_dev *input) +{ + struct kxtj9_data *tj9 = input_get_drvdata(input); + + return kxtj9_enable(tj9); +} + +static void kxtj9_input_close(struct input_dev *dev) +{ + struct kxtj9_data *tj9 = input_get_drvdata(dev); + + kxtj9_disable(tj9); +} + +static void __devinit kxtj9_init_input_device(struct kxtj9_data *tj9, + struct input_dev *input_dev) +{ + __set_bit(EV_ABS, input_dev->evbit); + input_set_abs_params(input_dev, ABS_X, -G_MAX, G_MAX, FUZZ, FLAT); + input_set_abs_params(input_dev, ABS_Y, -G_MAX, G_MAX, FUZZ, FLAT); + input_set_abs_params(input_dev, ABS_Z, -G_MAX, G_MAX, FUZZ, FLAT); + + input_dev->name = "kxtj9_accel"; + input_dev->id.bustype = BUS_I2C; + input_dev->dev.parent = &tj9->client->dev; +} + +static int __devinit kxtj9_setup_input_device(struct kxtj9_data *tj9) +{ + struct input_dev *input_dev; + int err; + + input_dev = input_allocate_device(); + if (!tj9->input_dev) { + dev_err(&tj9->client->dev, "input device allocate failed\n"); + return -ENOMEM; + } + + tj9->input_dev = input_dev; + + input_dev->open = kxtj9_input_open; + input_dev->close = kxtj9_input_close; + input_set_drvdata(input_dev, tj9); + + kxtj9_init_input_device(tj9, input_dev); + + err = input_register_device(tj9->input_dev); + if (err) { + dev_err(&tj9->client->dev, + "unable to register input polled device %s: %d\n", + tj9->input_dev->name, err); + input_free_device(tj9->input_dev); + return err; + } + + return 0; +} + +/* + * When IRQ mode is selected, we need to provide an interface to allow the user + * to change the output data rate of the part. For consistency, we are using + * the set_poll method, which accepts a poll interval in milliseconds, and then + * calls update_odr() while passing this value as an argument. In IRQ mode, the + * data outputs will not be read AT the requested poll interval, rather, the + * lowest ODR that can support the requested interval. The client application + * will be responsible for retrieving data from the input node at the desired + * interval. 
+ */ + +/* Returns currently selected poll interval (in ms) */ +static ssize_t kxtj9_get_poll(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct kxtj9_data *tj9 = i2c_get_clientdata(client); + + return sprintf(buf, "%d\n", tj9->last_poll_interval); +} + +/* Allow users to select a new poll interval (in ms) */ +static ssize_t kxtj9_set_poll(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct kxtj9_data *tj9 = i2c_get_clientdata(client); + struct input_dev *input_dev = tj9->input_dev; + unsigned int interval; + int error; + + error = kstrtouint(buf, 10, &interval); + if (error < 0) + return error; + + /* Lock the device to prevent races with open/close (and itself) */ + mutex_unlock(&input_dev->mutex); + + disable_irq(client->irq); + + /* + * Set current interval to the greater of the minimum interval or + * the requested interval + */ + tj9->last_poll_interval = max(interval, tj9->pdata.min_interval); + + kxtj9_update_odr(tj9, tj9->last_poll_interval); + + enable_irq(client->irq); + mutex_unlock(&input_dev->mutex); + + return count; +} + +static DEVICE_ATTR(poll, S_IRUGO|S_IWUSR, kxtj9_get_poll, kxtj9_set_poll); + +static struct attribute *kxtj9_attributes[] = { + &dev_attr_poll.attr, + NULL +}; + +static struct attribute_group kxtj9_attribute_group = { + .attrs = kxtj9_attributes +}; + + +#ifdef CONFIG_INPUT_KXTJ9_POLLED_MODE +static void kxtj9_poll(struct input_polled_dev *dev) +{ + struct kxtj9_data *tj9 = dev->private; + unsigned int poll_interval = dev->poll_interval; + + kxtj9_report_acceleration_data(tj9); + + if (poll_interval != tj9->last_poll_interval) { + kxtj9_update_odr(tj9, poll_interval); + tj9->last_poll_interval = poll_interval; + } +} + +static void kxtj9_polled_input_open(struct input_polled_dev *dev) +{ + struct kxtj9_data *tj9 = dev->private; + + kxtj9_enable(tj9); +} + +static void kxtj9_polled_input_close(struct input_polled_dev *dev) +{ + struct kxtj9_data *tj9 = dev->private; + + kxtj9_disable(tj9); +} + +static int __devinit kxtj9_setup_polled_device(struct kxtj9_data *tj9) +{ + int err; + struct input_polled_dev *poll_dev; + poll_dev = input_allocate_polled_device(); + + if (!poll_dev) { + dev_err(&tj9->client->dev, + "Failed to allocate polled device\n"); + return -ENOMEM; + } + + tj9->poll_dev = poll_dev; + tj9->input_dev = poll_dev->input; + + poll_dev->private = tj9; + poll_dev->poll = kxtj9_poll; + poll_dev->open = kxtj9_polled_input_open; + poll_dev->close = kxtj9_polled_input_close; + + kxtj9_init_input_device(tj9, poll_dev->input); + + err = input_register_polled_device(poll_dev); + if (err) { + dev_err(&tj9->client->dev, + "Unable to register polled device, err=%d\n", err); + input_free_polled_device(poll_dev); + return err; + } + + return 0; +} + +static void __devexit kxtj9_teardown_polled_device(struct kxtj9_data *tj9) +{ + input_unregister_polled_device(tj9->poll_dev); + input_free_polled_device(tj9->poll_dev); +} + +#else + +static inline int kxtj9_setup_polled_device(struct kxtj9_data *tj9) +{ + return -ENOSYS; +} + +static inline void kxtj9_teardown_polled_device(struct kxtj9_data *tj9) +{ +} + +#endif + +static int __devinit kxtj9_verify(struct kxtj9_data *tj9) +{ + int retval; + + retval = kxtj9_device_power_on(tj9); + if (retval < 0) + return retval; + + retval = i2c_smbus_read_byte_data(tj9->client, WHO_AM_I); + if (retval < 0) { + dev_err(&tj9->client->dev, "read err int 
source\n"); + goto out; + } + + retval = retval != 0x06 ? -EIO : 0; + +out: + kxtj9_device_power_off(tj9); + return retval; +} + +static int __devinit kxtj9_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + const struct kxtj9_platform_data *pdata = client->dev.platform_data; + struct kxtj9_data *tj9; + int err; + + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_I2C | I2C_FUNC_SMBUS_BYTE_DATA)) { + dev_err(&client->dev, "client is not i2c capable\n"); + return -ENXIO; + } + + if (!pdata) { + dev_err(&client->dev, "platform data is NULL; exiting\n"); + return -EINVAL; + } + + tj9 = kzalloc(sizeof(*tj9), GFP_KERNEL); + if (!tj9) { + dev_err(&client->dev, + "failed to allocate memory for module data\n"); + return -ENOMEM; + } + + tj9->client = client; + tj9->pdata = *pdata; + + if (pdata->init) { + err = pdata->init(); + if (err < 0) + goto err_free_mem; + } + + err = kxtj9_verify(tj9); + if (err < 0) { + dev_err(&client->dev, "device not recognized\n"); + goto err_pdata_exit; + } + + i2c_set_clientdata(client, tj9); + + tj9->ctrl_reg1 = tj9->pdata.res_12bit | tj9->pdata.g_range; + tj9->data_ctrl = tj9->pdata.data_odr_init; + + if (client->irq) { + /* If in irq mode, populate INT_CTRL_REG1 and enable DRDY. */ + tj9->int_ctrl |= KXTJ9_IEN | KXTJ9_IEA | KXTJ9_IEL; + tj9->ctrl_reg1 |= DRDYE; + + err = kxtj9_setup_input_device(tj9); + if (err) + goto err_pdata_exit; + + err = request_threaded_irq(client->irq, NULL, kxtj9_isr, + IRQF_TRIGGER_RISING | IRQF_ONESHOT, + "kxtj9-irq", tj9); + if (err) { + dev_err(&client->dev, "request irq failed: %d\n", err); + goto err_destroy_input; + } + + err = sysfs_create_group(&client->dev.kobj, &kxtj9_attribute_group); + if (err) { + dev_err(&client->dev, "sysfs create failed: %d\n", err); + goto err_free_irq; + } + + } else { + err = kxtj9_setup_polled_device(tj9); + if (err) + goto err_pdata_exit; + } + + return 0; + +err_free_irq: + free_irq(client->irq, tj9); +err_destroy_input: + input_unregister_device(tj9->input_dev); +err_pdata_exit: + if (tj9->pdata.exit) + tj9->pdata.exit(); +err_free_mem: + kfree(tj9); + return err; +} + +static int __devexit kxtj9_remove(struct i2c_client *client) +{ + struct kxtj9_data *tj9 = i2c_get_clientdata(client); + + if (client->irq) { + sysfs_remove_group(&client->dev.kobj, &kxtj9_attribute_group); + free_irq(client->irq, tj9); + input_unregister_device(tj9->input_dev); + } else { + kxtj9_teardown_polled_device(tj9); + } + + if (tj9->pdata.exit) + tj9->pdata.exit(); + + kfree(tj9); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int kxtj9_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct kxtj9_data *tj9 = i2c_get_clientdata(client); + struct input_dev *input_dev = tj9->input_dev; + + mutex_lock(&input_dev->mutex); + + if (input_dev->users) + kxtj9_disable(tj9); + + mutex_unlock(&input_dev->mutex); + return 0; +} + +static int kxtj9_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct kxtj9_data *tj9 = i2c_get_clientdata(client); + struct input_dev *input_dev = tj9->input_dev; + int retval = 0; + + mutex_lock(&input_dev->mutex); + + if (input_dev->users) + kxtj9_enable(tj9); + + mutex_unlock(&input_dev->mutex); + return retval; +} +#endif + +static SIMPLE_DEV_PM_OPS(kxtj9_pm_ops, kxtj9_suspend, kxtj9_resume); + +static const struct i2c_device_id kxtj9_id[] = { + { NAME, 0 }, + { }, +}; + +MODULE_DEVICE_TABLE(i2c, kxtj9_id); + +static struct i2c_driver kxtj9_driver = { + .driver = { + .name = NAME, + .owner = 
+static struct i2c_driver kxtj9_driver = {
+        .driver = {
+                .name   = NAME,
+                .owner  = THIS_MODULE,
+                .pm     = &kxtj9_pm_ops,
+        },
+        .probe          = kxtj9_probe,
+        .remove         = __devexit_p(kxtj9_remove),
+        .id_table       = kxtj9_id,
+};
+
+static int __init kxtj9_init(void)
+{
+        return i2c_add_driver(&kxtj9_driver);
+}
+module_init(kxtj9_init);
+
+static void __exit kxtj9_exit(void)
+{
+        i2c_del_driver(&kxtj9_driver);
+}
+module_exit(kxtj9_exit);
+
+MODULE_DESCRIPTION("KXTJ9 accelerometer driver");
+MODULE_AUTHOR("Chris Hudson ");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/input/kxtj9.h b/include/linux/input/kxtj9.h
new file mode 100644
index 000000000000..f6bac89537b8
--- /dev/null
+++ b/include/linux/input/kxtj9.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2011 Kionix, Inc.
+ * Written by Chris Hudson
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307, USA
+ */
+
+#ifndef __KXTJ9_H__
+#define __KXTJ9_H__
+
+#define KXTJ9_I2C_ADDR          0x0F
+
+struct kxtj9_platform_data {
+        unsigned int min_interval;      /* minimum poll interval (in milli-seconds) */
+
+        /*
+         * By default, x is axis 0, y is axis 1, z is axis 2; these can be
+         * changed to account for sensor orientation within the host device.
+         */
+        u8 axis_map_x;
+        u8 axis_map_y;
+        u8 axis_map_z;
+
+        /*
+         * Each axis can be negated to account for sensor orientation within
+         * the host device.
+         */
+        bool negate_x;
+        bool negate_y;
+        bool negate_z;
+
+        /* CTRL_REG1: set resolution, g-range, data ready enable */
+        /* Output resolution: 8-bit valid or 12-bit valid */
+        #define RES_8BIT        0
+        #define RES_12BIT       (1 << 6)
+        u8 res_12bit;
+        /* Output g-range: +/-2g, 4g, or 8g */
+        #define KXTJ9_G_2G      0
+        #define KXTJ9_G_4G      (1 << 3)
+        #define KXTJ9_G_8G      (1 << 4)
+        u8 g_range;
+
+        /* DATA_CTRL_REG: controls the output data rate of the part */
+        #define ODR12_5F        0
+        #define ODR25F          1
+        #define ODR50F          2
+        #define ODR100F         3
+        #define ODR200F         4
+        #define ODR400F         5
+        #define ODR800F         6
+        u8 data_odr_init;
+
+        int (*init)(void);
+        void (*exit)(void);
+        int (*power_on)(void);
+        int (*power_off)(void);
+};
+#endif  /* __KXTJ9_H__ */
-- cgit v1.2.3
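[Editor's note: the new header above is consumed by board code, not by the driver alone. The sketch below shows how a board file might supply kxtj9_platform_data; it is not part of the patch. The bus number, IRQ wiring, and the chosen ranges are illustrative assumptions, and the "kxtj9" device-name string assumes the driver's NAME macro expands to that value.]

/* Board-file sketch under the assumptions stated above. */
#include <linux/init.h>
#include <linux/i2c.h>
#include <linux/input/kxtj9.h>

static struct kxtj9_platform_data board_kxtj9_pdata = {
        .min_interval   = 5,            /* ms */
        .axis_map_x     = 0,
        .axis_map_y     = 1,
        .axis_map_z     = 2,
        .negate_x       = false,
        .negate_y       = false,
        .negate_z       = true,         /* example: flip the Z axis */
        .res_12bit      = RES_12BIT,
        .g_range        = KXTJ9_G_2G,
        .data_odr_init  = ODR100F,
};

static struct i2c_board_info board_i2c1_devices[] __initdata = {
        {
                I2C_BOARD_INFO("kxtj9", KXTJ9_I2C_ADDR),
                .platform_data  = &board_kxtj9_pdata,
                .irq            = 0,    /* set to gpio_to_irq(...) for irq mode,
                                           or leave 0 to use polled mode */
        },
};

/* From the board init code, e.g.:
 * i2c_register_board_info(1, board_i2c1_devices,
 *                         ARRAY_SIZE(board_i2c1_devices));
 */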