From f84cb8a46a771f36a04a02c61ea635c968ed5f6a Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Thu, 19 Sep 2013 12:13:58 -0400
Subject: dm mpath: disable WRITE SAME if it fails

Workaround the SCSI layer's problematic WRITE SAME heuristics by
disabling WRITE SAME in the DM multipath device's queue_limits if an
underlying device disabled it.

The WRITE SAME heuristics, with both the original commit 5db44863b6eb
("[SCSI] sd: Implement support for WRITE SAME") and the updated commit
66c28f971 ("[SCSI] sd: Update WRITE SAME heuristics"), default to enabling
WRITE SAME(10) even without successfully determining it is supported.
After the first failed WRITE SAME the SCSI layer will disable WRITE SAME
for the device (by setting sdkp->device->no_write_same which results in
'max_write_same_sectors' in device's queue_limits to be set to 0).

When a device is stacked ontop of such a SCSI device any changes to that
SCSI device's queue_limits do not automatically propagate up the stack.
As such, a DM multipath device will not have its WRITE SAME support
disabled.  This causes the block layer to continue to issue WRITE SAME
requests to the mpath device which causes paths to fail and (if mpath IO
isn't configured to queue when no paths are available) it will result in
actual IO errors to the upper layers.

This fix doesn't help configurations that have additional devices
stacked ontop of the mpath device (e.g. LVM created linear DM devices
ontop).  A proper fix that restacks all the queue_limits from the bottom
of the device stack up will need to be explored if SCSI will continue to
use this model of optimistically allowing op codes and then disabling
them after they fail for the first time.

Before this patch:

EXT4-fs (dm-6): mounted filesystem with ordered data mode. Opts: (null)
device-mapper: multipath: XXX snitm debugging: got -EREMOTEIO (-121)
device-mapper: multipath: XXX snitm debugging: failing WRITE SAME IO with error=-121
end_request: critical target error, dev dm-6, sector 528
dm-6: WRITE SAME failed. Manually zeroing.
device-mapper: multipath: Failing path 8:112.
end_request: I/O error, dev dm-6, sector 4616
dm-6: WRITE SAME failed. Manually zeroing.
end_request: I/O error, dev dm-6, sector 4616
end_request: I/O error, dev dm-6, sector 5640
end_request: I/O error, dev dm-6, sector 6664
end_request: I/O error, dev dm-6, sector 7688
end_request: I/O error, dev dm-6, sector 524288
Buffer I/O error on device dm-6, logical block 65536
lost page write due to I/O error on dm-6
JBD2: Error -5 detected when updating journal superblock for dm-6-8.
end_request: I/O error, dev dm-6, sector 524296
Aborting journal on device dm-6-8.
end_request: I/O error, dev dm-6, sector 524288
Buffer I/O error on device dm-6, logical block 65536
lost page write due to I/O error on dm-6
JBD2: Error -5 detected when updating journal superblock for dm-6-8.

# cat /sys/block/sdh/queue/write_same_max_bytes
0
# cat /sys/block/dm-6/queue/write_same_max_bytes
33553920

After this patch:

EXT4-fs (dm-6): mounted filesystem with ordered data mode. Opts: (null)
device-mapper: multipath: XXX snitm debugging: got -EREMOTEIO (-121)
device-mapper: multipath: XXX snitm debugging: WRITE SAME I/O failed with error=-121
end_request: critical target error, dev dm-6, sector 528
dm-6: WRITE SAME failed. Manually zeroing.

# cat /sys/block/sdh/queue/write_same_max_bytes
0
# cat /sys/block/dm-6/queue/write_same_max_bytes
0

It should be noted that WRITE SAME support wasn't enabled in DM
multipath until v3.10.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: stable@vger.kernel.org # 3.10+
---
 include/linux/device-mapper.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 653073de09e3..ed419c62dde1 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -406,13 +406,14 @@ int dm_noflush_suspending(struct dm_target *ti);
 union map_info *dm_get_mapinfo(struct bio *bio);
 union map_info *dm_get_rq_mapinfo(struct request *rq);
 
+struct queue_limits *dm_get_queue_limits(struct mapped_device *md);
+
 /*
  * Geometry functions.
  */
 int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo);
 int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo);
 
-
 /*-----------------------------------------------------------------
  * Functions for manipulating device-mapper tables.
  *---------------------------------------------------------------*/
-- 
cgit v1.2.3


From 9809b18fcf6b8d8ec4d3643677345907e6b50eca Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.cz>
Date: Tue, 24 Sep 2013 15:27:30 -0700
Subject: watchdog: update watchdog_thresh properly

watchdog_tresh controls how often nmi perf event counter checks per-cpu
hrtimer_interrupts counter and blows up if the counter hasn't changed
since the last check.  The counter is updated by per-cpu
watchdog_hrtimer hrtimer which is scheduled with 2/5 watchdog_thresh
period which guarantees that hrtimer is scheduled 2 times per the main
period.  Both hrtimer and perf event are started together when the
watchdog is enabled.

So far so good.  But...

But what happens when watchdog_thresh is updated from sysctl handler?

proc_dowatchdog will set a new sampling period and hrtimer callback
(watchdog_timer_fn) will use the new value in the next round.  The
problem, however, is that nobody tells the perf event that the sampling
period has changed so it is ticking with the period configured when it
has been set up.

This might result in an ear ripping dissonance between perf and hrtimer
parts if the watchdog_thresh is increased.  And even worse it might lead
to KABOOM if the watchdog is configured to panic on such a spurious
lockup.

This patch fixes the issue by updating both nmi perf even counter and
hrtimers if the threshold value has changed.

The nmi one is disabled and then reinitialized from scratch.  This has
an unpleasant side effect that the allocation of the new event might
fail theoretically so the hard lockup detector would be disabled for
such cpus.  On the other hand such a memory allocation failure is very
unlikely because the original event is deallocated right before.

It would be much nicer if we just changed perf event period but there
doesn't seem to be any API to do that right now.  It is also unfortunate
that perf_event_alloc uses GFP_KERNEL allocation unconditionally so we
cannot use on_each_cpu() and do the same thing from the per-cpu context.
The update from the current CPU should be safe because
perf_event_disable removes the event atomically before it clears the
per-cpu watchdog_ev so it cannot change anything under running handler
feet.

The hrtimer is simply restarted (thanks to Don Zickus who has pointed
this out) if it is queued because we cannot rely it will fire&adopt to
the new sampling period before a new nmi event triggers (when the
treshold is decreased).

[akpm@linux-foundation.org: the UP version of __smp_call_function_single ended up in the wrong place]
Signed-off-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Fabio Estevam <festevam@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/smp.h |  6 ++++++
 kernel/watchdog.c   | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 56 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/smp.h b/include/linux/smp.h
index cfb7ca094b38..731f5237d5f4 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -155,6 +155,12 @@ smp_call_function_any(const struct cpumask *mask, smp_call_func_t func,
 
 static inline void kick_all_cpus_sync(void) {  }
 
+static inline void __smp_call_function_single(int cpuid,
+		struct call_single_data *data, int wait)
+{
+	on_each_cpu(data->func, data->info, wait);
+}
+
 #endif /* !SMP */
 
 /*
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index ced7d0609931..4431610f049a 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -486,7 +486,52 @@ static struct smp_hotplug_thread watchdog_threads = {
 	.unpark			= watchdog_enable,
 };
 
-static int watchdog_enable_all_cpus(void)
+static void restart_watchdog_hrtimer(void *info)
+{
+	struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
+	int ret;
+
+	/*
+	 * No need to cancel and restart hrtimer if it is currently executing
+	 * because it will reprogram itself with the new period now.
+	 * We should never see it unqueued here because we are running per-cpu
+	 * with interrupts disabled.
+	 */
+	ret = hrtimer_try_to_cancel(hrtimer);
+	if (ret == 1)
+		hrtimer_start(hrtimer, ns_to_ktime(sample_period),
+				HRTIMER_MODE_REL_PINNED);
+}
+
+static void update_timers(int cpu)
+{
+	struct call_single_data data = {.func = restart_watchdog_hrtimer};
+	/*
+	 * Make sure that perf event counter will adopt to a new
+	 * sampling period. Updating the sampling period directly would
+	 * be much nicer but we do not have an API for that now so
+	 * let's use a big hammer.
+	 * Hrtimer will adopt the new period on the next tick but this
+	 * might be late already so we have to restart the timer as well.
+	 */
+	watchdog_nmi_disable(cpu);
+	__smp_call_function_single(cpu, &data, 1);
+	watchdog_nmi_enable(cpu);
+}
+
+static void update_timers_all_cpus(void)
+{
+	int cpu;
+
+	get_online_cpus();
+	preempt_disable();
+	for_each_online_cpu(cpu)
+		update_timers(cpu);
+	preempt_enable();
+	put_online_cpus();
+}
+
+static int watchdog_enable_all_cpus(bool sample_period_changed)
 {
 	int err = 0;
 
@@ -496,6 +541,8 @@ static int watchdog_enable_all_cpus(void)
 			pr_err("Failed to create watchdog threads, disabled\n");
 		else
 			watchdog_running = 1;
+	} else if (sample_period_changed) {
+		update_timers_all_cpus();
 	}
 
 	return err;
@@ -537,7 +584,7 @@ int proc_dowatchdog(struct ctl_table *table, int write,
 	 * watchdog_*_all_cpus() function takes care of this.
 	 */
 	if (watchdog_user_enabled && watchdog_thresh)
-		err = watchdog_enable_all_cpus();
+		err = watchdog_enable_all_cpus(old_thresh != watchdog_thresh);
 	else
 		watchdog_disable_all_cpus();
 
@@ -557,5 +604,5 @@ void __init lockup_detector_init(void)
 	set_sample_period();
 
 	if (watchdog_user_enabled)
-		watchdog_enable_all_cpus();
+		watchdog_enable_all_cpus(false);
 }
-- 
cgit v1.2.3


From 694fbc0fe78518d06efa63910bf4ecee660e7852 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 24 Sep 2013 15:27:37 -0700
Subject: revert "memcg: enhance memcg iterator to support predicates"

Revert commit de57780dc659 ("memcg: enhance memcg iterator to support
predicates")

I merged this prematurely - Michal and Johannes still disagree about the
overall design direction and the future remains unclear.

Cc: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 49 ++++----------------------------
 mm/memcontrol.c            | 70 ++++++++++------------------------------------
 mm/vmscan.c                | 16 +++++++----
 3 files changed, 32 insertions(+), 103 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 60e95872da29..ef2b9bd7fafa 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -53,23 +53,6 @@ struct mem_cgroup_reclaim_cookie {
 	unsigned int generation;
 };
 
-enum mem_cgroup_filter_t {
-	VISIT,		/* visit current node */
-	SKIP,		/* skip the current node and continue traversal */
-	SKIP_TREE,	/* skip the whole subtree and continue traversal */
-};
-
-/*
- * mem_cgroup_filter_t predicate might instruct mem_cgroup_iter_cond how to
- * iterate through the hierarchy tree. Each tree element is checked by the
- * predicate before it is returned by the iterator. If a filter returns
- * SKIP or SKIP_TREE then the iterator code continues traversal (with the
- * next node down the hierarchy or the next node that doesn't belong under the
- * memcg's subtree).
- */
-typedef enum mem_cgroup_filter_t
-(*mem_cgroup_iter_filter)(struct mem_cgroup *memcg, struct mem_cgroup *root);
-
 #ifdef CONFIG_MEMCG
 /*
  * All "charge" functions with gfp_mask should use GFP_KERNEL or
@@ -137,18 +120,9 @@ mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
 extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 	struct page *oldpage, struct page *newpage, bool migration_ok);
 
-struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
-				   struct mem_cgroup *prev,
-				   struct mem_cgroup_reclaim_cookie *reclaim,
-				   mem_cgroup_iter_filter cond);
-
-static inline struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
-				   struct mem_cgroup *prev,
-				   struct mem_cgroup_reclaim_cookie *reclaim)
-{
-	return mem_cgroup_iter_cond(root, prev, reclaim, NULL);
-}
-
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
+				   struct mem_cgroup *,
+				   struct mem_cgroup_reclaim_cookie *);
 void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
 
 /*
@@ -260,8 +234,7 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 	mem_cgroup_update_page_stat(page, idx, -1);
 }
 
-enum mem_cgroup_filter_t
-mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
+bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
 		struct mem_cgroup *root);
 
 void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
@@ -376,15 +349,6 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 		struct page *oldpage, struct page *newpage, bool migration_ok)
 {
 }
-static inline struct mem_cgroup *
-mem_cgroup_iter_cond(struct mem_cgroup *root,
-		struct mem_cgroup *prev,
-		struct mem_cgroup_reclaim_cookie *reclaim,
-		mem_cgroup_iter_filter cond)
-{
-	/* first call must return non-NULL, second return NULL */
-	return (struct mem_cgroup *)(unsigned long)!prev;
-}
 
 static inline struct mem_cgroup *
 mem_cgroup_iter(struct mem_cgroup *root,
@@ -471,11 +435,10 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 }
 
 static inline
-enum mem_cgroup_filter_t
-mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
+bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
 		struct mem_cgroup *root)
 {
-	return VISIT;
+	return false;
 }
 
 static inline void mem_cgroup_split_huge_fixup(struct page *head)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 916892c2b8e0..65e7bec4b0f0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -862,15 +862,6 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
 	return memcg;
 }
 
-static enum mem_cgroup_filter_t
-mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root,
-		mem_cgroup_iter_filter cond)
-{
-	if (!cond)
-		return VISIT;
-	return cond(memcg, root);
-}
-
 /*
  * Returns a next (in a pre-order walk) alive memcg (with elevated css
  * ref. count) or NULL if the whole root's subtree has been visited.
@@ -878,7 +869,7 @@ mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root,
  * helper function to be used by mem_cgroup_iter
  */
 static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root,
-		struct mem_cgroup *last_visited, mem_cgroup_iter_filter cond)
+		struct mem_cgroup *last_visited)
 {
 	struct cgroup_subsys_state *prev_css, *next_css;
 
@@ -896,31 +887,11 @@ skip_node:
 	if (next_css) {
 		struct mem_cgroup *mem = mem_cgroup_from_css(next_css);
 
-		switch (mem_cgroup_filter(mem, root, cond)) {
-		case SKIP:
+		if (css_tryget(&mem->css))
+			return mem;
+		else {
 			prev_css = next_css;
 			goto skip_node;
-		case SKIP_TREE:
-			if (mem == root)
-				return NULL;
-			/*
-			 * css_rightmost_descendant is not an optimal way to
-			 * skip through a subtree (especially for imbalanced
-			 * trees leaning to right) but that's what we have right
-			 * now. More effective solution would be traversing
-			 * right-up for first non-NULL without calling
-			 * css_next_descendant_pre afterwards.
-			 */
-			prev_css = css_rightmost_descendant(next_css);
-			goto skip_node;
-		case VISIT:
-			if (css_tryget(&mem->css))
-				return mem;
-			else {
-				prev_css = next_css;
-				goto skip_node;
-			}
-			break;
 		}
 	}
 
@@ -984,7 +955,6 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
  * @root: hierarchy root
  * @prev: previously returned memcg, NULL on first invocation
  * @reclaim: cookie for shared reclaim walks, NULL for full walks
- * @cond: filter for visited nodes, NULL for no filter
  *
  * Returns references to children of the hierarchy below @root, or
  * @root itself, or %NULL after a full round-trip.
@@ -997,18 +967,15 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
  * divide up the memcgs in the hierarchy among all concurrent
  * reclaimers operating on the same zone and priority.
  */
-struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 				   struct mem_cgroup *prev,
-				   struct mem_cgroup_reclaim_cookie *reclaim,
-				   mem_cgroup_iter_filter cond)
+				   struct mem_cgroup_reclaim_cookie *reclaim)
 {
 	struct mem_cgroup *memcg = NULL;
 	struct mem_cgroup *last_visited = NULL;
 
-	if (mem_cgroup_disabled()) {
-		/* first call must return non-NULL, second return NULL */
-		return (struct mem_cgroup *)(unsigned long)!prev;
-	}
+	if (mem_cgroup_disabled())
+		return NULL;
 
 	if (!root)
 		root = root_mem_cgroup;
@@ -1019,9 +986,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
 	if (!root->use_hierarchy && root != root_mem_cgroup) {
 		if (prev)
 			goto out_css_put;
-		if (mem_cgroup_filter(root, root, cond) == VISIT)
-			return root;
-		return NULL;
+		return root;
 	}
 
 	rcu_read_lock();
@@ -1044,7 +1009,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
 			last_visited = mem_cgroup_iter_load(iter, root, &seq);
 		}
 
-		memcg = __mem_cgroup_iter_next(root, last_visited, cond);
+		memcg = __mem_cgroup_iter_next(root, last_visited);
 
 		if (reclaim) {
 			mem_cgroup_iter_update(iter, last_visited, memcg, seq);
@@ -1055,11 +1020,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
 				reclaim->generation = iter->generation;
 		}
 
-		/*
-		 * We have finished the whole tree walk or no group has been
-		 * visited because filter told us to skip the root node.
-		 */
-		if (!memcg && (prev || (cond && !last_visited)))
+		if (prev && !memcg)
 			goto out_unlock;
 	}
 out_unlock:
@@ -1804,14 +1765,13 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
  *	a) it is over its soft limit
  *	b) any parent up the hierarchy is over its soft limit
  */
-enum mem_cgroup_filter_t
-mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
+bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
 		struct mem_cgroup *root)
 {
 	struct mem_cgroup *parent = memcg;
 
 	if (res_counter_soft_limit_excess(&memcg->res))
-		return VISIT;
+		return true;
 
 	/*
 	 * If any parent up to the root in the hierarchy is over its soft limit
@@ -1819,12 +1779,12 @@ mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
 	 */
 	while ((parent = parent_mem_cgroup(parent))) {
 		if (res_counter_soft_limit_excess(&parent->res))
-			return VISIT;
+			return true;
 		if (parent == root)
 			break;
 	}
 
-	return SKIP;
+	return false;
 }
 
 static DEFINE_SPINLOCK(memcg_oom_lock);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index cdd300b81485..17a7134de4d7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2185,16 +2185,21 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
 			.zone = zone,
 			.priority = sc->priority,
 		};
-		struct mem_cgroup *memcg = NULL;
-		mem_cgroup_iter_filter filter = (soft_reclaim) ?
-			mem_cgroup_soft_reclaim_eligible : NULL;
+		struct mem_cgroup *memcg;
 
 		nr_reclaimed = sc->nr_reclaimed;
 		nr_scanned = sc->nr_scanned;
 
-		while ((memcg = mem_cgroup_iter_cond(root, memcg, &reclaim, filter))) {
+		memcg = mem_cgroup_iter(root, NULL, &reclaim);
+		do {
 			struct lruvec *lruvec;
 
+			if (soft_reclaim &&
+			    !mem_cgroup_soft_reclaim_eligible(memcg, root)) {
+				memcg = mem_cgroup_iter(root, memcg, &reclaim);
+				continue;
+			}
+
 			lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 
 			shrink_lruvec(lruvec, sc);
@@ -2214,7 +2219,8 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
 				mem_cgroup_iter_break(root, memcg);
 				break;
 			}
-		}
+			memcg = mem_cgroup_iter(root, memcg, &reclaim);
+		} while (memcg);
 
 		vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
 			   sc->nr_scanned - nr_scanned,
-- 
cgit v1.2.3


From b1aff7fcf86c88472b0a70f15d89d7a4adba07bb Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 24 Sep 2013 15:27:38 -0700
Subject: revert "vmscan, memcg: do softlimit reclaim also for targeted
 reclaim"

Revert commit a5b7c87f9207 ("vmscan, memcg: do softlimit reclaim also
for targeted reclaim")

I merged this prematurely - Michal and Johannes still disagree about the
overall design direction and the future remains unclear.

Cc: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  6 ++----
 mm/memcontrol.c            | 14 +++++---------
 mm/vmscan.c                |  4 ++--
 3 files changed, 9 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ef2b9bd7fafa..6054c9f3a5e8 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -234,8 +234,7 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 	mem_cgroup_update_page_stat(page, idx, -1);
 }
 
-bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
-		struct mem_cgroup *root);
+bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg);
 
 void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
 static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,
@@ -435,8 +434,7 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 }
 
 static inline
-bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
-		struct mem_cgroup *root)
+bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg)
 {
 	return false;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 65e7bec4b0f0..47cdc7eb1a6b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1760,13 +1760,11 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
 #endif
 
 /*
- * A group is eligible for the soft limit reclaim under the given root
- * hierarchy if
- *	a) it is over its soft limit
+ * A group is eligible for the soft limit reclaim if
+ * 	a) it is over its soft limit
  *	b) any parent up the hierarchy is over its soft limit
  */
-bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
-		struct mem_cgroup *root)
+bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg)
 {
 	struct mem_cgroup *parent = memcg;
 
@@ -1774,14 +1772,12 @@ bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
 		return true;
 
 	/*
-	 * If any parent up to the root in the hierarchy is over its soft limit
-	 * then we have to obey and reclaim from this group as well.
+	 * If any parent up the hierarchy is over its soft limit then we
+	 * have to obey and reclaim from this group as well.
 	 */
 	while ((parent = parent_mem_cgroup(parent))) {
 		if (res_counter_soft_limit_excess(&parent->res))
 			return true;
-		if (parent == root)
-			break;
 	}
 
 	return false;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 17a7134de4d7..0e081cada4ba 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -142,7 +142,7 @@ static bool global_reclaim(struct scan_control *sc)
 
 static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
 {
-	return !mem_cgroup_disabled();
+	return !mem_cgroup_disabled() && global_reclaim(sc);
 }
 #else
 static bool global_reclaim(struct scan_control *sc)
@@ -2195,7 +2195,7 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
 			struct lruvec *lruvec;
 
 			if (soft_reclaim &&
-			    !mem_cgroup_soft_reclaim_eligible(memcg, root)) {
+			    !mem_cgroup_soft_reclaim_eligible(memcg)) {
 				memcg = mem_cgroup_iter(root, memcg, &reclaim);
 				continue;
 			}
-- 
cgit v1.2.3


From 0608f43da64a1f1c42507304b5f25bc8b1227aa4 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 24 Sep 2013 15:27:41 -0700
Subject: revert "memcg, vmscan: integrate soft reclaim tighter with zone
 shrinking code"

Revert commit 3b38722efd9f ("memcg, vmscan: integrate soft reclaim
tighter with zone shrinking code")

I merged this prematurely - Michal and Johannes still disagree about the
overall design direction and the future remains unclear.

Cc: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  10 ++-
 mm/memcontrol.c            | 163 +++++++++++++++++++++++++++++++++++++++------
 mm/vmscan.c                |  62 ++++++++---------
 3 files changed, 175 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6054c9f3a5e8..ecc82b37c4cc 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -234,7 +234,9 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 	mem_cgroup_update_page_stat(page, idx, -1);
 }
 
-bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg);
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+						gfp_t gfp_mask,
+						unsigned long *total_scanned);
 
 void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
 static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,
@@ -434,9 +436,11 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 }
 
 static inline
-bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg)
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+					    gfp_t gfp_mask,
+					    unsigned long *total_scanned)
 {
-	return false;
+	return 0;
 }
 
 static inline void mem_cgroup_split_huge_fixup(struct page *head)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 852dbec07ce6..1c52ddbc839b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1991,28 +1991,57 @@ static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
 }
 #endif
 
-/*
- * A group is eligible for the soft limit reclaim if
- * 	a) it is over its soft limit
- *	b) any parent up the hierarchy is over its soft limit
- */
-bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg)
-{
-	struct mem_cgroup *parent = memcg;
-
-	if (res_counter_soft_limit_excess(&memcg->res))
-		return true;
-
-	/*
-	 * If any parent up the hierarchy is over its soft limit then we
-	 * have to obey and reclaim from this group as well.
-	 */
-	while ((parent = parent_mem_cgroup(parent))) {
-		if (res_counter_soft_limit_excess(&parent->res))
-			return true;
+static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
+				   struct zone *zone,
+				   gfp_t gfp_mask,
+				   unsigned long *total_scanned)
+{
+	struct mem_cgroup *victim = NULL;
+	int total = 0;
+	int loop = 0;
+	unsigned long excess;
+	unsigned long nr_scanned;
+	struct mem_cgroup_reclaim_cookie reclaim = {
+		.zone = zone,
+		.priority = 0,
+	};
+
+	excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
+
+	while (1) {
+		victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
+		if (!victim) {
+			loop++;
+			if (loop >= 2) {
+				/*
+				 * If we have not been able to reclaim
+				 * anything, it might because there are
+				 * no reclaimable pages under this hierarchy
+				 */
+				if (!total)
+					break;
+				/*
+				 * We want to do more targeted reclaim.
+				 * excess >> 2 is not to excessive so as to
+				 * reclaim too much, nor too less that we keep
+				 * coming back to reclaim from this cgroup
+				 */
+				if (total >= (excess >> 2) ||
+					(loop > MEM_CGROUP_MAX_RECLAIM_LOOPS))
+					break;
+			}
+			continue;
+		}
+		if (!mem_cgroup_reclaimable(victim, false))
+			continue;
+		total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
+						     zone, &nr_scanned);
+		*total_scanned += nr_scanned;
+		if (!res_counter_soft_limit_excess(&root_memcg->res))
+			break;
 	}
-
-	return false;
+	mem_cgroup_iter_break(root_memcg, victim);
+	return total;
 }
 
 static DEFINE_SPINLOCK(memcg_oom_lock);
@@ -4761,6 +4790,98 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 	return ret;
 }
 
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+					    gfp_t gfp_mask,
+					    unsigned long *total_scanned)
+{
+	unsigned long nr_reclaimed = 0;
+	struct mem_cgroup_per_zone *mz, *next_mz = NULL;
+	unsigned long reclaimed;
+	int loop = 0;
+	struct mem_cgroup_tree_per_zone *mctz;
+	unsigned long long excess;
+	unsigned long nr_scanned;
+
+	if (order > 0)
+		return 0;
+
+	mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone));
+	/*
+	 * This loop can run a while, specially if mem_cgroup's continuously
+	 * keep exceeding their soft limit and putting the system under
+	 * pressure
+	 */
+	do {
+		if (next_mz)
+			mz = next_mz;
+		else
+			mz = mem_cgroup_largest_soft_limit_node(mctz);
+		if (!mz)
+			break;
+
+		nr_scanned = 0;
+		reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
+						    gfp_mask, &nr_scanned);
+		nr_reclaimed += reclaimed;
+		*total_scanned += nr_scanned;
+		spin_lock(&mctz->lock);
+
+		/*
+		 * If we failed to reclaim anything from this memory cgroup
+		 * it is time to move on to the next cgroup
+		 */
+		next_mz = NULL;
+		if (!reclaimed) {
+			do {
+				/*
+				 * Loop until we find yet another one.
+				 *
+				 * By the time we get the soft_limit lock
+				 * again, someone might have aded the
+				 * group back on the RB tree. Iterate to
+				 * make sure we get a different mem.
+				 * mem_cgroup_largest_soft_limit_node returns
+				 * NULL if no other cgroup is present on
+				 * the tree
+				 */
+				next_mz =
+				__mem_cgroup_largest_soft_limit_node(mctz);
+				if (next_mz == mz)
+					css_put(&next_mz->memcg->css);
+				else /* next_mz == NULL or other memcg */
+					break;
+			} while (1);
+		}
+		__mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
+		excess = res_counter_soft_limit_excess(&mz->memcg->res);
+		/*
+		 * One school of thought says that we should not add
+		 * back the node to the tree if reclaim returns 0.
+		 * But our reclaim could return 0, simply because due
+		 * to priority we are exposing a smaller subset of
+		 * memory to reclaim from. Consider this as a longer
+		 * term TODO.
+		 */
+		/* If excess == 0, no tree ops */
+		__mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess);
+		spin_unlock(&mctz->lock);
+		css_put(&mz->memcg->css);
+		loop++;
+		/*
+		 * Could not reclaim anything and there are no more
+		 * mem cgroups to try or we seem to be looping without
+		 * reclaiming anything.
+		 */
+		if (!nr_reclaimed &&
+			(next_mz == NULL ||
+			loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
+			break;
+	} while (!nr_reclaimed);
+	if (next_mz)
+		css_put(&next_mz->memcg->css);
+	return nr_reclaimed;
+}
+
 /**
  * mem_cgroup_force_empty_list - clears LRU of a group
  * @memcg: group to clear
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0e081cada4ba..beb35778c69f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -139,21 +139,11 @@ static bool global_reclaim(struct scan_control *sc)
 {
 	return !sc->target_mem_cgroup;
 }
-
-static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
-{
-	return !mem_cgroup_disabled() && global_reclaim(sc);
-}
 #else
 static bool global_reclaim(struct scan_control *sc)
 {
 	return true;
 }
-
-static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
-{
-	return false;
-}
 #endif
 
 unsigned long zone_reclaimable_pages(struct zone *zone)
@@ -2174,8 +2164,7 @@ static inline bool should_continue_reclaim(struct zone *zone,
 	}
 }
 
-static void
-__shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
+static void shrink_zone(struct zone *zone, struct scan_control *sc)
 {
 	unsigned long nr_reclaimed, nr_scanned;
 
@@ -2194,12 +2183,6 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
 		do {
 			struct lruvec *lruvec;
 
-			if (soft_reclaim &&
-			    !mem_cgroup_soft_reclaim_eligible(memcg)) {
-				memcg = mem_cgroup_iter(root, memcg, &reclaim);
-				continue;
-			}
-
 			lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 
 			shrink_lruvec(lruvec, sc);
@@ -2230,24 +2213,6 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
 					 sc->nr_scanned - nr_scanned, sc));
 }
 
-
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
-{
-	bool do_soft_reclaim = mem_cgroup_should_soft_reclaim(sc);
-	unsigned long nr_scanned = sc->nr_scanned;
-
-	__shrink_zone(zone, sc, do_soft_reclaim);
-
-	/*
-	 * No group is over the soft limit or those that are do not have
-	 * pages in the zone we are reclaiming so we have to reclaim everybody
-	 */
-	if (do_soft_reclaim && (sc->nr_scanned == nr_scanned)) {
-		__shrink_zone(zone, sc, false);
-		return;
-	}
-}
-
 /* Returns true if compaction should go ahead for a high-order request */
 static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
 {
@@ -2309,6 +2274,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 {
 	struct zoneref *z;
 	struct zone *zone;
+	unsigned long nr_soft_reclaimed;
+	unsigned long nr_soft_scanned;
 	bool aborted_reclaim = false;
 
 	/*
@@ -2348,6 +2315,18 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 					continue;
 				}
 			}
+			/*
+			 * This steals pages from memory cgroups over softlimit
+			 * and returns the number of reclaimed pages and
+			 * scanned pages. This works for global memory pressure
+			 * and balancing, not for a memcg's limit.
+			 */
+			nr_soft_scanned = 0;
+			nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
+						sc->order, sc->gfp_mask,
+						&nr_soft_scanned);
+			sc->nr_reclaimed += nr_soft_reclaimed;
+			sc->nr_scanned += nr_soft_scanned;
 			/* need some check for avoid more shrink_zone() */
 		}
 
@@ -2941,6 +2920,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 {
 	int i;
 	int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
+	unsigned long nr_soft_reclaimed;
+	unsigned long nr_soft_scanned;
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
 		.priority = DEF_PRIORITY,
@@ -3055,6 +3036,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 
 			sc.nr_scanned = 0;
 
+			nr_soft_scanned = 0;
+			/*
+			 * Call soft limit reclaim before calling shrink_zone.
+			 */
+			nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
+							order, sc.gfp_mask,
+							&nr_soft_scanned);
+			sc.nr_reclaimed += nr_soft_reclaimed;
+
 			/*
 			 * There should be no need to raise the scanning
 			 * priority if enough pages are already being scanned
-- 
cgit v1.2.3


From b0b8c960ffcc5bfc82d1a09ad931673e3a36c15a Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Wed, 4 Sep 2013 10:52:57 -0500
Subject: of: clean-up ifdefs in of_irq.h

Much of of_irq.h is needlessly ifdef'ed. Clean this up and minimize the
amount ifdef'ed code. This fixes some  build warnings when CONFIG_OF
is not enabled (seen on i386 and x86_64):

include/linux/of_irq.h:82:7: warning: 'struct device_node' declared inside parameter list [enabled by default]
include/linux/of_irq.h:82:7: warning: its scope is only this definition or declaration, which is probably not what you want [enabled by default]
include/linux/of_irq.h:87:47: warning: 'struct device_node' declared inside parameter list [enabled by default]

Compile tested on i386, sparc and arm.

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Grant Likely <grant.likely@linaro.org>
Signed-off-by: Rob Herring <rob.herring@calxeda.com>
---
 include/linux/of_irq.h | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index 535cecf1e02f..fcd63baee5f2 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -1,8 +1,6 @@
 #ifndef __OF_IRQ_H
 #define __OF_IRQ_H
 
-#if defined(CONFIG_OF)
-struct of_irq;
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/irq.h>
@@ -10,14 +8,6 @@ struct of_irq;
 #include <linux/ioport.h>
 #include <linux/of.h>
 
-/*
- * irq_of_parse_and_map() is used by all OF enabled platforms; but SPARC
- * implements it differently.  However, the prototype is the same for all,
- * so declare it here regardless of the CONFIG_OF_IRQ setting.
- */
-extern unsigned int irq_of_parse_and_map(struct device_node *node, int index);
-
-#if defined(CONFIG_OF_IRQ)
 /**
  * of_irq - container for device_node/irq_specifier pair for an irq controller
  * @controller: pointer to interrupt controller device tree node
@@ -71,11 +61,17 @@ extern int of_irq_to_resource(struct device_node *dev, int index,
 extern int of_irq_count(struct device_node *dev);
 extern int of_irq_to_resource_table(struct device_node *dev,
 		struct resource *res, int nr_irqs);
-extern struct device_node *of_irq_find_parent(struct device_node *child);
 
 extern void of_irq_init(const struct of_device_id *matches);
 
-#endif /* CONFIG_OF_IRQ */
+#if defined(CONFIG_OF)
+/*
+ * irq_of_parse_and_map() is used by all OF enabled platforms; but SPARC
+ * implements it differently.  However, the prototype is the same for all,
+ * so declare it here regardless of the CONFIG_OF_IRQ setting.
+ */
+extern unsigned int irq_of_parse_and_map(struct device_node *node, int index);
+extern struct device_node *of_irq_find_parent(struct device_node *child);
 
 #else /* !CONFIG_OF */
 static inline unsigned int irq_of_parse_and_map(struct device_node *dev,
-- 
cgit v1.2.3


From 3a4916050ba2e0f1d114ef540abdf02b2b173e61 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Fri, 6 Sep 2013 11:49:56 -0700
Subject: Drivers: hv: util: Correctly support ws2008R2 and earlier

The current code does not correctly negotiate the version numbers for the util
driver when hosted on earlier hosts. The version numbers presented by this
driver were not compatible with the version numbers supported by Windows Server
2008. Fix this problem.

I would like to thank Olaf Hering (ohering@suse.com) for identifying the problem.

Reported-by: Olaf Hering <ohering@suse.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/hv_kvp.c      | 38 ++++++++++++++++++--------
 drivers/hv/hv_snapshot.c |  6 ++--
 drivers/hv/hv_util.c     | 71 ++++++++++++++++++++++++++++++++++++------------
 include/linux/hyperv.h   |  7 +++--
 4 files changed, 88 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index 28b03325b872..09988b289622 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -32,13 +32,17 @@
 /*
  * Pre win8 version numbers used in ws2008 and ws 2008 r2 (win7)
  */
+#define WS2008_SRV_MAJOR	1
+#define WS2008_SRV_MINOR	0
+#define WS2008_SRV_VERSION     (WS2008_SRV_MAJOR << 16 | WS2008_SRV_MINOR)
+
 #define WIN7_SRV_MAJOR   3
 #define WIN7_SRV_MINOR   0
-#define WIN7_SRV_MAJOR_MINOR     (WIN7_SRV_MAJOR << 16 | WIN7_SRV_MINOR)
+#define WIN7_SRV_VERSION     (WIN7_SRV_MAJOR << 16 | WIN7_SRV_MINOR)
 
 #define WIN8_SRV_MAJOR   4
 #define WIN8_SRV_MINOR   0
-#define WIN8_SRV_MAJOR_MINOR     (WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR)
+#define WIN8_SRV_VERSION     (WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR)
 
 /*
  * Global state maintained for transaction that is being processed.
@@ -587,6 +591,8 @@ void hv_kvp_onchannelcallback(void *context)
 
 	struct icmsg_hdr *icmsghdrp;
 	struct icmsg_negotiate *negop = NULL;
+	int util_fw_version;
+	int kvp_srv_version;
 
 	if (kvp_transaction.active) {
 		/*
@@ -606,17 +612,26 @@ void hv_kvp_onchannelcallback(void *context)
 
 		if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
 			/*
-			 * We start with win8 version and if the host cannot
-			 * support that we use the previous version.
+			 * Based on the host, select appropriate
+			 * framework and service versions we will
+			 * negotiate.
 			 */
-			if (vmbus_prep_negotiate_resp(icmsghdrp, negop,
-				 recv_buffer, UTIL_FW_MAJOR_MINOR,
-				 WIN8_SRV_MAJOR_MINOR))
-				goto done;
-
+			switch (vmbus_proto_version) {
+			case (VERSION_WS2008):
+				util_fw_version = UTIL_WS2K8_FW_VERSION;
+				kvp_srv_version = WS2008_SRV_VERSION;
+				break;
+			case (VERSION_WIN7):
+				util_fw_version = UTIL_FW_VERSION;
+				kvp_srv_version = WIN7_SRV_VERSION;
+				break;
+			default:
+				util_fw_version = UTIL_FW_VERSION;
+				kvp_srv_version = WIN8_SRV_VERSION;
+			}
 			vmbus_prep_negotiate_resp(icmsghdrp, negop,
-				 recv_buffer, UTIL_FW_MAJOR_MINOR,
-				 WIN7_SRV_MAJOR_MINOR);
+				 recv_buffer, util_fw_version,
+				 kvp_srv_version);
 
 		} else {
 			kvp_msg = (struct hv_kvp_msg *)&recv_buffer[
@@ -649,7 +664,6 @@ void hv_kvp_onchannelcallback(void *context)
 			return;
 
 		}
-done:
 
 		icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
 			| ICMSGHDRFLAG_RESPONSE;
diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
index e4572f3f2834..0c3546224376 100644
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -26,7 +26,7 @@
 
 #define VSS_MAJOR  5
 #define VSS_MINOR  0
-#define VSS_MAJOR_MINOR    (VSS_MAJOR << 16 | VSS_MINOR)
+#define VSS_VERSION    (VSS_MAJOR << 16 | VSS_MINOR)
 
 
@@ -190,8 +190,8 @@ void hv_vss_onchannelcallback(void *context)
 
 		if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
 			vmbus_prep_negotiate_resp(icmsghdrp, negop,
-				 recv_buffer, UTIL_FW_MAJOR_MINOR,
-				 VSS_MAJOR_MINOR);
+				 recv_buffer, UTIL_FW_VERSION,
+				 VSS_VERSION);
 		} else {
 			vss_msg = (struct hv_vss_msg *)&recv_buffer[
 				sizeof(struct vmbuspipe_hdr) +
diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index cb82233541b1..273e3ddb3a20 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -28,17 +28,32 @@
 #include <linux/reboot.h>
 #include <linux/hyperv.h>
 
-#define SHUTDOWN_MAJOR	3
-#define SHUTDOWN_MINOR  0
-#define SHUTDOWN_MAJOR_MINOR	(SHUTDOWN_MAJOR << 16 | SHUTDOWN_MINOR)
 
-#define TIMESYNCH_MAJOR	3
-#define TIMESYNCH_MINOR 0
-#define TIMESYNCH_MAJOR_MINOR	(TIMESYNCH_MAJOR << 16 | TIMESYNCH_MINOR)
+#define SD_MAJOR	3
+#define SD_MINOR	0
+#define SD_VERSION	(SD_MAJOR << 16 | SD_MINOR)
 
-#define HEARTBEAT_MAJOR	3
-#define HEARTBEAT_MINOR 0
-#define HEARTBEAT_MAJOR_MINOR	(HEARTBEAT_MAJOR << 16 | HEARTBEAT_MINOR)
+#define SD_WS2008_MAJOR		1
+#define SD_WS2008_VERSION	(SD_WS2008_MAJOR << 16 | SD_MINOR)
+
+#define TS_MAJOR	3
+#define TS_MINOR	0
+#define TS_VERSION	(TS_MAJOR << 16 | TS_MINOR)
+
+#define TS_WS2008_MAJOR		1
+#define TS_WS2008_VERSION	(TS_WS2008_MAJOR << 16 | TS_MINOR)
+
+#define HB_MAJOR	3
+#define HB_MINOR 0
+#define HB_VERSION	(HB_MAJOR << 16 | HB_MINOR)
+
+#define HB_WS2008_MAJOR	1
+#define HB_WS2008_VERSION	(HB_WS2008_MAJOR << 16 | HB_MINOR)
+
+static int sd_srv_version;
+static int ts_srv_version;
+static int hb_srv_version;
+static int util_fw_version;
 
 static void shutdown_onchannelcallback(void *context);
 static struct hv_util_service util_shutdown = {
@@ -99,8 +114,8 @@ static void shutdown_onchannelcallback(void *context)
 
 		if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
 			vmbus_prep_negotiate_resp(icmsghdrp, negop,
-					shut_txf_buf, UTIL_FW_MAJOR_MINOR,
-					SHUTDOWN_MAJOR_MINOR);
+					shut_txf_buf, util_fw_version,
+					sd_srv_version);
 		} else {
 			shutdown_msg =
 				(struct shutdown_msg_data *)&shut_txf_buf[
@@ -216,6 +231,7 @@ static void timesync_onchannelcallback(void *context)
 	struct icmsg_hdr *icmsghdrp;
 	struct ictimesync_data *timedatap;
 	u8 *time_txf_buf = util_timesynch.recv_buffer;
+	struct icmsg_negotiate *negop = NULL;
 
 	vmbus_recvpacket(channel, time_txf_buf,
 			 PAGE_SIZE, &recvlen, &requestid);
@@ -225,9 +241,10 @@ static void timesync_onchannelcallback(void *context)
 				sizeof(struct vmbuspipe_hdr)];
 
 		if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
-			vmbus_prep_negotiate_resp(icmsghdrp, NULL, time_txf_buf,
-						UTIL_FW_MAJOR_MINOR,
-						TIMESYNCH_MAJOR_MINOR);
+			vmbus_prep_negotiate_resp(icmsghdrp, negop,
+						time_txf_buf,
+						util_fw_version,
+						ts_srv_version);
 		} else {
 			timedatap = (struct ictimesync_data *)&time_txf_buf[
 				sizeof(struct vmbuspipe_hdr) +
@@ -257,6 +274,7 @@ static void heartbeat_onchannelcallback(void *context)
 	struct icmsg_hdr *icmsghdrp;
 	struct heartbeat_msg_data *heartbeat_msg;
 	u8 *hbeat_txf_buf = util_heartbeat.recv_buffer;
+	struct icmsg_negotiate *negop = NULL;
 
 	vmbus_recvpacket(channel, hbeat_txf_buf,
 			 PAGE_SIZE, &recvlen, &requestid);
@@ -266,9 +284,9 @@ static void heartbeat_onchannelcallback(void *context)
 				sizeof(struct vmbuspipe_hdr)];
 
 		if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
-			vmbus_prep_negotiate_resp(icmsghdrp, NULL,
-				hbeat_txf_buf, UTIL_FW_MAJOR_MINOR,
-				HEARTBEAT_MAJOR_MINOR);
+			vmbus_prep_negotiate_resp(icmsghdrp, negop,
+				hbeat_txf_buf, util_fw_version,
+				hb_srv_version);
 		} else {
 			heartbeat_msg =
 				(struct heartbeat_msg_data *)&hbeat_txf_buf[
@@ -321,6 +339,25 @@ static int util_probe(struct hv_device *dev,
 		goto error;
 
 	hv_set_drvdata(dev, srv);
+	/*
+	 * Based on the host; initialize the framework and
+	 * service version numbers we will negotiate.
+	 */
+	switch (vmbus_proto_version) {
+	case (VERSION_WS2008):
+		util_fw_version = UTIL_WS2K8_FW_VERSION;
+		sd_srv_version = SD_WS2008_VERSION;
+		ts_srv_version = TS_WS2008_VERSION;
+		hb_srv_version = HB_WS2008_VERSION;
+		break;
+
+	default:
+		util_fw_version = UTIL_FW_VERSION;
+		sd_srv_version = SD_VERSION;
+		ts_srv_version = TS_VERSION;
+		hb_srv_version = HB_VERSION;
+	}
+
 	return 0;
 
 error:
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index a3b8b2e2d244..d98503bde7e9 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -30,10 +30,13 @@
 /*
  * Framework version for util services.
  */
+#define UTIL_FW_MINOR  0
+
+#define UTIL_WS2K8_FW_MAJOR  1
+#define UTIL_WS2K8_FW_VERSION     (UTIL_WS2K8_FW_MAJOR << 16 | UTIL_FW_MINOR)
 
 #define UTIL_FW_MAJOR  3
-#define UTIL_FW_MINOR  0
-#define UTIL_FW_MAJOR_MINOR     (UTIL_FW_MAJOR << 16 | UTIL_FW_MINOR)
+#define UTIL_FW_VERSION     (UTIL_FW_MAJOR << 16 | UTIL_FW_MINOR)
 
 
 /*
-- 
cgit v1.2.3


From 083986e8248d978b6c961d3da6beb0c921c68220 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Sat, 28 Sep 2013 11:23:59 +0200
Subject: mutex: replace CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX with simple ifdef

Linus suggested to replace

 #ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
 #define arch_mutex_cpu_relax() cpu_relax()
 #endif

with just a simple

  #ifndef arch_mutex_cpu_relax
  # define arch_mutex_cpu_relax() cpu_relax()
  #endif

to get rid of CONFIG_HAVE_CPU_RELAX_SIMPLE. So architectures can
simply define arch_mutex_cpu_relax if they want an architecture
specific function instead of having to add a select statement in
their Kconfig in addition.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 arch/Kconfig                      | 3 ---
 arch/s390/Kconfig                 | 1 -
 arch/s390/include/asm/mutex.h     | 2 --
 arch/s390/include/asm/processor.h | 2 ++
 include/linux/mutex.h             | 6 +++---
 5 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 1feb169274fe..af2cc6eabcc7 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -286,9 +286,6 @@ config HAVE_PERF_USER_STACK_DUMP
 config HAVE_ARCH_JUMP_LABEL
 	bool
 
-config HAVE_ARCH_MUTEX_CPU_RELAX
-	bool
-
 config HAVE_RCU_TABLE_FREE
 	bool
 
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index dcc6ac2d8026..d3fa84070b82 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -102,7 +102,6 @@ config S390
 	select GENERIC_TIME_VSYSCALL_OLD
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
 	select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
-	select HAVE_ARCH_MUTEX_CPU_RELAX
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT
diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h
index 688271f5f2e4..458c1f7fbc18 100644
--- a/arch/s390/include/asm/mutex.h
+++ b/arch/s390/include/asm/mutex.h
@@ -7,5 +7,3 @@
  */
 
 #include <asm-generic/mutex-dec.h>
-
-#define arch_mutex_cpu_relax()	barrier()
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 0eb37505cab1..ca7821f07260 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -198,6 +198,8 @@ static inline void cpu_relax(void)
 	barrier();
 }
 
+#define arch_mutex_cpu_relax()  barrier()
+
 static inline void psw_set_key(unsigned int key)
 {
 	asm volatile("spka 0(%0)" : : "d" (key));
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index ccd4260834c5..bab49da8a0f0 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -15,8 +15,8 @@
 #include <linux/spinlock_types.h>
 #include <linux/linkage.h>
 #include <linux/lockdep.h>
-
 #include <linux/atomic.h>
+#include <asm/processor.h>
 
 /*
  * Simple, straightforward mutexes with strict semantics:
@@ -175,8 +175,8 @@ extern void mutex_unlock(struct mutex *lock);
 
 extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
 
-#ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
-#define arch_mutex_cpu_relax()	cpu_relax()
+#ifndef arch_mutex_cpu_relax
+# define arch_mutex_cpu_relax() cpu_relax()
 #endif
 
 #endif
-- 
cgit v1.2.3


From ddeccb8d6b5bbe2c1e3a29f8c74b52f170c2207d Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko@sntech.de>
Date: Tue, 8 Oct 2013 06:42:10 +0900
Subject: dmaengine: add driver for Samsung s3c24xx SoCs

This adds a new driver to support the s3c24xx dma using the dmaengine
and makes the old one in mach-s3c24xx obsolete in the long run.

Conceptually the s3c24xx-dma feels like a distant relative of the pl08x
with numerous virtual channels being mapped to a lot less physical ones.
The driver therefore borrows a lot from the amba-pl08x driver in this
regard. Functionality-wise the driver gains a memcpy ability in addition
to the slave_sg one.

The driver supports both the method for requesting the peripheral used
by SoCs before the S3C2443 and the different method for S3C2443 and later.

On earlier SoCs the hardware channels usable for specific peripherals is
constrainted while on later SoCs all channels can be used for any peripheral.

Tested on a s3c2416-based board, memcpy using the dmatest module and
slave_sg partially using the spi-s3c64xx driver.

Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
---
 drivers/dma/Kconfig                       |   12 +
 drivers/dma/Makefile                      |    1 +
 drivers/dma/s3c24xx-dma.c                 | 1340 +++++++++++++++++++++++++++++
 include/linux/platform_data/dma-s3c24xx.h |   43 +
 4 files changed, 1396 insertions(+)
 create mode 100644 drivers/dma/s3c24xx-dma.c
 create mode 100644 include/linux/platform_data/dma-s3c24xx.h

(limited to 'include/linux')

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 526ec77c7ba0..d639115aa875 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -154,6 +154,18 @@ config TEGRA20_APB_DMA
 	  This DMA controller transfers data from memory to peripheral fifo
 	  or vice versa. It does not support memory to memory data transfer.
 
+config S3C24XX_DMAC
+	tristate "Samsung S3C24XX DMA support"
+	depends on ARCH_S3C24XX && !S3C24XX_DMA
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Support for the Samsung S3C24XX DMA controller driver. The
+	  DMA controller is having multiple DMA channels which can be
+	  configured for different peripherals like audio, UART, SPI.
+	  The DMA controller can transfer data from memory to peripheral,
+	  periphal to memory, periphal to periphal and memory to memory.
+
 source "drivers/dma/sh/Kconfig"
 
 config COH901318
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index db89035b3626..0ce2da97e429 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_SIRF_DMA) += sirf-dma.o
 obj-$(CONFIG_TI_EDMA) += edma.o
 obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o
 obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o
+obj-$(CONFIG_S3C24XX_DMAC) += s3c24xx-dma.o
 obj-$(CONFIG_PL330_DMA) += pl330.o
 obj-$(CONFIG_PCH_DMA) += pch_dma.o
 obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o
diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c
new file mode 100644
index 000000000000..56c92538bc4c
--- /dev/null
+++ b/drivers/dma/s3c24xx-dma.c
@@ -0,0 +1,1340 @@
+/*
+ * S3C24XX DMA handling
+ *
+ * Copyright (c) 2013 Heiko Stuebner <heiko@sntech.de>
+ *
+ * based on amba-pl08x.c
+ *
+ * Copyright (c) 2006 ARM Ltd.
+ * Copyright (c) 2010 ST-Ericsson SA
+ *
+ * Author: Peter Pearse <peter.pearse@arm.com>
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * The DMA controllers in S3C24XX SoCs have a varying number of DMA signals
+ * that can be routed to any of the 4 to 8 hardware-channels.
+ *
+ * Therefore on these DMA controllers the number of channels
+ * and the number of incoming DMA signals are two totally different things.
+ * It is usually not possible to theoretically handle all physical signals,
+ * so a multiplexing scheme with possible denial of use is necessary.
+ *
+ * Open items:
+ * - bursts
+ */
+
+#include <linux/platform_device.h>
+#include <linux/types.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/platform_data/dma-s3c24xx.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+#define MAX_DMA_CHANNELS	8
+
+#define S3C24XX_DISRC			0x00
+#define S3C24XX_DISRCC			0x04
+#define S3C24XX_DISRCC_INC_INCREMENT	0
+#define S3C24XX_DISRCC_INC_FIXED	BIT(0)
+#define S3C24XX_DISRCC_LOC_AHB		0
+#define S3C24XX_DISRCC_LOC_APB		BIT(1)
+
+#define S3C24XX_DIDST			0x08
+#define S3C24XX_DIDSTC			0x0c
+#define S3C24XX_DIDSTC_INC_INCREMENT	0
+#define S3C24XX_DIDSTC_INC_FIXED	BIT(0)
+#define S3C24XX_DIDSTC_LOC_AHB		0
+#define S3C24XX_DIDSTC_LOC_APB		BIT(1)
+#define S3C24XX_DIDSTC_INT_TC0		0
+#define S3C24XX_DIDSTC_INT_RELOAD	BIT(2)
+
+#define S3C24XX_DCON			0x10
+
+#define S3C24XX_DCON_TC_MASK		0xfffff
+#define S3C24XX_DCON_DSZ_BYTE		(0 << 20)
+#define S3C24XX_DCON_DSZ_HALFWORD	(1 << 20)
+#define S3C24XX_DCON_DSZ_WORD		(2 << 20)
+#define S3C24XX_DCON_DSZ_MASK		(3 << 20)
+#define S3C24XX_DCON_DSZ_SHIFT		20
+#define S3C24XX_DCON_AUTORELOAD		0
+#define S3C24XX_DCON_NORELOAD		BIT(22)
+#define S3C24XX_DCON_HWTRIG		BIT(23)
+#define S3C24XX_DCON_HWSRC_SHIFT	24
+#define S3C24XX_DCON_SERV_SINGLE	0
+#define S3C24XX_DCON_SERV_WHOLE		BIT(27)
+#define S3C24XX_DCON_TSZ_UNIT		0
+#define S3C24XX_DCON_TSZ_BURST4		BIT(28)
+#define S3C24XX_DCON_INT		BIT(29)
+#define S3C24XX_DCON_SYNC_PCLK		0
+#define S3C24XX_DCON_SYNC_HCLK		BIT(30)
+#define S3C24XX_DCON_DEMAND		0
+#define S3C24XX_DCON_HANDSHAKE		BIT(31)
+
+#define S3C24XX_DSTAT			0x14
+#define S3C24XX_DSTAT_STAT_BUSY		BIT(20)
+#define S3C24XX_DSTAT_CURRTC_MASK	0xfffff
+
+#define S3C24XX_DMASKTRIG		0x20
+#define S3C24XX_DMASKTRIG_SWTRIG	BIT(0)
+#define S3C24XX_DMASKTRIG_ON		BIT(1)
+#define S3C24XX_DMASKTRIG_STOP		BIT(2)
+
+#define S3C24XX_DMAREQSEL		0x24
+#define S3C24XX_DMAREQSEL_HW		BIT(0)
+
+/*
+ * S3C2410, S3C2440 and S3C2442 SoCs cannot select any physical channel
+ * for a DMA source. Instead only specific channels are valid.
+ * All of these SoCs have 4 physical channels and the number of request
+ * source bits is 3. Additionally we also need 1 bit to mark the channel
+ * as valid.
+ * Therefore we separate the chansel element of the channel data into 4
+ * parts of 4 bits each, to hold the information if the channel is valid
+ * and the hw request source to use.
+ *
+ * Example:
+ * SDI is valid on channels 0, 2 and 3 - with varying hw request sources.
+ * For it the chansel field would look like
+ *
+ * ((BIT(3) | 1) << 3 * 4) | // channel 3, with request source 1
+ * ((BIT(3) | 2) << 2 * 4) | // channel 2, with request source 2
+ * ((BIT(3) | 2) << 0 * 4)   // channel 0, with request source 2
+ */
+#define S3C24XX_CHANSEL_WIDTH		4
+#define S3C24XX_CHANSEL_VALID		BIT(3)
+#define S3C24XX_CHANSEL_REQ_MASK	7
+
+/*
+ * struct soc_data - vendor-specific config parameters for individual SoCs
+ * @stride: spacing between the registers of each channel
+ * @has_reqsel: does the controller use the newer requestselection mechanism
+ * @has_clocks: are controllable dma-clocks present
+ */
+struct soc_data {
+	int stride;
+	bool has_reqsel;
+	bool has_clocks;
+};
+
+/*
+ * enum s3c24xx_dma_chan_state - holds the virtual channel states
+ * @S3C24XX_DMA_CHAN_IDLE: the channel is idle
+ * @S3C24XX_DMA_CHAN_RUNNING: the channel has allocated a physical transport
+ * channel and is running a transfer on it
+ * @S3C24XX_DMA_CHAN_WAITING: the channel is waiting for a physical transport
+ * channel to become available (only pertains to memcpy channels)
+ */
+enum s3c24xx_dma_chan_state {
+	S3C24XX_DMA_CHAN_IDLE,
+	S3C24XX_DMA_CHAN_RUNNING,
+	S3C24XX_DMA_CHAN_WAITING,
+};
+
+/*
+ * struct s3c24xx_sg - structure containing data per sg
+ * @src_addr: src address of sg
+ * @dst_addr: dst address of sg
+ * @len: transfer len in bytes
+ * @node: node for txd's dsg_list
+ */
+struct s3c24xx_sg {
+	dma_addr_t src_addr;
+	dma_addr_t dst_addr;
+	size_t len;
+	struct list_head node;
+};
+
+/*
+ * struct s3c24xx_txd - wrapper for struct dma_async_tx_descriptor
+ * @vd: virtual DMA descriptor
+ * @dsg_list: list of children sg's
+ * @at: sg currently being transfered
+ * @width: transfer width
+ * @disrcc: value for source control register
+ * @didstc: value for destination control register
+ * @dcon: base value for dcon register
+ */
+struct s3c24xx_txd {
+	struct virt_dma_desc vd;
+	struct list_head dsg_list;
+	struct list_head *at;
+	u8 width;
+	u32 disrcc;
+	u32 didstc;
+	u32 dcon;
+};
+
+struct s3c24xx_dma_chan;
+
+/*
+ * struct s3c24xx_dma_phy - holder for the physical channels
+ * @id: physical index to this channel
+ * @valid: does the channel have all required elements
+ * @base: virtual memory base (remapped) for the this channel
+ * @irq: interrupt for this channel
+ * @clk: clock for this channel
+ * @lock: a lock to use when altering an instance of this struct
+ * @serving: virtual channel currently being served by this physicalchannel
+ * @host: a pointer to the host (internal use)
+ */
+struct s3c24xx_dma_phy {
+	unsigned int			id;
+	bool				valid;
+	void __iomem			*base;
+	unsigned int			irq;
+	struct clk			*clk;
+	spinlock_t			lock;
+	struct s3c24xx_dma_chan		*serving;
+	struct s3c24xx_dma_engine	*host;
+};
+
+/*
+ * struct s3c24xx_dma_chan - this structure wraps a DMA ENGINE channel
+ * @id: the id of the channel
+ * @name: name of the channel
+ * @vc: wrappped virtual channel
+ * @phy: the physical channel utilized by this channel, if there is one
+ * @runtime_addr: address for RX/TX according to the runtime config
+ * @at: active transaction on this channel
+ * @lock: a lock for this channel data
+ * @host: a pointer to the host (internal use)
+ * @state: whether the channel is idle, running etc
+ * @slave: whether this channel is a device (slave) or for memcpy
+ */
+struct s3c24xx_dma_chan {
+	int id;
+	const char *name;
+	struct virt_dma_chan vc;
+	struct s3c24xx_dma_phy *phy;
+	struct dma_slave_config cfg;
+	struct s3c24xx_txd *at;
+	struct s3c24xx_dma_engine *host;
+	enum s3c24xx_dma_chan_state state;
+	bool slave;
+};
+
+/*
+ * struct s3c24xx_dma_engine - the local state holder for the S3C24XX
+ * @pdev: the corresponding platform device
+ * @pdata: platform data passed in from the platform/machine
+ * @base: virtual memory base (remapped)
+ * @slave: slave engine for this instance
+ * @memcpy: memcpy engine for this instance
+ * @phy_chans: array of data for the physical channels
+ */
+struct s3c24xx_dma_engine {
+	struct platform_device			*pdev;
+	const struct s3c24xx_dma_platdata	*pdata;
+	struct soc_data				*sdata;
+	void __iomem				*base;
+	struct dma_device			slave;
+	struct dma_device			memcpy;
+	struct s3c24xx_dma_phy			*phy_chans;
+};
+
+/*
+ * Physical channel handling
+ */
+
+/*
+ * Check whether a certain channel is busy or not.
+ */
+static int s3c24xx_dma_phy_busy(struct s3c24xx_dma_phy *phy)
+{
+	unsigned int val = readl(phy->base + S3C24XX_DSTAT);
+	return val & S3C24XX_DSTAT_STAT_BUSY;
+}
+
+static bool s3c24xx_dma_phy_valid(struct s3c24xx_dma_chan *s3cchan,
+				  struct s3c24xx_dma_phy *phy)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+	struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id];
+	int phyvalid;
+
+	/* every phy is valid for memcopy channels */
+	if (!s3cchan->slave)
+		return true;
+
+	/* On newer variants all phys can be used for all virtual channels */
+	if (s3cdma->sdata->has_reqsel)
+		return true;
+
+	phyvalid = (cdata->chansel >> (phy->id * S3C24XX_CHANSEL_WIDTH));
+	return (phyvalid & S3C24XX_CHANSEL_VALID) ? true : false;
+}
+
+/*
+ * Allocate a physical channel for a virtual channel
+ *
+ * Try to locate a physical channel to be used for this transfer. If all
+ * are taken return NULL and the requester will have to cope by using
+ * some fallback PIO mode or retrying later.
+ */
+static
+struct s3c24xx_dma_phy *s3c24xx_dma_get_phy(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+	struct s3c24xx_dma_channel *cdata;
+	struct s3c24xx_dma_phy *phy = NULL;
+	unsigned long flags;
+	int i;
+	int ret;
+
+	if (s3cchan->slave)
+		cdata = &pdata->channels[s3cchan->id];
+
+	for (i = 0; i < s3cdma->pdata->num_phy_channels; i++) {
+		phy = &s3cdma->phy_chans[i];
+
+		if (!phy->valid)
+			continue;
+
+		if (!s3c24xx_dma_phy_valid(s3cchan, phy))
+			continue;
+
+		spin_lock_irqsave(&phy->lock, flags);
+
+		if (!phy->serving) {
+			phy->serving = s3cchan;
+			spin_unlock_irqrestore(&phy->lock, flags);
+			break;
+		}
+
+		spin_unlock_irqrestore(&phy->lock, flags);
+	}
+
+	/* No physical channel available, cope with it */
+	if (i == s3cdma->pdata->num_phy_channels) {
+		dev_warn(&s3cdma->pdev->dev, "no phy channel available\n");
+		return NULL;
+	}
+
+	/* start the phy clock */
+	if (s3cdma->sdata->has_clocks) {
+		ret = clk_enable(phy->clk);
+		if (ret) {
+			dev_err(&s3cdma->pdev->dev, "could not enable clock for channel %d, err %d\n",
+				phy->id, ret);
+			phy->serving = NULL;
+			return NULL;
+		}
+	}
+
+	return phy;
+}
+
+/*
+ * Mark the physical channel as free.
+ *
+ * This drops the link between the physical and virtual channel.
+ */
+static inline void s3c24xx_dma_put_phy(struct s3c24xx_dma_phy *phy)
+{
+	struct s3c24xx_dma_engine *s3cdma = phy->host;
+
+	if (s3cdma->sdata->has_clocks)
+		clk_disable(phy->clk);
+
+	phy->serving = NULL;
+}
+
+/*
+ * Stops the channel by writing the stop bit.
+ * This should not be used for an on-going transfer, but as a method of
+ * shutting down a channel (eg, when it's no longer used) or terminating a
+ * transfer.
+ */
+static void s3c24xx_dma_terminate_phy(struct s3c24xx_dma_phy *phy)
+{
+	writel(S3C24XX_DMASKTRIG_STOP, phy->base + S3C24XX_DMASKTRIG);
+}
+
+/*
+ * Virtual channel handling
+ */
+
+static inline
+struct s3c24xx_dma_chan *to_s3c24xx_dma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct s3c24xx_dma_chan, vc.chan);
+}
+
+static u32 s3c24xx_dma_getbytes_chan(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_phy *phy = s3cchan->phy;
+	struct s3c24xx_txd *txd = s3cchan->at;
+	u32 tc = readl(phy->base + S3C24XX_DSTAT) & S3C24XX_DSTAT_CURRTC_MASK;
+
+	return tc * txd->width;
+}
+
+static int s3c24xx_dma_set_runtime_config(struct s3c24xx_dma_chan *s3cchan,
+				  struct dma_slave_config *config)
+{
+	if (!s3cchan->slave)
+		return -EINVAL;
+
+	/* Reject definitely invalid configurations */
+	if (config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES ||
+	    config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)
+		return -EINVAL;
+
+	s3cchan->cfg = *config;
+
+	return 0;
+}
+
+/*
+ * Transfer handling
+ */
+
+static inline
+struct s3c24xx_txd *to_s3c24xx_txd(struct dma_async_tx_descriptor *tx)
+{
+	return container_of(tx, struct s3c24xx_txd, vd.tx);
+}
+
+static struct s3c24xx_txd *s3c24xx_dma_get_txd(void)
+{
+	struct s3c24xx_txd *txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+
+	if (txd) {
+		INIT_LIST_HEAD(&txd->dsg_list);
+		txd->dcon = S3C24XX_DCON_INT | S3C24XX_DCON_NORELOAD;
+	}
+
+	return txd;
+}
+
+static void s3c24xx_dma_free_txd(struct s3c24xx_txd *txd)
+{
+	struct s3c24xx_sg *dsg, *_dsg;
+
+	list_for_each_entry_safe(dsg, _dsg, &txd->dsg_list, node) {
+		list_del(&dsg->node);
+		kfree(dsg);
+	}
+
+	kfree(txd);
+}
+
+static void s3c24xx_dma_start_next_sg(struct s3c24xx_dma_chan *s3cchan,
+				       struct s3c24xx_txd *txd)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	struct s3c24xx_dma_phy *phy = s3cchan->phy;
+	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+	struct s3c24xx_sg *dsg = list_entry(txd->at, struct s3c24xx_sg, node);
+	u32 dcon = txd->dcon;
+	u32 val;
+
+	/* transfer-size and -count from len and width */
+	switch (txd->width) {
+	case 1:
+		dcon |= S3C24XX_DCON_DSZ_BYTE | dsg->len;
+		break;
+	case 2:
+		dcon |= S3C24XX_DCON_DSZ_HALFWORD | (dsg->len / 2);
+		break;
+	case 4:
+		dcon |= S3C24XX_DCON_DSZ_WORD | (dsg->len / 4);
+		break;
+	}
+
+	if (s3cchan->slave) {
+		struct s3c24xx_dma_channel *cdata =
+					&pdata->channels[s3cchan->id];
+
+		if (s3cdma->sdata->has_reqsel) {
+			writel_relaxed((cdata->chansel << 1) |
+							S3C24XX_DMAREQSEL_HW,
+					phy->base + S3C24XX_DMAREQSEL);
+		} else {
+			int csel = cdata->chansel >> (phy->id *
+							S3C24XX_CHANSEL_WIDTH);
+
+			csel &= S3C24XX_CHANSEL_REQ_MASK;
+			dcon |= csel << S3C24XX_DCON_HWSRC_SHIFT;
+			dcon |= S3C24XX_DCON_HWTRIG;
+		}
+	} else {
+		if (s3cdma->sdata->has_reqsel)
+			writel_relaxed(0, phy->base + S3C24XX_DMAREQSEL);
+	}
+
+	writel_relaxed(dsg->src_addr, phy->base + S3C24XX_DISRC);
+	writel_relaxed(txd->disrcc, phy->base + S3C24XX_DISRCC);
+	writel_relaxed(dsg->dst_addr, phy->base + S3C24XX_DIDST);
+	writel_relaxed(txd->didstc, phy->base + S3C24XX_DIDSTC);
+	writel_relaxed(dcon, phy->base + S3C24XX_DCON);
+
+	val = readl_relaxed(phy->base + S3C24XX_DMASKTRIG);
+	val &= ~S3C24XX_DMASKTRIG_STOP;
+	val |= S3C24XX_DMASKTRIG_ON;
+
+	/* trigger the dma operation for memcpy transfers */
+	if (!s3cchan->slave)
+		val |= S3C24XX_DMASKTRIG_SWTRIG;
+
+	writel(val, phy->base + S3C24XX_DMASKTRIG);
+}
+
+/*
+ * Set the initial DMA register values and start first sg.
+ */
+static void s3c24xx_dma_start_next_txd(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_phy *phy = s3cchan->phy;
+	struct virt_dma_desc *vd = vchan_next_desc(&s3cchan->vc);
+	struct s3c24xx_txd *txd = to_s3c24xx_txd(&vd->tx);
+
+	list_del(&txd->vd.node);
+
+	s3cchan->at = txd;
+
+	/* Wait for channel inactive */
+	while (s3c24xx_dma_phy_busy(phy))
+		cpu_relax();
+
+	/* point to the first element of the sg list */
+	txd->at = txd->dsg_list.next;
+	s3c24xx_dma_start_next_sg(s3cchan, txd);
+}
+
+static void s3c24xx_dma_free_txd_list(struct s3c24xx_dma_engine *s3cdma,
+				struct s3c24xx_dma_chan *s3cchan)
+{
+	LIST_HEAD(head);
+
+	vchan_get_all_descriptors(&s3cchan->vc, &head);
+	vchan_dma_desc_free_list(&s3cchan->vc, &head);
+}
+
+/*
+ * Try to allocate a physical channel.  When successful, assign it to
+ * this virtual channel, and initiate the next descriptor.  The
+ * virtual channel lock must be held at this point.
+ */
+static void s3c24xx_dma_phy_alloc_and_start(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	struct s3c24xx_dma_phy *phy;
+
+	phy = s3c24xx_dma_get_phy(s3cchan);
+	if (!phy) {
+		dev_dbg(&s3cdma->pdev->dev, "no physical channel available for xfer on %s\n",
+			s3cchan->name);
+		s3cchan->state = S3C24XX_DMA_CHAN_WAITING;
+		return;
+	}
+
+	dev_dbg(&s3cdma->pdev->dev, "allocated physical channel %d for xfer on %s\n",
+		phy->id, s3cchan->name);
+
+	s3cchan->phy = phy;
+	s3cchan->state = S3C24XX_DMA_CHAN_RUNNING;
+
+	s3c24xx_dma_start_next_txd(s3cchan);
+}
+
+static void s3c24xx_dma_phy_reassign_start(struct s3c24xx_dma_phy *phy,
+	struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+
+	dev_dbg(&s3cdma->pdev->dev, "reassigned physical channel %d for xfer on %s\n",
+		phy->id, s3cchan->name);
+
+	/*
+	 * We do this without taking the lock; we're really only concerned
+	 * about whether this pointer is NULL or not, and we're guaranteed
+	 * that this will only be called when it _already_ is non-NULL.
+	 */
+	phy->serving = s3cchan;
+	s3cchan->phy = phy;
+	s3cchan->state = S3C24XX_DMA_CHAN_RUNNING;
+	s3c24xx_dma_start_next_txd(s3cchan);
+}
+
+/*
+ * Free a physical DMA channel, potentially reallocating it to another
+ * virtual channel if we have any pending.
+ */
+static void s3c24xx_dma_phy_free(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	struct s3c24xx_dma_chan *p, *next;
+
+retry:
+	next = NULL;
+
+	/* Find a waiting virtual channel for the next transfer. */
+	list_for_each_entry(p, &s3cdma->memcpy.channels, vc.chan.device_node)
+		if (p->state == S3C24XX_DMA_CHAN_WAITING) {
+			next = p;
+			break;
+		}
+
+	if (!next) {
+		list_for_each_entry(p, &s3cdma->slave.channels,
+				    vc.chan.device_node)
+			if (p->state == S3C24XX_DMA_CHAN_WAITING &&
+				      s3c24xx_dma_phy_valid(p, s3cchan->phy)) {
+				next = p;
+				break;
+			}
+	}
+
+	/* Ensure that the physical channel is stopped */
+	s3c24xx_dma_terminate_phy(s3cchan->phy);
+
+	if (next) {
+		bool success;
+
+		/*
+		 * Eww.  We know this isn't going to deadlock
+		 * but lockdep probably doesn't.
+		 */
+		spin_lock(&next->vc.lock);
+		/* Re-check the state now that we have the lock */
+		success = next->state == S3C24XX_DMA_CHAN_WAITING;
+		if (success)
+			s3c24xx_dma_phy_reassign_start(s3cchan->phy, next);
+		spin_unlock(&next->vc.lock);
+
+		/* If the state changed, try to find another channel */
+		if (!success)
+			goto retry;
+	} else {
+		/* No more jobs, so free up the physical channel */
+		s3c24xx_dma_put_phy(s3cchan->phy);
+	}
+
+	s3cchan->phy = NULL;
+	s3cchan->state = S3C24XX_DMA_CHAN_IDLE;
+}
+
+static void s3c24xx_dma_unmap_buffers(struct s3c24xx_txd *txd)
+{
+	struct device *dev = txd->vd.tx.chan->device->dev;
+	struct s3c24xx_sg *dsg;
+
+	if (!(txd->vd.tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+		if (txd->vd.tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+			list_for_each_entry(dsg, &txd->dsg_list, node)
+				dma_unmap_single(dev, dsg->src_addr, dsg->len,
+						DMA_TO_DEVICE);
+		else {
+			list_for_each_entry(dsg, &txd->dsg_list, node)
+				dma_unmap_page(dev, dsg->src_addr, dsg->len,
+						DMA_TO_DEVICE);
+		}
+	}
+
+	if (!(txd->vd.tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+		if (txd->vd.tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+			list_for_each_entry(dsg, &txd->dsg_list, node)
+				dma_unmap_single(dev, dsg->dst_addr, dsg->len,
+						DMA_FROM_DEVICE);
+		else
+			list_for_each_entry(dsg, &txd->dsg_list, node)
+				dma_unmap_page(dev, dsg->dst_addr, dsg->len,
+						DMA_FROM_DEVICE);
+	}
+}
+
+static void s3c24xx_dma_desc_free(struct virt_dma_desc *vd)
+{
+	struct s3c24xx_txd *txd = to_s3c24xx_txd(&vd->tx);
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(vd->tx.chan);
+
+	if (!s3cchan->slave)
+		s3c24xx_dma_unmap_buffers(txd);
+
+	s3c24xx_dma_free_txd(txd);
+}
+
+static irqreturn_t s3c24xx_dma_irq(int irq, void *data)
+{
+	struct s3c24xx_dma_phy *phy = data;
+	struct s3c24xx_dma_chan *s3cchan = phy->serving;
+	struct s3c24xx_txd *txd;
+
+	dev_dbg(&phy->host->pdev->dev, "interrupt on channel %d\n", phy->id);
+
+	/*
+	 * Interrupts happen to notify the completion of a transfer and the
+	 * channel should have moved into its stop state already on its own.
+	 * Therefore interrupts on channels not bound to a virtual channel
+	 * should never happen. Nevertheless send a terminate command to the
+	 * channel if the unlikely case happens.
+	 */
+	if (unlikely(!s3cchan)) {
+		dev_err(&phy->host->pdev->dev, "interrupt on unused channel %d\n",
+			phy->id);
+
+		s3c24xx_dma_terminate_phy(phy);
+
+		return IRQ_HANDLED;
+	}
+
+	spin_lock(&s3cchan->vc.lock);
+	txd = s3cchan->at;
+	if (txd) {
+		/* when more sg's are in this txd, start the next one */
+		if (!list_is_last(txd->at, &txd->dsg_list)) {
+			txd->at = txd->at->next;
+			s3c24xx_dma_start_next_sg(s3cchan, txd);
+		} else {
+			s3cchan->at = NULL;
+			vchan_cookie_complete(&txd->vd);
+
+			/*
+			 * And start the next descriptor (if any),
+			 * otherwise free this channel.
+			 */
+			if (vchan_next_desc(&s3cchan->vc))
+				s3c24xx_dma_start_next_txd(s3cchan);
+			else
+				s3c24xx_dma_phy_free(s3cchan);
+		}
+	}
+	spin_unlock(&s3cchan->vc.lock);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * The DMA ENGINE API
+ */
+
+static int s3c24xx_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+			 unsigned long arg)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&s3cchan->vc.lock, flags);
+
+	switch (cmd) {
+	case DMA_SLAVE_CONFIG:
+		ret = s3c24xx_dma_set_runtime_config(s3cchan,
+					      (struct dma_slave_config *)arg);
+		break;
+	case DMA_TERMINATE_ALL:
+		if (!s3cchan->phy && !s3cchan->at) {
+			dev_err(&s3cdma->pdev->dev, "trying to terminate already stopped channel %d\n",
+				s3cchan->id);
+			ret = -EINVAL;
+			break;
+		}
+
+		s3cchan->state = S3C24XX_DMA_CHAN_IDLE;
+
+		 /* Mark physical channel as free */
+		if (s3cchan->phy)
+			s3c24xx_dma_phy_free(s3cchan);
+
+		/* Dequeue current job */
+		if (s3cchan->at) {
+			s3c24xx_dma_desc_free(&s3cchan->at->vd);
+			s3cchan->at = NULL;
+		}
+
+		/* Dequeue jobs not yet fired as well */
+		s3c24xx_dma_free_txd_list(s3cdma, s3cchan);
+		break;
+	default:
+		/* Unknown command */
+		ret = -ENXIO;
+		break;
+	}
+
+	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+
+	return ret;
+}
+
+static int s3c24xx_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	return 0;
+}
+
+static void s3c24xx_dma_free_chan_resources(struct dma_chan *chan)
+{
+	/* Ensure all queued descriptors are freed */
+	vchan_free_chan_resources(to_virt_chan(chan));
+}
+
+static enum dma_status s3c24xx_dma_tx_status(struct dma_chan *chan,
+		dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_txd *txd;
+	struct s3c24xx_sg *dsg;
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+	enum dma_status ret;
+	size_t bytes = 0;
+
+	spin_lock_irqsave(&s3cchan->vc.lock, flags);
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_SUCCESS) {
+		spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+		return ret;
+	}
+
+	/*
+	 * There's no point calculating the residue if there's
+	 * no txstate to store the value.
+	 */
+	if (!txstate) {
+		spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+		return ret;
+	}
+
+	vd = vchan_find_desc(&s3cchan->vc, cookie);
+	if (vd) {
+		/* On the issued list, so hasn't been processed yet */
+		txd = to_s3c24xx_txd(&vd->tx);
+
+		list_for_each_entry(dsg, &txd->dsg_list, node)
+			bytes += dsg->len;
+	} else {
+		/*
+		 * Currently running, so sum over the pending sg's and
+		 * the currently active one.
+		 */
+		txd = s3cchan->at;
+
+		dsg = list_entry(txd->at, struct s3c24xx_sg, node);
+		list_for_each_entry_from(dsg, &txd->dsg_list, node)
+			bytes += dsg->len;
+
+		bytes += s3c24xx_dma_getbytes_chan(s3cchan);
+	}
+	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+
+	/*
+	 * This cookie not complete yet
+	 * Get number of bytes left in the active transactions and queue
+	 */
+	dma_set_residue(txstate, bytes);
+
+	/* Whether waiting or running, we're in progress */
+	return ret;
+}
+
+/*
+ * Initialize a descriptor to be used by memcpy submit
+ */
+static struct dma_async_tx_descriptor *s3c24xx_dma_prep_memcpy(
+		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	struct s3c24xx_txd *txd;
+	struct s3c24xx_sg *dsg;
+	int src_mod, dest_mod;
+
+	dev_dbg(&s3cdma->pdev->dev, "prepare memcpy of %d bytes from %s\n",
+			len, s3cchan->name);
+
+	if ((len & S3C24XX_DCON_TC_MASK) != len) {
+		dev_err(&s3cdma->pdev->dev, "memcpy size %d to large\n", len);
+		return NULL;
+	}
+
+	txd = s3c24xx_dma_get_txd();
+	if (!txd)
+		return NULL;
+
+	dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT);
+	if (!dsg) {
+		s3c24xx_dma_free_txd(txd);
+		return NULL;
+	}
+	list_add_tail(&dsg->node, &txd->dsg_list);
+
+	dsg->src_addr = src;
+	dsg->dst_addr = dest;
+	dsg->len = len;
+
+	/*
+	 * Determine a suitable transfer width.
+	 * The DMA controller cannot fetch/store information which is not
+	 * naturally aligned on the bus, i.e., a 4 byte fetch must start at
+	 * an address divisible by 4 - more generally addr % width must be 0.
+	 */
+	src_mod = src % 4;
+	dest_mod = dest % 4;
+	switch (len % 4) {
+	case 0:
+		txd->width = (src_mod == 0 && dest_mod == 0) ? 4 : 1;
+		break;
+	case 2:
+		txd->width = ((src_mod == 2 || src_mod == 0) &&
+			      (dest_mod == 2 || dest_mod == 0)) ? 2 : 1;
+		break;
+	default:
+		txd->width = 1;
+		break;
+	}
+
+	txd->disrcc = S3C24XX_DISRCC_LOC_AHB | S3C24XX_DISRCC_INC_INCREMENT;
+	txd->didstc = S3C24XX_DIDSTC_LOC_AHB | S3C24XX_DIDSTC_INC_INCREMENT;
+	txd->dcon |= S3C24XX_DCON_DEMAND | S3C24XX_DCON_SYNC_HCLK |
+		     S3C24XX_DCON_SERV_WHOLE;
+
+	return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *s3c24xx_dma_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+	struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id];
+	struct s3c24xx_txd *txd;
+	struct s3c24xx_sg *dsg;
+	struct scatterlist *sg;
+	dma_addr_t slave_addr;
+	u32 hwcfg = 0;
+	int tmp;
+
+	dev_dbg(&s3cdma->pdev->dev, "prepare transaction of %d bytes from %s\n",
+			sg_dma_len(sgl), s3cchan->name);
+
+	txd = s3c24xx_dma_get_txd();
+	if (!txd)
+		return NULL;
+
+	if (cdata->handshake)
+		txd->dcon |= S3C24XX_DCON_HANDSHAKE;
+
+	switch (cdata->bus) {
+	case S3C24XX_DMA_APB:
+		txd->dcon |= S3C24XX_DCON_SYNC_PCLK;
+		hwcfg |= S3C24XX_DISRCC_LOC_APB;
+		break;
+	case S3C24XX_DMA_AHB:
+		txd->dcon |= S3C24XX_DCON_SYNC_HCLK;
+		hwcfg |= S3C24XX_DISRCC_LOC_AHB;
+		break;
+	}
+
+	/*
+	 * Always assume our peripheral desintation is a fixed
+	 * address in memory.
+	 */
+	hwcfg |= S3C24XX_DISRCC_INC_FIXED;
+
+	/*
+	 * Individual dma operations are requested by the slave,
+	 * so serve only single atomic operations (S3C24XX_DCON_SERV_SINGLE).
+	 */
+	txd->dcon |= S3C24XX_DCON_SERV_SINGLE;
+
+	if (direction == DMA_MEM_TO_DEV) {
+		txd->disrcc = S3C24XX_DISRCC_LOC_AHB |
+			      S3C24XX_DISRCC_INC_INCREMENT;
+		txd->didstc = hwcfg;
+		slave_addr = s3cchan->cfg.dst_addr;
+		txd->width = s3cchan->cfg.dst_addr_width;
+	} else if (direction == DMA_DEV_TO_MEM) {
+		txd->disrcc = hwcfg;
+		txd->didstc = S3C24XX_DIDSTC_LOC_AHB |
+			      S3C24XX_DIDSTC_INC_INCREMENT;
+		slave_addr = s3cchan->cfg.src_addr;
+		txd->width = s3cchan->cfg.src_addr_width;
+	} else {
+		s3c24xx_dma_free_txd(txd);
+		dev_err(&s3cdma->pdev->dev,
+			"direction %d unsupported\n", direction);
+		return NULL;
+	}
+
+	for_each_sg(sgl, sg, sg_len, tmp) {
+		dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT);
+		if (!dsg) {
+			s3c24xx_dma_free_txd(txd);
+			return NULL;
+		}
+		list_add_tail(&dsg->node, &txd->dsg_list);
+
+		dsg->len = sg_dma_len(sg);
+		if (direction == DMA_MEM_TO_DEV) {
+			dsg->src_addr = sg_dma_address(sg);
+			dsg->dst_addr = slave_addr;
+		} else { /* DMA_DEV_TO_MEM */
+			dsg->src_addr = slave_addr;
+			dsg->dst_addr = sg_dma_address(sg);
+		}
+		break;
+	}
+
+	return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
+}
+
+/*
+ * Slave transactions callback to the slave device to allow
+ * synchronization of slave DMA signals with the DMAC enable
+ */
+static void s3c24xx_dma_issue_pending(struct dma_chan *chan)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&s3cchan->vc.lock, flags);
+	if (vchan_issue_pending(&s3cchan->vc)) {
+		if (!s3cchan->phy && s3cchan->state != S3C24XX_DMA_CHAN_WAITING)
+			s3c24xx_dma_phy_alloc_and_start(s3cchan);
+	}
+	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+}
+
+/*
+ * Bringup and teardown
+ */
+
+/*
+ * Initialise the DMAC memcpy/slave channels.
+ * Make a local wrapper to hold required data
+ */
+static int s3c24xx_dma_init_virtual_channels(struct s3c24xx_dma_engine *s3cdma,
+		struct dma_device *dmadev, unsigned int channels, bool slave)
+{
+	struct s3c24xx_dma_chan *chan;
+	int i;
+
+	INIT_LIST_HEAD(&dmadev->channels);
+
+	/*
+	 * Register as many many memcpy as we have physical channels,
+	 * we won't always be able to use all but the code will have
+	 * to cope with that situation.
+	 */
+	for (i = 0; i < channels; i++) {
+		chan = devm_kzalloc(dmadev->dev, sizeof(*chan), GFP_KERNEL);
+		if (!chan) {
+			dev_err(dmadev->dev,
+				"%s no memory for channel\n", __func__);
+			return -ENOMEM;
+		}
+
+		chan->id = i;
+		chan->host = s3cdma;
+		chan->state = S3C24XX_DMA_CHAN_IDLE;
+
+		if (slave) {
+			chan->slave = true;
+			chan->name = kasprintf(GFP_KERNEL, "slave%d", i);
+			if (!chan->name)
+				return -ENOMEM;
+		} else {
+			chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i);
+			if (!chan->name)
+				return -ENOMEM;
+		}
+		dev_dbg(dmadev->dev,
+			 "initialize virtual channel \"%s\"\n",
+			 chan->name);
+
+		chan->vc.desc_free = s3c24xx_dma_desc_free;
+		vchan_init(&chan->vc, dmadev);
+	}
+	dev_info(dmadev->dev, "initialized %d virtual %s channels\n",
+		 i, slave ? "slave" : "memcpy");
+	return i;
+}
+
+static void s3c24xx_dma_free_virtual_channels(struct dma_device *dmadev)
+{
+	struct s3c24xx_dma_chan *chan = NULL;
+	struct s3c24xx_dma_chan *next;
+
+	list_for_each_entry_safe(chan,
+				 next, &dmadev->channels, vc.chan.device_node)
+		list_del(&chan->vc.chan.device_node);
+}
+
+/* s3c2412 and s3c2413 have a 0x40 stride and dmareqsel mechanism */
+static struct soc_data soc_s3c2412 = {
+	.stride = 0x40,
+	.has_reqsel = true,
+	.has_clocks = true,
+};
+
+/* s3c2443 and following have a 0x100 stride and dmareqsel mechanism */
+static struct soc_data soc_s3c2443 = {
+	.stride = 0x100,
+	.has_reqsel = true,
+	.has_clocks = true,
+};
+
+static struct platform_device_id s3c24xx_dma_driver_ids[] = {
+	{
+		.name		= "s3c2412-dma",
+		.driver_data	= (kernel_ulong_t)&soc_s3c2412,
+	}, {
+		.name		= "s3c2443-dma",
+		.driver_data	= (kernel_ulong_t)&soc_s3c2443,
+	},
+	{ },
+};
+
+static struct soc_data *s3c24xx_dma_get_soc_data(struct platform_device *pdev)
+{
+	return (struct soc_data *)
+			 platform_get_device_id(pdev)->driver_data;
+}
+
+static int s3c24xx_dma_probe(struct platform_device *pdev)
+{
+	const struct s3c24xx_dma_platdata *pdata = dev_get_platdata(&pdev->dev);
+	struct s3c24xx_dma_engine *s3cdma;
+	struct soc_data *sdata;
+	struct resource *res;
+	int ret;
+	int i;
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "platform data missing\n");
+		return -ENODEV;
+	}
+
+	/* Basic sanity check */
+	if (pdata->num_phy_channels > MAX_DMA_CHANNELS) {
+		dev_err(&pdev->dev, "to many dma channels %d, max %d\n",
+			pdata->num_phy_channels, MAX_DMA_CHANNELS);
+		return -EINVAL;
+	}
+
+	sdata = s3c24xx_dma_get_soc_data(pdev);
+	if (!sdata)
+		return -EINVAL;
+
+	s3cdma = devm_kzalloc(&pdev->dev, sizeof(*s3cdma), GFP_KERNEL);
+	if (!s3cdma)
+		return -ENOMEM;
+
+	s3cdma->pdev = pdev;
+	s3cdma->pdata = pdata;
+	s3cdma->sdata = sdata;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	s3cdma->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(s3cdma->base))
+		return PTR_ERR(s3cdma->base);
+
+	s3cdma->phy_chans = devm_kzalloc(&pdev->dev,
+					      sizeof(struct s3c24xx_dma_phy) *
+							pdata->num_phy_channels,
+					      GFP_KERNEL);
+	if (!s3cdma->phy_chans)
+		return -ENOMEM;
+
+	/* aquire irqs and clocks for all physical channels */
+	for (i = 0; i < pdata->num_phy_channels; i++) {
+		struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
+		char clk_name[6];
+
+		phy->id = i;
+		phy->base = s3cdma->base + (i * sdata->stride);
+		phy->host = s3cdma;
+
+		phy->irq = platform_get_irq(pdev, i);
+		if (phy->irq < 0) {
+			dev_err(&pdev->dev, "failed to get irq %d, err %d\n",
+				i, phy->irq);
+			continue;
+		}
+
+		ret = devm_request_irq(&pdev->dev, phy->irq, s3c24xx_dma_irq,
+				       0, pdev->name, phy);
+		if (ret) {
+			dev_err(&pdev->dev, "Unable to request irq for channel %d, error %d\n",
+				i, ret);
+			continue;
+		}
+
+		if (sdata->has_clocks) {
+			sprintf(clk_name, "dma.%d", i);
+			phy->clk = devm_clk_get(&pdev->dev, clk_name);
+			if (IS_ERR(phy->clk) && sdata->has_clocks) {
+				dev_err(&pdev->dev, "unable to aquire clock for channel %d, error %lu",
+					i, PTR_ERR(phy->clk));
+				continue;
+			}
+
+			ret = clk_prepare(phy->clk);
+			if (ret) {
+				dev_err(&pdev->dev, "clock for phy %d failed, error %d\n",
+					i, ret);
+				continue;
+			}
+		}
+
+		spin_lock_init(&phy->lock);
+		phy->valid = true;
+
+		dev_dbg(&pdev->dev, "physical channel %d is %s\n",
+			i, s3c24xx_dma_phy_busy(phy) ? "BUSY" : "FREE");
+	}
+
+	/* Initialize memcpy engine */
+	dma_cap_set(DMA_MEMCPY, s3cdma->memcpy.cap_mask);
+	dma_cap_set(DMA_PRIVATE, s3cdma->memcpy.cap_mask);
+	s3cdma->memcpy.dev = &pdev->dev;
+	s3cdma->memcpy.device_alloc_chan_resources =
+					s3c24xx_dma_alloc_chan_resources;
+	s3cdma->memcpy.device_free_chan_resources =
+					s3c24xx_dma_free_chan_resources;
+	s3cdma->memcpy.device_prep_dma_memcpy = s3c24xx_dma_prep_memcpy;
+	s3cdma->memcpy.device_tx_status = s3c24xx_dma_tx_status;
+	s3cdma->memcpy.device_issue_pending = s3c24xx_dma_issue_pending;
+	s3cdma->memcpy.device_control = s3c24xx_dma_control;
+
+	/* Initialize slave engine for SoC internal dedicated peripherals */
+	dma_cap_set(DMA_SLAVE, s3cdma->slave.cap_mask);
+	dma_cap_set(DMA_PRIVATE, s3cdma->slave.cap_mask);
+	s3cdma->slave.dev = &pdev->dev;
+	s3cdma->slave.device_alloc_chan_resources =
+					s3c24xx_dma_alloc_chan_resources;
+	s3cdma->slave.device_free_chan_resources =
+					s3c24xx_dma_free_chan_resources;
+	s3cdma->slave.device_tx_status = s3c24xx_dma_tx_status;
+	s3cdma->slave.device_issue_pending = s3c24xx_dma_issue_pending;
+	s3cdma->slave.device_prep_slave_sg = s3c24xx_dma_prep_slave_sg;
+	s3cdma->slave.device_control = s3c24xx_dma_control;
+
+	/* Register as many memcpy channels as there are physical channels */
+	ret = s3c24xx_dma_init_virtual_channels(s3cdma, &s3cdma->memcpy,
+						pdata->num_phy_channels, false);
+	if (ret <= 0) {
+		dev_warn(&pdev->dev,
+			 "%s failed to enumerate memcpy channels - %d\n",
+			 __func__, ret);
+		goto err_memcpy;
+	}
+
+	/* Register slave channels */
+	ret = s3c24xx_dma_init_virtual_channels(s3cdma, &s3cdma->slave,
+				pdata->num_channels, true);
+	if (ret <= 0) {
+		dev_warn(&pdev->dev,
+			"%s failed to enumerate slave channels - %d\n",
+				__func__, ret);
+		goto err_slave;
+	}
+
+	ret = dma_async_device_register(&s3cdma->memcpy);
+	if (ret) {
+		dev_warn(&pdev->dev,
+			"%s failed to register memcpy as an async device - %d\n",
+			__func__, ret);
+		goto err_memcpy_reg;
+	}
+
+	ret = dma_async_device_register(&s3cdma->slave);
+	if (ret) {
+		dev_warn(&pdev->dev,
+			"%s failed to register slave as an async device - %d\n",
+			__func__, ret);
+		goto err_slave_reg;
+	}
+
+	platform_set_drvdata(pdev, s3cdma);
+	dev_info(&pdev->dev, "Loaded dma driver with %d physical channels\n",
+		 pdata->num_phy_channels);
+
+	return 0;
+
+err_slave_reg:
+	dma_async_device_unregister(&s3cdma->memcpy);
+err_memcpy_reg:
+	s3c24xx_dma_free_virtual_channels(&s3cdma->slave);
+err_slave:
+	s3c24xx_dma_free_virtual_channels(&s3cdma->memcpy);
+err_memcpy:
+	if (sdata->has_clocks)
+		for (i = 0; i < pdata->num_phy_channels; i++) {
+			struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
+			if (phy->valid)
+				clk_unprepare(phy->clk);
+		}
+
+	return ret;
+}
+
+static int s3c24xx_dma_remove(struct platform_device *pdev)
+{
+	const struct s3c24xx_dma_platdata *pdata = dev_get_platdata(&pdev->dev);
+	struct s3c24xx_dma_engine *s3cdma = platform_get_drvdata(pdev);
+	struct soc_data *sdata = s3c24xx_dma_get_soc_data(pdev);
+	int i;
+
+	dma_async_device_unregister(&s3cdma->slave);
+	dma_async_device_unregister(&s3cdma->memcpy);
+
+	s3c24xx_dma_free_virtual_channels(&s3cdma->slave);
+	s3c24xx_dma_free_virtual_channels(&s3cdma->memcpy);
+
+	if (sdata->has_clocks)
+		for (i = 0; i < pdata->num_phy_channels; i++) {
+			struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
+			if (phy->valid)
+				clk_unprepare(phy->clk);
+		}
+
+	return 0;
+}
+
+static struct platform_driver s3c24xx_dma_driver = {
+	.driver		= {
+		.name	= "s3c24xx-dma",
+		.owner	= THIS_MODULE,
+	},
+	.id_table	= s3c24xx_dma_driver_ids,
+	.probe		= s3c24xx_dma_probe,
+	.remove		= s3c24xx_dma_remove,
+};
+
+module_platform_driver(s3c24xx_dma_driver);
+
+bool s3c24xx_dma_filter(struct dma_chan *chan, void *param)
+{
+	struct s3c24xx_dma_chan *s3cchan;
+
+	if (chan->device->dev->driver != &s3c24xx_dma_driver.driver)
+		return false;
+
+	s3cchan = to_s3c24xx_dma_chan(chan);
+
+	return s3cchan->id == (int)param;
+}
+EXPORT_SYMBOL(s3c24xx_dma_filter);
+
+MODULE_DESCRIPTION("S3C24XX DMA Driver");
+MODULE_AUTHOR("Heiko Stuebner");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/platform_data/dma-s3c24xx.h b/include/linux/platform_data/dma-s3c24xx.h
new file mode 100644
index 000000000000..5a0cfffe3bbb
--- /dev/null
+++ b/include/linux/platform_data/dma-s3c24xx.h
@@ -0,0 +1,43 @@
+/*
+ * S3C24XX DMA handling
+ *
+ * Copyright (c) 2013 Heiko Stuebner <heiko@sntech.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+enum s3c24xx_dma_bus {
+	S3C24XX_DMA_APB,
+	S3C24XX_DMA_AHB,
+};
+
+/**
+ * @bus: on which bus does the peripheral reside - AHB or APB.
+ * @handshake: is a handshake with the peripheral necessary
+ * @chansel: channel selection information, depending on variant; reqsel for
+ *	     s3c2443 and later and channel-selection map for earlier SoCs
+ *	     see CHANSEL doc in s3c2443-dma.c
+ */
+struct s3c24xx_dma_channel {
+	enum s3c24xx_dma_bus bus;
+	bool handshake;
+	u16 chansel;
+};
+
+/**
+ * struct s3c24xx_dma_platdata - platform specific settings
+ * @num_phy_channels: number of physical channels
+ * @channels: array of virtual channel descriptions
+ * @num_channels: number of virtual channels
+ */
+struct s3c24xx_dma_platdata {
+	int num_phy_channels;
+	struct s3c24xx_dma_channel *channels;
+	int num_channels;
+};
+
+struct dma_chan;
+bool s3c24xx_dma_filter(struct dma_chan *chan, void *param);
-- 
cgit v1.2.3


From 1fecf8958eb7f90791f2c7e99afac393b64fa976 Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko@sntech.de>
Date: Mon, 21 Oct 2013 05:32:48 +0900
Subject: ARM: S3C24XX: add dma pdata for s3c2410, s3c2440 and s3c2442

s3c2410 and s3c2442 share the same dma channels while s3c2440 has
slight differences. But on all three the reachable sources per dma
channel has constraints attached and thus encodes the usable
combinations using the S3C24XX_DMA_CHANREQ macro.

This also fixes the warning about s3c2410_dma_resource being unused
as reported by Olof Johansson.

Reported-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: Tomasz Figa <t.figa@samsung.com>
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
---
 arch/arm/mach-s3c24xx/common.c            | 100 ++++++++++++++++++++++++++++++
 arch/arm/mach-s3c24xx/common.h            |   2 +
 include/linux/platform_data/dma-s3c24xx.h |   3 +
 3 files changed, 105 insertions(+)

(limited to 'include/linux')

diff --git a/arch/arm/mach-s3c24xx/common.c b/arch/arm/mach-s3c24xx/common.c
index bdcb29f4ea31..4adaa4b43ffe 100644
--- a/arch/arm/mach-s3c24xx/common.c
+++ b/arch/arm/mach-s3c24xx/common.c
@@ -343,6 +343,50 @@ static struct resource s3c2410_dma_resource[] = {
 };
 #endif
 
+#if defined(CONFIG_CPU_S3C2410) || defined(CONFIG_CPU_S3C2442)
+static struct s3c24xx_dma_channel s3c2410_dma_channels[DMACH_MAX] = {
+	[DMACH_XD0] = { S3C24XX_DMA_AHB, true, S3C24XX_DMA_CHANREQ(0, 0), },
+	[DMACH_XD1] = { S3C24XX_DMA_AHB, true, S3C24XX_DMA_CHANREQ(0, 1), },
+	[DMACH_SDI] = { S3C24XX_DMA_APB, false, S3C24XX_DMA_CHANREQ(2, 0) |
+						S3C24XX_DMA_CHANREQ(2, 2) |
+						S3C24XX_DMA_CHANREQ(1, 3),
+	},
+	[DMACH_SPI0] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(3, 1), },
+	[DMACH_SPI1] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(2, 3), },
+	[DMACH_UART0] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(1, 0), },
+	[DMACH_UART1] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(1, 1), },
+	[DMACH_UART2] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(0, 3), },
+	[DMACH_TIMER] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(3, 0) |
+						 S3C24XX_DMA_CHANREQ(3, 2) |
+						 S3C24XX_DMA_CHANREQ(3, 3),
+	},
+	[DMACH_I2S_IN] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(2, 1) |
+						  S3C24XX_DMA_CHANREQ(1, 2),
+	},
+	[DMACH_I2S_OUT] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(0, 2), },
+	[DMACH_USB_EP1] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 0), },
+	[DMACH_USB_EP2] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 1), },
+	[DMACH_USB_EP3] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 2), },
+	[DMACH_USB_EP4] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 3), },
+};
+
+static struct s3c24xx_dma_platdata s3c2410_dma_platdata = {
+	.num_phy_channels = 4,
+	.channels = s3c2410_dma_channels,
+	.num_channels = DMACH_MAX,
+};
+
+struct platform_device s3c2410_device_dma = {
+	.name		= "s3c2410-dma",
+	.id		= 0,
+	.num_resources	= ARRAY_SIZE(s3c2410_dma_resource),
+	.resource	= s3c2410_dma_resource,
+	.dev	= {
+		.platform_data	= &s3c2410_dma_platdata,
+	},
+};
+#endif
+
 #ifdef CONFIG_CPU_S3C2412
 static struct s3c24xx_dma_channel s3c2412_dma_channels[DMACH_MAX] = {
 	[DMACH_XD0] = { S3C24XX_DMA_AHB, true, 17 },
@@ -384,6 +428,62 @@ struct platform_device s3c2412_device_dma = {
 };
 #endif
 
+#if defined(CONFIG_CPU_S3C2440)
+static struct s3c24xx_dma_channel s3c2440_dma_channels[DMACH_MAX] = {
+	[DMACH_XD0] = { S3C24XX_DMA_AHB, true, S3C24XX_DMA_CHANREQ(0, 0), },
+	[DMACH_XD1] = { S3C24XX_DMA_AHB, true, S3C24XX_DMA_CHANREQ(0, 1), },
+	[DMACH_SDI] = { S3C24XX_DMA_APB, false, S3C24XX_DMA_CHANREQ(2, 0) |
+						S3C24XX_DMA_CHANREQ(6, 1) |
+						S3C24XX_DMA_CHANREQ(2, 2) |
+						S3C24XX_DMA_CHANREQ(1, 3),
+	},
+	[DMACH_SPI0] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(3, 1), },
+	[DMACH_SPI1] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(2, 3), },
+	[DMACH_UART0] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(1, 0), },
+	[DMACH_UART1] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(1, 1), },
+	[DMACH_UART2] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(0, 3), },
+	[DMACH_TIMER] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(3, 0) |
+						 S3C24XX_DMA_CHANREQ(3, 2) |
+						 S3C24XX_DMA_CHANREQ(3, 3),
+	},
+	[DMACH_I2S_IN] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(2, 1) |
+						  S3C24XX_DMA_CHANREQ(1, 2),
+	},
+	[DMACH_I2S_OUT] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(5, 0) |
+						   S3C24XX_DMA_CHANREQ(0, 2),
+	},
+	[DMACH_PCM_IN] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(6, 0) |
+						  S3C24XX_DMA_CHANREQ(5, 2),
+	},
+	[DMACH_PCM_OUT] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(5, 1) |
+						  S3C24XX_DMA_CHANREQ(6, 3),
+	},
+	[DMACH_MIC_IN] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(6, 2) |
+						  S3C24XX_DMA_CHANREQ(5, 3),
+	},
+	[DMACH_USB_EP1] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 0), },
+	[DMACH_USB_EP2] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 1), },
+	[DMACH_USB_EP3] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 2), },
+	[DMACH_USB_EP4] = { S3C24XX_DMA_APB, true, S3C24XX_DMA_CHANREQ(4, 3), },
+};
+
+static struct s3c24xx_dma_platdata s3c2440_dma_platdata = {
+	.num_phy_channels = 4,
+	.channels = s3c2440_dma_channels,
+	.num_channels = DMACH_MAX,
+};
+
+struct platform_device s3c2440_device_dma = {
+	.name		= "s3c2410-dma",
+	.id		= 0,
+	.num_resources	= ARRAY_SIZE(s3c2410_dma_resource),
+	.resource	= s3c2410_dma_resource,
+	.dev	= {
+		.platform_data	= &s3c2440_dma_platdata,
+	},
+};
+#endif
+
 #if defined(CONFIG_CPUS_3C2443) || defined(CONFIG_CPU_S3C2416)
 static struct resource s3c2443_dma_resource[] = {
 	[0] = DEFINE_RES_MEM(S3C24XX_PA_DMA, S3C24XX_SZ_DMA),
diff --git a/arch/arm/mach-s3c24xx/common.h b/arch/arm/mach-s3c24xx/common.h
index fe071893275f..e46c10417216 100644
--- a/arch/arm/mach-s3c24xx/common.h
+++ b/arch/arm/mach-s3c24xx/common.h
@@ -109,7 +109,9 @@ extern void s3c2443_init_irq(void);
 
 extern struct syscore_ops s3c24xx_irq_syscore_ops;
 
+extern struct platform_device s3c2410_device_dma;
 extern struct platform_device s3c2412_device_dma;
+extern struct platform_device s3c2440_device_dma;
 extern struct platform_device s3c2443_device_dma;
 
 #endif /* __ARCH_ARM_MACH_S3C24XX_COMMON_H */
diff --git a/include/linux/platform_data/dma-s3c24xx.h b/include/linux/platform_data/dma-s3c24xx.h
index 5a0cfffe3bbb..89ba1b0c90e4 100644
--- a/include/linux/platform_data/dma-s3c24xx.h
+++ b/include/linux/platform_data/dma-s3c24xx.h
@@ -9,6 +9,9 @@
  * any later version.
  */
 
+/* Helper to encode the source selection constraints for early s3c socs. */
+#define S3C24XX_DMA_CHANREQ(src, chan)	((BIT(3) | src) << chan * 4)
+
 enum s3c24xx_dma_bus {
 	S3C24XX_DMA_APB,
 	S3C24XX_DMA_AHB,
-- 
cgit v1.2.3