author     Arnd Bergmann <arnd@arndb.de>  2018-03-09 16:13:42 +0300
committer  Arnd Bergmann <arnd@arndb.de>  2018-03-16 12:56:03 +0300
commit     bb9d812643d8a121df7d614a2b9c60193a92deb0 (patch)
tree       419096f57ca0501d8813151a5236387074edb4ea /arch/tile/lib
parent     4ba66a9760722ccbb691b8f7116cad2f791cca7b (diff)
arch: remove tile port
The Tile architecture port was added by Chris Metcalf in 2010, and
maintained until early 2018 when he orphaned it due to his departure
from Mellanox, and nobody else stepped up to maintain it.

The product line is still around in the form of the BlueField SoC, but
no longer uses the Tile architecture. There are also still products for
sale with Tile-GX SoCs, notably the Mikrotik CCR router family. The
products all use old (linux-3.3) kernels with lots of patches and won't
be upgraded by their manufacturers. There have been efforts to port
both OpenWRT and Debian to these, but both projects have stalled and
are very unlikely to be continued in the future.

Given that we are reasonably sure that nobody is still using the port
with an upstream kernel any more, it seems better to remove it now
while the port is in a good shape than to let it bitrot for a few
years first.

Cc: Chris Metcalf <chris.d.metcalf@gmail.com>
Cc: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Link: http://www.mellanox.com/page/npu_multicore_overview
Link: https://jenkins.debian.net/view/rebootstrap/job/rebootstrap_tilegx_gcc7/
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Diffstat (limited to 'arch/tile/lib')
-rw-r--r--  arch/tile/lib/Makefile            |  19
-rw-r--r--  arch/tile/lib/atomic_32.c         | 206
-rw-r--r--  arch/tile/lib/atomic_asm_32.S     | 205
-rw-r--r--  arch/tile/lib/cacheflush.c        | 167
-rw-r--r--  arch/tile/lib/checksum.c          |  89
-rw-r--r--  arch/tile/lib/cpumask.c           |  54
-rw-r--r--  arch/tile/lib/delay.c             |  45
-rw-r--r--  arch/tile/lib/exports.c           |  94
-rw-r--r--  arch/tile/lib/memchr_32.c         |  71
-rw-r--r--  arch/tile/lib/memchr_64.c         |  69
-rw-r--r--  arch/tile/lib/memcpy_32.S         | 544
-rw-r--r--  arch/tile/lib/memcpy_64.c         | 367
-rw-r--r--  arch/tile/lib/memcpy_user_64.c    |  85
-rw-r--r--  arch/tile/lib/memmove.c           |  63
-rw-r--r--  arch/tile/lib/memset_32.c         | 143
-rw-r--r--  arch/tile/lib/memset_64.c         | 142
-rw-r--r--  arch/tile/lib/spinlock_32.c       | 251
-rw-r--r--  arch/tile/lib/spinlock_64.c       |  97
-rw-r--r--  arch/tile/lib/spinlock_common.h   |  64
-rw-r--r--  arch/tile/lib/strchr_32.c         |  64
-rw-r--r--  arch/tile/lib/strchr_64.c         |  62
-rw-r--r--  arch/tile/lib/string-endian.h     |  44
-rw-r--r--  arch/tile/lib/strlen_32.c         |  36
-rw-r--r--  arch/tile/lib/strlen_64.c         |  35
-rw-r--r--  arch/tile/lib/strnlen_32.c        |  47
-rw-r--r--  arch/tile/lib/strnlen_64.c        |  48
-rw-r--r--  arch/tile/lib/uaccess.c           |  24
-rw-r--r--  arch/tile/lib/usercopy_32.S       |  89
-rw-r--r--  arch/tile/lib/usercopy_64.S       |  89
29 files changed, 0 insertions, 3313 deletions
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile
deleted file mode 100644
index 815a1fdeb2e4..000000000000
--- a/arch/tile/lib/Makefile
+++ /dev/null
@@ -1,19 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for TILE-specific library files..
-#
-
-lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \
- memmove.o memcpy_$(BITS).o memchr_$(BITS).o memset_$(BITS).o \
- strchr_$(BITS).o strlen_$(BITS).o strnlen_$(BITS).o
-
-lib-$(CONFIG_TILEGX) += memcpy_user_64.o
-lib-$(CONFIG_TILEPRO) += atomic_32.o atomic_asm_32.o
-lib-$(CONFIG_SMP) += spinlock_$(BITS).o usercopy_$(BITS).o
-
-obj-$(CONFIG_MODULES) += exports.o
-
-# The finv_buffer_remote() and copy_{to,from}_user() routines can't
-# have -pg added, since they both rely on being leaf functions.
-CFLAGS_REMOVE_cacheflush.o = -pg
-CFLAGS_REMOVE_memcpy_user_64.o = -pg
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c
deleted file mode 100644
index f8128800dbf5..000000000000
--- a/arch/tile/lib/atomic_32.c
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/cache.h>
-#include <linux/delay.h>
-#include <linux/uaccess.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/atomic.h>
-#include <arch/chip.h>
-
-/* This page is remapped on startup to be hash-for-home. */
-int atomic_locks[PAGE_SIZE / sizeof(int)] __page_aligned_bss;
-
-int *__atomic_hashed_lock(volatile void *v)
-{
- /* NOTE: this code must match "sys_cmpxchg" in kernel/intvec_32.S */
- /*
- * Use bits [3, 3 + ATOMIC_HASH_SHIFT) as the lock index.
- * Using mm works here because atomic_locks is page aligned.
- */
- unsigned long ptr = __insn_mm((unsigned long)v >> 1,
- (unsigned long)atomic_locks,
- 2, (ATOMIC_HASH_SHIFT + 2) - 1);
- return (int *)ptr;
-}
-
-#ifdef CONFIG_SMP
-/* Return whether the passed pointer is a valid atomic lock pointer. */
-static int is_atomic_lock(int *p)
-{
- return p >= &atomic_locks[0] && p < &atomic_locks[ATOMIC_HASH_SIZE];
-}
-
-void __atomic_fault_unlock(int *irqlock_word)
-{
- BUG_ON(!is_atomic_lock(irqlock_word));
- BUG_ON(*irqlock_word != 1);
- *irqlock_word = 0;
-}
-
-#endif /* CONFIG_SMP */
-
-static inline int *__atomic_setup(volatile void *v)
-{
- /* Issue a load to the target to bring it into cache. */
- *(volatile int *)v;
- return __atomic_hashed_lock(v);
-}
-
-int _atomic_xchg(int *v, int n)
-{
- return __atomic32_xchg(v, __atomic_setup(v), n).val;
-}
-EXPORT_SYMBOL(_atomic_xchg);
-
-int _atomic_xchg_add(int *v, int i)
-{
- return __atomic32_xchg_add(v, __atomic_setup(v), i).val;
-}
-EXPORT_SYMBOL(_atomic_xchg_add);
-
-int _atomic_xchg_add_unless(int *v, int a, int u)
-{
- /*
- * Note: argument order is switched here since it is easier
- * to use the first argument consistently as the "old value"
- * in the assembly, as is done for _atomic_cmpxchg().
- */
- return __atomic32_xchg_add_unless(v, __atomic_setup(v), u, a).val;
-}
-EXPORT_SYMBOL(_atomic_xchg_add_unless);
-
-int _atomic_cmpxchg(int *v, int o, int n)
-{
- return __atomic32_cmpxchg(v, __atomic_setup(v), o, n).val;
-}
-EXPORT_SYMBOL(_atomic_cmpxchg);
-
-unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask)
-{
- return __atomic32_fetch_or((int *)p, __atomic_setup(p), mask).val;
-}
-EXPORT_SYMBOL(_atomic_fetch_or);
-
-unsigned long _atomic_fetch_and(volatile unsigned long *p, unsigned long mask)
-{
- return __atomic32_fetch_and((int *)p, __atomic_setup(p), mask).val;
-}
-EXPORT_SYMBOL(_atomic_fetch_and);
-
-unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask)
-{
- return __atomic32_fetch_andn((int *)p, __atomic_setup(p), mask).val;
-}
-EXPORT_SYMBOL(_atomic_fetch_andn);
-
-unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask)
-{
- return __atomic32_fetch_xor((int *)p, __atomic_setup(p), mask).val;
-}
-EXPORT_SYMBOL(_atomic_fetch_xor);
-
-
-long long _atomic64_xchg(long long *v, long long n)
-{
- return __atomic64_xchg(v, __atomic_setup(v), n);
-}
-EXPORT_SYMBOL(_atomic64_xchg);
-
-long long _atomic64_xchg_add(long long *v, long long i)
-{
- return __atomic64_xchg_add(v, __atomic_setup(v), i);
-}
-EXPORT_SYMBOL(_atomic64_xchg_add);
-
-long long _atomic64_xchg_add_unless(long long *v, long long a, long long u)
-{
- /*
- * Note: argument order is switched here since it is easier
- * to use the first argument consistently as the "old value"
- * in the assembly, as is done for _atomic_cmpxchg().
- */
- return __atomic64_xchg_add_unless(v, __atomic_setup(v), u, a);
-}
-EXPORT_SYMBOL(_atomic64_xchg_add_unless);
-
-long long _atomic64_cmpxchg(long long *v, long long o, long long n)
-{
- return __atomic64_cmpxchg(v, __atomic_setup(v), o, n);
-}
-EXPORT_SYMBOL(_atomic64_cmpxchg);
-
-long long _atomic64_fetch_and(long long *v, long long n)
-{
- return __atomic64_fetch_and(v, __atomic_setup(v), n);
-}
-EXPORT_SYMBOL(_atomic64_fetch_and);
-
-long long _atomic64_fetch_or(long long *v, long long n)
-{
- return __atomic64_fetch_or(v, __atomic_setup(v), n);
-}
-EXPORT_SYMBOL(_atomic64_fetch_or);
-
-long long _atomic64_fetch_xor(long long *v, long long n)
-{
- return __atomic64_fetch_xor(v, __atomic_setup(v), n);
-}
-EXPORT_SYMBOL(_atomic64_fetch_xor);
-
-/*
- * If any of the atomic or futex routines hit a bad address (not in
- * the page tables at kernel PL) this routine is called. The futex
- * routines are never used on kernel space, and the normal atomics and
- * bitops are never used on user space. So a fault on kernel space
- * must be fatal, but a fault on userspace is a futex fault and we
- * need to return -EFAULT. Note that the context this routine is
- * invoked in is the context of the "_atomic_xxx()" routines called
- * by the functions in this file.
- */
-struct __get_user __atomic_bad_address(int __user *addr)
-{
- if (unlikely(!access_ok(VERIFY_WRITE, addr, sizeof(int))))
- panic("Bad address used for kernel atomic op: %p\n", addr);
- return (struct __get_user) { .err = -EFAULT };
-}
-
-
-void __init __init_atomic_per_cpu(void)
-{
- /* Validate power-of-two and "bigger than cpus" assumption */
- BUILD_BUG_ON(ATOMIC_HASH_SIZE & (ATOMIC_HASH_SIZE-1));
- BUG_ON(ATOMIC_HASH_SIZE < nr_cpu_ids);
-
- /*
- * On TILEPro we prefer to use a single hash-for-home
- * page, since this means atomic operations are less
- * likely to encounter a TLB fault and thus should
- * in general perform faster. You may wish to disable
- * this in situations where few hash-for-home tiles
- * are configured.
- */
- BUG_ON((unsigned long)atomic_locks % PAGE_SIZE != 0);
-
- /* The locks must all fit on one page. */
- BUILD_BUG_ON(ATOMIC_HASH_SIZE * sizeof(int) > PAGE_SIZE);
-
- /*
- * We use the page offset of the atomic value's address as
- * an index into atomic_locks, excluding the low 3 bits.
- * That should not produce more indices than ATOMIC_HASH_SIZE.
- */
- BUILD_BUG_ON((PAGE_SIZE >> 3) > ATOMIC_HASH_SIZE);
-}
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S
deleted file mode 100644
index 94709ab41ed8..000000000000
--- a/arch/tile/lib/atomic_asm_32.S
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- *
- * Support routines for atomic operations. Each function takes:
- *
- * r0: address to manipulate
- * r1: pointer to atomic lock guarding this operation (for ATOMIC_LOCK_REG)
- * r2: new value to write, or for cmpxchg/add_unless, value to compare against
- * r3: (cmpxchg/xchg_add_unless) new value to write or add;
- * (atomic64 ops) high word of value to write
- * r4/r5: (cmpxchg64/add_unless64) new value to write or add
- *
- * The 32-bit routines return a "struct __get_user" so that the futex code
- * has an opportunity to return -EFAULT to the user if needed.
- * The 64-bit routines just return a "long long" with the value,
- * since they are only used from kernel space and don't expect to fault.
- * Support for 16-bit ops is included in the framework but we don't provide any.
- *
- * Note that the caller is advised to issue a suitable L1 or L2
- * prefetch on the address being manipulated to avoid extra stalls.
- * In addition, the hot path is on two icache lines, and we start with
- * a jump to the second line to make sure they are both in cache so
- * that we never stall waiting on icache fill while holding the lock.
- * (This doesn't work out with most 64-bit ops, since they consume
- * too many bundles, so may take an extra i-cache stall.)
- *
- * These routines set the INTERRUPT_CRITICAL_SECTION bit, just
- * like sys_cmpxchg(), so that NMIs like PERF_COUNT will not interrupt
- * the code, just page faults.
- *
- * If the load or store faults in a way that can be directly fixed in
- * the do_page_fault_ics() handler (e.g. a vmalloc reference) we fix it
- * directly, return to the instruction that faulted, and retry it.
- *
- * If the load or store faults in a way that potentially requires us
- * to release the atomic lock, then retry (e.g. a migrating PTE), we
- * reset the PC in do_page_fault_ics() to the "tns" instruction so
- * that on return we will reacquire the lock and restart the op. We
- * are somewhat overloading the exception_table_entry notion by doing
- * this, since those entries are not normally used for migrating PTEs.
- *
- * If the main page fault handler discovers a bad address, it will see
- * the PC pointing to the "tns" instruction (due to the earlier
- * exception_table_entry processing in do_page_fault_ics), and
- * re-reset the PC to the fault handler, atomic_bad_address(), which
- * effectively takes over from the atomic op and can either return a
- * bad "struct __get_user" (for user addresses) or can just panic (for
- * bad kernel addresses).
- *
- * Note that if the value we would store is the same as what we
- * loaded, we bypass the store. Other platforms with true atomics can
- * make the guarantee that a non-atomic __clear_bit(), for example,
- * can safely race with an atomic test_and_set_bit(); this example is
- * from bit_spinlock.h in slub_lock() / slub_unlock(). We can't do
- * that on Tile since the "atomic" op is really just a
- * read/modify/write, and can race with the non-atomic
- * read/modify/write. However, if we can short-circuit the write when
- * it is not needed, in the atomic case, we avoid the race.
- */
-
-#include <linux/linkage.h>
-#include <asm/atomic_32.h>
-#include <asm/page.h>
-#include <asm/processor.h>
-
- .section .text.atomic,"ax"
-ENTRY(__start_atomic_asm_code)
-
- .macro atomic_op, name, bitwidth, body
- .align 64
-STD_ENTRY_SECTION(__atomic\name, .text.atomic)
- {
- movei r24, 1
- j 4f /* branch to second cache line */
- }
-1: {
- .ifc \bitwidth,16
- lh r22, r0
- .else
- lw r22, r0
- addi r28, r0, 4
- .endif
- }
- .ifc \bitwidth,64
- lw r23, r28
- .endif
- \body /* set r24, and r25 if 64-bit */
- {
- seq r26, r22, r24
- seq r27, r23, r25
- }
- .ifc \bitwidth,64
- bbnst r27, 2f
- .endif
- bbs r26, 3f /* skip write-back if it's the same value */
-2: {
- .ifc \bitwidth,16
- sh r0, r24
- .else
- sw r0, r24
- .endif
- }
- .ifc \bitwidth,64
- sw r28, r25
- .endif
- mf
-3: {
- move r0, r22
- .ifc \bitwidth,64
- move r1, r23
- .else
- move r1, zero
- .endif
- sw ATOMIC_LOCK_REG_NAME, zero
- }
- mtspr INTERRUPT_CRITICAL_SECTION, zero
- jrp lr
-4: {
- move ATOMIC_LOCK_REG_NAME, r1
- mtspr INTERRUPT_CRITICAL_SECTION, r24
- }
-#ifndef CONFIG_SMP
- j 1b /* no atomic locks */
-#else
- {
- tns r21, ATOMIC_LOCK_REG_NAME
- moveli r23, 2048 /* maximum backoff time in cycles */
- }
- {
- bzt r21, 1b /* branch if lock acquired */
- moveli r25, 32 /* starting backoff time in cycles */
- }
-5: mtspr INTERRUPT_CRITICAL_SECTION, zero
- mfspr r26, CYCLE_LOW /* get start point for this backoff */
-6: mfspr r22, CYCLE_LOW /* test to see if we've backed off enough */
- sub r22, r22, r26
- slt r22, r22, r25
- bbst r22, 6b
- {
- mtspr INTERRUPT_CRITICAL_SECTION, r24
- shli r25, r25, 1 /* double the backoff; retry the tns */
- }
- {
- tns r21, ATOMIC_LOCK_REG_NAME
- slt r26, r23, r25 /* is the proposed backoff too big? */
- }
- {
- bzt r21, 1b /* branch if lock acquired */
- mvnz r25, r26, r23
- }
- j 5b
-#endif
- STD_ENDPROC(__atomic\name)
- .ifc \bitwidth,32
- .pushsection __ex_table,"a"
- .align 4
- .word 1b, __atomic\name
- .word 2b, __atomic\name
- .word __atomic\name, __atomic_bad_address
- .popsection
- .endif
- .endm
-
-
-/*
- * Use __atomic32 prefix to avoid collisions with GCC builtin __atomic functions.
- */
-
-atomic_op 32_cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }"
-atomic_op 32_xchg, 32, "move r24, r2"
-atomic_op 32_xchg_add, 32, "add r24, r22, r2"
-atomic_op 32_xchg_add_unless, 32, \
- "sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }"
-atomic_op 32_fetch_or, 32, "or r24, r22, r2"
-atomic_op 32_fetch_and, 32, "and r24, r22, r2"
-atomic_op 32_fetch_andn, 32, "nor r2, r2, zero; and r24, r22, r2"
-atomic_op 32_fetch_xor, 32, "xor r24, r22, r2"
-
-atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \
- { bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }"
-atomic_op 64_xchg, 64, "{ move r24, r2; move r25, r3 }"
-atomic_op 64_xchg_add, 64, "{ add r24, r22, r2; add r25, r23, r3 }; \
- slt_u r26, r24, r22; add r25, r25, r26"
-atomic_op 64_xchg_add_unless, 64, \
- "{ sne r26, r22, r2; sne r27, r23, r3 }; \
- { bbns r26, 3f; add r24, r22, r4 }; \
- { bbns r27, 3f; add r25, r23, r5 }; \
- slt_u r26, r24, r22; add r25, r25, r26"
-atomic_op 64_fetch_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }"
-atomic_op 64_fetch_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }"
-atomic_op 64_fetch_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }"
-
- jrp lr /* happy backtracer */
-
-ENTRY(__end_atomic_asm_code)
diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c
deleted file mode 100644
index c1ebc1065fc1..000000000000
--- a/arch/tile/lib/cacheflush.c
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/export.h>
-#include <asm/page.h>
-#include <asm/cacheflush.h>
-#include <arch/icache.h>
-#include <arch/spr_def.h>
-
-
-void __flush_icache_range(unsigned long start, unsigned long end)
-{
- invalidate_icache((const void *)start, end - start, PAGE_SIZE);
-}
-
-
-/* Force a load instruction to issue. */
-static inline void force_load(char *p)
-{
- *(volatile char *)p;
-}
-
-/*
- * Flush and invalidate a VA range that is homed remotely on a single
- * core (if "!hfh") or homed via hash-for-home (if "hfh"), waiting
- * until the memory controller holds the flushed values.
- */
-void __attribute__((optimize("omit-frame-pointer")))
-finv_buffer_remote(void *buffer, size_t size, int hfh)
-{
- char *p, *base;
- size_t step_size, load_count;
-
- /*
- * On TILEPro the striping granularity is a fixed 8KB; on
- * TILE-Gx it is configurable, and we rely on the fact that
- * the hypervisor always configures maximum striping, so that
- * bits 9 and 10 of the PA are part of the stripe function, so
- * every 512 bytes we hit a striping boundary.
- *
- */
-#ifdef __tilegx__
- const unsigned long STRIPE_WIDTH = 512;
-#else
- const unsigned long STRIPE_WIDTH = 8192;
-#endif
-
-#ifdef __tilegx__
- /*
- * On TILE-Gx, we must disable the dstream prefetcher before doing
- * a cache flush; otherwise, we could end up with data in the cache
- * that we don't want there. Note that normally we'd do an mf
- * after the SPR write to disabling the prefetcher, but we do one
- * below, before any further loads, so there's no need to do it
- * here.
- */
- uint_reg_t old_dstream_pf = __insn_mfspr(SPR_DSTREAM_PF);
- __insn_mtspr(SPR_DSTREAM_PF, 0);
-#endif
-
- /*
- * Flush and invalidate the buffer out of the local L1/L2
- * and request the home cache to flush and invalidate as well.
- */
- __finv_buffer(buffer, size);
-
- /*
- * Wait for the home cache to acknowledge that it has processed
- * all the flush-and-invalidate requests. This does not mean
- * that the flushed data has reached the memory controller yet,
- * but it does mean the home cache is processing the flushes.
- */
- __insn_mf();
-
- /*
- * Issue a load to the last cache line, which can't complete
- * until all the previously-issued flushes to the same memory
- * controller have also completed. If we weren't striping
- * memory, that one load would be sufficient, but since we may
- * be, we also need to back up to the last load issued to
- * another memory controller, which would be the point where
- * we crossed a "striping" boundary (the granularity of striping
- * across memory controllers). Keep backing up and doing this
- * until we are before the beginning of the buffer, or have
- * hit all the controllers.
- *
- * If we are flushing a hash-for-home buffer, it's even worse.
- * Each line may be homed on a different tile, and each tile
- * may have up to four lines that are on different
- * controllers. So as we walk backwards, we have to touch
- * enough cache lines to satisfy these constraints. In
- * practice this ends up being close enough to "load from
- * every cache line on a full memory stripe on each
- * controller" that we simply do that, to simplify the logic.
- *
- * On TILE-Gx the hash-for-home function is much more complex,
- * with the upshot being we can't readily guarantee we have
- * hit both entries in the 128-entry AMT that were hit by any
- * load in the entire range, so we just re-load them all.
- * With larger buffers, we may want to consider using a hypervisor
- * trap to issue loads directly to each hash-for-home tile for
- * each controller (doing it from Linux would trash the TLB).
- */
- if (hfh) {
- step_size = L2_CACHE_BYTES;
-#ifdef __tilegx__
- load_count = (size + L2_CACHE_BYTES - 1) / L2_CACHE_BYTES;
-#else
- load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) *
- (1 << CHIP_LOG_NUM_MSHIMS());
-#endif
- } else {
- step_size = STRIPE_WIDTH;
- load_count = (1 << CHIP_LOG_NUM_MSHIMS());
- }
-
- /* Load the last byte of the buffer. */
- p = (char *)buffer + size - 1;
- force_load(p);
-
- /* Bump down to the end of the previous stripe or cache line. */
- p -= step_size;
- p = (char *)((unsigned long)p | (step_size - 1));
-
- /* Figure out how far back we need to go. */
- base = p - (step_size * (load_count - 2));
- if ((unsigned long)base < (unsigned long)buffer)
- base = buffer;
-
- /* Fire all the loads we need. */
- for (; p >= base; p -= step_size)
- force_load(p);
-
- /*
- * Repeat, but with finv's instead of loads, to get rid of the
- * data we just loaded into our own cache and the old home L3.
- * The finv's are guaranteed not to actually flush the data in
- * the buffer back to their home, since we just read it, so the
- * lines are clean in cache; we will only invalidate those lines.
- */
- p = (char *)buffer + size - 1;
- __insn_finv(p);
- p -= step_size;
- p = (char *)((unsigned long)p | (step_size - 1));
- for (; p >= base; p -= step_size)
- __insn_finv(p);
-
- /* Wait for these finv's (and thus the first finvs) to be done. */
- __insn_mf();
-
-#ifdef __tilegx__
- /* Reenable the prefetcher. */
- __insn_mtspr(SPR_DSTREAM_PF, old_dstream_pf);
-#endif
-}
-EXPORT_SYMBOL_GPL(finv_buffer_remote);
diff --git a/arch/tile/lib/checksum.c b/arch/tile/lib/checksum.c
deleted file mode 100644
index c3ca3e64d9d9..000000000000
--- a/arch/tile/lib/checksum.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- * Support code for the main lib/checksum.c.
- */
-
-#include <net/checksum.h>
-#include <linux/module.h>
-
-__wsum do_csum(const unsigned char *buff, int len)
-{
- int odd, count;
- unsigned long result = 0;
-
- if (len <= 0)
- goto out;
- odd = 1 & (unsigned long) buff;
- if (odd) {
- result = (*buff << 8);
- len--;
- buff++;
- }
- count = len >> 1; /* nr of 16-bit words.. */
- if (count) {
- if (2 & (unsigned long) buff) {
- result += *(const unsigned short *)buff;
- count--;
- len -= 2;
- buff += 2;
- }
- count >>= 1; /* nr of 32-bit words.. */
- if (count) {
-#ifdef __tilegx__
- if (4 & (unsigned long) buff) {
- unsigned int w = *(const unsigned int *)buff;
- result = __insn_v2sadau(result, w, 0);
- count--;
- len -= 4;
- buff += 4;
- }
- count >>= 1; /* nr of 64-bit words.. */
-#endif
-
- /*
- * This algorithm could wrap around for very
- * large buffers, but those should be impossible.
- */
- BUG_ON(count >= 65530);
-
- while (count) {
- unsigned long w = *(const unsigned long *)buff;
- count--;
- buff += sizeof(w);
-#ifdef __tilegx__
- result = __insn_v2sadau(result, w, 0);
-#else
- result = __insn_sadah_u(result, w, 0);
-#endif
- }
-#ifdef __tilegx__
- if (len & 4) {
- unsigned int w = *(const unsigned int *)buff;
- result = __insn_v2sadau(result, w, 0);
- buff += 4;
- }
-#endif
- }
- if (len & 2) {
- result += *(const unsigned short *) buff;
- buff += 2;
- }
- }
- if (len & 1)
- result += *buff;
- result = csum_long(result);
- if (odd)
- result = swab16(result);
-out:
- return result;
-}
diff --git a/arch/tile/lib/cpumask.c b/arch/tile/lib/cpumask.c
deleted file mode 100644
index 75947edccb26..000000000000
--- a/arch/tile/lib/cpumask.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/cpumask.h>
-#include <linux/ctype.h>
-#include <linux/errno.h>
-#include <linux/smp.h>
-#include <linux/export.h>
-
-/*
- * Allow cropping out bits beyond the end of the array.
- * Move to "lib" directory if more clients want to use this routine.
- */
-int bitmap_parselist_crop(const char *bp, unsigned long *maskp, int nmaskbits)
-{
- unsigned a, b;
-
- bitmap_zero(maskp, nmaskbits);
- do {
- if (!isdigit(*bp))
- return -EINVAL;
- a = simple_strtoul(bp, (char **)&bp, 10);
- b = a;
- if (*bp == '-') {
- bp++;
- if (!isdigit(*bp))
- return -EINVAL;
- b = simple_strtoul(bp, (char **)&bp, 10);
- }
- if (!(a <= b))
- return -EINVAL;
- if (b >= nmaskbits)
- b = nmaskbits-1;
- while (a <= b) {
- set_bit(a, maskp);
- a++;
- }
- if (*bp == ',')
- bp++;
- } while (*bp != '\0' && *bp != '\n');
- return 0;
-}
-EXPORT_SYMBOL(bitmap_parselist_crop);
diff --git a/arch/tile/lib/delay.c b/arch/tile/lib/delay.c
deleted file mode 100644
index cdacdd11d360..000000000000
--- a/arch/tile/lib/delay.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/module.h>
-#include <linux/delay.h>
-#include <linux/thread_info.h>
-#include <asm/timex.h>
-
-void __udelay(unsigned long usecs)
-{
- if (usecs > ULONG_MAX / 1000) {
- WARN_ON_ONCE(usecs > ULONG_MAX / 1000);
- usecs = ULONG_MAX / 1000;
- }
- __ndelay(usecs * 1000);
-}
-EXPORT_SYMBOL(__udelay);
-
-void __ndelay(unsigned long nsecs)
-{
- cycles_t target = get_cycles();
- target += ns2cycles(nsecs);
- while (get_cycles() < target)
- cpu_relax();
-}
-EXPORT_SYMBOL(__ndelay);
-
-void __delay(unsigned long cycles)
-{
- cycles_t target = get_cycles() + cycles;
- while (get_cycles() < target)
- cpu_relax();
-}
-EXPORT_SYMBOL(__delay);
diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c
deleted file mode 100644
index ecce8e177e3f..000000000000
--- a/arch/tile/lib/exports.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- *
- * Exports from assembler code and from libtile-cc.
- */
-
-#include <linux/module.h>
-
-/* arch/tile/lib/usercopy.S */
-#include <linux/uaccess.h>
-EXPORT_SYMBOL(clear_user_asm);
-EXPORT_SYMBOL(flush_user_asm);
-EXPORT_SYMBOL(finv_user_asm);
-
-/* arch/tile/kernel/entry.S */
-#include <linux/kernel.h>
-#include <asm/processor.h>
-EXPORT_SYMBOL(current_text_addr);
-
-/* arch/tile/kernel/head.S */
-EXPORT_SYMBOL(empty_zero_page);
-
-#ifdef CONFIG_FUNCTION_TRACER
-/* arch/tile/kernel/mcount_64.S */
-#include <asm/ftrace.h>
-EXPORT_SYMBOL(__mcount);
-#endif /* CONFIG_FUNCTION_TRACER */
-
-/* arch/tile/lib/, various memcpy files */
-EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(raw_copy_to_user);
-EXPORT_SYMBOL(raw_copy_from_user);
-#ifdef __tilegx__
-EXPORT_SYMBOL(raw_copy_in_user);
-#endif
-
-/* hypervisor glue */
-#include <hv/hypervisor.h>
-EXPORT_SYMBOL(hv_dev_open);
-EXPORT_SYMBOL(hv_dev_pread);
-EXPORT_SYMBOL(hv_dev_pwrite);
-EXPORT_SYMBOL(hv_dev_preada);
-EXPORT_SYMBOL(hv_dev_pwritea);
-EXPORT_SYMBOL(hv_dev_poll);
-EXPORT_SYMBOL(hv_dev_poll_cancel);
-EXPORT_SYMBOL(hv_dev_close);
-EXPORT_SYMBOL(hv_sysconf);
-EXPORT_SYMBOL(hv_confstr);
-EXPORT_SYMBOL(hv_get_rtc);
-EXPORT_SYMBOL(hv_set_rtc);
-
-/* libgcc.a */
-uint32_t __udivsi3(uint32_t dividend, uint32_t divisor);
-EXPORT_SYMBOL(__udivsi3);
-int32_t __divsi3(int32_t dividend, int32_t divisor);
-EXPORT_SYMBOL(__divsi3);
-uint64_t __udivdi3(uint64_t dividend, uint64_t divisor);
-EXPORT_SYMBOL(__udivdi3);
-int64_t __divdi3(int64_t dividend, int64_t divisor);
-EXPORT_SYMBOL(__divdi3);
-uint32_t __umodsi3(uint32_t dividend, uint32_t divisor);
-EXPORT_SYMBOL(__umodsi3);
-int32_t __modsi3(int32_t dividend, int32_t divisor);
-EXPORT_SYMBOL(__modsi3);
-uint64_t __umoddi3(uint64_t dividend, uint64_t divisor);
-EXPORT_SYMBOL(__umoddi3);
-int64_t __moddi3(int64_t dividend, int64_t divisor);
-EXPORT_SYMBOL(__moddi3);
-#ifdef __tilegx__
-typedef int TItype __attribute__((mode(TI)));
-TItype __multi3(TItype a, TItype b);
-EXPORT_SYMBOL(__multi3); /* required for gcc 7 and later */
-#else
-int64_t __muldi3(int64_t, int64_t);
-EXPORT_SYMBOL(__muldi3);
-uint64_t __lshrdi3(uint64_t, unsigned int);
-EXPORT_SYMBOL(__lshrdi3);
-uint64_t __ashrdi3(uint64_t, unsigned int);
-EXPORT_SYMBOL(__ashrdi3);
-uint64_t __ashldi3(uint64_t, unsigned int);
-EXPORT_SYMBOL(__ashldi3);
-int __ffsdi2(uint64_t);
-EXPORT_SYMBOL(__ffsdi2);
-#endif
diff --git a/arch/tile/lib/memchr_32.c b/arch/tile/lib/memchr_32.c
deleted file mode 100644
index cc3d9badf030..000000000000
--- a/arch/tile/lib/memchr_32.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/module.h>
-
-void *memchr(const void *s, int c, size_t n)
-{
- const uint32_t *last_word_ptr;
- const uint32_t *p;
- const char *last_byte_ptr;
- uintptr_t s_int;
- uint32_t goal, before_mask, v, bits;
- char *ret;
-
- if (__builtin_expect(n == 0, 0)) {
- /* Don't dereference any memory if the array is empty. */
- return NULL;
- }
-
- /* Get an aligned pointer. */
- s_int = (uintptr_t) s;
- p = (const uint32_t *)(s_int & -4);
-
- /* Create four copies of the byte for which we are looking. */
- goal = 0x01010101 * (uint8_t) c;
-
- /* Read the first word, but munge it so that bytes before the array
- * will not match goal.
- *
- * Note that this shift count expression works because we know
- * shift counts are taken mod 32.
- */
- before_mask = (1 << (s_int << 3)) - 1;
- v = (*p | before_mask) ^ (goal & before_mask);
-
- /* Compute the address of the last byte. */
- last_byte_ptr = (const char *)s + n - 1;
-
- /* Compute the address of the word containing the last byte. */
- last_word_ptr = (const uint32_t *)((uintptr_t) last_byte_ptr & -4);
-
- while ((bits = __insn_seqb(v, goal)) == 0) {
- if (__builtin_expect(p == last_word_ptr, 0)) {
- /* We already read the last word in the array,
- * so give up.
- */
- return NULL;
- }
- v = *++p;
- }
-
- /* We found a match, but it might be in a byte past the end
- * of the array.
- */
- ret = ((char *)p) + (__insn_ctz(bits) >> 3);
- return (ret <= last_byte_ptr) ? ret : NULL;
-}
-EXPORT_SYMBOL(memchr);
diff --git a/arch/tile/lib/memchr_64.c b/arch/tile/lib/memchr_64.c
deleted file mode 100644
index f8196b3a950e..000000000000
--- a/arch/tile/lib/memchr_64.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright 2011 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include "string-endian.h"
-
-void *memchr(const void *s, int c, size_t n)
-{
- const uint64_t *last_word_ptr;
- const uint64_t *p;
- const char *last_byte_ptr;
- uintptr_t s_int;
- uint64_t goal, before_mask, v, bits;
- char *ret;
-
- if (__builtin_expect(n == 0, 0)) {
- /* Don't dereference any memory if the array is empty. */
- return NULL;
- }
-
- /* Get an aligned pointer. */
- s_int = (uintptr_t) s;
- p = (const uint64_t *)(s_int & -8);
-
- /* Create eight copies of the byte for which we are looking. */
- goal = copy_byte(c);
-
- /* Read the first word, but munge it so that bytes before the array
- * will not match goal.
- */
- before_mask = MASK(s_int);
- v = (*p | before_mask) ^ (goal & before_mask);
-
- /* Compute the address of the last byte. */
- last_byte_ptr = (const char *)s + n - 1;
-
- /* Compute the address of the word containing the last byte. */
- last_word_ptr = (const uint64_t *)((uintptr_t) last_byte_ptr & -8);
-
- while ((bits = __insn_v1cmpeq(v, goal)) == 0) {
- if (__builtin_expect(p == last_word_ptr, 0)) {
- /* We already read the last word in the array,
- * so give up.
- */
- return NULL;
- }
- v = *++p;
- }
-
- /* We found a match, but it might be in a byte past the end
- * of the array.
- */
- ret = ((char *)p) + (CFZ(bits) >> 3);
- return (ret <= last_byte_ptr) ? ret : NULL;
-}
-EXPORT_SYMBOL(memchr);
diff --git a/arch/tile/lib/memcpy_32.S b/arch/tile/lib/memcpy_32.S
deleted file mode 100644
index 270f1267cd18..000000000000
--- a/arch/tile/lib/memcpy_32.S
+++ /dev/null
@@ -1,544 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <arch/chip.h>
-
-
-/*
- * This file shares the implementation of the userspace memcpy and
- * the kernel's memcpy, copy_to_user and copy_from_user.
- */
-
-#include <linux/linkage.h>
-
-#define IS_MEMCPY 0
-#define IS_COPY_FROM_USER 1
-#define IS_COPY_TO_USER -1
-
- .section .text.memcpy_common, "ax"
- .align 64
-
-/* Use this to preface each bundle that can cause an exception so
- * the kernel can clean up properly. The special cleanup code should
- * not use these, since it knows what it is doing.
- */
-#define EX \
- .pushsection __ex_table, "a"; \
- .align 4; \
- .word 9f, memcpy_common_fixup; \
- .popsection; \
- 9
-
-
-/* raw_copy_from_user takes the kernel target address in r0,
- * the user source in r1, and the bytes to copy in r2.
- * It returns the number of uncopiable bytes (hopefully zero) in r0.
- */
-ENTRY(raw_copy_from_user)
-.type raw_copy_from_user, @function
- FEEDBACK_ENTER_EXPLICIT(raw_copy_from_user, \
- .text.memcpy_common, \
- .Lend_memcpy_common - raw_copy_from_user)
- { movei r29, IS_COPY_FROM_USER; j memcpy_common }
- .size raw_copy_from_user, . - raw_copy_from_user
-
-/* raw_copy_to_user takes the user target address in r0,
- * the kernel source in r1, and the bytes to copy in r2.
- * It returns the number of uncopiable bytes (hopefully zero) in r0.
- */
-ENTRY(raw_copy_to_user)
-.type raw_copy_to_user, @function
- FEEDBACK_REENTER(raw_copy_from_user)
- { movei r29, IS_COPY_TO_USER; j memcpy_common }
- .size raw_copy_to_user, . - raw_copy_to_user
-
-ENTRY(memcpy)
-.type memcpy, @function
- FEEDBACK_REENTER(raw_copy_from_user)
- { movei r29, IS_MEMCPY }
- .size memcpy, . - memcpy
- /* Fall through */
-
- .type memcpy_common, @function
-memcpy_common:
- /* On entry, r29 holds one of the IS_* macro values from above. */
-
-
- /* r0 is the dest, r1 is the source, r2 is the size. */
-
- /* Save aside original dest so we can return it at the end. */
- { sw sp, lr; move r23, r0; or r4, r0, r1 }
-
- /* Check for an empty size. */
- { bz r2, .Ldone; andi r4, r4, 3 }
-
- /* Save aside original values in case of a fault. */
- { move r24, r1; move r25, r2 }
- move r27, lr
-
- /* Check for an unaligned source or dest. */
- { bnz r4, .Lcopy_unaligned_maybe_many; addli r4, r2, -256 }
-
-.Lcheck_aligned_copy_size:
- /* If we are copying < 256 bytes, branch to simple case. */
- { blzt r4, .Lcopy_8_check; slti_u r8, r2, 8 }
-
- /* Copying >= 256 bytes, so jump to complex prefetching loop. */
- { andi r6, r1, 63; j .Lcopy_many }
-
-/*
- *
- * Aligned 4 byte at a time copy loop
- *
- */
-
-.Lcopy_8_loop:
- /* Copy two words at a time to hide load latency. */
-EX: { lw r3, r1; addi r1, r1, 4; slti_u r8, r2, 16 }
-EX: { lw r4, r1; addi r1, r1, 4 }
-EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }
-EX: { sw r0, r4; addi r0, r0, 4; addi r2, r2, -4 }
-.Lcopy_8_check:
- { bzt r8, .Lcopy_8_loop; slti_u r4, r2, 4 }
-
- /* Copy odd leftover word, if any. */
- { bnzt r4, .Lcheck_odd_stragglers }
-EX: { lw r3, r1; addi r1, r1, 4 }
-EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }
-
-.Lcheck_odd_stragglers:
- { bnz r2, .Lcopy_unaligned_few }
-
-.Ldone:
- /* For memcpy return original dest address, else zero. */
- { mz r0, r29, r23; jrp lr }
-
-
-/*
- *
- * Prefetching multiple cache line copy handler (for large transfers).
- *
- */
-
- /* Copy words until r1 is cache-line-aligned. */
-.Lalign_loop:
-EX: { lw r3, r1; addi r1, r1, 4 }
- { andi r6, r1, 63 }
-EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }
-.Lcopy_many:
- { bnzt r6, .Lalign_loop; addi r9, r0, 63 }
-
- { addi r3, r1, 60; andi r9, r9, -64 }
-
- /* No need to prefetch dst, we'll just do the wh64
- * right before we copy a line.
- */
-EX: { lw r5, r3; addi r3, r3, 64; movei r4, 1 }
- /* Intentionally stall for a few cycles to leave L2 cache alone. */
- { bnzt zero, .; move r27, lr }
-EX: { lw r6, r3; addi r3, r3, 64 }
- /* Intentionally stall for a few cycles to leave L2 cache alone. */
- { bnzt zero, . }
-EX: { lw r7, r3; addi r3, r3, 64 }
- /* Intentionally stall for a few cycles to leave L2 cache alone. */
- { bz zero, .Lbig_loop2 }
-
- /* On entry to this loop:
- * - r0 points to the start of dst line 0
- * - r1 points to start of src line 0
- * - r2 >= (256 - 60), only the first time the loop trips.
- * - r3 contains r1 + 128 + 60 [pointer to end of source line 2]
- * This is our prefetch address. When we get near the end
- * rather than prefetching off the end this is changed to point
- * to some "safe" recently loaded address.
- * - r5 contains *(r1 + 60) [i.e. last word of source line 0]
- * - r6 contains *(r1 + 64 + 60) [i.e. last word of source line 1]
- * - r9 contains ((r0 + 63) & -64)
- * [start of next dst cache line.]
- */
-
-.Lbig_loop:
- { jal .Lcopy_line2; add r15, r1, r2 }
-
-.Lbig_loop2:
- /* Copy line 0, first stalling until r5 is ready. */
-EX: { move r12, r5; lw r16, r1 }
- { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 }
- /* Prefetch several lines ahead. */
-EX: { lw r5, r3; addi r3, r3, 64 }
- { jal .Lcopy_line }
-
- /* Copy line 1, first stalling until r6 is ready. */
-EX: { move r12, r6; lw r16, r1 }
- { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 }
- /* Prefetch several lines ahead. */
-EX: { lw r6, r3; addi r3, r3, 64 }
- { jal .Lcopy_line }
-
- /* Copy line 2, first stalling until r7 is ready. */
-EX: { move r12, r7; lw r16, r1 }
- { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 }
- /* Prefetch several lines ahead. */
-EX: { lw r7, r3; addi r3, r3, 64 }
- /* Use up a caches-busy cycle by jumping back to the top of the
- * loop. Might as well get it out of the way now.
- */
- { j .Lbig_loop }
-
-
- /* On entry:
- * - r0 points to the destination line.
- * - r1 points to the source line.
- * - r3 is the next prefetch address.
- * - r9 holds the last address used for wh64.
- * - r12 = WORD_15
- * - r16 = WORD_0.
- * - r17 == r1 + 16.
- * - r27 holds saved lr to restore.
- *
- * On exit:
- * - r0 is incremented by 64.
- * - r1 is incremented by 64, unless that would point to a word
- * beyond the end of the source array, in which case it is redirected
- * to point to an arbitrary word already in the cache.
- * - r2 is decremented by 64.
- * - r3 is unchanged, unless it points to a word beyond the
- * end of the source array, in which case it is redirected
- * to point to an arbitrary word already in the cache.
- * Redirecting is OK since if we are that close to the end
- * of the array we will not come back to this subroutine
- * and use the contents of the prefetched address.
- * - r4 is nonzero iff r2 >= 64.
- * - r9 is incremented by 64, unless it points beyond the
- * end of the last full destination cache line, in which
- * case it is redirected to a "safe address" that can be
- * clobbered (sp - 64)
- * - lr contains the value in r27.
- */
-
-/* r26 unused */
-
-.Lcopy_line:
- /* TODO: when r3 goes past the end, we would like to redirect it
- * to prefetch the last partial cache line (if any) just once, for the
- * benefit of the final cleanup loop. But we don't want to
- * prefetch that line more than once, or subsequent prefetches
- * will go into the RTF. But then .Lbig_loop should unconditionally
- * branch to top of loop to execute final prefetch, and its
- * nop should become a conditional branch.
- */
-
- /* We need two non-memory cycles here to cover the resources
- * used by the loads initiated by the caller.
- */
- { add r15, r1, r2 }
-.Lcopy_line2:
- { slt_u r13, r3, r15; addi r17, r1, 16 }
-
- /* NOTE: this will stall for one cycle as L1 is busy. */
-
- /* Fill second L1D line. */
-EX: { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */
-
- /* Prepare destination line for writing. */
-EX: { wh64 r9; addi r9, r9, 64 }
- /* Load seven words that are L1D hits to cover wh64 L2 usage. */
-
- /* Load the three remaining words from the last L1D line, which
- * we know has already filled the L1D.
- */
-EX: { lw r4, r1; addi r1, r1, 4; addi r20, r1, 16 } /* r4 = WORD_12 */
-EX: { lw r8, r1; addi r1, r1, 4; slt_u r13, r20, r15 }/* r8 = WORD_13 */
-EX: { lw r11, r1; addi r1, r1, -52; mvz r20, r13, r1 } /* r11 = WORD_14 */
-
- /* Load the three remaining words from the first L1D line, first
- * stalling until it has filled by "looking at" r16.
- */
-EX: { lw r13, r1; addi r1, r1, 4; move zero, r16 } /* r13 = WORD_1 */
-EX: { lw r14, r1; addi r1, r1, 4 } /* r14 = WORD_2 */
-EX: { lw r15, r1; addi r1, r1, 8; addi r10, r0, 60 } /* r15 = WORD_3 */
-
- /* Load second word from the second L1D line, first
- * stalling until it has filled by "looking at" r17.
- */
-EX: { lw r19, r1; addi r1, r1, 4; move zero, r17 } /* r19 = WORD_5 */
-
- /* Store last word to the destination line, potentially dirtying it
- * for the first time, which keeps the L2 busy for two cycles.
- */
-EX: { sw r10, r12 } /* store(WORD_15) */
-
- /* Use two L1D hits to cover the sw L2 access above. */
-EX: { lw r10, r1; addi r1, r1, 4 } /* r10 = WORD_6 */
-EX: { lw r12, r1; addi r1, r1, 4 } /* r12 = WORD_7 */
-
- /* Fill third L1D line. */
-EX: { lw r18, r1; addi r1, r1, 4 } /* r18 = WORD_8 */
-
- /* Store first L1D line. */
-EX: { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */
-EX: { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */
-EX: { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */
-EX: { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */
- /* Store second L1D line. */
-EX: { sw r0, r17; addi r0, r0, 4; mvz r9, r16, r13 }/* store(WORD_4) */
-EX: { sw r0, r19; addi r0, r0, 4 } /* store(WORD_5) */
-EX: { sw r0, r10; addi r0, r0, 4 } /* store(WORD_6) */
-EX: { sw r0, r12; addi r0, r0, 4 } /* store(WORD_7) */
-
-EX: { lw r13, r1; addi r1, r1, 4; move zero, r18 } /* r13 = WORD_9 */
-EX: { lw r14, r1; addi r1, r1, 4 } /* r14 = WORD_10 */
-EX: { lw r15, r1; move r1, r20 } /* r15 = WORD_11 */
-
- /* Store third L1D line. */
-EX: { sw r0, r18; addi r0, r0, 4 } /* store(WORD_8) */
-EX: { sw r0, r13; addi r0, r0, 4 } /* store(WORD_9) */
-EX: { sw r0, r14; addi r0, r0, 4 } /* store(WORD_10) */
-EX: { sw r0, r15; addi r0, r0, 4 } /* store(WORD_11) */
-
- /* Store rest of fourth L1D line. */
-EX: { sw r0, r4; addi r0, r0, 4 } /* store(WORD_12) */
- {
-EX: sw r0, r8 /* store(WORD_13) */
- addi r0, r0, 4
- /* Will r2 be > 64 after we subtract 64 below? */
- shri r4, r2, 7
- }
- {
-EX: sw r0, r11 /* store(WORD_14) */
- addi r0, r0, 8
- /* Record 64 bytes successfully copied. */
- addi r2, r2, -64
- }
-
- { jrp lr; move lr, r27 }
-
- /* Convey to the backtrace library that the stack frame is size
- * zero, and the real return address is on the stack rather than
- * in 'lr'.
- */
- { info 8 }
-
- .align 64
-.Lcopy_unaligned_maybe_many:
- /* Skip the setup overhead if we aren't copying many bytes. */
- { slti_u r8, r2, 20; sub r4, zero, r0 }
- { bnzt r8, .Lcopy_unaligned_few; andi r4, r4, 3 }
- { bz r4, .Ldest_is_word_aligned; add r18, r1, r2 }
-
-/*
- *
- * unaligned 4 byte at a time copy handler.
- *
- */
-
- /* Copy single bytes until r0 == 0 mod 4, so we can store words. */
-.Lalign_dest_loop:
-EX: { lb_u r3, r1; addi r1, r1, 1; addi r4, r4, -1 }
-EX: { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 }
- { bnzt r4, .Lalign_dest_loop; andi r3, r1, 3 }
-
- /* If source and dest are now *both* aligned, do an aligned copy. */
- { bz r3, .Lcheck_aligned_copy_size; addli r4, r2, -256 }
-
-.Ldest_is_word_aligned:
-
-EX: { andi r8, r0, 63; lwadd_na r6, r1, 4}
- { slti_u r9, r2, 64; bz r8, .Ldest_is_L2_line_aligned }
-
- /* This copies unaligned words until either there are fewer
- * than 4 bytes left to copy, or until the destination pointer
- * is cache-aligned, whichever comes first.
- *
- * On entry:
- * - r0 is the next store address.
- * - r1 points 4 bytes past the load address corresponding to r0.
- * - r2 >= 4
- * - r6 is the next aligned word loaded.
- */
-.Lcopy_unaligned_src_words:
-EX: { lwadd_na r7, r1, 4; slti_u r8, r2, 4 + 4 }
- /* stall */
- { dword_align r6, r7, r1; slti_u r9, r2, 64 + 4 }
-EX: { swadd r0, r6, 4; addi r2, r2, -4 }
- { bnz r8, .Lcleanup_unaligned_words; andi r8, r0, 63 }
- { bnzt r8, .Lcopy_unaligned_src_words; move r6, r7 }
-
- /* On entry:
- * - r0 is the next store address.
- * - r1 points 4 bytes past the load address corresponding to r0.
- * - r2 >= 4 (# of bytes left to store).
- * - r6 is the next aligned src word value.
- * - r9 = (r2 < 64U).
- * - r18 points one byte past the end of source memory.
- */
-.Ldest_is_L2_line_aligned:
-
- {
- /* Not a full cache line remains. */
- bnz r9, .Lcleanup_unaligned_words
- move r7, r6
- }
-
- /* r2 >= 64 */
-
- /* Kick off two prefetches, but don't go past the end. */
- { addi r3, r1, 63 - 4; addi r8, r1, 64 + 63 - 4 }
- { prefetch r3; move r3, r8; slt_u r8, r8, r18 }
- { mvz r3, r8, r1; addi r8, r3, 64 }
- { prefetch r3; move r3, r8; slt_u r8, r8, r18 }
- { mvz r3, r8, r1; movei r17, 0 }
-
-.Lcopy_unaligned_line:
- /* Prefetch another line. */
- { prefetch r3; addi r15, r1, 60; addi r3, r3, 64 }
- /* Fire off a load of the last word we are about to copy. */
-EX: { lw_na r15, r15; slt_u r8, r3, r18 }
-
-EX: { mvz r3, r8, r1; wh64 r0 }
-
- /* This loop runs twice.
- *
- * On entry:
- * - r17 is even before the first iteration, and odd before
- * the second. It is incremented inside the loop. Encountering
- * an even value at the end of the loop makes it stop.
- */
-.Lcopy_half_an_unaligned_line:
-EX: {
- /* Stall until the last byte is ready. In the steady state this
- * guarantees all words to load below will be in the L2 cache, which
- * avoids shunting the loads to the RTF.
- */
- move zero, r15
- lwadd_na r7, r1, 16
- }
-EX: { lwadd_na r11, r1, 12 }
-EX: { lwadd_na r14, r1, -24 }
-EX: { lwadd_na r8, r1, 4 }
-EX: { lwadd_na r9, r1, 4 }
-EX: {
- lwadd_na r10, r1, 8
- /* r16 = (r2 < 64), after we subtract 32 from r2 below. */
- slti_u r16, r2, 64 + 32
- }
-EX: { lwadd_na r12, r1, 4; addi r17, r17, 1 }
-EX: { lwadd_na r13, r1, 8; dword_align r6, r7, r1 }
-EX: { swadd r0, r6, 4; dword_align r7, r8, r1 }
-EX: { swadd r0, r7, 4; dword_align r8, r9, r1 }
-EX: { swadd r0, r8, 4; dword_align r9, r10, r1 }
-EX: { swadd r0, r9, 4; dword_align r10, r11, r1 }
-EX: { swadd r0, r10, 4; dword_align r11, r12, r1 }
-EX: { swadd r0, r11, 4; dword_align r12, r13, r1 }
-EX: { swadd r0, r12, 4; dword_align r13, r14, r1 }
-EX: { swadd r0, r13, 4; addi r2, r2, -32 }
- { move r6, r14; bbst r17, .Lcopy_half_an_unaligned_line }
-
- { bzt r16, .Lcopy_unaligned_line; move r7, r6 }
-
- /* On entry:
- * - r0 is the next store address.
- * - r1 points 4 bytes past the load address corresponding to r0.
- * - r2 >= 0 (# of bytes left to store).
- * - r7 is the next aligned src word value.
- */
-.Lcleanup_unaligned_words:
- /* Handle any trailing bytes. */
- { bz r2, .Lcopy_unaligned_done; slti_u r8, r2, 4 }
- { bzt r8, .Lcopy_unaligned_src_words; move r6, r7 }
-
- /* Move r1 back to the point where it corresponds to r0. */
- { addi r1, r1, -4 }
-
- /* Fall through */
-
-/*
- *
- * 1 byte at a time copy handler.
- *
- */
-
-.Lcopy_unaligned_few:
-EX: { lb_u r3, r1; addi r1, r1, 1 }
-EX: { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 }
- { bnzt r2, .Lcopy_unaligned_few }
-
-.Lcopy_unaligned_done:
-
- /* For memcpy return original dest address, else zero. */
- { mz r0, r29, r23; jrp lr }
-
-.Lend_memcpy_common:
- .size memcpy_common, .Lend_memcpy_common - memcpy_common
-
- .section .fixup,"ax"
-memcpy_common_fixup:
- .type memcpy_common_fixup, @function
-
- /* Skip any bytes we already successfully copied.
- * r2 (num remaining) is correct, but r0 (dst) and r1 (src)
- * may not be quite right because of unrolling and prefetching.
- * So we need to recompute their values as the address just
- * after the last byte we are sure was successfully loaded and
- * then stored.
- */
-
- /* Determine how many bytes we successfully copied. */
- { sub r3, r25, r2 }
-
- /* Add this to the original r0 and r1 to get their new values. */
- { add r0, r23, r3; add r1, r24, r3 }
-
- { bzt r29, memcpy_fixup_loop }
- { blzt r29, copy_to_user_fixup_loop }
-
-copy_from_user_fixup_loop:
- /* Try copying the rest one byte at a time, expecting a load fault. */
-.Lcfu: { lb_u r3, r1; addi r1, r1, 1 }
- { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 }
- { bnzt r2, copy_from_user_fixup_loop }
-
-.Lcopy_from_user_fixup_zero_remainder:
- move lr, r27
- { move r0, r2; jrp lr }
-
-copy_to_user_fixup_loop:
- /* Try copying the rest one byte at a time, expecting a store fault. */
- { lb_u r3, r1; addi r1, r1, 1 }
-.Lctu: { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 }
- { bnzt r2, copy_to_user_fixup_loop }
-.Lcopy_to_user_fixup_done:
- move lr, r27
- { move r0, r2; jrp lr }
-
-memcpy_fixup_loop:
- /* Try copying the rest one byte at a time. We expect a disastrous
- * fault to happen since we are in fixup code, but let it happen.
- */
- { lb_u r3, r1; addi r1, r1, 1 }
- { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 }
- { bnzt r2, memcpy_fixup_loop }
- /* This should be unreachable, we should have faulted again.
- * But be paranoid and handle it in case some interrupt changed
- * the TLB or something.
- */
- move lr, r27
- { move r0, r23; jrp lr }
-
- .size memcpy_common_fixup, . - memcpy_common_fixup
-
- .section __ex_table,"a"
- .align 4
- .word .Lcfu, .Lcopy_from_user_fixup_zero_remainder
- .word .Lctu, .Lcopy_to_user_fixup_done
diff --git a/arch/tile/lib/memcpy_64.c b/arch/tile/lib/memcpy_64.c
deleted file mode 100644
index 4815354b8cd2..000000000000
--- a/arch/tile/lib/memcpy_64.c
+++ /dev/null
@@ -1,367 +0,0 @@
-/*
- * Copyright 2011 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/module.h>
-/* EXPORT_SYMBOL() is in arch/tile/lib/exports.c since this should be asm. */
-
-/* Must be 8 bytes in size. */
-#define op_t uint64_t
-
-/* Threshold value for when to enter the unrolled loops. */
-#define OP_T_THRES 16
-
-#if CHIP_L2_LINE_SIZE() != 64
-#error "Assumes 64 byte line size"
-#endif
-
-/* How many cache lines ahead should we prefetch? */
-#define PREFETCH_LINES_AHEAD 4
-
-/*
- * Provide "base versions" of load and store for the normal code path.
- * The kernel provides other versions for userspace copies.
- */
-#define ST(p, v) (*(p) = (v))
-#define LD(p) (*(p))
-
-#ifndef USERCOPY_FUNC
-#define ST1 ST
-#define ST2 ST
-#define ST4 ST
-#define ST8 ST
-#define LD1 LD
-#define LD2 LD
-#define LD4 LD
-#define LD8 LD
-#define RETVAL dstv
-void *memcpy(void *__restrict dstv, const void *__restrict srcv, size_t n)
-#else
-/*
- * Special kernel version will provide implementation of the LDn/STn
- * macros to return a count of uncopied bytes due to mm fault.
- */
-#define RETVAL 0
-int __attribute__((optimize("omit-frame-pointer")))
-USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n)
-#endif
-{
- char *__restrict dst1 = (char *)dstv;
- const char *__restrict src1 = (const char *)srcv;
- const char *__restrict src1_end;
- const char *__restrict prefetch;
- op_t *__restrict dst8; /* 8-byte pointer to destination memory. */
- op_t final; /* Final bytes to write to trailing word, if any */
- long i;
-
- if (n < 16) {
- for (; n; n--)
- ST1(dst1++, LD1(src1++));
- return RETVAL;
- }
-
- /*
- * Locate the end of source memory we will copy. Don't
- * prefetch past this.
- */
- src1_end = src1 + n - 1;
-
- /* Prefetch ahead a few cache lines, but not past the end. */
- prefetch = src1;
- for (i = 0; i < PREFETCH_LINES_AHEAD; i++) {
- __insn_prefetch(prefetch);
- prefetch += CHIP_L2_LINE_SIZE();
- prefetch = (prefetch < src1_end) ? prefetch : src1;
- }
-
- /* Copy bytes until dst is word-aligned. */
- for (; (uintptr_t)dst1 & (sizeof(op_t) - 1); n--)
- ST1(dst1++, LD1(src1++));
-
- /* 8-byte pointer to destination memory. */
- dst8 = (op_t *)dst1;
-
- if (__builtin_expect((uintptr_t)src1 & (sizeof(op_t) - 1), 0)) {
- /* Unaligned copy. */
-
- op_t tmp0 = 0, tmp1 = 0, tmp2, tmp3;
- const op_t *src8 = (const op_t *) ((uintptr_t)src1 &
- -sizeof(op_t));
- const void *srci = (void *)src1;
- int m;
-
- m = (CHIP_L2_LINE_SIZE() << 2) -
- (((uintptr_t)dst8) & ((CHIP_L2_LINE_SIZE() << 2) - 1));
- m = (n < m) ? n : m;
- m /= sizeof(op_t);
-
- /* Copy until 'dst' is cache-line-aligned. */
- n -= (sizeof(op_t) * m);
-
- switch (m % 4) {
- case 0:
- if (__builtin_expect(!m, 0))
- goto _M0;
- tmp1 = LD8(src8++);
- tmp2 = LD8(src8++);
- goto _8B3;
- case 2:
- m += 2;
- tmp3 = LD8(src8++);
- tmp0 = LD8(src8++);
- goto _8B1;
- case 3:
- m += 1;
- tmp2 = LD8(src8++);
- tmp3 = LD8(src8++);
- goto _8B2;
- case 1:
- m--;
- tmp0 = LD8(src8++);
- tmp1 = LD8(src8++);
- if (__builtin_expect(!m, 0))
- goto _8B0;
- }
-
- do {
- tmp2 = LD8(src8++);
- tmp0 = __insn_dblalign(tmp0, tmp1, srci);
- ST8(dst8++, tmp0);
-_8B3:
- tmp3 = LD8(src8++);
- tmp1 = __insn_dblalign(tmp1, tmp2, srci);
- ST8(dst8++, tmp1);
-_8B2:
- tmp0 = LD8(src8++);
- tmp2 = __insn_dblalign(tmp2, tmp3, srci);
- ST8(dst8++, tmp2);
-_8B1:
- tmp1 = LD8(src8++);
- tmp3 = __insn_dblalign(tmp3, tmp0, srci);
- ST8(dst8++, tmp3);
- m -= 4;
- } while (m);
-
-_8B0:
- tmp0 = __insn_dblalign(tmp0, tmp1, srci);
- ST8(dst8++, tmp0);
- src8--;
-
-_M0:
- if (__builtin_expect(n >= CHIP_L2_LINE_SIZE(), 0)) {
- op_t tmp4, tmp5, tmp6, tmp7, tmp8;
-
- prefetch = ((const char *)src8) +
- CHIP_L2_LINE_SIZE() * PREFETCH_LINES_AHEAD;
-
- for (tmp0 = LD8(src8++); n >= CHIP_L2_LINE_SIZE();
- n -= CHIP_L2_LINE_SIZE()) {
- /* Prefetch and advance to next line to
- prefetch, but don't go past the end. */
- __insn_prefetch(prefetch);
-
- /* Make sure prefetch got scheduled
- earlier. */
- __asm__ ("" : : : "memory");
-
- prefetch += CHIP_L2_LINE_SIZE();
- prefetch = (prefetch < src1_end) ? prefetch :
- (const char *) src8;
-
- tmp1 = LD8(src8++);
- tmp2 = LD8(src8++);
- tmp3 = LD8(src8++);
- tmp4 = LD8(src8++);
- tmp5 = LD8(src8++);
- tmp6 = LD8(src8++);
- tmp7 = LD8(src8++);
- tmp8 = LD8(src8++);
-
- tmp0 = __insn_dblalign(tmp0, tmp1, srci);
- tmp1 = __insn_dblalign(tmp1, tmp2, srci);
- tmp2 = __insn_dblalign(tmp2, tmp3, srci);
- tmp3 = __insn_dblalign(tmp3, tmp4, srci);
- tmp4 = __insn_dblalign(tmp4, tmp5, srci);
- tmp5 = __insn_dblalign(tmp5, tmp6, srci);
- tmp6 = __insn_dblalign(tmp6, tmp7, srci);
- tmp7 = __insn_dblalign(tmp7, tmp8, srci);
-
- __insn_wh64(dst8);
-
- ST8(dst8++, tmp0);
- ST8(dst8++, tmp1);
- ST8(dst8++, tmp2);
- ST8(dst8++, tmp3);
- ST8(dst8++, tmp4);
- ST8(dst8++, tmp5);
- ST8(dst8++, tmp6);
- ST8(dst8++, tmp7);
-
- tmp0 = tmp8;
- }
- src8--;
- }
-
-	/* Copy the remaining 8-byte chunks. */
- if (n >= sizeof(op_t)) {
- tmp0 = LD8(src8++);
- for (; n >= sizeof(op_t); n -= sizeof(op_t)) {
- tmp1 = LD8(src8++);
- tmp0 = __insn_dblalign(tmp0, tmp1, srci);
- ST8(dst8++, tmp0);
- tmp0 = tmp1;
- }
- src8--;
- }
-
- if (n == 0)
- return RETVAL;
-
- tmp0 = LD8(src8++);
- tmp1 = ((const char *)src8 <= src1_end)
- ? LD8((op_t *)src8) : 0;
- final = __insn_dblalign(tmp0, tmp1, srci);
-
- } else {
- /* Aligned copy. */
-
- const op_t *__restrict src8 = (const op_t *)src1;
-
- /* src8 and dst8 are both word-aligned. */
- if (n >= CHIP_L2_LINE_SIZE()) {
- /* Copy until 'dst' is cache-line-aligned. */
- for (; (uintptr_t)dst8 & (CHIP_L2_LINE_SIZE() - 1);
- n -= sizeof(op_t))
- ST8(dst8++, LD8(src8++));
-
- for (; n >= CHIP_L2_LINE_SIZE(); ) {
- op_t tmp0, tmp1, tmp2, tmp3;
- op_t tmp4, tmp5, tmp6, tmp7;
-
- /*
- * Prefetch and advance to next line
- * to prefetch, but don't go past the
- * end.
- */
- __insn_prefetch(prefetch);
-
- /* Make sure prefetch got scheduled
- earlier. */
- __asm__ ("" : : : "memory");
-
- prefetch += CHIP_L2_LINE_SIZE();
- prefetch = (prefetch < src1_end) ? prefetch :
- (const char *)src8;
-
- /*
- * Do all the loads before wh64. This
- * is necessary if [src8, src8+7] and
- * [dst8, dst8+7] share the same cache
- * line and dst8 <= src8, as can be
- * the case when called from memmove,
- * or with code tested on x86 whose
- * memcpy always works with forward
- * copies.
- */
- tmp0 = LD8(src8++);
- tmp1 = LD8(src8++);
- tmp2 = LD8(src8++);
- tmp3 = LD8(src8++);
- tmp4 = LD8(src8++);
- tmp5 = LD8(src8++);
- tmp6 = LD8(src8++);
- tmp7 = LD8(src8++);
-
- /* wh64 and wait for tmp7 load completion. */
- __asm__ ("move %0, %0; wh64 %1\n"
- : : "r"(tmp7), "r"(dst8));
-
- ST8(dst8++, tmp0);
- ST8(dst8++, tmp1);
- ST8(dst8++, tmp2);
- ST8(dst8++, tmp3);
- ST8(dst8++, tmp4);
- ST8(dst8++, tmp5);
- ST8(dst8++, tmp6);
- ST8(dst8++, tmp7);
-
- n -= CHIP_L2_LINE_SIZE();
- }
-#if CHIP_L2_LINE_SIZE() != 64
-# error "Fix code that assumes particular L2 cache line size."
-#endif
- }
-
- for (; n >= sizeof(op_t); n -= sizeof(op_t))
- ST8(dst8++, LD8(src8++));
-
- if (__builtin_expect(n == 0, 1))
- return RETVAL;
-
- final = LD8(src8);
- }
-
- /* n != 0 if we get here. Write out any trailing bytes. */
- dst1 = (char *)dst8;
-#ifndef __BIG_ENDIAN__
- if (n & 4) {
- ST4((uint32_t *)dst1, final);
- dst1 += 4;
- final >>= 32;
- n &= 3;
- }
- if (n & 2) {
- ST2((uint16_t *)dst1, final);
- dst1 += 2;
- final >>= 16;
- n &= 1;
- }
- if (n)
- ST1((uint8_t *)dst1, final);
-#else
- if (n & 4) {
- ST4((uint32_t *)dst1, final >> 32);
- dst1 += 4;
- }
- else
- {
- final >>= 32;
- }
- if (n & 2) {
- ST2((uint16_t *)dst1, final >> 16);
- dst1 += 2;
- }
- else
- {
- final >>= 16;
- }
- if (n & 1)
- ST1((uint8_t *)dst1, final >> 8);
-#endif
-
- return RETVAL;
-}
-
-#ifdef USERCOPY_FUNC
-#undef ST1
-#undef ST2
-#undef ST4
-#undef ST8
-#undef LD1
-#undef LD2
-#undef LD4
-#undef LD8
-#undef USERCOPY_FUNC
-#endif
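
The unaligned path above keeps a pipeline of aligned loads and funnels each adjacent pair through __insn_dblalign to synthesize the unaligned doubleword to store. A rough portable equivalent of that merge step, assuming a little-endian layout and a nonzero misalignment (plain C, not the Tile intrinsic):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Merge two adjacent aligned 64-bit words into the unaligned doubleword that
 * starts 'ofs' bytes into 'lo' (1 <= ofs <= 7); little-endian layout assumed.
 */
static uint64_t merge_unaligned(uint64_t lo, uint64_t hi, unsigned ofs)
{
	return (lo >> (8 * ofs)) | (hi << (8 * (8 - ofs)));
}

int main(void)
{
	uint8_t bytes[16];
	uint64_t lo, hi, expect;
	unsigned ofs = 3;
	int i;

	for (i = 0; i < 16; i++)
		bytes[i] = (uint8_t)i;

	memcpy(&lo, bytes, 8);			/* aligned word pair ... */
	memcpy(&hi, bytes + 8, 8);
	memcpy(&expect, bytes + ofs, 8);	/* ... vs. an unaligned read */

	printf("%s\n", merge_unaligned(lo, hi, ofs) == expect ?
	       "match" : "mismatch");		/* prints "match" */
	return 0;
}
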
diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c
deleted file mode 100644
index a3fea9fd973e..000000000000
--- a/arch/tile/lib/memcpy_user_64.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright 2011 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- *
- * Do memcpy(), but trap and return "n" when a load or store faults.
- *
- * Note: this idiom only works when memcpy() compiles to a leaf function.
- * Here "leaf function" means not only that it makes no calls, but also
- * that it performs no stack operations (sp, stack frame pointer) and
- * uses no callee-saved registers; otherwise "jrp lr" would be incorrect,
- * since stack-frame unwinding is bypassed. Since memcpy() is not complex,
- * these conditions are satisfied here, but we need to be careful when
- * modifying this file. This is not a clean solution but is the best
- * one so far.
- *
- * Also note that we are capturing "n" from the containing scope here.
- */
-
-#define _ST(p, inst, v) \
- ({ \
- asm("1: " #inst " %0, %1;" \
- ".pushsection .coldtext,\"ax\";" \
- "2: { move r0, %2; jrp lr };" \
- ".section __ex_table,\"a\";" \
- ".align 8;" \
- ".quad 1b, 2b;" \
- ".popsection" \
- : "=m" (*(p)) : "r" (v), "r" (n)); \
- })
-
-#define _LD(p, inst) \
- ({ \
- unsigned long __v; \
- asm("1: " #inst " %0, %1;" \
- ".pushsection .coldtext,\"ax\";" \
- "2: { move r0, %2; jrp lr };" \
- ".section __ex_table,\"a\";" \
- ".align 8;" \
- ".quad 1b, 2b;" \
- ".popsection" \
- : "=r" (__v) : "m" (*(p)), "r" (n)); \
- __v; \
- })
-
-#define USERCOPY_FUNC raw_copy_to_user
-#define ST1(p, v) _ST((p), st1, (v))
-#define ST2(p, v) _ST((p), st2, (v))
-#define ST4(p, v) _ST((p), st4, (v))
-#define ST8(p, v) _ST((p), st, (v))
-#define LD1 LD
-#define LD2 LD
-#define LD4 LD
-#define LD8 LD
-#include "memcpy_64.c"
-
-#define USERCOPY_FUNC raw_copy_from_user
-#define ST1 ST
-#define ST2 ST
-#define ST4 ST
-#define ST8 ST
-#define LD1(p) _LD((p), ld1u)
-#define LD2(p) _LD((p), ld2u)
-#define LD4(p) _LD((p), ld4u)
-#define LD8(p) _LD((p), ld)
-#include "memcpy_64.c"
-
-#define USERCOPY_FUNC raw_copy_in_user
-#define ST1(p, v) _ST((p), st1, (v))
-#define ST2(p, v) _ST((p), st2, (v))
-#define ST4(p, v) _ST((p), st4, (v))
-#define ST8(p, v) _ST((p), st, (v))
-#define LD1(p) _LD((p), ld1u)
-#define LD2(p) _LD((p), ld2u)
-#define LD4(p) _LD((p), ld4u)
-#define LD8(p) _LD((p), ld)
-#include "memcpy_64.c"
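
The file above stamps out raw_copy_to_user(), raw_copy_from_user() and raw_copy_in_user() by redefining the LDn/STn macros and re-including memcpy_64.c as a template. A toy, self-contained illustration of that macro-parameterized-body pattern (the faulting inline asm is Tile-specific and omitted; here the template is a macro rather than a re-included file):

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

static uint8_t ld_loud(const uint8_t *p)
{
	printf("ld %p\n", (const void *)p);
	return *p;
}

#define LD_PLAIN(p)	(*(p))
#define ST_PLAIN(p, v)	(*(p) = (v))
#define LD_LOUD(p)	ld_loud(p)

/* One copy body, stamped out once per combination of load/store hooks. */
#define DEFINE_COPY(name, LD1, ST1)					\
static size_t name(uint8_t *dst, const uint8_t *src, size_t n)		\
{									\
	while (n--)							\
		ST1(dst, LD1(src)), dst++, src++;			\
	return 0;	/* "bytes not copied"; always 0 here */		\
}

DEFINE_COPY(copy_plain, LD_PLAIN, ST_PLAIN)	/* plays the memcpy role */
DEFINE_COPY(copy_traced, LD_LOUD, ST_PLAIN)	/* plays a user-copy variant */

int main(void)
{
	uint8_t a[4] = { 1, 2, 3, 4 }, b[4] = { 0 }, c[4] = { 0 };

	copy_plain(b, a, sizeof(a));
	copy_traced(c, a, sizeof(a));
	printf("%d %d\n", b[3], c[3]);		/* prints "4 4" */
	return 0;
}
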
diff --git a/arch/tile/lib/memmove.c b/arch/tile/lib/memmove.c
deleted file mode 100644
index fd615ae6ade7..000000000000
--- a/arch/tile/lib/memmove.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/module.h>
-
-void *memmove(void *dest, const void *src, size_t n)
-{
- if ((const char *)src >= (char *)dest + n
- || (char *)dest >= (const char *)src + n) {
- /* We found no overlap, so let memcpy do all the heavy
- * lifting (prefetching, etc.)
- */
- return memcpy(dest, src, n);
- }
-
- if (n != 0) {
- const uint8_t *in;
- uint8_t x;
- uint8_t *out;
- int stride;
-
- if (src < dest) {
- /* copy backwards */
- in = (const uint8_t *)src + n - 1;
- out = (uint8_t *)dest + n - 1;
- stride = -1;
- } else {
- /* copy forwards */
- in = (const uint8_t *)src;
- out = (uint8_t *)dest;
- stride = 1;
- }
-
- /* Manually software-pipeline this loop. */
- x = *in;
- in += stride;
-
- while (--n != 0) {
- *out = x;
- out += stride;
- x = *in;
- in += stride;
- }
-
- *out = x;
- }
-
- return dest;
-}
-EXPORT_SYMBOL(memmove);
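
The overlap test above hands disjoint buffers to memcpy and otherwise falls back to a byte loop whose direction depends on whether src precedes dest. A short usage example showing why the backward copy matters for an overlapping right shift:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char a[] = "abcdef";
	char b[] = "abcdef";
	size_t i;

	/* Overlapping shift right by two: src < dest, so copy backwards. */
	memmove(a + 2, a, 4);
	printf("%s\n", a);		/* "ababcd" */

	/* A naive forward byte loop re-reads bytes it already clobbered. */
	for (i = 0; i < 4; i++)
		b[2 + i] = b[i];
	printf("%s\n", b);		/* "ababab" */
	return 0;
}
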
diff --git a/arch/tile/lib/memset_32.c b/arch/tile/lib/memset_32.c
deleted file mode 100644
index 2042bfe6595f..000000000000
--- a/arch/tile/lib/memset_32.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include <arch/chip.h>
-
-void *memset(void *s, int c, size_t n)
-{
- uint32_t *out32;
- int n32;
- uint32_t v16, v32;
- uint8_t *out8 = s;
- int to_align32;
-
- /* Experimentation shows that a trivial tight loop is a win up until
- * around a size of 20, where writing a word at a time starts to win.
- */
-#define BYTE_CUTOFF 20
-
-#if BYTE_CUTOFF < 3
-	/* This must be at least this big, or some code later
- * on doesn't work.
- */
-#error "BYTE_CUTOFF is too small"
-#endif
-
- if (n < BYTE_CUTOFF) {
- /* Strangely, this turns out to be the tightest way to
- * write this loop.
- */
- if (n != 0) {
- do {
- /* Strangely, combining these into one line
- * performs worse.
- */
- *out8 = c;
- out8++;
- } while (--n != 0);
- }
-
- return s;
- }
-
- /* Align 'out8'. We know n >= 3 so this won't write past the end. */
- while (((uintptr_t) out8 & 3) != 0) {
- *out8++ = c;
- --n;
- }
-
- /* Align 'n'. */
- while (n & 3)
- out8[--n] = c;
-
- out32 = (uint32_t *) out8;
- n32 = n >> 2;
-
- /* Tile input byte out to 32 bits. */
- v16 = __insn_intlb(c, c);
- v32 = __insn_intlh(v16, v16);
-
- /* This must be at least 8 or the following loop doesn't work. */
-#define CACHE_LINE_SIZE_IN_WORDS (CHIP_L2_LINE_SIZE() / 4)
-
- /* Determine how many words we need to emit before the 'out32'
- * pointer becomes aligned modulo the cache line size.
- */
- to_align32 =
- (-((uintptr_t)out32 >> 2)) & (CACHE_LINE_SIZE_IN_WORDS - 1);
-
- /* Only bother aligning and using wh64 if there is at least
- * one full cache line to process. This check also prevents
- * overrunning the end of the buffer with alignment words.
- */
- if (to_align32 <= n32 - CACHE_LINE_SIZE_IN_WORDS) {
- int lines_left;
-
- /* Align out32 mod the cache line size so we can use wh64. */
- n32 -= to_align32;
- for (; to_align32 != 0; to_align32--) {
- *out32 = v32;
- out32++;
- }
-
- /* Use unsigned divide to turn this into a right shift. */
- lines_left = (unsigned)n32 / CACHE_LINE_SIZE_IN_WORDS;
-
- do {
- /* Only wh64 a few lines at a time, so we don't
- * exceed the maximum number of victim lines.
- */
- int x = ((lines_left < CHIP_MAX_OUTSTANDING_VICTIMS())
- ? lines_left
- : CHIP_MAX_OUTSTANDING_VICTIMS());
- uint32_t *wh = out32;
- int i = x;
- int j;
-
- lines_left -= x;
-
- do {
- __insn_wh64(wh);
- wh += CACHE_LINE_SIZE_IN_WORDS;
- } while (--i);
-
- for (j = x * (CACHE_LINE_SIZE_IN_WORDS / 4);
- j != 0; j--) {
- *out32++ = v32;
- *out32++ = v32;
- *out32++ = v32;
- *out32++ = v32;
- }
- } while (lines_left != 0);
-
- /* We processed all full lines above, so only this many
- * words remain to be processed.
- */
- n32 &= CACHE_LINE_SIZE_IN_WORDS - 1;
- }
-
- /* Now handle any leftover values. */
- if (n32 != 0) {
- do {
- *out32 = v32;
- out32++;
- } while (--n32 != 0);
- }
-
- return s;
-}
-EXPORT_SYMBOL(memset);
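
Setting aside the Tile-specific wh64 staging, the skeleton above is: replicate the fill byte across a word, handle the unaligned head and tail by hand, and store whole words in between. A portable sketch of that skeleton, with a 0x01010101 multiply standing in for __insn_intlb/__insn_intlh (like the kernel routine, it stores words through a cast pointer, which relies on -fno-strict-aliasing or equivalent):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

static void *memset32_sketch(void *s, int c, size_t n)
{
	uint8_t *out8 = s;
	uint32_t v32 = 0x01010101u * (uint8_t)c;	/* byte -> word */
	uint32_t *out32;

	/* Head: byte stores until the pointer is 4-byte aligned. */
	while (n && ((uintptr_t)out8 & 3)) {
		*out8++ = (uint8_t)c;
		n--;
	}

	/* Body: one aligned word store per 4 bytes. */
	out32 = (uint32_t *)out8;
	for (; n >= 4; n -= 4)
		*out32++ = v32;

	/* Tail: whatever is left over. */
	out8 = (uint8_t *)out32;
	while (n--)
		*out8++ = (uint8_t)c;
	return s;
}

int main(void)
{
	char buf[37];

	memset32_sketch(buf, 'x', sizeof(buf) - 1);
	buf[36] = '\0';
	printf("%s\n", buf);		/* 36 'x' characters */
	return 0;
}
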
diff --git a/arch/tile/lib/memset_64.c b/arch/tile/lib/memset_64.c
deleted file mode 100644
index 03ef69cd73de..000000000000
--- a/arch/tile/lib/memset_64.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright 2011 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include <arch/chip.h>
-#include "string-endian.h"
-
-void *memset(void *s, int c, size_t n)
-{
- uint64_t *out64;
- int n64, to_align64;
- uint64_t v64;
- uint8_t *out8 = s;
-
- /* Experimentation shows that a trivial tight loop is a win up until
- * around a size of 20, where writing a word at a time starts to win.
- */
-#define BYTE_CUTOFF 20
-
-#if BYTE_CUTOFF < 7
-	/* This must be at least this big, or some code later
- * on doesn't work.
- */
-#error "BYTE_CUTOFF is too small"
-#endif
-
- if (n < BYTE_CUTOFF) {
- /* Strangely, this turns out to be the tightest way to
- * write this loop.
- */
- if (n != 0) {
- do {
- /* Strangely, combining these into one line
- * performs worse.
- */
- *out8 = c;
- out8++;
- } while (--n != 0);
- }
-
- return s;
- }
-
- /* Align 'out8'. We know n >= 7 so this won't write past the end. */
- while (((uintptr_t) out8 & 7) != 0) {
- *out8++ = c;
- --n;
- }
-
- /* Align 'n'. */
- while (n & 7)
- out8[--n] = c;
-
- out64 = (uint64_t *) out8;
- n64 = n >> 3;
-
- /* Tile input byte out to 64 bits. */
- v64 = copy_byte(c);
-
- /* This must be at least 8 or the following loop doesn't work. */
-#define CACHE_LINE_SIZE_IN_DOUBLEWORDS (CHIP_L2_LINE_SIZE() / 8)
-
-	/* Determine how many words we need to emit before the 'out64'
- * pointer becomes aligned modulo the cache line size.
- */
- to_align64 = (-((uintptr_t)out64 >> 3)) &
- (CACHE_LINE_SIZE_IN_DOUBLEWORDS - 1);
-
- /* Only bother aligning and using wh64 if there is at least
- * one full cache line to process. This check also prevents
- * overrunning the end of the buffer with alignment words.
- */
- if (to_align64 <= n64 - CACHE_LINE_SIZE_IN_DOUBLEWORDS) {
- int lines_left;
-
- /* Align out64 mod the cache line size so we can use wh64. */
- n64 -= to_align64;
- for (; to_align64 != 0; to_align64--) {
- *out64 = v64;
- out64++;
- }
-
- /* Use unsigned divide to turn this into a right shift. */
- lines_left = (unsigned)n64 / CACHE_LINE_SIZE_IN_DOUBLEWORDS;
-
- do {
- /* Only wh64 a few lines at a time, so we don't
- * exceed the maximum number of victim lines.
- */
- int x = ((lines_left < CHIP_MAX_OUTSTANDING_VICTIMS())
- ? lines_left
- : CHIP_MAX_OUTSTANDING_VICTIMS());
- uint64_t *wh = out64;
- int i = x;
- int j;
-
- lines_left -= x;
-
- do {
- __insn_wh64(wh);
- wh += CACHE_LINE_SIZE_IN_DOUBLEWORDS;
- } while (--i);
-
- for (j = x * (CACHE_LINE_SIZE_IN_DOUBLEWORDS / 4);
- j != 0; j--) {
- *out64++ = v64;
- *out64++ = v64;
- *out64++ = v64;
- *out64++ = v64;
- }
- } while (lines_left != 0);
-
- /* We processed all full lines above, so only this many
- * words remain to be processed.
- */
- n64 &= CACHE_LINE_SIZE_IN_DOUBLEWORDS - 1;
- }
-
- /* Now handle any leftover values. */
- if (n64 != 0) {
- do {
- *out64 = v64;
- out64++;
- } while (--n64 != 0);
- }
-
- return s;
-}
-EXPORT_SYMBOL(memset);
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c
deleted file mode 100644
index db9333f2447c..000000000000
--- a/arch/tile/lib/spinlock_32.c
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/spinlock.h>
-#include <linux/module.h>
-#include <asm/processor.h>
-#include <arch/spr_def.h>
-
-#include "spinlock_common.h"
-
-void arch_spin_lock(arch_spinlock_t *lock)
-{
- int my_ticket;
- int iterations = 0;
- int delta;
-
- while ((my_ticket = __insn_tns((void *)&lock->next_ticket)) & 1)
- delay_backoff(iterations++);
-
- /* Increment the next ticket number, implicitly releasing tns lock. */
- lock->next_ticket = my_ticket + TICKET_QUANTUM;
-
- /* Wait until it's our turn. */
- while ((delta = my_ticket - lock->current_ticket) != 0)
- relax((128 / CYCLES_PER_RELAX_LOOP) * delta);
-}
-EXPORT_SYMBOL(arch_spin_lock);
-
-int arch_spin_trylock(arch_spinlock_t *lock)
-{
- /*
- * Grab a ticket; no need to retry if it's busy, we'll just
- * treat that the same as "locked", since someone else
- * will lock it momentarily anyway.
- */
- int my_ticket = __insn_tns((void *)&lock->next_ticket);
-
- if (my_ticket == lock->current_ticket) {
- /* Not currently locked, so lock it by keeping this ticket. */
- lock->next_ticket = my_ticket + TICKET_QUANTUM;
- /* Success! */
- return 1;
- }
-
- if (!(my_ticket & 1)) {
- /* Release next_ticket. */
- lock->next_ticket = my_ticket;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(arch_spin_trylock);
-
-/*
- * The low byte is always reserved to be the marker for a "tns" operation
- * since the low bit is set to "1" by a tns. The next seven bits are
- * zeroes. The next byte holds the "next" writer value, i.e. the ticket
- * available for the next task that wants to write. The third byte holds
- * the current writer value, i.e. the writer who holds the current ticket.
- * If current == next == 0, there are no interested writers.
- */
-#define WR_NEXT_SHIFT _WR_NEXT_SHIFT
-#define WR_CURR_SHIFT _WR_CURR_SHIFT
-#define WR_WIDTH _WR_WIDTH
-#define WR_MASK ((1 << WR_WIDTH) - 1)
-
-/*
- * The last eight bits hold the active reader count. This has to be
- * zero before a writer can start to write.
- */
-#define RD_COUNT_SHIFT _RD_COUNT_SHIFT
-#define RD_COUNT_WIDTH _RD_COUNT_WIDTH
-#define RD_COUNT_MASK ((1 << RD_COUNT_WIDTH) - 1)
-
-
-/*
- * We can get the read lock if everything but the reader bits (which
- * are in the high part of the word) is zero, i.e. no active or
- * waiting writers, no tns.
- *
- * We guard the tns/store-back with an interrupt critical section to
- * preserve the semantic that the same read lock can be acquired in an
- * interrupt context.
- */
-int arch_read_trylock(arch_rwlock_t *rwlock)
-{
- u32 val;
- __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1);
- val = __insn_tns((int *)&rwlock->lock);
- if (likely((val << _RD_COUNT_WIDTH) == 0)) {
- val += 1 << RD_COUNT_SHIFT;
- rwlock->lock = val;
- __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
- BUG_ON(val == 0); /* we don't expect wraparound */
- return 1;
- }
- if ((val & 1) == 0)
- rwlock->lock = val;
- __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
- return 0;
-}
-EXPORT_SYMBOL(arch_read_trylock);
-
-/*
- * Spin doing arch_read_trylock() until we acquire the lock.
- * ISSUE: This approach can permanently starve readers. A reader who sees
- * a writer could instead take a ticket lock (just like a writer would),
- * and atomically enter read mode (with 1 reader) when it gets the ticket.
- * This way both readers and writers would always make forward progress
- * in a finite time.
- */
-void arch_read_lock(arch_rwlock_t *rwlock)
-{
- u32 iterations = 0;
- while (unlikely(!arch_read_trylock(rwlock)))
- delay_backoff(iterations++);
-}
-EXPORT_SYMBOL(arch_read_lock);
-
-void arch_read_unlock(arch_rwlock_t *rwlock)
-{
- u32 val, iterations = 0;
-
- mb(); /* guarantee anything modified under the lock is visible */
- for (;;) {
- __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1);
- val = __insn_tns((int *)&rwlock->lock);
- if (likely((val & 1) == 0)) {
- rwlock->lock = val - (1 << _RD_COUNT_SHIFT);
- __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
- break;
- }
- __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
- delay_backoff(iterations++);
- }
-}
-EXPORT_SYMBOL(arch_read_unlock);
-
-/*
- * We don't need an interrupt critical section here (unlike for
- * arch_read_lock) since we should never use a bare write lock where
- * it could be interrupted by code that could try to re-acquire it.
- */
-void arch_write_lock(arch_rwlock_t *rwlock)
-{
- /*
- * The trailing underscore on this variable (and curr_ below)
- * reminds us that the high bits are garbage; we mask them out
- * when we compare them.
- */
- u32 my_ticket_;
- u32 iterations = 0;
- u32 val = __insn_tns((int *)&rwlock->lock);
-
- if (likely(val == 0)) {
- rwlock->lock = 1 << _WR_NEXT_SHIFT;
- return;
- }
-
- /*
- * Wait until there are no readers, then bump up the next
- * field and capture the ticket value.
- */
- for (;;) {
- if (!(val & 1)) {
- if ((val >> RD_COUNT_SHIFT) == 0)
- break;
- rwlock->lock = val;
- }
- delay_backoff(iterations++);
- val = __insn_tns((int *)&rwlock->lock);
- }
-
- /* Take out the next ticket and extract my ticket value. */
- rwlock->lock = __insn_addb(val, 1 << WR_NEXT_SHIFT);
- my_ticket_ = val >> WR_NEXT_SHIFT;
-
- /* Wait until the "current" field matches our ticket. */
- for (;;) {
- u32 curr_ = val >> WR_CURR_SHIFT;
- u32 delta = ((my_ticket_ - curr_) & WR_MASK);
- if (likely(delta == 0))
- break;
-
- /* Delay based on how many lock-holders are still out there. */
- relax((256 / CYCLES_PER_RELAX_LOOP) * delta);
-
- /*
- * Get a non-tns value to check; we don't need to tns
- * it ourselves. Since we're not tns'ing, we retry
- * more rapidly to get a valid value.
- */
- while ((val = rwlock->lock) & 1)
- relax(4);
- }
-}
-EXPORT_SYMBOL(arch_write_lock);
-
-int arch_write_trylock(arch_rwlock_t *rwlock)
-{
- u32 val = __insn_tns((int *)&rwlock->lock);
-
- /*
- * If a tns is in progress, or there's a waiting or active locker,
- * or active readers, we can't take the lock, so give up.
- */
- if (unlikely(val != 0)) {
- if (!(val & 1))
- rwlock->lock = val;
- return 0;
- }
-
- /* Set the "next" field to mark it locked. */
- rwlock->lock = 1 << _WR_NEXT_SHIFT;
- return 1;
-}
-EXPORT_SYMBOL(arch_write_trylock);
-
-void arch_write_unlock(arch_rwlock_t *rwlock)
-{
- u32 val, eq, mask;
-
- mb(); /* guarantee anything modified under the lock is visible */
- val = __insn_tns((int *)&rwlock->lock);
- if (likely(val == (1 << _WR_NEXT_SHIFT))) {
- rwlock->lock = 0;
- return;
- }
- while (unlikely(val & 1)) {
- /* Limited backoff since we are the highest-priority task. */
- relax(4);
- val = __insn_tns((int *)&rwlock->lock);
- }
- mask = 1 << WR_CURR_SHIFT;
- val = __insn_addb(val, mask);
- eq = __insn_seqb(val, val << (WR_CURR_SHIFT - WR_NEXT_SHIFT));
- val = __insn_mz(eq & mask, val);
- rwlock->lock = val;
-}
-EXPORT_SYMBOL(arch_write_unlock);
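
The tns-based lock above is a ticket lock: each taker grabs the next ticket and spins until current_ticket reaches it, which grants the lock in FIFO order. A compact sketch of the same idea using C11 atomics, without the tns encoding or the proportional backoff:

#include <stdatomic.h>

typedef struct {
	_Atomic unsigned next;		/* next ticket to hand out */
	_Atomic unsigned current;	/* ticket currently being served */
} ticket_lock_t;

static void ticket_lock(ticket_lock_t *l)
{
	/* Take a ticket; this fetch-add plays the role of the tns above. */
	unsigned me = atomic_fetch_add_explicit(&l->next, 1,
						memory_order_relaxed);

	/* Wait for our turn; arch_spin_lock additionally backs off in
	 * proportion to how far away our ticket still is.
	 */
	while (atomic_load_explicit(&l->current, memory_order_acquire) != me)
		;
}

static void ticket_unlock(ticket_lock_t *l)
{
	atomic_fetch_add_explicit(&l->current, 1, memory_order_release);
}

int main(void)
{
	static ticket_lock_t lock;	/* zero-initialized: unlocked */

	ticket_lock(&lock);
	/* ...critical section... */
	ticket_unlock(&lock);
	return 0;
}
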
diff --git a/arch/tile/lib/spinlock_64.c b/arch/tile/lib/spinlock_64.c
deleted file mode 100644
index de414c22892f..000000000000
--- a/arch/tile/lib/spinlock_64.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright 2011 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/spinlock.h>
-#include <linux/module.h>
-#include <asm/processor.h>
-
-#include "spinlock_common.h"
-
-/*
- * Read the spinlock value without allocating in our cache and without
- * causing an invalidation to another cpu with a copy of the cacheline.
- * This is important when we are spinning waiting for the lock.
- */
-static inline u32 arch_spin_read_noalloc(void *lock)
-{
- return atomic_cmpxchg((atomic_t *)lock, -1, -1);
-}
-
-/*
- * Wait until the high bits (current) match my ticket.
- * If we notice the overflow bit set on entry, we clear it.
- */
-void arch_spin_lock_slow(arch_spinlock_t *lock, u32 my_ticket)
-{
- if (unlikely(my_ticket & __ARCH_SPIN_NEXT_OVERFLOW)) {
- __insn_fetchand4(&lock->lock, ~__ARCH_SPIN_NEXT_OVERFLOW);
- my_ticket &= ~__ARCH_SPIN_NEXT_OVERFLOW;
- }
-
- for (;;) {
- u32 val = arch_spin_read_noalloc(lock);
- u32 delta = my_ticket - arch_spin_current(val);
- if (delta == 0)
- return;
- relax((128 / CYCLES_PER_RELAX_LOOP) * delta);
- }
-}
-EXPORT_SYMBOL(arch_spin_lock_slow);
-
-/*
- * Check the lock to see if it is plausible, and try to get it with cmpxchg().
- */
-int arch_spin_trylock(arch_spinlock_t *lock)
-{
- u32 val = arch_spin_read_noalloc(lock);
- if (unlikely(arch_spin_current(val) != arch_spin_next(val)))
- return 0;
- return cmpxchg(&lock->lock, val, (val + 1) & ~__ARCH_SPIN_NEXT_OVERFLOW)
- == val;
-}
-EXPORT_SYMBOL(arch_spin_trylock);
-
-
-/*
- * If the read lock fails due to a writer, we retry periodically
- * until the value is positive and we write our incremented reader count.
- */
-void __read_lock_failed(arch_rwlock_t *rw)
-{
- u32 val;
- int iterations = 0;
- do {
- delay_backoff(iterations++);
- val = __insn_fetchaddgez4(&rw->lock, 1);
- } while (unlikely(arch_write_val_locked(val)));
-}
-EXPORT_SYMBOL(__read_lock_failed);
-
-/*
- * If we failed because there were readers, clear the "writer" bit
- * so we don't block additional readers. Otherwise, there was another
- * writer anyway, so our "fetchor" made no difference. Then wait,
- * issuing periodic fetchor instructions, till we get the lock.
- */
-void __write_lock_failed(arch_rwlock_t *rw, u32 val)
-{
- int iterations = 0;
- do {
- if (!arch_write_val_locked(val))
- val = __insn_fetchand4(&rw->lock, ~__WRITE_LOCK_BIT);
- delay_backoff(iterations++);
- val = __insn_fetchor4(&rw->lock, __WRITE_LOCK_BIT);
- } while (val != 0);
-}
-EXPORT_SYMBOL(__write_lock_failed);
diff --git a/arch/tile/lib/spinlock_common.h b/arch/tile/lib/spinlock_common.h
deleted file mode 100644
index 6ac37509faca..000000000000
--- a/arch/tile/lib/spinlock_common.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- * This file is included into spinlock_32.c or _64.c.
- */
-
-/*
- * The mfspr in __spinlock_relax() is 5 or 6 cycles plus 2 for loop
- * overhead.
- */
-#ifdef __tilegx__
-#define CYCLES_PER_RELAX_LOOP 7
-#else
-#define CYCLES_PER_RELAX_LOOP 8
-#endif
-
-/*
- * Idle the core for CYCLES_PER_RELAX_LOOP * iterations cycles.
- */
-static inline void
-relax(int iterations)
-{
- for (/*above*/; iterations > 0; iterations--)
- __insn_mfspr(SPR_PASS);
- barrier();
-}
-
-/* Perform bounded exponential backoff.*/
-static void delay_backoff(int iterations)
-{
- u32 exponent, loops;
-
- /*
- * 2^exponent is how many times we go around the loop,
- * which takes 8 cycles. We want to start with a 16- to 31-cycle
- * loop, so we need to go around minimum 2 = 2^1 times, so we
- * bias the original value up by 1.
- */
- exponent = iterations + 1;
-
- /*
- * Don't allow exponent to exceed 7, so we have 128 loops,
- * or 1,024 (to 2,047) cycles, as our maximum.
- */
- if (exponent > 8)
- exponent = 8;
-
- loops = 1 << exponent;
-
- /* Add a randomness factor so two cpus never get in lock step. */
- loops += __insn_crc32_32(stack_pointer, get_cycles_low()) &
- (loops - 1);
-
- relax(loops);
-}
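
delay_backoff() above caps the exponent so the wait never exceeds roughly 1K-2K cycles, and adds cycle-counter-derived jitter so contending cpus do not back off in lock step. A generic sketch of the same shape, with rand() standing in for crc32(stack_pointer, cycles) and a busy loop standing in for the mfspr SPR_PASS relax:

#include <stdlib.h>

/* Busy-wait roughly 'loops' iterations; the volatile counter keeps the
 * compiler from deleting the loop (the original issues mfspr SPR_PASS).
 */
static void relax_sketch(unsigned loops)
{
	volatile unsigned i;

	for (i = 0; i < loops; i++)
		;
}

static void delay_backoff_sketch(int iterations)
{
	unsigned exponent = (unsigned)iterations + 1;	/* start at 2 loops */
	unsigned loops;

	if (exponent > 8)		/* bound the maximum backoff */
		exponent = 8;
	loops = 1u << exponent;

	/* Jitter in [0, loops) so contenders don't stay synchronized;
	 * rand() is only a stand-in for the crc32-of-cycle-counter above.
	 */
	loops += (unsigned)rand() & (loops - 1);

	relax_sketch(loops);
}

int main(void)
{
	int i;

	for (i = 0; i < 10; i++)	/* as a caller would on each retry */
		delay_backoff_sketch(i);
	return 0;
}
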
diff --git a/arch/tile/lib/strchr_32.c b/arch/tile/lib/strchr_32.c
deleted file mode 100644
index 841fe6963019..000000000000
--- a/arch/tile/lib/strchr_32.c
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/module.h>
-
-char *strchr(const char *s, int c)
-{
- int z, g;
-
- /* Get an aligned pointer. */
- const uintptr_t s_int = (uintptr_t) s;
- const uint32_t *p = (const uint32_t *)(s_int & -4);
-
- /* Create four copies of the byte for which we are looking. */
- const uint32_t goal = 0x01010101 * (uint8_t) c;
-
- /* Read the first aligned word, but force bytes before the string to
- * match neither zero nor goal (we make sure the high bit of each
- * byte is 1, and the low 7 bits are all the opposite of the goal
- * byte).
- *
- * Note that this shift count expression works because we know shift
- * counts are taken mod 32.
- */
- const uint32_t before_mask = (1 << (s_int << 3)) - 1;
- uint32_t v = (*p | before_mask) ^ (goal & __insn_shrib(before_mask, 1));
-
- uint32_t zero_matches, goal_matches;
- while (1) {
- /* Look for a terminating '\0'. */
- zero_matches = __insn_seqb(v, 0);
-
- /* Look for the goal byte. */
- goal_matches = __insn_seqb(v, goal);
-
- if (__builtin_expect(zero_matches | goal_matches, 0))
- break;
-
- v = *++p;
- }
-
- z = __insn_ctz(zero_matches);
- g = __insn_ctz(goal_matches);
-
- /* If we found c before '\0' we got a match. Note that if c == '\0'
- * then g == z, and we correctly return the address of the '\0'
- * rather than NULL.
- */
- return (g <= z) ? ((char *)p) + (g >> 3) : NULL;
-}
-EXPORT_SYMBOL(strchr);
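
strchr() above needs two per-word tests, "any byte equal to zero" and "any byte equal to the goal", which __insn_seqb provides directly. On machines without a byte-compare instruction the same tests are usually written with the classic SWAR zero-byte mask; a 64-bit sketch:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Nonzero iff some byte of v is 0x00 (standard "haszero" bit trick). */
static uint64_t has_zero_byte(uint64_t v)
{
	return (v - 0x0101010101010101ULL) & ~v & 0x8080808080808080ULL;
}

/* Nonzero iff some byte of v equals c: XOR maps matching bytes to zero. */
static uint64_t has_byte(uint64_t v, uint8_t c)
{
	return has_zero_byte(v ^ (0x0101010101010101ULL * c));
}

int main(void)
{
	uint64_t w;

	memcpy(&w, "tile-gx\n", 8);
	printf("%d %d\n", has_byte(w, '-') != 0, has_zero_byte(w) != 0);
	/* prints "1 0": contains '-', contains no NUL byte */
	return 0;
}
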
diff --git a/arch/tile/lib/strchr_64.c b/arch/tile/lib/strchr_64.c
deleted file mode 100644
index fe6e31c06f8d..000000000000
--- a/arch/tile/lib/strchr_64.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright 2011 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include "string-endian.h"
-
-char *strchr(const char *s, int c)
-{
- int z, g;
-
- /* Get an aligned pointer. */
- const uintptr_t s_int = (uintptr_t) s;
- const uint64_t *p = (const uint64_t *)(s_int & -8);
-
- /* Create eight copies of the byte for which we are looking. */
- const uint64_t goal = copy_byte(c);
-
- /* Read the first aligned word, but force bytes before the string to
- * match neither zero nor goal (we make sure the high bit of each
- * byte is 1, and the low 7 bits are all the opposite of the goal
- * byte).
- */
- const uint64_t before_mask = MASK(s_int);
- uint64_t v = (*p | before_mask) ^ (goal & __insn_v1shrui(before_mask, 1));
-
- uint64_t zero_matches, goal_matches;
- while (1) {
- /* Look for a terminating '\0'. */
- zero_matches = __insn_v1cmpeqi(v, 0);
-
- /* Look for the goal byte. */
- goal_matches = __insn_v1cmpeq(v, goal);
-
- if (__builtin_expect((zero_matches | goal_matches) != 0, 0))
- break;
-
- v = *++p;
- }
-
- z = CFZ(zero_matches);
- g = CFZ(goal_matches);
-
- /* If we found c before '\0' we got a match. Note that if c == '\0'
- * then g == z, and we correctly return the address of the '\0'
- * rather than NULL.
- */
- return (g <= z) ? ((char *)p) + (g >> 3) : NULL;
-}
-EXPORT_SYMBOL(strchr);
diff --git a/arch/tile/lib/string-endian.h b/arch/tile/lib/string-endian.h
deleted file mode 100644
index 2e49cbfe9371..000000000000
--- a/arch/tile/lib/string-endian.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright 2013 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- *
- * Provide a mask based on the pointer alignment that
- * sets up non-zero bytes before the beginning of the string.
- * The MASK expression works because shift counts are taken mod 64.
- * Also, specify how to count "first" and "last" bits
- * when the bits have been read as a word.
- */
-
-#include <asm/byteorder.h>
-
-#ifdef __LITTLE_ENDIAN
-#define MASK(x) (__insn_shl(1ULL, (x << 3)) - 1)
-#define NULMASK(x) ((2ULL << x) - 1)
-#define CFZ(x) __insn_ctz(x)
-#define REVCZ(x) __insn_clz(x)
-#else
-#define MASK(x) (__insn_shl(-2LL, ((-x << 3) - 1)))
-#define NULMASK(x) (-2LL << (63 - x))
-#define CFZ(x) __insn_clz(x)
-#define REVCZ(x) __insn_ctz(x)
-#endif
-
-/*
- * Create eight copies of the byte in a uint64_t. Byte Shuffle uses
- * the bytes of srcB as the index into the dest vector to select a
- * byte. With all indices of zero, the first byte is copied into all
- * the other bytes.
- */
-static inline uint64_t copy_byte(uint8_t byte)
-{
- return __insn_shufflebytes(byte, 0, 0);
-}
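
MASK() above builds a word whose bytes below the string's starting offset are all ones, so an aligned over-read before the start can be forced to look like "neither zero nor goal". The Tile form leans on mod-64 shift behaviour; a little-endian portable version that avoids that:

#include <stdint.h>
#include <stdio.h>

/* All-ones in the (addr & 7) low-order bytes, zero elsewhere. */
static uint64_t before_mask(uintptr_t addr)
{
	unsigned shift = (unsigned)(addr & 7) * 8;	/* 0..56, never 64 */

	return (1ULL << shift) - 1;
}

int main(void)
{
	printf("%016llx\n", (unsigned long long)before_mask(0x1003));
	/* prints 0000000000ffffff: the three bytes that sit before the
	 * string's first byte in its aligned word are forced non-zero
	 * when OR'd in, just as '*p | MASK(s_int)' does above.
	 */
	return 0;
}
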
diff --git a/arch/tile/lib/strlen_32.c b/arch/tile/lib/strlen_32.c
deleted file mode 100644
index f26f88e11e4a..000000000000
--- a/arch/tile/lib/strlen_32.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/module.h>
-
-size_t strlen(const char *s)
-{
- /* Get an aligned pointer. */
- const uintptr_t s_int = (uintptr_t) s;
- const uint32_t *p = (const uint32_t *)(s_int & -4);
-
- /* Read the first word, but force bytes before the string to be nonzero.
- * This expression works because we know shift counts are taken mod 32.
- */
- uint32_t v = *p | ((1 << (s_int << 3)) - 1);
-
- uint32_t bits;
- while ((bits = __insn_seqb(v, 0)) == 0)
- v = *++p;
-
- return ((const char *)p) + (__insn_ctz(bits) >> 3) - s;
-}
-EXPORT_SYMBOL(strlen);
diff --git a/arch/tile/lib/strlen_64.c b/arch/tile/lib/strlen_64.c
deleted file mode 100644
index 9583fc3361fa..000000000000
--- a/arch/tile/lib/strlen_64.c
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright 2011 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include "string-endian.h"
-
-size_t strlen(const char *s)
-{
- /* Get an aligned pointer. */
- const uintptr_t s_int = (uintptr_t) s;
- const uint64_t *p = (const uint64_t *)(s_int & -8);
-
- /* Read and MASK the first word. */
- uint64_t v = *p | MASK(s_int);
-
- uint64_t bits;
- while ((bits = __insn_v1cmpeqi(v, 0)) == 0)
- v = *++p;
-
- return ((const char *)p) + (CFZ(bits) >> 3) - s;
-}
-EXPORT_SYMBOL(strlen);
diff --git a/arch/tile/lib/strnlen_32.c b/arch/tile/lib/strnlen_32.c
deleted file mode 100644
index 1434141d9e01..000000000000
--- a/arch/tile/lib/strnlen_32.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright 2013 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/module.h>
-
-size_t strnlen(const char *s, size_t count)
-{
- /* Get an aligned pointer. */
- const uintptr_t s_int = (uintptr_t) s;
- const uint32_t *p = (const uint32_t *)(s_int & -4);
- size_t bytes_read = sizeof(*p) - (s_int & (sizeof(*p) - 1));
- size_t len;
- uint32_t v, bits;
-
- /* Avoid page fault risk by not reading any bytes when count is 0. */
- if (count == 0)
- return 0;
-
- /* Read first word, but force bytes before the string to be nonzero. */
- v = *p | ((1 << ((s_int << 3) & 31)) - 1);
-
- while ((bits = __insn_seqb(v, 0)) == 0) {
- if (bytes_read >= count) {
- /* Read COUNT bytes and didn't find the terminator. */
- return count;
- }
- v = *++p;
- bytes_read += sizeof(v);
- }
-
- len = ((const char *) p) + (__insn_ctz(bits) >> 3) - s;
- return (len < count ? len : count);
-}
-EXPORT_SYMBOL(strnlen);
diff --git a/arch/tile/lib/strnlen_64.c b/arch/tile/lib/strnlen_64.c
deleted file mode 100644
index 2e8de6a5136f..000000000000
--- a/arch/tile/lib/strnlen_64.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright 2013 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include "string-endian.h"
-
-size_t strnlen(const char *s, size_t count)
-{
- /* Get an aligned pointer. */
- const uintptr_t s_int = (uintptr_t) s;
- const uint64_t *p = (const uint64_t *)(s_int & -8);
- size_t bytes_read = sizeof(*p) - (s_int & (sizeof(*p) - 1));
- size_t len;
- uint64_t v, bits;
-
- /* Avoid page fault risk by not reading any bytes when count is 0. */
- if (count == 0)
- return 0;
-
- /* Read and MASK the first word. */
- v = *p | MASK(s_int);
-
- while ((bits = __insn_v1cmpeqi(v, 0)) == 0) {
- if (bytes_read >= count) {
- /* Read COUNT bytes and didn't find the terminator. */
- return count;
- }
- v = *++p;
- bytes_read += sizeof(v);
- }
-
- len = ((const char *) p) + (CFZ(bits) >> 3) - s;
- return (len < count ? len : count);
-}
-EXPORT_SYMBOL(strnlen);
diff --git a/arch/tile/lib/uaccess.c b/arch/tile/lib/uaccess.c
deleted file mode 100644
index 030abe3ee4f1..000000000000
--- a/arch/tile/lib/uaccess.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/uaccess.h>
-#include <linux/module.h>
-
-int __range_ok(unsigned long addr, unsigned long size)
-{
- unsigned long limit = current_thread_info()->addr_limit.seg;
- return !((addr < limit && size <= limit - addr) ||
- is_arch_mappable_range(addr, size));
-}
-EXPORT_SYMBOL(__range_ok);
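
The noteworthy detail in __range_ok() above is the overflow-safe form of the bounds test: writing "size <= limit - addr" never forms addr + size, which could wrap for hostile arguments (note also that the kernel helper returns 0 when the range is acceptable). The check in isolation:

#include <stdio.h>

/* Returns 1 iff [addr, addr + size) lies entirely below limit, without ever
 * computing addr + size (which could overflow for hostile values).
 */
static int range_fits(unsigned long addr, unsigned long size,
		      unsigned long limit)
{
	return addr < limit && size <= limit - addr;
}

int main(void)
{
	unsigned long limit = 0xc0000000UL;

	/* Fits exactly up to the limit. */
	printf("%d\n", range_fits(0xbfffff00UL, 0x100UL, limit));	/* 1 */

	/* Huge size: a naive 'addr + size < limit' would wrap and accept. */
	printf("%d\n", range_fits(0xbfffff00UL, ~0UL, limit));		/* 0 */
	return 0;
}
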
diff --git a/arch/tile/lib/usercopy_32.S b/arch/tile/lib/usercopy_32.S
deleted file mode 100644
index db93ad5fae25..000000000000
--- a/arch/tile/lib/usercopy_32.S
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/linkage.h>
-#include <asm/errno.h>
-#include <asm/cache.h>
-#include <arch/chip.h>
-
-/* Access user memory, but use MMU to avoid propagating kernel exceptions. */
-
-/*
- * clear_user_asm takes the user target address in r0 and the
- * number of bytes to zero in r1.
- * It returns the number of uncopiable bytes (hopefully zero) in r0.
- * Note that we don't use a separate .fixup section here since we fall
- * through into the "fixup" code as the last straight-line bundle anyway.
- */
-STD_ENTRY(clear_user_asm)
- { bz r1, 2f; or r2, r0, r1 }
- andi r2, r2, 3
- bzt r2, .Lclear_aligned_user_asm
-1: { sb r0, zero; addi r0, r0, 1; addi r1, r1, -1 }
- bnzt r1, 1b
-2: { move r0, r1; jrp lr }
- .pushsection __ex_table,"a"
- .align 4
- .word 1b, 2b
- .popsection
-
-.Lclear_aligned_user_asm:
-1: { sw r0, zero; addi r0, r0, 4; addi r1, r1, -4 }
- bnzt r1, 1b
-2: { move r0, r1; jrp lr }
- STD_ENDPROC(clear_user_asm)
- .pushsection __ex_table,"a"
- .align 4
- .word 1b, 2b
- .popsection
-
-/*
- * flush_user_asm takes the user target address in r0 and the
- * number of bytes to flush in r1.
- * It returns the number of unflushable bytes (hopefully zero) in r0.
- */
-STD_ENTRY(flush_user_asm)
- bz r1, 2f
- { movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
- { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
- { and r0, r0, r2; and r1, r1, r2 }
- { sub r1, r1, r0 }
-1: { flush r0; addi r1, r1, -CHIP_FLUSH_STRIDE() }
- { addi r0, r0, CHIP_FLUSH_STRIDE(); bnzt r1, 1b }
-2: { move r0, r1; jrp lr }
- STD_ENDPROC(flush_user_asm)
- .pushsection __ex_table,"a"
- .align 4
- .word 1b, 2b
- .popsection
-
-/*
- * finv_user_asm takes the user target address in r0 and the
- * number of bytes to flush-invalidate in r1.
- * It returns the number of not finv'able bytes (hopefully zero) in r0.
- */
-STD_ENTRY(finv_user_asm)
- bz r1, 2f
- { movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
- { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
- { and r0, r0, r2; and r1, r1, r2 }
- { sub r1, r1, r0 }
-1: { finv r0; addi r1, r1, -CHIP_FINV_STRIDE() }
- { addi r0, r0, CHIP_FINV_STRIDE(); bnzt r1, 1b }
-2: { move r0, r1; jrp lr }
- STD_ENDPROC(finv_user_asm)
- .pushsection __ex_table,"a"
- .align 4
- .word 1b, 2b
- .popsection
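
flush_user_asm and finv_user_asm above first round the requested byte range outward to whole L2 cache lines, then walk it one CHIP_FLUSH_STRIDE()/CHIP_FINV_STRIDE() at a time. The rounding is the usual align-down/align-up pattern; in C, assuming a 64-byte line purely for illustration:

#include <stdio.h>

#define L2_LINE_BYTES 64UL	/* illustrative; the asm uses L2_CACHE_BYTES */

/* Round [addr, addr + len) outward to whole cache lines, as the asm does:
 * start is aligned down, end is aligned up, and the walk covers end - start.
 */
static void line_bounds(unsigned long addr, unsigned long len,
			unsigned long *start, unsigned long *nbytes)
{
	unsigned long end = (addr + len + L2_LINE_BYTES - 1) &
			    ~(L2_LINE_BYTES - 1);

	*start = addr & ~(L2_LINE_BYTES - 1);
	*nbytes = end - *start;
}

int main(void)
{
	unsigned long start, nbytes;

	line_bounds(0x1010, 0x40, &start, &nbytes);
	printf("%#lx %#lx\n", start, nbytes);	/* 0x1000 0x80 */
	return 0;
}
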
diff --git a/arch/tile/lib/usercopy_64.S b/arch/tile/lib/usercopy_64.S
deleted file mode 100644
index 9322dc551e91..000000000000
--- a/arch/tile/lib/usercopy_64.S
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright 2011 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/linkage.h>
-#include <asm/errno.h>
-#include <asm/cache.h>
-#include <arch/chip.h>
-
-/* Access user memory, but use MMU to avoid propagating kernel exceptions. */
-
-/*
- * clear_user_asm takes the user target address in r0 and the
- * number of bytes to zero in r1.
- * It returns the number of uncopiable bytes (hopefully zero) in r0.
- * Note that we don't use a separate .fixup section here since we fall
- * through into the "fixup" code as the last straight-line bundle anyway.
- */
-STD_ENTRY(clear_user_asm)
- { beqz r1, 2f; or r2, r0, r1 }
- andi r2, r2, 7
- beqzt r2, .Lclear_aligned_user_asm
-1: { st1 r0, zero; addi r0, r0, 1; addi r1, r1, -1 }
- bnezt r1, 1b
-2: { move r0, r1; jrp lr }
- .pushsection __ex_table,"a"
- .align 8
- .quad 1b, 2b
- .popsection
-
-.Lclear_aligned_user_asm:
-1: { st r0, zero; addi r0, r0, 8; addi r1, r1, -8 }
- bnezt r1, 1b
-2: { move r0, r1; jrp lr }
- STD_ENDPROC(clear_user_asm)
- .pushsection __ex_table,"a"
- .align 8
- .quad 1b, 2b
- .popsection
-
-/*
- * flush_user_asm takes the user target address in r0 and the
- * number of bytes to flush in r1.
- * It returns the number of unflushable bytes (hopefully zero) in r0.
- */
-STD_ENTRY(flush_user_asm)
- beqz r1, 2f
- { movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
- { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
- { and r0, r0, r2; and r1, r1, r2 }
- { sub r1, r1, r0 }
-1: { flush r0; addi r1, r1, -CHIP_FLUSH_STRIDE() }
- { addi r0, r0, CHIP_FLUSH_STRIDE(); bnezt r1, 1b }
-2: { move r0, r1; jrp lr }
- STD_ENDPROC(flush_user_asm)
- .pushsection __ex_table,"a"
- .align 8
- .quad 1b, 2b
- .popsection
-
-/*
- * finv_user_asm takes the user target address in r0 and the
- * number of bytes to flush-invalidate in r1.
- * It returns the number of not finv'able bytes (hopefully zero) in r0.
- */
-STD_ENTRY(finv_user_asm)
- beqz r1, 2f
- { movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
- { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
- { and r0, r0, r2; and r1, r1, r2 }
- { sub r1, r1, r0 }
-1: { finv r0; addi r1, r1, -CHIP_FINV_STRIDE() }
- { addi r0, r0, CHIP_FINV_STRIDE(); bnezt r1, 1b }
-2: { move r0, r1; jrp lr }
- STD_ENDPROC(finv_user_asm)
- .pushsection __ex_table,"a"
- .align 8
- .quad 1b, 2b
- .popsection