summaryrefslogtreecommitdiff
path: root/arch/x86_64/ia32
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64/ia32')
-rw-r--r--arch/x86_64/ia32/Makefile32
-rw-r--r--arch/x86_64/ia32/fpu32.c184
-rw-r--r--arch/x86_64/ia32/ia32_aout.c529
-rw-r--r--arch/x86_64/ia32/ia32_binfmt.c434
-rw-r--r--arch/x86_64/ia32/ia32_ioctl.c201
-rw-r--r--arch/x86_64/ia32/ia32_signal.c621
-rw-r--r--arch/x86_64/ia32/ia32entry.S602
-rw-r--r--arch/x86_64/ia32/ipc32.c57
-rw-r--r--arch/x86_64/ia32/ptrace32.c379
-rw-r--r--arch/x86_64/ia32/sys_ia32.c1050
-rw-r--r--arch/x86_64/ia32/syscall32.c111
-rw-r--r--arch/x86_64/ia32/tls32.c163
-rw-r--r--arch/x86_64/ia32/vsyscall-sigreturn.S120
-rw-r--r--arch/x86_64/ia32/vsyscall-syscall.S68
-rw-r--r--arch/x86_64/ia32/vsyscall-sysenter.S94
-rw-r--r--arch/x86_64/ia32/vsyscall.lds77
16 files changed, 4722 insertions, 0 deletions
diff --git a/arch/x86_64/ia32/Makefile b/arch/x86_64/ia32/Makefile
new file mode 100644
index 000000000000..a12b19da4b59
--- /dev/null
+++ b/arch/x86_64/ia32/Makefile
@@ -0,0 +1,32 @@
+#
+# Makefile for the ia32 kernel emulation subsystem.
+#
+
+obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_ioctl.o \
+ ia32_signal.o tls32.o \
+ ia32_binfmt.o fpu32.o ptrace32.o syscall32.o
+
+sysv-$(CONFIG_SYSVIPC) := ipc32.o
+obj-$(CONFIG_IA32_EMULATION) += $(sysv-y)
+
+obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
+
+$(obj)/syscall32.o: $(src)/syscall32.c \
+ $(foreach F,sysenter syscall,$(obj)/vsyscall-$F.so)
+
+# Teach kbuild about targets
+targets := $(foreach F,sysenter syscall,vsyscall-$F.o vsyscall-$F.so)
+
+# The DSO images are built using a special linker script
+quiet_cmd_syscall = SYSCALL $@
+ cmd_syscall = $(CC) -m32 -nostdlib -shared -s \
+ -Wl,-soname=linux-gate.so.1 -o $@ \
+ -Wl,-T,$(filter-out FORCE,$^)
+
+$(obj)/vsyscall-sysenter.so $(obj)/vsyscall-syscall.so: \
+$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
+ $(call if_changed,syscall)
+
+AFLAGS_vsyscall-sysenter.o = -m32
+AFLAGS_vsyscall-syscall.o = -m32
+CFLAGS_ia32_ioctl.o += -Ifs/
diff --git a/arch/x86_64/ia32/fpu32.c b/arch/x86_64/ia32/fpu32.c
new file mode 100644
index 000000000000..1c23095f1813
--- /dev/null
+++ b/arch/x86_64/ia32/fpu32.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright 2002 Andi Kleen, SuSE Labs.
+ * FXSAVE<->i387 conversion support. Based on code by Gareth Hughes.
+ * This is used for ptrace, signals and coredumps in 32bit emulation.
+ * $Id: fpu32.c,v 1.1 2002/03/21 14:16:32 ak Exp $
+ */
+
+#include <linux/sched.h>
+#include <asm/sigcontext32.h>
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+#include <asm/i387.h>
+
+static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
+{
+ unsigned int tmp; /* to avoid 16 bit prefixes in the code */
+
+ /* Transform each pair of bits into 01 (valid) or 00 (empty) */
+ tmp = ~twd;
+ tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
+ /* and move the valid bits to the lower byte. */
+ tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
+ tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
+ tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
+ return tmp;
+}
+
+static inline unsigned long twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
+{
+ struct _fpxreg *st = NULL;
+ unsigned long tos = (fxsave->swd >> 11) & 7;
+ unsigned long twd = (unsigned long) fxsave->twd;
+ unsigned long tag;
+ unsigned long ret = 0xffff0000;
+ int i;
+
+#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16);
+
+ for (i = 0 ; i < 8 ; i++) {
+ if (twd & 0x1) {
+ st = FPREG_ADDR( fxsave, (i - tos) & 7 );
+
+ switch (st->exponent & 0x7fff) {
+ case 0x7fff:
+ tag = 2; /* Special */
+ break;
+ case 0x0000:
+ if ( !st->significand[0] &&
+ !st->significand[1] &&
+ !st->significand[2] &&
+ !st->significand[3] ) {
+ tag = 1; /* Zero */
+ } else {
+ tag = 2; /* Special */
+ }
+ break;
+ default:
+ if (st->significand[3] & 0x8000) {
+ tag = 0; /* Valid */
+ } else {
+ tag = 2; /* Special */
+ }
+ break;
+ }
+ } else {
+ tag = 3; /* Empty */
+ }
+ ret |= (tag << (2 * i));
+ twd = twd >> 1;
+ }
+ return ret;
+}
+
+
+static inline int convert_fxsr_from_user(struct i387_fxsave_struct *fxsave,
+ struct _fpstate_ia32 __user *buf)
+{
+ struct _fpxreg *to;
+ struct _fpreg __user *from;
+ int i;
+ u32 v;
+ int err = 0;
+
+#define G(num,val) err |= __get_user(val, num + (u32 __user *)buf)
+ G(0, fxsave->cwd);
+ G(1, fxsave->swd);
+ G(2, fxsave->twd);
+ fxsave->twd = twd_i387_to_fxsr(fxsave->twd);
+ G(3, fxsave->rip);
+ G(4, v);
+ fxsave->fop = v>>16; /* cs ignored */
+ G(5, fxsave->rdp);
+ /* 6: ds ignored */
+#undef G
+ if (err)
+ return -1;
+
+ to = (struct _fpxreg *)&fxsave->st_space[0];
+ from = &buf->_st[0];
+ for (i = 0 ; i < 8 ; i++, to++, from++) {
+ if (__copy_from_user(to, from, sizeof(*from)))
+ return -1;
+ }
+ return 0;
+}
+
+
+static inline int convert_fxsr_to_user(struct _fpstate_ia32 __user *buf,
+ struct i387_fxsave_struct *fxsave,
+ struct pt_regs *regs,
+ struct task_struct *tsk)
+{
+ struct _fpreg __user *to;
+ struct _fpxreg *from;
+ int i;
+ u16 cs,ds;
+ int err = 0;
+
+ if (tsk == current) {
+ /* should be actually ds/cs at fpu exception time,
+ but that information is not available in 64bit mode. */
+ asm("movw %%ds,%0 " : "=r" (ds));
+ asm("movw %%cs,%0 " : "=r" (cs));
+ } else { /* ptrace. task has stopped. */
+ ds = tsk->thread.ds;
+ cs = regs->cs;
+ }
+
+#define P(num,val) err |= __put_user(val, num + (u32 __user *)buf)
+ P(0, (u32)fxsave->cwd | 0xffff0000);
+ P(1, (u32)fxsave->swd | 0xffff0000);
+ P(2, twd_fxsr_to_i387(fxsave));
+ P(3, (u32)fxsave->rip);
+ P(4, cs | ((u32)fxsave->fop) << 16);
+ P(5, fxsave->rdp);
+ P(6, 0xffff0000 | ds);
+#undef P
+
+ if (err)
+ return -1;
+
+ to = &buf->_st[0];
+ from = (struct _fpxreg *) &fxsave->st_space[0];
+ for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
+ if (__copy_to_user(to, from, sizeof(*to)))
+ return -1;
+ }
+ return 0;
+}
+
+int restore_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 __user *buf, int fsave)
+{
+ clear_fpu(tsk);
+ if (!fsave) {
+ if (__copy_from_user(&tsk->thread.i387.fxsave,
+ &buf->_fxsr_env[0],
+ sizeof(struct i387_fxsave_struct)))
+ return -1;
+ tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
+ set_stopped_child_used_math(tsk);
+ }
+ return convert_fxsr_from_user(&tsk->thread.i387.fxsave, buf);
+}
+
+int save_i387_ia32(struct task_struct *tsk,
+ struct _fpstate_ia32 __user *buf,
+ struct pt_regs *regs,
+ int fsave)
+{
+ int err = 0;
+
+ init_fpu(tsk);
+ if (convert_fxsr_to_user(buf, &tsk->thread.i387.fxsave, regs, tsk))
+ return -1;
+ if (fsave)
+ return 0;
+ err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status);
+ if (fsave)
+ return err ? -1 : 1;
+ err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
+ err |= __copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
+ sizeof(struct i387_fxsave_struct));
+ return err ? -1 : 1;
+}
diff --git a/arch/x86_64/ia32/ia32_aout.c b/arch/x86_64/ia32/ia32_aout.c
new file mode 100644
index 000000000000..1965efc974dc
--- /dev/null
+++ b/arch/x86_64/ia32/ia32_aout.c
@@ -0,0 +1,529 @@
+/*
+ * a.out loader for x86-64
+ *
+ * Copyright (C) 1991, 1992, 1996 Linus Torvalds
+ * Hacked together by Andi Kleen
+ */
+
+#include <linux/module.h>
+
+#include <linux/time.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/a.out.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/slab.h>
+#include <linux/binfmts.h>
+#include <linux/personality.h>
+#include <linux/init.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/cacheflush.h>
+#include <asm/user32.h>
+#include <asm/ia32.h>
+
+#undef WARN_OLD
+#undef CORE_DUMP /* probably broken */
+
+extern int ia32_setup_arg_pages(struct linux_binprm *bprm,
+ unsigned long stack_top, int exec_stack);
+
+static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
+static int load_aout_library(struct file*);
+
+#if CORE_DUMP
+static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file);
+
+/*
+ * fill in the user structure for a core dump..
+ */
+static void dump_thread32(struct pt_regs * regs, struct user32 * dump)
+{
+ u32 fs,gs;
+
+/* changed the size calculations - should hopefully work better. lbt */
+ dump->magic = CMAGIC;
+ dump->start_code = 0;
+ dump->start_stack = regs->rsp & ~(PAGE_SIZE - 1);
+ dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
+ dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
+ dump->u_dsize -= dump->u_tsize;
+ dump->u_ssize = 0;
+ dump->u_debugreg[0] = current->thread.debugreg0;
+ dump->u_debugreg[1] = current->thread.debugreg1;
+ dump->u_debugreg[2] = current->thread.debugreg2;
+ dump->u_debugreg[3] = current->thread.debugreg3;
+ dump->u_debugreg[4] = 0;
+ dump->u_debugreg[5] = 0;
+ dump->u_debugreg[6] = current->thread.debugreg6;
+ dump->u_debugreg[7] = current->thread.debugreg7;
+
+ if (dump->start_stack < 0xc0000000)
+ dump->u_ssize = ((unsigned long) (0xc0000000 - dump->start_stack)) >> PAGE_SHIFT;
+
+ dump->regs.ebx = regs->rbx;
+ dump->regs.ecx = regs->rcx;
+ dump->regs.edx = regs->rdx;
+ dump->regs.esi = regs->rsi;
+ dump->regs.edi = regs->rdi;
+ dump->regs.ebp = regs->rbp;
+ dump->regs.eax = regs->rax;
+ dump->regs.ds = current->thread.ds;
+ dump->regs.es = current->thread.es;
+ asm("movl %%fs,%0" : "=r" (fs)); dump->regs.fs = fs;
+ asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs;
+ dump->regs.orig_eax = regs->orig_rax;
+ dump->regs.eip = regs->rip;
+ dump->regs.cs = regs->cs;
+ dump->regs.eflags = regs->eflags;
+ dump->regs.esp = regs->rsp;
+ dump->regs.ss = regs->ss;
+
+#if 1 /* FIXME */
+ dump->u_fpvalid = 0;
+#else
+ dump->u_fpvalid = dump_fpu (regs, &dump->i387);
+#endif
+}
+
+#endif
+
+static struct linux_binfmt aout_format = {
+ .module = THIS_MODULE,
+ .load_binary = load_aout_binary,
+ .load_shlib = load_aout_library,
+#if CORE_DUMP
+ .core_dump = aout_core_dump,
+#endif
+ .min_coredump = PAGE_SIZE
+};
+
+static void set_brk(unsigned long start, unsigned long end)
+{
+ start = PAGE_ALIGN(start);
+ end = PAGE_ALIGN(end);
+ if (end <= start)
+ return;
+ down_write(&current->mm->mmap_sem);
+ do_brk(start, end - start);
+ up_write(&current->mm->mmap_sem);
+}
+
+#if CORE_DUMP
+/*
+ * These are the only things you should do on a core-file: use only these
+ * macros to write out all the necessary info.
+ */
+
+static int dump_write(struct file *file, const void *addr, int nr)
+{
+ return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
+}
+
+#define DUMP_WRITE(addr, nr) \
+ if (!dump_write(file, (void *)(addr), (nr))) \
+ goto end_coredump;
+
+#define DUMP_SEEK(offset) \
+if (file->f_op->llseek) { \
+ if (file->f_op->llseek(file,(offset),0) != (offset)) \
+ goto end_coredump; \
+} else file->f_pos = (offset)
+
+/*
+ * Routine writes a core dump image in the current directory.
+ * Currently only a stub-function.
+ *
+ * Note that setuid/setgid files won't make a core-dump if the uid/gid
+ * changed due to the set[u|g]id. It's enforced by the "current->mm->dumpable"
+ * field, which also makes sure the core-dumps won't be recursive if the
+ * dumping of the process results in another error..
+ */
+
+static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file)
+{
+ mm_segment_t fs;
+ int has_dumped = 0;
+ unsigned long dump_start, dump_size;
+ struct user32 dump;
+# define START_DATA(u) (u.u_tsize << PAGE_SHIFT)
+# define START_STACK(u) (u.start_stack)
+
+ fs = get_fs();
+ set_fs(KERNEL_DS);
+ has_dumped = 1;
+ current->flags |= PF_DUMPCORE;
+ strncpy(dump.u_comm, current->comm, sizeof(current->comm));
+ dump.u_ar0 = (u32)(((unsigned long)(&dump.regs)) - ((unsigned long)(&dump)));
+ dump.signal = signr;
+ dump_thread32(regs, &dump);
+
+/* If the size of the dump file exceeds the rlimit, then see what would happen
+ if we wrote the stack, but not the data area. */
+ if ((dump.u_dsize+dump.u_ssize+1) * PAGE_SIZE >
+ current->signal->rlim[RLIMIT_CORE].rlim_cur)
+ dump.u_dsize = 0;
+
+/* Make sure we have enough room to write the stack and data areas. */
+ if ((dump.u_ssize+1) * PAGE_SIZE >
+ current->signal->rlim[RLIMIT_CORE].rlim_cur)
+ dump.u_ssize = 0;
+
+/* make sure we actually have a data and stack area to dump */
+ set_fs(USER_DS);
+ if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_DATA(dump), dump.u_dsize << PAGE_SHIFT))
+ dump.u_dsize = 0;
+ if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_STACK(dump), dump.u_ssize << PAGE_SHIFT))
+ dump.u_ssize = 0;
+
+ set_fs(KERNEL_DS);
+/* struct user */
+ DUMP_WRITE(&dump,sizeof(dump));
+/* Now dump all of the user data. Include malloced stuff as well */
+ DUMP_SEEK(PAGE_SIZE);
+/* now we start writing out the user space info */
+ set_fs(USER_DS);
+/* Dump the data area */
+ if (dump.u_dsize != 0) {
+ dump_start = START_DATA(dump);
+ dump_size = dump.u_dsize << PAGE_SHIFT;
+ DUMP_WRITE(dump_start,dump_size);
+ }
+/* Now prepare to dump the stack area */
+ if (dump.u_ssize != 0) {
+ dump_start = START_STACK(dump);
+ dump_size = dump.u_ssize << PAGE_SHIFT;
+ DUMP_WRITE(dump_start,dump_size);
+ }
+/* Finally dump the task struct. Not be used by gdb, but could be useful */
+ set_fs(KERNEL_DS);
+ DUMP_WRITE(current,sizeof(*current));
+end_coredump:
+ set_fs(fs);
+ return has_dumped;
+}
+#endif
+
+/*
+ * create_aout_tables() parses the env- and arg-strings in new user
+ * memory and creates the pointer tables from them, and puts their
+ * addresses on the "stack", returning the new stack pointer value.
+ */
+static u32 __user *create_aout_tables(char __user *p, struct linux_binprm *bprm)
+{
+ u32 __user *argv;
+ u32 __user *envp;
+ u32 __user *sp;
+ int argc = bprm->argc;
+ int envc = bprm->envc;
+
+ sp = (u32 __user *) ((-(unsigned long)sizeof(u32)) & (unsigned long) p);
+ sp -= envc+1;
+ envp = sp;
+ sp -= argc+1;
+ argv = sp;
+ put_user((unsigned long) envp,--sp);
+ put_user((unsigned long) argv,--sp);
+ put_user(argc,--sp);
+ current->mm->arg_start = (unsigned long) p;
+ while (argc-->0) {
+ char c;
+ put_user((u32)(unsigned long)p,argv++);
+ do {
+ get_user(c,p++);
+ } while (c);
+ }
+ put_user(NULL,argv);
+ current->mm->arg_end = current->mm->env_start = (unsigned long) p;
+ while (envc-->0) {
+ char c;
+ put_user((u32)(unsigned long)p,envp++);
+ do {
+ get_user(c,p++);
+ } while (c);
+ }
+ put_user(NULL,envp);
+ current->mm->env_end = (unsigned long) p;
+ return sp;
+}
+
+/*
+ * These are the functions used to load a.out style executables and shared
+ * libraries. There is no binary dependent code anywhere else.
+ */
+
+static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
+{
+ struct exec ex;
+ unsigned long error;
+ unsigned long fd_offset;
+ unsigned long rlim;
+ int retval;
+
+ ex = *((struct exec *) bprm->buf); /* exec-header */
+ if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
+ N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
+ N_TRSIZE(ex) || N_DRSIZE(ex) ||
+ i_size_read(bprm->file->f_dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
+ return -ENOEXEC;
+ }
+
+ fd_offset = N_TXTOFF(ex);
+
+ /* Check initial limits. This avoids letting people circumvent
+ * size limits imposed on them by creating programs with large
+ * arrays in the data or bss.
+ */
+ rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+ if (rlim >= RLIM_INFINITY)
+ rlim = ~0;
+ if (ex.a_data + ex.a_bss > rlim)
+ return -ENOMEM;
+
+ /* Flush all traces of the currently running executable */
+ retval = flush_old_exec(bprm);
+ if (retval)
+ return retval;
+
+ regs->cs = __USER32_CS;
+ regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
+ regs->r13 = regs->r14 = regs->r15 = 0;
+
+ /* OK, This is the point of no return */
+ set_personality(PER_LINUX);
+ set_thread_flag(TIF_IA32);
+ clear_thread_flag(TIF_ABI_PENDING);
+
+ current->mm->end_code = ex.a_text +
+ (current->mm->start_code = N_TXTADDR(ex));
+ current->mm->end_data = ex.a_data +
+ (current->mm->start_data = N_DATADDR(ex));
+ current->mm->brk = ex.a_bss +
+ (current->mm->start_brk = N_BSSADDR(ex));
+ current->mm->free_area_cache = TASK_UNMAPPED_BASE;
+
+ set_mm_counter(current->mm, rss, 0);
+ current->mm->mmap = NULL;
+ compute_creds(bprm);
+ current->flags &= ~PF_FORKNOEXEC;
+
+ if (N_MAGIC(ex) == OMAGIC) {
+ unsigned long text_addr, map_size;
+ loff_t pos;
+
+ text_addr = N_TXTADDR(ex);
+
+ pos = 32;
+ map_size = ex.a_text+ex.a_data;
+
+ down_write(&current->mm->mmap_sem);
+ error = do_brk(text_addr & PAGE_MASK, map_size);
+ up_write(&current->mm->mmap_sem);
+
+ if (error != (text_addr & PAGE_MASK)) {
+ send_sig(SIGKILL, current, 0);
+ return error;
+ }
+
+ error = bprm->file->f_op->read(bprm->file, (char *)text_addr,
+ ex.a_text+ex.a_data, &pos);
+ if ((signed long)error < 0) {
+ send_sig(SIGKILL, current, 0);
+ return error;
+ }
+
+ flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data);
+ } else {
+#ifdef WARN_OLD
+ static unsigned long error_time, error_time2;
+ if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
+ (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ)
+ {
+ printk(KERN_NOTICE "executable not page aligned\n");
+ error_time2 = jiffies;
+ }
+
+ if ((fd_offset & ~PAGE_MASK) != 0 &&
+ (jiffies-error_time) > 5*HZ)
+ {
+ printk(KERN_WARNING
+ "fd_offset is not page aligned. Please convert program: %s\n",
+ bprm->file->f_dentry->d_name.name);
+ error_time = jiffies;
+ }
+#endif
+
+ if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
+ loff_t pos = fd_offset;
+ down_write(&current->mm->mmap_sem);
+ do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
+ up_write(&current->mm->mmap_sem);
+ bprm->file->f_op->read(bprm->file,(char *)N_TXTADDR(ex),
+ ex.a_text+ex.a_data, &pos);
+ flush_icache_range((unsigned long) N_TXTADDR(ex),
+ (unsigned long) N_TXTADDR(ex) +
+ ex.a_text+ex.a_data);
+ goto beyond_if;
+ }
+
+ down_write(&current->mm->mmap_sem);
+ error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
+ PROT_READ | PROT_EXEC,
+ MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT,
+ fd_offset);
+ up_write(&current->mm->mmap_sem);
+
+ if (error != N_TXTADDR(ex)) {
+ send_sig(SIGKILL, current, 0);
+ return error;
+ }
+
+ down_write(&current->mm->mmap_sem);
+ error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
+ PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT,
+ fd_offset + ex.a_text);
+ up_write(&current->mm->mmap_sem);
+ if (error != N_DATADDR(ex)) {
+ send_sig(SIGKILL, current, 0);
+ return error;
+ }
+ }
+beyond_if:
+ set_binfmt(&aout_format);
+
+ set_brk(current->mm->start_brk, current->mm->brk);
+
+ retval = ia32_setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
+ if (retval < 0) {
+ /* Someone check-me: is this error path enough? */
+ send_sig(SIGKILL, current, 0);
+ return retval;
+ }
+
+ current->mm->start_stack =
+ (unsigned long)create_aout_tables((char __user *)bprm->p, bprm);
+ /* start thread */
+ asm volatile("movl %0,%%fs" :: "r" (0)); \
+ asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS));
+ load_gs_index(0);
+ (regs)->rip = ex.a_entry;
+ (regs)->rsp = current->mm->start_stack;
+ (regs)->eflags = 0x200;
+ (regs)->cs = __USER32_CS;
+ (regs)->ss = __USER32_DS;
+ set_fs(USER_DS);
+ if (unlikely(current->ptrace & PT_PTRACED)) {
+ if (current->ptrace & PT_TRACE_EXEC)
+ ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
+ else
+ send_sig(SIGTRAP, current, 0);
+ }
+ return 0;
+}
+
+static int load_aout_library(struct file *file)
+{
+ struct inode * inode;
+ unsigned long bss, start_addr, len;
+ unsigned long error;
+ int retval;
+ struct exec ex;
+
+ inode = file->f_dentry->d_inode;
+
+ retval = -ENOEXEC;
+ error = kernel_read(file, 0, (char *) &ex, sizeof(ex));
+ if (error != sizeof(ex))
+ goto out;
+
+ /* We come in here for the regular a.out style of shared libraries */
+ if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) ||
+ N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) ||
+ i_size_read(inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
+ goto out;
+ }
+
+ if (N_FLAGS(ex))
+ goto out;
+
+ /* For QMAGIC, the starting address is 0x20 into the page. We mask
+ this off to get the starting address for the page */
+
+ start_addr = ex.a_entry & 0xfffff000;
+
+ if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
+ loff_t pos = N_TXTOFF(ex);
+
+#ifdef WARN_OLD
+ static unsigned long error_time;
+ if ((jiffies-error_time) > 5*HZ)
+ {
+ printk(KERN_WARNING
+ "N_TXTOFF is not page aligned. Please convert library: %s\n",
+ file->f_dentry->d_name.name);
+ error_time = jiffies;
+ }
+#endif
+ down_write(&current->mm->mmap_sem);
+ do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
+ up_write(&current->mm->mmap_sem);
+
+ file->f_op->read(file, (char *)start_addr,
+ ex.a_text + ex.a_data, &pos);
+ flush_icache_range((unsigned long) start_addr,
+ (unsigned long) start_addr + ex.a_text + ex.a_data);
+
+ retval = 0;
+ goto out;
+ }
+ /* Now use mmap to map the library into memory. */
+ down_write(&current->mm->mmap_sem);
+ error = do_mmap(file, start_addr, ex.a_text + ex.a_data,
+ PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_32BIT,
+ N_TXTOFF(ex));
+ up_write(&current->mm->mmap_sem);
+ retval = error;
+ if (error != start_addr)
+ goto out;
+
+ len = PAGE_ALIGN(ex.a_text + ex.a_data);
+ bss = ex.a_text + ex.a_data + ex.a_bss;
+ if (bss > len) {
+ down_write(&current->mm->mmap_sem);
+ error = do_brk(start_addr + len, bss - len);
+ up_write(&current->mm->mmap_sem);
+ retval = error;
+ if (error != start_addr + len)
+ goto out;
+ }
+ retval = 0;
+out:
+ return retval;
+}
+
+static int __init init_aout_binfmt(void)
+{
+ return register_binfmt(&aout_format);
+}
+
+static void __exit exit_aout_binfmt(void)
+{
+ unregister_binfmt(&aout_format);
+}
+
+module_init(init_aout_binfmt);
+module_exit(exit_aout_binfmt);
+MODULE_LICENSE("GPL");
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
new file mode 100644
index 000000000000..93d568dfa762
--- /dev/null
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -0,0 +1,434 @@
+/*
+ * Written 2000,2002 by Andi Kleen.
+ *
+ * Loosely based on the sparc64 and IA64 32bit emulation loaders.
+ * This tricks binfmt_elf.c into loading 32bit binaries using lots
+ * of ugly preprocessor tricks. Talk about very very poor man's inheritance.
+ */
+#include <linux/types.h>
+#include <linux/config.h>
+#include <linux/stddef.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/compat.h>
+#include <linux/string.h>
+#include <linux/binfmts.h>
+#include <linux/mm.h>
+#include <linux/security.h>
+
+#include <asm/segment.h>
+#include <asm/ptrace.h>
+#include <asm/processor.h>
+#include <asm/user32.h>
+#include <asm/sigcontext32.h>
+#include <asm/fpu32.h>
+#include <asm/i387.h>
+#include <asm/uaccess.h>
+#include <asm/ia32.h>
+#include <asm/vsyscall32.h>
+
+#define ELF_NAME "elf/i386"
+
+#define AT_SYSINFO 32
+#define AT_SYSINFO_EHDR 33
+
+int sysctl_vsyscall32 = 1;
+
+#define ARCH_DLINFO do { \
+ if (sysctl_vsyscall32) { \
+ NEW_AUX_ENT(AT_SYSINFO, (u32)(u64)VSYSCALL32_VSYSCALL); \
+ NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL32_BASE); \
+ } \
+} while(0)
+
+struct file;
+struct elf_phdr;
+
+#define IA32_EMULATOR 1
+
+#define ELF_ET_DYN_BASE (TASK_UNMAPPED_32 + 0x1000000)
+
+#undef ELF_ARCH
+#define ELF_ARCH EM_386
+
+#undef ELF_CLASS
+#define ELF_CLASS ELFCLASS32
+
+#define ELF_DATA ELFDATA2LSB
+
+#define USE_ELF_CORE_DUMP 1
+
+/* Overwrite elfcore.h */
+#define _LINUX_ELFCORE_H 1
+typedef unsigned int elf_greg_t;
+
+#define ELF_NGREG (sizeof (struct user_regs_struct32) / sizeof(elf_greg_t))
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+/*
+ * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out
+ * extra segments containing the vsyscall DSO contents. Dumping its
+ * contents makes post-mortem fully interpretable later without matching up
+ * the same kernel and hardware config to see what PC values meant.
+ * Dumping its extra ELF program headers includes all the other information
+ * a debugger needs to easily find how the vsyscall DSO was being used.
+ */
+#define ELF_CORE_EXTRA_PHDRS (VSYSCALL32_EHDR->e_phnum)
+#define ELF_CORE_WRITE_EXTRA_PHDRS \
+do { \
+ const struct elf32_phdr *const vsyscall_phdrs = \
+ (const struct elf32_phdr *) (VSYSCALL32_BASE \
+ + VSYSCALL32_EHDR->e_phoff); \
+ int i; \
+ Elf32_Off ofs = 0; \
+ for (i = 0; i < VSYSCALL32_EHDR->e_phnum; ++i) { \
+ struct elf32_phdr phdr = vsyscall_phdrs[i]; \
+ if (phdr.p_type == PT_LOAD) { \
+ BUG_ON(ofs != 0); \
+ ofs = phdr.p_offset = offset; \
+ phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz); \
+ phdr.p_filesz = phdr.p_memsz; \
+ offset += phdr.p_filesz; \
+ } \
+ else \
+ phdr.p_offset += ofs; \
+ phdr.p_paddr = 0; /* match other core phdrs */ \
+ DUMP_WRITE(&phdr, sizeof(phdr)); \
+ } \
+} while (0)
+#define ELF_CORE_WRITE_EXTRA_DATA \
+do { \
+ const struct elf32_phdr *const vsyscall_phdrs = \
+ (const struct elf32_phdr *) (VSYSCALL32_BASE \
+ + VSYSCALL32_EHDR->e_phoff); \
+ int i; \
+ for (i = 0; i < VSYSCALL32_EHDR->e_phnum; ++i) { \
+ if (vsyscall_phdrs[i].p_type == PT_LOAD) \
+ DUMP_WRITE((void *) (u64) vsyscall_phdrs[i].p_vaddr, \
+ PAGE_ALIGN(vsyscall_phdrs[i].p_memsz)); \
+ } \
+} while (0)
+
+struct elf_siginfo
+{
+ int si_signo; /* signal number */
+ int si_code; /* extra code */
+ int si_errno; /* errno */
+};
+
+#define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0)
+
+struct elf_prstatus
+{
+ struct elf_siginfo pr_info; /* Info associated with signal */
+ short pr_cursig; /* Current signal */
+ unsigned int pr_sigpend; /* Set of pending signals */
+ unsigned int pr_sighold; /* Set of held signals */
+ pid_t pr_pid;
+ pid_t pr_ppid;
+ pid_t pr_pgrp;
+ pid_t pr_sid;
+ struct compat_timeval pr_utime; /* User time */
+ struct compat_timeval pr_stime; /* System time */
+ struct compat_timeval pr_cutime; /* Cumulative user time */
+ struct compat_timeval pr_cstime; /* Cumulative system time */
+ elf_gregset_t pr_reg; /* GP registers */
+ int pr_fpvalid; /* True if math co-processor being used. */
+};
+
+#define ELF_PRARGSZ (80) /* Number of chars for args */
+
+struct elf_prpsinfo
+{
+ char pr_state; /* numeric process state */
+ char pr_sname; /* char for pr_state */
+ char pr_zomb; /* zombie */
+ char pr_nice; /* nice val */
+ unsigned int pr_flag; /* flags */
+ __u16 pr_uid;
+ __u16 pr_gid;
+ pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid;
+ /* Lots missing */
+ char pr_fname[16]; /* filename of executable */
+ char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */
+};
+
+#define __STR(x) #x
+#define STR(x) __STR(x)
+
+#define _GET_SEG(x) \
+ ({ __u32 seg; asm("movl %%" STR(x) ",%0" : "=r"(seg)); seg; })
+
+/* Assumes current==process to be dumped */
+#define ELF_CORE_COPY_REGS(pr_reg, regs) \
+ pr_reg[0] = regs->rbx; \
+ pr_reg[1] = regs->rcx; \
+ pr_reg[2] = regs->rdx; \
+ pr_reg[3] = regs->rsi; \
+ pr_reg[4] = regs->rdi; \
+ pr_reg[5] = regs->rbp; \
+ pr_reg[6] = regs->rax; \
+ pr_reg[7] = _GET_SEG(ds); \
+ pr_reg[8] = _GET_SEG(es); \
+ pr_reg[9] = _GET_SEG(fs); \
+ pr_reg[10] = _GET_SEG(gs); \
+ pr_reg[11] = regs->orig_rax; \
+ pr_reg[12] = regs->rip; \
+ pr_reg[13] = regs->cs; \
+ pr_reg[14] = regs->eflags; \
+ pr_reg[15] = regs->rsp; \
+ pr_reg[16] = regs->ss;
+
+#define user user32
+
+#define __ASM_X86_64_ELF_H 1
+#define elf_read_implies_exec(ex, have_pt_gnu_stack) (!(have_pt_gnu_stack))
+//#include <asm/ia32.h>
+#include <linux/elf.h>
+
+typedef struct user_i387_ia32_struct elf_fpregset_t;
+typedef struct user32_fxsr_struct elf_fpxregset_t;
+
+
+static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *regs)
+{
+ ELF_CORE_COPY_REGS((*elfregs), regs)
+}
+
+static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs)
+{
+ struct pt_regs *pp = (struct pt_regs *)(t->thread.rsp0);
+ --pp;
+ ELF_CORE_COPY_REGS((*elfregs), pp);
+ /* fix wrong segments */
+ (*elfregs)[7] = t->thread.ds;
+ (*elfregs)[9] = t->thread.fsindex;
+ (*elfregs)[10] = t->thread.gsindex;
+ (*elfregs)[8] = t->thread.es;
+ return 1;
+}
+
+static inline int
+elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpregset_t *fpu)
+{
+ struct _fpstate_ia32 *fpstate = (void*)fpu;
+ mm_segment_t oldfs = get_fs();
+
+ if (!tsk_used_math(tsk))
+ return 0;
+ if (!regs)
+ regs = (struct pt_regs *)tsk->thread.rsp0;
+ --regs;
+ if (tsk == current)
+ unlazy_fpu(tsk);
+ set_fs(KERNEL_DS);
+ save_i387_ia32(tsk, fpstate, regs, 1);
+ /* Correct for i386 bug. It puts the fop into the upper 16bits of
+ the tag word (like FXSAVE), not into the fcs*/
+ fpstate->cssel |= fpstate->tag & 0xffff0000;
+ set_fs(oldfs);
+ return 1;
+}
+
+#define ELF_CORE_COPY_XFPREGS 1
+static inline int
+elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu)
+{
+ struct pt_regs *regs = ((struct pt_regs *)(t->thread.rsp0))-1;
+ if (!tsk_used_math(t))
+ return 0;
+ if (t == current)
+ unlazy_fpu(t);
+ memcpy(xfpu, &t->thread.i387.fxsave, sizeof(elf_fpxregset_t));
+ xfpu->fcs = regs->cs;
+ xfpu->fos = t->thread.ds; /* right? */
+ return 1;
+}
+
+#undef elf_check_arch
+#define elf_check_arch(x) \
+ ((x)->e_machine == EM_386)
+
+extern int force_personality32;
+
+#define ELF_EXEC_PAGESIZE PAGE_SIZE
+#define ELF_HWCAP (boot_cpu_data.x86_capability[0])
+#define ELF_PLATFORM ("i686")
+#define SET_PERSONALITY(ex, ibcs2) \
+do { \
+ unsigned long new_flags = 0; \
+ if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \
+ new_flags = _TIF_IA32; \
+ if ((current_thread_info()->flags & _TIF_IA32) \
+ != new_flags) \
+ set_thread_flag(TIF_ABI_PENDING); \
+ else \
+ clear_thread_flag(TIF_ABI_PENDING); \
+ /* XXX This overwrites the user set personality */ \
+ current->personality |= force_personality32; \
+} while (0)
+
+/* Override some function names */
+#define elf_format elf32_format
+
+#define init_elf_binfmt init_elf32_binfmt
+#define exit_elf_binfmt exit_elf32_binfmt
+
+#define load_elf_binary load_elf32_binary
+
+#define ELF_PLAT_INIT(r, load_addr) elf32_init(r)
+#define setup_arg_pages(bprm, stack_top, exec_stack) \
+ ia32_setup_arg_pages(bprm, stack_top, exec_stack)
+int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack);
+
+#undef start_thread
+#define start_thread(regs,new_rip,new_rsp) do { \
+ asm volatile("movl %0,%%fs" :: "r" (0)); \
+ asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS)); \
+ load_gs_index(0); \
+ (regs)->rip = (new_rip); \
+ (regs)->rsp = (new_rsp); \
+ (regs)->eflags = 0x200; \
+ (regs)->cs = __USER32_CS; \
+ (regs)->ss = __USER32_DS; \
+ set_fs(USER_DS); \
+} while(0)
+
+
+#define elf_map elf32_map
+
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("Binary format loader for compatibility with IA32 ELF binaries.");
+MODULE_AUTHOR("Eric Youngdale, Andi Kleen");
+
+#undef MODULE_DESCRIPTION
+#undef MODULE_AUTHOR
+
+#define elf_addr_t __u32
+
+#undef TASK_SIZE
+#define TASK_SIZE 0xffffffff
+
+static void elf32_init(struct pt_regs *);
+
+#include "../../../fs/binfmt_elf.c"
+
+static void elf32_init(struct pt_regs *regs)
+{
+ struct task_struct *me = current;
+ regs->rdi = 0;
+ regs->rsi = 0;
+ regs->rdx = 0;
+ regs->rcx = 0;
+ regs->rax = 0;
+ regs->rbx = 0;
+ regs->rbp = 0;
+ regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
+ regs->r13 = regs->r14 = regs->r15 = 0;
+ me->thread.fs = 0;
+ me->thread.gs = 0;
+ me->thread.fsindex = 0;
+ me->thread.gsindex = 0;
+ me->thread.ds = __USER_DS;
+ me->thread.es = __USER_DS;
+}
+
+int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack)
+{
+ unsigned long stack_base;
+ struct vm_area_struct *mpnt;
+ struct mm_struct *mm = current->mm;
+ int i, ret;
+
+ stack_base = IA32_STACK_TOP - MAX_ARG_PAGES * PAGE_SIZE;
+ mm->arg_start = bprm->p + stack_base;
+
+ bprm->p += stack_base;
+ if (bprm->loader)
+ bprm->loader += stack_base;
+ bprm->exec += stack_base;
+
+ mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!mpnt)
+ return -ENOMEM;
+
+ if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) {
+ kmem_cache_free(vm_area_cachep, mpnt);
+ return -ENOMEM;
+ }
+
+ memset(mpnt, 0, sizeof(*mpnt));
+
+ down_write(&mm->mmap_sem);
+ {
+ mpnt->vm_mm = mm;
+ mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
+ mpnt->vm_end = IA32_STACK_TOP;
+ if (executable_stack == EXSTACK_ENABLE_X)
+ mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC;
+ else if (executable_stack == EXSTACK_DISABLE_X)
+ mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC;
+ else
+ mpnt->vm_flags = VM_STACK_FLAGS;
+ mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC) ?
+ PAGE_COPY_EXEC : PAGE_COPY;
+ if ((ret = insert_vm_struct(mm, mpnt))) {
+ up_write(&mm->mmap_sem);
+ kmem_cache_free(vm_area_cachep, mpnt);
+ return ret;
+ }
+ mm->stack_vm = mm->total_vm = vma_pages(mpnt);
+ }
+
+ for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
+ struct page *page = bprm->page[i];
+ if (page) {
+ bprm->page[i] = NULL;
+ install_arg_page(mpnt, page, stack_base);
+ }
+ stack_base += PAGE_SIZE;
+ }
+ up_write(&mm->mmap_sem);
+
+ return 0;
+}
+
+static unsigned long
+elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
+{
+ unsigned long map_addr;
+ struct task_struct *me = current;
+
+ down_write(&me->mm->mmap_sem);
+ map_addr = do_mmap(filep, ELF_PAGESTART(addr),
+ eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), prot,
+ type,
+ eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr));
+ up_write(&me->mm->mmap_sem);
+ return(map_addr);
+}
+
+#ifdef CONFIG_SYSCTL
+/* Register vsyscall32 into the ABI table */
+#include <linux/sysctl.h>
+
+static ctl_table abi_table2[] = {
+ { 99, "vsyscall32", &sysctl_vsyscall32, sizeof(int), 0644, NULL,
+ proc_dointvec },
+ { 0, }
+};
+
+static ctl_table abi_root_table2[] = {
+ { .ctl_name = CTL_ABI, .procname = "abi", .mode = 0555,
+ .child = abi_table2 },
+ { 0 },
+};
+
+static __init int ia32_binfmt_init(void)
+{
+ register_sysctl_table(abi_root_table2, 1);
+ return 0;
+}
+__initcall(ia32_binfmt_init);
+#endif
diff --git a/arch/x86_64/ia32/ia32_ioctl.c b/arch/x86_64/ia32/ia32_ioctl.c
new file mode 100644
index 000000000000..d259f8a6f811
--- /dev/null
+++ b/arch/x86_64/ia32/ia32_ioctl.c
@@ -0,0 +1,201 @@
+/* $Id: ia32_ioctl.c,v 1.25 2002/10/11 07:17:06 ak Exp $
+ * ioctl32.c: Conversion between 32bit and 64bit native ioctls.
+ *
+ * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com)
+ * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
+ * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs
+ *
+ * These routines maintain argument size conversion between 32bit and 64bit
+ * ioctls.
+ */
+
+#define INCLUDES
+#include <linux/syscalls.h>
+#include "compat_ioctl.c"
+#include <asm/mtrr.h>
+#include <asm/ia32.h>
+
+#define CODE
+#include "compat_ioctl.c"
+
+#ifndef TIOCGDEV
+#define TIOCGDEV _IOR('T',0x32, unsigned int)
+#endif
+static int tiocgdev(unsigned fd, unsigned cmd, unsigned int __user *ptr)
+{
+
+ struct file *file = fget(fd);
+ struct tty_struct *real_tty;
+
+ if (!file)
+ return -EBADF;
+ if (file->f_op->ioctl != tty_ioctl)
+ return -EINVAL;
+ real_tty = (struct tty_struct *)file->private_data;
+ if (!real_tty)
+ return -EINVAL;
+ return put_user(new_encode_dev(tty_devnum(real_tty)), ptr);
+}
+
+#define RTC_IRQP_READ32 _IOR('p', 0x0b, unsigned int) /* Read IRQ rate */
+#define RTC_IRQP_SET32 _IOW('p', 0x0c, unsigned int) /* Set IRQ rate */
+#define RTC_EPOCH_READ32 _IOR('p', 0x0d, unsigned) /* Read epoch */
+#define RTC_EPOCH_SET32 _IOW('p', 0x0e, unsigned) /* Set epoch */
+
+static int rtc32_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
+{
+ unsigned long val;
+ mm_segment_t oldfs = get_fs();
+ int ret;
+
+ switch (cmd) {
+ case RTC_IRQP_READ32:
+ set_fs(KERNEL_DS);
+ ret = sys_ioctl(fd, RTC_IRQP_READ, (unsigned long)&val);
+ set_fs(oldfs);
+ if (!ret)
+ ret = put_user(val, (unsigned int __user *) arg);
+ return ret;
+
+ case RTC_IRQP_SET32:
+ cmd = RTC_IRQP_SET;
+ break;
+
+ case RTC_EPOCH_READ32:
+ set_fs(KERNEL_DS);
+ ret = sys_ioctl(fd, RTC_EPOCH_READ, (unsigned long) &val);
+ set_fs(oldfs);
+ if (!ret)
+ ret = put_user(val, (unsigned int __user *) arg);
+ return ret;
+
+ case RTC_EPOCH_SET32:
+ cmd = RTC_EPOCH_SET;
+ break;
+ }
+ return sys_ioctl(fd,cmd,arg);
+}
+
+/* /proc/mtrr ioctls */
+
+
+struct mtrr_sentry32
+{
+ compat_ulong_t base; /* Base address */
+ compat_uint_t size; /* Size of region */
+ compat_uint_t type; /* Type of region */
+};
+
+struct mtrr_gentry32
+{
+ compat_ulong_t regnum; /* Register number */
+ compat_uint_t base; /* Base address */
+ compat_uint_t size; /* Size of region */
+ compat_uint_t type; /* Type of region */
+};
+
+#define MTRR_IOCTL_BASE 'M'
+
+#define MTRRIOC32_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry32)
+#define MTRRIOC32_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry32)
+#define MTRRIOC32_DEL_ENTRY _IOW(MTRR_IOCTL_BASE, 2, struct mtrr_sentry32)
+#define MTRRIOC32_GET_ENTRY _IOWR(MTRR_IOCTL_BASE, 3, struct mtrr_gentry32)
+#define MTRRIOC32_KILL_ENTRY _IOW(MTRR_IOCTL_BASE, 4, struct mtrr_sentry32)
+#define MTRRIOC32_ADD_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 5, struct mtrr_sentry32)
+#define MTRRIOC32_SET_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 6, struct mtrr_sentry32)
+#define MTRRIOC32_DEL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 7, struct mtrr_sentry32)
+#define MTRRIOC32_GET_PAGE_ENTRY _IOWR(MTRR_IOCTL_BASE, 8, struct mtrr_gentry32)
+#define MTRRIOC32_KILL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry32)
+
+
+static int mtrr_ioctl32(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+ struct mtrr_gentry g;
+ struct mtrr_sentry s;
+ int get = 0, err = 0;
+ struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)arg;
+ mm_segment_t oldfs = get_fs();
+
+ switch (cmd) {
+#define SET(x) case MTRRIOC32_ ## x ## _ENTRY: cmd = MTRRIOC_ ## x ## _ENTRY; break
+#define GET(x) case MTRRIOC32_ ## x ## _ENTRY: cmd = MTRRIOC_ ## x ## _ENTRY; get=1; break
+ SET(ADD);
+ SET(SET);
+ SET(DEL);
+ GET(GET);
+ SET(KILL);
+ SET(ADD_PAGE);
+ SET(SET_PAGE);
+ SET(DEL_PAGE);
+ GET(GET_PAGE);
+ SET(KILL_PAGE);
+ }
+
+ if (get) {
+ err = get_user(g.regnum, &g32->regnum);
+ err |= get_user(g.base, &g32->base);
+ err |= get_user(g.size, &g32->size);
+ err |= get_user(g.type, &g32->type);
+
+ arg = (unsigned long)&g;
+ } else {
+ struct mtrr_sentry32 __user *s32 = (struct mtrr_sentry32 __user *)arg;
+ err = get_user(s.base, &s32->base);
+ err |= get_user(s.size, &s32->size);
+ err |= get_user(s.type, &s32->type);
+
+ arg = (unsigned long)&s;
+ }
+ if (err) return err;
+
+ set_fs(KERNEL_DS);
+ err = sys_ioctl(fd, cmd, arg);
+ set_fs(oldfs);
+
+ if (!err && get) {
+ err = put_user(g.base, &g32->base);
+ err |= put_user(g.size, &g32->size);
+ err |= put_user(g.regnum, &g32->regnum);
+ err |= put_user(g.type, &g32->type);
+ }
+ return err;
+}
+
+#define HANDLE_IOCTL(cmd,handler) { (cmd), (ioctl_trans_handler_t)(handler) },
+#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd,sys_ioctl)
+
+struct ioctl_trans ioctl_start[] = {
+#include <linux/compat_ioctl.h>
+#define DECLARES
+#include "compat_ioctl.c"
+COMPATIBLE_IOCTL(HDIO_SET_KEEPSETTINGS)
+COMPATIBLE_IOCTL(HDIO_SCAN_HWIF)
+COMPATIBLE_IOCTL(BLKRASET)
+COMPATIBLE_IOCTL(0x4B50) /* KDGHWCLK - not in the kernel, but don't complain */
+COMPATIBLE_IOCTL(0x4B51) /* KDSHWCLK - not in the kernel, but don't complain */
+COMPATIBLE_IOCTL(FIOQSIZE)
+
+/* And these ioctls need translation */
+HANDLE_IOCTL(TIOCGDEV, tiocgdev)
+/* realtime device */
+HANDLE_IOCTL(RTC_IRQP_READ, rtc32_ioctl)
+HANDLE_IOCTL(RTC_IRQP_READ32,rtc32_ioctl)
+HANDLE_IOCTL(RTC_IRQP_SET32, rtc32_ioctl)
+HANDLE_IOCTL(RTC_EPOCH_READ32, rtc32_ioctl)
+HANDLE_IOCTL(RTC_EPOCH_SET32, rtc32_ioctl)
+/* take care of sizeof(sizeof()) breakage */
+/* mtrr */
+HANDLE_IOCTL(MTRRIOC32_ADD_ENTRY, mtrr_ioctl32)
+HANDLE_IOCTL(MTRRIOC32_SET_ENTRY, mtrr_ioctl32)
+HANDLE_IOCTL(MTRRIOC32_DEL_ENTRY, mtrr_ioctl32)
+HANDLE_IOCTL(MTRRIOC32_GET_ENTRY, mtrr_ioctl32)
+HANDLE_IOCTL(MTRRIOC32_KILL_ENTRY, mtrr_ioctl32)
+HANDLE_IOCTL(MTRRIOC32_ADD_PAGE_ENTRY, mtrr_ioctl32)
+HANDLE_IOCTL(MTRRIOC32_SET_PAGE_ENTRY, mtrr_ioctl32)
+HANDLE_IOCTL(MTRRIOC32_DEL_PAGE_ENTRY, mtrr_ioctl32)
+HANDLE_IOCTL(MTRRIOC32_GET_PAGE_ENTRY, mtrr_ioctl32)
+HANDLE_IOCTL(MTRRIOC32_KILL_PAGE_ENTRY, mtrr_ioctl32)
+};
+
+int ioctl_table_size = ARRAY_SIZE(ioctl_start);
+
diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c
new file mode 100644
index 000000000000..fbd09b5126ce
--- /dev/null
+++ b/arch/x86_64/ia32/ia32_signal.c
@@ -0,0 +1,621 @@
+/*
+ * linux/arch/x86_64/ia32/ia32_signal.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
+ * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
+ * 2000-12-* x86-64 compatibility mode signal handling by Andi Kleen
+ *
+ * $Id: ia32_signal.c,v 1.22 2002/07/29 10:34:03 ak Exp $
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/personality.h>
+#include <linux/compat.h>
+#include <asm/ucontext.h>
+#include <asm/uaccess.h>
+#include <asm/i387.h>
+#include <asm/ia32.h>
+#include <asm/ptrace.h>
+#include <asm/ia32_unistd.h>
+#include <asm/user32.h>
+#include <asm/sigcontext32.h>
+#include <asm/fpu32.h>
+#include <asm/proto.h>
+#include <asm/vsyscall32.h>
+
+#define DEBUG_SIG 0
+
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
+void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
+
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+{
+ int err;
+ if (!access_ok (VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
+ return -EFAULT;
+
+ /* If you change siginfo_t structure, please make sure that
+ this code is fixed accordingly.
+ It should never copy any pad contained in the structure
+ to avoid security leaks, but must copy the generic
+ 3 ints plus the relevant union member. */
+ err = __put_user(from->si_signo, &to->si_signo);
+ err |= __put_user(from->si_errno, &to->si_errno);
+ err |= __put_user((short)from->si_code, &to->si_code);
+
+ if (from->si_code < 0) {
+ err |= __put_user(from->si_pid, &to->si_pid);
+ err |= __put_user(from->si_uid, &to->si_uid);
+ err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr);
+ } else {
+ /* First 32bits of unions are always present:
+ * si_pid === si_band === si_tid === si_addr(LS half) */
+ err |= __put_user(from->_sifields._pad[0], &to->_sifields._pad[0]);
+ switch (from->si_code >> 16) {
+ case __SI_FAULT >> 16:
+ break;
+ case __SI_CHLD >> 16:
+ err |= __put_user(from->si_utime, &to->si_utime);
+ err |= __put_user(from->si_stime, &to->si_stime);
+ err |= __put_user(from->si_status, &to->si_status);
+ /* FALL THROUGH */
+ default:
+ case __SI_KILL >> 16:
+ err |= __put_user(from->si_uid, &to->si_uid);
+ break;
+ case __SI_POLL >> 16:
+ err |= __put_user(from->si_fd, &to->si_fd);
+ break;
+ case __SI_TIMER >> 16:
+ err |= __put_user(from->si_overrun, &to->si_overrun);
+ err |= __put_user(ptr_to_compat(from->si_ptr),
+ &to->si_ptr);
+ break;
+ case __SI_RT >> 16: /* This is not generated by the kernel as of now. */
+ case __SI_MESGQ >> 16:
+ err |= __put_user(from->si_uid, &to->si_uid);
+ err |= __put_user(from->si_int, &to->si_int);
+ break;
+ }
+ }
+ return err;
+}
+
+int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
+{
+ int err;
+ u32 ptr32;
+ if (!access_ok (VERIFY_READ, from, sizeof(compat_siginfo_t)))
+ return -EFAULT;
+
+ err = __get_user(to->si_signo, &from->si_signo);
+ err |= __get_user(to->si_errno, &from->si_errno);
+ err |= __get_user(to->si_code, &from->si_code);
+
+ err |= __get_user(to->si_pid, &from->si_pid);
+ err |= __get_user(to->si_uid, &from->si_uid);
+ err |= __get_user(ptr32, &from->si_ptr);
+ to->si_ptr = compat_ptr(ptr32);
+
+ return err;
+}
+
+asmlinkage long
+sys32_sigsuspend(int history0, int history1, old_sigset_t mask,
+ struct pt_regs *regs)
+{
+ sigset_t saveset;
+
+ mask &= _BLOCKABLE;
+ spin_lock_irq(&current->sighand->siglock);
+ saveset = current->blocked;
+ siginitset(&current->blocked, mask);
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+
+ regs->rax = -EINTR;
+ while (1) {
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+ if (do_signal(regs, &saveset))
+ return -EINTR;
+ }
+}
+
+asmlinkage long
+sys32_sigaltstack(const stack_ia32_t __user *uss_ptr,
+ stack_ia32_t __user *uoss_ptr,
+ struct pt_regs *regs)
+{
+ stack_t uss,uoss;
+ int ret;
+ mm_segment_t seg;
+ if (uss_ptr) {
+ u32 ptr;
+ memset(&uss,0,sizeof(stack_t));
+ if (!access_ok(VERIFY_READ,uss_ptr,sizeof(stack_ia32_t)) ||
+ __get_user(ptr, &uss_ptr->ss_sp) ||
+ __get_user(uss.ss_flags, &uss_ptr->ss_flags) ||
+ __get_user(uss.ss_size, &uss_ptr->ss_size))
+ return -EFAULT;
+ uss.ss_sp = compat_ptr(ptr);
+ }
+ seg = get_fs();
+ set_fs(KERNEL_DS);
+ ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->rsp);
+ set_fs(seg);
+ if (ret >= 0 && uoss_ptr) {
+ if (!access_ok(VERIFY_WRITE,uoss_ptr,sizeof(stack_ia32_t)) ||
+ __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) ||
+ __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) ||
+ __put_user(uoss.ss_size, &uoss_ptr->ss_size))
+ ret = -EFAULT;
+ }
+ return ret;
+}
+
+/*
+ * Do a signal return; undo the signal stack.
+ */
+
+struct sigframe
+{
+ u32 pretcode;
+ int sig;
+ struct sigcontext_ia32 sc;
+ struct _fpstate_ia32 fpstate;
+ unsigned int extramask[_COMPAT_NSIG_WORDS-1];
+ char retcode[8];
+};
+
+struct rt_sigframe
+{
+ u32 pretcode;
+ int sig;
+ u32 pinfo;
+ u32 puc;
+ compat_siginfo_t info;
+ struct ucontext_ia32 uc;
+ struct _fpstate_ia32 fpstate;
+ char retcode[8];
+};
+
+static int
+ia32_restore_sigcontext(struct pt_regs *regs, struct sigcontext_ia32 __user *sc, unsigned int *peax)
+{
+ unsigned int err = 0;
+
+ /* Always make any pending restarted system calls return -EINTR */
+ current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+#if DEBUG_SIG
+ printk("SIG restore_sigcontext: sc=%p err(%x) eip(%x) cs(%x) flg(%x)\n",
+ sc, sc->err, sc->eip, sc->cs, sc->eflags);
+#endif
+#define COPY(x) { \
+ unsigned int reg; \
+ err |= __get_user(reg, &sc->e ##x); \
+ regs->r ## x = reg; \
+}
+
+#define RELOAD_SEG(seg,mask) \
+ { unsigned int cur; \
+ unsigned short pre; \
+ err |= __get_user(pre, &sc->seg); \
+ asm volatile("movl %%" #seg ",%0" : "=r" (cur)); \
+ pre |= mask; \
+ if (pre != cur) loadsegment(seg,pre); }
+
+ /* Reload fs and gs if they have changed in the signal handler.
+ This does not handle long fs/gs base changes in the handler, but
+ does not clobber them at least in the normal case. */
+
+ {
+ unsigned gs, oldgs;
+ err |= __get_user(gs, &sc->gs);
+ gs |= 3;
+ asm("movl %%gs,%0" : "=r" (oldgs));
+ if (gs != oldgs)
+ load_gs_index(gs);
+ }
+ RELOAD_SEG(fs,3);
+ RELOAD_SEG(ds,3);
+ RELOAD_SEG(es,3);
+
+ COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
+ COPY(dx); COPY(cx); COPY(ip);
+ /* Don't touch extended registers */
+
+ err |= __get_user(regs->cs, &sc->cs);
+ regs->cs |= 3;
+ err |= __get_user(regs->ss, &sc->ss);
+ regs->ss |= 3;
+
+ {
+ unsigned int tmpflags;
+ err |= __get_user(tmpflags, &sc->eflags);
+ regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
+ regs->orig_rax = -1; /* disable syscall checks */
+ }
+
+ {
+ u32 tmp;
+ struct _fpstate_ia32 __user * buf;
+ err |= __get_user(tmp, &sc->fpstate);
+ buf = compat_ptr(tmp);
+ if (buf) {
+ if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
+ goto badframe;
+ err |= restore_i387_ia32(current, buf, 0);
+ } else {
+ struct task_struct *me = current;
+ if (used_math()) {
+ clear_fpu(me);
+ clear_used_math();
+ }
+ }
+ }
+
+ {
+ u32 tmp;
+ err |= __get_user(tmp, &sc->eax);
+ *peax = tmp;
+ }
+ return err;
+
+badframe:
+ return 1;
+}
+
+asmlinkage long sys32_sigreturn(struct pt_regs *regs)
+{
+ struct sigframe __user *frame = (struct sigframe __user *)(regs->rsp-8);
+ sigset_t set;
+ unsigned int eax;
+
+ if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+ goto badframe;
+ if (__get_user(set.sig[0], &frame->sc.oldmask)
+ || (_COMPAT_NSIG_WORDS > 1
+ && __copy_from_user((((char *) &set.sig) + 4), &frame->extramask,
+ sizeof(frame->extramask))))
+ goto badframe;
+
+ sigdelsetmask(&set, ~_BLOCKABLE);
+ spin_lock_irq(&current->sighand->siglock);
+ current->blocked = set;
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+
+ if (ia32_restore_sigcontext(regs, &frame->sc, &eax))
+ goto badframe;
+ return eax;
+
+badframe:
+ signal_fault(regs, frame, "32bit sigreturn");
+ return 0;
+}
+
+asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
+{
+ struct rt_sigframe __user *frame;
+ sigset_t set;
+ unsigned int eax;
+ struct pt_regs tregs;
+
+ frame = (struct rt_sigframe __user *)(regs->rsp - 4);
+
+ if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+ goto badframe;
+ if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+ goto badframe;
+
+ sigdelsetmask(&set, ~_BLOCKABLE);
+ spin_lock_irq(&current->sighand->siglock);
+ current->blocked = set;
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+
+ if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax))
+ goto badframe;
+
+ tregs = *regs;
+ if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT)
+ goto badframe;
+
+ return eax;
+
+badframe:
+ signal_fault(regs,frame,"32bit rt sigreturn");
+ return 0;
+}
+
+/*
+ * Set up a signal frame.
+ */
+
+static int
+ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, struct _fpstate_ia32 __user *fpstate,
+ struct pt_regs *regs, unsigned int mask)
+{
+ int tmp, err = 0;
+ u32 eflags;
+
+ tmp = 0;
+ __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp));
+ err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
+ __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp));
+ err |= __put_user(tmp, (unsigned int __user *)&sc->fs);
+ __asm__("movl %%ds,%0" : "=r"(tmp): "0"(tmp));
+ err |= __put_user(tmp, (unsigned int __user *)&sc->ds);
+ __asm__("movl %%es,%0" : "=r"(tmp): "0"(tmp));
+ err |= __put_user(tmp, (unsigned int __user *)&sc->es);
+
+ err |= __put_user((u32)regs->rdi, &sc->edi);
+ err |= __put_user((u32)regs->rsi, &sc->esi);
+ err |= __put_user((u32)regs->rbp, &sc->ebp);
+ err |= __put_user((u32)regs->rsp, &sc->esp);
+ err |= __put_user((u32)regs->rbx, &sc->ebx);
+ err |= __put_user((u32)regs->rdx, &sc->edx);
+ err |= __put_user((u32)regs->rcx, &sc->ecx);
+ err |= __put_user((u32)regs->rax, &sc->eax);
+ err |= __put_user((u32)regs->cs, &sc->cs);
+ err |= __put_user((u32)regs->ss, &sc->ss);
+ err |= __put_user(current->thread.trap_no, &sc->trapno);
+ err |= __put_user(current->thread.error_code, &sc->err);
+ err |= __put_user((u32)regs->rip, &sc->eip);
+ eflags = regs->eflags;
+ if (current->ptrace & PT_PTRACED)
+ eflags &= ~TF_MASK;
+ err |= __put_user((u32)eflags, &sc->eflags);
+ err |= __put_user((u32)regs->rsp, &sc->esp_at_signal);
+
+ tmp = save_i387_ia32(current, fpstate, regs, 0);
+ if (tmp < 0)
+ err = -EFAULT;
+ else {
+ clear_used_math();
+ stts();
+ err |= __put_user(ptr_to_compat(tmp ? fpstate : NULL),
+ &sc->fpstate);
+ }
+
+ /* non-iBCS2 extensions.. */
+ err |= __put_user(mask, &sc->oldmask);
+ err |= __put_user(current->thread.cr2, &sc->cr2);
+
+ return err;
+}
+
+/*
+ * Determine which stack to use..
+ */
+static void __user *
+get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
+{
+ unsigned long rsp;
+
+ /* Default to using normal stack */
+ rsp = regs->rsp;
+
+ /* This is the X/Open sanctioned signal stack switching. */
+ if (ka->sa.sa_flags & SA_ONSTACK) {
+ if (sas_ss_flags(rsp) == 0)
+ rsp = current->sas_ss_sp + current->sas_ss_size;
+ }
+
+ /* This is the legacy signal stack switching. */
+ else if ((regs->ss & 0xffff) != __USER_DS &&
+ !(ka->sa.sa_flags & SA_RESTORER) &&
+ ka->sa.sa_restorer) {
+ rsp = (unsigned long) ka->sa.sa_restorer;
+ }
+
+ return (void __user *)((rsp - frame_size) & -8UL);
+}
+
+void ia32_setup_frame(int sig, struct k_sigaction *ka,
+ compat_sigset_t *set, struct pt_regs * regs)
+{
+ struct sigframe __user *frame;
+ int err = 0;
+
+ frame = get_sigframe(ka, regs, sizeof(*frame));
+
+ if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+ goto give_sigsegv;
+
+ {
+ struct exec_domain *ed = current_thread_info()->exec_domain;
+ err |= __put_user((ed
+ && ed->signal_invmap
+ && sig < 32
+ ? ed->signal_invmap[sig]
+ : sig),
+ &frame->sig);
+ }
+ if (err)
+ goto give_sigsegv;
+
+ err |= ia32_setup_sigcontext(&frame->sc, &frame->fpstate, regs,
+ set->sig[0]);
+ if (err)
+ goto give_sigsegv;
+
+ if (_COMPAT_NSIG_WORDS > 1) {
+ err |= __copy_to_user(frame->extramask, &set->sig[1],
+ sizeof(frame->extramask));
+ }
+ if (err)
+ goto give_sigsegv;
+
+ /* Return stub is in 32bit vsyscall page */
+ {
+ void __user *restorer = VSYSCALL32_SIGRETURN;
+ if (ka->sa.sa_flags & SA_RESTORER)
+ restorer = ka->sa.sa_restorer;
+ err |= __put_user(ptr_to_compat(restorer), &frame->pretcode);
+ }
+ /* These are actually not used anymore, but left because some
+ gdb versions depend on them as a marker. */
+ {
+ /* copy_to_user optimizes that into a single 8 byte store */
+ static const struct {
+ u16 poplmovl;
+ u32 val;
+ u16 int80;
+ u16 pad;
+ } __attribute__((packed)) code = {
+ 0xb858, /* popl %eax ; movl $...,%eax */
+ __NR_ia32_sigreturn,
+ 0x80cd, /* int $0x80 */
+ 0,
+ };
+ err |= __copy_to_user(frame->retcode, &code, 8);
+ }
+ if (err)
+ goto give_sigsegv;
+
+ /* Set up registers for signal handler */
+ regs->rsp = (unsigned long) frame;
+ regs->rip = (unsigned long) ka->sa.sa_handler;
+
+ asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
+ asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
+
+ regs->cs = __USER32_CS;
+ regs->ss = __USER32_DS;
+
+ set_fs(USER_DS);
+ if (regs->eflags & TF_MASK) {
+ if (current->ptrace & PT_PTRACED) {
+ ptrace_notify(SIGTRAP);
+ } else {
+ regs->eflags &= ~TF_MASK;
+ }
+ }
+
+#if DEBUG_SIG
+ printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
+ current->comm, current->pid, frame, regs->rip, frame->pretcode);
+#endif
+
+ return;
+
+give_sigsegv:
+ force_sigsegv(sig, current);
+}
+
+void ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ compat_sigset_t *set, struct pt_regs * regs)
+{
+ struct rt_sigframe __user *frame;
+ int err = 0;
+
+ frame = get_sigframe(ka, regs, sizeof(*frame));
+
+ if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+ goto give_sigsegv;
+
+ {
+ struct exec_domain *ed = current_thread_info()->exec_domain;
+ err |= __put_user((ed
+ && ed->signal_invmap
+ && sig < 32
+ ? ed->signal_invmap[sig]
+ : sig),
+ &frame->sig);
+ }
+ err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo);
+ err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc);
+ err |= copy_siginfo_to_user32(&frame->info, info);
+ if (err)
+ goto give_sigsegv;
+
+ /* Create the ucontext. */
+ err |= __put_user(0, &frame->uc.uc_flags);
+ err |= __put_user(0, &frame->uc.uc_link);
+ err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+ err |= __put_user(sas_ss_flags(regs->rsp),
+ &frame->uc.uc_stack.ss_flags);
+ err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
+ regs, set->sig[0]);
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+ if (err)
+ goto give_sigsegv;
+
+
+ {
+ void __user *restorer = VSYSCALL32_RTSIGRETURN;
+ if (ka->sa.sa_flags & SA_RESTORER)
+ restorer = ka->sa.sa_restorer;
+ err |= __put_user(ptr_to_compat(restorer), &frame->pretcode);
+ }
+
+ /* This is movl $,%eax ; int $0x80 */
+ /* Not actually used anymore, but left because some gdb versions
+ need it. */
+ {
+ /* __copy_to_user optimizes that into a single 8 byte store */
+ static const struct {
+ u8 movl;
+ u32 val;
+ u16 int80;
+ u16 pad;
+ u8 pad2;
+ } __attribute__((packed)) code = {
+ 0xb8,
+ __NR_ia32_rt_sigreturn,
+ 0x80cd,
+ 0,
+ };
+ err |= __copy_to_user(frame->retcode, &code, 8);
+ }
+ if (err)
+ goto give_sigsegv;
+
+ /* Set up registers for signal handler */
+ regs->rsp = (unsigned long) frame;
+ regs->rip = (unsigned long) ka->sa.sa_handler;
+
+ asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
+ asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
+
+ regs->cs = __USER32_CS;
+ regs->ss = __USER32_DS;
+
+ set_fs(USER_DS);
+ if (regs->eflags & TF_MASK) {
+ if (current->ptrace & PT_PTRACED) {
+ ptrace_notify(SIGTRAP);
+ } else {
+ regs->eflags &= ~TF_MASK;
+ }
+ }
+
+#if DEBUG_SIG
+ printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
+ current->comm, current->pid, frame, regs->rip, frame->pretcode);
+#endif
+
+ return;
+
+give_sigsegv:
+ force_sigsegv(sig, current);
+}
+
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
new file mode 100644
index 000000000000..f3ca0db85b5b
--- /dev/null
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -0,0 +1,602 @@
+/*
+ * Compatibility mode system call entry point for x86-64.
+ *
+ * Copyright 2000-2002 Andi Kleen, SuSE Labs.
+ */
+
+#include <asm/dwarf2.h>
+#include <asm/calling.h>
+#include <asm/offset.h>
+#include <asm/current.h>
+#include <asm/errno.h>
+#include <asm/ia32_unistd.h>
+#include <asm/thread_info.h>
+#include <asm/segment.h>
+#include <asm/vsyscall32.h>
+#include <linux/linkage.h>
+
+ .macro IA32_ARG_FIXUP noebp=0
+ movl %edi,%r8d
+ .if \noebp
+ .else
+ movl %ebp,%r9d
+ .endif
+ xchg %ecx,%esi
+ movl %ebx,%edi
+ movl %edx,%edx /* zero extension */
+ .endm
+
+ /* clobbers %eax */
+ .macro CLEAR_RREGS
+ xorl %eax,%eax
+ movq %rax,R11(%rsp)
+ movq %rax,R10(%rsp)
+ movq %rax,R9(%rsp)
+ movq %rax,R8(%rsp)
+ .endm
+
+/*
+ * 32bit SYSENTER instruction entry.
+ *
+ * Arguments:
+ * %eax System call number.
+ * %ebx Arg1
+ * %ecx Arg2
+ * %edx Arg3
+ * %esi Arg4
+ * %edi Arg5
+ * %ebp user stack
+ * 0(%ebp) Arg6
+ *
+ * Interrupts off.
+ *
+ * This is purely a fast path. For anything complicated we use the int 0x80
+ * path below. Set up a complete hardware stack frame to share code
+ * with the int 0x80 path.
+ */
+ENTRY(ia32_sysenter_target)
+ CFI_STARTPROC
+ swapgs
+ movq %gs:pda_kernelstack, %rsp
+ addq $(PDA_STACKOFFSET),%rsp
+ sti
+ movl %ebp,%ebp /* zero extension */
+ pushq $__USER32_DS
+ pushq %rbp
+ pushfq
+ movl $VSYSCALL32_SYSEXIT, %r10d
+ pushq $__USER32_CS
+ movl %eax, %eax
+ pushq %r10
+ pushq %rax
+ cld
+ SAVE_ARGS 0,0,1
+ /* no need to do an access_ok check here because rbp has been
+ 32bit zero extended */
+1: movl (%rbp),%r9d
+ .section __ex_table,"a"
+ .quad 1b,ia32_badarg
+ .previous
+ GET_THREAD_INFO(%r10)
+ testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
+ jnz sysenter_tracesys
+sysenter_do_call:
+ cmpl $(IA32_NR_syscalls),%eax
+ jae ia32_badsys
+ IA32_ARG_FIXUP 1
+ call *ia32_sys_call_table(,%rax,8)
+ movq %rax,RAX-ARGOFFSET(%rsp)
+ GET_THREAD_INFO(%r10)
+ cli
+ testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
+ jnz int_ret_from_sys_call
+ /* clear IF, that popfq doesn't enable interrupts early */
+ andl $~0x200,EFLAGS-R11(%rsp)
+ RESTORE_ARGS 1,24,1,1,1,1
+ popfq
+ popq %rcx /* User %esp */
+ movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */
+ swapgs
+ sti /* sti only takes effect after the next instruction */
+ /* sysexit */
+ .byte 0xf, 0x35
+
+sysenter_tracesys:
+ SAVE_REST
+ CLEAR_RREGS
+ movq $-ENOSYS,RAX(%rsp) /* really needed? */
+ movq %rsp,%rdi /* &pt_regs -> arg1 */
+ call syscall_trace_enter
+ LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
+ RESTORE_REST
+ movl %ebp, %ebp
+ /* no need to do an access_ok check here because rbp has been
+ 32bit zero extended */
+1: movl (%rbp),%r9d
+ .section __ex_table,"a"
+ .quad 1b,ia32_badarg
+ .previous
+ jmp sysenter_do_call
+ CFI_ENDPROC
+
+/*
+ * 32bit SYSCALL instruction entry.
+ *
+ * Arguments:
+ * %eax System call number.
+ * %ebx Arg1
+ * %ecx return EIP
+ * %edx Arg3
+ * %esi Arg4
+ * %edi Arg5
+ * %ebp Arg2 [note: not saved in the stack frame, should not be touched]
+ * %esp user stack
+ * 0(%esp) Arg6
+ *
+ * Interrupts off.
+ *
+ * This is purely a fast path. For anything complicated we use the int 0x80
+ * path below. Set up a complete hardware stack frame to share code
+ * with the int 0x80 path.
+ */
+ENTRY(ia32_cstar_target)
+ CFI_STARTPROC
+ swapgs
+ movl %esp,%r8d
+ movq %gs:pda_kernelstack,%rsp
+ sti
+ SAVE_ARGS 8,1,1
+ movl %eax,%eax /* zero extension */
+ movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
+ movq %rcx,RIP-ARGOFFSET(%rsp)
+ movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */
+ movl %ebp,%ecx
+ movq $__USER32_CS,CS-ARGOFFSET(%rsp)
+ movq $__USER32_DS,SS-ARGOFFSET(%rsp)
+ movq %r11,EFLAGS-ARGOFFSET(%rsp)
+ movq %r8,RSP-ARGOFFSET(%rsp)
+ /* no need to do an access_ok check here because r8 has been
+ 32bit zero extended */
+ /* hardware stack frame is complete now */
+1: movl (%r8),%r9d
+ .section __ex_table,"a"
+ .quad 1b,ia32_badarg
+ .previous
+ GET_THREAD_INFO(%r10)
+ testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
+ jnz cstar_tracesys
+cstar_do_call:
+ cmpl $IA32_NR_syscalls,%eax
+ jae ia32_badsys
+ IA32_ARG_FIXUP 1
+ call *ia32_sys_call_table(,%rax,8)
+ movq %rax,RAX-ARGOFFSET(%rsp)
+ GET_THREAD_INFO(%r10)
+ cli
+ testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
+ jnz int_ret_from_sys_call
+ RESTORE_ARGS 1,-ARG_SKIP,1,1,1
+ movl RIP-ARGOFFSET(%rsp),%ecx
+ movl EFLAGS-ARGOFFSET(%rsp),%r11d
+ movl RSP-ARGOFFSET(%rsp),%esp
+ swapgs
+ sysretl
+
+cstar_tracesys:
+ SAVE_REST
+ CLEAR_RREGS
+ movq $-ENOSYS,RAX(%rsp) /* really needed? */
+ movq %rsp,%rdi /* &pt_regs -> arg1 */
+ call syscall_trace_enter
+ LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
+ RESTORE_REST
+ movl RSP-ARGOFFSET(%rsp), %r8d
+ /* no need to do an access_ok check here because r8 has been
+ 32bit zero extended */
+1: movl (%r8),%r9d
+ .section __ex_table,"a"
+ .quad 1b,ia32_badarg
+ .previous
+ jmp cstar_do_call
+
+ia32_badarg:
+ movq $-EFAULT,%rax
+ jmp ia32_sysret
+ CFI_ENDPROC
+
+/*
+ * Emulated IA32 system calls via int 0x80.
+ *
+ * Arguments:
+ * %eax System call number.
+ * %ebx Arg1
+ * %ecx Arg2
+ * %edx Arg3
+ * %esi Arg4
+ * %edi Arg5
+ * %ebp Arg6 [note: not saved in the stack frame, should not be touched]
+ *
+ * Notes:
+ * Uses the same stack frame as the x86-64 version.
+ * All registers except %eax must be saved (but ptrace may violate that)
+ * Arguments are zero extended. For system calls that want sign extension and
+ * take long arguments a wrapper is needed. Most calls can just be called
+ * directly.
+ * Assumes it is only called from user space and entered with interrupts off.
+ */
+
+ENTRY(ia32_syscall)
+ CFI_STARTPROC
+ swapgs
+ sti
+ movl %eax,%eax
+ pushq %rax
+ cld
+ /* note the registers are not zero extended to the sf.
+ this could be a problem. */
+ SAVE_ARGS 0,0,1
+ GET_THREAD_INFO(%r10)
+ testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
+ jnz ia32_tracesys
+ia32_do_syscall:
+ cmpl $(IA32_NR_syscalls),%eax
+ jae ia32_badsys
+ IA32_ARG_FIXUP
+ call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
+ia32_sysret:
+ movq %rax,RAX-ARGOFFSET(%rsp)
+ jmp int_ret_from_sys_call
+
+ia32_tracesys:
+ SAVE_REST
+ movq $-ENOSYS,RAX(%rsp) /* really needed? */
+ movq %rsp,%rdi /* &pt_regs -> arg1 */
+ call syscall_trace_enter
+ LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
+ RESTORE_REST
+ jmp ia32_do_syscall
+
+ia32_badsys:
+ movq $0,ORIG_RAX-ARGOFFSET(%rsp)
+ movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
+ jmp int_ret_from_sys_call
+
+ni_syscall:
+ movq %rax,%rdi
+ jmp sys32_ni_syscall
+
+quiet_ni_syscall:
+ movq $-ENOSYS,%rax
+ ret
+ CFI_ENDPROC
+
+ .macro PTREGSCALL label, func, arg
+ .globl \label
+\label:
+ leaq \func(%rip),%rax
+ leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
+ jmp ia32_ptregs_common
+ .endm
+
+ PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
+ PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
+ PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
+ PTREGSCALL stub32_sigsuspend, sys32_sigsuspend, %rcx
+ PTREGSCALL stub32_execve, sys32_execve, %rcx
+ PTREGSCALL stub32_fork, sys_fork, %rdi
+ PTREGSCALL stub32_clone, sys32_clone, %rdx
+ PTREGSCALL stub32_vfork, sys_vfork, %rdi
+ PTREGSCALL stub32_iopl, sys_iopl, %rsi
+ PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend, %rdx
+
+ENTRY(ia32_ptregs_common)
+ CFI_STARTPROC
+ popq %r11
+ SAVE_REST
+ call *%rax
+ RESTORE_REST
+ jmp ia32_sysret /* misbalances the return cache */
+ CFI_ENDPROC
+
+ .data
+ .align 8
+ .globl ia32_sys_call_table
+ia32_sys_call_table:
+ .quad sys_restart_syscall
+ .quad sys_exit
+ .quad stub32_fork
+ .quad sys_read
+ .quad sys_write
+ .quad sys32_open /* 5 */
+ .quad sys_close
+ .quad sys32_waitpid
+ .quad sys_creat
+ .quad sys_link
+ .quad sys_unlink /* 10 */
+ .quad stub32_execve
+ .quad sys_chdir
+ .quad compat_sys_time
+ .quad sys_mknod
+ .quad sys_chmod /* 15 */
+ .quad sys_lchown16
+ .quad quiet_ni_syscall /* old break syscall holder */
+ .quad sys_stat
+ .quad sys32_lseek
+ .quad sys_getpid /* 20 */
+ .quad compat_sys_mount /* mount */
+ .quad sys_oldumount /* old_umount */
+ .quad sys_setuid16
+ .quad sys_getuid16
+ .quad compat_sys_stime /* stime */ /* 25 */
+ .quad sys32_ptrace /* ptrace */
+ .quad sys_alarm
+ .quad sys_fstat /* (old)fstat */
+ .quad sys_pause
+ .quad compat_sys_utime /* 30 */
+ .quad quiet_ni_syscall /* old stty syscall holder */
+ .quad quiet_ni_syscall /* old gtty syscall holder */
+ .quad sys_access
+ .quad sys_nice
+ .quad quiet_ni_syscall /* 35 */ /* old ftime syscall holder */
+ .quad sys_sync
+ .quad sys32_kill
+ .quad sys_rename
+ .quad sys_mkdir
+ .quad sys_rmdir /* 40 */
+ .quad sys_dup
+ .quad sys32_pipe
+ .quad compat_sys_times
+ .quad quiet_ni_syscall /* old prof syscall holder */
+ .quad sys_brk /* 45 */
+ .quad sys_setgid16
+ .quad sys_getgid16
+ .quad sys_signal
+ .quad sys_geteuid16
+ .quad sys_getegid16 /* 50 */
+ .quad sys_acct
+ .quad sys_umount /* new_umount */
+ .quad quiet_ni_syscall /* old lock syscall holder */
+ .quad compat_sys_ioctl
+ .quad compat_sys_fcntl64 /* 55 */
+ .quad quiet_ni_syscall /* old mpx syscall holder */
+ .quad sys_setpgid
+ .quad quiet_ni_syscall /* old ulimit syscall holder */
+ .quad sys32_olduname
+ .quad sys_umask /* 60 */
+ .quad sys_chroot
+ .quad sys32_ustat
+ .quad sys_dup2
+ .quad sys_getppid
+ .quad sys_getpgrp /* 65 */
+ .quad sys_setsid
+ .quad sys32_sigaction
+ .quad sys_sgetmask
+ .quad sys_ssetmask
+ .quad sys_setreuid16 /* 70 */
+ .quad sys_setregid16
+ .quad stub32_sigsuspend
+ .quad compat_sys_sigpending
+ .quad sys_sethostname
+ .quad compat_sys_setrlimit /* 75 */
+ .quad compat_sys_old_getrlimit /* old_getrlimit */
+ .quad compat_sys_getrusage
+ .quad sys32_gettimeofday
+ .quad sys32_settimeofday
+ .quad sys_getgroups16 /* 80 */
+ .quad sys_setgroups16
+ .quad sys32_old_select
+ .quad sys_symlink
+ .quad sys_lstat
+ .quad sys_readlink /* 85 */
+#ifdef CONFIG_IA32_AOUT
+ .quad sys_uselib
+#else
+ .quad quiet_ni_syscall
+#endif
+ .quad sys_swapon
+ .quad sys_reboot
+ .quad compat_sys_old_readdir
+ .quad sys32_mmap /* 90 */
+ .quad sys_munmap
+ .quad sys_truncate
+ .quad sys_ftruncate
+ .quad sys_fchmod
+ .quad sys_fchown16 /* 95 */
+ .quad sys_getpriority
+ .quad sys_setpriority
+ .quad quiet_ni_syscall /* old profil syscall holder */
+ .quad compat_sys_statfs
+ .quad compat_sys_fstatfs /* 100 */
+ .quad sys_ioperm
+ .quad compat_sys_socketcall
+ .quad sys_syslog
+ .quad compat_sys_setitimer
+ .quad compat_sys_getitimer /* 105 */
+ .quad compat_sys_newstat
+ .quad compat_sys_newlstat
+ .quad compat_sys_newfstat
+ .quad sys32_uname
+ .quad stub32_iopl /* 110 */
+ .quad sys_vhangup
+ .quad quiet_ni_syscall /* old "idle" system call */
+ .quad sys32_vm86_warning /* vm86old */
+ .quad compat_sys_wait4
+ .quad sys_swapoff /* 115 */
+ .quad sys32_sysinfo
+ .quad sys32_ipc
+ .quad sys_fsync
+ .quad stub32_sigreturn
+ .quad stub32_clone /* 120 */
+ .quad sys_setdomainname
+ .quad sys_uname
+ .quad sys_modify_ldt
+ .quad sys32_adjtimex
+ .quad sys32_mprotect /* 125 */
+ .quad compat_sys_sigprocmask
+ .quad quiet_ni_syscall /* create_module */
+ .quad sys_init_module
+ .quad sys_delete_module
+ .quad quiet_ni_syscall /* 130 get_kernel_syms */
+ .quad sys_quotactl
+ .quad sys_getpgid
+ .quad sys_fchdir
+ .quad quiet_ni_syscall /* bdflush */
+ .quad sys_sysfs /* 135 */
+ .quad sys_personality
+ .quad quiet_ni_syscall /* for afs_syscall */
+ .quad sys_setfsuid16
+ .quad sys_setfsgid16
+ .quad sys_llseek /* 140 */
+ .quad compat_sys_getdents
+ .quad compat_sys_select
+ .quad sys_flock
+ .quad sys_msync
+ .quad compat_sys_readv /* 145 */
+ .quad compat_sys_writev
+ .quad sys_getsid
+ .quad sys_fdatasync
+ .quad sys32_sysctl /* sysctl */
+ .quad sys_mlock /* 150 */
+ .quad sys_munlock
+ .quad sys_mlockall
+ .quad sys_munlockall
+ .quad sys_sched_setparam
+ .quad sys_sched_getparam /* 155 */
+ .quad sys_sched_setscheduler
+ .quad sys_sched_getscheduler
+ .quad sys_sched_yield
+ .quad sys_sched_get_priority_max
+ .quad sys_sched_get_priority_min /* 160 */
+ .quad sys_sched_rr_get_interval
+ .quad compat_sys_nanosleep
+ .quad sys_mremap
+ .quad sys_setresuid16
+ .quad sys_getresuid16 /* 165 */
+ .quad sys32_vm86_warning /* vm86 */
+ .quad quiet_ni_syscall /* query_module */
+ .quad sys_poll
+ .quad compat_sys_nfsservctl
+ .quad sys_setresgid16 /* 170 */
+ .quad sys_getresgid16
+ .quad sys_prctl
+ .quad stub32_rt_sigreturn
+ .quad sys32_rt_sigaction
+ .quad sys32_rt_sigprocmask /* 175 */
+ .quad sys32_rt_sigpending
+ .quad compat_sys_rt_sigtimedwait
+ .quad sys32_rt_sigqueueinfo
+ .quad stub32_rt_sigsuspend
+ .quad sys32_pread /* 180 */
+ .quad sys32_pwrite
+ .quad sys_chown16
+ .quad sys_getcwd
+ .quad sys_capget
+ .quad sys_capset
+ .quad stub32_sigaltstack
+ .quad sys32_sendfile
+ .quad quiet_ni_syscall /* streams1 */
+ .quad quiet_ni_syscall /* streams2 */
+ .quad stub32_vfork /* 190 */
+ .quad compat_sys_getrlimit
+ .quad sys32_mmap2
+ .quad sys32_truncate64
+ .quad sys32_ftruncate64
+ .quad sys32_stat64 /* 195 */
+ .quad sys32_lstat64
+ .quad sys32_fstat64
+ .quad sys_lchown
+ .quad sys_getuid
+ .quad sys_getgid /* 200 */
+ .quad sys_geteuid
+ .quad sys_getegid
+ .quad sys_setreuid
+ .quad sys_setregid
+ .quad sys_getgroups /* 205 */
+ .quad sys_setgroups
+ .quad sys_fchown
+ .quad sys_setresuid
+ .quad sys_getresuid
+ .quad sys_setresgid /* 210 */
+ .quad sys_getresgid
+ .quad sys_chown
+ .quad sys_setuid
+ .quad sys_setgid
+ .quad sys_setfsuid /* 215 */
+ .quad sys_setfsgid
+ .quad sys_pivot_root
+ .quad sys_mincore
+ .quad sys_madvise
+ .quad compat_sys_getdents64 /* 220 getdents64 */
+ .quad compat_sys_fcntl64
+ .quad quiet_ni_syscall /* tux */
+ .quad quiet_ni_syscall /* security */
+ .quad sys_gettid
+ .quad sys_readahead /* 225 */
+ .quad sys_setxattr
+ .quad sys_lsetxattr
+ .quad sys_fsetxattr
+ .quad sys_getxattr
+ .quad sys_lgetxattr /* 230 */
+ .quad sys_fgetxattr
+ .quad sys_listxattr
+ .quad sys_llistxattr
+ .quad sys_flistxattr
+ .quad sys_removexattr /* 235 */
+ .quad sys_lremovexattr
+ .quad sys_fremovexattr
+ .quad sys_tkill
+ .quad sys_sendfile64
+ .quad compat_sys_futex /* 240 */
+ .quad compat_sys_sched_setaffinity
+ .quad compat_sys_sched_getaffinity
+ .quad sys32_set_thread_area
+ .quad sys32_get_thread_area
+ .quad compat_sys_io_setup /* 245 */
+ .quad sys_io_destroy
+ .quad compat_sys_io_getevents
+ .quad compat_sys_io_submit
+ .quad sys_io_cancel
+ .quad sys_fadvise64 /* 250 */
+ .quad quiet_ni_syscall /* free_huge_pages */
+ .quad sys_exit_group
+ .quad sys32_lookup_dcookie
+ .quad sys_epoll_create
+ .quad sys_epoll_ctl /* 255 */
+ .quad sys_epoll_wait
+ .quad sys_remap_file_pages
+ .quad sys_set_tid_address
+ .quad sys32_timer_create
+ .quad compat_sys_timer_settime /* 260 */
+ .quad compat_sys_timer_gettime
+ .quad sys_timer_getoverrun
+ .quad sys_timer_delete
+ .quad compat_sys_clock_settime
+ .quad compat_sys_clock_gettime /* 265 */
+ .quad compat_sys_clock_getres
+ .quad compat_sys_clock_nanosleep
+ .quad compat_sys_statfs64
+ .quad compat_sys_fstatfs64
+ .quad sys_tgkill /* 270 */
+ .quad compat_sys_utimes
+ .quad sys32_fadvise64_64
+ .quad quiet_ni_syscall /* sys_vserver */
+ .quad sys_mbind
+ .quad compat_sys_get_mempolicy /* 275 */
+ .quad sys_set_mempolicy
+ .quad compat_sys_mq_open
+ .quad sys_mq_unlink
+ .quad compat_sys_mq_timedsend
+ .quad compat_sys_mq_timedreceive /* 280 */
+ .quad compat_sys_mq_notify
+ .quad compat_sys_mq_getsetattr
+ .quad quiet_ni_syscall /* reserved for kexec */
+ .quad compat_sys_waitid
+ .quad quiet_ni_syscall /* sys_altroot */
+ .quad sys_add_key
+ .quad sys_request_key
+ .quad sys_keyctl
+ /* don't forget to change IA32_NR_syscalls */
+ia32_syscall_end:
+ .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8
+ .quad ni_syscall
+ .endr
diff --git a/arch/x86_64/ia32/ipc32.c b/arch/x86_64/ia32/ipc32.c
new file mode 100644
index 000000000000..369151dc3213
--- /dev/null
+++ b/arch/x86_64/ia32/ipc32.c
@@ -0,0 +1,57 @@
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/syscalls.h>
+#include <linux/time.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/ipc.h>
+#include <linux/compat.h>
+
+#include <asm-i386/ipc.h>
+
+asmlinkage long
+sys32_ipc(u32 call, int first, int second, int third,
+ compat_uptr_t ptr, u32 fifth)
+{
+ int version;
+
+ version = call >> 16; /* hack for backward compatibility */
+ call &= 0xffff;
+
+ switch (call) {
+ case SEMOP:
+ /* struct sembuf is the same on 32 and 64bit :)) */
+ return sys_semtimedop(first, compat_ptr(ptr), second, NULL);
+ case SEMTIMEDOP:
+ return compat_sys_semtimedop(first, compat_ptr(ptr), second,
+ compat_ptr(fifth));
+ case SEMGET:
+ return sys_semget(first, second, third);
+ case SEMCTL:
+ return compat_sys_semctl(first, second, third, compat_ptr(ptr));
+
+ case MSGSND:
+ return compat_sys_msgsnd(first, second, third, compat_ptr(ptr));
+ case MSGRCV:
+ return compat_sys_msgrcv(first, second, fifth, third,
+ version, compat_ptr(ptr));
+ case MSGGET:
+ return sys_msgget((key_t) first, second);
+ case MSGCTL:
+ return compat_sys_msgctl(first, second, compat_ptr(ptr));
+
+ case SHMAT:
+ return compat_sys_shmat(first, second, third, version,
+ compat_ptr(ptr));
+ break;
+ case SHMDT:
+ return sys_shmdt(compat_ptr(ptr));
+ case SHMGET:
+ return sys_shmget(first, (unsigned)second, third);
+ case SHMCTL:
+ return compat_sys_shmctl(first, second, compat_ptr(ptr));
+ }
+ return -ENOSYS;
+}
diff --git a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c
new file mode 100644
index 000000000000..b98b6d2462f6
--- /dev/null
+++ b/arch/x86_64/ia32/ptrace32.c
@@ -0,0 +1,379 @@
+/*
+ * 32bit ptrace for x86-64.
+ *
+ * Copyright 2001,2002 Andi Kleen, SuSE Labs.
+ * Some parts copied from arch/i386/kernel/ptrace.c. See that file for earlier
+ * copyright.
+ *
+ * This allows to access 64bit processes too; but there is no way to see the extended
+ * register contents.
+ *
+ * $Id: ptrace32.c,v 1.16 2003/03/14 16:06:35 ak Exp $
+ */
+
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/sched.h>
+#include <linux/syscalls.h>
+#include <linux/unistd.h>
+#include <linux/mm.h>
+#include <linux/ptrace.h>
+#include <asm/ptrace.h>
+#include <asm/compat.h>
+#include <asm/uaccess.h>
+#include <asm/user32.h>
+#include <asm/user.h>
+#include <asm/errno.h>
+#include <asm/debugreg.h>
+#include <asm/i387.h>
+#include <asm/fpu32.h>
+
+/* determines which flags the user has access to. */
+/* 1 = access 0 = no access */
+#define FLAG_MASK 0x44dd5UL
+
+#define R32(l,q) \
+ case offsetof(struct user32, regs.l): stack[offsetof(struct pt_regs, q)/8] = val; break
+
+static int putreg32(struct task_struct *child, unsigned regno, u32 val)
+{
+ int i;
+ __u64 *stack = (__u64 *)(child->thread.rsp0 - sizeof(struct pt_regs));
+
+ switch (regno) {
+ case offsetof(struct user32, regs.fs):
+ if (val && (val & 3) != 3) return -EIO;
+ child->thread.fs = val & 0xffff;
+ break;
+ case offsetof(struct user32, regs.gs):
+ if (val && (val & 3) != 3) return -EIO;
+ child->thread.gs = val & 0xffff;
+ break;
+ case offsetof(struct user32, regs.ds):
+ if (val && (val & 3) != 3) return -EIO;
+ child->thread.ds = val & 0xffff;
+ break;
+ case offsetof(struct user32, regs.es):
+ child->thread.es = val & 0xffff;
+ break;
+ case offsetof(struct user32, regs.ss):
+ if ((val & 3) != 3) return -EIO;
+ stack[offsetof(struct pt_regs, ss)/8] = val & 0xffff;
+ break;
+ case offsetof(struct user32, regs.cs):
+ if ((val & 3) != 3) return -EIO;
+ stack[offsetof(struct pt_regs, cs)/8] = val & 0xffff;
+ break;
+
+ R32(ebx, rbx);
+ R32(ecx, rcx);
+ R32(edx, rdx);
+ R32(edi, rdi);
+ R32(esi, rsi);
+ R32(ebp, rbp);
+ R32(eax, rax);
+ R32(orig_eax, orig_rax);
+ R32(eip, rip);
+ R32(esp, rsp);
+
+ case offsetof(struct user32, regs.eflags): {
+ __u64 *flags = &stack[offsetof(struct pt_regs, eflags)/8];
+ val &= FLAG_MASK;
+ *flags = val | (*flags & ~FLAG_MASK);
+ break;
+ }
+
+ case offsetof(struct user32, u_debugreg[4]):
+ case offsetof(struct user32, u_debugreg[5]):
+ return -EIO;
+
+ case offsetof(struct user32, u_debugreg[0]):
+ child->thread.debugreg0 = val;
+ break;
+
+ case offsetof(struct user32, u_debugreg[1]):
+ child->thread.debugreg1 = val;
+ break;
+
+ case offsetof(struct user32, u_debugreg[2]):
+ child->thread.debugreg2 = val;
+ break;
+
+ case offsetof(struct user32, u_debugreg[3]):
+ child->thread.debugreg3 = val;
+ break;
+
+ case offsetof(struct user32, u_debugreg[6]):
+ child->thread.debugreg6 = val;
+ break;
+
+ case offsetof(struct user32, u_debugreg[7]):
+ val &= ~DR_CONTROL_RESERVED;
+ /* See arch/i386/kernel/ptrace.c for an explanation of
+ * this awkward check.*/
+ for(i=0; i<4; i++)
+ if ((0x5454 >> ((val >> (16 + 4*i)) & 0xf)) & 1)
+ return -EIO;
+ child->thread.debugreg7 = val;
+ break;
+
+ default:
+ if (regno > sizeof(struct user32) || (regno & 3))
+ return -EIO;
+
+ /* Other dummy fields in the virtual user structure are ignored */
+ break;
+ }
+ return 0;
+}
+
+#undef R32
+
+#define R32(l,q) \
+ case offsetof(struct user32, regs.l): *val = stack[offsetof(struct pt_regs, q)/8]; break
+
+static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
+{
+ __u64 *stack = (__u64 *)(child->thread.rsp0 - sizeof(struct pt_regs));
+
+ switch (regno) {
+ case offsetof(struct user32, regs.fs):
+ *val = child->thread.fs;
+ break;
+ case offsetof(struct user32, regs.gs):
+ *val = child->thread.gs;
+ break;
+ case offsetof(struct user32, regs.ds):
+ *val = child->thread.ds;
+ break;
+ case offsetof(struct user32, regs.es):
+ *val = child->thread.es;
+ break;
+
+ R32(cs, cs);
+ R32(ss, ss);
+ R32(ebx, rbx);
+ R32(ecx, rcx);
+ R32(edx, rdx);
+ R32(edi, rdi);
+ R32(esi, rsi);
+ R32(ebp, rbp);
+ R32(eax, rax);
+ R32(orig_eax, orig_rax);
+ R32(eip, rip);
+ R32(eflags, eflags);
+ R32(esp, rsp);
+
+ case offsetof(struct user32, u_debugreg[0]):
+ *val = child->thread.debugreg0;
+ break;
+ case offsetof(struct user32, u_debugreg[1]):
+ *val = child->thread.debugreg1;
+ break;
+ case offsetof(struct user32, u_debugreg[2]):
+ *val = child->thread.debugreg2;
+ break;
+ case offsetof(struct user32, u_debugreg[3]):
+ *val = child->thread.debugreg3;
+ break;
+ case offsetof(struct user32, u_debugreg[6]):
+ *val = child->thread.debugreg6;
+ break;
+ case offsetof(struct user32, u_debugreg[7]):
+ *val = child->thread.debugreg7;
+ break;
+
+ default:
+ if (regno > sizeof(struct user32) || (regno & 3))
+ return -EIO;
+
+ /* Other dummy fields in the virtual user structure are ignored */
+ *val = 0;
+ break;
+ }
+ return 0;
+}
+
+#undef R32
+
+static struct task_struct *find_target(int request, int pid, int *err)
+{
+ struct task_struct *child;
+
+ *err = -EPERM;
+ if (pid == 1)
+ return NULL;
+
+ *err = -ESRCH;
+ read_lock(&tasklist_lock);
+ child = find_task_by_pid(pid);
+ if (child)
+ get_task_struct(child);
+ read_unlock(&tasklist_lock);
+ if (child) {
+ *err = -EPERM;
+ if (child->pid == 1)
+ goto out;
+ *err = ptrace_check_attach(child, request == PTRACE_KILL);
+ if (*err < 0)
+ goto out;
+ return child;
+ }
+ out:
+ if (child)
+ put_task_struct(child);
+ return NULL;
+
+}
+
+asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
+{
+ struct task_struct *child;
+ struct pt_regs *childregs;
+ void __user *datap = compat_ptr(data);
+ int ret;
+ __u32 val;
+
+ switch (request) {
+ default:
+ return sys_ptrace(request, pid, addr, data);
+
+ case PTRACE_PEEKTEXT:
+ case PTRACE_PEEKDATA:
+ case PTRACE_POKEDATA:
+ case PTRACE_POKETEXT:
+ case PTRACE_POKEUSR:
+ case PTRACE_PEEKUSR:
+ case PTRACE_GETREGS:
+ case PTRACE_SETREGS:
+ case PTRACE_SETFPREGS:
+ case PTRACE_GETFPREGS:
+ case PTRACE_SETFPXREGS:
+ case PTRACE_GETFPXREGS:
+ case PTRACE_GETEVENTMSG:
+ break;
+ }
+
+ child = find_target(request, pid, &ret);
+ if (!child)
+ return ret;
+
+ childregs = (struct pt_regs *)(child->thread.rsp0 - sizeof(struct pt_regs));
+
+ switch (request) {
+ case PTRACE_PEEKDATA:
+ case PTRACE_PEEKTEXT:
+ ret = 0;
+ if (access_process_vm(child, addr, &val, sizeof(u32), 0)!=sizeof(u32))
+ ret = -EIO;
+ else
+ ret = put_user(val, (unsigned int __user *)datap);
+ break;
+
+ case PTRACE_POKEDATA:
+ case PTRACE_POKETEXT:
+ ret = 0;
+ if (access_process_vm(child, addr, &data, sizeof(u32), 1)!=sizeof(u32))
+ ret = -EIO;
+ break;
+
+ case PTRACE_PEEKUSR:
+ ret = getreg32(child, addr, &val);
+ if (ret == 0)
+ ret = put_user(val, (__u32 __user *)datap);
+ break;
+
+ case PTRACE_POKEUSR:
+ ret = putreg32(child, addr, data);
+ break;
+
+ case PTRACE_GETREGS: { /* Get all gp regs from the child. */
+ int i;
+ if (!access_ok(VERIFY_WRITE, datap, 16*4)) {
+ ret = -EIO;
+ break;
+ }
+ ret = 0;
+ for ( i = 0; i <= 16*4 ; i += sizeof(__u32) ) {
+ getreg32(child, i, &val);
+ ret |= __put_user(val,(u32 __user *)datap);
+ datap += sizeof(u32);
+ }
+ break;
+ }
+
+ case PTRACE_SETREGS: { /* Set all gp regs in the child. */
+ unsigned long tmp;
+ int i;
+ if (!access_ok(VERIFY_READ, datap, 16*4)) {
+ ret = -EIO;
+ break;
+ }
+ ret = 0;
+ for ( i = 0; i <= 16*4; i += sizeof(u32) ) {
+ ret |= __get_user(tmp, (u32 __user *)datap);
+ putreg32(child, i, tmp);
+ datap += sizeof(u32);
+ }
+ break;
+ }
+
+ case PTRACE_GETFPREGS:
+ ret = -EIO;
+ if (!access_ok(VERIFY_READ, compat_ptr(data),
+ sizeof(struct user_i387_struct)))
+ break;
+ save_i387_ia32(child, datap, childregs, 1);
+ ret = 0;
+ break;
+
+ case PTRACE_SETFPREGS:
+ ret = -EIO;
+ if (!access_ok(VERIFY_WRITE, datap,
+ sizeof(struct user_i387_struct)))
+ break;
+ ret = 0;
+ /* don't check EFAULT to be bug-to-bug compatible to i386 */
+ restore_i387_ia32(child, datap, 1);
+ break;
+
+ case PTRACE_GETFPXREGS: {
+ struct user32_fxsr_struct __user *u = datap;
+ init_fpu(child);
+ ret = -EIO;
+ if (!access_ok(VERIFY_WRITE, u, sizeof(*u)))
+ break;
+ ret = -EFAULT;
+ if (__copy_to_user(u, &child->thread.i387.fxsave, sizeof(*u)))
+ break;
+ ret = __put_user(childregs->cs, &u->fcs);
+ ret |= __put_user(child->thread.ds, &u->fos);
+ break;
+ }
+ case PTRACE_SETFPXREGS: {
+ struct user32_fxsr_struct __user *u = datap;
+ unlazy_fpu(child);
+ ret = -EIO;
+ if (!access_ok(VERIFY_READ, u, sizeof(*u)))
+ break;
+ /* no checking to be bug-to-bug compatible with i386 */
+ __copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u));
+ set_stopped_child_used_math(child);
+ child->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
+ ret = 0;
+ break;
+ }
+
+ case PTRACE_GETEVENTMSG:
+ ret = put_user(child->ptrace_message,(unsigned int __user *)compat_ptr(data));
+ break;
+
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ put_task_struct(child);
+ return ret;
+}
+
diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c
new file mode 100644
index 000000000000..68a9ab06ee7c
--- /dev/null
+++ b/arch/x86_64/ia32/sys_ia32.c
@@ -0,0 +1,1050 @@
+/*
+ * sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Based on
+ * sys_sparc32
+ *
+ * Copyright (C) 2000 VA Linux Co
+ * Copyright (C) 2000 Don Dugger <n0ano@valinux.com>
+ * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 2000 Hewlett-Packard Co.
+ * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2000,2001,2002 Andi Kleen, SuSE Labs (x86-64 port)
+ *
+ * These routines maintain argument size conversion between 32bit and 64bit
+ * environment. In 2.5 most of this should be moved to a generic directory.
+ *
+ * This file assumes that there is a hole at the end of user address space.
+ *
+ * Some of the functions are LE specific currently. These are hopefully all marked.
+ * This should be fixed.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/signal.h>
+#include <linux/syscalls.h>
+#include <linux/resource.h>
+#include <linux/times.h>
+#include <linux/utsname.h>
+#include <linux/timex.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/mm.h>
+#include <linux/shm.h>
+#include <linux/slab.h>
+#include <linux/uio.h>
+#include <linux/nfs_fs.h>
+#include <linux/quota.h>
+#include <linux/module.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+#include <linux/nfsd/xdr.h>
+#include <linux/nfsd/syscall.h>
+#include <linux/poll.h>
+#include <linux/personality.h>
+#include <linux/stat.h>
+#include <linux/ipc.h>
+#include <linux/rwsem.h>
+#include <linux/binfmts.h>
+#include <linux/init.h>
+#include <linux/aio_abi.h>
+#include <linux/aio.h>
+#include <linux/compat.h>
+#include <linux/vfs.h>
+#include <linux/ptrace.h>
+#include <linux/highuid.h>
+#include <linux/vmalloc.h>
+#include <asm/mman.h>
+#include <asm/types.h>
+#include <asm/uaccess.h>
+#include <asm/semaphore.h>
+#include <asm/atomic.h>
+#include <asm/ldt.h>
+
+#include <net/scm.h>
+#include <net/sock.h>
+#include <asm/ia32.h>
+
+#define AA(__x) ((unsigned long)(__x))
+
+int cp_compat_stat(struct kstat *kbuf, struct compat_stat __user *ubuf)
+{
+ typeof(ubuf->st_uid) uid = 0;
+ typeof(ubuf->st_gid) gid = 0;
+ SET_UID(uid, kbuf->uid);
+ SET_GID(gid, kbuf->gid);
+ if (!old_valid_dev(kbuf->dev) || !old_valid_dev(kbuf->rdev))
+ return -EOVERFLOW;
+ if (kbuf->size >= 0x7fffffff)
+ return -EOVERFLOW;
+ if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct compat_stat)) ||
+ __put_user (old_encode_dev(kbuf->dev), &ubuf->st_dev) ||
+ __put_user (kbuf->ino, &ubuf->st_ino) ||
+ __put_user (kbuf->mode, &ubuf->st_mode) ||
+ __put_user (kbuf->nlink, &ubuf->st_nlink) ||
+ __put_user (uid, &ubuf->st_uid) ||
+ __put_user (gid, &ubuf->st_gid) ||
+ __put_user (old_encode_dev(kbuf->rdev), &ubuf->st_rdev) ||
+ __put_user (kbuf->size, &ubuf->st_size) ||
+ __put_user (kbuf->atime.tv_sec, &ubuf->st_atime) ||
+ __put_user (kbuf->atime.tv_nsec, &ubuf->st_atime_nsec) ||
+ __put_user (kbuf->mtime.tv_sec, &ubuf->st_mtime) ||
+ __put_user (kbuf->mtime.tv_nsec, &ubuf->st_mtime_nsec) ||
+ __put_user (kbuf->ctime.tv_sec, &ubuf->st_ctime) ||
+ __put_user (kbuf->ctime.tv_nsec, &ubuf->st_ctime_nsec) ||
+ __put_user (kbuf->blksize, &ubuf->st_blksize) ||
+ __put_user (kbuf->blocks, &ubuf->st_blocks))
+ return -EFAULT;
+ return 0;
+}
+
+asmlinkage long
+sys32_truncate64(char __user * filename, unsigned long offset_low, unsigned long offset_high)
+{
+ return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low);
+}
+
+asmlinkage long
+sys32_ftruncate64(unsigned int fd, unsigned long offset_low, unsigned long offset_high)
+{
+ return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low);
+}
+
+/* Another set for IA32/LFS -- x86_64 struct stat is different due to
+ support for 64bit inode numbers. */
+
+static int
+cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
+{
+ typeof(ubuf->st_uid) uid = 0;
+ typeof(ubuf->st_gid) gid = 0;
+ SET_UID(uid, stat->uid);
+ SET_GID(gid, stat->gid);
+ if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct stat64)) ||
+ __put_user(huge_encode_dev(stat->dev), &ubuf->st_dev) ||
+ __put_user (stat->ino, &ubuf->__st_ino) ||
+ __put_user (stat->ino, &ubuf->st_ino) ||
+ __put_user (stat->mode, &ubuf->st_mode) ||
+ __put_user (stat->nlink, &ubuf->st_nlink) ||
+ __put_user (uid, &ubuf->st_uid) ||
+ __put_user (gid, &ubuf->st_gid) ||
+ __put_user (huge_encode_dev(stat->rdev), &ubuf->st_rdev) ||
+ __put_user (stat->size, &ubuf->st_size) ||
+ __put_user (stat->atime.tv_sec, &ubuf->st_atime) ||
+ __put_user (stat->atime.tv_nsec, &ubuf->st_atime_nsec) ||
+ __put_user (stat->mtime.tv_sec, &ubuf->st_mtime) ||
+ __put_user (stat->mtime.tv_nsec, &ubuf->st_mtime_nsec) ||
+ __put_user (stat->ctime.tv_sec, &ubuf->st_ctime) ||
+ __put_user (stat->ctime.tv_nsec, &ubuf->st_ctime_nsec) ||
+ __put_user (stat->blksize, &ubuf->st_blksize) ||
+ __put_user (stat->blocks, &ubuf->st_blocks))
+ return -EFAULT;
+ return 0;
+}
+
+asmlinkage long
+sys32_stat64(char __user * filename, struct stat64 __user *statbuf)
+{
+ struct kstat stat;
+ int ret = vfs_stat(filename, &stat);
+ if (!ret)
+ ret = cp_stat64(statbuf, &stat);
+ return ret;
+}
+
+asmlinkage long
+sys32_lstat64(char __user * filename, struct stat64 __user *statbuf)
+{
+ struct kstat stat;
+ int ret = vfs_lstat(filename, &stat);
+ if (!ret)
+ ret = cp_stat64(statbuf, &stat);
+ return ret;
+}
+
+asmlinkage long
+sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
+{
+ struct kstat stat;
+ int ret = vfs_fstat(fd, &stat);
+ if (!ret)
+ ret = cp_stat64(statbuf, &stat);
+ return ret;
+}
+
+/*
+ * Linux/i386 didn't use to be able to handle more than
+ * 4 system call parameters, so these system calls used a memory
+ * block for parameter passing..
+ */
+
+struct mmap_arg_struct {
+ unsigned int addr;
+ unsigned int len;
+ unsigned int prot;
+ unsigned int flags;
+ unsigned int fd;
+ unsigned int offset;
+};
+
+asmlinkage long
+sys32_mmap(struct mmap_arg_struct __user *arg)
+{
+ struct mmap_arg_struct a;
+ struct file *file = NULL;
+ unsigned long retval;
+ struct mm_struct *mm ;
+
+ if (copy_from_user(&a, arg, sizeof(a)))
+ return -EFAULT;
+
+ if (a.offset & ~PAGE_MASK)
+ return -EINVAL;
+
+ if (!(a.flags & MAP_ANONYMOUS)) {
+ file = fget(a.fd);
+ if (!file)
+ return -EBADF;
+ }
+
+ mm = current->mm;
+ down_write(&mm->mmap_sem);
+ retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, a.offset>>PAGE_SHIFT);
+ if (file)
+ fput(file);
+
+ up_write(&mm->mmap_sem);
+
+ return retval;
+}
+
+asmlinkage long
+sys32_mprotect(unsigned long start, size_t len, unsigned long prot)
+{
+ return sys_mprotect(start,len,prot);
+}
+
+asmlinkage long
+sys32_pipe(int __user *fd)
+{
+ int retval;
+ int fds[2];
+
+ retval = do_pipe(fds);
+ if (retval)
+ goto out;
+ if (copy_to_user(fd, fds, sizeof(fds)))
+ retval = -EFAULT;
+ out:
+ return retval;
+}
+
+asmlinkage long
+sys32_rt_sigaction(int sig, struct sigaction32 __user *act,
+ struct sigaction32 __user *oact, unsigned int sigsetsize)
+{
+ struct k_sigaction new_ka, old_ka;
+ int ret;
+ compat_sigset_t set32;
+
+ /* XXX: Don't preclude handling different sized sigset_t's. */
+ if (sigsetsize != sizeof(compat_sigset_t))
+ return -EINVAL;
+
+ if (act) {
+ compat_uptr_t handler, restorer;
+
+ if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
+ __get_user(handler, &act->sa_handler) ||
+ __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
+ __get_user(restorer, &act->sa_restorer)||
+ __copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t)))
+ return -EFAULT;
+ new_ka.sa.sa_handler = compat_ptr(handler);
+ new_ka.sa.sa_restorer = compat_ptr(restorer);
+ /* FIXME: here we rely on _COMPAT_NSIG_WORS to be >= than _NSIG_WORDS << 1 */
+ switch (_NSIG_WORDS) {
+ case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6]
+ | (((long)set32.sig[7]) << 32);
+ case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4]
+ | (((long)set32.sig[5]) << 32);
+ case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2]
+ | (((long)set32.sig[3]) << 32);
+ case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0]
+ | (((long)set32.sig[1]) << 32);
+ }
+ }
+
+ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+ if (!ret && oact) {
+ /* FIXME: here we rely on _COMPAT_NSIG_WORS to be >= than _NSIG_WORDS << 1 */
+ switch (_NSIG_WORDS) {
+ case 4:
+ set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32);
+ set32.sig[6] = old_ka.sa.sa_mask.sig[3];
+ case 3:
+ set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32);
+ set32.sig[4] = old_ka.sa.sa_mask.sig[2];
+ case 2:
+ set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32);
+ set32.sig[2] = old_ka.sa.sa_mask.sig[1];
+ case 1:
+ set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32);
+ set32.sig[0] = old_ka.sa.sa_mask.sig[0];
+ }
+ if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
+ __put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler) ||
+ __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer) ||
+ __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
+ __copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t)))
+ return -EFAULT;
+ }
+
+ return ret;
+}
+
+asmlinkage long
+sys32_sigaction (int sig, struct old_sigaction32 __user *act, struct old_sigaction32 __user *oact)
+{
+ struct k_sigaction new_ka, old_ka;
+ int ret;
+
+ if (act) {
+ compat_old_sigset_t mask;
+ compat_uptr_t handler, restorer;
+
+ if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
+ __get_user(handler, &act->sa_handler) ||
+ __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
+ __get_user(restorer, &act->sa_restorer) ||
+ __get_user(mask, &act->sa_mask))
+ return -EFAULT;
+
+ new_ka.sa.sa_handler = compat_ptr(handler);
+ new_ka.sa.sa_restorer = compat_ptr(restorer);
+
+ siginitset(&new_ka.sa.sa_mask, mask);
+ }
+
+ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+ if (!ret && oact) {
+ if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
+ __put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler) ||
+ __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer) ||
+ __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
+ __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
+ return -EFAULT;
+ }
+
+ return ret;
+}
+
+asmlinkage long
+sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
+ compat_sigset_t __user *oset, unsigned int sigsetsize)
+{
+ sigset_t s;
+ compat_sigset_t s32;
+ int ret;
+ mm_segment_t old_fs = get_fs();
+
+ if (set) {
+ if (copy_from_user (&s32, set, sizeof(compat_sigset_t)))
+ return -EFAULT;
+ switch (_NSIG_WORDS) {
+ case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32);
+ case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32);
+ case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32);
+ case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
+ }
+ }
+ set_fs (KERNEL_DS);
+ ret = sys_rt_sigprocmask(how, set ? &s : NULL, oset ? &s : NULL,
+ sigsetsize);
+ set_fs (old_fs);
+ if (ret) return ret;
+ if (oset) {
+ switch (_NSIG_WORDS) {
+ case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
+ case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2];
+ case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
+ case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
+ }
+ if (copy_to_user (oset, &s32, sizeof(compat_sigset_t)))
+ return -EFAULT;
+ }
+ return 0;
+}
+
+static inline long
+get_tv32(struct timeval *o, struct compat_timeval __user *i)
+{
+ int err = -EFAULT;
+ if (access_ok(VERIFY_READ, i, sizeof(*i))) {
+ err = __get_user(o->tv_sec, &i->tv_sec);
+ err |= __get_user(o->tv_usec, &i->tv_usec);
+ }
+ return err;
+}
+
+static inline long
+put_tv32(struct compat_timeval __user *o, struct timeval *i)
+{
+ int err = -EFAULT;
+ if (access_ok(VERIFY_WRITE, o, sizeof(*o))) {
+ err = __put_user(i->tv_sec, &o->tv_sec);
+ err |= __put_user(i->tv_usec, &o->tv_usec);
+ }
+ return err;
+}
+
+extern int do_setitimer(int which, struct itimerval *, struct itimerval *);
+
+asmlinkage long
+sys32_alarm(unsigned int seconds)
+{
+ struct itimerval it_new, it_old;
+ unsigned int oldalarm;
+
+ it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
+ it_new.it_value.tv_sec = seconds;
+ it_new.it_value.tv_usec = 0;
+ do_setitimer(ITIMER_REAL, &it_new, &it_old);
+ oldalarm = it_old.it_value.tv_sec;
+ /* ehhh.. We can't return 0 if we have an alarm pending.. */
+ /* And we'd better return too much than too little anyway */
+ if (it_old.it_value.tv_usec)
+ oldalarm++;
+ return oldalarm;
+}
+
+/* Translations due to time_t size differences. Which affects all
+ sorts of things, like timeval and itimerval. */
+
+extern struct timezone sys_tz;
+
+asmlinkage long
+sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
+{
+ if (tv) {
+ struct timeval ktv;
+ do_gettimeofday(&ktv);
+ if (put_tv32(tv, &ktv))
+ return -EFAULT;
+ }
+ if (tz) {
+ if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
+ return -EFAULT;
+ }
+ return 0;
+}
+
+asmlinkage long
+sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
+{
+ struct timeval ktv;
+ struct timespec kts;
+ struct timezone ktz;
+
+ if (tv) {
+ if (get_tv32(&ktv, tv))
+ return -EFAULT;
+ kts.tv_sec = ktv.tv_sec;
+ kts.tv_nsec = ktv.tv_usec * NSEC_PER_USEC;
+ }
+ if (tz) {
+ if (copy_from_user(&ktz, tz, sizeof(ktz)))
+ return -EFAULT;
+ }
+
+ return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
+}
+
+struct sel_arg_struct {
+ unsigned int n;
+ unsigned int inp;
+ unsigned int outp;
+ unsigned int exp;
+ unsigned int tvp;
+};
+
+asmlinkage long
+sys32_old_select(struct sel_arg_struct __user *arg)
+{
+ struct sel_arg_struct a;
+
+ if (copy_from_user(&a, arg, sizeof(a)))
+ return -EFAULT;
+ return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp),
+ compat_ptr(a.exp), compat_ptr(a.tvp));
+}
+
+extern asmlinkage long
+compat_sys_wait4(compat_pid_t pid, compat_uint_t * stat_addr, int options,
+ struct compat_rusage *ru);
+
+asmlinkage long
+sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, int options)
+{
+ return compat_sys_wait4(pid, stat_addr, options, NULL);
+}
+
+int sys32_ni_syscall(int call)
+{
+ struct task_struct *me = current;
+ static char lastcomm[sizeof(me->comm)];
+
+ if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
+ printk(KERN_INFO "IA32 syscall %d from %s not implemented\n",
+ call, me->comm);
+ strncpy(lastcomm, me->comm, sizeof(lastcomm));
+ }
+ return -ENOSYS;
+}
+
+/* 32-bit timeval and related flotsam. */
+
+asmlinkage long
+sys32_sysfs(int option, u32 arg1, u32 arg2)
+{
+ return sys_sysfs(option, arg1, arg2);
+}
+
+struct sysinfo32 {
+ s32 uptime;
+ u32 loads[3];
+ u32 totalram;
+ u32 freeram;
+ u32 sharedram;
+ u32 bufferram;
+ u32 totalswap;
+ u32 freeswap;
+ unsigned short procs;
+ unsigned short pad;
+ u32 totalhigh;
+ u32 freehigh;
+ u32 mem_unit;
+ char _f[20-2*sizeof(u32)-sizeof(int)];
+};
+
+asmlinkage long
+sys32_sysinfo(struct sysinfo32 __user *info)
+{
+ struct sysinfo s;
+ int ret;
+ mm_segment_t old_fs = get_fs ();
+ int bitcount = 0;
+
+ set_fs (KERNEL_DS);
+ ret = sys_sysinfo(&s);
+ set_fs (old_fs);
+
+ /* Check to see if any memory value is too large for 32-bit and scale
+ * down if needed
+ */
+ if ((s.totalram >> 32) || (s.totalswap >> 32)) {
+ while (s.mem_unit < PAGE_SIZE) {
+ s.mem_unit <<= 1;
+ bitcount++;
+ }
+ s.totalram >>= bitcount;
+ s.freeram >>= bitcount;
+ s.sharedram >>= bitcount;
+ s.bufferram >>= bitcount;
+ s.totalswap >>= bitcount;
+ s.freeswap >>= bitcount;
+ s.totalhigh >>= bitcount;
+ s.freehigh >>= bitcount;
+ }
+
+ if (!access_ok(VERIFY_WRITE, info, sizeof(struct sysinfo32)) ||
+ __put_user (s.uptime, &info->uptime) ||
+ __put_user (s.loads[0], &info->loads[0]) ||
+ __put_user (s.loads[1], &info->loads[1]) ||
+ __put_user (s.loads[2], &info->loads[2]) ||
+ __put_user (s.totalram, &info->totalram) ||
+ __put_user (s.freeram, &info->freeram) ||
+ __put_user (s.sharedram, &info->sharedram) ||
+ __put_user (s.bufferram, &info->bufferram) ||
+ __put_user (s.totalswap, &info->totalswap) ||
+ __put_user (s.freeswap, &info->freeswap) ||
+ __put_user (s.procs, &info->procs) ||
+ __put_user (s.totalhigh, &info->totalhigh) ||
+ __put_user (s.freehigh, &info->freehigh) ||
+ __put_user (s.mem_unit, &info->mem_unit))
+ return -EFAULT;
+ return 0;
+}
+
+asmlinkage long
+sys32_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec __user *interval)
+{
+ struct timespec t;
+ int ret;
+ mm_segment_t old_fs = get_fs ();
+
+ set_fs (KERNEL_DS);
+ ret = sys_sched_rr_get_interval(pid, &t);
+ set_fs (old_fs);
+ if (put_compat_timespec(&t, interval))
+ return -EFAULT;
+ return ret;
+}
+
+asmlinkage long
+sys32_rt_sigpending(compat_sigset_t __user *set, compat_size_t sigsetsize)
+{
+ sigset_t s;
+ compat_sigset_t s32;
+ int ret;
+ mm_segment_t old_fs = get_fs();
+
+ set_fs (KERNEL_DS);
+ ret = sys_rt_sigpending(&s, sigsetsize);
+ set_fs (old_fs);
+ if (!ret) {
+ switch (_NSIG_WORDS) {
+ case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
+ case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2];
+ case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
+ case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
+ }
+ if (copy_to_user (set, &s32, sizeof(compat_sigset_t)))
+ return -EFAULT;
+ }
+ return ret;
+}
+
+asmlinkage long
+sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo)
+{
+ siginfo_t info;
+ int ret;
+ mm_segment_t old_fs = get_fs();
+
+ if (copy_siginfo_from_user32(&info, uinfo))
+ return -EFAULT;
+ set_fs (KERNEL_DS);
+ ret = sys_rt_sigqueueinfo(pid, sig, &info);
+ set_fs (old_fs);
+ return ret;
+}
+
+/* These are here just in case some old ia32 binary calls it. */
+asmlinkage long
+sys32_pause(void)
+{
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+ return -ERESTARTNOHAND;
+}
+
+
+#ifdef CONFIG_SYSCTL
+struct sysctl_ia32 {
+ unsigned int name;
+ int nlen;
+ unsigned int oldval;
+ unsigned int oldlenp;
+ unsigned int newval;
+ unsigned int newlen;
+ unsigned int __unused[4];
+};
+
+
+asmlinkage long
+sys32_sysctl(struct sysctl_ia32 __user *args32)
+{
+ struct sysctl_ia32 a32;
+ mm_segment_t old_fs = get_fs ();
+ void __user *oldvalp, *newvalp;
+ size_t oldlen;
+ int __user *namep;
+ long ret;
+ extern int do_sysctl(int *name, int nlen, void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen);
+
+
+ if (copy_from_user(&a32, args32, sizeof (a32)))
+ return -EFAULT;
+
+ /*
+ * We need to pre-validate these because we have to disable address checking
+ * before calling do_sysctl() because of OLDLEN but we can't run the risk of the
+ * user specifying bad addresses here. Well, since we're dealing with 32 bit
+ * addresses, we KNOW that access_ok() will always succeed, so this is an
+ * expensive NOP, but so what...
+ */
+ namep = compat_ptr(a32.name);
+ oldvalp = compat_ptr(a32.oldval);
+ newvalp = compat_ptr(a32.newval);
+
+ if ((oldvalp && get_user(oldlen, (int __user *)compat_ptr(a32.oldlenp)))
+ || !access_ok(VERIFY_WRITE, namep, 0)
+ || !access_ok(VERIFY_WRITE, oldvalp, 0)
+ || !access_ok(VERIFY_WRITE, newvalp, 0))
+ return -EFAULT;
+
+ set_fs(KERNEL_DS);
+ lock_kernel();
+ ret = do_sysctl(namep, a32.nlen, oldvalp, &oldlen, newvalp, (size_t) a32.newlen);
+ unlock_kernel();
+ set_fs(old_fs);
+
+ if (oldvalp && put_user (oldlen, (int __user *)compat_ptr(a32.oldlenp)))
+ return -EFAULT;
+
+ return ret;
+}
+#endif
+
+/* warning: next two assume little endian */
+asmlinkage long
+sys32_pread(unsigned int fd, char __user *ubuf, u32 count, u32 poslo, u32 poshi)
+{
+ return sys_pread64(fd, ubuf, count,
+ ((loff_t)AA(poshi) << 32) | AA(poslo));
+}
+
+asmlinkage long
+sys32_pwrite(unsigned int fd, char __user *ubuf, u32 count, u32 poslo, u32 poshi)
+{
+ return sys_pwrite64(fd, ubuf, count,
+ ((loff_t)AA(poshi) << 32) | AA(poslo));
+}
+
+
+asmlinkage long
+sys32_personality(unsigned long personality)
+{
+ int ret;
+ if (personality(current->personality) == PER_LINUX32 &&
+ personality == PER_LINUX)
+ personality = PER_LINUX32;
+ ret = sys_personality(personality);
+ if (ret == PER_LINUX32)
+ ret = PER_LINUX;
+ return ret;
+}
+
+asmlinkage long
+sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, s32 count)
+{
+ mm_segment_t old_fs = get_fs();
+ int ret;
+ off_t of;
+
+ if (offset && get_user(of, offset))
+ return -EFAULT;
+
+ set_fs(KERNEL_DS);
+ ret = sys_sendfile(out_fd, in_fd, offset ? &of : NULL, count);
+ set_fs(old_fs);
+
+ if (!ret && offset && put_user(of, offset))
+ return -EFAULT;
+
+ return ret;
+}
+
+/* Handle adjtimex compatibility. */
+
+struct timex32 {
+ u32 modes;
+ s32 offset, freq, maxerror, esterror;
+ s32 status, constant, precision, tolerance;
+ struct compat_timeval time;
+ s32 tick;
+ s32 ppsfreq, jitter, shift, stabil;
+ s32 jitcnt, calcnt, errcnt, stbcnt;
+ s32 :32; s32 :32; s32 :32; s32 :32;
+ s32 :32; s32 :32; s32 :32; s32 :32;
+ s32 :32; s32 :32; s32 :32; s32 :32;
+};
+
+extern int do_adjtimex(struct timex *);
+
+asmlinkage long
+sys32_adjtimex(struct timex32 __user *utp)
+{
+ struct timex txc;
+ int ret;
+
+ memset(&txc, 0, sizeof(struct timex));
+
+ if (!access_ok(VERIFY_READ, utp, sizeof(struct timex32)) ||
+ __get_user(txc.modes, &utp->modes) ||
+ __get_user(txc.offset, &utp->offset) ||
+ __get_user(txc.freq, &utp->freq) ||
+ __get_user(txc.maxerror, &utp->maxerror) ||
+ __get_user(txc.esterror, &utp->esterror) ||
+ __get_user(txc.status, &utp->status) ||
+ __get_user(txc.constant, &utp->constant) ||
+ __get_user(txc.precision, &utp->precision) ||
+ __get_user(txc.tolerance, &utp->tolerance) ||
+ __get_user(txc.time.tv_sec, &utp->time.tv_sec) ||
+ __get_user(txc.time.tv_usec, &utp->time.tv_usec) ||
+ __get_user(txc.tick, &utp->tick) ||
+ __get_user(txc.ppsfreq, &utp->ppsfreq) ||
+ __get_user(txc.jitter, &utp->jitter) ||
+ __get_user(txc.shift, &utp->shift) ||
+ __get_user(txc.stabil, &utp->stabil) ||
+ __get_user(txc.jitcnt, &utp->jitcnt) ||
+ __get_user(txc.calcnt, &utp->calcnt) ||
+ __get_user(txc.errcnt, &utp->errcnt) ||
+ __get_user(txc.stbcnt, &utp->stbcnt))
+ return -EFAULT;
+
+ ret = do_adjtimex(&txc);
+
+ if (!access_ok(VERIFY_WRITE, utp, sizeof(struct timex32)) ||
+ __put_user(txc.modes, &utp->modes) ||
+ __put_user(txc.offset, &utp->offset) ||
+ __put_user(txc.freq, &utp->freq) ||
+ __put_user(txc.maxerror, &utp->maxerror) ||
+ __put_user(txc.esterror, &utp->esterror) ||
+ __put_user(txc.status, &utp->status) ||
+ __put_user(txc.constant, &utp->constant) ||
+ __put_user(txc.precision, &utp->precision) ||
+ __put_user(txc.tolerance, &utp->tolerance) ||
+ __put_user(txc.time.tv_sec, &utp->time.tv_sec) ||
+ __put_user(txc.time.tv_usec, &utp->time.tv_usec) ||
+ __put_user(txc.tick, &utp->tick) ||
+ __put_user(txc.ppsfreq, &utp->ppsfreq) ||
+ __put_user(txc.jitter, &utp->jitter) ||
+ __put_user(txc.shift, &utp->shift) ||
+ __put_user(txc.stabil, &utp->stabil) ||
+ __put_user(txc.jitcnt, &utp->jitcnt) ||
+ __put_user(txc.calcnt, &utp->calcnt) ||
+ __put_user(txc.errcnt, &utp->errcnt) ||
+ __put_user(txc.stbcnt, &utp->stbcnt))
+ ret = -EFAULT;
+
+ return ret;
+}
+
+asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len,
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long pgoff)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long error;
+ struct file * file = NULL;
+
+ flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+ if (!(flags & MAP_ANONYMOUS)) {
+ file = fget(fd);
+ if (!file)
+ return -EBADF;
+ }
+
+ down_write(&mm->mmap_sem);
+ error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+ up_write(&mm->mmap_sem);
+
+ if (file)
+ fput(file);
+ return error;
+}
+
+asmlinkage long sys32_olduname(struct oldold_utsname __user * name)
+{
+ int error;
+
+ if (!name)
+ return -EFAULT;
+ if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
+ return -EFAULT;
+
+ down_read(&uts_sem);
+
+ error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+ __put_user(0,name->sysname+__OLD_UTS_LEN);
+ __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+ __put_user(0,name->nodename+__OLD_UTS_LEN);
+ __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+ __put_user(0,name->release+__OLD_UTS_LEN);
+ __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+ __put_user(0,name->version+__OLD_UTS_LEN);
+ {
+ char *arch = "x86_64";
+ if (personality(current->personality) == PER_LINUX32)
+ arch = "i686";
+
+ __copy_to_user(&name->machine,arch,strlen(arch)+1);
+ }
+
+ up_read(&uts_sem);
+
+ error = error ? -EFAULT : 0;
+
+ return error;
+}
+
+long sys32_uname(struct old_utsname __user * name)
+{
+ int err;
+ if (!name)
+ return -EFAULT;
+ down_read(&uts_sem);
+ err=copy_to_user(name, &system_utsname, sizeof (*name));
+ up_read(&uts_sem);
+ if (personality(current->personality) == PER_LINUX32)
+ err |= copy_to_user(&name->machine, "i686", 5);
+ return err?-EFAULT:0;
+}
+
+long sys32_ustat(unsigned dev, struct ustat32 __user *u32p)
+{
+ struct ustat u;
+ mm_segment_t seg;
+ int ret;
+
+ seg = get_fs();
+ set_fs(KERNEL_DS);
+ ret = sys_ustat(dev,&u);
+ set_fs(seg);
+ if (ret >= 0) {
+ if (!access_ok(VERIFY_WRITE,u32p,sizeof(struct ustat32)) ||
+ __put_user((__u32) u.f_tfree, &u32p->f_tfree) ||
+ __put_user((__u32) u.f_tinode, &u32p->f_tfree) ||
+ __copy_to_user(&u32p->f_fname, u.f_fname, sizeof(u.f_fname)) ||
+ __copy_to_user(&u32p->f_fpack, u.f_fpack, sizeof(u.f_fpack)))
+ ret = -EFAULT;
+ }
+ return ret;
+}
+
+asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv,
+ compat_uptr_t __user *envp, struct pt_regs *regs)
+{
+ long error;
+ char * filename;
+
+ filename = getname(name);
+ error = PTR_ERR(filename);
+ if (IS_ERR(filename))
+ return error;
+ error = compat_do_execve(filename, argv, envp, regs);
+ if (error == 0) {
+ task_lock(current);
+ current->ptrace &= ~PT_DTRACE;
+ task_unlock(current);
+ }
+ putname(filename);
+ return error;
+}
+
+asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp,
+ struct pt_regs *regs)
+{
+ void __user *parent_tid = (void __user *)regs->rdx;
+ void __user *child_tid = (void __user *)regs->rdi;
+ if (!newsp)
+ newsp = regs->rsp;
+ return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
+}
+
+/*
+ * Some system calls that need sign extended arguments. This could be done by a generic wrapper.
+ */
+
+long sys32_lseek (unsigned int fd, int offset, unsigned int whence)
+{
+ return sys_lseek(fd, offset, whence);
+}
+
+long sys32_kill(int pid, int sig)
+{
+ return sys_kill(pid, sig);
+}
+
+asmlinkage long sys32_open(const char __user * filename, int flags, int mode)
+{
+ char * tmp;
+ int fd, error;
+
+ /* don't force O_LARGEFILE */
+ tmp = getname(filename);
+ fd = PTR_ERR(tmp);
+ if (!IS_ERR(tmp)) {
+ fd = get_unused_fd();
+ if (fd >= 0) {
+ struct file *f = filp_open(tmp, flags, mode);
+ error = PTR_ERR(f);
+ if (IS_ERR(f)) {
+ put_unused_fd(fd);
+ fd = error;
+ } else
+ fd_install(fd, f);
+ }
+ putname(tmp);
+ }
+ return fd;
+}
+
+extern asmlinkage long
+sys_timer_create(clockid_t which_clock,
+ struct sigevent __user *timer_event_spec,
+ timer_t __user * created_timer_id);
+
+long
+sys32_timer_create(u32 clock, struct compat_sigevent __user *se32, timer_t __user *timer_id)
+{
+ struct sigevent __user *p = NULL;
+ if (se32) {
+ struct sigevent se;
+ p = compat_alloc_user_space(sizeof(struct sigevent));
+ if (get_compat_sigevent(&se, se32) ||
+ copy_to_user(p, &se, sizeof(se)))
+ return -EFAULT;
+ }
+ return sys_timer_create(clock, p, timer_id);
+}
+
+long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
+ __u32 len_low, __u32 len_high, int advice)
+{
+ return sys_fadvise64_64(fd,
+ (((u64)offset_high)<<32) | offset_low,
+ (((u64)len_high)<<32) | len_low,
+ advice);
+}
+
+long sys32_vm86_warning(void)
+{
+ struct task_struct *me = current;
+ static char lastcomm[sizeof(me->comm)];
+ if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
+ printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
+ me->comm);
+ strncpy(lastcomm, me->comm, sizeof(lastcomm));
+ }
+ return -ENOSYS;
+}
+
+long sys32_lookup_dcookie(u32 addr_low, u32 addr_high,
+ char __user * buf, size_t len)
+{
+ return sys_lookup_dcookie(((u64)addr_high << 32) | addr_low, buf, len);
+}
+
+static int __init ia32_init (void)
+{
+ printk("IA32 emulation $Id: sys_ia32.c,v 1.32 2002/03/24 13:02:28 ak Exp $\n");
+ return 0;
+}
+
+__initcall(ia32_init);
+
+extern unsigned long ia32_sys_call_table[];
+EXPORT_SYMBOL(ia32_sys_call_table);
diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c
new file mode 100644
index 000000000000..399ff4985099
--- /dev/null
+++ b/arch/x86_64/ia32/syscall32.c
@@ -0,0 +1,111 @@
+/* Copyright 2002,2003 Andi Kleen, SuSE Labs */
+
+/* vsyscall handling for 32bit processes. Map a stub page into it
+ on demand because 32bit cannot reach the kernel's fixmaps */
+
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/init.h>
+#include <linux/stringify.h>
+#include <asm/proto.h>
+#include <asm/tlbflush.h>
+#include <asm/ia32_unistd.h>
+
+/* 32bit VDSOs mapped into user space. */
+asm(".section \".init.data\",\"aw\"\n"
+ "syscall32_syscall:\n"
+ ".incbin \"arch/x86_64/ia32/vsyscall-syscall.so\"\n"
+ "syscall32_syscall_end:\n"
+ "syscall32_sysenter:\n"
+ ".incbin \"arch/x86_64/ia32/vsyscall-sysenter.so\"\n"
+ "syscall32_sysenter_end:\n"
+ ".previous");
+
+extern unsigned char syscall32_syscall[], syscall32_syscall_end[];
+extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
+extern int sysctl_vsyscall32;
+
+char *syscall32_page;
+static int use_sysenter = -1;
+
+/*
+ * Map the 32bit vsyscall page on demand.
+ *
+ * RED-PEN: This knows too much about high level VM.
+ *
+ * Alternative would be to generate a vma with appropriate backing options
+ * and let it be handled by generic VM.
+ */
+int __map_syscall32(struct mm_struct *mm, unsigned long address)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pte_t *pte;
+ pmd_t *pmd;
+ int err = -ENOMEM;
+
+ spin_lock(&mm->page_table_lock);
+ pgd = pgd_offset(mm, address);
+ pud = pud_alloc(mm, pgd, address);
+ if (pud) {
+ pmd = pmd_alloc(mm, pud, address);
+ if (pmd && (pte = pte_alloc_map(mm, pmd, address)) != NULL) {
+ if (pte_none(*pte)) {
+ set_pte(pte,
+ mk_pte(virt_to_page(syscall32_page),
+ PAGE_KERNEL_VSYSCALL32));
+ }
+ /* Flush only the local CPU. Other CPUs taking a fault
+ will just end up here again
+ This probably not needed and just paranoia. */
+ __flush_tlb_one(address);
+ err = 0;
+ }
+ }
+ spin_unlock(&mm->page_table_lock);
+ return err;
+}
+
+int map_syscall32(struct mm_struct *mm, unsigned long address)
+{
+ int err;
+ down_read(&mm->mmap_sem);
+ err = __map_syscall32(mm, address);
+ up_read(&mm->mmap_sem);
+ return err;
+}
+
+static int __init init_syscall32(void)
+{
+ syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!syscall32_page)
+ panic("Cannot allocate syscall32 page");
+ SetPageReserved(virt_to_page(syscall32_page));
+ if (use_sysenter > 0) {
+ memcpy(syscall32_page, syscall32_sysenter,
+ syscall32_sysenter_end - syscall32_sysenter);
+ } else {
+ memcpy(syscall32_page, syscall32_syscall,
+ syscall32_syscall_end - syscall32_syscall);
+ }
+ return 0;
+}
+
+__initcall(init_syscall32);
+
+/* May not be __init: called during resume */
+void syscall32_cpu_init(void)
+{
+ if (use_sysenter < 0)
+ use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL);
+
+ /* Load these always in case some future AMD CPU supports
+ SYSENTER from compat mode too. */
+ checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+ checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL);
+ checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
+
+ wrmsrl(MSR_CSTAR, ia32_cstar_target);
+}
diff --git a/arch/x86_64/ia32/tls32.c b/arch/x86_64/ia32/tls32.c
new file mode 100644
index 000000000000..1cc4340de3ca
--- /dev/null
+++ b/arch/x86_64/ia32/tls32.c
@@ -0,0 +1,163 @@
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/user.h>
+
+#include <asm/uaccess.h>
+#include <asm/desc.h>
+#include <asm/system.h>
+#include <asm/ldt.h>
+#include <asm/processor.h>
+#include <asm/proto.h>
+
+/*
+ * sys_alloc_thread_area: get a yet unused TLS descriptor index.
+ */
+static int get_free_idx(void)
+{
+ struct thread_struct *t = &current->thread;
+ int idx;
+
+ for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
+ if (desc_empty((struct n_desc_struct *)(t->tls_array) + idx))
+ return idx + GDT_ENTRY_TLS_MIN;
+ return -ESRCH;
+}
+
+/*
+ * Set a given TLS descriptor:
+ * When you want addresses > 32bit use arch_prctl()
+ */
+int do_set_thread_area(struct thread_struct *t, struct user_desc __user *u_info)
+{
+ struct user_desc info;
+ struct n_desc_struct *desc;
+ int cpu, idx;
+
+ if (copy_from_user(&info, u_info, sizeof(info)))
+ return -EFAULT;
+
+ idx = info.entry_number;
+
+ /*
+ * index -1 means the kernel should try to find and
+ * allocate an empty descriptor:
+ */
+ if (idx == -1) {
+ idx = get_free_idx();
+ if (idx < 0)
+ return idx;
+ if (put_user(idx, &u_info->entry_number))
+ return -EFAULT;
+ }
+
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+ return -EINVAL;
+
+ desc = ((struct n_desc_struct *)t->tls_array) + idx - GDT_ENTRY_TLS_MIN;
+
+ /*
+ * We must not get preempted while modifying the TLS.
+ */
+ cpu = get_cpu();
+
+ if (LDT_empty(&info)) {
+ desc->a = 0;
+ desc->b = 0;
+ } else {
+ desc->a = LDT_entry_a(&info);
+ desc->b = LDT_entry_b(&info);
+ }
+ if (t == &current->thread)
+ load_TLS(t, cpu);
+
+ put_cpu();
+ return 0;
+}
+
+asmlinkage long sys32_set_thread_area(struct user_desc __user *u_info)
+{
+ return do_set_thread_area(&current->thread, u_info);
+}
+
+
+/*
+ * Get the current Thread-Local Storage area:
+ */
+
+#define GET_BASE(desc) ( \
+ (((desc)->a >> 16) & 0x0000ffff) | \
+ (((desc)->b << 16) & 0x00ff0000) | \
+ ( (desc)->b & 0xff000000) )
+
+#define GET_LIMIT(desc) ( \
+ ((desc)->a & 0x0ffff) | \
+ ((desc)->b & 0xf0000) )
+
+#define GET_32BIT(desc) (((desc)->b >> 22) & 1)
+#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3)
+#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1)
+#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1)
+#define GET_PRESENT(desc) (((desc)->b >> 15) & 1)
+#define GET_USEABLE(desc) (((desc)->b >> 20) & 1)
+#define GET_LONGMODE(desc) (((desc)->b >> 21) & 1)
+
+int do_get_thread_area(struct thread_struct *t, struct user_desc __user *u_info)
+{
+ struct user_desc info;
+ struct n_desc_struct *desc;
+ int idx;
+
+ if (get_user(idx, &u_info->entry_number))
+ return -EFAULT;
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+ return -EINVAL;
+
+ desc = ((struct n_desc_struct *)t->tls_array) + idx - GDT_ENTRY_TLS_MIN;
+
+ memset(&info, 0, sizeof(struct user_desc));
+ info.entry_number = idx;
+ info.base_addr = GET_BASE(desc);
+ info.limit = GET_LIMIT(desc);
+ info.seg_32bit = GET_32BIT(desc);
+ info.contents = GET_CONTENTS(desc);
+ info.read_exec_only = !GET_WRITABLE(desc);
+ info.limit_in_pages = GET_LIMIT_PAGES(desc);
+ info.seg_not_present = !GET_PRESENT(desc);
+ info.useable = GET_USEABLE(desc);
+ info.lm = GET_LONGMODE(desc);
+
+ if (copy_to_user(u_info, &info, sizeof(info)))
+ return -EFAULT;
+ return 0;
+}
+
+asmlinkage long sys32_get_thread_area(struct user_desc __user *u_info)
+{
+ return do_get_thread_area(&current->thread, u_info);
+}
+
+
+int ia32_child_tls(struct task_struct *p, struct pt_regs *childregs)
+{
+ struct n_desc_struct *desc;
+ struct user_desc info;
+ struct user_desc __user *cp;
+ int idx;
+
+ cp = (void __user *)childregs->rsi;
+ if (copy_from_user(&info, cp, sizeof(info)))
+ return -EFAULT;
+ if (LDT_empty(&info))
+ return -EINVAL;
+
+ idx = info.entry_number;
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+ return -EINVAL;
+
+ desc = (struct n_desc_struct *)(p->thread.tls_array) + idx - GDT_ENTRY_TLS_MIN;
+ desc->a = LDT_entry_a(&info);
+ desc->b = LDT_entry_b(&info);
+
+ return 0;
+}
diff --git a/arch/x86_64/ia32/vsyscall-sigreturn.S b/arch/x86_64/ia32/vsyscall-sigreturn.S
new file mode 100644
index 000000000000..ba4067d350e4
--- /dev/null
+++ b/arch/x86_64/ia32/vsyscall-sigreturn.S
@@ -0,0 +1,120 @@
+/*
+ * Common code for the sigreturn entry points on the vsyscall page.
+ * This code uses SYSCALL_ENTER_KERNEL (either syscall or int $0x80)
+ * to enter the kernel.
+ * This file is #include'd by vsyscall-*.S to define them after the
+ * vsyscall entry point. The addresses we get for these entry points
+ * by doing ".balign 32" must match in both versions of the page.
+ */
+
+ .section .text.sigreturn,"ax"
+ .balign 32
+ .globl __kernel_sigreturn
+ .type __kernel_sigreturn,@function
+__kernel_sigreturn:
+.LSTART_sigreturn:
+ popl %eax
+ movl $__NR_ia32_sigreturn, %eax
+ SYSCALL_ENTER_KERNEL
+.LEND_sigreturn:
+ .size __kernel_sigreturn,.-.LSTART_sigreturn
+
+ .section .text.rtsigreturn,"ax"
+ .balign 32
+ .globl __kernel_rt_sigreturn
+ .type __kernel_rt_sigreturn,@function
+__kernel_rt_sigreturn:
+.LSTART_rt_sigreturn:
+ movl $__NR_ia32_rt_sigreturn, %eax
+ SYSCALL_ENTER_KERNEL
+.LEND_rt_sigreturn:
+ .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
+
+ .section .eh_frame,"a",@progbits
+ .long .LENDFDE2-.LSTARTFDE2 /* Length FDE */
+.LSTARTFDE2:
+ .long .LSTARTFDE2-.LSTARTFRAME /* CIE pointer */
+ /* HACK: The dwarf2 unwind routines will subtract 1 from the
+ return address to get an address in the middle of the
+ presumed call instruction. Since we didn't get here via
+ a call, we need to include the nop before the real start
+ to make up for it. */
+ .long .LSTART_sigreturn-1-. /* PC-relative start address */
+ .long .LEND_sigreturn-.LSTART_sigreturn+1
+ .uleb128 0 /* Augmentation length */
+ /* What follows are the instructions for the table generation.
+ We record the locations of each register saved. This is
+ complicated by the fact that the "CFA" is always assumed to
+ be the value of the stack pointer in the caller. This means
+ that we must define the CFA of this body of code to be the
+ saved value of the stack pointer in the sigcontext. Which
+ also means that there is no fixed relation to the other
+ saved registers, which means that we must use DW_CFA_expression
+ to compute their addresses. It also means that when we
+ adjust the stack with the popl, we have to do it all over again. */
+
+#define do_cfa_expr(offset) \
+ .byte 0x0f; /* DW_CFA_def_cfa_expression */ \
+ .uleb128 1f-0f; /* length */ \
+0: .byte 0x74; /* DW_OP_breg4 */ \
+ .sleb128 offset; /* offset */ \
+ .byte 0x06; /* DW_OP_deref */ \
+1:
+
+#define do_expr(regno, offset) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno; /* regno */ \
+ .uleb128 1f-0f; /* length */ \
+0: .byte 0x74; /* DW_OP_breg4 */ \
+ .sleb128 offset; /* offset */ \
+1:
+
+ do_cfa_expr(IA32_SIGCONTEXT_esp+4)
+ do_expr(0, IA32_SIGCONTEXT_eax+4)
+ do_expr(1, IA32_SIGCONTEXT_ecx+4)
+ do_expr(2, IA32_SIGCONTEXT_edx+4)
+ do_expr(3, IA32_SIGCONTEXT_ebx+4)
+ do_expr(5, IA32_SIGCONTEXT_ebp+4)
+ do_expr(6, IA32_SIGCONTEXT_esi+4)
+ do_expr(7, IA32_SIGCONTEXT_edi+4)
+ do_expr(8, IA32_SIGCONTEXT_eip+4)
+
+ .byte 0x42 /* DW_CFA_advance_loc 2 -- nop; popl eax. */
+
+ do_cfa_expr(IA32_SIGCONTEXT_esp)
+ do_expr(0, IA32_SIGCONTEXT_eax)
+ do_expr(1, IA32_SIGCONTEXT_ecx)
+ do_expr(2, IA32_SIGCONTEXT_edx)
+ do_expr(3, IA32_SIGCONTEXT_ebx)
+ do_expr(5, IA32_SIGCONTEXT_ebp)
+ do_expr(6, IA32_SIGCONTEXT_esi)
+ do_expr(7, IA32_SIGCONTEXT_edi)
+ do_expr(8, IA32_SIGCONTEXT_eip)
+
+ .align 4
+.LENDFDE2:
+
+ .long .LENDFDE3-.LSTARTFDE3 /* Length FDE */
+.LSTARTFDE3:
+ .long .LSTARTFDE3-.LSTARTFRAME /* CIE pointer */
+ /* HACK: See above wrt unwind library assumptions. */
+ .long .LSTART_rt_sigreturn-1-. /* PC-relative start address */
+ .long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1
+ .uleb128 0 /* Augmentation */
+ /* What follows are the instructions for the table generation.
+ We record the locations of each register saved. This is
+ slightly less complicated than the above, since we don't
+ modify the stack pointer in the process. */
+
+ do_cfa_expr(IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_esp)
+ do_expr(0, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_eax)
+ do_expr(1, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ecx)
+ do_expr(2, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_edx)
+ do_expr(3, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ebx)
+ do_expr(5, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ebp)
+ do_expr(6, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_esi)
+ do_expr(7, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_edi)
+ do_expr(8, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_eip)
+
+ .align 4
+.LENDFDE3:
diff --git a/arch/x86_64/ia32/vsyscall-syscall.S b/arch/x86_64/ia32/vsyscall-syscall.S
new file mode 100644
index 000000000000..e2aaf3de8a42
--- /dev/null
+++ b/arch/x86_64/ia32/vsyscall-syscall.S
@@ -0,0 +1,68 @@
+/*
+ * Code for the vsyscall page. This version uses the syscall instruction.
+ */
+
+#include <asm/ia32_unistd.h>
+#include <asm/offset.h>
+#include <asm/segment.h>
+
+ .text
+ .section .text.vsyscall,"ax"
+ .globl __kernel_vsyscall
+ .type __kernel_vsyscall,@function
+__kernel_vsyscall:
+.LSTART_vsyscall:
+ push %ebp
+.Lpush_ebp:
+ movl %ecx, %ebp
+ syscall
+ movl $__USER32_DS, %ecx
+ movl %ecx, %ss
+ movl %ebp, %ecx
+ popl %ebp
+.Lpop_ebp:
+ ret
+.LEND_vsyscall:
+ .size __kernel_vsyscall,.-.LSTART_vsyscall
+
+ .section .eh_frame,"a",@progbits
+.LSTARTFRAME:
+ .long .LENDCIE-.LSTARTCIE
+.LSTARTCIE:
+ .long 0 /* CIE ID */
+ .byte 1 /* Version number */
+ .string "zR" /* NUL-terminated augmentation string */
+ .uleb128 1 /* Code alignment factor */
+ .sleb128 -4 /* Data alignment factor */
+ .byte 8 /* Return address register column */
+ .uleb128 1 /* Augmentation value length */
+ .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
+ .byte 0x0c /* DW_CFA_def_cfa */
+ .uleb128 4
+ .uleb128 4
+ .byte 0x88 /* DW_CFA_offset, column 0x8 */
+ .uleb128 1
+ .align 4
+.LENDCIE:
+
+ .long .LENDFDE1-.LSTARTFDE1 /* Length FDE */
+.LSTARTFDE1:
+ .long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */
+ .long .LSTART_vsyscall-. /* PC-relative start address */
+ .long .LEND_vsyscall-.LSTART_vsyscall
+ .uleb128 0 /* Augmentation length */
+ /* What follows are the instructions for the table generation.
+ We have to record all changes of the stack pointer. */
+ .byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */
+ .byte 0x0e /* DW_CFA_def_cfa_offset */
+ .uleb128 8
+ .byte 0x85, 0x02 /* DW_CFA_offset %ebp -8 */
+ .byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */
+ .byte 0xc5 /* DW_CFA_restore %ebp */
+ .byte 0x0e /* DW_CFA_def_cfa_offset */
+ .uleb128 4
+ .align 4
+.LENDFDE1:
+
+#define SYSCALL_ENTER_KERNEL syscall
+#include "vsyscall-sigreturn.S"
diff --git a/arch/x86_64/ia32/vsyscall-sysenter.S b/arch/x86_64/ia32/vsyscall-sysenter.S
new file mode 100644
index 000000000000..8fb8e0ff3afa
--- /dev/null
+++ b/arch/x86_64/ia32/vsyscall-sysenter.S
@@ -0,0 +1,94 @@
+/*
+ * Code for the vsyscall page. This version uses the sysenter instruction.
+ */
+
+#include <asm/ia32_unistd.h>
+#include <asm/offset.h>
+
+ .text
+ .section .text.vsyscall,"ax"
+ .globl __kernel_vsyscall
+ .type __kernel_vsyscall,@function
+__kernel_vsyscall:
+.LSTART_vsyscall:
+ push %ecx
+.Lpush_ecx:
+ push %edx
+.Lpush_edx:
+ push %ebp
+.Lenter_kernel:
+ movl %esp,%ebp
+ sysenter
+ .space 7,0x90
+ jmp .Lenter_kernel
+ /* 16: System call normal return point is here! */
+ pop %ebp
+.Lpop_ebp:
+ pop %edx
+.Lpop_edx:
+ pop %ecx
+.Lpop_ecx:
+ ret
+.LEND_vsyscall:
+ .size __kernel_vsyscall,.-.LSTART_vsyscall
+
+ .section .eh_frame,"a",@progbits
+.LSTARTFRAME:
+ .long .LENDCIE-.LSTARTCIE
+.LSTARTCIE:
+ .long 0 /* CIE ID */
+ .byte 1 /* Version number */
+ .string "zR" /* NUL-terminated augmentation string */
+ .uleb128 1 /* Code alignment factor */
+ .sleb128 -4 /* Data alignment factor */
+ .byte 8 /* Return address register column */
+ .uleb128 1 /* Augmentation value length */
+ .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
+ .byte 0x0c /* DW_CFA_def_cfa */
+ .uleb128 4
+ .uleb128 4
+ .byte 0x88 /* DW_CFA_offset, column 0x8 */
+ .uleb128 1
+ .align 4
+.LENDCIE:
+
+ .long .LENDFDE1-.LSTARTFDE1 /* Length FDE */
+.LSTARTFDE1:
+ .long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */
+ .long .LSTART_vsyscall-. /* PC-relative start address */
+ .long .LEND_vsyscall-.LSTART_vsyscall
+ .uleb128 0 /* Augmentation length */
+ /* What follows are the instructions for the table generation.
+ We have to record all changes of the stack pointer. */
+ .byte 0x04 /* DW_CFA_advance_loc4 */
+ .long .Lpush_ecx-.LSTART_vsyscall
+ .byte 0x0e /* DW_CFA_def_cfa_offset */
+ .byte 0x08 /* RA at offset 8 now */
+ .byte 0x04 /* DW_CFA_advance_loc4 */
+ .long .Lpush_edx-.Lpush_ecx
+ .byte 0x0e /* DW_CFA_def_cfa_offset */
+ .byte 0x0c /* RA at offset 12 now */
+ .byte 0x04 /* DW_CFA_advance_loc4 */
+ .long .Lenter_kernel-.Lpush_edx
+ .byte 0x0e /* DW_CFA_def_cfa_offset */
+ .byte 0x10 /* RA at offset 16 now */
+ .byte 0x85, 0x04 /* DW_CFA_offset %ebp -16 */
+ /* Finally the epilogue. */
+ .byte 0x04 /* DW_CFA_advance_loc4 */
+ .long .Lpop_ebp-.Lenter_kernel
+ .byte 0x0e /* DW_CFA_def_cfa_offset */
+ .byte 0x12 /* RA at offset 12 now */
+ .byte 0xc5 /* DW_CFA_restore %ebp */
+ .byte 0x04 /* DW_CFA_advance_loc4 */
+ .long .Lpop_edx-.Lpop_ebp
+ .byte 0x0e /* DW_CFA_def_cfa_offset */
+ .byte 0x08 /* RA at offset 8 now */
+ .byte 0x04 /* DW_CFA_advance_loc4 */
+ .long .Lpop_ecx-.Lpop_edx
+ .byte 0x0e /* DW_CFA_def_cfa_offset */
+ .byte 0x04 /* RA at offset 4 now */
+ .align 4
+.LENDFDE1:
+
+#define SYSCALL_ENTER_KERNEL int $0x80
+#include "vsyscall-sigreturn.S"
diff --git a/arch/x86_64/ia32/vsyscall.lds b/arch/x86_64/ia32/vsyscall.lds
new file mode 100644
index 000000000000..fa4b4dd4a9ff
--- /dev/null
+++ b/arch/x86_64/ia32/vsyscall.lds
@@ -0,0 +1,77 @@
+/*
+ * Linker script for vsyscall DSO. The vsyscall page is an ELF shared
+ * object prelinked to its virtual address. This script controls its layout.
+ */
+
+/* This must match <asm/fixmap.h>. */
+VSYSCALL_BASE = 0xffffe000;
+
+SECTIONS
+{
+ . = VSYSCALL_BASE + SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :text
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ /* This linker script is used both with -r and with -shared.
+ For the layouts to match, we need to skip more than enough
+ space for the dynamic symbol table et al. If this amount
+ is insufficient, ld -shared will barf. Just increase it here. */
+ . = VSYSCALL_BASE + 0x400;
+
+ .text.vsyscall : { *(.text.vsyscall) } :text =0x90909090
+
+ /* This is an 32bit object and we cannot easily get the offsets
+ into the 64bit kernel. Just hardcode them here. This assumes
+ that all the stubs don't need more than 0x100 bytes. */
+ . = VSYSCALL_BASE + 0x500;
+
+ .text.sigreturn : { *(.text.sigreturn) } :text =0x90909090
+
+ . = VSYSCALL_BASE + 0x600;
+
+ .text.rtsigreturn : { *(.text.rtsigreturn) } :text =0x90909090
+
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+ .dynamic : { *(.dynamic) } :text :dynamic
+ .useless : {
+ *(.got.plt) *(.got)
+ *(.data .data.* .gnu.linkonce.d.*)
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ } :text
+}
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+ text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+ LINUX_2.5 {
+ global:
+ __kernel_vsyscall;
+ __kernel_sigreturn;
+ __kernel_rt_sigreturn;
+
+ local: *;
+ };
+}
+
+/* The ELF entry point can be used to set the AT_SYSINFO value. */
+ENTRY(__kernel_vsyscall);