diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-06-16 13:24:17 +0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-06-16 13:24:17 +0400 |
commit | 9583f3d9c00974911daf227cbaa12d6c1caaabad (patch) | |
tree | f37466204ecbfea37c7d567fd06c9bece0cc663d /fs | |
parent | 962cf36c5bf6d2840b8d66ee9a606fae2f540bbd (diff) | |
parent | 066519068ad2fbe98c7f45552b1f592903a9c8c8 (diff) | |
download | linux-9583f3d9c00974911daf227cbaa12d6c1caaabad.tar.xz |
Merge branch 'linus' into core/softirq
Diffstat (limited to 'fs')
53 files changed, 645 insertions, 426 deletions
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 55e8ee1900a5..3263084eef9e 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -42,7 +42,7 @@ config BINFMT_ELF_FDPIC config BINFMT_FLAT bool "Kernel support for flat binaries" - depends on !MMU + depends on !MMU && (!FRV || BROKEN) help Support uClinux FLAT format binaries. diff --git a/fs/afs/callback.c b/fs/afs/callback.c index a78d5b236bb1..587ef5123cd8 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -8,7 +8,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * - * Authors: David Woodhouse <dwmw2@cambridge.redhat.com> + * Authors: David Woodhouse <dwmw2@infradead.org> * David Howells <dhowells@redhat.com> * */ diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 08db82e1343a..bb47217f6a18 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -8,7 +8,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * - * Authors: David Woodhouse <dwmw2@cambridge.redhat.com> + * Authors: David Woodhouse <dwmw2@infradead.org> * David Howells <dhowells@redhat.com> * */ diff --git a/fs/afs/super.c b/fs/afs/super.c index 4b572b801d8d..7e3faeef6818 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -10,7 +10,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * Authors: David Howells <dhowells@redhat.com> - * David Woodhouse <dwmw2@redhat.com> + * David Woodhouse <dwmw2@infradead.org> * */ @@ -591,10 +591,6 @@ static void use_mm(struct mm_struct *mm) atomic_inc(&mm->mm_count); tsk->mm = mm; tsk->active_mm = mm; - /* - * Note that on UML this *requires* PF_BORROWED_MM to be set, otherwise - * it won't work. Update it accordingly if you change it here - */ switch_mm(active_mm, mm, tsk); task_unlock(tsk); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index ddd35d873391..d051a32e6270 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -390,7 +390,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, } /* expand the stack mapping to use up the entire allocation granule */ - fullsize = ksize((char *) current->mm->start_brk); + fullsize = kobjsize((char *) current->mm->start_brk); if (!IS_ERR_VALUE(do_mremap(current->mm->start_brk, stack_size, fullsize, 0, 0))) stack_size = fullsize; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 3b40d45a3a16..2cb1acda3a82 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -548,7 +548,7 @@ static int load_flat_file(struct linux_binprm * bprm, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0); /* Remap to use all availabe slack region space */ if (realdatastart && (realdatastart < (unsigned long)-4096)) { - reallen = ksize((void *)realdatastart); + reallen = kobjsize((void *)realdatastart); if (reallen > len) { realdatastart = do_mremap(realdatastart, len, reallen, MREMAP_FIXED, realdatastart); @@ -600,7 +600,7 @@ static int load_flat_file(struct linux_binprm * bprm, PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0); /* Remap to use all availabe slack region space */ if (textpos && (textpos < (unsigned long) -4096)) { - reallen = ksize((void *)textpos); + reallen = kobjsize((void *)textpos); if (reallen > len) { textpos = do_mremap(textpos, len, reallen, MREMAP_FIXED, textpos); @@ -683,7 +683,7 @@ static int load_flat_file(struct linux_binprm * bprm, */ current->mm->start_brk = datapos + data_len + bss_len; current->mm->brk = (current->mm->start_brk + 3) & ~3; - current->mm->context.end_brk = memp + ksize((void *) memp) - stack_len; + current->mm->context.end_brk = memp + kobjsize((void *) memp) - stack_len; } if (flags & FLAT_FLAG_KTRACE) @@ -790,7 +790,7 @@ static int load_flat_file(struct linux_binprm * bprm, /* zero the BSS, BRK and stack areas */ memset((void*)(datapos + data_len), 0, bss_len + - (memp + ksize((void *) memp) - stack_len - /* end brk */ + (memp + kobjsize((void *) memp) - stack_len - /* end brk */ libinfo->lib_list[id].start_brk) + /* start brk */ stack_len); diff --git a/fs/block_dev.c b/fs/block_dev.c index 7d822fae7765..470c10ceb0fb 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -12,6 +12,7 @@ #include <linux/kmod.h> #include <linux/major.h> #include <linux/smp_lock.h> +#include <linux/device_cgroup.h> #include <linux/highmem.h> #include <linux/blkdev.h> #include <linux/module.h> @@ -928,9 +929,14 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) { struct module *owner = NULL; struct gendisk *disk; - int ret = -ENXIO; + int ret; int part; + ret = devcgroup_inode_permission(bdev->bd_inode, file->f_mode); + if (ret != 0) + return ret; + + ret = -ENXIO; file->f_mapping = bdev->bd_inode->i_mapping; lock_kernel(); disk = get_gendisk(bdev->bd_dev, &part); diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 28e3d5c5fcac..1f3465201fdf 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -2,6 +2,11 @@ Version 1.53 ------------ DFS support added (Microsoft Distributed File System client support needed for referrals which enable a hierarchical name space among servers). +Disable temporary caching of mode bits to servers which do not support +storing of mode (e.g. Windows servers, when client mounts without cifsacl +mount option) and add new "dynperm" mount option to enable temporary caching +of mode (enable old behavior). Fix hang on mount caused when server crashes +tcp session during negotiate protocol. Version 1.52 ------------ diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index cb52cbbe45ff..f58e41d3ba48 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -186,6 +186,11 @@ asn1_length_decode(struct asn1_ctx *ctx, unsigned int *def, unsigned int *len) } } } + + /* don't trust len bigger than ctx buffer */ + if (*len > ctx->end - ctx->pointer) + return 0; + return 1; } @@ -203,6 +208,10 @@ asn1_header_decode(struct asn1_ctx *ctx, if (!asn1_length_decode(ctx, &def, &len)) return 0; + /* primitive shall be definite, indefinite shall be constructed */ + if (*con == ASN1_PRI && !def) + return 0; + if (def) *eoc = ctx->pointer + len; else @@ -389,6 +398,11 @@ asn1_oid_decode(struct asn1_ctx *ctx, unsigned long *optr; size = eoc - ctx->pointer + 1; + + /* first subid actually encodes first two subids */ + if (size < 2 || size > ULONG_MAX/sizeof(unsigned long)) + return 0; + *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); if (*oid == NULL) return 0; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 5df93fd6303f..86b4d5f405ae 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -97,9 +97,6 @@ cifs_read_super(struct super_block *sb, void *data, { struct inode *inode; struct cifs_sb_info *cifs_sb; -#ifdef CONFIG_CIFS_DFS_UPCALL - int len; -#endif int rc = 0; /* BB should we make this contingent on mount parm? */ @@ -117,15 +114,17 @@ cifs_read_super(struct super_block *sb, void *data, * complex operation (mount), and in case of fail * just exit instead of doing mount and attempting * undo it if this copy fails?*/ - len = strlen(data); - cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL); - if (cifs_sb->mountdata == NULL) { - kfree(sb->s_fs_info); - sb->s_fs_info = NULL; - return -ENOMEM; + if (data) { + int len = strlen(data); + cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL); + if (cifs_sb->mountdata == NULL) { + kfree(sb->s_fs_info); + sb->s_fs_info = NULL; + return -ENOMEM; + } + strncpy(cifs_sb->mountdata, data, len + 1); + cifs_sb->mountdata[len] = '\0'; } - strncpy(cifs_sb->mountdata, data, len + 1); - cifs_sb->mountdata[len] = '\0'; #endif rc = cifs_mount(sb, cifs_sb, data, devname); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 08914053242b..9cfcf326ead3 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -333,7 +333,6 @@ struct cifsFileInfo { bool messageMode:1; /* for pipes: message vs byte mode */ atomic_t wrtPending; /* handle in use - defer close */ struct semaphore fh_sem; /* prevents reopen race after dead ses*/ - char *search_resume_name; /* BB removeme BB */ struct cifs_search_info srch_inf; }; @@ -626,7 +625,7 @@ GLOBAL_EXTERN atomic_t tcpSesAllocCount; GLOBAL_EXTERN atomic_t tcpSesReconnectCount; GLOBAL_EXTERN atomic_t tconInfoReconnectCount; -/* Various Debug counters to remove someday (BB) */ +/* Various Debug counters */ GLOBAL_EXTERN atomic_t bufAllocCount; /* current number allocated */ #ifdef CONFIG_CIFS_STATS2 GLOBAL_EXTERN atomic_t totBufAllocCount; /* total allocated over all time */ diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 65d58b4e6a61..0f327c224da3 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -79,6 +79,19 @@ #define TRANS2_GET_DFS_REFERRAL 0x10 #define TRANS2_REPORT_DFS_INCOSISTENCY 0x11 +/* SMB Transact (Named Pipe) subcommand codes */ +#define TRANS_SET_NMPIPE_STATE 0x0001 +#define TRANS_RAW_READ_NMPIPE 0x0011 +#define TRANS_QUERY_NMPIPE_STATE 0x0021 +#define TRANS_QUERY_NMPIPE_INFO 0x0022 +#define TRANS_PEEK_NMPIPE 0x0023 +#define TRANS_TRANSACT_NMPIPE 0x0026 +#define TRANS_RAW_WRITE_NMPIPE 0x0031 +#define TRANS_READ_NMPIPE 0x0036 +#define TRANS_WRITE_NMPIPE 0x0037 +#define TRANS_WAIT_NMPIPE 0x0053 +#define TRANS_CALL_NMPIPE 0x0054 + /* NT Transact subcommand codes */ #define NT_TRANSACT_CREATE 0x01 #define NT_TRANSACT_IOCTL 0x02 @@ -328,12 +341,13 @@ #define CREATE_COMPLETE_IF_OPLK 0x00000100 /* should be zero */ #define CREATE_NO_EA_KNOWLEDGE 0x00000200 #define CREATE_EIGHT_DOT_THREE 0x00000400 /* doc says this is obsolete - open for recovery flag - should - be zero */ + "open for recovery" flag - should + be zero in any case */ +#define CREATE_OPEN_FOR_RECOVERY 0x00000400 #define CREATE_RANDOM_ACCESS 0x00000800 #define CREATE_DELETE_ON_CLOSE 0x00001000 #define CREATE_OPEN_BY_ID 0x00002000 -#define CREATE_OPEN_BACKUP_INTN 0x00004000 +#define CREATE_OPEN_BACKUP_INTENT 0x00004000 #define CREATE_NO_COMPRESSION 0x00008000 #define CREATE_RESERVE_OPFILTER 0x00100000 /* should be zero */ #define OPEN_REPARSE_POINT 0x00200000 @@ -722,7 +736,6 @@ typedef struct smb_com_tconx_rsp_ext { #define SMB_CSC_CACHE_AUTO_REINT 0x0004 #define SMB_CSC_CACHE_VDO 0x0008 #define SMB_CSC_NO_CACHING 0x000C - #define SMB_UNIQUE_FILE_NAME 0x0010 #define SMB_EXTENDED_SIGNATURES 0x0020 @@ -806,7 +819,7 @@ typedef struct smb_com_findclose_req { #define ICOUNT_MASK 0x00FF #define PIPE_READ_MODE 0x0100 #define NAMED_PIPE_TYPE 0x0400 -#define PIPE_END_POINT 0x0800 +#define PIPE_END_POINT 0x4000 #define BLOCKING_NAMED_PIPE 0x8000 typedef struct smb_com_open_req { /* also handles create */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 9b8b4cfdf993..4511b708f0f3 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1728,7 +1728,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, { int rc = 0; LOCK_REQ *pSMB = NULL; - LOCK_RSP *pSMBr = NULL; +/* LOCK_RSP *pSMBr = NULL; */ /* No response data other than rc to parse */ int bytes_returned; int timeout = 0; __u16 count; @@ -1739,8 +1739,6 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, if (rc) return rc; - pSMBr = (LOCK_RSP *)pSMB; /* BB removeme BB */ - if (lockType == LOCKING_ANDX_OPLOCK_RELEASE) { timeout = CIFS_ASYNC_OP; /* no response expected */ pSMB->Timeout = 0; @@ -1774,7 +1772,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, if (waitFlag) { rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB, - (struct smb_hdr *) pSMBr, &bytes_returned); + (struct smb_hdr *) pSMB, &bytes_returned); cifs_small_buf_release(pSMB); } else { rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *)pSMB, @@ -3927,9 +3925,9 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, } ref = (struct dfs_referral_level_3 *) &(pSMBr->referrals); - if (ref->VersionNumber != 3) { + if (ref->VersionNumber != cpu_to_le16(3)) { cERROR(1, ("Referrals of V%d version are not supported," - "should be V3", ref->VersionNumber)); + "should be V3", le16_to_cpu(ref->VersionNumber))); rc = -EINVAL; goto parse_DFS_referrals_exit; } @@ -3977,7 +3975,7 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, if (rc) goto parse_DFS_referrals_exit; - ref += ref->Size; + ref += le16_to_cpu(ref->Size); } parse_DFS_referrals_exit: diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 023434f72c15..e8fa46c7cff2 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -653,6 +653,7 @@ multi_t2_fnd: spin_lock(&GlobalMid_Lock); server->tcpStatus = CifsExiting; spin_unlock(&GlobalMid_Lock); + wake_up_all(&server->response_q); /* don't exit until kthread_stop is called */ set_current_state(TASK_UNINTERRUPTIBLE); @@ -2120,6 +2121,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; } + if ((volume_info.cifs_acl) && (volume_info.dynperm)) + cERROR(1, ("mount option dynperm ignored if cifsacl " + "mount option supported")); + tcon = find_unc(sin_server.sin_addr.s_addr, volume_info.UNC, volume_info.username); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index f0b5b5f3dd2e..fb69c1fa85c9 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -260,7 +260,9 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, buf, inode->i_sb, xid, &fileHandle); if (newinode) { - newinode->i_mode = mode; + if (cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_DYNPERM) + newinode->i_mode = mode; if ((oplock & CIFS_CREATE_ACTION) && (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 8636cec2642c..0aac824371a5 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -546,7 +546,6 @@ int cifs_close(struct inode *inode, struct file *file) msleep(timeout); timeout *= 8; } - kfree(pSMBFile->search_resume_name); kfree(file->private_data); file->private_data = NULL; } else @@ -605,12 +604,6 @@ int cifs_closedir(struct inode *inode, struct file *file) else cifs_buf_release(ptmp); } - ptmp = pCFileStruct->search_resume_name; - if (ptmp) { - cFYI(1, ("closedir free resume name")); - pCFileStruct->search_resume_name = NULL; - kfree(ptmp); - } kfree(file->private_data); file->private_data = NULL; } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 129dbfe4dca7..722be543ceec 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -418,6 +418,7 @@ int cifs_get_inode_info(struct inode **pinode, char *buf = NULL; bool adjustTZ = false; bool is_dfs_referral = false; + umode_t default_mode; pTcon = cifs_sb->tcon; cFYI(1, ("Getting info on %s", full_path)); @@ -530,47 +531,42 @@ int cifs_get_inode_info(struct inode **pinode, inode->i_mtime.tv_sec += pTcon->ses->server->timeAdj; } - /* set default mode. will override for dirs below */ - if (atomic_read(&cifsInfo->inUse) == 0) - /* new inode, can safely set these fields */ - inode->i_mode = cifs_sb->mnt_file_mode; - else /* since we set the inode type below we need to mask off - to avoid strange results if type changes and both - get orred in */ - inode->i_mode &= ~S_IFMT; -/* if (attr & ATTR_REPARSE) */ - /* We no longer handle these as symlinks because we could not - follow them due to the absolute path with drive letter */ - if (attr & ATTR_DIRECTORY) { - /* override default perms since we do not do byte range locking - on dirs */ - inode->i_mode = cifs_sb->mnt_dir_mode; - inode->i_mode |= S_IFDIR; - } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && - (cifsInfo->cifsAttrs & ATTR_SYSTEM) && - /* No need to le64 convert size of zero */ - (pfindData->EndOfFile == 0)) { - inode->i_mode = cifs_sb->mnt_file_mode; - inode->i_mode |= S_IFIFO; -/* BB Finish for SFU style symlinks and devices */ - } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && - (cifsInfo->cifsAttrs & ATTR_SYSTEM)) { - if (decode_sfu_inode(inode, le64_to_cpu(pfindData->EndOfFile), - full_path, cifs_sb, xid)) - cFYI(1, ("Unrecognized sfu inode type")); - - cFYI(1, ("sfu mode 0%o", inode->i_mode)); + /* get default inode mode */ + if (attr & ATTR_DIRECTORY) + default_mode = cifs_sb->mnt_dir_mode; + else + default_mode = cifs_sb->mnt_file_mode; + + /* set permission bits */ + if (atomic_read(&cifsInfo->inUse) == 0 || + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) + inode->i_mode = default_mode; + else { + /* just reenable write bits if !ATTR_READONLY */ + if ((inode->i_mode & S_IWUGO) == 0 && + (attr & ATTR_READONLY) == 0) + inode->i_mode |= (S_IWUGO & default_mode); + inode->i_mode &= ~S_IFMT; + } + /* clear write bits if ATTR_READONLY is set */ + if (attr & ATTR_READONLY) + inode->i_mode &= ~S_IWUGO; + + /* set inode type */ + if ((attr & ATTR_SYSTEM) && + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) { + /* no need to fix endianness on 0 */ + if (pfindData->EndOfFile == 0) + inode->i_mode |= S_IFIFO; + else if (decode_sfu_inode(inode, + le64_to_cpu(pfindData->EndOfFile), + full_path, cifs_sb, xid)) + cFYI(1, ("unknown SFU file type\n")); } else { - inode->i_mode |= S_IFREG; - /* treat dos attribute of read-only as read-only mode eg 555 */ - if (cifsInfo->cifsAttrs & ATTR_READONLY) - inode->i_mode &= ~(S_IWUGO); - else if ((inode->i_mode & S_IWUGO) == 0) - /* the ATTR_READONLY flag may have been */ - /* changed on server -- set any w bits */ - /* allowed by mnt_file_mode */ - inode->i_mode |= (S_IWUGO & cifs_sb->mnt_file_mode); - /* BB add code to validate if device or weird share or device type? */ + if (attr & ATTR_DIRECTORY) + inode->i_mode |= S_IFDIR; + else + inode->i_mode |= S_IFREG; } spin_lock(&inode->i_lock); @@ -1019,8 +1015,11 @@ mkdir_get_info: CIFS_MOUNT_MAP_SPECIAL_CHR); } if (direntry->d_inode) { - direntry->d_inode->i_mode = mode; - direntry->d_inode->i_mode |= S_IFDIR; + if (cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_DYNPERM) + direntry->d_inode->i_mode = + (mode | S_IFDIR); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { direntry->d_inode->i_uid = @@ -1547,13 +1546,26 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) } else goto cifs_setattr_exit; } - if (attrs->ia_valid & ATTR_UID) { - cFYI(1, ("UID changed to %d", attrs->ia_uid)); - uid = attrs->ia_uid; - } - if (attrs->ia_valid & ATTR_GID) { - cFYI(1, ("GID changed to %d", attrs->ia_gid)); - gid = attrs->ia_gid; + + /* + * Without unix extensions we can't send ownership changes to the + * server, so silently ignore them. This is consistent with how + * local DOS/Windows filesystems behave (VFAT, NTFS, etc). With + * CIFSACL support + proper Windows to Unix idmapping, we may be + * able to support this in the future. + */ + if (!pTcon->unix_ext && + !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) { + attrs->ia_valid &= ~(ATTR_UID | ATTR_GID); + } else { + if (attrs->ia_valid & ATTR_UID) { + cFYI(1, ("UID changed to %d", attrs->ia_uid)); + uid = attrs->ia_uid; + } + if (attrs->ia_valid & ATTR_GID) { + cFYI(1, ("GID changed to %d", attrs->ia_gid)); + gid = attrs->ia_gid; + } } time_buf.Attributes = 0; @@ -1563,7 +1575,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) attrs->ia_valid &= ~ATTR_MODE; if (attrs->ia_valid & ATTR_MODE) { - cFYI(1, ("Mode changed to 0x%x", attrs->ia_mode)); + cFYI(1, ("Mode changed to 0%o", attrs->ia_mode)); mode = attrs->ia_mode; } @@ -1578,18 +1590,18 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) #ifdef CONFIG_CIFS_EXPERIMENTAL if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) rc = mode_to_acl(inode, full_path, mode); - else if ((mode & S_IWUGO) == 0) { -#else - if ((mode & S_IWUGO) == 0) { + else #endif - /* not writeable */ - if ((cifsInode->cifsAttrs & ATTR_READONLY) == 0) { - set_dosattr = true; - time_buf.Attributes = - cpu_to_le32(cifsInode->cifsAttrs | - ATTR_READONLY); - } - } else if (cifsInode->cifsAttrs & ATTR_READONLY) { + if (((mode & S_IWUGO) == 0) && + (cifsInode->cifsAttrs & ATTR_READONLY) == 0) { + set_dosattr = true; + time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs | + ATTR_READONLY); + /* fix up mode if we're not using dynperm */ + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) + attrs->ia_mode = inode->i_mode & ~S_IWUGO; + } else if ((mode & S_IWUGO) && + (cifsInode->cifsAttrs & ATTR_READONLY)) { /* If file is readonly on server, we would not be able to write to it - so if any write bit is enabled for user or group or other we @@ -1600,6 +1612,20 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) /* Windows ignores set to zero */ if (time_buf.Attributes == 0) time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL); + + /* reset local inode permissions to normal */ + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) { + attrs->ia_mode &= ~(S_IALLUGO); + if (S_ISDIR(inode->i_mode)) + attrs->ia_mode |= + cifs_sb->mnt_dir_mode; + else + attrs->ia_mode |= + cifs_sb->mnt_file_mode; + } + } else if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) { + /* ignore mode change - ATTR_READONLY hasn't changed */ + attrs->ia_valid &= ~ATTR_MODE; } } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 1d69b8014e0b..4b17f8fe3157 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -519,8 +519,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) pnotify = (struct file_notify_information *) ((char *)&pSMBr->hdr.Protocol + data_offset); cFYI(1, ("dnotify on %s Action: 0x%x", - pnotify->FileName, - pnotify->Action)); /* BB removeme BB */ + pnotify->FileName, pnotify->Action)); /* cifs_dump_mem("Rcvd notify Data: ",buf, sizeof(struct smb_hdr)+60); */ return true; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 713c25110197..83f306954883 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -132,6 +132,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, __u32 attr; __u64 allocation_size; __u64 end_of_file; + umode_t default_mode; /* save mtime and size */ local_mtime = tmp_inode->i_mtime; @@ -187,48 +188,54 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, if (atomic_read(&cifsInfo->inUse) == 0) { tmp_inode->i_uid = cifs_sb->mnt_uid; tmp_inode->i_gid = cifs_sb->mnt_gid; - /* set default mode. will override for dirs below */ - tmp_inode->i_mode = cifs_sb->mnt_file_mode; - } else { - /* mask off the type bits since it gets set - below and we do not want to get two type - bits set */ + } + + if (attr & ATTR_DIRECTORY) + default_mode = cifs_sb->mnt_dir_mode; + else + default_mode = cifs_sb->mnt_file_mode; + + /* set initial permissions */ + if ((atomic_read(&cifsInfo->inUse) == 0) || + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) + tmp_inode->i_mode = default_mode; + else { + /* just reenable write bits if !ATTR_READONLY */ + if ((tmp_inode->i_mode & S_IWUGO) == 0 && + (attr & ATTR_READONLY) == 0) + tmp_inode->i_mode |= (S_IWUGO & default_mode); + tmp_inode->i_mode &= ~S_IFMT; } - if (attr & ATTR_DIRECTORY) { - *pobject_type = DT_DIR; - /* override default perms since we do not lock dirs */ - if (atomic_read(&cifsInfo->inUse) == 0) - tmp_inode->i_mode = cifs_sb->mnt_dir_mode; - tmp_inode->i_mode |= S_IFDIR; - } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && - (attr & ATTR_SYSTEM)) { + /* clear write bits if ATTR_READONLY is set */ + if (attr & ATTR_READONLY) + tmp_inode->i_mode &= ~S_IWUGO; + + /* set inode type */ + if ((attr & ATTR_SYSTEM) && + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) { if (end_of_file == 0) { - *pobject_type = DT_FIFO; tmp_inode->i_mode |= S_IFIFO; + *pobject_type = DT_FIFO; } else { - /* rather than get the type here, we mark the - inode as needing revalidate and get the real type - (blk vs chr vs. symlink) later ie in lookup */ - *pobject_type = DT_REG; + /* + * trying to get the type can be slow, so just call + * this a regular file for now, and mark for reval + */ tmp_inode->i_mode |= S_IFREG; + *pobject_type = DT_REG; cifsInfo->time = 0; } -/* we no longer mark these because we could not follow them */ -/* } else if (attr & ATTR_REPARSE) { - *pobject_type = DT_LNK; - tmp_inode->i_mode |= S_IFLNK; */ } else { - *pobject_type = DT_REG; - tmp_inode->i_mode |= S_IFREG; - if (attr & ATTR_READONLY) - tmp_inode->i_mode &= ~(S_IWUGO); - else if ((tmp_inode->i_mode & S_IWUGO) == 0) - /* the ATTR_READONLY flag may have been changed on */ - /* server -- set any w bits allowed by mnt_file_mode */ - tmp_inode->i_mode |= (S_IWUGO & cifs_sb->mnt_file_mode); - } /* could add code here - to validate if device or weird share type? */ + if (attr & ATTR_DIRECTORY) { + tmp_inode->i_mode |= S_IFDIR; + *pobject_type = DT_DIR; + } else { + tmp_inode->i_mode |= S_IFREG; + *pobject_type = DT_REG; + } + } /* can not fill in nlink here as in qpathinfo version and Unx search */ if (atomic_read(&cifsInfo->inUse) == 0) @@ -675,8 +682,6 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, cifsFile->invalidHandle = true; CIFSFindClose(xid, pTcon, cifsFile->netfid); } - kfree(cifsFile->search_resume_name); - cifsFile->search_resume_name = NULL; if (cifsFile->srch_inf.ntwrk_buf_start) { cFYI(1, ("freeing SMB ff cache buf on search rewind")); if (cifsFile->srch_inf.smallBuf) @@ -1043,9 +1048,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) } /* else { cifsFile->invalidHandle = true; CIFSFindClose(xid, pTcon, cifsFile->netfid); - } - kfree(cifsFile->search_resume_name); - cifsFile->search_resume_name = NULL; */ + } */ rc = find_cifs_entry(xid, pTcon, file, ¤t_entry, &num_to_fill); diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index cd62d75b2cc0..e2832bc7869a 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1906,9 +1906,9 @@ int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm, goto out; } } - mutex_unlock(&key_tfm_list_mutex); (*tfm) = key_tfm->key_tfm; (*tfm_mutex) = &key_tfm->key_tfm_mutex; out: + mutex_unlock(&key_tfm_list_mutex); return rc; } diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 951ee33a022d..c15c25745e05 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -660,8 +660,6 @@ int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm, int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key, struct ecryptfs_auth_tok **auth_tok, char *sig); -int ecryptfs_write_zeros(struct file *file, pgoff_t index, int start, - int num_zeros); int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, loff_t offset, size_t size); int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index ebf55150be56..75c2ea9fee35 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -157,20 +157,6 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, ecryptfs_page_idx, rc); goto out; } - if (start_offset_in_page) { - /* Read in the page from the lower - * into the eCryptfs inode page cache, - * decrypting */ - rc = ecryptfs_decrypt_page(ecryptfs_page); - if (rc) { - printk(KERN_ERR "%s: Error decrypting " - "page; rc = [%d]\n", - __func__, rc); - ClearPageUptodate(ecryptfs_page); - page_cache_release(ecryptfs_page); - goto out; - } - } ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0); /* @@ -349,14 +335,6 @@ int ecryptfs_read(char *data, loff_t offset, size_t size, ecryptfs_page_idx, rc); goto out; } - rc = ecryptfs_decrypt_page(ecryptfs_page); - if (rc) { - printk(KERN_ERR "%s: Error decrypting " - "page; rc = [%d]\n", __func__, rc); - ClearPageUptodate(ecryptfs_page); - page_cache_release(ecryptfs_page); - goto out; - } ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0); memcpy((data + data_offset), ((char *)ecryptfs_page_virt + start_offset_in_page), diff --git a/fs/exec.c b/fs/exec.c index 3c2ba7ce11d4..9448f1b50b4a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -860,6 +860,7 @@ static int de_thread(struct task_struct *tsk) no_thread_group: exit_itimers(sig); + flush_itimer_signals(); if (leader) release_task(leader); diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 28cfd0b40527..77278e947e94 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -580,7 +580,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, } blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count; - data = (__le32 *)dind->b_data + EXT3_SB(sb)->s_gdb_count; + data = (__le32 *)dind->b_data + (EXT3_SB(sb)->s_gdb_count % + EXT3_ADDR_PER_BLOCK(sb)); end = (__le32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb); /* Get each reserved primary GDT block and verify it holds backups */ diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 30494c5da843..9cc80b9cc8d8 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -43,6 +43,46 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, } +static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block, + ext4_group_t block_group) +{ + ext4_group_t actual_group; + ext4_get_group_no_and_offset(sb, block, &actual_group, 0); + if (actual_group == block_group) + return 1; + return 0; +} + +static int ext4_group_used_meta_blocks(struct super_block *sb, + ext4_group_t block_group) +{ + ext4_fsblk_t tmp; + struct ext4_sb_info *sbi = EXT4_SB(sb); + /* block bitmap, inode bitmap, and inode table blocks */ + int used_blocks = sbi->s_itb_per_group + 2; + + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { + struct ext4_group_desc *gdp; + struct buffer_head *bh; + + gdp = ext4_get_group_desc(sb, block_group, &bh); + if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), + block_group)) + used_blocks--; + + if (!ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), + block_group)) + used_blocks--; + + tmp = ext4_inode_table(sb, gdp); + for (; tmp < ext4_inode_table(sb, gdp) + + sbi->s_itb_per_group; tmp++) { + if (!ext4_block_in_group(sb, tmp, block_group)) + used_blocks -= 1; + } + } + return used_blocks; +} /* Initializes an uninitialized block bitmap if given, and returns the * number of blocks free in the group. */ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, @@ -105,20 +145,34 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, free_blocks = group_blocks - bit_max; if (bh) { - ext4_fsblk_t start; + ext4_fsblk_t start, tmp; + int flex_bg = 0; for (bit = 0; bit < bit_max; bit++) ext4_set_bit(bit, bh->b_data); start = ext4_group_first_block_no(sb, block_group); - /* Set bits for block and inode bitmaps, and inode table */ - ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data); - ext4_set_bit(ext4_inode_bitmap(sb, gdp) - start, bh->b_data); - for (bit = (ext4_inode_table(sb, gdp) - start), - bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++) - ext4_set_bit(bit, bh->b_data); + if (EXT4_HAS_INCOMPAT_FEATURE(sb, + EXT4_FEATURE_INCOMPAT_FLEX_BG)) + flex_bg = 1; + /* Set bits for block and inode bitmaps, and inode table */ + tmp = ext4_block_bitmap(sb, gdp); + if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) + ext4_set_bit(tmp - start, bh->b_data); + + tmp = ext4_inode_bitmap(sb, gdp); + if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) + ext4_set_bit(tmp - start, bh->b_data); + + tmp = ext4_inode_table(sb, gdp); + for (; tmp < ext4_inode_table(sb, gdp) + + sbi->s_itb_per_group; tmp++) { + if (!flex_bg || + ext4_block_in_group(sb, tmp, block_group)) + ext4_set_bit(tmp - start, bh->b_data); + } /* * Also if the number of blocks within the group is * less than the blocksize * 8 ( which is the size @@ -126,8 +180,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, */ mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data); } - - return free_blocks - sbi->s_itb_per_group - 2; + return free_blocks - ext4_group_used_meta_blocks(sb, block_group); } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 873ad9b3418c..c9900aade150 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2745,8 +2745,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, sbi = EXT4_SB(sb); es = sbi->s_es; - ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group, - gdp->bg_free_blocks_count); err = -EIO; bitmap_bh = read_block_bitmap(sb, ac->ac_b_ex.fe_group); @@ -2762,6 +2760,9 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, if (!gdp) goto out_err; + ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group, + gdp->bg_free_blocks_count); + err = ext4_journal_get_write_access(handle, gdp_bh); if (err) goto out_err; @@ -3094,8 +3095,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, struct ext4_prealloc_space *pa) { - unsigned len = ac->ac_o_ex.fe_len; - + unsigned int len = ac->ac_o_ex.fe_len; ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart, &ac->ac_b_ex.fe_group, &ac->ac_b_ex.fe_start); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 9f086a6a472b..9ecb92f68543 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -563,7 +563,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, } blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count; - data = (__le32 *)dind->b_data + EXT4_SB(sb)->s_gdb_count; + data = (__le32 *)dind->b_data + (EXT4_SB(sb)->s_gdb_count % + EXT4_ADDR_PER_BLOCK(sb)); end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb); /* Get each reserved primary GDT block and verify it holds backups */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 09d9359c8055..cb96f127c366 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -671,6 +671,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) unsigned long def_mount_opts; struct super_block *sb = vfs->mnt_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); + journal_t *journal = sbi->s_journal; struct ext4_super_block *es = sbi->s_es; def_mount_opts = le32_to_cpu(es->s_default_mount_opts); @@ -729,8 +730,15 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_printf(seq, ",commit=%u", (unsigned) (sbi->s_commit_interval / HZ)); } - if (test_opt(sb, BARRIER)) - seq_puts(seq, ",barrier=1"); + /* + * We're changing the default of barrier mount option, so + * let's always display its mount state so it's clear what its + * status is. + */ + seq_puts(seq, ",barrier="); + seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); + if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) + seq_puts(seq, ",journal_async_commit"); if (test_opt(sb, NOBH)) seq_puts(seq, ",nobh"); if (!test_opt(sb, EXTENTS)) @@ -1907,6 +1915,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) sbi->s_resgid = le16_to_cpu(es->s_def_resgid); set_opt(sbi->s_mount_opt, RESERVATION); + set_opt(sbi->s_mount_opt, BARRIER); /* * turn on extents feature by default in ext4 filesystem @@ -2189,6 +2198,29 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { if (ext4_load_journal(sb, es, journal_devnum)) goto failed_mount3; + if (!(sb->s_flags & MS_RDONLY) && + EXT4_SB(sb)->s_journal->j_failed_commit) { + printk(KERN_CRIT "EXT4-fs error (device %s): " + "ext4_fill_super: Journal transaction " + "%u is corrupt\n", sb->s_id, + EXT4_SB(sb)->s_journal->j_failed_commit); + if (test_opt (sb, ERRORS_RO)) { + printk (KERN_CRIT + "Mounting filesystem read-only\n"); + sb->s_flags |= MS_RDONLY; + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; + es->s_state |= cpu_to_le16(EXT4_ERROR_FS); + } + if (test_opt(sb, ERRORS_PANIC)) { + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; + es->s_state |= cpu_to_le16(EXT4_ERROR_FS); + ext4_commit_super(sb, es, 1); + printk(KERN_CRIT + "EXT4-fs (device %s): mount failed\n", + sb->s_id); + goto failed_mount4; + } + } } else if (journal_inum) { if (ext4_create_journal(sb, es, journal_inum)) goto failed_mount3; diff --git a/fs/fat/file.c b/fs/fat/file.c index 27cc1164ec36..771326b8047e 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -257,26 +257,34 @@ int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) } EXPORT_SYMBOL_GPL(fat_getattr); -static int fat_check_mode(const struct msdos_sb_info *sbi, struct inode *inode, - mode_t mode) +static int fat_sanitize_mode(const struct msdos_sb_info *sbi, + struct inode *inode, umode_t *mode_ptr) { - mode_t mask, req = mode & ~S_IFMT; + mode_t mask, perm; - if (S_ISREG(mode)) + /* + * Note, the basic check is already done by a caller of + * (attr->ia_mode & ~MSDOS_VALID_MODE) + */ + + if (S_ISREG(inode->i_mode)) mask = sbi->options.fs_fmask; else mask = sbi->options.fs_dmask; + perm = *mode_ptr & ~(S_IFMT | mask); + /* * Of the r and x bits, all (subject to umask) must be present. Of the * w bits, either all (subject to umask) or none must be present. */ - req &= ~mask; - if ((req & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO))) + if ((perm & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO))) return -EPERM; - if ((req & S_IWUGO) && ((req & S_IWUGO) != (S_IWUGO & ~mask))) + if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask))) return -EPERM; + *mode_ptr &= S_IFMT | perm; + return 0; } @@ -299,7 +307,7 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) { struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); struct inode *inode = dentry->d_inode; - int mask, error = 0; + int error = 0; unsigned int ia_valid; lock_kernel(); @@ -332,12 +340,13 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) error = 0; goto out; } + if (((attr->ia_valid & ATTR_UID) && (attr->ia_uid != sbi->options.fs_uid)) || ((attr->ia_valid & ATTR_GID) && (attr->ia_gid != sbi->options.fs_gid)) || ((attr->ia_valid & ATTR_MODE) && - fat_check_mode(sbi, inode, attr->ia_mode) < 0)) + (attr->ia_mode & ~MSDOS_VALID_MODE))) error = -EPERM; if (error) { @@ -346,15 +355,16 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) goto out; } - error = inode_setattr(inode, attr); - if (error) - goto out; + /* + * We don't return -EPERM here. Yes, strange, but this is too + * old behavior. + */ + if (attr->ia_valid & ATTR_MODE) { + if (fat_sanitize_mode(sbi, inode, &attr->ia_mode) < 0) + attr->ia_valid &= ~ATTR_MODE; + } - if (S_ISDIR(inode->i_mode)) - mask = sbi->options.fs_dmask; - else - mask = sbi->options.fs_fmask; - inode->i_mode &= S_IFMT | (S_IRWXUGO & ~mask); + error = inode_setattr(inode, attr); out: unlock_kernel(); return error; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index fb77e0962132..43e99513334a 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -488,7 +488,12 @@ static struct fuse_conn *new_conn(struct super_block *sb) err = bdi_init(&fc->bdi); if (err) goto error_kfree; - err = bdi_register_dev(&fc->bdi, fc->dev); + if (sb->s_bdev) { + err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk", + MAJOR(fc->dev), MINOR(fc->dev)); + } else { + err = bdi_register_dev(&fc->bdi, fc->dev); + } if (err) goto error_bdi_destroy; /* diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 4d99685fdce4..a2ed72f7ceee 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -168,6 +168,7 @@ static int journal_submit_commit_record(journal_t *journal, spin_unlock(&journal->j_state_lock); /* And try again, without the barrier */ + lock_buffer(bh); set_buffer_uptodate(bh); set_buffer_dirty(bh); ret = submit_bh(WRITE, bh); diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 5d0405a9e7ca..058f50f65b76 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -344,6 +344,7 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh, *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, obh->b_size); } + put_bh(obh); } return 0; } @@ -610,9 +611,8 @@ static int do_one_pass(journal_t *journal, chksum_err = chksum_seen = 0; if (info->end_transaction) { - printk(KERN_ERR "JBD: Transaction %u " - "found to be corrupt.\n", - next_commit_ID - 1); + journal->j_failed_commit = + info->end_transaction; brelse(bh); break; } @@ -643,10 +643,8 @@ static int do_one_pass(journal_t *journal, if (!JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){ - printk(KERN_ERR - "JBD: Transaction %u " - "found to be corrupt.\n", - next_commit_ID); + journal->j_failed_commit = + next_commit_ID; brelse(bh); break; } diff --git a/fs/libfs.c b/fs/libfs.c index b004dfadd891..892d41cb3382 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -528,6 +528,23 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, return count; } +ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, + const void *from, size_t available) +{ + loff_t pos = *ppos; + + if (pos < 0) + return -EINVAL; + if (pos >= available) + return 0; + if (count > available - pos) + count = available - pos; + memcpy(to, from + pos, count); + *ppos = pos + count; + + return count; +} + /* * Transaction based IO. * The file expects a single write which triggers the transaction, and then @@ -800,6 +817,7 @@ EXPORT_SYMBOL(simple_statfs); EXPORT_SYMBOL(simple_sync_file); EXPORT_SYMBOL(simple_unlink); EXPORT_SYMBOL(simple_read_from_buffer); +EXPORT_SYMBOL(memory_read_from_buffer); EXPORT_SYMBOL(simple_transaction_get); EXPORT_SYMBOL(simple_transaction_read); EXPORT_SYMBOL(simple_transaction_release); diff --git a/fs/ntfs/upcase.c b/fs/ntfs/upcase.c index 9101807dc81a..e2f72ca98037 100644 --- a/fs/ntfs/upcase.c +++ b/fs/ntfs/upcase.c @@ -77,11 +77,10 @@ ntfschar *generate_default_upcase(void) uc[i] = cpu_to_le16(i); for (r = 0; uc_run_table[r][0]; r++) for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++) - uc[i] = cpu_to_le16(le16_to_cpu(uc[i]) + - uc_run_table[r][2]); + le16_add_cpu(&uc[i], uc_run_table[r][2]); for (r = 0; uc_dup_table[r][0]; r++) for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2) - uc[i + 1] = cpu_to_le16(le16_to_cpu(uc[i + 1]) - 1); + le16_add_cpu(&uc[i + 1], -1); for (r = 0; uc_word_table[r][0]; r++) uc[uc_word_table[r][0]] = cpu_to_le16(uc_word_table[r][1]); return uc; diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 1e44ad14881a..a27d61581bd6 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -142,53 +142,43 @@ static void o2net_idle_timer(unsigned long data); static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); -static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, - u32 msgkey, struct task_struct *task, u8 node) -{ #ifdef CONFIG_DEBUG_FS +void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, + u32 msgkey, struct task_struct *task, u8 node) +{ INIT_LIST_HEAD(&nst->st_net_debug_item); nst->st_task = task; nst->st_msg_type = msgtype; nst->st_msg_key = msgkey; nst->st_node = node; -#endif } -static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) +void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) { -#ifdef CONFIG_DEBUG_FS do_gettimeofday(&nst->st_sock_time); -#endif } -static void o2net_set_nst_send_time(struct o2net_send_tracking *nst) +void o2net_set_nst_send_time(struct o2net_send_tracking *nst) { -#ifdef CONFIG_DEBUG_FS do_gettimeofday(&nst->st_send_time); -#endif } -static void o2net_set_nst_status_time(struct o2net_send_tracking *nst) +void o2net_set_nst_status_time(struct o2net_send_tracking *nst) { -#ifdef CONFIG_DEBUG_FS do_gettimeofday(&nst->st_status_time); -#endif } -static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, +void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, struct o2net_sock_container *sc) { -#ifdef CONFIG_DEBUG_FS nst->st_sc = sc; -#endif } -static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) +void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) { -#ifdef CONFIG_DEBUG_FS nst->st_id = msg_id; -#endif } +#endif /* CONFIG_DEBUG_FS */ static inline int o2net_reconnect_delay(void) { diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h index a705d5d19036..fd6179eb26d4 100644 --- a/fs/ocfs2/cluster/tcp.h +++ b/fs/ocfs2/cluster/tcp.h @@ -128,23 +128,23 @@ void o2net_debug_del_nst(struct o2net_send_tracking *nst); void o2net_debug_add_sc(struct o2net_sock_container *sc); void o2net_debug_del_sc(struct o2net_sock_container *sc); #else -static int o2net_debugfs_init(void) +static inline int o2net_debugfs_init(void) { return 0; } -static void o2net_debugfs_exit(void) +static inline void o2net_debugfs_exit(void) { } -static void o2net_debug_add_nst(struct o2net_send_tracking *nst) +static inline void o2net_debug_add_nst(struct o2net_send_tracking *nst) { } -static void o2net_debug_del_nst(struct o2net_send_tracking *nst) +static inline void o2net_debug_del_nst(struct o2net_send_tracking *nst) { } -static void o2net_debug_add_sc(struct o2net_sock_container *sc) +static inline void o2net_debug_add_sc(struct o2net_sock_container *sc) { } -static void o2net_debug_del_sc(struct o2net_sock_container *sc) +static inline void o2net_debug_del_sc(struct o2net_sock_container *sc) { } #endif /* CONFIG_DEBUG_FS */ diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index 8d58cfe410b1..18307ff81b77 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h @@ -224,10 +224,42 @@ struct o2net_send_tracking { struct timeval st_send_time; struct timeval st_status_time; }; + +void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, + u32 msgkey, struct task_struct *task, u8 node); +void o2net_set_nst_sock_time(struct o2net_send_tracking *nst); +void o2net_set_nst_send_time(struct o2net_send_tracking *nst); +void o2net_set_nst_status_time(struct o2net_send_tracking *nst); +void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, + struct o2net_sock_container *sc); +void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id); + #else struct o2net_send_tracking { u32 dummy; }; + +static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, + u32 msgkey, struct task_struct *task, u8 node) +{ +} +static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) +{ +} +static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) +{ +} +static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) +{ +} +static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, + struct o2net_sock_container *sc) +{ +} +static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, + u32 msg_id) +{ +} #endif /* CONFIG_DEBUG_FS */ #endif /* O2CLUSTER_TCP_INTERNAL_H */ diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h index d34a62a3a625..8c686d22f9c7 100644 --- a/fs/ocfs2/dlm/dlmdebug.h +++ b/fs/ocfs2/dlm/dlmdebug.h @@ -60,25 +60,25 @@ void dlm_destroy_debugfs_root(void); #else -static int dlm_debug_init(struct dlm_ctxt *dlm) +static inline int dlm_debug_init(struct dlm_ctxt *dlm) { return 0; } -static void dlm_debug_shutdown(struct dlm_ctxt *dlm) +static inline void dlm_debug_shutdown(struct dlm_ctxt *dlm) { } -static int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) +static inline int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) { return 0; } -static void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) +static inline void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) { } -static int dlm_create_debugfs_root(void) +static inline int dlm_create_debugfs_root(void) { return 0; } -static void dlm_destroy_debugfs_root(void) +static inline void dlm_destroy_debugfs_root(void) { } diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index b503772cd0ec..6b97d11f6bf8 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -61,7 +61,7 @@ * negotiated by the client. The client negotiates based on the maximum * version advertised in /sys/fs/ocfs2/max_locking_protocol. The major * number from the "SETV" message must match - * user_stack.sp_proto->lp_max_version.pv_major, and the minor number + * ocfs2_user_plugin.sp_proto->lp_max_version.pv_major, and the minor number * must be less than or equal to ...->lp_max_version.pv_minor. * * Once this information has been set, mounts will be allowed. From this @@ -153,7 +153,7 @@ union ocfs2_control_message { struct ocfs2_control_message_down u_down; }; -static struct ocfs2_stack_plugin user_stack; +static struct ocfs2_stack_plugin ocfs2_user_plugin; static atomic_t ocfs2_control_opened; static int ocfs2_control_this_node = -1; @@ -399,7 +399,7 @@ static int ocfs2_control_do_setversion_msg(struct file *file, char *ptr = NULL; struct ocfs2_control_private *p = file->private_data; struct ocfs2_protocol_version *max = - &user_stack.sp_proto->lp_max_version; + &ocfs2_user_plugin.sp_proto->lp_max_version; if (ocfs2_control_get_handshake_state(file) != OCFS2_CONTROL_HANDSHAKE_PROTOCOL) @@ -680,7 +680,7 @@ static void fsdlm_lock_ast_wrapper(void *astarg) struct dlm_lksb *lksb = fsdlm_astarg_to_lksb(astarg); int status = lksb->sb_status; - BUG_ON(user_stack.sp_proto == NULL); + BUG_ON(ocfs2_user_plugin.sp_proto == NULL); /* * For now we're punting on the issue of other non-standard errors @@ -693,16 +693,16 @@ static void fsdlm_lock_ast_wrapper(void *astarg) */ if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL) - user_stack.sp_proto->lp_unlock_ast(astarg, 0); + ocfs2_user_plugin.sp_proto->lp_unlock_ast(astarg, 0); else - user_stack.sp_proto->lp_lock_ast(astarg); + ocfs2_user_plugin.sp_proto->lp_lock_ast(astarg); } static void fsdlm_blocking_ast_wrapper(void *astarg, int level) { - BUG_ON(user_stack.sp_proto == NULL); + BUG_ON(ocfs2_user_plugin.sp_proto == NULL); - user_stack.sp_proto->lp_blocking_ast(astarg, level); + ocfs2_user_plugin.sp_proto->lp_blocking_ast(astarg, level); } static int user_dlm_lock(struct ocfs2_cluster_connection *conn, @@ -838,7 +838,7 @@ static int user_cluster_this_node(unsigned int *this_node) return 0; } -static struct ocfs2_stack_operations user_stack_ops = { +static struct ocfs2_stack_operations ocfs2_user_plugin_ops = { .connect = user_cluster_connect, .disconnect = user_cluster_disconnect, .this_node = user_cluster_this_node, @@ -849,20 +849,20 @@ static struct ocfs2_stack_operations user_stack_ops = { .dump_lksb = user_dlm_dump_lksb, }; -static struct ocfs2_stack_plugin user_stack = { +static struct ocfs2_stack_plugin ocfs2_user_plugin = { .sp_name = "user", - .sp_ops = &user_stack_ops, + .sp_ops = &ocfs2_user_plugin_ops, .sp_owner = THIS_MODULE, }; -static int __init user_stack_init(void) +static int __init ocfs2_user_plugin_init(void) { int rc; rc = ocfs2_control_init(); if (!rc) { - rc = ocfs2_stack_glue_register(&user_stack); + rc = ocfs2_stack_glue_register(&ocfs2_user_plugin); if (rc) ocfs2_control_exit(); } @@ -870,14 +870,14 @@ static int __init user_stack_init(void) return rc; } -static void __exit user_stack_exit(void) +static void __exit ocfs2_user_plugin_exit(void) { - ocfs2_stack_glue_unregister(&user_stack); + ocfs2_stack_glue_unregister(&ocfs2_user_plugin); ocfs2_control_exit(); } MODULE_AUTHOR("Oracle"); MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks"); MODULE_LICENSE("GPL"); -module_init(user_stack_init); -module_exit(user_stack_exit); +module_init(ocfs2_user_plugin_init); +module_exit(ocfs2_user_plugin_exit); diff --git a/fs/proc/array.c b/fs/proc/array.c index 9e3b8c33c24b..797d775e0354 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -288,7 +288,7 @@ static void render_cap_t(struct seq_file *m, const char *header, seq_printf(m, "%s", header); CAP_FOR_EACH_U32(__capi) { seq_printf(m, "%08x", - a->cap[(_LINUX_CAPABILITY_U32S-1) - __capi]); + a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]); } seq_printf(m, "\n"); } diff --git a/fs/proc/base.c b/fs/proc/base.c index c447e0743a3c..3b455371e7ff 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -127,6 +127,25 @@ struct pid_entry { NULL, &proc_single_file_operations, \ { .proc_show = &proc_##OTYPE } ) +/* + * Count the number of hardlinks for the pid_entry table, excluding the . + * and .. links. + */ +static unsigned int pid_entry_count_dirs(const struct pid_entry *entries, + unsigned int n) +{ + unsigned int i; + unsigned int count; + + count = 0; + for (i = 0; i < n; ++i) { + if (S_ISDIR(entries[i].mode)) + ++count; + } + + return count; +} + int maps_protect; EXPORT_SYMBOL(maps_protect); @@ -2585,10 +2604,9 @@ static struct dentry *proc_pid_instantiate(struct inode *dir, inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; inode->i_flags|=S_IMMUTABLE; - inode->i_nlink = 5; -#ifdef CONFIG_SECURITY - inode->i_nlink += 1; -#endif + + inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, + ARRAY_SIZE(tgid_base_stuff)); dentry->d_op = &pid_dentry_operations; @@ -2816,10 +2834,9 @@ static struct dentry *proc_task_instantiate(struct inode *dir, inode->i_op = &proc_tid_base_inode_operations; inode->i_fop = &proc_tid_base_operations; inode->i_flags|=S_IMMUTABLE; - inode->i_nlink = 4; -#ifdef CONFIG_SECURITY - inode->i_nlink += 1; -#endif + + inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, + ARRAY_SIZE(tid_base_stuff)); dentry->d_op = &pid_dentry_operations; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 6f4e8dc97da1..b08d10017911 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -425,7 +425,8 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, } } unlock_new_inode(inode); - } + } else + module_put(de->owner); return inode; out_ino: diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 74a323d2b850..7e277f2ad466 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -139,7 +139,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off, #define K(x) ((x) << (PAGE_SHIFT - 10)) si_meminfo(&i); si_swapinfo(&i); - committed = atomic_read(&vm_committed_space); + committed = atomic_long_read(&vm_committed_space); allowed = ((totalram_pages - hugetlb_total_pages()) * sysctl_overcommit_ratio / 100) + total_swap_pages; @@ -716,7 +716,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, pfn = src / KPMSIZE; count = min_t(size_t, count, (max_pfn * KPMSIZE) - src); if (src & KPMMASK || count & KPMMASK) - return -EIO; + return -EINVAL; while (count > 0) { ppage = NULL; @@ -726,7 +726,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, if (!ppage) pcount = 0; else - pcount = atomic_read(&ppage->_count); + pcount = page_mapcount(ppage); if (put_user(pcount, out++)) { ret = -EFAULT; @@ -782,7 +782,7 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf, pfn = src / KPMSIZE; count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src); if (src & KPMMASK || count & KPMMASK) - return -EIO; + return -EINVAL; while (count > 0) { ppage = NULL; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 88717c0f941b..ab8ccc9d14ff 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -315,9 +315,9 @@ struct mem_size_stats { }; static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, - void *private) + struct mm_walk *walk) { - struct mem_size_stats *mss = private; + struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = mss->vma; pte_t *pte, ptent; spinlock_t *ptl; @@ -365,19 +365,21 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, return 0; } -static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range }; - static int show_smap(struct seq_file *m, void *v) { struct vm_area_struct *vma = v; struct mem_size_stats mss; int ret; + struct mm_walk smaps_walk = { + .pmd_entry = smaps_pte_range, + .mm = vma->vm_mm, + .private = &mss, + }; memset(&mss, 0, sizeof mss); mss.vma = vma; if (vma->vm_mm && !is_vm_hugetlb_page(vma)) - walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end, - &smaps_walk, &mss); + walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); ret = show_map(m, v); if (ret) @@ -426,9 +428,9 @@ const struct file_operations proc_smaps_operations = { }; static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, - unsigned long end, void *private) + unsigned long end, struct mm_walk *walk) { - struct vm_area_struct *vma = private; + struct vm_area_struct *vma = walk->private; pte_t *pte, ptent; spinlock_t *ptl; struct page *page; @@ -452,8 +454,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, return 0; } -static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range }; - static ssize_t clear_refs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { @@ -476,11 +476,17 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, return -ESRCH; mm = get_task_mm(task); if (mm) { + static struct mm_walk clear_refs_walk; + memset(&clear_refs_walk, 0, sizeof(clear_refs_walk)); + clear_refs_walk.pmd_entry = clear_refs_pte_range; + clear_refs_walk.mm = mm; down_read(&mm->mmap_sem); - for (vma = mm->mmap; vma; vma = vma->vm_next) + for (vma = mm->mmap; vma; vma = vma->vm_next) { + clear_refs_walk.private = vma; if (!is_vm_hugetlb_page(vma)) - walk_page_range(mm, vma->vm_start, vma->vm_end, - &clear_refs_walk, vma); + walk_page_range(vma->vm_start, vma->vm_end, + &clear_refs_walk); + } flush_tlb_mm(mm); up_read(&mm->mmap_sem); mmput(mm); @@ -496,7 +502,7 @@ const struct file_operations proc_clear_refs_operations = { }; struct pagemapread { - char __user *out, *end; + u64 __user *out, *end; }; #define PM_ENTRY_BYTES sizeof(u64) @@ -519,28 +525,18 @@ struct pagemapread { static int add_to_pagemap(unsigned long addr, u64 pfn, struct pagemapread *pm) { - /* - * Make sure there's room in the buffer for an - * entire entry. Otherwise, only copy part of - * the pfn. - */ - if (pm->out + PM_ENTRY_BYTES >= pm->end) { - if (copy_to_user(pm->out, &pfn, pm->end - pm->out)) - return -EFAULT; - pm->out = pm->end; - return PM_END_OF_BUFFER; - } - if (put_user(pfn, pm->out)) return -EFAULT; - pm->out += PM_ENTRY_BYTES; + pm->out++; + if (pm->out >= pm->end) + return PM_END_OF_BUFFER; return 0; } static int pagemap_pte_hole(unsigned long start, unsigned long end, - void *private) + struct mm_walk *walk) { - struct pagemapread *pm = private; + struct pagemapread *pm = walk->private; unsigned long addr; int err = 0; for (addr = start; addr < end; addr += PAGE_SIZE) { @@ -557,24 +553,45 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte) return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); } +static unsigned long pte_to_pagemap_entry(pte_t pte) +{ + unsigned long pme = 0; + if (is_swap_pte(pte)) + pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte)) + | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; + else if (pte_present(pte)) + pme = PM_PFRAME(pte_pfn(pte)) + | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; + return pme; +} + static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, - void *private) + struct mm_walk *walk) { - struct pagemapread *pm = private; + struct vm_area_struct *vma; + struct pagemapread *pm = walk->private; pte_t *pte; int err = 0; + /* find the first VMA at or above 'addr' */ + vma = find_vma(walk->mm, addr); for (; addr != end; addr += PAGE_SIZE) { u64 pfn = PM_NOT_PRESENT; - pte = pte_offset_map(pmd, addr); - if (is_swap_pte(*pte)) - pfn = PM_PFRAME(swap_pte_to_pagemap_entry(*pte)) - | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; - else if (pte_present(*pte)) - pfn = PM_PFRAME(pte_pfn(*pte)) - | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; - /* unmap so we're not in atomic when we copy to userspace */ - pte_unmap(pte); + + /* check to see if we've left 'vma' behind + * and need a new, higher one */ + if (vma && (addr >= vma->vm_end)) + vma = find_vma(walk->mm, addr); + + /* check that 'vma' actually covers this address, + * and that it isn't a huge page vma */ + if (vma && (vma->vm_start <= addr) && + !is_vm_hugetlb_page(vma)) { + pte = pte_offset_map(pmd, addr); + pfn = pte_to_pagemap_entry(*pte); + /* unmap before userspace copy */ + pte_unmap(pte); + } err = add_to_pagemap(addr, pfn, pm); if (err) return err; @@ -634,7 +651,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, ret = -EINVAL; /* file position must be aligned */ - if (*ppos % PM_ENTRY_BYTES) + if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES)) goto out_task; ret = 0; @@ -664,8 +681,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, goto out_pages; } - pm.out = buf; - pm.end = buf + count; + pm.out = (u64 *)buf; + pm.end = (u64 *)(buf + count); if (!ptrace_may_attach(task)) { ret = -EIO; @@ -685,14 +702,14 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, * user buffer is tracked in "pm", and the walk * will stop when we hit the end of the buffer. */ - ret = walk_page_range(mm, start_vaddr, end_vaddr, - &pagemap_walk, &pm); + ret = walk_page_range(start_vaddr, end_vaddr, + &pagemap_walk); if (ret == PM_END_OF_BUFFER) ret = 0; /* don't need mmap_sem for these, but this looks cleaner */ - *ppos += pm.out - buf; + *ppos += (char *)pm.out - buf; if (!ret) - ret = pm.out - buf; + ret = (char *)pm.out - buf; } out_pages: diff --git a/fs/splice.c b/fs/splice.c index 78150038b584..aa5f6f60b305 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -58,8 +58,8 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, */ wait_on_page_writeback(page); - if (PagePrivate(page)) - try_to_release_page(page, GFP_KERNEL); + if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL)) + goto out_unlock; /* * If we succeeded in removing the mapping, set LRU flag @@ -75,6 +75,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, * Raced with truncate or failed to remove page from current * address space, unlock and return failure. */ +out_unlock: unlock_page(page); return 1; } @@ -983,7 +984,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, while (len) { size_t read_len; - loff_t pos = sd->pos; + loff_t pos = sd->pos, prev_pos = pos; ret = do_splice_to(in, &pos, pipe, len, flags); if (unlikely(ret <= 0)) @@ -998,15 +999,19 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, * could get stuck data in the internal pipe: */ ret = actor(pipe, sd); - if (unlikely(ret <= 0)) + if (unlikely(ret <= 0)) { + sd->pos = prev_pos; goto out_release; + } bytes += ret; len -= ret; sd->pos = pos; - if (ret < read_len) + if (ret < read_len) { + sd->pos = prev_pos + ret; goto out_release; + } } done: @@ -1072,7 +1077,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, ret = splice_direct_to_actor(in, &sd, direct_splice_actor); if (ret > 0) - *ppos += ret; + *ppos = sd.pos; return ret; } diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 5105015a75ad..98e0e86093b4 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -387,6 +387,8 @@ _xfs_buf_lookup_pages( if (unlikely(page == NULL)) { if (flags & XBF_READ_AHEAD) { bp->b_page_count = i; + for (i = 0; i < bp->b_page_count; i++) + unlock_page(bp->b_pages[i]); return -ENOMEM; } @@ -416,17 +418,24 @@ _xfs_buf_lookup_pages( ASSERT(!PagePrivate(page)); if (!PageUptodate(page)) { page_count--; - if (blocksize < PAGE_CACHE_SIZE && !PagePrivate(page)) { + if (blocksize >= PAGE_CACHE_SIZE) { + if (flags & XBF_READ) + bp->b_flags |= _XBF_PAGE_LOCKED; + } else if (!PagePrivate(page)) { if (test_page_region(page, offset, nbytes)) page_count++; } } - unlock_page(page); bp->b_pages[i] = page; offset = 0; } + if (!(bp->b_flags & _XBF_PAGE_LOCKED)) { + for (i = 0; i < bp->b_page_count; i++) + unlock_page(bp->b_pages[i]); + } + if (page_count == bp->b_page_count) bp->b_flags |= XBF_DONE; @@ -746,6 +755,7 @@ xfs_buf_associate_memory( bp->b_count_desired = len; bp->b_buffer_length = buflen; bp->b_flags |= XBF_MAPPED; + bp->b_flags &= ~_XBF_PAGE_LOCKED; return 0; } @@ -1093,8 +1103,10 @@ _xfs_buf_ioend( xfs_buf_t *bp, int schedule) { - if (atomic_dec_and_test(&bp->b_io_remaining) == 1) + if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { + bp->b_flags &= ~_XBF_PAGE_LOCKED; xfs_buf_ioend(bp, schedule); + } } STATIC void @@ -1125,6 +1137,9 @@ xfs_buf_bio_end_io( if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); + + if (bp->b_flags & _XBF_PAGE_LOCKED) + unlock_page(page); } while (bvec >= bio->bi_io_vec); _xfs_buf_ioend(bp, 1); @@ -1163,7 +1178,8 @@ _xfs_buf_ioapply( * filesystem block size is not smaller than the page size. */ if ((bp->b_buffer_length < PAGE_CACHE_SIZE) && - (bp->b_flags & XBF_READ) && + ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) == + (XBF_READ|_XBF_PAGE_LOCKED)) && (blocksize >= PAGE_CACHE_SIZE)) { bio = bio_alloc(GFP_NOIO, 1); diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 841d7883528d..f948ec7ba9a4 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -66,6 +66,25 @@ typedef enum { _XBF_PAGES = (1 << 18), /* backed by refcounted pages */ _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ + + /* + * Special flag for supporting metadata blocks smaller than a FSB. + * + * In this case we can have multiple xfs_buf_t on a single page and + * need to lock out concurrent xfs_buf_t readers as they only + * serialise access to the buffer. + * + * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation + * between reads of the page. Hence we can have one thread read the + * page and modify it, but then race with another thread that thinks + * the page is not up-to-date and hence reads it again. + * + * The result is that the first modifcation to the page is lost. + * This sort of AGF/AGI reading race can happen when unlinking inodes + * that require truncation and results in the AGI unlinked list + * modifications being lost. + */ + _XBF_PAGE_LOCKED = (1 << 22), } xfs_buf_flags_t; typedef enum { diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 65e78c13d4ae..5f60363b9343 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -184,19 +184,24 @@ xfs_file_release( return -xfs_release(XFS_I(inode)); } +/* + * We ignore the datasync flag here because a datasync is effectively + * identical to an fsync. That is, datasync implies that we need to write + * only the metadata needed to be able to access the data that is written + * if we crash after the call completes. Hence if we are writing beyond + * EOF we have to log the inode size change as well, which makes it a + * full fsync. If we don't write beyond EOF, the inode core will be + * clean in memory and so we don't need to log the inode, just like + * fsync. + */ STATIC int xfs_file_fsync( struct file *filp, struct dentry *dentry, int datasync) { - int flags = FSYNC_WAIT; - - if (datasync) - flags |= FSYNC_DATA; xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED); - return -xfs_fsync(XFS_I(dentry->d_inode), flags, - (xfs_off_t)0, (xfs_off_t)-1); + return -xfs_fsync(XFS_I(dentry->d_inode)); } /* diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 9d73cb5c0fc7..25eb2a9e8d9b 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -230,14 +230,6 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt) #define ATTR_NOSIZETOK 0x400 /* Don't get the SIZE token */ /* - * Flags to vop_fsync/reclaim. - */ -#define FSYNC_NOWAIT 0 /* asynchronous flush */ -#define FSYNC_WAIT 0x1 /* synchronous fsync or forced reclaim */ -#define FSYNC_INVAL 0x2 /* flush and invalidate cached data */ -#define FSYNC_DATA 0x4 /* synchronous fsync of data only */ - -/* * Tracking vnode activity. */ #if defined(XFS_INODE_TRACE) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index cf0bb9c1d621..e569bf5d6cf0 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2974,6 +2974,7 @@ xfs_iflush_cluster( xfs_mount_t *mp = ip->i_mount; xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); unsigned long first_index, mask; + unsigned long inodes_per_cluster; int ilist_size; xfs_inode_t **ilist; xfs_inode_t *iq; @@ -2985,8 +2986,9 @@ xfs_iflush_cluster( ASSERT(pag->pagi_inodeok); ASSERT(pag->pag_ici_init); - ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *); - ilist = kmem_alloc(ilist_size, KM_MAYFAIL); + inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; + ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); + ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); if (!ilist) return 0; @@ -2995,8 +2997,7 @@ xfs_iflush_cluster( read_lock(&pag->pag_ici_lock); /* really need a gang lookup range call here */ nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, - first_index, - XFS_INODE_CLUSTER_SIZE(mp)); + first_index, inodes_per_cluster); if (nr_found == 0) goto out_free; diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 70702a60b4bb..e475e3717eb3 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -856,18 +856,14 @@ xfs_readlink( /* * xfs_fsync * - * This is called to sync the inode and its data out to disk. - * We need to hold the I/O lock while flushing the data, and - * the inode lock while flushing the inode. The inode lock CANNOT - * be held while flushing the data, so acquire after we're done - * with that. + * This is called to sync the inode and its data out to disk. We need to hold + * the I/O lock while flushing the data, and the inode lock while flushing the + * inode. The inode lock CANNOT be held while flushing the data, so acquire + * after we're done with that. */ int xfs_fsync( - xfs_inode_t *ip, - int flag, - xfs_off_t start, - xfs_off_t stop) + xfs_inode_t *ip) { xfs_trans_t *tp; int error; @@ -875,103 +871,79 @@ xfs_fsync( xfs_itrace_entry(ip); - ASSERT(start >= 0 && stop >= -1); - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return XFS_ERROR(EIO); - if (flag & FSYNC_DATA) - filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); + /* capture size updates in I/O completion before writing the inode. */ + error = filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); + if (error) + return XFS_ERROR(error); /* - * We always need to make sure that the required inode state - * is safe on disk. The vnode might be clean but because - * of committed transactions that haven't hit the disk yet. - * Likewise, there could be unflushed non-transactional - * changes to the inode core that have to go to disk. + * We always need to make sure that the required inode state is safe on + * disk. The vnode might be clean but we still might need to force the + * log because of committed transactions that haven't hit the disk yet. + * Likewise, there could be unflushed non-transactional changes to the + * inode core that have to go to disk and this requires us to issue + * a synchronous transaction to capture these changes correctly. * - * The following code depends on one assumption: that - * any transaction that changes an inode logs the core - * because it has to change some field in the inode core - * (typically nextents or nblocks). That assumption - * implies that any transactions against an inode will - * catch any non-transactional updates. If inode-altering - * transactions exist that violate this assumption, the - * code breaks. Right now, it figures that if the involved - * update_* field is clear and the inode is unpinned, the - * inode is clean. Either it's been flushed or it's been - * committed and the commit has hit the disk unpinning the inode. - * (Note that xfs_inode_item_format() called at commit clears - * the update_* fields.) + * This code relies on the assumption that if the update_* fields + * of the inode are clear and the inode is unpinned then it is clean + * and no action is required. */ xfs_ilock(ip, XFS_ILOCK_SHARED); - /* If we are flushing data then we care about update_size - * being set, otherwise we care about update_core - */ - if ((flag & FSYNC_DATA) ? - (ip->i_update_size == 0) : - (ip->i_update_core == 0)) { + if (!(ip->i_update_size || ip->i_update_core)) { /* - * Timestamps/size haven't changed since last inode - * flush or inode transaction commit. That means - * either nothing got written or a transaction - * committed which caught the updates. If the - * latter happened and the transaction hasn't - * hit the disk yet, the inode will be still - * be pinned. If it is, force the log. + * Timestamps/size haven't changed since last inode flush or + * inode transaction commit. That means either nothing got + * written or a transaction committed which caught the updates. + * If the latter happened and the transaction hasn't hit the + * disk yet, the inode will be still be pinned. If it is, + * force the log. */ xfs_iunlock(ip, XFS_ILOCK_SHARED); if (xfs_ipincount(ip)) { - _xfs_log_force(ip->i_mount, (xfs_lsn_t)0, - XFS_LOG_FORCE | - ((flag & FSYNC_WAIT) - ? XFS_LOG_SYNC : 0), + error = _xfs_log_force(ip->i_mount, (xfs_lsn_t)0, + XFS_LOG_FORCE | XFS_LOG_SYNC, &log_flushed); } else { /* - * If the inode is not pinned and nothing - * has changed we don't need to flush the - * cache. + * If the inode is not pinned and nothing has changed + * we don't need to flush the cache. */ changed = 0; } - error = 0; } else { /* - * Kick off a transaction to log the inode - * core to get the updates. Make it - * sync if FSYNC_WAIT is passed in (which - * is done by everybody but specfs). The - * sync transaction will also force the log. + * Kick off a transaction to log the inode core to get the + * updates. The sync transaction will also force the log. */ xfs_iunlock(ip, XFS_ILOCK_SHARED); tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); - if ((error = xfs_trans_reserve(tp, 0, - XFS_FSYNC_TS_LOG_RES(ip->i_mount), - 0, 0, 0))) { + error = xfs_trans_reserve(tp, 0, + XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0); + if (error) { xfs_trans_cancel(tp, 0); return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); /* - * Note - it's possible that we might have pushed - * ourselves out of the way during trans_reserve - * which would flush the inode. But there's no - * guarantee that the inode buffer has actually - * gone out yet (it's delwri). Plus the buffer - * could be pinned anyway if it's part of an - * inode in another recent transaction. So we - * play it safe and fire off the transaction anyway. + * Note - it's possible that we might have pushed ourselves out + * of the way during trans_reserve which would flush the inode. + * But there's no guarantee that the inode buffer has actually + * gone out yet (it's delwri). Plus the buffer could be pinned + * anyway if it's part of an inode in another recent + * transaction. So we play it safe and fire off the + * transaction anyway. */ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - if (flag & FSYNC_WAIT) - xfs_trans_set_sync(tp); + xfs_trans_set_sync(tp); error = _xfs_trans_commit(tp, 0, &log_flushed); xfs_iunlock(ip, XFS_ILOCK_EXCL); diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index 8abe8f186e20..57335ba4ce53 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -18,8 +18,7 @@ int xfs_open(struct xfs_inode *ip); int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags, struct cred *credp); int xfs_readlink(struct xfs_inode *ip, char *link); -int xfs_fsync(struct xfs_inode *ip, int flag, xfs_off_t start, - xfs_off_t stop); +int xfs_fsync(struct xfs_inode *ip); int xfs_release(struct xfs_inode *ip); int xfs_inactive(struct xfs_inode *ip); int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, |