diff options
Diffstat (limited to 'fs')
77 files changed, 1749 insertions, 1192 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 3031e3233dd6..2a983d49d19c 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -45,7 +45,7 @@ int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid) struct v9fs_dentry *dent; P9_DPRINTK(P9_DEBUG_VFS, "fid %d dentry %s\n", - fid->fid, dentry->d_iname); + fid->fid, dentry->d_name.name); dent = dentry->d_fsdata; if (!dent) { @@ -79,7 +79,7 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, u32 uid, int any) struct p9_fid *fid, *ret; P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p) uid %d any %d\n", - dentry->d_iname, dentry, uid, any); + dentry->d_name.name, dentry, uid, any); dent = (struct v9fs_dentry *) dentry->d_fsdata; ret = NULL; if (dent) { diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 24eb01087b6d..332b5ff02fec 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -160,7 +160,7 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses) v9ses->flags |= V9FS_ACCESS_ANY; else { v9ses->flags |= V9FS_ACCESS_SINGLE; - v9ses->uid = simple_strtol(s, &e, 10); + v9ses->uid = simple_strtoul(s, &e, 10); if (*e != '\0') v9ses->uid = ~0; } diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index f9534f18df0a..06dcc7c4f234 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -52,7 +52,8 @@ static int v9fs_dentry_delete(struct dentry *dentry) { - P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); + P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, + dentry); return 1; } @@ -69,7 +70,8 @@ static int v9fs_dentry_delete(struct dentry *dentry) static int v9fs_cached_dentry_delete(struct dentry *dentry) { struct inode *inode = dentry->d_inode; - P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); + P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, + dentry); if(!inode) return 1; @@ -88,7 +90,8 @@ void v9fs_dentry_release(struct dentry *dentry) struct v9fs_dentry *dent; struct p9_fid *temp, *current_fid; - P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); + P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, + dentry); dent = dentry->d_fsdata; if (dent) { list_for_each_entry_safe(current_fid, temp, &dent->fidlist, diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 8314d3f43b71..2dfcf5487efe 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -963,7 +963,8 @@ static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer, if (buflen > PATH_MAX) buflen = PATH_MAX; - P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); + P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, + dentry); retval = v9fs_readlink(dentry, link, buflen); @@ -1022,7 +1023,8 @@ v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) { char *s = nd_get_link(nd); - P9_DPRINTK(P9_DEBUG_VFS, " %s %s\n", dentry->d_name.name, s); + P9_DPRINTK(P9_DEBUG_VFS, " %s %s\n", dentry->d_name.name, + IS_ERR(s) ? "<error>" : s); if (!IS_ERR(s)) __putname(s); } diff --git a/fs/block_dev.c b/fs/block_dev.c index db831efbdbbd..99e0ae1a4c78 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1135,12 +1135,15 @@ static int blkdev_open(struct inode * inode, struct file * filp) if (res) return res; - if (!(filp->f_mode & FMODE_EXCL)) - return 0; + if (filp->f_mode & FMODE_EXCL) { + res = bd_claim(bdev, filp); + if (res) + goto out_blkdev_put; + } - if (!(res = bd_claim(bdev, filp))) - return 0; + return 0; + out_blkdev_put: blkdev_put(bdev, filp->f_mode); return res; } @@ -1203,8 +1206,16 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) { struct block_device *bdev = I_BDEV(file->f_mapping->host); fmode_t mode = file->f_mode; + + /* + * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have + * to updated it before every ioctl. + */ if (file->f_flags & O_NDELAY) - mode |= FMODE_NDELAY_NOW; + mode |= FMODE_NDELAY; + else + mode &= ~FMODE_NDELAY; + return blkdev_ioctl(bdev, mode, cmd, arg); } diff --git a/fs/buffer.c b/fs/buffer.c index 6569fda5cfed..10179cfa1152 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -878,6 +878,7 @@ void invalidate_inode_buffers(struct inode *inode) spin_unlock(&buffer_mapping->private_lock); } } +EXPORT_SYMBOL(invalidate_inode_buffers); /* * Remove any clean buffers from the inode's buffer list. This is called diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 8855331b2fba..e078b7aea143 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -8,7 +8,11 @@ handling fcntl(F_SETLEASE). Convert cifs to using blocking tcp sends, and also let tcp autotune the socket send and receive buffers. This reduces the number of EAGAIN errors returned by TCP/IP in high stress workloads (and the number of retries on socket writes -when sending large SMBWriteX requests). +when sending large SMBWriteX requests). Fix case in which a portion of +data can in some cases not get written to the file on the server before the +file is closed. Fix DFS parsing to properly handle path consumed field, +and to handle certain codepage conversions better. Fix mount and +umount race that can cause oops in mount or umount or reconnect. Version 1.54 ------------ diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 69a12aae91d3..490e34bbf27a 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -107,12 +107,13 @@ void cifs_dump_mids(struct TCP_Server_Info *server) #ifdef CONFIG_PROC_FS static int cifs_debug_data_proc_show(struct seq_file *m, void *v) { - struct list_head *tmp; - struct list_head *tmp1; + struct list_head *tmp1, *tmp2, *tmp3; struct mid_q_entry *mid_entry; + struct TCP_Server_Info *server; struct cifsSesInfo *ses; struct cifsTconInfo *tcon; - int i; + int i, j; + __u32 dev_type; seq_puts(m, "Display Internal CIFS Data Structures for Debugging\n" @@ -122,46 +123,78 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_printf(m, "Servers:"); i = 0; - read_lock(&GlobalSMBSeslock); - list_for_each(tmp, &GlobalSMBSessionList) { + read_lock(&cifs_tcp_ses_lock); + list_for_each(tmp1, &cifs_tcp_ses_list) { + server = list_entry(tmp1, struct TCP_Server_Info, + tcp_ses_list); i++; - ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); - if ((ses->serverDomain == NULL) || (ses->serverOS == NULL) || - (ses->serverNOS == NULL)) { - seq_printf(m, "\nentry for %s not fully " - "displayed\n\t", ses->serverName); - } else { - seq_printf(m, - "\n%d) Name: %s Domain: %s Mounts: %d OS:" - " %s \n\tNOS: %s\tCapability: 0x%x\n\tSMB" + list_for_each(tmp2, &server->smb_ses_list) { + ses = list_entry(tmp2, struct cifsSesInfo, + smb_ses_list); + if ((ses->serverDomain == NULL) || + (ses->serverOS == NULL) || + (ses->serverNOS == NULL)) { + seq_printf(m, "\n%d) entry for %s not fully " + "displayed\n\t", i, ses->serverName); + } else { + seq_printf(m, + "\n%d) Name: %s Domain: %s Uses: %d OS:" + " %s\n\tNOS: %s\tCapability: 0x%x\n\tSMB" " session status: %d\t", i, ses->serverName, ses->serverDomain, - atomic_read(&ses->inUse), - ses->serverOS, ses->serverNOS, + ses->ses_count, ses->serverOS, ses->serverNOS, ses->capabilities, ses->status); - } - if (ses->server) { + } seq_printf(m, "TCP status: %d\n\tLocal Users To " - "Server: %d SecMode: 0x%x Req On Wire: %d", - ses->server->tcpStatus, - atomic_read(&ses->server->socketUseCount), - ses->server->secMode, - atomic_read(&ses->server->inFlight)); + "Server: %d SecMode: 0x%x Req On Wire: %d", + server->tcpStatus, server->srv_count, + server->secMode, + atomic_read(&server->inFlight)); #ifdef CONFIG_CIFS_STATS2 seq_printf(m, " In Send: %d In MaxReq Wait: %d", - atomic_read(&ses->server->inSend), - atomic_read(&ses->server->num_waiters)); + atomic_read(&server->inSend), + atomic_read(&server->num_waiters)); #endif - seq_puts(m, "\nMIDs:\n"); + seq_puts(m, "\n\tShares:"); + j = 0; + list_for_each(tmp3, &ses->tcon_list) { + tcon = list_entry(tmp3, struct cifsTconInfo, + tcon_list); + ++j; + dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType); + seq_printf(m, "\n\t%d) %s Mounts: %d ", j, + tcon->treeName, tcon->tc_count); + if (tcon->nativeFileSystem) { + seq_printf(m, "Type: %s ", + tcon->nativeFileSystem); + } + seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x" + "\nPathComponentMax: %d Status: 0x%d", + le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics), + le32_to_cpu(tcon->fsAttrInfo.Attributes), + le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength), + tcon->tidStatus); + if (dev_type == FILE_DEVICE_DISK) + seq_puts(m, " type: DISK "); + else if (dev_type == FILE_DEVICE_CD_ROM) + seq_puts(m, " type: CDROM "); + else + seq_printf(m, " type: %d ", dev_type); + + if (tcon->need_reconnect) + seq_puts(m, "\tDISCONNECTED "); + seq_putc(m, '\n'); + } + + seq_puts(m, "\n\tMIDs:\n"); spin_lock(&GlobalMid_Lock); - list_for_each(tmp1, &ses->server->pending_mid_q) { - mid_entry = list_entry(tmp1, struct - mid_q_entry, + list_for_each(tmp3, &server->pending_mid_q) { + mid_entry = list_entry(tmp3, struct mid_q_entry, qhead); - seq_printf(m, "State: %d com: %d pid:" + seq_printf(m, "\tState: %d com: %d pid:" " %d tsk: %p mid %d\n", mid_entry->midState, (int)mid_entry->command, @@ -171,44 +204,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) } spin_unlock(&GlobalMid_Lock); } - - } - read_unlock(&GlobalSMBSeslock); - seq_putc(m, '\n'); - - seq_puts(m, "Shares:"); - - i = 0; - read_lock(&GlobalSMBSeslock); - list_for_each(tmp, &GlobalTreeConnectionList) { - __u32 dev_type; - i++; - tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); - dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType); - seq_printf(m, "\n%d) %s Uses: %d ", i, - tcon->treeName, atomic_read(&tcon->useCount)); - if (tcon->nativeFileSystem) { - seq_printf(m, "Type: %s ", - tcon->nativeFileSystem); - } - seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x" - "\nPathComponentMax: %d Status: %d", - le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics), - le32_to_cpu(tcon->fsAttrInfo.Attributes), - le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength), - tcon->tidStatus); - if (dev_type == FILE_DEVICE_DISK) - seq_puts(m, " type: DISK "); - else if (dev_type == FILE_DEVICE_CD_ROM) - seq_puts(m, " type: CDROM "); - else - seq_printf(m, " type: %d ", dev_type); - - if (tcon->tidStatus == CifsNeedReconnect) - seq_puts(m, "\tDISCONNECTED "); } - read_unlock(&GlobalSMBSeslock); - + read_unlock(&cifs_tcp_ses_lock); seq_putc(m, '\n'); /* BB add code to dump additional info such as TCP session info now */ @@ -234,7 +231,9 @@ static ssize_t cifs_stats_proc_write(struct file *file, { char c; int rc; - struct list_head *tmp; + struct list_head *tmp1, *tmp2, *tmp3; + struct TCP_Server_Info *server; + struct cifsSesInfo *ses; struct cifsTconInfo *tcon; rc = get_user(c, buffer); @@ -242,33 +241,42 @@ static ssize_t cifs_stats_proc_write(struct file *file, return rc; if (c == '1' || c == 'y' || c == 'Y' || c == '0') { - read_lock(&GlobalSMBSeslock); #ifdef CONFIG_CIFS_STATS2 atomic_set(&totBufAllocCount, 0); atomic_set(&totSmBufAllocCount, 0); #endif /* CONFIG_CIFS_STATS2 */ - list_for_each(tmp, &GlobalTreeConnectionList) { - tcon = list_entry(tmp, struct cifsTconInfo, - cifsConnectionList); - atomic_set(&tcon->num_smbs_sent, 0); - atomic_set(&tcon->num_writes, 0); - atomic_set(&tcon->num_reads, 0); - atomic_set(&tcon->num_oplock_brks, 0); - atomic_set(&tcon->num_opens, 0); - atomic_set(&tcon->num_closes, 0); - atomic_set(&tcon->num_deletes, 0); - atomic_set(&tcon->num_mkdirs, 0); - atomic_set(&tcon->num_rmdirs, 0); - atomic_set(&tcon->num_renames, 0); - atomic_set(&tcon->num_t2renames, 0); - atomic_set(&tcon->num_ffirst, 0); - atomic_set(&tcon->num_fnext, 0); - atomic_set(&tcon->num_fclose, 0); - atomic_set(&tcon->num_hardlinks, 0); - atomic_set(&tcon->num_symlinks, 0); - atomic_set(&tcon->num_locks, 0); + read_lock(&cifs_tcp_ses_lock); + list_for_each(tmp1, &cifs_tcp_ses_list) { + server = list_entry(tmp1, struct TCP_Server_Info, + tcp_ses_list); + list_for_each(tmp2, &server->smb_ses_list) { + ses = list_entry(tmp2, struct cifsSesInfo, + smb_ses_list); + list_for_each(tmp3, &ses->tcon_list) { + tcon = list_entry(tmp3, + struct cifsTconInfo, + tcon_list); + atomic_set(&tcon->num_smbs_sent, 0); + atomic_set(&tcon->num_writes, 0); + atomic_set(&tcon->num_reads, 0); + atomic_set(&tcon->num_oplock_brks, 0); + atomic_set(&tcon->num_opens, 0); + atomic_set(&tcon->num_closes, 0); + atomic_set(&tcon->num_deletes, 0); + atomic_set(&tcon->num_mkdirs, 0); + atomic_set(&tcon->num_rmdirs, 0); + atomic_set(&tcon->num_renames, 0); + atomic_set(&tcon->num_t2renames, 0); + atomic_set(&tcon->num_ffirst, 0); + atomic_set(&tcon->num_fnext, 0); + atomic_set(&tcon->num_fclose, 0); + atomic_set(&tcon->num_hardlinks, 0); + atomic_set(&tcon->num_symlinks, 0); + atomic_set(&tcon->num_locks, 0); + } + } } - read_unlock(&GlobalSMBSeslock); + read_unlock(&cifs_tcp_ses_lock); } return count; @@ -277,7 +285,9 @@ static ssize_t cifs_stats_proc_write(struct file *file, static int cifs_stats_proc_show(struct seq_file *m, void *v) { int i; - struct list_head *tmp; + struct list_head *tmp1, *tmp2, *tmp3; + struct TCP_Server_Info *server; + struct cifsSesInfo *ses; struct cifsTconInfo *tcon; seq_printf(m, @@ -306,44 +316,55 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v) GlobalCurrentXid, GlobalMaxActiveXid); i = 0; - read_lock(&GlobalSMBSeslock); - list_for_each(tmp, &GlobalTreeConnectionList) { - i++; - tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); - seq_printf(m, "\n%d) %s", i, tcon->treeName); - if (tcon->tidStatus == CifsNeedReconnect) - seq_puts(m, "\tDISCONNECTED "); - seq_printf(m, "\nSMBs: %d Oplock Breaks: %d", - atomic_read(&tcon->num_smbs_sent), - atomic_read(&tcon->num_oplock_brks)); - seq_printf(m, "\nReads: %d Bytes: %lld", - atomic_read(&tcon->num_reads), - (long long)(tcon->bytes_read)); - seq_printf(m, "\nWrites: %d Bytes: %lld", - atomic_read(&tcon->num_writes), - (long long)(tcon->bytes_written)); - seq_printf(m, - "\nLocks: %d HardLinks: %d Symlinks: %d", - atomic_read(&tcon->num_locks), - atomic_read(&tcon->num_hardlinks), - atomic_read(&tcon->num_symlinks)); - - seq_printf(m, "\nOpens: %d Closes: %d Deletes: %d", - atomic_read(&tcon->num_opens), - atomic_read(&tcon->num_closes), - atomic_read(&tcon->num_deletes)); - seq_printf(m, "\nMkdirs: %d Rmdirs: %d", - atomic_read(&tcon->num_mkdirs), - atomic_read(&tcon->num_rmdirs)); - seq_printf(m, "\nRenames: %d T2 Renames %d", - atomic_read(&tcon->num_renames), - atomic_read(&tcon->num_t2renames)); - seq_printf(m, "\nFindFirst: %d FNext %d FClose %d", - atomic_read(&tcon->num_ffirst), - atomic_read(&tcon->num_fnext), - atomic_read(&tcon->num_fclose)); + read_lock(&cifs_tcp_ses_lock); + list_for_each(tmp1, &cifs_tcp_ses_list) { + server = list_entry(tmp1, struct TCP_Server_Info, + tcp_ses_list); + list_for_each(tmp2, &server->smb_ses_list) { + ses = list_entry(tmp2, struct cifsSesInfo, + smb_ses_list); + list_for_each(tmp3, &ses->tcon_list) { + tcon = list_entry(tmp3, + struct cifsTconInfo, + tcon_list); + i++; + seq_printf(m, "\n%d) %s", i, tcon->treeName); + if (tcon->need_reconnect) + seq_puts(m, "\tDISCONNECTED "); + seq_printf(m, "\nSMBs: %d Oplock Breaks: %d", + atomic_read(&tcon->num_smbs_sent), + atomic_read(&tcon->num_oplock_brks)); + seq_printf(m, "\nReads: %d Bytes: %lld", + atomic_read(&tcon->num_reads), + (long long)(tcon->bytes_read)); + seq_printf(m, "\nWrites: %d Bytes: %lld", + atomic_read(&tcon->num_writes), + (long long)(tcon->bytes_written)); + seq_printf(m, "\nLocks: %d HardLinks: %d " + "Symlinks: %d", + atomic_read(&tcon->num_locks), + atomic_read(&tcon->num_hardlinks), + atomic_read(&tcon->num_symlinks)); + seq_printf(m, "\nOpens: %d Closes: %d" + "Deletes: %d", + atomic_read(&tcon->num_opens), + atomic_read(&tcon->num_closes), + atomic_read(&tcon->num_deletes)); + seq_printf(m, "\nMkdirs: %d Rmdirs: %d", + atomic_read(&tcon->num_mkdirs), + atomic_read(&tcon->num_rmdirs)); + seq_printf(m, "\nRenames: %d T2 Renames %d", + atomic_read(&tcon->num_renames), + atomic_read(&tcon->num_t2renames)); + seq_printf(m, "\nFindFirst: %d FNext %d " + "FClose %d", + atomic_read(&tcon->num_ffirst), + atomic_read(&tcon->num_fnext), + atomic_read(&tcon->num_fclose)); + } + } } - read_unlock(&GlobalSMBSeslock); + read_unlock(&cifs_tcp_ses_lock); seq_putc(m, '\n'); return 0; diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index d2c8eef84f3c..e1c18362ba46 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -106,7 +106,8 @@ static char *cifs_get_share_name(const char *node_name) /** * compose_mount_options - creates mount options for refferral * @sb_mountdata: parent/root DFS mount options (template) - * @ref_unc: refferral server UNC + * @dentry: point where we are going to mount + * @ref: server's referral * @devname: pointer for saving device name * * creates mount options for submount based on template options sb_mountdata @@ -116,7 +117,8 @@ static char *cifs_get_share_name(const char *node_name) * Caller is responcible for freeing retunrned value if it is not error. */ static char *compose_mount_options(const char *sb_mountdata, - const char *ref_unc, + struct dentry *dentry, + const struct dfs_info3_param *ref, char **devname) { int rc; @@ -126,11 +128,12 @@ static char *compose_mount_options(const char *sb_mountdata, char *srvIP = NULL; char sep = ','; int off, noff; + char *fullpath; if (sb_mountdata == NULL) return ERR_PTR(-EINVAL); - *devname = cifs_get_share_name(ref_unc); + *devname = cifs_get_share_name(ref->node_name); rc = dns_resolve_server_name_to_ip(*devname, &srvIP); if (rc != 0) { cERROR(1, ("%s: Failed to resolve server part of %s to IP", @@ -138,7 +141,12 @@ static char *compose_mount_options(const char *sb_mountdata, mountdata = ERR_PTR(rc); goto compose_mount_options_out; } - md_len = strlen(sb_mountdata) + strlen(srvIP) + strlen(ref_unc) + 3; + /* md_len = strlen(...) + 12 for 'sep+prefixpath=' + * assuming that we have 'unc=' and 'ip=' in + * the original sb_mountdata + */ + md_len = strlen(sb_mountdata) + strlen(srvIP) + + strlen(ref->node_name) + 12; mountdata = kzalloc(md_len+1, GFP_KERNEL); if (mountdata == NULL) { mountdata = ERR_PTR(-ENOMEM); @@ -152,41 +160,56 @@ static char *compose_mount_options(const char *sb_mountdata, strncpy(mountdata, sb_mountdata, 5); off += 5; } - while ((tkn_e = strchr(sb_mountdata+off, sep))) { - noff = (tkn_e - (sb_mountdata+off)) + 1; - if (strnicmp(sb_mountdata+off, "unc=", 4) == 0) { + + do { + tkn_e = strchr(sb_mountdata + off, sep); + if (tkn_e == NULL) + noff = strlen(sb_mountdata + off); + else + noff = tkn_e - (sb_mountdata + off) + 1; + + if (strnicmp(sb_mountdata + off, "unc=", 4) == 0) { off += noff; continue; } - if (strnicmp(sb_mountdata+off, "ip=", 3) == 0) { + if (strnicmp(sb_mountdata + off, "ip=", 3) == 0) { off += noff; continue; } - if (strnicmp(sb_mountdata+off, "prefixpath=", 3) == 0) { + if (strnicmp(sb_mountdata + off, "prefixpath=", 11) == 0) { off += noff; continue; } - strncat(mountdata, sb_mountdata+off, noff); + strncat(mountdata, sb_mountdata + off, noff); off += noff; - } - strcat(mountdata, sb_mountdata+off); + } while (tkn_e); + strcat(mountdata, sb_mountdata + off); mountdata[md_len] = '\0'; /* copy new IP and ref share name */ - strcat(mountdata, ",ip="); + if (mountdata[strlen(mountdata) - 1] != sep) + strncat(mountdata, &sep, 1); + strcat(mountdata, "ip="); strcat(mountdata, srvIP); - strcat(mountdata, ",unc="); + strncat(mountdata, &sep, 1); + strcat(mountdata, "unc="); strcat(mountdata, *devname); /* find & copy prefixpath */ - tkn_e = strchr(ref_unc+2, '\\'); - if (tkn_e) { - tkn_e = strchr(tkn_e+1, '\\'); - if (tkn_e) { - strcat(mountdata, ",prefixpath="); - strcat(mountdata, tkn_e+1); - } + tkn_e = strchr(ref->node_name + 2, '\\'); + if (tkn_e == NULL) /* invalid unc, missing share name*/ + goto compose_mount_options_out; + + fullpath = build_path_from_dentry(dentry); + tkn_e = strchr(tkn_e + 1, '\\'); + if (tkn_e || strlen(fullpath) - (ref->path_consumed)) { + strncat(mountdata, &sep, 1); + strcat(mountdata, "prefixpath="); + if (tkn_e) + strcat(mountdata, tkn_e + 1); + strcat(mountdata, fullpath + (ref->path_consumed)); } + kfree(fullpath); /*cFYI(1,("%s: parent mountdata: %s", __func__,sb_mountdata));*/ /*cFYI(1, ("%s: submount mountdata: %s", __func__, mountdata ));*/ @@ -198,7 +221,7 @@ compose_mount_options_out: static struct vfsmount *cifs_dfs_do_refmount(const struct vfsmount *mnt_parent, - struct dentry *dentry, char *ref_unc) + struct dentry *dentry, const struct dfs_info3_param *ref) { struct cifs_sb_info *cifs_sb; struct vfsmount *mnt; @@ -207,7 +230,7 @@ static struct vfsmount *cifs_dfs_do_refmount(const struct vfsmount *mnt_parent, cifs_sb = CIFS_SB(dentry->d_inode->i_sb); mountdata = compose_mount_options(cifs_sb->mountdata, - ref_unc, &devname); + dentry, ref, &devname); if (IS_ERR(mountdata)) return (struct vfsmount *)mountdata; @@ -310,7 +333,7 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) } mnt = cifs_dfs_do_refmount(nd->path.mnt, nd->path.dentry, - referrals[i].node_name); + referrals + i); cFYI(1, ("%s: cifs_dfs_do_refmount:%s , mnt:%p", __func__, referrals[i].node_name, mnt)); diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index fcee9298b620..0ab2fb5afef1 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -73,8 +73,8 @@ struct key_type cifs_spnego_key_type = { * strlen(";sec=ntlmsspi") */ #define MAX_MECH_STR_LEN 13 -/* max possible addr len eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */ -#define MAX_IPV6_ADDR_LEN 42 +/* max possible addr len eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/128 */ +#define MAX_IPV6_ADDR_LEN 43 /* strlen of "host=" */ #define HOST_KEY_LEN 5 diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index ac5915d61dca..d9cf467309e8 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -514,10 +514,11 @@ static void cifs_umount_begin(struct super_block *sb) tcon = cifs_sb->tcon; if (tcon == NULL) return; - down(&tcon->tconSem); - if (atomic_read(&tcon->useCount) == 1) + + read_lock(&cifs_tcp_ses_lock); + if (tcon->tc_count == 1) tcon->tidStatus = CifsExiting; - up(&tcon->tconSem); + read_unlock(&cifs_tcp_ses_lock); /* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */ /* cancel_notify_requests(tcon); */ @@ -1013,7 +1014,7 @@ static int cifs_oplock_thread(void *dummyarg) not bother sending an oplock release if session to server still is disconnected since oplock already released by the server in that case */ - if (pTcon->tidStatus != CifsNeedReconnect) { + if (!pTcon->need_reconnect) { rc = CIFSSMBLock(0, pTcon, netfid, 0 /* len */ , 0 /* offset */, 0, 0, LOCKING_ANDX_OPLOCK_RELEASE, @@ -1031,24 +1032,24 @@ static int cifs_oplock_thread(void *dummyarg) static int cifs_dnotify_thread(void *dummyarg) { struct list_head *tmp; - struct cifsSesInfo *ses; + struct TCP_Server_Info *server; do { if (try_to_freeze()) continue; set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(15*HZ); - read_lock(&GlobalSMBSeslock); /* check if any stuck requests that need to be woken up and wakeq so the thread can wake up and error out */ - list_for_each(tmp, &GlobalSMBSessionList) { - ses = list_entry(tmp, struct cifsSesInfo, - cifsSessionList); - if (ses->server && atomic_read(&ses->server->inFlight)) - wake_up_all(&ses->server->response_q); + read_lock(&cifs_tcp_ses_lock); + list_for_each(tmp, &cifs_tcp_ses_list) { + server = list_entry(tmp, struct TCP_Server_Info, + tcp_ses_list); + if (atomic_read(&server->inFlight)) + wake_up_all(&server->response_q); } - read_unlock(&GlobalSMBSeslock); + read_unlock(&cifs_tcp_ses_lock); } while (!kthread_should_stop()); return 0; @@ -1059,9 +1060,7 @@ init_cifs(void) { int rc = 0; cifs_proc_init(); -/* INIT_LIST_HEAD(&GlobalServerList);*/ /* BB not implemented yet */ - INIT_LIST_HEAD(&GlobalSMBSessionList); - INIT_LIST_HEAD(&GlobalTreeConnectionList); + INIT_LIST_HEAD(&cifs_tcp_ses_list); INIT_LIST_HEAD(&GlobalOplock_Q); #ifdef CONFIG_CIFS_EXPERIMENTAL INIT_LIST_HEAD(&GlobalDnotifyReqList); @@ -1089,6 +1088,7 @@ init_cifs(void) GlobalMaxActiveXid = 0; memset(Local_System_Name, 0, 15); rwlock_init(&GlobalSMBSeslock); + rwlock_init(&cifs_tcp_ses_lock); spin_lock_init(&GlobalMid_Lock); if (cifs_max_pending < 2) { diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 1cb1189f24e0..c57c0565547f 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -85,8 +85,7 @@ enum securityEnum { }; enum protocolEnum { - IPV4 = 0, - IPV6, + TCP = 0, SCTP /* Netbios frames protocol not supported at this time */ }; @@ -122,6 +121,9 @@ struct cifs_cred { */ struct TCP_Server_Info { + struct list_head tcp_ses_list; + struct list_head smb_ses_list; + int srv_count; /* reference counter */ /* 15 character server name + 0x20 16th byte indicating type = srv */ char server_RFC1001_name[SERVER_NAME_LEN_WITH_NULL]; char unicode_server_Name[SERVER_NAME_LEN_WITH_NULL * 2]; @@ -143,7 +145,6 @@ struct TCP_Server_Info { bool svlocal:1; /* local server or remote */ bool noblocksnd; /* use blocking sendmsg */ bool noautotune; /* do not autotune send buf sizes */ - atomic_t socketUseCount; /* number of open cifs sessions on socket */ atomic_t inFlight; /* number of requests on the wire to server */ #ifdef CONFIG_CIFS_STATS2 atomic_t inSend; /* requests trying to send */ @@ -194,13 +195,14 @@ struct cifsUidInfo { * Session structure. One of these for each uid session with a particular host */ struct cifsSesInfo { - struct list_head cifsSessionList; + struct list_head smb_ses_list; + struct list_head tcon_list; struct semaphore sesSem; #if 0 struct cifsUidInfo *uidInfo; /* pointer to user info */ #endif struct TCP_Server_Info *server; /* pointer to server info */ - atomic_t inUse; /* # of mounts (tree connections) on this ses */ + int ses_count; /* reference counter */ enum statusEnum status; unsigned overrideSecFlg; /* if non-zero override global sec flags */ __u16 ipc_tid; /* special tid for connection to IPC share */ @@ -216,6 +218,7 @@ struct cifsSesInfo { char userName[MAX_USERNAME_SIZE + 1]; char *domainName; char *password; + bool need_reconnect:1; /* connection reset, uid now invalid */ }; /* no more than one of the following three session flags may be set */ #define CIFS_SES_NT4 1 @@ -230,16 +233,15 @@ struct cifsSesInfo { * session */ struct cifsTconInfo { - struct list_head cifsConnectionList; + struct list_head tcon_list; + int tc_count; struct list_head openFileList; - struct semaphore tconSem; struct cifsSesInfo *ses; /* pointer to session associated with */ char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */ char *nativeFileSystem; __u16 tid; /* The 2 byte tree id */ __u16 Flags; /* optional support bits */ enum statusEnum tidStatus; - atomic_t useCount; /* how many explicit/implicit mounts to share */ #ifdef CONFIG_CIFS_STATS atomic_t num_smbs_sent; atomic_t num_writes; @@ -288,6 +290,7 @@ struct cifsTconInfo { bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol for this mount even if server would support */ bool local_lease:1; /* check leases (only) on local system not remote */ + bool need_reconnect:1; /* connection reset, tid now invalid */ /* BB add field for back pointer to sb struct(s)? */ }; @@ -588,22 +591,30 @@ require use of the stronger protocol */ #endif /* - * The list of servers that did not respond with NT LM 0.12. - * This list helps improve performance and eliminate the messages indicating - * that we had a communications error talking to the server in this list. + * the list of TCP_Server_Info structures, ie each of the sockets + * connecting our client to a distinct server (ip address), is + * chained together by cifs_tcp_ses_list. The list of all our SMB + * sessions (and from that the tree connections) can be found + * by iterating over cifs_tcp_ses_list */ -/* Feature not supported */ -/* GLOBAL_EXTERN struct servers_not_supported *NotSuppList; */ +GLOBAL_EXTERN struct list_head cifs_tcp_ses_list; /* - * The following is a hash table of all the users we know about. + * This lock protects the cifs_tcp_ses_list, the list of smb sessions per + * tcp session, and the list of tcon's per smb session. It also protects + * the reference counters for the server, smb session, and tcon. Finally, + * changes to the tcon->tidStatus should be done while holding this lock. */ -GLOBAL_EXTERN struct smbUidInfo *GlobalUidList[UID_HASH]; +GLOBAL_EXTERN rwlock_t cifs_tcp_ses_lock; -/* GLOBAL_EXTERN struct list_head GlobalServerList; BB not implemented yet */ -GLOBAL_EXTERN struct list_head GlobalSMBSessionList; -GLOBAL_EXTERN struct list_head GlobalTreeConnectionList; -GLOBAL_EXTERN rwlock_t GlobalSMBSeslock; /* protects list inserts on 3 above */ +/* + * This lock protects the cifs_file->llist and cifs_file->flist + * list operations, and updates to some flags (cifs_file->invalidHandle) + * It will be moved to either use the tcon->stat_lock or equivalent later. + * If cifs_tcp_ses_lock and the lock below are both needed to be held, then + * the cifs_tcp_ses_lock must be grabbed first and released last. + */ +GLOBAL_EXTERN rwlock_t GlobalSMBSeslock; GLOBAL_EXTERN struct list_head GlobalOplock_Q; diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index d5eac48fc415..6d51696dc762 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -190,10 +190,10 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, /* need to prevent multiple threads trying to simultaneously reconnect the same SMB session */ down(&tcon->ses->sesSem); - if (tcon->ses->status == CifsNeedReconnect) + if (tcon->ses->need_reconnect) rc = cifs_setup_session(0, tcon->ses, nls_codepage); - if (!rc && (tcon->tidStatus == CifsNeedReconnect)) { + if (!rc && (tcon->need_reconnect)) { mark_open_files_invalid(tcon); rc = CIFSTCon(0, tcon->ses, tcon->treeName, tcon, nls_codepage); @@ -337,10 +337,10 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, /* need to prevent multiple threads trying to simultaneously reconnect the same SMB session */ down(&tcon->ses->sesSem); - if (tcon->ses->status == CifsNeedReconnect) + if (tcon->ses->need_reconnect) rc = cifs_setup_session(0, tcon->ses, nls_codepage); - if (!rc && (tcon->tidStatus == CifsNeedReconnect)) { + if (!rc && (tcon->need_reconnect)) { mark_open_files_invalid(tcon); rc = CIFSTCon(0, tcon->ses, tcon->treeName, tcon, nls_codepage); @@ -664,8 +664,9 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) rc = -EIO; goto neg_err_exit; } - - if (server->socketUseCount.counter > 1) { + read_lock(&cifs_tcp_ses_lock); + if (server->srv_count > 1) { + read_unlock(&cifs_tcp_ses_lock); if (memcmp(server->server_GUID, pSMBr->u.extended_response. GUID, 16) != 0) { @@ -674,9 +675,11 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) pSMBr->u.extended_response.GUID, 16); } - } else + } else { + read_unlock(&cifs_tcp_ses_lock); memcpy(server->server_GUID, pSMBr->u.extended_response.GUID, 16); + } if (count == 16) { server->secType = RawNTLMSSP; @@ -739,50 +742,31 @@ CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon) int rc = 0; cFYI(1, ("In tree disconnect")); - /* - * If last user of the connection and - * connection alive - disconnect it - * If this is the last connection on the server session disconnect it - * (and inside session disconnect we should check if tcp socket needs - * to be freed and kernel thread woken up). - */ - if (tcon) - down(&tcon->tconSem); - else - return -EIO; - atomic_dec(&tcon->useCount); - if (atomic_read(&tcon->useCount) > 0) { - up(&tcon->tconSem); - return -EBUSY; - } + /* BB: do we need to check this? These should never be NULL. */ + if ((tcon->ses == NULL) || (tcon->ses->server == NULL)) + return -EIO; - /* No need to return error on this operation if tid invalidated and - closed on server already e.g. due to tcp session crashing */ - if (tcon->tidStatus == CifsNeedReconnect) { - up(&tcon->tconSem); + /* + * No need to return error on this operation if tid invalidated and + * closed on server already e.g. due to tcp session crashing. Also, + * the tcon is no longer on the list, so no need to take lock before + * checking this. + */ + if (tcon->need_reconnect) return 0; - } - if ((tcon->ses == NULL) || (tcon->ses->server == NULL)) { - up(&tcon->tconSem); - return -EIO; - } rc = small_smb_init(SMB_COM_TREE_DISCONNECT, 0, tcon, (void **)&smb_buffer); - if (rc) { - up(&tcon->tconSem); + if (rc) return rc; - } rc = SendReceiveNoRsp(xid, tcon->ses, smb_buffer, 0); if (rc) cFYI(1, ("Tree disconnect failed %d", rc)); - up(&tcon->tconSem); - /* No need to return error on this operation if tid invalidated and - closed on server already e.g. due to tcp session crashing */ + closed on server already e.g. due to tcp session crashing */ if (rc == -EAGAIN) rc = 0; @@ -796,43 +780,36 @@ CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses) int rc = 0; cFYI(1, ("In SMBLogoff for session disconnect")); - if (ses) - down(&ses->sesSem); - else + + /* + * BB: do we need to check validity of ses and server? They should + * always be valid since we have an active reference. If not, that + * should probably be a BUG() + */ + if (!ses || !ses->server) return -EIO; - atomic_dec(&ses->inUse); - if (atomic_read(&ses->inUse) > 0) { - up(&ses->sesSem); - return -EBUSY; - } + down(&ses->sesSem); + if (ses->need_reconnect) + goto session_already_dead; /* no need to send SMBlogoff if uid + already closed due to reconnect */ rc = small_smb_init(SMB_COM_LOGOFF_ANDX, 2, NULL, (void **)&pSMB); if (rc) { up(&ses->sesSem); return rc; } - if (ses->server) { - pSMB->hdr.Mid = GetNextMid(ses->server); + pSMB->hdr.Mid = GetNextMid(ses->server); - if (ses->server->secMode & + if (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; - } pSMB->hdr.Uid = ses->Suid; pSMB->AndXCommand = 0xFF; rc = SendReceiveNoRsp(xid, ses, (struct smb_hdr *) pSMB, 0); - if (ses->server) { - atomic_dec(&ses->server->socketUseCount); - if (atomic_read(&ses->server->socketUseCount) == 0) { - spin_lock(&GlobalMid_Lock); - ses->server->tcpStatus = CifsExiting; - spin_unlock(&GlobalMid_Lock); - rc = -ESHUTDOWN; - } - } +session_already_dead: up(&ses->sesSem); /* if session dead then we do not need to do ulogoff, @@ -3922,6 +3899,27 @@ GetInodeNumOut: return rc; } +/* computes length of UCS string converted to host codepage + * @src: UCS string + * @maxlen: length of the input string in UCS characters + * (not in bytes) + * + * return: size of input string in host codepage + */ +static int hostlen_fromUCS(const __le16 *src, const int maxlen, + const struct nls_table *nls_codepage) { + int i; + int hostlen = 0; + char to[4]; + int charlen; + for (i = 0; (i < maxlen) && src[i]; ++i) { + charlen = nls_codepage->uni2char(le16_to_cpu(src[i]), + to, NLS_MAX_CHARSET_SIZE); + hostlen += charlen > 0 ? charlen : 1; + } + return hostlen; +} + /* parses DFS refferal V3 structure * caller is responsible for freeing target_nodes * returns: @@ -3932,7 +3930,8 @@ static int parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, unsigned int *num_of_nodes, struct dfs_info3_param **target_nodes, - const struct nls_table *nls_codepage) + const struct nls_table *nls_codepage, int remap, + const char *searchName) { int i, rc = 0; char *data_end; @@ -3983,7 +3982,18 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, struct dfs_info3_param *node = (*target_nodes)+i; node->flags = le16_to_cpu(pSMBr->DFSFlags); - node->path_consumed = le16_to_cpu(pSMBr->PathConsumed); + if (is_unicode) { + __le16 *tmp = kmalloc(strlen(searchName)*2 + 2, + GFP_KERNEL); + cifsConvertToUCS((__le16 *) tmp, searchName, + PATH_MAX, nls_codepage, remap); + node->path_consumed = hostlen_fromUCS(tmp, + le16_to_cpu(pSMBr->PathConsumed)/2, + nls_codepage); + kfree(tmp); + } else + node->path_consumed = le16_to_cpu(pSMBr->PathConsumed); + node->server_type = le16_to_cpu(ref->ServerType); node->ref_flag = le16_to_cpu(ref->ReferralEntryFlags); @@ -4116,7 +4126,8 @@ getDFSRetry: /* parse returned result into more usable form */ rc = parse_DFS_referrals(pSMBr, num_of_nodes, - target_nodes, nls_codepage); + target_nodes, nls_codepage, remap, + searchName); GetDFSRefExit: cifs_buf_release(pSMB); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index e9f9248cb3fe..c7d341714586 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -124,7 +124,7 @@ static int cifs_reconnect(struct TCP_Server_Info *server) { int rc = 0; - struct list_head *tmp; + struct list_head *tmp, *tmp2; struct cifsSesInfo *ses; struct cifsTconInfo *tcon; struct mid_q_entry *mid_entry; @@ -144,23 +144,17 @@ cifs_reconnect(struct TCP_Server_Info *server) /* before reconnecting the tcp session, mark the smb session (uid) and the tid bad so they are not used until reconnected */ - read_lock(&GlobalSMBSeslock); - list_for_each(tmp, &GlobalSMBSessionList) { - ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); - if (ses->server) { - if (ses->server == server) { - ses->status = CifsNeedReconnect; - ses->ipc_tid = 0; - } + read_lock(&cifs_tcp_ses_lock); + list_for_each(tmp, &server->smb_ses_list) { + ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list); + ses->need_reconnect = true; + ses->ipc_tid = 0; + list_for_each(tmp2, &ses->tcon_list) { + tcon = list_entry(tmp2, struct cifsTconInfo, tcon_list); + tcon->need_reconnect = true; } - /* else tcp and smb sessions need reconnection */ - } - list_for_each(tmp, &GlobalTreeConnectionList) { - tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); - if ((tcon->ses) && (tcon->ses->server == server)) - tcon->tidStatus = CifsNeedReconnect; } - read_unlock(&GlobalSMBSeslock); + read_unlock(&cifs_tcp_ses_lock); /* do not want to be sending data on a socket we are freeing */ down(&server->tcpSem); if (server->ssocket) { @@ -193,7 +187,7 @@ cifs_reconnect(struct TCP_Server_Info *server) while ((server->tcpStatus != CifsExiting) && (server->tcpStatus != CifsGood)) { try_to_freeze(); - if (server->protocolType == IPV6) { + if (server->addr.sockAddr6.sin6_family == AF_INET6) { rc = ipv6_connect(&server->addr.sockAddr6, &server->ssocket, server->noautotune); } else { @@ -417,9 +411,14 @@ incomplete_rcv: msleep(1); /* minimum sleep to prevent looping allowing socket to clear and app threads to set tcpStatus CifsNeedReconnect if server hung */ - if (pdu_length < 4) + if (pdu_length < 4) { + iov.iov_base = (4 - pdu_length) + + (char *)smb_buffer; + iov.iov_len = pdu_length; + smb_msg.msg_control = NULL; + smb_msg.msg_controllen = 0; goto incomplete_rcv; - else + } else continue; } else if (length <= 0) { if (server->tcpStatus == CifsNew) { @@ -654,6 +653,11 @@ multi_t2_fnd: } } /* end while !EXITING */ + /* take it off the list, if it's not already */ + write_lock(&cifs_tcp_ses_lock); + list_del_init(&server->tcp_ses_list); + write_unlock(&cifs_tcp_ses_lock); + spin_lock(&GlobalMid_Lock); server->tcpStatus = CifsExiting; spin_unlock(&GlobalMid_Lock); @@ -686,29 +690,29 @@ multi_t2_fnd: if (smallbuf) /* no sense logging a debug message if NULL */ cifs_small_buf_release(smallbuf); - read_lock(&GlobalSMBSeslock); + /* + * BB: we shouldn't have to do any of this. It shouldn't be + * possible to exit from the thread with active SMB sessions + */ + read_lock(&cifs_tcp_ses_lock); if (list_empty(&server->pending_mid_q)) { /* loop through server session structures attached to this and mark them dead */ - list_for_each(tmp, &GlobalSMBSessionList) { - ses = - list_entry(tmp, struct cifsSesInfo, - cifsSessionList); - if (ses->server == server) { - ses->status = CifsExiting; - ses->server = NULL; - } + list_for_each(tmp, &server->smb_ses_list) { + ses = list_entry(tmp, struct cifsSesInfo, + smb_ses_list); + ses->status = CifsExiting; + ses->server = NULL; } - read_unlock(&GlobalSMBSeslock); + read_unlock(&cifs_tcp_ses_lock); } else { /* although we can not zero the server struct pointer yet, since there are active requests which may depnd on them, mark the corresponding SMB sessions as exiting too */ - list_for_each(tmp, &GlobalSMBSessionList) { + list_for_each(tmp, &server->smb_ses_list) { ses = list_entry(tmp, struct cifsSesInfo, - cifsSessionList); - if (ses->server == server) - ses->status = CifsExiting; + smb_ses_list); + ses->status = CifsExiting; } spin_lock(&GlobalMid_Lock); @@ -723,7 +727,7 @@ multi_t2_fnd: } } spin_unlock(&GlobalMid_Lock); - read_unlock(&GlobalSMBSeslock); + read_unlock(&cifs_tcp_ses_lock); /* 1/8th of sec is more than enough time for them to exit */ msleep(125); } @@ -745,14 +749,13 @@ multi_t2_fnd: if there are any pointing to this (e.g if a crazy root user tried to kill cifsd kernel thread explicitly this might happen) */ - write_lock(&GlobalSMBSeslock); - list_for_each(tmp, &GlobalSMBSessionList) { - ses = list_entry(tmp, struct cifsSesInfo, - cifsSessionList); - if (ses->server == server) - ses->server = NULL; + /* BB: This shouldn't be necessary, see above */ + read_lock(&cifs_tcp_ses_lock); + list_for_each(tmp, &server->smb_ses_list) { + ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list); + ses->server = NULL; } - write_unlock(&GlobalSMBSeslock); + read_unlock(&cifs_tcp_ses_lock); kfree(server->hostname); task_to_wake = xchg(&server->tsk, NULL); @@ -1352,94 +1355,158 @@ cifs_parse_mount_options(char *options, const char *devname, return 0; } -static struct cifsSesInfo * -cifs_find_tcp_session(struct in_addr *target_ip_addr, - struct in6_addr *target_ip6_addr, - char *userName, struct TCP_Server_Info **psrvTcp) +static struct TCP_Server_Info * +cifs_find_tcp_session(struct sockaddr *addr) { struct list_head *tmp; - struct cifsSesInfo *ses; - - *psrvTcp = NULL; + struct TCP_Server_Info *server; + struct sockaddr_in *addr4 = (struct sockaddr_in *) addr; + struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *) addr; + + write_lock(&cifs_tcp_ses_lock); + list_for_each(tmp, &cifs_tcp_ses_list) { + server = list_entry(tmp, struct TCP_Server_Info, + tcp_ses_list); + /* + * the demux thread can exit on its own while still in CifsNew + * so don't accept any sockets in that state. Since the + * tcpStatus never changes back to CifsNew it's safe to check + * for this without a lock. + */ + if (server->tcpStatus == CifsNew) + continue; - read_lock(&GlobalSMBSeslock); - list_for_each(tmp, &GlobalSMBSessionList) { - ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); - if (!ses->server) + if (addr->sa_family == AF_INET && + (addr4->sin_addr.s_addr != + server->addr.sockAddr.sin_addr.s_addr)) + continue; + else if (addr->sa_family == AF_INET6 && + memcmp(&server->addr.sockAddr6.sin6_addr, + &addr6->sin6_addr, sizeof(addr6->sin6_addr))) continue; - if (target_ip_addr && - ses->server->addr.sockAddr.sin_addr.s_addr != target_ip_addr->s_addr) - continue; - else if (target_ip6_addr && - memcmp(&ses->server->addr.sockAddr6.sin6_addr, - target_ip6_addr, sizeof(*target_ip6_addr))) - continue; - /* BB lock server and tcp session; increment use count here?? */ + ++server->srv_count; + write_unlock(&cifs_tcp_ses_lock); + cFYI(1, ("Existing tcp session with server found")); + return server; + } + write_unlock(&cifs_tcp_ses_lock); + return NULL; +} - /* found a match on the TCP session */ - *psrvTcp = ses->server; +static void +cifs_put_tcp_session(struct TCP_Server_Info *server) +{ + struct task_struct *task; - /* BB check if reconnection needed */ - if (strncmp(ses->userName, userName, MAX_USERNAME_SIZE) == 0) { - read_unlock(&GlobalSMBSeslock); - /* Found exact match on both TCP and - SMB sessions */ - return ses; - } - /* else tcp and smb sessions need reconnection */ + write_lock(&cifs_tcp_ses_lock); + if (--server->srv_count > 0) { + write_unlock(&cifs_tcp_ses_lock); + return; } - read_unlock(&GlobalSMBSeslock); - return NULL; + list_del_init(&server->tcp_ses_list); + write_unlock(&cifs_tcp_ses_lock); + + spin_lock(&GlobalMid_Lock); + server->tcpStatus = CifsExiting; + spin_unlock(&GlobalMid_Lock); + + task = xchg(&server->tsk, NULL); + if (task) + force_sig(SIGKILL, task); } -static struct cifsTconInfo * -find_unc(__be32 new_target_ip_addr, char *uncName, char *userName) +static struct cifsSesInfo * +cifs_find_smb_ses(struct TCP_Server_Info *server, char *username) { struct list_head *tmp; - struct cifsTconInfo *tcon; - __be32 old_ip; - - read_lock(&GlobalSMBSeslock); + struct cifsSesInfo *ses; - list_for_each(tmp, &GlobalTreeConnectionList) { - cFYI(1, ("Next tcon")); - tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); - if (!tcon->ses || !tcon->ses->server) + write_lock(&cifs_tcp_ses_lock); + list_for_each(tmp, &server->smb_ses_list) { + ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list); + if (strncmp(ses->userName, username, MAX_USERNAME_SIZE)) continue; - old_ip = tcon->ses->server->addr.sockAddr.sin_addr.s_addr; - cFYI(1, ("old ip addr: %x == new ip %x ?", - old_ip, new_target_ip_addr)); + ++ses->ses_count; + write_unlock(&cifs_tcp_ses_lock); + return ses; + } + write_unlock(&cifs_tcp_ses_lock); + return NULL; +} - if (old_ip != new_target_ip_addr) - continue; +static void +cifs_put_smb_ses(struct cifsSesInfo *ses) +{ + int xid; + struct TCP_Server_Info *server = ses->server; - /* BB lock tcon, server, tcp session and increment use count? */ - /* found a match on the TCP session */ - /* BB check if reconnection needed */ - cFYI(1, ("IP match, old UNC: %s new: %s", - tcon->treeName, uncName)); + write_lock(&cifs_tcp_ses_lock); + if (--ses->ses_count > 0) { + write_unlock(&cifs_tcp_ses_lock); + return; + } - if (strncmp(tcon->treeName, uncName, MAX_TREE_SIZE)) - continue; + list_del_init(&ses->smb_ses_list); + write_unlock(&cifs_tcp_ses_lock); - cFYI(1, ("and old usr: %s new: %s", - tcon->treeName, uncName)); + if (ses->status == CifsGood) { + xid = GetXid(); + CIFSSMBLogoff(xid, ses); + _FreeXid(xid); + } + sesInfoFree(ses); + cifs_put_tcp_session(server); +} - if (strncmp(tcon->ses->userName, userName, MAX_USERNAME_SIZE)) +static struct cifsTconInfo * +cifs_find_tcon(struct cifsSesInfo *ses, const char *unc) +{ + struct list_head *tmp; + struct cifsTconInfo *tcon; + + write_lock(&cifs_tcp_ses_lock); + list_for_each(tmp, &ses->tcon_list) { + tcon = list_entry(tmp, struct cifsTconInfo, tcon_list); + if (tcon->tidStatus == CifsExiting) + continue; + if (strncmp(tcon->treeName, unc, MAX_TREE_SIZE)) continue; - /* matched smb session (user name) */ - read_unlock(&GlobalSMBSeslock); + ++tcon->tc_count; + write_unlock(&cifs_tcp_ses_lock); return tcon; } - - read_unlock(&GlobalSMBSeslock); + write_unlock(&cifs_tcp_ses_lock); return NULL; } +static void +cifs_put_tcon(struct cifsTconInfo *tcon) +{ + int xid; + struct cifsSesInfo *ses = tcon->ses; + + write_lock(&cifs_tcp_ses_lock); + if (--tcon->tc_count > 0) { + write_unlock(&cifs_tcp_ses_lock); + return; + } + + list_del_init(&tcon->tcon_list); + write_unlock(&cifs_tcp_ses_lock); + + xid = GetXid(); + CIFSSMBTDis(xid, tcon); + _FreeXid(xid); + + DeleteTconOplockQEntries(tcon); + tconInfoFree(tcon); + cifs_put_smb_ses(ses); +} + int get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path, const struct nls_table *nls_codepage, unsigned int *pnum_referrals, @@ -1876,14 +1943,90 @@ convert_delimiter(char *path, char delim) } } -static void -kill_cifsd(struct TCP_Server_Info *server) +static void setup_cifs_sb(struct smb_vol *pvolume_info, + struct cifs_sb_info *cifs_sb) { - struct task_struct *task; - - task = xchg(&server->tsk, NULL); - if (task) - force_sig(SIGKILL, task); + if (pvolume_info->rsize > CIFSMaxBufSize) { + cERROR(1, ("rsize %d too large, using MaxBufSize", + pvolume_info->rsize)); + cifs_sb->rsize = CIFSMaxBufSize; + } else if ((pvolume_info->rsize) && + (pvolume_info->rsize <= CIFSMaxBufSize)) + cifs_sb->rsize = pvolume_info->rsize; + else /* default */ + cifs_sb->rsize = CIFSMaxBufSize; + + if (pvolume_info->wsize > PAGEVEC_SIZE * PAGE_CACHE_SIZE) { + cERROR(1, ("wsize %d too large, using 4096 instead", + pvolume_info->wsize)); + cifs_sb->wsize = 4096; + } else if (pvolume_info->wsize) + cifs_sb->wsize = pvolume_info->wsize; + else + cifs_sb->wsize = min_t(const int, + PAGEVEC_SIZE * PAGE_CACHE_SIZE, + 127*1024); + /* old default of CIFSMaxBufSize was too small now + that SMB Write2 can send multiple pages in kvec. + RFC1001 does not describe what happens when frame + bigger than 128K is sent so use that as max in + conjunction with 52K kvec constraint on arch with 4K + page size */ + + if (cifs_sb->rsize < 2048) { + cifs_sb->rsize = 2048; + /* Windows ME may prefer this */ + cFYI(1, ("readsize set to minimum: 2048")); + } + /* calculate prepath */ + cifs_sb->prepath = pvolume_info->prepath; + if (cifs_sb->prepath) { + cifs_sb->prepathlen = strlen(cifs_sb->prepath); + /* we can not convert the / to \ in the path + separators in the prefixpath yet because we do not + know (until reset_cifs_unix_caps is called later) + whether POSIX PATH CAP is available. We normalize + the / to \ after reset_cifs_unix_caps is called */ + pvolume_info->prepath = NULL; + } else + cifs_sb->prepathlen = 0; + cifs_sb->mnt_uid = pvolume_info->linux_uid; + cifs_sb->mnt_gid = pvolume_info->linux_gid; + cifs_sb->mnt_file_mode = pvolume_info->file_mode; + cifs_sb->mnt_dir_mode = pvolume_info->dir_mode; + cFYI(1, ("file mode: 0x%x dir mode: 0x%x", + cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode)); + + if (pvolume_info->noperm) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM; + if (pvolume_info->setuids) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SET_UID; + if (pvolume_info->server_ino) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SERVER_INUM; + if (pvolume_info->remap) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SPECIAL_CHR; + if (pvolume_info->no_xattr) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_XATTR; + if (pvolume_info->sfu_emul) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL; + if (pvolume_info->nobrl) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL; + if (pvolume_info->cifs_acl) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL; + if (pvolume_info->override_uid) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_UID; + if (pvolume_info->override_gid) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_GID; + if (pvolume_info->dynperm) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM; + if (pvolume_info->direct_io) { + cFYI(1, ("mounting share using direct i/o")); + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; + } + + if ((pvolume_info->cifs_acl) && (pvolume_info->dynperm)) + cERROR(1, ("mount option dynperm ignored if cifsacl " + "mount option supported")); } int @@ -1892,13 +2035,12 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, { int rc = 0; int xid; - int address_type = AF_INET; struct socket *csocket = NULL; - struct sockaddr_in sin_server; - struct sockaddr_in6 sin_server6; + struct sockaddr addr; + struct sockaddr_in *sin_server = (struct sockaddr_in *) &addr; + struct sockaddr_in6 *sin_server6 = (struct sockaddr_in6 *) &addr; struct smb_vol volume_info; struct cifsSesInfo *pSesInfo = NULL; - struct cifsSesInfo *existingCifsSes = NULL; struct cifsTconInfo *tcon = NULL; struct TCP_Server_Info *srvTcp = NULL; @@ -1906,6 +2048,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, /* cFYI(1, ("Entering cifs_mount. Xid: %d with: %s", xid, mount_data)); */ + memset(&addr, 0, sizeof(struct sockaddr)); memset(&volume_info, 0, sizeof(struct smb_vol)); if (cifs_parse_mount_options(mount_data, devname, &volume_info)) { rc = -EINVAL; @@ -1928,16 +2071,16 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, if (volume_info.UNCip && volume_info.UNC) { rc = cifs_inet_pton(AF_INET, volume_info.UNCip, - &sin_server.sin_addr.s_addr); + &sin_server->sin_addr.s_addr); if (rc <= 0) { /* not ipv4 address, try ipv6 */ rc = cifs_inet_pton(AF_INET6, volume_info.UNCip, - &sin_server6.sin6_addr.in6_u); + &sin_server6->sin6_addr.in6_u); if (rc > 0) - address_type = AF_INET6; + addr.sa_family = AF_INET6; } else { - address_type = AF_INET; + addr.sa_family = AF_INET; } if (rc <= 0) { @@ -1977,41 +2120,25 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, } } - if (address_type == AF_INET) - existingCifsSes = cifs_find_tcp_session(&sin_server.sin_addr, - NULL /* no ipv6 addr */, - volume_info.username, &srvTcp); - else if (address_type == AF_INET6) { - cFYI(1, ("looking for ipv6 address")); - existingCifsSes = cifs_find_tcp_session(NULL /* no ipv4 addr */, - &sin_server6.sin6_addr, - volume_info.username, &srvTcp); - } else { - rc = -EINVAL; - goto out; - } - - if (srvTcp) { - cFYI(1, ("Existing tcp session with server found")); - } else { /* create socket */ - if (volume_info.port) - sin_server.sin_port = htons(volume_info.port); - else - sin_server.sin_port = 0; - if (address_type == AF_INET6) { + srvTcp = cifs_find_tcp_session(&addr); + if (!srvTcp) { /* create socket */ + if (addr.sa_family == AF_INET6) { cFYI(1, ("attempting ipv6 connect")); /* BB should we allow ipv6 on port 139? */ /* other OS never observed in Wild doing 139 with v6 */ - rc = ipv6_connect(&sin_server6, &csocket, + sin_server6->sin6_port = htons(volume_info.port); + rc = ipv6_connect(sin_server6, &csocket, volume_info.noblocksnd); - } else - rc = ipv4_connect(&sin_server, &csocket, + } else { + sin_server->sin_port = htons(volume_info.port); + rc = ipv4_connect(sin_server, &csocket, volume_info.source_rfc1001_name, volume_info.target_rfc1001_name, volume_info.noblocksnd, volume_info.noautotune); + } if (rc < 0) { - cERROR(1, ("Error connecting to IPv4 socket. " + cERROR(1, ("Error connecting to socket. " "Aborting operation")); if (csocket != NULL) sock_release(csocket); @@ -2026,12 +2153,15 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, } else { srvTcp->noblocksnd = volume_info.noblocksnd; srvTcp->noautotune = volume_info.noautotune; - memcpy(&srvTcp->addr.sockAddr, &sin_server, - sizeof(struct sockaddr_in)); + if (addr.sa_family == AF_INET6) + memcpy(&srvTcp->addr.sockAddr6, sin_server6, + sizeof(struct sockaddr_in6)); + else + memcpy(&srvTcp->addr.sockAddr, sin_server, + sizeof(struct sockaddr_in)); atomic_set(&srvTcp->inFlight, 0); /* BB Add code for ipv6 case too */ srvTcp->ssocket = csocket; - srvTcp->protocolType = IPV4; srvTcp->hostname = extract_hostname(volume_info.UNC); if (IS_ERR(srvTcp->hostname)) { rc = PTR_ERR(srvTcp->hostname); @@ -2061,15 +2191,28 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, memcpy(srvTcp->server_RFC1001_name, volume_info.target_rfc1001_name, 16); srvTcp->sequence_number = 0; + INIT_LIST_HEAD(&srvTcp->tcp_ses_list); + INIT_LIST_HEAD(&srvTcp->smb_ses_list); + ++srvTcp->srv_count; + write_lock(&cifs_tcp_ses_lock); + list_add(&srvTcp->tcp_ses_list, + &cifs_tcp_ses_list); + write_unlock(&cifs_tcp_ses_lock); } } - if (existingCifsSes) { - pSesInfo = existingCifsSes; + pSesInfo = cifs_find_smb_ses(srvTcp, volume_info.username); + if (pSesInfo) { cFYI(1, ("Existing smb sess found (status=%d)", pSesInfo->status)); + /* + * The existing SMB session already has a reference to srvTcp, + * so we can put back the extra one we got before + */ + cifs_put_tcp_session(srvTcp); + down(&pSesInfo->sesSem); - if (pSesInfo->status == CifsNeedReconnect) { + if (pSesInfo->need_reconnect) { cFYI(1, ("Session needs reconnect")); rc = cifs_setup_session(xid, pSesInfo, cifs_sb->local_nls); @@ -2078,187 +2221,101 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, } else if (!rc) { cFYI(1, ("Existing smb sess not found")); pSesInfo = sesInfoAlloc(); - if (pSesInfo == NULL) + if (pSesInfo == NULL) { rc = -ENOMEM; - else { - pSesInfo->server = srvTcp; - sprintf(pSesInfo->serverName, "%u.%u.%u.%u", - NIPQUAD(sin_server.sin_addr.s_addr)); - } + goto mount_fail_check; + } + + /* new SMB session uses our srvTcp ref */ + pSesInfo->server = srvTcp; + sprintf(pSesInfo->serverName, "%u.%u.%u.%u", + NIPQUAD(sin_server->sin_addr.s_addr)); + + write_lock(&cifs_tcp_ses_lock); + list_add(&pSesInfo->smb_ses_list, &srvTcp->smb_ses_list); + write_unlock(&cifs_tcp_ses_lock); + + /* volume_info.password freed at unmount */ + if (volume_info.password) { + pSesInfo->password = volume_info.password; + /* set to NULL to prevent freeing on exit */ + volume_info.password = NULL; + } + if (volume_info.username) + strncpy(pSesInfo->userName, volume_info.username, + MAX_USERNAME_SIZE); + if (volume_info.domainname) { + int len = strlen(volume_info.domainname); + pSesInfo->domainName = kmalloc(len + 1, GFP_KERNEL); + if (pSesInfo->domainName) + strcpy(pSesInfo->domainName, + volume_info.domainname); + } + pSesInfo->linux_uid = volume_info.linux_uid; + pSesInfo->overrideSecFlg = volume_info.secFlg; + down(&pSesInfo->sesSem); - if (!rc) { - /* volume_info.password freed at unmount */ - if (volume_info.password) { - pSesInfo->password = volume_info.password; - /* set to NULL to prevent freeing on exit */ - volume_info.password = NULL; - } - if (volume_info.username) - strncpy(pSesInfo->userName, - volume_info.username, - MAX_USERNAME_SIZE); - if (volume_info.domainname) { - int len = strlen(volume_info.domainname); - pSesInfo->domainName = - kmalloc(len + 1, GFP_KERNEL); - if (pSesInfo->domainName) - strcpy(pSesInfo->domainName, - volume_info.domainname); - } - pSesInfo->linux_uid = volume_info.linux_uid; - pSesInfo->overrideSecFlg = volume_info.secFlg; - down(&pSesInfo->sesSem); - /* BB FIXME need to pass vol->secFlgs BB */ - rc = cifs_setup_session(xid, pSesInfo, - cifs_sb->local_nls); - up(&pSesInfo->sesSem); - if (!rc) - atomic_inc(&srvTcp->socketUseCount); - } + /* BB FIXME need to pass vol->secFlgs BB */ + rc = cifs_setup_session(xid, pSesInfo, + cifs_sb->local_nls); + up(&pSesInfo->sesSem); } /* search for existing tcon to this server share */ if (!rc) { - if (volume_info.rsize > CIFSMaxBufSize) { - cERROR(1, ("rsize %d too large, using MaxBufSize", - volume_info.rsize)); - cifs_sb->rsize = CIFSMaxBufSize; - } else if ((volume_info.rsize) && - (volume_info.rsize <= CIFSMaxBufSize)) - cifs_sb->rsize = volume_info.rsize; - else /* default */ - cifs_sb->rsize = CIFSMaxBufSize; - - if (volume_info.wsize > PAGEVEC_SIZE * PAGE_CACHE_SIZE) { - cERROR(1, ("wsize %d too large, using 4096 instead", - volume_info.wsize)); - cifs_sb->wsize = 4096; - } else if (volume_info.wsize) - cifs_sb->wsize = volume_info.wsize; - else - cifs_sb->wsize = - min_t(const int, PAGEVEC_SIZE * PAGE_CACHE_SIZE, - 127*1024); - /* old default of CIFSMaxBufSize was too small now - that SMB Write2 can send multiple pages in kvec. - RFC1001 does not describe what happens when frame - bigger than 128K is sent so use that as max in - conjunction with 52K kvec constraint on arch with 4K - page size */ - - if (cifs_sb->rsize < 2048) { - cifs_sb->rsize = 2048; - /* Windows ME may prefer this */ - cFYI(1, ("readsize set to minimum: 2048")); - } - /* calculate prepath */ - cifs_sb->prepath = volume_info.prepath; - if (cifs_sb->prepath) { - cifs_sb->prepathlen = strlen(cifs_sb->prepath); - /* we can not convert the / to \ in the path - separators in the prefixpath yet because we do not - know (until reset_cifs_unix_caps is called later) - whether POSIX PATH CAP is available. We normalize - the / to \ after reset_cifs_unix_caps is called */ - volume_info.prepath = NULL; - } else - cifs_sb->prepathlen = 0; - cifs_sb->mnt_uid = volume_info.linux_uid; - cifs_sb->mnt_gid = volume_info.linux_gid; - cifs_sb->mnt_file_mode = volume_info.file_mode; - cifs_sb->mnt_dir_mode = volume_info.dir_mode; - cFYI(1, ("file mode: 0x%x dir mode: 0x%x", - cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode)); - - if (volume_info.noperm) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM; - if (volume_info.setuids) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SET_UID; - if (volume_info.server_ino) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SERVER_INUM; - if (volume_info.remap) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SPECIAL_CHR; - if (volume_info.no_xattr) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_XATTR; - if (volume_info.sfu_emul) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL; - if (volume_info.nobrl) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL; - if (volume_info.cifs_acl) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL; - if (volume_info.override_uid) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_UID; - if (volume_info.override_gid) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_GID; - if (volume_info.dynperm) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM; - if (volume_info.direct_io) { - cFYI(1, ("mounting share using direct i/o")); - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; - } + setup_cifs_sb(&volume_info, cifs_sb); - if ((volume_info.cifs_acl) && (volume_info.dynperm)) - cERROR(1, ("mount option dynperm ignored if cifsacl " - "mount option supported")); - - tcon = - find_unc(sin_server.sin_addr.s_addr, volume_info.UNC, - volume_info.username); + tcon = cifs_find_tcon(pSesInfo, volume_info.UNC); if (tcon) { cFYI(1, ("Found match on UNC path")); - /* we can have only one retry value for a connection - to a share so for resources mounted more than once - to the same server share the last value passed in - for the retry flag is used */ - tcon->retry = volume_info.retry; - tcon->nocase = volume_info.nocase; - tcon->local_lease = volume_info.local_lease; + /* existing tcon already has a reference */ + cifs_put_smb_ses(pSesInfo); if (tcon->seal != volume_info.seal) cERROR(1, ("transport encryption setting " "conflicts with existing tid")); } else { tcon = tconInfoAlloc(); - if (tcon == NULL) + if (tcon == NULL) { rc = -ENOMEM; - else { - /* check for null share name ie connecting to - * dfs root */ - - /* BB check if this works for exactly length - * three strings */ - if ((strchr(volume_info.UNC + 3, '\\') == NULL) - && (strchr(volume_info.UNC + 3, '/') == - NULL)) { -/* rc = connect_to_dfs_path(xid, pSesInfo, - "", cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR);*/ - cFYI(1, ("DFS root not supported")); - rc = -ENODEV; - goto out; - } else { - /* BB Do we need to wrap sesSem around - * this TCon call and Unix SetFS as - * we do on SessSetup and reconnect? */ - rc = CIFSTCon(xid, pSesInfo, - volume_info.UNC, - tcon, cifs_sb->local_nls); - cFYI(1, ("CIFS Tcon rc = %d", rc)); - if (volume_info.nodfs) { - tcon->Flags &= - ~SMB_SHARE_IS_IN_DFS; - cFYI(1, ("DFS disabled (%d)", - tcon->Flags)); - } - } - if (!rc) { - atomic_inc(&pSesInfo->inUse); - tcon->retry = volume_info.retry; - tcon->nocase = volume_info.nocase; - tcon->seal = volume_info.seal; + goto mount_fail_check; + } + tcon->ses = pSesInfo; + + /* check for null share name ie connect to dfs root */ + if ((strchr(volume_info.UNC + 3, '\\') == NULL) + && (strchr(volume_info.UNC + 3, '/') == NULL)) { + /* rc = connect_to_dfs_path(...) */ + cFYI(1, ("DFS root not supported")); + rc = -ENODEV; + goto mount_fail_check; + } else { + /* BB Do we need to wrap sesSem around + * this TCon call and Unix SetFS as + * we do on SessSetup and reconnect? */ + rc = CIFSTCon(xid, pSesInfo, volume_info.UNC, + tcon, cifs_sb->local_nls); + cFYI(1, ("CIFS Tcon rc = %d", rc)); + if (volume_info.nodfs) { + tcon->Flags &= ~SMB_SHARE_IS_IN_DFS; + cFYI(1, ("DFS disabled (%d)", + tcon->Flags)); } } - } + if (rc) + goto mount_fail_check; + tcon->seal = volume_info.seal; + write_lock(&cifs_tcp_ses_lock); + list_add(&tcon->tcon_list, &pSesInfo->tcon_list); + write_unlock(&cifs_tcp_ses_lock); + } + + /* we can have only one retry value for a connection + to a share so for resources mounted more than once + to the same server share the last value passed in + for the retry flag is used */ + tcon->retry = volume_info.retry; + tcon->nocase = volume_info.nocase; + tcon->local_lease = volume_info.local_lease; } if (pSesInfo) { if (pSesInfo->capabilities & CAP_LARGE_FILES) { @@ -2270,80 +2327,49 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, /* BB FIXME fix time_gran to be larger for LANMAN sessions */ sb->s_time_gran = 100; -/* on error free sesinfo and tcon struct if needed */ +mount_fail_check: + /* on error free sesinfo and tcon struct if needed */ if (rc) { - /* if session setup failed, use count is zero but - we still need to free cifsd thread */ - if (atomic_read(&srvTcp->socketUseCount) == 0) { - spin_lock(&GlobalMid_Lock); - srvTcp->tcpStatus = CifsExiting; - spin_unlock(&GlobalMid_Lock); - kill_cifsd(srvTcp); - } - /* If find_unc succeeded then rc == 0 so we can not end */ - if (tcon) /* up accidently freeing someone elses tcon struct */ - tconInfoFree(tcon); - if (existingCifsSes == NULL) { - if (pSesInfo) { - if ((pSesInfo->server) && - (pSesInfo->status == CifsGood)) { - int temp_rc; - temp_rc = CIFSSMBLogoff(xid, pSesInfo); - /* if the socketUseCount is now zero */ - if ((temp_rc == -ESHUTDOWN) && - (pSesInfo->server)) - kill_cifsd(pSesInfo->server); - } else { - cFYI(1, ("No session or bad tcon")); - if (pSesInfo->server) { - spin_lock(&GlobalMid_Lock); - srvTcp->tcpStatus = CifsExiting; - spin_unlock(&GlobalMid_Lock); - kill_cifsd(pSesInfo->server); - } - } - sesInfoFree(pSesInfo); - /* pSesInfo = NULL; */ - } - } - } else { - atomic_inc(&tcon->useCount); - cifs_sb->tcon = tcon; - tcon->ses = pSesInfo; - - /* do not care if following two calls succeed - informational */ - if (!tcon->ipc) { - CIFSSMBQFSDeviceInfo(xid, tcon); - CIFSSMBQFSAttributeInfo(xid, tcon); - } - - /* tell server which Unix caps we support */ - if (tcon->ses->capabilities & CAP_UNIX) - /* reset of caps checks mount to see if unix extensions - disabled for just this mount */ - reset_cifs_unix_caps(xid, tcon, sb, &volume_info); + /* If find_unc succeeded then rc == 0 so we can not end */ + /* up accidently freeing someone elses tcon struct */ + if (tcon) + cifs_put_tcon(tcon); + else if (pSesInfo) + cifs_put_smb_ses(pSesInfo); else - tcon->unix_ext = 0; /* server does not support them */ + cifs_put_tcp_session(srvTcp); + goto out; + } + cifs_sb->tcon = tcon; - /* convert forward to back slashes in prepath here if needed */ - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0) - convert_delimiter(cifs_sb->prepath, - CIFS_DIR_SEP(cifs_sb)); + /* do not care if following two calls succeed - informational */ + if (!tcon->ipc) { + CIFSSMBQFSDeviceInfo(xid, tcon); + CIFSSMBQFSAttributeInfo(xid, tcon); + } - if ((tcon->unix_ext == 0) && (cifs_sb->rsize > (1024 * 127))) { - cifs_sb->rsize = 1024 * 127; - cFYI(DBG2, - ("no very large read support, rsize now 127K")); - } - if (!(tcon->ses->capabilities & CAP_LARGE_WRITE_X)) - cifs_sb->wsize = min(cifs_sb->wsize, - (tcon->ses->server->maxBuf - - MAX_CIFS_HDR_SIZE)); - if (!(tcon->ses->capabilities & CAP_LARGE_READ_X)) - cifs_sb->rsize = min(cifs_sb->rsize, - (tcon->ses->server->maxBuf - - MAX_CIFS_HDR_SIZE)); + /* tell server which Unix caps we support */ + if (tcon->ses->capabilities & CAP_UNIX) + /* reset of caps checks mount to see if unix extensions + disabled for just this mount */ + reset_cifs_unix_caps(xid, tcon, sb, &volume_info); + else + tcon->unix_ext = 0; /* server does not support them */ + + /* convert forward to back slashes in prepath here if needed */ + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0) + convert_delimiter(cifs_sb->prepath, CIFS_DIR_SEP(cifs_sb)); + + if ((tcon->unix_ext == 0) && (cifs_sb->rsize > (1024 * 127))) { + cifs_sb->rsize = 1024 * 127; + cFYI(DBG2, ("no very large read support, rsize now 127K")); } + if (!(tcon->ses->capabilities & CAP_LARGE_WRITE_X)) + cifs_sb->wsize = min(cifs_sb->wsize, + (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE)); + if (!(tcon->ses->capabilities & CAP_LARGE_READ_X)) + cifs_sb->rsize = min(cifs_sb->rsize, + (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE)); /* volume_info.password is freed above when existing session found (in which case it is not needed anymore) but when new sesion is created @@ -3513,6 +3539,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, /* above now done in SendReceive */ if ((rc == 0) && (tcon != NULL)) { tcon->tidStatus = CifsGood; + tcon->need_reconnect = false; tcon->tid = smb_buffer_response->Tid; bcc_ptr = pByteArea(smb_buffer_response); length = strnlen(bcc_ptr, BCC(smb_buffer_response) - 2); @@ -3584,48 +3611,17 @@ int cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) { int rc = 0; - int xid; - struct cifsSesInfo *ses = NULL; char *tmp; - xid = GetXid(); - - if (cifs_sb->tcon) { - ses = cifs_sb->tcon->ses; /* save ptr to ses before delete tcon!*/ - rc = CIFSSMBTDis(xid, cifs_sb->tcon); - if (rc == -EBUSY) { - FreeXid(xid); - return 0; - } - DeleteTconOplockQEntries(cifs_sb->tcon); - tconInfoFree(cifs_sb->tcon); - if ((ses) && (ses->server)) { - /* save off task so we do not refer to ses later */ - cFYI(1, ("About to do SMBLogoff ")); - rc = CIFSSMBLogoff(xid, ses); - if (rc == -EBUSY) { - FreeXid(xid); - return 0; - } else if (rc == -ESHUTDOWN) { - cFYI(1, ("Waking up socket by sending signal")); - if (ses->server) - kill_cifsd(ses->server); - rc = 0; - } /* else - we have an smb session - left on this socket do not kill cifsd */ - } else - cFYI(1, ("No session or bad tcon")); - } + if (cifs_sb->tcon) + cifs_put_tcon(cifs_sb->tcon); cifs_sb->tcon = NULL; tmp = cifs_sb->prepath; cifs_sb->prepathlen = 0; cifs_sb->prepath = NULL; kfree(tmp); - if (ses) - sesInfoFree(ses); - FreeXid(xid); return rc; } @@ -3741,6 +3737,7 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, cFYI(1, ("CIFS Session Established successfully")); spin_lock(&GlobalMid_Lock); pSesInfo->status = CifsGood; + pSesInfo->need_reconnect = false; spin_unlock(&GlobalMid_Lock); } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ead1a3bb0256..f0a81e631ae6 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -488,12 +488,13 @@ int cifs_close(struct inode *inode, struct file *file) pTcon = cifs_sb->tcon; if (pSMBFile) { struct cifsLockInfo *li, *tmp; - + write_lock(&GlobalSMBSeslock); pSMBFile->closePend = true; if (pTcon) { /* no sense reconnecting to close a file that is already closed */ - if (pTcon->tidStatus != CifsNeedReconnect) { + if (!pTcon->need_reconnect) { + write_unlock(&GlobalSMBSeslock); timeout = 2; while ((atomic_read(&pSMBFile->wrtPending) != 0) && (timeout <= 2048)) { @@ -510,12 +511,15 @@ int cifs_close(struct inode *inode, struct file *file) timeout *= 4; } if (atomic_read(&pSMBFile->wrtPending)) - cERROR(1, - ("close with pending writes")); - rc = CIFSSMBClose(xid, pTcon, + cERROR(1, ("close with pending write")); + if (!pTcon->need_reconnect && + !pSMBFile->invalidHandle) + rc = CIFSSMBClose(xid, pTcon, pSMBFile->netfid); - } - } + } else + write_unlock(&GlobalSMBSeslock); + } else + write_unlock(&GlobalSMBSeslock); /* Delete any outstanding lock records. We'll lose them when the file is closed anyway. */ @@ -587,15 +591,18 @@ int cifs_closedir(struct inode *inode, struct file *file) pTcon = cifs_sb->tcon; cFYI(1, ("Freeing private data in close dir")); + write_lock(&GlobalSMBSeslock); if (!pCFileStruct->srch_inf.endOfSearch && !pCFileStruct->invalidHandle) { pCFileStruct->invalidHandle = true; + write_unlock(&GlobalSMBSeslock); rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid); cFYI(1, ("Closing uncompleted readdir with rc %d", rc)); /* not much we can do if it fails anyway, ignore rc */ rc = 0; - } + } else + write_unlock(&GlobalSMBSeslock); ptmp = pCFileStruct->srch_inf.ntwrk_buf_start; if (ptmp) { cFYI(1, ("closedir free smb buf in srch struct")); @@ -1404,7 +1411,10 @@ retry: if ((wbc->nr_to_write -= n_iov) <= 0) done = 1; index = next; - } + } else + /* Need to re-find the pages we skipped */ + index = pvec.pages[0]->index + 1; + pagevec_release(&pvec); } if (!scanned && !done) { @@ -1465,7 +1475,11 @@ static int cifs_write_end(struct file *file, struct address_space *mapping, cFYI(1, ("write_end for page %p from pos %lld with %d bytes", page, pos, copied)); - if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE) + if (PageChecked(page)) { + if (copied == len) + SetPageUptodate(page); + ClearPageChecked(page); + } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE) SetPageUptodate(page); if (!PageUptodate(page)) { @@ -2052,39 +2066,70 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping, { pgoff_t index = pos >> PAGE_CACHE_SHIFT; loff_t offset = pos & (PAGE_CACHE_SIZE - 1); + loff_t page_start = pos & PAGE_MASK; + loff_t i_size; + struct page *page; + int rc = 0; cFYI(1, ("write_begin from %lld len %d", (long long)pos, len)); - *pagep = __grab_cache_page(mapping, index); - if (!*pagep) - return -ENOMEM; - - if (PageUptodate(*pagep)) - return 0; + page = __grab_cache_page(mapping, index); + if (!page) { + rc = -ENOMEM; + goto out; + } - /* If we are writing a full page it will be up to date, - no need to read from the server */ - if (len == PAGE_CACHE_SIZE && flags & AOP_FLAG_UNINTERRUPTIBLE) - return 0; + if (PageUptodate(page)) + goto out; - if ((file->f_flags & O_ACCMODE) != O_WRONLY) { - int rc; + /* + * If we write a full page it will be up to date, no need to read from + * the server. If the write is short, we'll end up doing a sync write + * instead. + */ + if (len == PAGE_CACHE_SIZE) + goto out; - /* might as well read a page, it is fast enough */ - rc = cifs_readpage_worker(file, *pagep, &offset); + /* + * optimize away the read when we have an oplock, and we're not + * expecting to use any of the data we'd be reading in. That + * is, when the page lies beyond the EOF, or straddles the EOF + * and the write will cover all of the existing data. + */ + if (CIFS_I(mapping->host)->clientCanCacheRead) { + i_size = i_size_read(mapping->host); + if (page_start >= i_size || + (offset == 0 && (pos + len) >= i_size)) { + zero_user_segments(page, 0, offset, + offset + len, + PAGE_CACHE_SIZE); + /* + * PageChecked means that the parts of the page + * to which we're not writing are considered up + * to date. Once the data is copied to the + * page, it can be set uptodate. + */ + SetPageChecked(page); + goto out; + } + } - /* we do not need to pass errors back - e.g. if we do not have read access to the file - because cifs_write_end will attempt synchronous writes - -- shaggy */ + if ((file->f_flags & O_ACCMODE) != O_WRONLY) { + /* + * might as well read a page, it is fast enough. If we get + * an error, we don't need to return it. cifs_write_end will + * do a sync write instead since PG_uptodate isn't set. + */ + cifs_readpage_worker(file, page, &page_start); } else { /* we could try using another file handle if there is one - but how would we lock it to prevent close of that handle racing with this read? In any case this will be written out by write_end so is fine */ } - - return 0; +out: + *pagep = page; + return rc; } const struct address_space_operations cifs_addr_ops = { diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 88786ba02d27..9ee3f689c2b0 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -75,12 +75,12 @@ sesInfoAlloc(void) ret_buf = kzalloc(sizeof(struct cifsSesInfo), GFP_KERNEL); if (ret_buf) { - write_lock(&GlobalSMBSeslock); atomic_inc(&sesInfoAllocCount); ret_buf->status = CifsNew; - list_add(&ret_buf->cifsSessionList, &GlobalSMBSessionList); + ++ret_buf->ses_count; + INIT_LIST_HEAD(&ret_buf->smb_ses_list); + INIT_LIST_HEAD(&ret_buf->tcon_list); init_MUTEX(&ret_buf->sesSem); - write_unlock(&GlobalSMBSeslock); } return ret_buf; } @@ -93,10 +93,7 @@ sesInfoFree(struct cifsSesInfo *buf_to_free) return; } - write_lock(&GlobalSMBSeslock); atomic_dec(&sesInfoAllocCount); - list_del(&buf_to_free->cifsSessionList); - write_unlock(&GlobalSMBSeslock); kfree(buf_to_free->serverOS); kfree(buf_to_free->serverDomain); kfree(buf_to_free->serverNOS); @@ -111,17 +108,14 @@ tconInfoAlloc(void) struct cifsTconInfo *ret_buf; ret_buf = kzalloc(sizeof(struct cifsTconInfo), GFP_KERNEL); if (ret_buf) { - write_lock(&GlobalSMBSeslock); atomic_inc(&tconInfoAllocCount); - list_add(&ret_buf->cifsConnectionList, - &GlobalTreeConnectionList); ret_buf->tidStatus = CifsNew; + ++ret_buf->tc_count; INIT_LIST_HEAD(&ret_buf->openFileList); - init_MUTEX(&ret_buf->tconSem); + INIT_LIST_HEAD(&ret_buf->tcon_list); #ifdef CONFIG_CIFS_STATS spin_lock_init(&ret_buf->stat_lock); #endif - write_unlock(&GlobalSMBSeslock); } return ret_buf; } @@ -133,10 +127,7 @@ tconInfoFree(struct cifsTconInfo *buf_to_free) cFYI(1, ("Null buffer passed to tconInfoFree")); return; } - write_lock(&GlobalSMBSeslock); atomic_dec(&tconInfoAllocCount); - list_del(&buf_to_free->cifsConnectionList); - write_unlock(&GlobalSMBSeslock); kfree(buf_to_free->nativeFileSystem); kfree(buf_to_free); } @@ -350,9 +341,9 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , if (current->fsuid != treeCon->ses->linux_uid) { cFYI(1, ("Multiuser mode and UID " "did not match tcon uid")); - read_lock(&GlobalSMBSeslock); - list_for_each(temp_item, &GlobalSMBSessionList) { - ses = list_entry(temp_item, struct cifsSesInfo, cifsSessionList); + read_lock(&cifs_tcp_ses_lock); + list_for_each(temp_item, &treeCon->ses->server->smb_ses_list) { + ses = list_entry(temp_item, struct cifsSesInfo, smb_ses_list); if (ses->linux_uid == current->fsuid) { if (ses->server == treeCon->ses->server) { cFYI(1, ("found matching uid substitute right smb_uid")); @@ -364,7 +355,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , } } } - read_unlock(&GlobalSMBSeslock); + read_unlock(&cifs_tcp_ses_lock); } } } @@ -497,9 +488,10 @@ bool is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) { struct smb_com_lock_req *pSMB = (struct smb_com_lock_req *)buf; - struct list_head *tmp; - struct list_head *tmp1; + struct list_head *tmp, *tmp1, *tmp2; + struct cifsSesInfo *ses; struct cifsTconInfo *tcon; + struct cifsInodeInfo *pCifsInode; struct cifsFileInfo *netfile; cFYI(1, ("Checking for oplock break or dnotify response")); @@ -554,42 +546,45 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) return false; /* look up tcon based on tid & uid */ - read_lock(&GlobalSMBSeslock); - list_for_each(tmp, &GlobalTreeConnectionList) { - tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); - if ((tcon->tid == buf->Tid) && (srv == tcon->ses->server)) { + read_lock(&cifs_tcp_ses_lock); + list_for_each(tmp, &srv->smb_ses_list) { + ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list); + list_for_each(tmp1, &ses->tcon_list) { + tcon = list_entry(tmp1, struct cifsTconInfo, tcon_list); + if (tcon->tid != buf->Tid) + continue; + cifs_stats_inc(&tcon->num_oplock_brks); - list_for_each(tmp1, &tcon->openFileList) { - netfile = list_entry(tmp1, struct cifsFileInfo, + write_lock(&GlobalSMBSeslock); + list_for_each(tmp2, &tcon->openFileList) { + netfile = list_entry(tmp2, struct cifsFileInfo, tlist); - if (pSMB->Fid == netfile->netfid) { - struct cifsInodeInfo *pCifsInode; - read_unlock(&GlobalSMBSeslock); - cFYI(1, - ("file id match, oplock break")); - pCifsInode = - CIFS_I(netfile->pInode); - pCifsInode->clientCanCacheAll = false; - if (pSMB->OplockLevel == 0) - pCifsInode->clientCanCacheRead - = false; - pCifsInode->oplockPending = true; - AllocOplockQEntry(netfile->pInode, - netfile->netfid, - tcon); - cFYI(1, - ("about to wake up oplock thread")); - if (oplockThread) - wake_up_process(oplockThread); - return true; - } + if (pSMB->Fid != netfile->netfid) + continue; + + write_unlock(&GlobalSMBSeslock); + read_unlock(&cifs_tcp_ses_lock); + cFYI(1, ("file id match, oplock break")); + pCifsInode = CIFS_I(netfile->pInode); + pCifsInode->clientCanCacheAll = false; + if (pSMB->OplockLevel == 0) + pCifsInode->clientCanCacheRead = false; + pCifsInode->oplockPending = true; + AllocOplockQEntry(netfile->pInode, + netfile->netfid, tcon); + cFYI(1, ("about to wake up oplock thread")); + if (oplockThread) + wake_up_process(oplockThread); + + return true; } - read_unlock(&GlobalSMBSeslock); + write_unlock(&GlobalSMBSeslock); + read_unlock(&cifs_tcp_ses_lock); cFYI(1, ("No matching file for oplock break")); return true; } } - read_unlock(&GlobalSMBSeslock); + read_unlock(&cifs_tcp_ses_lock); cFYI(1, ("Can not process oplock break for non-existent connection")); return true; } diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 58d57299f2a0..9f51f9bf0292 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -741,11 +741,14 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, (index_to_find < first_entry_in_buffer)) { /* close and restart search */ cFYI(1, ("search backing up - close and restart search")); + write_lock(&GlobalSMBSeslock); if (!cifsFile->srch_inf.endOfSearch && !cifsFile->invalidHandle) { cifsFile->invalidHandle = true; + write_unlock(&GlobalSMBSeslock); CIFSFindClose(xid, pTcon, cifsFile->netfid); - } + } else + write_unlock(&GlobalSMBSeslock); if (cifsFile->srch_inf.ntwrk_buf_start) { cFYI(1, ("freeing SMB ff cache buf on search rewind")); if (cifsFile->srch_inf.smallBuf) diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index d910501de6d2..8d86b7960f0d 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -812,7 +812,7 @@ int dlm_release_lockspace(void *lockspace, int force) error = release_lockspace(ls, force); if (!error) ls_count--; - else if (!ls_count) + if (!ls_count) threads_stop(); mutex_unlock(&ls_lock); diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index e22bc3961345..0d713b691941 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1037,17 +1037,14 @@ static int decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, struct ecryptfs_crypt_stat *crypt_stat) { - struct scatterlist dst_sg; - struct scatterlist src_sg; + struct scatterlist dst_sg[2]; + struct scatterlist src_sg[2]; struct mutex *tfm_mutex; struct blkcipher_desc desc = { .flags = CRYPTO_TFM_REQ_MAY_SLEEP }; int rc = 0; - sg_init_table(&dst_sg, 1); - sg_init_table(&src_sg, 1); - if (unlikely(ecryptfs_verbosity > 0)) { ecryptfs_printk( KERN_DEBUG, "Session key encryption key (size [%d]):\n", @@ -1066,8 +1063,8 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, } rc = virt_to_scatterlist(auth_tok->session_key.encrypted_key, auth_tok->session_key.encrypted_key_size, - &src_sg, 1); - if (rc != 1) { + src_sg, 2); + if (rc < 1 || rc > 2) { printk(KERN_ERR "Internal error whilst attempting to convert " "auth_tok->session_key.encrypted_key to scatterlist; " "expected rc = 1; got rc = [%d]. " @@ -1079,8 +1076,8 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, auth_tok->session_key.encrypted_key_size; rc = virt_to_scatterlist(auth_tok->session_key.decrypted_key, auth_tok->session_key.decrypted_key_size, - &dst_sg, 1); - if (rc != 1) { + dst_sg, 2); + if (rc < 1 || rc > 2) { printk(KERN_ERR "Internal error whilst attempting to convert " "auth_tok->session_key.decrypted_key to scatterlist; " "expected rc = 1; got rc = [%d]\n", rc); @@ -1096,7 +1093,7 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, rc = -EINVAL; goto out; } - rc = crypto_blkcipher_decrypt(&desc, &dst_sg, &src_sg, + rc = crypto_blkcipher_decrypt(&desc, dst_sg, src_sg, auth_tok->session_key.encrypted_key_size); mutex_unlock(tfm_mutex); if (unlikely(rc)) { @@ -1539,8 +1536,8 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, size_t i; size_t encrypted_session_key_valid = 0; char session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES]; - struct scatterlist dst_sg; - struct scatterlist src_sg; + struct scatterlist dst_sg[2]; + struct scatterlist src_sg[2]; struct mutex *tfm_mutex = NULL; u8 cipher_code; size_t packet_size_length; @@ -1619,8 +1616,8 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, ecryptfs_dump_hex(session_key_encryption_key, 16); } rc = virt_to_scatterlist(crypt_stat->key, key_rec->enc_key_size, - &src_sg, 1); - if (rc != 1) { + src_sg, 2); + if (rc < 1 || rc > 2) { ecryptfs_printk(KERN_ERR, "Error generating scatterlist " "for crypt_stat session key; expected rc = 1; " "got rc = [%d]. key_rec->enc_key_size = [%d]\n", @@ -1629,8 +1626,8 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, goto out; } rc = virt_to_scatterlist(key_rec->enc_key, key_rec->enc_key_size, - &dst_sg, 1); - if (rc != 1) { + dst_sg, 2); + if (rc < 1 || rc > 2) { ecryptfs_printk(KERN_ERR, "Error generating scatterlist " "for crypt_stat encrypted session key; " "expected rc = 1; got rc = [%d]. " @@ -1651,7 +1648,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, rc = 0; ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n", crypt_stat->key_size); - rc = crypto_blkcipher_encrypt(&desc, &dst_sg, &src_sg, + rc = crypto_blkcipher_encrypt(&desc, dst_sg, src_sg, (*key_rec).enc_key_size); mutex_unlock(tfm_mutex); if (rc) { diff --git a/fs/eventpoll.c b/fs/eventpoll.c index aec5c13f6341..96355d505347 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -102,6 +102,8 @@ #define EP_UNACTIVE_PTR ((void *) -1L) +#define EP_ITEM_COST (sizeof(struct epitem) + sizeof(struct eppoll_entry)) + struct epoll_filefd { struct file *file; int fd; @@ -200,6 +202,9 @@ struct eventpoll { * holding ->lock. */ struct epitem *ovflist; + + /* The user that created the eventpoll descriptor */ + struct user_struct *user; }; /* Wait structure used by the poll hooks */ @@ -227,9 +232,17 @@ struct ep_pqueue { }; /* + * Configuration options available inside /proc/sys/fs/epoll/ + */ +/* Maximum number of epoll devices, per user */ +static int max_user_instances __read_mostly; +/* Maximum number of epoll watched descriptors, per user */ +static int max_user_watches __read_mostly; + +/* * This mutex is used to serialize ep_free() and eventpoll_release_file(). */ -static struct mutex epmutex; +static DEFINE_MUTEX(epmutex); /* Safe wake up implementation */ static struct poll_safewake psw; @@ -240,6 +253,33 @@ static struct kmem_cache *epi_cache __read_mostly; /* Slab cache used to allocate "struct eppoll_entry" */ static struct kmem_cache *pwq_cache __read_mostly; +#ifdef CONFIG_SYSCTL + +#include <linux/sysctl.h> + +static int zero; + +ctl_table epoll_table[] = { + { + .procname = "max_user_instances", + .data = &max_user_instances, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .extra1 = &zero, + }, + { + .procname = "max_user_watches", + .data = &max_user_watches, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .extra1 = &zero, + }, + { .ctl_name = 0 } +}; +#endif /* CONFIG_SYSCTL */ + /* Setup the structure that is used as key for the RB tree */ static inline void ep_set_ffd(struct epoll_filefd *ffd, @@ -402,6 +442,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) /* At this point it is safe to free the eventpoll item */ kmem_cache_free(epi_cache, epi); + atomic_dec(&ep->user->epoll_watches); + DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n", current, ep, file)); @@ -449,6 +491,8 @@ static void ep_free(struct eventpoll *ep) mutex_unlock(&epmutex); mutex_destroy(&ep->mtx); + atomic_dec(&ep->user->epoll_devs); + free_uid(ep->user); kfree(ep); } @@ -532,10 +576,19 @@ void eventpoll_release_file(struct file *file) static int ep_alloc(struct eventpoll **pep) { - struct eventpoll *ep = kzalloc(sizeof(*ep), GFP_KERNEL); + int error; + struct user_struct *user; + struct eventpoll *ep; - if (!ep) - return -ENOMEM; + user = get_current_user(); + error = -EMFILE; + if (unlikely(atomic_read(&user->epoll_devs) >= + max_user_instances)) + goto free_uid; + error = -ENOMEM; + ep = kzalloc(sizeof(*ep), GFP_KERNEL); + if (unlikely(!ep)) + goto free_uid; spin_lock_init(&ep->lock); mutex_init(&ep->mtx); @@ -544,12 +597,17 @@ static int ep_alloc(struct eventpoll **pep) INIT_LIST_HEAD(&ep->rdllist); ep->rbr = RB_ROOT; ep->ovflist = EP_UNACTIVE_PTR; + ep->user = user; *pep = ep; DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n", current, ep)); return 0; + +free_uid: + free_uid(user); + return error; } /* @@ -703,9 +761,11 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, struct epitem *epi; struct ep_pqueue epq; - error = -ENOMEM; + if (unlikely(atomic_read(&ep->user->epoll_watches) >= + max_user_watches)) + return -ENOSPC; if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL))) - goto error_return; + return -ENOMEM; /* Item initialization follow here ... */ INIT_LIST_HEAD(&epi->rdllink); @@ -735,6 +795,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, * install process. Namely an allocation for a wait queue failed due * high memory pressure. */ + error = -ENOMEM; if (epi->nwait < 0) goto error_unregister; @@ -765,6 +826,8 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, spin_unlock_irqrestore(&ep->lock, flags); + atomic_inc(&ep->user->epoll_watches); + /* We have to call this outside the lock */ if (pwake) ep_poll_safewake(&psw, &ep->poll_wait); @@ -789,7 +852,7 @@ error_unregister: spin_unlock_irqrestore(&ep->lock, flags); kmem_cache_free(epi_cache, epi); -error_return: + return error; } @@ -1078,6 +1141,7 @@ asmlinkage long sys_epoll_create1(int flags) flags & O_CLOEXEC); if (fd < 0) ep_free(ep); + atomic_inc(&ep->user->epoll_devs); error_return: DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", @@ -1299,7 +1363,12 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, static int __init eventpoll_init(void) { - mutex_init(&epmutex); + struct sysinfo si; + + si_meminfo(&si); + max_user_instances = 128; + max_user_watches = (((si.totalram - si.totalhigh) / 32) << PAGE_SHIFT) / + EP_ITEM_COST; /* Initialize the structure used to perform safe poll wait head wake ups */ ep_poll_safewake_init(&psw); diff --git a/fs/exec.c b/fs/exec.c index 4e834f16d9da..ec5df9a38313 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1159,6 +1159,7 @@ EXPORT_SYMBOL(remove_arg_zero); */ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) { + unsigned int depth = bprm->recursion_depth; int try,retval; struct linux_binfmt *fmt; #ifdef __alpha__ @@ -1219,8 +1220,15 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) continue; read_unlock(&binfmt_lock); retval = fn(bprm, regs); + /* + * Restore the depth counter to its starting value + * in this call, so we don't have to rely on every + * load_binary function to restore it on return. + */ + bprm->recursion_depth = depth; if (retval >= 0) { - tracehook_report_exec(fmt, bprm, regs); + if (depth == 0) + tracehook_report_exec(fmt, bprm, regs); put_binfmt(fmt); allow_write_access(bprm->file); if (bprm->file) diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 80246bad1b7f..890e01828817 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -367,6 +367,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, * Try to get any dentry for the given file handle from the filesystem. */ result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); + if (!result) + result = ERR_PTR(-ESTALE); if (IS_ERR(result)) return result; @@ -420,6 +422,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, target_dir = nop->fh_to_parent(mnt->mnt_sb, fid, fh_len, fileid_type); + if (!target_dir) + goto err_result; err = PTR_ERR(target_dir); if (IS_ERR(target_dir)) goto err_result; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 5dec6d1356c4..f6c94f232ec1 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2375,12 +2375,9 @@ int ext3_force_commit(struct super_block *sb) /* * Ext3 always journals updates to the superblock itself, so we don't * have to propagate any other updates to the superblock on disk at this - * point. Just start an async writeback to get the buffers on their way - * to the disk. - * - * This implicitly triggers the writebehind on sync(). + * point. (We can probably nuke this function altogether, and remove + * any mention to sb->s_dirt in all of fs/ext3; eventual cleanup...) */ - static void ext3_write_super (struct super_block * sb) { if (mutex_trylock(&sb->s_lock) != 0) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index d2003cdc36aa..db35cfdb3c8b 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -609,8 +609,8 @@ int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) if (free_blocks - (nblocks + root_blocks + dirty_blocks) < EXT4_FREEBLOCKS_WATERMARK) { - free_blocks = percpu_counter_sum(fbc); - dirty_blocks = percpu_counter_sum(dbc); + free_blocks = percpu_counter_sum_positive(fbc); + dirty_blocks = percpu_counter_sum_positive(dbc); if (dirty_blocks < 0) { printk(KERN_CRIT "Dirty block accounting " "went wrong %lld\n", diff --git a/fs/fcntl.c b/fs/fcntl.c index ac4f7db9f134..549daf8005fb 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -19,6 +19,7 @@ #include <linux/signal.h> #include <linux/rcupdate.h> #include <linux/pid_namespace.h> +#include <linux/smp_lock.h> #include <asm/poll.h> #include <asm/siginfo.h> @@ -175,6 +176,11 @@ static int setfl(int fd, struct file * filp, unsigned long arg) if (error) return error; + /* + * We still need a lock here for now to keep multiple FASYNC calls + * from racing with each other. + */ + lock_kernel(); if ((arg ^ filp->f_flags) & FASYNC) { if (filp->f_op && filp->f_op->fasync) { error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); @@ -185,6 +191,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg) filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); out: + unlock_kernel(); return error; } diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index 6ae9011b95eb..2f34f8f2134b 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h @@ -81,7 +81,7 @@ extern int do_rmdir(const char *file); extern int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor); extern int link_file(const char *from, const char *to); -extern int do_readlink(char *file, char *buf, int size); +extern int hostfs_do_readlink(char *file, char *buf, int size); extern int rename_file(char *from, char *to); extern int do_statfs(char *root, long *bsize_out, long long *blocks_out, long long *bfree_out, long long *bavail_out, diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 7f34f4385de0..3a31451ac170 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -168,7 +168,7 @@ static char *follow_link(char *link) if (name == NULL) goto out; - n = do_readlink(link, name, len); + n = hostfs_do_readlink(link, name, len); if (n < len) break; len *= 2; @@ -943,7 +943,7 @@ int hostfs_link_readpage(struct file *file, struct page *page) name = inode_name(page->mapping->host, 0); if (name == NULL) return -ENOMEM; - err = do_readlink(name, buffer, PAGE_CACHE_SIZE); + err = hostfs_do_readlink(name, buffer, PAGE_CACHE_SIZE); kfree(name); if (err == PAGE_CACHE_SIZE) err = -E2BIG; diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index 53fd0a67c11a..b79424f93282 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c @@ -377,7 +377,7 @@ int link_file(const char *to, const char *from) return 0; } -int do_readlink(char *file, char *buf, int size) +int hostfs_do_readlink(char *file, char *buf, int size) { int n; diff --git a/fs/inotify.c b/fs/inotify.c index 690e72595e6e..dae3f28f30d4 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -106,6 +106,20 @@ void get_inotify_watch(struct inotify_watch *watch) } EXPORT_SYMBOL_GPL(get_inotify_watch); +int pin_inotify_watch(struct inotify_watch *watch) +{ + struct super_block *sb = watch->inode->i_sb; + spin_lock(&sb_lock); + if (sb->s_count >= S_BIAS) { + atomic_inc(&sb->s_active); + spin_unlock(&sb_lock); + atomic_inc(&watch->count); + return 1; + } + spin_unlock(&sb_lock); + return 0; +} + /** * put_inotify_watch - decrements the ref count on a given watch. cleans up * watch references if the count reaches zero. inotify_watch is freed by @@ -124,6 +138,13 @@ void put_inotify_watch(struct inotify_watch *watch) } EXPORT_SYMBOL_GPL(put_inotify_watch); +void unpin_inotify_watch(struct inotify_watch *watch) +{ + struct super_block *sb = watch->inode->i_sb; + put_inotify_watch(watch); + deactivate_super(sb); +} + /* * inotify_handle_get_wd - returns the next WD for use by the given handle * @@ -407,11 +428,13 @@ void inotify_unmount_inodes(struct list_head *list) watches = &inode->inotify_watches; list_for_each_entry_safe(watch, next_w, watches, i_list) { struct inotify_handle *ih= watch->ih; + get_inotify_watch(watch); mutex_lock(&ih->mutex); ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0, NULL, NULL); inotify_remove_watch_locked(ih, watch); mutex_unlock(&ih->mutex); + put_inotify_watch(watch); } mutex_unlock(&inode->inotify_mutex); iput(inode); @@ -479,6 +502,112 @@ void inotify_init_watch(struct inotify_watch *watch) } EXPORT_SYMBOL_GPL(inotify_init_watch); +/* + * Watch removals suck violently. To kick the watch out we need (in this + * order) inode->inotify_mutex and ih->mutex. That's fine if we have + * a hold on inode; however, for all other cases we need to make damn sure + * we don't race with umount. We can *NOT* just grab a reference to a + * watch - inotify_unmount_inodes() will happily sail past it and we'll end + * with reference to inode potentially outliving its superblock. Ideally + * we just want to grab an active reference to superblock if we can; that + * will make sure we won't go into inotify_umount_inodes() until we are + * done. Cleanup is just deactivate_super(). However, that leaves a messy + * case - what if we *are* racing with umount() and active references to + * superblock can't be acquired anymore? We can bump ->s_count, grab + * ->s_umount, which will almost certainly wait until the superblock is shut + * down and the watch in question is pining for fjords. That's fine, but + * there is a problem - we might have hit the window between ->s_active + * getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock + * is past the point of no return and is heading for shutdown) and the + * moment when deactivate_super() acquires ->s_umount. We could just do + * drop_super() yield() and retry, but that's rather antisocial and this + * stuff is luser-triggerable. OTOH, having grabbed ->s_umount and having + * found that we'd got there first (i.e. that ->s_root is non-NULL) we know + * that we won't race with inotify_umount_inodes(). So we could grab a + * reference to watch and do the rest as above, just with drop_super() instead + * of deactivate_super(), right? Wrong. We had to drop ih->mutex before we + * could grab ->s_umount. So the watch could've been gone already. + * + * That still can be dealt with - we need to save watch->wd, do idr_find() + * and compare its result with our pointer. If they match, we either have + * the damn thing still alive or we'd lost not one but two races at once, + * the watch had been killed and a new one got created with the same ->wd + * at the same address. That couldn't have happened in inotify_destroy(), + * but inotify_rm_wd() could run into that. Still, "new one got created" + * is not a problem - we have every right to kill it or leave it alone, + * whatever's more convenient. + * + * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as + * "grab it and kill it" check. If it's been our original watch, we are + * fine, if it's a newcomer - nevermind, just pretend that we'd won the + * race and kill the fscker anyway; we are safe since we know that its + * superblock won't be going away. + * + * And yes, this is far beyond mere "not very pretty"; so's the entire + * concept of inotify to start with. + */ + +/** + * pin_to_kill - pin the watch down for removal + * @ih: inotify handle + * @watch: watch to kill + * + * Called with ih->mutex held, drops it. Possible return values: + * 0 - nothing to do, it has died + * 1 - remove it, drop the reference and deactivate_super() + * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid + * that variant, since it involved a lot of PITA, but that's the best that + * could've been done. + */ +static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch) +{ + struct super_block *sb = watch->inode->i_sb; + s32 wd = watch->wd; + + spin_lock(&sb_lock); + if (sb->s_count >= S_BIAS) { + atomic_inc(&sb->s_active); + spin_unlock(&sb_lock); + get_inotify_watch(watch); + mutex_unlock(&ih->mutex); + return 1; /* the best outcome */ + } + sb->s_count++; + spin_unlock(&sb_lock); + mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */ + down_read(&sb->s_umount); + if (likely(!sb->s_root)) { + /* fs is already shut down; the watch is dead */ + drop_super(sb); + return 0; + } + /* raced with the final deactivate_super() */ + mutex_lock(&ih->mutex); + if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) { + /* the watch is dead */ + mutex_unlock(&ih->mutex); + drop_super(sb); + return 0; + } + /* still alive or freed and reused with the same sb and wd; kill */ + get_inotify_watch(watch); + mutex_unlock(&ih->mutex); + return 2; +} + +static void unpin_and_kill(struct inotify_watch *watch, int how) +{ + struct super_block *sb = watch->inode->i_sb; + put_inotify_watch(watch); + switch (how) { + case 1: + deactivate_super(sb); + break; + case 2: + drop_super(sb); + } +} + /** * inotify_destroy - clean up and destroy an inotify instance * @ih: inotify handle @@ -490,11 +619,15 @@ void inotify_destroy(struct inotify_handle *ih) * pretty. We cannot do a simple iteration over the list, because we * do not know the inode until we iterate to the watch. But we need to * hold inode->inotify_mutex before ih->mutex. The following works. + * + * AV: it had to become even uglier to start working ;-/ */ while (1) { struct inotify_watch *watch; struct list_head *watches; + struct super_block *sb; struct inode *inode; + int how; mutex_lock(&ih->mutex); watches = &ih->watches; @@ -503,8 +636,10 @@ void inotify_destroy(struct inotify_handle *ih) break; } watch = list_first_entry(watches, struct inotify_watch, h_list); - get_inotify_watch(watch); - mutex_unlock(&ih->mutex); + sb = watch->inode->i_sb; + how = pin_to_kill(ih, watch); + if (!how) + continue; inode = watch->inode; mutex_lock(&inode->inotify_mutex); @@ -518,7 +653,7 @@ void inotify_destroy(struct inotify_handle *ih) mutex_unlock(&ih->mutex); mutex_unlock(&inode->inotify_mutex); - put_inotify_watch(watch); + unpin_and_kill(watch, how); } /* free this handle: the put matching the get in inotify_init() */ @@ -719,7 +854,9 @@ void inotify_evict_watch(struct inotify_watch *watch) int inotify_rm_wd(struct inotify_handle *ih, u32 wd) { struct inotify_watch *watch; + struct super_block *sb; struct inode *inode; + int how; mutex_lock(&ih->mutex); watch = idr_find(&ih->idr, wd); @@ -727,9 +864,12 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) mutex_unlock(&ih->mutex); return -EINVAL; } - get_inotify_watch(watch); + sb = watch->inode->i_sb; + how = pin_to_kill(ih, watch); + if (!how) + return 0; + inode = watch->inode; - mutex_unlock(&ih->mutex); mutex_lock(&inode->inotify_mutex); mutex_lock(&ih->mutex); @@ -740,7 +880,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) mutex_unlock(&ih->mutex); mutex_unlock(&inode->inotify_mutex); - put_inotify_watch(watch); + unpin_and_kill(watch, how); return 0; } diff --git a/fs/ioctl.c b/fs/ioctl.c index d152856c371b..43e8b2c0664b 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -400,11 +400,9 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp, /* Did FASYNC state change ? */ if ((flag ^ filp->f_flags) & FASYNC) { - if (filp->f_op && filp->f_op->fasync) { - lock_kernel(); + if (filp->f_op && filp->f_op->fasync) error = filp->f_op->fasync(fd, filp, on); - unlock_kernel(); - } else + else error = -ENOTTY; } if (error) @@ -440,11 +438,17 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, break; case FIONBIO: + /* BKL needed to avoid races tweaking f_flags */ + lock_kernel(); error = ioctl_fionbio(filp, argp); + unlock_kernel(); break; case FIOASYNC: + /* BKL needed to avoid races tweaking f_flags */ + lock_kernel(); error = ioctl_fioasync(fd, filp, argp); + unlock_kernel(); break; case FIOQSIZE: diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 9fd8889097b7..70fc63a1727b 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -167,7 +167,8 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) continue; if (host->h_server != ni->server) continue; - if (!nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap)) + if (ni->server && + !nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap)) continue; /* Move to head of hash chain. */ diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index c631a83931ce..56b076736b56 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -181,6 +181,7 @@ lockd(void *vrqstp) } flush_signals(current); cancel_delayed_work_sync(&grace_period_end); + locks_end_grace(&lockd_manager); if (nlmsvc_ops) nlmsvc_invalidate_all(); nlm_shutdown_hosts(); diff --git a/fs/namei.c b/fs/namei.c index 09ce58e49e72..d34e0f9681c6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1378,7 +1378,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir) if (IS_APPEND(dir)) return -EPERM; if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| - IS_IMMUTABLE(victim->d_inode)) + IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) return -EPERM; if (isdir) { if (!S_ISDIR(victim->d_inode->i_mode)) diff --git a/fs/namespace.c b/fs/namespace.c index cce46702d33c..65b3dc844c87 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1815,8 +1815,8 @@ static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts) while (!list_empty(&graveyard)) { m = list_first_entry(&graveyard, struct vfsmount, mnt_expire); - touch_mnt_namespace(mnt->mnt_ns); - umount_tree(mnt, 1, umounts); + touch_mnt_namespace(m->mnt_ns); + umount_tree(m, 1, umounts); } } } diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index bb93946ace22..b79ec930d9f1 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -225,12 +225,12 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) return 0; nfs4_save_user(&uid, &gid); + INIT_LIST_HEAD(dentries); filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY); status = PTR_ERR(filp); if (IS_ERR(filp)) goto out; - INIT_LIST_HEAD(dentries); status = vfs_readdir(filp, nfsd4_build_dentrylist, &dla); fput(filp); while (!list_empty(dentries)) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b0bebc552a11..1a052ac2bde9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3261,6 +3261,7 @@ nfs4_state_shutdown(void) { cancel_rearming_delayed_workqueue(laundry_wq, &laundromat_work); destroy_workqueue(laundry_wq); + locks_end_grace(&nfsd4_manager); nfs4_lock_state(); nfs4_release_reclaim(); __nfs4_state_shutdown(); diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h index 5e6724c1afd1..2142b1c68b61 100644 --- a/fs/ntfs/debug.h +++ b/fs/ntfs/debug.h @@ -30,7 +30,8 @@ extern int debug_msgs; -#if 0 /* Fool kernel-doc since it doesn't do macros yet */ +extern void __ntfs_debug(const char *file, int line, const char *function, + const char *format, ...) __attribute__ ((format (printf, 4, 5))); /** * ntfs_debug - write a debug level message to syslog * @f: a printf format string containing the message @@ -39,11 +40,6 @@ extern int debug_msgs; * ntfs_debug() writes a DEBUG level message to the syslog but only if the * driver was compiled with -DDEBUG. Otherwise, the call turns into a NOP. */ -static void ntfs_debug(const char *f, ...); -#endif - -extern void __ntfs_debug (const char *file, int line, const char *function, - const char *format, ...) __attribute__ ((format (printf, 4, 5))); #define ntfs_debug(f, a...) \ __ntfs_debug(__FILE__, __LINE__, __func__, f, ##a) diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 7e947c672469..3a178ec48d7c 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -112,7 +112,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, bh = bhs[i]; if (buffer_jbd(bh)) { - mlog(ML_ERROR, + mlog(ML_BH_IO, "trying to sync read a jbd " "managed bh (blocknr = %llu), skipping\n", (unsigned long long)bh->b_blocknr); @@ -147,15 +147,10 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, for (i = nr; i > 0; i--) { bh = bhs[i - 1]; - if (buffer_jbd(bh)) { - mlog(ML_ERROR, - "the journal got the buffer while it was " - "locked for io! (blocknr = %llu)\n", - (unsigned long long)bh->b_blocknr); - BUG(); - } + /* No need to wait on the buffer if it's managed by JBD. */ + if (!buffer_jbd(bh)) + wait_on_buffer(bh); - wait_on_buffer(bh); if (!buffer_uptodate(bh)) { /* Status won't be cleared from here on out, * so we can safely record this and loop back @@ -251,8 +246,6 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, ignore_cache = 1; } - /* XXX: Can we ever get this and *not* have the cached - * flag set? */ if (buffer_jbd(bh)) { if (ignore_cache) mlog(ML_BH_IO, "trying to sync read a jbd " diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index 533a789c3ef8..ba962d71b34d 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -608,8 +608,10 @@ static int __init init_dlmfs_fs(void) 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), dlmfs_init_once); - if (!dlmfs_inode_cache) + if (!dlmfs_inode_cache) { + status = -ENOMEM; goto bail; + } cleanup_inode = 1; user_dlm_worker = create_singlethread_workqueue("user_dlm"); diff --git a/fs/ocfs2/dlm/userdlm.h b/fs/ocfs2/dlm/userdlm.h index 39ec27738499..0c3cc03c61fa 100644 --- a/fs/ocfs2/dlm/userdlm.h +++ b/fs/ocfs2/dlm/userdlm.h @@ -33,7 +33,7 @@ #include <linux/workqueue.h> /* user_lock_res->l_flags flags. */ -#define USER_LOCK_ATTACHED (0x00000001) /* have we initialized +#define USER_LOCK_ATTACHED (0x00000001) /* we have initialized * the lvb */ #define USER_LOCK_BUSY (0x00000002) /* we are currently in * dlm_lock */ diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index ec684426034b..6e6cc0a2e5f7 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2841,9 +2841,8 @@ static void ocfs2_unlock_ast(void *opaque, int error) lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; - spin_unlock_irqrestore(&lockres->l_lock, flags); - wake_up(&lockres->l_event); + spin_unlock_irqrestore(&lockres->l_lock, flags); mlog_exit_void(); } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 7efe937a415f..e2570a3bc2b2 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -247,8 +247,8 @@ int ocfs2_update_inode_atime(struct inode *inode, mlog_entry_void(); handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); - if (handle == NULL) { - ret = -ENOMEM; + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); mlog_errno(ret); goto out; } @@ -312,8 +312,8 @@ static int ocfs2_simple_size_update(struct inode *inode, handle_t *handle = NULL; handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); - if (handle == NULL) { - ret = -ENOMEM; + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); mlog_errno(ret); goto out; } @@ -1055,8 +1055,8 @@ static int __ocfs2_write_remove_suid(struct inode *inode, (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode); handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); - if (handle == NULL) { - ret = -ENOMEM; + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); mlog_errno(ret); goto out; } @@ -1259,8 +1259,8 @@ static int __ocfs2_remove_inode_range(struct inode *inode, } handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); - if (handle == NULL) { - ret = -ENOMEM; + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); mlog_errno(ret); goto out; } @@ -1352,8 +1352,8 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, goto out; handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); - if (handle == NULL) { - ret = -ENOMEM; + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); mlog_errno(ret); goto out; } @@ -1866,6 +1866,13 @@ relock: written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, ppos, count, ocount); if (written < 0) { + /* + * direct write may have instantiated a few + * blocks outside i_size. Trim these off again. + * Don't need i_size_read because we hold i_mutex. + */ + if (*ppos + count > inode->i_size) + vmtruncate(inode, inode->i_size); ret = written; goto out_dio; } diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 4903688f72a9..7aa00d511874 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -1106,6 +1106,12 @@ void ocfs2_clear_inode(struct inode *inode) oi->ip_last_trans = 0; oi->ip_dir_start_lookup = 0; oi->ip_blkno = 0ULL; + + /* + * ip_jinode is used to track txns against this inode. We ensure that + * the journal is flushed before journal shutdown. Thus it is safe to + * have inodes get cleaned up after journal shutdown. + */ jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal, &oi->ip_jinode); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 81e40677eecb..99fe9d584f3c 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -690,6 +690,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) /* Shutdown the kernel journal system */ jbd2_journal_destroy(journal->j_journal); + journal->j_journal = NULL; OCFS2_I(inode)->ip_open_count--; diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 3dc18d67557c..eea1d24713ea 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -113,7 +113,11 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, * ocfs2_write_begin_nolock(). */ if (!PageUptodate(page) || page->mapping != inode->i_mapping) { - ret = -EINVAL; + /* + * the page has been umapped in ocfs2_data_downconvert_worker. + * So return 0 here and let VFS retry. + */ + ret = 0; goto out; } diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 485a6aa0ad39..f4967e634ffd 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -378,8 +378,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, } inode = new_inode(dir->i_sb); - if (IS_ERR(inode)) { - status = PTR_ERR(inode); + if (!inode) { + status = -ENOMEM; mlog(ML_ERROR, "new_inode failed!\n"); goto leave; } @@ -491,8 +491,10 @@ leave: brelse(*new_fe_bh); *new_fe_bh = NULL; } - if (inode) + if (inode) { + clear_nlink(inode); iput(inode); + } } mlog_exit(status); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index a21a465490c4..3fed9e3d8992 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -85,7 +85,7 @@ enum ocfs2_unlock_action { }; /* ocfs2_lock_res->l_flags flags. */ -#define OCFS2_LOCK_ATTACHED (0x00000001) /* have we initialized +#define OCFS2_LOCK_ATTACHED (0x00000001) /* we have initialized * the lvb */ #define OCFS2_LOCK_BUSY (0x00000002) /* we are currently in * dlm_lock */ @@ -473,6 +473,9 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) (____gd)->bg_signature); \ } while (0) +#define OCFS2_IS_VALID_XATTR_BLOCK(ptr) \ + (!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE)) + static inline unsigned long ino_from_blkno(struct super_block *sb, u64 blkno) { diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index f24ce3d3f956..5e0c0d0aef7d 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -86,7 +86,8 @@ #define OCFS2_CLEAR_INCOMPAT_FEATURE(sb,mask) \ OCFS2_SB(sb)->s_feature_incompat &= ~(mask) -#define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB +#define OCFS2_FEATURE_COMPAT_SUPP (OCFS2_FEATURE_COMPAT_BACKUP_SB \ + | OCFS2_FEATURE_COMPAT_JBD2_SB) #define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \ | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \ | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \ @@ -153,6 +154,11 @@ #define OCFS2_FEATURE_COMPAT_BACKUP_SB 0x0001 /* + * The filesystem will correctly handle journal feature bits. + */ +#define OCFS2_FEATURE_COMPAT_JBD2_SB 0x0002 + +/* * Unwritten extents support. */ #define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 0x0001 @@ -742,12 +748,12 @@ struct ocfs2_group_desc */ struct ocfs2_xattr_entry { __le32 xe_name_hash; /* hash value of xattr prefix+suffix. */ - __le16 xe_name_offset; /* byte offset from the 1st etnry in the local + __le16 xe_name_offset; /* byte offset from the 1st entry in the local xattr storage(inode, xattr block or xattr bucket). */ __u8 xe_name_len; /* xattr name len, does't include prefix. */ - __u8 xe_type; /* the low 7 bits indicates the name prefix's - * type and the highest 1 bits indicate whether + __u8 xe_type; /* the low 7 bits indicate the name prefix + * type and the highest bit indicates whether * the EA is stored in the local storage. */ __le64 xe_value_size; /* real xattr value length. */ }; @@ -766,9 +772,10 @@ struct ocfs2_xattr_header { xattr. */ __le16 xh_name_value_len; /* total length of name/value length in this bucket. */ - __le16 xh_num_buckets; /* bucket nums in one extent - record, only valid in the - first bucket. */ + __le16 xh_num_buckets; /* Number of xattr buckets + in this extent record, + only valid in the first + bucket. */ __le64 xh_csum; struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */ }; @@ -776,8 +783,8 @@ struct ocfs2_xattr_header { /* * On disk structure for xattr value root. * - * It is used when one extended attribute's size is larger, and we will save it - * in an outside cluster. It will stored in a b-tree like file content. + * When an xattr's value is large enough, it is stored in an external + * b-tree like file data. The xattr value root points to this structure. */ struct ocfs2_xattr_value_root { /*00*/ __le32 xr_clusters; /* clusters covered by xattr value. */ diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index faec2d879357..9b76d41a8ac6 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -740,6 +740,9 @@ static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb) static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb) { + if (!lksb->lksb_fsdlm.sb_lvbptr) + lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb + + sizeof(struct dlm_lksb); return (void *)(lksb->lksb_fsdlm.sb_lvbptr); } diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 802c41492214..74d7367ade13 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -3,25 +3,20 @@ * * xattr.c * - * Copyright (C) 2008 Oracle. All rights reserved. + * Copyright (C) 2004, 2008 Oracle. All rights reserved. * * CREDITS: - * Lots of code in this file is taken from ext3. + * Lots of code in this file is copy from linux/fs/ext3/xattr.c. + * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * License version 2 as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/capability.h> @@ -83,7 +78,7 @@ struct xattr_handler *ocfs2_xattr_handlers[] = { NULL }; -static struct xattr_handler *ocfs2_xattr_handler_map[] = { +static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, }; @@ -116,6 +111,10 @@ static int ocfs2_xattr_bucket_get_name_value(struct inode *inode, int *block_off, int *new_offset); +static int ocfs2_xattr_block_find(struct inode *inode, + int name_index, + const char *name, + struct ocfs2_xattr_search *xs); static int ocfs2_xattr_index_block_find(struct inode *inode, struct buffer_head *root_bh, int name_index, @@ -137,6 +136,24 @@ static int ocfs2_xattr_set_entry_index_block(struct inode *inode, static int ocfs2_delete_xattr_index_block(struct inode *inode, struct buffer_head *xb_bh); +static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) +{ + return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; +} + +static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb) +{ + return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); +} + +static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb) +{ + u16 len = sb->s_blocksize - + offsetof(struct ocfs2_xattr_header, xh_entries); + + return len / sizeof(struct ocfs2_xattr_entry); +} + static inline const char *ocfs2_xattr_prefix(int name_index) { struct xattr_handler *handler = NULL; @@ -542,14 +559,12 @@ static int ocfs2_xattr_block_list(struct inode *inode, mlog_errno(ret); return ret; } - /*Verify the signature of xattr block*/ - if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE, - strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) { - ret = -EFAULT; - goto cleanup; - } xb = (struct ocfs2_xattr_block *)blk_bh->b_data; + if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { + ret = -EIO; + goto cleanup; + } if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; @@ -749,47 +764,25 @@ static int ocfs2_xattr_block_get(struct inode *inode, size_t buffer_size, struct ocfs2_xattr_search *xs) { - struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; - struct buffer_head *blk_bh = NULL; struct ocfs2_xattr_block *xb; struct ocfs2_xattr_value_root *xv; size_t size; int ret = -ENODATA, name_offset, name_len, block_off, i; - if (!di->i_xattr_loc) - return ret; - memset(&xs->bucket, 0, sizeof(xs->bucket)); - ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); - if (ret < 0) { + ret = ocfs2_xattr_block_find(inode, name_index, name, xs); + if (ret) { mlog_errno(ret); - return ret; - } - /*Verify the signature of xattr block*/ - if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE, - strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) { - ret = -EFAULT; goto cleanup; } - xs->xattr_bh = blk_bh; - xb = (struct ocfs2_xattr_block *)blk_bh->b_data; - - if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { - xs->header = &xb->xb_attrs.xb_header; - xs->base = (void *)xs->header; - xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size; - xs->here = xs->header->xh_entries; - - ret = ocfs2_xattr_find_entry(name_index, name, xs); - } else - ret = ocfs2_xattr_index_block_find(inode, blk_bh, - name_index, - name, xs); - - if (ret) + if (xs->not_found) { + ret = -ENODATA; goto cleanup; + } + + xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; size = le64_to_cpu(xs->here->xe_value_size); if (buffer) { ret = -ERANGE; @@ -828,7 +821,8 @@ cleanup: brelse(xs->bucket.bhs[i]); memset(&xs->bucket, 0, sizeof(xs->bucket)); - brelse(blk_bh); + brelse(xs->xattr_bh); + xs->xattr_bh = NULL; return ret; } @@ -837,11 +831,11 @@ cleanup: * Copy an extended attribute into the buffer provided. * Buffer is NULL to compute the size of buffer required. */ -int ocfs2_xattr_get(struct inode *inode, - int name_index, - const char *name, - void *buffer, - size_t buffer_size) +static int ocfs2_xattr_get(struct inode *inode, + int name_index, + const char *name, + void *buffer, + size_t buffer_size) { int ret; struct ocfs2_dinode *di = NULL; @@ -871,7 +865,7 @@ int ocfs2_xattr_get(struct inode *inode, down_read(&oi->ip_xattr_sem); ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, buffer_size, &xis); - if (ret == -ENODATA) + if (ret == -ENODATA && di->i_xattr_loc) ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, buffer_size, &xbs); up_read(&oi->ip_xattr_sem); @@ -1229,7 +1223,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode, free = min_offs - ((void *)last - xs->base) - sizeof(__u32); if (free < 0) - return -EFAULT; + return -EIO; if (!xs->not_found) { size_t size = 0; @@ -1514,10 +1508,9 @@ static int ocfs2_xattr_free_block(struct inode *inode, goto out; } - /*Verify the signature of xattr block*/ - if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE, - strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) { - ret = -EFAULT; + xb = (struct ocfs2_xattr_block *)blk_bh->b_data; + if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { + ret = -EIO; goto out; } @@ -1527,7 +1520,6 @@ static int ocfs2_xattr_free_block(struct inode *inode, goto out; } - xb = (struct ocfs2_xattr_block *)blk_bh->b_data; blk = le64_to_cpu(xb->xb_blkno); bit = le16_to_cpu(xb->xb_suballoc_bit); bg_blkno = ocfs2_which_suballoc_group(blk, bit); @@ -1771,15 +1763,14 @@ static int ocfs2_xattr_block_find(struct inode *inode, mlog_errno(ret); return ret; } - /*Verify the signature of xattr block*/ - if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE, - strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) { - ret = -EFAULT; - goto cleanup; + + xb = (struct ocfs2_xattr_block *)blk_bh->b_data; + if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { + ret = -EIO; + goto cleanup; } xs->xattr_bh = blk_bh; - xb = (struct ocfs2_xattr_block *)blk_bh->b_data; if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { xs->header = &xb->xb_attrs.xb_header; @@ -1806,52 +1797,6 @@ cleanup: } /* - * When all the xattrs are deleted from index btree, the ocfs2_xattr_tree - * will be erased and ocfs2_xattr_block will have its ocfs2_xattr_header - * re-initialized. - */ -static int ocfs2_restore_xattr_block(struct inode *inode, - struct ocfs2_xattr_search *xs) -{ - int ret; - handle_t *handle; - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - struct ocfs2_xattr_block *xb = - (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; - struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; - u16 xb_flags = le16_to_cpu(xb->xb_flags); - - BUG_ON(!(xb_flags & OCFS2_XATTR_INDEXED) || - le16_to_cpu(el->l_next_free_rec) != 0); - - handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_UPDATE_CREDITS); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - handle = NULL; - goto out; - } - - ret = ocfs2_journal_access(handle, inode, xs->xattr_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret < 0) { - mlog_errno(ret); - goto out_commit; - } - - memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - - offsetof(struct ocfs2_xattr_block, xb_attrs)); - - xb->xb_flags = cpu_to_le16(xb_flags & ~OCFS2_XATTR_INDEXED); - - ocfs2_journal_dirty(handle, xs->xattr_bh); - -out_commit: - ocfs2_commit_trans(osb, handle); -out: - return ret; -} - -/* * ocfs2_xattr_block_set() * * Set, replace or remove an extended attribute into external block. @@ -1961,8 +1906,6 @@ out: } ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs); - if (!ret && xblk->xb_attrs.xb_root.xt_list.l_next_free_rec == 0) - ret = ocfs2_restore_xattr_block(inode, xs); end: @@ -2398,7 +2341,8 @@ static int ocfs2_xattr_index_block_find(struct inode *inode, BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash); mlog(0, "find xattr extent rec %u clusters from %llu, the first hash " - "in the rec is %u\n", num_clusters, p_blkno, first_hash); + "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno, + first_hash); ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash, p_blkno, first_hash, num_clusters, xs); @@ -2422,7 +2366,7 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode, memset(&bucket, 0, sizeof(bucket)); mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n", - clusters, blkno); + clusters, (unsigned long long)blkno); for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) { ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, @@ -2440,7 +2384,8 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode, if (i == 0) num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets); - mlog(0, "iterating xattr bucket %llu, first hash %u\n", blkno, + mlog(0, "iterating xattr bucket %llu, first hash %u\n", + (unsigned long long)blkno, le32_to_cpu(bucket.xh->xh_entries[0].xe_name_hash)); if (func) { ret = func(inode, &bucket, para); @@ -2700,9 +2645,9 @@ static int ocfs2_xattr_update_xattr_search(struct inode *inode, return ret; } - i = xs->here - old_xh->xh_entries; - xs->here = &xs->header->xh_entries[i]; } + i = xs->here - old_xh->xh_entries; + xs->here = &xs->header->xh_entries[i]; } return ret; @@ -2776,7 +2721,8 @@ static int ocfs2_xattr_create_index_block(struct inode *inode, */ blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); - mlog(0, "allocate 1 cluster from %llu to xattr block\n", blkno); + mlog(0, "allocate 1 cluster from %llu to xattr block\n", + (unsigned long long)blkno); xh_bh = sb_getblk(inode->i_sb, blkno); if (!xh_bh) { @@ -2818,7 +2764,11 @@ static int ocfs2_xattr_create_index_block(struct inode *inode, if (data_bh) ocfs2_journal_dirty(handle, data_bh); - ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh); + ret = ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh); + if (ret) { + mlog_errno(ret); + goto out_commit; + } /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - @@ -2941,8 +2891,8 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, mlog(0, "adjust xattr bucket in %llu, count = %u, " "xh_free_start = %u, xh_name_value_len = %u.\n", - blkno, le16_to_cpu(xh->xh_count), xh_free_start, - le16_to_cpu(xh->xh_name_value_len)); + (unsigned long long)blkno, le16_to_cpu(xh->xh_count), + xh_free_start, le16_to_cpu(xh->xh_name_value_len)); /* * sort all the entries by their offset. @@ -3058,7 +3008,7 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, prev_blkno += (num_clusters - 1) * bpc + bpc / 2; mlog(0, "move half of xattrs in cluster %llu to %llu\n", - prev_blkno, new_blkno); + (unsigned long long)prev_blkno, (unsigned long long)new_blkno); /* * We need to update the 1st half of the new cluster and @@ -3168,26 +3118,74 @@ static int ocfs2_read_xattr_bucket(struct inode *inode, } /* - * Move half num of the xattrs in old bucket(blk) to new bucket(new_blk). + * Find the suitable pos when we divide a bucket into 2. + * We have to make sure the xattrs with the same hash value exist + * in the same bucket. + * + * If this ocfs2_xattr_header covers more than one hash value, find a + * place where the hash value changes. Try to find the most even split. + * The most common case is that all entries have different hash values, + * and the first check we make will find a place to split. + */ +static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh) +{ + struct ocfs2_xattr_entry *entries = xh->xh_entries; + int count = le16_to_cpu(xh->xh_count); + int delta, middle = count / 2; + + /* + * We start at the middle. Each step gets farther away in both + * directions. We therefore hit the change in hash value + * nearest to the middle. Note that this loop does not execute for + * count < 2. + */ + for (delta = 0; delta < middle; delta++) { + /* Let's check delta earlier than middle */ + if (cmp_xe(&entries[middle - delta - 1], + &entries[middle - delta])) + return middle - delta; + + /* For even counts, don't walk off the end */ + if ((middle + delta + 1) == count) + continue; + + /* Now try delta past middle */ + if (cmp_xe(&entries[middle + delta], + &entries[middle + delta + 1])) + return middle + delta + 1; + } + + /* Every entry had the same hash */ + return count; +} + +/* + * Move some xattrs in old bucket(blk) to new bucket(new_blk). * first_hash will record the 1st hash of the new bucket. + * + * Normally half of the xattrs will be moved. But we have to make + * sure that the xattrs with the same hash value are stored in the + * same bucket. If all the xattrs in this bucket have the same hash + * value, the new bucket will be initialized as an empty one and the + * first_hash will be initialized as (hash_value+1). */ -static int ocfs2_half_xattr_bucket(struct inode *inode, - handle_t *handle, - u64 blk, - u64 new_blk, - u32 *first_hash, - int new_bucket_head) +static int ocfs2_divide_xattr_bucket(struct inode *inode, + handle_t *handle, + u64 blk, + u64 new_blk, + u32 *first_hash, + int new_bucket_head) { int ret, i; - u16 count, start, len, name_value_len, xe_len, name_offset; + int count, start, len, name_value_len = 0, xe_len, name_offset = 0; u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); struct buffer_head **s_bhs, **t_bhs = NULL; struct ocfs2_xattr_header *xh; struct ocfs2_xattr_entry *xe; int blocksize = inode->i_sb->s_blocksize; - mlog(0, "move half of xattrs from bucket %llu to %llu\n", - blk, new_blk); + mlog(0, "move some of xattrs from bucket %llu to %llu\n", + (unsigned long long)blk, (unsigned long long)new_blk); s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS); if (!s_bhs) @@ -3220,21 +3218,44 @@ static int ocfs2_half_xattr_bucket(struct inode *inode, for (i = 0; i < blk_per_bucket; i++) { ret = ocfs2_journal_access(handle, inode, t_bhs[i], - OCFS2_JOURNAL_ACCESS_CREATE); + new_bucket_head ? + OCFS2_JOURNAL_ACCESS_CREATE : + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; } } + xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data; + count = le16_to_cpu(xh->xh_count); + start = ocfs2_xattr_find_divide_pos(xh); + + if (start == count) { + xe = &xh->xh_entries[start-1]; + + /* + * initialized a new empty bucket here. + * The hash value is set as one larger than + * that of the last entry in the previous bucket. + */ + for (i = 0; i < blk_per_bucket; i++) + memset(t_bhs[i]->b_data, 0, blocksize); + + xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data; + xh->xh_free_start = cpu_to_le16(blocksize); + xh->xh_entries[0].xe_name_hash = xe->xe_name_hash; + le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1); + + goto set_num_buckets; + } + /* copy the whole bucket to the new first. */ for (i = 0; i < blk_per_bucket; i++) memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize); /* update the new bucket. */ xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data; - count = le16_to_cpu(xh->xh_count); - start = count / 2; /* * Calculate the total name/value len and xh_free_start for @@ -3291,6 +3312,7 @@ static int ocfs2_half_xattr_bucket(struct inode *inode, xh->xh_free_start = xe->xe_name_offset; } +set_num_buckets: /* set xh->xh_num_buckets for the new xh. */ if (new_bucket_head) xh->xh_num_buckets = cpu_to_le16(1); @@ -3308,9 +3330,13 @@ static int ocfs2_half_xattr_bucket(struct inode *inode, *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); /* - * Now only update the 1st block of the old bucket. - * Please note that the entry has been sorted already above. + * Now only update the 1st block of the old bucket. If we + * just added a new empty bucket, there is no need to modify + * it. */ + if (start == count) + goto out; + xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data; memset(&xh->xh_entries[start], 0, sizeof(struct ocfs2_xattr_entry) * (count - start)); @@ -3358,7 +3384,8 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode, BUG_ON(s_blkno == t_blkno); mlog(0, "cp bucket %llu to %llu, target is %d\n", - s_blkno, t_blkno, t_is_new); + (unsigned long long)s_blkno, (unsigned long long)t_blkno, + t_is_new); s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, GFP_NOFS); @@ -3382,6 +3409,8 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode, for (i = 0; i < blk_per_bucket; i++) { ret = ocfs2_journal_access(handle, inode, t_bhs[i], + t_is_new ? + OCFS2_JOURNAL_ACCESS_CREATE : OCFS2_JOURNAL_ACCESS_WRITE); if (ret) goto out; @@ -3428,7 +3457,8 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode, struct ocfs2_xattr_header *xh; u64 to_blk_start = to_blk; - mlog(0, "cp xattrs from cluster %llu to %llu\n", src_blk, to_blk); + mlog(0, "cp xattrs from cluster %llu to %llu\n", + (unsigned long long)src_blk, (unsigned long long)to_blk); /* * We need to update the new cluster and 1 more for the update of @@ -3493,15 +3523,15 @@ out: } /* - * Move half of the xattrs in this cluster to the new cluster. + * Move some xattrs in this cluster to the new cluster. * This function should only be called when bucket size == cluster size. * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead. */ -static int ocfs2_half_xattr_cluster(struct inode *inode, - handle_t *handle, - u64 prev_blk, - u64 new_blk, - u32 *first_hash) +static int ocfs2_divide_xattr_cluster(struct inode *inode, + handle_t *handle, + u64 prev_blk, + u64 new_blk, + u32 *first_hash) { u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); int ret, credits = 2 * blk_per_bucket; @@ -3515,8 +3545,8 @@ static int ocfs2_half_xattr_cluster(struct inode *inode, } /* Move half of the xattr in start_blk to the next bucket. */ - return ocfs2_half_xattr_bucket(inode, handle, prev_blk, - new_blk, first_hash, 1); + return ocfs2_divide_xattr_bucket(inode, handle, prev_blk, + new_blk, first_hash, 1); } /* @@ -3559,7 +3589,8 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n", - prev_blk, prev_clusters, new_blk); + (unsigned long long)prev_blk, prev_clusters, + (unsigned long long)new_blk); if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, @@ -3578,9 +3609,9 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, last_blk, new_blk, v_start); else { - ret = ocfs2_half_xattr_cluster(inode, handle, - last_blk, new_blk, - v_start); + ret = ocfs2_divide_xattr_cluster(inode, handle, + last_blk, new_blk, + v_start); if ((*header_bh)->b_blocknr == last_blk && extend) *extend = 0; @@ -3629,7 +3660,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, " "previous xattr blkno = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno, - prev_cpos, prev_blkno); + prev_cpos, (unsigned long long)prev_blkno); ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); @@ -3716,7 +3747,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, } } mlog(0, "Insert %u clusters at block %llu for xattr at %u\n", - num_bits, block, v_start); + num_bits, (unsigned long long)block, v_start); ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block, num_bits, 0, meta_ac); if (ret < 0) { @@ -3761,7 +3792,7 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode, u16 bucket = le16_to_cpu(first_xh->xh_num_buckets); mlog(0, "extend xattr bucket in %llu, xattr extend rec starting " - "from %llu, len = %u\n", start_blk, + "from %llu, len = %u\n", (unsigned long long)start_blk, (unsigned long long)first_bh->b_blocknr, num_clusters); BUG_ON(bucket >= num_buckets); @@ -3797,8 +3828,8 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode, } /* Move half of the xattr in start_blk to the next bucket. */ - ret = ocfs2_half_xattr_bucket(inode, handle, start_blk, - start_blk + blk_per_bucket, NULL, 0); + ret = ocfs2_divide_xattr_bucket(inode, handle, start_blk, + start_blk + blk_per_bucket, NULL, 0); le16_add_cpu(&first_xh->xh_num_buckets, 1); ocfs2_journal_dirty(handle, first_bh); @@ -4146,7 +4177,7 @@ static int ocfs2_xattr_value_update_size(struct inode *inode, handle_t *handle = NULL; handle = ocfs2_start_trans(osb, 1); - if (handle == NULL) { + if (IS_ERR(handle)) { ret = -ENOMEM; mlog_errno(ret); goto out; @@ -4313,7 +4344,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode, } handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); - if (handle == NULL) { + if (IS_ERR(handle)) { ret = -ENOMEM; mlog_errno(ret); goto out; @@ -4489,11 +4520,21 @@ out: return ret; } -/* check whether the xattr bucket is filled up with the same hash value. */ +/* + * check whether the xattr bucket is filled up with the same hash value. + * If we want to insert the xattr with the same hash, return -ENOSPC. + * If we want to insert a xattr with different hash value, go ahead + * and ocfs2_divide_xattr_bucket will handle this. + */ static int ocfs2_check_xattr_bucket_collision(struct inode *inode, - struct ocfs2_xattr_bucket *bucket) + struct ocfs2_xattr_bucket *bucket, + const char *name) { struct ocfs2_xattr_header *xh = bucket->xh; + u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); + + if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash)) + return 0; if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash == xh->xh_entries[0].xe_name_hash) { @@ -4616,7 +4657,9 @@ try_again: * one bucket's worth, so check it here whether we need to * add a new bucket for the insert. */ - ret = ocfs2_check_xattr_bucket_collision(inode, &xs->bucket); + ret = ocfs2_check_xattr_bucket_collision(inode, + &xs->bucket, + xi->name); if (ret) { mlog_errno(ret); goto out; @@ -4727,14 +4770,11 @@ out: /* * 'trusted' attributes support */ - -#define XATTR_TRUSTED_PREFIX "trusted." - static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { - const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX) - 1; + const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; if (list && total_len <= list_size) { @@ -4771,18 +4811,14 @@ struct xattr_handler ocfs2_xattr_trusted_handler = { .set = ocfs2_xattr_trusted_set, }; - /* * 'user' attributes support */ - -#define XATTR_USER_PREFIX "user." - static size_t ocfs2_xattr_user_list(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { - const size_t prefix_len = sizeof(XATTR_USER_PREFIX) - 1; + const size_t prefix_len = XATTR_USER_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h index c25c7c62a059..1d8314c7656d 100644 --- a/fs/ocfs2/xattr.h +++ b/fs/ocfs2/xattr.h @@ -3,24 +3,16 @@ * * xattr.h * - * Function prototypes - * - * Copyright (C) 2008 Oracle. All rights reserved. + * Copyright (C) 2004, 2008 Oracle. All rights reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * License version 2 as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef OCFS2_XATTR_H @@ -40,29 +32,11 @@ enum ocfs2_xattr_type { extern struct xattr_handler ocfs2_xattr_user_handler; extern struct xattr_handler ocfs2_xattr_trusted_handler; - -extern ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); -extern int ocfs2_xattr_get(struct inode *, int, const char *, void *, size_t); -extern int ocfs2_xattr_set(struct inode *, int, const char *, const void *, - size_t, int); -extern int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh); extern struct xattr_handler *ocfs2_xattr_handlers[]; -static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) -{ - return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; -} - -static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb) -{ - return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); -} - -static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb) -{ - u16 len = sb->s_blocksize - - offsetof(struct ocfs2_xattr_header, xh_entries); +ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); +int ocfs2_xattr_set(struct inode *, int, const char *, const void *, + size_t, int); +int ocfs2_xattr_remove(struct inode *, struct buffer_head *); - return len / sizeof(struct ocfs2_xattr_entry); -} #endif /* OCFS2_XATTR_H */ diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 633f7a0ebb2c..6d5b213b8a9b 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -348,8 +348,8 @@ static ssize_t whole_disk_show(struct device *dev, static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, whole_disk_show, NULL); -int add_partition(struct gendisk *disk, int partno, - sector_t start, sector_t len, int flags) +struct hd_struct *add_partition(struct gendisk *disk, int partno, + sector_t start, sector_t len, int flags) { struct hd_struct *p; dev_t devt = MKDEV(0, 0); @@ -361,15 +361,15 @@ int add_partition(struct gendisk *disk, int partno, err = disk_expand_part_tbl(disk, partno); if (err) - return err; + return ERR_PTR(err); ptbl = disk->part_tbl; if (ptbl->part[partno]) - return -EBUSY; + return ERR_PTR(-EBUSY); p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) - return -ENOMEM; + return ERR_PTR(-EBUSY); if (!init_part_stats(p)) { err = -ENOMEM; @@ -395,7 +395,7 @@ int add_partition(struct gendisk *disk, int partno, err = blk_alloc_devt(p, &devt); if (err) - goto out_free; + goto out_free_stats; pdev->devt = devt; /* delay uevent until 'holders' subdir is created */ @@ -424,18 +424,20 @@ int add_partition(struct gendisk *disk, int partno, if (!ddev->uevent_suppress) kobject_uevent(&pdev->kobj, KOBJ_ADD); - return 0; + return p; +out_free_stats: + free_part_stats(p); out_free: kfree(p); - return err; + return ERR_PTR(err); out_del: kobject_put(p->holder_dir); device_del(pdev); out_put: put_device(pdev); blk_free_devt(devt); - return err; + return ERR_PTR(err); } /* Not exported, helper to add_disk(). */ @@ -566,15 +568,16 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) disk->disk_name, p, (unsigned long long) size); size = get_capacity(disk) - from; } - res = add_partition(disk, p, from, size, state->parts[p].flags); - if (res) { - printk(KERN_ERR " %s: p%d could not be added: %d\n", - disk->disk_name, p, -res); + part = add_partition(disk, p, from, size, + state->parts[p].flags); + if (IS_ERR(part)) { + printk(KERN_ERR " %s: p%d could not be added: %ld\n", + disk->disk_name, p, -PTR_ERR(part)); continue; } #ifdef CONFIG_BLK_DEV_MD if (state->parts[p].flags & ADDPART_FLAG_RAID) - md_autodetect_dev(bdev->bd_dev+p); + md_autodetect_dev(part_to_dev(part)->devt); #endif } kfree(state); diff --git a/fs/proc/base.c b/fs/proc/base.c index 486cf3fe7139..d4677603c889 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -371,7 +371,7 @@ static int lstats_show_proc(struct seq_file *m, void *v) task->latency_record[i].time, task->latency_record[i].max); for (q = 0; q < LT_BACKTRACEDEPTH; q++) { - char sym[KSYM_NAME_LEN]; + char sym[KSYM_SYMBOL_LEN]; char *c; if (!task->latency_record[i].backtrace[q]) break; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 94fcfff6863a..06ed10b7da9e 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -31,6 +31,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */ inode->i_mode = table->mode; + inode->i_uid = inode->i_gid = 0; if (!table->child) { inode->i_mode |= S_IFREG; inode->i_op = &proc_sys_inode_operations; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index b770c095e45c..3a8bdd7f5756 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -557,9 +557,9 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte) return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); } -static unsigned long pte_to_pagemap_entry(pte_t pte) +static u64 pte_to_pagemap_entry(pte_t pte) { - unsigned long pme = 0; + u64 pme = 0; if (is_swap_pte(pte)) pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte)) | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index 0a6aa2cc78f0..b49884c8c10e 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -234,8 +234,8 @@ int ubifs_bg_thread(void *info) int err; struct ubifs_info *c = info; - ubifs_msg("background thread \"%s\" started, PID %d", - c->bgt_name, current->pid); + dbg_msg("background thread \"%s\" started, PID %d", + c->bgt_name, current->pid); set_freezable(); while (1) { diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 7186400750e7..510ffa0bbda4 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -101,21 +101,24 @@ static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) { switch (type) { case UBIFS_INO_KEY: - sprintf(p, "(%lu, %s)", key_inum(c, key), + sprintf(p, "(%lu, %s)", (unsigned long)key_inum(c, key), get_key_type(type)); break; case UBIFS_DENT_KEY: case UBIFS_XENT_KEY: - sprintf(p, "(%lu, %s, %#08x)", key_inum(c, key), + sprintf(p, "(%lu, %s, %#08x)", + (unsigned long)key_inum(c, key), get_key_type(type), key_hash(c, key)); break; case UBIFS_DATA_KEY: - sprintf(p, "(%lu, %s, %u)", key_inum(c, key), + sprintf(p, "(%lu, %s, %u)", + (unsigned long)key_inum(c, key), get_key_type(type), key_block(c, key)); break; case UBIFS_TRUN_KEY: sprintf(p, "(%lu, %s)", - key_inum(c, key), get_key_type(type)); + (unsigned long)key_inum(c, key), + get_key_type(type)); break; default: sprintf(p, "(bad key type: %#08x, %#08x)", @@ -364,8 +367,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) le32_to_cpu(mst->ihead_lnum)); printk(KERN_DEBUG "\tihead_offs %u\n", le32_to_cpu(mst->ihead_offs)); - printk(KERN_DEBUG "\tindex_size %u\n", - le32_to_cpu(mst->index_size)); + printk(KERN_DEBUG "\tindex_size %llu\n", + (unsigned long long)le64_to_cpu(mst->index_size)); printk(KERN_DEBUG "\tlpt_lnum %u\n", le32_to_cpu(mst->lpt_lnum)); printk(KERN_DEBUG "\tlpt_offs %u\n", @@ -1589,7 +1592,7 @@ static struct fsck_inode *add_inode(struct ubifs_info *c, if (inum > c->highest_inum) { ubifs_err("too high inode number, max. is %lu", - c->highest_inum); + (unsigned long)c->highest_inum); return ERR_PTR(-EINVAL); } @@ -1668,16 +1671,18 @@ static struct fsck_inode *read_add_inode(struct ubifs_info *c, ino_key_init(c, &key, inum); err = ubifs_lookup_level0(c, &key, &znode, &n); if (!err) { - ubifs_err("inode %lu not found in index", inum); + ubifs_err("inode %lu not found in index", (unsigned long)inum); return ERR_PTR(-ENOENT); } else if (err < 0) { - ubifs_err("error %d while looking up inode %lu", err, inum); + ubifs_err("error %d while looking up inode %lu", + err, (unsigned long)inum); return ERR_PTR(err); } zbr = &znode->zbranch[n]; if (zbr->len < UBIFS_INO_NODE_SZ) { - ubifs_err("bad node %lu node length %d", inum, zbr->len); + ubifs_err("bad node %lu node length %d", + (unsigned long)inum, zbr->len); return ERR_PTR(-EINVAL); } @@ -1697,7 +1702,7 @@ static struct fsck_inode *read_add_inode(struct ubifs_info *c, kfree(ino); if (IS_ERR(fscki)) { ubifs_err("error %ld while adding inode %lu node", - PTR_ERR(fscki), inum); + PTR_ERR(fscki), (unsigned long)inum); return fscki; } @@ -1786,7 +1791,8 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, if (IS_ERR(fscki)) { err = PTR_ERR(fscki); ubifs_err("error %d while processing data node and " - "trying to find inode node %lu", err, inum); + "trying to find inode node %lu", + err, (unsigned long)inum); goto out_dump; } @@ -1819,7 +1825,8 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, if (IS_ERR(fscki)) { err = PTR_ERR(fscki); ubifs_err("error %d while processing entry node and " - "trying to find inode node %lu", err, inum); + "trying to find inode node %lu", + err, (unsigned long)inum); goto out_dump; } @@ -1832,7 +1839,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, err = PTR_ERR(fscki); ubifs_err("error %d while processing entry node and " "trying to find parent inode node %lu", - err, inum); + err, (unsigned long)inum); goto out_dump; } @@ -1923,7 +1930,8 @@ static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd) fscki->references != 1) { ubifs_err("directory inode %lu has %d " "direntries which refer it, but " - "should be 1", fscki->inum, + "should be 1", + (unsigned long)fscki->inum, fscki->references); goto out_dump; } @@ -1931,27 +1939,29 @@ static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd) fscki->references != 0) { ubifs_err("root inode %lu has non-zero (%d) " "direntries which refer it", - fscki->inum, fscki->references); + (unsigned long)fscki->inum, + fscki->references); goto out_dump; } if (fscki->calc_sz != fscki->size) { ubifs_err("directory inode %lu size is %lld, " "but calculated size is %lld", - fscki->inum, fscki->size, - fscki->calc_sz); + (unsigned long)fscki->inum, + fscki->size, fscki->calc_sz); goto out_dump; } if (fscki->calc_cnt != fscki->nlink) { ubifs_err("directory inode %lu nlink is %d, " "but calculated nlink is %d", - fscki->inum, fscki->nlink, - fscki->calc_cnt); + (unsigned long)fscki->inum, + fscki->nlink, fscki->calc_cnt); goto out_dump; } } else { if (fscki->references != fscki->nlink) { ubifs_err("inode %lu nlink is %d, but " - "calculated nlink is %d", fscki->inum, + "calculated nlink is %d", + (unsigned long)fscki->inum, fscki->nlink, fscki->references); goto out_dump; } @@ -1959,20 +1969,21 @@ static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd) if (fscki->xattr_sz != fscki->calc_xsz) { ubifs_err("inode %lu has xattr size %u, but " "calculated size is %lld", - fscki->inum, fscki->xattr_sz, + (unsigned long)fscki->inum, fscki->xattr_sz, fscki->calc_xsz); goto out_dump; } if (fscki->xattr_cnt != fscki->calc_xcnt) { ubifs_err("inode %lu has %u xattrs, but " - "calculated count is %lld", fscki->inum, + "calculated count is %lld", + (unsigned long)fscki->inum, fscki->xattr_cnt, fscki->calc_xcnt); goto out_dump; } if (fscki->xattr_nms != fscki->calc_xnms) { ubifs_err("inode %lu has xattr names' size %u, but " "calculated names' size is %lld", - fscki->inum, fscki->xattr_nms, + (unsigned long)fscki->inum, fscki->xattr_nms, fscki->calc_xnms); goto out_dump; } @@ -1985,11 +1996,12 @@ out_dump: ino_key_init(c, &key, fscki->inum); err = ubifs_lookup_level0(c, &key, &znode, &n); if (!err) { - ubifs_err("inode %lu not found in index", fscki->inum); + ubifs_err("inode %lu not found in index", + (unsigned long)fscki->inum); return -ENOENT; } else if (err < 0) { ubifs_err("error %d while looking up inode %lu", - err, fscki->inum); + err, (unsigned long)fscki->inum); return err; } @@ -2007,7 +2019,7 @@ out_dump: } ubifs_msg("dump of the inode %lu sitting in LEB %d:%d", - fscki->inum, zbr->lnum, zbr->offs); + (unsigned long)fscki->inum, zbr->lnum, zbr->offs); dbg_dump_node(c, ino); kfree(ino); return -EINVAL; diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 526c01ec8003..0422c98e1793 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -161,7 +161,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, return ERR_PTR(-EINVAL); } ubifs_warn("running out of inode numbers (current %lu, max %d)", - c->highest_inum, INUM_WATERMARK); + (unsigned long)c->highest_inum, INUM_WATERMARK); } inode->i_ino = ++c->highest_inum; @@ -428,7 +428,8 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) dbg_gen("feed '%s', ino %llu, new f_pos %#x", dent->name, (unsigned long long)le64_to_cpu(dent->inum), key_hash_flash(c, &dent->key)); - ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum); + ubifs_assert(le64_to_cpu(dent->ch.sqnum) > + ubifs_inode(dir)->creat_sqnum); nm.len = le16_to_cpu(dent->nlen); over = filldir(dirent, dent->name, nm.len, file->f_pos, diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 51cf511d44d9..2624411d9758 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -72,7 +72,7 @@ static int read_block(struct inode *inode, void *addr, unsigned int block, return err; } - ubifs_assert(dn->ch.sqnum > ubifs_inode(inode)->creat_sqnum); + ubifs_assert(le64_to_cpu(dn->ch.sqnum) > ubifs_inode(inode)->creat_sqnum); len = le32_to_cpu(dn->size); if (len <= 0 || len > UBIFS_BLOCK_SIZE) @@ -626,7 +626,7 @@ static int populate_page(struct ubifs_info *c, struct page *page, dn = bu->buf + (bu->zbranch[nn].offs - offs); - ubifs_assert(dn->ch.sqnum > + ubifs_assert(le64_to_cpu(dn->ch.sqnum) > ubifs_inode(inode)->creat_sqnum); len = le32_to_cpu(dn->size); @@ -691,32 +691,22 @@ out_err: /** * ubifs_do_bulk_read - do bulk-read. * @c: UBIFS file-system description object - * @page1: first page + * @bu: bulk-read information + * @page1: first page to read * * This function returns %1 if the bulk-read is done, otherwise %0 is returned. */ -static int ubifs_do_bulk_read(struct ubifs_info *c, struct page *page1) +static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu, + struct page *page1) { pgoff_t offset = page1->index, end_index; struct address_space *mapping = page1->mapping; struct inode *inode = mapping->host; struct ubifs_inode *ui = ubifs_inode(inode); - struct bu_info *bu; int err, page_idx, page_cnt, ret = 0, n = 0; + int allocate = bu->buf ? 0 : 1; loff_t isize; - bu = kmalloc(sizeof(struct bu_info), GFP_NOFS); - if (!bu) - return 0; - - bu->buf_len = c->bulk_read_buf_size; - bu->buf = kmalloc(bu->buf_len, GFP_NOFS); - if (!bu->buf) - goto out_free; - - data_key_init(c, &bu->key, inode->i_ino, - offset << UBIFS_BLOCKS_PER_PAGE_SHIFT); - err = ubifs_tnc_get_bu_keys(c, bu); if (err) goto out_warn; @@ -735,12 +725,25 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct page *page1) * together. If all the pages were like this, bulk-read would * reduce performance, so we turn it off for a while. */ - ui->read_in_a_row = 0; - ui->bulk_read = 0; - goto out_free; + goto out_bu_off; } if (bu->cnt) { + if (allocate) { + /* + * Allocate bulk-read buffer depending on how many data + * nodes we are going to read. + */ + bu->buf_len = bu->zbranch[bu->cnt - 1].offs + + bu->zbranch[bu->cnt - 1].len - + bu->zbranch[0].offs; + ubifs_assert(bu->buf_len > 0); + ubifs_assert(bu->buf_len <= c->leb_size); + bu->buf = kmalloc(bu->buf_len, GFP_NOFS | __GFP_NOWARN); + if (!bu->buf) + goto out_bu_off; + } + err = ubifs_tnc_bulk_read(c, bu); if (err) goto out_warn; @@ -779,13 +782,17 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct page *page1) ui->last_page_read = offset + page_idx - 1; out_free: - kfree(bu->buf); - kfree(bu); + if (allocate) + kfree(bu->buf); return ret; out_warn: ubifs_warn("ignoring error %d and skipping bulk-read", err); goto out_free; + +out_bu_off: + ui->read_in_a_row = ui->bulk_read = 0; + goto out_free; } /** @@ -803,18 +810,20 @@ static int ubifs_bulk_read(struct page *page) struct ubifs_info *c = inode->i_sb->s_fs_info; struct ubifs_inode *ui = ubifs_inode(inode); pgoff_t index = page->index, last_page_read = ui->last_page_read; - int ret = 0; + struct bu_info *bu; + int err = 0, allocated = 0; ui->last_page_read = index; - if (!c->bulk_read) return 0; + /* - * Bulk-read is protected by ui_mutex, but it is an optimization, so - * don't bother if we cannot lock the mutex. + * Bulk-read is protected by @ui->ui_mutex, but it is an optimization, + * so don't bother if we cannot lock the mutex. */ if (!mutex_trylock(&ui->ui_mutex)) return 0; + if (index != last_page_read + 1) { /* Turn off bulk-read if we stop reading sequentially */ ui->read_in_a_row = 1; @@ -822,6 +831,7 @@ static int ubifs_bulk_read(struct page *page) ui->bulk_read = 0; goto out_unlock; } + if (!ui->bulk_read) { ui->read_in_a_row += 1; if (ui->read_in_a_row < 3) @@ -829,10 +839,35 @@ static int ubifs_bulk_read(struct page *page) /* Three reads in a row, so switch on bulk-read */ ui->bulk_read = 1; } - ret = ubifs_do_bulk_read(c, page); + + /* + * If possible, try to use pre-allocated bulk-read information, which + * is protected by @c->bu_mutex. + */ + if (mutex_trylock(&c->bu_mutex)) + bu = &c->bu; + else { + bu = kmalloc(sizeof(struct bu_info), GFP_NOFS | __GFP_NOWARN); + if (!bu) + goto out_unlock; + + bu->buf = NULL; + allocated = 1; + } + + bu->buf_len = c->max_bu_buf_len; + data_key_init(c, &bu->key, inode->i_ino, + page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT); + err = ubifs_do_bulk_read(c, bu, page); + + if (!allocated) + mutex_unlock(&c->bu_mutex); + else + kfree(bu); + out_unlock: mutex_unlock(&ui->ui_mutex); - return ret; + return err; } static int ubifs_readpage(struct file *file, struct page *page) diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 22993f867d19..f91b745908ea 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -690,8 +690,9 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR; struct ubifs_inode *ui = ubifs_inode(inode); - dbg_jnl("ino %lu, blk %u, len %d, key %s", key_inum(c, key), - key_block(c, key), len, DBGKEY(key)); + dbg_jnl("ino %lu, blk %u, len %d, key %s", + (unsigned long)key_inum(c, key), key_block(c, key), len, + DBGKEY(key)); ubifs_assert(len <= UBIFS_BLOCK_SIZE); data = kmalloc(dlen, GFP_NOFS); @@ -1128,7 +1129,8 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, ino_t inum = inode->i_ino; unsigned int blk; - dbg_jnl("ino %lu, size %lld -> %lld", inum, old_size, new_size); + dbg_jnl("ino %lu, size %lld -> %lld", + (unsigned long)inum, old_size, new_size); ubifs_assert(!ui->data_len); ubifs_assert(S_ISREG(inode->i_mode)); ubifs_assert(mutex_is_locked(&ui->ui_mutex)); diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index 9ee65086f627..3f1f16bc25c9 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h @@ -345,7 +345,7 @@ static inline int key_type_flash(const struct ubifs_info *c, const void *k) { const union ubifs_key *key = k; - return le32_to_cpu(key->u32[1]) >> UBIFS_S_KEY_BLOCK_BITS; + return le32_to_cpu(key->j32[1]) >> UBIFS_S_KEY_BLOCK_BITS; } /** @@ -416,7 +416,7 @@ static inline unsigned int key_block_flash(const struct ubifs_info *c, { const union ubifs_key *key = k; - return le32_to_cpu(key->u32[1]) & UBIFS_S_KEY_BLOCK_MASK; + return le32_to_cpu(key->j32[1]) & UBIFS_S_KEY_BLOCK_MASK; } /** diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index eed5a0025d63..a41434b42785 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -571,8 +571,6 @@ static struct ubifs_pnode *next_pnode(struct ubifs_info *c, /* We assume here that LEB zero is never an LPT LEB */ if (nnode->nbranch[iip].lnum) return ubifs_get_pnode(c, nnode, iip); - else - return NULL; } /* Go up while can't go right */ diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 02d3462f4d3e..9bd5a43d4526 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -105,7 +105,7 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) list_add_tail(&orphan->list, &c->orph_list); list_add_tail(&orphan->new_list, &c->orph_new); spin_unlock(&c->orphan_lock); - dbg_gen("ino %lu", inum); + dbg_gen("ino %lu", (unsigned long)inum); return 0; } @@ -132,14 +132,16 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum) else { if (o->dnext) { spin_unlock(&c->orphan_lock); - dbg_gen("deleted twice ino %lu", inum); + dbg_gen("deleted twice ino %lu", + (unsigned long)inum); return; } if (o->cnext) { o->dnext = c->orph_dnext; c->orph_dnext = o; spin_unlock(&c->orphan_lock); - dbg_gen("delete later ino %lu", inum); + dbg_gen("delete later ino %lu", + (unsigned long)inum); return; } rb_erase(p, &c->orph_tree); @@ -151,12 +153,12 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum) } spin_unlock(&c->orphan_lock); kfree(o); - dbg_gen("inum %lu", inum); + dbg_gen("inum %lu", (unsigned long)inum); return; } } spin_unlock(&c->orphan_lock); - dbg_err("missing orphan ino %lu", inum); + dbg_err("missing orphan ino %lu", (unsigned long)inum); dbg_dump_stack(); } @@ -448,7 +450,7 @@ static void erase_deleted(struct ubifs_info *c) rb_erase(&orphan->rb, &c->orph_tree); list_del(&orphan->list); c->tot_orphans -= 1; - dbg_gen("deleting orphan ino %lu", orphan->inum); + dbg_gen("deleting orphan ino %lu", (unsigned long)orphan->inum); kfree(orphan); } c->orph_dnext = NULL; @@ -536,8 +538,8 @@ static int insert_dead_orphan(struct ubifs_info *c, ino_t inum) list_add_tail(&orphan->list, &c->orph_list); orphan->dnext = c->orph_dnext; c->orph_dnext = orphan; - dbg_mnt("ino %lu, new %d, tot %d", - inum, c->new_orphans, c->tot_orphans); + dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum, + c->new_orphans, c->tot_orphans); return 0; } @@ -609,7 +611,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3; for (i = 0; i < n; i++) { inum = le64_to_cpu(orph->inos[i]); - dbg_rcvry("deleting orphaned inode %lu", inum); + dbg_rcvry("deleting orphaned inode %lu", + (unsigned long)inum); err = ubifs_tnc_remove_ino(c, inum); if (err) return err; @@ -840,8 +843,8 @@ static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr, if (inum != ci->last_ino) { /* Lowest node type is the inode node, so it comes first */ if (key_type(c, &zbr->key) != UBIFS_INO_KEY) - ubifs_err("found orphan node ino %lu, type %d", inum, - key_type(c, &zbr->key)); + ubifs_err("found orphan node ino %lu, type %d", + (unsigned long)inum, key_type(c, &zbr->key)); ci->last_ino = inum; ci->tot_inos += 1; err = ubifs_tnc_read_node(c, zbr, ci->node); @@ -853,7 +856,8 @@ static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr, /* Must be recorded as an orphan */ if (!dbg_find_check_orphan(&ci->root, inum) && !dbg_find_orphan(c, inum)) { - ubifs_err("missing orphan, ino %lu", inum); + ubifs_err("missing orphan, ino %lu", + (unsigned long)inum); ci->missing += 1; } } diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 77d26c141cf6..90acac603e63 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -168,12 +168,12 @@ static int write_rcvrd_mst_node(struct ubifs_info *c, struct ubifs_mst_node *mst) { int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz; - uint32_t save_flags; + __le32 save_flags; dbg_rcvry("recovery"); save_flags = mst->flags; - mst->flags = cpu_to_le32(le32_to_cpu(mst->flags) | UBIFS_MST_RCVRY); + mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY); ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1); err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM); @@ -1435,13 +1435,13 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); if (err) goto out; - dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", e->inum, lnum, offs, - i_size, e->d_size); + dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", + (unsigned long)e->inum, lnum, offs, i_size, e->d_size); return 0; out: ubifs_warn("inode %lu failed to fix size %lld -> %lld error %d", - e->inum, e->i_size, e->d_size, err); + (unsigned long)e->inum, e->i_size, e->d_size, err); return err; } @@ -1472,7 +1472,8 @@ int ubifs_recover_size(struct ubifs_info *c) return err; if (err == -ENOENT) { /* Remove data nodes that have no inode */ - dbg_rcvry("removing ino %lu", e->inum); + dbg_rcvry("removing ino %lu", + (unsigned long)e->inum); err = ubifs_tnc_remove_ino(c, e->inum); if (err) return err; @@ -1493,8 +1494,8 @@ int ubifs_recover_size(struct ubifs_info *c) return PTR_ERR(inode); if (inode->i_size < e->d_size) { dbg_rcvry("ino %lu size %lld -> %lld", - e->inum, e->d_size, - inode->i_size); + (unsigned long)e->inum, + e->d_size, inode->i_size); inode->i_size = e->d_size; ubifs_inode(inode)->ui_size = e->d_size; e->inode = inode; diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 7399692af859..21f7d047c306 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -1065,7 +1065,7 @@ int ubifs_replay_journal(struct ubifs_info *c) ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, - c->highest_inum); + (unsigned long)c->highest_inum); out: destroy_replay_tree(c); destroy_bud_list(c); diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 2bf753b38889..0f392351dc5a 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -81,6 +81,7 @@ static int create_default_filesystem(struct ubifs_info *c) int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0; int min_leb_cnt = UBIFS_MIN_LEB_CNT; uint64_t tmp64, main_bytes; + __le64 tmp_le64; /* Some functions called from here depend on the @c->key_len filed */ c->key_len = UBIFS_SK_LEN; @@ -295,10 +296,10 @@ static int create_default_filesystem(struct ubifs_info *c) ino->ch.node_type = UBIFS_INO_NODE; ino->creat_sqnum = cpu_to_le64(++c->max_sqnum); ino->nlink = cpu_to_le32(2); - tmp = cpu_to_le64(CURRENT_TIME_SEC.tv_sec); - ino->atime_sec = tmp; - ino->ctime_sec = tmp; - ino->mtime_sec = tmp; + tmp_le64 = cpu_to_le64(CURRENT_TIME_SEC.tv_sec); + ino->atime_sec = tmp_le64; + ino->ctime_sec = tmp_le64; + ino->mtime_sec = tmp_le64; ino->atime_nsec = 0; ino->ctime_nsec = 0; ino->mtime_nsec = 0; diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 8780efbf40ac..d80b2aef42b6 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -36,6 +36,12 @@ #include <linux/mount.h> #include "ubifs.h" +/* + * Maximum amount of memory we may 'kmalloc()' without worrying that we are + * allocating too much. + */ +#define UBIFS_KMALLOC_OK (128*1024) + /* Slab cache for UBIFS inodes */ struct kmem_cache *ubifs_inode_slab; @@ -561,18 +567,11 @@ static int init_constants_early(struct ubifs_info *c) * calculations when reporting free space. */ c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ; - /* Buffer size for bulk-reads */ - c->bulk_read_buf_size = UBIFS_MAX_BULK_READ * UBIFS_MAX_DATA_NODE_SZ; - if (c->bulk_read_buf_size > c->leb_size) - c->bulk_read_buf_size = c->leb_size; - if (c->bulk_read_buf_size > 128 * 1024) { - /* Check if we can kmalloc more than 128KiB */ - void *try = kmalloc(c->bulk_read_buf_size, GFP_KERNEL); - kfree(try); - if (!try) - c->bulk_read_buf_size = 128 * 1024; - } + /* Buffer size for bulk-reads */ + c->max_bu_buf_len = UBIFS_MAX_BULK_READ * UBIFS_MAX_DATA_NODE_SZ; + if (c->max_bu_buf_len > c->leb_size) + c->max_bu_buf_len = c->leb_size; return 0; } @@ -992,6 +991,34 @@ static void destroy_journal(struct ubifs_info *c) } /** + * bu_init - initialize bulk-read information. + * @c: UBIFS file-system description object + */ +static void bu_init(struct ubifs_info *c) +{ + ubifs_assert(c->bulk_read == 1); + + if (c->bu.buf) + return; /* Already initialized */ + +again: + c->bu.buf = kmalloc(c->max_bu_buf_len, GFP_KERNEL | __GFP_NOWARN); + if (!c->bu.buf) { + if (c->max_bu_buf_len > UBIFS_KMALLOC_OK) { + c->max_bu_buf_len = UBIFS_KMALLOC_OK; + goto again; + } + + /* Just disable bulk-read */ + ubifs_warn("Cannot allocate %d bytes of memory for bulk-read, " + "disabling it", c->max_bu_buf_len); + c->mount_opts.bulk_read = 1; + c->bulk_read = 0; + return; + } +} + +/** * mount_ubifs - mount UBIFS file-system. * @c: UBIFS file-system description object * @@ -1059,6 +1086,13 @@ static int mount_ubifs(struct ubifs_info *c) goto out_free; } + if (c->bulk_read == 1) + bu_init(c); + + /* + * We have to check all CRCs, even for data nodes, when we mount the FS + * (specifically, when we are replaying). + */ c->always_chk_crc = 1; err = ubifs_read_superblock(c); @@ -1289,6 +1323,7 @@ out_cbuf: out_dereg: dbg_failure_mode_deregistration(c); out_free: + kfree(c->bu.buf); vfree(c->ileb_buf); vfree(c->sbuf); kfree(c->bottom_up_buf); @@ -1325,10 +1360,11 @@ static void ubifs_umount(struct ubifs_info *c) kfree(c->cbuf); kfree(c->rcvrd_mst_node); kfree(c->mst_node); + kfree(c->bu.buf); + vfree(c->ileb_buf); vfree(c->sbuf); kfree(c->bottom_up_buf); UBIFS_DBG(vfree(c->dbg_buf)); - vfree(c->ileb_buf); dbg_failure_mode_deregistration(c); } @@ -1626,6 +1662,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) ubifs_err("invalid or unknown remount parameter"); return err; } + if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { err = ubifs_remount_rw(c); if (err) @@ -1633,6 +1670,14 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) ubifs_remount_ro(c); + if (c->bulk_read == 1) + bu_init(c); + else { + dbg_gen("disable bulk-read"); + kfree(c->bu.buf); + c->bu.buf = NULL; + } + return 0; } @@ -1723,6 +1768,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) mutex_init(&c->log_mutex); mutex_init(&c->mst_mutex); mutex_init(&c->umount_mutex); + mutex_init(&c->bu_mutex); init_waitqueue_head(&c->cmt_wq); c->buds = RB_ROOT; c->old_idx = RB_ROOT; diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index d27fd918b9c9..6eef5344a145 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -1501,7 +1501,12 @@ out: * @bu: bulk-read parameters and results * * Lookup consecutive data node keys for the same inode that reside - * consecutively in the same LEB. + * consecutively in the same LEB. This function returns zero in case of success + * and a negative error code in case of failure. + * + * Note, if the bulk-read buffer length (@bu->buf_len) is known, this function + * makes sure bulk-read nodes fit the buffer. Otherwise, this function prepares + * maxumum possible amount of nodes for bulk-read. */ int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu) { @@ -2677,7 +2682,7 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum) struct ubifs_dent_node *xent, *pxent = NULL; struct qstr nm = { .name = NULL }; - dbg_tnc("ino %lu", inum); + dbg_tnc("ino %lu", (unsigned long)inum); /* * Walk all extended attribute entries and remove them together with @@ -2697,7 +2702,8 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum) } xattr_inum = le64_to_cpu(xent->inum); - dbg_tnc("xent '%s', ino %lu", xent->name, xattr_inum); + dbg_tnc("xent '%s', ino %lu", xent->name, + (unsigned long)xattr_inum); nm.name = xent->name; nm.len = le16_to_cpu(xent->nlen); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index a7bd32fa15b9..46b172560a06 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -753,7 +753,7 @@ struct ubifs_znode { }; /** - * struct bu_info - bulk-read information + * struct bu_info - bulk-read information. * @key: first data node key * @zbranch: zbranches of data nodes to bulk read * @buf: buffer to read into @@ -969,7 +969,10 @@ struct ubifs_mount_opts { * @mst_node: master node * @mst_offs: offset of valid master node * @mst_mutex: protects the master node area, @mst_node, and @mst_offs - * @bulk_read_buf_size: buffer size for bulk-reads + * + * @max_bu_buf_len: maximum bulk-read buffer length + * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu + * @bu: pre-allocated bulk-read information * * @log_lebs: number of logical eraseblocks in the log * @log_bytes: log size in bytes @@ -1217,7 +1220,10 @@ struct ubifs_info { struct ubifs_mst_node *mst_node; int mst_offs; struct mutex mst_mutex; - int bulk_read_buf_size; + + int max_bu_buf_len; + struct mutex bu_mutex; + struct bu_info bu; int log_lebs; long long log_bytes; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 6e74b117aaf0..30ebde490f7f 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -106,6 +106,7 @@ void udf_clear_inode(struct inode *inode) udf_truncate_tail_extent(inode); unlock_kernel(); write_inode_now(inode, 0); + invalidate_inode_buffers(inode); } iinfo = UDF_I(inode); kfree(iinfo->i_ext.i_data); diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 9e561a9cefca..a11a8390bf6c 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -1566,11 +1566,14 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) int nmap, error, w, count, c, got, i, mapi; xfs_trans_t *tp; xfs_mount_t *mp; + xfs_drfsbno_t nblks; dp = args->dp; mp = dp->i_mount; w = args->whichfork; tp = args->trans; + nblks = dp->i_d.di_nblocks; + /* * For new directories adjust the file offset and block count. */ @@ -1647,6 +1650,8 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) } if (mapp != &map) kmem_free(mapp); + /* account for newly allocated blocks in reserved blocks total */ + args->total -= dp->i_d.di_nblocks - nblks; *new_blkno = (xfs_dablk_t)bno; return 0; } diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index 80e0dc51361c..1afb12278b8d 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -525,11 +525,13 @@ xfs_dir2_grow_inode( xfs_mount_t *mp; int nmap; /* number of bmap entries */ xfs_trans_t *tp; + xfs_drfsbno_t nblks; xfs_dir2_trace_args_s("grow_inode", args, space); dp = args->dp; tp = args->trans; mp = dp->i_mount; + nblks = dp->i_d.di_nblocks; /* * Set lowest possible block in the space requested. */ @@ -622,7 +624,11 @@ xfs_dir2_grow_inode( */ if (mapp != &map) kmem_free(mapp); + + /* account for newly allocated blocks in reserved blocks total */ + args->total -= dp->i_d.di_nblocks - nblks; *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno); + /* * Update file's size if this is the data space and it grew. */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index dbd9cef852ec..a391b955df01 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1414,7 +1414,7 @@ xfs_itruncate_start( mp = ip->i_mount; /* wait for the completion of any pending DIOs */ - if (new_size < ip->i_size) + if (new_size == 0 || new_size < ip->i_size) vn_iowait(ip); /* diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 0b02c6443551..3608a0f0a5f6 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -563,6 +563,11 @@ xfs_log_mount( } mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); + if (!mp->m_log) { + cmn_err(CE_WARN, "XFS: Log allocation failed: No memory!"); + error = ENOMEM; + goto out; + } /* * Initialize the AIL now we have a log. @@ -601,6 +606,7 @@ xfs_log_mount( return 0; error: xfs_log_unmount_dealloc(mp); +out: return error; } /* xfs_log_mount */ @@ -1217,7 +1223,9 @@ xlog_alloc_log(xfs_mount_t *mp, int i; int iclogsize; - log = (xlog_t *)kmem_zalloc(sizeof(xlog_t), KM_SLEEP); + log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); + if (!log) + return NULL; log->l_mp = mp; log->l_targ = log_target; @@ -1249,6 +1257,8 @@ xlog_alloc_log(xfs_mount_t *mp, xlog_get_iclog_buffer_size(mp, log); bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); + if (!bp) + goto out_free_log; XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb); XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); @@ -1275,13 +1285,17 @@ xlog_alloc_log(xfs_mount_t *mp, iclogsize = log->l_iclog_size; ASSERT(log->l_iclog_size >= 4096); for (i=0; i < log->l_iclog_bufs; i++) { - *iclogp = (xlog_in_core_t *) - kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP); + *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL); + if (!*iclogp) + goto out_free_iclog; + iclog = *iclogp; iclog->ic_prev = prev_iclog; prev_iclog = iclog; bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp); + if (!bp) + goto out_free_iclog; if (!XFS_BUF_CPSEMA(bp)) ASSERT(0); XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); @@ -1323,6 +1337,25 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */ return log; + +out_free_iclog: + for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { + prev_iclog = iclog->ic_next; + if (iclog->ic_bp) { + sv_destroy(&iclog->ic_force_wait); + sv_destroy(&iclog->ic_write_wait); + xfs_buf_free(iclog->ic_bp); + xlog_trace_iclog_dealloc(iclog); + } + kmem_free(iclog); + } + spinlock_destroy(&log->l_icloglock); + spinlock_destroy(&log->l_grant_lock); + xlog_trace_loggrant_dealloc(log); + xfs_buf_free(log->l_xbuf); +out_free_log: + kmem_free(log); + return NULL; } /* xlog_alloc_log */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 82d46ce69d5f..70e3ba32e6be 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1419,7 +1419,13 @@ xlog_recover_add_to_trans( return 0; item = trans->r_itemq; if (item == NULL) { - ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC); + /* we need to catch log corruptions here */ + if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { + xlog_warn("XFS: xlog_recover_add_to_trans: " + "bad header magic number"); + ASSERT(0); + return XFS_ERROR(EIO); + } if (len == sizeof(xfs_trans_header_t)) xlog_recover_add_item(&trans->r_itemq); memcpy(&trans->r_theader, dp, len); /* d, s, l */ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index a4503f5e9497..15f5dd22fbb2 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1245,6 +1245,9 @@ xfs_unmountfs( XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); + if (mp->m_quotainfo) + XFS_QM_DONE(mp); + /* * Flush out the log synchronously so that we know for sure * that nothing is pinned. This is important because bflush() @@ -1297,8 +1300,6 @@ xfs_unmountfs( xfs_errortag_clearall(mp, 0); #endif xfs_free_perag(mp); - if (mp->m_quotainfo) - XFS_QM_DONE(mp); } STATIC void diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index d700dacdb10e..c903130be7fd 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -212,7 +212,7 @@ xfs_rename( if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) { error = XFS_ERROR(EXDEV); - xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); + xfs_rename_unlock4(inodes, XFS_ILOCK_EXCL); xfs_trans_cancel(tp, cancel_flags); goto std_return; } |